From a56f2393c48176125e163992e1f1b3a26fb91562 Mon Sep 17 00:00:00 2001 From: PKumarAditya Date: Mon, 12 May 2025 14:40:15 +0530 Subject: [PATCH 001/115] i386/kvm: Move architectural CPUID leaf generation to separate helper commit a5acf4f26c208a05d05ef1bde65553ce2ab5e5d0 upstream Move the architectural (for lack of a better term) CPUID leaf generation to a separate helper so that the generation code can be reused by TDX, which needs to generate a canonical VM-scoped configuration. For now this is just a cleanup, so keep the function static. Signed-off-by: Sean Christopherson Signed-off-by: Xiaoyao Li Message-ID: <20240229063726.610065-23-xiaoyao.li@intel.com> Reviewed-by: Xiaoyao Li Signed-off-by: Paolo Bonzini Signed-off-by: PKumarAditya Signed-off-by: PrithivishS --- target/i386/kvm/kvm.c | 417 +++++++++++++++++++++--------------------- 1 file changed, 211 insertions(+), 206 deletions(-) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 2f379876e6..b83e85ae4d 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -1701,195 +1701,22 @@ static void kvm_init_nested_state(CPUX86State *env) } } -int kvm_arch_init_vcpu(CPUState *cs) +static uint32_t kvm_x86_build_cpuid(CPUX86State *env, + struct kvm_cpuid_entry2 *entries, + uint32_t cpuid_i) { - struct { - struct kvm_cpuid2 cpuid; - struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; - } cpuid_data; - /* - * The kernel defines these structs with padding fields so there - * should be no extra padding in our cpuid_data struct. - */ - QEMU_BUILD_BUG_ON(sizeof(cpuid_data) != - sizeof(struct kvm_cpuid2) + - sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); - - X86CPU *cpu = X86_CPU(cs); - CPUX86State *env = &cpu->env; - uint32_t limit, i, j, cpuid_i; + uint32_t limit, i, j; uint32_t unused; struct kvm_cpuid_entry2 *c; - uint32_t signature[3]; - int kvm_base = KVM_CPUID_SIGNATURE; - int max_nested_state_len; - int r; - Error *local_err = NULL; - - memset(&cpuid_data, 0, sizeof(cpuid_data)); - - cpuid_i = 0; - - has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2); - - r = kvm_arch_set_tsc_khz(cs); - if (r < 0) { - return r; - } - - /* vcpu's TSC frequency is either specified by user, or following - * the value used by KVM if the former is not present. In the - * latter case, we query it from KVM and record in env->tsc_khz, - * so that vcpu's TSC frequency can be migrated later via this field. - */ - if (!env->tsc_khz) { - r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? - kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : - -ENOTSUP; - if (r > 0) { - env->tsc_khz = r; - } - } - - env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY; - - /* - * kvm_hyperv_expand_features() is called here for the second time in case - * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle - * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to - * check which Hyper-V enlightenments are supported and which are not, we - * can still proceed and check/expand Hyper-V enlightenments here so legacy - * behavior is preserved. - */ - if (!kvm_hyperv_expand_features(cpu, &local_err)) { - error_report_err(local_err); - return -ENOSYS; - } - - if (hyperv_enabled(cpu)) { - r = hyperv_init_vcpu(cpu); - if (r) { - return r; - } - - cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries); - kvm_base = KVM_CPUID_SIGNATURE_NEXT; - has_msr_hv_hypercall = true; - } - - if (cs->kvm_state->xen_version) { -#ifdef CONFIG_XEN_EMU - struct kvm_cpuid_entry2 *xen_max_leaf; - - memcpy(signature, "XenVMMXenVMM", 12); - - xen_max_leaf = c = &cpuid_data.entries[cpuid_i++]; - c->function = kvm_base + XEN_CPUID_SIGNATURE; - c->eax = kvm_base + XEN_CPUID_TIME; - c->ebx = signature[0]; - c->ecx = signature[1]; - c->edx = signature[2]; - - c = &cpuid_data.entries[cpuid_i++]; - c->function = kvm_base + XEN_CPUID_VENDOR; - c->eax = cs->kvm_state->xen_version; - c->ebx = 0; - c->ecx = 0; - c->edx = 0; - - c = &cpuid_data.entries[cpuid_i++]; - c->function = kvm_base + XEN_CPUID_HVM_MSR; - /* Number of hypercall-transfer pages */ - c->eax = 1; - /* Hypercall MSR base address */ - if (hyperv_enabled(cpu)) { - c->ebx = XEN_HYPERCALL_MSR_HYPERV; - kvm_xen_init(cs->kvm_state, c->ebx); - } else { - c->ebx = XEN_HYPERCALL_MSR; - } - c->ecx = 0; - c->edx = 0; - - c = &cpuid_data.entries[cpuid_i++]; - c->function = kvm_base + XEN_CPUID_TIME; - c->eax = ((!!tsc_is_stable_and_known(env) << 1) | - (!!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP) << 2)); - /* default=0 (emulate if necessary) */ - c->ebx = 0; - /* guest tsc frequency */ - c->ecx = env->user_tsc_khz; - /* guest tsc incarnation (migration count) */ - c->edx = 0; - - c = &cpuid_data.entries[cpuid_i++]; - c->function = kvm_base + XEN_CPUID_HVM; - xen_max_leaf->eax = kvm_base + XEN_CPUID_HVM; - if (cs->kvm_state->xen_version >= XEN_VERSION(4, 5)) { - c->function = kvm_base + XEN_CPUID_HVM; - - if (cpu->xen_vapic) { - c->eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT; - c->eax |= XEN_HVM_CPUID_X2APIC_VIRT; - } - - c->eax |= XEN_HVM_CPUID_IOMMU_MAPPINGS; - - if (cs->kvm_state->xen_version >= XEN_VERSION(4, 6)) { - c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT; - c->ebx = cs->cpu_index; - } - - if (cs->kvm_state->xen_version >= XEN_VERSION(4, 17)) { - c->eax |= XEN_HVM_CPUID_UPCALL_VECTOR; - } - } - - r = kvm_xen_init_vcpu(cs); - if (r) { - return r; - } - - kvm_base += 0x100; -#else /* CONFIG_XEN_EMU */ - /* This should never happen as kvm_arch_init() would have died first. */ - fprintf(stderr, "Cannot enable Xen CPUID without Xen support\n"); - abort(); -#endif - } else if (cpu->expose_kvm) { - memcpy(signature, "KVMKVMKVM\0\0\0", 12); - c = &cpuid_data.entries[cpuid_i++]; - c->function = KVM_CPUID_SIGNATURE | kvm_base; - c->eax = KVM_CPUID_FEATURES | kvm_base; - c->ebx = signature[0]; - c->ecx = signature[1]; - c->edx = signature[2]; - - c = &cpuid_data.entries[cpuid_i++]; - c->function = KVM_CPUID_FEATURES | kvm_base; - c->eax = env->features[FEAT_KVM]; - c->edx = env->features[FEAT_KVM_HINTS]; - } cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused); - if (cpu->kvm_pv_enforce_cpuid) { - r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1); - if (r < 0) { - fprintf(stderr, - "failed to enable KVM_CAP_ENFORCE_PV_FEATURE_CPUID: %s", - strerror(-r)); - abort(); - } - } - for (i = 0; i <= limit; i++) { + j = 0; if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "unsupported level value: 0x%x\n", limit); - abort(); + goto full; } - c = &cpuid_data.entries[cpuid_i++]; - + c = &entries[cpuid_i++]; switch (i) { case 2: { /* Keep reading function 2 till all the input is received */ @@ -1905,11 +1732,9 @@ int kvm_arch_init_vcpu(CPUState *cs) for (j = 1; j < times; ++j) { if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "cpuid_data is full, no space for " - "cpuid(eax:2):eax & 0xf = 0x%x\n", times); - abort(); + goto full; } - c = &cpuid_data.entries[cpuid_i++]; + c = &entries[cpuid_i++]; c->function = i; c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC; cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); @@ -1949,11 +1774,9 @@ int kvm_arch_init_vcpu(CPUState *cs) } } if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "cpuid_data is full, no space for " - "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); - abort(); + goto full; } - c = &cpuid_data.entries[cpuid_i++]; + c = &entries[cpuid_i++]; } break; case 0x12: @@ -1968,11 +1791,9 @@ int kvm_arch_init_vcpu(CPUState *cs) } if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "cpuid_data is full, no space for " - "cpuid(eax:0x12,ecx:0x%x)\n", j); - abort(); + goto full; } - c = &cpuid_data.entries[cpuid_i++]; + c = &entries[cpuid_i++]; } break; case 0x7: @@ -1989,11 +1810,9 @@ int kvm_arch_init_vcpu(CPUState *cs) for (j = 1; j <= times; ++j) { if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "cpuid_data is full, no space for " - "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); - abort(); + goto full; } - c = &cpuid_data.entries[cpuid_i++]; + c = &entries[cpuid_i++]; c->function = i; c->index = j; c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; @@ -2046,11 +1865,11 @@ int kvm_arch_init_vcpu(CPUState *cs) cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused); for (i = 0x80000000; i <= limit; i++) { + j = 0; if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit); - abort(); + goto full; } - c = &cpuid_data.entries[cpuid_i++]; + c = &entries[cpuid_i++]; switch (i) { case 0x8000001d: @@ -2065,11 +1884,9 @@ int kvm_arch_init_vcpu(CPUState *cs) break; } if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "cpuid_data is full, no space for " - "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); - abort(); + goto full; } - c = &cpuid_data.entries[cpuid_i++]; + c = &entries[cpuid_i++]; } break; default: @@ -2092,11 +1909,11 @@ int kvm_arch_init_vcpu(CPUState *cs) cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused); for (i = 0xC0000000; i <= limit; i++) { + j = 0; if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit); - abort(); + goto full; } - c = &cpuid_data.entries[cpuid_i++]; + c = &entries[cpuid_i++]; c->function = i; c->flags = 0; @@ -2104,6 +1921,194 @@ int kvm_arch_init_vcpu(CPUState *cs) } } + return cpuid_i; + +full: + fprintf(stderr, "cpuid_data is full, no space for " + "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); + abort(); +} + +int kvm_arch_init_vcpu(CPUState *cs) +{ + struct { + struct kvm_cpuid2 cpuid; + struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; + } cpuid_data; + /* + * The kernel defines these structs with padding fields so there + * should be no extra padding in our cpuid_data struct. + */ + QEMU_BUILD_BUG_ON(sizeof(cpuid_data) != + sizeof(struct kvm_cpuid2) + + sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); + + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + uint32_t cpuid_i; + struct kvm_cpuid_entry2 *c; + uint32_t signature[3]; + int kvm_base = KVM_CPUID_SIGNATURE; + int max_nested_state_len; + int r; + Error *local_err = NULL; + + memset(&cpuid_data, 0, sizeof(cpuid_data)); + + cpuid_i = 0; + + has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2); + + r = kvm_arch_set_tsc_khz(cs); + if (r < 0) { + return r; + } + + /* vcpu's TSC frequency is either specified by user, or following + * the value used by KVM if the former is not present. In the + * latter case, we query it from KVM and record in env->tsc_khz, + * so that vcpu's TSC frequency can be migrated later via this field. + */ + if (!env->tsc_khz) { + r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? + kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : + -ENOTSUP; + if (r > 0) { + env->tsc_khz = r; + } + } + + env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY; + + /* + * kvm_hyperv_expand_features() is called here for the second time in case + * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle + * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to + * check which Hyper-V enlightenments are supported and which are not, we + * can still proceed and check/expand Hyper-V enlightenments here so legacy + * behavior is preserved. + */ + if (!kvm_hyperv_expand_features(cpu, &local_err)) { + error_report_err(local_err); + return -ENOSYS; + } + + if (hyperv_enabled(cpu)) { + r = hyperv_init_vcpu(cpu); + if (r) { + return r; + } + + cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries); + kvm_base = KVM_CPUID_SIGNATURE_NEXT; + has_msr_hv_hypercall = true; + } + + if (cs->kvm_state->xen_version) { +#ifdef CONFIG_XEN_EMU + struct kvm_cpuid_entry2 *xen_max_leaf; + + memcpy(signature, "XenVMMXenVMM", 12); + + xen_max_leaf = c = &cpuid_data.entries[cpuid_i++]; + c->function = kvm_base + XEN_CPUID_SIGNATURE; + c->eax = kvm_base + XEN_CPUID_TIME; + c->ebx = signature[0]; + c->ecx = signature[1]; + c->edx = signature[2]; + + c = &cpuid_data.entries[cpuid_i++]; + c->function = kvm_base + XEN_CPUID_VENDOR; + c->eax = cs->kvm_state->xen_version; + c->ebx = 0; + c->ecx = 0; + c->edx = 0; + + c = &cpuid_data.entries[cpuid_i++]; + c->function = kvm_base + XEN_CPUID_HVM_MSR; + /* Number of hypercall-transfer pages */ + c->eax = 1; + /* Hypercall MSR base address */ + if (hyperv_enabled(cpu)) { + c->ebx = XEN_HYPERCALL_MSR_HYPERV; + kvm_xen_init(cs->kvm_state, c->ebx); + } else { + c->ebx = XEN_HYPERCALL_MSR; + } + c->ecx = 0; + c->edx = 0; + + c = &cpuid_data.entries[cpuid_i++]; + c->function = kvm_base + XEN_CPUID_TIME; + c->eax = ((!!tsc_is_stable_and_known(env) << 1) | + (!!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP) << 2)); + /* default=0 (emulate if necessary) */ + c->ebx = 0; + /* guest tsc frequency */ + c->ecx = env->user_tsc_khz; + /* guest tsc incarnation (migration count) */ + c->edx = 0; + + c = &cpuid_data.entries[cpuid_i++]; + c->function = kvm_base + XEN_CPUID_HVM; + xen_max_leaf->eax = kvm_base + XEN_CPUID_HVM; + if (cs->kvm_state->xen_version >= XEN_VERSION(4, 5)) { + c->function = kvm_base + XEN_CPUID_HVM; + + if (cpu->xen_vapic) { + c->eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT; + c->eax |= XEN_HVM_CPUID_X2APIC_VIRT; + } + + c->eax |= XEN_HVM_CPUID_IOMMU_MAPPINGS; + + if (cs->kvm_state->xen_version >= XEN_VERSION(4, 6)) { + c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT; + c->ebx = cs->cpu_index; + } + + if (cs->kvm_state->xen_version >= XEN_VERSION(4, 17)) { + c->eax |= XEN_HVM_CPUID_UPCALL_VECTOR; + } + } + + r = kvm_xen_init_vcpu(cs); + if (r) { + return r; + } + + kvm_base += 0x100; +#else /* CONFIG_XEN_EMU */ + /* This should never happen as kvm_arch_init() would have died first. */ + fprintf(stderr, "Cannot enable Xen CPUID without Xen support\n"); + abort(); +#endif + } else if (cpu->expose_kvm) { + memcpy(signature, "KVMKVMKVM\0\0\0", 12); + c = &cpuid_data.entries[cpuid_i++]; + c->function = KVM_CPUID_SIGNATURE | kvm_base; + c->eax = KVM_CPUID_FEATURES | kvm_base; + c->ebx = signature[0]; + c->ecx = signature[1]; + c->edx = signature[2]; + + c = &cpuid_data.entries[cpuid_i++]; + c->function = KVM_CPUID_FEATURES | kvm_base; + c->eax = env->features[FEAT_KVM]; + c->edx = env->features[FEAT_KVM_HINTS]; + } + + if (cpu->kvm_pv_enforce_cpuid) { + r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1); + if (r < 0) { + fprintf(stderr, + "failed to enable KVM_CAP_ENFORCE_PV_FEATURE_CPUID: %s", + strerror(-r)); + abort(); + } + } + + cpuid_i = kvm_x86_build_cpuid(env, cpuid_data.entries, cpuid_i); cpuid_data.cpuid.nent = cpuid_i; if (((env->cpuid_version >> 8)&0xF) >= 6 -- Gitee From 200df8a7d73d6bc97849070e706008c1867b29bf Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Wed, 20 Mar 2024 03:39:13 -0500 Subject: [PATCH 002/115] pci-host/q35: Move PAM initialization above SMRAM initialization commit 42c11ae2416dcbcd694ec3ee574fe2f3e70099ae upstream In mch_realize(), process PAM initialization before SMRAM initialization so that later patch can skill all the SMRAM related with a single check. Signed-off-by: Isaku Yamahata Signed-off-by: Xiaoyao Li Signed-off-by: Michael Roth Message-ID: <20240320083945.991426-18-michael.roth@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: PKumarAditya Signed-off-by: PrithivishS --- hw/pci-host/q35.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 08534bc7cc..4ac44975c7 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -568,6 +568,16 @@ static void mch_realize(PCIDevice *d, Error **errp) /* setup pci memory mapping */ pc_pci_as_mapping_init(mch->system_memory, mch->pci_address_space); + /* PAM */ + init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory, + mch->system_memory, mch->pci_address_space, + PAM_BIOS_BASE, PAM_BIOS_SIZE); + for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) { + init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory, + mch->system_memory, mch->pci_address_space, + PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); + } + /* if *disabled* show SMRAM to all CPUs */ memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region", mch->pci_address_space, MCH_HOST_BRIDGE_SMRAM_C_BASE, @@ -634,15 +644,6 @@ static void mch_realize(PCIDevice *d, Error **errp) object_property_add_const_link(qdev_get_machine(), "smram", OBJECT(&mch->smram)); - - init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory, - mch->system_memory, mch->pci_address_space, - PAM_BIOS_BASE, PAM_BIOS_SIZE); - for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) { - init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory, - mch->system_memory, mch->pci_address_space, - PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); - } } uint64_t mch_mcfg_base(void) -- Gitee From 8cd81389aae2ddf138b0c3960fb35c0b940107c0 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Wed, 20 Mar 2024 03:39:14 -0500 Subject: [PATCH 003/115] q35: Introduce smm_ranges property for q35-pci-host commit b07bf7b73fd02d24a7baa64a580f4974b86bbc86 upstream Add a q35 property to check whether or not SMM ranges, e.g. SMRAM, TSEG, etc... exist for the target platform. TDX doesn't support SMM and doesn't play nice with QEMU modifying related guest memory ranges. Signed-off-by: Isaku Yamahata Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Signed-off-by: Xiaoyao Li Signed-off-by: Michael Roth Message-ID: <20240320083945.991426-19-michael.roth@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: PKumarAditya Signed-off-by: PrithivishS --- hw/i386/pc_q35.c | 2 ++ hw/pci-host/q35.c | 42 +++++++++++++++++++++++++++------------ include/hw/i386/pc.h | 1 + include/hw/pci-host/q35.h | 1 + 4 files changed, 33 insertions(+), 13 deletions(-) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 4f3e5412f6..3392b0c110 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -236,6 +236,8 @@ static void pc_q35_init(MachineState *machine) x86ms->above_4g_mem_size, NULL); object_property_set_bool(phb, PCI_HOST_BYPASS_IOMMU, pcms->default_bus_bypass_iommu, NULL); + object_property_set_bool(phb, PCI_HOST_PROP_SMM_RANGES, + x86_machine_is_smm_enabled(x86ms), NULL); sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal); /* pci */ diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 4ac44975c7..8facd8b63f 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -179,6 +179,8 @@ static Property q35_host_props[] = { mch.below_4g_mem_size, 0), DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, Q35PCIHost, mch.above_4g_mem_size, 0), + DEFINE_PROP_BOOL(PCI_HOST_PROP_SMM_RANGES, Q35PCIHost, + mch.has_smm_ranges, true), DEFINE_PROP_BOOL("x-pci-hole64-fix", Q35PCIHost, pci_hole64_fix, true), DEFINE_PROP_END_OF_LIST(), }; @@ -214,6 +216,7 @@ static void q35_host_initfn(Object *obj) /* mch's object_initialize resets the default value, set it again */ qdev_prop_set_uint64(DEVICE(s), PCI_HOST_PROP_PCI_HOLE64_SIZE, Q35_PCI_HOST_HOLE64_SIZE_DEFAULT); + object_property_add(obj, PCI_HOST_PROP_PCI_HOLE_START, "uint32", q35_host_get_pci_hole_start, NULL, NULL, NULL); @@ -476,6 +479,10 @@ static void mch_write_config(PCIDevice *d, mch_update_pciexbar(mch); } + if (!mch->has_smm_ranges) { + return; + } + if (ranges_overlap(address, len, MCH_HOST_BRIDGE_SMRAM, MCH_HOST_BRIDGE_SMRAM_SIZE)) { mch_update_smram(mch); @@ -494,10 +501,13 @@ static void mch_write_config(PCIDevice *d, static void mch_update(MCHPCIState *mch) { mch_update_pciexbar(mch); + mch_update_pam(mch); - mch_update_smram(mch); - mch_update_ext_tseg_mbytes(mch); - mch_update_smbase_smram(mch); + if (mch->has_smm_ranges) { + mch_update_smram(mch); + mch_update_ext_tseg_mbytes(mch); + mch_update_smbase_smram(mch); + } /* * pci hole goes from end-of-low-ram to io-apic. @@ -538,18 +548,20 @@ static void mch_reset(DeviceState *qdev) pci_set_quad(d->config + MCH_HOST_BRIDGE_PCIEXBAR, MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT); - d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT; - d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT; - d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK; - d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK; + if (mch->has_smm_ranges) { + d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT; + d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT; + d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK; + d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK; - if (mch->ext_tseg_mbytes > 0) { - pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES, - MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY); - } + if (mch->ext_tseg_mbytes > 0) { + pci_set_word(d->config + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES, + MCH_HOST_BRIDGE_EXT_TSEG_MBYTES_QUERY); + } - d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0; - d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff; + d->config[MCH_HOST_BRIDGE_F_SMBASE] = 0; + d->wmask[MCH_HOST_BRIDGE_F_SMBASE] = 0xff; + } mch_update(mch); } @@ -578,6 +590,10 @@ static void mch_realize(PCIDevice *d, Error **errp) PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); } + if (!mch->has_smm_ranges) { + return; + } + /* if *disabled* show SMRAM to all CPUs */ memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region", mch->pci_address_space, MCH_HOST_BRIDGE_SMRAM_C_BASE, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index a10ceeabbf..c49e2970f1 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -165,6 +165,7 @@ void pc_guest_info_init(PCMachineState *pcms); #define PCI_HOST_PROP_PCI_HOLE64_SIZE "pci-hole64-size" #define PCI_HOST_BELOW_4G_MEM_SIZE "below-4g-mem-size" #define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size" +#define PCI_HOST_PROP_SMM_RANGES "smm-ranges" void pc_pci_as_mapping_init(MemoryRegion *system_memory, diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h index bafcbe6752..22fadfa3ed 100644 --- a/include/hw/pci-host/q35.h +++ b/include/hw/pci-host/q35.h @@ -50,6 +50,7 @@ struct MCHPCIState { MemoryRegion tseg_blackhole, tseg_window; MemoryRegion smbase_blackhole, smbase_window; bool has_smram_at_smbase; + bool has_smm_ranges; Range pci_hole; uint64_t below_4g_mem_size; uint64_t above_4g_mem_size; -- Gitee From a2175e23f11cbc27479e42c2cfd3a778cdbaabb9 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Wed, 3 Apr 2024 10:59:53 -0400 Subject: [PATCH 004/115] hw/i386/acpi: Set PCAT_COMPAT bit only when pic is not disabled commit 292dd287e78e0cbafde9d1522c729349d132d844 upstream A value 1 of PCAT_COMPAT (bit 0) of MADT.Flags indicates that the system also has a PC-AT-compatible dual-8259 setup, i.e., the PIC. When PIC is not enabled (pic=off) for x86 machine, the PCAT_COMPAT bit needs to be cleared. The PIC probe should then print: [ 0.155970] Using NULL legacy PIC However, no such log printed in guest kernel unless PCAT_COMPAT is cleared. Signed-off-by: Xiaoyao Li Message-ID: <20240403145953.3082491-1-xiaoyao.li@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: PKumarAditya Signed-off-by: PrithivishS --- hw/i386/acpi-common.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/i386/acpi-common.c b/hw/i386/acpi-common.c index 43dc23f7e0..57e81c7322 100644 --- a/hw/i386/acpi-common.c +++ b/hw/i386/acpi-common.c @@ -107,7 +107,9 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker, acpi_table_begin(&table, table_data); /* Local APIC Address */ build_append_int_noprefix(table_data, APIC_DEFAULT_ADDRESS, 4); - build_append_int_noprefix(table_data, 1 /* PCAT_COMPAT */, 4); /* Flags */ + /* Flags. bit 0: PCAT_COMPAT */ + build_append_int_noprefix(table_data, + x86ms->pic != ON_OFF_AUTO_OFF ? 1 : 0 , 4); for (i = 0; i < apic_ids->len; i++) { pc_madt_cpu_entry(i, apic_ids, table_data, false); -- Gitee From b886dc65f66b9f931f5fc5717568a8a3f4a3fa79 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 29 Feb 2024 01:00:35 -0500 Subject: [PATCH 005/115] confidential guest support: Add kvm_init() and kvm_reset() in class commit 41a605944e3fecae43ca18ded95ec31f28e0c7fe upstream Different confidential VMs in different architectures all have the same needs to do their specific initialization (and maybe resetting) stuffs with KVM. Currently each of them exposes individual *_kvm_init() functions and let machine code or kvm code to call it. To facilitate the introduction of confidential guest technology from different x86 vendors, add two virtual functions, kvm_init() and kvm_reset() in ConfidentialGuestSupportClass, and expose two helpers functions for invodking them. Signed-off-by: Xiaoyao Li Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: PKumarAditya Signed-off-by: PrithivishS --- include/exec/confidential-guest-support.h | 34 ++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h index 2cba27642f..765dab4f83 100644 --- a/include/exec/confidential-guest-support.h +++ b/include/exec/confidential-guest-support.h @@ -23,7 +23,10 @@ #include "qom/object.h" #define TYPE_CONFIDENTIAL_GUEST_SUPPORT "confidential-guest-support" -OBJECT_DECLARE_SIMPLE_TYPE(ConfidentialGuestSupport, CONFIDENTIAL_GUEST_SUPPORT) +OBJECT_DECLARE_TYPE(ConfidentialGuestSupport, + ConfidentialGuestSupportClass, + CONFIDENTIAL_GUEST_SUPPORT) + struct ConfidentialGuestSupport { Object parent; @@ -101,8 +104,37 @@ struct ConfidentialGuestMemoryEncryptionOps { typedef struct ConfidentialGuestSupportClass { ObjectClass parent; struct ConfidentialGuestMemoryEncryptionOps *memory_encryption_ops; + + int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); + int (*kvm_reset)(ConfidentialGuestSupport *cgs, Error **errp); } ConfidentialGuestSupportClass; +static inline int confidential_guest_kvm_init(ConfidentialGuestSupport *cgs, + Error **errp) +{ + ConfidentialGuestSupportClass *klass; + + klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs); + if (klass->kvm_init) { + return klass->kvm_init(cgs, errp); + } + + return 0; +} + +static inline int confidential_guest_kvm_reset(ConfidentialGuestSupport *cgs, + Error **errp) +{ + ConfidentialGuestSupportClass *klass; + + klass = CONFIDENTIAL_GUEST_SUPPORT_GET_CLASS(cgs); + if (klass->kvm_reset) { + return klass->kvm_reset(cgs, errp); + } + + return 0; +} + #endif /* !CONFIG_USER_ONLY */ #endif /* QEMU_CONFIDENTIAL_GUEST_SUPPORT_H */ -- Gitee From e1142a05281b4d5dc08d77dd9bc275214d590364 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 29 Feb 2024 01:00:36 -0500 Subject: [PATCH 006/115] i386/sev: Switch to use confidential_guest_kvm_init() commit 637c95b37b106c2eeba313e0abb38ec12e918a59 upstream Use confidential_guest_kvm_init() instead of calling SEV specific sev_kvm_init(). This allows the introduction of multiple confidential-guest-support subclasses for different x86 vendors. As a bonus, stubs are not needed anymore since there is no direct call from target/i386/kvm/kvm.c to SEV code. [Backport Changes] 1) In the upstream, the file target/i386/kvm/sev-stub.c was removed entirely. However, in this backport, a minimal version of the file is retained, preserving only the relevant header definitions and logic needed to support non-upstream and CSV specific features such as SEV shared regions, GHCB handling, and SEV live migration. The corresponding build configuration in target/i386/kvm/meson.build was also retained. The retained codes was originally introduced by the following Euler commits: i) 6a8b58a3ce6dc162cae4b74ca8f39392672e6cba (target/i386: get/set/migrate GHCB state). ii) 02e6bfc88ce5e944ce36b8ccb7d2af103a969980 (kvm: Add support for SEV shared regions list and KVM_EXIT_HYPERCALL). iii) 7aced2a5fff91e0fcff97bb5eafddafece0cb983 (kvm: Add support for userspace MSR filtering and handling of MSR_KVM_MIGRATION_CONTROL). 2)In target/i386/sev.c, the upstream commit moved the sev_guest_class_init() function from below sev_guest_set_kernel_hashes() to a new location after the sev_ops structure, as part of code reorganization. However, in current codebase, sev_guest_class_init() includes additional non-upstream QOM property initializations introduced by the following Hygon CSV support commits: i) 8f4f8a2071e69130f0b9327ce8f9b92a5ae42c8d (target/i386: sev: Add support for reuse ASID for different CSV guests). ii) 10f5fa07068f54b23b01bf875259dc1a259d66b4 (qapi/qom,target/i386: csv-guest: Introduce secret-header-file and secret-file options). Since these QOM properties are not part of the upstream commit, relocated sev_guest_class_init() function to its new location along with these non-upstream changes to continue support for Hygon-specific functionalities. Signed-off-by: Xiaoyao Li Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- target/i386/kvm/kvm.c | 10 ++- target/i386/kvm/meson.build | 1 - target/i386/kvm/sev-stub.c | 19 ----- target/i386/sev.c | 158 ++++++++++++++++++------------------ target/i386/sev.h | 2 - 5 files changed, 85 insertions(+), 105 deletions(-) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index b83e85ae4d..f90f098592 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -2598,10 +2598,12 @@ int kvm_arch_init(MachineState *ms, KVMState *s) * mechanisms are supported in future (e.g. TDX), they'll need * their own initialization either here or elsewhere. */ - ret = sev_kvm_init(ms->cgs, &local_err); - if (ret < 0) { - error_report_err(local_err); - return ret; + if (ms->cgs) { + ret = confidential_guest_kvm_init(ms->cgs, &local_err); + if (ret < 0) { + error_report_err(local_err); + return ret; + } } has_xcrs = kvm_check_extension(s, KVM_CAP_XCRS); diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build index 3c3f8cf93c..7c5eadac66 100644 --- a/target/i386/kvm/meson.build +++ b/target/i386/kvm/meson.build @@ -7,7 +7,6 @@ i386_kvm_ss.add(files( i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c')) -i386_kvm_ss.add(when: 'CONFIG_SEV', if_false: files('sev-stub.c')) i386_kvm_ss.add(when: 'CONFIG_CSV', if_false: files('csv-stub.c')) i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c')) diff --git a/target/i386/kvm/sev-stub.c b/target/i386/kvm/sev-stub.c index a0aac1117f..8db0031097 100644 --- a/target/i386/kvm/sev-stub.c +++ b/target/i386/kvm/sev-stub.c @@ -1,27 +1,8 @@ -/* - * QEMU SEV stub - * - * Copyright Advanced Micro Devices 2018 - * - * Authors: - * Brijesh Singh - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - #include "qemu/osdep.h" #include "sev.h" bool sev_kvm_has_msr_ghcb; -int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) -{ - /* If we get here, cgs must be some non-SEV thing */ - return 0; -} - int sev_remove_shared_regions_list(unsigned long gfn_start, unsigned long gfn_end) { diff --git a/target/i386/sev.c b/target/i386/sev.c index 8c1f4d653e..70238e3402 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -454,78 +454,6 @@ static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) sev->kernel_hashes = value; } -static void -sev_guest_class_init(ObjectClass *oc, void *data) -{ - object_class_property_add_str(oc, "sev-device", - sev_guest_get_sev_device, - sev_guest_set_sev_device); - object_class_property_set_description(oc, "sev-device", - "SEV device to use"); - object_class_property_add_str(oc, "dh-cert-file", - sev_guest_get_dh_cert_file, - sev_guest_set_dh_cert_file); - object_class_property_set_description(oc, "dh-cert-file", - "guest owners DH certificate (encoded with base64)"); - object_class_property_add_str(oc, "session-file", - sev_guest_get_session_file, - sev_guest_set_session_file); - object_class_property_set_description(oc, "session-file", - "guest owners session parameters (encoded with base64)"); - object_class_property_add_bool(oc, "kernel-hashes", - sev_guest_get_kernel_hashes, - sev_guest_set_kernel_hashes); - object_class_property_set_description(oc, "kernel-hashes", - "add kernel hashes to guest firmware for measured Linux boot"); - object_class_property_add_str(oc, "user-id", - sev_guest_get_user_id, - sev_guest_set_user_id); - object_class_property_set_description(oc, "user-id", - "user id of the guest owner"); - object_class_property_add_str(oc, "secret-header-file", - sev_guest_get_secret_header_file, - sev_guest_set_secret_header_file); - object_class_property_set_description(oc, "secret-header-file", - "header file of the guest owner's secret"); - object_class_property_add_str(oc, "secret-file", - sev_guest_get_secret_file, - sev_guest_set_secret_file); - object_class_property_set_description(oc, "secret-file", - "file of the guest owner's secret"); -} - -static void -sev_guest_instance_init(Object *obj) -{ - SevGuestState *sev = SEV_GUEST(obj); - - sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); - sev->policy = DEFAULT_GUEST_POLICY; - object_property_add_uint32_ptr(obj, "policy", &sev->policy, - OBJ_PROP_FLAG_READWRITE); - object_property_add_uint32_ptr(obj, "handle", &sev->handle, - OBJ_PROP_FLAG_READWRITE); - object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos, - OBJ_PROP_FLAG_READWRITE); - object_property_add_uint32_ptr(obj, "reduced-phys-bits", - &sev->reduced_phys_bits, - OBJ_PROP_FLAG_READWRITE); -} - -/* sev guest info */ -static const TypeInfo sev_guest_info = { - .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT, - .name = TYPE_SEV_GUEST, - .instance_size = sizeof(SevGuestState), - .instance_finalize = sev_guest_finalize, - .class_init = sev_guest_class_init, - .instance_init = sev_guest_instance_init, - .interfaces = (InterfaceInfo[]) { - { TYPE_USER_CREATABLE }, - { } - } -}; - bool sev_enabled(void) { @@ -1164,20 +1092,15 @@ sev_migration_state_notifier(Notifier *notifier, void *data) static Notifier sev_migration_state; -int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { - SevGuestState *sev - = (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST); + SevGuestState *sev = SEV_GUEST(cgs); char *devname; int ret, fw_error, cmd; uint32_t ebx; uint32_t host_cbitpos; struct sev_user_data_status status = {}; - if (!sev) { - return 0; - } - ConfidentialGuestSupportClass *cgs_class = (ConfidentialGuestSupportClass *) object_get_class(OBJECT(cgs)); @@ -2800,6 +2723,83 @@ struct sev_ops sev_ops = { .sev_receive_start = _sev_receive_start, }; +static void +sev_guest_class_init(ObjectClass *oc, void *data) +{ + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + + klass->kvm_init = sev_kvm_init; + + object_class_property_add_str(oc, "sev-device", + sev_guest_get_sev_device, + sev_guest_set_sev_device); + object_class_property_set_description(oc, "sev-device", + "SEV device to use"); + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, + sev_guest_set_dh_cert_file); + object_class_property_set_description(oc, "dh-cert-file", + "guest owners DH certificate (encoded with base64)"); + object_class_property_add_str(oc, "session-file", + sev_guest_get_session_file, + sev_guest_set_session_file); + object_class_property_set_description(oc, "session-file", + "guest owners session parameters (encoded with base64)"); + object_class_property_add_bool(oc, "kernel-hashes", + sev_guest_get_kernel_hashes, + sev_guest_set_kernel_hashes); + object_class_property_set_description(oc, "kernel-hashes", + "add kernel hashes to guest firmware for measured Linux boot"); + + object_class_property_add_str(oc, "user-id", + sev_guest_get_user_id, + sev_guest_set_user_id); + object_class_property_set_description(oc, "user-id", + "user id of the guest owner"); + object_class_property_add_str(oc, "secret-header-file", + sev_guest_get_secret_header_file, + sev_guest_set_secret_header_file); + object_class_property_set_description(oc, "secret-header-file", + "header file of the guest owner's secret"); + object_class_property_add_str(oc, "secret-file", + sev_guest_get_secret_file, + sev_guest_set_secret_file); + object_class_property_set_description(oc, "secret-file", + "file of the guest owner's secret"); +} + +static void +sev_guest_instance_init(Object *obj) +{ + SevGuestState *sev = SEV_GUEST(obj); + + sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); + sev->policy = DEFAULT_GUEST_POLICY; + object_property_add_uint32_ptr(obj, "policy", &sev->policy, + OBJ_PROP_FLAG_READWRITE); + object_property_add_uint32_ptr(obj, "handle", &sev->handle, + OBJ_PROP_FLAG_READWRITE); + object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos, + OBJ_PROP_FLAG_READWRITE); + object_property_add_uint32_ptr(obj, "reduced-phys-bits", + &sev->reduced_phys_bits, + OBJ_PROP_FLAG_READWRITE); +} + +/* sev guest info */ +static const TypeInfo sev_guest_info = { + .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT, + .name = TYPE_SEV_GUEST, + .instance_size = sizeof(SevGuestState), + .instance_finalize = sev_guest_finalize, + .class_init = sev_guest_class_init, + .instance_init = sev_guest_instance_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + static void sev_register_types(void) { diff --git a/target/i386/sev.h b/target/i386/sev.h index 647b426b16..0436c966cc 100644 --- a/target/i386/sev.h +++ b/target/i386/sev.h @@ -76,8 +76,6 @@ int sev_load_incoming_shared_regions_list(QEMUFile *f); bool sev_is_gfn_in_unshared_region(unsigned long gfn); void sev_del_migrate_blocker(void); -int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); - extern bool sev_kvm_has_msr_ghcb; struct sev_ops { -- Gitee From 9c161960c558f70b9d4b2e5185457867440397b6 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 29 Feb 2024 01:00:37 -0500 Subject: [PATCH 007/115] ppc/pef: switch to use confidential_guest_kvm_init/reset() commit 00a238b1a845fd5f0acd771664c5e184a63ed9b6 upstream Use the unified interface to call confidential guest related kvm_init() and kvm_reset(), to avoid exposing pef specific functions. As a bonus, pef.h goes away since there is no direct call from sPAPR board code to PEF code anymore. Signed-off-by: Xiaoyao Li Signed-off-by: Paolo Bonzini Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- hw/ppc/pef.c | 9 ++++++--- hw/ppc/spapr.c | 10 +++++++--- include/hw/ppc/pef.h | 17 ----------------- 3 files changed, 13 insertions(+), 23 deletions(-) delete mode 100644 include/hw/ppc/pef.h diff --git a/hw/ppc/pef.c b/hw/ppc/pef.c index d28ed3ba73..47553348b1 100644 --- a/hw/ppc/pef.c +++ b/hw/ppc/pef.c @@ -15,7 +15,6 @@ #include "sysemu/kvm.h" #include "migration/blocker.h" #include "exec/confidential-guest-support.h" -#include "hw/ppc/pef.h" #define TYPE_PEF_GUEST "pef-guest" OBJECT_DECLARE_SIMPLE_TYPE(PefGuest, PEF_GUEST) @@ -93,7 +92,7 @@ static int kvmppc_svm_off(Error **errp) #endif } -int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +static int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) { return 0; @@ -107,7 +106,7 @@ int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) return kvmppc_svm_init(cgs, errp); } -int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp) +static int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp) { if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) { return 0; @@ -131,6 +130,10 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(PefGuest, static void pef_guest_class_init(ObjectClass *oc, void *data) { + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + + klass->kvm_init = pef_kvm_init; + klass->kvm_reset = pef_kvm_reset; } static void pef_guest_init(Object *obj) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index df09aa9d6a..df1a93a58b 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -74,6 +74,7 @@ #include "hw/virtio/vhost-scsi-common.h" #include "exec/ram_addr.h" +#include "exec/confidential-guest-support.h" #include "hw/usb.h" #include "qemu/config-file.h" #include "qemu/error-report.h" @@ -86,7 +87,6 @@ #include "hw/ppc/spapr_tpm_proxy.h" #include "hw/ppc/spapr_nvdimm.h" #include "hw/ppc/spapr_numa.h" -#include "hw/ppc/pef.h" #include "monitor/monitor.h" @@ -1687,7 +1687,9 @@ static void spapr_machine_reset(MachineState *machine, ShutdownCause reason) qemu_guest_getrandom_nofail(spapr->fdt_rng_seed, 32); } - pef_kvm_reset(machine->cgs, &error_fatal); + if (machine->cgs) { + confidential_guest_kvm_reset(machine->cgs, &error_fatal); + } spapr_caps_apply(spapr); first_ppc_cpu = POWERPC_CPU(first_cpu); @@ -2810,7 +2812,9 @@ static void spapr_machine_init(MachineState *machine) /* * if Secure VM (PEF) support is configured, then initialize it */ - pef_kvm_init(machine->cgs, &error_fatal); + if (machine->cgs) { + confidential_guest_kvm_init(machine->cgs, &error_fatal); + } msi_nonbroken = true; diff --git a/include/hw/ppc/pef.h b/include/hw/ppc/pef.h deleted file mode 100644 index 707dbe524c..0000000000 --- a/include/hw/ppc/pef.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * PEF (Protected Execution Facility) for POWER support - * - * Copyright Red Hat. - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#ifndef HW_PPC_PEF_H -#define HW_PPC_PEF_H - -int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); -int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp); - -#endif /* HW_PPC_PEF_H */ -- Gitee From 9f1d13027567244db9d5a809a3493c2d6d2177a9 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 29 Feb 2024 01:00:38 -0500 Subject: [PATCH 008/115] s390: Switch to use confidential_guest_kvm_init() commit a14a2b0148e657cc526b7a75f2a1937628764e7a upstream Use unified confidential_guest_kvm_init() for consistency with other architectures. Signed-off-by: Xiaoyao Li Message-Id: <20240229060038.606591-1-xiaoyao.li@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- hw/s390x/s390-virtio-ccw.c | 5 ++++- target/s390x/kvm/pv.c | 10 +++++++++- target/s390x/kvm/pv.h | 14 -------------- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 7262725d2e..62a94c08bb 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -14,6 +14,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "exec/ram_addr.h" +#include "exec/confidential-guest-support.h" #include "hw/s390x/s390-virtio-hcall.h" #include "hw/s390x/sclp.h" #include "hw/s390x/s390_flic.h" @@ -259,7 +260,9 @@ static void ccw_init(MachineState *machine) s390_init_cpus(machine); /* Need CPU model to be determined before we can set up PV */ - s390_pv_init(machine->cgs, &error_fatal); + if (machine->cgs) { + confidential_guest_kvm_init(machine->cgs, &error_fatal); + } s390_flic_init(); diff --git a/target/s390x/kvm/pv.c b/target/s390x/kvm/pv.c index 6a69be7e5c..902dbb6f34 100644 --- a/target/s390x/kvm/pv.c +++ b/target/s390x/kvm/pv.c @@ -319,12 +319,17 @@ static bool s390_pv_guest_check(ConfidentialGuestSupport *cgs, Error **errp) return s390_pv_check_cpus(errp); } -int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +static int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { if (!object_dynamic_cast(OBJECT(cgs), TYPE_S390_PV_GUEST)) { return 0; } + if (!kvm_enabled()) { + error_setg(errp, "Protected Virtualization requires KVM"); + return -1; + } + if (!s390_has_feat(S390_FEAT_UNPACK)) { error_setg(errp, "CPU model does not support Protected Virtualization"); @@ -349,6 +354,9 @@ OBJECT_DEFINE_TYPE_WITH_INTERFACES(S390PVGuest, static void s390_pv_guest_class_init(ObjectClass *oc, void *data) { + ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + + klass->kvm_init = s390_pv_kvm_init; } static void s390_pv_guest_init(Object *obj) diff --git a/target/s390x/kvm/pv.h b/target/s390x/kvm/pv.h index 7b935e2246..5e1527a849 100644 --- a/target/s390x/kvm/pv.h +++ b/target/s390x/kvm/pv.h @@ -79,18 +79,4 @@ static inline int kvm_s390_dump_mem_state(uint64_t addr, size_t len, static inline int kvm_s390_dump_completion_data(void *buff) { return 0; } #endif /* CONFIG_KVM */ -int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); -static inline int s390_pv_init(ConfidentialGuestSupport *cgs, Error **errp) -{ - if (!cgs) { - return 0; - } - if (kvm_enabled()) { - return s390_pv_kvm_init(cgs, errp); - } - - error_setg(errp, "Protected Virtualization requires KVM"); - return -1; -} - #endif /* HW_S390_PV_H */ -- Gitee From b0230f7e49d8e3cb93a515310e6e91f8aed8ef16 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Sun, 18 Feb 2024 23:35:02 -0600 Subject: [PATCH 009/115] scripts/update-linux-headers: Add setup_data.h to import list commit 66210a1a30f2384bb59f9dad8d769dba56dd30f1 upstream Data structures like struct setup_data have been moved to a separate setup_data.h header which bootparam.h relies on. Add setup_data.h to the cp_portable() list and sync it along with the other header files. Note that currently struct setup_data is stripped away as part of generating bootparam.h, but that handling is no currently needed for setup_data.h since it doesn't pull in many external headers/dependencies. However, QEMU currently redefines struct setup_data in hw/i386/x86.c, so that will need to be removed as part of any header update that pulls in the new setup_data.h to avoid build bisect breakage. Because is the first architecture specific #include in include/standard-headers/, add a new sed substitution to rewrite asm/ include to the standard-headers/asm-* subdirectory for the current architecture. And while at it, remove asm-generic/kvm_para.h from the list of allowed includes: it does not have a matching substitution, and therefore it would not be possible to use it on non-Linux systems where there is no /usr/include/asm-generic/ directory. Signed-off-by: Michael Roth Signed-off-by: Paolo Bonzini Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- scripts/update-linux-headers.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index 88c76b8f69..77f5f063b0 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -61,7 +61,7 @@ cp_portable() { -e 'linux/const' \ -e 'linux/kernel' \ -e 'linux/sysinfo' \ - -e 'asm-generic/kvm_para' \ + -e 'asm/setup_data.h' \ > /dev/null then echo "Unexpected #include in input file $f". @@ -77,6 +77,7 @@ cp_portable() { -e 's/__be\([0-9][0-9]*\)/uint\1_t/g' \ -e 's/"\(input-event-codes\.h\)"/"standard-headers\/linux\/\1"/' \ -e 's/]*\)>/"standard-headers\/linux\/\1"/' \ + -e 's/]*\)>/"standard-headers\/asm-'$arch'\/\1"/' \ -e 's/__bitwise//' \ -e 's/__attribute__((packed))/QEMU_PACKED/' \ -e 's/__inline__/inline/' \ @@ -155,12 +156,15 @@ for arch in $ARCHLIST; do "$tmpdir/include/asm/bootparam.h" > "$tmpdir/bootparam.h" cp_portable "$tmpdir/bootparam.h" \ "$output/include/standard-headers/asm-$arch" + cp_portable "$tmpdir/include/asm/setup_data.h" \ + "$output/standard-headers/asm-x86" fi if [ $arch = loongarch ]; then cp "$hdrdir/include/asm/kvm_para.h" "$output/linux-headers/asm-loongarch/" cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-loongarch/" fi done +arch= rm -rf "$output/linux-headers/linux" mkdir -p "$output/linux-headers/linux" -- Gitee From d780082292d42a385e08ac0563eb46113cb50b1a Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Wed, 21 Feb 2024 10:51:38 -0600 Subject: [PATCH 010/115] scripts/update-linux-headers: Add bits.h to file imports commit b40b8eb609d3549ac14aab43849b20f5cba951c9 upstream Signed-off-by: Michael Roth Signed-off-by: Paolo Bonzini Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- scripts/update-linux-headers.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index 77f5f063b0..4153a00957 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -170,7 +170,7 @@ rm -rf "$output/linux-headers/linux" mkdir -p "$output/linux-headers/linux" for header in const.h stddef.h kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \ psci.h psp-sev.h userfaultfd.h memfd.h mman.h nvme_ioctl.h \ - vduse.h iommufd.h; do + vduse.h iommufd.h bits.h; do cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux" done -- Gitee From e89762d70c17e34e8e1bce6a1a81de47d4ab6de8 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Mon, 4 Mar 2024 10:47:31 -0300 Subject: [PATCH 011/115] linux-headers: Update to Linux v6.8-rc6 commit 6a02465f917d549c7ea8e0ca799724fb818e1120 upstream. The idea with this update is to get the latest KVM caps for RISC-V. [Backport Changes] 1. In file linux-headers/linux/kvm.h updated the custom ioctl definition KVM_LOAD_USER_DATA in the Euler codebase to use the unused ioctl number 0x43 instead of 0x49, to avoid conflict with the upstream definition KVM_SET_USER_MEMORY_REGION2. The command number 0x49 is allocated in upstream QEMU for KVM_SET_USER_MEMORY_REGION2 which was added as part sev-snp support and is used extensively even in latest QEMU releases. Therefore, changing this number causes deviation from the upstream which is risky, as it may introduce compatibility issues across KVM and QEMU interactions. So, this change ensures compatibility with upstream KVM/QEMU interfaces and prevents potential issues in future backports. 2. Changes to file linux-headers/linux/iommufd.h were skipped since the current codebase has already been updated with latest changes in commit ac715e361fdb (Update iommufd.h header for vSVA) Signed-off-by: Daniel Henrique Barboza Acked-by: Alistair Francis Message-ID: <20240304134732.386590-2-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- include/standard-headers/drm/drm_fourcc.h | 10 +- include/standard-headers/linux/ethtool.h | 41 +++-- .../standard-headers/linux/virtio_config.h | 8 +- include/standard-headers/linux/virtio_pci.h | 68 +++++++++ include/standard-headers/linux/virtio_pmem.h | 7 + linux-headers/asm-generic/unistd.h | 15 +- linux-headers/asm-mips/mman.h | 2 +- linux-headers/asm-mips/unistd_n32.h | 5 + linux-headers/asm-mips/unistd_n64.h | 5 + linux-headers/asm-mips/unistd_o32.h | 5 + linux-headers/asm-powerpc/unistd_32.h | 5 + linux-headers/asm-powerpc/unistd_64.h | 5 + linux-headers/asm-riscv/kvm.h | 40 +++++ linux-headers/asm-s390/unistd_32.h | 5 + linux-headers/asm-s390/unistd_64.h | 5 + linux-headers/asm-x86/kvm.h | 3 + linux-headers/asm-x86/unistd_32.h | 5 + linux-headers/asm-x86/unistd_64.h | 5 + linux-headers/asm-x86/unistd_x32.h | 5 + linux-headers/linux/kvm.h | 144 +++++++----------- linux-headers/linux/userfaultfd.h | 29 +++- linux-headers/linux/vfio.h | 1 + 22 files changed, 305 insertions(+), 113 deletions(-) diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h index 3afb70160f..b72917073d 100644 --- a/include/standard-headers/drm/drm_fourcc.h +++ b/include/standard-headers/drm/drm_fourcc.h @@ -53,7 +53,7 @@ extern "C" { * Format modifiers may change any property of the buffer, including the number * of planes and/or the required allocation size. Format modifiers are * vendor-namespaced, and as such the relationship between a fourcc code and a - * modifier is specific to the modifer being used. For example, some modifiers + * modifier is specific to the modifier being used. For example, some modifiers * may preserve meaning - such as number of planes - from the fourcc code, * whereas others may not. * @@ -78,7 +78,7 @@ extern "C" { * format. * - Higher-level programs interfacing with KMS/GBM/EGL/Vulkan/etc: these users * see modifiers as opaque tokens they can check for equality and intersect. - * These users musn't need to know to reason about the modifier value + * These users mustn't need to know to reason about the modifier value * (i.e. they are not expected to extract information out of the modifier). * * Vendors should document their modifier usage in as much detail as @@ -539,7 +539,7 @@ extern "C" { * This is a tiled layout using 4Kb tiles in row-major layout. * Within the tile pixels are laid out in 16 256 byte units / sub-tiles which * are arranged in four groups (two wide, two high) with column-major layout. - * Each group therefore consits out of four 256 byte units, which are also laid + * Each group therefore consists out of four 256 byte units, which are also laid * out as 2x2 column-major. * 256 byte units are made out of four 64 byte blocks of pixels, producing * either a square block or a 2:1 unit. @@ -1102,7 +1102,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier) */ /* - * The top 4 bits (out of the 56 bits alloted for specifying vendor specific + * The top 4 bits (out of the 56 bits allotted for specifying vendor specific * modifiers) denote the category for modifiers. Currently we have three * categories of modifiers ie AFBC, MISC and AFRC. We can have a maximum of * sixteen different categories. @@ -1418,7 +1418,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier) * Amlogic FBC Memory Saving mode * * Indicates the storage is packed when pixel size is multiple of word - * boudaries, i.e. 8bit should be stored in this mode to save allocation + * boundaries, i.e. 8bit should be stored in this mode to save allocation * memory. * * This mode reduces body layout to 3072 bytes per 64x32 superblock with diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h index 99fcddf04f..dfb54eff6f 100644 --- a/include/standard-headers/linux/ethtool.h +++ b/include/standard-headers/linux/ethtool.h @@ -1266,6 +1266,8 @@ struct ethtool_rxfh_indir { * hardware hash key. * @hfunc: Defines the current RSS hash function used by HW (or to be set to). * Valid values are one of the %ETH_RSS_HASH_*. + * @input_xfrm: Defines how the input data is transformed. Valid values are one + * of %RXH_XFRM_*. * @rsvd8: Reserved for future use; see the note on reserved space. * @rsvd32: Reserved for future use; see the note on reserved space. * @rss_config: RX ring/queue index for each hash value i.e., indirection table @@ -1285,7 +1287,8 @@ struct ethtool_rxfh { uint32_t indir_size; uint32_t key_size; uint8_t hfunc; - uint8_t rsvd8[3]; + uint8_t input_xfrm; + uint8_t rsvd8[2]; uint32_t rsvd32; uint32_t rss_config[]; }; @@ -1992,6 +1995,15 @@ static inline int ethtool_validate_duplex(uint8_t duplex) #define WOL_MODE_COUNT 8 +/* RSS hash function data + * XOR the corresponding source and destination fields of each specified + * protocol. Both copies of the XOR'ed fields are fed into the RSS and RXHASH + * calculation. Note that this XORing reduces the input set entropy and could + * be exploited to reduce the RSS queue spread. + */ +#define RXH_XFRM_SYM_XOR (1 << 0) +#define RXH_XFRM_NO_CHANGE 0xff + /* L2-L4 network traffic flow types */ #define TCP_V4_FLOW 0x01 /* hash or spec (tcp_ip4_spec) */ #define UDP_V4_FLOW 0x02 /* hash or spec (udp_ip4_spec) */ @@ -2128,18 +2140,6 @@ enum ethtool_reset_flags { * refused. For drivers: ignore this field (use kernel's * __ETHTOOL_LINK_MODE_MASK_NBITS instead), any change to it will * be overwritten by kernel. - * @supported: Bitmap with each bit meaning given by - * %ethtool_link_mode_bit_indices for the link modes, physical - * connectors and other link features for which the interface - * supports autonegotiation or auto-detection. Read-only. - * @advertising: Bitmap with each bit meaning given by - * %ethtool_link_mode_bit_indices for the link modes, physical - * connectors and other link features that are advertised through - * autonegotiation or enabled for auto-detection. - * @lp_advertising: Bitmap with each bit meaning given by - * %ethtool_link_mode_bit_indices for the link modes, and other - * link features that the link partner advertised through - * autonegotiation; 0 if unknown or not applicable. Read-only. * @transceiver: Used to distinguish different possible PHY types, * reported consistently by PHYLIB. Read-only. * @master_slave_cfg: Master/slave port mode. @@ -2181,6 +2181,21 @@ enum ethtool_reset_flags { * %set_link_ksettings() should validate all fields other than @cmd * and @link_mode_masks_nwords that are not described as read-only or * deprecated, and must ignore all fields described as read-only. + * + * @link_mode_masks is divided into three bitfields, each of length + * @link_mode_masks_nwords: + * - supported: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, physical + * connectors and other link features for which the interface + * supports autonegotiation or auto-detection. Read-only. + * - advertising: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, physical + * connectors and other link features that are advertised through + * autonegotiation or enabled for auto-detection. + * - lp_advertising: Bitmap with each bit meaning given by + * %ethtool_link_mode_bit_indices for the link modes, and other + * link features that the link partner advertised through + * autonegotiation; 0 if unknown or not applicable. Read-only. */ struct ethtool_link_settings { uint32_t cmd; diff --git a/include/standard-headers/linux/virtio_config.h b/include/standard-headers/linux/virtio_config.h index bfd1ca643e..45be0fa1bc 100644 --- a/include/standard-headers/linux/virtio_config.h +++ b/include/standard-headers/linux/virtio_config.h @@ -52,7 +52,7 @@ * rest are per-device feature bits. */ #define VIRTIO_TRANSPORT_F_START 28 -#define VIRTIO_TRANSPORT_F_END 41 +#define VIRTIO_TRANSPORT_F_END 42 #ifndef VIRTIO_CONFIG_NO_LEGACY /* Do we get callbacks when the ring is completely used, even if we've @@ -112,4 +112,10 @@ * This feature indicates that the driver can reset a queue individually. */ #define VIRTIO_F_RING_RESET 40 + +/* + * This feature indicates that the device support administration virtqueues. + */ +#define VIRTIO_F_ADMIN_VQ 41 + #endif /* _LINUX_VIRTIO_CONFIG_H */ diff --git a/include/standard-headers/linux/virtio_pci.h b/include/standard-headers/linux/virtio_pci.h index b7fdfd0668..3e2bc2c97e 100644 --- a/include/standard-headers/linux/virtio_pci.h +++ b/include/standard-headers/linux/virtio_pci.h @@ -175,6 +175,9 @@ struct virtio_pci_modern_common_cfg { uint16_t queue_notify_data; /* read-write */ uint16_t queue_reset; /* read-write */ + + uint16_t admin_queue_index; /* read-only */ + uint16_t admin_queue_num; /* read-only */ }; /* Fields in VIRTIO_PCI_CAP_PCI_CFG: */ @@ -215,7 +218,72 @@ struct virtio_pci_cfg_cap { #define VIRTIO_PCI_COMMON_Q_USEDHI 52 #define VIRTIO_PCI_COMMON_Q_NDATA 56 #define VIRTIO_PCI_COMMON_Q_RESET 58 +#define VIRTIO_PCI_COMMON_ADM_Q_IDX 60 +#define VIRTIO_PCI_COMMON_ADM_Q_NUM 62 #endif /* VIRTIO_PCI_NO_MODERN */ +/* Admin command status. */ +#define VIRTIO_ADMIN_STATUS_OK 0 + +/* Admin command opcode. */ +#define VIRTIO_ADMIN_CMD_LIST_QUERY 0x0 +#define VIRTIO_ADMIN_CMD_LIST_USE 0x1 + +/* Admin command group type. */ +#define VIRTIO_ADMIN_GROUP_TYPE_SRIOV 0x1 + +/* Transitional device admin command. */ +#define VIRTIO_ADMIN_CMD_LEGACY_COMMON_CFG_WRITE 0x2 +#define VIRTIO_ADMIN_CMD_LEGACY_COMMON_CFG_READ 0x3 +#define VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_WRITE 0x4 +#define VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_READ 0x5 +#define VIRTIO_ADMIN_CMD_LEGACY_NOTIFY_INFO 0x6 + +struct QEMU_PACKED virtio_admin_cmd_hdr { + uint16_t opcode; + /* + * 1 - SR-IOV + * 2-65535 - reserved + */ + uint16_t group_type; + /* Unused, reserved for future extensions. */ + uint8_t reserved1[12]; + uint64_t group_member_id; +}; + +struct QEMU_PACKED virtio_admin_cmd_status { + uint16_t status; + uint16_t status_qualifier; + /* Unused, reserved for future extensions. */ + uint8_t reserved2[4]; +}; + +struct QEMU_PACKED virtio_admin_cmd_legacy_wr_data { + uint8_t offset; /* Starting offset of the register(s) to write. */ + uint8_t reserved[7]; + uint8_t registers[]; +}; + +struct QEMU_PACKED virtio_admin_cmd_legacy_rd_data { + uint8_t offset; /* Starting offset of the register(s) to read. */ +}; + +#define VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_END 0 +#define VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_OWNER_DEV 0x1 +#define VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_OWNER_MEM 0x2 + +#define VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO 4 + +struct QEMU_PACKED virtio_admin_cmd_notify_info_data { + uint8_t flags; /* 0 = end of list, 1 = owner device, 2 = member device */ + uint8_t bar; /* BAR of the member or the owner device */ + uint8_t padding[6]; + uint64_t offset; /* Offset within bar. */ +}; + +struct virtio_admin_cmd_notify_info_result { + struct virtio_admin_cmd_notify_info_data entries[VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO]; +}; + #endif diff --git a/include/standard-headers/linux/virtio_pmem.h b/include/standard-headers/linux/virtio_pmem.h index fc029de798..1a2576d017 100644 --- a/include/standard-headers/linux/virtio_pmem.h +++ b/include/standard-headers/linux/virtio_pmem.h @@ -14,6 +14,13 @@ #include "standard-headers/linux/virtio_ids.h" #include "standard-headers/linux/virtio_config.h" +/* Feature bits */ +/* guest physical address range will be indicated as shared memory region 0 */ +#define VIRTIO_PMEM_F_SHMEM_REGION 0 + +/* shmid of the shared memory region corresponding to the pmem */ +#define VIRTIO_PMEM_SHMEM_REGION_ID 0 + struct virtio_pmem_config { uint64_t start; uint64_t size; diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h index 756b013fb8..75f00965ab 100644 --- a/linux-headers/asm-generic/unistd.h +++ b/linux-headers/asm-generic/unistd.h @@ -829,8 +829,21 @@ __SYSCALL(__NR_futex_wait, sys_futex_wait) #define __NR_futex_requeue 456 __SYSCALL(__NR_futex_requeue, sys_futex_requeue) +#define __NR_statmount 457 +__SYSCALL(__NR_statmount, sys_statmount) + +#define __NR_listmount 458 +__SYSCALL(__NR_listmount, sys_listmount) + +#define __NR_lsm_get_self_attr 459 +__SYSCALL(__NR_lsm_get_self_attr, sys_lsm_get_self_attr) +#define __NR_lsm_set_self_attr 460 +__SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr) +#define __NR_lsm_list_modules 461 +__SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) + #undef __NR_syscalls -#define __NR_syscalls 457 +#define __NR_syscalls 462 /* * 32 bit systems traditionally used different diff --git a/linux-headers/asm-mips/mman.h b/linux-headers/asm-mips/mman.h index c6e1fc77c9..9c48d9a21a 100644 --- a/linux-headers/asm-mips/mman.h +++ b/linux-headers/asm-mips/mman.h @@ -88,7 +88,7 @@ #define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ #define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ -#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, +#define MADV_DONTDUMP 16 /* Explicitly exclude from core dump, overrides the coredump filter bits */ #define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h index 994b6f008f..ce2e050a9b 100644 --- a/linux-headers/asm-mips/unistd_n32.h +++ b/linux-headers/asm-mips/unistd_n32.h @@ -385,5 +385,10 @@ #define __NR_futex_wake (__NR_Linux + 454) #define __NR_futex_wait (__NR_Linux + 455) #define __NR_futex_requeue (__NR_Linux + 456) +#define __NR_statmount (__NR_Linux + 457) +#define __NR_listmount (__NR_Linux + 458) +#define __NR_lsm_get_self_attr (__NR_Linux + 459) +#define __NR_lsm_set_self_attr (__NR_Linux + 460) +#define __NR_lsm_list_modules (__NR_Linux + 461) #endif /* _ASM_UNISTD_N32_H */ diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h index 41dcf5877a..5bfb3733ff 100644 --- a/linux-headers/asm-mips/unistd_n64.h +++ b/linux-headers/asm-mips/unistd_n64.h @@ -361,5 +361,10 @@ #define __NR_futex_wake (__NR_Linux + 454) #define __NR_futex_wait (__NR_Linux + 455) #define __NR_futex_requeue (__NR_Linux + 456) +#define __NR_statmount (__NR_Linux + 457) +#define __NR_listmount (__NR_Linux + 458) +#define __NR_lsm_get_self_attr (__NR_Linux + 459) +#define __NR_lsm_set_self_attr (__NR_Linux + 460) +#define __NR_lsm_list_modules (__NR_Linux + 461) #endif /* _ASM_UNISTD_N64_H */ diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h index ae9d334d96..02eaecd020 100644 --- a/linux-headers/asm-mips/unistd_o32.h +++ b/linux-headers/asm-mips/unistd_o32.h @@ -431,5 +431,10 @@ #define __NR_futex_wake (__NR_Linux + 454) #define __NR_futex_wait (__NR_Linux + 455) #define __NR_futex_requeue (__NR_Linux + 456) +#define __NR_statmount (__NR_Linux + 457) +#define __NR_listmount (__NR_Linux + 458) +#define __NR_lsm_get_self_attr (__NR_Linux + 459) +#define __NR_lsm_set_self_attr (__NR_Linux + 460) +#define __NR_lsm_list_modules (__NR_Linux + 461) #endif /* _ASM_UNISTD_O32_H */ diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h index b9b23d66d7..bbab08d6ec 100644 --- a/linux-headers/asm-powerpc/unistd_32.h +++ b/linux-headers/asm-powerpc/unistd_32.h @@ -438,6 +438,11 @@ #define __NR_futex_wake 454 #define __NR_futex_wait 455 #define __NR_futex_requeue 456 +#define __NR_statmount 457 +#define __NR_listmount 458 +#define __NR_lsm_get_self_attr 459 +#define __NR_lsm_set_self_attr 460 +#define __NR_lsm_list_modules 461 #endif /* _ASM_UNISTD_32_H */ diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h index cbb4b3e8f7..af34cde70f 100644 --- a/linux-headers/asm-powerpc/unistd_64.h +++ b/linux-headers/asm-powerpc/unistd_64.h @@ -410,6 +410,11 @@ #define __NR_futex_wake 454 #define __NR_futex_wait 455 #define __NR_futex_requeue 456 +#define __NR_statmount 457 +#define __NR_listmount 458 +#define __NR_lsm_get_self_attr 459 +#define __NR_lsm_set_self_attr 460 +#define __NR_lsm_list_modules 461 #endif /* _ASM_UNISTD_64_H */ diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h index 60d3b21dea..7499e88a94 100644 --- a/linux-headers/asm-riscv/kvm.h +++ b/linux-headers/asm-riscv/kvm.h @@ -139,6 +139,33 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZIHPM, KVM_RISCV_ISA_EXT_SMSTATEEN, KVM_RISCV_ISA_EXT_ZICOND, + KVM_RISCV_ISA_EXT_ZBC, + KVM_RISCV_ISA_EXT_ZBKB, + KVM_RISCV_ISA_EXT_ZBKC, + KVM_RISCV_ISA_EXT_ZBKX, + KVM_RISCV_ISA_EXT_ZKND, + KVM_RISCV_ISA_EXT_ZKNE, + KVM_RISCV_ISA_EXT_ZKNH, + KVM_RISCV_ISA_EXT_ZKR, + KVM_RISCV_ISA_EXT_ZKSED, + KVM_RISCV_ISA_EXT_ZKSH, + KVM_RISCV_ISA_EXT_ZKT, + KVM_RISCV_ISA_EXT_ZVBB, + KVM_RISCV_ISA_EXT_ZVBC, + KVM_RISCV_ISA_EXT_ZVKB, + KVM_RISCV_ISA_EXT_ZVKG, + KVM_RISCV_ISA_EXT_ZVKNED, + KVM_RISCV_ISA_EXT_ZVKNHA, + KVM_RISCV_ISA_EXT_ZVKNHB, + KVM_RISCV_ISA_EXT_ZVKSED, + KVM_RISCV_ISA_EXT_ZVKSH, + KVM_RISCV_ISA_EXT_ZVKT, + KVM_RISCV_ISA_EXT_ZFH, + KVM_RISCV_ISA_EXT_ZFHMIN, + KVM_RISCV_ISA_EXT_ZIHINTNTL, + KVM_RISCV_ISA_EXT_ZVFH, + KVM_RISCV_ISA_EXT_ZVFHMIN, + KVM_RISCV_ISA_EXT_ZFA, KVM_RISCV_ISA_EXT_MAX, }; @@ -157,9 +184,16 @@ enum KVM_RISCV_SBI_EXT_ID { KVM_RISCV_SBI_EXT_EXPERIMENTAL, KVM_RISCV_SBI_EXT_VENDOR, KVM_RISCV_SBI_EXT_DBCN, + KVM_RISCV_SBI_EXT_STA, KVM_RISCV_SBI_EXT_MAX, }; +/* SBI STA extension registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ +struct kvm_riscv_sbi_sta { + unsigned long shmem_lo; + unsigned long shmem_hi; +}; + /* Possible states for kvm_riscv_timer */ #define KVM_RISCV_TIMER_STATE_OFF 0 #define KVM_RISCV_TIMER_STATE_ON 1 @@ -241,6 +275,12 @@ enum KVM_RISCV_SBI_EXT_ID { #define KVM_REG_RISCV_VECTOR_REG(n) \ ((n) + sizeof(struct __riscv_v_ext_state) / sizeof(unsigned long)) +/* Registers for specific SBI extensions are mapped as type 10 */ +#define KVM_REG_RISCV_SBI_STATE (0x0a << KVM_REG_RISCV_TYPE_SHIFT) +#define KVM_REG_RISCV_SBI_STA (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT) +#define KVM_REG_RISCV_SBI_STA_REG(name) \ + (offsetof(struct kvm_riscv_sbi_sta, name) / sizeof(unsigned long)) + /* Device Control API: RISC-V AIA */ #define KVM_DEV_RISCV_APLIC_ALIGN 0x1000 #define KVM_DEV_RISCV_APLIC_SIZE 0x4000 diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h index c093e6d5f9..a3ece69d82 100644 --- a/linux-headers/asm-s390/unistd_32.h +++ b/linux-headers/asm-s390/unistd_32.h @@ -429,5 +429,10 @@ #define __NR_futex_wake 454 #define __NR_futex_wait 455 #define __NR_futex_requeue 456 +#define __NR_statmount 457 +#define __NR_listmount 458 +#define __NR_lsm_get_self_attr 459 +#define __NR_lsm_set_self_attr 460 +#define __NR_lsm_list_modules 461 #endif /* _ASM_S390_UNISTD_32_H */ diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h index 114c0569a4..8c5fd93495 100644 --- a/linux-headers/asm-s390/unistd_64.h +++ b/linux-headers/asm-s390/unistd_64.h @@ -377,5 +377,10 @@ #define __NR_futex_wake 454 #define __NR_futex_wait 455 #define __NR_futex_requeue 456 +#define __NR_statmount 457 +#define __NR_listmount 458 +#define __NR_lsm_get_self_attr 459 +#define __NR_lsm_set_self_attr 460 +#define __NR_lsm_list_modules 461 #endif /* _ASM_S390_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index 2b3a8f7bd2..003fb74534 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -560,4 +560,7 @@ struct kvm_pmu_event_filter { /* x86-specific KVM_EXIT_HYPERCALL flags. */ #define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0) +#define KVM_X86_DEFAULT_VM 0 +#define KVM_X86_SW_PROTECTED_VM 1 + #endif /* _ASM_X86_KVM_H */ diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h index 329649c377..5c9c329e93 100644 --- a/linux-headers/asm-x86/unistd_32.h +++ b/linux-headers/asm-x86/unistd_32.h @@ -447,6 +447,11 @@ #define __NR_futex_wake 454 #define __NR_futex_wait 455 #define __NR_futex_requeue 456 +#define __NR_statmount 457 +#define __NR_listmount 458 +#define __NR_lsm_get_self_attr 459 +#define __NR_lsm_set_self_attr 460 +#define __NR_lsm_list_modules 461 #endif /* _ASM_UNISTD_32_H */ diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h index 4583606ce6..d9aab7ae87 100644 --- a/linux-headers/asm-x86/unistd_64.h +++ b/linux-headers/asm-x86/unistd_64.h @@ -369,6 +369,11 @@ #define __NR_futex_wake 454 #define __NR_futex_wait 455 #define __NR_futex_requeue 456 +#define __NR_statmount 457 +#define __NR_listmount 458 +#define __NR_lsm_get_self_attr 459 +#define __NR_lsm_set_self_attr 460 +#define __NR_lsm_list_modules 461 #endif /* _ASM_UNISTD_64_H */ diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h index 146d74d8e4..63cdd1ee43 100644 --- a/linux-headers/asm-x86/unistd_x32.h +++ b/linux-headers/asm-x86/unistd_x32.h @@ -321,6 +321,11 @@ #define __NR_futex_wake (__X32_SYSCALL_BIT + 454) #define __NR_futex_wait (__X32_SYSCALL_BIT + 455) #define __NR_futex_requeue (__X32_SYSCALL_BIT + 456) +#define __NR_statmount (__X32_SYSCALL_BIT + 457) +#define __NR_listmount (__X32_SYSCALL_BIT + 458) +#define __NR_lsm_get_self_attr (__X32_SYSCALL_BIT + 459) +#define __NR_lsm_set_self_attr (__X32_SYSCALL_BIT + 460) +#define __NR_lsm_list_modules (__X32_SYSCALL_BIT + 461) #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) #define __NR_ioctl (__X32_SYSCALL_BIT + 514) diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index 96bc60475e..c7209118e5 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -18,76 +18,6 @@ #define KVM_API_VERSION 12 -/* *** Deprecated interfaces *** */ - -#define KVM_TRC_SHIFT 16 - -#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT) -#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1)) - -#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01) -#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02) -#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01) - -#define KVM_TRC_HEAD_SIZE 12 -#define KVM_TRC_CYCLE_SIZE 8 -#define KVM_TRC_EXTRA_MAX 7 - -#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) -#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) -#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) -#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) -#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) -#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) -#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) -#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) -#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) -#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) -#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) -#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) -#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) -#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) -#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10) -#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) -#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) -#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) -#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) -#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) -#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16) -#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17) -#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) -#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) - -struct kvm_user_trace_setup { - __u32 buf_size; - __u32 buf_nr; -}; - -#define __KVM_DEPRECATED_MAIN_W_0x06 \ - _IOW(KVMIO, 0x06, struct kvm_user_trace_setup) -#define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07) -#define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08) - -#define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq) - -struct kvm_breakpoint { - __u32 enabled; - __u32 padding; - __u64 address; -}; - -struct kvm_debug_guest { - __u32 enabled; - __u32 pad; - struct kvm_breakpoint breakpoints[4]; - __u32 singlestep; -}; - -#define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest) - -/* *** End of deprecated interfaces *** */ - - /* for KVM_SET_USER_MEMORY_REGION */ struct kvm_userspace_memory_region { __u32 slot; @@ -97,6 +27,19 @@ struct kvm_userspace_memory_region { __u64 userspace_addr; /* start of the userspace allocated memory */ }; +/* for KVM_SET_USER_MEMORY_REGION2 */ +struct kvm_userspace_memory_region2 { + __u32 slot; + __u32 flags; + __u64 guest_phys_addr; + __u64 memory_size; + __u64 userspace_addr; + __u64 guest_memfd_offset; + __u32 guest_memfd; + __u32 pad1; + __u64 pad2[14]; +}; + /* * The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for * userspace, other bits are reserved for kvm internal use which are defined @@ -104,6 +47,7 @@ struct kvm_userspace_memory_region { */ #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) #define KVM_MEM_READONLY (1UL << 1) +#define KVM_MEM_GUEST_MEMFD (1UL << 2) /* for KVM_IRQ_LINE */ struct kvm_irq_level { @@ -267,6 +211,7 @@ struct kvm_xen_exit { #define KVM_EXIT_RISCV_CSR 36 #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 +#define KVM_EXIT_MEMORY_FAULT 39 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -517,6 +462,13 @@ struct kvm_run { #define KVM_NOTIFY_CONTEXT_INVALID (1 << 0) __u32 flags; } notify; + /* KVM_EXIT_MEMORY_FAULT */ + struct { +#define KVM_MEMORY_EXIT_FLAG_PRIVATE (1ULL << 3) + __u64 flags; + __u64 gpa; + __u64 size; + } memory_fault; /* Fix the size of the union. */ char padding[256]; }; @@ -961,9 +913,6 @@ struct kvm_ppc_resize_hpt { */ #define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ #define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2) -#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06 -#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 -#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 #define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) #define KVM_GET_MSR_FEATURE_INDEX_LIST _IOWR(KVMIO, 0x0a, struct kvm_msr_list) @@ -1219,6 +1168,12 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 #define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 #define KVM_CAP_ARM_RME 300 +#define KVM_CAP_USER_MEMORY2 231 +#define KVM_CAP_MEMORY_FAULT_INFO 232 +#define KVM_CAP_MEMORY_ATTRIBUTES 233 +#define KVM_CAP_GUEST_MEMFD 234 +#define KVM_CAP_VM_TYPES 235 + #define KVM_CAP_SEV_ES_GHCB 500 #define KVM_CAP_HYGON_COCO_EXT 501 @@ -1325,6 +1280,7 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) #define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) +#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) struct kvm_xen_hvm_config { __u32 flags; @@ -1556,7 +1512,10 @@ struct kvm_user_data { struct kvm_userspace_memory_region) #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) -#define KVM_LOAD_USER_DATA _IOW(KVMIO, 0x49, struct kvm_user_data) +#define KVM_LOAD_USER_DATA _IOW(KVMIO, 0x43, struct kvm_user_data) +#define KVM_SET_USER_MEMORY_REGION2 _IOW(KVMIO, 0x49, \ + struct kvm_userspace_memory_region2) + /* enable ucontrol for s390 */ struct kvm_s390_ucas_mapping { __u64 user_addr; @@ -1580,20 +1539,8 @@ struct kvm_s390_ucas_mapping { _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) #define KVM_UNREGISTER_COALESCED_MMIO \ _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) -#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ - struct kvm_assigned_pci_dev) #define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) -/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */ -#define KVM_ASSIGN_IRQ __KVM_DEPRECATED_VM_R_0x70 -#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq) #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) -#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ - struct kvm_assigned_pci_dev) -#define KVM_ASSIGN_SET_MSIX_NR _IOW(KVMIO, 0x73, \ - struct kvm_assigned_msix_nr) -#define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO, 0x74, \ - struct kvm_assigned_msix_entry) -#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) #define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) #define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) #define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) @@ -1621,9 +1568,6 @@ struct kvm_master_dev_info * KVM_CAP_VM_TSC_CONTROL to set defaults for a VM */ #define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2) #define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3) -/* Available with KVM_CAP_PCI_2_3 */ -#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \ - struct kvm_assigned_pci_dev) /* Available with KVM_CAP_SIGNAL_MSI */ #define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi) /* Available with KVM_CAP_PPC_GET_SMMU_INFO */ @@ -1680,8 +1624,6 @@ struct kvm_master_dev_info #define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs) #define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation) #define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt) -/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */ -#define KVM_DEBUG_GUEST __KVM_DEPRECATED_VCPU_W_0x87 #define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs) #define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs) #define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid) @@ -2469,4 +2411,24 @@ struct kvm_arm_rmm_psci_complete { __u32 padding[3]; }; +/* Available with KVM_CAP_MEMORY_ATTRIBUTES */ +#define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes) + +struct kvm_memory_attributes { + __u64 address; + __u64 size; + __u64 attributes; + __u64 flags; +}; + +#define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) + +#define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) + +struct kvm_create_guest_memfd { + __u64 size; + __u64 flags; + __u64 reserved[6]; +}; + #endif /* __LINUX_KVM_H */ diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h index 953c75feda..4283de22d5 100644 --- a/linux-headers/linux/userfaultfd.h +++ b/linux-headers/linux/userfaultfd.h @@ -41,7 +41,8 @@ UFFD_FEATURE_WP_HUGETLBFS_SHMEM | \ UFFD_FEATURE_WP_UNPOPULATED | \ UFFD_FEATURE_POISON | \ - UFFD_FEATURE_WP_ASYNC) + UFFD_FEATURE_WP_ASYNC | \ + UFFD_FEATURE_MOVE) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -50,6 +51,7 @@ ((__u64)1 << _UFFDIO_WAKE | \ (__u64)1 << _UFFDIO_COPY | \ (__u64)1 << _UFFDIO_ZEROPAGE | \ + (__u64)1 << _UFFDIO_MOVE | \ (__u64)1 << _UFFDIO_WRITEPROTECT | \ (__u64)1 << _UFFDIO_CONTINUE | \ (__u64)1 << _UFFDIO_POISON) @@ -73,6 +75,7 @@ #define _UFFDIO_WAKE (0x02) #define _UFFDIO_COPY (0x03) #define _UFFDIO_ZEROPAGE (0x04) +#define _UFFDIO_MOVE (0x05) #define _UFFDIO_WRITEPROTECT (0x06) #define _UFFDIO_CONTINUE (0x07) #define _UFFDIO_POISON (0x08) @@ -92,6 +95,8 @@ struct uffdio_copy) #define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \ struct uffdio_zeropage) +#define UFFDIO_MOVE _IOWR(UFFDIO, _UFFDIO_MOVE, \ + struct uffdio_move) #define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ struct uffdio_writeprotect) #define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \ @@ -222,6 +227,9 @@ struct uffdio_api { * asynchronous mode is supported in which the write fault is * automatically resolved and write-protection is un-set. * It implies UFFD_FEATURE_WP_UNPOPULATED. + * + * UFFD_FEATURE_MOVE indicates that the kernel supports moving an + * existing page contents from userspace. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -239,6 +247,7 @@ struct uffdio_api { #define UFFD_FEATURE_WP_UNPOPULATED (1<<13) #define UFFD_FEATURE_POISON (1<<14) #define UFFD_FEATURE_WP_ASYNC (1<<15) +#define UFFD_FEATURE_MOVE (1<<16) __u64 features; __u64 ioctls; @@ -347,6 +356,24 @@ struct uffdio_poison { __s64 updated; }; +struct uffdio_move { + __u64 dst; + __u64 src; + __u64 len; + /* + * Especially if used to atomically remove memory from the + * address space the wake on the dst range is not needed. + */ +#define UFFDIO_MOVE_MODE_DONTWAKE ((__u64)1<<0) +#define UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES ((__u64)1<<1) + __u64 mode; + /* + * "move" is written by the ioctl and must be at the end: the + * copy_from_user will not read the last 8 bytes. + */ + __s64 move; +}; + /* * Flags for the userfaultfd(2) system call itself. */ diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h index 5b1e2871af..68f7d446b3 100644 --- a/linux-headers/linux/vfio.h +++ b/linux-headers/linux/vfio.h @@ -1231,6 +1231,7 @@ enum vfio_device_mig_state { VFIO_DEVICE_STATE_RUNNING_P2P = 5, VFIO_DEVICE_STATE_PRE_COPY = 6, VFIO_DEVICE_STATE_PRE_COPY_P2P = 7, + VFIO_DEVICE_STATE_NR, }; /** -- Gitee From e092c5937ae9e920a838f2cdcb7a5451a2b9944d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 23 Apr 2024 11:46:47 +0200 Subject: [PATCH 012/115] linux-headers: update to current kvm/next commit ab0c7fb22b56523f24d6e127cd4d10ecff67bf85 upstream [Backport Changes] 1. Enum sev_cmd_id was migrated from file linux-headers/linux/kvm.h to file linux-headers/asm-x86/kvm.h along with the additional enum member "KVM_CSV_COMMAND_BATCH" added as part of CSV support in commit b2091d2 (target/i386: csv: add support to encrypt the outgoing pages in the list queued before.) to ensure continued support for Hygon-specific functionalities. Signed-off-by: Paolo Bonzini Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- hw/i386/x86.c | 8 - include/standard-headers/asm-x86/bootparam.h | 17 +- include/standard-headers/asm-x86/kvm_para.h | 3 +- include/standard-headers/asm-x86/setup_data.h | 83 +++ include/standard-headers/linux/ethtool.h | 48 ++ include/standard-headers/linux/fuse.h | 39 +- .../linux/input-event-codes.h | 1 + include/standard-headers/linux/virtio_gpu.h | 2 + include/standard-headers/linux/virtio_pci.h | 10 +- include/standard-headers/linux/virtio_snd.h | 154 ++++ linux-headers/asm-arm64/kvm.h | 15 +- linux-headers/asm-arm64/sve_context.h | 11 + linux-headers/asm-generic/bitsperlong.h | 4 + linux-headers/asm-loongarch/kvm.h | 1 - linux-headers/asm-mips/kvm.h | 2 - linux-headers/asm-powerpc/kvm.h | 45 +- linux-headers/asm-riscv/kvm.h | 3 +- linux-headers/asm-s390/kvm.h | 315 +++++++- linux-headers/asm-x86/kvm.h | 331 ++++++++- linux-headers/linux/bits.h | 15 + linux-headers/linux/kvm.h | 692 +----------------- linux-headers/linux/psp-sev.h | 59 ++ linux-headers/linux/vhost.h | 6 + 23 files changed, 1122 insertions(+), 742 deletions(-) create mode 100644 include/standard-headers/asm-x86/setup_data.h create mode 100644 linux-headers/linux/bits.h diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 2b6291ad8d..d6f9e25abe 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -676,14 +676,6 @@ DeviceState *ioapic_init_secondary(GSIState *gsi_state) return dev; } -struct setup_data { - uint64_t next; - uint32_t type; - uint32_t len; - uint8_t data[]; -} __attribute__((packed)); - - /* * The entry point into the kernel for PVH boot is different from * the native entry point. The PVH entry is defined by the x86/HVM diff --git a/include/standard-headers/asm-x86/bootparam.h b/include/standard-headers/asm-x86/bootparam.h index 0b06d2bff1..b582a105c0 100644 --- a/include/standard-headers/asm-x86/bootparam.h +++ b/include/standard-headers/asm-x86/bootparam.h @@ -2,21 +2,7 @@ #ifndef _ASM_X86_BOOTPARAM_H #define _ASM_X86_BOOTPARAM_H -/* setup_data/setup_indirect types */ -#define SETUP_NONE 0 -#define SETUP_E820_EXT 1 -#define SETUP_DTB 2 -#define SETUP_PCI 3 -#define SETUP_EFI 4 -#define SETUP_APPLE_PROPERTIES 5 -#define SETUP_JAILHOUSE 6 -#define SETUP_CC_BLOB 7 -#define SETUP_IMA 8 -#define SETUP_RNG_SEED 9 -#define SETUP_ENUM_MAX SETUP_RNG_SEED - -#define SETUP_INDIRECT (1<<31) -#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) +#include "standard-headers/asm-x86/setup_data.h" /* ram_size flags */ #define RAMDISK_IMAGE_START_MASK 0x07FF @@ -38,6 +24,7 @@ #define XLF_EFI_KEXEC (1<<4) #define XLF_5LEVEL (1<<5) #define XLF_5LEVEL_ENABLED (1<<6) +#define XLF_MEM_ENCRYPTION (1<<7) #endif /* _ASM_X86_BOOTPARAM_H */ diff --git a/include/standard-headers/asm-x86/kvm_para.h b/include/standard-headers/asm-x86/kvm_para.h index f0235e58a1..9a011d20f0 100644 --- a/include/standard-headers/asm-x86/kvm_para.h +++ b/include/standard-headers/asm-x86/kvm_para.h @@ -92,7 +92,7 @@ struct kvm_clock_pairing { #define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3) /* MSR_KVM_ASYNC_PF_INT */ -#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0) +#define KVM_ASYNC_PF_VEC_MASK __GENMASK(7, 0) /* MSR_KVM_MIGRATION_CONTROL */ #define KVM_MIGRATION_READY (1 << 0) @@ -142,7 +142,6 @@ struct kvm_vcpu_pv_apf_data { uint32_t token; uint8_t pad[56]; - uint32_t enabled; }; #define KVM_PV_EOI_BIT 0 diff --git a/include/standard-headers/asm-x86/setup_data.h b/include/standard-headers/asm-x86/setup_data.h new file mode 100644 index 0000000000..09355f54c5 --- /dev/null +++ b/include/standard-headers/asm-x86/setup_data.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_X86_SETUP_DATA_H +#define _ASM_X86_SETUP_DATA_H + +/* setup_data/setup_indirect types */ +#define SETUP_NONE 0 +#define SETUP_E820_EXT 1 +#define SETUP_DTB 2 +#define SETUP_PCI 3 +#define SETUP_EFI 4 +#define SETUP_APPLE_PROPERTIES 5 +#define SETUP_JAILHOUSE 6 +#define SETUP_CC_BLOB 7 +#define SETUP_IMA 8 +#define SETUP_RNG_SEED 9 +#define SETUP_ENUM_MAX SETUP_RNG_SEED + +#define SETUP_INDIRECT (1<<31) +#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT) + +#ifndef __ASSEMBLY__ + +#include "standard-headers/linux/types.h" + +/* extensible setup data list node */ +struct setup_data { + uint64_t next; + uint32_t type; + uint32_t len; + uint8_t data[]; +}; + +/* extensible setup indirect data node */ +struct setup_indirect { + uint32_t type; + uint32_t reserved; /* Reserved, must be set to zero. */ + uint64_t len; + uint64_t addr; +}; + +/* + * The E820 memory region entry of the boot protocol ABI: + */ +struct boot_e820_entry { + uint64_t addr; + uint64_t size; + uint32_t type; +} QEMU_PACKED; + +/* + * The boot loader is passing platform information via this Jailhouse-specific + * setup data structure. + */ +struct jailhouse_setup_data { + struct { + uint16_t version; + uint16_t compatible_version; + } QEMU_PACKED hdr; + struct { + uint16_t pm_timer_address; + uint16_t num_cpus; + uint64_t pci_mmconfig_base; + uint32_t tsc_khz; + uint32_t apic_khz; + uint8_t standard_ioapic; + uint8_t cpu_ids[255]; + } QEMU_PACKED v1; + struct { + uint32_t flags; + } QEMU_PACKED v2; +} QEMU_PACKED; + +/* + * IMA buffer setup data information from the previous kernel during kexec + */ +struct ima_setup_data { + uint64_t addr; + uint64_t size; +} QEMU_PACKED; + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_SETUP_DATA_H */ diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h index dfb54eff6f..01503784d2 100644 --- a/include/standard-headers/linux/ethtool.h +++ b/include/standard-headers/linux/ethtool.h @@ -2023,6 +2023,53 @@ static inline int ethtool_validate_duplex(uint8_t duplex) #define IPV4_FLOW 0x10 /* hash only */ #define IPV6_FLOW 0x11 /* hash only */ #define ETHER_FLOW 0x12 /* spec only (ether_spec) */ + +/* Used for GTP-U IPv4 and IPv6. + * The format of GTP packets only includes + * elements such as TEID and GTP version. + * It is primarily intended for data communication of the UE. + */ +#define GTPU_V4_FLOW 0x13 /* hash only */ +#define GTPU_V6_FLOW 0x14 /* hash only */ + +/* Use for GTP-C IPv4 and v6. + * The format of these GTP packets does not include TEID. + * Primarily expected to be used for communication + * to create sessions for UE data communication, + * commonly referred to as CSR (Create Session Request). + */ +#define GTPC_V4_FLOW 0x15 /* hash only */ +#define GTPC_V6_FLOW 0x16 /* hash only */ + +/* Use for GTP-C IPv4 and v6. + * Unlike GTPC_V4_FLOW, the format of these GTP packets includes TEID. + * After session creation, it becomes this packet. + * This is mainly used for requests to realize UE handover. + */ +#define GTPC_TEID_V4_FLOW 0x17 /* hash only */ +#define GTPC_TEID_V6_FLOW 0x18 /* hash only */ + +/* Use for GTP-U and extended headers for the PSC (PDU Session Container). + * The format of these GTP packets includes TEID and QFI. + * In 5G communication using UPF (User Plane Function), + * data communication with this extended header is performed. + */ +#define GTPU_EH_V4_FLOW 0x19 /* hash only */ +#define GTPU_EH_V6_FLOW 0x1a /* hash only */ + +/* Use for GTP-U IPv4 and v6 PSC (PDU Session Container) extended headers. + * This differs from GTPU_EH_V(4|6)_FLOW in that it is distinguished by + * UL/DL included in the PSC. + * There are differences in the data included based on Downlink/Uplink, + * and can be used to distinguish packets. + * The functions described so far are useful when you want to + * handle communication from the mobile network in UPF, PGW, etc. + */ +#define GTPU_UL_V4_FLOW 0x1b /* hash only */ +#define GTPU_UL_V6_FLOW 0x1c /* hash only */ +#define GTPU_DL_V4_FLOW 0x1d /* hash only */ +#define GTPU_DL_V6_FLOW 0x1e /* hash only */ + /* Flag to enable additional fields in struct ethtool_rx_flow_spec */ #define FLOW_EXT 0x80000000 #define FLOW_MAC_EXT 0x40000000 @@ -2037,6 +2084,7 @@ static inline int ethtool_validate_duplex(uint8_t duplex) #define RXH_IP_DST (1 << 5) #define RXH_L4_B_0_1 (1 << 6) /* src port in case of TCP/UDP/SCTP */ #define RXH_L4_B_2_3 (1 << 7) /* dst port in case of TCP/UDP/SCTP */ +#define RXH_GTP_TEID (1 << 8) /* teid in case of GTP */ #define RXH_DISCARD (1 << 31) #define RX_CLS_FLOW_DISC 0xffffffffffffffffULL diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h index fc0dcd10ae..bac9dbc49f 100644 --- a/include/standard-headers/linux/fuse.h +++ b/include/standard-headers/linux/fuse.h @@ -211,6 +211,12 @@ * 7.39 * - add FUSE_DIRECT_IO_ALLOW_MMAP * - add FUSE_STATX and related structures + * + * 7.40 + * - add max_stack_depth to fuse_init_out, add FUSE_PASSTHROUGH init flag + * - add backing_id to fuse_open_out, add FOPEN_PASSTHROUGH open flag + * - add FUSE_NO_EXPORT_SUPPORT init flag + * - add FUSE_NOTIFY_RESEND, add FUSE_HAS_RESEND init flag */ #ifndef _LINUX_FUSE_H @@ -242,7 +248,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 39 +#define FUSE_KERNEL_MINOR_VERSION 40 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -349,6 +355,7 @@ struct fuse_file_lock { * FOPEN_STREAM: the file is stream-like (no file position at all) * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode + * FOPEN_PASSTHROUGH: passthrough read/write io for this open file */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) @@ -357,6 +364,7 @@ struct fuse_file_lock { #define FOPEN_STREAM (1 << 4) #define FOPEN_NOFLUSH (1 << 5) #define FOPEN_PARALLEL_DIRECT_WRITES (1 << 6) +#define FOPEN_PASSTHROUGH (1 << 7) /** * INIT request/reply flags @@ -406,6 +414,9 @@ struct fuse_file_lock { * symlink and mknod (single group that matches parent) * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation * FUSE_DIRECT_IO_ALLOW_MMAP: allow shared mmap in FOPEN_DIRECT_IO mode. + * FUSE_NO_EXPORT_SUPPORT: explicitly disable export support + * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit + * of the request ID indicates resend requests */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -445,6 +456,9 @@ struct fuse_file_lock { #define FUSE_CREATE_SUPP_GROUP (1ULL << 34) #define FUSE_HAS_EXPIRE_ONLY (1ULL << 35) #define FUSE_DIRECT_IO_ALLOW_MMAP (1ULL << 36) +#define FUSE_PASSTHROUGH (1ULL << 37) +#define FUSE_NO_EXPORT_SUPPORT (1ULL << 38) +#define FUSE_HAS_RESEND (1ULL << 39) /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP @@ -631,6 +645,7 @@ enum fuse_notify_code { FUSE_NOTIFY_STORE = 4, FUSE_NOTIFY_RETRIEVE = 5, FUSE_NOTIFY_DELETE = 6, + FUSE_NOTIFY_RESEND = 7, FUSE_NOTIFY_CODE_MAX, }; @@ -757,7 +772,7 @@ struct fuse_create_in { struct fuse_open_out { uint64_t fh; uint32_t open_flags; - uint32_t padding; + int32_t backing_id; }; struct fuse_release_in { @@ -873,7 +888,8 @@ struct fuse_init_out { uint16_t max_pages; uint16_t map_alignment; uint32_t flags2; - uint32_t unused[7]; + uint32_t max_stack_depth; + uint32_t unused[6]; }; #define CUSE_INIT_INFO_MAX 4096 @@ -956,6 +972,14 @@ struct fuse_fallocate_in { uint32_t padding; }; +/** + * FUSE request unique ID flag + * + * Indicates whether this is a resend request. The receiver should handle this + * request accordingly. + */ +#define FUSE_UNIQUE_RESEND (1ULL << 63) + struct fuse_in_header { uint32_t len; uint32_t opcode; @@ -1045,9 +1069,18 @@ struct fuse_notify_retrieve_in { uint64_t dummy4; }; +struct fuse_backing_map { + int32_t fd; + uint32_t flags; + uint64_t padding; +}; + /* Device ioctls: */ #define FUSE_DEV_IOC_MAGIC 229 #define FUSE_DEV_IOC_CLONE _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t) +#define FUSE_DEV_IOC_BACKING_OPEN _IOW(FUSE_DEV_IOC_MAGIC, 1, \ + struct fuse_backing_map) +#define FUSE_DEV_IOC_BACKING_CLOSE _IOW(FUSE_DEV_IOC_MAGIC, 2, uint32_t) struct fuse_lseek_in { uint64_t fh; diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h index f6bab08540..2221b0c383 100644 --- a/include/standard-headers/linux/input-event-codes.h +++ b/include/standard-headers/linux/input-event-codes.h @@ -602,6 +602,7 @@ #define KEY_ALS_TOGGLE 0x230 /* Ambient light sensor */ #define KEY_ROTATE_LOCK_TOGGLE 0x231 /* Display rotation lock */ +#define KEY_REFRESH_RATE_TOGGLE 0x232 /* Display refresh rate toggle */ #define KEY_BUTTONCONFIG 0x240 /* AL Button Configuration */ #define KEY_TASKMANAGER 0x241 /* AL Task/Project Manager */ diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h index 2da48d3d4c..2db643ed8f 100644 --- a/include/standard-headers/linux/virtio_gpu.h +++ b/include/standard-headers/linux/virtio_gpu.h @@ -309,6 +309,8 @@ struct virtio_gpu_cmd_submit { #define VIRTIO_GPU_CAPSET_VIRGL 1 #define VIRTIO_GPU_CAPSET_VIRGL2 2 +/* 3 is reserved for gfxstream */ +#define VIRTIO_GPU_CAPSET_VENUS 4 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ struct virtio_gpu_get_capset_info { diff --git a/include/standard-headers/linux/virtio_pci.h b/include/standard-headers/linux/virtio_pci.h index 3e2bc2c97e..4010216103 100644 --- a/include/standard-headers/linux/virtio_pci.h +++ b/include/standard-headers/linux/virtio_pci.h @@ -240,7 +240,7 @@ struct virtio_pci_cfg_cap { #define VIRTIO_ADMIN_CMD_LEGACY_DEV_CFG_READ 0x5 #define VIRTIO_ADMIN_CMD_LEGACY_NOTIFY_INFO 0x6 -struct QEMU_PACKED virtio_admin_cmd_hdr { +struct virtio_admin_cmd_hdr { uint16_t opcode; /* * 1 - SR-IOV @@ -252,20 +252,20 @@ struct QEMU_PACKED virtio_admin_cmd_hdr { uint64_t group_member_id; }; -struct QEMU_PACKED virtio_admin_cmd_status { +struct virtio_admin_cmd_status { uint16_t status; uint16_t status_qualifier; /* Unused, reserved for future extensions. */ uint8_t reserved2[4]; }; -struct QEMU_PACKED virtio_admin_cmd_legacy_wr_data { +struct virtio_admin_cmd_legacy_wr_data { uint8_t offset; /* Starting offset of the register(s) to write. */ uint8_t reserved[7]; uint8_t registers[]; }; -struct QEMU_PACKED virtio_admin_cmd_legacy_rd_data { +struct virtio_admin_cmd_legacy_rd_data { uint8_t offset; /* Starting offset of the register(s) to read. */ }; @@ -275,7 +275,7 @@ struct QEMU_PACKED virtio_admin_cmd_legacy_rd_data { #define VIRTIO_ADMIN_CMD_MAX_NOTIFY_INFO 4 -struct QEMU_PACKED virtio_admin_cmd_notify_info_data { +struct virtio_admin_cmd_notify_info_data { uint8_t flags; /* 0 = end of list, 1 = owner device, 2 = member device */ uint8_t bar; /* BAR of the member or the owner device */ uint8_t padding[6]; diff --git a/include/standard-headers/linux/virtio_snd.h b/include/standard-headers/linux/virtio_snd.h index 1af96b9fc6..860f12e0a4 100644 --- a/include/standard-headers/linux/virtio_snd.h +++ b/include/standard-headers/linux/virtio_snd.h @@ -7,6 +7,14 @@ #include "standard-headers/linux/virtio_types.h" +/******************************************************************************* + * FEATURE BITS + */ +enum { + /* device supports control elements */ + VIRTIO_SND_F_CTLS = 0 +}; + /******************************************************************************* * CONFIGURATION SPACE */ @@ -17,6 +25,8 @@ struct virtio_snd_config { uint32_t streams; /* # of available channel maps */ uint32_t chmaps; + /* # of available control elements */ + uint32_t controls; }; enum { @@ -55,6 +65,15 @@ enum { /* channel map control request types */ VIRTIO_SND_R_CHMAP_INFO = 0x0200, + /* control element request types */ + VIRTIO_SND_R_CTL_INFO = 0x0300, + VIRTIO_SND_R_CTL_ENUM_ITEMS, + VIRTIO_SND_R_CTL_READ, + VIRTIO_SND_R_CTL_WRITE, + VIRTIO_SND_R_CTL_TLV_READ, + VIRTIO_SND_R_CTL_TLV_WRITE, + VIRTIO_SND_R_CTL_TLV_COMMAND, + /* jack event types */ VIRTIO_SND_EVT_JACK_CONNECTED = 0x1000, VIRTIO_SND_EVT_JACK_DISCONNECTED, @@ -63,6 +82,9 @@ enum { VIRTIO_SND_EVT_PCM_PERIOD_ELAPSED = 0x1100, VIRTIO_SND_EVT_PCM_XRUN, + /* control element event types */ + VIRTIO_SND_EVT_CTL_NOTIFY = 0x1200, + /* common status codes */ VIRTIO_SND_S_OK = 0x8000, VIRTIO_SND_S_BAD_MSG, @@ -331,4 +353,136 @@ struct virtio_snd_chmap_info { uint8_t positions[VIRTIO_SND_CHMAP_MAX_SIZE]; }; +/******************************************************************************* + * CONTROL ELEMENTS MESSAGES + */ +struct virtio_snd_ctl_hdr { + /* VIRTIO_SND_R_CTL_XXX */ + struct virtio_snd_hdr hdr; + /* 0 ... virtio_snd_config::controls - 1 */ + uint32_t control_id; +}; + +/* supported roles for control elements */ +enum { + VIRTIO_SND_CTL_ROLE_UNDEFINED = 0, + VIRTIO_SND_CTL_ROLE_VOLUME, + VIRTIO_SND_CTL_ROLE_MUTE, + VIRTIO_SND_CTL_ROLE_GAIN +}; + +/* supported value types for control elements */ +enum { + VIRTIO_SND_CTL_TYPE_BOOLEAN = 0, + VIRTIO_SND_CTL_TYPE_INTEGER, + VIRTIO_SND_CTL_TYPE_INTEGER64, + VIRTIO_SND_CTL_TYPE_ENUMERATED, + VIRTIO_SND_CTL_TYPE_BYTES, + VIRTIO_SND_CTL_TYPE_IEC958 +}; + +/* supported access rights for control elements */ +enum { + VIRTIO_SND_CTL_ACCESS_READ = 0, + VIRTIO_SND_CTL_ACCESS_WRITE, + VIRTIO_SND_CTL_ACCESS_VOLATILE, + VIRTIO_SND_CTL_ACCESS_INACTIVE, + VIRTIO_SND_CTL_ACCESS_TLV_READ, + VIRTIO_SND_CTL_ACCESS_TLV_WRITE, + VIRTIO_SND_CTL_ACCESS_TLV_COMMAND +}; + +struct virtio_snd_ctl_info { + /* common header */ + struct virtio_snd_info hdr; + /* element role (VIRTIO_SND_CTL_ROLE_XXX) */ + uint32_t role; + /* element value type (VIRTIO_SND_CTL_TYPE_XXX) */ + uint32_t type; + /* element access right bit map (1 << VIRTIO_SND_CTL_ACCESS_XXX) */ + uint32_t access; + /* # of members in the element value */ + uint32_t count; + /* index for an element with a non-unique name */ + uint32_t index; + /* name identifier string for the element */ + uint8_t name[44]; + /* additional information about the element's value */ + union { + /* VIRTIO_SND_CTL_TYPE_INTEGER */ + struct { + /* minimum supported value */ + uint32_t min; + /* maximum supported value */ + uint32_t max; + /* fixed step size for value (0 = variable size) */ + uint32_t step; + } integer; + /* VIRTIO_SND_CTL_TYPE_INTEGER64 */ + struct { + /* minimum supported value */ + uint64_t min; + /* maximum supported value */ + uint64_t max; + /* fixed step size for value (0 = variable size) */ + uint64_t step; + } integer64; + /* VIRTIO_SND_CTL_TYPE_ENUMERATED */ + struct { + /* # of options supported for value */ + uint32_t items; + } enumerated; + } value; +}; + +struct virtio_snd_ctl_enum_item { + /* option name */ + uint8_t item[64]; +}; + +struct virtio_snd_ctl_iec958 { + /* AES/IEC958 channel status bits */ + uint8_t status[24]; + /* AES/IEC958 subcode bits */ + uint8_t subcode[147]; + /* nothing */ + uint8_t pad; + /* AES/IEC958 subframe bits */ + uint8_t dig_subframe[4]; +}; + +struct virtio_snd_ctl_value { + union { + /* VIRTIO_SND_CTL_TYPE_BOOLEAN|INTEGER value */ + uint32_t integer[128]; + /* VIRTIO_SND_CTL_TYPE_INTEGER64 value */ + uint64_t integer64[64]; + /* VIRTIO_SND_CTL_TYPE_ENUMERATED value (option indexes) */ + uint32_t enumerated[128]; + /* VIRTIO_SND_CTL_TYPE_BYTES value */ + uint8_t bytes[512]; + /* VIRTIO_SND_CTL_TYPE_IEC958 value */ + struct virtio_snd_ctl_iec958 iec958; + } value; +}; + +/* supported event reason types */ +enum { + /* element's value has changed */ + VIRTIO_SND_CTL_EVT_MASK_VALUE = 0, + /* element's information has changed */ + VIRTIO_SND_CTL_EVT_MASK_INFO, + /* element's metadata has changed */ + VIRTIO_SND_CTL_EVT_MASK_TLV +}; + +struct virtio_snd_ctl_event { + /* VIRTIO_SND_EVT_CTL_NOTIFY */ + struct virtio_snd_hdr hdr; + /* 0 ... virtio_snd_config::controls - 1 */ + uint16_t control_id; + /* event reason bit map (1 << VIRTIO_SND_CTL_EVT_MASK_XXX) */ + uint16_t mask; +}; + #endif /* VIRTIO_SND_IF_H */ diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h index 777b668851..3e783f3936 100644 --- a/linux-headers/asm-arm64/kvm.h +++ b/linux-headers/asm-arm64/kvm.h @@ -37,9 +37,7 @@ #include #include -#define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_IRQ_LINE -#define __KVM_HAVE_READONLY_MEM #define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -76,11 +74,11 @@ struct kvm_regs { /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 -#define KVM_ARM_DEVICE_TYPE_MASK GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ - KVM_ARM_DEVICE_TYPE_SHIFT) +#define KVM_ARM_DEVICE_TYPE_MASK __GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \ + KVM_ARM_DEVICE_TYPE_SHIFT) #define KVM_ARM_DEVICE_ID_SHIFT 16 -#define KVM_ARM_DEVICE_ID_MASK GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ - KVM_ARM_DEVICE_ID_SHIFT) +#define KVM_ARM_DEVICE_ID_MASK __GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \ + KVM_ARM_DEVICE_ID_SHIFT) /* Supported device IDs */ #define KVM_ARM_DEVICE_VGIC_V2 0 @@ -164,6 +162,11 @@ struct kvm_sync_regs { __u64 device_irq_level; }; +/* Bits for run->s.regs.device_irq_level */ +#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) +#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) +#define KVM_ARM_DEV_PMU (1 << 2) + /* * PMU filter structure. Describe a range of events with a particular * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. diff --git a/linux-headers/asm-arm64/sve_context.h b/linux-headers/asm-arm64/sve_context.h index 1d0e3e1d09..d1b1ec8cb1 100644 --- a/linux-headers/asm-arm64/sve_context.h +++ b/linux-headers/asm-arm64/sve_context.h @@ -13,6 +13,17 @@ #define __SVE_VQ_BYTES 16 /* number of bytes per quadword */ +/* + * Yes, __SVE_VQ_MAX is 512 QUADWORDS. + * + * To help ensure forward portability, this is much larger than the + * current maximum value defined by the SVE architecture. While arrays + * or static allocations can be sized based on this value, watch out! + * It will waste a surprisingly large amount of memory. + * + * Dynamic sizing based on the actual runtime vector length is likely to + * be preferable for most purposes. + */ #define __SVE_VQ_MIN 1 #define __SVE_VQ_MAX 512 diff --git a/linux-headers/asm-generic/bitsperlong.h b/linux-headers/asm-generic/bitsperlong.h index 75f320fa91..1fb4f0c9f2 100644 --- a/linux-headers/asm-generic/bitsperlong.h +++ b/linux-headers/asm-generic/bitsperlong.h @@ -24,4 +24,8 @@ #endif #endif +#ifndef __BITS_PER_LONG_LONG +#define __BITS_PER_LONG_LONG 64 +#endif + #endif /* __ASM_GENERIC_BITS_PER_LONG */ diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h index d22b19e134..5bb4cd6f29 100644 --- a/linux-headers/asm-loongarch/kvm.h +++ b/linux-headers/asm-loongarch/kvm.h @@ -14,7 +14,6 @@ * Some parts derived from the x86 version of this file. */ -#define __KVM_HAVE_READONLY_MEM #define __KVM_HAVE_GUEST_DEBUG #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 diff --git a/linux-headers/asm-mips/kvm.h b/linux-headers/asm-mips/kvm.h index edcf717c43..9673dc9cb3 100644 --- a/linux-headers/asm-mips/kvm.h +++ b/linux-headers/asm-mips/kvm.h @@ -20,8 +20,6 @@ * Some parts derived from the x86 version of this file. */ -#define __KVM_HAVE_READONLY_MEM - #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 /* diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h index 9f18fa090f..1691297a76 100644 --- a/linux-headers/asm-powerpc/kvm.h +++ b/linux-headers/asm-powerpc/kvm.h @@ -28,7 +28,6 @@ #define __KVM_HAVE_PPC_SMT #define __KVM_HAVE_IRQCHIP #define __KVM_HAVE_IRQ_LINE -#define __KVM_HAVE_GUEST_DEBUG /* Not always available, but if it is, this is the correct offset. */ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -733,4 +732,48 @@ struct kvm_ppc_xive_eq { #define KVM_XIVE_TIMA_PAGE_OFFSET 0 #define KVM_XIVE_ESB_PAGE_OFFSET 4 +/* for KVM_PPC_GET_PVINFO */ + +#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) + +struct kvm_ppc_pvinfo { + /* out */ + __u32 flags; + __u32 hcall[4]; + __u8 pad[108]; +}; + +/* for KVM_PPC_GET_SMMU_INFO */ +#define KVM_PPC_PAGE_SIZES_MAX_SZ 8 + +struct kvm_ppc_one_page_size { + __u32 page_shift; /* Page shift (or 0) */ + __u32 pte_enc; /* Encoding in the HPTE (>>12) */ +}; + +struct kvm_ppc_one_seg_page_size { + __u32 page_shift; /* Base page shift of segment (or 0) */ + __u32 slb_enc; /* SLB encoding for BookS */ + struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; +}; + +#define KVM_PPC_PAGE_SIZES_REAL 0x00000001 +#define KVM_PPC_1T_SEGMENTS 0x00000002 +#define KVM_PPC_NO_HASH 0x00000004 + +struct kvm_ppc_smmu_info { + __u64 flags; + __u32 slb_size; + __u16 data_keys; /* # storage keys supported for data */ + __u16 instr_keys; /* # storage keys supported for instructions */ + struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; +}; + +/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ +struct kvm_ppc_resize_hpt { + __u64 flags; + __u32 shift; + __u32 pad; +}; + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h index 7499e88a94..b1c503c295 100644 --- a/linux-headers/asm-riscv/kvm.h +++ b/linux-headers/asm-riscv/kvm.h @@ -16,7 +16,6 @@ #include #define __KVM_HAVE_IRQ_LINE -#define __KVM_HAVE_READONLY_MEM #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -166,6 +165,8 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZVFH, KVM_RISCV_ISA_EXT_ZVFHMIN, KVM_RISCV_ISA_EXT_ZFA, + KVM_RISCV_ISA_EXT_ZTSO, + KVM_RISCV_ISA_EXT_ZACAS, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/linux-headers/asm-s390/kvm.h b/linux-headers/asm-s390/kvm.h index 023a2763a9..684c4e1205 100644 --- a/linux-headers/asm-s390/kvm.h +++ b/linux-headers/asm-s390/kvm.h @@ -12,7 +12,320 @@ #include #define __KVM_S390 -#define __KVM_HAVE_GUEST_DEBUG + +struct kvm_s390_skeys { + __u64 start_gfn; + __u64 count; + __u64 skeydata_addr; + __u32 flags; + __u32 reserved[9]; +}; + +#define KVM_S390_CMMA_PEEK (1 << 0) + +/** + * kvm_s390_cmma_log - Used for CMMA migration. + * + * Used both for input and output. + * + * @start_gfn: Guest page number to start from. + * @count: Size of the result buffer. + * @flags: Control operation mode via KVM_S390_CMMA_* flags + * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty + * pages are still remaining. + * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set + * in the PGSTE. + * @values: Pointer to the values buffer. + * + * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls. + */ +struct kvm_s390_cmma_log { + __u64 start_gfn; + __u32 count; + __u32 flags; + union { + __u64 remaining; + __u64 mask; + }; + __u64 values; +}; + +#define KVM_S390_RESET_POR 1 +#define KVM_S390_RESET_CLEAR 2 +#define KVM_S390_RESET_SUBSYSTEM 4 +#define KVM_S390_RESET_CPU_INIT 8 +#define KVM_S390_RESET_IPL 16 + +/* for KVM_S390_MEM_OP */ +struct kvm_s390_mem_op { + /* in */ + __u64 gaddr; /* the guest address */ + __u64 flags; /* flags */ + __u32 size; /* amount of bytes */ + __u32 op; /* type of operation */ + __u64 buf; /* buffer in userspace */ + union { + struct { + __u8 ar; /* the access register number */ + __u8 key; /* access key, ignored if flag unset */ + __u8 pad1[6]; /* ignored */ + __u64 old_addr; /* ignored if cmpxchg flag unset */ + }; + __u32 sida_offset; /* offset into the sida */ + __u8 reserved[32]; /* ignored */ + }; +}; +/* types for kvm_s390_mem_op->op */ +#define KVM_S390_MEMOP_LOGICAL_READ 0 +#define KVM_S390_MEMOP_LOGICAL_WRITE 1 +#define KVM_S390_MEMOP_SIDA_READ 2 +#define KVM_S390_MEMOP_SIDA_WRITE 3 +#define KVM_S390_MEMOP_ABSOLUTE_READ 4 +#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 +#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6 + +/* flags for kvm_s390_mem_op->flags */ +#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) +#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) +#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) + +/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */ +#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0) +#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1) + +struct kvm_s390_psw { + __u64 mask; + __u64 addr; +}; + +/* valid values for type in kvm_s390_interrupt */ +#define KVM_S390_SIGP_STOP 0xfffe0000u +#define KVM_S390_PROGRAM_INT 0xfffe0001u +#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u +#define KVM_S390_RESTART 0xfffe0003u +#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u +#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u +#define KVM_S390_MCHK 0xfffe1000u +#define KVM_S390_INT_CLOCK_COMP 0xffff1004u +#define KVM_S390_INT_CPU_TIMER 0xffff1005u +#define KVM_S390_INT_VIRTIO 0xffff2603u +#define KVM_S390_INT_SERVICE 0xffff2401u +#define KVM_S390_INT_EMERGENCY 0xffff1201u +#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u +/* Anything below 0xfffe0000u is taken by INT_IO */ +#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \ + (((schid)) | \ + ((ssid) << 16) | \ + ((cssid) << 18) | \ + ((ai) << 26)) +#define KVM_S390_INT_IO_MIN 0x00000000u +#define KVM_S390_INT_IO_MAX 0xfffdffffu +#define KVM_S390_INT_IO_AI_MASK 0x04000000u + + +struct kvm_s390_interrupt { + __u32 type; + __u32 parm; + __u64 parm64; +}; + +struct kvm_s390_io_info { + __u16 subchannel_id; + __u16 subchannel_nr; + __u32 io_int_parm; + __u32 io_int_word; +}; + +struct kvm_s390_ext_info { + __u32 ext_params; + __u32 pad; + __u64 ext_params2; +}; + +struct kvm_s390_pgm_info { + __u64 trans_exc_code; + __u64 mon_code; + __u64 per_address; + __u32 data_exc_code; + __u16 code; + __u16 mon_class_nr; + __u8 per_code; + __u8 per_atmid; + __u8 exc_access_id; + __u8 per_access_id; + __u8 op_access_id; +#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01 +#define KVM_S390_PGM_FLAGS_ILC_0 0x02 +#define KVM_S390_PGM_FLAGS_ILC_1 0x04 +#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06 +#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08 + __u8 flags; + __u8 pad[2]; +}; + +struct kvm_s390_prefix_info { + __u32 address; +}; + +struct kvm_s390_extcall_info { + __u16 code; +}; + +struct kvm_s390_emerg_info { + __u16 code; +}; + +#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01 +struct kvm_s390_stop_info { + __u32 flags; +}; + +struct kvm_s390_mchk_info { + __u64 cr14; + __u64 mcic; + __u64 failing_storage_address; + __u32 ext_damage_code; + __u32 pad; + __u8 fixed_logout[16]; +}; + +struct kvm_s390_irq { + __u64 type; + union { + struct kvm_s390_io_info io; + struct kvm_s390_ext_info ext; + struct kvm_s390_pgm_info pgm; + struct kvm_s390_emerg_info emerg; + struct kvm_s390_extcall_info extcall; + struct kvm_s390_prefix_info prefix; + struct kvm_s390_stop_info stop; + struct kvm_s390_mchk_info mchk; + char reserved[64]; + } u; +}; + +struct kvm_s390_irq_state { + __u64 buf; + __u32 flags; /* will stay unused for compatibility reasons */ + __u32 len; + __u32 reserved[4]; /* will stay unused for compatibility reasons */ +}; + +struct kvm_s390_ucas_mapping { + __u64 user_addr; + __u64 vcpu_addr; + __u64 length; +}; + +struct kvm_s390_pv_sec_parm { + __u64 origin; + __u64 length; +}; + +struct kvm_s390_pv_unp { + __u64 addr; + __u64 size; + __u64 tweak; +}; + +enum pv_cmd_dmp_id { + KVM_PV_DUMP_INIT, + KVM_PV_DUMP_CONFIG_STOR_STATE, + KVM_PV_DUMP_COMPLETE, + KVM_PV_DUMP_CPU, +}; + +struct kvm_s390_pv_dmp { + __u64 subcmd; + __u64 buff_addr; + __u64 buff_len; + __u64 gaddr; /* For dump storage state */ + __u64 reserved[4]; +}; + +enum pv_cmd_info_id { + KVM_PV_INFO_VM, + KVM_PV_INFO_DUMP, +}; + +struct kvm_s390_pv_info_dump { + __u64 dump_cpu_buffer_len; + __u64 dump_config_mem_buffer_per_1m; + __u64 dump_config_finalize_len; +}; + +struct kvm_s390_pv_info_vm { + __u64 inst_calls_list[4]; + __u64 max_cpus; + __u64 max_guests; + __u64 max_guest_addr; + __u64 feature_indication; +}; + +struct kvm_s390_pv_info_header { + __u32 id; + __u32 len_max; + __u32 len_written; + __u32 reserved; +}; + +struct kvm_s390_pv_info { + struct kvm_s390_pv_info_header header; + union { + struct kvm_s390_pv_info_dump dump; + struct kvm_s390_pv_info_vm vm; + }; +}; + +enum pv_cmd_id { + KVM_PV_ENABLE, + KVM_PV_DISABLE, + KVM_PV_SET_SEC_PARMS, + KVM_PV_UNPACK, + KVM_PV_VERIFY, + KVM_PV_PREP_RESET, + KVM_PV_UNSHARE_ALL, + KVM_PV_INFO, + KVM_PV_DUMP, + KVM_PV_ASYNC_CLEANUP_PREPARE, + KVM_PV_ASYNC_CLEANUP_PERFORM, +}; + +struct kvm_pv_cmd { + __u32 cmd; /* Command to be executed */ + __u16 rc; /* Ultravisor return code */ + __u16 rrc; /* Ultravisor return reason code */ + __u64 data; /* Data or address */ + __u32 flags; /* flags for future extensions. Must be 0 for now */ + __u32 reserved[3]; +}; + +struct kvm_s390_zpci_op { + /* in */ + __u32 fh; /* target device */ + __u8 op; /* operation to perform */ + __u8 pad[3]; + union { + /* for KVM_S390_ZPCIOP_REG_AEN */ + struct { + __u64 ibv; /* Guest addr of interrupt bit vector */ + __u64 sb; /* Guest addr of summary bit */ + __u32 flags; + __u32 noi; /* Number of interrupts */ + __u8 isc; /* Guest interrupt subclass */ + __u8 sbo; /* Offset of guest summary bit vector */ + __u16 pad; + } reg_aen; + __u64 reserved[8]; + } u; +}; + +/* types for kvm_s390_zpci_op->op */ +#define KVM_S390_ZPCIOP_REG_AEN 0 +#define KVM_S390_ZPCIOP_DEREG_AEN 1 + +/* flags for kvm_s390_zpci_op->u.reg_aen.flags */ +#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) /* Device control API: s390-specific devices */ #define KVM_DEV_FLIC_GET_ALL_IRQS 1 diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index 003fb74534..abef1ce92c 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -7,6 +7,8 @@ * */ +#include +#include #include #include #include @@ -40,7 +42,6 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_MSI #define __KVM_HAVE_USER_NMI -#define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_MSIX #define __KVM_HAVE_MCE #define __KVM_HAVE_PIT_STATE2 @@ -49,7 +50,6 @@ #define __KVM_HAVE_DEBUGREGS #define __KVM_HAVE_XSAVE #define __KVM_HAVE_XCRS -#define __KVM_HAVE_READONLY_MEM /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 @@ -455,8 +455,13 @@ struct kvm_sync_regs { #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 -/* attributes for system fd (group 0) */ -#define KVM_X86_XCOMP_GUEST_SUPP 0 +/* vendor-independent attributes for system fd (group 0) */ +#define KVM_X86_GRP_SYSTEM 0 +# define KVM_X86_XCOMP_GUEST_SUPP 0 + +/* vendor-specific groups and attributes for system fd */ +#define KVM_X86_GRP_SEV 1 +# define KVM_X86_SEV_VMSA_FEATURES 0 struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; @@ -524,9 +529,313 @@ struct kvm_pmu_event_filter { #define KVM_PMU_EVENT_ALLOW 0 #define KVM_PMU_EVENT_DENY 1 -#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS BIT(0) +#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS _BITUL(0) #define KVM_PMU_EVENT_FLAGS_VALID_MASK (KVM_PMU_EVENT_FLAG_MASKED_EVENTS) +/* for KVM_CAP_MCE */ +struct kvm_x86_mce { + __u64 status; + __u64 addr; + __u64 misc; + __u64 mcg_status; + __u8 bank; + __u8 pad1[7]; + __u64 pad2[3]; +}; + +/* for KVM_CAP_XEN_HVM */ +#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) +#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) +#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) +#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) +#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) +#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) +#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) +#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) +#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8) + +struct kvm_xen_hvm_config { + __u32 flags; + __u32 msr; + __u64 blob_addr_32; + __u64 blob_addr_64; + __u8 blob_size_32; + __u8 blob_size_64; + __u8 pad2[30]; +}; + +struct kvm_xen_hvm_attr { + __u16 type; + __u16 pad[3]; + union { + __u8 long_mode; + __u8 vector; + __u8 runstate_update_flag; + union { + __u64 gfn; +#define KVM_XEN_INVALID_GFN ((__u64)-1) + __u64 hva; + } shared_info; + struct { + __u32 send_port; + __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */ + __u32 flags; +#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0) +#define KVM_XEN_EVTCHN_UPDATE (1 << 1) +#define KVM_XEN_EVTCHN_RESET (1 << 2) + /* + * Events sent by the guest are either looped back to + * the guest itself (potentially on a different port#) + * or signalled via an eventfd. + */ + union { + struct { + __u32 port; + __u32 vcpu; + __u32 priority; + } port; + struct { + __u32 port; /* Zero for eventfd */ + __s32 fd; + } eventfd; + __u32 padding[4]; + } deliver; + } evtchn; + __u32 xen_version; + __u64 pad[8]; + } u; +}; + + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 +#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 +#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 +#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */ +#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */ +#define KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA 0x6 + +struct kvm_xen_vcpu_attr { + __u16 type; + __u16 pad[3]; + union { + __u64 gpa; +#define KVM_XEN_INVALID_GPA ((__u64)-1) + __u64 hva; + __u64 pad[8]; + struct { + __u64 state; + __u64 state_entry_time; + __u64 time_running; + __u64 time_runnable; + __u64 time_blocked; + __u64 time_offline; + } runstate; + __u32 vcpu_id; + struct { + __u32 port; + __u32 priority; + __u64 expires_ns; + } timer; + __u8 vector; + } u; +}; + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 +#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 +#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8 +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA 0x9 + +/* Secure Encrypted Virtualization command */ +enum sev_cmd_id { + /* Guest initialization commands */ + KVM_SEV_INIT = 0, + KVM_SEV_ES_INIT, + /* Guest launch commands */ + KVM_SEV_LAUNCH_START, + KVM_SEV_LAUNCH_UPDATE_DATA, + KVM_SEV_LAUNCH_UPDATE_VMSA, + KVM_SEV_LAUNCH_SECRET, + KVM_SEV_LAUNCH_MEASURE, + KVM_SEV_LAUNCH_FINISH, + /* Guest migration commands (outgoing) */ + KVM_SEV_SEND_START, + KVM_SEV_SEND_UPDATE_DATA, + KVM_SEV_SEND_UPDATE_VMSA, + KVM_SEV_SEND_FINISH, + /* Guest migration commands (incoming) */ + KVM_SEV_RECEIVE_START, + KVM_SEV_RECEIVE_UPDATE_DATA, + KVM_SEV_RECEIVE_UPDATE_VMSA, + KVM_SEV_RECEIVE_FINISH, + /* Guest status and debug commands */ + KVM_SEV_GUEST_STATUS, + KVM_SEV_DBG_DECRYPT, + KVM_SEV_DBG_ENCRYPT, + /* Guest certificates commands */ + KVM_SEV_CERT_EXPORT, + /* Attestation report */ + KVM_SEV_GET_ATTESTATION_REPORT, + /* Guest Migration Extension */ + KVM_SEV_SEND_CANCEL, + + /* Second time is the charm; improved versions of the above ioctls. */ + KVM_SEV_INIT2, + + /* Hygon CSV batch command */ + KVM_CSV_COMMAND_BATCH = 0x18, + + KVM_SEV_NR_MAX, +}; + +struct kvm_sev_cmd { + __u32 id; + __u32 pad0; + __u64 data; + __u32 error; + __u32 sev_fd; +}; + +struct kvm_sev_init { + __u64 vmsa_features; + __u32 flags; + __u32 pad[9]; +}; + +struct kvm_sev_launch_start { + __u32 handle; + __u32 policy; + __u64 dh_uaddr; + __u32 dh_len; + __u32 pad0; + __u64 session_uaddr; + __u32 session_len; + __u32 pad1; +}; + +struct kvm_sev_launch_update_data { + __u64 uaddr; + __u32 len; + __u32 pad0; +}; + + +struct kvm_sev_launch_secret { + __u64 hdr_uaddr; + __u32 hdr_len; + __u32 pad0; + __u64 guest_uaddr; + __u32 guest_len; + __u32 pad1; + __u64 trans_uaddr; + __u32 trans_len; + __u32 pad2; +}; + +struct kvm_sev_launch_measure { + __u64 uaddr; + __u32 len; + __u32 pad0; +}; + +struct kvm_sev_guest_status { + __u32 handle; + __u32 policy; + __u32 state; +}; + +struct kvm_sev_dbg { + __u64 src_uaddr; + __u64 dst_uaddr; + __u32 len; + __u32 pad0; +}; + +struct kvm_sev_attestation_report { + __u8 mnonce[16]; + __u64 uaddr; + __u32 len; + __u32 pad0; +}; + +struct kvm_sev_send_start { + __u32 policy; + __u32 pad0; + __u64 pdh_cert_uaddr; + __u32 pdh_cert_len; + __u32 pad1; + __u64 plat_certs_uaddr; + __u32 plat_certs_len; + __u32 pad2; + __u64 amd_certs_uaddr; + __u32 amd_certs_len; + __u32 pad3; + __u64 session_uaddr; + __u32 session_len; + __u32 pad4; +}; + +struct kvm_sev_send_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u32 pad0; + __u64 guest_uaddr; + __u32 guest_len; + __u32 pad1; + __u64 trans_uaddr; + __u32 trans_len; + __u32 pad2; +}; + +struct kvm_sev_receive_start { + __u32 handle; + __u32 policy; + __u64 pdh_uaddr; + __u32 pdh_len; + __u32 pad0; + __u64 session_uaddr; + __u32 session_len; + __u32 pad1; +}; + +struct kvm_sev_receive_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u32 pad0; + __u64 guest_uaddr; + __u32 guest_len; + __u32 pad1; + __u64 trans_uaddr; + __u32 trans_len; + __u32 pad2; +}; + +#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) +#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) + +struct kvm_hyperv_eventfd { + __u32 conn_id; + __s32 fd; + __u32 flags; + __u32 padding[3]; +}; + +#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff +#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) + /* * Masked event layout. * Bits Description @@ -547,10 +856,10 @@ struct kvm_pmu_event_filter { ((__u64)(!!(exclude)) << 55)) #define KVM_PMU_MASKED_ENTRY_EVENT_SELECT \ - (GENMASK_ULL(7, 0) | GENMASK_ULL(35, 32)) -#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (GENMASK_ULL(63, 56)) -#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (GENMASK_ULL(15, 8)) -#define KVM_PMU_MASKED_ENTRY_EXCLUDE (BIT_ULL(55)) + (__GENMASK_ULL(7, 0) | __GENMASK_ULL(35, 32)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (__GENMASK_ULL(63, 56)) +#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (__GENMASK_ULL(15, 8)) +#define KVM_PMU_MASKED_ENTRY_EXCLUDE (_BITULL(55)) #define KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT (56) /* for KVM_{GET,SET,HAS}_DEVICE_ATTR */ @@ -558,9 +867,11 @@ struct kvm_pmu_event_filter { #define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ /* x86-specific KVM_EXIT_HYPERCALL flags. */ -#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0) +#define KVM_EXIT_HYPERCALL_LONG_MODE _BITULL(0) #define KVM_X86_DEFAULT_VM 0 #define KVM_X86_SW_PROTECTED_VM 1 +#define KVM_X86_SEV_VM 2 +#define KVM_X86_SEV_ES_VM 3 #endif /* _ASM_X86_KVM_H */ diff --git a/linux-headers/linux/bits.h b/linux-headers/linux/bits.h new file mode 100644 index 0000000000..d9897771be --- /dev/null +++ b/linux-headers/linux/bits.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* bits.h: Macros for dealing with bitmasks. */ + +#ifndef _LINUX_BITS_H +#define _LINUX_BITS_H + +#define __GENMASK(h, l) \ + (((~_UL(0)) - (_UL(1) << (l)) + 1) & \ + (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) + +#define __GENMASK_ULL(h, l) \ + (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \ + (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) + +#endif /* _LINUX_BITS_H */ diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index c7209118e5..b114944549 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -18,6 +18,11 @@ #define KVM_API_VERSION 12 +/* + * Backwards-compatible definitions. + */ +#define __KVM_HAVE_GUEST_DEBUG + /* for KVM_SET_USER_MEMORY_REGION */ struct kvm_userspace_memory_region { __u32 slot; @@ -87,43 +92,6 @@ struct kvm_pit_config { #define KVM_PIT_SPEAKER_DUMMY 1 -struct kvm_s390_skeys { - __u64 start_gfn; - __u64 count; - __u64 skeydata_addr; - __u32 flags; - __u32 reserved[9]; -}; - -#define KVM_S390_CMMA_PEEK (1 << 0) - -/** - * kvm_s390_cmma_log - Used for CMMA migration. - * - * Used both for input and output. - * - * @start_gfn: Guest page number to start from. - * @count: Size of the result buffer. - * @flags: Control operation mode via KVM_S390_CMMA_* flags - * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty - * pages are still remaining. - * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set - * in the PGSTE. - * @values: Pointer to the values buffer. - * - * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls. - */ -struct kvm_s390_cmma_log { - __u64 start_gfn; - __u32 count; - __u32 flags; - union { - __u64 remaining; - __u64 mask; - }; - __u64 values; -}; - struct kvm_hyperv_exit { #define KVM_EXIT_HYPERV_SYNIC 1 #define KVM_EXIT_HYPERV_HCALL 2 @@ -316,11 +284,6 @@ struct kvm_run { __u32 ipb; } s390_sieic; /* KVM_EXIT_S390_RESET */ -#define KVM_S390_RESET_POR 1 -#define KVM_S390_RESET_CLEAR 2 -#define KVM_S390_RESET_SUBSYSTEM 4 -#define KVM_S390_RESET_CPU_INIT 8 -#define KVM_S390_RESET_IPL 16 __u64 s390_reset_flags; /* KVM_EXIT_S390_UCONTROL */ struct { @@ -535,43 +498,6 @@ struct kvm_translation { __u8 pad[5]; }; -/* for KVM_S390_MEM_OP */ -struct kvm_s390_mem_op { - /* in */ - __u64 gaddr; /* the guest address */ - __u64 flags; /* flags */ - __u32 size; /* amount of bytes */ - __u32 op; /* type of operation */ - __u64 buf; /* buffer in userspace */ - union { - struct { - __u8 ar; /* the access register number */ - __u8 key; /* access key, ignored if flag unset */ - __u8 pad1[6]; /* ignored */ - __u64 old_addr; /* ignored if cmpxchg flag unset */ - }; - __u32 sida_offset; /* offset into the sida */ - __u8 reserved[32]; /* ignored */ - }; -}; -/* types for kvm_s390_mem_op->op */ -#define KVM_S390_MEMOP_LOGICAL_READ 0 -#define KVM_S390_MEMOP_LOGICAL_WRITE 1 -#define KVM_S390_MEMOP_SIDA_READ 2 -#define KVM_S390_MEMOP_SIDA_WRITE 3 -#define KVM_S390_MEMOP_ABSOLUTE_READ 4 -#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 -#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6 - -/* flags for kvm_s390_mem_op->flags */ -#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) -#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) -#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) - -/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */ -#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0) -#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1) - /* for KVM_INTERRUPT */ struct kvm_interrupt { /* in */ @@ -636,124 +562,6 @@ struct kvm_mp_state { __u32 mp_state; }; -struct kvm_s390_psw { - __u64 mask; - __u64 addr; -}; - -/* valid values for type in kvm_s390_interrupt */ -#define KVM_S390_SIGP_STOP 0xfffe0000u -#define KVM_S390_PROGRAM_INT 0xfffe0001u -#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u -#define KVM_S390_RESTART 0xfffe0003u -#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u -#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u -#define KVM_S390_MCHK 0xfffe1000u -#define KVM_S390_INT_CLOCK_COMP 0xffff1004u -#define KVM_S390_INT_CPU_TIMER 0xffff1005u -#define KVM_S390_INT_VIRTIO 0xffff2603u -#define KVM_S390_INT_SERVICE 0xffff2401u -#define KVM_S390_INT_EMERGENCY 0xffff1201u -#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u -/* Anything below 0xfffe0000u is taken by INT_IO */ -#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \ - (((schid)) | \ - ((ssid) << 16) | \ - ((cssid) << 18) | \ - ((ai) << 26)) -#define KVM_S390_INT_IO_MIN 0x00000000u -#define KVM_S390_INT_IO_MAX 0xfffdffffu -#define KVM_S390_INT_IO_AI_MASK 0x04000000u - - -struct kvm_s390_interrupt { - __u32 type; - __u32 parm; - __u64 parm64; -}; - -struct kvm_s390_io_info { - __u16 subchannel_id; - __u16 subchannel_nr; - __u32 io_int_parm; - __u32 io_int_word; -}; - -struct kvm_s390_ext_info { - __u32 ext_params; - __u32 pad; - __u64 ext_params2; -}; - -struct kvm_s390_pgm_info { - __u64 trans_exc_code; - __u64 mon_code; - __u64 per_address; - __u32 data_exc_code; - __u16 code; - __u16 mon_class_nr; - __u8 per_code; - __u8 per_atmid; - __u8 exc_access_id; - __u8 per_access_id; - __u8 op_access_id; -#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01 -#define KVM_S390_PGM_FLAGS_ILC_0 0x02 -#define KVM_S390_PGM_FLAGS_ILC_1 0x04 -#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06 -#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08 - __u8 flags; - __u8 pad[2]; -}; - -struct kvm_s390_prefix_info { - __u32 address; -}; - -struct kvm_s390_extcall_info { - __u16 code; -}; - -struct kvm_s390_emerg_info { - __u16 code; -}; - -#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01 -struct kvm_s390_stop_info { - __u32 flags; -}; - -struct kvm_s390_mchk_info { - __u64 cr14; - __u64 mcic; - __u64 failing_storage_address; - __u32 ext_damage_code; - __u32 pad; - __u8 fixed_logout[16]; -}; - -struct kvm_s390_irq { - __u64 type; - union { - struct kvm_s390_io_info io; - struct kvm_s390_ext_info ext; - struct kvm_s390_pgm_info pgm; - struct kvm_s390_emerg_info emerg; - struct kvm_s390_extcall_info extcall; - struct kvm_s390_prefix_info prefix; - struct kvm_s390_stop_info stop; - struct kvm_s390_mchk_info mchk; - char reserved[64]; - } u; -}; - -struct kvm_s390_irq_state { - __u64 buf; - __u32 flags; /* will stay unused for compatibility reasons */ - __u32 len; - __u32 reserved[4]; /* will stay unused for compatibility reasons */ -}; - /* for KVM_SET_GUEST_DEBUG */ #define KVM_GUESTDBG_ENABLE 0x00000001 @@ -815,50 +623,6 @@ struct kvm_enable_cap { __u8 pad[64]; }; -/* for KVM_PPC_GET_PVINFO */ - -#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) - -struct kvm_ppc_pvinfo { - /* out */ - __u32 flags; - __u32 hcall[4]; - __u8 pad[108]; -}; - -/* for KVM_PPC_GET_SMMU_INFO */ -#define KVM_PPC_PAGE_SIZES_MAX_SZ 8 - -struct kvm_ppc_one_page_size { - __u32 page_shift; /* Page shift (or 0) */ - __u32 pte_enc; /* Encoding in the HPTE (>>12) */ -}; - -struct kvm_ppc_one_seg_page_size { - __u32 page_shift; /* Base page shift of segment (or 0) */ - __u32 slb_enc; /* SLB encoding for BookS */ - struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; -}; - -#define KVM_PPC_PAGE_SIZES_REAL 0x00000001 -#define KVM_PPC_1T_SEGMENTS 0x00000002 -#define KVM_PPC_NO_HASH 0x00000004 - -struct kvm_ppc_smmu_info { - __u64 flags; - __u32 slb_size; - __u16 data_keys; /* # storage keys supported for data */ - __u16 instr_keys; /* # storage keys supported for instructions */ - struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; -}; - -/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ -struct kvm_ppc_resize_hpt { - __u64 flags; - __u32 shift; - __u32 pad; -}; - #define KVMIO 0xAE /* machine type bits, to be used as argument to KVM_CREATE_VM */ @@ -939,9 +703,7 @@ struct kvm_ppc_resize_hpt { /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 #define KVM_CAP_USER_NMI 22 -#ifdef __KVM_HAVE_GUEST_DEBUG #define KVM_CAP_SET_GUEST_DEBUG 23 -#endif #ifdef __KVM_HAVE_PIT #define KVM_CAP_REINJECT_CONTROL 24 #endif @@ -1191,8 +953,6 @@ struct kvm_ppc_resize_hpt { #define KVM_EXIT_HYPERCALL_VALID_MASK (1 << KVM_HC_MAP_GPA_RANGE) -#ifdef KVM_CAP_IRQ_ROUTING - struct kvm_irq_routing_irqchip { __u32 irqchip; __u32 pin; @@ -1257,42 +1017,6 @@ struct kvm_irq_routing { struct kvm_irq_routing_entry entries[]; }; -#endif - -#ifdef KVM_CAP_MCE -/* x86 MCE */ -struct kvm_x86_mce { - __u64 status; - __u64 addr; - __u64 misc; - __u64 mcg_status; - __u8 bank; - __u8 pad1[7]; - __u64 pad2[3]; -}; -#endif - -#ifdef KVM_CAP_XEN_HVM -#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) -#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) -#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) -#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) -#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) -#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) -#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) -#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) - -struct kvm_xen_hvm_config { - __u32 flags; - __u32 msr; - __u64 blob_addr_32; - __u64 blob_addr_64; - __u8 blob_size_32; - __u8 blob_size_64; - __u8 pad2[30]; -}; -#endif - #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) /* * Available with KVM_CAP_IRQFD_RESAMPLE @@ -1517,11 +1241,6 @@ struct kvm_user_data { struct kvm_userspace_memory_region2) /* enable ucontrol for s390 */ -struct kvm_s390_ucas_mapping { - __u64 user_addr; - __u64 vcpu_addr; - __u64 length; -}; #define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping) #define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping) #define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long) @@ -1731,89 +1450,6 @@ struct kvm_enc_region { #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) -struct kvm_s390_pv_sec_parm { - __u64 origin; - __u64 length; -}; - -struct kvm_s390_pv_unp { - __u64 addr; - __u64 size; - __u64 tweak; -}; - -enum pv_cmd_dmp_id { - KVM_PV_DUMP_INIT, - KVM_PV_DUMP_CONFIG_STOR_STATE, - KVM_PV_DUMP_COMPLETE, - KVM_PV_DUMP_CPU, -}; - -struct kvm_s390_pv_dmp { - __u64 subcmd; - __u64 buff_addr; - __u64 buff_len; - __u64 gaddr; /* For dump storage state */ - __u64 reserved[4]; -}; - -enum pv_cmd_info_id { - KVM_PV_INFO_VM, - KVM_PV_INFO_DUMP, -}; - -struct kvm_s390_pv_info_dump { - __u64 dump_cpu_buffer_len; - __u64 dump_config_mem_buffer_per_1m; - __u64 dump_config_finalize_len; -}; - -struct kvm_s390_pv_info_vm { - __u64 inst_calls_list[4]; - __u64 max_cpus; - __u64 max_guests; - __u64 max_guest_addr; - __u64 feature_indication; -}; - -struct kvm_s390_pv_info_header { - __u32 id; - __u32 len_max; - __u32 len_written; - __u32 reserved; -}; - -struct kvm_s390_pv_info { - struct kvm_s390_pv_info_header header; - union { - struct kvm_s390_pv_info_dump dump; - struct kvm_s390_pv_info_vm vm; - }; -}; - -enum pv_cmd_id { - KVM_PV_ENABLE, - KVM_PV_DISABLE, - KVM_PV_SET_SEC_PARMS, - KVM_PV_UNPACK, - KVM_PV_VERIFY, - KVM_PV_PREP_RESET, - KVM_PV_UNSHARE_ALL, - KVM_PV_INFO, - KVM_PV_DUMP, - KVM_PV_ASYNC_CLEANUP_PREPARE, - KVM_PV_ASYNC_CLEANUP_PERFORM, -}; - -struct kvm_pv_cmd { - __u32 cmd; /* Command to be executed */ - __u16 rc; /* Ultravisor return code */ - __u16 rrc; /* Ultravisor return reason code */ - __u64 data; /* Data or address */ - __u32 flags; /* flags for future extensions. Must be 0 for now */ - __u32 reserved[3]; -}; - /* Available with KVM_CAP_S390_PROTECTED */ #define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd) @@ -1827,58 +1463,6 @@ struct kvm_pv_cmd { #define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr) #define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr) -struct kvm_xen_hvm_attr { - __u16 type; - __u16 pad[3]; - union { - __u8 long_mode; - __u8 vector; - __u8 runstate_update_flag; - struct { - __u64 gfn; -#define KVM_XEN_INVALID_GFN ((__u64)-1) - } shared_info; - struct { - __u32 send_port; - __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */ - __u32 flags; -#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0) -#define KVM_XEN_EVTCHN_UPDATE (1 << 1) -#define KVM_XEN_EVTCHN_RESET (1 << 2) - /* - * Events sent by the guest are either looped back to - * the guest itself (potentially on a different port#) - * or signalled via an eventfd. - */ - union { - struct { - __u32 port; - __u32 vcpu; - __u32 priority; - } port; - struct { - __u32 port; /* Zero for eventfd */ - __s32 fd; - } eventfd; - __u32 padding[4]; - } deliver; - } evtchn; - __u32 xen_version; - __u64 pad[8]; - } u; -}; - - -/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ -#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 -#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 -#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 -/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ -#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3 -#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4 -/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */ -#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5 - /* Per-vCPU Xen attributes */ #define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) #define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) @@ -1889,157 +1473,6 @@ struct kvm_xen_hvm_attr { #define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2) #define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2) -struct kvm_xen_vcpu_attr { - __u16 type; - __u16 pad[3]; - union { - __u64 gpa; -#define KVM_XEN_INVALID_GPA ((__u64)-1) - __u64 pad[8]; - struct { - __u64 state; - __u64 state_entry_time; - __u64 time_running; - __u64 time_runnable; - __u64 time_blocked; - __u64 time_offline; - } runstate; - __u32 vcpu_id; - struct { - __u32 port; - __u32 priority; - __u64 expires_ns; - } timer; - __u8 vector; - } u; -}; - -/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ -#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 -#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 -#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 -#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 -#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 -#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 -/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */ -#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6 -#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7 -#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8 - -/* Secure Encrypted Virtualization command */ -enum sev_cmd_id { - /* Guest initialization commands */ - KVM_SEV_INIT = 0, - KVM_SEV_ES_INIT, - /* Guest launch commands */ - KVM_SEV_LAUNCH_START, - KVM_SEV_LAUNCH_UPDATE_DATA, - KVM_SEV_LAUNCH_UPDATE_VMSA, - KVM_SEV_LAUNCH_SECRET, - KVM_SEV_LAUNCH_MEASURE, - KVM_SEV_LAUNCH_FINISH, - /* Guest migration commands (outgoing) */ - KVM_SEV_SEND_START, - KVM_SEV_SEND_UPDATE_DATA, - KVM_SEV_SEND_UPDATE_VMSA, - KVM_SEV_SEND_FINISH, - /* Guest migration commands (incoming) */ - KVM_SEV_RECEIVE_START, - KVM_SEV_RECEIVE_UPDATE_DATA, - KVM_SEV_RECEIVE_UPDATE_VMSA, - KVM_SEV_RECEIVE_FINISH, - /* Guest status and debug commands */ - KVM_SEV_GUEST_STATUS, - KVM_SEV_DBG_DECRYPT, - KVM_SEV_DBG_ENCRYPT, - /* Guest certificates commands */ - KVM_SEV_CERT_EXPORT, - /* Attestation report */ - KVM_SEV_GET_ATTESTATION_REPORT, - /* Guest Migration Extension */ - KVM_SEV_SEND_CANCEL, - - /* Hygon CSV batch command */ - KVM_CSV_COMMAND_BATCH = 0x18, - - KVM_SEV_NR_MAX, -}; - -struct kvm_sev_cmd { - __u32 id; - __u64 data; - __u32 error; - __u32 sev_fd; -}; - -struct kvm_sev_launch_start { - __u32 handle; - __u32 policy; - __u64 dh_uaddr; - __u32 dh_len; - __u64 session_uaddr; - __u32 session_len; -}; - -struct kvm_sev_launch_update_data { - __u64 uaddr; - __u32 len; -}; - - -struct kvm_sev_launch_secret { - __u64 hdr_uaddr; - __u32 hdr_len; - __u64 guest_uaddr; - __u32 guest_len; - __u64 trans_uaddr; - __u32 trans_len; -}; - -struct kvm_sev_launch_measure { - __u64 uaddr; - __u32 len; -}; - -struct kvm_sev_guest_status { - __u32 handle; - __u32 policy; - __u32 state; -}; - -struct kvm_sev_dbg { - __u64 src_uaddr; - __u64 dst_uaddr; - __u32 len; -}; - -struct kvm_sev_attestation_report { - __u8 mnonce[16]; - __u64 uaddr; - __u32 len; -}; - -struct kvm_sev_send_start { - __u32 policy; - __u64 pdh_cert_uaddr; - __u32 pdh_cert_len; - __u64 plat_certs_uaddr; - __u32 plat_certs_len; - __u64 amd_certs_uaddr; - __u32 amd_certs_len; - __u64 session_uaddr; - __u32 session_len; -}; - -struct kvm_sev_send_update_data { - __u64 hdr_uaddr; - __u32 hdr_len; - __u64 guest_uaddr; - __u32 guest_len; - __u64 trans_uaddr; - __u32 trans_len; -}; - struct kvm_sev_send_update_vmsa { __u32 vcpu_id; __u64 hdr_uaddr; @@ -2048,24 +1481,6 @@ struct kvm_sev_send_update_vmsa { __u32 trans_len; }; -struct kvm_sev_receive_start { - __u32 handle; - __u32 policy; - __u64 pdh_uaddr; - __u32 pdh_len; - __u64 session_uaddr; - __u32 session_len; -}; - -struct kvm_sev_receive_update_data { - __u64 hdr_uaddr; - __u32 hdr_len; - __u64 guest_uaddr; - __u32 guest_len; - __u64 trans_uaddr; - __u32 trans_len; -}; - struct kvm_sev_receive_update_vmsa { __u32 vcpu_id; __u64 hdr_uaddr; @@ -2158,76 +1573,6 @@ struct kvm_csv3_handle_memory { __u32 opcode; }; -#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) -#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) - -struct kvm_assigned_pci_dev { - __u32 assigned_dev_id; - __u32 busnr; - __u32 devfn; - __u32 flags; - __u32 segnr; - union { - __u32 reserved[11]; - }; -}; - -#define KVM_DEV_IRQ_HOST_INTX (1 << 0) -#define KVM_DEV_IRQ_HOST_MSI (1 << 1) -#define KVM_DEV_IRQ_HOST_MSIX (1 << 2) - -#define KVM_DEV_IRQ_GUEST_INTX (1 << 8) -#define KVM_DEV_IRQ_GUEST_MSI (1 << 9) -#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10) - -#define KVM_DEV_IRQ_HOST_MASK 0x00ff -#define KVM_DEV_IRQ_GUEST_MASK 0xff00 - -struct kvm_assigned_irq { - __u32 assigned_dev_id; - __u32 host_irq; /* ignored (legacy field) */ - __u32 guest_irq; - __u32 flags; - union { - __u32 reserved[12]; - }; -}; - -struct kvm_assigned_msix_nr { - __u32 assigned_dev_id; - __u16 entry_nr; - __u16 padding; -}; - -#define KVM_MAX_MSIX_PER_DEV 256 -struct kvm_assigned_msix_entry { - __u32 assigned_dev_id; - __u32 gsi; - __u16 entry; /* The index of entry in the MSI-X table */ - __u16 padding[3]; -}; - -#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) -#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) - -/* Available with KVM_CAP_ARM_USER_IRQ */ - -/* Bits for run->s.regs.device_irq_level */ -#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) -#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) -#define KVM_ARM_DEV_PMU (1 << 2) - -struct kvm_hyperv_eventfd { - __u32 conn_id; - __s32 fd; - __u32 flags; - __u32 padding[3]; -}; - -#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff -#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) - #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) #define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) @@ -2373,33 +1718,6 @@ struct kvm_stats_desc { /* Available with KVM_CAP_S390_ZPCI_OP */ #define KVM_S390_ZPCI_OP _IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op) -struct kvm_s390_zpci_op { - /* in */ - __u32 fh; /* target device */ - __u8 op; /* operation to perform */ - __u8 pad[3]; - union { - /* for KVM_S390_ZPCIOP_REG_AEN */ - struct { - __u64 ibv; /* Guest addr of interrupt bit vector */ - __u64 sb; /* Guest addr of summary bit */ - __u32 flags; - __u32 noi; /* Number of interrupts */ - __u8 isc; /* Guest interrupt subclass */ - __u8 sbo; /* Offset of guest summary bit vector */ - __u16 pad; - } reg_aen; - __u64 reserved[8]; - } u; -}; - -/* types for kvm_s390_zpci_op->op */ -#define KVM_S390_ZPCIOP_REG_AEN 0 -#define KVM_S390_ZPCIOP_DEREG_AEN 1 - -/* flags for kvm_s390_zpci_op->u.reg_aen.flags */ -#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) - /* get tmi version */ #define KVM_GET_TMI_VERSION _IOR(KVMIO, 0xd2, uint64_t) #define MIN_TMI_VERSION_FOR_UEFI_BOOTED_CVM 0x20001 diff --git a/linux-headers/linux/psp-sev.h b/linux-headers/linux/psp-sev.h index bcb21339ee..c3046c6bff 100644 --- a/linux-headers/linux/psp-sev.h +++ b/linux-headers/linux/psp-sev.h @@ -28,6 +28,9 @@ enum { SEV_PEK_CERT_IMPORT, SEV_GET_ID, /* This command is deprecated, use SEV_GET_ID2 */ SEV_GET_ID2, + SNP_PLATFORM_STATUS, + SNP_COMMIT, + SNP_SET_CONFIG, SEV_MAX, }; @@ -69,6 +72,12 @@ typedef enum { SEV_RET_RESOURCE_LIMIT, SEV_RET_SECURE_DATA_INVALID, SEV_RET_INVALID_KEY = 0x27, + SEV_RET_INVALID_PAGE_SIZE, + SEV_RET_INVALID_PAGE_STATE, + SEV_RET_INVALID_MDATA_ENTRY, + SEV_RET_INVALID_PAGE_OWNER, + SEV_RET_INVALID_PAGE_AEAD_OFLOW, + SEV_RET_RMP_INIT_REQUIRED, SEV_RET_MAX, } sev_ret_code; @@ -155,6 +164,56 @@ struct sev_user_data_get_id2 { __u32 length; /* In/Out */ } __attribute__((packed)); +/** + * struct sev_user_data_snp_status - SNP status + * + * @api_major: API major version + * @api_minor: API minor version + * @state: current platform state + * @is_rmp_initialized: whether RMP is initialized or not + * @rsvd: reserved + * @build_id: firmware build id for the API version + * @mask_chip_id: whether chip id is present in attestation reports or not + * @mask_chip_key: whether attestation reports are signed or not + * @vlek_en: VLEK (Version Loaded Endorsement Key) hashstick is loaded + * @rsvd1: reserved + * @guest_count: the number of guest currently managed by the firmware + * @current_tcb_version: current TCB version + * @reported_tcb_version: reported TCB version + */ +struct sev_user_data_snp_status { + __u8 api_major; /* Out */ + __u8 api_minor; /* Out */ + __u8 state; /* Out */ + __u8 is_rmp_initialized:1; /* Out */ + __u8 rsvd:7; + __u32 build_id; /* Out */ + __u32 mask_chip_id:1; /* Out */ + __u32 mask_chip_key:1; /* Out */ + __u32 vlek_en:1; /* Out */ + __u32 rsvd1:29; + __u32 guest_count; /* Out */ + __u64 current_tcb_version; /* Out */ + __u64 reported_tcb_version; /* Out */ +} __attribute__((packed)); + +/** + * struct sev_user_data_snp_config - system wide configuration value for SNP. + * + * @reported_tcb: the TCB version to report in the guest attestation report. + * @mask_chip_id: whether chip id is present in attestation reports or not + * @mask_chip_key: whether attestation reports are signed or not + * @rsvd: reserved + * @rsvd1: reserved + */ +struct sev_user_data_snp_config { + __u64 reported_tcb ; /* In */ + __u32 mask_chip_id:1; /* In */ + __u32 mask_chip_key:1; /* In */ + __u32 rsvd:30; /* In */ + __u8 rsvd1[52]; +} __attribute__((packed)); + /** * struct sev_issue_cmd - SEV ioctl parameters * diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index f5c05abe8b..e324b7aebe 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -268,4 +268,10 @@ /* set device migtration state */ #define VHOST_VDPA_SET_MIG_STATE _IOW(VHOST_VIRTIO, 0xb2, __u8) +/* Get the queue size of a specific virtqueue. + * userspace set the vring index in vhost_vring_state.index + * kernel set the queue size in vhost_vring_state.num + */ +#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x80, \ + struct vhost_vring_state) #endif -- Gitee From 05c4a11c8c6691f78481a1f8eb0ef889e6d8a2aa Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 Mar 2024 17:45:56 -0400 Subject: [PATCH 013/115] runstate: skip initial CPU reset if reset is not actually possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 08b2d15cdd0d3fbbe37ce23bf192b770db3a7539 upstream Right now, the system reset is concluded by a call to cpu_synchronize_all_post_reset() in order to sync any changes that the machine reset callback applied to the CPU state. However, for VMs with encrypted state such as SEV-ES guests (currently the only case of guests with non-resettable CPUs) this cannot be done, because guest state has already been finalized by machine-init-done notifiers. cpu_synchronize_all_post_reset() does nothing on these guests, and actually we would like to make it fail if called once guest has been encrypted. So, assume that boards that support non-resettable CPUs do not touch CPU state and that all such setup is done before, at the time of cpu_synchronize_all_post_init(). Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- system/runstate.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/system/runstate.c b/system/runstate.c index 7e41626bb1..da1c959f17 100644 --- a/system/runstate.c +++ b/system/runstate.c @@ -504,11 +504,24 @@ void qemu_system_reset(ShutdownCause reason) default: qapi_event_send_reset(shutdown_caused_by_guest(reason), reason); } - cpu_synchronize_all_post_reset(); cpus_control_post_system_reset(); monitor_qapi_event_discard_io_error(); + + /* + * Some boards use the machine reset callback to point CPUs to the firmware + * entry point. Assume that this is not the case for boards that support + * non-resettable CPUs (currently used only for confidential guests), in + * which case cpu_synchronize_all_post_init() is enough because + * it does _more_ than cpu_synchronize_all_post_reset(). + */ + if (cpus_are_resettable()) { + cpu_synchronize_all_post_reset(); + } else { + assert(runstate_check(RUN_STATE_PRELAUNCH)); + } + } /* -- Gitee From 2d7ad24b4ebf7adcdf88516a7a0141b8bb8b0a76 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 Mar 2024 14:41:33 -0400 Subject: [PATCH 014/115] KVM: remove kvm_arch_cpu_check_are_resettable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a99c0c66ebe7d8db3af6f16689ade9375247e43e upstream Board reset requires writing a fresh CPU state. As far as KVM is concerned, the only thing that blocks reset is that CPU state is encrypted; therefore, kvm_cpus_are_resettable() can simply check if that is the case. [Backport changes] 1. In files target/arm/kvm.c and target/i386/kvm/kvm.c function kvm_arch_cpu_check_are_resettable() was removed entirely as per the current upstream patch. This removal also includes additional vendor-specific if check `if(is_hygon_cpu())` which was added in kvm_arch_cpu_check_are_resettable() as part of hygon hardware support. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- accel/kvm/kvm-accel-ops.c | 2 +- accel/kvm/kvm-all.c | 5 ----- include/sysemu/kvm.h | 10 ---------- target/arm/kvm.c | 5 ----- target/i386/kvm/kvm.c | 8 -------- target/loongarch/kvm/kvm.c | 5 ----- target/mips/kvm.c | 5 ----- target/ppc/kvm.c | 5 ----- target/riscv/kvm/kvm-cpu.c | 5 ----- target/s390x/kvm/kvm.c | 5 ----- 10 files changed, 1 insertion(+), 54 deletions(-) diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c index 54f19028b8..afb4dfb3e7 100644 --- a/accel/kvm/kvm-accel-ops.c +++ b/accel/kvm/kvm-accel-ops.c @@ -83,7 +83,7 @@ static bool kvm_vcpu_thread_is_idle(CPUState *cpu) static bool kvm_cpus_are_resettable(void) { - return !kvm_enabled() || kvm_cpu_check_are_resettable(); + return !kvm_enabled() || !kvm_state->guest_state_protected; } #ifdef KVM_CAP_SET_GUEST_DEBUG diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index f472fc4f69..57c8b5ad89 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -2771,11 +2771,6 @@ void kvm_flush_coalesced_mmio_buffer(void) s->coalesced_flush_in_progress = false; } -bool kvm_cpu_check_are_resettable(void) -{ - return kvm_arch_cpu_check_are_resettable(); -} - static void do_kvm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) { if (!cpu->vcpu_dirty && !kvm_state->guest_state_protected) { diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 7602cd4429..1528824b93 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -584,16 +584,6 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target); /* Notify resamplefd for EOI of specific interrupts. */ void kvm_resample_fd_notify(int gsi); -/** - * kvm_cpu_check_are_resettable - return whether CPUs can be reset - * - * Returns: true: CPUs are resettable - * false: CPUs are not resettable - */ -bool kvm_cpu_check_are_resettable(void); - -bool kvm_arch_cpu_check_are_resettable(void); - bool kvm_dirty_ring_enabled(void); uint32_t kvm_dirty_ring_size(void); diff --git a/target/arm/kvm.c b/target/arm/kvm.c index f45783a9da..127e33b42a 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -1325,11 +1325,6 @@ int kvm_arch_msi_data_to_gsi(uint32_t data) return (data - 32) & 0xffff; } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return !virtcca_cvm_enabled(); -} - static void kvm_arch_get_eager_split_size(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index f90f098592..8167fce904 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -5808,14 +5808,6 @@ bool kvm_has_waitpkg(void) return has_msr_umwait; } -bool kvm_arch_cpu_check_are_resettable(void) -{ - if (is_hygon_cpu()) - return !csv_kvm_cpu_reset_inhibit; - - return !sev_es_enabled(); -} - #define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask) diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c index f724e77a1b..4b8f08cdb6 100644 --- a/target/loongarch/kvm/kvm.c +++ b/target/loongarch/kvm/kvm.c @@ -1000,11 +1000,6 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cs) return true; } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return true; -} - void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg) { diff --git a/target/mips/kvm.c b/target/mips/kvm.c index e22e24ed97..6393bc3da2 100644 --- a/target/mips/kvm.c +++ b/target/mips/kvm.c @@ -1282,11 +1282,6 @@ int kvm_arch_get_default_type(MachineState *machine) return -1; } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return true; -} - void kvm_arch_accel_class_init(ObjectClass *oc) { } diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 9b1abe2fc4..ddb5e7f8e7 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -2971,11 +2971,6 @@ void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset) } } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return true; -} - void kvm_arch_accel_class_init(ObjectClass *oc) { } diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index b3dc2070f9..c110b11704 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -1274,11 +1274,6 @@ void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level) } } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return true; -} - static int aia_mode; static const char *kvm_aia_mode_str(uint64_t mode) diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index 33ab3551f4..0a22833f47 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -2624,11 +2624,6 @@ void kvm_s390_stop_interrupt(S390CPU *cpu) kvm_s390_vcpu_interrupt(cpu, &irq); } -bool kvm_arch_cpu_check_are_resettable(void) -{ - return true; -} - int kvm_s390_get_zpci_op(void) { return cap_zpci_op; -- Gitee From ab2bf573c0e32f597f916cac46c4dcc709b510bb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 18 Mar 2024 11:07:43 -0400 Subject: [PATCH 015/115] target/i386: introduce x86-confidential-guest commit d82e9c843d662f13821026618aba936eda31a6c0 upstream Introduce a common superclass for x86 confidential guest implementations. It will extend ConfidentialGuestSupportClass with a method that provides the VM type to be passed to KVM_CREATE_VM. Signed-off-by: Paolo Bonzini Signed-off-by: PKumarAditya Signed-off-by: PrithivishS --- target/i386/confidential-guest.c | 33 ++++++++++++++++++++++++++ target/i386/confidential-guest.h | 40 ++++++++++++++++++++++++++++++++ target/i386/meson.build | 2 +- target/i386/sev.c | 6 ++--- 4 files changed, 77 insertions(+), 4 deletions(-) create mode 100644 target/i386/confidential-guest.c create mode 100644 target/i386/confidential-guest.h diff --git a/target/i386/confidential-guest.c b/target/i386/confidential-guest.c new file mode 100644 index 0000000000..b3727845ad --- /dev/null +++ b/target/i386/confidential-guest.c @@ -0,0 +1,33 @@ +/* + * QEMU Confidential Guest support + * + * Copyright (C) 2024 Red Hat, Inc. + * + * Authors: + * Paolo Bonzini + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" + +#include "confidential-guest.h" + +OBJECT_DEFINE_ABSTRACT_TYPE(X86ConfidentialGuest, + x86_confidential_guest, + X86_CONFIDENTIAL_GUEST, + CONFIDENTIAL_GUEST_SUPPORT) + +static void x86_confidential_guest_class_init(ObjectClass *oc, void *data) +{ +} + +static void x86_confidential_guest_init(Object *obj) +{ +} + +static void x86_confidential_guest_finalize(Object *obj) +{ +} diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h new file mode 100644 index 0000000000..ca12d5a8fb --- /dev/null +++ b/target/i386/confidential-guest.h @@ -0,0 +1,40 @@ +/* + * x86-specific confidential guest methods. + * + * Copyright (c) 2024 Red Hat Inc. + * + * Authors: + * Paolo Bonzini + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef TARGET_I386_CG_H +#define TARGET_I386_CG_H + +#include "qom/object.h" + +#include "exec/confidential-guest-support.h" + +#define TYPE_X86_CONFIDENTIAL_GUEST "x86-confidential-guest" + +OBJECT_DECLARE_TYPE(X86ConfidentialGuest, + X86ConfidentialGuestClass, + X86_CONFIDENTIAL_GUEST) + +struct X86ConfidentialGuest { + /* */ + ConfidentialGuestSupport parent_obj; +}; + +/** + * X86ConfidentialGuestClass: + * + * Class to be implemented by confidential-guest-support concrete objects + * for the x86 target. + */ +struct X86ConfidentialGuestClass { + /* */ + ConfidentialGuestSupportClass parent; +}; +#endif diff --git a/target/i386/meson.build b/target/i386/meson.build index 594a0a6abf..9a1d070d13 100644 --- a/target/i386/meson.build +++ b/target/i386/meson.build @@ -6,7 +6,7 @@ i386_ss.add(files( 'xsave_helper.c', 'cpu-dump.c', )) -i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c')) +i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c', 'confidential-guest.c')) # x86 cpu type i386_ss.add(when: 'CONFIG_KVM', if_true: files('host-cpu.c')) diff --git a/target/i386/sev.c b/target/i386/sev.c index 70238e3402..a53f594a24 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -38,7 +38,7 @@ #include "monitor/monitor.h" #include "monitor/hmp-target.h" #include "qapi/qapi-commands-misc-target.h" -#include "exec/confidential-guest-support.h" +#include "confidential-guest.h" #include "hw/i386/pc.h" #include "exec/address-spaces.h" @@ -62,7 +62,7 @@ struct shared_region { * -machine ...,memory-encryption=sev0 */ struct SevGuestState { - ConfidentialGuestSupport parent_obj; + X86ConfidentialGuest parent_obj; /* configuration parameters */ char *sev_device; @@ -2788,7 +2788,7 @@ sev_guest_instance_init(Object *obj) /* sev guest info */ static const TypeInfo sev_guest_info = { - .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT, + .parent = TYPE_X86_CONFIDENTIAL_GUEST, .name = TYPE_SEV_GUEST, .instance_size = sizeof(SevGuestState), .instance_finalize = sev_guest_finalize, -- Gitee From d915f0b495ea7302ea5eed6e5d6a296fdc5cc3ff Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 19 Mar 2024 15:29:33 +0100 Subject: [PATCH 016/115] target/i386: Implement mc->kvm_type() to get VM type commit ee88612df1e8d6c2bfec75bff3f9482ea44acec1 upstream KVM is introducing a new API to create confidential guests, which will be used by TDX and SEV-SNP but is also available for SEV and SEV-ES. The API uses the VM type argument to KVM_CREATE_VM to identify which confidential computing technology to use. Since there are no other expected uses of VM types, delegate mc->kvm_type() for x86 boards to the confidential-guest-support object pointed to by ms->cgs. For example, if a sev-guest object is specified to confidential-guest-support, like, qemu -machine ...,confidential-guest-support=sev0 \ -object sev-guest,id=sev0,... it will check if a VM type KVM_X86_SEV_VM or KVM_X86_SEV_ES_VM is supported, and if so use them together with the KVM_SEV_INIT2 function of the KVM_MEMORY_ENCRYPT_OP ioctl. If not, it will fall back to KVM_SEV_INIT and KVM_SEV_ES_INIT. This is a preparatory work towards TDX and SEV-SNP support, but it will also enable support for VMSA features such as DebugSwap, which are only available via KVM_SEV_INIT2. Co-developed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Signed-off-by: Paolo Bonzini Signed-off-by: PKumarAditya Signed-off-by: PrithivishS --- hw/i386/x86.c | 11 ++++++++ target/i386/confidential-guest.h | 19 ++++++++++++++ target/i386/kvm/kvm.c | 44 ++++++++++++++++++++++++++++++++ target/i386/kvm/kvm_i386.h | 2 ++ 4 files changed, 76 insertions(+) diff --git a/hw/i386/x86.c b/hw/i386/x86.c index d6f9e25abe..864fbd037e 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -1378,6 +1378,16 @@ static void machine_set_sgx_epc(Object *obj, Visitor *v, const char *name, qapi_free_SgxEPCList(list); } +static int x86_kvm_type(MachineState *ms, const char *vm_type) +{ + /* + * No x86 machine has a kvm-type property. If one is added that has + * it, it should call kvm_get_vm_type() directly or not use it at all. + */ + assert(vm_type == NULL); + return kvm_enabled() ? kvm_get_vm_type(ms) : 0; +} + static void x86_machine_initfn(Object *obj) { X86MachineState *x86ms = X86_MACHINE(obj); @@ -1402,6 +1412,7 @@ static void x86_machine_class_init(ObjectClass *oc, void *data) mc->cpu_index_to_instance_props = x86_cpu_index_to_props; mc->get_default_cpu_node_id = x86_get_default_cpu_node_id; mc->possible_cpu_arch_ids = x86_possible_cpu_arch_ids; + mc->kvm_type = x86_kvm_type; x86mc->save_tsc_khz = true; x86mc->fwcfg_dma_enabled = true; nc->nmi_monitor_handler = x86_nmi; diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h index ca12d5a8fb..532e172a60 100644 --- a/target/i386/confidential-guest.h +++ b/target/i386/confidential-guest.h @@ -36,5 +36,24 @@ struct X86ConfidentialGuest { struct X86ConfidentialGuestClass { /* */ ConfidentialGuestSupportClass parent; + + /* */ + int (*kvm_type)(X86ConfidentialGuest *cg); }; + +/** + * x86_confidential_guest_kvm_type: + * + * Calls #X86ConfidentialGuestClass.unplug callback of @plug_handler. + */ +static inline int x86_confidential_guest_kvm_type(X86ConfidentialGuest *cg) +{ + X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); + + if (klass->kvm_type) { + return klass->kvm_type(cg); + } else { + return 0; + } +} #endif diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 8167fce904..4ef0fd21d7 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -31,6 +31,7 @@ #include "sysemu/kvm_int.h" #include "sysemu/runstate.h" #include "kvm_i386.h" +#include "../confidential-guest.h" #include "sev.h" #include "csv.h" #include "xen-emu.h" @@ -163,6 +164,49 @@ static KVMMSRHandlers msr_handlers[KVM_MSR_FILTER_MAX_RANGES]; static RateLimit bus_lock_ratelimit_ctrl; static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value); +static const char *vm_type_name[] = { + [KVM_X86_DEFAULT_VM] = "default", +}; + +bool kvm_is_vm_type_supported(int type) +{ + uint32_t machine_types; + + /* + * old KVM doesn't support KVM_CAP_VM_TYPES but KVM_X86_DEFAULT_VM + * is always supported + */ + if (type == KVM_X86_DEFAULT_VM) { + return true; + } + + machine_types = kvm_check_extension(KVM_STATE(current_machine->accelerator), + KVM_CAP_VM_TYPES); + return !!(machine_types & BIT(type)); +} + +int kvm_get_vm_type(MachineState *ms) +{ + int kvm_type = KVM_X86_DEFAULT_VM; + + if (ms->cgs) { + if (!object_dynamic_cast(OBJECT(ms->cgs), TYPE_X86_CONFIDENTIAL_GUEST)) { + error_report("configuration type %s not supported for x86 guests", + object_get_typename(OBJECT(ms->cgs))); + exit(1); + } + kvm_type = x86_confidential_guest_kvm_type( + X86_CONFIDENTIAL_GUEST(ms->cgs)); + } + + if (!kvm_is_vm_type_supported(kvm_type)) { + error_report("vm-type %s not supported by KVM", vm_type_name[kvm_type]); + exit(1); + } + + return kvm_type; +} + bool kvm_has_smm(void) { return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM); diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h index 30fedcffea..6b44844d95 100644 --- a/target/i386/kvm/kvm_i386.h +++ b/target/i386/kvm/kvm_i386.h @@ -37,6 +37,7 @@ bool kvm_hv_vpindex_settable(void); bool kvm_enable_sgx_provisioning(KVMState *s); bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp); +int kvm_get_vm_type(MachineState *ms); void kvm_arch_reset_vcpu(X86CPU *cs); void kvm_arch_after_reset_vcpu(X86CPU *cpu); void kvm_arch_do_init_vcpu(X86CPU *cs); @@ -49,6 +50,7 @@ void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask); #ifdef CONFIG_KVM +bool kvm_is_vm_type_supported(int type); bool kvm_has_adjust_clock_stable(void); bool kvm_has_exception_payload(void); void kvm_synchronize_all_tsc(void); -- Gitee From 6a23503f7e34939d57b99716d2f3f95df49612fe Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 19 Mar 2024 15:30:25 +0100 Subject: [PATCH 017/115] target/i386: SEV: use KVM_SEV_INIT2 if possible commit 663e2f443e5722370708ce2f4c27d94a2087d2d3 upstream Implement support for the KVM_X86_SEV_VM and KVM_X86_SEV_ES_VM virtual machine types, and the KVM_SEV_INIT2 function of KVM_MEMORY_ENCRYPT_OP. These replace the KVM_SEV_INIT and KVM_SEV_ES_INIT functions, and have several advantages: - sharing the initialization sequence with SEV-SNP and TDX - allowing arguments including the set of desired VMSA features - protection against invalid use of KVM_GET/SET_* ioctls for guests with encrypted state If the KVM_X86_SEV_VM and KVM_X86_SEV_ES_VM types are not supported, fall back to KVM_SEV_INIT and KVM_SEV_ES_INIT (which use the default x86 VM type). [Backport Changes] 1. In target/i386/sev.c, in function sev_kvm_init(), the upstream patch removes a conditional logic that assigns variable `cmd` to either KVM_SEV_INIT or KVM_SEV_ES_INIT based on the state of sev_es_enabled(). However, a non-upstream Hygon-specific CSV/CSV2 logic added in Euler commit 8f4f8a2071e69 uses this `cmd` variable in a call to sev_ioctl() when is_hygon_cpu() and GUEST_POLICY_REUSE_ASID is set to true and requires cmd being set appropriately before ioctl call. Therefore, conditional logic for `cmd` assignment was retained to maintain compatibility with both upstream and Hygon-specific behavior 2. In sev_kvm_init(), the upstream patch introduces changes to support KVM_SEV_INIT2 and to enable the use of KVM_X86_SEV_VM and KVM_X86_SEV_ES_VM types by removing existing sev_ioctl() call. However, in our codebase, the existing sev_ioctl() call is wrapped in an else block following vendor-specific Hygon logic for CSV/CSV2 ASID reuse. To preserve that logic, the upstream changes were also integrated after the Hygon-specific block, ensuring both the non-upstream CSV/CSV2 guest support and the new KVM_SEV_INIT2 handling coexist without conflict. Signed-off-by: Paolo Bonzini Signed-off-by: PKumarAditya Signed-off-by: PrithivishS --- target/i386/kvm/kvm.c | 2 ++ target/i386/sev.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 4ef0fd21d7..10187850d1 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -166,6 +166,8 @@ static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value); static const char *vm_type_name[] = { [KVM_X86_DEFAULT_VM] = "default", + [KVM_X86_SEV_VM] = "SEV", + [KVM_X86_SEV_ES_VM] = "SEV-ES", }; bool kvm_is_vm_type_supported(int type) diff --git a/target/i386/sev.c b/target/i386/sev.c index a53f594a24..800eee50e3 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -26,6 +26,7 @@ #include "qemu/error-report.h" #include "crypto/hash.h" #include "sysemu/kvm.h" +#include "kvm/kvm_i386.h" #include "sev.h" #include "csv.h" #include "sysemu/sysemu.h" @@ -64,6 +65,8 @@ struct shared_region { struct SevGuestState { X86ConfidentialGuest parent_obj; + int kvm_type; + /* configuration parameters */ char *sev_device; uint32_t policy; @@ -1092,6 +1095,26 @@ sev_migration_state_notifier(Notifier *notifier, void *data) static Notifier sev_migration_state; +static int sev_kvm_type(X86ConfidentialGuest *cg) +{ + SevGuestState *sev = SEV_GUEST(cg); + int kvm_type; + + if (sev->kvm_type != -1) { + goto out; + } + + kvm_type = (sev->policy & SEV_POLICY_ES) ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; + if (kvm_is_vm_type_supported(kvm_type)) { + sev->kvm_type = kvm_type; + } else { + sev->kvm_type = KVM_X86_DEFAULT_VM; + } + +out: + return sev->kvm_type; +} + static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { SevGuestState *sev = SEV_GUEST(cgs); @@ -1200,7 +1223,15 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) g_free(init_cmd_buf); } } else { + if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev)) == KVM_X86_DEFAULT_VM) { + cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT; + ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); + } else { + struct kvm_sev_init args = { 0 }; + + ret = sev_ioctl(sev->sev_fd, KVM_SEV_INIT2, &args, &fw_error); + } } if (ret) { @@ -2727,8 +2758,10 @@ static void sev_guest_class_init(ObjectClass *oc, void *data) { ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); klass->kvm_init = sev_kvm_init; + x86_klass->kvm_type = sev_kvm_type; object_class_property_add_str(oc, "sev-device", sev_guest_get_sev_device, @@ -2773,6 +2806,8 @@ sev_guest_instance_init(Object *obj) { SevGuestState *sev = SEV_GUEST(obj); + sev->kvm_type = -1; + sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); sev->policy = DEFAULT_GUEST_POLICY; object_property_add_uint32_ptr(obj, "policy", &sev->policy, -- Gitee From 3460573cb01b09f5215e29884c74f18d75327068 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Sat, 7 Oct 2023 02:58:19 -0400 Subject: [PATCH 018/115] i386/pc: Drop pc_machine_kvm_type() commit 2a1019f2092dd73e157e01add94db5c7c82563b4 upstream. pc_machine_kvm_type() was introduced by commit e21be724eaf5 ("i386/xen: add pc_machine_kvm_type to initialize XEN_EMULATE mode") to do Xen specific initialization by utilizing kvm_type method. commit eeedfe6c6316 ("hw/xen: Simplify emulated Xen platform init") moves the Xen specific initialization to pc_basic_device_init(). There is no need to keep the PC specific kvm_type() implementation anymore. So we'll fallback to kvm_arch_get_default_type(), which simply returns 0. Signed-off-by: Xiaoyao Li Reviewed-by: Isaku Yamahata Reviewed-by: David Hildenbrand Acked-by: David Woodhouse Acked-by: Michael S. Tsirkin Message-ID: <20231007065819.27498-1-xiaoyao.li@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: PKumarAditya Signed-off-by: PrithivishS --- hw/i386/pc.c | 5 ----- include/hw/i386/pc.h | 3 --- 2 files changed, 8 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 204e34db86..2304a12f12 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1876,11 +1876,6 @@ static void pc_machine_initfn(Object *obj) cxl_machine_init(obj, &pcms->cxl_devices_state); } -int pc_machine_kvm_type(MachineState *machine, const char *kvm_type) -{ - return 0; -} - static void pc_machine_reset(MachineState *machine, ShutdownCause reason) { CPUState *cs; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index c49e2970f1..9846edebac 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -310,15 +310,12 @@ extern const size_t pc_compat_1_5_len; extern GlobalProperty pc_compat_1_4[]; extern const size_t pc_compat_1_4_len; -int pc_machine_kvm_type(MachineState *machine, const char *vm_type); - #define DEFINE_PC_MACHINE(suffix, namestr, initfn, optsfn) \ static void pc_machine_##suffix##_class_init(ObjectClass *oc, void *data) \ { \ MachineClass *mc = MACHINE_CLASS(oc); \ optsfn(mc); \ mc->init = initfn; \ - mc->kvm_type = pc_machine_kvm_type; \ } \ static const TypeInfo pc_machine_type_##suffix = { \ .name = namestr TYPE_MACHINE_SUFFIX, \ -- Gitee From 7c8c826291de386f161047427e5702fe840ed7f7 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Tue, 9 Apr 2024 18:07:41 -0500 Subject: [PATCH 019/115] i386/sev: Add 'legacy-vm-type' parameter for SEV guest objects commit 023267334da375226720e62963df9545aa8fc2fd upstream QEMU will currently automatically make use of the KVM_SEV_INIT2 API for initializing SEV and SEV-ES guests verses the older KVM_SEV_INIT/KVM_SEV_ES_INIT interfaces. However, the older interfaces will silently avoid sync'ing FPU/XSAVE state to the VMSA prior to encryption, thus relying on behavior and measurements that assume the related fields to be allow zero. With KVM_SEV_INIT2, this state is now synced into the VMSA, resulting in measurements changes and, theoretically, behaviorial changes, though the latter are unlikely to be seen in practice. To allow a smooth transition to the newer interface, while still providing a mechanism to maintain backward compatibility with VMs created using the older interfaces, provide a new command-line parameter: -object sev-guest,legacy-vm-type=true,... and have it default to false. [Backport Changes] 1. In file qapi/qom.json, within the SevGuestProperties struct, the addition of the kernel-hashes property was skipped as it already exists in the current source code and only legacy-vm-type property was appended as per the current upstream patch. Signed-off-by: Michael Roth Message-ID: <20240409230743.962513-2-michael.roth@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: PKumarAditya Signed-off-by: PrithivishS --- qapi/qom.json | 10 +++++++++- target/i386/sev.c | 18 +++++++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/qapi/qom.json b/qapi/qom.json index e0590a6019..aa5810d843 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -890,6 +890,13 @@ # support on Hygon CPUs (since 8.2) # @secret-file: the file guest owner's secret, only support on Hygon # CPUs (since 8.2) +# @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM. +# The newer KVM_SEV_INIT2 interface syncs additional vCPU +# state when initializing the VMSA structures, which will +# result in a different guest measurement. Set this to +# maintain compatibility with older QEMU or kernel versions +# that rely on legacy KVM_SEV_INIT behavior. +# (default: false) (since 9.1) # # Since: 2.12 ## @@ -904,7 +911,8 @@ '*kernel-hashes': 'bool', '*user-id': 'str', '*secret-header-file': 'str', - '*secret-file': 'str' } } + '*secret-file': 'str', + '*legacy-vm-type': 'bool' } } ## # @ThreadContextProperties: diff --git a/target/i386/sev.c b/target/i386/sev.c index 800eee50e3..dd6699bdaf 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -78,6 +78,7 @@ struct SevGuestState { char *user_id; char *secret_header_file; char *secret_file; + bool legacy_vm_type; /* runtime state */ uint32_t handle; @@ -457,6 +458,16 @@ static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) sev->kernel_hashes = value; } +static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) +{ + return SEV_GUEST(obj)->legacy_vm_type; +} + +static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) +{ + SEV_GUEST(obj)->legacy_vm_type = value; +} + bool sev_enabled(void) { @@ -1105,7 +1116,7 @@ static int sev_kvm_type(X86ConfidentialGuest *cg) } kvm_type = (sev->policy & SEV_POLICY_ES) ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; - if (kvm_is_vm_type_supported(kvm_type)) { + if (kvm_is_vm_type_supported(kvm_type) && !sev->legacy_vm_type) { sev->kvm_type = kvm_type; } else { sev->kvm_type = KVM_X86_DEFAULT_VM; @@ -2799,6 +2810,11 @@ sev_guest_class_init(ObjectClass *oc, void *data) sev_guest_set_secret_file); object_class_property_set_description(oc, "secret-file", "file of the guest owner's secret"); + object_class_property_add_bool(oc, "legacy-vm-type", + sev_guest_get_legacy_vm_type, + sev_guest_set_legacy_vm_type); + object_class_property_set_description(oc, "legacy-vm-type", + "use legacy VM type to maintain measurement compatibility with older QEMU or kernel versions."); } static void -- Gitee From 36543fc6456511586c2b7a4ced89d0e228283ec1 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 20 Nov 2023 10:42:59 +0100 Subject: [PATCH 020/115] hw: Add compat machines for 9.0 commit 2b10a6760ee6d00dd404975549d51e079a26605c upstream Add 9.0 machine types for arm/i440fx/m68k/q35/s390x/spapr. Signed-off-by: Cornelia Huck Message-ID: <20231120094259.1191804-1-cohuck@redhat.com> Acked-by: Thomas Huth Reviewed-by: Harsh Prateek Bora Reviewed-by: Gavin Shan Acked-by: Eric Farman # s390x Signed-off-by: Thomas Huth Signed-off-by: PKumarAditya Signed-off-by: PrithivishS --- hw/arm/virt.c | 9 ++++++++- hw/core/machine.c | 3 +++ hw/i386/pc.c | 3 +++ hw/i386/pc_piix.c | 17 ++++++++++++++--- hw/i386/pc_q35.c | 13 ++++++++++++- hw/m68k/virt.c | 9 ++++++++- hw/ppc/spapr.c | 15 +++++++++++++-- hw/s390x/s390-virtio-ccw.c | 14 +++++++++++++- include/hw/boards.h | 3 +++ include/hw/i386/pc.h | 3 +++ 10 files changed, 80 insertions(+), 9 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index cf4156ed49..c12fb9a8dd 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -4190,10 +4190,17 @@ static void machvirt_machine_init(void) } type_init(machvirt_machine_init); +static void virt_machine_9_0_options(MachineClass *mc) +{ +} +DEFINE_VIRT_MACHINE_AS_LATEST(9, 0) + static void virt_machine_8_2_options(MachineClass *mc) { + virt_machine_9_0_options(mc); + compat_props_add(mc->compat_props, hw_compat_8_2, hw_compat_8_2_len); } -DEFINE_VIRT_MACHINE_AS_LATEST(8, 2) +DEFINE_VIRT_MACHINE(8, 2) static void virt_machine_8_1_options(MachineClass *mc) { diff --git a/hw/core/machine.c b/hw/core/machine.c index 965682619b..a0e252073f 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -32,6 +32,9 @@ #include "hw/virtio/virtio-net.h" #include "audio/audio.h" +GlobalProperty hw_compat_8_2[] = {}; +const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2); + GlobalProperty hw_compat_8_1[] = { { TYPE_PCI_BRIDGE, "x-pci-express-writeable-slt-bug", "true" }, { "ramfb", "x-migrate", "off" }, diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 2304a12f12..b8de693823 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -78,6 +78,9 @@ { "qemu64-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\ { "athlon-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, }, +GlobalProperty pc_compat_8_2[] = {}; +const size_t pc_compat_8_2_len = G_N_ELEMENTS(pc_compat_8_2); + GlobalProperty pc_compat_8_1[] = {}; const size_t pc_compat_8_1_len = G_N_ELEMENTS(pc_compat_8_1); diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index eace854335..042c13cdbc 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -545,13 +545,26 @@ static void pc_i440fx_machine_options(MachineClass *m) "Use a different south bridge than PIIX3"); } -static void pc_i440fx_8_2_machine_options(MachineClass *m) +static void pc_i440fx_9_0_machine_options(MachineClass *m) { pc_i440fx_machine_options(m); m->alias = "pc"; m->is_default = true; } +DEFINE_I440FX_MACHINE(v9_0, "pc-i440fx-9.0", NULL, + pc_i440fx_9_0_machine_options); + +static void pc_i440fx_8_2_machine_options(MachineClass *m) +{ + pc_i440fx_9_0_machine_options(m); + m->alias = NULL; + m->is_default = false; + + compat_props_add(m->compat_props, hw_compat_8_2, hw_compat_8_2_len); + compat_props_add(m->compat_props, pc_compat_8_2, pc_compat_8_2_len); +} + DEFINE_I440FX_MACHINE(v8_2, "pc-i440fx-8.2", NULL, pc_i440fx_8_2_machine_options); @@ -560,8 +573,6 @@ static void pc_i440fx_8_1_machine_options(MachineClass *m) PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_i440fx_8_2_machine_options(m); - m->alias = NULL; - m->is_default = false; pcmc->broken_32bit_mem_addr_check = true; compat_props_add(m->compat_props, hw_compat_8_1, hw_compat_8_1_len); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 3392b0c110..4e467fbb2f 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -385,12 +385,23 @@ static void pc_q35_machine_options(MachineClass *m) machine_class_allow_dynamic_sysbus_dev(m, TYPE_VMBUS_BRIDGE); } -static void pc_q35_8_2_machine_options(MachineClass *m) +static void pc_q35_9_0_machine_options(MachineClass *m) { pc_q35_machine_options(m); m->alias = "q35"; } +DEFINE_Q35_MACHINE(v9_0, "pc-q35-9.0", NULL, + pc_q35_9_0_machine_options); + +static void pc_q35_8_2_machine_options(MachineClass *m) +{ + pc_q35_9_0_machine_options(m); + m->alias = NULL; + compat_props_add(m->compat_props, hw_compat_8_2, hw_compat_8_2_len); + compat_props_add(m->compat_props, pc_compat_8_2, pc_compat_8_2_len); +} + DEFINE_Q35_MACHINE(v8_2, "pc-q35-8.2", NULL, pc_q35_8_2_machine_options); diff --git a/hw/m68k/virt.c b/hw/m68k/virt.c index 2e49e262ee..e2792ef46d 100644 --- a/hw/m68k/virt.c +++ b/hw/m68k/virt.c @@ -346,10 +346,17 @@ type_init(virt_machine_register_types) } \ type_init(machvirt_machine_##major##_##minor##_init); +static void virt_machine_9_0_options(MachineClass *mc) +{ +} +DEFINE_VIRT_MACHINE(9, 0, true) + static void virt_machine_8_2_options(MachineClass *mc) { + virt_machine_9_0_options(mc); + compat_props_add(mc->compat_props, hw_compat_8_2, hw_compat_8_2_len); } -DEFINE_VIRT_MACHINE(8, 2, true) +DEFINE_VIRT_MACHINE(8, 2, false) static void virt_machine_8_1_options(MachineClass *mc) { diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index df1a93a58b..ba4707fd09 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -4789,15 +4789,26 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +/* + * pseries-9.0 + */ +static void spapr_machine_9_0_class_options(MachineClass *mc) +{ + /* Defaults for the latest behaviour inherited from the base class */ +} + +DEFINE_SPAPR_MACHINE(9_0, "9.0", true); + /* * pseries-8.2 */ static void spapr_machine_8_2_class_options(MachineClass *mc) { - /* Defaults for the latest behaviour inherited from the base class */ + spapr_machine_9_0_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_8_2, hw_compat_8_2_len); } -DEFINE_SPAPR_MACHINE(8_2, "8.2", true); +DEFINE_SPAPR_MACHINE(8_2, "8.2", false); /* * pseries-8.1 diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 62a94c08bb..bc382730ac 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -858,14 +858,26 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +static void ccw_machine_9_0_instance_options(MachineState *machine) +{ +} + +static void ccw_machine_9_0_class_options(MachineClass *mc) +{ +} +DEFINE_CCW_MACHINE(9_0, "9.0", true); + static void ccw_machine_8_2_instance_options(MachineState *machine) { + ccw_machine_9_0_instance_options(machine); } static void ccw_machine_8_2_class_options(MachineClass *mc) { + ccw_machine_9_0_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_8_2, hw_compat_8_2_len); } -DEFINE_CCW_MACHINE(8_2, "8.2", true); +DEFINE_CCW_MACHINE(8_2, "8.2", false); static void ccw_machine_8_1_instance_options(MachineState *machine) { diff --git a/include/hw/boards.h b/include/hw/boards.h index 8ac8cad2a2..1f239622f1 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -421,6 +421,9 @@ struct MachineState { } \ type_init(machine_initfn##_register_types) +extern GlobalProperty hw_compat_8_2[]; +extern const size_t hw_compat_8_2_len; + extern GlobalProperty hw_compat_8_1[]; extern const size_t hw_compat_8_1_len; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 9846edebac..1aa3ed5905 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -211,6 +211,9 @@ void pc_madt_cpu_entry(int uid, const CPUArchIdList *apic_ids, /* sgx.c */ void pc_machine_init_sgx_epc(PCMachineState *pcms); +extern GlobalProperty pc_compat_8_2[]; +extern const size_t pc_compat_8_2_len; + extern GlobalProperty pc_compat_8_1[]; extern const size_t pc_compat_8_1_len; -- Gitee From 9008c65343e33dfaeb01944e8afc54cc2b024926 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 25 Mar 2024 15:01:51 +0100 Subject: [PATCH 021/115] hw: Add compat machines for 9.1 commit 85fa9acda887438feb0711970bb528959a37568e upstream Add 9.1 machine types for arm/i440fx/m68k/q35/s390x/spapr. Reviewed-by: Cornelia Huck Acked-by: Thomas Huth Reviewed-by: Harsh Prateek Bora Reviewed-by: Zhao Liu Cc: Gavin Shan Signed-off-by: Paolo Bonzini Signed-off-by: TauqirAzam Signed-off-by: PrithivishS --- hw/arm/virt.c | 9 ++++++++- hw/core/machine.c | 3 +++ hw/i386/pc.c | 3 +++ hw/i386/pc_piix.c | 17 ++++++++++++++--- hw/i386/pc_q35.c | 14 ++++++++++++-- hw/m68k/virt.c | 9 ++++++++- hw/ppc/spapr.c | 15 +++++++++++++-- hw/s390x/s390-virtio-ccw.c | 14 +++++++++++++- include/hw/boards.h | 3 +++ include/hw/i386/pc.h | 3 +++ 10 files changed, 80 insertions(+), 10 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index c12fb9a8dd..9c8626ee07 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -4190,10 +4190,17 @@ static void machvirt_machine_init(void) } type_init(machvirt_machine_init); +static void virt_machine_9_1_options(MachineClass *mc) +{ +} +DEFINE_VIRT_MACHINE_AS_LATEST(9, 1) + static void virt_machine_9_0_options(MachineClass *mc) { + virt_machine_9_1_options(mc); + compat_props_add(mc->compat_props, hw_compat_9_0, hw_compat_9_0_len); } -DEFINE_VIRT_MACHINE_AS_LATEST(9, 0) +DEFINE_VIRT_MACHINE(9, 0) static void virt_machine_8_2_options(MachineClass *mc) { diff --git a/hw/core/machine.c b/hw/core/machine.c index a0e252073f..fd06145e36 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -33,6 +33,9 @@ #include "audio/audio.h" GlobalProperty hw_compat_8_2[] = {}; +GlobalProperty hw_compat_9_0[] = {}; +const size_t hw_compat_9_0_len = G_N_ELEMENTS(hw_compat_9_0); + const size_t hw_compat_8_2_len = G_N_ELEMENTS(hw_compat_8_2); GlobalProperty hw_compat_8_1[] = { diff --git a/hw/i386/pc.c b/hw/i386/pc.c index b8de693823..1b5100fa66 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -78,6 +78,9 @@ { "qemu64-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\ { "athlon-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, }, +GlobalProperty pc_compat_9_0[] = {}; +const size_t pc_compat_9_0_len = G_N_ELEMENTS(pc_compat_9_0); + GlobalProperty pc_compat_8_2[] = {}; const size_t pc_compat_8_2_len = G_N_ELEMENTS(pc_compat_8_2); diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 042c13cdbc..95a773062a 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -545,21 +545,32 @@ static void pc_i440fx_machine_options(MachineClass *m) "Use a different south bridge than PIIX3"); } -static void pc_i440fx_9_0_machine_options(MachineClass *m) +static void pc_i440fx_9_1_machine_options(MachineClass *m) { pc_i440fx_machine_options(m); m->alias = "pc"; m->is_default = true; } +DEFINE_I440FX_MACHINE(v9_1, "pc-i440fx-9.1", NULL, + pc_i440fx_9_1_machine_options); + +static void pc_i440fx_9_0_machine_options(MachineClass *m) +{ + pc_i440fx_9_1_machine_options(m); + m->alias = NULL; + m->is_default = false; + + compat_props_add(m->compat_props, hw_compat_9_0, hw_compat_9_0_len); + compat_props_add(m->compat_props, pc_compat_9_0, pc_compat_9_0_len); +} + DEFINE_I440FX_MACHINE(v9_0, "pc-i440fx-9.0", NULL, pc_i440fx_9_0_machine_options); static void pc_i440fx_8_2_machine_options(MachineClass *m) { pc_i440fx_9_0_machine_options(m); - m->alias = NULL; - m->is_default = false; compat_props_add(m->compat_props, hw_compat_8_2, hw_compat_8_2_len); compat_props_add(m->compat_props, pc_compat_8_2, pc_compat_8_2_len); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 4e467fbb2f..b19d3a2d89 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -385,19 +385,29 @@ static void pc_q35_machine_options(MachineClass *m) machine_class_allow_dynamic_sysbus_dev(m, TYPE_VMBUS_BRIDGE); } -static void pc_q35_9_0_machine_options(MachineClass *m) +static void pc_q35_9_1_machine_options(MachineClass *m) { pc_q35_machine_options(m); m->alias = "q35"; } +DEFINE_Q35_MACHINE(v9_1, "pc-q35-9.1", NULL, + pc_q35_9_1_machine_options); + +static void pc_q35_9_0_machine_options(MachineClass *m) +{ + pc_q35_9_1_machine_options(m); + m->alias = NULL; + compat_props_add(m->compat_props, hw_compat_9_0, hw_compat_9_0_len); + compat_props_add(m->compat_props, pc_compat_9_0, pc_compat_9_0_len); +} + DEFINE_Q35_MACHINE(v9_0, "pc-q35-9.0", NULL, pc_q35_9_0_machine_options); static void pc_q35_8_2_machine_options(MachineClass *m) { pc_q35_9_0_machine_options(m); - m->alias = NULL; compat_props_add(m->compat_props, hw_compat_8_2, hw_compat_8_2_len); compat_props_add(m->compat_props, pc_compat_8_2, pc_compat_8_2_len); } diff --git a/hw/m68k/virt.c b/hw/m68k/virt.c index e2792ef46d..33c00083d4 100644 --- a/hw/m68k/virt.c +++ b/hw/m68k/virt.c @@ -346,10 +346,17 @@ type_init(virt_machine_register_types) } \ type_init(machvirt_machine_##major##_##minor##_init); +static void virt_machine_9_1_options(MachineClass *mc) +{ +} +DEFINE_VIRT_MACHINE(9, 1, true) + static void virt_machine_9_0_options(MachineClass *mc) { + virt_machine_9_1_options(mc); + compat_props_add(mc->compat_props, hw_compat_9_0, hw_compat_9_0_len); } -DEFINE_VIRT_MACHINE(9, 0, true) +DEFINE_VIRT_MACHINE(9, 0, false) static void virt_machine_8_2_options(MachineClass *mc) { diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index ba4707fd09..e56cfb1d8b 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -4789,15 +4789,26 @@ static void spapr_machine_latest_class_options(MachineClass *mc) } \ type_init(spapr_machine_register_##suffix) +/* + * pseries-9.1 + */ +static void spapr_machine_9_1_class_options(MachineClass *mc) +{ + /* Defaults for the latest behaviour inherited from the base class */ +} + +DEFINE_SPAPR_MACHINE(9_1, "9.1", true); + /* * pseries-9.0 */ static void spapr_machine_9_0_class_options(MachineClass *mc) { - /* Defaults for the latest behaviour inherited from the base class */ + spapr_machine_9_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_9_0, hw_compat_9_0_len); } -DEFINE_SPAPR_MACHINE(9_0, "9.0", true); +DEFINE_SPAPR_MACHINE(9_0, "9.0", false); /* * pseries-8.2 diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index bc382730ac..bd5c430da8 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -858,14 +858,26 @@ bool css_migration_enabled(void) } \ type_init(ccw_machine_register_##suffix) +static void ccw_machine_9_1_instance_options(MachineState *machine) +{ +} + +static void ccw_machine_9_1_class_options(MachineClass *mc) +{ +} +DEFINE_CCW_MACHINE(9_1, "9.1", true); + static void ccw_machine_9_0_instance_options(MachineState *machine) { + ccw_machine_9_1_instance_options(machine); } static void ccw_machine_9_0_class_options(MachineClass *mc) { + ccw_machine_9_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_9_0, hw_compat_9_0_len); } -DEFINE_CCW_MACHINE(9_0, "9.0", true); +DEFINE_CCW_MACHINE(9_0, "9.0", false); static void ccw_machine_8_2_instance_options(MachineState *machine) { diff --git a/include/hw/boards.h b/include/hw/boards.h index 1f239622f1..a1cc005522 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -421,6 +421,9 @@ struct MachineState { } \ type_init(machine_initfn##_register_types) +extern GlobalProperty hw_compat_9_0[]; +extern const size_t hw_compat_9_0_len; + extern GlobalProperty hw_compat_8_2[]; extern const size_t hw_compat_8_2_len; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 1aa3ed5905..ee5138cb97 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -211,6 +211,9 @@ void pc_madt_cpu_entry(int uid, const CPUArchIdList *apic_ids, /* sgx.c */ void pc_machine_init_sgx_epc(PCMachineState *pcms); +extern GlobalProperty pc_compat_9_0[]; +extern const size_t pc_compat_9_0_len; + extern GlobalProperty pc_compat_8_2[]; extern const size_t pc_compat_8_2_len; -- Gitee From 1eb71dc0fbfb7e5d699d6714094f989066e6de6c Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Mon, 18 Mar 2024 16:53:36 +0100 Subject: [PATCH 022/115] target/i386: add guest-phys-bits cpu property commit 513ba32dccc659c80722b3a43233b26eaa50309a upstream Allows to set guest-phys-bits (cpuid leaf 80000008, eax[23:16]) via -cpu $model,guest-phys-bits=$nr. [Backport Changes] Changes to file target/i386/cpu.h and target/i386/cpu.c were skipped since the current codebase has already been updated with latest changes in commit b6bfee023b15f25c1db . Signed-off-by: Gerd Hoffmann Message-ID: <20240318155336.156197-3-kraxel@redhat.com> Reviewed-by: Zhao Liu Signed-off-by: Paolo Bonzini Signed-off-by: TauqirAzam Signed-off-by: PrithivishS --- hw/i386/pc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 1b5100fa66..e081b8f12a 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -78,7 +78,9 @@ { "qemu64-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\ { "athlon-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, }, -GlobalProperty pc_compat_9_0[] = {}; +GlobalProperty pc_compat_9_0[] = { + { TYPE_X86_CPU, "guest-phys-bits", "0" }, +}; const size_t pc_compat_9_0_len = G_N_ELEMENTS(pc_compat_9_0); GlobalProperty pc_compat_8_2[] = {}; -- Gitee From 8733a63038b54efc0e0ac1a2d26e3407accb13ff Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Tue, 9 Apr 2024 18:07:43 -0500 Subject: [PATCH 023/115] hw/i386/sev: Use legacy SEV VM types for older machine types commit ea7fbd37537b3a598335c21ccb2ea674630fc810 upstream Newer 9.1 machine types will default to using the KVM_SEV_INIT2 API for creating SEV/SEV-ES going forward. However, this API results in guest measurement changes which are generally not expected for users of these older guest types and can cause disruption if they switch to a newer QEMU/kernel version. Avoid this by continuing to use the older KVM_SEV_INIT/KVM_SEV_ES_INIT APIs for older machine types. Signed-off-by: Michael Roth Message-ID: <20240409230743.962513-4-michael.roth@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: TauqirAzam Signed-off-by: PrithivishS --- hw/i386/pc.c | 1 + target/i386/sev.c | 1 + 2 files changed, 2 insertions(+) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index e081b8f12a..672ce2abd6 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -80,6 +80,7 @@ GlobalProperty pc_compat_9_0[] = { { TYPE_X86_CPU, "guest-phys-bits", "0" }, + { "sev-guest", "legacy-vm-type", "true" }, }; const size_t pc_compat_9_0_len = G_N_ELEMENTS(pc_compat_9_0); diff --git a/target/i386/sev.c b/target/i386/sev.c index dd6699bdaf..a2952fbb1c 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -2835,6 +2835,7 @@ sev_guest_instance_init(Object *obj) object_property_add_uint32_ptr(obj, "reduced-phys-bits", &sev->reduced_phys_bits, OBJ_PROP_FLAG_READWRITE); + object_apply_compat_props(obj); } /* sev guest info */ -- Gitee From c1233893632b1bec8a31235fb09c6397c4216104 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 29 Feb 2024 01:36:25 -0500 Subject: [PATCH 024/115] trace/kvm: Split address space and slot id in trace_kvm_set_user_memory() commit 72853afc638b3e28779c86dd05da2f3bb149fe2c upstream The upper 16 bits of kvm_userspace_memory_region::slot are address space id. Parse it separately in trace_kvm_set_user_memory(). Signed-off-by: Xiaoyao Li Message-ID: <20240229063726.610065-5-xiaoyao.li@intel.com> Signed-off-by: Paolo Bonzini Signed-off-by: TauqirAzam Signed-off-by: PrithivishS --- accel/kvm/kvm-all.c | 5 +++-- accel/kvm/trace-events | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 57c8b5ad89..1c7e13f5ff 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -319,8 +319,9 @@ static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, boo ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); slot->old_flags = mem.flags; err: - trace_kvm_set_user_memory(mem.slot, mem.flags, mem.guest_phys_addr, - mem.memory_size, mem.userspace_addr, ret); + trace_kvm_set_user_memory(mem.slot >> 16, (uint16_t)mem.slot, mem.flags, + mem.guest_phys_addr, mem.memory_size, + mem.userspace_addr, ret); if (ret < 0) { error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d," " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s", diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events index 9c880fdcf4..4517e8a070 100644 --- a/accel/kvm/trace-events +++ b/accel/kvm/trace-events @@ -19,7 +19,7 @@ kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" kvm_irqchip_release_virq(int virq) "virq %d" kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d" kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d" -kvm_set_user_memory(uint32_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" +kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32 kvm_resample_fd_notify(int gsi) "gsi %d" kvm_dirty_ring_full(int id) "vcpu %d" -- Gitee From aacc9f6ee40fa6c4a32060e56bfd44db2b219c77 Mon Sep 17 00:00:00 2001 From: William Roche Date: Tue, 30 Jan 2024 19:06:40 +0000 Subject: [PATCH 025/115] migration: prevent migration when VM has poisoned memory commit 06152b89db64bc5ccec1e54576706ba891654df9 upstream A memory page poisoned from the hypervisor level is no longer readable. The migration of a VM will crash Qemu when it tries to read the memory address space and stumbles on the poisoned page with a similar stack trace: Program terminated with signal SIGBUS, Bus error. To avoid this VM crash during the migration, prevent the migration when a known hardware poison exists on the VM. Signed-off-by: William Roche Link: https://lore.kernel.org/r/20240130190640.139364-2-william.roche@oracle.com Signed-off-by: Peter Xu Signed-off-by: TauqirAzam Signed-off-by: PrithivishS --- accel/kvm/kvm-all.c | 10 ++++++++++ accel/stubs/kvm-stub.c | 5 +++++ include/sysemu/kvm.h | 6 ++++++ migration/migration.c | 7 +++++++ 4 files changed, 28 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 1c7e13f5ff..3b2870f297 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -1169,6 +1169,11 @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension) return ret; } +/* + * We track the poisoned pages to be able to: + * - replace them on VM reset + * - block a migration for a VM with a poisoned page + */ typedef struct HWPoisonPage { ram_addr_t ram_addr; QLIST_ENTRY(HWPoisonPage) list; @@ -1202,6 +1207,11 @@ void kvm_hwpoison_page_add(ram_addr_t ram_addr) QLIST_INSERT_HEAD(&hwpoison_page_list, page, list); } +bool kvm_hwpoisoned_mem(void) +{ + return !QLIST_EMPTY(&hwpoison_page_list); +} + static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size) { #if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c index e68f3433ad..1aef4baadb 100644 --- a/accel/stubs/kvm-stub.c +++ b/accel/stubs/kvm-stub.c @@ -134,6 +134,11 @@ uint32_t kvm_dirty_ring_size(void) return 0; } + +bool kvm_hwpoisoned_mem(void) +{ + return false; +} int kvm_load_user_data(hwaddr loader_start, hwaddr image_end, hwaddr initrd_start, hwaddr dtb_end, hwaddr ram_size, struct kvm_numa_info *numa_info) { diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 1528824b93..049bc9f487 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -588,6 +588,12 @@ bool kvm_dirty_ring_enabled(void); uint32_t kvm_dirty_ring_size(void); + +/** + * kvm_hwpoisoned_mem - indicate if there is any hwpoisoned page + * reported for the VM. + */ +bool kvm_hwpoisoned_mem(void); int kvm_load_user_data(hwaddr loader_start, hwaddr image_end, hwaddr initrd_start, hwaddr dtb_end, hwaddr ram_size, struct kvm_numa_info *numa_info); diff --git a/migration/migration.c b/migration/migration.c index 526f926b79..1ac0a9dfac 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -67,6 +67,7 @@ #include "options.h" #include "sysemu/dirtylimit.h" #include "qemu/sockets.h" +#include "sysemu/kvm.h" #define DEFAULT_FD_MAX 4096 @@ -1937,6 +1938,12 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, return false; } + if (kvm_hwpoisoned_mem()) { + error_setg(errp, "Can't migrate this vm with hardware poisoned memory, " + "please reboot the vm and try again"); + return false; + } + if (migration_is_blocked(errp)) { return false; } -- Gitee From 02a23880920700cdc8b65ac02887386b5976c9fa Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Wed, 20 Mar 2024 03:39:06 -0500 Subject: [PATCH 026/115] kvm: Introduce support for memory_attributes commit 0811baed49010a9b651b8029ab6b9828b09a884f upstream Introduce the helper functions to set the attributes of a range of memory to private or shared. This is necessary to notify KVM the private/shared attribute of each gpa range. KVM needs the information to decide the GPA needs to be mapped at hva-based shared memory or guest_memfd based private memory. Signed-off-by: Xiaoyao Li Message-ID: <20240320083945.991426-11-michael.roth@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: priyankagunturi Signed-off-by: PrithivishS --- accel/kvm/kvm-all.c | 32 ++++++++++++++++++++++++++++++++ include/sysemu/kvm.h | 4 ++++ 2 files changed, 36 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 3b2870f297..665f23c760 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -107,6 +107,7 @@ bool kvm_csv3_allowed; bool kvm_has_guest_debug; static int kvm_sstep_flags; static bool kvm_immediate_exit; +static uint64_t kvm_supported_memory_attributes; static hwaddr kvm_max_slot_size = ~0; static const KVMCapabilityInfo kvm_required_capabilites[] = { @@ -1315,6 +1316,36 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size) kvm_max_slot_size = max_slot_size; } +static int kvm_set_memory_attributes(hwaddr start, uint64_t size, uint64_t attr) +{ + struct kvm_memory_attributes attrs; + int r; + + assert((attr & kvm_supported_memory_attributes) == attr); + attrs.attributes = attr; + attrs.address = start; + attrs.size = size; + attrs.flags = 0; + + r = kvm_vm_ioctl(kvm_state, KVM_SET_MEMORY_ATTRIBUTES, &attrs); + if (r) { + error_report("failed to set memory (0x%" HWADDR_PRIx "+0x%" PRIx64 ") " + "with attr 0x%" PRIx64 " error '%s'", + start, size, attr, strerror(errno)); + } + return r; +} + +int kvm_set_memory_attributes_private(hwaddr start, uint64_t size) +{ + return kvm_set_memory_attributes(start, size, KVM_MEMORY_ATTRIBUTE_PRIVATE); +} + +int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size) +{ + return kvm_set_memory_attributes(start, size, 0); +} + /* Called with KVMMemoryListener.slots_lock held */ static void kvm_set_phys_mem(KVMMemoryListener *kml, MemoryRegionSection *section, bool add) @@ -2472,6 +2503,7 @@ static int kvm_init(MachineState *ms) goto err; } + kvm_supported_memory_attributes = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES); kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT); s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS); diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 049bc9f487..0ce82e379e 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -604,4 +604,8 @@ int kvm_delete_shadow_device(PCIDevice *dev); void kvm_mark_guest_state_protected(void); + +int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); +int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); + #endif -- Gitee From d3a0bcfe8f0ed9b2e5c14807279754895e7e59db Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Wed, 20 Mar 2024 03:39:02 -0500 Subject: [PATCH 027/115] RAMBlock: Add support of KVM private guest memfd commit 15f7a80c49cb3637f62fa37fa4a17da913bd91ff upstream Add KVM guest_memfd support to RAMBlock so both normal hva based memory and kvm guest memfd based private memory can be associated in one RAMBlock. Introduce new flag RAM_GUEST_MEMFD. When it's set, it calls KVM ioctl to create private guest_memfd during RAMBlock setup. Allocating a new RAM_GUEST_MEMFD flag to instruct the setup of guest memfd is more flexible and extensible than simply relying on the VM type because in the future we may have the case that not all the memory of a VM need guest memfd. As a benefit, it also avoid getting MachineState in memory subsystem. Note, RAM_GUEST_MEMFD is supposed to be set for memory backends of confidential guests, such as TDX VM. How and when to set it for memory backends will be implemented in the following patches. Introduce memory_region_has_guest_memfd() to query if the MemoryRegion has KVM guest_memfd allocated. Signed-off-by: Xiaoyao Li Reviewed-by: David Hildenbrand Message-ID: <20240320083945.991426-7-michael.roth@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: priyankagunturi Signed-off-by: PrithivishS --- accel/kvm/kvm-all.c | 28 ++++++++++++++++++++++++++++ accel/stubs/kvm-stub.c | 5 +++++ include/exec/memory.h | 20 +++++++++++++++++--- include/exec/ram_addr.h | 2 +- include/exec/ramblock.h | 1 + include/sysemu/kvm.h | 2 ++ system/memory.c | 5 +++++ system/physmem.c | 34 +++++++++++++++++++++++++++++++--- 8 files changed, 90 insertions(+), 7 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 665f23c760..53788b0afb 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -108,6 +108,7 @@ bool kvm_has_guest_debug; static int kvm_sstep_flags; static bool kvm_immediate_exit; static uint64_t kvm_supported_memory_attributes; +static bool kvm_guest_memfd_supported; static hwaddr kvm_max_slot_size = ~0; static const KVMCapabilityInfo kvm_required_capabilites[] = { @@ -2504,6 +2505,11 @@ static int kvm_init(MachineState *ms) } kvm_supported_memory_attributes = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES); + kvm_guest_memfd_supported = + kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) && + kvm_check_extension(s, KVM_CAP_USER_MEMORY2) && + (kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE); + kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT); s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS); @@ -4268,3 +4274,25 @@ void kvm_mark_guest_state_protected(void) { kvm_state->guest_state_protected = true; } + +int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp) +{ + int fd; + struct kvm_create_guest_memfd guest_memfd = { + .size = size, + .flags = flags, + }; + + if (!kvm_guest_memfd_supported) { + error_setg(errp, "KVM does not support guest_memfd"); + return -1; + } + + fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_GUEST_MEMFD, &guest_memfd); + if (fd < 0) { + error_setg_errno(errp, errno, "Error creating KVM guest_memfd"); + return -1; + } + + return fd; +} diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c index 1aef4baadb..106007acae 100644 --- a/accel/stubs/kvm-stub.c +++ b/accel/stubs/kvm-stub.c @@ -139,6 +139,11 @@ bool kvm_hwpoisoned_mem(void) { return false; } + +int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp) +{ + return -ENOSYS; +} int kvm_load_user_data(hwaddr loader_start, hwaddr image_end, hwaddr initrd_start, hwaddr dtb_end, hwaddr ram_size, struct kvm_numa_info *numa_info) { diff --git a/include/exec/memory.h b/include/exec/memory.h index 51fe10d4a0..e2f99cf34a 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -255,6 +255,9 @@ typedef struct IOMMUTLBEvent { /* RAM FD is opened read-only */ #define RAM_READONLY_FD (1 << 11) +/* RAM can be private that has kvm guest memfd backend */ +#define RAM_GUEST_MEMFD (1 << 12) + /* The address limit of the VirtCCA bounce buffer is 4GB. */ #define VIRTCCA_SHARED_HUGEPAGE_ADDR_LIMIT 0x100000000ULL @@ -1472,7 +1475,8 @@ void memory_region_init_ram_nomigrate(MemoryRegion *mr, * @name: Region name, becomes part of RAMBlock name used in migration stream * must be unique within any device * @size: size of the region. - * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE. + * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE, + * RAM_GUEST_MEMFD. * @errp: pointer to Error*, to store an error if it happens. * * Note that this function does not do anything to cause the data in the @@ -1530,7 +1534,7 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr, * (getpagesize()) will be used. * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM, * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, - * RAM_READONLY_FD + * RAM_READONLY_FD, RAM_GUEST_MEMFD * @path: the path in which to allocate the RAM. * @offset: offset within the file referenced by path * @errp: pointer to Error*, to store an error if it happens. @@ -1558,7 +1562,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, * @size: size of the region. * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM, * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, - * RAM_READONLY_FD + * RAM_READONLY_FD, RAM_GUEST_MEMFD * @fd: the fd to mmap. * @offset: offset within the file referenced by fd * @errp: pointer to Error*, to store an error if it happens. @@ -1869,6 +1873,16 @@ static inline bool memory_region_is_romd(MemoryRegion *mr) */ bool memory_region_is_protected(MemoryRegion *mr); +/** + * memory_region_has_guest_memfd: check whether a memory region has guest_memfd + * associated + * + * Returns %true if a memory region's ram_block has valid guest_memfd assigned. + * + * @mr: the memory region being queried + */ +bool memory_region_has_guest_memfd(MemoryRegion *mr); + /** * memory_region_get_iommu: check whether a memory region is an iommu * diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index ef6988b445..435ade6bd9 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -109,7 +109,7 @@ long qemu_maxrampagesize(void); * @mr: the memory region where the ram block is * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM, * RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY, - * RAM_READONLY_FD + * RAM_READONLY_FD, RAM_GUEST_MEMFD * @mem_path or @fd: specify the backing file or device * @offset: Offset into target file * @errp: pointer to Error*, to store an error if it happens diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h index 8f9579ed70..f62d3019db 100644 --- a/include/exec/ramblock.h +++ b/include/exec/ramblock.h @@ -41,6 +41,7 @@ struct RAMBlock { QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers; int fd; uint64_t fd_offset; + int guest_memfd; size_t page_size; /* dirty bitmap used during migration */ unsigned long *bmap; diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 0ce82e379e..0dd78b5323 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -605,6 +605,8 @@ int kvm_delete_shadow_device(PCIDevice *dev); void kvm_mark_guest_state_protected(void); +int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp); + int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); diff --git a/system/memory.c b/system/memory.c index fa99009701..30eaa753bc 100644 --- a/system/memory.c +++ b/system/memory.c @@ -1867,6 +1867,11 @@ bool memory_region_is_protected(MemoryRegion *mr) return mr->ram && (mr->ram_block->flags & RAM_PROTECTED); } +bool memory_region_has_guest_memfd(MemoryRegion *mr) +{ + return mr->ram_block && mr->ram_block->guest_memfd >= 0; +} + uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr) { uint8_t mask = mr->dirty_log_mask; diff --git a/system/physmem.c b/system/physmem.c index 8f4be2d131..b467b3e98b 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -1842,6 +1842,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) RAMBlock *block; RAMBlock *last_block = NULL; ram_addr_t ram_size; + bool free_on_error = false; Error *err = NULL; qemu_mutex_lock_ramlist(); @@ -1868,6 +1869,19 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) return; } memory_try_enable_merging(new_block->host, new_block->max_length); + free_on_error = true; + } + } + + if (new_block->flags & RAM_GUEST_MEMFD) { + assert(kvm_enabled()); + assert(new_block->guest_memfd < 0); + + new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length, + 0, errp); + if (new_block->guest_memfd < 0) { + qemu_mutex_unlock_ramlist(); + goto out_free; } } @@ -1916,6 +1930,13 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) ram_block_notify_add(new_block->host, new_block->used_length, new_block->max_length); } + return; + +out_free: + if (free_on_error) { + qemu_anon_ram_free(new_block->host, new_block->max_length); + new_block->host = NULL; + } } #ifdef CONFIG_POSIX @@ -1930,7 +1951,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, /* Just support these ram flags by now. */ assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE | RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY | - RAM_READONLY_FD)) == 0); + RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0); if (xen_enabled()) { error_setg(errp, "-mem-path not supported with Xen"); @@ -1965,6 +1986,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, new_block->used_length = size; new_block->max_length = size; new_block->flags = ram_flags; + new_block->guest_memfd = -1; new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset, errp); if (!new_block->host) { @@ -2043,7 +2065,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, Error *local_err = NULL; assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC | - RAM_NORESERVE)) == 0); + RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0); assert(!host ^ (ram_flags & RAM_PREALLOC)); size = HOST_PAGE_ALIGN(size); @@ -2055,6 +2077,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, new_block->max_length = max_size; assert(max_size >= size); new_block->fd = -1; + new_block->guest_memfd = -1; new_block->page_size = qemu_real_host_page_size(); new_block->host = host; new_block->flags = ram_flags; @@ -2077,7 +2100,7 @@ RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags, MemoryRegion *mr, Error **errp) { - assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0); + assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0); return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp); } @@ -2105,6 +2128,11 @@ static void reclaim_ramblock(RAMBlock *block) } else { qemu_anon_ram_free(block->host, block->max_length); } + + if (block->guest_memfd >= 0) { + close(block->guest_memfd); + } + g_free(block); } -- Gitee From a625cb68145be9d14befad75cdae5116ff820d8f Mon Sep 17 00:00:00 2001 From: Chao Peng Date: Wed, 20 Mar 2024 03:39:05 -0500 Subject: [PATCH 028/115] kvm: Enable KVM_SET_USER_MEMORY_REGION2 for memslot commit ce5a983233b4ca94ced88c9581014346509b5c71 upstream Switch to KVM_SET_USER_MEMORY_REGION2 when supported by KVM. With KVM_SET_USER_MEMORY_REGION2, QEMU can set up memory region that backend'ed both by hva-based shared memory and guest memfd based private memory. Signed-off-by: Chao Peng Co-developed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Message-ID: <20240320083945.991426-10-michael.roth@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: priyankagunturi Signed-off-by: PrithivishS --- accel/kvm/kvm-all.c | 46 +++++++++++++++++++++++++++++++++------- accel/kvm/trace-events | 2 +- include/sysemu/kvm_int.h | 2 ++ 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 53788b0afb..6af9f8fa6d 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -300,35 +300,58 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram, static int kvm_set_user_memory_region(KVMMemoryListener *kml, KVMSlot *slot, bool new) { KVMState *s = kvm_state; - struct kvm_userspace_memory_region mem; + struct kvm_userspace_memory_region2 mem; int ret; mem.slot = slot->slot | (kml->as_id << 16); mem.guest_phys_addr = slot->start_addr; mem.userspace_addr = (unsigned long)slot->ram; mem.flags = slot->flags; + mem.guest_memfd = slot->guest_memfd; + mem.guest_memfd_offset = slot->guest_memfd_offset; if (slot->memory_size && !new && (mem.flags ^ slot->old_flags) & KVM_MEM_READONLY) { /* Set the slot size to 0 before setting the slot to the desired * value. This is needed based on KVM commit 75d61fbc. */ mem.memory_size = 0; - ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); + + if (kvm_guest_memfd_supported) { + ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem); + } else { + ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); + } if (ret < 0) { goto err; } } mem.memory_size = slot->memory_size; - ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); + if (kvm_guest_memfd_supported) { + ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION2, &mem); + } else { + ret = kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); + } slot->old_flags = mem.flags; err: trace_kvm_set_user_memory(mem.slot >> 16, (uint16_t)mem.slot, mem.flags, mem.guest_phys_addr, mem.memory_size, - mem.userspace_addr, ret); + mem.userspace_addr, mem.guest_memfd, + mem.guest_memfd_offset, ret); if (ret < 0) { - error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d," - " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s", - __func__, mem.slot, slot->start_addr, - (uint64_t)mem.memory_size, strerror(errno)); + if (kvm_guest_memfd_supported) { + error_report("%s: KVM_SET_USER_MEMORY_REGION2 failed, slot=%d," + " start=0x%" PRIx64 ", size=0x%" PRIx64 "," + " flags=0x%" PRIx32 ", guest_memfd=%" PRId32 "," + " guest_memfd_offset=0x%" PRIx64 ": %s", + __func__, mem.slot, slot->start_addr, + (uint64_t)mem.memory_size, mem.flags, + mem.guest_memfd, (uint64_t)mem.guest_memfd_offset, + strerror(errno)); + } else { + error_report("%s: KVM_SET_USER_MEMORY_REGION failed, slot=%d," + " start=0x%" PRIx64 ", size=0x%" PRIx64 ": %s", + __func__, mem.slot, slot->start_addr, + (uint64_t)mem.memory_size, strerror(errno)); + } } return ret; } @@ -516,6 +539,10 @@ static int kvm_mem_flags(MemoryRegion *mr) if (readonly && kvm_readonly_mem_allowed) { flags |= KVM_MEM_READONLY; } + if (memory_region_has_guest_memfd(mr)) { + assert(kvm_guest_memfd_supported); + flags |= KVM_MEM_GUEST_MEMFD; + } return flags; } @@ -1443,6 +1470,9 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, mem->ram_start_offset = ram_start_offset; mem->ram = ram; mem->flags = kvm_mem_flags(mr); + mem->guest_memfd = mr->ram_block->guest_memfd; + mem->guest_memfd_offset = (uint8_t*)ram - mr->ram_block->host; + kvm_slot_init_dirty_bitmap(mem); err = kvm_set_user_memory_region(kml, mem, true); if (err) { diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events index 4517e8a070..c2445a646d 100644 --- a/accel/kvm/trace-events +++ b/accel/kvm/trace-events @@ -19,7 +19,7 @@ kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" kvm_irqchip_release_virq(int virq) "virq %d" kvm_set_ioeventfd_mmio(int fd, uint64_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%" PRIx64 " val=0x%x assign: %d size: %d match: %d" kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint32_t val, bool assign, uint32_t size, bool datamatch) "fd: %d @0x%x val=0x%x assign: %d size: %d match: %d" -kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " ret=%d" +kvm_set_user_memory(uint16_t as, uint16_t slot, uint32_t flags, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, uint32_t fd, uint64_t fd_offset, int ret) "AddrSpace#%d Slot#%d flags=0x%x gpa=0x%"PRIx64 " size=0x%"PRIx64 " ua=0x%"PRIx64 " guest_memfd=%d" " guest_memfd_offset=0x%" PRIx64 " ret=%d" kvm_clear_dirty_log(uint32_t slot, uint64_t start, uint32_t size) "slot#%"PRId32" start 0x%"PRIx64" size 0x%"PRIx32 kvm_resample_fd_notify(int gsi) "gsi %d" kvm_dirty_ring_full(int id) "vcpu %d" diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index 9a7bc1a4b8..d4568a66d2 100644 --- a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h @@ -30,6 +30,8 @@ typedef struct KVMSlot int as_id; /* Cache of the offset in ram address space */ ram_addr_t ram_start_offset; + int guest_memfd; + hwaddr guest_memfd_offset; } KVMSlot; typedef struct KVMMemoryUpdate { -- Gitee From 17529afd6acb56dd491b87b3e4a8c4434723b375 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Wed, 20 Mar 2024 03:39:11 -0500 Subject: [PATCH 029/115] kvm/memory: Make memory type private by default if it has guest memfd backend commit bd3bcf6962b664ca3bf9c60fdcc4534e8e3d0641 upstream KVM side leaves the memory to shared by default, which may incur the overhead of paging conversion on the first visit of each page. Because the expectation is that page is likely to private for the VMs that require private memory (has guest memfd). Explicitly set the memory to private when memory region has valid guest memfd backend. Signed-off-by: Xiaoyao Li Signed-off-by: Michael Roth Message-ID: <20240320083945.991426-16-michael.roth@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: priyankagunturi Signed-off-by: PrithivishS --- accel/kvm/kvm-all.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 6af9f8fa6d..f846c30e6f 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -1480,6 +1480,16 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, strerror(-err)); abort(); } + + if (memory_region_has_guest_memfd(mr)) { + err = kvm_set_memory_attributes_private(start_addr, slot_size); + if (err) { + error_report("%s: failed to set memory attribute private: %s", + __func__, strerror(-err)); + exit(1); + } + } + start_addr += slot_size; ram_start_offset += slot_size; ram += slot_size; -- Gitee From e198d63da86dacb32c784579306291e68fdfc9fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 20 Nov 2023 12:59:15 +0100 Subject: [PATCH 030/115] backends: Use g_autofree in HostMemoryBackendClass::alloc() handlers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2d7a1eb6e65d5dd32cb9943b6fb3c81d206cd61f upstream In preparation of having HostMemoryBackendClass::alloc() handlers return a boolean, have them use g_autofree. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Manos Pitsidianakis Reviewed-by: Gavin Shan Message-Id: <20231120213301.24349-15-philmd@linaro.org> Signed-off-by: priyankagunturi Signed-off-by: PrithivishS --- backends/hostmem-epc.c | 3 +-- backends/hostmem-file.c | 3 +-- backends/hostmem-memfd.c | 3 +-- backends/hostmem-ram.c | 3 +-- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/backends/hostmem-epc.c b/backends/hostmem-epc.c index 4e162d6789..3ceb079f9e 100644 --- a/backends/hostmem-epc.c +++ b/backends/hostmem-epc.c @@ -20,8 +20,8 @@ static void sgx_epc_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) { + g_autofree char *name = NULL; uint32_t ram_flags; - char *name; int fd; if (!backend->size) { @@ -41,7 +41,6 @@ sgx_epc_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name, backend->size, ram_flags, fd, 0, errp); - g_free(name); } static void sgx_epc_backend_instance_init(Object *obj) diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c index ce63a372a3..01a526f57f 100644 --- a/backends/hostmem-file.c +++ b/backends/hostmem-file.c @@ -134,8 +134,8 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) object_get_typename(OBJECT(backend))); #else HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(backend); + g_autofree gchar *name = NULL; uint32_t ram_flags; - gchar *name; if (!backend->size) { error_setg(errp, "can't create backend with size 0"); @@ -179,7 +179,6 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name, backend->size, fb->align, ram_flags, fb->mem_path, fb->offset, errp); - g_free(name); if (virtcca_cvm_enabled() && backend->share && (strcmp(fb->mem_path, "/dev/shm") != 0) && diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c index 3fc85c3db8..db28ab5a56 100644 --- a/backends/hostmem-memfd.c +++ b/backends/hostmem-memfd.c @@ -35,8 +35,8 @@ static void memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) { HostMemoryBackendMemfd *m = MEMORY_BACKEND_MEMFD(backend); + g_autofree char *name = NULL; uint32_t ram_flags; - char *name; int fd; if (!backend->size) { @@ -57,7 +57,6 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name, backend->size, ram_flags, fd, 0, errp); - g_free(name); } static bool diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c index b8e55cdbd0..0a670fc22a 100644 --- a/backends/hostmem-ram.c +++ b/backends/hostmem-ram.c @@ -19,8 +19,8 @@ static void ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) { + g_autofree char *name = NULL; uint32_t ram_flags; - char *name; if (!backend->size) { error_setg(errp, "can't create backend with size 0"); @@ -32,7 +32,6 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend), name, backend->size, ram_flags, errp); - g_free(name); } static void -- Gitee From de6cb2ad183f51777c301a764aa63030995a7295 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 20 Nov 2023 13:49:30 +0100 Subject: [PATCH 031/115] backends: Simplify host_memory_backend_memory_complete() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e199f7ad4df8b2c2efad0acdea91b936c130261c upstream Return early if bc->alloc is NULL. De-indent the if() ladder. Note, this avoids a pointless call to error_propagate() with errp=NULL at the 'out:' label. Change trivial when reviewed with 'git-diff --ignore-all-space'. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Manos Pitsidianakis Reviewed-by: Gavin Shan Message-Id: <20231120213301.24349-16-philmd@linaro.org> Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- backends/hostmem.c | 135 +++++++++++++++++++++++---------------------- 1 file changed, 68 insertions(+), 67 deletions(-) diff --git a/backends/hostmem.c b/backends/hostmem.c index 747e7838c0..1723c19165 100644 --- a/backends/hostmem.c +++ b/backends/hostmem.c @@ -328,83 +328,84 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) void *ptr; uint64_t sz; - if (bc->alloc) { - bc->alloc(backend, &local_err); - if (local_err) { - goto out; - } + if (!bc->alloc) { + return; + } + bc->alloc(backend, &local_err); + if (local_err) { + goto out; + } - ptr = memory_region_get_ram_ptr(&backend->mr); - sz = memory_region_size(&backend->mr); + ptr = memory_region_get_ram_ptr(&backend->mr); + sz = memory_region_size(&backend->mr); - if (backend->merge) { - qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE); - } - if (!backend->dump) { - qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP); - } + if (backend->merge) { + qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE); + } + if (!backend->dump) { + qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP); + } #ifdef CONFIG_NUMA - unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES); - /* lastbit == MAX_NODES means maxnode = 0 */ - unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1); - /* ensure policy won't be ignored in case memory is preallocated - * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so - * this doesn't catch hugepage case. */ - unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE; - int mode = backend->policy; - - /* check for invalid host-nodes and policies and give more verbose - * error messages than mbind(). */ - if (maxnode && backend->policy == MPOL_DEFAULT) { - error_setg(errp, "host-nodes must be empty for policy default," - " or you should explicitly specify a policy other" - " than default"); - return; - } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) { - error_setg(errp, "host-nodes must be set for policy %s", - HostMemPolicy_str(backend->policy)); - return; - } + unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES); + /* lastbit == MAX_NODES means maxnode = 0 */ + unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1); + /* ensure policy won't be ignored in case memory is preallocated + * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so + * this doesn't catch hugepage case. */ + unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE; + int mode = backend->policy; + + /* check for invalid host-nodes and policies and give more verbose + * error messages than mbind(). */ + if (maxnode && backend->policy == MPOL_DEFAULT) { + error_setg(errp, "host-nodes must be empty for policy default," + " or you should explicitly specify a policy other" + " than default"); + return; + } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) { + error_setg(errp, "host-nodes must be set for policy %s", + HostMemPolicy_str(backend->policy)); + return; + } - /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1 - * as argument to mbind() due to an old Linux bug (feature?) which - * cuts off the last specified node. This means backend->host_nodes - * must have MAX_NODES+1 bits available. - */ - assert(sizeof(backend->host_nodes) >= - BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long)); - assert(maxnode <= MAX_NODES); + /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1 + * as argument to mbind() due to an old Linux bug (feature?) which + * cuts off the last specified node. This means backend->host_nodes + * must have MAX_NODES+1 bits available. + */ + assert(sizeof(backend->host_nodes) >= + BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long)); + assert(maxnode <= MAX_NODES); #ifdef HAVE_NUMA_HAS_PREFERRED_MANY - if (mode == MPOL_PREFERRED && numa_has_preferred_many() > 0) { - /* - * Replace with MPOL_PREFERRED_MANY otherwise the mbind() below - * silently picks the first node. - */ - mode = MPOL_PREFERRED_MANY; - } + if (mode == MPOL_PREFERRED && numa_has_preferred_many() > 0) { + /* + * Replace with MPOL_PREFERRED_MANY otherwise the mbind() below + * silently picks the first node. + */ + mode = MPOL_PREFERRED_MANY; + } #endif - if (maxnode && - mbind(ptr, sz, mode, backend->host_nodes, maxnode + 1, flags)) { - if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) { - error_setg_errno(errp, errno, - "cannot bind memory to host NUMA nodes"); - return; - } + if (maxnode && + mbind(ptr, sz, mode, backend->host_nodes, maxnode + 1, flags)) { + if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) { + error_setg_errno(errp, errno, + "cannot bind memory to host NUMA nodes"); + return; } + } #endif - /* Preallocate memory after the NUMA policy has been instantiated. - * This is necessary to guarantee memory is allocated with - * specified NUMA policy in place. - */ - if (backend->prealloc) { - qemu_prealloc_mem(memory_region_get_fd(&backend->mr), ptr, sz, - backend->prealloc_threads, - backend->prealloc_context, &local_err); - if (local_err) { - goto out; - } + /* Preallocate memory after the NUMA policy has been instantiated. + * This is necessary to guarantee memory is allocated with + * specified NUMA policy in place. + */ + if (backend->prealloc) { + qemu_prealloc_mem(memory_region_get_fd(&backend->mr), ptr, sz, + backend->prealloc_threads, + backend->prealloc_context, &local_err); + if (local_err) { + goto out; } } out: -- Gitee From 031fa6eb7bb08d9c5a0ae26878c027bb66eb7760 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Wed, 20 Mar 2024 03:39:03 -0500 Subject: [PATCH 032/115] HostMem: Add mechanism to opt in kvm guest memfd via MachineState commit 37662d85b0b7dded0ebdf6747bef6c3bb7ed6a0c upstream Add a new member "guest_memfd" to memory backends. When it's set to true, it enables RAM_GUEST_MEMFD in ram_flags, thus private kvm guest_memfd will be allocated during RAMBlock allocation. Memory backend's @guest_memfd is wired with @require_guest_memfd field of MachineState. It avoid looking up the machine in phymem.c. MachineState::require_guest_memfd is supposed to be set by any VMs that requires KVM guest memfd as private memory, e.g., TDX VM. Signed-off-by: Xiaoyao Li Reviewed-by: David Hildenbrand Message-ID: <20240320083945.991426-8-michael.roth@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- backends/hostmem-file.c | 1 + backends/hostmem-memfd.c | 1 + backends/hostmem-ram.c | 1 + backends/hostmem.c | 1 + hw/core/machine.c | 5 +++++ include/hw/boards.h | 2 ++ include/sysemu/hostmem.h | 1 + 7 files changed, 12 insertions(+) diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c index 01a526f57f..aa2898da4a 100644 --- a/backends/hostmem-file.c +++ b/backends/hostmem-file.c @@ -174,6 +174,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) ram_flags |= fb->readonly ? RAM_READONLY_FD : 0; ram_flags |= fb->rom == ON_OFF_AUTO_ON ? RAM_READONLY : 0; ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; + ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0; ram_flags |= fb->is_pmem ? RAM_PMEM : 0; ram_flags |= RAM_NAMED_FILE; memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name, diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c index db28ab5a56..e0d3ba197f 100644 --- a/backends/hostmem-memfd.c +++ b/backends/hostmem-memfd.c @@ -55,6 +55,7 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) name = host_memory_backend_get_name(backend); ram_flags = backend->share ? RAM_SHARED : 0; ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; + ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0; memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name, backend->size, ram_flags, fd, 0, errp); } diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c index 0a670fc22a..8c41dc8959 100644 --- a/backends/hostmem-ram.c +++ b/backends/hostmem-ram.c @@ -30,6 +30,7 @@ ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) name = host_memory_backend_get_name(backend); ram_flags = backend->share ? RAM_SHARED : 0; ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; + ram_flags |= backend->guest_memfd ? RAM_GUEST_MEMFD : 0; memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend), name, backend->size, ram_flags, errp); } diff --git a/backends/hostmem.c b/backends/hostmem.c index 1723c19165..6097e0249f 100644 --- a/backends/hostmem.c +++ b/backends/hostmem.c @@ -279,6 +279,7 @@ static void host_memory_backend_init(Object *obj) /* TODO: convert access to globals to compat properties */ backend->merge = machine_mem_merge(machine); backend->dump = machine_dump_guest_core(machine); + backend->guest_memfd = machine_require_guest_memfd(machine); backend->reserve = true; backend->prealloc_threads = machine->smp.cpus; } diff --git a/hw/core/machine.c b/hw/core/machine.c index fd06145e36..42c982caa2 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -1197,6 +1197,11 @@ bool machine_mem_merge(MachineState *machine) return machine->mem_merge; } +bool machine_require_guest_memfd(MachineState *machine) +{ + return machine->require_guest_memfd; +} + static char *cpu_slot_to_string(const CPUArchId *cpu) { GString *s = g_string_new(NULL); diff --git a/include/hw/boards.h b/include/hw/boards.h index a1cc005522..4ce2a5b17a 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -30,6 +30,7 @@ bool machine_usb(MachineState *machine); int machine_phandle_start(MachineState *machine); bool machine_dump_guest_core(MachineState *machine); bool machine_mem_merge(MachineState *machine); +bool machine_require_guest_memfd(MachineState *machine); HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine); void machine_set_cpu_numa_node(MachineState *machine, const CpuInstanceProperties *props, @@ -364,6 +365,7 @@ struct MachineState { char *dt_compatible; bool dump_guest_core; bool mem_merge; + bool require_guest_memfd; bool usb; bool usb_disabled; char *firmware; diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h index 39326f1d4f..ffeb32c0e0 100644 --- a/include/sysemu/hostmem.h +++ b/include/sysemu/hostmem.h @@ -66,6 +66,7 @@ struct HostMemoryBackend { uint64_t size; bool merge, dump, use_canonical_path; bool prealloc, is_mapped, share, reserve; + bool guest_memfd; uint32_t prealloc_threads; ThreadContext *prealloc_context; DECLARE_BITMAP(host_nodes, MAX_NODES + 1); -- Gitee From a3146c1093691871e9b10ddc17b8c88c404812f7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 20 Mar 2024 17:45:29 +0100 Subject: [PATCH 033/115] RAMBlock: make guest_memfd require uncoordinated discard commit 852f0048f3ea9f14de18eb279a99fccb6d250e8f upstream Some subsystems like VFIO might disable ram block discard, but guest_memfd uses discard operations to implement conversions between private and shared memory. Because of this, sequences like the following can result in stale IOMMU mappings: 1. allocate shared page 2. convert page shared->private 3. discard shared page 4. convert page private->shared 5. allocate shared page 6. issue DMA operations against that shared page This is not a use-after-free, because after step 3 VFIO is still pinning the page. However, DMA operations in step 6 will hit the old mapping that was allocated in step 1. Address this by taking ram_block_discard_is_enabled() into account when deciding whether or not to discard pages. Since kvm_convert_memory()/guest_memfd doesn't implement a RamDiscardManager handler to convey and replay discard operations, this is a case of uncoordinated discard, which is blocked/released by ram_block_discard_require(). Interestingly, this function had no use so far. Alternative approaches would be to block discard of shared pages, but this would cause guests to consume twice the memory if they use VFIO; or to implement a RamDiscardManager and only block uncoordinated discard, i.e. use ram_block_coordinated_discard_require(). [Commit message mostly by Michael Roth ] Signed-off-by: Paolo Bonzini Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- system/physmem.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/system/physmem.c b/system/physmem.c index b467b3e98b..aa5149cf72 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -1877,6 +1877,13 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) assert(kvm_enabled()); assert(new_block->guest_memfd < 0); + if (ram_block_discard_require(true) < 0) { + error_setg_errno(errp, errno, + "cannot set up private guest memory: discard currently blocked"); + error_append_hint(errp, "Are you using assigned devices?\n"); + goto out_free; + } + new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length, 0, errp); if (new_block->guest_memfd < 0) { @@ -2131,6 +2138,7 @@ static void reclaim_ramblock(RAMBlock *block) if (block->guest_memfd >= 0) { close(block->guest_memfd); + ram_block_discard_require(false); } g_free(block); -- Gitee From 631e3253e1fbf7b81a3ff18dc5eedfbb72f40530 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Wed, 20 Mar 2024 03:39:07 -0500 Subject: [PATCH 034/115] physmem: Introduce ram_block_discard_guest_memfd_range() commit b2e9426c04fdd32d93a3a37db6b0c2e67c88c335 upstream When memory page is converted from private to shared, the original private memory is back'ed by guest_memfd. Introduce ram_block_discard_guest_memfd_range() for discarding memory in guest_memfd. Based on a patch by Isaku Yamahata . Signed-off-by: Xiaoyao Li Reviewed-by: David Hildenbrand Signed-off-by: Michael Roth Message-ID: <20240320083945.991426-12-michael.roth@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- include/exec/cpu-common.h | 2 ++ system/physmem.c | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index d21d9990ad..f267aa67e7 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -182,6 +182,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque); int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque); int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length); +int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start, + size_t length); #endif diff --git a/system/physmem.c b/system/physmem.c index aa5149cf72..4d3bf75ce8 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -3712,6 +3712,29 @@ err: return ret; } +int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start, + size_t length) +{ + int ret = -1; + +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE + ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + start, length); + + if (ret) { + ret = -errno; + error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)", + __func__, rb->idstr, start, length, ret); + } +#else + ret = -ENOSYS; + error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)", + __func__, rb->idstr, start, length, ret); +#endif + + return ret; +} + bool ramblock_is_pmem(RAMBlock *rb) { return rb->flags & RAM_PMEM; -- Gitee From 61e04892fe46ee4493c56abc2e9122b7b2bd554b Mon Sep 17 00:00:00 2001 From: Chao Peng Date: Wed, 20 Mar 2024 03:39:08 -0500 Subject: [PATCH 035/115] kvm: handle KVM_EXIT_MEMORY_FAULT commit c15e5684071d93174e446be318f49d8d59b15d6d upstream Upon an KVM_EXIT_MEMORY_FAULT exit, userspace needs to do the memory conversion on the RAMBlock to turn the memory into desired attribute, switching between private and shared. Currently only KVM_MEMORY_EXIT_FLAG_PRIVATE in flags is valid when KVM_EXIT_MEMORY_FAULT happens. Note, KVM_EXIT_MEMORY_FAULT makes sense only when the RAMBlock has guest_memfd memory backend. Note, KVM_EXIT_MEMORY_FAULT returns with -EFAULT, so special handling is added. When page is converted from shared to private, the original shared memory can be discarded via ram_block_discard_range(). Note, shared memory can be discarded only when it's not back'ed by hugetlb because hugetlb is supposed to be pre-allocated and no need for discarding. [Backport Changes] Changes to file accel/kvm/trace-events were skipped since the current codebase has already been updated with latest changes in commit 2464d0d6115e1794468ff455e3acdb98e0d71a31. Signed-off-by: Chao Peng Co-developed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Message-ID: <20240320083945.991426-13-michael.roth@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- accel/kvm/kvm-all.c | 98 +++++++++++++++++++++++++++++++++++++++----- include/sysemu/kvm.h | 2 + 2 files changed, 90 insertions(+), 10 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index f846c30e6f..4cf2179c29 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -2995,6 +2995,69 @@ static void kvm_eat_signals(CPUState *cpu) } while (sigismember(&chkset, SIG_IPI)); } +int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private) +{ + MemoryRegionSection section; + ram_addr_t offset; + MemoryRegion *mr; + RAMBlock *rb; + void *addr; + int ret = -1; + + trace_kvm_convert_memory(start, size, to_private ? "shared_to_private" : "private_to_shared"); + + if (!QEMU_PTR_IS_ALIGNED(start, qemu_real_host_page_size()) || + !QEMU_PTR_IS_ALIGNED(size, qemu_real_host_page_size())) { + return -1; + } + + if (!size) { + return -1; + } + + section = memory_region_find(get_system_memory(), start, size); + mr = section.mr; + if (!mr) { + return -1; + } + + if (!memory_region_has_guest_memfd(mr)) { + error_report("Converting non guest_memfd backed memory region " + "(0x%"HWADDR_PRIx" ,+ 0x%"HWADDR_PRIx") to %s", + start, size, to_private ? "private" : "shared"); + goto out_unref; + } + + if (to_private) { + ret = kvm_set_memory_attributes_private(start, size); + } else { + ret = kvm_set_memory_attributes_shared(start, size); + } + if (ret) { + goto out_unref; + } + + addr = memory_region_get_ram_ptr(mr) + section.offset_within_region; + rb = qemu_ram_block_from_host(addr, false, &offset); + + if (to_private) { + if (rb->page_size != qemu_real_host_page_size()) { + /* + * shared memory is backed by hugetlb, which is supposed to be + * pre-allocated and doesn't need to be discarded + */ + goto out_unref; + } + ret = ram_block_discard_range(rb, offset, size); + } else { + ret = ram_block_discard_guest_memfd_range(rb, offset, size); + } + +out_unref: + memory_region_unref(mr); + return ret; +} + int kvm_cpu_exec(CPUState *cpu) { struct kvm_run *run = cpu->kvm_run; @@ -3062,18 +3125,20 @@ int kvm_cpu_exec(CPUState *cpu) ret = EXCP_INTERRUPT; break; } - fprintf(stderr, "error: kvm run failed %s\n", - strerror(-run_ret)); + if (!(run_ret == -EFAULT && run->exit_reason == KVM_EXIT_MEMORY_FAULT)) { + fprintf(stderr, "error: kvm run failed %s\n", + strerror(-run_ret)); #ifdef TARGET_PPC - if (run_ret == -EBUSY) { - fprintf(stderr, - "This is probably because your SMT is enabled.\n" - "VCPU can only run on primary threads with all " - "secondary threads offline.\n"); - } + if (run_ret == -EBUSY) { + fprintf(stderr, + "This is probably because your SMT is enabled.\n" + "VCPU can only run on primary threads with all " + "secondary threads offline.\n"); + } #endif - ret = -1; - break; + ret = -1; + break; + } } trace_kvm_run_exit(cpu->cpu_index, run->exit_reason); @@ -3160,6 +3225,19 @@ int kvm_cpu_exec(CPUState *cpu) break; } break; + case KVM_EXIT_MEMORY_FAULT: + trace_kvm_memory_fault(run->memory_fault.gpa, + run->memory_fault.size, + run->memory_fault.flags); + if (run->memory_fault.flags & ~KVM_MEMORY_EXIT_FLAG_PRIVATE) { + error_report("KVM_EXIT_MEMORY_FAULT: Unknown flag 0x%" PRIx64, + (uint64_t)run->memory_fault.flags); + ret = -1; + break; + } + ret = kvm_convert_memory(run->memory_fault.gpa, run->memory_fault.size, + run->memory_fault.flags & KVM_MEMORY_EXIT_FLAG_PRIVATE); + break; default: DPRINTF("kvm_arch_handle_exit\n"); ret = kvm_arch_handle_exit(cpu, run); diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 0dd78b5323..8f78abc5b6 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -610,4 +610,6 @@ int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp); int kvm_set_memory_attributes_private(hwaddr start, uint64_t size); int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size); +int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private); + #endif -- Gitee From 4a282f62fe65e5857a9ea4e56fcddcd3b36305eb Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Mon, 18 Dec 2023 17:43:19 -0300 Subject: [PATCH 036/115] linux-headers: riscv: add ptrace.h commit 1583ca8aa61e1648d1f340c9a6ae3cd7ba3a82ae upstream KVM vector support for RISC-V requires the linux-header ptrace.h. Signed-off-by: Daniel Henrique Barboza Acked-by: Alistair Francis Message-ID: <20231218204321.75757-3-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- linux-headers/asm-riscv/ptrace.h | 132 +++++++++++++++++++++++++++++++ scripts/update-linux-headers.sh | 3 + 2 files changed, 135 insertions(+) create mode 100644 linux-headers/asm-riscv/ptrace.h diff --git a/linux-headers/asm-riscv/ptrace.h b/linux-headers/asm-riscv/ptrace.h new file mode 100644 index 0000000000..1e3166caca --- /dev/null +++ b/linux-headers/asm-riscv/ptrace.h @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * Copyright (C) 2012 Regents of the University of California + */ + +#ifndef _ASM_RISCV_PTRACE_H +#define _ASM_RISCV_PTRACE_H + +#ifndef __ASSEMBLY__ + +#include + +#define PTRACE_GETFDPIC 33 + +#define PTRACE_GETFDPIC_EXEC 0 +#define PTRACE_GETFDPIC_INTERP 1 + +/* + * User-mode register state for core dumps, ptrace, sigcontext + * + * This decouples struct pt_regs from the userspace ABI. + * struct user_regs_struct must form a prefix of struct pt_regs. + */ +struct user_regs_struct { + unsigned long pc; + unsigned long ra; + unsigned long sp; + unsigned long gp; + unsigned long tp; + unsigned long t0; + unsigned long t1; + unsigned long t2; + unsigned long s0; + unsigned long s1; + unsigned long a0; + unsigned long a1; + unsigned long a2; + unsigned long a3; + unsigned long a4; + unsigned long a5; + unsigned long a6; + unsigned long a7; + unsigned long s2; + unsigned long s3; + unsigned long s4; + unsigned long s5; + unsigned long s6; + unsigned long s7; + unsigned long s8; + unsigned long s9; + unsigned long s10; + unsigned long s11; + unsigned long t3; + unsigned long t4; + unsigned long t5; + unsigned long t6; +}; + +struct __riscv_f_ext_state { + __u32 f[32]; + __u32 fcsr; +}; + +struct __riscv_d_ext_state { + __u64 f[32]; + __u32 fcsr; +}; + +struct __riscv_q_ext_state { + __u64 f[64] __attribute__((aligned(16))); + __u32 fcsr; + /* + * Reserved for expansion of sigcontext structure. Currently zeroed + * upon signal, and must be zero upon sigreturn. + */ + __u32 reserved[3]; +}; + +struct __riscv_ctx_hdr { + __u32 magic; + __u32 size; +}; + +struct __riscv_extra_ext_header { + __u32 __padding[129] __attribute__((aligned(16))); + /* + * Reserved for expansion of sigcontext structure. Currently zeroed + * upon signal, and must be zero upon sigreturn. + */ + __u32 reserved; + struct __riscv_ctx_hdr hdr; +}; + +union __riscv_fp_state { + struct __riscv_f_ext_state f; + struct __riscv_d_ext_state d; + struct __riscv_q_ext_state q; +}; + +struct __riscv_v_ext_state { + unsigned long vstart; + unsigned long vl; + unsigned long vtype; + unsigned long vcsr; + unsigned long vlenb; + void *datap; + /* + * In signal handler, datap will be set a correct user stack offset + * and vector registers will be copied to the address of datap + * pointer. + */ +}; + +struct __riscv_v_regset_state { + unsigned long vstart; + unsigned long vl; + unsigned long vtype; + unsigned long vcsr; + unsigned long vlenb; + char vreg[]; +}; + +/* + * According to spec: The number of bits in a single vector register, + * VLEN >= ELEN, which must be a power of 2, and must be no greater than + * 2^16 = 65536bits = 8192bytes + */ +#define RISCV_MAX_VLENB (8192) + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_RISCV_PTRACE_H */ diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index 4153a00957..fa02da1917 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -163,6 +163,9 @@ for arch in $ARCHLIST; do cp "$hdrdir/include/asm/kvm_para.h" "$output/linux-headers/asm-loongarch/" cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-loongarch/" fi + if [ $arch = riscv ]; then + cp "$tmpdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/" + fi done arch= -- Gitee From 1b39fe37791e30d6305445834d6c8eefec583c2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Tue, 14 May 2024 18:42:44 +0100 Subject: [PATCH 037/115] scripts/update-linux-header.sh: be more src tree friendly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b51ddd937f11f76614d4b36d14d8778df242661c upstream Running "install_headers" in the Linux source tree is fairly unfriendly as out-of-tree builds will start complaining about the kernel source being non-pristine. As we have a temporary directory for the install we should also do the build step here. So now we have: $tmpdir/ $blddir/ $hdrdir/ Reviewed-by: Pierrick Bouvier Reviewed-by: Michael S. Tsirkin Signed-off-by: Alex Bennée Message-Id: <20240514174253.694591-3-alex.bennee@linaro.org> Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- scripts/update-linux-headers.sh | 80 +++++++++++++++++---------------- 1 file changed, 41 insertions(+), 39 deletions(-) diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index fa02da1917..746573d80c 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -27,6 +27,8 @@ # types like "__u64". This work is done in the cp_portable function. tmpdir=$(mktemp -d) +hdrdir="$tmpdir/headers" +blddir="$tmpdir/build" linux="$1" output="$2" @@ -111,52 +113,52 @@ for arch in $ARCHLIST; do arch_var=ARCH fi - make -C "$linux" INSTALL_HDR_PATH="$tmpdir" $arch_var=$arch headers_install + make -C "$linux" O="$blddir" INSTALL_HDR_PATH="$hdrdir" $arch_var=$arch headers_install rm -rf "$output/linux-headers/asm-$arch" mkdir -p "$output/linux-headers/asm-$arch" for header in kvm.h unistd.h bitsperlong.h mman.h; do - cp "$tmpdir/include/asm/$header" "$output/linux-headers/asm-$arch" + cp "$hdrdir/include/asm/$header" "$output/linux-headers/asm-$arch" done if [ $arch = mips ]; then - cp "$tmpdir/include/asm/sgidefs.h" "$output/linux-headers/asm-mips/" - cp "$tmpdir/include/asm/unistd_o32.h" "$output/linux-headers/asm-mips/" - cp "$tmpdir/include/asm/unistd_n32.h" "$output/linux-headers/asm-mips/" - cp "$tmpdir/include/asm/unistd_n64.h" "$output/linux-headers/asm-mips/" + cp "$hdrdir/include/asm/sgidefs.h" "$output/linux-headers/asm-mips/" + cp "$hdrdir/include/asm/unistd_o32.h" "$output/linux-headers/asm-mips/" + cp "$hdrdir/include/asm/unistd_n32.h" "$output/linux-headers/asm-mips/" + cp "$hdrdir/include/asm/unistd_n64.h" "$output/linux-headers/asm-mips/" fi if [ $arch = powerpc ]; then - cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-powerpc/" - cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-powerpc/" + cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-powerpc/" + cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-powerpc/" fi rm -rf "$output/include/standard-headers/asm-$arch" mkdir -p "$output/include/standard-headers/asm-$arch" if [ $arch = s390 ]; then - cp_portable "$tmpdir/include/asm/virtio-ccw.h" "$output/include/standard-headers/asm-s390/" - cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-s390/" - cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-s390/" + cp_portable "$hdrdir/include/asm/virtio-ccw.h" "$output/include/standard-headers/asm-s390/" + cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-s390/" + cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-s390/" fi if [ $arch = arm ]; then - cp "$tmpdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/" - cp "$tmpdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/" - cp "$tmpdir/include/asm/unistd-common.h" "$output/linux-headers/asm-arm/" + cp "$hdrdir/include/asm/unistd-eabi.h" "$output/linux-headers/asm-arm/" + cp "$hdrdir/include/asm/unistd-oabi.h" "$output/linux-headers/asm-arm/" + cp "$hdrdir/include/asm/unistd-common.h" "$output/linux-headers/asm-arm/" fi if [ $arch = arm64 ]; then - cp "$tmpdir/include/asm/sve_context.h" "$output/linux-headers/asm-arm64/" + cp "$hdrdir/include/asm/sve_context.h" "$output/linux-headers/asm-arm64/" fi if [ $arch = x86 ]; then - cp "$tmpdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/" - cp "$tmpdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/" - cp "$tmpdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/" - cp_portable "$tmpdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch" + cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/" + cp "$hdrdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/" + cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/" + cp_portable "$hdrdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch" # Remove everything except the macros from bootparam.h avoiding the # unnecessary import of several video/ist/etc headers sed -e '/__ASSEMBLY__/,/__ASSEMBLY__/d' \ - "$tmpdir/include/asm/bootparam.h" > "$tmpdir/bootparam.h" - cp_portable "$tmpdir/bootparam.h" \ + "$hdrdir/include/asm/bootparam.h" > "$hdrdir/bootparam.h" + cp_portable "$hdrdir/bootparam.h" \ "$output/include/standard-headers/asm-$arch" - cp_portable "$tmpdir/include/asm/setup_data.h" \ + cp_portable "$hdrdir/include/asm/setup_data.h" \ "$output/standard-headers/asm-x86" fi if [ $arch = loongarch ]; then @@ -164,7 +166,7 @@ for arch in $ARCHLIST; do cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-loongarch/" fi if [ $arch = riscv ]; then - cp "$tmpdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/" + cp "$hdrdir/include/asm/ptrace.h" "$output/linux-headers/asm-riscv/" fi done arch= @@ -174,13 +176,13 @@ mkdir -p "$output/linux-headers/linux" for header in const.h stddef.h kvm.h vfio.h vfio_ccw.h vfio_zdev.h vhost.h \ psci.h psp-sev.h userfaultfd.h memfd.h mman.h nvme_ioctl.h \ vduse.h iommufd.h bits.h; do - cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux" + cp "$hdrdir/include/linux/$header" "$output/linux-headers/linux" done rm -rf "$output/linux-headers/asm-generic" mkdir -p "$output/linux-headers/asm-generic" for header in unistd.h bitsperlong.h mman-common.h mman.h hugetlb_encode.h; do - cp "$tmpdir/include/asm-generic/$header" "$output/linux-headers/asm-generic" + cp "$hdrdir/include/asm-generic/$header" "$output/linux-headers/asm-generic" done if [ -L "$linux/source" ]; then @@ -215,23 +217,23 @@ EOF rm -rf "$output/include/standard-headers/linux" mkdir -p "$output/include/standard-headers/linux" -for i in "$tmpdir"/include/linux/*virtio*.h \ - "$tmpdir/include/linux/qemu_fw_cfg.h" \ - "$tmpdir/include/linux/fuse.h" \ - "$tmpdir/include/linux/input.h" \ - "$tmpdir/include/linux/input-event-codes.h" \ - "$tmpdir/include/linux/udmabuf.h" \ - "$tmpdir/include/linux/pci_regs.h" \ - "$tmpdir/include/linux/ethtool.h" \ - "$tmpdir/include/linux/const.h" \ - "$tmpdir/include/linux/kernel.h" \ - "$tmpdir/include/linux/vhost_types.h" \ - "$tmpdir/include/linux/sysinfo.h" \ - "$tmpdir/include/misc/pvpanic.h"; do +for i in "$hdrdir"/include/linux/*virtio*.h \ + "$hdrdir/include/linux/qemu_fw_cfg.h" \ + "$hdrdir/include/linux/fuse.h" \ + "$hdrdir/include/linux/input.h" \ + "$hdrdir/include/linux/input-event-codes.h" \ + "$hdrdir/include/linux/udmabuf.h" \ + "$hdrdir/include/linux/pci_regs.h" \ + "$hdrdir/include/linux/ethtool.h" \ + "$hdrdir/include/linux/const.h" \ + "$hdrdir/include/linux/kernel.h" \ + "$hdrdir/include/linux/vhost_types.h" \ + "$hdrdir/include/linux/sysinfo.h" \ + "$hdrdir/include/misc/pvpanic.h"; do cp_portable "$i" "$output/include/standard-headers/linux" done mkdir -p "$output/include/standard-headers/drm" -cp_portable "$tmpdir/include/drm/drm_fourcc.h" \ +cp_portable "$hdrdir/include/drm/drm_fourcc.h" \ "$output/include/standard-headers/drm" rm -rf "$output/include/standard-headers/drivers/infiniband/hw/vmw_pvrdma" -- Gitee From f6c6bec8d824b9f3e348da629937ef562783a879 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 30 May 2024 06:16:14 -0500 Subject: [PATCH 038/115] linux-headers: Update to current kvm/next commit 5f69e42da5b40a2213f4db70ca461f554abca686 upstream This updates kernel headers to commit 6f627b425378 ("KVM: SVM: Add module parameter to enable SEV-SNP", 2024-05-12). The SNP host patches will be included in Linux 6.11, to be released next July. Also brings in an linux-headers/linux/vhost.h fix from v6.9-rc4. [Backport Changes] Changes to file linux-headers/asm-loongarch/kvm.h were skipped since the current codebase has already been updated with latest changes in commit 9e7059885812f. Co-developed-by: Michael Roth Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-3-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- linux-headers/asm-riscv/kvm.h | 1 + linux-headers/asm-x86/kvm.h | 51 ++++++++++++++++++++++++++++++++++- linux-headers/linux/vhost.h | 14 +++++----- 3 files changed, 58 insertions(+), 8 deletions(-) diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h index b1c503c295..e878e7cc39 100644 --- a/linux-headers/asm-riscv/kvm.h +++ b/linux-headers/asm-riscv/kvm.h @@ -167,6 +167,7 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZFA, KVM_RISCV_ISA_EXT_ZTSO, KVM_RISCV_ISA_EXT_ZACAS, + KVM_RISCV_ISA_EXT_SSCOFPMF, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index abef1ce92c..805ab535f3 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -697,6 +697,10 @@ enum sev_cmd_id { /* Hygon CSV batch command */ KVM_CSV_COMMAND_BATCH = 0x18, + /* SNP-specific commands */ + KVM_SEV_SNP_LAUNCH_START = 100, + KVM_SEV_SNP_LAUNCH_UPDATE, + KVM_SEV_SNP_LAUNCH_FINISH, KVM_SEV_NR_MAX, }; @@ -712,7 +716,9 @@ struct kvm_sev_cmd { struct kvm_sev_init { __u64 vmsa_features; __u32 flags; - __u32 pad[9]; + __u16 ghcb_version; + __u16 pad1; + __u32 pad2[8]; }; struct kvm_sev_launch_start { @@ -823,6 +829,48 @@ struct kvm_sev_receive_update_data { __u32 pad2; }; +struct kvm_sev_snp_launch_start { + __u64 policy; + __u8 gosvw[16]; + __u16 flags; + __u8 pad0[6]; + __u64 pad1[4]; +}; + +/* Kept in sync with firmware values for simplicity. */ +#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1 +#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3 +#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4 +#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5 +#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6 + +struct kvm_sev_snp_launch_update { + __u64 gfn_start; + __u64 uaddr; + __u64 len; + __u8 type; + __u8 pad0; + __u16 flags; + __u32 pad1; + __u64 pad2[4]; +}; + +#define KVM_SEV_SNP_ID_BLOCK_SIZE 96 +#define KVM_SEV_SNP_ID_AUTH_SIZE 4096 +#define KVM_SEV_SNP_FINISH_DATA_SIZE 32 + +struct kvm_sev_snp_launch_finish { + __u64 id_block_uaddr; + __u64 id_auth_uaddr; + __u8 id_block_en; + __u8 auth_key_en; + __u8 vcek_disabled; + __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE]; + __u8 pad0[3]; + __u16 flags; + __u64 pad1[4]; +}; + #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) @@ -873,5 +921,6 @@ struct kvm_hyperv_eventfd { #define KVM_X86_SW_PROTECTED_VM 1 #define KVM_X86_SEV_VM 2 #define KVM_X86_SEV_ES_VM 3 +#define KVM_X86_SNP_VM 4 #endif /* _ASM_X86_KVM_H */ diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index e324b7aebe..21072f6f66 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -183,12 +183,6 @@ /* Get the config size */ #define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) -/* Get the count of all virtqueues */ -#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) - -/* Get the number of virtqueue groups. */ -#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) - /* Get the number of address spaces. */ #define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int) @@ -268,10 +262,16 @@ /* set device migtration state */ #define VHOST_VDPA_SET_MIG_STATE _IOW(VHOST_VIRTIO, 0xb2, __u8) +/* Get the count of all virtqueues */ +#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) + +/* Get the number of virtqueue groups. */ +#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) + /* Get the queue size of a specific virtqueue. * userspace set the vring index in vhost_vring_state.index * kernel set the queue size in vhost_vring_state.num */ -#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x80, \ +#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \ struct vhost_vring_state) #endif -- Gitee From 12a117e72f350e58da83781d86a0d8fb8df440ab Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 3 Jun 2024 14:25:06 +0200 Subject: [PATCH 039/115] update-linux-headers: import linux/kvm_para.h header commit aa274c33c39e7de981dc195abe60e1a246c9d248 upstream Right now QEMU is importing arch/x86/include/uapi/asm/kvm_para.h because it includes definitions for kvmclock and for KVM CPUID bits. However, other definitions for KVM hypercall values and return codes are included in include/uapi/linux/kvm_para.h and they will be used by SEV-SNP. To ensure that it is possible to include both and "standard-headers/asm-x86/kvm_para.h" without conflicts, provide linux/kvm_para.h as a portable header too, and forward linux-headers/ files to those in include/standard-headers. Note that will include architecture-specific definitions as well, but "standard-headers/linux/kvm_para.h" will not because it can be used in architecture-independent files. This could easily be extended to other architectures, but right now they do not need any symbol in their specific kvm_para.h files. Reviewed-by: Thomas Huth Signed-off-by: Paolo Bonzini Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- include/standard-headers/linux/kvm_para.h | 38 +++++++++++++++++++++++ linux-headers/asm-x86/kvm_para.h | 1 + linux-headers/linux/kvm_para.h | 2 ++ scripts/update-linux-headers.sh | 22 ++++++++++++- 4 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 include/standard-headers/linux/kvm_para.h create mode 100644 linux-headers/asm-x86/kvm_para.h create mode 100644 linux-headers/linux/kvm_para.h diff --git a/include/standard-headers/linux/kvm_para.h b/include/standard-headers/linux/kvm_para.h new file mode 100644 index 0000000000..015c166302 --- /dev/null +++ b/include/standard-headers/linux/kvm_para.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_KVM_PARA_H +#define __LINUX_KVM_PARA_H + +/* + * This header file provides a method for making a hypercall to the host + * Architectures should define: + * - kvm_hypercall0, kvm_hypercall1... + * - kvm_arch_para_features + * - kvm_para_available + */ + +/* Return values for hypercalls */ +#define KVM_ENOSYS 1000 +#define KVM_EFAULT EFAULT +#define KVM_EINVAL EINVAL +#define KVM_E2BIG E2BIG +#define KVM_EPERM EPERM +#define KVM_EOPNOTSUPP 95 + +#define KVM_HC_VAPIC_POLL_IRQ 1 +#define KVM_HC_MMU_OP 2 +#define KVM_HC_FEATURES 3 +#define KVM_HC_PPC_MAP_MAGIC_PAGE 4 +#define KVM_HC_KICK_CPU 5 +#define KVM_HC_MIPS_GET_CLOCK_FREQ 6 +#define KVM_HC_MIPS_EXIT_VM 7 +#define KVM_HC_MIPS_CONSOLE_OUTPUT 8 +#define KVM_HC_CLOCK_PAIRING 9 +#define KVM_HC_SEND_IPI 10 +#define KVM_HC_SCHED_YIELD 11 +#define KVM_HC_MAP_GPA_RANGE 12 + +/* + * hypercalls use architecture specific + */ + +#endif /* __LINUX_KVM_PARA_H */ diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h new file mode 100644 index 0000000000..1d3e0e0b07 --- /dev/null +++ b/linux-headers/asm-x86/kvm_para.h @@ -0,0 +1 @@ +#include "standard-headers/asm-x86/kvm_para.h" diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h new file mode 100644 index 0000000000..6a1e672259 --- /dev/null +++ b/linux-headers/linux/kvm_para.h @@ -0,0 +1,2 @@ +#include "standard-headers/linux/kvm_para.h" +#include diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index 746573d80c..6586201aab 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -64,6 +64,7 @@ cp_portable() { -e 'linux/kernel' \ -e 'linux/sysinfo' \ -e 'asm/setup_data.h' \ + -e 'asm/kvm_para.h' \ > /dev/null then echo "Unexpected #include in input file $f". @@ -71,6 +72,15 @@ cp_portable() { fi header=$(basename "$f"); + + if test -z "$arch"; then + # Let users of include/standard-headers/linux/ headers pick the + # asm-* header that they care about + arch_cmd='/]*\)>/d' + else + arch_cmd='s/]*\)>/"standard-headers\/asm-'$arch'\/\1"/' + fi + sed -e 's/__aligned_u64/__u64 __attribute__((aligned(8)))/g' \ -e 's/__u\([0-9][0-9]*\)/uint\1_t/g' \ -e 's/u\([0-9][0-9]*\)/uint\1_t/g' \ @@ -79,7 +89,7 @@ cp_portable() { -e 's/__be\([0-9][0-9]*\)/uint\1_t/g' \ -e 's/"\(input-event-codes\.h\)"/"standard-headers\/linux\/\1"/' \ -e 's/]*\)>/"standard-headers\/linux\/\1"/' \ - -e 's/]*\)>/"standard-headers\/asm-'$arch'\/\1"/' \ + -e "$arch_cmd" \ -e 's/__bitwise//' \ -e 's/__attribute__((packed))/QEMU_PACKED/' \ -e 's/__inline__/inline/' \ @@ -151,7 +161,12 @@ for arch in $ARCHLIST; do cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/" cp "$hdrdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/" cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/" + cp_portable "$hdrdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch" + cat <$output/linux-headers/asm-$arch/kvm_para.h +#include "standard-headers/asm-$arch/kvm_para.h" +EOF + # Remove everything except the macros from bootparam.h avoiding the # unnecessary import of several video/ist/etc headers sed -e '/__ASSEMBLY__/,/__ASSEMBLY__/d' \ @@ -205,6 +220,10 @@ if [ -d "$linux/LICENSES" ]; then done fi +cat <$output/linux-headers/linux/kvm_para.h +#include "standard-headers/linux/kvm_para.h" +#include +EOF cat <$output/linux-headers/linux/virtio_config.h #include "standard-headers/linux/virtio_config.h" EOF @@ -227,6 +246,7 @@ for i in "$hdrdir"/include/linux/*virtio*.h \ "$hdrdir/include/linux/ethtool.h" \ "$hdrdir/include/linux/const.h" \ "$hdrdir/include/linux/kernel.h" \ + "$hdrdir/include/linux/kvm_para.h" \ "$hdrdir/include/linux/vhost_types.h" \ "$hdrdir/include/linux/sysinfo.h" \ "$hdrdir/include/misc/pvpanic.h"; do -- Gitee From 770f1a87e5d3383d5f55c1f150af084d5b863bf8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 31 May 2024 13:29:53 +0200 Subject: [PATCH 040/115] machine: allow early use of machine_require_guest_memfd commit dc0d28ca46c0e7ee3c055ad4da24022995bd3765 upstream Ask the ConfidentialGuestSupport object whether to use guest_memfd for KVM-backend private memory. This bool can be set in instance_init (or user_complete) so that it is available when the machine is created. Signed-off-by: Paolo Bonzini Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- hw/core/machine.c | 2 +- include/exec/confidential-guest-support.h | 5 +++++ include/hw/boards.h | 1 - 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/hw/core/machine.c b/hw/core/machine.c index 42c982caa2..a5ecdbd996 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -1199,7 +1199,7 @@ bool machine_mem_merge(MachineState *machine) bool machine_require_guest_memfd(MachineState *machine) { - return machine->require_guest_memfd; + return machine->cgs && machine->cgs->require_guest_memfd; } static char *cpu_slot_to_string(const CPUArchId *cpu) diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h index 765dab4f83..cd7c7b7459 100644 --- a/include/exec/confidential-guest-support.h +++ b/include/exec/confidential-guest-support.h @@ -31,6 +31,11 @@ OBJECT_DECLARE_TYPE(ConfidentialGuestSupport, struct ConfidentialGuestSupport { Object parent; + /* + * True if the machine should use guest_memfd for RAM. + */ + bool require_guest_memfd; + /* * ready: flag set by CGS initialization code once it's ready to * start executing instructions in a potentially-secure diff --git a/include/hw/boards.h b/include/hw/boards.h index 4ce2a5b17a..ca4237d7bb 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -365,7 +365,6 @@ struct MachineState { char *dt_compatible; bool dump_guest_core; bool mem_merge; - bool require_guest_memfd; bool usb; bool usb_disabled; char *firmware; -- Gitee From 9a002b6cf427e0d34b7d851352c3ba07538a31c7 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 30 May 2024 06:16:13 -0500 Subject: [PATCH 041/115] i386/sev: Replace error_report with error_setg commit 18c453409a3a84cf7b2c764c5a03fb429a73bbeb upstream Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-2-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- target/i386/sev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index a2952fbb1c..a11f6b4600 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -1197,13 +1197,13 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) if (sev_es_enabled()) { if (!kvm_kernel_irqchip_allowed()) { - error_report("%s: SEV-ES guests require in-kernel irqchip support", - __func__); + error_setg(errp, "%s: SEV-ES guests require in-kernel irqchip" + "support", __func__); goto err; } if (!(status.flags & SEV_STATUS_FLAGS_CONFIG_ES)) { - error_report("%s: guest policy requires SEV-ES, but " + error_setg(errp, "%s: guest policy requires SEV-ES, but " "host SEV-ES support unavailable", __func__); goto err; -- Gitee From c84339d1237fc5c9223d9bfec415e5cf39c85146 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 30 May 2024 06:16:16 -0500 Subject: [PATCH 042/115] i386/sev: Introduce "sev-common" type to encapsulate common SEV state commit 16dcf200dc951c1cde3e5b442457db5f690b8cf0 upstream. Currently all SEV/SEV-ES functionality is managed through a single 'sev-guest' QOM type. With upcoming support for SEV-SNP, taking this same approach won't work well since some of the properties/state managed by 'sev-guest' is not applicable to SEV-SNP, which will instead rely on a new QOM type with its own set of properties/state. To prepare for this, this patch moves common state into an abstract 'sev-common' parent type to encapsulate properties/state that are common to both SEV/SEV-ES and SEV-SNP, leaving only SEV/SEV-ES-specific properties/state in the current 'sev-guest' type. This should not affect current behavior or command-line options. As part of this patch, some related changes are also made: - a static 'sev_guest' variable is currently used to keep track of the 'sev-guest' instance. SEV-SNP would similarly introduce an 'sev_snp_guest' static variable. But these instances are now available via qdev_get_machine()->cgs, so switch to using that instead and drop the static variable. - 'sev_guest' is currently used as the name for the static variable holding a pointer to the 'sev-guest' instance. Re-purpose the name as a local variable referring the 'sev-guest' instance, and use that consistently throughout the code so it can be easily distinguished from sev-common/sev-snp-guest instances. - 'sev' is generally used as the name for local variables holding a pointer to the 'sev-guest' instance. In cases where that now points to common state, use the name 'sev_common'; in cases where that now points to state specific to 'sev-guest' instance, use the name 'sev_guest' In order to enable kernel-hashes for SNP, pull it from SevGuestProperties to its parent SevCommonProperties so it will be available for both SEV and SNP. [Backport changes] The deviations noted below are due to the introduction of the new structure model, `SevCommonState`, which consolidates properties common to SEV, SEV-ES, and SEV-SNP into a unified structure, while SEV/SEV-ES specific members remain within the `SevGuestState` structure. However, the upstream changes do not account for several custom features added as part of SEV live migration support and Hygon Secure Virtualization (CSV) support. The following deviations reflect necessary adjustments made to maintain compatibility with these non-upstream features. 1. In the file target/i386/sev.c to continue supporting custom features, the following non-upstream structure members were retained in `SevGuestState`. - remote_pdh, remote_pdh_len - remote_plat_cert, remote_plat_cert_len - amd_cert, amd_cert_len - send_packet_hdr, send_packet_hdr_len - send_vmsa_packet_hdr, ssend_vmsa_packet_hdr_len 2. In the file target/i386/sev.c, 30 non-upstream functions were updated to follow the new two-structure model. These changes ensures original functionality is preserved while adopting the upstream separation between common and SEV/SEV-ES-specific guest state. The modified non-upstream functions and their respective commits from Euler Qemu repo are as follows: - Commit 0a7dde8: sev_send_finish(), sev_send_start(), sev_migration_state_notifier(), sev_get_send_session_length(), sev_send_update_data(), sev_send_get_packet_len(), sev_save_outgoing_page(). - Commit 778457c: sev_receive_finish(), sev_receive_start(), sev_receive_update_data(), sev_load_incoming_page(). - Commit 940858a3ab395: sev_send_vmsa_get_packet_len(), csv_save_outgoing_cpu_state(), sev_receive_update_vmsa(), sev_send_update_vmsa(). - Commit 02e6bfc, Commit af3077a2f19f0 and Commit f675319: sev_add_shared_regions_list(), sev_remove_shared_regions_list(). sev_is_gfn_in_unshared_region(). sev_save_setup(). - Commit 0f85e3a: sev_save_outgoing_shared_regions_list(), sev_load_incoming_shared_regions_list(). - Commit b2091d2: csv_save_queued_outgoing_pages(), csv_command_batch(), csv_send_update_data_batch(). - Commit cb5c1c9, Commit e6d587b, and Commit 8125145: csv_receive_update_data_batch(), csv_load_queued_incoming_pages(). csv_queue_outgoing_page(), csv_send_queue_data(). csv_receive_queue_data(), csv_queue_incoming_page(). 3. In the file target/i386/sev.c, the upstream function sev_kvm_init() has the following non-upstream modifications: - An if block added in commit c8a6d5f, was retained and modified to align with new structure model. - Macro QTAILQ_INIT from commit 02e6bfc has been updated to comply with the new structure model. 4. In the file target/i386/sev.c, the upstream function sev_vm_state_change() consists a non-upstream if block added in commit c8a6d5f, which was retained and modified to maintain its core functionality while adhering to the new structure model. Signed-off-by: Michael Roth Co-developed-by: Dov Murik Signed-off-by: Dov Murik Acked-by: Markus Armbruster (QAPI schema) Co-developed-by: Pankaj Gupta Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-5-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- qapi/qom.json | 40 +-- target/i386/sev.c | 640 ++++++++++++++++++++++++++-------------------- target/i386/sev.h | 3 + 3 files changed, 384 insertions(+), 299 deletions(-) diff --git a/qapi/qom.json b/qapi/qom.json index aa5810d843..cd3e6c31f3 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -860,20 +860,12 @@ 'data': { '*filename': 'str' } } ## -# @SevGuestProperties: +# @SevCommonProperties: # -# Properties for sev-guest objects. +# Properties common to objects that are derivatives of sev-common. # # @sev-device: SEV device to use (default: "/dev/sev") # -# @dh-cert-file: guest owners DH certificate (encoded with base64) -# -# @session-file: guest owners session parameters (encoded with base64) -# -# @policy: SEV policy value (default: 0x1) -# -# @handle: SEV firmware handle (default: 0) -# # @cbitpos: C-bit location in page table entry (default: 0) # # @reduced-phys-bits: number of bits in physical addresses that become @@ -883,6 +875,27 @@ # designated guest firmware page for measured boot with -kernel # (default: false) (since 6.2) # +# Since: 9.1 +## +{ 'struct': 'SevCommonProperties', + 'data': { '*sev-device': 'str', + '*cbitpos': 'uint32', + 'reduced-phys-bits': 'uint32', + '*kernel-hashes': 'bool' } } + +## +# @SevGuestProperties: +# +# Properties for sev-guest objects. +# +# @dh-cert-file: guest owners DH certificate (encoded with base64) +# +# @session-file: guest owners session parameters (encoded with base64) +# +# @policy: SEV policy value (default: 0x1) +# +# @handle: SEV firmware handle (default: 0) +# # @user-id: the user id of the guest owner, only support on Hygon CPUs # (since 8.2) # @@ -901,14 +914,11 @@ # Since: 2.12 ## { 'struct': 'SevGuestProperties', - 'data': { '*sev-device': 'str', - '*dh-cert-file': 'str', + 'base': 'SevCommonProperties', + 'data': { '*dh-cert-file': 'str', '*session-file': 'str', '*policy': 'uint32', '*handle': 'uint32', - '*cbitpos': 'uint32', - 'reduced-phys-bits': 'uint32', - '*kernel-hashes': 'bool', '*user-id': 'str', '*secret-header-file': 'str', '*secret-file': 'str', diff --git a/target/i386/sev.c b/target/i386/sev.c index a11f6b4600..56fedfa22f 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -43,14 +43,41 @@ #include "hw/i386/pc.h" #include "exec/address-spaces.h" -#define TYPE_SEV_GUEST "sev-guest" -OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) +OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) +OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) struct shared_region { unsigned long gfn_start, gfn_end; QTAILQ_ENTRY(shared_region) list; }; +struct SevCommonState { + X86ConfidentialGuest parent_obj; + + int kvm_type; + + /* configuration parameters */ + char *sev_device; + uint32_t cbitpos; + uint32_t reduced_phys_bits; + bool kernel_hashes; + + /* runtime state */ + uint8_t api_major; + uint8_t api_minor; + uint8_t build_id; + int sev_fd; + SevState state; + + uint32_t reset_cs; + uint32_t reset_ip; + bool reset_data_valid; +}; + +struct SevCommonStateClass { + X86ConfidentialGuestClass parent_class; + +}; /** * SevGuestState: @@ -63,31 +90,19 @@ struct shared_region { * -machine ...,memory-encryption=sev0 */ struct SevGuestState { - X86ConfidentialGuest parent_obj; - - int kvm_type; + SevCommonState parent_obj; + gchar *measurement; /* configuration parameters */ - char *sev_device; + uint32_t handle; uint32_t policy; char *dh_cert_file; char *session_file; - uint32_t cbitpos; - uint32_t reduced_phys_bits; - bool kernel_hashes; + bool legacy_vm_type; char *user_id; char *secret_header_file; char *secret_file; - bool legacy_vm_type; - /* runtime state */ - uint32_t handle; - uint8_t api_major; - uint8_t api_minor; - uint8_t build_id; - int sev_fd; - SevState state; - gchar *measurement; guchar *remote_pdh; size_t remote_pdh_len; guchar *remote_plat_cert; @@ -101,10 +116,6 @@ struct SevGuestState { gchar *send_vmsa_packet_hdr; size_t send_vmsa_packet_hdr_len; - uint32_t reset_cs; - uint32_t reset_ip; - bool reset_data_valid; - QTAILQ_HEAD(, shared_region) shared_regions_list; /* link list used for HYGON CSV */ @@ -156,7 +167,6 @@ typedef struct QEMU_PACKED PaddedSevHashTable { QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); -static SevGuestState *sev_guest; static Error *sev_mig_blocker; bool sev_kvm_has_msr_ghcb; @@ -262,21 +272,21 @@ fw_error_to_str(int code) } static bool -sev_check_state(const SevGuestState *sev, SevState state) +sev_check_state(const SevCommonState *sev_common, SevState state) { - assert(sev); - return sev->state == state ? true : false; + assert(sev_common); + return sev_common->state == state ? true : false; } static void -sev_set_guest_state(SevGuestState *sev, SevState new_state) +sev_set_guest_state(SevCommonState *sev_common, SevState new_state) { assert(new_state < SEV_STATE__MAX); - assert(sev); + assert(sev_common); - trace_kvm_sev_change_state(SevState_str(sev->state), + trace_kvm_sev_change_state(SevState_str(sev_common->state), SevState_str(new_state)); - sev->state = new_state; + sev_common->state = new_state; } static void @@ -343,43 +353,6 @@ static struct RAMBlockNotifier sev_ram_notifier = { .ram_block_removed = sev_ram_block_removed, }; -static void -sev_guest_finalize(Object *obj) -{ -} - -static char * -sev_guest_get_session_file(Object *obj, Error **errp) -{ - SevGuestState *s = SEV_GUEST(obj); - - return s->session_file ? g_strdup(s->session_file) : NULL; -} - -static void -sev_guest_set_session_file(Object *obj, const char *value, Error **errp) -{ - SevGuestState *s = SEV_GUEST(obj); - - s->session_file = g_strdup(value); -} - -static char * -sev_guest_get_dh_cert_file(Object *obj, Error **errp) -{ - SevGuestState *s = SEV_GUEST(obj); - - return g_strdup(s->dh_cert_file); -} - -static void -sev_guest_set_dh_cert_file(Object *obj, const char *value, Error **errp) -{ - SevGuestState *s = SEV_GUEST(obj); - - s->dh_cert_file = g_strdup(value); -} - static char * sev_guest_get_user_id(Object *obj, Error **errp) { @@ -428,84 +401,61 @@ sev_guest_set_secret_file(Object *obj, const char *value, Error **errp) s->secret_file = g_strdup(value); } -static char * -sev_guest_get_sev_device(Object *obj, Error **errp) -{ - SevGuestState *sev = SEV_GUEST(obj); - - return g_strdup(sev->sev_device); -} - -static void -sev_guest_set_sev_device(Object *obj, const char *value, Error **errp) -{ - SevGuestState *sev = SEV_GUEST(obj); - - sev->sev_device = g_strdup(value); -} - -static bool sev_guest_get_kernel_hashes(Object *obj, Error **errp) -{ - SevGuestState *sev = SEV_GUEST(obj); - - return sev->kernel_hashes; -} - -static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) -{ - SevGuestState *sev = SEV_GUEST(obj); - - sev->kernel_hashes = value; -} - -static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) -{ - return SEV_GUEST(obj)->legacy_vm_type; -} - -static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) -{ - SEV_GUEST(obj)->legacy_vm_type = value; -} - bool sev_enabled(void) { - return !!sev_guest; + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; + + return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON); } bool sev_es_enabled(void) { - return sev_enabled() && (sev_guest->policy & SEV_POLICY_ES); + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; + + return sev_enabled() && (SEV_GUEST(cgs)->policy & SEV_POLICY_ES); } uint32_t sev_get_cbit_position(void) { - return sev_guest ? sev_guest->cbitpos : 0; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + + return sev_common ? sev_common->cbitpos : 0; } uint32_t sev_get_reduced_phys_bits(void) { - return sev_guest ? sev_guest->reduced_phys_bits : 0; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + + return sev_common ? sev_common->reduced_phys_bits : 0; } static SevInfo *sev_get_info(void) { SevInfo *info; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + SevGuestState *sev_guest = + (SevGuestState *)object_dynamic_cast(OBJECT(sev_common), + TYPE_SEV_GUEST); info = g_new0(SevInfo, 1); info->enabled = sev_enabled(); if (info->enabled) { - info->api_major = sev_guest->api_major; - info->api_minor = sev_guest->api_minor; - info->build_id = sev_guest->build_id; - info->policy = sev_guest->policy; - info->state = sev_guest->state; - info->handle = sev_guest->handle; + if (sev_guest) { + info->handle = sev_guest->handle; + } + info->api_major = sev_common->api_major; + info->api_minor = sev_common->api_minor; + info->build_id = sev_common->build_id; + info->state = sev_common->state; + /* we only report the lower 32-bits of policy for SNP, ok for now... */ + info->policy = + (uint32_t)object_property_get_uint(OBJECT(sev_common), + "policy", NULL); } return info; @@ -632,6 +582,8 @@ static SevCapability *sev_get_capabilities(Error **errp) size_t pdh_len = 0, cert_chain_len = 0, cpu0_id_len = 0; uint32_t ebx; int fd; + SevCommonState *sev_common; + char *sev_device; if (!kvm_enabled()) { error_setg(errp, "KVM not enabled"); @@ -642,12 +594,21 @@ static SevCapability *sev_get_capabilities(Error **errp) return NULL; } - fd = open(DEFAULT_SEV_DEVICE, O_RDWR); + sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + if (!sev_common) { + error_setg(errp, "SEV is not configured"); + } + + sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", + &error_abort); + fd = open(sev_device, O_RDWR); if (fd < 0) { error_setg_errno(errp, errno, "SEV: Failed to open %s", DEFAULT_SEV_DEVICE); + g_free(sev_device); return NULL; } + g_free(sev_device); if (sev_get_pdh_info(fd, &pdh_data, &pdh_len, &cert_chain_data, &cert_chain_len, errp)) { @@ -690,7 +651,7 @@ static SevAttestationReport *sev_get_attestation_report(const char *mnonce, { struct kvm_sev_attestation_report input = {}; SevAttestationReport *report = NULL; - SevGuestState *sev = sev_guest; + SevCommonState *sev_common; g_autofree guchar *data = NULL; g_autofree guchar *buf = NULL; gsize len; @@ -715,8 +676,10 @@ static SevAttestationReport *sev_get_attestation_report(const char *mnonce, return NULL; } + sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + /* Query the report length */ - ret = sev_ioctl(sev->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, &input, &err); if (ret < 0) { if (err != SEV_RET_INVALID_LEN) { @@ -732,7 +695,7 @@ static SevAttestationReport *sev_get_attestation_report(const char *mnonce, memcpy(input.mnonce, buf, sizeof(input.mnonce)); /* Query the report */ - ret = sev_ioctl(sev->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, &input, &err); if (ret) { error_setg_errno(errp, errno, "SEV: Failed to get attestation report" @@ -772,26 +735,27 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len) } static int -sev_launch_start(SevGuestState *sev) +sev_launch_start(SevGuestState *sev_guest) { gsize sz; int ret = 1; int fw_error, rc; struct kvm_sev_launch_start start = { - .handle = sev->handle, .policy = sev->policy + .handle = sev_guest->handle, .policy = sev_guest->policy }; guchar *session = NULL, *dh_cert = NULL; + SevCommonState *sev_common = SEV_COMMON(sev_guest); - if (sev->session_file) { - if (sev_read_file_base64(sev->session_file, &session, &sz) < 0) { + if (sev_guest->session_file) { + if (sev_read_file_base64(sev_guest->session_file, &session, &sz) < 0) { goto out; } start.session_uaddr = (unsigned long)session; start.session_len = sz; } - if (sev->dh_cert_file) { - if (sev_read_file_base64(sev->dh_cert_file, &dh_cert, &sz) < 0) { + if (sev_guest->dh_cert_file) { + if (sev_read_file_base64(sev_guest->dh_cert_file, &dh_cert, &sz) < 0) { goto out; } start.dh_uaddr = (unsigned long)dh_cert; @@ -799,15 +763,15 @@ sev_launch_start(SevGuestState *sev) } trace_kvm_sev_launch_start(start.policy, session, dh_cert); - rc = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_START, &start, &fw_error); + rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_START, &start, &fw_error); if (rc < 0) { error_report("%s: LAUNCH_START ret=%d fw_error=%d '%s'", __func__, ret, fw_error, fw_error_to_str(fw_error)); goto out; } - sev_set_guest_state(sev, SEV_STATE_LAUNCH_UPDATE); - sev->handle = start.handle; + sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_UPDATE); + sev_guest->handle = start.handle; ret = 0; out: @@ -817,7 +781,7 @@ out: } static int -sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) +sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) { int ret, fw_error; struct kvm_sev_launch_update_data update; @@ -829,7 +793,7 @@ sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) update.uaddr = (__u64)(unsigned long)addr; update.len = len; trace_kvm_sev_launch_update_data(addr, len); - ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, + ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, &update, &fw_error); if (ret) { error_report("%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", @@ -840,11 +804,12 @@ sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) } static int -sev_launch_update_vmsa(SevGuestState *sev) +sev_launch_update_vmsa(SevGuestState *sev_guest) { int ret, fw_error; - ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL, &fw_error); + ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, + NULL, &fw_error); if (ret) { error_report("%s: LAUNCH_UPDATE_VMSA ret=%d fw_error=%d '%s'", __func__, ret, fw_error, fw_error_to_str(fw_error)); @@ -859,12 +824,13 @@ csv_load_launch_secret(const char *secret_header_file, const char *secret_file); static void sev_launch_get_measure(Notifier *notifier, void *unused) { - SevGuestState *sev = sev_guest; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + SevGuestState *sev_guest = SEV_GUEST(sev_common); int ret, error; g_autofree guchar *data = NULL; struct kvm_sev_launch_measure measurement = {}; - if (!sev_check_state(sev, SEV_STATE_LAUNCH_UPDATE)) { + if (!sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { return; } @@ -873,7 +839,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) ret = csv3_launch_encrypt_vmcb(); } else { /* measure all the VM save areas before getting launch_measure */ - ret = sev_launch_update_vmsa(sev); + ret = sev_launch_update_vmsa(sev_guest); } if (ret) { exit(1); @@ -882,7 +848,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) } /* query the measurement blob length */ - ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_MEASURE, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_MEASURE, &measurement, &error); if (!measurement.len) { error_report("%s: LAUNCH_MEASURE ret=%d fw_error=%d '%s'", @@ -894,7 +860,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) measurement.uaddr = (unsigned long)data; /* get the measurement blob */ - ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_MEASURE, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_MEASURE, &measurement, &error); if (ret) { error_report("%s: LAUNCH_MEASURE ret=%d fw_error=%d '%s'", @@ -902,26 +868,28 @@ sev_launch_get_measure(Notifier *notifier, void *unused) return; } - sev_set_guest_state(sev, SEV_STATE_LAUNCH_SECRET); + sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_SECRET); /* encode the measurement value and emit the event */ - sev->measurement = g_base64_encode(data, measurement.len); - trace_kvm_sev_launch_measurement(sev->measurement); + sev_guest->measurement = g_base64_encode(data, measurement.len); + trace_kvm_sev_launch_measurement(sev_guest->measurement); /* Hygon CSV will auto load guest owner's secret */ if (is_hygon_cpu()) { - if (sev->secret_header_file && - strlen(sev->secret_header_file) && - sev->secret_file && - strlen(sev->secret_file)) - csv_load_launch_secret(sev->secret_header_file, sev->secret_file); + if (sev_guest->secret_header_file && + strlen(sev_guest->secret_header_file) && + sev_guest->secret_file && + strlen(sev_guest->secret_file)) + csv_load_launch_secret(sev_guest->secret_header_file, sev_guest->secret_file); } } static char *sev_get_launch_measurement(void) { + SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); + if (sev_guest && - sev_guest->state >= SEV_STATE_LAUNCH_SECRET) { + SEV_COMMON(sev_guest)->state >= SEV_STATE_LAUNCH_SECRET) { return g_strdup(sev_guest->measurement); } @@ -950,19 +918,20 @@ static Notifier sev_machine_done_notify = { }; static void -sev_launch_finish(SevGuestState *sev) +sev_launch_finish(SevGuestState *sev_guest) { int ret, error; trace_kvm_sev_launch_finish(); - ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, &error); + ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, + &error); if (ret) { error_report("%s: LAUNCH_FINISH ret=%d fw_error=%d '%s'", __func__, ret, error, fw_error_to_str(error)); exit(1); } - sev_set_guest_state(sev, SEV_STATE_RUNNING); + sev_set_guest_state(SEV_COMMON(sev_guest), SEV_STATE_RUNNING); /* add migration blocker */ error_setg(&sev_mig_blocker, @@ -977,19 +946,19 @@ sev_del_migrate_blocker(void) } static int -sev_receive_finish(SevGuestState *s) +sev_receive_finish(SevCommonState *sev_common) { int error, ret = 1; trace_kvm_sev_receive_finish(); - ret = sev_ioctl(s->sev_fd, KVM_SEV_RECEIVE_FINISH, 0, &error); + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_RECEIVE_FINISH, 0, &error); if (ret) { error_report("%s: RECEIVE_FINISH ret=%d fw_error=%d '%s'", __func__, ret, error, fw_error_to_str(error)); goto err; } - sev_set_guest_state(s, SEV_STATE_RUNNING); + sev_set_guest_state(sev_common, SEV_STATE_RUNNING); err: return ret; } @@ -997,13 +966,13 @@ err: static void sev_vm_state_change(void *opaque, bool running, RunState state) { - SevGuestState *sev = opaque; + SevCommonState *sev_common = opaque; if (running) { - if (sev_check_state(sev, SEV_STATE_RECEIVE_UPDATE)) { - sev_receive_finish(sev); - } else if (!sev_check_state(sev, SEV_STATE_RUNNING)) { - sev_launch_finish(sev); + if (sev_check_state(sev_common, SEV_STATE_RECEIVE_UPDATE)) { + sev_receive_finish(sev_common); + } else if (!sev_check_state(sev_common, SEV_STATE_RUNNING)) { + sev_launch_finish(SEV_GUEST(sev_common)); } } } @@ -1022,51 +991,52 @@ static inline bool check_blob_length(size_t value) int sev_save_setup(const char *pdh, const char *plat_cert, const char *amd_cert) { - SevGuestState *s = sev_guest; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + SevGuestState *sev_guest = SEV_GUEST(sev_common); if (is_hygon_cpu()) { - if (sev_read_file_base64(pdh, &s->remote_pdh, - &s->remote_pdh_len) < 0) { + if (sev_read_file_base64(pdh, &sev_guest->remote_pdh, + &sev_guest->remote_pdh_len) < 0) { goto error; } } else { - s->remote_pdh = g_base64_decode(pdh, &s->remote_pdh_len); + sev_guest->remote_pdh = g_base64_decode(pdh, &sev_guest->remote_pdh_len); } - if (!check_blob_length(s->remote_pdh_len)) { + if (!check_blob_length(sev_guest->remote_pdh_len)) { goto error; } if (is_hygon_cpu()) { - if (sev_read_file_base64(plat_cert, &s->remote_plat_cert, - &s->remote_plat_cert_len) < 0) { + if (sev_read_file_base64(plat_cert, &sev_guest->remote_plat_cert, + &sev_guest->remote_plat_cert_len) < 0) { goto error; } } else { - s->remote_plat_cert = g_base64_decode(plat_cert, - &s->remote_plat_cert_len); + sev_guest->remote_plat_cert = g_base64_decode(plat_cert, + &sev_guest->remote_plat_cert_len); } - if (!check_blob_length(s->remote_plat_cert_len)) { + if (!check_blob_length(sev_guest->remote_plat_cert_len)) { goto error; } if (is_hygon_cpu()) { - if (sev_read_file_base64(amd_cert, &s->amd_cert, - &s->amd_cert_len) < 0) { + if (sev_read_file_base64(amd_cert, &sev_guest->amd_cert, + &sev_guest->amd_cert_len) < 0) { goto error; } } else { - s->amd_cert = g_base64_decode(amd_cert, &s->amd_cert_len); + sev_guest->amd_cert = g_base64_decode(amd_cert, &sev_guest->amd_cert_len); } - if (!check_blob_length(s->amd_cert_len)) { + if (!check_blob_length(sev_guest->amd_cert_len)) { goto error; } return 0; error: - g_free(s->remote_pdh); - g_free(s->remote_plat_cert); - g_free(s->amd_cert); + g_free(sev_guest->remote_pdh); + g_free(sev_guest->remote_plat_cert); + g_free(sev_guest->amd_cert); return 1; } @@ -1075,9 +1045,11 @@ static void sev_send_finish(void) { int ret, error; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + SevGuestState *sev_guest = SEV_GUEST(sev_common); trace_kvm_sev_send_finish(); - ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_SEND_FINISH, 0, &error); + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SEND_FINISH, 0, &error); if (ret) { error_report("%s: SEND_FINISH ret=%d fw_error=%d '%s'", __func__, ret, error, fw_error_to_str(error)); @@ -1087,7 +1059,7 @@ sev_send_finish(void) if (sev_es_enabled() && is_hygon_cpu()) { g_free(sev_guest->send_vmsa_packet_hdr); } - sev_set_guest_state(sev_guest, SEV_STATE_RUNNING); + sev_set_guest_state(sev_common, SEV_STATE_RUNNING); } static void @@ -1098,7 +1070,8 @@ sev_migration_state_notifier(Notifier *notifier, void *data) if (migration_has_finished(s) || migration_in_postcopy_after_devices(s) || migration_has_failed(s)) { - if (sev_check_state(sev_guest, SEV_STATE_SEND_UPDATE)) { + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + if (sev_check_state(sev_common, SEV_STATE_SEND_UPDATE)) { sev_send_finish(); } } @@ -1108,27 +1081,29 @@ static Notifier sev_migration_state; static int sev_kvm_type(X86ConfidentialGuest *cg) { - SevGuestState *sev = SEV_GUEST(cg); + SevCommonState *sev_common = SEV_COMMON(cg); + SevGuestState *sev_guest = SEV_GUEST(sev_common); int kvm_type; - if (sev->kvm_type != -1) { + if (sev_common->kvm_type != -1) { goto out; } - kvm_type = (sev->policy & SEV_POLICY_ES) ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; - if (kvm_is_vm_type_supported(kvm_type) && !sev->legacy_vm_type) { - sev->kvm_type = kvm_type; + kvm_type = (sev_guest->policy & SEV_POLICY_ES) ? + KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; + if (kvm_is_vm_type_supported(kvm_type) && !sev_guest->legacy_vm_type) { + sev_common->kvm_type = kvm_type; } else { - sev->kvm_type = KVM_X86_DEFAULT_VM; + sev_common->kvm_type = KVM_X86_DEFAULT_VM; } out: - return sev->kvm_type; + return sev_common->kvm_type; } static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { - SevGuestState *sev = SEV_GUEST(cgs); + SevCommonState *sev_common = SEV_COMMON(cgs); char *devname; int ret, fw_error, cmd; uint32_t ebx; @@ -1144,8 +1119,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) return -1; } - sev_guest = sev; - sev->state = SEV_STATE_UNINIT; + sev_common->state = SEV_STATE_UNINIT; host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL); host_cbitpos = ebx & 0x3f; @@ -1155,9 +1129,9 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) * register of CPUID 0x8000001F. No need to verify the range as the * comparison against the host value accomplishes that. */ - if (host_cbitpos != sev->cbitpos) { + if (host_cbitpos != sev_common->cbitpos) { error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'", - __func__, host_cbitpos, sev->cbitpos); + __func__, host_cbitpos, sev_common->cbitpos); goto err; } @@ -1166,16 +1140,17 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) * the EBX register of CPUID 0x8000001F, so verify the supplied value * is in the range of 1 to 63. */ - if (sev->reduced_phys_bits < 1 || sev->reduced_phys_bits > 63) { + if (sev_common->reduced_phys_bits < 1 || + sev_common->reduced_phys_bits > 63) { error_setg(errp, "%s: reduced_phys_bits check failed," " it should be in the range of 1 to 63, requested '%d'", - __func__, sev->reduced_phys_bits); + __func__, sev_common->reduced_phys_bits); goto err; } - devname = object_property_get_str(OBJECT(sev), "sev-device", NULL); - sev->sev_fd = open(devname, O_RDWR); - if (sev->sev_fd < 0) { + devname = object_property_get_str(OBJECT(sev_common), "sev-device", NULL); + sev_common->sev_fd = open(devname, O_RDWR); + if (sev_common->sev_fd < 0) { error_setg(errp, "%s: Failed to open %s '%s'", __func__, devname, strerror(errno)); g_free(devname); @@ -1183,7 +1158,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) } g_free(devname); - ret = sev_platform_ioctl(sev->sev_fd, SEV_PLATFORM_STATUS, &status, + ret = sev_platform_ioctl(sev_common->sev_fd, SEV_PLATFORM_STATUS, &status, &fw_error); if (ret) { error_setg(errp, "%s: failed to get platform status ret=%d " @@ -1191,9 +1166,9 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) fw_error_to_str(fw_error)); goto err; } - sev->build_id = status.build; - sev->api_major = status.api_major; - sev->api_minor = status.api_minor; + sev_common->build_id = status.build; + sev_common->api_major = status.api_major; + sev_common->api_minor = status.api_minor; if (sev_es_enabled()) { if (!kvm_kernel_irqchip_allowed()) { @@ -1215,33 +1190,34 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) trace_kvm_sev_init(); + SevGuestState *sev_guest = SEV_GUEST(sev_common); /* Only support reuse asid for CSV/CSV2 guest */ if (is_hygon_cpu() && (sev_guest->policy & GUEST_POLICY_REUSE_ASID)) { char *user_id = NULL; struct kvm_csv_init *init_cmd_buf = NULL; - user_id = object_property_get_str(OBJECT(sev), "user-id", NULL); + user_id = object_property_get_str(OBJECT(sev_guest), "user-id", NULL); if (user_id && strlen(user_id)) { init_cmd_buf = g_new0(struct kvm_csv_init, 1); init_cmd_buf->len = strlen(user_id); init_cmd_buf->userid_addr = (__u64)user_id; } - ret = sev_ioctl(sev->sev_fd, cmd, init_cmd_buf, &fw_error); + ret = sev_ioctl(sev_common->sev_fd, cmd, init_cmd_buf, &fw_error); if (user_id) { g_free(user_id); g_free(init_cmd_buf); } } else { - if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev)) == KVM_X86_DEFAULT_VM) { + if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) { cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT; - ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); + ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error); } else { struct kvm_sev_init args = { 0 }; - ret = sev_ioctl(sev->sev_fd, KVM_SEV_INIT2, &args, &fw_error); + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_INIT2, &args, &fw_error); } } @@ -1253,7 +1229,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) /* Support CSV3 */ if (!ret && cmd == KVM_SEV_ES_INIT) { - ret = csv3_init(sev_guest->policy, sev->sev_fd, (void *)&sev->state, &sev_ops); + ret = csv3_init(sev_guest->policy, sev_common->sev_fd, (void *)&sev_common->state, &sev_ops); if (ret) { error_setg(errp, "%s: failed to init csv3 context", __func__); goto err; @@ -1268,7 +1244,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) * then RECEIVE context will be created after the connection is established. */ if (!runstate_check(RUN_STATE_INMIGRATE)) { - ret = sev_launch_start(sev); + ret = sev_launch_start(sev_guest); if (ret) { error_setg(errp, "%s: failed to create encryption context", __func__); goto err; @@ -1289,7 +1265,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ram_block_notifier_add(&sev_ram_notifier); } qemu_add_machine_init_done_notifier(&sev_machine_done_notify); - qemu_add_vm_change_state_handler(sev_vm_state_change, sev); + qemu_add_vm_change_state_handler(sev_vm_state_change, sev_common); migration_add_notifier(&sev_migration_state, sev_migration_state_notifier); if (csv3_enabled()) { @@ -1297,7 +1273,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) } else { cgs_class->memory_encryption_ops = &sev_memory_encryption_ops; } - QTAILQ_INIT(&sev->shared_regions_list); + QTAILQ_INIT(&sev_guest->shared_regions_list); /* Determine whether support MSR_AMD64_SEV_ES_GHCB */ if (sev_es_enabled()) { @@ -1311,7 +1287,6 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) return 0; err: - sev_guest = NULL; ram_block_discard_disable(false); return -1; } @@ -1319,13 +1294,15 @@ err: int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) { - if (!sev_guest) { + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + + if (!sev_common) { return 0; } /* if SEV is in update state then encrypt the data else do nothing */ - if (sev_check_state(sev_guest, SEV_STATE_LAUNCH_UPDATE)) { - int ret = sev_launch_update_data(sev_guest, ptr, len); + if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { + int ret = sev_launch_update_data(SEV_GUEST(sev_common), ptr, len); if (ret < 0) { error_setg(errp, "SEV: Failed to encrypt pflash rom"); return ret; @@ -1345,16 +1322,17 @@ int sev_inject_launch_secret(const char *packet_hdr, const char *secret, void *hva; gsize hdr_sz = 0, data_sz = 0; MemoryRegion *mr = NULL; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); - if (!sev_guest) { + if (!sev_common) { error_setg(errp, "SEV not enabled for guest"); return 1; } /* secret can be injected only in this state */ - if (!sev_check_state(sev_guest, SEV_STATE_LAUNCH_SECRET)) { + if (!sev_check_state(sev_common, SEV_STATE_LAUNCH_SECRET)) { error_setg(errp, "SEV: Not in correct state. (LSECRET) %x", - sev_guest->state); + sev_common->state); return 1; } @@ -1398,7 +1376,7 @@ int sev_inject_launch_secret(const char *packet_hdr, const char *secret, trace_kvm_sev_launch_secret(gpa, input.guest_uaddr, input.trans_uaddr, input.trans_len); - ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_LAUNCH_SECRET, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_SECRET, &input, &error); if (ret) { error_setg(errp, "SEV: failed to inject secret ret=%d fw_error=%d '%s'", @@ -1505,9 +1483,10 @@ void sev_es_set_reset_vector(CPUState *cpu) { X86CPU *x86; CPUX86State *env; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); /* Only update if we have valid reset information */ - if (!sev_guest || !sev_guest->reset_data_valid) { + if (!sev_common || !sev_common->reset_data_valid) { return; } @@ -1519,11 +1498,11 @@ void sev_es_set_reset_vector(CPUState *cpu) x86 = X86_CPU(cpu); env = &x86->env; - cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_guest->reset_cs, 0xffff, + cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_common->reset_cs, 0xffff, DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK); - env->eip = sev_guest->reset_ip; + env->eip = sev_common->reset_ip; } int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) @@ -1531,6 +1510,7 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) CPUState *cpu; uint32_t addr; int ret; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); if (!sev_es_enabled()) { return 0; @@ -1544,9 +1524,9 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) } if (addr) { - sev_guest->reset_cs = addr & 0xffff0000; - sev_guest->reset_ip = addr & 0x0000ffff; - sev_guest->reset_data_valid = true; + sev_common->reset_cs = addr & 0xffff0000; + sev_common->reset_ip = addr & 0x0000ffff; + sev_common->reset_data_valid = true; CPU_FOREACH(cpu) { sev_es_set_reset_vector(cpu); @@ -1561,8 +1541,9 @@ sev_get_send_session_length(void) { int ret, fw_err = 0; struct kvm_sev_send_start start = {}; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); - ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_SEND_START, &start, &fw_err); + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SEND_START, &start, &fw_err); if (fw_err != SEV_RET_INVALID_LEN) { ret = -1; error_report("%s: failed to get session length ret=%d fw_error=%d '%s'", @@ -1583,6 +1564,7 @@ sev_send_start(SevGuestState *s, QEMUFile *f, uint64_t *bytes_sent) struct kvm_sev_send_start start = { }; guchar *pdh = NULL, *plat_cert = NULL, *session = NULL; Error *local_err = NULL; + SevCommonState *sev_common = SEV_COMMON(s); if (!s->remote_pdh || !s->remote_plat_cert || !s->amd_cert_len) { error_report("%s: missing remote PDH or PLAT_CERT", __func__); @@ -1610,7 +1592,7 @@ sev_send_start(SevGuestState *s, QEMUFile *f, uint64_t *bytes_sent) start.session_len = session_len; /* Get our PDH certificate */ - ret = sev_get_pdh_info(s->sev_fd, &pdh, &pdh_len, + ret = sev_get_pdh_info(sev_common->sev_fd, &pdh, &pdh_len, &plat_cert, &plat_cert_len, &local_err); if (ret) { error_report("Failed to get our PDH cert"); @@ -1621,7 +1603,7 @@ sev_send_start(SevGuestState *s, QEMUFile *f, uint64_t *bytes_sent) start.plat_certs_uaddr, start.plat_certs_len, start.amd_certs_uaddr, start.amd_certs_len); - ret = sev_ioctl(s->sev_fd, KVM_SEV_SEND_START, &start, &fw_error); + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SEND_START, &start, &fw_error); if (ret < 0) { error_report("%s: SEND_START ret=%d fw_error=%d '%s'", __func__, ret, fw_error, fw_error_to_str(fw_error)); @@ -1635,7 +1617,7 @@ sev_send_start(SevGuestState *s, QEMUFile *f, uint64_t *bytes_sent) qemu_put_buffer(f, (uint8_t *)start.session_uaddr, start.session_len); *bytes_sent = 12 + pdh_len + start.session_len; - sev_set_guest_state(s, SEV_STATE_SEND_UPDATE); + sev_set_guest_state(sev_common, SEV_STATE_SEND_UPDATE); err: g_free(pdh); @@ -1648,8 +1630,9 @@ sev_send_get_packet_len(int *fw_err) { int ret; struct kvm_sev_send_update_data update = { 0, }; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); - ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_SEND_UPDATE_DATA, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SEND_UPDATE_DATA, &update, fw_err); if (*fw_err != SEV_RET_INVALID_LEN) { ret = 0; @@ -1671,6 +1654,7 @@ sev_send_update_data(SevGuestState *s, QEMUFile *f, uint8_t *ptr, uint32_t size, int ret, fw_error; guchar *trans; struct kvm_sev_send_update_data update = { }; + SevCommonState *sev_common = SEV_COMMON(s); /* * If this is first call then query the packet header bytes and allocate @@ -1699,7 +1683,7 @@ sev_send_update_data(SevGuestState *s, QEMUFile *f, uint8_t *ptr, uint32_t size, trace_kvm_sev_send_update_data(ptr, trans, size); - ret = sev_ioctl(s->sev_fd, KVM_SEV_SEND_UPDATE_DATA, &update, &fw_error); + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SEND_UPDATE_DATA, &update, &fw_error); if (ret) { error_report("%s: SEND_UPDATE_DATA ret=%d fw_error=%d '%s'", __func__, ret, fw_error, fw_error_to_str(fw_error)); @@ -1722,13 +1706,13 @@ err: int sev_save_outgoing_page(QEMUFile *f, uint8_t *ptr, uint32_t sz, uint64_t *bytes_sent) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); /* * If this is a first buffer then create outgoing encryption context * and write our PDH, policy and session data. */ - if (!sev_check_state(s, SEV_STATE_SEND_UPDATE) && + if (!sev_check_state(SEV_COMMON(s), SEV_STATE_SEND_UPDATE) && sev_send_start(s, f, bytes_sent)) { error_report("Failed to create outgoing context"); return 1; @@ -1744,6 +1728,7 @@ sev_receive_start(SevGuestState *sev, QEMUFile *f) int fw_error; struct kvm_sev_receive_start start = { }; gchar *session = NULL, *pdh_cert = NULL; + SevCommonState *sev_common = SEV_COMMON(sev); /* get SEV guest handle */ start.handle = object_property_get_int(OBJECT(sev), "handle", @@ -1773,7 +1758,7 @@ sev_receive_start(SevGuestState *sev, QEMUFile *f) trace_kvm_sev_receive_start(start.policy, session, pdh_cert); - ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_RECEIVE_START, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_RECEIVE_START, &start, &fw_error); if (ret < 0) { error_report("Error RECEIVE_START ret=%d fw_error=%d '%s'", @@ -1782,7 +1767,7 @@ sev_receive_start(SevGuestState *sev, QEMUFile *f) } object_property_set_int(OBJECT(sev), "handle", start.handle, &error_abort); - sev_set_guest_state(sev, SEV_STATE_RECEIVE_UPDATE); + sev_set_guest_state(sev_common, SEV_STATE_RECEIVE_UPDATE); err: g_free(session); g_free(pdh_cert); @@ -1795,6 +1780,7 @@ static int sev_receive_update_data(QEMUFile *f, uint8_t *ptr) int ret = 1, fw_error = 0; gchar *hdr = NULL, *trans = NULL; struct kvm_sev_receive_update_data update = {}; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); /* get packet header */ update.hdr_len = qemu_get_be32(f); @@ -1822,7 +1808,7 @@ static int sev_receive_update_data(QEMUFile *f, uint8_t *ptr) trace_kvm_sev_receive_update_data(trans, ptr, update.guest_len, hdr, update.hdr_len); - ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_RECEIVE_UPDATE_DATA, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_RECEIVE_UPDATE_DATA, &update, &fw_error); if (ret) { error_report("Error RECEIVE_UPDATE_DATA ret=%d fw_error=%d '%s'", @@ -1837,13 +1823,13 @@ err: int sev_load_incoming_page(QEMUFile *f, uint8_t *ptr) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); /* * If this is first buffer and SEV is not in recieiving state then * use RECEIVE_START command to create a encryption context. */ - if (!sev_check_state(s, SEV_STATE_RECEIVE_UPDATE) && + if (!sev_check_state(SEV_COMMON(s), SEV_STATE_RECEIVE_UPDATE) && sev_receive_start(s, f)) { return 1; } @@ -1853,7 +1839,7 @@ int sev_load_incoming_page(QEMUFile *f, uint8_t *ptr) int sev_remove_shared_regions_list(unsigned long start, unsigned long end) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); struct shared_region *pos, *next_pos; QTAILQ_FOREACH_SAFE(pos, &s->shared_regions_list, list, next_pos) { @@ -1901,7 +1887,7 @@ int sev_add_shared_regions_list(unsigned long start, unsigned long end) { struct shared_region *shrd_region; struct shared_region *pos; - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); if (QTAILQ_EMPTY(&s->shared_regions_list)) { shrd_region = g_malloc0(sizeof(*shrd_region)); @@ -1952,7 +1938,7 @@ int sev_add_shared_regions_list(unsigned long start, unsigned long end) int sev_save_outgoing_shared_regions_list(QEMUFile *f, uint64_t *bytes_sent) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); struct shared_region *pos; QTAILQ_FOREACH(pos, &s->shared_regions_list, list) { @@ -1969,7 +1955,7 @@ int sev_save_outgoing_shared_regions_list(QEMUFile *f, uint64_t *bytes_sent) int sev_load_incoming_shared_regions_list(QEMUFile *f) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); struct shared_region *shrd_region; int status; @@ -1992,7 +1978,7 @@ int sev_load_incoming_shared_regions_list(QEMUFile *f) bool sev_is_gfn_in_unshared_region(unsigned long gfn) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); struct shared_region *pos; QTAILQ_FOREACH(pos, &s->shared_regions_list, list) { @@ -2220,11 +2206,12 @@ csv_command_batch(uint32_t cmd_id, uint64_t head_uaddr, int *fw_err) { int ret; struct kvm_csv_command_batch command_batch = { }; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); command_batch.command_id = cmd_id; command_batch.csv_batch_list_uaddr = head_uaddr; - ret = sev_ioctl(sev_guest->sev_fd, KVM_CSV_COMMAND_BATCH, + ret = sev_ioctl(sev_common->sev_fd, KVM_CSV_COMMAND_BATCH, &command_batch, fw_err); if (ret) { error_report("%s: COMMAND_BATCH ret=%d fw_err=%d '%s'", @@ -2299,7 +2286,7 @@ csv_receive_update_data_batch(SevGuestState *s) int csv_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); /* Only support for HYGON CSV */ if (!is_hygon_cpu()) { @@ -2312,7 +2299,7 @@ csv_queue_outgoing_page(uint8_t *ptr, uint32_t sz, uint64_t addr) int csv_queue_incoming_page(QEMUFile *f, uint8_t *ptr) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); /* Only support for HYGON CSV */ if (!is_hygon_cpu()) { @@ -2324,7 +2311,7 @@ int csv_queue_incoming_page(QEMUFile *f, uint8_t *ptr) * If this is first buffer and SEV is not in recieiving state then * use RECEIVE_START command to create a encryption context. */ - if (!sev_check_state(s, SEV_STATE_RECEIVE_UPDATE) && + if (!sev_check_state(SEV_COMMON(s), SEV_STATE_RECEIVE_UPDATE) && sev_receive_start(s, f)) { return 1; } @@ -2335,7 +2322,7 @@ int csv_queue_incoming_page(QEMUFile *f, uint8_t *ptr) int csv_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); /* Only support for HYGON CSV */ if (!is_hygon_cpu()) { @@ -2347,7 +2334,7 @@ csv_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent) * If this is a first buffer then create outgoing encryption context * and write our PDH, policy and session data. */ - if (!sev_check_state(s, SEV_STATE_SEND_UPDATE) && + if (!sev_check_state(SEV_COMMON(s), SEV_STATE_SEND_UPDATE) && sev_send_start(s, f, bytes_sent)) { error_report("Failed to create outgoing context"); return 1; @@ -2358,7 +2345,7 @@ csv_save_queued_outgoing_pages(QEMUFile *f, uint64_t *bytes_sent) int csv_load_queued_incoming_pages(QEMUFile *f) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); /* Only support for HYGON CSV */ if (!is_hygon_cpu()) { @@ -2374,8 +2361,9 @@ sev_send_vmsa_get_packet_len(int *fw_err) { int ret; struct kvm_sev_send_update_vmsa update = { 0, }; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); - ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_SEND_UPDATE_VMSA, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SEND_UPDATE_VMSA, &update, fw_err); if (*fw_err != SEV_RET_INVALID_LEN) { ret = 0; @@ -2397,6 +2385,7 @@ sev_send_update_vmsa(SevGuestState *s, QEMUFile *f, uint32_t cpu_id, int ret, fw_error; guchar *trans = NULL; struct kvm_sev_send_update_vmsa update = {}; + SevCommonState *sev_common = SEV_COMMON(s); /* * If this is first call then query the packet header bytes and allocate @@ -2424,7 +2413,7 @@ sev_send_update_vmsa(SevGuestState *s, QEMUFile *f, uint32_t cpu_id, trace_kvm_sev_send_update_vmsa(cpu_id, cpu_index, trans, size); - ret = sev_ioctl(s->sev_fd, KVM_SEV_SEND_UPDATE_VMSA, &update, &fw_error); + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SEND_UPDATE_VMSA, &update, &fw_error); if (ret) { error_report("%s: SEND_UPDATE_VMSA ret=%d fw_error=%d '%s'", __func__, ret, fw_error, fw_error_to_str(fw_error)); @@ -2454,7 +2443,7 @@ err: int csv_save_outgoing_cpu_state(QEMUFile *f, uint64_t *bytes_sent) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); CPUState *cpu; int ret = 0; @@ -2487,6 +2476,7 @@ static int sev_receive_update_vmsa(QEMUFile *f) uint32_t cpu_index, cpu_id = 0; gchar *hdr = NULL, *trans = NULL; struct kvm_sev_receive_update_vmsa update = {}; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); /* get cpu index buffer */ assert(qemu_get_be32(f) == sizeof(uint32_t)); @@ -2523,7 +2513,7 @@ static int sev_receive_update_vmsa(QEMUFile *f) trace_kvm_sev_receive_update_vmsa(cpu_id, cpu_index, trans, update.trans_len, hdr, update.hdr_len); - ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_RECEIVE_UPDATE_VMSA, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_RECEIVE_UPDATE_VMSA, &update, &fw_error); if (ret) { error_report("Error RECEIVE_UPDATE_VMSA ret=%d fw_error=%d '%s'", @@ -2638,12 +2628,13 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) hwaddr mapped_len = sizeof(*padded_ht); MemTxAttrs attrs = { 0 }; bool ret = true; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); /* * Only add the kernel hashes if the sev-guest configuration explicitly * stated kernel-hashes=on. */ - if (!sev_guest->kernel_hashes) { + if (!sev_common->kernel_hashes) { return false; } @@ -2746,14 +2737,14 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) static int _sev_send_start(QEMUFile *f, uint64_t *bytes_sent) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); return sev_send_start(s, f, bytes_sent); } static int _sev_receive_start(QEMUFile *f) { - SevGuestState *s = sev_guest; + SevGuestState *s = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); return sev_receive_start(s, f); } @@ -2765,8 +2756,30 @@ struct sev_ops sev_ops = { .sev_receive_start = _sev_receive_start, }; +static char * +sev_common_get_sev_device(Object *obj, Error **errp) +{ + return g_strdup(SEV_COMMON(obj)->sev_device); +} + static void -sev_guest_class_init(ObjectClass *oc, void *data) +sev_common_set_sev_device(Object *obj, const char *value, Error **errp) +{ + SEV_COMMON(obj)->sev_device = g_strdup(value); +} + +static bool sev_common_get_kernel_hashes(Object *obj, Error **errp) +{ + return SEV_COMMON(obj)->kernel_hashes; +} + +static void sev_common_set_kernel_hashes(Object *obj, bool value, Error **errp) +{ + SEV_COMMON(obj)->kernel_hashes = value; +} + +static void +sev_common_class_init(ObjectClass *oc, void *data) { ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); @@ -2775,10 +2788,87 @@ sev_guest_class_init(ObjectClass *oc, void *data) x86_klass->kvm_type = sev_kvm_type; object_class_property_add_str(oc, "sev-device", - sev_guest_get_sev_device, - sev_guest_set_sev_device); + sev_common_get_sev_device, + sev_common_set_sev_device); object_class_property_set_description(oc, "sev-device", "SEV device to use"); + object_class_property_add_bool(oc, "kernel-hashes", + sev_common_get_kernel_hashes, + sev_common_set_kernel_hashes); + object_class_property_set_description(oc, "kernel-hashes", + "add kernel hashes to guest firmware for measured Linux boot"); +} + +static void +sev_common_instance_init(Object *obj) +{ + SevCommonState *sev_common = SEV_COMMON(obj); + + sev_common->kvm_type = -1; + + sev_common->sev_device = g_strdup(DEFAULT_SEV_DEVICE); + + object_property_add_uint32_ptr(obj, "cbitpos", &sev_common->cbitpos, + OBJ_PROP_FLAG_READWRITE); + object_property_add_uint32_ptr(obj, "reduced-phys-bits", + &sev_common->reduced_phys_bits, + OBJ_PROP_FLAG_READWRITE); +} + +/* sev guest info common to sev/sev-es/sev-snp */ +static const TypeInfo sev_common_info = { + .parent = TYPE_X86_CONFIDENTIAL_GUEST, + .name = TYPE_SEV_COMMON, + .instance_size = sizeof(SevCommonState), + .instance_init = sev_common_instance_init, + .class_size = sizeof(SevCommonStateClass), + .class_init = sev_common_class_init, + .abstract = true, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + +static char * +sev_guest_get_dh_cert_file(Object *obj, Error **errp) +{ + return g_strdup(SEV_GUEST(obj)->dh_cert_file); +} + +static void +sev_guest_set_dh_cert_file(Object *obj, const char *value, Error **errp) +{ + SEV_GUEST(obj)->dh_cert_file = g_strdup(value); +} + +static char * +sev_guest_get_session_file(Object *obj, Error **errp) +{ + SevGuestState *sev_guest = SEV_GUEST(obj); + + return sev_guest->session_file ? g_strdup(sev_guest->session_file) : NULL; +} + +static void +sev_guest_set_session_file(Object *obj, const char *value, Error **errp) +{ + SEV_GUEST(obj)->session_file = g_strdup(value); +} + +static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) +{ + return SEV_GUEST(obj)->legacy_vm_type; +} + +static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) +{ + SEV_GUEST(obj)->legacy_vm_type = value; +} + +static void +sev_guest_class_init(ObjectClass *oc, void *data) +{ object_class_property_add_str(oc, "dh-cert-file", sev_guest_get_dh_cert_file, sev_guest_set_dh_cert_file); @@ -2789,12 +2879,6 @@ sev_guest_class_init(ObjectClass *oc, void *data) sev_guest_set_session_file); object_class_property_set_description(oc, "session-file", "guest owners session parameters (encoded with base64)"); - object_class_property_add_bool(oc, "kernel-hashes", - sev_guest_get_kernel_hashes, - sev_guest_set_kernel_hashes); - object_class_property_set_description(oc, "kernel-hashes", - "add kernel hashes to guest firmware for measured Linux boot"); - object_class_property_add_str(oc, "user-id", sev_guest_get_user_id, sev_guest_set_user_id); @@ -2820,41 +2904,29 @@ sev_guest_class_init(ObjectClass *oc, void *data) static void sev_guest_instance_init(Object *obj) { - SevGuestState *sev = SEV_GUEST(obj); - - sev->kvm_type = -1; + SevGuestState *sev_guest = SEV_GUEST(obj); - sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); - sev->policy = DEFAULT_GUEST_POLICY; - object_property_add_uint32_ptr(obj, "policy", &sev->policy, - OBJ_PROP_FLAG_READWRITE); - object_property_add_uint32_ptr(obj, "handle", &sev->handle, + sev_guest->policy = DEFAULT_GUEST_POLICY; + object_property_add_uint32_ptr(obj, "handle", &sev_guest->handle, OBJ_PROP_FLAG_READWRITE); - object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos, - OBJ_PROP_FLAG_READWRITE); - object_property_add_uint32_ptr(obj, "reduced-phys-bits", - &sev->reduced_phys_bits, + object_property_add_uint32_ptr(obj, "policy", &sev_guest->policy, OBJ_PROP_FLAG_READWRITE); object_apply_compat_props(obj); } -/* sev guest info */ +/* guest info specific sev/sev-es */ static const TypeInfo sev_guest_info = { - .parent = TYPE_X86_CONFIDENTIAL_GUEST, + .parent = TYPE_SEV_COMMON, .name = TYPE_SEV_GUEST, .instance_size = sizeof(SevGuestState), - .instance_finalize = sev_guest_finalize, - .class_init = sev_guest_class_init, .instance_init = sev_guest_instance_init, - .interfaces = (InterfaceInfo[]) { - { TYPE_USER_CREATABLE }, - { } - } + .class_init = sev_guest_class_init, }; static void sev_register_types(void) { + type_register_static(&sev_common_info); type_register_static(&sev_guest_info); } diff --git a/target/i386/sev.h b/target/i386/sev.h index 0436c966cc..df7b319f48 100644 --- a/target/i386/sev.h +++ b/target/i386/sev.h @@ -20,6 +20,9 @@ #include "exec/confidential-guest-support.h" +#define TYPE_SEV_COMMON "sev-common" +#define TYPE_SEV_GUEST "sev-guest" + #define SEV_POLICY_NODBG 0x1 #define SEV_POLICY_NOKS 0x2 #define SEV_POLICY_ES 0x4 -- Gitee From d8d9af7f9c7fba2efaabce44de1d468cde27c5d4 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 30 May 2024 06:16:17 -0500 Subject: [PATCH 043/115] i386/sev: Move sev_launch_update to separate class method commit 6600f1ac0c81cbe67faf048ea07f78542dea925f upstream. When sev-snp-guest objects are introduced there will be a number of differences in how the launch data is handled compared to the existing sev-guest object. Move sev_launch_start() to a class method to make it easier to implement SNP-specific launch update functionality later. [Backport Changes] In target/i386/sev.c, since sev_launch_start() has been moved to SevCommonStateClass as a class method, the non-upstream block in sev_common_kvm_init() (introduced in commit c8a6d5f18c450 as part of CSV support) was updated to invoke the new class method via klass->launch_start(sev_common). This ensures the non-upstream feature remains consistent with the upstream structural changes. Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-6-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: PKumarAditya Signed-off-by: PrithivishS --- target/i386/sev.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 56fedfa22f..413f6b8aa2 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -77,6 +77,8 @@ struct SevCommonState { struct SevCommonStateClass { X86ConfidentialGuestClass parent_class; + /* public */ + int (*launch_start)(SevCommonState *sev_common); }; /** @@ -735,16 +737,16 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len) } static int -sev_launch_start(SevGuestState *sev_guest) +sev_launch_start(SevCommonState *sev_common) { gsize sz; int ret = 1; int fw_error, rc; + SevGuestState *sev_guest = SEV_GUEST(sev_common); struct kvm_sev_launch_start start = { .handle = sev_guest->handle, .policy = sev_guest->policy }; guchar *session = NULL, *dh_cert = NULL; - SevCommonState *sev_common = SEV_COMMON(sev_guest); if (sev_guest->session_file) { if (sev_read_file_base64(sev_guest->session_file, &session, &sz) < 0) { @@ -1109,6 +1111,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) uint32_t ebx; uint32_t host_cbitpos; struct sev_user_data_status status = {}; + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); ConfidentialGuestSupportClass *cgs_class = (ConfidentialGuestSupportClass *) object_get_class(OBJECT(cgs)); @@ -1244,7 +1247,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) * then RECEIVE context will be created after the connection is established. */ if (!runstate_check(RUN_STATE_INMIGRATE)) { - ret = sev_launch_start(sev_guest); + ret = klass->launch_start(sev_common); if (ret) { error_setg(errp, "%s: failed to create encryption context", __func__); goto err; @@ -2869,6 +2872,10 @@ static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) static void sev_guest_class_init(ObjectClass *oc, void *data) { + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + + klass->launch_start = sev_launch_start; + object_class_property_add_str(oc, "dh-cert-file", sev_guest_get_dh_cert_file, sev_guest_set_dh_cert_file); -- Gitee From 975089ccffd303dc5ae13d62f4c2f36b8b7855e1 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 30 May 2024 06:16:18 -0500 Subject: [PATCH 044/115] i386/sev: Move sev_launch_finish to separate class method commit bce615a14aec07cab0488e5a242f6a91e641efcb upstream. When sev-snp-guest objects are introduced there will be a number of differences in how the launch finish is handled compared to the existing sev-guest object. Move sev_launch_finish() to a class method to make it easier to implement SNP-specific launch update functionality later. Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-7-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: PKumarAditya Signed-off-by: PrithivishS --- target/i386/sev.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 413f6b8aa2..b73a4c4d6f 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -79,6 +79,7 @@ struct SevCommonStateClass { /* public */ int (*launch_start)(SevCommonState *sev_common); + void (*launch_finish)(SevCommonState *sev_common); }; /** @@ -920,12 +921,12 @@ static Notifier sev_machine_done_notify = { }; static void -sev_launch_finish(SevGuestState *sev_guest) +sev_launch_finish(SevCommonState *sev_common) { int ret, error; trace_kvm_sev_launch_finish(); - ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, &error); if (ret) { error_report("%s: LAUNCH_FINISH ret=%d fw_error=%d '%s'", @@ -933,7 +934,7 @@ sev_launch_finish(SevGuestState *sev_guest) exit(1); } - sev_set_guest_state(SEV_COMMON(sev_guest), SEV_STATE_RUNNING); + sev_set_guest_state(sev_common, SEV_STATE_RUNNING); /* add migration blocker */ error_setg(&sev_mig_blocker, @@ -969,12 +970,13 @@ static void sev_vm_state_change(void *opaque, bool running, RunState state) { SevCommonState *sev_common = opaque; + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(opaque); if (running) { if (sev_check_state(sev_common, SEV_STATE_RECEIVE_UPDATE)) { sev_receive_finish(sev_common); } else if (!sev_check_state(sev_common, SEV_STATE_RUNNING)) { - sev_launch_finish(SEV_GUEST(sev_common)); + klass->launch_finish(sev_common); } } } @@ -2875,6 +2877,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); klass->launch_start = sev_launch_start; + klass->launch_finish = sev_launch_finish; object_class_property_add_str(oc, "dh-cert-file", sev_guest_get_dh_cert_file, -- Gitee From 08fe1f16b46bbe2cbd69e9d900cb962bddfb6702 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Thu, 30 May 2024 06:16:19 -0500 Subject: [PATCH 045/115] i386/sev: Introduce 'sev-snp-guest' object commit 7b34df44260b391e33bc3acf1ced30019d9aadf1 upstream. SEV-SNP support relies on a different set of properties/state than the existing 'sev-guest' object. This patch introduces the 'sev-snp-guest' object, which can be used to configure an SEV-SNP guest. For example, a default-configured SEV-SNP guest with no additional information passed in for use with attestation: -object sev-snp-guest,id=sev0 or a fully-specified SEV-SNP guest where all spec-defined binary blobs are passed in as base64-encoded strings: -object sev-snp-guest,id=sev0, \ policy=0x30000, \ init-flags=0, \ id-block=YWFhYWFhYWFhYWFhYWFhCg==, \ id-auth=CxHK/OKLkXGn/KpAC7Wl1FSiisWDbGTEKz..., \ author-key-enabled=on, \ host-data=LNkCWBRC5CcdGXirbNUV1OrsR28s..., \ guest-visible-workarounds=AA==, \ See the QAPI schema updates included in this patch for more usage details. In some cases these blobs may be up to 4096 characters, but this is generally well below the default limit for linux hosts where command-line sizes are defined by the sysconf-configurable ARG_MAX value, which defaults to 2097152 characters for Ubuntu hosts, for example. Signed-off-by: Brijesh Singh Co-developed-by: Michael Roth Acked-by: Markus Armbruster (for QAPI schema) Signed-off-by: Michael Roth Co-developed-by: Pankaj Gupta Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-8-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: PKumarAditya Signed-off-by: PrithivishS --- docs/system/i386/amd-memory-encryption.rst | 70 +++++- qapi/qom.json | 58 +++++ target/i386/sev.c | 253 +++++++++++++++++++++ target/i386/sev.h | 1 + 4 files changed, 380 insertions(+), 2 deletions(-) diff --git a/docs/system/i386/amd-memory-encryption.rst b/docs/system/i386/amd-memory-encryption.rst index b7e3f46ff6..ea53ebc502 100644 --- a/docs/system/i386/amd-memory-encryption.rst +++ b/docs/system/i386/amd-memory-encryption.rst @@ -25,8 +25,8 @@ support for notifying a guest's operating system when certain types of VMEXITs are about to occur. This allows the guest to selectively share information with the hypervisor to satisfy the requested function. -Launching ---------- +Launching (SEV and SEV-ES) +-------------------------- Boot images (such as bios) must be encrypted before a guest can be booted. The ``MEMORY_ENCRYPT_OP`` ioctl provides commands to encrypt the images: ``LAUNCH_START``, @@ -161,6 +161,72 @@ The value of GCTX.LD is If kernel hashes are not used, or SEV-ES is disabled, use empty blobs for ``kernel_hashes_blob`` and ``vmsas_blob`` as needed. +Launching (SEV-SNP) +------------------- +Boot images (such as bios) must be encrypted before a guest can be booted. The +``MEMORY_ENCRYPT_OP`` ioctl provides commands to encrypt the images: +``SNP_LAUNCH_START``, ``SNP_LAUNCH_UPDATE``, and ``SNP_LAUNCH_FINISH``. These +three commands communicate with SEV-SNP firmware to generate a fresh memory +encryption key for the VM, encrypt the boot images for a successful launch. For +more details on the SEV-SNP firmware interfaces used by these commands please +see the SEV-SNP Firmware ABI. + +``SNP_LAUNCH_START`` is called first to create a cryptographic launch context +within the firmware. To create this context, the guest owner must provide a +guest policy and other parameters as described in the SEV-SNP firmware +specification. The launch parameters should be specified as described in the +QAPI schema for the sev-snp-guest object. + +The ``SNP_LAUNCH_START`` uses the following parameters, which can be configured +by the corresponding parameters documented in the QAPI schema for the +'sev-snp-guest' object. + ++--------+-------+----------+-------------------------------------------------+ +| key | type | default | meaning | ++---------------------------+-------------------------------------------------+ +| policy | hex | 0x30000 | a 64-bit guest policy | ++---------------------------+-------------------------------------------------+ +| guest-visible-workarounds | string| 0 | 16-byte base64 encoded string| +| | | | for guest OS visible | +| | | | workarounds. | ++---------------------------+-------------------------------------------------+ + +``SNP_LAUNCH_UPDATE`` encrypts the memory region using the cryptographic context +created via the ``SNP_LAUNCH_START`` command. If required, this command can be +called multiple times to encrypt different memory regions. The command also +calculates the measurement of the memory contents as it encrypts. + +``SNP_LAUNCH_FINISH`` finalizes the guest launch flow. Optionally, while +finalizing the launch the firmware can perform checks on the launch digest +computing through the ``SNP_LAUNCH_UPDATE``. To perform the check the user must +supply the id block, authentication blob and host data that should be included +in the attestation report. See the SEV-SNP spec for further details. + +The ``SNP_LAUNCH_FINISH`` uses the following parameters, which can be configured +by the corresponding parameters documented in the QAPI schema for the +'sev-snp-guest' object. + ++--------------------+-------+----------+-------------------------------------+ +| key | type | default | meaning | ++--------------------+-------+----------+-------------------------------------+ +| id-block | string| none | base64 encoded ID block | ++--------------------+-------+----------+-------------------------------------+ +| id-auth | string| none | base64 encoded authentication | +| | | | information | ++--------------------+-------+----------+-------------------------------------+ +| author-key-enabled | bool | 0 | auth block contains author key | ++--------------------+-------+----------+-------------------------------------+ +| host_data | string| none | host provided data | ++--------------------+-------+----------+-------------------------------------+ + +To launch a SEV-SNP guest (additional parameters are documented in the QAPI +schema for the 'sev-snp-guest' object):: + + # ${QEMU} \ + -machine ...,confidential-guest-support=sev0 \ + -object sev-snp-guest,id=sev0,cbitpos=51,reduced-phys-bits=1 + + Debugging --------- diff --git a/qapi/qom.json b/qapi/qom.json index cd3e6c31f3..c9bff57bbc 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -924,6 +924,62 @@ '*secret-file': 'str', '*legacy-vm-type': 'bool' } } +## +# @SevSnpGuestProperties: +# +# Properties for sev-snp-guest objects. Most of these are direct +# arguments for the KVM_SNP_* interfaces documented in the Linux +# kernel source under +# Documentation/arch/x86/amd-memory-encryption.rst, which are in turn +# closely coupled with the SNP_INIT/SNP_LAUNCH_* firmware commands +# documented in the SEV-SNP Firmware ABI Specification (Rev 0.9). +# +# More usage information is also available in the QEMU source tree +# under docs/amd-memory-encryption. +# +# @policy: the 'POLICY' parameter to the SNP_LAUNCH_START command, as +# defined in the SEV-SNP firmware ABI (default: 0x30000) +# +# @guest-visible-workarounds: 16-byte, base64-encoded blob to report +# hypervisor-defined workarounds, corresponding to the 'GOSVW' +# parameter of the SNP_LAUNCH_START command defined in the SEV-SNP +# firmware ABI (default: all-zero) +# +# @id-block: 96-byte, base64-encoded blob to provide the 'ID Block' +# structure for the SNP_LAUNCH_FINISH command defined in the +# SEV-SNP firmware ABI (default: all-zero) +# +# @id-auth: 4096-byte, base64-encoded blob to provide the 'ID +# Authentication Information Structure' for the SNP_LAUNCH_FINISH +# command defined in the SEV-SNP firmware ABI (default: all-zero) +# +# @author-key-enabled: true if 'id-auth' blob contains the 'AUTHOR_KEY' +# field defined SEV-SNP firmware ABI (default: false) +# +# @host-data: 32-byte, base64-encoded, user-defined blob to provide to +# the guest, as documented for the 'HOST_DATA' parameter of the +# SNP_LAUNCH_FINISH command in the SEV-SNP firmware ABI (default: +# all-zero) +# +# @vcek-disabled: Guests are by default allowed to choose between VLEK +# (Versioned Loaded Endorsement Key) or VCEK (Versioned Chip +# Endorsement Key) when requesting attestation reports from +# firmware. Set this to true to disable the use of VCEK. +# (default: false) (since: 9.1) +# +# Since: 9.1 +## +{ 'struct': 'SevSnpGuestProperties', + 'base': 'SevCommonProperties', + 'data': { + '*policy': 'uint64', + '*guest-visible-workarounds': 'str', + '*id-block': 'str', + '*id-auth': 'str', + '*author-key-enabled': 'bool', + '*host-data': 'str', + '*vcek-disabled': 'bool' } } + ## # @ThreadContextProperties: # @@ -1065,6 +1121,7 @@ { 'name': 'secret_keyring', 'if': 'CONFIG_SECRET_KEYRING' }, 'sev-guest', + 'sev-snp-guest', 'thread-context', 's390-pv-guest', 'throttle-group', @@ -1136,6 +1193,7 @@ 'secret_keyring': { 'type': 'SecretKeyringProperties', 'if': 'CONFIG_SECRET_KEYRING' }, 'sev-guest': 'SevGuestProperties', + 'sev-snp-guest': 'SevSnpGuestProperties', 'thread-context': 'ThreadContextProperties', 'throttle-group': 'ThrottleGroupProperties', 'tls-creds-anon': 'TlsCredsAnonProperties', diff --git a/target/i386/sev.c b/target/i386/sev.c index b73a4c4d6f..4ec73a01e0 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -45,6 +45,7 @@ OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) +OBJECT_DECLARE_TYPE(SevSnpGuestState, SevCommonStateClass, SEV_SNP_GUEST) struct shared_region { unsigned long gfn_start, gfn_end; @@ -125,8 +126,22 @@ struct SevGuestState { CsvBatchCmdList *csv_batch_cmd_list; }; +struct SevSnpGuestState { + SevCommonState parent_obj; + + /* configuration parameters */ + char *guest_visible_workarounds; + char *id_block; + char *id_auth; + char *host_data; + + struct kvm_sev_snp_launch_start kvm_start_conf; + struct kvm_sev_snp_launch_finish kvm_finish_conf; +}; + #define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ #define DEFAULT_SEV_DEVICE "/dev/sev" +#define DEFAULT_SEV_SNP_POLICY 0x30000 #define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" typedef struct __attribute__((__packed__)) SevInfoBlock { @@ -2933,11 +2948,249 @@ static const TypeInfo sev_guest_info = { .class_init = sev_guest_class_init, }; +static void +sev_snp_guest_get_policy(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + visit_type_uint64(v, name, + (uint64_t *)&SEV_SNP_GUEST(obj)->kvm_start_conf.policy, + errp); +} + +static void +sev_snp_guest_set_policy(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + visit_type_uint64(v, name, + (uint64_t *)&SEV_SNP_GUEST(obj)->kvm_start_conf.policy, + errp); +} + +static char * +sev_snp_guest_get_guest_visible_workarounds(Object *obj, Error **errp) +{ + return g_strdup(SEV_SNP_GUEST(obj)->guest_visible_workarounds); +} + +static void +sev_snp_guest_set_guest_visible_workarounds(Object *obj, const char *value, + Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + struct kvm_sev_snp_launch_start *start = &sev_snp_guest->kvm_start_conf; + g_autofree guchar *blob; + gsize len; + + g_free(sev_snp_guest->guest_visible_workarounds); + + /* store the base64 str so we don't need to re-encode in getter */ + sev_snp_guest->guest_visible_workarounds = g_strdup(value); + + blob = qbase64_decode(sev_snp_guest->guest_visible_workarounds, + -1, &len, errp); + if (!blob) { + return; + } + + if (len != sizeof(start->gosvw)) { + error_setg(errp, "parameter length of %lu exceeds max of %lu", + len, sizeof(start->gosvw)); + return; + } + + memcpy(start->gosvw, blob, len); +} + +static char * +sev_snp_guest_get_id_block(Object *obj, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + return g_strdup(sev_snp_guest->id_block); +} + +static void +sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; + gsize len; + + g_free(sev_snp_guest->id_block); + g_free((guchar *)finish->id_block_uaddr); + + /* store the base64 str so we don't need to re-encode in getter */ + sev_snp_guest->id_block = g_strdup(value); + + finish->id_block_uaddr = + (uint64_t)qbase64_decode(sev_snp_guest->id_block, -1, &len, errp); + + if (!finish->id_block_uaddr) { + return; + } + + if (len != KVM_SEV_SNP_ID_BLOCK_SIZE) { + error_setg(errp, "parameter length of %lu not equal to %u", + len, KVM_SEV_SNP_ID_BLOCK_SIZE); + return; + } + + finish->id_block_en = (len) ? 1 : 0; +} + +static char * +sev_snp_guest_get_id_auth(Object *obj, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + return g_strdup(sev_snp_guest->id_auth); +} + +static void +sev_snp_guest_set_id_auth(Object *obj, const char *value, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; + gsize len; + + g_free(sev_snp_guest->id_auth); + g_free((guchar *)finish->id_auth_uaddr); + + /* store the base64 str so we don't need to re-encode in getter */ + sev_snp_guest->id_auth = g_strdup(value); + + finish->id_auth_uaddr = + (uint64_t)qbase64_decode(sev_snp_guest->id_auth, -1, &len, errp); + + if (!finish->id_auth_uaddr) { + return; + } + + if (len > KVM_SEV_SNP_ID_AUTH_SIZE) { + error_setg(errp, "parameter length:ID_AUTH %lu exceeds max of %u", + len, KVM_SEV_SNP_ID_AUTH_SIZE); + return; + } +} + +static bool +sev_snp_guest_get_author_key_enabled(Object *obj, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + return !!sev_snp_guest->kvm_finish_conf.auth_key_en; +} + +static void +sev_snp_guest_set_author_key_enabled(Object *obj, bool value, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + sev_snp_guest->kvm_finish_conf.auth_key_en = value; +} + +static bool +sev_snp_guest_get_vcek_disabled(Object *obj, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + return !!sev_snp_guest->kvm_finish_conf.vcek_disabled; +} + +static void +sev_snp_guest_set_vcek_disabled(Object *obj, bool value, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + sev_snp_guest->kvm_finish_conf.vcek_disabled = value; +} + +static char * +sev_snp_guest_get_host_data(Object *obj, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + return g_strdup(sev_snp_guest->host_data); +} + +static void +sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; + g_autofree guchar *blob; + gsize len; + + g_free(sev_snp_guest->host_data); + + /* store the base64 str so we don't need to re-encode in getter */ + sev_snp_guest->host_data = g_strdup(value); + + blob = qbase64_decode(sev_snp_guest->host_data, -1, &len, errp); + + if (!blob) { + return; + } + + if (len != sizeof(finish->host_data)) { + error_setg(errp, "parameter length of %lu not equal to %lu", + len, sizeof(finish->host_data)); + return; + } + + memcpy(finish->host_data, blob, len); +} + +static void +sev_snp_guest_class_init(ObjectClass *oc, void *data) +{ + object_class_property_add(oc, "policy", "uint64", + sev_snp_guest_get_policy, + sev_snp_guest_set_policy, NULL, NULL); + object_class_property_add_str(oc, "guest-visible-workarounds", + sev_snp_guest_get_guest_visible_workarounds, + sev_snp_guest_set_guest_visible_workarounds); + object_class_property_add_str(oc, "id-block", + sev_snp_guest_get_id_block, + sev_snp_guest_set_id_block); + object_class_property_add_str(oc, "id-auth", + sev_snp_guest_get_id_auth, + sev_snp_guest_set_id_auth); + object_class_property_add_bool(oc, "author-key-enabled", + sev_snp_guest_get_author_key_enabled, + sev_snp_guest_set_author_key_enabled); + object_class_property_add_bool(oc, "vcek-required", + sev_snp_guest_get_vcek_disabled, + sev_snp_guest_set_vcek_disabled); + object_class_property_add_str(oc, "host-data", + sev_snp_guest_get_host_data, + sev_snp_guest_set_host_data); +} + +static void +sev_snp_guest_instance_init(Object *obj) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + /* default init/start/finish params for kvm */ + sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY; +} + +/* guest info specific to sev-snp */ +static const TypeInfo sev_snp_guest_info = { + .parent = TYPE_SEV_COMMON, + .name = TYPE_SEV_SNP_GUEST, + .instance_size = sizeof(SevSnpGuestState), + .class_init = sev_snp_guest_class_init, + .instance_init = sev_snp_guest_instance_init, +}; + static void sev_register_types(void) { type_register_static(&sev_common_info); type_register_static(&sev_guest_info); + type_register_static(&sev_snp_guest_info); } type_init(sev_register_types); diff --git a/target/i386/sev.h b/target/i386/sev.h index df7b319f48..776e900cca 100644 --- a/target/i386/sev.h +++ b/target/i386/sev.h @@ -22,6 +22,7 @@ #define TYPE_SEV_COMMON "sev-common" #define TYPE_SEV_GUEST "sev-guest" +#define TYPE_SEV_SNP_GUEST "sev-snp-guest" #define SEV_POLICY_NODBG 0x1 #define SEV_POLICY_NOKS 0x2 -- Gitee From 468658be00aa8cede2867949b7e5532b22964ef7 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 30 May 2024 06:16:20 -0500 Subject: [PATCH 046/115] i386/sev: Add a sev_snp_enabled() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 99190f805dca9475fe244fbd8041961842657dc2 upstream. Add a simple helper to check if the current guest type is SNP. Also have SNP-enabled imply that SEV-ES is enabled as well, and fix up any places where the sev_es_enabled() check is expecting a pure/non-SNP guest. [Backport Changes] 1. In target/i386/kvm/kvm.c, updated the kvm_arch_init_vcpu(), kvm_arch_reset_vcpu(), and kvm_arch_init() functions to conditionally mask non-upstream SEV live migration–related logics using the newly introduced sev_snp_enabled() helper. This change aligns with the upstream commit and ensures that SEV live migration which is only supported for SEV and SEV-ES (non-SNP) guests is not applied to SNP guests. Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-9-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: PKumarAditya Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- target/i386/kvm/kvm.c | 22 ++++++++++++---------- target/i386/sev.c | 13 ++++++++++++- target/i386/sev.h | 2 ++ 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 10187850d1..8b387b1e95 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -2243,7 +2243,7 @@ int kvm_arch_init_vcpu(CPUState *cs) c->eax = MAX(c->eax, KVM_CPUID_SIGNATURE | 0x10); } - if (sev_enabled()) { + if (!sev_snp_enabled() && sev_enabled()) { c = cpuid_find_entry(&cpuid_data.cpuid, KVM_CPUID_FEATURES | kvm_base, 0); if (c) { @@ -2321,7 +2321,7 @@ void kvm_arch_reset_vcpu(X86CPU *cpu) env->mp_state = KVM_MP_STATE_RUNNABLE; } - if (cpu_is_bsp(cpu) && + if (!sev_snp_enabled() && cpu_is_bsp(cpu) && sev_enabled() && has_map_gpa_range) { sev_remove_shared_regions_list(0, -1); } @@ -2695,14 +2695,16 @@ int kvm_arch_init(MachineState *ms, KVMState *s) #endif } - has_map_gpa_range = kvm_check_extension(s, KVM_CAP_EXIT_HYPERCALL); - if (has_map_gpa_range) { - ret = kvm_vm_enable_cap(s, KVM_CAP_EXIT_HYPERCALL, 0, - KVM_EXIT_HYPERCALL_VALID_MASK); - if (ret < 0) { - error_report("kvm: Failed to enable MAP_GPA_RANGE cap: %s", - strerror(-ret)); - return ret; + if (!sev_snp_enabled()){ + has_map_gpa_range = kvm_check_extension(s, KVM_CAP_EXIT_HYPERCALL); + if (has_map_gpa_range) { + ret = kvm_vm_enable_cap(s, KVM_CAP_EXIT_HYPERCALL, 0, + KVM_EXIT_HYPERCALL_VALID_MASK); + if (ret < 0) { + error_report("kvm: Failed to enable MAP_GPA_RANGE cap: %s", + strerror(-ret)); + return ret; + } } } diff --git a/target/i386/sev.c b/target/i386/sev.c index 4ec73a01e0..b6400668ff 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -427,12 +427,21 @@ sev_enabled(void) return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON); } +bool +sev_snp_enabled(void) +{ + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; + + return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_SNP_GUEST); +} + bool sev_es_enabled(void) { ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; - return sev_enabled() && (SEV_GUEST(cgs)->policy & SEV_POLICY_ES); + return sev_snp_enabled() || + (sev_enabled() && SEV_GUEST(cgs)->policy & SEV_POLICY_ES); } uint32_t @@ -1196,7 +1205,9 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) "support", __func__); goto err; } + } + if (sev_es_enabled() && !sev_snp_enabled()) { if (!(status.flags & SEV_STATUS_FLAGS_CONFIG_ES)) { error_setg(errp, "%s: guest policy requires SEV-ES, but " "host SEV-ES support unavailable", diff --git a/target/i386/sev.h b/target/i386/sev.h index 776e900cca..07a6a0a34c 100644 --- a/target/i386/sev.h +++ b/target/i386/sev.h @@ -52,9 +52,11 @@ typedef struct SevKernelLoaderContext { #ifdef CONFIG_SEV bool sev_enabled(void); bool sev_es_enabled(void); +bool sev_snp_enabled(void); #else #define sev_enabled() 0 #define sev_es_enabled() 0 +#define sev_snp_enabled() 0 #endif uint32_t sev_get_cbit_position(void); -- Gitee From 5f2d1d4d4847c841dcef09bad78596c689a3bfd8 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 30 May 2024 06:16:21 -0500 Subject: [PATCH 047/115] i386/sev: Add sev_kvm_init() override for SEV class commit 990da8d243a8c59dafcbed78b56a0e4ffb1605d9 upstream. Some aspects of the init routine SEV are specific to SEV and not applicable for SNP guests, so move the SEV-specific bits into separate class method and retain only the common functionality. [Backport Changes] 1. In target/i386/sev.c, within sev_common_kvm_init(), the error handling if block (i.e `!runstate_check(RUN_STATE_INMIGRATE)`) added as part of non-upstreamed feature SEV-live-migration introduced was updated to use 'return -1' instead of 'goto err', to match the upstream changes and maintain consistency. 2. In target/i386/sev.c, the sev_common_kvm_init() function was updated to mask the non-upstream SEV live migration and CSV3 related logics in SNP guests by wrapping these logics with conditional block if (!sev_snp_enabled()). Along, with this pointer declaration to sev_guest was migrated inside these wrapper blocks to limit its scope to SEV/SEV-ES specific paths. 3. In target/i386/sev.c, the sev_common_kvm_init() function, the pointer declaration sev_guest (SevGuestState *sev_guest) was moved inside the modified if (is_hygon_cpu()) block to ensure it is only declared and accessed when the CPU type is Hygon and to limit its scope to SEV/SEV-ES specific paths. 4. In target/i386/sev.c, within sev_common_kvm_init(), the ram_block_notifier_add() call was removed along with the Hygon-specific conditional wrapper if (!csv3_enabled()), aligning the implementation with upstream changes. Co-developed-by: Michael Roth Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-10-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: PKumarAditya Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- target/i386/sev.c | 200 ++++++++++++++++++++++++++++------------------ 1 file changed, 123 insertions(+), 77 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index b6400668ff..a47dcdec96 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -81,6 +81,7 @@ struct SevCommonStateClass { /* public */ int (*launch_start)(SevCommonState *sev_common); void (*launch_finish)(SevCommonState *sev_common); + int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); }; /** @@ -1129,7 +1130,7 @@ out: return sev_common->kvm_type; } -static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { SevCommonState *sev_common = SEV_COMMON(cgs); char *devname; @@ -1142,12 +1143,6 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ConfidentialGuestSupportClass *cgs_class = (ConfidentialGuestSupportClass *) object_get_class(OBJECT(cgs)); - ret = ram_block_discard_disable(true); - if (ret) { - error_report("%s: cannot disable RAM discard", __func__); - return -1; - } - sev_common->state = SEV_STATE_UNINIT; host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL); @@ -1161,7 +1156,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) if (host_cbitpos != sev_common->cbitpos) { error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'", __func__, host_cbitpos, sev_common->cbitpos); - goto err; + return -1; } /* @@ -1174,7 +1169,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) error_setg(errp, "%s: reduced_phys_bits check failed," " it should be in the range of 1 to 63, requested '%d'", __func__, sev_common->reduced_phys_bits); - goto err; + return -1; } devname = object_property_get_str(OBJECT(sev_common), "sev-device", NULL); @@ -1183,7 +1178,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) error_setg(errp, "%s: Failed to open %s '%s'", __func__, devname, strerror(errno)); g_free(devname); - goto err; + return -1; } g_free(devname); @@ -1193,7 +1188,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) error_setg(errp, "%s: failed to get platform status ret=%d " "fw_error='%d: %s'", __func__, ret, fw_error, fw_error_to_str(fw_error)); - goto err; + return -1; } sev_common->build_id = status.build; sev_common->api_major = status.api_major; @@ -1203,7 +1198,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) if (!kvm_kernel_irqchip_allowed()) { error_setg(errp, "%s: SEV-ES guests require in-kernel irqchip" "support", __func__); - goto err; + return -1; } } @@ -1212,7 +1207,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) error_setg(errp, "%s: guest policy requires SEV-ES, but " "host SEV-ES support unavailable", __func__); - goto err; + return -1; } cmd = KVM_SEV_ES_INIT; } else { @@ -1220,25 +1215,25 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) } trace_kvm_sev_init(); - - SevGuestState *sev_guest = SEV_GUEST(sev_common); /* Only support reuse asid for CSV/CSV2 guest */ - if (is_hygon_cpu() && - (sev_guest->policy & GUEST_POLICY_REUSE_ASID)) { - char *user_id = NULL; - struct kvm_csv_init *init_cmd_buf = NULL; - - user_id = object_property_get_str(OBJECT(sev_guest), "user-id", NULL); - if (user_id && strlen(user_id)) { - init_cmd_buf = g_new0(struct kvm_csv_init, 1); - init_cmd_buf->len = strlen(user_id); - init_cmd_buf->userid_addr = (__u64)user_id; - } - ret = sev_ioctl(sev_common->sev_fd, cmd, init_cmd_buf, &fw_error); + if (is_hygon_cpu()) { + SevGuestState *sev_guest = SEV_GUEST(sev_common); + if (sev_guest->policy & GUEST_POLICY_REUSE_ASID) { + char *user_id = NULL; + struct kvm_csv_init *init_cmd_buf = NULL; + + user_id = object_property_get_str(OBJECT(sev_guest), "user-id", NULL); + if (user_id && strlen(user_id)) { + init_cmd_buf = g_new0(struct kvm_csv_init, 1); + init_cmd_buf->len = strlen(user_id); + init_cmd_buf->userid_addr = (__u64)user_id; + } + ret = sev_ioctl(sev_common->sev_fd, cmd, init_cmd_buf, &fw_error); - if (user_id) { - g_free(user_id); - g_free(init_cmd_buf); + if (user_id) { + g_free(user_id); + g_free(init_cmd_buf); + } } } else { if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) { @@ -1255,71 +1250,121 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) if (ret) { error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'", __func__, ret, fw_error, fw_error_to_str(fw_error)); - goto err; + return -1; } - /* Support CSV3 */ - if (!ret && cmd == KVM_SEV_ES_INIT) { - ret = csv3_init(sev_guest->policy, sev_common->sev_fd, (void *)&sev_common->state, &sev_ops); - if (ret) { - error_setg(errp, "%s: failed to init csv3 context", __func__); - goto err; + if (!sev_snp_enabled()) { + SevGuestState *sev_guest = SEV_GUEST(sev_common); + /* Support CSV3 */ + if (!ret && cmd == KVM_SEV_ES_INIT) { + ret = csv3_init(sev_guest->policy, sev_common->sev_fd, (void *)&sev_common->state, &sev_ops); + if (ret) { + error_setg(errp, "%s: failed to init csv3 context", __func__); + return -1; + } + /* The CSV3 guest is not resettable */ + if (csv3_enabled()) + csv_kvm_cpu_reset_inhibit = true; } - /* The CSV3 guest is not resettable */ - if (csv3_enabled()) - csv_kvm_cpu_reset_inhibit = true; - } - /* - * The LAUNCH context is used for new guest, if its an incoming guest - * then RECEIVE context will be created after the connection is established. - */ - if (!runstate_check(RUN_STATE_INMIGRATE)) { + /* + * The LAUNCH context is used for new guest, if its an incoming guest + * then RECEIVE context will be created after the connection is established. + */ + if (!runstate_check(RUN_STATE_INMIGRATE)) { + ret = klass->launch_start(sev_common); + if (ret) { + error_setg(errp, "%s: failed to create encryption context", __func__); + return -1; + } + if (klass->kvm_init && klass->kvm_init(cgs, errp)) { + return -1; + } + } else { + /* + * The CSV2 guest is not resettable after migrated to target machine, + * set csv_kvm_cpu_reset_inhibit to true to indicate the CSV2 guest is + * not resettable. + */ + if (is_hygon_cpu() && sev_es_enabled()) { + csv_kvm_cpu_reset_inhibit = true; + } + } + } else { ret = klass->launch_start(sev_common); + if (ret) { error_setg(errp, "%s: failed to create encryption context", __func__); - goto err; + return -1; } - } else { - /* - * The CSV2 guest is not resettable after migrated to target machine, - * set csv_kvm_cpu_reset_inhibit to true to indicate the CSV2 guest is - * not resettable. - */ - if (is_hygon_cpu() && sev_es_enabled()) { - csv_kvm_cpu_reset_inhibit = true; + if (klass->kvm_init && klass->kvm_init(cgs, errp)) { + return -1; } } - /* CSV3 guest do not need notifier to reg/unreg memory */ - if (!csv3_enabled()) { - ram_block_notifier_add(&sev_ram_notifier); - } - qemu_add_machine_init_done_notifier(&sev_machine_done_notify); qemu_add_vm_change_state_handler(sev_vm_state_change, sev_common); - migration_add_notifier(&sev_migration_state, sev_migration_state_notifier); - if (csv3_enabled()) { - cgs_class->memory_encryption_ops = &csv3_memory_encryption_ops; - } else { - cgs_class->memory_encryption_ops = &sev_memory_encryption_ops; - } - QTAILQ_INIT(&sev_guest->shared_regions_list); + if (!sev_snp_enabled()) { + SevGuestState *sev_guest = SEV_GUEST(sev_common); + migration_add_notifier(&sev_migration_state, sev_migration_state_notifier); - /* Determine whether support MSR_AMD64_SEV_ES_GHCB */ - if (sev_es_enabled()) { - sev_kvm_has_msr_ghcb = - kvm_vm_check_extension(kvm_state, KVM_CAP_SEV_ES_GHCB); - } else { - sev_kvm_has_msr_ghcb = false; + if (csv3_enabled()) { + cgs_class->memory_encryption_ops = &csv3_memory_encryption_ops; + } else { + cgs_class->memory_encryption_ops = &sev_memory_encryption_ops; + } + QTAILQ_INIT(&sev_guest->shared_regions_list); + + /* Determine whether support MSR_AMD64_SEV_ES_GHCB */ + if (sev_es_enabled()) { + sev_kvm_has_msr_ghcb = + kvm_vm_check_extension(kvm_state, KVM_CAP_SEV_ES_GHCB); + } else { + sev_kvm_has_msr_ghcb = false; + } } cgs->ready = true; return 0; -err: - ram_block_discard_disable(false); - return -1; +} + +static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +{ + int ret; + + /* + * SEV/SEV-ES rely on pinned memory to back guest RAM so discarding + * isn't actually possible. With SNP, only guest_memfd pages are used + * for private guest memory, so discarding of shared memory is still + * possible.. + */ + ret = ram_block_discard_disable(true); + if (ret) { + error_setg(errp, "%s: cannot disable RAM discard", __func__); + return -1; + } + + /* + * SEV uses these notifiers to register/pin pages prior to guest use, + * but SNP relies on guest_memfd for private pages, which has its + * own internal mechanisms for registering/pinning private memory. + */ + /* CSV3 guest do not need notifier to reg/unreg memory */ + if (!csv3_enabled()) { + ram_block_notifier_add(&sev_ram_notifier); + } + + /* + * The machine done notify event is used for SEV guests to get the + * measurement of the encrypted images. When SEV-SNP is enabled, the + * measurement is part of the guest attestation process where it can + * be collected without any reliance on the VMM. So skip registering + * the notifier for SNP in favor of using guest attestation instead. + */ + qemu_add_machine_init_done_notifier(&sev_machine_done_notify); + + return 0; } int @@ -2815,7 +2860,7 @@ sev_common_class_init(ObjectClass *oc, void *data) ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); - klass->kvm_init = sev_kvm_init; + klass->kvm_init = sev_common_kvm_init; x86_klass->kvm_type = sev_kvm_type; object_class_property_add_str(oc, "sev-device", @@ -2904,6 +2949,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) klass->launch_start = sev_launch_start; klass->launch_finish = sev_launch_finish; + klass->kvm_init = sev_kvm_init; object_class_property_add_str(oc, "dh-cert-file", sev_guest_get_dh_cert_file, -- Gitee From 8ce2c5eb7422ca6f455834780da91d969d5280c7 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 30 May 2024 06:16:22 -0500 Subject: [PATCH 048/115] i386/sev: Add snp_kvm_init() override for SNP class commit 125b95a6d465a03ff30816eff0b1889aec01f0c3 upstream. SNP does not support SMM and requires guest_memfd for private guest memory, so add SNP specific kvm_init() functionality in snp_kvm_init() class method. Signed-off-by: Michael Roth Co-developed-by: Pankaj Gupta Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-11-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: PKumarAditya Signed-off-by: PrithivishS --- target/i386/sev.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index a47dcdec96..f39dd3f72f 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -1132,12 +1132,12 @@ out: static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { - SevCommonState *sev_common = SEV_COMMON(cgs); char *devname; int ret, fw_error, cmd; uint32_t ebx; uint32_t host_cbitpos; struct sev_user_data_status status = {}; + SevCommonState *sev_common = SEV_COMMON(cgs); SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); ConfidentialGuestSupportClass *cgs_class = @@ -1367,6 +1367,21 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) return 0; } +static int sev_snp_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + X86MachineState *x86ms = X86_MACHINE(ms); + + if (x86ms->smm == ON_OFF_AUTO_AUTO) { + x86ms->smm = ON_OFF_AUTO_OFF; + } else if (x86ms->smm == ON_OFF_AUTO_ON) { + error_setg(errp, "SEV-SNP does not support SMM."); + return -1; + } + + return 0; +} + int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) { @@ -3201,6 +3216,10 @@ sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp) static void sev_snp_guest_class_init(ObjectClass *oc, void *data) { + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + + klass->kvm_init = sev_snp_kvm_init; + object_class_property_add(oc, "policy", "uint64", sev_snp_guest_get_policy, sev_snp_guest_set_policy, NULL, NULL); @@ -3227,8 +3246,11 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) static void sev_snp_guest_instance_init(Object *obj) { + ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj); SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + cgs->require_guest_memfd = true; + /* default init/start/finish params for kvm */ sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY; } -- Gitee From 683098aeeba816c7f8e26a30d20283937d61fca4 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 30 May 2024 06:16:23 -0500 Subject: [PATCH 049/115] i386/cpu: Set SEV-SNP CPUID bit when SNP enabled commit 7831221941cccbde922412c1550ed8b4bce7c361 upstream. SNP guests will rely on this bit to determine certain feature support. Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-12-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: PKumarAditya Signed-off-by: PrithivishS --- target/i386/cpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 8360ea3d61..a70cc4b3d8 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -7180,6 +7180,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *eax = 0x2; *eax |= sev_es_enabled() ? 0x8 : 0; *eax |= csv3_enabled() ? 0x40000000 : 0; /* bit 30 for CSV3 */ + *eax |= sev_snp_enabled() ? 0x10 : 0; *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ } -- Gitee From 81cdb36ed5f96d8fff81732666341f3bc6613bdc Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 30 May 2024 06:16:24 -0500 Subject: [PATCH 050/115] i386/sev: Don't return launch measurements for SEV-SNP guests commit 73ae63b162fc1fed520f53ad200712964d7d0264 upstream. For SEV-SNP guests, launch measurement is queried from within the guest during attestation, so don't attempt to return it as part of query-sev-launch-measure. Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-13-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: PKumarAditya Signed-off-by: PrithivishS --- target/i386/sev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index f39dd3f72f..1220418265 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -914,7 +914,9 @@ sev_launch_get_measure(Notifier *notifier, void *unused) static char *sev_get_launch_measurement(void) { - SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; + SevGuestState *sev_guest = + (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST); if (sev_guest && SEV_COMMON(sev_guest)->state >= SEV_STATE_LAUNCH_SECRET) { -- Gitee From 4528b4e0aa71b0b7b18d415c582cde205fc82986 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 31 May 2024 12:44:44 +0200 Subject: [PATCH 051/115] i386/sev: Add a class method to determine KVM VM type for SNP guests commit a808132f6d8e855bd83a400570ec91d2e00bebe3 upstream. SEV guests can use either KVM_X86_DEFAULT_VM, KVM_X86_SEV_VM, or KVM_X86_SEV_ES_VM depending on the configuration and what the host kernel supports. SNP guests on the other hand can only ever use KVM_X86_SNP_VM, so split determination of VM type out into a separate class method that can be set accordingly for sev-guest vs. sev-snp-guest objects and add handling for SNP. Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-14-pankaj.gupta@amd.com> [Remove unnecessary function pointer declaration. - Paolo] Signed-off-by: Paolo Bonzini Signed-off-by: PKumarAditya Signed-off-by: PrithivishS --- target/i386/kvm/kvm.c | 1 + target/i386/sev.c | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 8b387b1e95..a6bccfd1ac 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -168,6 +168,7 @@ static const char *vm_type_name[] = { [KVM_X86_DEFAULT_VM] = "default", [KVM_X86_SEV_VM] = "SEV", [KVM_X86_SEV_ES_VM] = "SEV-ES", + [KVM_X86_SNP_VM] = "SEV-SNP", }; bool kvm_is_vm_type_supported(int type) diff --git a/target/i386/sev.c b/target/i386/sev.c index 1220418265..5882ff8903 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -1132,6 +1132,11 @@ out: return sev_common->kvm_type; } +static int sev_snp_kvm_type(X86ConfidentialGuest *cg) +{ + return KVM_X86_SNP_VM; +} + static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { char *devname; @@ -1141,6 +1146,8 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) struct sev_user_data_status status = {}; SevCommonState *sev_common = SEV_COMMON(cgs); SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); + X86ConfidentialGuestClass *x86_klass = + X86_CONFIDENTIAL_GUEST_GET_CLASS(cgs); ConfidentialGuestSupportClass *cgs_class = (ConfidentialGuestSupportClass *) object_get_class(OBJECT(cgs)); @@ -1238,7 +1245,7 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) } } } else { - if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) { + if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) { cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT; ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error); @@ -2875,10 +2882,8 @@ static void sev_common_class_init(ObjectClass *oc, void *data) { ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); - X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); klass->kvm_init = sev_common_kvm_init; - x86_klass->kvm_type = sev_kvm_type; object_class_property_add_str(oc, "sev-device", sev_common_get_sev_device, @@ -2963,10 +2968,12 @@ static void sev_guest_class_init(ObjectClass *oc, void *data) { SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); klass->launch_start = sev_launch_start; klass->launch_finish = sev_launch_finish; klass->kvm_init = sev_kvm_init; + x86_klass->kvm_type = sev_kvm_type; object_class_property_add_str(oc, "dh-cert-file", sev_guest_get_dh_cert_file, @@ -3219,8 +3226,10 @@ static void sev_snp_guest_class_init(ObjectClass *oc, void *data) { SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); klass->kvm_init = sev_snp_kvm_init; + x86_klass->kvm_type = sev_snp_kvm_type; object_class_property_add(oc, "policy", "uint64", sev_snp_guest_get_policy, -- Gitee From 6ad0d1e75de60f03eb9f7ea5c33cc5da24dc1eaf Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 30 May 2024 06:16:26 -0500 Subject: [PATCH 052/115] i386/sev: Update query-sev QAPI format to handle SEV-SNP commit 59d3740cb4ac0f010ce35877572904f6297284b4 upstream. Most of the current 'query-sev' command is relevant to both legacy SEV/SEV-ES guests and SEV-SNP guests, with 2 exceptions: - 'policy' is a 64-bit field for SEV-SNP, not 32-bit, and the meaning of the bit positions has changed - 'handle' is not relevant to SEV-SNP To address this, this patch adds a new 'sev-type' field that can be used as a discriminator to select between SEV and SEV-SNP-specific fields/formats without breaking compatibility for existing management tools (so long as management tools that add support for launching SEV-SNP guest update their handling of query-sev appropriately). The corresponding HMP command has also been fixed up similarly. Signed-off-by: Michael Roth Co-developed-by:Pankaj Gupta Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-15-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: PKumarAditya Signed-off-by: PrithivishS --- qapi/misc-target.json | 72 ++++++++++++++++++++++++++++++++++--------- target/i386/sev.c | 55 +++++++++++++++++++++------------ target/i386/sev.h | 3 ++ 3 files changed, 96 insertions(+), 34 deletions(-) diff --git a/qapi/misc-target.json b/qapi/misc-target.json index 3df0062f3d..e9952b218c 100644 --- a/qapi/misc-target.json +++ b/qapi/misc-target.json @@ -47,6 +47,50 @@ 'send-update', 'receive-update' ], 'if': 'TARGET_I386' } +## +# @SevGuestType: +# +# An enumeration indicating the type of SEV guest being run. +# +# @sev: The guest is a legacy SEV or SEV-ES guest. +# +# @sev-snp: The guest is an SEV-SNP guest. +# +# Since: 6.2 +## +{ 'enum': 'SevGuestType', + 'data': [ 'sev', 'sev-snp' ], + 'if': 'TARGET_I386' } + +## +# @SevGuestInfo: +# +# Information specific to legacy SEV/SEV-ES guests. +# +# @policy: SEV policy value +# +# @handle: SEV firmware handle +# +# Since: 2.12 +## +{ 'struct': 'SevGuestInfo', + 'data': { 'policy': 'uint32', + 'handle': 'uint32' }, + 'if': 'TARGET_I386' } + +## +# @SevSnpGuestInfo: +# +# Information specific to SEV-SNP guests. +# +# @snp-policy: SEV-SNP policy value +# +# Since: 9.1 +## +{ 'struct': 'SevSnpGuestInfo', + 'data': { 'snp-policy': 'uint64' }, + 'if': 'TARGET_I386' } + ## # @SevInfo: # @@ -60,25 +104,25 @@ # # @build-id: SEV FW build id # -# @policy: SEV policy value -# # @state: SEV guest state # -# @handle: SEV firmware handle +# @sev-type: Type of SEV guest being run # # Since: 2.12 ## -{ 'struct': 'SevInfo', - 'data': { 'enabled': 'bool', - 'api-major': 'uint8', - 'api-minor' : 'uint8', - 'build-id' : 'uint8', - 'policy' : 'uint32', - 'state' : 'SevState', - 'handle' : 'uint32' - }, - 'if': 'TARGET_I386' -} +{ 'union': 'SevInfo', + 'base': { 'enabled': 'bool', + 'api-major': 'uint8', + 'api-minor' : 'uint8', + 'build-id' : 'uint8', + 'state' : 'SevState', + 'sev-type' : 'SevGuestType' }, + 'discriminator': 'sev-type', + 'data': { + 'sev': 'SevGuestInfo', + 'sev-snp': 'SevSnpGuestInfo' }, + 'if': 'TARGET_I386' } + ## # @query-sev: diff --git a/target/i386/sev.c b/target/i386/sev.c index 5882ff8903..981cd35e46 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -465,25 +465,27 @@ static SevInfo *sev_get_info(void) { SevInfo *info; SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); - SevGuestState *sev_guest = - (SevGuestState *)object_dynamic_cast(OBJECT(sev_common), - TYPE_SEV_GUEST); info = g_new0(SevInfo, 1); info->enabled = sev_enabled(); if (info->enabled) { - if (sev_guest) { - info->handle = sev_guest->handle; - } info->api_major = sev_common->api_major; info->api_minor = sev_common->api_minor; info->build_id = sev_common->build_id; info->state = sev_common->state; - /* we only report the lower 32-bits of policy for SNP, ok for now... */ - info->policy = - (uint32_t)object_property_get_uint(OBJECT(sev_common), - "policy", NULL); + + if (sev_snp_enabled()) { + info->sev_type = SEV_GUEST_TYPE_SEV_SNP; + info->u.sev_snp.snp_policy = + object_property_get_uint(OBJECT(sev_common), "policy", NULL); + } else { + info->sev_type = SEV_GUEST_TYPE_SEV; + info->u.sev.handle = SEV_GUEST(sev_common)->handle; + info->u.sev.policy = + (uint32_t)object_property_get_uint(OBJECT(sev_common), + "policy", NULL); + } } return info; @@ -506,20 +508,33 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict) { SevInfo *info = sev_get_info(); - if (info && info->enabled) { - monitor_printf(mon, "handle: %d\n", info->handle); - monitor_printf(mon, "state: %s\n", SevState_str(info->state)); - monitor_printf(mon, "build: %d\n", info->build_id); - monitor_printf(mon, "api version: %d.%d\n", - info->api_major, info->api_minor); + if (!info || !info->enabled) { + monitor_printf(mon, "SEV is not enabled\n"); + goto out; + } + + monitor_printf(mon, "SEV type: %s\n", SevGuestType_str(info->sev_type)); + monitor_printf(mon, "state: %s\n", SevState_str(info->state)); + monitor_printf(mon, "build: %d\n", info->build_id); + monitor_printf(mon, "api version: %d.%d\n", info->api_major, + info->api_minor); + + if (sev_snp_enabled()) { monitor_printf(mon, "debug: %s\n", - info->policy & SEV_POLICY_NODBG ? "off" : "on"); - monitor_printf(mon, "key-sharing: %s\n", - info->policy & SEV_POLICY_NOKS ? "off" : "on"); + info->u.sev_snp.snp_policy & SEV_SNP_POLICY_DBG ? "on" + : "off"); + monitor_printf(mon, "SMT allowed: %s\n", + info->u.sev_snp.snp_policy & SEV_SNP_POLICY_SMT ? "on" + : "off"); } else { - monitor_printf(mon, "SEV is not enabled\n"); + monitor_printf(mon, "handle: %d\n", info->u.sev.handle); + monitor_printf(mon, "debug: %s\n", + info->u.sev.policy & SEV_POLICY_NODBG ? "off" : "on"); + monitor_printf(mon, "key-sharing: %s\n", + info->u.sev.policy & SEV_POLICY_NOKS ? "off" : "on"); } +out: qapi_free_SevInfo(info); } diff --git a/target/i386/sev.h b/target/i386/sev.h index 07a6a0a34c..37d95ea969 100644 --- a/target/i386/sev.h +++ b/target/i386/sev.h @@ -31,6 +31,9 @@ #define SEV_POLICY_DOMAIN 0x10 #define SEV_POLICY_SEV 0x20 +#define SEV_SNP_POLICY_SMT 0x10000 +#define SEV_SNP_POLICY_DBG 0x80000 + typedef struct SevKernelLoaderContext { char *setup_data; size_t setup_size; -- Gitee From a9e0dcd2ecd83653275ea77c8bde47a8f10d55f1 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Thu, 30 May 2024 06:16:27 -0500 Subject: [PATCH 053/115] i386/sev: Add the SNP launch start context commit d3107f882ec22cfb211eab7efa0c4e95f5ce11bb upstream. The SNP_LAUNCH_START is called first to create a cryptographic launch context within the firmware. Signed-off-by: Brijesh Singh Signed-off-by: Michael Roth Co-developed-by: Pankaj Gupta Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-16-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- target/i386/sev.c | 38 ++++++++++++++++++++++++++++++++++++++ target/i386/trace-events | 1 + 2 files changed, 39 insertions(+) diff --git a/target/i386/sev.c b/target/i386/sev.c index 981cd35e46..61b377e3cf 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -42,6 +42,7 @@ #include "confidential-guest.h" #include "hw/i386/pc.h" #include "exec/address-spaces.h" +#include "qemu/queue.h" OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) @@ -144,6 +145,16 @@ struct SevSnpGuestState { #define DEFAULT_SEV_DEVICE "/dev/sev" #define DEFAULT_SEV_SNP_POLICY 0x30000 +typedef struct SevLaunchUpdateData { + QTAILQ_ENTRY(SevLaunchUpdateData) next; + hwaddr gpa; + void *hva; + uint64_t len; + int type; +} SevLaunchUpdateData; + +static QTAILQ_HEAD(, SevLaunchUpdateData) launch_update; + #define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" typedef struct __attribute__((__packed__)) SevInfoBlock { /* SEV-ES Reset Vector Address */ @@ -777,6 +788,31 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len) return 0; } +static int +sev_snp_launch_start(SevCommonState *sev_common) +{ + int fw_error, rc; + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(sev_common); + struct kvm_sev_snp_launch_start *start = &sev_snp_guest->kvm_start_conf; + + trace_kvm_sev_snp_launch_start(start->policy, + sev_snp_guest->guest_visible_workarounds); + + rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_START, + start, &fw_error); + if (rc < 0) { + error_report("%s: SNP_LAUNCH_START ret=%d fw_error=%d '%s'", + __func__, rc, fw_error, fw_error_to_str(fw_error)); + return 1; + } + + QTAILQ_INIT(&launch_update); + + sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_UPDATE); + + return 0; +} + static int sev_launch_start(SevCommonState *sev_common) { @@ -3243,9 +3279,11 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + klass->launch_start = sev_snp_launch_start; klass->kvm_init = sev_snp_kvm_init; x86_klass->kvm_type = sev_snp_kvm_type; + object_class_property_add(oc, "policy", "uint64", sev_snp_guest_get_policy, sev_snp_guest_set_policy, NULL, NULL); diff --git a/target/i386/trace-events b/target/i386/trace-events index 5d4a709a39..40c55dd08c 100644 --- a/target/i386/trace-events +++ b/target/i386/trace-events @@ -11,6 +11,7 @@ kvm_sev_launch_measurement(const char *value) "data %s" kvm_sev_launch_finish(void) "" kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d" kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s" +kvm_sev_snp_launch_start(uint64_t policy, char *gosvw) "policy 0x%" PRIx64 " gosvw %s" kvm_sev_send_start(uint64_t pdh, int l1, uint64_t plat, int l2, uint64_t amd, int l3) "pdh 0x%" PRIx64 " len %d plat 0x%" PRIx64 " len %d amd 0x%" PRIx64 " len %d" kvm_sev_send_update_data(void *src, void *dst, int len) "guest %p trans %p len %d" kvm_sev_send_finish(void) "" -- Gitee From f210b62bacfce307fea3dd3d4572286b23d03037 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Thu, 30 May 2024 06:16:28 -0500 Subject: [PATCH 054/115] i386/sev: Add handling to encrypt/finalize guest launch data commit 9f3a6999f9730a694d7db448a99f9c9cb6515992 upstream. Process any queued up launch data and encrypt/measure it into the SNP guest instance prior to initial guest launch. This also updates the KVM_SEV_SNP_LAUNCH_UPDATE call to handle partial update responses. Signed-off-by: Brijesh Singh Co-developed-by: Michael Roth Signed-off-by: Michael Roth Co-developed-by: Pankaj Gupta Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-17-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- target/i386/sev.c | 112 ++++++++++++++++++++++++++++++++++++++- target/i386/trace-events | 2 + 2 files changed, 113 insertions(+), 1 deletion(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 61b377e3cf..3f26d6b580 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -859,6 +859,76 @@ out: return ret; } +static const char * +snp_page_type_to_str(int type) +{ + switch (type) { + case KVM_SEV_SNP_PAGE_TYPE_NORMAL: return "Normal"; + case KVM_SEV_SNP_PAGE_TYPE_ZERO: return "Zero"; + case KVM_SEV_SNP_PAGE_TYPE_UNMEASURED: return "Unmeasured"; + case KVM_SEV_SNP_PAGE_TYPE_SECRETS: return "Secrets"; + case KVM_SEV_SNP_PAGE_TYPE_CPUID: return "Cpuid"; + default: return "unknown"; + } +} + +static int +sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, + SevLaunchUpdateData *data) +{ + int ret, fw_error; + struct kvm_sev_snp_launch_update update = {0}; + + if (!data->hva || !data->len) { + error_report("SNP_LAUNCH_UPDATE called with invalid address" + "/ length: %p / %lx", + data->hva, data->len); + return 1; + } + + update.uaddr = (__u64)(unsigned long)data->hva; + update.gfn_start = data->gpa >> TARGET_PAGE_BITS; + update.len = data->len; + update.type = data->type; + + /* + * KVM_SEV_SNP_LAUNCH_UPDATE requires that GPA ranges have the private + * memory attribute set in advance. + */ + ret = kvm_set_memory_attributes_private(data->gpa, data->len); + if (ret) { + error_report("SEV-SNP: failed to configure initial" + "private guest memory"); + goto out; + } + + while (update.len || ret == -EAGAIN) { + trace_kvm_sev_snp_launch_update(update.uaddr, update.gfn_start << + TARGET_PAGE_BITS, update.len, + snp_page_type_to_str(update.type)); + + ret = sev_ioctl(SEV_COMMON(sev_snp_guest)->sev_fd, + KVM_SEV_SNP_LAUNCH_UPDATE, + &update, &fw_error); + if (ret && ret != -EAGAIN) { + error_report("SNP_LAUNCH_UPDATE ret=%d fw_error=%d '%s'", + ret, fw_error, fw_error_to_str(fw_error)); + break; + } + } + +out: + if (!ret && update.gfn_start << TARGET_PAGE_BITS != data->gpa + data->len) { + error_report("SEV-SNP: expected update of GPA range %lx-%lx," + "got GPA range %lx-%llx", + data->gpa, data->gpa + data->len, data->gpa, + update.gfn_start << TARGET_PAGE_BITS); + ret = -EIO; + } + + return ret; +} + static int sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) { @@ -1044,6 +1114,46 @@ err: return ret; } +static void +sev_snp_launch_finish(SevCommonState *sev_common) +{ + int ret, error; + Error *local_err = NULL; + SevLaunchUpdateData *data; + SevSnpGuestState *sev_snp = SEV_SNP_GUEST(sev_common); + struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf; + + QTAILQ_FOREACH(data, &launch_update, next) { + ret = sev_snp_launch_update(sev_snp, data); + if (ret) { + exit(1); + } + } + + trace_kvm_sev_snp_launch_finish(sev_snp->id_block, sev_snp->id_auth, + sev_snp->host_data); + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_FINISH, + finish, &error); + if (ret) { + error_report("SNP_LAUNCH_FINISH ret=%d fw_error=%d '%s'", + ret, error, fw_error_to_str(error)); + exit(1); + } + + sev_set_guest_state(sev_common, SEV_STATE_RUNNING); + + /* add migration blocker */ + error_setg(&sev_mig_blocker, + "SEV-SNP: Migration is not implemented"); + ret = migrate_add_blocker(&sev_mig_blocker, &local_err); + if (local_err) { + error_report_err(local_err); + error_free(sev_mig_blocker); + exit(1); + } +} + + static void sev_vm_state_change(void *opaque, bool running, RunState state) { @@ -3280,10 +3390,10 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); klass->launch_start = sev_snp_launch_start; + klass->launch_finish = sev_snp_launch_finish; klass->kvm_init = sev_snp_kvm_init; x86_klass->kvm_type = sev_snp_kvm_type; - object_class_property_add(oc, "policy", "uint64", sev_snp_guest_get_policy, sev_snp_guest_set_policy, NULL, NULL); diff --git a/target/i386/trace-events b/target/i386/trace-events index 40c55dd08c..6c784acdbc 100644 --- a/target/i386/trace-events +++ b/target/i386/trace-events @@ -12,6 +12,8 @@ kvm_sev_launch_finish(void) "" kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d" kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s" kvm_sev_snp_launch_start(uint64_t policy, char *gosvw) "policy 0x%" PRIx64 " gosvw %s" +kvm_sev_snp_launch_update(uint64_t src, uint64_t gpa, uint64_t len, const char *type) "src 0x%" PRIx64 " gpa 0x%" PRIx64 " len 0x%" PRIx64 " (%s page)" +kvm_sev_snp_launch_finish(char *id_block, char *id_auth, char *host_data) "id_block %s id_auth %s host_data %s" kvm_sev_send_start(uint64_t pdh, int l1, uint64_t plat, int l2, uint64_t amd, int l3) "pdh 0x%" PRIx64 " len %d plat 0x%" PRIx64 " len %d amd 0x%" PRIx64 " len %d" kvm_sev_send_update_data(void *src, void *dst, int len) "guest %p trans %p len %d" kvm_sev_send_finish(void) "" -- Gitee From 5cefb70e10189bee1be3ebdc96dc8d02c6e66959 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 30 May 2024 06:16:29 -0500 Subject: [PATCH 055/115] i386/sev: Set CPU state to protected once SNP guest payload is finalized commit 3d44fdff60ea66fbd7a33f5d32b50843cd80f48a upstream. Once KVM_SNP_LAUNCH_FINISH is called the vCPU state is copied into the vCPU's VMSA page and measured/encrypted. Any attempt to read/write CPU state afterward will only be acting on the initial data and so are effectively no-ops. Set the vCPU state to protected at this point so that QEMU don't continue trying to re-sync vCPU data during guest runtime. Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-18-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- target/i386/sev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/target/i386/sev.c b/target/i386/sev.c index 3f26d6b580..0a212ec91b 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -1140,6 +1140,7 @@ sev_snp_launch_finish(SevCommonState *sev_common) exit(1); } + kvm_mark_guest_state_protected(); sev_set_guest_state(sev_common, SEV_STATE_RUNNING); /* add migration blocker */ -- Gitee From 7d5fc9dae408c49b2a8ab4dc4401e725435c9f61 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Thu, 30 May 2024 06:16:30 -0500 Subject: [PATCH 056/115] hw/i386/sev: Add function to get SEV metadata from OVMF header commit f3c30c575d34122573b7370a7da5ca3a27dde481 upstream. A recent version of OVMF expanded the reset vector GUID list to add SEV-specific metadata GUID. The SEV metadata describes the reserved memory regions such as the secrets and CPUID page used during the SEV-SNP guest launch. The pc_system_get_ovmf_sev_metadata_ptr() is used to retieve the SEV metadata pointer from the OVMF GUID list. Signed-off-by: Brijesh Singh Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-19-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- hw/i386/pc_sysfw.c | 4 ++++ include/hw/i386/pc.h | 26 ++++++++++++++++++++++++++ target/i386/sev-sysemu-stub.c | 4 ++++ target/i386/sev.c | 32 ++++++++++++++++++++++++++++++++ target/i386/sev.h | 2 ++ 5 files changed, 68 insertions(+) diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index 7c6a910250..f0c0cea47d 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -259,6 +259,10 @@ void x86_firmware_configure(void *ptr, int size) pc_system_parse_ovmf_flash(ptr, size); if (sev_enabled()) { + + /* Copy the SEV metadata table (if it exists) */ + pc_system_parse_sev_metadata(ptr, size); + ret = sev_es_save_reset_vector(ptr, size); if (ret) { error_report("failed to locate and/or save reset vector"); diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index ee5138cb97..4cee7640b1 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -167,6 +167,32 @@ void pc_guest_info_init(PCMachineState *pcms); #define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size" #define PCI_HOST_PROP_SMM_RANGES "smm-ranges" +typedef enum { + SEV_DESC_TYPE_UNDEF, + /* The section contains the region that must be validated by the VMM. */ + SEV_DESC_TYPE_SNP_SEC_MEM, + /* The section contains the SNP secrets page */ + SEV_DESC_TYPE_SNP_SECRETS, + /* The section contains address that can be used as a CPUID page */ + SEV_DESC_TYPE_CPUID, + +} ovmf_sev_metadata_desc_type; + +typedef struct __attribute__((__packed__)) OvmfSevMetadataDesc { + uint32_t base; + uint32_t len; + ovmf_sev_metadata_desc_type type; +} OvmfSevMetadataDesc; + +typedef struct __attribute__((__packed__)) OvmfSevMetadata { + uint8_t signature[4]; + uint32_t len; + uint32_t version; + uint32_t num_desc; + OvmfSevMetadataDesc descs[]; +} OvmfSevMetadata; + +OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void); void pc_pci_as_mapping_init(MemoryRegion *system_memory, MemoryRegion *pci_address_space); diff --git a/target/i386/sev-sysemu-stub.c b/target/i386/sev-sysemu-stub.c index 96e1c15cc3..fc1c57c411 100644 --- a/target/i386/sev-sysemu-stub.c +++ b/target/i386/sev-sysemu-stub.c @@ -67,3 +67,7 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict) { monitor_printf(mon, "SEV is not available in this QEMU\n"); } + +void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size) +{ +} diff --git a/target/i386/sev.c b/target/i386/sev.c index 0a212ec91b..1a74963c0b 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -700,6 +700,38 @@ SevCapability *qmp_query_sev_capabilities(Error **errp) return sev_get_capabilities(errp); } +static OvmfSevMetadata *ovmf_sev_metadata_table; + +#define OVMF_SEV_META_DATA_GUID "dc886566-984a-4798-A75e-5585a7bf67cc" +typedef struct __attribute__((__packed__)) OvmfSevMetadataOffset { + uint32_t offset; +} OvmfSevMetadataOffset; + +OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void) +{ + return ovmf_sev_metadata_table; +} + +void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size) +{ + OvmfSevMetadata *metadata; + OvmfSevMetadataOffset *data; + + if (!pc_system_ovmf_table_find(OVMF_SEV_META_DATA_GUID, (uint8_t **)&data, + NULL)) { + return; + } + + metadata = (OvmfSevMetadata *)(flash_ptr + flash_size - data->offset); + if (memcmp(metadata->signature, "ASEV", 4) != 0 || + metadata->len < sizeof(OvmfSevMetadata) || + metadata->len > flash_size - data->offset) { + return; + } + + ovmf_sev_metadata_table = g_memdup2(metadata, metadata->len); +} + static SevAttestationReport *sev_get_attestation_report(const char *mnonce, Error **errp) { diff --git a/target/i386/sev.h b/target/i386/sev.h index 37d95ea969..55ec43c655 100644 --- a/target/i386/sev.h +++ b/target/i386/sev.h @@ -96,4 +96,6 @@ struct sev_ops { extern struct sev_ops sev_ops; +void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size); + #endif -- Gitee From ae5499532ba2acc9d94e99129976e85b5c09a05e Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Thu, 30 May 2024 06:16:31 -0500 Subject: [PATCH 057/115] i386/sev: Add support for populating OVMF metadata pages commit 3d8c2a7f4806ff39423312e503737fd76c34dcae upstream. OVMF reserves various pages so they can be pre-initialized/validated prior to launching the guest. Add support for populating these pages with the expected content. Signed-off-by: Brijesh Singh Signed-off-by: Michael Roth Co-developed-by: Pankaj Gupta Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-20-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- target/i386/sev.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/target/i386/sev.c b/target/i386/sev.c index 1a74963c0b..c3112225de 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -1146,15 +1146,89 @@ err: return ret; } +static int +snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type) +{ + SevLaunchUpdateData *data; + + data = g_new0(SevLaunchUpdateData, 1); + data->gpa = gpa; + data->hva = hva; + data->len = len; + data->type = type; + + QTAILQ_INSERT_TAIL(&launch_update, data, next); + + return 0; +} + +static int +snp_metadata_desc_to_page_type(int desc_type) +{ + switch (desc_type) { + /* Add the umeasured prevalidated pages as a zero page */ + case SEV_DESC_TYPE_SNP_SEC_MEM: return KVM_SEV_SNP_PAGE_TYPE_ZERO; + case SEV_DESC_TYPE_SNP_SECRETS: return KVM_SEV_SNP_PAGE_TYPE_SECRETS; + case SEV_DESC_TYPE_CPUID: return KVM_SEV_SNP_PAGE_TYPE_CPUID; + default: + return KVM_SEV_SNP_PAGE_TYPE_ZERO; + } +} + +static void +snp_populate_metadata_pages(SevSnpGuestState *sev_snp, + OvmfSevMetadata *metadata) +{ + OvmfSevMetadataDesc *desc; + int type, ret, i; + void *hva; + MemoryRegion *mr = NULL; + + for (i = 0; i < metadata->num_desc; i++) { + desc = &metadata->descs[i]; + + type = snp_metadata_desc_to_page_type(desc->type); + + hva = gpa2hva(&mr, desc->base, desc->len, NULL); + if (!hva) { + error_report("%s: Failed to get HVA for GPA 0x%x sz 0x%x", + __func__, desc->base, desc->len); + exit(1); + } + + ret = snp_launch_update_data(desc->base, hva, desc->len, type); + if (ret) { + error_report("%s: Failed to add metadata page gpa 0x%x+%x type %d", + __func__, desc->base, desc->len, desc->type); + exit(1); + } + } +} + static void sev_snp_launch_finish(SevCommonState *sev_common) { int ret, error; Error *local_err = NULL; + OvmfSevMetadata *metadata; SevLaunchUpdateData *data; SevSnpGuestState *sev_snp = SEV_SNP_GUEST(sev_common); struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf; + /* + * To boot the SNP guest, the hypervisor is required to populate the CPUID + * and Secrets page before finalizing the launch flow. The location of + * the secrets and CPUID page is available through the OVMF metadata GUID. + */ + metadata = pc_system_get_ovmf_sev_metadata_ptr(); + if (metadata == NULL) { + error_report("%s: Failed to locate SEV metadata header", __func__); + exit(1); + } + + /* Populate all the metadata pages */ + snp_populate_metadata_pages(sev_snp, metadata); + QTAILQ_FOREACH(data, &launch_update, next) { ret = sev_snp_launch_update(sev_snp, data); if (ret) { -- Gitee From 9f61e0042cb77fa87243b0b0a1fbcecb93c06c81 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 30 May 2024 06:16:32 -0500 Subject: [PATCH 058/115] i386/sev: Add support for SNP CPUID validation commit 70943ad8e4dfbe5f77006b880290219be9d03553 upstream. SEV-SNP firmware allows a special guest page to be populated with a table of guest CPUID values so that they can be validated through firmware before being loaded into encrypted guest memory where they can be used in place of hypervisor-provided values[1]. As part of SEV-SNP guest initialization, use this interface to validate the CPUID entries reported by KVM_GET_CPUID2 prior to initial guest start and populate the CPUID page reserved by OVMF with the resulting encrypted data. [1] SEV SNP Firmware ABI Specification, Rev. 0.8, 8.13.2.6 Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-21-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- target/i386/sev.c | 164 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 162 insertions(+), 2 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index c3112225de..57982872ac 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -254,6 +254,36 @@ static struct ConfidentialGuestMemoryEncryptionOps sev_memory_encryption_ops = { .load_incoming_cpu_state = csv_load_incoming_cpu_state, }; +/* doesn't expose this, so re-use the max from kvm.c */ +#define KVM_MAX_CPUID_ENTRIES 100 + +typedef struct KvmCpuidInfo { + struct kvm_cpuid2 cpuid; + struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; +} KvmCpuidInfo; + +#define SNP_CPUID_FUNCTION_MAXCOUNT 64 +#define SNP_CPUID_FUNCTION_UNKNOWN 0xFFFFFFFF + +typedef struct { + uint32_t eax_in; + uint32_t ecx_in; + uint64_t xcr0_in; + uint64_t xss_in; + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint64_t reserved; +} __attribute__((packed)) SnpCpuidFunc; + +typedef struct { + uint32_t count; + uint32_t reserved1; + uint64_t reserved2; + SnpCpuidFunc entries[SNP_CPUID_FUNCTION_MAXCOUNT]; +} __attribute__((packed)) SnpCpuidInfo; + static int sev_ioctl(int fd, int cmd, void *data, int *error) { @@ -891,6 +921,35 @@ out: return ret; } +static void +sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old, + SnpCpuidInfo *new) +{ + size_t i; + + if (old->count != new->count) { + error_report("SEV-SNP: CPUID validation failed due to count mismatch," + "provided: %d, expected: %d", old->count, new->count); + return; + } + + for (i = 0; i < old->count; i++) { + SnpCpuidFunc *old_func, *new_func; + + old_func = &old->entries[i]; + new_func = &new->entries[i]; + + if (memcmp(old_func, new_func, sizeof(SnpCpuidFunc))) { + error_report("SEV-SNP: CPUID validation failed for function 0x%x, index: 0x%x" + "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x" + "expected: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x", + old_func->eax_in, old_func->ecx_in, + old_func->eax, old_func->ebx, old_func->ecx, old_func->edx, + new_func->eax, new_func->ebx, new_func->ecx, new_func->edx); + } + } +} + static const char * snp_page_type_to_str(int type) { @@ -909,6 +968,7 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, SevLaunchUpdateData *data) { int ret, fw_error; + SnpCpuidInfo snp_cpuid_info; struct kvm_sev_snp_launch_update update = {0}; if (!data->hva || !data->len) { @@ -918,6 +978,11 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, return 1; } + if (data->type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { + /* Save a copy for comparison in case the LAUNCH_UPDATE fails */ + memcpy(&snp_cpuid_info, data->hva, sizeof(snp_cpuid_info)); + } + update.uaddr = (__u64)(unsigned long)data->hva; update.gfn_start = data->gpa >> TARGET_PAGE_BITS; update.len = data->len; @@ -945,6 +1010,11 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, if (ret && ret != -EAGAIN) { error_report("SNP_LAUNCH_UPDATE ret=%d fw_error=%d '%s'", ret, fw_error, fw_error_to_str(fw_error)); + + if (data->type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { + sev_snp_cpuid_report_mismatches(&snp_cpuid_info, data->hva); + error_report("SEV-SNP: failed update CPUID page"); + } break; } } @@ -1147,7 +1217,8 @@ err: } static int -snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type) +snp_launch_update_data(uint64_t gpa, void *hva, + uint32_t len, int type) { SevLaunchUpdateData *data; @@ -1162,6 +1233,90 @@ snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type) return 0; } +static int +sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info, + const KvmCpuidInfo *kvm_cpuid_info) +{ + size_t i; + + if (kvm_cpuid_info->cpuid.nent > SNP_CPUID_FUNCTION_MAXCOUNT) { + error_report("SEV-SNP: CPUID entry count (%d) exceeds max (%d)", + kvm_cpuid_info->cpuid.nent, SNP_CPUID_FUNCTION_MAXCOUNT); + return -1; + } + + memset(snp_cpuid_info, 0, sizeof(*snp_cpuid_info)); + + for (i = 0; i < kvm_cpuid_info->cpuid.nent; i++) { + const struct kvm_cpuid_entry2 *kvm_cpuid_entry; + SnpCpuidFunc *snp_cpuid_entry; + + kvm_cpuid_entry = &kvm_cpuid_info->entries[i]; + snp_cpuid_entry = &snp_cpuid_info->entries[i]; + + snp_cpuid_entry->eax_in = kvm_cpuid_entry->function; + if (kvm_cpuid_entry->flags == KVM_CPUID_FLAG_SIGNIFCANT_INDEX) { + snp_cpuid_entry->ecx_in = kvm_cpuid_entry->index; + } + snp_cpuid_entry->eax = kvm_cpuid_entry->eax; + snp_cpuid_entry->ebx = kvm_cpuid_entry->ebx; + snp_cpuid_entry->ecx = kvm_cpuid_entry->ecx; + snp_cpuid_entry->edx = kvm_cpuid_entry->edx; + + /* + * Guest kernels will calculate EBX themselves using the 0xD + * subfunctions corresponding to the individual XSAVE areas, so only + * encode the base XSAVE size in the initial leaves, corresponding + * to the initial XCR0=1 state. + */ + if (snp_cpuid_entry->eax_in == 0xD && + (snp_cpuid_entry->ecx_in == 0x0 || snp_cpuid_entry->ecx_in == 0x1)) { + snp_cpuid_entry->ebx = 0x240; + snp_cpuid_entry->xcr0_in = 1; + snp_cpuid_entry->xss_in = 0; + } + } + + snp_cpuid_info->count = i; + + return 0; +} + +static int +snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, uint32_t cpuid_len) +{ + KvmCpuidInfo kvm_cpuid_info = {0}; + SnpCpuidInfo snp_cpuid_info; + CPUState *cs = first_cpu; + int ret; + uint32_t i = 0; + + assert(sizeof(snp_cpuid_info) <= cpuid_len); + + /* get the cpuid list from KVM */ + do { + kvm_cpuid_info.cpuid.nent = ++i; + ret = kvm_vcpu_ioctl(cs, KVM_GET_CPUID2, &kvm_cpuid_info); + } while (ret == -E2BIG); + + if (ret) { + error_report("SEV-SNP: unable to query CPUID values for CPU: '%s'", + strerror(-ret)); + return 1; + } + + ret = sev_snp_cpuid_info_fill(&snp_cpuid_info, &kvm_cpuid_info); + if (ret) { + error_report("SEV-SNP: failed to generate CPUID table information"); + return 1; + } + + memcpy(hva, &snp_cpuid_info, sizeof(snp_cpuid_info)); + + return snp_launch_update_data(cpuid_addr, hva, cpuid_len, + KVM_SEV_SNP_PAGE_TYPE_CPUID); +} + static int snp_metadata_desc_to_page_type(int desc_type) { @@ -1196,7 +1351,12 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp, exit(1); } - ret = snp_launch_update_data(desc->base, hva, desc->len, type); + if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { + ret = snp_launch_update_cpuid(desc->base, hva, desc->len); + } else { + ret = snp_launch_update_data(desc->base, hva, desc->len, type); + } + if (ret) { error_report("%s: Failed to add metadata page gpa 0x%x+%x type %d", __func__, desc->base, desc->len, desc->type); -- Gitee From 6d50d26abc19e8c27c2c5e27411f8ff708f427c8 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 2 Jan 2024 10:35:25 -0500 Subject: [PATCH 059/115] system/cpus: rename qemu_mutex_lock_iothread() to bql_lock() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 195801d700c008b6a8d8acfa299aa5f177446647 upstream. The Big QEMU Lock (BQL) has many names and they are confusing. The actual QemuMutex variable is called qemu_global_mutex but it's commonly referred to as the BQL in discussions and some code comments. The locking APIs, however, are called qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread(). The "iothread" name is historic and comes from when the main thread was split into into KVM vcpu threads and the "iothread" (now called the main loop thread). I have contributed to the confusion myself by introducing a separate --object iothread, a separate concept unrelated to the BQL. The "iothread" name is no longer appropriate for the BQL. Rename the locking APIs to: - void bql_lock(void) - void bql_unlock(void) - bool bql_locked(void) There are more APIs with "iothread" in their names. Subsequent patches will rename them. There are also comments and documentation that will be updated in later patches. [Backport Changes] In the files listed below the usage of qemu_mutex_lock_iothread() and qemu_mutex_unlock_iothread() was replaced with the new bql_lock() and bql_unlock() APIs, in line with current upstream commit. These changes maintain consistency with the new naming convention for the Big QEMU Lock (BQL), while preserving compatibility with existing Hygon-specific modifications. - migration/colo.c (colo_process_incoming_thread()), - migration/savevm.c (qemu_loadvm_state_main()), - target/arm/psci.c (kvm_arm_handle_hypercall()). Signed-off-by: Stefan Hajnoczi Reviewed-by: Paul Durrant Acked-by: Fabiano Rosas Acked-by: David Woodhouse Reviewed-by: Cédric Le Goater Acked-by: Peter Xu Acked-by: Eric Farman Reviewed-by: Harsh Prateek Bora Acked-by: Hyman Huang Reviewed-by: Akihiko Odaki Message-id: 20240102153529.486531-2-stefanha@redhat.com Signed-off-by: Stefan Hajnoczi Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- accel/accel-blocker.c | 10 +-- accel/dummy-cpus.c | 8 +- accel/hvf/hvf-accel-ops.c | 4 +- accel/kvm/kvm-accel-ops.c | 4 +- accel/kvm/kvm-all.c | 22 ++--- accel/tcg/cpu-exec.c | 26 +++--- accel/tcg/cputlb.c | 16 ++-- accel/tcg/tcg-accel-ops-icount.c | 4 +- accel/tcg/tcg-accel-ops-mttcg.c | 12 +-- accel/tcg/tcg-accel-ops-rr.c | 14 ++-- accel/tcg/tcg-accel-ops.c | 2 +- accel/tcg/translate-all.c | 2 +- audio/coreaudio.m | 4 +- cpu-common.c | 4 +- dump/dump.c | 4 +- hw/core/cpu-common.c | 6 +- hw/i386/intel_iommu.c | 6 +- hw/i386/kvm/xen_evtchn.c | 16 ++-- hw/i386/kvm/xen_overlay.c | 2 +- hw/i386/kvm/xen_xenstore.c | 2 +- hw/intc/arm_gicv3_cpuif.c | 2 +- hw/intc/s390_flic.c | 18 ++-- hw/misc/edu.c | 4 +- hw/misc/imx6_src.c | 2 +- hw/misc/imx7_src.c | 2 +- hw/net/xen_nic.c | 8 +- hw/ppc/pegasos2.c | 2 +- hw/ppc/ppc.c | 4 +- hw/ppc/spapr.c | 2 +- hw/ppc/spapr_rng.c | 4 +- hw/ppc/spapr_softmmu.c | 4 +- hw/remote/mpqemu-link.c | 20 ++--- hw/remote/vfio-user-obj.c | 2 +- hw/s390x/s390-skeys.c | 2 +- include/block/aio-wait.h | 2 +- include/qemu/main-loop.h | 39 +++++---- include/qemu/thread.h | 2 +- memory_ldst.c.inc | 18 ++-- migration/block-dirty-bitmap.c | 4 +- migration/block.c | 16 ++-- migration/colo.c | 66 +++++++-------- migration/dirtyrate.c | 12 +-- migration/migration.c | 52 ++++++------ migration/ram.c | 12 +-- migration/savevm.c | 4 +- replay/replay-internal.c | 2 +- semihosting/console.c | 8 +- stubs/iothread-lock.c | 6 +- system/cpu-throttle.c | 4 +- system/cpus.c | 51 ++++++------ system/dirtylimit.c | 4 +- system/memory.c | 2 +- system/physmem.c | 8 +- system/runstate.c | 2 +- system/watchpoint.c | 4 +- target/arm/arm-powerctl.c | 14 ++-- target/arm/helper.c | 4 +- target/arm/hvf/hvf.c | 8 +- target/arm/kvm.c | 8 +- target/arm/kvm64.c | 4 +- target/arm/psci.c | 2 +- target/arm/ptw.c | 6 +- target/arm/tcg/helper-a64.c | 8 +- target/arm/tcg/m_helper.c | 6 +- target/arm/tcg/op_helper.c | 24 +++--- target/hppa/int_helper.c | 8 +- target/i386/hvf/README.md | 2 +- target/i386/hvf/hvf.c | 6 +- target/i386/kvm/hyperv.c | 4 +- target/i386/kvm/kvm.c | 28 +++---- target/i386/kvm/xen-emu.c | 14 ++-- target/i386/nvmm/nvmm-accel-ops.c | 4 +- target/i386/nvmm/nvmm-all.c | 20 ++--- target/i386/tcg/sysemu/fpu_helper.c | 6 +- target/i386/tcg/sysemu/misc_helper.c | 4 +- target/i386/whpx/whpx-accel-ops.c | 4 +- target/i386/whpx/whpx-all.c | 24 +++--- target/loongarch/tcg/csr_helper.c | 4 +- target/mips/kvm.c | 4 +- target/mips/tcg/sysemu/cp0_helper.c | 4 +- target/openrisc/sys_helper.c | 16 ++-- target/ppc/excp_helper.c | 12 +-- target/ppc/kvm.c | 4 +- target/ppc/misc_helper.c | 8 +- target/ppc/timebase_helper.c | 8 +- target/s390x/kvm/kvm.c | 4 +- target/s390x/tcg/misc_helper.c | 118 +++++++++++++-------------- target/sparc/int32_helper.c | 2 +- target/sparc/int64_helper.c | 6 +- target/sparc/win_helper.c | 20 ++--- target/xtensa/exc_helper.c | 8 +- ui/cocoa.m | 50 ++++++------ ui/spice-core.c | 4 +- util/async.c | 2 +- util/main-loop.c | 8 +- util/qsp.c | 6 +- util/rcu.c | 14 ++-- 97 files changed, 536 insertions(+), 536 deletions(-) diff --git a/accel/accel-blocker.c b/accel/accel-blocker.c index 1e7f423462..e083f24aa8 100644 --- a/accel/accel-blocker.c +++ b/accel/accel-blocker.c @@ -41,7 +41,7 @@ void accel_blocker_init(void) void accel_ioctl_begin(void) { - if (likely(qemu_mutex_iothread_locked())) { + if (likely(bql_locked())) { return; } @@ -51,7 +51,7 @@ void accel_ioctl_begin(void) void accel_ioctl_end(void) { - if (likely(qemu_mutex_iothread_locked())) { + if (likely(bql_locked())) { return; } @@ -62,7 +62,7 @@ void accel_ioctl_end(void) void accel_cpu_ioctl_begin(CPUState *cpu) { - if (unlikely(qemu_mutex_iothread_locked())) { + if (unlikely(bql_locked())) { return; } @@ -72,7 +72,7 @@ void accel_cpu_ioctl_begin(CPUState *cpu) void accel_cpu_ioctl_end(CPUState *cpu) { - if (unlikely(qemu_mutex_iothread_locked())) { + if (unlikely(bql_locked())) { return; } @@ -105,7 +105,7 @@ void accel_ioctl_inhibit_begin(void) * We allow to inhibit only when holding the BQL, so we can identify * when an inhibitor wants to issue an ioctl easily. */ - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); /* Block further invocations of the ioctls outside the BQL. */ CPU_FOREACH(cpu) { diff --git a/accel/dummy-cpus.c b/accel/dummy-cpus.c index b75c919ac3..f4b0ec5890 100644 --- a/accel/dummy-cpus.c +++ b/accel/dummy-cpus.c @@ -24,7 +24,7 @@ static void *dummy_cpu_thread_fn(void *arg) rcu_register_thread(); - qemu_mutex_lock_iothread(); + bql_lock(); qemu_thread_get_self(cpu->thread); cpu->thread_id = qemu_get_thread_id(); cpu->neg.can_do_io = true; @@ -43,7 +43,7 @@ static void *dummy_cpu_thread_fn(void *arg) qemu_guest_random_seed_thread_part2(cpu->random_seed); do { - qemu_mutex_unlock_iothread(); + bql_unlock(); #ifndef _WIN32 do { int sig; @@ -56,11 +56,11 @@ static void *dummy_cpu_thread_fn(void *arg) #else qemu_sem_wait(&cpu->sem); #endif - qemu_mutex_lock_iothread(); + bql_lock(); qemu_wait_io_event(cpu); } while (!cpu->unplug); - qemu_mutex_unlock_iothread(); + bql_unlock(); rcu_unregister_thread(); return NULL; } diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c index abe7adf7ee..8eabb696fa 100644 --- a/accel/hvf/hvf-accel-ops.c +++ b/accel/hvf/hvf-accel-ops.c @@ -424,7 +424,7 @@ static void *hvf_cpu_thread_fn(void *arg) rcu_register_thread(); - qemu_mutex_lock_iothread(); + bql_lock(); qemu_thread_get_self(cpu->thread); cpu->thread_id = qemu_get_thread_id(); @@ -449,7 +449,7 @@ static void *hvf_cpu_thread_fn(void *arg) hvf_vcpu_destroy(cpu); cpu_thread_signal_destroyed(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); rcu_unregister_thread(); return NULL; } diff --git a/accel/kvm/kvm-accel-ops.c b/accel/kvm/kvm-accel-ops.c index afb4dfb3e7..58a6601642 100644 --- a/accel/kvm/kvm-accel-ops.c +++ b/accel/kvm/kvm-accel-ops.c @@ -33,7 +33,7 @@ static void *kvm_vcpu_thread_fn(void *arg) rcu_register_thread(); - qemu_mutex_lock_iothread(); + bql_lock(); qemu_thread_get_self(cpu->thread); cpu->thread_id = qemu_get_thread_id(); cpu->neg.can_do_io = true; @@ -58,7 +58,7 @@ static void *kvm_vcpu_thread_fn(void *arg) kvm_destroy_vcpu(cpu); cpu_thread_signal_destroyed(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); rcu_unregister_thread(); return NULL; } diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 4cf2179c29..bb99cf4b42 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -885,7 +885,7 @@ static void kvm_dirty_ring_flush(void) * should always be with BQL held, serialization is guaranteed. * However, let's be sure of it. */ - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); /* * First make sure to flush the hardware buffers by kicking all * vcpus out in a synchronous way. @@ -1523,9 +1523,9 @@ static void *kvm_dirty_ring_reaper_thread(void *data) trace_kvm_dirty_ring_reaper("wakeup"); r->reaper_state = KVM_DIRTY_RING_REAPER_REAPING; - qemu_mutex_lock_iothread(); + bql_lock(); kvm_dirty_ring_reap(s, NULL); - qemu_mutex_unlock_iothread(); + bql_unlock(); r->reaper_iteration++; } @@ -3070,7 +3070,7 @@ int kvm_cpu_exec(CPUState *cpu) return EXCP_HLT; } - qemu_mutex_unlock_iothread(); + bql_unlock(); cpu_exec_start(cpu); do { @@ -3110,11 +3110,11 @@ int kvm_cpu_exec(CPUState *cpu) #ifdef KVM_HAVE_MCE_INJECTION if (unlikely(have_sigbus_pending)) { - qemu_mutex_lock_iothread(); + bql_lock(); kvm_arch_on_sigbus_vcpu(cpu, pending_sigbus_code, pending_sigbus_addr); have_sigbus_pending = false; - qemu_mutex_unlock_iothread(); + bql_unlock(); } #endif @@ -3186,7 +3186,7 @@ int kvm_cpu_exec(CPUState *cpu) * still full. Got kicked by KVM_RESET_DIRTY_RINGS. */ trace_kvm_dirty_ring_full(cpu->cpu_index); - qemu_mutex_lock_iothread(); + bql_lock(); /* * We throttle vCPU by making it sleep once it exit from kernel * due to dirty ring full. In the dirtylimit scenario, reaping @@ -3198,7 +3198,7 @@ int kvm_cpu_exec(CPUState *cpu) } else { kvm_dirty_ring_reap(kvm_state, NULL); } - qemu_mutex_unlock_iothread(); + bql_unlock(); dirtylimit_vcpu_execute(cpu); ret = 0; break; @@ -3214,9 +3214,9 @@ int kvm_cpu_exec(CPUState *cpu) break; case KVM_SYSTEM_EVENT_CRASH: kvm_cpu_synchronize_state(cpu); - qemu_mutex_lock_iothread(); + bql_lock(); qemu_system_guest_panicked(cpu_get_crash_info(cpu)); - qemu_mutex_unlock_iothread(); + bql_unlock(); ret = 0; break; default: @@ -3246,7 +3246,7 @@ int kvm_cpu_exec(CPUState *cpu) } while (ret == 0); cpu_exec_end(cpu); - qemu_mutex_lock_iothread(); + bql_lock(); if (ret < 0) { cpu_dump_state(cpu, stderr, CPU_DUMP_CODE); diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index c938eb96f8..67eda9865e 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -558,8 +558,8 @@ static void cpu_exec_longjmp_cleanup(CPUState *cpu) tcg_ctx->gen_tb = NULL; } #endif - if (qemu_mutex_iothread_locked()) { - qemu_mutex_unlock_iothread(); + if (bql_locked()) { + bql_unlock(); } assert_no_pages_locked(); } @@ -680,10 +680,10 @@ static inline bool cpu_handle_halt(CPUState *cpu) #if defined(TARGET_I386) if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { X86CPU *x86_cpu = X86_CPU(cpu); - qemu_mutex_lock_iothread(); + bql_lock(); apic_poll_irq(x86_cpu->apic_state); cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL); - qemu_mutex_unlock_iothread(); + bql_unlock(); } #endif /* TARGET_I386 */ if (!cpu_has_work(cpu)) { @@ -749,9 +749,9 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret) #else if (replay_exception()) { CPUClass *cc = CPU_GET_CLASS(cpu); - qemu_mutex_lock_iothread(); + bql_lock(); cc->tcg_ops->do_interrupt(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); cpu->exception_index = -1; if (unlikely(cpu->singlestep_enabled)) { @@ -812,7 +812,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, if (unlikely(qatomic_read(&cpu->interrupt_request))) { int interrupt_request; - qemu_mutex_lock_iothread(); + bql_lock(); interrupt_request = cpu->interrupt_request; if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) { /* Mask out external interrupts for this step. */ @@ -821,7 +821,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, if (interrupt_request & CPU_INTERRUPT_DEBUG) { cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG; cpu->exception_index = EXCP_DEBUG; - qemu_mutex_unlock_iothread(); + bql_unlock(); return true; } #if !defined(CONFIG_USER_ONLY) @@ -832,7 +832,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, cpu->interrupt_request &= ~CPU_INTERRUPT_HALT; cpu->halted = 1; cpu->exception_index = EXCP_HLT; - qemu_mutex_unlock_iothread(); + bql_unlock(); return true; } #if defined(TARGET_I386) @@ -843,14 +843,14 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0); do_cpu_init(x86_cpu); cpu->exception_index = EXCP_HALTED; - qemu_mutex_unlock_iothread(); + bql_unlock(); return true; } #else else if (interrupt_request & CPU_INTERRUPT_RESET) { replay_interrupt(); cpu_reset(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); return true; } #endif /* !TARGET_I386 */ @@ -873,7 +873,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, */ if (unlikely(cpu->singlestep_enabled)) { cpu->exception_index = EXCP_DEBUG; - qemu_mutex_unlock_iothread(); + bql_unlock(); return true; } cpu->exception_index = -1; @@ -892,7 +892,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, } /* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */ - qemu_mutex_unlock_iothread(); + bql_unlock(); } /* Finally, check if we need to exit to the main loop. */ diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c index db3f93fda9..5698a9fd8e 100644 --- a/accel/tcg/cputlb.c +++ b/accel/tcg/cputlb.c @@ -2030,10 +2030,10 @@ static uint64_t do_ld_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full, section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra); mr = section->mr; - qemu_mutex_lock_iothread(); + bql_lock(); ret = int_ld_mmio_beN(cpu, full, ret_be, addr, size, mmu_idx, type, ra, mr, mr_offset); - qemu_mutex_unlock_iothread(); + bql_unlock(); return ret; } @@ -2054,12 +2054,12 @@ static Int128 do_ld16_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full, section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra); mr = section->mr; - qemu_mutex_lock_iothread(); + bql_lock(); a = int_ld_mmio_beN(cpu, full, ret_be, addr, size - 8, mmu_idx, MMU_DATA_LOAD, ra, mr, mr_offset); b = int_ld_mmio_beN(cpu, full, ret_be, addr + size - 8, 8, mmu_idx, MMU_DATA_LOAD, ra, mr, mr_offset + size - 8); - qemu_mutex_unlock_iothread(); + bql_unlock(); return int128_make128(b, a); } @@ -2577,10 +2577,10 @@ static uint64_t do_st_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full, section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra); mr = section->mr; - qemu_mutex_lock_iothread(); + bql_lock(); ret = int_st_mmio_leN(cpu, full, val_le, addr, size, mmu_idx, ra, mr, mr_offset); - qemu_mutex_unlock_iothread(); + bql_unlock(); return ret; } @@ -2601,12 +2601,12 @@ static uint64_t do_st16_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full, section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra); mr = section->mr; - qemu_mutex_lock_iothread(); + bql_lock(); int_st_mmio_leN(cpu, full, int128_getlo(val_le), addr, 8, mmu_idx, ra, mr, mr_offset); ret = int_st_mmio_leN(cpu, full, int128_gethi(val_le), addr + 8, size - 8, mmu_idx, ra, mr, mr_offset + 8); - qemu_mutex_unlock_iothread(); + bql_unlock(); return ret; } diff --git a/accel/tcg/tcg-accel-ops-icount.c b/accel/tcg/tcg-accel-ops-icount.c index b25685fb71..5824d92580 100644 --- a/accel/tcg/tcg-accel-ops-icount.c +++ b/accel/tcg/tcg-accel-ops-icount.c @@ -126,9 +126,9 @@ void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget) * We're called without the iothread lock, so must take it while * we're calling timer handlers. */ - qemu_mutex_lock_iothread(); + bql_lock(); icount_notify_aio_contexts(); - qemu_mutex_unlock_iothread(); + bql_unlock(); } } diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c index 73866990ce..d1cb6a3620 100644 --- a/accel/tcg/tcg-accel-ops-mttcg.c +++ b/accel/tcg/tcg-accel-ops-mttcg.c @@ -76,7 +76,7 @@ static void *mttcg_cpu_thread_fn(void *arg) rcu_add_force_rcu_notifier(&force_rcu.notifier); tcg_register_thread(); - qemu_mutex_lock_iothread(); + bql_lock(); qemu_thread_get_self(cpu->thread); cpu->thread_id = qemu_get_thread_id(); @@ -91,9 +91,9 @@ static void *mttcg_cpu_thread_fn(void *arg) do { if (cpu_can_run(cpu)) { int r; - qemu_mutex_unlock_iothread(); + bql_unlock(); r = tcg_cpus_exec(cpu); - qemu_mutex_lock_iothread(); + bql_lock(); switch (r) { case EXCP_DEBUG: cpu_handle_guest_debug(cpu); @@ -105,9 +105,9 @@ static void *mttcg_cpu_thread_fn(void *arg) */ break; case EXCP_ATOMIC: - qemu_mutex_unlock_iothread(); + bql_unlock(); cpu_exec_step_atomic(cpu); - qemu_mutex_lock_iothread(); + bql_lock(); default: /* Ignore everything else? */ break; @@ -119,7 +119,7 @@ static void *mttcg_cpu_thread_fn(void *arg) } while (!cpu->unplug || cpu_can_run(cpu)); tcg_cpus_destroy(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); rcu_remove_force_rcu_notifier(&force_rcu.notifier); rcu_unregister_thread(); tcg_unregister_thread(); diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c index 611932f3c3..c4ea372a3f 100644 --- a/accel/tcg/tcg-accel-ops-rr.c +++ b/accel/tcg/tcg-accel-ops-rr.c @@ -188,7 +188,7 @@ static void *rr_cpu_thread_fn(void *arg) rcu_add_force_rcu_notifier(&force_rcu); tcg_register_thread(); - qemu_mutex_lock_iothread(); + bql_lock(); qemu_thread_get_self(cpu->thread); cpu->thread_id = qemu_get_thread_id(); @@ -218,9 +218,9 @@ static void *rr_cpu_thread_fn(void *arg) /* Only used for icount_enabled() */ int64_t cpu_budget = 0; - qemu_mutex_unlock_iothread(); + bql_unlock(); replay_mutex_lock(); - qemu_mutex_lock_iothread(); + bql_lock(); if (icount_enabled()) { int cpu_count = rr_cpu_count(); @@ -254,7 +254,7 @@ static void *rr_cpu_thread_fn(void *arg) if (cpu_can_run(cpu)) { int r; - qemu_mutex_unlock_iothread(); + bql_unlock(); if (icount_enabled()) { icount_prepare_for_run(cpu, cpu_budget); } @@ -262,15 +262,15 @@ static void *rr_cpu_thread_fn(void *arg) if (icount_enabled()) { icount_process_data(cpu); } - qemu_mutex_lock_iothread(); + bql_lock(); if (r == EXCP_DEBUG) { cpu_handle_guest_debug(cpu); break; } else if (r == EXCP_ATOMIC) { - qemu_mutex_unlock_iothread(); + bql_unlock(); cpu_exec_step_atomic(cpu); - qemu_mutex_lock_iothread(); + bql_lock(); break; } } else if (cpu->stop) { diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c index 1b57290682..813065c0ec 100644 --- a/accel/tcg/tcg-accel-ops.c +++ b/accel/tcg/tcg-accel-ops.c @@ -88,7 +88,7 @@ static void tcg_cpu_reset_hold(CPUState *cpu) /* mask must never be zero, except for A20 change call */ void tcg_handle_interrupt(CPUState *cpu, int mask) { - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); cpu->interrupt_request |= mask; diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 79a88f5fb7..1737bb3da5 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -649,7 +649,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) void cpu_interrupt(CPUState *cpu, int mask) { - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); cpu->interrupt_request |= mask; qatomic_set(&cpu->neg.icount_decr.u16.high, -1); } diff --git a/audio/coreaudio.m b/audio/coreaudio.m index 8cd129a27d..9d2db9883c 100644 --- a/audio/coreaudio.m +++ b/audio/coreaudio.m @@ -547,7 +547,7 @@ static OSStatus handle_voice_change( { coreaudioVoiceOut *core = in_client_data; - qemu_mutex_lock_iothread(); + bql_lock(); if (core->outputDeviceID) { fini_out_device(core); @@ -557,7 +557,7 @@ static OSStatus handle_voice_change( update_device_playback_state(core); } - qemu_mutex_unlock_iothread(); + bql_unlock(); return 0; } diff --git a/cpu-common.c b/cpu-common.c index a949ad7ca3..b83ed4164e 100644 --- a/cpu-common.c +++ b/cpu-common.c @@ -395,11 +395,11 @@ void process_queued_cpu_work(CPUState *cpu) * BQL, so it goes to sleep; start_exclusive() is sleeping too, so * neither CPU can proceed. */ - qemu_mutex_unlock_iothread(); + bql_unlock(); start_exclusive(); wi->func(cpu, wi->data); end_exclusive(); - qemu_mutex_lock_iothread(); + bql_lock(); } else { wi->func(cpu, wi->data); } diff --git a/dump/dump.c b/dump/dump.c index 787059ac2c..0f9b37718e 100644 --- a/dump/dump.c +++ b/dump/dump.c @@ -109,11 +109,11 @@ static int dump_cleanup(DumpState *s) s->guest_note = NULL; if (s->resume) { if (s->detached) { - qemu_mutex_lock_iothread(); + bql_lock(); } vm_start(); if (s->detached) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } } migrate_del_blocker(&dump_migration_blocker); diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c index e36ca2c207..6292eb774b 100644 --- a/hw/core/cpu-common.c +++ b/hw/core/cpu-common.c @@ -70,14 +70,14 @@ CPUState *cpu_create(const char *typename) * BQL here if we need to. cpu_interrupt assumes it is held.*/ void cpu_reset_interrupt(CPUState *cpu, int mask) { - bool need_lock = !qemu_mutex_iothread_locked(); + bool need_lock = !bql_locked(); if (need_lock) { - qemu_mutex_lock_iothread(); + bql_lock(); } cpu->interrupt_request &= ~mask; if (need_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } } diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 60d86e0cb6..2def6d8a10 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -1690,7 +1690,7 @@ static bool vtd_switch_address_space(VTDAddressSpace *as) { bool use_iommu, pt; /* Whether we need to take the BQL on our own */ - bool take_bql = !qemu_mutex_iothread_locked(); + bool take_bql = !bql_locked(); assert(as); @@ -1708,7 +1708,7 @@ static bool vtd_switch_address_space(VTDAddressSpace *as) * it. We'd better make sure we have had it already, or, take it. */ if (take_bql) { - qemu_mutex_lock_iothread(); + bql_lock(); } /* Turn off first then on the other */ @@ -1763,7 +1763,7 @@ static bool vtd_switch_address_space(VTDAddressSpace *as) } if (take_bql) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } return use_iommu; diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c index 02b8cbf8df..d7d15cfaf7 100644 --- a/hw/i386/kvm/xen_evtchn.c +++ b/hw/i386/kvm/xen_evtchn.c @@ -425,7 +425,7 @@ void xen_evtchn_set_callback_level(int level) * effect immediately. That just leaves interdomain loopback as the case * which uses the BH. */ - if (!qemu_mutex_iothread_locked()) { + if (!bql_locked()) { qemu_bh_schedule(s->gsi_bh); return; } @@ -459,7 +459,7 @@ int xen_evtchn_set_callback_param(uint64_t param) * We need the BQL because set_callback_pci_intx() may call into PCI code, * and because we may need to manipulate the old and new GSI levels. */ - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); qemu_mutex_lock(&s->port_lock); switch (type) { @@ -1037,7 +1037,7 @@ static int close_port(XenEvtchnState *s, evtchn_port_t port, XenEvtchnPort *p = &s->port_table[port]; /* Because it *might* be a PIRQ port */ - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); switch (p->type) { case EVTCHNSTAT_closed: @@ -1104,7 +1104,7 @@ int xen_evtchn_soft_reset(void) return -ENOTSUP; } - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); qemu_mutex_lock(&s->port_lock); @@ -1601,7 +1601,7 @@ bool xen_evtchn_set_gsi(int gsi, int level) XenEvtchnState *s = xen_evtchn_singleton; int pirq; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); if (!s || gsi < 0 || gsi >= IOAPIC_NUM_PINS) { return false; @@ -1712,7 +1712,7 @@ void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector, return; } - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); pirq = msi_pirq_target(addr, data); @@ -1749,7 +1749,7 @@ int xen_evtchn_translate_pirq_msi(struct kvm_irq_routing_entry *route, return 1; /* Not a PIRQ */ } - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); pirq = msi_pirq_target(address, data); if (!pirq || pirq >= s->nr_pirqs) { @@ -1796,7 +1796,7 @@ bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data) return false; } - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); pirq = msi_pirq_target(address, data); if (!pirq || pirq >= s->nr_pirqs) { diff --git a/hw/i386/kvm/xen_overlay.c b/hw/i386/kvm/xen_overlay.c index 39fda1b72c..1722294638 100644 --- a/hw/i386/kvm/xen_overlay.c +++ b/hw/i386/kvm/xen_overlay.c @@ -194,7 +194,7 @@ int xen_overlay_map_shinfo_page(uint64_t gpa) return -ENOENT; } - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); if (s->shinfo_gpa) { /* If removing shinfo page, turn the kernel magic off first */ diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c index 6e651960b3..ae27889a3f 100644 --- a/hw/i386/kvm/xen_xenstore.c +++ b/hw/i386/kvm/xen_xenstore.c @@ -1341,7 +1341,7 @@ static void fire_watch_cb(void *opaque, const char *path, const char *token) { XenXenstoreState *s = opaque; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); /* * If there's a response pending, we obviously can't scribble over diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c index a013510074..edd727ad37 100644 --- a/hw/intc/arm_gicv3_cpuif.c +++ b/hw/intc/arm_gicv3_cpuif.c @@ -938,7 +938,7 @@ void gicv3_cpuif_update(GICv3CPUState *cs) return; } - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); trace_gicv3_cpuif_update(gicv3_redist_affid(cs), cs->hppi.irq, cs->hppi.grp, cs->hppi.prio); diff --git a/hw/intc/s390_flic.c b/hw/intc/s390_flic.c index 74e02858d4..93b8531ad0 100644 --- a/hw/intc/s390_flic.c +++ b/hw/intc/s390_flic.c @@ -106,7 +106,7 @@ static int qemu_s390_clear_io_flic(S390FLICState *fs, uint16_t subchannel_id, QEMUS390FlicIO *cur, *next; uint8_t isc; - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); if (!(flic->pending & FLIC_PENDING_IO)) { return 0; } @@ -223,7 +223,7 @@ uint32_t qemu_s390_flic_dequeue_service(QEMUS390FLICState *flic) { uint32_t tmp; - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); g_assert(flic->pending & FLIC_PENDING_SERVICE); tmp = flic->service_param; flic->service_param = 0; @@ -238,7 +238,7 @@ QEMUS390FlicIO *qemu_s390_flic_dequeue_io(QEMUS390FLICState *flic, uint64_t cr6) QEMUS390FlicIO *io; uint8_t isc; - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); if (!(flic->pending & CR6_TO_PENDING_IO(cr6))) { return NULL; } @@ -262,7 +262,7 @@ QEMUS390FlicIO *qemu_s390_flic_dequeue_io(QEMUS390FLICState *flic, uint64_t cr6) void qemu_s390_flic_dequeue_crw_mchk(QEMUS390FLICState *flic) { - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); g_assert(flic->pending & FLIC_PENDING_MCHK_CR); flic->pending &= ~FLIC_PENDING_MCHK_CR; } @@ -271,7 +271,7 @@ static void qemu_s390_inject_service(S390FLICState *fs, uint32_t parm) { QEMUS390FLICState *flic = s390_get_qemu_flic(fs); - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); /* multiplexing is good enough for sclp - kvm does it internally as well */ flic->service_param |= parm; flic->pending |= FLIC_PENDING_SERVICE; @@ -287,7 +287,7 @@ static void qemu_s390_inject_io(S390FLICState *fs, uint16_t subchannel_id, QEMUS390FLICState *flic = s390_get_qemu_flic(fs); QEMUS390FlicIO *io; - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); io = g_new0(QEMUS390FlicIO, 1); io->id = subchannel_id; io->nr = subchannel_nr; @@ -304,7 +304,7 @@ static void qemu_s390_inject_crw_mchk(S390FLICState *fs) { QEMUS390FLICState *flic = s390_get_qemu_flic(fs); - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); flic->pending |= FLIC_PENDING_MCHK_CR; qemu_s390_flic_notify(FLIC_PENDING_MCHK_CR); @@ -330,7 +330,7 @@ bool qemu_s390_flic_has_crw_mchk(QEMUS390FLICState *flic) bool qemu_s390_flic_has_any(QEMUS390FLICState *flic) { - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); return !!flic->pending; } @@ -340,7 +340,7 @@ static void qemu_s390_flic_reset(DeviceState *dev) QEMUS390FlicIO *cur, *next; int isc; - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); flic->simm = 0; flic->nimm = 0; flic->pending = 0; diff --git a/hw/misc/edu.c b/hw/misc/edu.c index e64a246d3f..2a976ca2b1 100644 --- a/hw/misc/edu.c +++ b/hw/misc/edu.c @@ -355,9 +355,9 @@ static void *edu_fact_thread(void *opaque) smp_mb__after_rmw(); if (qatomic_read(&edu->status) & EDU_STATUS_IRQFACT) { - qemu_mutex_lock_iothread(); + bql_lock(); edu_raise_irq(edu, FACT_IRQ); - qemu_mutex_unlock_iothread(); + bql_unlock(); } } diff --git a/hw/misc/imx6_src.c b/hw/misc/imx6_src.c index a9c64d06eb..2b9bb07540 100644 --- a/hw/misc/imx6_src.c +++ b/hw/misc/imx6_src.c @@ -131,7 +131,7 @@ static void imx6_clear_reset_bit(CPUState *cpu, run_on_cpu_data data) struct SRCSCRResetInfo *ri = data.host_ptr; IMX6SRCState *s = ri->s; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); s->regs[SRC_SCR] = deposit32(s->regs[SRC_SCR], ri->reset_bit, 1, 0); DPRINTF("reg[%s] <= 0x%" PRIx32 "\n", diff --git a/hw/misc/imx7_src.c b/hw/misc/imx7_src.c index 983251e86f..77ad7a7eef 100644 --- a/hw/misc/imx7_src.c +++ b/hw/misc/imx7_src.c @@ -136,7 +136,7 @@ static void imx7_clear_reset_bit(CPUState *cpu, run_on_cpu_data data) struct SRCSCRResetInfo *ri = data.host_ptr; IMX7SRCState *s = ri->s; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); s->regs[SRC_A7RCR0] = deposit32(s->regs[SRC_A7RCR0], ri->reset_bit, 1, 0); diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c index 1e2b3baeb1..453fdb9819 100644 --- a/hw/net/xen_nic.c +++ b/hw/net/xen_nic.c @@ -133,7 +133,7 @@ static bool net_tx_packets(struct XenNetDev *netdev) void *page; void *tmpbuf = NULL; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); for (;;) { rc = netdev->tx_ring.req_cons; @@ -260,7 +260,7 @@ static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size RING_IDX rc, rp; void *page; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); if (xen_device_backend_get_state(&netdev->xendev) != XenbusStateConnected) { return -1; @@ -354,7 +354,7 @@ static bool xen_netdev_connect(XenDevice *xendev, Error **errp) XenNetDev *netdev = XEN_NET_DEVICE(xendev); unsigned int port, rx_copy; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); if (xen_device_frontend_scanf(xendev, "tx-ring-ref", "%u", &netdev->tx_ring_ref) != 1) { @@ -425,7 +425,7 @@ static void xen_netdev_disconnect(XenDevice *xendev, Error **errp) trace_xen_netdev_disconnect(netdev->dev); - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); netdev->tx_ring.sring = NULL; netdev->rx_ring.sring = NULL; diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c index 3203a4a728..d84f3f977d 100644 --- a/hw/ppc/pegasos2.c +++ b/hw/ppc/pegasos2.c @@ -515,7 +515,7 @@ static void pegasos2_hypercall(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu) CPUPPCState *env = &cpu->env; /* The TCG path should also be holding the BQL at this point */ - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); if (FIELD_EX64(env->msr, MSR, PR)) { qemu_log_mask(LOG_GUEST_ERROR, "Hypercall made with MSR[PR]=1\n"); diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index be167710a3..b6581c16fc 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -314,7 +314,7 @@ void store_40x_dbcr0(CPUPPCState *env, uint32_t val) { PowerPCCPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); switch ((val >> 28) & 0x3) { case 0x0: @@ -334,7 +334,7 @@ void store_40x_dbcr0(CPUPPCState *env, uint32_t val) break; } - qemu_mutex_unlock_iothread(); + bql_unlock(); } /* PowerPC 40x internal IRQ controller */ diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index e56cfb1d8b..38f7e293b6 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -1304,7 +1304,7 @@ static void emulate_spapr_hypercall(PPCVirtualHypervisor *vhyp, CPUPPCState *env = &cpu->env; /* The TCG path should also be holding the BQL at this point */ - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); g_assert(!vhyp_cpu_in_nested(cpu)); diff --git a/hw/ppc/spapr_rng.c b/hw/ppc/spapr_rng.c index df5c4b9687..c2fda7ad20 100644 --- a/hw/ppc/spapr_rng.c +++ b/hw/ppc/spapr_rng.c @@ -82,9 +82,9 @@ static target_ulong h_random(PowerPCCPU *cpu, SpaprMachineState *spapr, while (hrdata.received < 8) { rng_backend_request_entropy(rngstate->backend, 8 - hrdata.received, random_recv, &hrdata); - qemu_mutex_unlock_iothread(); + bql_unlock(); qemu_sem_wait(&hrdata.sem); - qemu_mutex_lock_iothread(); + bql_lock(); } qemu_sem_destroy(&hrdata.sem); diff --git a/hw/ppc/spapr_softmmu.c b/hw/ppc/spapr_softmmu.c index 278666317e..fc1bbc0b61 100644 --- a/hw/ppc/spapr_softmmu.c +++ b/hw/ppc/spapr_softmmu.c @@ -334,7 +334,7 @@ static void *hpt_prepare_thread(void *opaque) pending->ret = H_NO_MEM; } - qemu_mutex_lock_iothread(); + bql_lock(); if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) { /* Ready to go */ @@ -344,7 +344,7 @@ static void *hpt_prepare_thread(void *opaque) free_pending_hpt(pending); } - qemu_mutex_unlock_iothread(); + bql_unlock(); return NULL; } diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c index 9bd98e8219..d04ac93621 100644 --- a/hw/remote/mpqemu-link.c +++ b/hw/remote/mpqemu-link.c @@ -33,7 +33,7 @@ */ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp) { - bool iolock = qemu_mutex_iothread_locked(); + bool drop_bql = bql_locked(); bool iothread = qemu_in_iothread(); struct iovec send[2] = {}; int *fds = NULL; @@ -63,8 +63,8 @@ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp) * for IOThread case. * Also skip lock handling while in a co-routine in the main context. */ - if (iolock && !iothread && !qemu_in_coroutine()) { - qemu_mutex_unlock_iothread(); + if (drop_bql && !iothread && !qemu_in_coroutine()) { + bql_unlock(); } if (!qio_channel_writev_full_all(ioc, send, G_N_ELEMENTS(send), @@ -74,9 +74,9 @@ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp) trace_mpqemu_send_io_error(msg->cmd, msg->size, nfds); } - if (iolock && !iothread && !qemu_in_coroutine()) { + if (drop_bql && !iothread && !qemu_in_coroutine()) { /* See above comment why skip locking here. */ - qemu_mutex_lock_iothread(); + bql_lock(); } return ret; @@ -96,7 +96,7 @@ static ssize_t mpqemu_read(QIOChannel *ioc, void *buf, size_t len, int **fds, size_t *nfds, Error **errp) { struct iovec iov = { .iov_base = buf, .iov_len = len }; - bool iolock = qemu_mutex_iothread_locked(); + bool drop_bql = bql_locked(); bool iothread = qemu_in_iothread(); int ret = -1; @@ -106,14 +106,14 @@ static ssize_t mpqemu_read(QIOChannel *ioc, void *buf, size_t len, int **fds, */ assert(qemu_in_coroutine() || !iothread); - if (iolock && !iothread && !qemu_in_coroutine()) { - qemu_mutex_unlock_iothread(); + if (drop_bql && !iothread && !qemu_in_coroutine()) { + bql_unlock(); } ret = qio_channel_readv_full_all_eof(ioc, &iov, 1, fds, nfds, errp); - if (iolock && !iothread && !qemu_in_coroutine()) { - qemu_mutex_lock_iothread(); + if (drop_bql && !iothread && !qemu_in_coroutine()) { + bql_lock(); } return (ret <= 0) ? ret : iov.iov_len; diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c index 8b708422fe..8dbafafb9e 100644 --- a/hw/remote/vfio-user-obj.c +++ b/hw/remote/vfio-user-obj.c @@ -400,7 +400,7 @@ static int vfu_object_mr_rw(MemoryRegion *mr, uint8_t *buf, hwaddr offset, } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); release_lock = false; } diff --git a/hw/s390x/s390-skeys.c b/hw/s390x/s390-skeys.c index 8f5159d85d..5c535d483e 100644 --- a/hw/s390x/s390-skeys.c +++ b/hw/s390x/s390-skeys.c @@ -153,7 +153,7 @@ void qmp_dump_skeys(const char *filename, Error **errp) goto out; } - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); guest_phys_blocks_init(&guest_phys_blocks); guest_phys_blocks_append(&guest_phys_blocks); diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h index 5449b6d742..d22ca24329 100644 --- a/include/block/aio-wait.h +++ b/include/block/aio-wait.h @@ -151,7 +151,7 @@ static inline bool in_aio_context_home_thread(AioContext *ctx) } if (ctx == qemu_get_aio_context()) { - return qemu_mutex_iothread_locked(); + return bql_locked(); } else { return false; } diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h index 68e70e61aa..72ebc0cb3a 100644 --- a/include/qemu/main-loop.h +++ b/include/qemu/main-loop.h @@ -248,19 +248,19 @@ GSource *iohandler_get_g_source(void); AioContext *iohandler_get_aio_context(void); /** - * qemu_mutex_iothread_locked: Return lock status of the main loop mutex. + * bql_locked: Return lock status of the Big QEMU Lock (BQL) * - * The main loop mutex is the coarsest lock in QEMU, and as such it + * The Big QEMU Lock (BQL) is the coarsest lock in QEMU, and as such it * must always be taken outside other locks. This function helps * functions take different paths depending on whether the current - * thread is running within the main loop mutex. + * thread is running within the BQL. * * This function should never be used in the block layer, because * unit tests, block layer tools and qemu-storage-daemon do not * have a BQL. * Please instead refer to qemu_in_main_thread(). */ -bool qemu_mutex_iothread_locked(void); +bool bql_locked(void); /** * qemu_in_main_thread: return whether it's possible to safely access @@ -312,58 +312,57 @@ bool qemu_in_main_thread(void); } while (0) /** - * qemu_mutex_lock_iothread: Lock the main loop mutex. + * bql_lock: Lock the Big QEMU Lock (BQL). * - * This function locks the main loop mutex. The mutex is taken by + * This function locks the Big QEMU Lock (BQL). The lock is taken by * main() in vl.c and always taken except while waiting on - * external events (such as with select). The mutex should be taken + * external events (such as with select). The lock should be taken * by threads other than the main loop thread when calling * qemu_bh_new(), qemu_set_fd_handler() and basically all other * functions documented in this file. * - * NOTE: tools currently are single-threaded and qemu_mutex_lock_iothread + * NOTE: tools currently are single-threaded and bql_lock * is a no-op there. */ -#define qemu_mutex_lock_iothread() \ - qemu_mutex_lock_iothread_impl(__FILE__, __LINE__) -void qemu_mutex_lock_iothread_impl(const char *file, int line); +#define bql_lock() bql_lock_impl(__FILE__, __LINE__) +void bql_lock_impl(const char *file, int line); /** - * qemu_mutex_unlock_iothread: Unlock the main loop mutex. + * bql_unlock: Unlock the Big QEMU Lock (BQL). * - * This function unlocks the main loop mutex. The mutex is taken by + * This function unlocks the Big QEMU Lock. The lock is taken by * main() in vl.c and always taken except while waiting on - * external events (such as with select). The mutex should be unlocked + * external events (such as with select). The lock should be unlocked * as soon as possible by threads other than the main loop thread, * because it prevents the main loop from processing callbacks, * including timers and bottom halves. * - * NOTE: tools currently are single-threaded and qemu_mutex_unlock_iothread + * NOTE: tools currently are single-threaded and bql_unlock * is a no-op there. */ -void qemu_mutex_unlock_iothread(void); +void bql_unlock(void); /** * QEMU_IOTHREAD_LOCK_GUARD * - * Wrap a block of code in a conditional qemu_mutex_{lock,unlock}_iothread. + * Wrap a block of code in a conditional bql_{lock,unlock}. */ typedef struct IOThreadLockAuto IOThreadLockAuto; static inline IOThreadLockAuto *qemu_iothread_auto_lock(const char *file, int line) { - if (qemu_mutex_iothread_locked()) { + if (bql_locked()) { return NULL; } - qemu_mutex_lock_iothread_impl(file, line); + bql_lock_impl(file, line); /* Anything non-NULL causes the cleanup function to be called */ return (IOThreadLockAuto *)(uintptr_t)1; } static inline void qemu_iothread_auto_unlock(IOThreadLockAuto *l) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } G_DEFINE_AUTOPTR_CLEANUP_FUNC(IOThreadLockAuto, qemu_iothread_auto_unlock) diff --git a/include/qemu/thread.h b/include/qemu/thread.h index dd3822d7ce..fb74e21c08 100644 --- a/include/qemu/thread.h +++ b/include/qemu/thread.h @@ -47,7 +47,7 @@ typedef void (*QemuCondWaitFunc)(QemuCond *c, QemuMutex *m, const char *f, typedef bool (*QemuCondTimedWaitFunc)(QemuCond *c, QemuMutex *m, int ms, const char *f, int l); -extern QemuMutexLockFunc qemu_bql_mutex_lock_func; +extern QemuMutexLockFunc bql_mutex_lock_func; extern QemuMutexLockFunc qemu_mutex_lock_func; extern QemuMutexTrylockFunc qemu_mutex_trylock_func; extern QemuRecMutexLockFunc qemu_rec_mutex_lock_func; diff --git a/memory_ldst.c.inc b/memory_ldst.c.inc index 84b868f294..0e6f3940a9 100644 --- a/memory_ldst.c.inc +++ b/memory_ldst.c.inc @@ -61,7 +61,7 @@ static inline uint32_t glue(address_space_ldl_internal, SUFFIX)(ARG1_DECL, *result = r; } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } RCU_READ_UNLOCK(); return val; @@ -130,7 +130,7 @@ static inline uint64_t glue(address_space_ldq_internal, SUFFIX)(ARG1_DECL, *result = r; } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } RCU_READ_UNLOCK(); return val; @@ -186,7 +186,7 @@ uint8_t glue(address_space_ldub, SUFFIX)(ARG1_DECL, *result = r; } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } RCU_READ_UNLOCK(); return val; @@ -234,7 +234,7 @@ static inline uint16_t glue(address_space_lduw_internal, SUFFIX)(ARG1_DECL, *result = r; } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } RCU_READ_UNLOCK(); return val; @@ -295,7 +295,7 @@ void glue(address_space_stl_notdirty, SUFFIX)(ARG1_DECL, *result = r; } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } RCU_READ_UNLOCK(); } @@ -339,7 +339,7 @@ static inline void glue(address_space_stl_internal, SUFFIX)(ARG1_DECL, *result = r; } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } RCU_READ_UNLOCK(); } @@ -391,7 +391,7 @@ void glue(address_space_stb, SUFFIX)(ARG1_DECL, *result = r; } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } RCU_READ_UNLOCK(); } @@ -435,7 +435,7 @@ static inline void glue(address_space_stw_internal, SUFFIX)(ARG1_DECL, *result = r; } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } RCU_READ_UNLOCK(); } @@ -499,7 +499,7 @@ static void glue(address_space_stq_internal, SUFFIX)(ARG1_DECL, *result = r; } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } RCU_READ_UNLOCK(); } diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c index 24347ab0f7..92e031b6fa 100644 --- a/migration/block-dirty-bitmap.c +++ b/migration/block-dirty-bitmap.c @@ -774,7 +774,7 @@ static void dirty_bitmap_state_pending(void *opaque, SaveBitmapState *dbms; uint64_t pending = 0; - qemu_mutex_lock_iothread(); + bql_lock(); QSIMPLEQ_FOREACH(dbms, &s->dbms_list, entry) { uint64_t gran = bdrv_dirty_bitmap_granularity(dbms->bitmap); @@ -784,7 +784,7 @@ static void dirty_bitmap_state_pending(void *opaque, pending += DIV_ROUND_UP(sectors * BDRV_SECTOR_SIZE, gran); } - qemu_mutex_unlock_iothread(); + bql_unlock(); trace_dirty_bitmap_state_pending(pending); diff --git a/migration/block.c b/migration/block.c index 710ef6f490..f7e34e5f8d 100644 --- a/migration/block.c +++ b/migration/block.c @@ -269,7 +269,7 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) int64_t count; if (bmds->shared_base) { - qemu_mutex_lock_iothread(); + bql_lock(); aio_context_acquire(blk_get_aio_context(bb)); /* Skip unallocated sectors; intentionally treats failure or * partial sector as an allocated sector */ @@ -282,7 +282,7 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) cur_sector += count >> BDRV_SECTOR_BITS; } aio_context_release(blk_get_aio_context(bb)); - qemu_mutex_unlock_iothread(); + bql_unlock(); } if (cur_sector >= total_sectors) { @@ -321,14 +321,14 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) * This is ugly and will disappear when we make bdrv_* thread-safe, * without the need to acquire the AioContext. */ - qemu_mutex_lock_iothread(); + bql_lock(); aio_context_acquire(blk_get_aio_context(bmds->blk)); bdrv_reset_dirty_bitmap(bmds->dirty_bitmap, cur_sector * BDRV_SECTOR_SIZE, nr_sectors * BDRV_SECTOR_SIZE); blk->aiocb = blk_aio_preadv(bb, cur_sector * BDRV_SECTOR_SIZE, &blk->qiov, 0, blk_mig_read_cb, blk); aio_context_release(blk_get_aio_context(bmds->blk)); - qemu_mutex_unlock_iothread(); + bql_unlock(); bmds->cur_sector = cur_sector + nr_sectors; return (bmds->cur_sector >= total_sectors); @@ -789,9 +789,9 @@ static int block_save_iterate(QEMUFile *f, void *opaque) /* Always called with iothread lock taken for * simplicity, block_save_complete also calls it. */ - qemu_mutex_lock_iothread(); + bql_lock(); ret = blk_mig_save_dirty_block(f, 1); - qemu_mutex_unlock_iothread(); + bql_unlock(); } if (ret < 0) { return ret; @@ -863,9 +863,9 @@ static void block_state_pending(void *opaque, uint64_t *must_precopy, /* Estimate pending number of bytes to send */ uint64_t pending; - qemu_mutex_lock_iothread(); + bql_lock(); pending = get_remaining_dirty(); - qemu_mutex_unlock_iothread(); + bql_unlock(); blk_mig_lock(); pending += block_mig_state.submitted * BLK_MIG_BLOCK_SIZE + diff --git a/migration/colo.c b/migration/colo.c index 8f301b7e57..27cb3a9bea 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -420,13 +420,13 @@ static int colo_do_checkpoint_transaction(MigrationState *s, qio_channel_io_seek(QIO_CHANNEL(bioc), 0, 0, NULL); bioc->usage = 0; - qemu_mutex_lock_iothread(); + bql_lock(); if (failover_get_state() != FAILOVER_STATUS_NONE) { - qemu_mutex_unlock_iothread(); + bql_unlock(); goto out; } vm_stop_force_state(RUN_STATE_COLO); - qemu_mutex_unlock_iothread(); + bql_unlock(); trace_colo_vm_state_change("run", "stop"); /* * Failover request bh could be called after vm_stop_force_state(), @@ -435,23 +435,23 @@ static int colo_do_checkpoint_transaction(MigrationState *s, if (failover_get_state() != FAILOVER_STATUS_NONE) { goto out; } - qemu_mutex_lock_iothread(); + bql_lock(); replication_do_checkpoint_all(&local_err); if (local_err) { - qemu_mutex_unlock_iothread(); + bql_unlock(); goto out; } colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err); if (local_err) { - qemu_mutex_unlock_iothread(); + bql_unlock(); goto out; } /* Note: device state is saved into buffer */ ret = qemu_save_device_state(fb); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (ret < 0) { goto out; } @@ -504,9 +504,9 @@ static int colo_do_checkpoint_transaction(MigrationState *s, ret = 0; - qemu_mutex_lock_iothread(); + bql_lock(); vm_start(); - qemu_mutex_unlock_iothread(); + bql_unlock(); trace_colo_vm_state_change("stop", "run"); out: @@ -557,15 +557,15 @@ static void colo_process_checkpoint(MigrationState *s) fb = qemu_file_new_output(QIO_CHANNEL(bioc)); object_unref(OBJECT(bioc)); - qemu_mutex_lock_iothread(); + bql_lock(); replication_start_all(REPLICATION_MODE_PRIMARY, &local_err); if (local_err) { - qemu_mutex_unlock_iothread(); + bql_unlock(); goto out; } vm_start(); - qemu_mutex_unlock_iothread(); + bql_unlock(); trace_colo_vm_state_change("stop", "run"); timer_mod(s->colo_delay_timer, qemu_clock_get_ms(QEMU_CLOCK_HOST) + @@ -639,14 +639,14 @@ out: void migrate_start_colo_process(MigrationState *s) { - qemu_mutex_unlock_iothread(); + bql_unlock(); qemu_event_init(&s->colo_checkpoint_event, false); s->colo_delay_timer = timer_new_ms(QEMU_CLOCK_HOST, colo_checkpoint_notify, s); qemu_sem_init(&s->colo_exit_sem, 0); colo_process_checkpoint(s); - qemu_mutex_lock_iothread(); + bql_lock(); } static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, @@ -657,9 +657,9 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, Error *local_err = NULL; int ret; - qemu_mutex_lock_iothread(); + bql_lock(); vm_stop_force_state(RUN_STATE_COLO); - qemu_mutex_unlock_iothread(); + bql_unlock(); trace_colo_vm_state_change("run", "stop"); /* FIXME: This is unnecessary for periodic checkpoint mode */ @@ -677,10 +677,10 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, return; } - qemu_mutex_lock_iothread(); + bql_lock(); cpu_synchronize_all_states(); ret = qemu_loadvm_state_main(mis->from_src_file, mis); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (ret < 0) { error_setg(errp, "Load VM's live state (ram) error"); @@ -719,14 +719,14 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, return; } - qemu_mutex_lock_iothread(); + bql_lock(); vmstate_loading = true; colo_flush_ram_cache(); ret = qemu_load_device_state(fb); if (ret < 0) { error_setg(errp, "COLO: load device state failed"); vmstate_loading = false; - qemu_mutex_unlock_iothread(); + bql_unlock(); return; } @@ -734,7 +734,7 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, if (local_err) { error_propagate(errp, local_err); vmstate_loading = false; - qemu_mutex_unlock_iothread(); + bql_unlock(); return; } @@ -743,7 +743,7 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, if (local_err) { error_propagate(errp, local_err); vmstate_loading = false; - qemu_mutex_unlock_iothread(); + bql_unlock(); return; } /* Notify all filters of all NIC to do checkpoint */ @@ -752,13 +752,13 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis, if (local_err) { error_propagate(errp, local_err); vmstate_loading = false; - qemu_mutex_unlock_iothread(); + bql_unlock(); return; } vmstate_loading = false; vm_start(); - qemu_mutex_unlock_iothread(); + bql_unlock(); trace_colo_vm_state_change("stop", "run"); if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { @@ -831,14 +831,14 @@ static void *colo_process_incoming_thread(void *opaque) } /* Make sure all file formats throw away their mutable metadata */ - qemu_mutex_lock_iothread(); + bql_lock(); bdrv_activate_all(&local_err); if (local_err) { - qemu_mutex_unlock_iothread(); + bql_unlock(); error_report_err(local_err); return NULL; } - qemu_mutex_unlock_iothread(); + bql_unlock(); failover_init_state(); @@ -861,14 +861,14 @@ static void *colo_process_incoming_thread(void *opaque) fb = qemu_file_new_input(QIO_CHANNEL(bioc)); object_unref(OBJECT(bioc)); - qemu_mutex_lock_iothread(); + bql_lock(); replication_start_all(REPLICATION_MODE_SECONDARY, &local_err); if (local_err) { - qemu_mutex_unlock_iothread(); + bql_unlock(); goto out; } vm_start(); - qemu_mutex_unlock_iothread(); + bql_unlock(); trace_colo_vm_state_change("stop", "run"); colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY, @@ -929,7 +929,7 @@ int coroutine_fn colo_incoming_co(void) MigrationIncomingState *mis = migration_incoming_get_current(); QemuThread th; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); if (!migration_incoming_colo_enabled()) { return 0; @@ -942,10 +942,10 @@ int coroutine_fn colo_incoming_co(void) qemu_coroutine_yield(); mis->colo_incoming_co = NULL; - qemu_mutex_unlock_iothread(); + bql_unlock(); /* Wait checkpoint incoming thread exit before free resource */ qemu_thread_join(&th); - qemu_mutex_lock_iothread(); + bql_lock(); /* We hold the global iothread lock, so it is safe here */ colo_release_ram_cache(); diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c index 036ac017fc..429d10c4d9 100644 --- a/migration/dirtyrate.c +++ b/migration/dirtyrate.c @@ -90,13 +90,13 @@ static int64_t do_calculate_dirtyrate(DirtyPageRecord dirty_pages, void global_dirty_log_change(unsigned int flag, bool start) { - qemu_mutex_lock_iothread(); + bql_lock(); if (start) { memory_global_dirty_log_start(flag); } else { memory_global_dirty_log_stop(flag); } - qemu_mutex_unlock_iothread(); + bql_unlock(); } /* @@ -106,12 +106,12 @@ void global_dirty_log_change(unsigned int flag, bool start) */ static void global_dirty_log_sync(unsigned int flag, bool one_shot) { - qemu_mutex_lock_iothread(); + bql_lock(); memory_global_dirty_log_sync(false); if (one_shot) { memory_global_dirty_log_stop(flag); } - qemu_mutex_unlock_iothread(); + bql_unlock(); } static DirtyPageRecord *vcpu_dirty_stat_alloc(VcpuStat *stat) @@ -610,7 +610,7 @@ static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config) int64_t start_time; DirtyPageRecord dirty_pages; - qemu_mutex_lock_iothread(); + bql_lock(); memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE); /* @@ -627,7 +627,7 @@ static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config) * KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE cap is enabled. */ dirtyrate_manual_reset_protect(); - qemu_mutex_unlock_iothread(); + bql_unlock(); record_dirtypages_bitmap(&dirty_pages, true); diff --git a/migration/migration.c b/migration/migration.c index 1ac0a9dfac..7e3202bd43 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1312,12 +1312,12 @@ static void migrate_fd_cleanup(MigrationState *s) QEMUFile *tmp; trace_migrate_fd_cleanup(); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (s->migration_thread_running) { qemu_thread_join(&s->thread); s->migration_thread_running = false; } - qemu_mutex_lock_iothread(); + bql_lock(); multifd_send_shutdown(); qemu_mutex_lock(&s->qemu_file_lock); @@ -2462,7 +2462,7 @@ static int postcopy_start(MigrationState *ms, Error **errp) } trace_postcopy_start(); - qemu_mutex_lock_iothread(); + bql_lock(); trace_postcopy_start_set_run(); migration_downtime_start(ms); @@ -2571,7 +2571,7 @@ static int postcopy_start(MigrationState *ms, Error **errp) migration_downtime_end(ms); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (migrate_postcopy_ram()) { /* @@ -2612,7 +2612,7 @@ fail: error_report_err(local_err); } } - qemu_mutex_unlock_iothread(); + bql_unlock(); return -1; } @@ -2646,14 +2646,14 @@ static int migration_maybe_pause(MigrationState *s, * wait for the 'pause_sem' semaphore. */ if (s->state != MIGRATION_STATUS_CANCELLING) { - qemu_mutex_unlock_iothread(); + bql_unlock(); migrate_set_state(&s->state, *current_active_state, MIGRATION_STATUS_PRE_SWITCHOVER); qemu_sem_wait(&s->pause_sem); migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER, new_state); *current_active_state = new_state; - qemu_mutex_lock_iothread(); + bql_lock(); } return s->state == new_state ? 0 : -EINVAL; @@ -2664,7 +2664,7 @@ static int migration_completion_precopy(MigrationState *s, { int ret; - qemu_mutex_lock_iothread(); + bql_lock(); migration_downtime_start(s); qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); @@ -2692,7 +2692,7 @@ static int migration_completion_precopy(MigrationState *s, ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false, s->block_inactive); out_unlock: - qemu_mutex_unlock_iothread(); + bql_unlock(); return ret; } @@ -2700,9 +2700,9 @@ static void migration_completion_postcopy(MigrationState *s) { trace_migration_completion_postcopy_end(); - qemu_mutex_lock_iothread(); + bql_lock(); qemu_savevm_state_complete_postcopy(s->to_dst_file); - qemu_mutex_unlock_iothread(); + bql_unlock(); /* * Shutdown the postcopy fast path thread. This is only needed when dest @@ -2726,14 +2726,14 @@ static void migration_completion_failed(MigrationState *s, */ Error *local_err = NULL; - qemu_mutex_lock_iothread(); + bql_lock(); bdrv_activate_all(&local_err); if (local_err) { error_report_err(local_err); } else { s->block_inactive = false; } - qemu_mutex_unlock_iothread(); + bql_unlock(); } migrate_set_state(&s->state, current_active_state, @@ -3173,7 +3173,7 @@ static void migration_iteration_finish(MigrationState *s) /* If we enabled cpu throttling for auto-converge, turn it off. */ cpu_throttle_stop(); - qemu_mutex_lock_iothread(); + bql_lock(); switch (s->state) { case MIGRATION_STATUS_COMPLETED: migration_calculate_complete(s); @@ -3204,7 +3204,7 @@ static void migration_iteration_finish(MigrationState *s) break; } migrate_fd_cleanup_schedule(s); - qemu_mutex_unlock_iothread(); + bql_unlock(); } static void bg_migration_iteration_finish(MigrationState *s) @@ -3216,7 +3216,7 @@ static void bg_migration_iteration_finish(MigrationState *s) */ ram_write_tracking_stop(); - qemu_mutex_lock_iothread(); + bql_lock(); switch (s->state) { case MIGRATION_STATUS_COMPLETED: migration_calculate_complete(s); @@ -3235,7 +3235,7 @@ static void bg_migration_iteration_finish(MigrationState *s) } migrate_fd_cleanup_schedule(s); - qemu_mutex_unlock_iothread(); + bql_unlock(); } /* @@ -3364,9 +3364,9 @@ static void *migration_thread(void *opaque) goto out; } - qemu_mutex_lock_iothread(); + bql_lock(); qemu_savevm_state_header(s->to_dst_file); - qemu_mutex_unlock_iothread(); + bql_unlock(); /* * If we opened the return path, we need to make sure dst has it @@ -3394,9 +3394,9 @@ static void *migration_thread(void *opaque) qemu_savevm_send_colo_enable(s->to_dst_file); } - qemu_mutex_lock_iothread(); + bql_lock(); qemu_savevm_state_setup(s->to_dst_file); - qemu_mutex_unlock_iothread(); + bql_unlock(); qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE); @@ -3508,10 +3508,10 @@ static void *bg_migration_thread(void *opaque) ram_write_tracking_prepare(); #endif - qemu_mutex_lock_iothread(); + bql_lock(); qemu_savevm_state_header(s->to_dst_file); qemu_savevm_state_setup(s->to_dst_file); - qemu_mutex_unlock_iothread(); + bql_unlock(); qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_ACTIVE); @@ -3521,7 +3521,7 @@ static void *bg_migration_thread(void *opaque) trace_migration_thread_setup_complete(); migration_downtime_start(s); - qemu_mutex_lock_iothread(); + bql_lock(); /* * If VM is currently in suspended state, then, to make a valid runstate @@ -3564,7 +3564,7 @@ static void *bg_migration_thread(void *opaque) s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s); qemu_bh_schedule(s->vm_start_bh); - qemu_mutex_unlock_iothread(); + bql_unlock(); while (migration_is_active(s)) { MigIterateState iter_state = bg_migration_iteration_run(s); @@ -3593,7 +3593,7 @@ fail: if (early_fail) { migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_FAILED); - qemu_mutex_unlock_iothread(); + bql_unlock(); } bg_migration_iteration_finish(s); diff --git a/migration/ram.c b/migration/ram.c index e6baecf143..26708cf3f2 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3443,9 +3443,9 @@ static int ram_save_setup(QEMUFile *f, void *opaque) migration_ops->ram_save_target_page = ram_save_target_page_legacy; } - qemu_mutex_unlock_iothread(); + bql_unlock(); ret = multifd_send_sync_main(); - qemu_mutex_lock_iothread(); + bql_lock(); if (ret < 0) { return ret; } @@ -3702,11 +3702,11 @@ static void ram_state_pending_exact(void *opaque, uint64_t *must_precopy, uint64_t remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; if (!migration_in_postcopy() && remaining_size < s->threshold_size) { - qemu_mutex_lock_iothread(); + bql_lock(); WITH_RCU_READ_LOCK_GUARD() { migration_bitmap_sync_precopy(rs, false); } - qemu_mutex_unlock_iothread(); + bql_unlock(); remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; } @@ -3934,7 +3934,7 @@ void colo_incoming_start_dirty_log(void) { RAMBlock *block = NULL; /* For memory_global_dirty_log_start below. */ - qemu_mutex_lock_iothread(); + bql_lock(); qemu_mutex_lock_ramlist(); memory_global_dirty_log_sync(false); @@ -3948,7 +3948,7 @@ void colo_incoming_start_dirty_log(void) } ram_state->migration_dirty_pages = 0; qemu_mutex_unlock_ramlist(); - qemu_mutex_unlock_iothread(); + bql_unlock(); } /* It is need to hold the global lock to call this helper */ diff --git a/migration/savevm.c b/migration/savevm.c index bde05eebe4..563a4e5599 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2857,7 +2857,7 @@ int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis) uint8_t section_type; int ret = 0; - if (qemu_mutex_iothread_locked()) { + if (bql_locked()) { memory_region_transaction_begin(); } @@ -2904,7 +2904,7 @@ retry: } out: - if (qemu_mutex_iothread_locked()) { + if (bql_locked()) { memory_region_transaction_commit(); } if (ret < 0) { diff --git a/replay/replay-internal.c b/replay/replay-internal.c index 77d0c82327..3e08e381cb 100644 --- a/replay/replay-internal.c +++ b/replay/replay-internal.c @@ -216,7 +216,7 @@ void replay_mutex_lock(void) { if (replay_mode != REPLAY_MODE_NONE) { unsigned long id; - g_assert(!qemu_mutex_iothread_locked()); + g_assert(!bql_locked()); g_assert(!replay_mutex_locked()); qemu_mutex_lock(&lock); id = mutex_tail++; diff --git a/semihosting/console.c b/semihosting/console.c index 5d61e8207e..60102bbab6 100644 --- a/semihosting/console.c +++ b/semihosting/console.c @@ -43,7 +43,7 @@ static SemihostingConsole console; static int console_can_read(void *opaque) { SemihostingConsole *c = opaque; - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); return (int)fifo8_num_free(&c->fifo); } @@ -58,7 +58,7 @@ static void console_wake_up(gpointer data, gpointer user_data) static void console_read(void *opaque, const uint8_t *buf, int size) { SemihostingConsole *c = opaque; - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); while (size-- && !fifo8_is_full(&c->fifo)) { fifo8_push(&c->fifo, *buf++); } @@ -70,7 +70,7 @@ bool qemu_semihosting_console_ready(void) { SemihostingConsole *c = &console; - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); return !fifo8_is_empty(&c->fifo); } @@ -78,7 +78,7 @@ void qemu_semihosting_console_block_until_ready(CPUState *cs) { SemihostingConsole *c = &console; - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); /* Block if the fifo is completely empty. */ if (fifo8_is_empty(&c->fifo)) { diff --git a/stubs/iothread-lock.c b/stubs/iothread-lock.c index 5b45b7fc8b..d7890e5581 100644 --- a/stubs/iothread-lock.c +++ b/stubs/iothread-lock.c @@ -1,15 +1,15 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" -bool qemu_mutex_iothread_locked(void) +bool bql_locked(void) { return false; } -void qemu_mutex_lock_iothread_impl(const char *file, int line) +void bql_lock_impl(const char *file, int line) { } -void qemu_mutex_unlock_iothread(void) +void bql_unlock(void) { } diff --git a/system/cpu-throttle.c b/system/cpu-throttle.c index d9bb30a223..786a9a5639 100644 --- a/system/cpu-throttle.c +++ b/system/cpu-throttle.c @@ -57,9 +57,9 @@ static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque) qemu_cond_timedwait_iothread(cpu->halt_cond, sleeptime_ns / SCALE_MS); } else { - qemu_mutex_unlock_iothread(); + bql_unlock(); g_usleep(sleeptime_ns / SCALE_US); - qemu_mutex_lock_iothread(); + bql_lock(); } sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME); } diff --git a/system/cpus.c b/system/cpus.c index d9de09b9e8..27622356c9 100644 --- a/system/cpus.c +++ b/system/cpus.c @@ -65,7 +65,8 @@ #endif /* CONFIG_LINUX */ -static QemuMutex qemu_global_mutex; +/* The Big QEMU Lock (BQL) */ +static QemuMutex bql; /* * The chosen accelerator is supposed to register this. @@ -403,14 +404,14 @@ void qemu_init_cpu_loop(void) qemu_init_sigbus(); qemu_cond_init(&qemu_cpu_cond); qemu_cond_init(&qemu_pause_cond); - qemu_mutex_init(&qemu_global_mutex); + qemu_mutex_init(&bql); qemu_thread_get_self(&io_thread); } void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data) { - do_run_on_cpu(cpu, func, data, &qemu_global_mutex); + do_run_on_cpu(cpu, func, data, &bql); } static void qemu_cpu_stop(CPUState *cpu, bool exit) @@ -442,7 +443,7 @@ void qemu_wait_io_event(CPUState *cpu) slept = true; qemu_plugin_vcpu_idle_cb(cpu); } - qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); + qemu_cond_wait(cpu->halt_cond, &bql); } if (slept) { qemu_plugin_vcpu_resume_cb(cpu); @@ -495,46 +496,46 @@ bool qemu_in_vcpu_thread(void) return current_cpu && qemu_cpu_is_self(current_cpu); } -QEMU_DEFINE_STATIC_CO_TLS(bool, iothread_locked) +QEMU_DEFINE_STATIC_CO_TLS(bool, bql_locked) -bool qemu_mutex_iothread_locked(void) +bool bql_locked(void) { - return get_iothread_locked(); + return get_bql_locked(); } bool qemu_in_main_thread(void) { - return qemu_mutex_iothread_locked(); + return bql_locked(); } /* * The BQL is taken from so many places that it is worth profiling the * callers directly, instead of funneling them all through a single function. */ -void qemu_mutex_lock_iothread_impl(const char *file, int line) +void bql_lock_impl(const char *file, int line) { - QemuMutexLockFunc bql_lock = qatomic_read(&qemu_bql_mutex_lock_func); + QemuMutexLockFunc bql_lock_fn = qatomic_read(&bql_mutex_lock_func); - g_assert(!qemu_mutex_iothread_locked()); - bql_lock(&qemu_global_mutex, file, line); - set_iothread_locked(true); + g_assert(!bql_locked()); + bql_lock_fn(&bql, file, line); + set_bql_locked(true); } -void qemu_mutex_unlock_iothread(void) +void bql_unlock(void) { - g_assert(qemu_mutex_iothread_locked()); - set_iothread_locked(false); - qemu_mutex_unlock(&qemu_global_mutex); + g_assert(bql_locked()); + set_bql_locked(false); + qemu_mutex_unlock(&bql); } void qemu_cond_wait_iothread(QemuCond *cond) { - qemu_cond_wait(cond, &qemu_global_mutex); + qemu_cond_wait(cond, &bql); } void qemu_cond_timedwait_iothread(QemuCond *cond, int ms) { - qemu_cond_timedwait(cond, &qemu_global_mutex, ms); + qemu_cond_timedwait(cond, &bql, ms); } /* signal CPU creation */ @@ -595,7 +596,7 @@ retry: replay_mutex_unlock(); while (!all_vcpus_paused()) { - qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex); + qemu_cond_wait(&qemu_pause_cond, &bql); /* During we waited on qemu_pause_cond the bql was unlocked, * the vcpu's state may has been changed by other thread, so * we must request the pause state on all vcpus again. @@ -603,9 +604,9 @@ retry: request_pause_all_vcpus(); } - qemu_mutex_unlock_iothread(); + bql_unlock(); replay_mutex_lock(); - qemu_mutex_lock_iothread(); + bql_lock(); /* During the bql was unlocked, the vcpu's state may has been * changed by other thread, so we must retry. @@ -644,9 +645,9 @@ void cpu_remove_sync(CPUState *cpu) cpu->stop = true; cpu->unplug = true; qemu_cpu_kick(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); qemu_thread_join(cpu->thread); - qemu_mutex_lock_iothread(); + bql_lock(); } void cpus_register_accel(const AccelOpsClass *ops) @@ -685,7 +686,7 @@ void qemu_init_vcpu(CPUState *cpu) cpus_accel->create_vcpu_thread(cpu); while (!cpu->created) { - qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex); + qemu_cond_wait(&qemu_cpu_cond, &bql); } } diff --git a/system/dirtylimit.c b/system/dirtylimit.c index 495c7a7082..b5607eb8c2 100644 --- a/system/dirtylimit.c +++ b/system/dirtylimit.c @@ -148,9 +148,9 @@ void vcpu_dirty_rate_stat_stop(void) { qatomic_set(&vcpu_dirty_rate_stat->running, 0); dirtylimit_state_unlock(); - qemu_mutex_unlock_iothread(); + bql_unlock(); qemu_thread_join(&vcpu_dirty_rate_stat->thread); - qemu_mutex_lock_iothread(); + bql_lock(); dirtylimit_state_lock(); } diff --git a/system/memory.c b/system/memory.c index 30eaa753bc..7f7f64bd4b 100644 --- a/system/memory.c +++ b/system/memory.c @@ -1169,7 +1169,7 @@ void memory_region_commit(void) void memory_region_transaction_commit(void) { assert(memory_region_transaction_depth); - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); --memory_region_transaction_depth; if (!memory_region_transaction_depth) { diff --git a/system/physmem.c b/system/physmem.c index 4d3bf75ce8..f4bc6eeec4 100644 --- a/system/physmem.c +++ b/system/physmem.c @@ -2726,8 +2726,8 @@ bool prepare_mmio_access(MemoryRegion *mr) { bool release_lock = false; - if (!qemu_mutex_iothread_locked()) { - qemu_mutex_lock_iothread(); + if (!bql_locked()) { + bql_lock(); release_lock = true; } if (mr->flush_coalesced_mmio) { @@ -2808,7 +2808,7 @@ static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr, } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); release_lock = false; } @@ -2886,7 +2886,7 @@ MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr, } if (release_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); release_lock = false; } diff --git a/system/runstate.c b/system/runstate.c index da1c959f17..05dcd0cc8f 100644 --- a/system/runstate.c +++ b/system/runstate.c @@ -832,7 +832,7 @@ void qemu_init_subsystems(void) qemu_init_cpu_list(); qemu_init_cpu_loop(); - qemu_mutex_lock_iothread(); + bql_lock(); atexit(qemu_run_exit_notifiers); diff --git a/system/watchpoint.c b/system/watchpoint.c index ba5ad13352..b76007ebf6 100644 --- a/system/watchpoint.c +++ b/system/watchpoint.c @@ -155,9 +155,9 @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len, * Now raise the debug interrupt so that it will * trigger after the current instruction. */ - qemu_mutex_lock_iothread(); + bql_lock(); cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG); - qemu_mutex_unlock_iothread(); + bql_unlock(); return; } diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c index fb19b04189..35b9386134 100644 --- a/target/arm/arm-powerctl.c +++ b/target/arm/arm-powerctl.c @@ -108,7 +108,7 @@ static void arm_set_cpu_on_async_work(CPUState *target_cpu_state, g_free(info); /* Finally set the power status */ - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); target_cpu->power_state = PSCI_ON; } @@ -120,7 +120,7 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id, CPUArchId *arch_id; struct CpuOnInfo *info; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); DPRINTF("cpu %" PRId64 " (EL %d, %s) @ 0x%" PRIx64 " with R0 = 0x%" PRIx64 "\n", cpuid, target_el, target_aa64 ? "aarch64" : "aarch32", entry, @@ -229,7 +229,7 @@ static void arm_set_cpu_on_and_reset_async_work(CPUState *target_cpu_state, target_cpu_state->halted = 0; /* Finally set the power status */ - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); target_cpu->power_state = PSCI_ON; } @@ -238,7 +238,7 @@ int arm_set_cpu_on_and_reset(uint64_t cpuid) CPUState *target_cpu_state; ARMCPU *target_cpu; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); /* Retrieve the cpu we are powering up */ target_cpu_state = arm_get_cpu_by_id(cpuid); @@ -280,7 +280,7 @@ static void arm_set_cpu_off_async_work(CPUState *target_cpu_state, { ARMCPU *target_cpu = ARM_CPU(target_cpu_state); - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); target_cpu->power_state = PSCI_OFF; target_cpu_state->halted = 1; target_cpu_state->exception_index = EXCP_HLT; @@ -291,7 +291,7 @@ int arm_set_cpu_off(uint64_t cpuid) CPUState *target_cpu_state; ARMCPU *target_cpu; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); DPRINTF("cpu %" PRId64 "\n", cpuid); @@ -327,7 +327,7 @@ int arm_reset_cpu(uint64_t cpuid) CPUState *target_cpu_state; ARMCPU *target_cpu; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); DPRINTF("cpu %" PRId64 "\n", cpuid); diff --git a/target/arm/helper.c b/target/arm/helper.c index 0370a739e3..89ae7c69db 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -5852,7 +5852,7 @@ static void do_hcr_write(CPUARMState *env, uint64_t value, uint64_t valid_mask) * VFIQ are masked unless running at EL0 or EL1, and HCR * can only be written at EL2. */ - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); arm_cpu_update_virq(cpu); arm_cpu_update_vfiq(cpu); arm_cpu_update_vserr(cpu); @@ -11395,7 +11395,7 @@ void arm_cpu_do_interrupt(CPUState *cs) * BQL needs to be held for any modification of * cs->interrupt_request. */ - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); arm_call_pre_el_change_hook(cpu); diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c index d7cc00a084..c34f1e725d 100644 --- a/target/arm/hvf/hvf.c +++ b/target/arm/hvf/hvf.c @@ -1719,9 +1719,9 @@ static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts) * sleeping. */ qatomic_set_mb(&cpu->thread_kicked, false); - qemu_mutex_unlock_iothread(); + bql_unlock(); pselect(0, 0, 0, 0, ts, &cpu->accel->unblock_ipi_mask); - qemu_mutex_lock_iothread(); + bql_lock(); } static void hvf_wfi(CPUState *cpu) @@ -1822,7 +1822,7 @@ int hvf_vcpu_exec(CPUState *cpu) flush_cpu_state(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); assert_hvf_ok(hv_vcpu_run(cpu->accel->fd)); /* handle VMEXIT */ @@ -1831,7 +1831,7 @@ int hvf_vcpu_exec(CPUState *cpu) uint32_t ec = syn_get_ec(syndrome); ret = 0; - qemu_mutex_lock_iothread(); + bql_lock(); switch (exit_reason) { case HV_EXIT_REASON_EXCEPTION: /* This is the main one, handle below. */ diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 127e33b42a..5e71b74892 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -1009,7 +1009,7 @@ MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) if (run->s.regs.device_irq_level != cpu->device_irq_level) { switched_level = cpu->device_irq_level ^ run->s.regs.device_irq_level; - qemu_mutex_lock_iothread(); + bql_lock(); if (switched_level & KVM_ARM_DEV_EL1_VTIMER) { qemu_set_irq(cpu->gt_timer_outputs[GTIMER_VIRT], @@ -1038,7 +1038,7 @@ MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) /* We also mark unknown levels as processed to not waste cycles */ cpu->device_irq_level = run->s.regs.device_irq_level; - qemu_mutex_unlock_iothread(); + bql_unlock(); } return MEMTXATTRS_UNSPECIFIED; @@ -1122,9 +1122,9 @@ static int kvm_arm_handle_hypercall(CPUState *cs, struct kvm_run *run) env->exception.syndrome = syn_aa64_hvc(0); } env->exception.target_el = 1; - qemu_mutex_lock_iothread(); + bql_lock(); arm_cpu_do_interrupt(cs); - qemu_mutex_unlock_iothread(); + bql_unlock(); /* * For PSCI, exit the kvm_run loop and process the work. Especially diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c index 6a8aad0f06..1af807cab5 100644 --- a/target/arm/kvm64.c +++ b/target/arm/kvm64.c @@ -1368,9 +1368,9 @@ bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit) env->exception.syndrome = debug_exit->hsr; env->exception.vaddress = debug_exit->far; env->exception.target_el = 1; - qemu_mutex_lock_iothread(); + bql_lock(); arm_cpu_do_interrupt(cs); - qemu_mutex_unlock_iothread(); + bql_unlock(); return false; } diff --git a/target/arm/psci.c b/target/arm/psci.c index a8690a16af..8a084cf501 100644 --- a/target/arm/psci.c +++ b/target/arm/psci.c @@ -109,7 +109,7 @@ void arm_handle_psci_call(ARMCPU *cpu) } target_cpu = ARM_CPU(target_cpu_state); - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); ret = target_cpu->power_state; break; default: diff --git a/target/arm/ptw.c b/target/arm/ptw.c index 1762b058ae..0ecd3a36da 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -772,9 +772,9 @@ static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val, #if !TCG_OVERSIZED_GUEST # error "Unexpected configuration" #endif - bool locked = qemu_mutex_iothread_locked(); + bool locked = bql_locked(); if (!locked) { - qemu_mutex_lock_iothread(); + bql_lock(); } if (ptw->out_be) { cur_val = ldq_be_p(host); @@ -788,7 +788,7 @@ static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val, } } if (!locked) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } #endif diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c index 8ad84623d3..198b975f20 100644 --- a/target/arm/tcg/helper-a64.c +++ b/target/arm/tcg/helper-a64.c @@ -809,9 +809,9 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) goto illegal_return; } - qemu_mutex_lock_iothread(); + bql_lock(); arm_call_pre_el_change_hook(env_archcpu(env)); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (!return_to_aa64) { env->aarch64 = false; @@ -876,9 +876,9 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc) */ aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64); - qemu_mutex_lock_iothread(); + bql_lock(); arm_call_el_change_hook(env_archcpu(env)); - qemu_mutex_unlock_iothread(); + bql_unlock(); return; diff --git a/target/arm/tcg/m_helper.c b/target/arm/tcg/m_helper.c index a26adb75aa..d1f1e02acc 100644 --- a/target/arm/tcg/m_helper.c +++ b/target/arm/tcg/m_helper.c @@ -373,8 +373,8 @@ void HELPER(v7m_preserve_fp_state)(CPUARMState *env) bool ts = is_secure && (env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_TS_MASK); bool take_exception; - /* Take the iothread lock as we are going to touch the NVIC */ - qemu_mutex_lock_iothread(); + /* Take the BQL as we are going to touch the NVIC */ + bql_lock(); /* Check the background context had access to the FPU */ if (!v7m_cpacr_pass(env, is_secure, is_priv)) { @@ -428,7 +428,7 @@ void HELPER(v7m_preserve_fp_state)(CPUARMState *env) take_exception = !stacked_ok && armv7m_nvic_can_take_pending_exception(env->nvic); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (take_exception) { raise_exception_ra(env, EXCP_LAZYFP, 0, 1, GETPC()); diff --git a/target/arm/tcg/op_helper.c b/target/arm/tcg/op_helper.c index ea08936a85..34e706e0fd 100644 --- a/target/arm/tcg/op_helper.c +++ b/target/arm/tcg/op_helper.c @@ -427,9 +427,9 @@ void HELPER(cpsr_write_eret)(CPUARMState *env, uint32_t val) { uint32_t mask; - qemu_mutex_lock_iothread(); + bql_lock(); arm_call_pre_el_change_hook(env_archcpu(env)); - qemu_mutex_unlock_iothread(); + bql_unlock(); mask = aarch32_cpsr_valid_mask(env->features, &env_archcpu(env)->isar); cpsr_write(env, val, mask, CPSRWriteExceptionReturn); @@ -442,9 +442,9 @@ void HELPER(cpsr_write_eret)(CPUARMState *env, uint32_t val) env->regs[15] &= (env->thumb ? ~1 : ~3); arm_rebuild_hflags(env); - qemu_mutex_lock_iothread(); + bql_lock(); arm_call_el_change_hook(env_archcpu(env)); - qemu_mutex_unlock_iothread(); + bql_unlock(); } /* Access to user mode registers from privileged modes. */ @@ -803,9 +803,9 @@ void HELPER(set_cp_reg)(CPUARMState *env, const void *rip, uint32_t value) const ARMCPRegInfo *ri = rip; if (ri->type & ARM_CP_IO) { - qemu_mutex_lock_iothread(); + bql_lock(); ri->writefn(env, ri, value); - qemu_mutex_unlock_iothread(); + bql_unlock(); } else { ri->writefn(env, ri, value); } @@ -817,9 +817,9 @@ uint32_t HELPER(get_cp_reg)(CPUARMState *env, const void *rip) uint32_t res; if (ri->type & ARM_CP_IO) { - qemu_mutex_lock_iothread(); + bql_lock(); res = ri->readfn(env, ri); - qemu_mutex_unlock_iothread(); + bql_unlock(); } else { res = ri->readfn(env, ri); } @@ -832,9 +832,9 @@ void HELPER(set_cp_reg64)(CPUARMState *env, const void *rip, uint64_t value) const ARMCPRegInfo *ri = rip; if (ri->type & ARM_CP_IO) { - qemu_mutex_lock_iothread(); + bql_lock(); ri->writefn(env, ri, value); - qemu_mutex_unlock_iothread(); + bql_unlock(); } else { ri->writefn(env, ri, value); } @@ -846,9 +846,9 @@ uint64_t HELPER(get_cp_reg64)(CPUARMState *env, const void *rip) uint64_t res; if (ri->type & ARM_CP_IO) { - qemu_mutex_lock_iothread(); + bql_lock(); res = ri->readfn(env, ri); - qemu_mutex_unlock_iothread(); + bql_unlock(); } else { res = ri->readfn(env, ri); } diff --git a/target/hppa/int_helper.c b/target/hppa/int_helper.c index 98e9d688f6..efe638b36e 100644 --- a/target/hppa/int_helper.c +++ b/target/hppa/int_helper.c @@ -84,17 +84,17 @@ void hppa_cpu_alarm_timer(void *opaque) void HELPER(write_eirr)(CPUHPPAState *env, target_ulong val) { env->cr[CR_EIRR] &= ~val; - qemu_mutex_lock_iothread(); + bql_lock(); eval_interrupt(env_archcpu(env)); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(write_eiem)(CPUHPPAState *env, target_ulong val) { env->cr[CR_EIEM] = val; - qemu_mutex_lock_iothread(); + bql_lock(); eval_interrupt(env_archcpu(env)); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void hppa_cpu_do_interrupt(CPUState *cs) diff --git a/target/i386/hvf/README.md b/target/i386/hvf/README.md index 2d33477aca..64a8935237 100644 --- a/target/i386/hvf/README.md +++ b/target/i386/hvf/README.md @@ -4,4 +4,4 @@ These sources (and ../hvf-all.c) are adapted from Veertu Inc's vdhh (Veertu Desk 1. Adapt to our current QEMU's `CPUState` structure and `address_space_rw` API; many struct members have been moved around (emulated x86 state, xsave_buf) due to historical differences + QEMU needing to handle more emulation targets. 2. Removal of `apic_page` and hyperv-related functionality. -3. More relaxed use of `qemu_mutex_lock_iothread`. +3. More relaxed use of `bql_lock`. diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 20b9ca3ef5..11ffdd4c69 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -429,9 +429,9 @@ int hvf_vcpu_exec(CPUState *cpu) } vmx_update_tpr(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) { - qemu_mutex_lock_iothread(); + bql_lock(); return EXCP_HLT; } @@ -450,7 +450,7 @@ int hvf_vcpu_exec(CPUState *cpu) rip = rreg(cpu->accel->fd, HV_X86_RIP); env->eflags = rreg(cpu->accel->fd, HV_X86_RFLAGS); - qemu_mutex_lock_iothread(); + bql_lock(); update_apic_tpr(cpu); current_cpu = cpu; diff --git a/target/i386/kvm/hyperv.c b/target/i386/kvm/hyperv.c index e3ac978648..6825c89af3 100644 --- a/target/i386/kvm/hyperv.c +++ b/target/i386/kvm/hyperv.c @@ -45,9 +45,9 @@ void hyperv_x86_synic_update(X86CPU *cpu) static void async_synic_update(CPUState *cs, run_on_cpu_data data) { - qemu_mutex_lock_iothread(); + bql_lock(); hyperv_x86_synic_update(X86_CPU(cs)); - qemu_mutex_unlock_iothread(); + bql_unlock(); } int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index a6bccfd1ac..a3cf447028 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -4932,9 +4932,9 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) /* Inject NMI */ if (cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) { if (cpu->interrupt_request & CPU_INTERRUPT_NMI) { - qemu_mutex_lock_iothread(); + bql_lock(); cpu->interrupt_request &= ~CPU_INTERRUPT_NMI; - qemu_mutex_unlock_iothread(); + bql_unlock(); DPRINTF("injected NMI\n"); ret = kvm_vcpu_ioctl(cpu, KVM_NMI); if (ret < 0) { @@ -4943,9 +4943,9 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) } } if (cpu->interrupt_request & CPU_INTERRUPT_SMI) { - qemu_mutex_lock_iothread(); + bql_lock(); cpu->interrupt_request &= ~CPU_INTERRUPT_SMI; - qemu_mutex_unlock_iothread(); + bql_unlock(); DPRINTF("injected SMI\n"); ret = kvm_vcpu_ioctl(cpu, KVM_SMI); if (ret < 0) { @@ -4956,7 +4956,7 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) } if (!kvm_pic_in_kernel()) { - qemu_mutex_lock_iothread(); + bql_lock(); } /* Force the VCPU out of its inner loop to process any INIT requests @@ -5009,7 +5009,7 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) DPRINTF("setting tpr\n"); run->cr8 = cpu_get_apic_tpr(x86_cpu->apic_state); - qemu_mutex_unlock_iothread(); + bql_unlock(); } } @@ -5057,12 +5057,12 @@ MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run) /* We need to protect the apic state against concurrent accesses from * different threads in case the userspace irqchip is used. */ if (!kvm_irqchip_in_kernel()) { - qemu_mutex_lock_iothread(); + bql_lock(); } cpu_set_apic_tpr(x86_cpu->apic_state, run->cr8); cpu_set_apic_base(x86_cpu->apic_state, run->apic_base); if (!kvm_irqchip_in_kernel()) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } return cpu_get_mem_attrs(env); } @@ -5521,17 +5521,17 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) switch (run->exit_reason) { case KVM_EXIT_HLT: DPRINTF("handle_hlt\n"); - qemu_mutex_lock_iothread(); + bql_lock(); ret = kvm_handle_halt(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); break; case KVM_EXIT_SET_TPR: ret = 0; break; case KVM_EXIT_TPR_ACCESS: - qemu_mutex_lock_iothread(); + bql_lock(); ret = kvm_handle_tpr_access(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); break; case KVM_EXIT_FAIL_ENTRY: code = run->fail_entry.hardware_entry_failure_reason; @@ -5557,9 +5557,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) break; case KVM_EXIT_DEBUG: DPRINTF("kvm_exit_debug\n"); - qemu_mutex_lock_iothread(); + bql_lock(); ret = kvm_handle_debug(cpu, &run->debug.arch); - qemu_mutex_unlock_iothread(); + bql_unlock(); break; case KVM_EXIT_HYPERV: ret = kvm_hv_handle_exit(cpu, &run->hyperv); diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c index c0631f9cf4..b0ed2e6aeb 100644 --- a/target/i386/kvm/xen-emu.c +++ b/target/i386/kvm/xen-emu.c @@ -403,7 +403,7 @@ void kvm_xen_maybe_deassert_callback(CPUState *cs) /* If the evtchn_upcall_pending flag is cleared, turn the GSI off. */ if (!vi->evtchn_upcall_pending) { - qemu_mutex_lock_iothread(); + bql_lock(); /* * Check again now we have the lock, because it may have been * asserted in the interim. And we don't want to take the lock @@ -413,7 +413,7 @@ void kvm_xen_maybe_deassert_callback(CPUState *cs) X86_CPU(cs)->env.xen_callback_asserted = false; xen_evtchn_set_callback_level(0); } - qemu_mutex_unlock_iothread(); + bql_unlock(); } } @@ -773,9 +773,9 @@ static bool handle_set_param(struct kvm_xen_exit *exit, X86CPU *cpu, switch (hp.index) { case HVM_PARAM_CALLBACK_IRQ: - qemu_mutex_lock_iothread(); + bql_lock(); err = xen_evtchn_set_callback_param(hp.value); - qemu_mutex_unlock_iothread(); + bql_unlock(); xen_set_long_mode(exit->u.hcall.longmode); break; default: @@ -1408,7 +1408,7 @@ int kvm_xen_soft_reset(void) CPUState *cpu; int err; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); trace_kvm_xen_soft_reset(); @@ -1481,9 +1481,9 @@ static int schedop_shutdown(CPUState *cs, uint64_t arg) break; case SHUTDOWN_soft_reset: - qemu_mutex_lock_iothread(); + bql_lock(); ret = kvm_xen_soft_reset(); - qemu_mutex_unlock_iothread(); + bql_unlock(); break; default: diff --git a/target/i386/nvmm/nvmm-accel-ops.c b/target/i386/nvmm/nvmm-accel-ops.c index 6c46101ac1..f9d5e9a37a 100644 --- a/target/i386/nvmm/nvmm-accel-ops.c +++ b/target/i386/nvmm/nvmm-accel-ops.c @@ -25,7 +25,7 @@ static void *qemu_nvmm_cpu_thread_fn(void *arg) rcu_register_thread(); - qemu_mutex_lock_iothread(); + bql_lock(); qemu_thread_get_self(cpu->thread); cpu->thread_id = qemu_get_thread_id(); current_cpu = cpu; @@ -55,7 +55,7 @@ static void *qemu_nvmm_cpu_thread_fn(void *arg) nvmm_destroy_vcpu(cpu); cpu_thread_signal_destroyed(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); rcu_unregister_thread(); return NULL; } diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c index 7d752bc5e0..cfdca91123 100644 --- a/target/i386/nvmm/nvmm-all.c +++ b/target/i386/nvmm/nvmm-all.c @@ -399,7 +399,7 @@ nvmm_vcpu_pre_run(CPUState *cpu) uint8_t tpr; int ret; - qemu_mutex_lock_iothread(); + bql_lock(); tpr = cpu_get_apic_tpr(x86_cpu->apic_state); if (tpr != qcpu->tpr) { @@ -462,7 +462,7 @@ nvmm_vcpu_pre_run(CPUState *cpu) } } - qemu_mutex_unlock_iothread(); + bql_unlock(); } /* @@ -485,9 +485,9 @@ nvmm_vcpu_post_run(CPUState *cpu, struct nvmm_vcpu_exit *exit) tpr = exit->exitstate.cr8; if (qcpu->tpr != tpr) { qcpu->tpr = tpr; - qemu_mutex_lock_iothread(); + bql_lock(); cpu_set_apic_tpr(x86_cpu->apic_state, qcpu->tpr); - qemu_mutex_unlock_iothread(); + bql_unlock(); } } @@ -648,7 +648,7 @@ nvmm_handle_halted(struct nvmm_machine *mach, CPUState *cpu, CPUX86State *env = cpu_env(cpu); int ret = 0; - qemu_mutex_lock_iothread(); + bql_lock(); if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) && (env->eflags & IF_MASK)) && @@ -658,7 +658,7 @@ nvmm_handle_halted(struct nvmm_machine *mach, CPUState *cpu, ret = 1; } - qemu_mutex_unlock_iothread(); + bql_unlock(); return ret; } @@ -721,7 +721,7 @@ nvmm_vcpu_loop(CPUState *cpu) return 0; } - qemu_mutex_unlock_iothread(); + bql_unlock(); cpu_exec_start(cpu); /* @@ -806,16 +806,16 @@ nvmm_vcpu_loop(CPUState *cpu) error_report("NVMM: Unexpected VM exit code 0x%lx [hw=0x%lx]", exit->reason, exit->u.inv.hwcode); nvmm_get_registers(cpu); - qemu_mutex_lock_iothread(); + bql_lock(); qemu_system_guest_panicked(cpu_get_crash_info(cpu)); - qemu_mutex_unlock_iothread(); + bql_unlock(); ret = -1; break; } } while (ret == 0); cpu_exec_end(cpu); - qemu_mutex_lock_iothread(); + bql_lock(); qatomic_set(&cpu->exit_request, false); diff --git a/target/i386/tcg/sysemu/fpu_helper.c b/target/i386/tcg/sysemu/fpu_helper.c index 93506cdd94..e0305ba234 100644 --- a/target/i386/tcg/sysemu/fpu_helper.c +++ b/target/i386/tcg/sysemu/fpu_helper.c @@ -32,9 +32,9 @@ void x86_register_ferr_irq(qemu_irq irq) void fpu_check_raise_ferr_irq(CPUX86State *env) { if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) { - qemu_mutex_lock_iothread(); + bql_lock(); qemu_irq_raise(ferr_irq); - qemu_mutex_unlock_iothread(); + bql_unlock(); return; } } @@ -49,7 +49,7 @@ void cpu_set_ignne(void) { CPUX86State *env = &X86_CPU(first_cpu)->env; - assert(qemu_mutex_iothread_locked()); + assert(bql_locked()); env->hflags2 |= HF2_IGNNE_MASK; /* diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c index e1528b7f80..1ddfc9fe09 100644 --- a/target/i386/tcg/sysemu/misc_helper.c +++ b/target/i386/tcg/sysemu/misc_helper.c @@ -118,9 +118,9 @@ void helper_write_crN(CPUX86State *env, int reg, target_ulong t0) break; case 8: if (!(env->hflags2 & HF2_VINTR_MASK)) { - qemu_mutex_lock_iothread(); + bql_lock(); cpu_set_apic_tpr(env_archcpu(env)->apic_state, t0); - qemu_mutex_unlock_iothread(); + bql_unlock(); } env->int_ctl = (env->int_ctl & ~V_TPR_MASK) | (t0 & V_TPR_MASK); diff --git a/target/i386/whpx/whpx-accel-ops.c b/target/i386/whpx/whpx-accel-ops.c index 67cad86720..e783a760a7 100644 --- a/target/i386/whpx/whpx-accel-ops.c +++ b/target/i386/whpx/whpx-accel-ops.c @@ -25,7 +25,7 @@ static void *whpx_cpu_thread_fn(void *arg) rcu_register_thread(); - qemu_mutex_lock_iothread(); + bql_lock(); qemu_thread_get_self(cpu->thread); cpu->thread_id = qemu_get_thread_id(); current_cpu = cpu; @@ -55,7 +55,7 @@ static void *whpx_cpu_thread_fn(void *arg) whpx_destroy_vcpu(cpu); cpu_thread_signal_destroyed(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); rcu_unregister_thread(); return NULL; } diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c index d29ba916a0..a7262654ac 100644 --- a/target/i386/whpx/whpx-all.c +++ b/target/i386/whpx/whpx-all.c @@ -1324,7 +1324,7 @@ static int whpx_first_vcpu_starting(CPUState *cpu) struct whpx_state *whpx = &whpx_global; HRESULT hr; - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); if (!QTAILQ_EMPTY(&cpu->breakpoints) || (whpx->breakpoints.breakpoints && @@ -1442,7 +1442,7 @@ static int whpx_handle_halt(CPUState *cpu) CPUX86State *env = cpu_env(cpu); int ret = 0; - qemu_mutex_lock_iothread(); + bql_lock(); if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) && (env->eflags & IF_MASK)) && !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) { @@ -1450,7 +1450,7 @@ static int whpx_handle_halt(CPUState *cpu) cpu->halted = true; ret = 1; } - qemu_mutex_unlock_iothread(); + bql_unlock(); return ret; } @@ -1472,7 +1472,7 @@ static void whpx_vcpu_pre_run(CPUState *cpu) memset(&new_int, 0, sizeof(new_int)); memset(reg_values, 0, sizeof(reg_values)); - qemu_mutex_lock_iothread(); + bql_lock(); /* Inject NMI */ if (!vcpu->interruption_pending && @@ -1563,7 +1563,7 @@ static void whpx_vcpu_pre_run(CPUState *cpu) reg_count += 1; } - qemu_mutex_unlock_iothread(); + bql_unlock(); vcpu->ready_for_pic_interrupt = false; if (reg_count) { @@ -1590,9 +1590,9 @@ static void whpx_vcpu_post_run(CPUState *cpu) uint64_t tpr = vcpu->exit_ctx.VpContext.Cr8; if (vcpu->tpr != tpr) { vcpu->tpr = tpr; - qemu_mutex_lock_iothread(); + bql_lock(); cpu_set_apic_tpr(x86_cpu->apic_state, whpx_cr8_to_apic_tpr(vcpu->tpr)); - qemu_mutex_unlock_iothread(); + bql_unlock(); } vcpu->interruption_pending = @@ -1652,7 +1652,7 @@ static int whpx_vcpu_run(CPUState *cpu) WhpxStepMode exclusive_step_mode = WHPX_STEP_NONE; int ret; - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); if (whpx->running_cpus++ == 0) { /* Insert breakpoints into memory, update exception exit bitmap. */ @@ -1690,7 +1690,7 @@ static int whpx_vcpu_run(CPUState *cpu) } } - qemu_mutex_unlock_iothread(); + bql_unlock(); if (exclusive_step_mode != WHPX_STEP_NONE) { start_exclusive(); @@ -2028,9 +2028,9 @@ static int whpx_vcpu_run(CPUState *cpu) error_report("WHPX: Unexpected VP exit code %d", vcpu->exit_ctx.ExitReason); whpx_get_registers(cpu); - qemu_mutex_lock_iothread(); + bql_lock(); qemu_system_guest_panicked(cpu_get_crash_info(cpu)); - qemu_mutex_unlock_iothread(); + bql_unlock(); break; } @@ -2055,7 +2055,7 @@ static int whpx_vcpu_run(CPUState *cpu) cpu_exec_end(cpu); } - qemu_mutex_lock_iothread(); + bql_lock(); current_cpu = cpu; if (--whpx->running_cpus == 0) { diff --git a/target/loongarch/tcg/csr_helper.c b/target/loongarch/tcg/csr_helper.c index 55341551a5..15f94caefa 100644 --- a/target/loongarch/tcg/csr_helper.c +++ b/target/loongarch/tcg/csr_helper.c @@ -89,9 +89,9 @@ target_ulong helper_csrwr_ticlr(CPULoongArchState *env, target_ulong val) int64_t old_v = 0; if (val & 0x1) { - qemu_mutex_lock_iothread(); + bql_lock(); loongarch_cpu_set_irq(cpu, IRQ_TIMER, 0); - qemu_mutex_unlock_iothread(); + bql_unlock(); } return old_v; } diff --git a/target/mips/kvm.c b/target/mips/kvm.c index 6393bc3da2..2f6a44ee46 100644 --- a/target/mips/kvm.c +++ b/target/mips/kvm.c @@ -138,7 +138,7 @@ void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) int r; struct kvm_mips_interrupt intr; - qemu_mutex_lock_iothread(); + bql_lock(); if ((cs->interrupt_request & CPU_INTERRUPT_HARD) && cpu_mips_io_interrupts_pending(cpu)) { @@ -151,7 +151,7 @@ void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) } } - qemu_mutex_unlock_iothread(); + bql_unlock(); } MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) diff --git a/target/mips/tcg/sysemu/cp0_helper.c b/target/mips/tcg/sysemu/cp0_helper.c index d349548743..cc545aed9c 100644 --- a/target/mips/tcg/sysemu/cp0_helper.c +++ b/target/mips/tcg/sysemu/cp0_helper.c @@ -59,9 +59,9 @@ static inline void mips_vpe_wake(MIPSCPU *c) * because there might be other conditions that state that c should * be sleeping. */ - qemu_mutex_lock_iothread(); + bql_lock(); cpu_interrupt(CPU(c), CPU_INTERRUPT_WAKE); - qemu_mutex_unlock_iothread(); + bql_unlock(); } static inline void mips_vpe_sleep(MIPSCPU *cpu) diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c index 782a5751b7..77567afba4 100644 --- a/target/openrisc/sys_helper.c +++ b/target/openrisc/sys_helper.c @@ -160,20 +160,20 @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb) break; case TO_SPR(9, 0): /* PICMR */ env->picmr = rb; - qemu_mutex_lock_iothread(); + bql_lock(); if (env->picsr & env->picmr) { cpu_interrupt(cs, CPU_INTERRUPT_HARD); } else { cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); } - qemu_mutex_unlock_iothread(); + bql_unlock(); break; case TO_SPR(9, 2): /* PICSR */ env->picsr &= ~rb; break; case TO_SPR(10, 0): /* TTMR */ { - qemu_mutex_lock_iothread(); + bql_lock(); if ((env->ttmr & TTMR_M) ^ (rb & TTMR_M)) { switch (rb & TTMR_M) { case TIMER_NONE: @@ -198,15 +198,15 @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb) cs->interrupt_request &= ~CPU_INTERRUPT_TIMER; } cpu_openrisc_timer_update(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); } break; case TO_SPR(10, 1): /* TTCR */ - qemu_mutex_lock_iothread(); + bql_lock(); cpu_openrisc_count_set(cpu, rb); cpu_openrisc_timer_update(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); break; } #endif @@ -347,9 +347,9 @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env, target_ulong rd, return env->ttmr; case TO_SPR(10, 1): /* TTCR */ - qemu_mutex_lock_iothread(); + bql_lock(); cpu_openrisc_count_update(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); return cpu_openrisc_count_get(cpu); } #endif diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index a42743a3e0..8a2bfb5aa2 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -3056,7 +3056,7 @@ void helper_msgsnd(target_ulong rb) return; } - qemu_mutex_lock_iothread(); + bql_lock(); CPU_FOREACH(cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); CPUPPCState *cenv = &cpu->env; @@ -3065,7 +3065,7 @@ void helper_msgsnd(target_ulong rb) ppc_set_irq(cpu, irq, 1); } } - qemu_mutex_unlock_iothread(); + bql_unlock(); } /* Server Processor Control */ @@ -3093,7 +3093,7 @@ static void book3s_msgsnd_common(int pir, int irq) { CPUState *cs; - qemu_mutex_lock_iothread(); + bql_lock(); CPU_FOREACH(cs) { PowerPCCPU *cpu = POWERPC_CPU(cs); CPUPPCState *cenv = &cpu->env; @@ -3103,7 +3103,7 @@ static void book3s_msgsnd_common(int pir, int irq) ppc_set_irq(cpu, irq, 1); } } - qemu_mutex_unlock_iothread(); + bql_unlock(); } void helper_book3s_msgsnd(target_ulong rb) @@ -3157,14 +3157,14 @@ void helper_book3s_msgsndp(CPUPPCState *env, target_ulong rb) } /* Does iothread need to be locked for walking CPU list? */ - qemu_mutex_lock_iothread(); + bql_lock(); THREAD_SIBLING_FOREACH(cs, ccs) { PowerPCCPU *ccpu = POWERPC_CPU(ccs); uint32_t thread_id = ppc_cpu_tir(ccpu); if (ttir == thread_id) { ppc_set_irq(ccpu, PPC_INTERRUPT_DOORBELL, 1); - qemu_mutex_unlock_iothread(); + bql_unlock(); return; } } diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index ddb5e7f8e7..892e14a467 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -1656,7 +1656,7 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) CPUPPCState *env = &cpu->env; int ret; - qemu_mutex_lock_iothread(); + bql_lock(); switch (run->exit_reason) { case KVM_EXIT_DCR: @@ -1715,7 +1715,7 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) break; } - qemu_mutex_unlock_iothread(); + bql_unlock(); return ret; } diff --git a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c index a05bdf78c9..a9d41d2802 100644 --- a/target/ppc/misc_helper.c +++ b/target/ppc/misc_helper.c @@ -238,7 +238,7 @@ target_ulong helper_load_dpdes(CPUPPCState *env) return dpdes; } - qemu_mutex_lock_iothread(); + bql_lock(); THREAD_SIBLING_FOREACH(cs, ccs) { PowerPCCPU *ccpu = POWERPC_CPU(ccs); CPUPPCState *cenv = &ccpu->env; @@ -248,7 +248,7 @@ target_ulong helper_load_dpdes(CPUPPCState *env) dpdes |= (0x1 << thread_id); } } - qemu_mutex_unlock_iothread(); + bql_unlock(); return dpdes; } @@ -278,14 +278,14 @@ void helper_store_dpdes(CPUPPCState *env, target_ulong val) } /* Does iothread need to be locked for walking CPU list? */ - qemu_mutex_lock_iothread(); + bql_lock(); THREAD_SIBLING_FOREACH(cs, ccs) { PowerPCCPU *ccpu = POWERPC_CPU(ccs); uint32_t thread_id = ppc_cpu_tir(ccpu); ppc_set_irq(cpu, PPC_INTERRUPT_DOORBELL, val & (0x1 << thread_id)); } - qemu_mutex_unlock_iothread(); + bql_unlock(); } #endif /* defined(TARGET_PPC64) */ diff --git a/target/ppc/timebase_helper.c b/target/ppc/timebase_helper.c index 08a6b47ee0..f618ed2922 100644 --- a/target/ppc/timebase_helper.c +++ b/target/ppc/timebase_helper.c @@ -173,9 +173,9 @@ target_ulong helper_load_dcr(CPUPPCState *env, target_ulong dcrn) } else { int ret; - qemu_mutex_lock_iothread(); + bql_lock(); ret = ppc_dcr_read(env->dcr_env, (uint32_t)dcrn, &val); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (unlikely(ret != 0)) { qemu_log_mask(LOG_GUEST_ERROR, "DCR read error %d %03x\n", (uint32_t)dcrn, (uint32_t)dcrn); @@ -196,9 +196,9 @@ void helper_store_dcr(CPUPPCState *env, target_ulong dcrn, target_ulong val) POWERPC_EXCP_INVAL_INVAL, GETPC()); } else { int ret; - qemu_mutex_lock_iothread(); + bql_lock(); ret = ppc_dcr_write(env->dcr_env, (uint32_t)dcrn, (uint32_t)val); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (unlikely(ret != 0)) { qemu_log_mask(LOG_GUEST_ERROR, "DCR write error %d %03x\n", (uint32_t)dcrn, (uint32_t)dcrn); diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index 0a22833f47..b7e50e3178 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -1923,7 +1923,7 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) S390CPU *cpu = S390_CPU(cs); int ret = 0; - qemu_mutex_lock_iothread(); + bql_lock(); kvm_cpu_synchronize_state(cs); @@ -1947,7 +1947,7 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) fprintf(stderr, "Unknown KVM exit: %d\n", run->exit_reason); break; } - qemu_mutex_unlock_iothread(); + bql_unlock(); if (ret == 0) { ret = EXCP_INTERRUPT; diff --git a/target/s390x/tcg/misc_helper.c b/target/s390x/tcg/misc_helper.c index 6aa7907438..89b5268fd4 100644 --- a/target/s390x/tcg/misc_helper.c +++ b/target/s390x/tcg/misc_helper.c @@ -101,9 +101,9 @@ uint64_t HELPER(stck)(CPUS390XState *env) /* SCLP service call */ uint32_t HELPER(servc)(CPUS390XState *env, uint64_t r1, uint64_t r2) { - qemu_mutex_lock_iothread(); + bql_lock(); int r = sclp_service_call(env_archcpu(env), r1, r2); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (r < 0) { tcg_s390_program_interrupt(env, -r, GETPC()); } @@ -117,9 +117,9 @@ void HELPER(diag)(CPUS390XState *env, uint32_t r1, uint32_t r3, uint32_t num) switch (num) { case 0x500: /* KVM hypercall */ - qemu_mutex_lock_iothread(); + bql_lock(); r = s390_virtio_hypercall(env); - qemu_mutex_unlock_iothread(); + bql_unlock(); break; case 0x44: /* yield */ @@ -127,9 +127,9 @@ void HELPER(diag)(CPUS390XState *env, uint32_t r1, uint32_t r3, uint32_t num) break; case 0x308: /* ipl */ - qemu_mutex_lock_iothread(); + bql_lock(); handle_diag_308(env, r1, r3, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); r = 0; break; case 0x288: @@ -185,7 +185,7 @@ static void update_ckc_timer(CPUS390XState *env) /* stop the timer and remove pending CKC IRQs */ timer_del(env->tod_timer); - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); env->pending_int &= ~INTERRUPT_EXT_CLOCK_COMPARATOR; /* the tod has to exceed the ckc, this can never happen if ckc is all 1's */ @@ -207,9 +207,9 @@ void HELPER(sckc)(CPUS390XState *env, uint64_t ckc) { env->ckc = ckc; - qemu_mutex_lock_iothread(); + bql_lock(); update_ckc_timer(env); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void tcg_s390_tod_updated(CPUState *cs, run_on_cpu_data opaque) @@ -229,9 +229,9 @@ uint32_t HELPER(sck)(CPUS390XState *env, uint64_t tod_low) .low = tod_low, }; - qemu_mutex_lock_iothread(); + bql_lock(); tdc->set(td, &tod, &error_abort); - qemu_mutex_unlock_iothread(); + bql_unlock(); return 0; } @@ -421,9 +421,9 @@ uint32_t HELPER(sigp)(CPUS390XState *env, uint64_t order_code, uint32_t r1, int cc; /* TODO: needed to inject interrupts - push further down */ - qemu_mutex_lock_iothread(); + bql_lock(); cc = handle_sigp(env, order_code & SIGP_ORDER_MASK, r1, r3); - qemu_mutex_unlock_iothread(); + bql_unlock(); return cc; } @@ -433,92 +433,92 @@ uint32_t HELPER(sigp)(CPUS390XState *env, uint64_t order_code, uint32_t r1, void HELPER(xsch)(CPUS390XState *env, uint64_t r1) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_xsch(cpu, r1, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(csch)(CPUS390XState *env, uint64_t r1) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_csch(cpu, r1, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(hsch)(CPUS390XState *env, uint64_t r1) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_hsch(cpu, r1, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(msch)(CPUS390XState *env, uint64_t r1, uint64_t inst) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_msch(cpu, r1, inst >> 16, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(rchp)(CPUS390XState *env, uint64_t r1) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_rchp(cpu, r1, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(rsch)(CPUS390XState *env, uint64_t r1) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_rsch(cpu, r1, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(sal)(CPUS390XState *env, uint64_t r1) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_sal(cpu, r1, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(schm)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint64_t inst) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_schm(cpu, r1, r2, inst >> 16, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(ssch)(CPUS390XState *env, uint64_t r1, uint64_t inst) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_ssch(cpu, r1, inst >> 16, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(stcrw)(CPUS390XState *env, uint64_t inst) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_stcrw(cpu, inst >> 16, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(stsch)(CPUS390XState *env, uint64_t r1, uint64_t inst) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_stsch(cpu, r1, inst >> 16, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } uint32_t HELPER(tpi)(CPUS390XState *env, uint64_t addr) @@ -533,10 +533,10 @@ uint32_t HELPER(tpi)(CPUS390XState *env, uint64_t addr) tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra); } - qemu_mutex_lock_iothread(); + bql_lock(); io = qemu_s390_flic_dequeue_io(flic, env->cregs[6]); if (!io) { - qemu_mutex_unlock_iothread(); + bql_unlock(); return 0; } @@ -554,7 +554,7 @@ uint32_t HELPER(tpi)(CPUS390XState *env, uint64_t addr) if (s390_cpu_virt_mem_write(cpu, addr, 0, &intc, sizeof(intc))) { /* writing failed, reinject and properly clean up */ s390_io_interrupt(io->id, io->nr, io->parm, io->word); - qemu_mutex_unlock_iothread(); + bql_unlock(); g_free(io); s390_cpu_virt_mem_handle_exc(cpu, ra); return 0; @@ -570,24 +570,24 @@ uint32_t HELPER(tpi)(CPUS390XState *env, uint64_t addr) } g_free(io); - qemu_mutex_unlock_iothread(); + bql_unlock(); return 1; } void HELPER(tsch)(CPUS390XState *env, uint64_t r1, uint64_t inst) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_tsch(cpu, r1, inst >> 16, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(chsc)(CPUS390XState *env, uint64_t inst) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); ioinst_handle_chsc(cpu, inst >> 16, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } #endif @@ -726,27 +726,27 @@ void HELPER(clp)(CPUS390XState *env, uint32_t r2) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); clp_service_call(cpu, r2, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(pcilg)(CPUS390XState *env, uint32_t r1, uint32_t r2) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); pcilg_service_call(cpu, r1, r2, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(pcistg)(CPUS390XState *env, uint32_t r1, uint32_t r2) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); pcistg_service_call(cpu, r1, r2, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(stpcifc)(CPUS390XState *env, uint32_t r1, uint64_t fiba, @@ -754,9 +754,9 @@ void HELPER(stpcifc)(CPUS390XState *env, uint32_t r1, uint64_t fiba, { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); stpcifc_service_call(cpu, r1, fiba, ar, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(sic)(CPUS390XState *env, uint64_t r1, uint64_t r3) @@ -764,9 +764,9 @@ void HELPER(sic)(CPUS390XState *env, uint64_t r1, uint64_t r3) S390CPU *cpu = env_archcpu(env); int r; - qemu_mutex_lock_iothread(); + bql_lock(); r = css_do_sic(cpu, (r3 >> 27) & 0x7, r1 & 0xffff); - qemu_mutex_unlock_iothread(); + bql_unlock(); /* css_do_sic() may actually return a PGM_xxx value to inject */ if (r) { tcg_s390_program_interrupt(env, -r, GETPC()); @@ -777,9 +777,9 @@ void HELPER(rpcit)(CPUS390XState *env, uint32_t r1, uint32_t r2) { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); rpcit_service_call(cpu, r1, r2, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(pcistb)(CPUS390XState *env, uint32_t r1, uint32_t r3, @@ -787,9 +787,9 @@ void HELPER(pcistb)(CPUS390XState *env, uint32_t r1, uint32_t r3, { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); pcistb_service_call(cpu, r1, r3, gaddr, ar, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(mpcifc)(CPUS390XState *env, uint32_t r1, uint64_t fiba, @@ -797,8 +797,8 @@ void HELPER(mpcifc)(CPUS390XState *env, uint32_t r1, uint64_t fiba, { S390CPU *cpu = env_archcpu(env); - qemu_mutex_lock_iothread(); + bql_lock(); mpcifc_service_call(cpu, r1, fiba, ar, GETPC()); - qemu_mutex_unlock_iothread(); + bql_unlock(); } #endif diff --git a/target/sparc/int32_helper.c b/target/sparc/int32_helper.c index 1563613582..49a9149263 100644 --- a/target/sparc/int32_helper.c +++ b/target/sparc/int32_helper.c @@ -70,7 +70,7 @@ void cpu_check_irqs(CPUSPARCState *env) CPUState *cs; /* We should be holding the BQL before we mess with IRQs */ - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); if (env->pil_in && (env->interrupt_index == 0 || (env->interrupt_index & ~15) == TT_EXTINT)) { diff --git a/target/sparc/int64_helper.c b/target/sparc/int64_helper.c index 1b4155f5f3..27df9dba89 100644 --- a/target/sparc/int64_helper.c +++ b/target/sparc/int64_helper.c @@ -69,7 +69,7 @@ void cpu_check_irqs(CPUSPARCState *env) (env->softint & ~(SOFTINT_TIMER | SOFTINT_STIMER)); /* We should be holding the BQL before we mess with IRQs */ - g_assert(qemu_mutex_iothread_locked()); + g_assert(bql_locked()); /* TT_IVEC has a higher priority (16) than TT_EXTINT (31..17) */ if (env->ivec_status & 0x20) { @@ -267,9 +267,9 @@ static bool do_modify_softint(CPUSPARCState *env, uint32_t value) env->softint = value; #if !defined(CONFIG_USER_ONLY) if (cpu_interrupts_enabled(env)) { - qemu_mutex_lock_iothread(); + bql_lock(); cpu_check_irqs(env); - qemu_mutex_unlock_iothread(); + bql_unlock(); } #endif return true; diff --git a/target/sparc/win_helper.c b/target/sparc/win_helper.c index 16d1c70fe7..b53fc9ce94 100644 --- a/target/sparc/win_helper.c +++ b/target/sparc/win_helper.c @@ -179,9 +179,9 @@ void helper_wrpsr(CPUSPARCState *env, target_ulong new_psr) cpu_raise_exception_ra(env, TT_ILL_INSN, GETPC()); } else { /* cpu_put_psr may trigger interrupts, hence BQL */ - qemu_mutex_lock_iothread(); + bql_lock(); cpu_put_psr(env, new_psr); - qemu_mutex_unlock_iothread(); + bql_unlock(); } } @@ -407,9 +407,9 @@ void helper_wrpstate(CPUSPARCState *env, target_ulong new_state) #if !defined(CONFIG_USER_ONLY) if (cpu_interrupts_enabled(env)) { - qemu_mutex_lock_iothread(); + bql_lock(); cpu_check_irqs(env); - qemu_mutex_unlock_iothread(); + bql_unlock(); } #endif } @@ -422,9 +422,9 @@ void helper_wrpil(CPUSPARCState *env, target_ulong new_pil) env->psrpil = new_pil; if (cpu_interrupts_enabled(env)) { - qemu_mutex_lock_iothread(); + bql_lock(); cpu_check_irqs(env); - qemu_mutex_unlock_iothread(); + bql_unlock(); } #endif } @@ -451,9 +451,9 @@ void helper_done(CPUSPARCState *env) #if !defined(CONFIG_USER_ONLY) if (cpu_interrupts_enabled(env)) { - qemu_mutex_lock_iothread(); + bql_lock(); cpu_check_irqs(env); - qemu_mutex_unlock_iothread(); + bql_unlock(); } #endif } @@ -480,9 +480,9 @@ void helper_retry(CPUSPARCState *env) #if !defined(CONFIG_USER_ONLY) if (cpu_interrupts_enabled(env)) { - qemu_mutex_lock_iothread(); + bql_lock(); cpu_check_irqs(env); - qemu_mutex_unlock_iothread(); + bql_unlock(); } #endif } diff --git a/target/xtensa/exc_helper.c b/target/xtensa/exc_helper.c index 91354884f7..168419a505 100644 --- a/target/xtensa/exc_helper.c +++ b/target/xtensa/exc_helper.c @@ -105,9 +105,9 @@ void HELPER(waiti)(CPUXtensaState *env, uint32_t pc, uint32_t intlevel) env->sregs[PS] = (env->sregs[PS] & ~PS_INTLEVEL) | (intlevel << PS_INTLEVEL_SHIFT); - qemu_mutex_lock_iothread(); + bql_lock(); check_interrupts(env); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (env->pending_irq_level) { cpu_loop_exit(cpu); @@ -120,9 +120,9 @@ void HELPER(waiti)(CPUXtensaState *env, uint32_t pc, uint32_t intlevel) void HELPER(check_interrupts)(CPUXtensaState *env) { - qemu_mutex_lock_iothread(); + bql_lock(); check_interrupts(env); - qemu_mutex_unlock_iothread(); + bql_unlock(); } void HELPER(intset)(CPUXtensaState *env, uint32_t v) diff --git a/ui/cocoa.m b/ui/cocoa.m index cd069da696..5ebb535070 100644 --- a/ui/cocoa.m +++ b/ui/cocoa.m @@ -117,29 +117,29 @@ static void cocoa_switch(DisplayChangeListener *dcl, typedef void (^CodeBlock)(void); typedef bool (^BoolCodeBlock)(void); -static void with_iothread_lock(CodeBlock block) +static void with_bql(CodeBlock block) { - bool locked = qemu_mutex_iothread_locked(); + bool locked = bql_locked(); if (!locked) { - qemu_mutex_lock_iothread(); + bql_lock(); } block(); if (!locked) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } } -static bool bool_with_iothread_lock(BoolCodeBlock block) +static bool bool_with_bql(BoolCodeBlock block) { - bool locked = qemu_mutex_iothread_locked(); + bool locked = bql_locked(); bool val; if (!locked) { - qemu_mutex_lock_iothread(); + bql_lock(); } val = block(); if (!locked) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } return val; } @@ -605,7 +605,7 @@ - (void) updateUIInfo return; } - with_iothread_lock(^{ + with_bql(^{ [self updateUIInfoLocked]; }); } @@ -790,7 +790,7 @@ - (void) handleMonitorInput:(NSEvent *)event - (bool) handleEvent:(NSEvent *)event { - return bool_with_iothread_lock(^{ + return bool_with_bql(^{ return [self handleEventLocked:event]; }); } @@ -1182,7 +1182,7 @@ - (QEMUScreen) gscreen {return screen;} */ - (void) raiseAllKeys { - with_iothread_lock(^{ + with_bql(^{ qkbd_state_lift_all_keys(kbd); }); } @@ -1282,7 +1282,7 @@ - (void)applicationWillTerminate:(NSNotification *)aNotification { COCOA_DEBUG("QemuCocoaAppController: applicationWillTerminate\n"); - with_iothread_lock(^{ + with_bql(^{ shutdown_action = SHUTDOWN_ACTION_POWEROFF; qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_UI); }); @@ -1420,7 +1420,7 @@ - (void)displayConsole:(id)sender /* Pause the guest */ - (void)pauseQEMU:(id)sender { - with_iothread_lock(^{ + with_bql(^{ qmp_stop(NULL); }); [sender setEnabled: NO]; @@ -1431,7 +1431,7 @@ - (void)pauseQEMU:(id)sender /* Resume running the guest operating system */ - (void)resumeQEMU:(id) sender { - with_iothread_lock(^{ + with_bql(^{ qmp_cont(NULL); }); [sender setEnabled: NO]; @@ -1461,7 +1461,7 @@ - (void)removePause /* Restarts QEMU */ - (void)restartQEMU:(id)sender { - with_iothread_lock(^{ + with_bql(^{ qmp_system_reset(NULL); }); } @@ -1469,7 +1469,7 @@ - (void)restartQEMU:(id)sender /* Powers down QEMU */ - (void)powerDownQEMU:(id)sender { - with_iothread_lock(^{ + with_bql(^{ qmp_system_powerdown(NULL); }); } @@ -1488,7 +1488,7 @@ - (void)ejectDeviceMedia:(id)sender } __block Error *err = NULL; - with_iothread_lock(^{ + with_bql(^{ qmp_eject([drive cStringUsingEncoding: NSASCIIStringEncoding], NULL, false, false, &err); }); @@ -1523,7 +1523,7 @@ - (void)changeDeviceMedia:(id)sender } __block Error *err = NULL; - with_iothread_lock(^{ + with_bql(^{ qmp_blockdev_change_medium([drive cStringUsingEncoding: NSASCIIStringEncoding], NULL, @@ -1605,7 +1605,7 @@ - (void)adjustSpeed:(id)sender // get the throttle percentage throttle_pct = [sender tag]; - with_iothread_lock(^{ + with_bql(^{ cpu_throttle_set(throttle_pct); }); COCOA_DEBUG("cpu throttling at %d%c\n", cpu_throttle_get_percentage(), '%'); @@ -1819,7 +1819,7 @@ - (void)pasteboard:(NSPasteboard *)sender provideDataForType:(NSPasteboardType)t return; } - with_iothread_lock(^{ + with_bql(^{ QemuClipboardInfo *info = qemu_clipboard_info_ref(cbinfo); qemu_event_reset(&cbevent); qemu_clipboard_request(info, QEMU_CLIPBOARD_TYPE_TEXT); @@ -1827,9 +1827,9 @@ - (void)pasteboard:(NSPasteboard *)sender provideDataForType:(NSPasteboardType)t while (info == cbinfo && info->types[QEMU_CLIPBOARD_TYPE_TEXT].available && info->types[QEMU_CLIPBOARD_TYPE_TEXT].data == NULL) { - qemu_mutex_unlock_iothread(); + bql_unlock(); qemu_event_wait(&cbevent); - qemu_mutex_lock_iothread(); + bql_lock(); } if (info == cbinfo) { @@ -1927,9 +1927,9 @@ static void cocoa_clipboard_request(QemuClipboardInfo *info, int status; COCOA_DEBUG("Second thread: calling qemu_default_main()\n"); - qemu_mutex_lock_iothread(); + bql_lock(); status = qemu_default_main(); - qemu_mutex_unlock_iothread(); + bql_unlock(); COCOA_DEBUG("Second thread: qemu_default_main() returned, exiting\n"); [cbowner release]; exit(status); @@ -1941,7 +1941,7 @@ static int cocoa_main(void) COCOA_DEBUG("Entered %s()\n", __func__); - qemu_mutex_unlock_iothread(); + bql_unlock(); qemu_thread_create(&thread, "qemu_main", call_qemu_main, NULL, QEMU_THREAD_DETACHED); diff --git a/ui/spice-core.c b/ui/spice-core.c index db21db2c94..b6ee495a8f 100644 --- a/ui/spice-core.c +++ b/ui/spice-core.c @@ -222,7 +222,7 @@ static void channel_event(int event, SpiceChannelEventInfo *info) */ bool need_lock = !qemu_thread_is_self(&me); if (need_lock) { - qemu_mutex_lock_iothread(); + bql_lock(); } if (info->flags & SPICE_CHANNEL_EVENT_FLAG_ADDR_EXT) { @@ -260,7 +260,7 @@ static void channel_event(int event, SpiceChannelEventInfo *info) } if (need_lock) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } qapi_free_SpiceServerInfo(server); diff --git a/util/async.c b/util/async.c index 8f90ddc304..def720045b 100644 --- a/util/async.c +++ b/util/async.c @@ -741,7 +741,7 @@ AioContext *qemu_get_current_aio_context(void) if (ctx) { return ctx; } - if (qemu_mutex_iothread_locked()) { + if (bql_locked()) { /* Possibly in a vCPU thread. */ return qemu_get_aio_context(); } diff --git a/util/main-loop.c b/util/main-loop.c index 797b640c41..bfbff4f246 100644 --- a/util/main-loop.c +++ b/util/main-loop.c @@ -302,13 +302,13 @@ static int os_host_main_loop_wait(int64_t timeout) glib_pollfds_fill(&timeout); - qemu_mutex_unlock_iothread(); + bql_unlock(); replay_mutex_unlock(); ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, timeout); replay_mutex_lock(); - qemu_mutex_lock_iothread(); + bql_lock(); glib_pollfds_poll(); @@ -517,7 +517,7 @@ static int os_host_main_loop_wait(int64_t timeout) poll_timeout_ns = qemu_soonest_timeout(poll_timeout_ns, timeout); - qemu_mutex_unlock_iothread(); + bql_unlock(); replay_mutex_unlock(); @@ -525,7 +525,7 @@ static int os_host_main_loop_wait(int64_t timeout) replay_mutex_lock(); - qemu_mutex_lock_iothread(); + bql_lock(); if (g_poll_ret > 0) { for (i = 0; i < w->num; i++) { w->revents[i] = poll_fds[n_poll_fds + i].revents; diff --git a/util/qsp.c b/util/qsp.c index 2fe3764906..6b783e2e7f 100644 --- a/util/qsp.c +++ b/util/qsp.c @@ -124,7 +124,7 @@ static const char * const qsp_typenames[] = { [QSP_CONDVAR] = "condvar", }; -QemuMutexLockFunc qemu_bql_mutex_lock_func = qemu_mutex_lock_impl; +QemuMutexLockFunc bql_mutex_lock_func = qemu_mutex_lock_impl; QemuMutexLockFunc qemu_mutex_lock_func = qemu_mutex_lock_impl; QemuMutexTrylockFunc qemu_mutex_trylock_func = qemu_mutex_trylock_impl; QemuRecMutexLockFunc qemu_rec_mutex_lock_func = qemu_rec_mutex_lock_impl; @@ -439,7 +439,7 @@ void qsp_enable(void) { qatomic_set(&qemu_mutex_lock_func, qsp_mutex_lock); qatomic_set(&qemu_mutex_trylock_func, qsp_mutex_trylock); - qatomic_set(&qemu_bql_mutex_lock_func, qsp_bql_mutex_lock); + qatomic_set(&bql_mutex_lock_func, qsp_bql_mutex_lock); qatomic_set(&qemu_rec_mutex_lock_func, qsp_rec_mutex_lock); qatomic_set(&qemu_rec_mutex_trylock_func, qsp_rec_mutex_trylock); qatomic_set(&qemu_cond_wait_func, qsp_cond_wait); @@ -450,7 +450,7 @@ void qsp_disable(void) { qatomic_set(&qemu_mutex_lock_func, qemu_mutex_lock_impl); qatomic_set(&qemu_mutex_trylock_func, qemu_mutex_trylock_impl); - qatomic_set(&qemu_bql_mutex_lock_func, qemu_mutex_lock_impl); + qatomic_set(&bql_mutex_lock_func, qemu_mutex_lock_impl); qatomic_set(&qemu_rec_mutex_lock_func, qemu_rec_mutex_lock_impl); qatomic_set(&qemu_rec_mutex_trylock_func, qemu_rec_mutex_trylock_impl); qatomic_set(&qemu_cond_wait_func, qemu_cond_wait_impl); diff --git a/util/rcu.c b/util/rcu.c index e587bcc483..bb7f633b5c 100644 --- a/util/rcu.c +++ b/util/rcu.c @@ -283,24 +283,24 @@ static void *call_rcu_thread(void *opaque) qatomic_sub(&rcu_call_count, n); synchronize_rcu(); - qemu_mutex_lock_iothread(); + bql_lock(); while (n > 0) { node = try_dequeue(); while (!node) { - qemu_mutex_unlock_iothread(); + bql_unlock(); qemu_event_reset(&rcu_call_ready_event); node = try_dequeue(); if (!node) { qemu_event_wait(&rcu_call_ready_event); node = try_dequeue(); } - qemu_mutex_lock_iothread(); + bql_lock(); } n--; node->func(node); } - qemu_mutex_unlock_iothread(); + bql_unlock(); } abort(); } @@ -337,13 +337,13 @@ static void drain_rcu_callback(struct rcu_head *node) void drain_call_rcu(void) { struct rcu_drain rcu_drain; - bool locked = qemu_mutex_iothread_locked(); + bool locked = bql_locked(); memset(&rcu_drain, 0, sizeof(struct rcu_drain)); qemu_event_init(&rcu_drain.drain_complete_event, false); if (locked) { - qemu_mutex_unlock_iothread(); + bql_unlock(); } @@ -365,7 +365,7 @@ void drain_call_rcu(void) qatomic_dec(&in_drain_call_rcu); if (locked) { - qemu_mutex_lock_iothread(); + bql_lock(); } } -- Gitee From 85519349272882acb9b345219b14b90cba6cec8e Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Thu, 11 Jan 2024 22:43:58 +0700 Subject: [PATCH 060/115] i386/tcg: implement x2APIC registers MSR access commit b2101358e591c9f0a93739dd3aee72935a79af80 upstream. This commit creates apic_register_read/write which are used by both apic_mem_read/write for MMIO access and apic_msr_read/write for MSR access. The apic_msr_read/write returns -1 on error, accelerator can use this to raise the appropriate exception. Signed-off-by: Bui Quang Minh Message-Id: <20240111154404.5333-2-minhquangbui99@gmail.com> Acked-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- hw/intc/apic.c | 122 ++++++++++++++++++++------- hw/intc/trace-events | 4 +- include/hw/i386/apic.h | 3 + target/i386/cpu.h | 3 + target/i386/tcg/sysemu/misc_helper.c | 27 ++++++ 5 files changed, 127 insertions(+), 32 deletions(-) diff --git a/hw/intc/apic.c b/hw/intc/apic.c index ac3d47d231..7a349c0723 100644 --- a/hw/intc/apic.c +++ b/hw/intc/apic.c @@ -288,6 +288,13 @@ void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode); } +bool is_x2apic_mode(DeviceState *dev) +{ + APICCommonState *s = APIC(dev); + + return s->apicbase & MSR_IA32_APICBASE_EXTD; +} + static void apic_set_base(APICCommonState *s, uint64_t val) { s->apicbase = (val & 0xfffff000) | @@ -636,24 +643,19 @@ static void apic_timer(void *opaque) apic_timer_update(s, s->next_time); } -static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size) +static int apic_register_read(int index, uint64_t *value) { DeviceState *dev; APICCommonState *s; uint32_t val; - int index; - - if (size < 4) { - return 0; - } + int ret = 0; dev = cpu_get_current_apic(); if (!dev) { - return 0; + return -1; } s = APIC(dev); - index = (addr >> 4) & 0xff; switch(index) { case 0x02: /* id */ val = s->id << 24; @@ -718,12 +720,46 @@ static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size) default: s->esr |= APIC_ESR_ILLEGAL_ADDRESS; val = 0; + ret = -1; break; } - trace_apic_mem_readl(addr, val); + + trace_apic_register_read(index, val); + *value = val; + return ret; +} + +static uint64_t apic_mem_read(void *opaque, hwaddr addr, unsigned size) +{ + uint64_t val; + int index; + + if (size < 4) { + return 0; + } + + index = (addr >> 4) & 0xff; + apic_register_read(index, &val); + return val; } +int apic_msr_read(int index, uint64_t *val) +{ + DeviceState *dev; + + dev = cpu_get_current_apic(); + if (!dev) { + return -1; + } + + if (!is_x2apic_mode(dev)) { + return -1; + } + + return apic_register_read(index, val); +} + static void apic_send_msi(MSIMessage *msi) { uint64_t addr = msi->address; @@ -737,35 +773,18 @@ static void apic_send_msi(MSIMessage *msi) apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode); } -static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val, - unsigned size) +static int apic_register_write(int index, uint64_t val) { DeviceState *dev; APICCommonState *s; - int index = (addr >> 4) & 0xff; - - if (size < 4) { - return; - } - - if (addr > 0xfff || !index) { - /* MSI and MMIO APIC are at the same memory location, - * but actually not on the global bus: MSI is on PCI bus - * APIC is connected directly to the CPU. - * Mapping them on the global bus happens to work because - * MSI registers are reserved in APIC MMIO and vice versa. */ - MSIMessage msi = { .address = addr, .data = val }; - apic_send_msi(&msi); - return; - } dev = cpu_get_current_apic(); if (!dev) { - return; + return -1; } s = APIC(dev); - trace_apic_mem_writel(addr, val); + trace_apic_register_write(index, val); switch(index) { case 0x02: @@ -839,8 +858,51 @@ static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val, break; default: s->esr |= APIC_ESR_ILLEGAL_ADDRESS; - break; + return -1; + } + + return 0; +} + +static void apic_mem_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size) +{ + int index = (addr >> 4) & 0xff; + + if (size < 4) { + return; + } + + if (addr > 0xfff || !index) { + /* + * MSI and MMIO APIC are at the same memory location, + * but actually not on the global bus: MSI is on PCI bus + * APIC is connected directly to the CPU. + * Mapping them on the global bus happens to work because + * MSI registers are reserved in APIC MMIO and vice versa. + */ + MSIMessage msi = { .address = addr, .data = val }; + apic_send_msi(&msi); + return; + } + + apic_register_write(index, val); +} + +int apic_msr_write(int index, uint64_t val) +{ + DeviceState *dev; + + dev = cpu_get_current_apic(); + if (!dev) { + return -1; + } + + if (!is_x2apic_mode(dev)) { + return -1; } + + return apic_register_write(index, val); } static void apic_pre_save(APICCommonState *s) diff --git a/hw/intc/trace-events b/hw/intc/trace-events index 36ff71f947..1ef29d0256 100644 --- a/hw/intc/trace-events +++ b/hw/intc/trace-events @@ -14,8 +14,8 @@ cpu_get_apic_base(uint64_t val) "0x%016"PRIx64 # apic.c apic_local_deliver(int vector, uint32_t lvt) "vector %d delivery mode %d" apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, uint8_t vector_num, uint8_t trigger_mode) "dest %d dest_mode %d delivery_mode %d vector %d trigger_mode %d" -apic_mem_readl(uint64_t addr, uint32_t val) "0x%"PRIx64" = 0x%08x" -apic_mem_writel(uint64_t addr, uint32_t val) "0x%"PRIx64" = 0x%08x" +apic_register_read(uint8_t reg, uint64_t val) "register 0x%02x = 0x%"PRIx64 +apic_register_write(uint8_t reg, uint64_t val) "register 0x%02x = 0x%"PRIx64 # ioapic.c ioapic_set_remote_irr(int n) "set remote irr for pin %d" diff --git a/include/hw/i386/apic.h b/include/hw/i386/apic.h index bdc15a7a73..ddea4213db 100644 --- a/include/hw/i386/apic.h +++ b/include/hw/i386/apic.h @@ -18,6 +18,9 @@ void apic_sipi(DeviceState *s); void apic_poll_irq(DeviceState *d); void apic_designate_bsp(DeviceState *d, bool bsp); int apic_get_highest_priority_irr(DeviceState *dev); +int apic_msr_read(int index, uint64_t *val); +int apic_msr_write(int index, uint64_t val); +bool is_x2apic_mode(DeviceState *d); /* pc.c */ DeviceState *cpu_get_current_apic(void); diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 4424e58d1b..b99d4b3367 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -567,6 +567,9 @@ typedef enum X86Seg { #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 #define MSR_IA32_VMX_VMFUNC 0x00000491 +#define MSR_APIC_START 0x00000800 +#define MSR_APIC_END 0x000008ff + #define XSTATE_FP_BIT 0 #define XSTATE_SSE_BIT 1 #define XSTATE_YMM_BIT 2 diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c index 1ddfc9fe09..1c43a9f4f7 100644 --- a/target/i386/tcg/sysemu/misc_helper.c +++ b/target/i386/tcg/sysemu/misc_helper.c @@ -25,6 +25,7 @@ #include "exec/address-spaces.h" #include "exec/exec-all.h" #include "tcg/helper-tcg.h" +#include "hw/i386/apic.h" void helper_outb(CPUX86State *env, uint32_t port, uint32_t data) { @@ -289,6 +290,19 @@ void helper_wrmsr(CPUX86State *env) env->msr_bndcfgs = val; cpu_sync_bndcs_hflags(env); break; + case MSR_APIC_START ... MSR_APIC_END: { + int ret; + int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START; + + bql_lock(); + ret = apic_msr_write(index, val); + bql_unlock(); + if (ret < 0) { + goto error; + } + + break; + } default: if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + @@ -455,6 +469,19 @@ void helper_rdmsr(CPUX86State *env) val = (cs->nr_threads * cs->nr_cores) | (cs->nr_cores << 16); break; } + case MSR_APIC_START ... MSR_APIC_END: { + int ret; + int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START; + + bql_lock(); + ret = apic_msr_read(index, &val); + bql_unlock(); + if (ret < 0) { + raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC()); + } + + break; + } default: if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + -- Gitee From a348d4184c5786c50e1308b4bba894236f292a38 Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Thu, 11 Jan 2024 22:43:59 +0700 Subject: [PATCH 061/115] apic: add support for x2APIC mode commit b5ee0468e9d28c6bd47cce70f90b5032dd10ecc2 upstream. This commit extends the APIC ID to 32-bit long and remove the 255 max APIC ID limit in userspace APIC. The array that manages local APICs is now dynamically allocated based on the max APIC ID of created x86 machine. Also, new x2APIC IPI destination determination scheme, self IPI and x2APIC mode register access are supported. Signed-off-by: Bui Quang Minh Message-Id: <20240111154404.5333-3-minhquangbui99@gmail.com> Acked-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- hw/i386/x86.c | 6 +- hw/intc/apic.c | 287 ++++++++++++++++++++++++-------- hw/intc/apic_common.c | 9 + include/hw/i386/apic.h | 3 +- include/hw/i386/apic_internal.h | 7 +- target/i386/cpu-sysemu.c | 18 +- target/i386/cpu.h | 2 + 7 files changed, 258 insertions(+), 74 deletions(-) diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 864fbd037e..aa7eb865b4 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -137,7 +137,7 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) * a literal `0` in configurations where kvm_* aren't defined) */ if (kvm_enabled() && x86ms->apic_id_limit > 255 && - (!kvm_irqchip_in_kernel() || !kvm_enable_x2apic())) { + kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) { error_report("current -smp configuration requires kernel " "irqchip and X2APIC API support."); exit(EXIT_FAILURE); @@ -147,6 +147,10 @@ void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) kvm_set_max_apic_id(x86ms->apic_id_limit); } + if (!kvm_irqchip_in_kernel()) { + apic_set_max_apic_id(x86ms->apic_id_limit); + } + possible_cpus = mc->possible_cpu_arch_ids(ms); for (i = 0; i < ms->smp.cpus; i++) { x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); diff --git a/hw/intc/apic.c b/hw/intc/apic.c index 7a349c0723..178fb26b47 100644 --- a/hw/intc/apic.c +++ b/hw/intc/apic.c @@ -32,14 +32,13 @@ #include "qapi/error.h" #include "qom/object.h" -#define MAX_APICS 255 -#define MAX_APIC_WORDS 8 - #define SYNC_FROM_VAPIC 0x1 #define SYNC_TO_VAPIC 0x2 #define SYNC_ISR_IRR_TO_VAPIC 0x4 -static APICCommonState *local_apics[MAX_APICS + 1]; +static APICCommonState **local_apics; +static uint32_t max_apics; +static uint32_t max_apic_words; #define TYPE_APIC "apic" /*This is reusing the APICCommonState typedef from APIC_COMMON */ @@ -49,7 +48,19 @@ DECLARE_INSTANCE_CHECKER(APICCommonState, APIC, static void apic_set_irq(APICCommonState *s, int vector_num, int trigger_mode); static void apic_update_irq(APICCommonState *s); static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask, - uint8_t dest, uint8_t dest_mode); + uint32_t dest, uint8_t dest_mode); + +void apic_set_max_apic_id(uint32_t max_apic_id) +{ + int word_size = 32; + + /* round up the max apic id to next multiple of words */ + max_apics = (max_apic_id + word_size - 1) & ~(word_size - 1); + + local_apics = g_malloc0(sizeof(*local_apics) * max_apics); + max_apic_words = max_apics >> 5; +} + /* Find first bit starting from msb */ static int apic_fls_bit(uint32_t value) @@ -199,10 +210,10 @@ static void apic_external_nmi(APICCommonState *s) #define foreach_apic(apic, deliver_bitmask, code) \ {\ int __i, __j;\ - for(__i = 0; __i < MAX_APIC_WORDS; __i++) {\ + for (__i = 0; __i < max_apic_words; __i++) {\ uint32_t __mask = deliver_bitmask[__i];\ if (__mask) {\ - for(__j = 0; __j < 32; __j++) {\ + for (__j = 0; __j < 32; __j++) {\ if (__mask & (1U << __j)) {\ apic = local_apics[__i * 32 + __j];\ if (apic) {\ @@ -226,7 +237,7 @@ static void apic_bus_deliver(const uint32_t *deliver_bitmask, { int i, d; d = -1; - for(i = 0; i < MAX_APIC_WORDS; i++) { + for (i = 0; i < max_apic_words; i++) { if (deliver_bitmask[i]) { d = i * 32 + apic_ffs_bit(deliver_bitmask[i]); break; @@ -276,16 +287,18 @@ static void apic_bus_deliver(const uint32_t *deliver_bitmask, apic_set_irq(apic_iter, vector_num, trigger_mode) ); } -void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, - uint8_t vector_num, uint8_t trigger_mode) +static void apic_deliver_irq(uint32_t dest, uint8_t dest_mode, + uint8_t delivery_mode, uint8_t vector_num, + uint8_t trigger_mode) { - uint32_t deliver_bitmask[MAX_APIC_WORDS]; + uint32_t *deliver_bitmask = g_malloc(max_apic_words * sizeof(uint32_t)); trace_apic_deliver_irq(dest, dest_mode, delivery_mode, vector_num, trigger_mode); apic_get_delivery_bitmask(deliver_bitmask, dest, dest_mode); apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode); + g_free(deliver_bitmask); } bool is_x2apic_mode(DeviceState *dev) @@ -442,57 +455,123 @@ static void apic_eoi(APICCommonState *s) apic_update_irq(s); } -static int apic_find_dest(uint8_t dest) +static bool apic_match_dest(APICCommonState *apic, uint32_t dest) { - APICCommonState *apic = local_apics[dest]; - int i; + if (is_x2apic_mode(&apic->parent_obj)) { + return apic->initial_apic_id == dest; + } else { + return apic->id == (uint8_t)dest; + } +} - if (apic && apic->id == dest) - return dest; /* shortcut in case apic->id == local_apics[dest]->id */ +static void apic_find_dest(uint32_t *deliver_bitmask, uint32_t dest) +{ + APICCommonState *apic = NULL; + int i; - for (i = 0; i < MAX_APICS; i++) { + for (i = 0; i < max_apics; i++) { apic = local_apics[i]; - if (apic && apic->id == dest) - return i; - if (!apic) - break; + if (apic && apic_match_dest(apic, dest)) { + apic_set_bit(deliver_bitmask, i); + } } +} - return -1; +/* + * Deliver interrupt to x2APIC CPUs if it is x2APIC broadcast. + * Otherwise, deliver interrupt to xAPIC CPUs if it is xAPIC + * broadcast. + */ +static void apic_get_broadcast_bitmask(uint32_t *deliver_bitmask, + bool is_x2apic_broadcast) +{ + int i; + APICCommonState *apic_iter; + + for (i = 0; i < max_apics; i++) { + apic_iter = local_apics[i]; + if (apic_iter) { + bool apic_in_x2apic = is_x2apic_mode(&apic_iter->parent_obj); + + if (is_x2apic_broadcast && apic_in_x2apic) { + apic_set_bit(deliver_bitmask, i); + } else if (!is_x2apic_broadcast && !apic_in_x2apic) { + apic_set_bit(deliver_bitmask, i); + } + } + } } static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask, - uint8_t dest, uint8_t dest_mode) + uint32_t dest, uint8_t dest_mode) { - APICCommonState *apic_iter; + APICCommonState *apic; int i; - if (dest_mode == 0) { - if (dest == 0xff) { - memset(deliver_bitmask, 0xff, MAX_APIC_WORDS * sizeof(uint32_t)); + memset(deliver_bitmask, 0x00, max_apic_words * sizeof(uint32_t)); + + /* + * x2APIC broadcast is delivered to all x2APIC CPUs regardless of + * destination mode. In case the destination mode is physical, it is + * broadcasted to all xAPIC CPUs too. Otherwise, if the destination + * mode is logical, we need to continue checking if xAPIC CPUs accepts + * the interrupt. + */ + if (dest == 0xffffffff) { + if (dest_mode == APIC_DESTMODE_PHYSICAL) { + memset(deliver_bitmask, 0xff, max_apic_words * sizeof(uint32_t)); + return; } else { - int idx = apic_find_dest(dest); - memset(deliver_bitmask, 0x00, MAX_APIC_WORDS * sizeof(uint32_t)); - if (idx >= 0) - apic_set_bit(deliver_bitmask, idx); + apic_get_broadcast_bitmask(deliver_bitmask, true); + } + } + + if (dest_mode == APIC_DESTMODE_PHYSICAL) { + apic_find_dest(deliver_bitmask, dest); + /* Any APIC in xAPIC mode will interpret 0xFF as broadcast */ + if (dest == 0xff) { + apic_get_broadcast_bitmask(deliver_bitmask, false); } } else { - /* XXX: cluster mode */ - memset(deliver_bitmask, 0x00, MAX_APIC_WORDS * sizeof(uint32_t)); - for(i = 0; i < MAX_APICS; i++) { - apic_iter = local_apics[i]; - if (apic_iter) { - if (apic_iter->dest_mode == 0xf) { - if (dest & apic_iter->log_dest) - apic_set_bit(deliver_bitmask, i); - } else if (apic_iter->dest_mode == 0x0) { - if ((dest & 0xf0) == (apic_iter->log_dest & 0xf0) && - (dest & apic_iter->log_dest & 0x0f)) { + /* XXX: logical mode */ + for (i = 0; i < max_apics; i++) { + apic = local_apics[i]; + if (apic) { + /* x2APIC logical mode */ + if (apic->apicbase & MSR_IA32_APICBASE_EXTD) { + if ((dest >> 16) == (apic->extended_log_dest >> 16) && + (dest & apic->extended_log_dest & 0xffff)) { apic_set_bit(deliver_bitmask, i); } + continue; } - } else { - break; + + /* xAPIC logical mode */ + dest = (uint8_t)dest; + if (apic->dest_mode == APIC_DESTMODE_LOGICAL_FLAT) { + if (dest & apic->log_dest) { + apic_set_bit(deliver_bitmask, i); + } + } else if (apic->dest_mode == APIC_DESTMODE_LOGICAL_CLUSTER) { + /* + * In cluster model of xAPIC logical mode IPI, 4 higher + * bits are used as cluster address, 4 lower bits are + * the bitmask for local APICs in the cluster. The IPI + * is delivered to an APIC if the cluster address + * matches and the APIC's address bit in the cluster is + * set in bitmask of destination ID in IPI. + * + * The cluster address ranges from 0 - 14, the cluster + * address 15 (0xf) is the broadcast address to all + * clusters. + */ + if ((dest & 0xf0) == 0xf0 || + (dest & 0xf0) == (apic->log_dest & 0xf0)) { + if (dest & apic->log_dest & 0x0f) { + apic_set_bit(deliver_bitmask, i); + } + } + } } } } @@ -516,29 +595,36 @@ void apic_sipi(DeviceState *dev) s->wait_for_sipi = 0; } -static void apic_deliver(DeviceState *dev, uint8_t dest, uint8_t dest_mode, +static void apic_deliver(DeviceState *dev, uint32_t dest, uint8_t dest_mode, uint8_t delivery_mode, uint8_t vector_num, - uint8_t trigger_mode) + uint8_t trigger_mode, uint8_t dest_shorthand) { APICCommonState *s = APIC(dev); - uint32_t deliver_bitmask[MAX_APIC_WORDS]; - int dest_shorthand = (s->icr[0] >> 18) & 3; APICCommonState *apic_iter; + uint32_t deliver_bitmask_size = max_apic_words * sizeof(uint32_t); + uint32_t *deliver_bitmask = g_malloc(deliver_bitmask_size); + uint32_t current_apic_id; + + if (is_x2apic_mode(dev)) { + current_apic_id = s->initial_apic_id; + } else { + current_apic_id = s->id; + } switch (dest_shorthand) { case 0: apic_get_delivery_bitmask(deliver_bitmask, dest, dest_mode); break; case 1: - memset(deliver_bitmask, 0x00, sizeof(deliver_bitmask)); - apic_set_bit(deliver_bitmask, s->id); + memset(deliver_bitmask, 0x00, deliver_bitmask_size); + apic_set_bit(deliver_bitmask, current_apic_id); break; case 2: - memset(deliver_bitmask, 0xff, sizeof(deliver_bitmask)); + memset(deliver_bitmask, 0xff, deliver_bitmask_size); break; case 3: - memset(deliver_bitmask, 0xff, sizeof(deliver_bitmask)); - apic_reset_bit(deliver_bitmask, s->id); + memset(deliver_bitmask, 0xff, deliver_bitmask_size); + apic_reset_bit(deliver_bitmask, current_apic_id); break; } @@ -562,6 +648,7 @@ static void apic_deliver(DeviceState *dev, uint8_t dest, uint8_t dest_mode, } apic_bus_deliver(deliver_bitmask, delivery_mode, vector_num, trigger_mode); + g_free(deliver_bitmask); } static bool apic_check_pic(APICCommonState *s) @@ -658,7 +745,11 @@ static int apic_register_read(int index, uint64_t *value) switch(index) { case 0x02: /* id */ - val = s->id << 24; + if (is_x2apic_mode(dev)) { + val = s->initial_apic_id; + } else { + val = s->id << 24; + } break; case 0x03: /* version */ val = s->version | ((APIC_LVT_NB - 1) << 16); @@ -681,10 +772,19 @@ static int apic_register_read(int index, uint64_t *value) val = 0; break; case 0x0d: - val = s->log_dest << 24; + if (is_x2apic_mode(dev)) { + val = s->extended_log_dest; + } else { + val = s->log_dest << 24; + } break; case 0x0e: - val = (s->dest_mode << 28) | 0xfffffff; + if (is_x2apic_mode(dev)) { + val = 0; + ret = -1; + } else { + val = (s->dest_mode << 28) | 0xfffffff; + } break; case 0x0f: val = s->spurious_vec; @@ -764,7 +864,12 @@ static void apic_send_msi(MSIMessage *msi) { uint64_t addr = msi->address; uint32_t data = msi->data; - uint8_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; + uint32_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; + /* + * The higher 3 bytes of destination id is stored in higher word of + * msi address. See x86_iommu_irq_to_msi_message() + */ + dest = dest | (addr >> 32); uint8_t vector = (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; uint8_t dest_mode = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1; uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1; @@ -788,6 +893,10 @@ static int apic_register_write(int index, uint64_t val) switch(index) { case 0x02: + if (is_x2apic_mode(dev)) { + return -1; + } + s->id = (val >> 24); break; case 0x03: @@ -807,9 +916,17 @@ static int apic_register_write(int index, uint64_t val) apic_eoi(s); break; case 0x0d: + if (is_x2apic_mode(dev)) { + return -1; + } + s->log_dest = val >> 24; break; case 0x0e: + if (is_x2apic_mode(dev)) { + return -1; + } + s->dest_mode = val >> 28; break; case 0x0f: @@ -821,13 +938,27 @@ static int apic_register_write(int index, uint64_t val) case 0x20 ... 0x27: case 0x28: break; - case 0x30: + case 0x30: { + uint32_t dest; + s->icr[0] = val; - apic_deliver(dev, (s->icr[1] >> 24) & 0xff, (s->icr[0] >> 11) & 1, + if (is_x2apic_mode(dev)) { + s->icr[1] = val >> 32; + dest = s->icr[1]; + } else { + dest = (s->icr[1] >> 24) & 0xff; + } + + apic_deliver(dev, dest, (s->icr[0] >> 11) & 1, (s->icr[0] >> 8) & 7, (s->icr[0] & 0xff), - (s->icr[0] >> 15) & 1); + (s->icr[0] >> 15) & 1, (s->icr[0] >> 18) & 3); break; + } case 0x31: + if (is_x2apic_mode(dev)) { + return -1; + } + s->icr[1] = val; break; case 0x32 ... 0x37: @@ -856,6 +987,23 @@ static int apic_register_write(int index, uint64_t val) s->count_shift = (v + 1) & 7; } break; + case 0x3f: { + int vector = val & 0xff; + + if (!is_x2apic_mode(dev)) { + return -1; + } + + /* + * Self IPI is identical to IPI with + * - Destination shorthand: 1 (Self) + * - Trigger mode: 0 (Edge) + * - Delivery mode: 0 (Fixed) + */ + apic_deliver(dev, 0, 0, APIC_DM_FIXED, vector, 0, 1); + + break; + } default: s->esr |= APIC_ESR_ILLEGAL_ADDRESS; return -1; @@ -933,12 +1081,6 @@ static void apic_realize(DeviceState *dev, Error **errp) { APICCommonState *s = APIC(dev); - if (s->id >= MAX_APICS) { - error_setg(errp, "%s initialization failed. APIC ID %d is invalid", - object_get_typename(OBJECT(dev)), s->id); - return; - } - if (kvm_enabled()) { warn_report("Userspace local APIC is deprecated for KVM."); warn_report("Do not use kernel-irqchip except for the -M isapc machine type."); @@ -955,7 +1097,16 @@ static void apic_realize(DeviceState *dev, Error **errp) s->io_memory.disable_reentrancy_guard = true; s->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, apic_timer, s); - local_apics[s->id] = s; + + /* + * The --machine none does not call apic_set_max_apic_id before creating + * apic, so we need to call it here and set it to 1 which is the max cpus + * in machine none. + */ + if (!local_apics) { + apic_set_max_apic_id(1); + } + local_apics[s->initial_apic_id] = s; msi_nonbroken = true; } @@ -965,7 +1116,7 @@ static void apic_unrealize(DeviceState *dev) APICCommonState *s = APIC(dev); timer_free(s->timer); - local_apics[s->id] = NULL; + local_apics[s->initial_apic_id] = NULL; } static void apic_class_init(ObjectClass *klass, void *data) diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index bccb4241c2..b7468bcd4c 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -287,6 +287,10 @@ static void apic_common_realize(DeviceState *dev, Error **errp) } vmstate_register_with_alias_id(NULL, instance_id, &vmstate_apic_common, s, -1, 0, NULL); + + /* APIC LDR in x2APIC mode */ + s->extended_log_dest = ((s->initial_apic_id >> 4) << 16) | + (1 << (s->initial_apic_id & 0xf)); } static void apic_common_unrealize(DeviceState *dev) @@ -427,6 +431,11 @@ static void apic_common_set_id(Object *obj, Visitor *v, const char *name, return; } + if (value >= 255 && !cpu_has_x2apic_feature(&s->cpu->env)) { + error_setg(errp, "APIC ID %d requires x2APIC feature in CPU", value); + return; + } + s->initial_apic_id = value; s->id = (uint8_t)value; } diff --git a/include/hw/i386/apic.h b/include/hw/i386/apic.h index ddea4213db..c8ca41ab44 100644 --- a/include/hw/i386/apic.h +++ b/include/hw/i386/apic.h @@ -3,8 +3,7 @@ /* apic.c */ -void apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, - uint8_t vector_num, uint8_t trigger_mode); +void apic_set_max_apic_id(uint32_t max_apic_id); int apic_accept_pic_intr(DeviceState *s); void apic_deliver_pic_intr(DeviceState *s, int level); void apic_deliver_nmi(DeviceState *d); diff --git a/include/hw/i386/apic_internal.h b/include/hw/i386/apic_internal.h index 5f2ba24bfc..e796e6cae3 100644 --- a/include/hw/i386/apic_internal.h +++ b/include/hw/i386/apic_internal.h @@ -46,8 +46,10 @@ #define APIC_DM_EXTINT 7 /* APIC destination mode */ -#define APIC_DESTMODE_FLAT 0xf -#define APIC_DESTMODE_CLUSTER 1 +#define APIC_DESTMODE_PHYSICAL 0 +#define APIC_DESTMODE_LOGICAL 1 +#define APIC_DESTMODE_LOGICAL_FLAT 0xf +#define APIC_DESTMODE_LOGICAL_CLUSTER 0 #define APIC_TRIGGER_EDGE 0 #define APIC_TRIGGER_LEVEL 1 @@ -187,6 +189,7 @@ struct APICCommonState { DeviceState *vapic; hwaddr vapic_paddr; /* note: persistence via kvmvapic */ bool legacy_instance_id; + uint32_t extended_log_dest; }; typedef struct VAPICState { diff --git a/target/i386/cpu-sysemu.c b/target/i386/cpu-sysemu.c index 2375e48178..7422096737 100644 --- a/target/i386/cpu-sysemu.c +++ b/target/i386/cpu-sysemu.c @@ -235,6 +235,16 @@ void cpu_clear_apic_feature(CPUX86State *env) env->features[FEAT_1_EDX] &= ~CPUID_APIC; } +void cpu_set_apic_feature(CPUX86State *env) +{ + env->features[FEAT_1_EDX] |= CPUID_APIC; +} + +bool cpu_has_x2apic_feature(CPUX86State *env) +{ + return env->features[FEAT_1_ECX] & CPUID_EXT_X2APIC; +} + bool cpu_is_bsp(X86CPU *cpu) { return cpu_get_apic_base(cpu->apic_state) & MSR_IA32_APICBASE_BSP; @@ -281,11 +291,17 @@ void x86_cpu_apic_create(X86CPU *cpu, Error **errp) OBJECT(cpu->apic_state)); object_unref(OBJECT(cpu->apic_state)); - qdev_prop_set_uint32(cpu->apic_state, "id", cpu->apic_id); /* TODO: convert to link<> */ apic = APIC_COMMON(cpu->apic_state); apic->cpu = cpu; apic->apicbase = APIC_DEFAULT_ADDRESS | MSR_IA32_APICBASE_ENABLE; + + /* + * apic_common_set_id needs to check if the CPU has x2APIC + * feature in case APIC ID >= 255, so we need to set apic->cpu + * before setting APIC ID + */ + qdev_prop_set_uint32(cpu->apic_state, "id", cpu->apic_id); } void x86_cpu_apic_realize(X86CPU *cpu, Error **errp) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index b99d4b3367..19e8f67e4b 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -2323,8 +2323,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); void cpu_clear_apic_feature(CPUX86State *env); +void cpu_set_apic_feature(CPUX86State *env); void host_cpuid(uint32_t function, uint32_t count, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); +bool cpu_has_x2apic_feature(CPUX86State *env); /* helper.c */ void x86_cpu_set_a20(X86CPU *cpu, int a20_state); -- Gitee From 6364e337aef6d958ce275978415c896bb814e44b Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 6 Jan 2024 14:25:44 +0100 Subject: [PATCH 062/115] hw/i386/x86: Reverse if statement commit f22f3a92eb728497dcd0f43e31b9148992db99bd upstream. The if statement currently uses double negation when executing the else branch. So swap the branches and simplify the condition to make the code more comprehensible. Signed-off-by: Bernhard Beschow Message-Id: <20240106132546.21248-2-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- hw/i386/x86.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/i386/x86.c b/hw/i386/x86.c index aa7eb865b4..e4acad2101 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -520,10 +520,10 @@ static void x86_nmi(NMIState *n, int cpu_index, Error **errp) CPU_FOREACH(cs) { X86CPU *cpu = X86_CPU(cs); - if (!cpu->apic_state) { - cpu_interrupt(cs, CPU_INTERRUPT_NMI); - } else { + if (cpu->apic_state) { apic_deliver_nmi(cpu->apic_state); + } else { + cpu_interrupt(cs, CPU_INTERRUPT_NMI); } } } -- Gitee From 64545819049253d1d2fb92f96792984d6c23dc49 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 6 Jan 2024 14:25:45 +0100 Subject: [PATCH 063/115] hw/i386/x86: Fix PIC interrupt handling if APIC is globally disabled commit c2e6d7d8e7fc270a90c61944ef36574b1549ddcf upstream QEMU populates the apic_state attribute of x86 CPUs if supported by real hardware or if SMP is active. When handling interrupts, it just checks whether apic_state is populated to route the interrupt to the PIC or to the APIC. However, chapter 10.4.3 of [1] requires that: When IA32_APIC_BASE[11] is 0, the processor is functionally equivalent to an IA-32 processor without an on-chip APIC. This means that when apic_state is populated, QEMU needs to check for the MSR_IA32_APICBASE_ENABLE flag in addition. Implement this which fixes some real-world BIOSes. [1] Intel 64 and IA-32 Architectures Software Developer's Manual, Vol. 3A: System Programming Guide, Part 1 Signed-off-by: Bernhard Beschow Message-Id: <20240106132546.21248-3-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Signed-off-by: TauqirAzam Signed-off-by: PrithivishS --- hw/i386/x86.c | 4 ++-- hw/intc/apic_common.c | 13 +++++++++++++ include/hw/i386/apic.h | 1 + 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/hw/i386/x86.c b/hw/i386/x86.c index e4acad2101..a2660c38da 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -520,7 +520,7 @@ static void x86_nmi(NMIState *n, int cpu_index, Error **errp) CPU_FOREACH(cs) { X86CPU *cpu = X86_CPU(cs); - if (cpu->apic_state) { + if (cpu_is_apic_enabled(cpu->apic_state)) { apic_deliver_nmi(cpu->apic_state); } else { cpu_interrupt(cs, CPU_INTERRUPT_NMI); @@ -555,7 +555,7 @@ static void pic_irq_request(void *opaque, int irq, int level) X86CPU *cpu = X86_CPU(cs); trace_x86_pic_interrupt(irq, level); - if (cpu->apic_state && !kvm_irqchip_in_kernel() && + if (cpu_is_apic_enabled(cpu->apic_state) && !kvm_irqchip_in_kernel() && !whpx_apic_in_platform()) { CPU_FOREACH(cs) { cpu = X86_CPU(cs); diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index b7468bcd4c..62e757dba2 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -63,6 +63,19 @@ uint64_t cpu_get_apic_base(DeviceState *dev) } } +bool cpu_is_apic_enabled(DeviceState *dev) +{ + APICCommonState *s; + + if (!dev) { + return false; + } + + s = APIC_COMMON(dev); + + return s->apicbase & MSR_IA32_APICBASE_ENABLE; +} + void cpu_set_apic_tpr(DeviceState *dev, uint8_t val) { APICCommonState *s; diff --git a/include/hw/i386/apic.h b/include/hw/i386/apic.h index c8ca41ab44..98a87b2ded 100644 --- a/include/hw/i386/apic.h +++ b/include/hw/i386/apic.h @@ -10,6 +10,7 @@ void apic_deliver_nmi(DeviceState *d); int apic_get_interrupt(DeviceState *s); void cpu_set_apic_base(DeviceState *s, uint64_t val); uint64_t cpu_get_apic_base(DeviceState *s); +bool cpu_is_apic_enabled(DeviceState *s); void cpu_set_apic_tpr(DeviceState *s, uint8_t val); uint8_t cpu_get_apic_tpr(DeviceState *s); void apic_init_reset(DeviceState *s); -- Gitee From b22e8b55ee8778ec7897444f30abd45a41a918f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 13 Feb 2024 05:14:33 +0100 Subject: [PATCH 064/115] hw/i386/q35: Simplify pc_q35_init() since PCI is always enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 88ad980c0fa8fa47c844f4d8c09fdbcc5d3c8c1d upstream We can not create the Q35 machine without PCI, so simplify pc_q35_init() removing pointless checks. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-Id: <20240213041952.58840-1-philmd@linaro.org> Signed-off-by: TauqirAzam Signed-off-by: PrithivishS --- hw/i386/pc_q35.c | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index b19d3a2d89..5b17a911d0 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -130,8 +130,7 @@ static void pc_q35_init(MachineState *machine) ISADevice *rtc_state; MemoryRegion *system_memory = get_system_memory(); MemoryRegion *system_io = get_system_io(); - MemoryRegion *pci_memory; - MemoryRegion *rom_memory; + MemoryRegion *pci_memory = g_new(MemoryRegion, 1); GSIState *gsi_state; ISABus *isa_bus; int i; @@ -143,6 +142,8 @@ static void pc_q35_init(MachineState *machine) bool keep_pci_slot_hpc; uint64_t pci_hole64_size = 0; + assert(pcmc->pci_enabled); + /* Check whether RAM fits below 4G (leaving 1/2 GByte for IO memory * and 256 Mbytes for PCI Express Enhanced Configuration Access Mapping * also known as MMCFG). @@ -189,16 +190,6 @@ static void pc_q35_init(MachineState *machine) kvmclock_create(pcmc->kvmclock_create_always); } - /* pci enabled */ - if (pcmc->pci_enabled) { - pci_memory = g_new(MemoryRegion, 1); - memory_region_init(pci_memory, NULL, "pci", UINT64_MAX); - rom_memory = pci_memory; - } else { - pci_memory = NULL; - rom_memory = system_memory; - } - pc_guest_info_init(pcms); if (pcmc->smbios_defaults) { @@ -212,14 +203,13 @@ static void pc_q35_init(MachineState *machine) /* create pci host bus */ phb = OBJECT(qdev_new(TYPE_Q35_HOST_DEVICE)); - if (pcmc->pci_enabled) { - pci_hole64_size = object_property_get_uint(phb, - PCI_HOST_PROP_PCI_HOLE64_SIZE, - &error_abort); - } + pci_hole64_size = object_property_get_uint(phb, + PCI_HOST_PROP_PCI_HOLE64_SIZE, + &error_abort); /* allocate ram and load rom/bios */ - pc_memory_init(pcms, system_memory, rom_memory, pci_hole64_size); + memory_region_init(pci_memory, NULL, "pci", UINT64_MAX); + pc_memory_init(pcms, system_memory, pci_memory, pci_hole64_size); object_property_add_child(OBJECT(machine), "q35", phb); object_property_set_link(phb, PCI_HOST_PROP_RAM_MEM, @@ -245,7 +235,7 @@ static void pc_q35_init(MachineState *machine) pcms->bus = host_bus; /* irq lines */ - gsi_state = pc_gsi_create(&x86ms->gsi, pcmc->pci_enabled); + gsi_state = pc_gsi_create(&x86ms->gsi, true); /* create ISA bus */ lpc = pci_new_multifunction(PCI_DEVFN(ICH9_LPC_DEV, ICH9_LPC_FUNC), @@ -288,9 +278,7 @@ static void pc_q35_init(MachineState *machine) pc_i8259_create(isa_bus, gsi_state->i8259_irq); } - if (pcmc->pci_enabled) { - ioapic_init_gsi(gsi_state, "q35"); - } + ioapic_init_gsi(gsi_state, "q35"); if (tcg_enabled()) { x86_register_ferr_irq(x86ms->gsi[13]); -- Gitee From 9ef409071a324f3341fe99a3f65b0524bc019815 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Thu, 8 Feb 2024 23:03:41 +0100 Subject: [PATCH 065/115] hw/i386/x86: Let ioapic_init_gsi() take parent as pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9b0c44334cf7032dc5529c5ecbe1929f76668c11 upstream Rather than taking a QOM name which has to be resolved, let's pass the parent directly as pointer. This simplifies the code. Signed-off-by: Bernhard Beschow Reviewed-by: Michael S. Tsirkin Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Zhao Liu Message-ID: <20240224135851.100361-2-shentey@gmail.com> Signed-off-by: Philippe Mathieu-Daudé Signed-off-by: TauqirAzam Signed-off-by: PrithivishS --- hw/i386/microvm.c | 2 +- hw/i386/pc_piix.c | 7 +++---- hw/i386/pc_q35.c | 2 +- hw/i386/x86.c | 7 +++---- include/hw/i386/x86.h | 2 +- 5 files changed, 9 insertions(+), 11 deletions(-) diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index ca55aecc3b..61a772dfe6 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -175,7 +175,7 @@ static void microvm_devices_init(MicrovmMachineState *mms) &error_abort); isa_bus_register_input_irqs(isa_bus, x86ms->gsi); - ioapic_init_gsi(gsi_state, "machine"); + ioapic_init_gsi(gsi_state, OBJECT(mms)); if (ioapics > 1) { x86ms->ioapic2 = ioapic_init_secondary(gsi_state); } diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 95a773062a..45b1543de8 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -112,6 +112,7 @@ static void pc_init1(MachineState *machine, X86MachineState *x86ms = X86_MACHINE(machine); MemoryRegion *system_memory = get_system_memory(); MemoryRegion *system_io = get_system_io(); + Object *phb = NULL; PCIBus *pci_bus = NULL; ISABus *isa_bus; Object *piix4_pm = NULL; @@ -195,8 +196,6 @@ static void pc_init1(MachineState *machine, } if (pcmc->pci_enabled) { - Object *phb; - pci_memory = g_new(MemoryRegion, 1); memory_region_init(pci_memory, NULL, "pci", UINT64_MAX); rom_memory = pci_memory; @@ -323,8 +322,8 @@ static void pc_init1(MachineState *machine, pc_i8259_create(isa_bus, gsi_state->i8259_irq); } - if (pcmc->pci_enabled) { - ioapic_init_gsi(gsi_state, "i440fx"); + if (phb) { + ioapic_init_gsi(gsi_state, phb); } if (tcg_enabled()) { diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 5b17a911d0..e1b7dce0f2 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -278,7 +278,7 @@ static void pc_q35_init(MachineState *machine) pc_i8259_create(isa_bus, gsi_state->i8259_irq); } - ioapic_init_gsi(gsi_state, "q35"); + ioapic_init_gsi(gsi_state, OBJECT(phb)); if (tcg_enabled()) { x86_register_ferr_irq(x86ms->gsi[13]); diff --git a/hw/i386/x86.c b/hw/i386/x86.c index a2660c38da..734c35e4ae 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -640,20 +640,19 @@ void gsi_handler(void *opaque, int n, int level) } } -void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name) +void ioapic_init_gsi(GSIState *gsi_state, Object *parent) { DeviceState *dev; SysBusDevice *d; unsigned int i; - assert(parent_name); + assert(parent); if (kvm_ioapic_in_kernel()) { dev = qdev_new(TYPE_KVM_IOAPIC); } else { dev = qdev_new(TYPE_IOAPIC); } - object_property_add_child(object_resolve_path(parent_name, NULL), - "ioapic", OBJECT(dev)); + object_property_add_child(parent, "ioapic", OBJECT(dev)); d = SYS_BUS_DEVICE(dev); sysbus_realize_and_unref(d, &error_fatal); sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS); diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h index da19ae1546..6fe29fbb87 100644 --- a/include/hw/i386/x86.h +++ b/include/hw/i386/x86.h @@ -138,7 +138,7 @@ typedef struct GSIState { qemu_irq x86_allocate_cpu_irq(void); void gsi_handler(void *opaque, int n, int level); -void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name); +void ioapic_init_gsi(GSIState *gsi_state, Object *parent); DeviceState *ioapic_init_secondary(GSIState *gsi_state); /* pc_sysfw.c */ -- Gitee From 072c457c87776974df0b48887f9341086bf9e5e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 29 Jan 2024 17:44:44 +0100 Subject: [PATCH 066/115] hw/core: Declare CPUArchId::cpu as CPUState instead of Object MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 97e03106018301969f9e7c1eb22d3838adbe0bcc upstream Do not accept any Object for CPUArchId::cpu field, restrict it to CPUState type. [Backport Changes] Changes to file hw/loongarch/virt.c were skipped since the current codebase has been updated with latest changes in commit 8d440efd992fd6be0aca. Signed-off-by: Philippe Mathieu-Daudé Message-ID: <20240129164514.73104-3-philmd@linaro.org> Reviewed-by: Thomas Huth Signed-off-by: Thomas Huth Signed-off-by: TauqirAzam Signed-off-by: PrithivishS --- hw/arm/virt.c | 4 ++-- hw/core/machine.c | 4 ++-- hw/i386/x86.c | 2 +- hw/ppc/spapr.c | 5 ++--- hw/s390x/s390-virtio-ccw.c | 2 +- include/hw/boards.h | 2 +- 6 files changed, 9 insertions(+), 10 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 9c8626ee07..3d094e8dfe 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -2842,7 +2842,7 @@ static void machvirt_init(MachineState *machine) * after GIC and machine has been fully initialized during * machine_init_done() phase. */ - cpu_slot->cpu = OBJECT(cs); + cpu_slot->cpu = cs; } } fdt_add_timer_nodes(vms); @@ -3552,7 +3552,7 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, /* insert the cold/hot-plugged vcpu in the slot */ cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index); - cpu_slot->cpu = OBJECT(dev); + cpu_slot->cpu = CPU(dev); /* * Update the ACPI Hotplug state both for vCPUs being {hot,cold}-plugged. diff --git a/hw/core/machine.c b/hw/core/machine.c index a5ecdbd996..dbf5b81204 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -721,7 +721,7 @@ HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine) mc->possible_cpu_arch_ids(machine); for (i = 0; i < machine->possible_cpus->len; i++) { - Object *cpu; + CPUState *cpu; HotpluggableCPU *cpu_item = g_new0(typeof(*cpu_item), 1); cpu_item->type = g_strdup(machine->possible_cpus->cpus[i].type); @@ -731,7 +731,7 @@ HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine) cpu = machine->possible_cpus->cpus[i].cpu; if (cpu) { - cpu_item->qom_path = object_get_canonical_path(cpu); + cpu_item->qom_path = object_get_canonical_path(OBJECT(cpu)); } QAPI_LIST_PREPEND(head, cpu_item); } diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 734c35e4ae..3d5b51e92d 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -225,7 +225,7 @@ void x86_cpu_plug(HotplugHandler *hotplug_dev, } found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); - found_cpu->cpu = OBJECT(dev); + found_cpu->cpu = CPU(dev); out: error_propagate(errp, local_err); } diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 38f7e293b6..1bb2d363ed 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -3989,7 +3989,6 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev) SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); SpaprCpuCore *core = SPAPR_CPU_CORE(OBJECT(dev)); CPUCore *cc = CPU_CORE(dev); - CPUState *cs; SpaprDrc *drc; CPUArchId *core_slot; int index; @@ -4023,7 +4022,7 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev) } } - core_slot->cpu = OBJECT(dev); + core_slot->cpu = CPU(dev); /* * Set compatibility mode to match the boot CPU, which was either set @@ -4039,7 +4038,7 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev) if (smc->pre_2_10_has_unused_icps) { for (i = 0; i < cc->nr_threads; i++) { - cs = CPU(core->threads[i]); + CPUState *cs = CPU(core->threads[i]); pre_2_10_vmstate_unregister_dummy_icp(cs->cpu_index); } } diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index bd5c430da8..3021685659 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -319,7 +319,7 @@ static void s390_cpu_plug(HotplugHandler *hotplug_dev, ERRP_GUARD(); g_assert(!ms->possible_cpus->cpus[cpu->env.core_id].cpu); - ms->possible_cpus->cpus[cpu->env.core_id].cpu = OBJECT(dev); + ms->possible_cpus->cpus[cpu->env.core_id].cpu = CPU(dev); if (s390_has_topology()) { s390_topology_setup_cpu(ms, cpu, errp); diff --git a/include/hw/boards.h b/include/hw/boards.h index ca4237d7bb..1f410b8d66 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -115,7 +115,7 @@ typedef struct CPUArchId { uint64_t arch_id; int64_t vcpus_count; CpuInstanceProperties props; - Object *cpu; + CPUState *cpu; const char *type; } CPUArchId; -- Gitee From 5f18bdacca6c6f539691314fd3bf802e11bf463c Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Mon, 22 Apr 2024 22:06:22 +0200 Subject: [PATCH 067/115] hw/i386/pc_sysfw: Remove unused parameter from pc_isa_bios_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f4b63768b91811cdcf1fb7b270587123251dfea5 upstream Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Message-ID: <20240422200625.2768-2-shentey@gmail.com> Signed-off-by: Philippe Mathieu-Daudé Signed-off-by: TauqirAzam Signed-off-by: PrithivishS --- hw/i386/pc_sysfw.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index f0c0cea47d..5f65675ce5 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -42,8 +42,7 @@ #define FLASH_SECTOR_SIZE 4096 static void pc_isa_bios_init(MemoryRegion *rom_memory, - MemoryRegion *flash_mem, - int ram_size) + MemoryRegion *flash_mem) { int isa_bios_size; MemoryRegion *isa_bios; @@ -189,7 +188,7 @@ static void pc_system_flash_map(PCMachineState *pcms, if (i == 0) { flash_mem = pflash_cfi01_get_memory(system_flash); - pc_isa_bios_init(rom_memory, flash_mem, size); + pc_isa_bios_init(rom_memory, flash_mem); /* Encrypt the pflash boot ROM */ if (sev_enabled()) { -- Gitee From aac3d23febaf4ecd4af7a6759dd5ee66cc134af5 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Tue, 30 Apr 2024 17:06:38 +0200 Subject: [PATCH 068/115] hw/i386/x86: Eliminate two if statements in x86_bios_rom_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 014dbdac8798799d081abc9dff3e4876ca54f49e upstream Given that memory_region_set_readonly() is a no-op when the readonlyness is already as requested it is possible to simplify the pattern if (condition) { foo(true); } to foo(condition); which is shorter and allows to see the invariant of the code more easily. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Message-ID: <20240430150643.111976-2-shentey@gmail.com> Signed-off-by: Philippe Mathieu-Daudé Signed-off-by: TauqirAzam Signed-off-by: PrithivishS --- hw/i386/x86.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 3d5b51e92d..2a4f3ee285 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -1163,9 +1163,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware, load_image_size(filename, ptr, bios_size); x86_firmware_configure(ptr, bios_size); } else { - if (!isapc_ram_fw) { - memory_region_set_readonly(bios, true); - } + memory_region_set_readonly(bios, !isapc_ram_fw); ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); if (ret != 0) { goto bios_error; @@ -1182,9 +1180,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware, 0x100000 - isa_bios_size, isa_bios, 1); - if (!isapc_ram_fw) { - memory_region_set_readonly(isa_bios, true); - } + memory_region_set_readonly(isa_bios, !isapc_ram_fw); /* map all the bios at the top of memory */ memory_region_add_subregion(rom_memory, -- Gitee From ba21aef835ba6d439b0e3f7753d6185f10b3487d Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Tue, 30 Apr 2024 17:06:39 +0200 Subject: [PATCH 069/115] hw/i386: Have x86_bios_rom_init() take X86MachineState rather than MachineState MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 848351840148f8c3b53ddf6210194506547d3ffd upstream The function creates and leaks two MemoryRegion objects regarding the BIOS which will be moved into X86MachineState in the next steps to avoid the leakage. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Message-ID: <20240430150643.111976-3-shentey@gmail.com> Signed-off-by: Philippe Mathieu-Daudé Signed-off-by: TauqirAzam Signed-off-by: PrithivishS --- hw/i386/microvm.c | 2 +- hw/i386/pc_sysfw.c | 4 ++-- hw/i386/x86.c | 4 ++-- include/hw/i386/x86.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c index 61a772dfe6..fec63cacfa 100644 --- a/hw/i386/microvm.c +++ b/hw/i386/microvm.c @@ -278,7 +278,7 @@ static void microvm_devices_init(MicrovmMachineState *mms) default_firmware = x86_machine_is_acpi_enabled(x86ms) ? MICROVM_BIOS_FILENAME : MICROVM_QBOOT_FILENAME; - x86_bios_rom_init(MACHINE(mms), default_firmware, get_system_memory(), true); + x86_bios_rom_init(x86ms, default_firmware, get_system_memory(), true); } static void microvm_memory_init(MicrovmMachineState *mms) diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index 5f65675ce5..02833dc96e 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -208,7 +208,7 @@ void pc_system_firmware_init(PCMachineState *pcms, BlockBackend *pflash_blk[ARRAY_SIZE(pcms->flash)]; if (!pcmc->pci_enabled) { - x86_bios_rom_init(MACHINE(pcms), "bios.bin", rom_memory, true); + x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, true); return; } @@ -229,7 +229,7 @@ void pc_system_firmware_init(PCMachineState *pcms, if (!pflash_blk[0]) { /* Machine property pflash0 not set, use ROM mode */ - x86_bios_rom_init(MACHINE(pcms), "bios.bin", rom_memory, false); + x86_bios_rom_init(X86_MACHINE(pcms), "bios.bin", rom_memory, false); } else { if (kvm_enabled() && !kvm_readonly_mem_enabled()) { /* diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 2a4f3ee285..6d3c72f124 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -1128,7 +1128,7 @@ void x86_load_linux(X86MachineState *x86ms, nb_option_roms++; } -void x86_bios_rom_init(MachineState *ms, const char *default_firmware, +void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, MemoryRegion *rom_memory, bool isapc_ram_fw) { const char *bios_name; @@ -1138,7 +1138,7 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware, ssize_t ret; /* BIOS load */ - bios_name = ms->firmware ?: default_firmware; + bios_name = MACHINE(x86ms)->firmware ?: default_firmware; filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); if (filename) { bios_size = get_image_size(filename); diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h index 6fe29fbb87..8227118e9e 100644 --- a/include/hw/i386/x86.h +++ b/include/hw/i386/x86.h @@ -115,7 +115,7 @@ void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); -void x86_bios_rom_init(MachineState *ms, const char *default_firmware, +void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, MemoryRegion *rom_memory, bool isapc_ram_fw); void x86_load_linux(X86MachineState *x86ms, -- Gitee From 8436b68c196ff41a3443076d655f002adf8f0376 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Wed, 8 May 2024 19:55:04 +0200 Subject: [PATCH 070/115] hw/i386/x86: Don't leak "isa-bios" memory regions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 32d3ee87a17fc91e981a23dba94855bff89f5920 upstream Fix the leaking in x86_bios_rom_init() and pc_isa_bios_init() by adding an "isa_bios" attribute to X86MachineState. Suggested-by: Philippe Mathieu-Daudé Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Bernhard Beschow Message-ID: <20240508175507.22270-4-shentey@gmail.com> Signed-off-by: Philippe Mathieu-Daudé Signed-off-by: TauqirAzam Signed-off-by: PrithivishS --- hw/i386/pc_sysfw.c | 7 +++---- hw/i386/x86.c | 9 ++++----- include/hw/i386/x86.h | 7 +++++++ 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index 02833dc96e..7b7e65272b 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -41,11 +41,10 @@ #define FLASH_SECTOR_SIZE 4096 -static void pc_isa_bios_init(MemoryRegion *rom_memory, +static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, MemoryRegion *flash_mem) { int isa_bios_size; - MemoryRegion *isa_bios; uint64_t flash_size; void *flash_ptr, *isa_bios_ptr; @@ -53,7 +52,6 @@ static void pc_isa_bios_init(MemoryRegion *rom_memory, /* map the last 128KB of the BIOS in ISA space */ isa_bios_size = MIN(flash_size, 128 * KiB); - isa_bios = g_malloc(sizeof(*isa_bios)); memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size, &error_fatal); memory_region_add_subregion_overlap(rom_memory, @@ -139,6 +137,7 @@ void pc_system_flash_cleanup_unused(PCMachineState *pcms) static void pc_system_flash_map(PCMachineState *pcms, MemoryRegion *rom_memory) { + X86MachineState *x86ms = X86_MACHINE(pcms); hwaddr total_size = 0; int i; BlockBackend *blk; @@ -188,7 +187,7 @@ static void pc_system_flash_map(PCMachineState *pcms, if (i == 0) { flash_mem = pflash_cfi01_get_memory(system_flash); - pc_isa_bios_init(rom_memory, flash_mem); + pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem); /* Encrypt the pflash boot ROM */ if (sev_enabled()) { diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 6d3c72f124..457e8a34a5 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -1133,7 +1133,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, { const char *bios_name; char *filename; - MemoryRegion *bios, *isa_bios; + MemoryRegion *bios; int bios_size, isa_bios_size; ssize_t ret; @@ -1173,14 +1173,13 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, /* map the last 128KB of the BIOS in ISA space */ isa_bios_size = MIN(bios_size, 128 * KiB); - isa_bios = g_malloc(sizeof(*isa_bios)); - memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, + memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", bios, bios_size - isa_bios_size, isa_bios_size); memory_region_add_subregion_overlap(rom_memory, 0x100000 - isa_bios_size, - isa_bios, + &x86ms->isa_bios, 1); - memory_region_set_readonly(isa_bios, !isapc_ram_fw); + memory_region_set_readonly(&x86ms->isa_bios, !isapc_ram_fw); /* map all the bios at the top of memory */ memory_region_add_subregion(rom_memory, diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h index 8227118e9e..e2ca44f5bb 100644 --- a/include/hw/i386/x86.h +++ b/include/hw/i386/x86.h @@ -18,6 +18,7 @@ #define HW_I386_X86_H #include "exec/hwaddr.h" +#include "exec/memory.h" #include "hw/boards.h" #include "hw/intc/ioapic.h" @@ -50,6 +51,12 @@ struct X86MachineState { GMappedFile *initrd_mapped_file; HotplugHandler *acpi_dev; + /* + * Map the upper 128 KiB of the BIOS just underneath the 1 MiB address + * boundary. + */ + MemoryRegion isa_bios; + /* RAM information (sizes, addresses, configuration): */ ram_addr_t below_4g_mem_size, above_4g_mem_size; -- Gitee From e38a7175792d7d6cc4cad4193c30ff449bf40bfd Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Wed, 8 May 2024 19:55:05 +0200 Subject: [PATCH 071/115] hw/i386/x86: Don't leak "pc.bios" memory region MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 865d95321ffc8d9941e33000b10140550f094556 upstream Fix the leaking in x86_bios_rom_init() by adding a "bios" attribute to X86MachineState. Note that it is only used in the -bios case. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Bernhard Beschow Message-ID: <20240508175507.22270-5-shentey@gmail.com> Signed-off-by: Philippe Mathieu-Daudé Signed-off-by: TauqirAzam Signed-off-by: PrithivishS --- hw/i386/x86.c | 13 ++++++------- include/hw/i386/x86.h | 6 ++++++ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 457e8a34a5..29167de97d 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -1133,7 +1133,6 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, { const char *bios_name; char *filename; - MemoryRegion *bios; int bios_size, isa_bios_size; ssize_t ret; @@ -1149,8 +1148,8 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, (bios_size % 65536) != 0) { goto bios_error; } - bios = g_malloc(sizeof(*bios)); - memory_region_init_ram(bios, NULL, "pc.bios", bios_size, &error_fatal); + memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, + &error_fatal); if (sev_enabled()) { /* * The concept of a "reset" simply doesn't exist for @@ -1159,11 +1158,11 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, * the firmware as rom to properly re-initialize on reset. * Just go for a straight file load instead. */ - void *ptr = memory_region_get_ram_ptr(bios); + void *ptr = memory_region_get_ram_ptr(&x86ms->bios); load_image_size(filename, ptr, bios_size); x86_firmware_configure(ptr, bios_size); } else { - memory_region_set_readonly(bios, !isapc_ram_fw); + memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw); ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); if (ret != 0) { goto bios_error; @@ -1173,7 +1172,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, /* map the last 128KB of the BIOS in ISA space */ isa_bios_size = MIN(bios_size, 128 * KiB); - memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", bios, + memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", &x86ms->bios, bios_size - isa_bios_size, isa_bios_size); memory_region_add_subregion_overlap(rom_memory, 0x100000 - isa_bios_size, @@ -1184,7 +1183,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, /* map all the bios at the top of memory */ memory_region_add_subregion(rom_memory, (uint32_t)(-bios_size), - bios); + &x86ms->bios); return; bios_error: diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h index e2ca44f5bb..54e7a0f543 100644 --- a/include/hw/i386/x86.h +++ b/include/hw/i386/x86.h @@ -51,6 +51,12 @@ struct X86MachineState { GMappedFile *initrd_mapped_file; HotplugHandler *acpi_dev; + /* + * Map the whole BIOS just underneath the 4 GiB address boundary. Only used + * in the ROM (-bios) case. + */ + MemoryRegion bios; + /* * Map the upper 128 KiB of the BIOS just underneath the 1 MiB address * boundary. -- Gitee From 0e7ca6bb0ffd9a308f9ee42a3200122312b12a9d Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Wed, 8 May 2024 19:55:06 +0200 Subject: [PATCH 072/115] hw/i386/x86: Extract x86_isa_bios_init() from x86_bios_rom_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5c5ffec12c30d2017cbdee6798f54d8fad3f9656 upstream The function is inspired by pc_isa_bios_init() and should eventually replace it. Using x86_isa_bios_init() rather than pc_isa_bios_init() fixes pflash commands to work in the isa-bios region. While at it convert the magic number 0x100000 (== 1MiB) to increase readability. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Bernhard Beschow Message-ID: <20240508175507.22270-6-shentey@gmail.com> Signed-off-by: Philippe Mathieu-Daudé Signed-off-by: TauqirAzam Signed-off-by: PrithivishS --- hw/i386/x86.c | 25 ++++++++++++++++--------- include/hw/i386/x86.h | 2 ++ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/hw/i386/x86.c b/hw/i386/x86.c index 29167de97d..c61f4ebfa6 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -1128,12 +1128,25 @@ void x86_load_linux(X86MachineState *x86ms, nb_option_roms++; } +void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory, + MemoryRegion *bios, bool read_only) +{ + uint64_t bios_size = memory_region_size(bios); + uint64_t isa_bios_size = MIN(bios_size, 128 * KiB); + + memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, + bios_size - isa_bios_size, isa_bios_size); + memory_region_add_subregion_overlap(isa_memory, 1 * MiB - isa_bios_size, + isa_bios, 1); + memory_region_set_readonly(isa_bios, read_only); +} + void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, MemoryRegion *rom_memory, bool isapc_ram_fw) { const char *bios_name; char *filename; - int bios_size, isa_bios_size; + int bios_size; ssize_t ret; /* BIOS load */ @@ -1171,14 +1184,8 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, g_free(filename); /* map the last 128KB of the BIOS in ISA space */ - isa_bios_size = MIN(bios_size, 128 * KiB); - memory_region_init_alias(&x86ms->isa_bios, NULL, "isa-bios", &x86ms->bios, - bios_size - isa_bios_size, isa_bios_size); - memory_region_add_subregion_overlap(rom_memory, - 0x100000 - isa_bios_size, - &x86ms->isa_bios, - 1); - memory_region_set_readonly(&x86ms->isa_bios, !isapc_ram_fw); + x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, + !isapc_ram_fw); /* map all the bios at the top of memory */ memory_region_add_subregion(rom_memory, diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h index 54e7a0f543..29b38eb5ef 100644 --- a/include/hw/i386/x86.h +++ b/include/hw/i386/x86.h @@ -128,6 +128,8 @@ void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); +void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory, + MemoryRegion *bios, bool read_only); void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, MemoryRegion *rom_memory, bool isapc_ram_fw); -- Gitee From 69fdcc7ba3c9bd049eaf65bb7256606360e81c39 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 9 May 2024 19:00:41 +0200 Subject: [PATCH 073/115] hw/i386: split x86.c in multiple parts commit b061f0598b9231f7992aff4fcdf3f336f9747d11 upstream. Keep the basic X86MachineState definition in x86.c. Move out functions that are only needed by other files: x86-common.c for the pc and microvm machines, x86-cpu.c for those used by accelerator code. Signed-off-by: Paolo Bonzini Reviewed-by: Zhao Liu Message-ID: <20240509170044.190795-11-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini Signed-off-by: priyankagunturi Signed-off-by: PrithivishS --- hw/i386/meson.build | 4 +- hw/i386/x86-common.c | 1007 +++++++++++++++++++++++++++++++++++++++ hw/i386/x86-cpu.c | 97 ++++ hw/i386/x86.c | 1052 +---------------------------------------- include/hw/i386/x86.h | 6 +- 5 files changed, 1113 insertions(+), 1053 deletions(-) create mode 100644 hw/i386/x86-common.c create mode 100644 hw/i386/x86-cpu.c diff --git a/hw/i386/meson.build b/hw/i386/meson.build index 369c6bf823..1341361eda 100644 --- a/hw/i386/meson.build +++ b/hw/i386/meson.build @@ -5,13 +5,14 @@ i386_ss.add(files( 'e820_memory_layout.c', 'multiboot.c', 'x86.c', + 'x86-cpu.c', )) i386_ss.add(when: 'CONFIG_X86_IOMMU', if_true: files('x86-iommu.c'), if_false: files('x86-iommu-stub.c')) i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c')) i386_ss.add(when: 'CONFIG_I440FX', if_true: files('pc_piix.c')) -i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('microvm.c', 'acpi-microvm.c', 'microvm-dt.c')) +i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('x86-common.c', 'microvm.c', 'acpi-microvm.c', 'microvm-dt.c')) i386_ss.add(when: 'CONFIG_Q35', if_true: files('pc_q35.c')) i386_ss.add(when: 'CONFIG_VMMOUSE', if_true: files('vmmouse.c')) i386_ss.add(when: 'CONFIG_VMPORT', if_true: files('vmport.c')) @@ -21,6 +22,7 @@ i386_ss.add(when: 'CONFIG_SGX', if_true: files('sgx-epc.c','sgx.c'), i386_ss.add(when: 'CONFIG_ACPI', if_true: files('acpi-common.c')) i386_ss.add(when: 'CONFIG_PC', if_true: files( + 'x86-common.c', 'pc.c', 'pc_sysfw.c', 'acpi-build.c', diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c new file mode 100644 index 0000000000..67b03c913a --- /dev/null +++ b/hw/i386/x86-common.c @@ -0,0 +1,1007 @@ +/* + * Copyright (c) 2003-2004 Fabrice Bellard + * Copyright (c) 2019, 2024 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qemu/cutils.h" +#include "qemu/units.h" +#include "qemu/datadir.h" +#include "qapi/error.h" +#include "sysemu/numa.h" +#include "sysemu/sysemu.h" +#include "sysemu/xen.h" +#include "trace.h" + +#include "hw/i386/x86.h" +#include "target/i386/cpu.h" +#include "hw/rtc/mc146818rtc.h" +#include "target/i386/sev.h" + +#include "hw/acpi/cpu_hotplug.h" +#include "hw/irq.h" +#include "hw/loader.h" +#include "multiboot.h" +#include "elf.h" +#include "standard-headers/asm-x86/bootparam.h" +#include CONFIG_DEVICES +#include "kvm/kvm_i386.h" + +#ifdef CONFIG_XEN_EMU +#include "hw/xen/xen.h" +#include "hw/i386/kvm/xen_evtchn.h" +#endif + +/* Physical Address of PVH entry point read from kernel ELF NOTE */ +static size_t pvh_start_addr; + +static void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, Error **errp) +{ + Object *cpu = object_new(MACHINE(x86ms)->cpu_type); + + if (!object_property_set_uint(cpu, "apic-id", apic_id, errp)) { + goto out; + } + qdev_realize(DEVICE(cpu), NULL, errp); + +out: + object_unref(cpu); +} + +void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) +{ + int i; + const CPUArchIdList *possible_cpus; + MachineState *ms = MACHINE(x86ms); + MachineClass *mc = MACHINE_GET_CLASS(x86ms); + + x86_cpu_set_default_version(default_cpu_version); + + /* + * Calculates the limit to CPU APIC ID values + * + * Limit for the APIC ID value, so that all + * CPU APIC IDs are < x86ms->apic_id_limit. + * + * This is used for FW_CFG_MAX_CPUS. See comments on fw_cfg_arch_create(). + */ + x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, + ms->smp.max_cpus - 1) + 1; + + /* + * Can we support APIC ID 255 or higher? With KVM, that requires + * both in-kernel lapic and X2APIC userspace API. + * + * kvm_enabled() must go first to ensure that kvm_* references are + * not emitted for the linker to consume (kvm_enabled() is + * a literal `0` in configurations where kvm_* aren't defined) + */ + if (kvm_enabled() && x86ms->apic_id_limit > 255 && + kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) { + error_report("current -smp configuration requires kernel " + "irqchip and X2APIC API support."); + exit(EXIT_FAILURE); + } + + if (kvm_enabled()) { + kvm_set_max_apic_id(x86ms->apic_id_limit); + } + + if (!kvm_irqchip_in_kernel()) { + apic_set_max_apic_id(x86ms->apic_id_limit); + } + + possible_cpus = mc->possible_cpu_arch_ids(ms); + for (i = 0; i < ms->smp.cpus; i++) { + x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); + } +} + +void x86_rtc_set_cpus_count(ISADevice *s, uint16_t cpus_count) +{ + MC146818RtcState *rtc = MC146818_RTC(s); + + if (cpus_count > 0xff) { + /* + * If the number of CPUs can't be represented in 8 bits, the + * BIOS must use "FW_CFG_NB_CPUS". Set RTC field to 0 just + * to make old BIOSes fail more predictably. + */ + mc146818rtc_set_cmos_data(rtc, 0x5f, 0); + } else { + mc146818rtc_set_cmos_data(rtc, 0x5f, cpus_count - 1); + } +} + +static int x86_apic_cmp(const void *a, const void *b) +{ + CPUArchId *apic_a = (CPUArchId *)a; + CPUArchId *apic_b = (CPUArchId *)b; + + return apic_a->arch_id - apic_b->arch_id; +} + +/* + * returns pointer to CPUArchId descriptor that matches CPU's apic_id + * in ms->possible_cpus->cpus, if ms->possible_cpus->cpus has no + * entry corresponding to CPU's apic_id returns NULL. + */ +static CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx) +{ + CPUArchId apic_id, *found_cpu; + + apic_id.arch_id = id; + found_cpu = bsearch(&apic_id, ms->possible_cpus->cpus, + ms->possible_cpus->len, sizeof(*ms->possible_cpus->cpus), + x86_apic_cmp); + if (found_cpu && idx) { + *idx = found_cpu - ms->possible_cpus->cpus; + } + return found_cpu; +} + +void x86_cpu_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + CPUArchId *found_cpu; + Error *local_err = NULL; + X86CPU *cpu = X86_CPU(dev); + X86MachineState *x86ms = X86_MACHINE(hotplug_dev); + + if (x86ms->acpi_dev) { + hotplug_handler_plug(x86ms->acpi_dev, dev, &local_err); + if (local_err) { + goto out; + } + } + + /* increment the number of CPUs */ + x86ms->boot_cpus++; + if (x86ms->rtc) { + x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); + } + if (x86ms->fw_cfg) { + fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); + } + + found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); + found_cpu->cpu = CPU(dev); +out: + error_propagate(errp, local_err); +} + +void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + int idx = -1; + X86CPU *cpu = X86_CPU(dev); + X86MachineState *x86ms = X86_MACHINE(hotplug_dev); + + if (!x86ms->acpi_dev) { + error_setg(errp, "CPU hot unplug not supported without ACPI"); + return; + } + + x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); + assert(idx != -1); + if (idx == 0) { + error_setg(errp, "Boot CPU is unpluggable"); + return; + } + + hotplug_handler_unplug_request(x86ms->acpi_dev, dev, + errp); +} + +void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + CPUArchId *found_cpu; + Error *local_err = NULL; + X86CPU *cpu = X86_CPU(dev); + X86MachineState *x86ms = X86_MACHINE(hotplug_dev); + + hotplug_handler_unplug(x86ms->acpi_dev, dev, &local_err); + if (local_err) { + goto out; + } + + found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); + found_cpu->cpu = NULL; + qdev_unrealize(dev); + + /* decrement the number of CPUs */ + x86ms->boot_cpus--; + /* Update the number of CPUs in CMOS */ + x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); + fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); + out: + error_propagate(errp, local_err); +} + +void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + int idx; + CPUState *cs; + CPUArchId *cpu_slot; + X86CPUTopoIDs topo_ids; + X86CPU *cpu = X86_CPU(dev); + CPUX86State *env = &cpu->env; + MachineState *ms = MACHINE(hotplug_dev); + X86MachineState *x86ms = X86_MACHINE(hotplug_dev); + unsigned int smp_cores = ms->smp.cores; + unsigned int smp_threads = ms->smp.threads; + X86CPUTopoInfo topo_info; + + if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { + error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", + ms->cpu_type); + return; + } + + if (x86ms->acpi_dev) { + Error *local_err = NULL; + + hotplug_handler_pre_plug(HOTPLUG_HANDLER(x86ms->acpi_dev), dev, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } + + init_topo_info(&topo_info, x86ms); + + env->nr_dies = ms->smp.dies; + + /* + * If APIC ID is not set, + * set it based on socket/die/core/thread properties. + */ + if (cpu->apic_id == UNASSIGNED_APIC_ID) { + int max_socket = (ms->smp.max_cpus - 1) / + smp_threads / smp_cores / ms->smp.dies; + + /* + * die-id was optional in QEMU 4.0 and older, so keep it optional + * if there's only one die per socket. + */ + if (cpu->die_id < 0 && ms->smp.dies == 1) { + cpu->die_id = 0; + } + + if (cpu->socket_id < 0) { + error_setg(errp, "CPU socket-id is not set"); + return; + } else if (cpu->socket_id > max_socket) { + error_setg(errp, "Invalid CPU socket-id: %u must be in range 0:%u", + cpu->socket_id, max_socket); + return; + } + if (cpu->die_id < 0) { + error_setg(errp, "CPU die-id is not set"); + return; + } else if (cpu->die_id > ms->smp.dies - 1) { + error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u", + cpu->die_id, ms->smp.dies - 1); + return; + } + if (cpu->core_id < 0) { + error_setg(errp, "CPU core-id is not set"); + return; + } else if (cpu->core_id > (smp_cores - 1)) { + error_setg(errp, "Invalid CPU core-id: %u must be in range 0:%u", + cpu->core_id, smp_cores - 1); + return; + } + if (cpu->thread_id < 0) { + error_setg(errp, "CPU thread-id is not set"); + return; + } else if (cpu->thread_id > (smp_threads - 1)) { + error_setg(errp, "Invalid CPU thread-id: %u must be in range 0:%u", + cpu->thread_id, smp_threads - 1); + return; + } + + topo_ids.pkg_id = cpu->socket_id; + topo_ids.die_id = cpu->die_id; + topo_ids.core_id = cpu->core_id; + topo_ids.smt_id = cpu->thread_id; + cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids); + } + + cpu_slot = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); + if (!cpu_slot) { + x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); + error_setg(errp, + "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with" + " APIC ID %" PRIu32 ", valid index range 0:%d", + topo_ids.pkg_id, topo_ids.die_id, topo_ids.core_id, topo_ids.smt_id, + cpu->apic_id, ms->possible_cpus->len - 1); + return; + } + + if (cpu_slot->cpu) { + error_setg(errp, "CPU[%d] with APIC ID %" PRIu32 " exists", + idx, cpu->apic_id); + return; + } + + /* if 'address' properties socket-id/core-id/thread-id are not set, set them + * so that machine_query_hotpluggable_cpus would show correct values + */ + /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() + * once -smp refactoring is complete and there will be CPU private + * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */ + x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); + if (cpu->socket_id != -1 && cpu->socket_id != topo_ids.pkg_id) { + error_setg(errp, "property socket-id: %u doesn't match set apic-id:" + " 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id, + topo_ids.pkg_id); + return; + } + cpu->socket_id = topo_ids.pkg_id; + + if (cpu->die_id != -1 && cpu->die_id != topo_ids.die_id) { + error_setg(errp, "property die-id: %u doesn't match set apic-id:" + " 0x%x (die-id: %u)", cpu->die_id, cpu->apic_id, topo_ids.die_id); + return; + } + cpu->die_id = topo_ids.die_id; + + if (cpu->core_id != -1 && cpu->core_id != topo_ids.core_id) { + error_setg(errp, "property core-id: %u doesn't match set apic-id:" + " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id, + topo_ids.core_id); + return; + } + cpu->core_id = topo_ids.core_id; + + if (cpu->thread_id != -1 && cpu->thread_id != topo_ids.smt_id) { + error_setg(errp, "property thread-id: %u doesn't match set apic-id:" + " 0x%x (thread-id: %u)", cpu->thread_id, cpu->apic_id, + topo_ids.smt_id); + return; + } + cpu->thread_id = topo_ids.smt_id; + + /* + * kvm_enabled() must go first to ensure that kvm_* references are + * not emitted for the linker to consume (kvm_enabled() is + * a literal `0` in configurations where kvm_* aren't defined) + */ + if (kvm_enabled() && hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) && + !kvm_hv_vpindex_settable()) { + error_setg(errp, "kernel doesn't allow setting HyperV VP_INDEX"); + return; + } + + cs = CPU(cpu); + cs->cpu_index = idx; + + numa_cpu_pre_plug(cpu_slot, dev, errp); +} + +static long get_file_size(FILE *f) +{ + long where, size; + + /* XXX: on Unix systems, using fstat() probably makes more sense */ + + where = ftell(f); + fseek(f, 0, SEEK_END); + size = ftell(f); + fseek(f, where, SEEK_SET); + + return size; +} + +void gsi_handler(void *opaque, int n, int level) +{ + GSIState *s = opaque; + + trace_x86_gsi_interrupt(n, level); + switch (n) { + case 0 ... ISA_NUM_IRQS - 1: + if (s->i8259_irq[n]) { + /* Under KVM, Kernel will forward to both PIC and IOAPIC */ + qemu_set_irq(s->i8259_irq[n], level); + } + /* fall through */ + case ISA_NUM_IRQS ... IOAPIC_NUM_PINS - 1: +#ifdef CONFIG_XEN_EMU + /* + * Xen delivers the GSI to the Legacy PIC (not that Legacy PIC + * routing actually works properly under Xen). And then to + * *either* the PIRQ handling or the I/OAPIC depending on + * whether the former wants it. + */ + if (xen_mode == XEN_EMULATE && xen_evtchn_set_gsi(n, level)) { + break; + } +#endif + qemu_set_irq(s->ioapic_irq[n], level); + break; + case IO_APIC_SECONDARY_IRQBASE + ... IO_APIC_SECONDARY_IRQBASE + IOAPIC_NUM_PINS - 1: + qemu_set_irq(s->ioapic2_irq[n - IO_APIC_SECONDARY_IRQBASE], level); + break; + } +} + +void ioapic_init_gsi(GSIState *gsi_state, Object *parent) +{ + DeviceState *dev; + SysBusDevice *d; + unsigned int i; + + assert(parent); + if (kvm_ioapic_in_kernel()) { + dev = qdev_new(TYPE_KVM_IOAPIC); + } else { + dev = qdev_new(TYPE_IOAPIC); + } + object_property_add_child(parent, "ioapic", OBJECT(dev)); + d = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(d, &error_fatal); + sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS); + + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i); + } +} + +DeviceState *ioapic_init_secondary(GSIState *gsi_state) +{ + DeviceState *dev; + SysBusDevice *d; + unsigned int i; + + dev = qdev_new(TYPE_IOAPIC); + d = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(d, &error_fatal); + sysbus_mmio_map(d, 0, IO_APIC_SECONDARY_ADDRESS); + + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + gsi_state->ioapic2_irq[i] = qdev_get_gpio_in(dev, i); + } + return dev; +} + +/* + * The entry point into the kernel for PVH boot is different from + * the native entry point. The PVH entry is defined by the x86/HVM + * direct boot ABI and is available in an ELFNOTE in the kernel binary. + * + * This function is passed to load_elf() when it is called from + * load_elfboot() which then additionally checks for an ELF Note of + * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to + * parse the PVH entry address from the ELF Note. + * + * Due to trickery in elf_opts.h, load_elf() is actually available as + * load_elf32() or load_elf64() and this routine needs to be able + * to deal with being called as 32 or 64 bit. + * + * The address of the PVH entry point is saved to the 'pvh_start_addr' + * global variable. (although the entry point is 32-bit, the kernel + * binary can be either 32-bit or 64-bit). + */ +static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64) +{ + size_t *elf_note_data_addr; + + /* Check if ELF Note header passed in is valid */ + if (arg1 == NULL) { + return 0; + } + + if (is64) { + struct elf64_note *nhdr64 = (struct elf64_note *)arg1; + uint64_t nhdr_size64 = sizeof(struct elf64_note); + uint64_t phdr_align = *(uint64_t *)arg2; + uint64_t nhdr_namesz = nhdr64->n_namesz; + + elf_note_data_addr = + ((void *)nhdr64) + nhdr_size64 + + QEMU_ALIGN_UP(nhdr_namesz, phdr_align); + + pvh_start_addr = *elf_note_data_addr; + } else { + struct elf32_note *nhdr32 = (struct elf32_note *)arg1; + uint32_t nhdr_size32 = sizeof(struct elf32_note); + uint32_t phdr_align = *(uint32_t *)arg2; + uint32_t nhdr_namesz = nhdr32->n_namesz; + + elf_note_data_addr = + ((void *)nhdr32) + nhdr_size32 + + QEMU_ALIGN_UP(nhdr_namesz, phdr_align); + + pvh_start_addr = *(uint32_t *)elf_note_data_addr; + } + + return pvh_start_addr; +} + +static bool load_elfboot(const char *kernel_filename, + int kernel_file_size, + uint8_t *header, + size_t pvh_xen_start_addr, + FWCfgState *fw_cfg) +{ + uint32_t flags = 0; + uint32_t mh_load_addr = 0; + uint32_t elf_kernel_size = 0; + uint64_t elf_entry; + uint64_t elf_low, elf_high; + int kernel_size; + + if (ldl_p(header) != 0x464c457f) { + return false; /* no elfboot */ + } + + bool elf_is64 = header[EI_CLASS] == ELFCLASS64; + flags = elf_is64 ? + ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags; + + if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */ + error_report("elfboot unsupported flags = %x", flags); + exit(1); + } + + uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY; + kernel_size = load_elf(kernel_filename, read_pvh_start_addr, + NULL, &elf_note_type, &elf_entry, + &elf_low, &elf_high, NULL, 0, I386_ELF_MACHINE, + 0, 0); + + if (kernel_size < 0) { + error_report("Error while loading elf kernel"); + exit(1); + } + mh_load_addr = elf_low; + elf_kernel_size = elf_high - elf_low; + + if (pvh_start_addr == 0) { + error_report("Error loading uncompressed kernel without PVH ELF Note"); + exit(1); + } + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); + + return true; +} + +void x86_load_linux(X86MachineState *x86ms, + FWCfgState *fw_cfg, + int acpi_data_size, + bool pvh_enabled) +{ + bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled; + uint16_t protocol; + int setup_size, kernel_size, cmdline_size; + int dtb_size, setup_data_offset; + uint32_t initrd_max; + uint8_t header[8192], *setup, *kernel; + hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; + FILE *f; + char *vmode; + MachineState *machine = MACHINE(x86ms); + struct setup_data *setup_data; + const char *kernel_filename = machine->kernel_filename; + const char *initrd_filename = machine->initrd_filename; + const char *dtb_filename = machine->dtb; + const char *kernel_cmdline = machine->kernel_cmdline; + SevKernelLoaderContext sev_load_ctx = {}; + + /* Align to 16 bytes as a paranoia measure */ + cmdline_size = (strlen(kernel_cmdline) + 16) & ~15; + + /* load the kernel header */ + f = fopen(kernel_filename, "rb"); + if (!f) { + fprintf(stderr, "qemu: could not open kernel file '%s': %s\n", + kernel_filename, strerror(errno)); + exit(1); + } + + kernel_size = get_file_size(f); + if (!kernel_size || + fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != + MIN(ARRAY_SIZE(header), kernel_size)) { + fprintf(stderr, "qemu: could not load kernel '%s': %s\n", + kernel_filename, strerror(errno)); + exit(1); + } + + /* kernel protocol version */ + if (ldl_p(header + 0x202) == 0x53726448) { + protocol = lduw_p(header + 0x206); + } else { + /* + * This could be a multiboot kernel. If it is, let's stop treating it + * like a Linux kernel. + * Note: some multiboot images could be in the ELF format (the same of + * PVH), so we try multiboot first since we check the multiboot magic + * header before to load it. + */ + if (load_multiboot(x86ms, fw_cfg, f, kernel_filename, initrd_filename, + kernel_cmdline, kernel_size, header)) { + return; + } + /* + * Check if the file is an uncompressed kernel file (ELF) and load it, + * saving the PVH entry point used by the x86/HVM direct boot ABI. + * If load_elfboot() is successful, populate the fw_cfg info. + */ + if (pvh_enabled && + load_elfboot(kernel_filename, kernel_size, + header, pvh_start_addr, fw_cfg)) { + fclose(f); + + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, + strlen(kernel_cmdline) + 1); + fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); + + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); + fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, + header, sizeof(header)); + + /* load initrd */ + if (initrd_filename) { + GMappedFile *mapped_file; + gsize initrd_size; + gchar *initrd_data; + GError *gerr = NULL; + + mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); + if (!mapped_file) { + fprintf(stderr, "qemu: error reading initrd %s: %s\n", + initrd_filename, gerr->message); + exit(1); + } + x86ms->initrd_mapped_file = mapped_file; + + initrd_data = g_mapped_file_get_contents(mapped_file); + initrd_size = g_mapped_file_get_length(mapped_file); + initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; + if (initrd_size >= initrd_max) { + fprintf(stderr, "qemu: initrd is too large, cannot support." + "(max: %"PRIu32", need %"PRId64")\n", + initrd_max, (uint64_t)initrd_size); + exit(1); + } + + initrd_addr = (initrd_max - initrd_size) & ~4095; + + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, + initrd_size); + } + + option_rom[nb_option_roms].bootindex = 0; + option_rom[nb_option_roms].name = "pvh.bin"; + nb_option_roms++; + + return; + } + protocol = 0; + } + + if (protocol < 0x200 || !(header[0x211] & 0x01)) { + /* Low kernel */ + real_addr = 0x90000; + cmdline_addr = 0x9a000 - cmdline_size; + prot_addr = 0x10000; + } else if (protocol < 0x202) { + /* High but ancient kernel */ + real_addr = 0x90000; + cmdline_addr = 0x9a000 - cmdline_size; + prot_addr = 0x100000; + } else { + /* High and recent kernel */ + real_addr = 0x10000; + cmdline_addr = 0x20000; + prot_addr = 0x100000; + } + + /* highest address for loading the initrd */ + if (protocol >= 0x20c && + lduw_p(header + 0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { + /* + * Linux has supported initrd up to 4 GB for a very long time (2007, + * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), + * though it only sets initrd_max to 2 GB to "work around bootloader + * bugs". Luckily, QEMU firmware(which does something like bootloader) + * has supported this. + * + * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can + * be loaded into any address. + * + * In addition, initrd_max is uint32_t simply because QEMU doesn't + * support the 64-bit boot protocol (specifically the ext_ramdisk_image + * field). + * + * Therefore here just limit initrd_max to UINT32_MAX simply as well. + */ + initrd_max = UINT32_MAX; + } else if (protocol >= 0x203) { + initrd_max = ldl_p(header + 0x22c); + } else { + initrd_max = 0x37ffffff; + } + + if (initrd_max >= x86ms->below_4g_mem_size - acpi_data_size) { + initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; + } + + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1); + fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); + sev_load_ctx.cmdline_data = (char *)kernel_cmdline; + sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1; + + if (protocol >= 0x202) { + stl_p(header + 0x228, cmdline_addr); + } else { + stw_p(header + 0x20, 0xA33F); + stw_p(header + 0x22, cmdline_addr - real_addr); + } + + /* handle vga= parameter */ + vmode = strstr(kernel_cmdline, "vga="); + if (vmode) { + unsigned int video_mode; + const char *end; + int ret; + /* skip "vga=" */ + vmode += 4; + if (!strncmp(vmode, "normal", 6)) { + video_mode = 0xffff; + } else if (!strncmp(vmode, "ext", 3)) { + video_mode = 0xfffe; + } else if (!strncmp(vmode, "ask", 3)) { + video_mode = 0xfffd; + } else { + ret = qemu_strtoui(vmode, &end, 0, &video_mode); + if (ret != 0 || (*end && *end != ' ')) { + fprintf(stderr, "qemu: invalid 'vga=' kernel parameter.\n"); + exit(1); + } + } + stw_p(header + 0x1fa, video_mode); + } + + /* loader type */ + /* + * High nybble = B reserved for QEMU; low nybble is revision number. + * If this code is substantially changed, you may want to consider + * incrementing the revision. + */ + if (protocol >= 0x200) { + header[0x210] = 0xB0; + } + /* heap */ + if (protocol >= 0x201) { + header[0x211] |= 0x80; /* CAN_USE_HEAP */ + stw_p(header + 0x224, cmdline_addr - real_addr - 0x200); + } + + /* load initrd */ + if (initrd_filename) { + GMappedFile *mapped_file; + gsize initrd_size; + gchar *initrd_data; + GError *gerr = NULL; + + if (protocol < 0x200) { + fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); + exit(1); + } + + mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); + if (!mapped_file) { + fprintf(stderr, "qemu: error reading initrd %s: %s\n", + initrd_filename, gerr->message); + exit(1); + } + x86ms->initrd_mapped_file = mapped_file; + + initrd_data = g_mapped_file_get_contents(mapped_file); + initrd_size = g_mapped_file_get_length(mapped_file); + if (initrd_size >= initrd_max) { + fprintf(stderr, "qemu: initrd is too large, cannot support." + "(max: %"PRIu32", need %"PRId64")\n", + initrd_max, (uint64_t)initrd_size); + exit(1); + } + + initrd_addr = (initrd_max - initrd_size) & ~4095; + + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); + sev_load_ctx.initrd_data = initrd_data; + sev_load_ctx.initrd_size = initrd_size; + + stl_p(header + 0x218, initrd_addr); + stl_p(header + 0x21c, initrd_size); + } + + /* load kernel and setup */ + setup_size = header[0x1f1]; + if (setup_size == 0) { + setup_size = 4; + } + setup_size = (setup_size + 1) * 512; + if (setup_size > kernel_size) { + fprintf(stderr, "qemu: invalid kernel header\n"); + exit(1); + } + kernel_size -= setup_size; + + setup = g_malloc(setup_size); + kernel = g_malloc(kernel_size); + fseek(f, 0, SEEK_SET); + if (fread(setup, 1, setup_size, f) != setup_size) { + fprintf(stderr, "fread() failed\n"); + exit(1); + } + if (fread(kernel, 1, kernel_size, f) != kernel_size) { + fprintf(stderr, "fread() failed\n"); + exit(1); + } + fclose(f); + + /* append dtb to kernel */ + if (dtb_filename) { + if (protocol < 0x209) { + fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); + exit(1); + } + + dtb_size = get_image_size(dtb_filename); + if (dtb_size <= 0) { + fprintf(stderr, "qemu: error reading dtb %s: %s\n", + dtb_filename, strerror(errno)); + exit(1); + } + + setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); + kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; + kernel = g_realloc(kernel, kernel_size); + + stq_p(header + 0x250, prot_addr + setup_data_offset); + + setup_data = (struct setup_data *)(kernel + setup_data_offset); + setup_data->next = 0; + setup_data->type = cpu_to_le32(SETUP_DTB); + setup_data->len = cpu_to_le32(dtb_size); + + load_image_size(dtb_filename, setup_data->data, dtb_size); + } + + /* + * If we're starting an encrypted VM, it will be OVMF based, which uses the + * efi stub for booting and doesn't require any values to be placed in the + * kernel header. We therefore don't update the header so the hash of the + * kernel on the other side of the fw_cfg interface matches the hash of the + * file the user passed in. + */ + if (!sev_enabled()) { + memcpy(setup, header, MIN(sizeof(header), setup_size)); + } + + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); + sev_load_ctx.kernel_data = (char *)kernel; + sev_load_ctx.kernel_size = kernel_size; + + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); + fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); + fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); + sev_load_ctx.setup_data = (char *)setup; + sev_load_ctx.setup_size = setup_size; + + if (sev_enabled()) { + sev_add_kernel_loader_hashes(&sev_load_ctx, &error_fatal); + } + + option_rom[nb_option_roms].bootindex = 0; + option_rom[nb_option_roms].name = "linuxboot.bin"; + if (linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { + option_rom[nb_option_roms].name = "linuxboot_dma.bin"; + } + nb_option_roms++; +} + +void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory, + MemoryRegion *bios, bool read_only) +{ + uint64_t bios_size = memory_region_size(bios); + uint64_t isa_bios_size = MIN(bios_size, 128 * KiB); + + memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, + bios_size - isa_bios_size, isa_bios_size); + memory_region_add_subregion_overlap(isa_memory, 1 * MiB - isa_bios_size, + isa_bios, 1); + memory_region_set_readonly(isa_bios, read_only); +} + +void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, + MemoryRegion *rom_memory, bool isapc_ram_fw) +{ + const char *bios_name; + char *filename; + int bios_size; + ssize_t ret; + + /* BIOS load */ + bios_name = MACHINE(x86ms)->firmware ?: default_firmware; + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (filename) { + bios_size = get_image_size(filename); + } else { + bios_size = -1; + } + if (bios_size <= 0 || + (bios_size % 65536) != 0) { + goto bios_error; + } + memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, + &error_fatal); + if (sev_enabled()) { + /* + * The concept of a "reset" simply doesn't exist for + * confidential computing guests, we have to destroy and + * re-launch them instead. So there is no need to register + * the firmware as rom to properly re-initialize on reset. + * Just go for a straight file load instead. + */ + void *ptr = memory_region_get_ram_ptr(&x86ms->bios); + load_image_size(filename, ptr, bios_size); + x86_firmware_configure(ptr, bios_size); + } else { + memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw); + ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); + if (ret != 0) { + goto bios_error; + } + } + g_free(filename); + + /* map the last 128KB of the BIOS in ISA space */ + x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, + !isapc_ram_fw); + + /* map all the bios at the top of memory */ + memory_region_add_subregion(rom_memory, + (uint32_t)(-bios_size), + &x86ms->bios); + return; + +bios_error: + fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); + exit(1); +} diff --git a/hw/i386/x86-cpu.c b/hw/i386/x86-cpu.c new file mode 100644 index 0000000000..ab2920522d --- /dev/null +++ b/hw/i386/x86-cpu.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2003-2004 Fabrice Bellard + * Copyright (c) 2019, 2024 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu/osdep.h" +#include "sysemu/whpx.h" +#include "sysemu/cpu-timers.h" +#include "trace.h" + +#include "hw/i386/x86.h" +#include "target/i386/cpu.h" +#include "hw/intc/i8259.h" +#include "hw/irq.h" +#include "sysemu/kvm.h" + +/* TSC handling */ +uint64_t cpu_get_tsc(CPUX86State *env) +{ + return cpus_get_elapsed_ticks(); +} + +/* IRQ handling */ +static void pic_irq_request(void *opaque, int irq, int level) +{ + CPUState *cs = first_cpu; + X86CPU *cpu = X86_CPU(cs); + + trace_x86_pic_interrupt(irq, level); + if (cpu_is_apic_enabled(cpu->apic_state) && !kvm_irqchip_in_kernel() && + !whpx_apic_in_platform()) { + CPU_FOREACH(cs) { + cpu = X86_CPU(cs); + if (apic_accept_pic_intr(cpu->apic_state)) { + apic_deliver_pic_intr(cpu->apic_state, level); + } + } + } else { + if (level) { + cpu_interrupt(cs, CPU_INTERRUPT_HARD); + } else { + cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); + } + } +} + +qemu_irq x86_allocate_cpu_irq(void) +{ + return qemu_allocate_irq(pic_irq_request, NULL, 0); +} + +int cpu_get_pic_interrupt(CPUX86State *env) +{ + X86CPU *cpu = env_archcpu(env); + int intno; + + if (!kvm_irqchip_in_kernel() && !whpx_apic_in_platform()) { + intno = apic_get_interrupt(cpu->apic_state); + if (intno >= 0) { + return intno; + } + /* read the irq from the PIC */ + if (!apic_accept_pic_intr(cpu->apic_state)) { + return -1; + } + } + + intno = pic_read_irq(isa_pic); + return intno; +} + +DeviceState *cpu_get_current_apic(void) +{ + if (current_cpu) { + X86CPU *cpu = X86_CPU(current_cpu); + return cpu->apic_state; + } else { + return NULL; + } +} diff --git a/hw/i386/x86.c b/hw/i386/x86.c index c61f4ebfa6..935eea641b 100644 --- a/hw/i386/x86.c +++ b/hw/i386/x86.c @@ -22,52 +22,25 @@ */ #include "qemu/osdep.h" #include "qemu/error-report.h" -#include "qemu/option.h" -#include "qemu/cutils.h" #include "qemu/units.h" -#include "qemu/datadir.h" #include "qapi/error.h" #include "qapi/qapi-visit-common.h" -#include "qapi/clone-visitor.h" #include "qapi/qapi-visit-machine.h" #include "qapi/visitor.h" #include "sysemu/qtest.h" -#include "sysemu/whpx.h" #include "sysemu/numa.h" -#include "sysemu/replay.h" -#include "sysemu/sysemu.h" -#include "sysemu/cpu-timers.h" -#include "sysemu/xen.h" #include "trace.h" +#include "hw/acpi/aml-build.h" #include "hw/i386/x86.h" -#include "target/i386/cpu.h" #include "hw/i386/topology.h" -#include "hw/i386/fw_cfg.h" -#include "hw/intc/i8259.h" -#include "hw/rtc/mc146818rtc.h" -#include "target/i386/sev.h" -#include "hw/acpi/cpu_hotplug.h" -#include "hw/irq.h" #include "hw/nmi.h" -#include "hw/loader.h" -#include "multiboot.h" -#include "elf.h" -#include "standard-headers/asm-x86/bootparam.h" -#include CONFIG_DEVICES #include "kvm/kvm_i386.h" -#ifdef CONFIG_XEN_EMU -#include "hw/xen/xen.h" -#include "hw/i386/kvm/xen_evtchn.h" -#endif -/* Physical Address of PVH entry point read from kernel ELF NOTE */ -static size_t pvh_start_addr; - -static void init_topo_info(X86CPUTopoInfo *topo_info, - const X86MachineState *x86ms) +void init_topo_info(X86CPUTopoInfo *topo_info, + const X86MachineState *x86ms) { MachineState *ms = MACHINE(x86ms); @@ -94,355 +67,6 @@ uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, return x86_apicid_from_cpu_idx(&topo_info, cpu_index); } - -void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, Error **errp) -{ - Object *cpu = object_new(MACHINE(x86ms)->cpu_type); - - if (!object_property_set_uint(cpu, "apic-id", apic_id, errp)) { - goto out; - } - qdev_realize(DEVICE(cpu), NULL, errp); - -out: - object_unref(cpu); -} - -void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) -{ - int i; - const CPUArchIdList *possible_cpus; - MachineState *ms = MACHINE(x86ms); - MachineClass *mc = MACHINE_GET_CLASS(x86ms); - - x86_cpu_set_default_version(default_cpu_version); - - /* - * Calculates the limit to CPU APIC ID values - * - * Limit for the APIC ID value, so that all - * CPU APIC IDs are < x86ms->apic_id_limit. - * - * This is used for FW_CFG_MAX_CPUS. See comments on fw_cfg_arch_create(). - */ - x86ms->apic_id_limit = x86_cpu_apic_id_from_index(x86ms, - ms->smp.max_cpus - 1) + 1; - - /* - * Can we support APIC ID 255 or higher? With KVM, that requires - * both in-kernel lapic and X2APIC userspace API. - * - * kvm_enabled() must go first to ensure that kvm_* references are - * not emitted for the linker to consume (kvm_enabled() is - * a literal `0` in configurations where kvm_* aren't defined) - */ - if (kvm_enabled() && x86ms->apic_id_limit > 255 && - kvm_irqchip_in_kernel() && !kvm_enable_x2apic()) { - error_report("current -smp configuration requires kernel " - "irqchip and X2APIC API support."); - exit(EXIT_FAILURE); - } - - if (kvm_enabled()) { - kvm_set_max_apic_id(x86ms->apic_id_limit); - } - - if (!kvm_irqchip_in_kernel()) { - apic_set_max_apic_id(x86ms->apic_id_limit); - } - - possible_cpus = mc->possible_cpu_arch_ids(ms); - for (i = 0; i < ms->smp.cpus; i++) { - x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); - } -} - -void x86_rtc_set_cpus_count(ISADevice *s, uint16_t cpus_count) -{ - MC146818RtcState *rtc = MC146818_RTC(s); - - if (cpus_count > 0xff) { - /* - * If the number of CPUs can't be represented in 8 bits, the - * BIOS must use "FW_CFG_NB_CPUS". Set RTC field to 0 just - * to make old BIOSes fail more predictably. - */ - mc146818rtc_set_cmos_data(rtc, 0x5f, 0); - } else { - mc146818rtc_set_cmos_data(rtc, 0x5f, cpus_count - 1); - } -} - -static int x86_apic_cmp(const void *a, const void *b) -{ - CPUArchId *apic_a = (CPUArchId *)a; - CPUArchId *apic_b = (CPUArchId *)b; - - return apic_a->arch_id - apic_b->arch_id; -} - -/* - * returns pointer to CPUArchId descriptor that matches CPU's apic_id - * in ms->possible_cpus->cpus, if ms->possible_cpus->cpus has no - * entry corresponding to CPU's apic_id returns NULL. - */ -CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx) -{ - CPUArchId apic_id, *found_cpu; - - apic_id.arch_id = id; - found_cpu = bsearch(&apic_id, ms->possible_cpus->cpus, - ms->possible_cpus->len, sizeof(*ms->possible_cpus->cpus), - x86_apic_cmp); - if (found_cpu && idx) { - *idx = found_cpu - ms->possible_cpus->cpus; - } - return found_cpu; -} - -void x86_cpu_plug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - CPUArchId *found_cpu; - Error *local_err = NULL; - X86CPU *cpu = X86_CPU(dev); - X86MachineState *x86ms = X86_MACHINE(hotplug_dev); - - if (x86ms->acpi_dev) { - hotplug_handler_plug(x86ms->acpi_dev, dev, &local_err); - if (local_err) { - goto out; - } - } - - /* increment the number of CPUs */ - x86ms->boot_cpus++; - if (x86ms->rtc) { - x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); - } - if (x86ms->fw_cfg) { - fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); - } - - found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); - found_cpu->cpu = CPU(dev); -out: - error_propagate(errp, local_err); -} - -void x86_cpu_unplug_request_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - int idx = -1; - X86CPU *cpu = X86_CPU(dev); - X86MachineState *x86ms = X86_MACHINE(hotplug_dev); - - if (!x86ms->acpi_dev) { - error_setg(errp, "CPU hot unplug not supported without ACPI"); - return; - } - - x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); - assert(idx != -1); - if (idx == 0) { - error_setg(errp, "Boot CPU is unpluggable"); - return; - } - - hotplug_handler_unplug_request(x86ms->acpi_dev, dev, - errp); -} - -void x86_cpu_unplug_cb(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - CPUArchId *found_cpu; - Error *local_err = NULL; - X86CPU *cpu = X86_CPU(dev); - X86MachineState *x86ms = X86_MACHINE(hotplug_dev); - - hotplug_handler_unplug(x86ms->acpi_dev, dev, &local_err); - if (local_err) { - goto out; - } - - found_cpu = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, NULL); - found_cpu->cpu = NULL; - qdev_unrealize(dev); - - /* decrement the number of CPUs */ - x86ms->boot_cpus--; - /* Update the number of CPUs in CMOS */ - x86_rtc_set_cpus_count(x86ms->rtc, x86ms->boot_cpus); - fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); - out: - error_propagate(errp, local_err); -} - -void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, - DeviceState *dev, Error **errp) -{ - int idx; - CPUState *cs; - CPUArchId *cpu_slot; - X86CPUTopoIDs topo_ids; - X86CPU *cpu = X86_CPU(dev); - CPUX86State *env = &cpu->env; - MachineState *ms = MACHINE(hotplug_dev); - X86MachineState *x86ms = X86_MACHINE(hotplug_dev); - unsigned int smp_cores = ms->smp.cores; - unsigned int smp_threads = ms->smp.threads; - X86CPUTopoInfo topo_info; - - if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { - error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", - ms->cpu_type); - return; - } - - if (x86ms->acpi_dev) { - Error *local_err = NULL; - - hotplug_handler_pre_plug(HOTPLUG_HANDLER(x86ms->acpi_dev), dev, - &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - } - - init_topo_info(&topo_info, x86ms); - - env->nr_dies = ms->smp.dies; - - /* - * If APIC ID is not set, - * set it based on socket/die/core/thread properties. - */ - if (cpu->apic_id == UNASSIGNED_APIC_ID) { - int max_socket = (ms->smp.max_cpus - 1) / - smp_threads / smp_cores / ms->smp.dies; - - /* - * die-id was optional in QEMU 4.0 and older, so keep it optional - * if there's only one die per socket. - */ - if (cpu->die_id < 0 && ms->smp.dies == 1) { - cpu->die_id = 0; - } - - if (cpu->socket_id < 0) { - error_setg(errp, "CPU socket-id is not set"); - return; - } else if (cpu->socket_id > max_socket) { - error_setg(errp, "Invalid CPU socket-id: %u must be in range 0:%u", - cpu->socket_id, max_socket); - return; - } - if (cpu->die_id < 0) { - error_setg(errp, "CPU die-id is not set"); - return; - } else if (cpu->die_id > ms->smp.dies - 1) { - error_setg(errp, "Invalid CPU die-id: %u must be in range 0:%u", - cpu->die_id, ms->smp.dies - 1); - return; - } - if (cpu->core_id < 0) { - error_setg(errp, "CPU core-id is not set"); - return; - } else if (cpu->core_id > (smp_cores - 1)) { - error_setg(errp, "Invalid CPU core-id: %u must be in range 0:%u", - cpu->core_id, smp_cores - 1); - return; - } - if (cpu->thread_id < 0) { - error_setg(errp, "CPU thread-id is not set"); - return; - } else if (cpu->thread_id > (smp_threads - 1)) { - error_setg(errp, "Invalid CPU thread-id: %u must be in range 0:%u", - cpu->thread_id, smp_threads - 1); - return; - } - - topo_ids.pkg_id = cpu->socket_id; - topo_ids.die_id = cpu->die_id; - topo_ids.core_id = cpu->core_id; - topo_ids.smt_id = cpu->thread_id; - cpu->apic_id = x86_apicid_from_topo_ids(&topo_info, &topo_ids); - } - - cpu_slot = x86_find_cpu_slot(MACHINE(x86ms), cpu->apic_id, &idx); - if (!cpu_slot) { - x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); - error_setg(errp, - "Invalid CPU [socket: %u, die: %u, core: %u, thread: %u] with" - " APIC ID %" PRIu32 ", valid index range 0:%d", - topo_ids.pkg_id, topo_ids.die_id, topo_ids.core_id, topo_ids.smt_id, - cpu->apic_id, ms->possible_cpus->len - 1); - return; - } - - if (cpu_slot->cpu) { - error_setg(errp, "CPU[%d] with APIC ID %" PRIu32 " exists", - idx, cpu->apic_id); - return; - } - - /* if 'address' properties socket-id/core-id/thread-id are not set, set them - * so that machine_query_hotpluggable_cpus would show correct values - */ - /* TODO: move socket_id/core_id/thread_id checks into x86_cpu_realizefn() - * once -smp refactoring is complete and there will be CPU private - * CPUState::nr_cores and CPUState::nr_threads fields instead of globals */ - x86_topo_ids_from_apicid(cpu->apic_id, &topo_info, &topo_ids); - if (cpu->socket_id != -1 && cpu->socket_id != topo_ids.pkg_id) { - error_setg(errp, "property socket-id: %u doesn't match set apic-id:" - " 0x%x (socket-id: %u)", cpu->socket_id, cpu->apic_id, - topo_ids.pkg_id); - return; - } - cpu->socket_id = topo_ids.pkg_id; - - if (cpu->die_id != -1 && cpu->die_id != topo_ids.die_id) { - error_setg(errp, "property die-id: %u doesn't match set apic-id:" - " 0x%x (die-id: %u)", cpu->die_id, cpu->apic_id, topo_ids.die_id); - return; - } - cpu->die_id = topo_ids.die_id; - - if (cpu->core_id != -1 && cpu->core_id != topo_ids.core_id) { - error_setg(errp, "property core-id: %u doesn't match set apic-id:" - " 0x%x (core-id: %u)", cpu->core_id, cpu->apic_id, - topo_ids.core_id); - return; - } - cpu->core_id = topo_ids.core_id; - - if (cpu->thread_id != -1 && cpu->thread_id != topo_ids.smt_id) { - error_setg(errp, "property thread-id: %u doesn't match set apic-id:" - " 0x%x (thread-id: %u)", cpu->thread_id, cpu->apic_id, - topo_ids.smt_id); - return; - } - cpu->thread_id = topo_ids.smt_id; - - /* - * kvm_enabled() must go first to ensure that kvm_* references are - * not emitted for the linker to consume (kvm_enabled() is - * a literal `0` in configurations where kvm_* aren't defined) - */ - if (kvm_enabled() && hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) && - !kvm_hv_vpindex_settable()) { - error_setg(errp, "kernel doesn't allow setting HyperV VP_INDEX"); - return; - } - - cs = CPU(cpu); - cs->cpu_index = idx; - - numa_cpu_pre_plug(cpu_slot, dev, errp); -} - CpuInstanceProperties x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index) { @@ -528,676 +152,6 @@ static void x86_nmi(NMIState *n, int cpu_index, Error **errp) } } -static long get_file_size(FILE *f) -{ - long where, size; - - /* XXX: on Unix systems, using fstat() probably makes more sense */ - - where = ftell(f); - fseek(f, 0, SEEK_END); - size = ftell(f); - fseek(f, where, SEEK_SET); - - return size; -} - -/* TSC handling */ -uint64_t cpu_get_tsc(CPUX86State *env) -{ - return cpus_get_elapsed_ticks(); -} - -/* IRQ handling */ -static void pic_irq_request(void *opaque, int irq, int level) -{ - CPUState *cs = first_cpu; - X86CPU *cpu = X86_CPU(cs); - - trace_x86_pic_interrupt(irq, level); - if (cpu_is_apic_enabled(cpu->apic_state) && !kvm_irqchip_in_kernel() && - !whpx_apic_in_platform()) { - CPU_FOREACH(cs) { - cpu = X86_CPU(cs); - if (apic_accept_pic_intr(cpu->apic_state)) { - apic_deliver_pic_intr(cpu->apic_state, level); - } - } - } else { - if (level) { - cpu_interrupt(cs, CPU_INTERRUPT_HARD); - } else { - cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); - } - } -} - -qemu_irq x86_allocate_cpu_irq(void) -{ - return qemu_allocate_irq(pic_irq_request, NULL, 0); -} - -int cpu_get_pic_interrupt(CPUX86State *env) -{ - X86CPU *cpu = env_archcpu(env); - int intno; - - if (!kvm_irqchip_in_kernel() && !whpx_apic_in_platform()) { - intno = apic_get_interrupt(cpu->apic_state); - if (intno >= 0) { - return intno; - } - /* read the irq from the PIC */ - if (!apic_accept_pic_intr(cpu->apic_state)) { - return -1; - } - } - - intno = pic_read_irq(isa_pic); - return intno; -} - -DeviceState *cpu_get_current_apic(void) -{ - if (current_cpu) { - X86CPU *cpu = X86_CPU(current_cpu); - return cpu->apic_state; - } else { - return NULL; - } -} - -void gsi_handler(void *opaque, int n, int level) -{ - GSIState *s = opaque; - - trace_x86_gsi_interrupt(n, level); - switch (n) { - case 0 ... ISA_NUM_IRQS - 1: - if (s->i8259_irq[n]) { - /* Under KVM, Kernel will forward to both PIC and IOAPIC */ - qemu_set_irq(s->i8259_irq[n], level); - } - /* fall through */ - case ISA_NUM_IRQS ... IOAPIC_NUM_PINS - 1: -#ifdef CONFIG_XEN_EMU - /* - * Xen delivers the GSI to the Legacy PIC (not that Legacy PIC - * routing actually works properly under Xen). And then to - * *either* the PIRQ handling or the I/OAPIC depending on - * whether the former wants it. - */ - if (xen_mode == XEN_EMULATE && xen_evtchn_set_gsi(n, level)) { - break; - } -#endif - qemu_set_irq(s->ioapic_irq[n], level); - break; - case IO_APIC_SECONDARY_IRQBASE - ... IO_APIC_SECONDARY_IRQBASE + IOAPIC_NUM_PINS - 1: - qemu_set_irq(s->ioapic2_irq[n - IO_APIC_SECONDARY_IRQBASE], level); - break; - } -} - -void ioapic_init_gsi(GSIState *gsi_state, Object *parent) -{ - DeviceState *dev; - SysBusDevice *d; - unsigned int i; - - assert(parent); - if (kvm_ioapic_in_kernel()) { - dev = qdev_new(TYPE_KVM_IOAPIC); - } else { - dev = qdev_new(TYPE_IOAPIC); - } - object_property_add_child(parent, "ioapic", OBJECT(dev)); - d = SYS_BUS_DEVICE(dev); - sysbus_realize_and_unref(d, &error_fatal); - sysbus_mmio_map(d, 0, IO_APIC_DEFAULT_ADDRESS); - - for (i = 0; i < IOAPIC_NUM_PINS; i++) { - gsi_state->ioapic_irq[i] = qdev_get_gpio_in(dev, i); - } -} - -DeviceState *ioapic_init_secondary(GSIState *gsi_state) -{ - DeviceState *dev; - SysBusDevice *d; - unsigned int i; - - dev = qdev_new(TYPE_IOAPIC); - d = SYS_BUS_DEVICE(dev); - sysbus_realize_and_unref(d, &error_fatal); - sysbus_mmio_map(d, 0, IO_APIC_SECONDARY_ADDRESS); - - for (i = 0; i < IOAPIC_NUM_PINS; i++) { - gsi_state->ioapic2_irq[i] = qdev_get_gpio_in(dev, i); - } - return dev; -} - -/* - * The entry point into the kernel for PVH boot is different from - * the native entry point. The PVH entry is defined by the x86/HVM - * direct boot ABI and is available in an ELFNOTE in the kernel binary. - * - * This function is passed to load_elf() when it is called from - * load_elfboot() which then additionally checks for an ELF Note of - * type XEN_ELFNOTE_PHYS32_ENTRY and passes it to this function to - * parse the PVH entry address from the ELF Note. - * - * Due to trickery in elf_opts.h, load_elf() is actually available as - * load_elf32() or load_elf64() and this routine needs to be able - * to deal with being called as 32 or 64 bit. - * - * The address of the PVH entry point is saved to the 'pvh_start_addr' - * global variable. (although the entry point is 32-bit, the kernel - * binary can be either 32-bit or 64-bit). - */ -static uint64_t read_pvh_start_addr(void *arg1, void *arg2, bool is64) -{ - size_t *elf_note_data_addr; - - /* Check if ELF Note header passed in is valid */ - if (arg1 == NULL) { - return 0; - } - - if (is64) { - struct elf64_note *nhdr64 = (struct elf64_note *)arg1; - uint64_t nhdr_size64 = sizeof(struct elf64_note); - uint64_t phdr_align = *(uint64_t *)arg2; - uint64_t nhdr_namesz = nhdr64->n_namesz; - - elf_note_data_addr = - ((void *)nhdr64) + nhdr_size64 + - QEMU_ALIGN_UP(nhdr_namesz, phdr_align); - - pvh_start_addr = *elf_note_data_addr; - } else { - struct elf32_note *nhdr32 = (struct elf32_note *)arg1; - uint32_t nhdr_size32 = sizeof(struct elf32_note); - uint32_t phdr_align = *(uint32_t *)arg2; - uint32_t nhdr_namesz = nhdr32->n_namesz; - - elf_note_data_addr = - ((void *)nhdr32) + nhdr_size32 + - QEMU_ALIGN_UP(nhdr_namesz, phdr_align); - - pvh_start_addr = *(uint32_t *)elf_note_data_addr; - } - - return pvh_start_addr; -} - -static bool load_elfboot(const char *kernel_filename, - int kernel_file_size, - uint8_t *header, - size_t pvh_xen_start_addr, - FWCfgState *fw_cfg) -{ - uint32_t flags = 0; - uint32_t mh_load_addr = 0; - uint32_t elf_kernel_size = 0; - uint64_t elf_entry; - uint64_t elf_low, elf_high; - int kernel_size; - - if (ldl_p(header) != 0x464c457f) { - return false; /* no elfboot */ - } - - bool elf_is64 = header[EI_CLASS] == ELFCLASS64; - flags = elf_is64 ? - ((Elf64_Ehdr *)header)->e_flags : ((Elf32_Ehdr *)header)->e_flags; - - if (flags & 0x00010004) { /* LOAD_ELF_HEADER_HAS_ADDR */ - error_report("elfboot unsupported flags = %x", flags); - exit(1); - } - - uint64_t elf_note_type = XEN_ELFNOTE_PHYS32_ENTRY; - kernel_size = load_elf(kernel_filename, read_pvh_start_addr, - NULL, &elf_note_type, &elf_entry, - &elf_low, &elf_high, NULL, 0, I386_ELF_MACHINE, - 0, 0); - - if (kernel_size < 0) { - error_report("Error while loading elf kernel"); - exit(1); - } - mh_load_addr = elf_low; - elf_kernel_size = elf_high - elf_low; - - if (pvh_start_addr == 0) { - error_report("Error loading uncompressed kernel without PVH ELF Note"); - exit(1); - } - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ENTRY, pvh_start_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, mh_load_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, elf_kernel_size); - - return true; -} - -void x86_load_linux(X86MachineState *x86ms, - FWCfgState *fw_cfg, - int acpi_data_size, - bool pvh_enabled) -{ - bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled; - uint16_t protocol; - int setup_size, kernel_size, cmdline_size; - int dtb_size, setup_data_offset; - uint32_t initrd_max; - uint8_t header[8192], *setup, *kernel; - hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; - FILE *f; - char *vmode; - MachineState *machine = MACHINE(x86ms); - struct setup_data *setup_data; - const char *kernel_filename = machine->kernel_filename; - const char *initrd_filename = machine->initrd_filename; - const char *dtb_filename = machine->dtb; - const char *kernel_cmdline = machine->kernel_cmdline; - SevKernelLoaderContext sev_load_ctx = {}; - - /* Align to 16 bytes as a paranoia measure */ - cmdline_size = (strlen(kernel_cmdline) + 16) & ~15; - - /* load the kernel header */ - f = fopen(kernel_filename, "rb"); - if (!f) { - fprintf(stderr, "qemu: could not open kernel file '%s': %s\n", - kernel_filename, strerror(errno)); - exit(1); - } - - kernel_size = get_file_size(f); - if (!kernel_size || - fread(header, 1, MIN(ARRAY_SIZE(header), kernel_size), f) != - MIN(ARRAY_SIZE(header), kernel_size)) { - fprintf(stderr, "qemu: could not load kernel '%s': %s\n", - kernel_filename, strerror(errno)); - exit(1); - } - - /* kernel protocol version */ - if (ldl_p(header + 0x202) == 0x53726448) { - protocol = lduw_p(header + 0x206); - } else { - /* - * This could be a multiboot kernel. If it is, let's stop treating it - * like a Linux kernel. - * Note: some multiboot images could be in the ELF format (the same of - * PVH), so we try multiboot first since we check the multiboot magic - * header before to load it. - */ - if (load_multiboot(x86ms, fw_cfg, f, kernel_filename, initrd_filename, - kernel_cmdline, kernel_size, header)) { - return; - } - /* - * Check if the file is an uncompressed kernel file (ELF) and load it, - * saving the PVH entry point used by the x86/HVM direct boot ABI. - * If load_elfboot() is successful, populate the fw_cfg info. - */ - if (pvh_enabled && - load_elfboot(kernel_filename, kernel_size, - header, pvh_start_addr, fw_cfg)) { - fclose(f); - - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, - strlen(kernel_cmdline) + 1); - fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); - - fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, sizeof(header)); - fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, - header, sizeof(header)); - - /* load initrd */ - if (initrd_filename) { - GMappedFile *mapped_file; - gsize initrd_size; - gchar *initrd_data; - GError *gerr = NULL; - - mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); - if (!mapped_file) { - fprintf(stderr, "qemu: error reading initrd %s: %s\n", - initrd_filename, gerr->message); - exit(1); - } - x86ms->initrd_mapped_file = mapped_file; - - initrd_data = g_mapped_file_get_contents(mapped_file); - initrd_size = g_mapped_file_get_length(mapped_file); - initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; - if (initrd_size >= initrd_max) { - fprintf(stderr, "qemu: initrd is too large, cannot support." - "(max: %"PRIu32", need %"PRId64")\n", - initrd_max, (uint64_t)initrd_size); - exit(1); - } - - initrd_addr = (initrd_max - initrd_size) & ~4095; - - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); - fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, - initrd_size); - } - - option_rom[nb_option_roms].bootindex = 0; - option_rom[nb_option_roms].name = "pvh.bin"; - nb_option_roms++; - - return; - } - protocol = 0; - } - - if (protocol < 0x200 || !(header[0x211] & 0x01)) { - /* Low kernel */ - real_addr = 0x90000; - cmdline_addr = 0x9a000 - cmdline_size; - prot_addr = 0x10000; - } else if (protocol < 0x202) { - /* High but ancient kernel */ - real_addr = 0x90000; - cmdline_addr = 0x9a000 - cmdline_size; - prot_addr = 0x100000; - } else { - /* High and recent kernel */ - real_addr = 0x10000; - cmdline_addr = 0x20000; - prot_addr = 0x100000; - } - - /* highest address for loading the initrd */ - if (protocol >= 0x20c && - lduw_p(header + 0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { - /* - * Linux has supported initrd up to 4 GB for a very long time (2007, - * long before XLF_CAN_BE_LOADED_ABOVE_4G which was added in 2013), - * though it only sets initrd_max to 2 GB to "work around bootloader - * bugs". Luckily, QEMU firmware(which does something like bootloader) - * has supported this. - * - * It's believed that if XLF_CAN_BE_LOADED_ABOVE_4G is set, initrd can - * be loaded into any address. - * - * In addition, initrd_max is uint32_t simply because QEMU doesn't - * support the 64-bit boot protocol (specifically the ext_ramdisk_image - * field). - * - * Therefore here just limit initrd_max to UINT32_MAX simply as well. - */ - initrd_max = UINT32_MAX; - } else if (protocol >= 0x203) { - initrd_max = ldl_p(header + 0x22c); - } else { - initrd_max = 0x37ffffff; - } - - if (initrd_max >= x86ms->below_4g_mem_size - acpi_data_size) { - initrd_max = x86ms->below_4g_mem_size - acpi_data_size - 1; - } - - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline) + 1); - fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, kernel_cmdline); - sev_load_ctx.cmdline_data = (char *)kernel_cmdline; - sev_load_ctx.cmdline_size = strlen(kernel_cmdline) + 1; - - if (protocol >= 0x202) { - stl_p(header + 0x228, cmdline_addr); - } else { - stw_p(header + 0x20, 0xA33F); - stw_p(header + 0x22, cmdline_addr - real_addr); - } - - /* handle vga= parameter */ - vmode = strstr(kernel_cmdline, "vga="); - if (vmode) { - unsigned int video_mode; - const char *end; - int ret; - /* skip "vga=" */ - vmode += 4; - if (!strncmp(vmode, "normal", 6)) { - video_mode = 0xffff; - } else if (!strncmp(vmode, "ext", 3)) { - video_mode = 0xfffe; - } else if (!strncmp(vmode, "ask", 3)) { - video_mode = 0xfffd; - } else { - ret = qemu_strtoui(vmode, &end, 0, &video_mode); - if (ret != 0 || (*end && *end != ' ')) { - fprintf(stderr, "qemu: invalid 'vga=' kernel parameter.\n"); - exit(1); - } - } - stw_p(header + 0x1fa, video_mode); - } - - /* loader type */ - /* - * High nybble = B reserved for QEMU; low nybble is revision number. - * If this code is substantially changed, you may want to consider - * incrementing the revision. - */ - if (protocol >= 0x200) { - header[0x210] = 0xB0; - } - /* heap */ - if (protocol >= 0x201) { - header[0x211] |= 0x80; /* CAN_USE_HEAP */ - stw_p(header + 0x224, cmdline_addr - real_addr - 0x200); - } - - /* load initrd */ - if (initrd_filename) { - GMappedFile *mapped_file; - gsize initrd_size; - gchar *initrd_data; - GError *gerr = NULL; - - if (protocol < 0x200) { - fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n"); - exit(1); - } - - mapped_file = g_mapped_file_new(initrd_filename, false, &gerr); - if (!mapped_file) { - fprintf(stderr, "qemu: error reading initrd %s: %s\n", - initrd_filename, gerr->message); - exit(1); - } - x86ms->initrd_mapped_file = mapped_file; - - initrd_data = g_mapped_file_get_contents(mapped_file); - initrd_size = g_mapped_file_get_length(mapped_file); - if (initrd_size >= initrd_max) { - fprintf(stderr, "qemu: initrd is too large, cannot support." - "(max: %"PRIu32", need %"PRId64")\n", - initrd_max, (uint64_t)initrd_size); - exit(1); - } - - initrd_addr = (initrd_max - initrd_size) & ~4095; - - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); - fw_cfg_add_bytes(fw_cfg, FW_CFG_INITRD_DATA, initrd_data, initrd_size); - sev_load_ctx.initrd_data = initrd_data; - sev_load_ctx.initrd_size = initrd_size; - - stl_p(header + 0x218, initrd_addr); - stl_p(header + 0x21c, initrd_size); - } - - /* load kernel and setup */ - setup_size = header[0x1f1]; - if (setup_size == 0) { - setup_size = 4; - } - setup_size = (setup_size + 1) * 512; - if (setup_size > kernel_size) { - fprintf(stderr, "qemu: invalid kernel header\n"); - exit(1); - } - kernel_size -= setup_size; - - setup = g_malloc(setup_size); - kernel = g_malloc(kernel_size); - fseek(f, 0, SEEK_SET); - if (fread(setup, 1, setup_size, f) != setup_size) { - fprintf(stderr, "fread() failed\n"); - exit(1); - } - if (fread(kernel, 1, kernel_size, f) != kernel_size) { - fprintf(stderr, "fread() failed\n"); - exit(1); - } - fclose(f); - - /* append dtb to kernel */ - if (dtb_filename) { - if (protocol < 0x209) { - fprintf(stderr, "qemu: Linux kernel too old to load a dtb\n"); - exit(1); - } - - dtb_size = get_image_size(dtb_filename); - if (dtb_size <= 0) { - fprintf(stderr, "qemu: error reading dtb %s: %s\n", - dtb_filename, strerror(errno)); - exit(1); - } - - setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); - kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; - kernel = g_realloc(kernel, kernel_size); - - stq_p(header + 0x250, prot_addr + setup_data_offset); - - setup_data = (struct setup_data *)(kernel + setup_data_offset); - setup_data->next = 0; - setup_data->type = cpu_to_le32(SETUP_DTB); - setup_data->len = cpu_to_le32(dtb_size); - - load_image_size(dtb_filename, setup_data->data, dtb_size); - } - - /* - * If we're starting an encrypted VM, it will be OVMF based, which uses the - * efi stub for booting and doesn't require any values to be placed in the - * kernel header. We therefore don't update the header so the hash of the - * kernel on the other side of the fw_cfg interface matches the hash of the - * file the user passed in. - */ - if (!sev_enabled()) { - memcpy(setup, header, MIN(sizeof(header), setup_size)); - } - - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, prot_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); - fw_cfg_add_bytes(fw_cfg, FW_CFG_KERNEL_DATA, kernel, kernel_size); - sev_load_ctx.kernel_data = (char *)kernel; - sev_load_ctx.kernel_size = kernel_size; - - fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_ADDR, real_addr); - fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size); - fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size); - sev_load_ctx.setup_data = (char *)setup; - sev_load_ctx.setup_size = setup_size; - - if (sev_enabled()) { - sev_add_kernel_loader_hashes(&sev_load_ctx, &error_fatal); - } - - option_rom[nb_option_roms].bootindex = 0; - option_rom[nb_option_roms].name = "linuxboot.bin"; - if (linuxboot_dma_enabled && fw_cfg_dma_enabled(fw_cfg)) { - option_rom[nb_option_roms].name = "linuxboot_dma.bin"; - } - nb_option_roms++; -} - -void x86_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *isa_memory, - MemoryRegion *bios, bool read_only) -{ - uint64_t bios_size = memory_region_size(bios); - uint64_t isa_bios_size = MIN(bios_size, 128 * KiB); - - memory_region_init_alias(isa_bios, NULL, "isa-bios", bios, - bios_size - isa_bios_size, isa_bios_size); - memory_region_add_subregion_overlap(isa_memory, 1 * MiB - isa_bios_size, - isa_bios, 1); - memory_region_set_readonly(isa_bios, read_only); -} - -void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, - MemoryRegion *rom_memory, bool isapc_ram_fw) -{ - const char *bios_name; - char *filename; - int bios_size; - ssize_t ret; - - /* BIOS load */ - bios_name = MACHINE(x86ms)->firmware ?: default_firmware; - filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); - if (filename) { - bios_size = get_image_size(filename); - } else { - bios_size = -1; - } - if (bios_size <= 0 || - (bios_size % 65536) != 0) { - goto bios_error; - } - memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, - &error_fatal); - if (sev_enabled()) { - /* - * The concept of a "reset" simply doesn't exist for - * confidential computing guests, we have to destroy and - * re-launch them instead. So there is no need to register - * the firmware as rom to properly re-initialize on reset. - * Just go for a straight file load instead. - */ - void *ptr = memory_region_get_ram_ptr(&x86ms->bios); - load_image_size(filename, ptr, bios_size); - x86_firmware_configure(ptr, bios_size); - } else { - memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw); - ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); - if (ret != 0) { - goto bios_error; - } - } - g_free(filename); - - /* map the last 128KB of the BIOS in ISA space */ - x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, - !isapc_ram_fw); - - /* map all the bios at the top of memory */ - memory_region_add_subregion(rom_memory, - (uint32_t)(-bios_size), - &x86ms->bios); - return; - -bios_error: - fprintf(stderr, "qemu: could not load PC BIOS '%s'\n", bios_name); - exit(1); -} - bool x86_machine_is_smm_enabled(const X86MachineState *x86ms) { bool smm_available = false; diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h index 29b38eb5ef..51cc14df9a 100644 --- a/include/hw/i386/x86.h +++ b/include/hw/i386/x86.h @@ -21,6 +21,7 @@ #include "exec/memory.h" #include "hw/boards.h" +#include "hw/i386/topology.h" #include "hw/intc/ioapic.h" #include "hw/isa/isa.h" #include "qom/object.h" @@ -108,16 +109,15 @@ struct X86MachineState { #define TYPE_X86_MACHINE MACHINE_TYPE_NAME("x86") OBJECT_DECLARE_TYPE(X86MachineState, X86MachineClass, X86_MACHINE) -uint32_t x86_cpu_apic_id_from_index(X86MachineState *pcms, +void init_topo_info(X86CPUTopoInfo *topo_info, const X86MachineState *x86ms); +uint32_t x86_cpu_apic_id_from_index(X86MachineState *x86ms, unsigned int cpu_index); -void x86_cpu_new(X86MachineState *pcms, int64_t apic_id, Error **errp); void x86_cpus_init(X86MachineState *pcms, int default_cpu_version); CpuInstanceProperties x86_cpu_index_to_props(MachineState *ms, unsigned cpu_index); int64_t x86_get_default_cpu_node_id(const MachineState *ms, int idx); const CPUArchIdList *x86_possible_cpu_arch_ids(MachineState *ms); -CPUArchId *x86_find_cpu_slot(MachineState *ms, uint32_t id, int *idx); void x86_rtc_set_cpus_count(ISADevice *rtc, uint16_t cpus_count); void x86_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); -- Gitee From 56971f0af394c672ed640c04b35acf50ada52eb0 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Thu, 30 May 2024 06:16:36 -0500 Subject: [PATCH 074/115] hw/i386/sev: Add support to encrypt BIOS when SEV-SNP is enabled commit 77d1abd91e5352ad30ae2f83790f95fa6a3c0b6b upstream. As with SEV, an SNP guest requires that the BIOS be part of the initial encrypted/measured guest payload. Extend sev_encrypt_flash() to handle the SNP case and plumb through the GPA of the BIOS location since this is needed for SNP. [Backport Changes] 1. In file hw/i386/pc_sysfw.c, In function x86_firmware_configure(), the upstream patch updates the call to sev_encrypt_flash() to include the guest physical address (gpa) as the first argument. In the current source tree, this call is wrapped inside an else block that handles CSV3-specific logic (csv3_enabled()). The function call was updated in place to pass gpa, preserving compatibility with upstream changes while retaining the existing CSV3-specific behavior. 2. In file target/i386/sev.c, in function sev_add_kernel_loader_hashes(), the upstream patch updates the call to sev_encrypt_flash() to include the guest physical address (area->base) as the first argument. However, In the current source tree, this call is located within an else block following CSV3-specific logic (csv3_enabled()). The function call was updated in place to pass area->base, maintaining compatibility with the upstream change while preserving the existing CSV3-specific handling. Signed-off-by: Brijesh Singh Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-25-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: priyankagunturi Signed-off-by: PrithivishS --- hw/i386/pc_sysfw.c | 12 +++++++----- hw/i386/x86-common.c | 2 +- include/hw/i386/x86.h | 2 +- target/i386/sev-sysemu-stub.c | 2 +- target/i386/sev.c | 5 +++-- target/i386/sev.h | 2 +- 6 files changed, 14 insertions(+), 11 deletions(-) diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index 7b7e65272b..10eeaa2d8a 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -150,6 +150,8 @@ static void pc_system_flash_map(PCMachineState *pcms, assert(PC_MACHINE_GET_CLASS(pcms)->pci_enabled); for (i = 0; i < ARRAY_SIZE(pcms->flash); i++) { + hwaddr gpa; + system_flash = pcms->flash[i]; blk = pflash_cfi01_get_blk(system_flash); if (!blk) { @@ -179,11 +181,11 @@ static void pc_system_flash_map(PCMachineState *pcms, } total_size += size; + gpa = 0x100000000ULL - total_size; /* where the flash is mapped */ qdev_prop_set_uint32(DEVICE(system_flash), "num-blocks", size / FLASH_SECTOR_SIZE); sysbus_realize_and_unref(SYS_BUS_DEVICE(system_flash), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0, - 0x100000000ULL - total_size); + sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0, gpa); if (i == 0) { flash_mem = pflash_cfi01_get_memory(system_flash); @@ -193,7 +195,7 @@ static void pc_system_flash_map(PCMachineState *pcms, if (sev_enabled()) { flash_ptr = memory_region_get_ram_ptr(flash_mem); flash_size = memory_region_size(flash_mem); - x86_firmware_configure(flash_ptr, flash_size); + x86_firmware_configure(gpa, flash_ptr, flash_size); } } } @@ -246,7 +248,7 @@ void pc_system_firmware_init(PCMachineState *pcms, pc_system_flash_cleanup_unused(pcms); } -void x86_firmware_configure(void *ptr, int size) +void x86_firmware_configure(hwaddr gpa, void *ptr, int size) { int ret; @@ -280,7 +282,7 @@ void x86_firmware_configure(void *ptr, int size) } csv3_load_data(mr->addr + offset, ptr, size, &error_fatal); } else { - sev_encrypt_flash(ptr, size, &error_fatal); + sev_encrypt_flash(gpa, ptr, size, &error_fatal); } } } diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c index 67b03c913a..35fe6eabea 100644 --- a/hw/i386/x86-common.c +++ b/hw/i386/x86-common.c @@ -981,7 +981,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, */ void *ptr = memory_region_get_ram_ptr(&x86ms->bios); load_image_size(filename, ptr, bios_size); - x86_firmware_configure(ptr, bios_size); + x86_firmware_configure(0x100000000ULL - bios_size, ptr, bios_size); } else { memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw); ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h index 51cc14df9a..a68ffe34eb 100644 --- a/include/hw/i386/x86.h +++ b/include/hw/i386/x86.h @@ -157,6 +157,6 @@ void ioapic_init_gsi(GSIState *gsi_state, Object *parent); DeviceState *ioapic_init_secondary(GSIState *gsi_state); /* pc_sysfw.c */ -void x86_firmware_configure(void *ptr, int size); +void x86_firmware_configure(hwaddr gpa, void *ptr, int size); #endif diff --git a/target/i386/sev-sysemu-stub.c b/target/i386/sev-sysemu-stub.c index fc1c57c411..d5bf886e79 100644 --- a/target/i386/sev-sysemu-stub.c +++ b/target/i386/sev-sysemu-stub.c @@ -42,7 +42,7 @@ void qmp_sev_inject_launch_secret(const char *packet_header, const char *secret, error_setg(errp, "SEV is not available in this QEMU"); } -int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) +int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) { g_assert_not_reached(); } diff --git a/target/i386/sev.c b/target/i386/sev.c index 57982872ac..207da8d12b 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -1820,7 +1820,7 @@ static int sev_snp_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) } int -sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) +sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) { SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); @@ -3253,7 +3253,8 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) error_report("%s: CSV3 load kernel hashes unsupported!", __func__); ret = false; } - } else if (sev_encrypt_flash((uint8_t *)padded_ht, sizeof(*padded_ht), errp) < 0) { + } else if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, + sizeof(*padded_ht), errp) < 0) { ret = false; } diff --git a/target/i386/sev.h b/target/i386/sev.h index 55ec43c655..a43097e605 100644 --- a/target/i386/sev.h +++ b/target/i386/sev.h @@ -66,7 +66,7 @@ uint32_t sev_get_cbit_position(void); uint32_t sev_get_reduced_phys_bits(void); bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp); -int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp); +int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp); int sev_save_setup(const char *pdh, const char *plat_cert, const char *amd_cert); int sev_save_outgoing_page(QEMUFile *f, uint8_t *ptr, -- Gitee From 2c1ccedefa1efff11abbc9bcb91d64ca96a4eb26 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 31 May 2024 12:51:44 +0200 Subject: [PATCH 075/115] i386/sev: Invoke launch_updata_data() for SEV class commit 9861405a8f845133b7984322c2df0c43a45553c3 upstream. Add launch_update_data() in SevCommonStateClass and invoke as sev_launch_update_data() for SEV object. Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-26-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: priyankagunturi Signed-off-by: PrithivishS --- target/i386/sev.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 207da8d12b..b16436a279 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -82,6 +82,7 @@ struct SevCommonStateClass { /* public */ int (*launch_start)(SevCommonState *sev_common); void (*launch_finish)(SevCommonState *sev_common); + int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, uint64_t len); int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); }; @@ -1032,7 +1033,7 @@ out: } static int -sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) +sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, uint8_t *addr, uint64_t len) { int ret, fw_error; struct kvm_sev_launch_update_data update; @@ -1044,7 +1045,7 @@ sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) update.uaddr = (__u64)(unsigned long)addr; update.len = len; trace_kvm_sev_launch_update_data(addr, len); - ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, &update, &fw_error); if (ret) { error_report("%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", @@ -1823,6 +1824,7 @@ int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) { SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common); if (!sev_common) { return 0; @@ -1830,7 +1832,9 @@ sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) /* if SEV is in update state then encrypt the data else do nothing */ if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { - int ret = sev_launch_update_data(SEV_GUEST(sev_common), ptr, len); + int ret; + + ret = klass->launch_update_data(sev_common, gpa, ptr, len); if (ret < 0) { error_setg(errp, "SEV: Failed to encrypt pflash rom"); return ret; @@ -3401,6 +3405,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) klass->launch_start = sev_launch_start; klass->launch_finish = sev_launch_finish; + klass->launch_update_data = sev_launch_update_data; klass->kvm_init = sev_kvm_init; x86_klass->kvm_type = sev_kvm_type; -- Gitee From c69a5786e697cb9a27bd48600fd66d828d312a6e Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 30 May 2024 06:16:38 -0500 Subject: [PATCH 076/115] i386/sev: Invoke launch_updata_data() for SNP class commit 0765d136eba400ad1cb7cae18438bb10eace64dc upstream. Invoke as sev_snp_launch_update_data() for SNP object. Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-27-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: priyankagunturi Signed-off-by: PrithivishS --- target/i386/sev.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/target/i386/sev.c b/target/i386/sev.c index b16436a279..3d3db29289 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -1234,6 +1234,15 @@ snp_launch_update_data(uint64_t gpa, void *hva, return 0; } +static int +sev_snp_launch_update_data(SevCommonState *sev_common, hwaddr gpa, + uint8_t *ptr, uint64_t len) +{ + int ret = snp_launch_update_data(gpa, ptr, len, + KVM_SEV_SNP_PAGE_TYPE_NORMAL); + return ret; +} + static int sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info, const KvmCpuidInfo *kvm_cpuid_info) @@ -3664,6 +3673,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) klass->launch_start = sev_snp_launch_start; klass->launch_finish = sev_snp_launch_finish; + klass->launch_update_data = sev_snp_launch_update_data; klass->kvm_init = sev_snp_kvm_init; x86_klass->kvm_type = sev_snp_kvm_type; -- Gitee From 19e455754dd7f511ea13f89bf2961e0ac2e3df10 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 30 May 2024 06:16:42 -0500 Subject: [PATCH 077/115] i386/kvm: Add KVM_EXIT_HYPERCALL handling for KVM_HC_MAP_GPA_RANGE commit 47e76d03b155e43beca550251a6eb7ea926c059f upstream. KVM_HC_MAP_GPA_RANGE will be used to send requests to userspace for private/shared memory attribute updates requested by the guest. Implement handling for that use-case along with some basic infrastructure for enabling specific hypercall events. [Backport Changes] 1. In target/i386/kvm/kvm.c, the KVM_EXIT_HYPERCALL case in kvm_arch_handle_exit() was originally added in the Euler QEMU to support SEV live migration in SEV/SEV-ES guests. However, the current upstream commit is now introducing the same handling case for SNP guests to process KVM_HC_MAP_GPA_RANGE hypercalls for memory attribute updates. To resolve this overlap, the code was updated to conditionally branch using the sev_snp_enabled() helper. This ensures the live migration exit path is restricted to SEV/SEV-ES guests, while enabling the upstream SNP-specific handling for SNP guests. Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-31-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: PKumarAditya Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- target/i386/kvm/kvm.c | 58 +++++++++++++++++++++++++++++++++++- target/i386/kvm/kvm_i386.h | 1 + target/i386/kvm/trace-events | 1 + 3 files changed, 59 insertions(+), 1 deletion(-) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index a3cf447028..3fd2c2dc1a 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -21,6 +21,7 @@ #include #include +#include #include "standard-headers/asm-x86/kvm_para.h" #include "hw/xen/interface/arch-x86/cpuid.h" @@ -210,6 +211,13 @@ int kvm_get_vm_type(MachineState *ms) return kvm_type; } +bool kvm_enable_hypercall(uint64_t enable_mask) +{ + KVMState *s = KVM_STATE(current_accel()); + + return !kvm_vm_enable_cap(s, KVM_CAP_EXIT_HYPERCALL, 0, enable_mask); +} + bool kvm_has_smm(void) { return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM); @@ -5507,6 +5515,50 @@ static bool host_supports_vmx(void) return ecx & CPUID_EXT_VMX; } +/* + * Currently the handling here only supports use of KVM_HC_MAP_GPA_RANGE + * to service guest-initiated memory attribute update requests so that + * KVM_SET_MEMORY_ATTRIBUTES can update whether or not a page should be + * backed by the private memory pool provided by guest_memfd, and as such + * is only applicable to guest_memfd-backed guests (e.g. SNP/TDX). + * + * Other other use-cases for KVM_HC_MAP_GPA_RANGE, such as for SEV live + * migration, are not implemented here currently. + * + * For the guest_memfd use-case, these exits will generally be synthesized + * by KVM based on platform-specific hypercalls, like GHCB requests in the + * case of SEV-SNP, and not issued directly within the guest though the + * KVM_HC_MAP_GPA_RANGE hypercall. So in this case, KVM_HC_MAP_GPA_RANGE is + * not actually advertised to guests via the KVM CPUID feature bit, as + * opposed to SEV live migration where it would be. Since it is unlikely the + * SEV live migration use-case would be useful for guest-memfd backed guests, + * because private/shared page tracking is already provided through other + * means, these 2 use-cases should be treated as being mutually-exclusive. + */ +static int kvm_handle_hc_map_gpa_range(struct kvm_run *run) +{ + uint64_t gpa, size, attributes; + + if (!machine_require_guest_memfd(current_machine)) + return -EINVAL; + + gpa = run->hypercall.args[0]; + size = run->hypercall.args[1] * TARGET_PAGE_SIZE; + attributes = run->hypercall.args[2]; + + trace_kvm_hc_map_gpa_range(gpa, size, attributes, run->hypercall.flags); + + return kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED); +} + +static int kvm_handle_hypercall(struct kvm_run *run) +{ + if (run->hypercall.nr == KVM_HC_MAP_GPA_RANGE) + return kvm_handle_hc_map_gpa_range(run); + + return -EINVAL; +} + #define VMX_INVALID_GUEST_STATE 0x80000021 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) @@ -5603,7 +5655,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) break; #endif case KVM_EXIT_HYPERCALL: - ret = kvm_handle_exit_hypercall(cpu, run); + if (!sev_snp_enabled()) { + ret = kvm_handle_exit_hypercall(cpu, run); + } else { + ret = kvm_handle_hypercall(run); + } break; default: fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h index 6b44844d95..34fc60774b 100644 --- a/target/i386/kvm/kvm_i386.h +++ b/target/i386/kvm/kvm_i386.h @@ -33,6 +33,7 @@ bool kvm_has_smm(void); bool kvm_enable_x2apic(void); bool kvm_hv_vpindex_settable(void); +bool kvm_enable_hypercall(uint64_t enable_mask); bool kvm_enable_sgx_provisioning(KVMState *s); bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp); diff --git a/target/i386/kvm/trace-events b/target/i386/kvm/trace-events index b365a8e8e2..74a6234ff7 100644 --- a/target/i386/kvm/trace-events +++ b/target/i386/kvm/trace-events @@ -5,6 +5,7 @@ kvm_x86_fixup_msi_error(uint32_t gsi) "VT-d failed to remap interrupt for GSI %" kvm_x86_add_msi_route(int virq) "Adding route entry for virq %d" kvm_x86_remove_msi_route(int virq) "Removing route entry for virq %d" kvm_x86_update_msi_routes(int num) "Updated %d MSI routes" +kvm_hc_map_gpa_range(uint64_t gpa, uint64_t size, uint64_t attributes, uint64_t flags) "gpa 0x%" PRIx64 " size 0x%" PRIx64 " attributes 0x%" PRIx64 " flags 0x%" PRIx64 # xen-emu.c kvm_xen_hypercall(int cpu, uint8_t cpl, uint64_t input, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t ret) "xen_hypercall: cpu %d cpl %d input %" PRIu64 " a0 0x%" PRIx64 " a1 0x%" PRIx64 " a2 0x%" PRIx64" ret 0x%" PRIx64 -- Gitee From d62f6ec2104faa7a95d0fb7aad7ebe6cd310b1b2 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 30 May 2024 06:16:43 -0500 Subject: [PATCH 078/115] i386/sev: Enable KVM_HC_MAP_GPA_RANGE hcall for SNP guests commit e3cddff93c1f88fea3b26841e792dc0be6b6fae8 upstream. KVM will forward GHCB page-state change requests to userspace in the form of KVM_HC_MAP_GPA_RANGE, so make sure the hypercall handling is enabled for SNP guests. Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-32-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: priyankagunturi Signed-off-by: PrithivishS --- target/i386/sev.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/target/i386/sev.c b/target/i386/sev.c index 3d3db29289..b06a7f2eb6 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -14,6 +14,7 @@ #include "qemu/osdep.h" #include +#include #include #include @@ -861,6 +862,10 @@ sev_snp_launch_start(SevCommonState *sev_common) trace_kvm_sev_snp_launch_start(start->policy, sev_snp_guest->guest_visible_workarounds); + if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) { + return 1; + } + rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_START, start, &fw_error); if (rc < 0) { -- Gitee From 85b26660ac584aa72325d3f83b647931ff135f06 Mon Sep 17 00:00:00 2001 From: Dov Murik Date: Thu, 30 May 2024 06:16:33 -0500 Subject: [PATCH 079/115] i386/sev: Extract build_kernel_loader_hashes commit 06cbd66cecaa3230cccb330facac241a677b29d5 upstream. Extract the building of the kernel hashes table out from sev_add_kernel_loader_hashes() to allow building it in other memory areas (for SNP support). No functional change intended. [Backport Changes] In target/i386/sev.c, the upstream patch refactors the logic for building kernel loader hashes by extracting the core logic into a new helper function build_kernel_loader_hashes(), which is then refered from newly migrated sev_add_kernel_loader_hashes() function. However, In the current source code, the old sev_add_kernel_loader_hashes() function contains additional Hygon CSV3-specific logics (including csv3_enabled() check and CSV3-specific csv3_load_data() flow) were retained and integrated within the migrated function. This ensures compatibility with upstream design while preserving non-upstream CSV3 guest support. Signed-off-by: Dov Murik Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-22-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: priyankagunturi Signed-off-by: PrithivishS --- target/i386/sev.c | 116 ++++++++++++++++++++++++++-------------------- 1 file changed, 65 insertions(+), 51 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index b06a7f2eb6..d57082551c 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -3156,45 +3156,16 @@ static const QemuUUID sev_cmdline_entry_guid = { 0x4d, 0x36, 0xab, 0x2a) }; -/* - * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page - * which is included in SEV's initial memory measurement. - */ -bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) +static bool build_kernel_loader_hashes(PaddedSevHashTable *padded_ht, + SevKernelLoaderContext *ctx, + Error **errp) { - uint8_t *data; - SevHashTableDescriptor *area; SevHashTable *ht; - PaddedSevHashTable *padded_ht; uint8_t cmdline_hash[HASH_SIZE]; uint8_t initrd_hash[HASH_SIZE]; uint8_t kernel_hash[HASH_SIZE]; uint8_t *hashp; size_t hash_len = HASH_SIZE; - hwaddr mapped_len = sizeof(*padded_ht); - MemTxAttrs attrs = { 0 }; - bool ret = true; - SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); - - /* - * Only add the kernel hashes if the sev-guest configuration explicitly - * stated kernel-hashes=on. - */ - if (!sev_common->kernel_hashes) { - return false; - } - - if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) { - error_setg(errp, "SEV: kernel specified but guest firmware " - "has no hashes table GUID"); - return false; - } - area = (SevHashTableDescriptor *)data; - if (!area->base || area->size < sizeof(PaddedSevHashTable)) { - error_setg(errp, "SEV: guest firmware hashes table area is invalid " - "(base=0x%x size=0x%x)", area->base, area->size); - return false; - } /* * Calculate hash of kernel command-line with the terminating null byte. If @@ -3231,16 +3202,6 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) } assert(hash_len == HASH_SIZE); - /* - * Populate the hashes table in the guest's memory at the OVMF-designated - * area for the SEV hashes table - */ - padded_ht = address_space_map(&address_space_memory, area->base, - &mapped_len, true, attrs); - if (!padded_ht || mapped_len != sizeof(*padded_ht)) { - error_setg(errp, "SEV: cannot map hashes table guest memory area"); - return false; - } ht = &padded_ht->ht; ht->guid = sev_hash_table_header_guid; @@ -3261,18 +3222,71 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) /* zero the excess data so the measurement can be reliably calculated */ memset(padded_ht->padding, 0, sizeof(padded_ht->padding)); - if (csv3_enabled()) { - if (kvm_hygon_coco_ext_inuse & KVM_CAP_HYGON_COCO_EXT_CSV3_MULT_LUP_DATA) { - if (csv3_load_data(area->base, (uint8_t *)padded_ht, - sizeof(*padded_ht), errp) < 0) { - ret = false; + return true; +} + +/* + * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page + * which is included in SEV's initial memory measurement. + */ +bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) +{ + uint8_t *data; + SevHashTableDescriptor *area; + PaddedSevHashTable *padded_ht; + hwaddr mapped_len = sizeof(*padded_ht); + MemTxAttrs attrs = { 0 }; + bool ret = true; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + + /* + * Only add the kernel hashes if the sev-guest configuration explicitly + * stated kernel-hashes=on. + */ + if (!sev_common->kernel_hashes) { + return false; + } + + if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) { + error_setg(errp, "SEV: kernel specified but guest firmware " + "has no hashes table GUID"); + return false; + } + + area = (SevHashTableDescriptor *)data; + if (!area->base || area->size < sizeof(PaddedSevHashTable)) { + error_setg(errp, "SEV: guest firmware hashes table area is invalid " + "(base=0x%x size=0x%x)", area->base, area->size); + return false; + } + + /* + * Populate the hashes table in the guest's memory at the OVMF-designated + * area for the SEV hashes table + */ + padded_ht = address_space_map(&address_space_memory, area->base, + &mapped_len, true, attrs); + if (!padded_ht || mapped_len != sizeof(*padded_ht)) { + error_setg(errp, "SEV: cannot map hashes table guest memory area"); + return false; + } + + if (build_kernel_loader_hashes(padded_ht, ctx, errp)) { + if (csv3_enabled()) { + if (kvm_hygon_coco_ext_inuse & KVM_CAP_HYGON_COCO_EXT_CSV3_MULT_LUP_DATA) { + if (csv3_load_data(area->base, (uint8_t *)padded_ht, + sizeof(*padded_ht), errp) < 0) { + ret = false; + } + } else { + error_report("%s: CSV3 load kernel hashes unsupported!", __func__); + ret = false; } - } else { - error_report("%s: CSV3 load kernel hashes unsupported!", __func__); + } else if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, + sizeof(*padded_ht), errp) < 0) { ret = false; } - } else if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, - sizeof(*padded_ht), errp) < 0) { + } else { ret = false; } -- Gitee From 67b18c88855737dc3ac8bd8b7db64436a65828b6 Mon Sep 17 00:00:00 2001 From: Dov Murik Date: Thu, 30 May 2024 06:16:34 -0500 Subject: [PATCH 080/115] i386/sev: Reorder struct declarations commit cc483bf911931f405dea682c74a3d8b9b6c54369 upstream. Move the declaration of PaddedSevHashTable before SevSnpGuest so we can add a new such field to the latter. No functional change intended. Signed-off-by: Dov Murik Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-23-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: priyankagunturi Signed-off-by: PrithivishS --- target/i386/sev.c | 84 +++++++++++++++++++++++------------------------ 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index d57082551c..5ec10cab25 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -54,6 +54,48 @@ struct shared_region { QTAILQ_ENTRY(shared_region) list; }; +/* hard code sha256 digest size */ +#define HASH_SIZE 32 + +typedef struct QEMU_PACKED SevHashTableEntry { + QemuUUID guid; + uint16_t len; + uint8_t hash[HASH_SIZE]; +} SevHashTableEntry; + +typedef struct QEMU_PACKED SevHashTable { + QemuUUID guid; + uint16_t len; + SevHashTableEntry cmdline; + SevHashTableEntry initrd; + SevHashTableEntry kernel; +} SevHashTable; + +/* + * Data encrypted by sev_encrypt_flash() must be padded to a multiple of + * 16 bytes. + */ +typedef struct QEMU_PACKED PaddedSevHashTable { + SevHashTable ht; + uint8_t padding[ROUND_UP(sizeof(SevHashTable), 16) - sizeof(SevHashTable)]; +} PaddedSevHashTable; + +QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); + +#define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" +typedef struct __attribute__((__packed__)) SevInfoBlock { + /* SEV-ES Reset Vector Address */ + uint32_t reset_addr; +} SevInfoBlock; + +#define SEV_HASH_TABLE_RV_GUID "7255371f-3a3b-4b04-927b-1da6efa8d454" +typedef struct QEMU_PACKED SevHashTableDescriptor { + /* SEV hash table area guest address */ + uint32_t base; + /* SEV hash table area size (in bytes) */ + uint32_t size; +} SevHashTableDescriptor; + struct SevCommonState { X86ConfidentialGuest parent_obj; @@ -157,48 +199,6 @@ typedef struct SevLaunchUpdateData { static QTAILQ_HEAD(, SevLaunchUpdateData) launch_update; -#define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" -typedef struct __attribute__((__packed__)) SevInfoBlock { - /* SEV-ES Reset Vector Address */ - uint32_t reset_addr; -} SevInfoBlock; - -#define SEV_HASH_TABLE_RV_GUID "7255371f-3a3b-4b04-927b-1da6efa8d454" -typedef struct QEMU_PACKED SevHashTableDescriptor { - /* SEV hash table area guest address */ - uint32_t base; - /* SEV hash table area size (in bytes) */ - uint32_t size; -} SevHashTableDescriptor; - -/* hard code sha256 digest size */ -#define HASH_SIZE 32 - -typedef struct QEMU_PACKED SevHashTableEntry { - QemuUUID guid; - uint16_t len; - uint8_t hash[HASH_SIZE]; -} SevHashTableEntry; - -typedef struct QEMU_PACKED SevHashTable { - QemuUUID guid; - uint16_t len; - SevHashTableEntry cmdline; - SevHashTableEntry initrd; - SevHashTableEntry kernel; -} SevHashTable; - -/* - * Data encrypted by sev_encrypt_flash() must be padded to a multiple of - * 16 bytes. - */ -typedef struct QEMU_PACKED PaddedSevHashTable { - SevHashTable ht; - uint8_t padding[ROUND_UP(sizeof(SevHashTable), 16) - sizeof(SevHashTable)]; -} PaddedSevHashTable; - -QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); - static Error *sev_mig_blocker; bool sev_kvm_has_msr_ghcb; -- Gitee From eb6979e74326bb0136b86933d6bc5129120b7721 Mon Sep 17 00:00:00 2001 From: Jeevan deep J Date: Wed, 9 Jul 2025 12:01:54 +0530 Subject: [PATCH 081/115] i386/sev: Allow measured direct kernel boot on SNP commit c1996992cc882b00139f78067d6a64e2ec9cb0d8 upstream. In SNP, the hashes page designated with a specific metadata entry published in AmdSev OVMF. Therefore, if the user enabled kernel hashes (for measured direct boot), QEMU should prepare the content of hashes table, and during the processing of the metadata entry it copy the content into the designated page and encrypt it. Note that in SNP (unlike SEV and SEV-ES) the measurements is done in whole 4KB pages. Therefore QEMU zeros the whole page that includes the hashes table, and fills in the kernel hashes area in that page, and then encrypts the whole page. The rest of the page is reserved for SEV launch secrets which are not usable anyway on SNP. If the user disabled kernel hashes, QEMU pre-validates the kernel hashes page as a zero page. [Backport Changes] 1. In target/i386/sev.c, the upstream patch refactors the logic for building kernel loader hashes by extracting the core logic from sev_add_kernel_loader_hashes() into a new helper function sev_build_kernel_loader_hashes() specific to SEV guests. However, in the current source code, the original sev_add_kernel_loader_hashes() function includes additional Hygon CSV3-specific logic (including the csv3_enabled() check and csv3_load_data() flow), which was retained and integrated into the new helper function. This ensures compatibility with the upstream design while preserving non-upstream CSV3 guest support. Signed-off-by: Dov Murik Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-24-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- include/hw/i386/pc.h | 2 + target/i386/sev.c | 128 ++++++++++++++++++++++++++++++------------- 2 files changed, 92 insertions(+), 38 deletions(-) diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 4cee7640b1..52bbbaa132 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -176,6 +176,8 @@ typedef enum { /* The section contains address that can be used as a CPUID page */ SEV_DESC_TYPE_CPUID, + /* The section contains the region for kernel hashes for measured direct boot */ + SEV_DESC_TYPE_SNP_KERNEL_HASHES = 0x10, } ovmf_sev_metadata_desc_type; typedef struct __attribute__((__packed__)) OvmfSevMetadataDesc { diff --git a/target/i386/sev.c b/target/i386/sev.c index 5ec10cab25..d2de0b12a0 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -123,6 +123,10 @@ struct SevCommonStateClass { X86ConfidentialGuestClass parent_class; /* public */ + bool (*build_kernel_loader_hashes)(SevCommonState *sev_common, + SevHashTableDescriptor *area, + SevKernelLoaderContext *ctx, + Error **errp); int (*launch_start)(SevCommonState *sev_common); void (*launch_finish)(SevCommonState *sev_common); int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, uint64_t len); @@ -183,6 +187,9 @@ struct SevSnpGuestState { struct kvm_sev_snp_launch_start kvm_start_conf; struct kvm_sev_snp_launch_finish kvm_finish_conf; + + uint32_t kernel_hashes_offset; + PaddedSevHashTable *kernel_hashes_data; }; #define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ @@ -1332,6 +1339,23 @@ snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, uint32_t cpuid_len) KVM_SEV_SNP_PAGE_TYPE_CPUID); } +static int +snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, uint32_t addr, + void *hva, uint32_t len) +{ + int type = KVM_SEV_SNP_PAGE_TYPE_ZERO; + if (sev_snp->parent_obj.kernel_hashes) { + assert(sev_snp->kernel_hashes_data); + assert((sev_snp->kernel_hashes_offset + + sizeof(*sev_snp->kernel_hashes_data)) <= len); + memset(hva, 0, len); + memcpy(hva + sev_snp->kernel_hashes_offset, sev_snp->kernel_hashes_data, + sizeof(*sev_snp->kernel_hashes_data)); + type = KVM_SEV_SNP_PAGE_TYPE_NORMAL; + } + return snp_launch_update_data(addr, hva, len, type); +} + static int snp_metadata_desc_to_page_type(int desc_type) { @@ -1368,6 +1392,9 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp, if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { ret = snp_launch_update_cpuid(desc->base, hva, desc->len); + } else if (desc->type == SEV_DESC_TYPE_SNP_KERNEL_HASHES) { + ret = snp_launch_update_kernel_hashes(sev_snp, desc->base, hva, + desc->len); } else { ret = snp_launch_update_data(desc->base, hva, desc->len, type); } @@ -3225,6 +3252,65 @@ static bool build_kernel_loader_hashes(PaddedSevHashTable *padded_ht, return true; } +static bool sev_snp_build_kernel_loader_hashes(SevCommonState *sev_common, + SevHashTableDescriptor *area, + SevKernelLoaderContext *ctx, + Error **errp) +{ + /* + * SNP: Populate the hashes table in an area that later in + * snp_launch_update_kernel_hashes() will be copied to the guest memory + * and encrypted. + */ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(sev_common); + sev_snp_guest->kernel_hashes_offset = area->base & ~TARGET_PAGE_MASK; + sev_snp_guest->kernel_hashes_data = g_new0(PaddedSevHashTable, 1); + return build_kernel_loader_hashes(sev_snp_guest->kernel_hashes_data, ctx, errp); +} + +static bool sev_build_kernel_loader_hashes(SevCommonState *sev_common, + SevHashTableDescriptor *area, + SevKernelLoaderContext *ctx, + Error **errp) +{ + PaddedSevHashTable *padded_ht; + hwaddr mapped_len = sizeof(*padded_ht); + MemTxAttrs attrs = { 0 }; + bool ret = true; + + /* + * Populate the hashes table in the guest's memory at the OVMF-designated + * area for the SEV hashes table + */ + padded_ht = address_space_map(&address_space_memory, area->base, + &mapped_len, true, attrs); + if (!padded_ht || mapped_len != sizeof(*padded_ht)) { + error_setg(errp, "SEV: cannot map hashes table guest memory area"); + return false; + } + + if (build_kernel_loader_hashes(padded_ht, ctx, errp)) { + if (csv3_enabled()) { + if (kvm_hygon_coco_ext_inuse & KVM_CAP_HYGON_COCO_EXT_CSV3_MULT_LUP_DATA) { + if (csv3_load_data(area->base, (uint8_t *)padded_ht, + sizeof(*padded_ht), errp) < 0) { + ret = false; } + } else { + error_report("%s: CSV3 load kernel hashes unsupported!", __func__); + ret = false; } + } else if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, + sizeof(*padded_ht), errp) < 0) { + ret = false; } + } else { + ret = false; + } + + address_space_unmap(&address_space_memory, padded_ht, + mapped_len, true, mapped_len); + + return ret; +} + /* * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page * which is included in SEV's initial memory measurement. @@ -3233,11 +3319,8 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) { uint8_t *data; SevHashTableDescriptor *area; - PaddedSevHashTable *padded_ht; - hwaddr mapped_len = sizeof(*padded_ht); - MemTxAttrs attrs = { 0 }; - bool ret = true; SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common); /* * Only add the kernel hashes if the sev-guest configuration explicitly @@ -3260,40 +3343,7 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) return false; } - /* - * Populate the hashes table in the guest's memory at the OVMF-designated - * area for the SEV hashes table - */ - padded_ht = address_space_map(&address_space_memory, area->base, - &mapped_len, true, attrs); - if (!padded_ht || mapped_len != sizeof(*padded_ht)) { - error_setg(errp, "SEV: cannot map hashes table guest memory area"); - return false; - } - - if (build_kernel_loader_hashes(padded_ht, ctx, errp)) { - if (csv3_enabled()) { - if (kvm_hygon_coco_ext_inuse & KVM_CAP_HYGON_COCO_EXT_CSV3_MULT_LUP_DATA) { - if (csv3_load_data(area->base, (uint8_t *)padded_ht, - sizeof(*padded_ht), errp) < 0) { - ret = false; - } - } else { - error_report("%s: CSV3 load kernel hashes unsupported!", __func__); - ret = false; - } - } else if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, - sizeof(*padded_ht), errp) < 0) { - ret = false; - } - } else { - ret = false; - } - - address_space_unmap(&address_space_memory, padded_ht, - mapped_len, true, mapped_len); - - return ret; + return klass->build_kernel_loader_hashes(sev_common, area, ctx, errp); } static int _sev_send_start(QEMUFile *f, uint64_t *bytes_sent) @@ -3431,6 +3481,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + klass->build_kernel_loader_hashes = sev_build_kernel_loader_hashes; klass->launch_start = sev_launch_start; klass->launch_finish = sev_launch_finish; klass->launch_update_data = sev_launch_update_data; @@ -3690,6 +3741,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + klass->build_kernel_loader_hashes = sev_snp_build_kernel_loader_hashes; klass->launch_start = sev_snp_launch_start; klass->launch_finish = sev_snp_launch_finish; klass->launch_update_data = sev_snp_launch_update_data; -- Gitee From b6aee5943d2481f0fee134f1a4c36fd90435f112 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 20 Nov 2023 11:24:45 +0100 Subject: [PATCH 082/115] memory: Have memory_region_init_ram_flags_nomigrate() return a boolean MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cbbc4340232107a85809a7abc37435bead937597 upstream Following the example documented since commit e3fe3988d7 ("error: Document Error API usage rules"), have memory_region_init_ram_nomigrate return a boolean indicating whether an error is set or not. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Manos Pitsidianakis Reviewed-by: Peter Xu Reviewed-by: Gavin Shan Message-Id: <20231120213301.24349-2-philmd@linaro.org> Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- include/exec/memory.h | 4 +++- system/memory.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index e2f99cf34a..9bfc373bd6 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -1481,8 +1481,10 @@ void memory_region_init_ram_nomigrate(MemoryRegion *mr, * * Note that this function does not do anything to cause the data in the * RAM memory region to be migrated; that is the responsibility of the caller. + * + * Return: true on success, else false setting @errp with error. */ -void memory_region_init_ram_flags_nomigrate(MemoryRegion *mr, +bool memory_region_init_ram_flags_nomigrate(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, diff --git a/system/memory.c b/system/memory.c index 7f7f64bd4b..ffc1367615 100644 --- a/system/memory.c +++ b/system/memory.c @@ -1588,7 +1588,7 @@ void memory_region_init_ram_nomigrate(MemoryRegion *mr, memory_region_init_ram_flags_nomigrate(mr, owner, name, size, 0, errp); } -void memory_region_init_ram_flags_nomigrate(MemoryRegion *mr, +bool memory_region_init_ram_flags_nomigrate(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, @@ -1605,7 +1605,9 @@ void memory_region_init_ram_flags_nomigrate(MemoryRegion *mr, mr->size = int128_zero(); object_unparent(OBJECT(mr)); error_propagate(errp, err); + return false; } + return true; } void memory_region_init_resizeable_ram(MemoryRegion *mr, -- Gitee From f4a10f73d2f95dfe8d5b91511f35c1a7094849c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 20 Nov 2023 13:21:56 +0100 Subject: [PATCH 083/115] memory: Have memory_region_init_ram_nomigrate() handler return a boolean MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 62c19b72c7fefb5fcd13ed4e2c833d2f99458193 upstream Following the example documented since commit e3fe3988d7 ("error: Document Error API usage rules"), have memory_region_init_ram_nomigrate return a boolean indicating whether an error is set or not. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Manos Pitsidianakis Reviewed-by: Peter Xu Reviewed-by: Gavin Shan Message-Id: <20231120213301.24349-3-philmd@linaro.org> Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- include/exec/memory.h | 4 +++- system/memory.c | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index 9bfc373bd6..c040fcb9cf 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -1458,8 +1458,10 @@ void memory_region_init_io(MemoryRegion *mr, * * Note that this function does not do anything to cause the data in the * RAM memory region to be migrated; that is the responsibility of the caller. + * + * Return: true on success, else false setting @errp with error. */ -void memory_region_init_ram_nomigrate(MemoryRegion *mr, +bool memory_region_init_ram_nomigrate(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, diff --git a/system/memory.c b/system/memory.c index ffc1367615..9f80158324 100644 --- a/system/memory.c +++ b/system/memory.c @@ -1579,13 +1579,14 @@ void memory_region_init_io(MemoryRegion *mr, mr->terminates = true; } -void memory_region_init_ram_nomigrate(MemoryRegion *mr, +bool memory_region_init_ram_nomigrate(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, Error **errp) { - memory_region_init_ram_flags_nomigrate(mr, owner, name, size, 0, errp); + return memory_region_init_ram_flags_nomigrate(mr, owner, name, + size, 0, errp); } bool memory_region_init_ram_flags_nomigrate(MemoryRegion *mr, -- Gitee From 51b49bd339188772a65d5b2eb13d6653f4ab8f4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 20 Nov 2023 11:27:11 +0100 Subject: [PATCH 084/115] memory: Have memory_region_init_rom_nomigrate() handler return a boolean MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 197faa7006313407ade82de8bc49d52da610da46 upstream Following the example documented since commit e3fe3988d7 ("error: Document Error API usage rules"), have memory_region_init_rom_nomigrate return a boolean indicating whether an error is set or not. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Peter Xu Reviewed-by: Gavin Shan Message-Id: <20231120213301.24349-4-philmd@linaro.org> [PMD: Only update 'readonly' field on success (Manos Pitsidianakis)] Message-Id: Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- include/exec/memory.h | 4 +++- system/memory.c | 9 +++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index c040fcb9cf..a3b538267a 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -1669,8 +1669,10 @@ void memory_region_init_alias(MemoryRegion *mr, * must be unique within any device * @size: size of the region. * @errp: pointer to Error*, to store an error if it happens. + * + * Return: true on success, else false setting @errp with error. */ -void memory_region_init_rom_nomigrate(MemoryRegion *mr, +bool memory_region_init_rom_nomigrate(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, diff --git a/system/memory.c b/system/memory.c index 9f80158324..a84e6a9279 100644 --- a/system/memory.c +++ b/system/memory.c @@ -1734,14 +1734,19 @@ void memory_region_init_alias(MemoryRegion *mr, mr->alias_offset = offset; } -void memory_region_init_rom_nomigrate(MemoryRegion *mr, +bool memory_region_init_rom_nomigrate(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, Error **errp) { - memory_region_init_ram_flags_nomigrate(mr, owner, name, size, 0, errp); + if (!memory_region_init_ram_flags_nomigrate(mr, owner, name, + size, 0, errp)) { + return false; + } mr->readonly = true; + + return true; } void memory_region_init_rom_device_nomigrate(MemoryRegion *mr, -- Gitee From cf1e0183942adf44ef5c8833b4a64a7559a1b9ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 20 Nov 2023 13:26:39 +0100 Subject: [PATCH 085/115] memory: Simplify memory_region_init_rom_nomigrate() calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fd7549ee1396fc8051aac5e4cfc81458591ceafe upstream Mechanical change using the following coccinelle script: @@ expression mr, owner, arg3, arg4, errp; @@ - memory_region_init_rom_nomigrate(mr, owner, arg3, arg4, &errp); if ( - errp + !memory_region_init_rom_nomigrate(mr, owner, arg3, arg4, &errp) ) { ... return; } and removing the local Error variable. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Peter Xu Reviewed-by: Richard Henderson Reviewed-by: Gavin Shan Message-Id: <20231120213301.24349-5-philmd@linaro.org> Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- system/memory.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/system/memory.c b/system/memory.c index a84e6a9279..b7306ea53e 100644 --- a/system/memory.c +++ b/system/memory.c @@ -3696,11 +3696,8 @@ void memory_region_init_rom(MemoryRegion *mr, Error **errp) { DeviceState *owner_dev; - Error *err = NULL; - memory_region_init_rom_nomigrate(mr, owner, name, size, &err); - if (err) { - error_propagate(errp, err); + if (!memory_region_init_rom_nomigrate(mr, owner, name, size, errp)) { return; } /* This will assert if owner is neither NULL nor a DeviceState. -- Gitee From 3cd5d9b7fb79105690995ffe36691f2a7f70222d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 20 Nov 2023 13:27:16 +0100 Subject: [PATCH 086/115] memory: Simplify memory_region_init_ram_from_fd() calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d3143bd53192d5921cd948e348650eb1f7857da7 upstream Mechanical change using the following coccinelle script: @@ expression mr, owner, arg3, arg4, arg5, arg6, arg7, errp; @@ - memory_region_init_ram_from_fd(mr, owner, arg3, arg4, arg5, arg6, arg7, &errp); if ( - errp + !memory_region_init_ram_from_fd(mr, owner, arg3, arg4, arg5, arg6, arg7, &errp) ) { ... return; } and removing the local Error variable. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Manos Pitsidianakis Reviewed-by: Gavin Shan Message-Id: <20231120213301.24349-6-philmd@linaro.org> Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- system/memory.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/system/memory.c b/system/memory.c index b7306ea53e..094f775040 100644 --- a/system/memory.c +++ b/system/memory.c @@ -3672,11 +3672,8 @@ void memory_region_init_ram(MemoryRegion *mr, Error **errp) { DeviceState *owner_dev; - Error *err = NULL; - memory_region_init_ram_nomigrate(mr, owner, name, size, &err); - if (err) { - error_propagate(errp, err); + if (!memory_region_init_ram_nomigrate(mr, owner, name, size, errp)) { return; } /* This will assert if owner is neither NULL nor a DeviceState. -- Gitee From de63e3d7caf85cce81b9cd9e8adcf6b4ce48408d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 20 Nov 2023 11:28:36 +0100 Subject: [PATCH 087/115] memory: Have memory_region_init_ram() handler return a boolean MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fe5f33d6b086d6911d69dd77abe09b21bdb2ea5f upstream Following the example documented since commit e3fe3988d7 ("error: Document Error API usage rules"), have memory_region_init_ram() return a boolean indicating whether an error is set or not. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Peter Xu Reviewed-by: Gavin Shan Message-Id: <20231120213301.24349-7-philmd@linaro.org> Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- include/exec/memory.h | 4 +++- system/memory.c | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index a3b538267a..8ca7e0a453 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -1753,8 +1753,10 @@ void memory_region_init_iommu(void *_iommu_mr, * give the RAM block a unique name for migration purposes. * We should lift this restriction and allow arbitrary Objects. * If you pass a non-NULL non-device @owner then we will assert. + * + * Return: true on success, else false setting @errp with error. */ -void memory_region_init_ram(MemoryRegion *mr, +bool memory_region_init_ram(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, diff --git a/system/memory.c b/system/memory.c index 094f775040..4d58bab11c 100644 --- a/system/memory.c +++ b/system/memory.c @@ -3665,7 +3665,7 @@ void mtree_info(bool flatview, bool dispatch_tree, bool owner, bool disabled) } } -void memory_region_init_ram(MemoryRegion *mr, +bool memory_region_init_ram(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, @@ -3674,7 +3674,7 @@ void memory_region_init_ram(MemoryRegion *mr, DeviceState *owner_dev; if (!memory_region_init_ram_nomigrate(mr, owner, name, size, errp)) { - return; + return false; } /* This will assert if owner is neither NULL nor a DeviceState. * We only want the owner here for the purposes of defining a @@ -3684,6 +3684,8 @@ void memory_region_init_ram(MemoryRegion *mr, */ owner_dev = DEVICE(owner); vmstate_register_ram(mr, owner_dev); + + return true; } void memory_region_init_rom(MemoryRegion *mr, -- Gitee From a84176f5d78d51160c3be794bd17590e0f56b245 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 20 Nov 2023 11:29:31 +0100 Subject: [PATCH 088/115] memory: Have memory_region_init_rom() handler return a boolean MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b9159451d3411bfef528f431c98527c172cd46e8 upstream Following the example documented since commit e3fe3988d7 ("error: Document Error API usage rules"), have memory_region_init_rom() return a boolean indicating whether an error is set or not. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Manos Pitsidianakis Reviewed-by: Gavin Shan Message-Id: <20231120213301.24349-8-philmd@linaro.org> Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- include/exec/memory.h | 4 +++- system/memory.c | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index 8ca7e0a453..44d1ddee6c 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -1782,8 +1782,10 @@ bool memory_region_init_ram(MemoryRegion *mr, * must be unique within any device * @size: size of the region. * @errp: pointer to Error*, to store an error if it happens. + * + * Return: true on success, else false setting @errp with error. */ -void memory_region_init_rom(MemoryRegion *mr, +bool memory_region_init_rom(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, diff --git a/system/memory.c b/system/memory.c index 4d58bab11c..c101488e4e 100644 --- a/system/memory.c +++ b/system/memory.c @@ -3688,7 +3688,7 @@ bool memory_region_init_ram(MemoryRegion *mr, return true; } -void memory_region_init_rom(MemoryRegion *mr, +bool memory_region_init_rom(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, @@ -3697,7 +3697,7 @@ void memory_region_init_rom(MemoryRegion *mr, DeviceState *owner_dev; if (!memory_region_init_rom_nomigrate(mr, owner, name, size, errp)) { - return; + return false; } /* This will assert if owner is neither NULL nor a DeviceState. * We only want the owner here for the purposes of defining a @@ -3707,6 +3707,8 @@ void memory_region_init_rom(MemoryRegion *mr, */ owner_dev = DEVICE(owner); vmstate_register_ram(mr, owner_dev); + + return true; } void memory_region_init_rom_device(MemoryRegion *mr, -- Gitee From 235a9544525781062b13b26ef1929c4b80c6e255 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 20 Nov 2023 11:30:34 +0100 Subject: [PATCH 089/115] memory: Have memory_region_init_rom_device_nomigrate() return a boolean MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ae076b6c39bfd2027e35ca5be922dc8f1b18b901 upstream Following the example documented since commit e3fe3988d7 ("error: Document Error API usage rules"), have memory_region_init_rom_device_nomigrate() return a boolean indicating whether an error is set or not. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Peter Xu Reviewed-by: Gavin Shan Message-Id: <20231120213301.24349-9-philmd@linaro.org> Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- include/exec/memory.h | 4 +++- system/memory.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index 44d1ddee6c..3c02458eb2 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -1694,8 +1694,10 @@ bool memory_region_init_rom_nomigrate(MemoryRegion *mr, * must be unique within any device * @size: size of the region. * @errp: pointer to Error*, to store an error if it happens. + * + * Return: true on success, else false setting @errp with error. */ -void memory_region_init_rom_device_nomigrate(MemoryRegion *mr, +bool memory_region_init_rom_device_nomigrate(MemoryRegion *mr, Object *owner, const MemoryRegionOps *ops, void *opaque, diff --git a/system/memory.c b/system/memory.c index c101488e4e..e9856d05e2 100644 --- a/system/memory.c +++ b/system/memory.c @@ -1749,7 +1749,7 @@ bool memory_region_init_rom_nomigrate(MemoryRegion *mr, return true; } -void memory_region_init_rom_device_nomigrate(MemoryRegion *mr, +bool memory_region_init_rom_device_nomigrate(MemoryRegion *mr, Object *owner, const MemoryRegionOps *ops, void *opaque, @@ -1770,7 +1770,9 @@ void memory_region_init_rom_device_nomigrate(MemoryRegion *mr, mr->size = int128_zero(); object_unparent(OBJECT(mr)); error_propagate(errp, err); + return false; } + return true; } void memory_region_init_iommu(void *_iommu_mr, -- Gitee From 77fae0e3801186864bd380456d42b04b98289d56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 20 Nov 2023 13:39:59 +0100 Subject: [PATCH 090/115] memory: Simplify memory_region_init_rom_device_nomigrate() calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bd3aa06950a17a7ff5acf231bcafe59b25cc0ff9 upstream Mechanical change using the following coccinelle script: @@ expression mr, owner, arg3, arg4, arg5, arg6, errp; @@ - memory_region_init_rom_device_nomigrate(mr, owner, arg3, arg4, arg5, arg6, &errp); if ( - errp + !memory_region_init_rom_device_nomigrate(mr, owner, arg3, arg4, arg5, arg6, &errp) ) { ... return; } and removing the local Error variable. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Peter Xu Reviewed-by: Gavin Shan Message-Id: <20231120213301.24349-10-philmd@linaro.org> Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- system/memory.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/system/memory.c b/system/memory.c index e9856d05e2..db15de500f 100644 --- a/system/memory.c +++ b/system/memory.c @@ -3722,12 +3722,9 @@ void memory_region_init_rom_device(MemoryRegion *mr, Error **errp) { DeviceState *owner_dev; - Error *err = NULL; - memory_region_init_rom_device_nomigrate(mr, owner, ops, opaque, - name, size, &err); - if (err) { - error_propagate(errp, err); + if (!memory_region_init_rom_device_nomigrate(mr, owner, ops, opaque, + name, size, errp)) { return; } /* This will assert if owner is neither NULL nor a DeviceState. -- Gitee From e2c96b53d36e6b0c4f8c1eaf7712bc6ef72ac11b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 20 Nov 2023 11:31:30 +0100 Subject: [PATCH 091/115] memory: Have memory_region_init_rom_device() handler return a boolean MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 62f5c1b234e27357b08ba01124c17f61729ceae8 upstream Following the example documented since commit e3fe3988d7 ("error: Document Error API usage rules"), have memory_region_init_rom_device return a boolean indicating whether an error is set or not. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Peter Xu Reviewed-by: Gavin Shan Message-Id: <20231120213301.24349-11-philmd@linaro.org> Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- include/exec/memory.h | 4 +++- system/memory.c | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index 3c02458eb2..e49278b944 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -1817,8 +1817,10 @@ bool memory_region_init_rom(MemoryRegion *mr, * must be unique within any device * @size: size of the region. * @errp: pointer to Error*, to store an error if it happens. + * + * Return: true on success, else false setting @errp with error. */ -void memory_region_init_rom_device(MemoryRegion *mr, +bool memory_region_init_rom_device(MemoryRegion *mr, Object *owner, const MemoryRegionOps *ops, void *opaque, diff --git a/system/memory.c b/system/memory.c index db15de500f..b04771a044 100644 --- a/system/memory.c +++ b/system/memory.c @@ -3713,7 +3713,7 @@ bool memory_region_init_rom(MemoryRegion *mr, return true; } -void memory_region_init_rom_device(MemoryRegion *mr, +bool memory_region_init_rom_device(MemoryRegion *mr, Object *owner, const MemoryRegionOps *ops, void *opaque, @@ -3725,7 +3725,7 @@ void memory_region_init_rom_device(MemoryRegion *mr, if (!memory_region_init_rom_device_nomigrate(mr, owner, ops, opaque, name, size, errp)) { - return; + return false; } /* This will assert if owner is neither NULL nor a DeviceState. * We only want the owner here for the purposes of defining a @@ -3735,6 +3735,8 @@ void memory_region_init_rom_device(MemoryRegion *mr, */ owner_dev = DEVICE(owner); vmstate_register_ram(mr, owner_dev); + + return true; } /* -- Gitee From 9aa54aed66ffd30bd8e03f654fdb1ae9b2007aa0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 20 Nov 2023 12:12:03 +0100 Subject: [PATCH 092/115] memory: Have memory_region_init_resizeable_ram() return a boolean MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f25a9fbb649f564f152c95c44a5f2c0c6f4237a9 upstream Following the example documented since commit e3fe3988d7 ("error: Document Error API usage rules"), have memory_region_init_resizeable_ram return a boolean indicating whether an error is set or not. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Peter Xu Reviewed-by: Gavin Shan Message-Id: <20231120213301.24349-12-philmd@linaro.org> Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- include/exec/memory.h | 4 +++- system/memory.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index e49278b944..8c69d14321 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -1513,8 +1513,10 @@ bool memory_region_init_ram_flags_nomigrate(MemoryRegion *mr, * * Note that this function does not do anything to cause the data in the * RAM memory region to be migrated; that is the responsibility of the caller. + * + * Return: true on success, else false setting @errp with error. */ -void memory_region_init_resizeable_ram(MemoryRegion *mr, +bool memory_region_init_resizeable_ram(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, diff --git a/system/memory.c b/system/memory.c index b04771a044..34ee6c1bf2 100644 --- a/system/memory.c +++ b/system/memory.c @@ -1611,7 +1611,7 @@ bool memory_region_init_ram_flags_nomigrate(MemoryRegion *mr, return true; } -void memory_region_init_resizeable_ram(MemoryRegion *mr, +bool memory_region_init_resizeable_ram(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, @@ -1632,7 +1632,9 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr, mr->size = int128_zero(); object_unparent(OBJECT(mr)); error_propagate(errp, err); + return false; } + return true; } #ifdef CONFIG_POSIX -- Gitee From ea0fb8e99dc873a05aa1c9e6da2a503ef6ab5ce0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 20 Nov 2023 13:45:13 +0100 Subject: [PATCH 093/115] memory: Have memory_region_init_ram_from_file() handler return a boolean MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9b9d11ac03b2802d67c132afaae3d37dd2f0b16f upstream Following the example documented since commit e3fe3988d7 ("error: Document Error API usage rules"), have memory_region_init_ram_from_file return a boolean indicating whether an error is set or not. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Peter Xu Reviewed-by: Gavin Shan Message-Id: <20231120213301.24349-13-philmd@linaro.org> Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- include/exec/memory.h | 4 +++- system/memory.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index 8c69d14321..7434b63e69 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -1547,8 +1547,10 @@ bool memory_region_init_resizeable_ram(MemoryRegion *mr, * * Note that this function does not do anything to cause the data in the * RAM memory region to be migrated; that is the responsibility of the caller. + * + * Return: true on success, else false setting @errp with error. */ -void memory_region_init_ram_from_file(MemoryRegion *mr, +bool memory_region_init_ram_from_file(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, diff --git a/system/memory.c b/system/memory.c index 34ee6c1bf2..180cac3b23 100644 --- a/system/memory.c +++ b/system/memory.c @@ -1638,7 +1638,7 @@ bool memory_region_init_resizeable_ram(MemoryRegion *mr, } #ifdef CONFIG_POSIX -void memory_region_init_ram_from_file(MemoryRegion *mr, +bool memory_region_init_ram_from_file(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, @@ -1661,7 +1661,9 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, mr->size = int128_zero(); object_unparent(OBJECT(mr)); error_propagate(errp, err); + return false; } + return true; } void memory_region_init_ram_from_fd(MemoryRegion *mr, -- Gitee From eec40ff24875cd5b7ce013bea5112beda356d6a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 20 Nov 2023 13:46:06 +0100 Subject: [PATCH 094/115] memory: Have memory_region_init_ram_from_fd() handler return a boolean MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9583a905793986d9018fe1fbb17d17fb4f0d76dd upstream Following the example documented since commit e3fe3988d7 ("error: Document Error API usage rules"), have memory_region_init_ram_from_fd return a boolean indicating whether an error is set or not. Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Peter Xu Reviewed-by: Gavin Shan Message-Id: <20231120213301.24349-14-philmd@linaro.org> Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- include/exec/memory.h | 4 +++- system/memory.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index 7434b63e69..005747041d 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -1577,8 +1577,10 @@ bool memory_region_init_ram_from_file(MemoryRegion *mr, * * Note that this function does not do anything to cause the data in the * RAM memory region to be migrated; that is the responsibility of the caller. + * + * Return: true on success, else false setting @errp with error. */ -void memory_region_init_ram_from_fd(MemoryRegion *mr, +bool memory_region_init_ram_from_fd(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, diff --git a/system/memory.c b/system/memory.c index 180cac3b23..407057491e 100644 --- a/system/memory.c +++ b/system/memory.c @@ -1666,7 +1666,7 @@ bool memory_region_init_ram_from_file(MemoryRegion *mr, return true; } -void memory_region_init_ram_from_fd(MemoryRegion *mr, +bool memory_region_init_ram_from_fd(MemoryRegion *mr, Object *owner, const char *name, uint64_t size, @@ -1687,7 +1687,9 @@ void memory_region_init_ram_from_fd(MemoryRegion *mr, mr->size = int128_zero(); object_unparent(OBJECT(mr)); error_propagate(errp, err); + return false; } + return true; } #endif -- Gitee From 53fad91010dec22d01964e294bc755208e647aa0 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 30 May 2024 06:16:15 -0500 Subject: [PATCH 095/115] memory: Introduce memory_region_init_ram_guest_memfd() commit a0aa6db7ce72a08703774107185e639e73e7754c upstream Introduce memory_region_init_ram_guest_memfd() to allocate private guset memfd on the MemoryRegion initialization. It's for the use case of TDVF, which must be private on TDX case. Signed-off-by: Xiaoyao Li Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-4-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- include/exec/memory.h | 6 ++++++ system/memory.c | 24 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/include/exec/memory.h b/include/exec/memory.h index 005747041d..fc93bbba3c 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -1770,6 +1770,12 @@ bool memory_region_init_ram(MemoryRegion *mr, uint64_t size, Error **errp); +bool memory_region_init_ram_guest_memfd(MemoryRegion *mr, + Object *owner, + const char *name, + uint64_t size, + Error **errp); + /** * memory_region_init_rom: Initialize a ROM memory region. * diff --git a/system/memory.c b/system/memory.c index 407057491e..c394957dbb 100644 --- a/system/memory.c +++ b/system/memory.c @@ -3696,6 +3696,30 @@ bool memory_region_init_ram(MemoryRegion *mr, return true; } +bool memory_region_init_ram_guest_memfd(MemoryRegion *mr, + Object *owner, + const char *name, + uint64_t size, + Error **errp) +{ + DeviceState *owner_dev; + + if (!memory_region_init_ram_flags_nomigrate(mr, owner, name, size, + RAM_GUEST_MEMFD, errp)) { + return false; + } + /* This will assert if owner is neither NULL nor a DeviceState. + * We only want the owner here for the purposes of defining a + * unique name for migration. TODO: Ideally we should implement + * a naming scheme for Objects which are not DeviceStates, in + * which case we can relax this restriction. + */ + owner_dev = DEVICE(owner); + vmstate_register_ram(mr, owner_dev); + + return true; +} + bool memory_region_init_rom(MemoryRegion *mr, Object *owner, const char *name, -- Gitee From b2d173754460064cd773598a547b025cc23af39c Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Wed, 8 May 2024 19:55:07 +0200 Subject: [PATCH 096/115] hw/i386/pc_sysfw: Alias rather than copy isa-bios region commit a44ea3fa7f2aa1d809fdca1b84a52695b53d8ad0 upstream. In the -bios case the "isa-bios" memory region is an alias to the BIOS mapped to the top of the 4G memory boundary. Do the same in the -pflash case, but only for new machine versions for migration compatibility. This establishes common behavior and makes pflash commands work in the "isa-bios" region which some real-world legacy bioses rely on. Note that in the sev_enabled() case, the "isa-bios" memory region in the -pflash case will now also point to encrypted memory, just like it already does in the -bios case. When running `info mtree` before and after this commit with `qemu-system-x86_64 -S -drive \ if=pflash,format=raw,readonly=on,file=/usr/share/qemu/bios-256k.bin` and running `diff -u before.mtree after.mtree` results in the following changes in the memory tree: --- before.mtree +++ after.mtree @@ -71,7 +71,7 @@ 0000000000000000-ffffffffffffffff (prio -1, i/o): pci 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff @@ -108,7 +108,7 @@ 0000000000000000-ffffffffffffffff (prio -1, i/o): pci 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff 00000000000a0000-00000000000bffff (prio 1, i/o): alias smram-region @pci 00000000000a0000-00000000000bffff 00000000000c0000-00000000000c3fff (prio 1, i/o): alias pam-pci @pci 00000000000c0000-00000000000c3fff 00000000000c4000-00000000000c7fff (prio 1, i/o): alias pam-pci @pci 00000000000c4000-00000000000c7fff @@ -131,11 +131,14 @@ memory-region: pc.ram 0000000000000000-0000000007ffffff (prio 0, ram): pc.ram +memory-region: system.flash0 + 00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0 + memory-region: pci 0000000000000000-ffffffffffffffff (prio -1, i/o): pci 00000000000a0000-00000000000bffff (prio 1, i/o): vga-lowmem 00000000000c0000-00000000000dffff (prio 1, rom): pc.rom - 00000000000e0000-00000000000fffff (prio 1, rom): isa-bios + 00000000000e0000-00000000000fffff (prio 1, romd): alias isa-bios @system.flash0 0000000000020000-000000000003ffff memory-region: smram 00000000000a0000-00000000000bffff (prio 0, ram): alias smram-low @pc.ram 00000000000a0000-00000000000bffff Note that in both cases the "system" memory region contains the entry 00000000fffc0000-00000000ffffffff (prio 0, romd): system.flash0 but the "system.flash0" memory region only appears standalone when "isa-bios" is an alias. Signed-off-by: Bernhard Beschow Message-ID: <20240508175507.22270-7-shentey@gmail.com> Signed-off-by: Paolo Bonzini Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- hw/i386/pc.c | 1 + hw/i386/pc_piix.c | 3 +++ hw/i386/pc_q35.c | 2 ++ hw/i386/pc_sysfw.c | 8 +++++++- include/hw/i386/pc.h | 1 + 5 files changed, 14 insertions(+), 1 deletion(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 672ce2abd6..a159e0693c 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1944,6 +1944,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->kvmclock_enabled = true; pcmc->enforce_aligned_dimm = true; pcmc->enforce_amd_1tb_hole = true; + pcmc->isa_bios_alias = true; /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported * to be used at the moment, 32K should be enough for a while. */ pcmc->acpi_data_size = 0x20000 + 0x8000; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 45b1543de8..00c2ad61af 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -556,12 +556,15 @@ DEFINE_I440FX_MACHINE(v9_1, "pc-i440fx-9.1", NULL, static void pc_i440fx_9_0_machine_options(MachineClass *m) { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + pc_i440fx_9_1_machine_options(m); m->alias = NULL; m->is_default = false; compat_props_add(m->compat_props, hw_compat_9_0, hw_compat_9_0_len); compat_props_add(m->compat_props, pc_compat_9_0, pc_compat_9_0_len); + pcmc->isa_bios_alias = false; } DEFINE_I440FX_MACHINE(v9_0, "pc-i440fx-9.0", NULL, diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index e1b7dce0f2..ab1366ae44 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -384,10 +384,12 @@ DEFINE_Q35_MACHINE(v9_1, "pc-q35-9.1", NULL, static void pc_q35_9_0_machine_options(MachineClass *m) { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_9_1_machine_options(m); m->alias = NULL; compat_props_add(m->compat_props, hw_compat_9_0, hw_compat_9_0_len); compat_props_add(m->compat_props, pc_compat_9_0, pc_compat_9_0_len); + pcmc->isa_bios_alias = false; } DEFINE_Q35_MACHINE(v9_0, "pc-q35-9.0", NULL, diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index 10eeaa2d8a..2536b7686d 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -138,6 +138,7 @@ static void pc_system_flash_map(PCMachineState *pcms, MemoryRegion *rom_memory) { X86MachineState *x86ms = X86_MACHINE(pcms); + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); hwaddr total_size = 0; int i; BlockBackend *blk; @@ -189,7 +190,12 @@ static void pc_system_flash_map(PCMachineState *pcms, if (i == 0) { flash_mem = pflash_cfi01_get_memory(system_flash); - pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem); + if (pcmc->isa_bios_alias) { + x86_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem, + true); + } else { + pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem); + } /* Encrypt the pflash boot ROM */ if (sev_enabled()) { diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 52bbbaa132..ebcb14cfb1 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -120,6 +120,7 @@ struct PCMachineClass { bool enforce_aligned_dimm; bool broken_reserved_end; bool enforce_amd_1tb_hole; + bool isa_bios_alias; /* generate legacy CPU hotplug AML */ bool legacy_cpu_hotplug; -- Gitee From 5c632f199c36f30f02eedff389c902a9a19395f7 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 30 May 2024 06:16:39 -0500 Subject: [PATCH 097/115] hw/i386/sev: Use guest_memfd for legacy ROMs commit 413a67450750e0459efeffc3db3ba9759c3e381c upstream Current SNP guest kernels will attempt to access these regions with with C-bit set, so guest_memfd is needed to handle that. Otherwise, kvm_convert_memory() will fail when the guest kernel tries to access it and QEMU attempts to call KVM_SET_MEMORY_ATTRIBUTES to set these ranges to private. Whether guests should actually try to access ROM regions in this way (or need to deal with legacy ROM regions at all), is a separate issue to be addressed on kernel side, but current SNP guest kernels will exhibit this behavior and so this handling is needed to allow QEMU to continue running existing SNP guest kernels. Signed-off-by: Michael Roth [pankaj: Added sev_snp_enabled() check] Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-28-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- hw/i386/pc.c | 14 ++++++++++---- hw/i386/pc_sysfw.c | 19 +++++++++++++------ 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index a159e0693c..18a1a95094 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -62,6 +62,7 @@ #include "hw/mem/memory-device.h" #include "e820_memory_layout.h" #include "trace.h" +#include "sev.h" #include CONFIG_DEVICES #ifdef CONFIG_XEN_EMU @@ -1167,10 +1168,15 @@ void pc_memory_init(PCMachineState *pcms, pc_system_firmware_init(pcms, rom_memory); option_rom_mr = g_malloc(sizeof(*option_rom_mr)); - memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, - &error_fatal); - if (pcmc->pci_enabled) { - memory_region_set_readonly(option_rom_mr, true); + if (machine_require_guest_memfd(machine)) { + memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom", + PC_ROM_SIZE, &error_fatal); + } else { + memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, + &error_fatal); + if (pcmc->pci_enabled) { + memory_region_set_readonly(option_rom_mr, true); + } } memory_region_add_subregion_overlap(rom_memory, PC_ROM_MIN_VGA, diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index 2536b7686d..e488004d1d 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -41,8 +41,8 @@ #define FLASH_SECTOR_SIZE 4096 -static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, - MemoryRegion *flash_mem) +static void pc_isa_bios_init(PCMachineState *pcms, MemoryRegion *isa_bios, + MemoryRegion *rom_memory, MemoryRegion *flash_mem) { int isa_bios_size; uint64_t flash_size; @@ -52,8 +52,13 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, /* map the last 128KB of the BIOS in ISA space */ isa_bios_size = MIN(flash_size, 128 * KiB); - memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size, - &error_fatal); + if (machine_require_guest_memfd(MACHINE(pcms))) { + memory_region_init_ram_guest_memfd(isa_bios, NULL, "isa-bios", + isa_bios_size, &error_fatal); + } else { + memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size, + &error_fatal); + } memory_region_add_subregion_overlap(rom_memory, 0x100000 - isa_bios_size, isa_bios, @@ -66,7 +71,9 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, ((uint8_t*)flash_ptr) + (flash_size - isa_bios_size), isa_bios_size); - memory_region_set_readonly(isa_bios, true); + if (!machine_require_guest_memfd(current_machine)) { + memory_region_set_readonly(isa_bios, true); + } } static PFlashCFI01 *pc_pflash_create(PCMachineState *pcms, @@ -194,7 +201,7 @@ static void pc_system_flash_map(PCMachineState *pcms, x86_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem, true); } else { - pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem); + pc_isa_bios_init(pcms, &x86ms->isa_bios, rom_memory, flash_mem); } /* Encrypt the pflash boot ROM */ -- Gitee From 6b4932e9f524fed668c0c77d89c7c37dfb0c410f Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 30 May 2024 06:16:40 -0500 Subject: [PATCH 098/115] hw/i386: Add support for loading BIOS using guest_memfd commit fc7a69e177e4ba26d11fcf47b853f85115b35a11 upstream When guest_memfd is enabled, the BIOS is generally part of the initial encrypted guest image and will be accessed as private guest memory. Add the necessary changes to set up the associated RAM region with a guest_memfd backend to allow for this. Current support centers around using -bios to load the BIOS data. Support for loading the BIOS via pflash requires additional enablement since those interfaces rely on the use of ROM memory regions which make use of the KVM_MEM_READONLY memslot flag, which is not supported for guest_memfd-backed memslots. Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-29-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- hw/i386/x86-common.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c index 35fe6eabea..6cbb76c25c 100644 --- a/hw/i386/x86-common.c +++ b/hw/i386/x86-common.c @@ -969,8 +969,13 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, (bios_size % 65536) != 0) { goto bios_error; } - memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, - &error_fatal); + if (machine_require_guest_memfd(MACHINE(x86ms))) { + memory_region_init_ram_guest_memfd(&x86ms->bios, NULL, "pc.bios", + bios_size, &error_fatal); + } else { + memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", + bios_size, &error_fatal); + } if (sev_enabled()) { /* * The concept of a "reset" simply doesn't exist for @@ -991,9 +996,11 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, } g_free(filename); - /* map the last 128KB of the BIOS in ISA space */ - x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, - !isapc_ram_fw); + if (!machine_require_guest_memfd(MACHINE(x86ms))) { + /* map the last 128KB of the BIOS in ISA space */ + x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, + !isapc_ram_fw); + } /* map all the bios at the top of memory */ memory_region_add_subregion(rom_memory, -- Gitee From 636a2ac42b13983f21717c8c624d2a8e289e3788 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Fri, 7 Jun 2024 13:36:09 -0500 Subject: [PATCH 099/115] i386/sev: fix unreachable code coverity issue commit c94eb5db8e409c932da9eb187e68d4cdc14acc5b upstream Set 'finish->id_block_en' early, so that it is properly reset. Fixes coverity CID 1546887. Fixes: 7b34df4426 ("i386/sev: Introduce 'sev-snp-guest' object") Signed-off-by: Pankaj Gupta Message-ID: <20240607183611.1111100-2-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- target/i386/sev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index d2de0b12a0..f3e5ca9fd7 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -3610,6 +3610,7 @@ sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; gsize len; + finish->id_block_en = 0; g_free(sev_snp_guest->id_block); g_free((guchar *)finish->id_block_uaddr); @@ -3629,7 +3630,7 @@ sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) return; } - finish->id_block_en = (len) ? 1 : 0; + finish->id_block_en = 1; } static char * -- Gitee From 4367780dac17837cd2e88ab370e6da56c3541d53 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Fri, 7 Jun 2024 13:36:10 -0500 Subject: [PATCH 100/115] i386/sev: Move SEV_COMMON null check before dereferencing commit 48779faef3c8e2fe70bd8285bffa731bd76dc844 upstream Fixes Coverity CID 1546886. Fixes: 9861405a8f ("i386/sev: Invoke launch_updata_data() for SEV class") Signed-off-by: Pankaj Gupta Message-ID: <20240607183611.1111100-3-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- target/i386/sev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index f3e5ca9fd7..5a21baf918 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -1865,11 +1865,12 @@ int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) { SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); - SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common); + SevCommonStateClass *klass; if (!sev_common) { return 0; } + klass = SEV_COMMON_GET_CLASS(sev_common); /* if SEV is in update state then encrypt the data else do nothing */ if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { -- Gitee From ad14c29d624d0ce2115d91b3d17f02ecee7958ec Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Fri, 7 Jun 2024 13:36:11 -0500 Subject: [PATCH 101/115] i386/sev: Return when sev_common is null commit cd7093a7a168a823d07671348996f049d45e8f67 upstream Fixes Coverity CID 1546885. Fixes: 16dcf200dc ("i386/sev: Introduce "sev-common" type to encapsulate common SEV state") Signed-off-by: Pankaj Gupta Message-ID: <20240607183611.1111100-4-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini Signed-off-by: PrithivishS --- target/i386/sev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/target/i386/sev.c b/target/i386/sev.c index 5a21baf918..4a1983fa05 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -690,6 +690,7 @@ static SevCapability *sev_get_capabilities(Error **errp) sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); if (!sev_common) { error_setg(errp, "SEV is not configured"); + return NULL; } sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", -- Gitee From 36ece4a4350e9acafac78d86ec8757711ccef085 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 5 Jun 2024 20:09:44 +0200 Subject: [PATCH 102/115] target/i386: SEV: do not assume machine->cgs is SEV commit 109238a8d97cd8e85ca614109724a0b1222b21f5 upstream There can be other confidential computing classes that are not derived from sev-common. Avoid aborting when encountering them. Signed-off-by: Paolo Bonzini Signed-off-by: PrithivishS --- target/i386/sev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 4a1983fa05..92546fbf6f 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -2058,7 +2058,9 @@ void sev_es_set_reset_vector(CPUState *cpu) { X86CPU *x86; CPUX86State *env; - SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; + SevCommonState *sev_common = SEV_COMMON( + object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON)); /* Only update if we have valid reset information */ if (!sev_common || !sev_common->reset_data_valid) { -- Gitee From a38a6b9e9467aeb28b9678973c5e785888644694 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 26 Jun 2024 19:03:38 +0200 Subject: [PATCH 103/115] target/i386: SEV: rename sev_snp_guest->id_block commit 68c3aa3e97d843b080d5e445ff3a900f6f703471 upstream Free the "id_block" name for the binary version of the data. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini Signed-off-by: PrithivishS --- target/i386/sev.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 92546fbf6f..6b1c9290f4 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -181,7 +181,7 @@ struct SevSnpGuestState { /* configuration parameters */ char *guest_visible_workarounds; - char *id_block; + char *id_block_base64; char *id_auth; char *host_data; @@ -1439,7 +1439,7 @@ sev_snp_launch_finish(SevCommonState *sev_common) } } - trace_kvm_sev_snp_launch_finish(sev_snp->id_block, sev_snp->id_auth, + trace_kvm_sev_snp_launch_finish(sev_snp->id_block_base64, sev_snp->id_auth, sev_snp->host_data); ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_FINISH, finish, &error); @@ -3604,7 +3604,7 @@ sev_snp_guest_get_id_block(Object *obj, Error **errp) { SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); - return g_strdup(sev_snp_guest->id_block); + return g_strdup(sev_snp_guest->id_block_base64); } static void @@ -3615,14 +3615,14 @@ sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) gsize len; finish->id_block_en = 0; - g_free(sev_snp_guest->id_block); + g_free(sev_snp_guest->id_block_base64); g_free((guchar *)finish->id_block_uaddr); /* store the base64 str so we don't need to re-encode in getter */ - sev_snp_guest->id_block = g_strdup(value); + sev_snp_guest->id_block_base64 = g_strdup(value); finish->id_block_uaddr = - (uint64_t)qbase64_decode(sev_snp_guest->id_block, -1, &len, errp); + (uint64_t)qbase64_decode(sev_snp_guest->id_block_base64, -1, &len, errp); if (!finish->id_block_uaddr) { return; -- Gitee From 42dcd69073f2abb5f47b53bb69b5b6600400a084 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 26 Jun 2024 19:05:21 +0200 Subject: [PATCH 104/115] target/i386: SEV: store pointer to decoded id_block in SevSnpGuest commit dd1b2fb554fb0ace09319e96b21e3b776eb7f5ba upstream Do not rely on finish->id_block_uaddr, so that there are no casts from pointer to uint64_t. They break on 32-bit hosts. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini Signed-off-by: PrithivishS --- target/i386/sev.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 6b1c9290f4..3c6bf86470 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -182,6 +182,7 @@ struct SevSnpGuestState { /* configuration parameters */ char *guest_visible_workarounds; char *id_block_base64; + uint8_t *id_block; char *id_auth; char *host_data; @@ -3615,16 +3616,15 @@ sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) gsize len; finish->id_block_en = 0; + g_free(sev_snp_guest->id_block); g_free(sev_snp_guest->id_block_base64); - g_free((guchar *)finish->id_block_uaddr); /* store the base64 str so we don't need to re-encode in getter */ sev_snp_guest->id_block_base64 = g_strdup(value); + sev_snp_guest->id_block = + qbase64_decode(sev_snp_guest->id_block_base64, -1, &len, errp); - finish->id_block_uaddr = - (uint64_t)qbase64_decode(sev_snp_guest->id_block_base64, -1, &len, errp); - - if (!finish->id_block_uaddr) { + if (!sev_snp_guest->id_block) { return; } @@ -3635,6 +3635,7 @@ sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) } finish->id_block_en = 1; + finish->id_block_uaddr = (uintptr_t)sev_snp_guest->id_block; } static char * -- Gitee From 100b1756439229b2b578da9daf1a3502422d0478 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 26 Jun 2024 19:03:38 +0200 Subject: [PATCH 105/115] target/i386: SEV: rename sev_snp_guest->id_auth commit 803b7718e6de87a3ecd5555da7d9fce6435c7db9 upstream Free the "id_auth" name for the binary version of the data. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini Signed-off-by: PrithivishS --- target/i386/sev.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 3c6bf86470..aa6682780e 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -183,7 +183,7 @@ struct SevSnpGuestState { char *guest_visible_workarounds; char *id_block_base64; uint8_t *id_block; - char *id_auth; + char *id_auth_base64; char *host_data; struct kvm_sev_snp_launch_start kvm_start_conf; @@ -1440,7 +1440,7 @@ sev_snp_launch_finish(SevCommonState *sev_common) } } - trace_kvm_sev_snp_launch_finish(sev_snp->id_block_base64, sev_snp->id_auth, + trace_kvm_sev_snp_launch_finish(sev_snp->id_block_base64, sev_snp->id_auth_base64, sev_snp->host_data); ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_FINISH, finish, &error); @@ -3643,7 +3643,7 @@ sev_snp_guest_get_id_auth(Object *obj, Error **errp) { SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); - return g_strdup(sev_snp_guest->id_auth); + return g_strdup(sev_snp_guest->id_auth_base64); } static void @@ -3653,14 +3653,14 @@ sev_snp_guest_set_id_auth(Object *obj, const char *value, Error **errp) struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; gsize len; - g_free(sev_snp_guest->id_auth); + g_free(sev_snp_guest->id_auth_base64); g_free((guchar *)finish->id_auth_uaddr); /* store the base64 str so we don't need to re-encode in getter */ - sev_snp_guest->id_auth = g_strdup(value); + sev_snp_guest->id_auth_base64 = g_strdup(value); finish->id_auth_uaddr = - (uint64_t)qbase64_decode(sev_snp_guest->id_auth, -1, &len, errp); + (uint64_t)qbase64_decode(sev_snp_guest->id_auth_base64, -1, &len, errp); if (!finish->id_auth_uaddr) { return; -- Gitee From a3dcaa261176430accf02cc95f0f4765bd6759a8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 26 Jun 2024 19:05:21 +0200 Subject: [PATCH 106/115] target/i386: SEV: store pointer to decoded id_auth in SevSnpGuest commit 1ab620bf3601a3c9e264031d80c7023690ddc2b4 upstream Do not rely on finish->id_auth_uaddr, so that there are no casts from pointer to uint64_t. They break on 32-bit hosts. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini Signed-off-by: PrithivishS --- target/i386/sev.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index aa6682780e..60acd5044f 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -184,6 +184,7 @@ struct SevSnpGuestState { char *id_block_base64; uint8_t *id_block; char *id_auth_base64; + uint8_t *id_auth; char *host_data; struct kvm_sev_snp_launch_start kvm_start_conf; @@ -3653,16 +3654,16 @@ sev_snp_guest_set_id_auth(Object *obj, const char *value, Error **errp) struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; gsize len; + finish->id_auth_uaddr = 0; + g_free(sev_snp_guest->id_auth); g_free(sev_snp_guest->id_auth_base64); - g_free((guchar *)finish->id_auth_uaddr); /* store the base64 str so we don't need to re-encode in getter */ sev_snp_guest->id_auth_base64 = g_strdup(value); + sev_snp_guest->id_auth = + qbase64_decode(sev_snp_guest->id_auth_base64, -1, &len, errp); - finish->id_auth_uaddr = - (uint64_t)qbase64_decode(sev_snp_guest->id_auth_base64, -1, &len, errp); - - if (!finish->id_auth_uaddr) { + if (!sev_snp_guest->id_auth) { return; } @@ -3671,6 +3672,8 @@ sev_snp_guest_set_id_auth(Object *obj, const char *value, Error **errp) len, KVM_SEV_SNP_ID_AUTH_SIZE); return; } + + finish->id_auth_uaddr = (uintptr_t)sev_snp_guest->id_auth; } static bool -- Gitee From e90562244f85b535e42d44f81e1a786cfea530f6 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 26 Jun 2024 12:49:49 -0700 Subject: [PATCH 107/115] target/i386/sev: Use size_t for object sizes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cb61b174620abc41badf12b4caedb85c1747605c upstream This code was using both uint32_t and uint64_t for len. Consistently use size_t instead. Signed-off-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Link: https://lore.kernel.org/r/20240626194950.1725800-3-richard.henderson@linaro.org Signed-off-by: Paolo Bonzini Signed-off-by: PrithivishS --- target/i386/sev.c | 16 ++++++++-------- target/i386/trace-events | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 60acd5044f..0579987e48 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -129,7 +129,7 @@ struct SevCommonStateClass { Error **errp); int (*launch_start)(SevCommonState *sev_common); void (*launch_finish)(SevCommonState *sev_common); - int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, uint64_t len); + int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, size_t len); int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); }; @@ -202,7 +202,7 @@ typedef struct SevLaunchUpdateData { QTAILQ_ENTRY(SevLaunchUpdateData) next; hwaddr gpa; void *hva; - uint64_t len; + size_t len; int type; } SevLaunchUpdateData; @@ -989,7 +989,7 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, if (!data->hva || !data->len) { error_report("SNP_LAUNCH_UPDATE called with invalid address" - "/ length: %p / %lx", + "/ length: %p / %zx", data->hva, data->len); return 1; } @@ -1048,7 +1048,8 @@ out: } static int -sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, uint8_t *addr, uint64_t len) +sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, + uint8_t *addr, size_t len) { int ret, fw_error; struct kvm_sev_launch_update_data update; @@ -1233,8 +1234,7 @@ err: } static int -snp_launch_update_data(uint64_t gpa, void *hva, - uint32_t len, int type) +snp_launch_update_data(uint64_t gpa, void *hva, size_t len, int type) { SevLaunchUpdateData *data; @@ -1251,7 +1251,7 @@ snp_launch_update_data(uint64_t gpa, void *hva, static int sev_snp_launch_update_data(SevCommonState *sev_common, hwaddr gpa, - uint8_t *ptr, uint64_t len) + uint8_t *ptr, size_t len) { int ret = snp_launch_update_data(gpa, ptr, len, KVM_SEV_SNP_PAGE_TYPE_NORMAL); @@ -1308,7 +1308,7 @@ sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info, } static int -snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, uint32_t cpuid_len) +snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, size_t cpuid_len) { KvmCpuidInfo kvm_cpuid_info = {0}; SnpCpuidInfo snp_cpuid_info; diff --git a/target/i386/trace-events b/target/i386/trace-events index 6c784acdbc..b8eebf798d 100644 --- a/target/i386/trace-events +++ b/target/i386/trace-events @@ -6,7 +6,7 @@ kvm_memcrypt_register_region(void *addr, size_t len) "addr %p len 0x%zx" kvm_memcrypt_unregister_region(void *addr, size_t len) "addr %p len 0x%zx" kvm_sev_change_state(const char *old, const char *new) "%s -> %s" kvm_sev_launch_start(int policy, void *session, void *pdh) "policy 0x%x session %p pdh %p" -kvm_sev_launch_update_data(void *addr, uint64_t len) "addr %p len 0x%" PRIx64 +kvm_sev_launch_update_data(void *addr, size_t len) "addr %p len 0x%zx" kvm_sev_launch_measurement(const char *value) "data %s" kvm_sev_launch_finish(void) "" kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d" -- Gitee From ae5cef4450e062eb4e8a4724607f8dc72a9d2e07 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 26 Jun 2024 12:49:50 -0700 Subject: [PATCH 108/115] target/i386/sev: Fix printf formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b31d386781cf85c193f3b1355dd0604cd6a59943 upstream hwaddr uses HWADDR_PRIx, sizeof yields size_t so uses %zu, and gsize uses G_GSIZE_FORMAT. Signed-off-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé Link: https://lore.kernel.org/r/20240626194950.1725800-4-richard.henderson@linaro.org Signed-off-by: Paolo Bonzini Signed-off-by: PrithivishS --- target/i386/sev.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 0579987e48..60654e686d 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -1037,8 +1037,9 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, out: if (!ret && update.gfn_start << TARGET_PAGE_BITS != data->gpa + data->len) { - error_report("SEV-SNP: expected update of GPA range %lx-%lx," - "got GPA range %lx-%llx", + error_report("SEV-SNP: expected update of GPA range %" + HWADDR_PRIx "-%" HWADDR_PRIx "," + "got GPA range %" HWADDR_PRIx "-%llx", data->gpa, data->gpa + data->len, data->gpa, update.gfn_start << TARGET_PAGE_BITS); ret = -EIO; @@ -3593,7 +3594,8 @@ sev_snp_guest_set_guest_visible_workarounds(Object *obj, const char *value, } if (len != sizeof(start->gosvw)) { - error_setg(errp, "parameter length of %lu exceeds max of %lu", + error_setg(errp, "parameter length of %" G_GSIZE_FORMAT + " exceeds max of %zu", len, sizeof(start->gosvw)); return; } @@ -3630,7 +3632,8 @@ sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) } if (len != KVM_SEV_SNP_ID_BLOCK_SIZE) { - error_setg(errp, "parameter length of %lu not equal to %u", + error_setg(errp, "parameter length of %" G_GSIZE_FORMAT + " not equal to %u", len, KVM_SEV_SNP_ID_BLOCK_SIZE); return; } @@ -3668,7 +3671,8 @@ sev_snp_guest_set_id_auth(Object *obj, const char *value, Error **errp) } if (len > KVM_SEV_SNP_ID_AUTH_SIZE) { - error_setg(errp, "parameter length:ID_AUTH %lu exceeds max of %u", + error_setg(errp, "parameter length:ID_AUTH %" G_GSIZE_FORMAT + " exceeds max of %u", len, KVM_SEV_SNP_ID_AUTH_SIZE); return; } @@ -3736,7 +3740,8 @@ sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp) } if (len != sizeof(finish->host_data)) { - error_setg(errp, "parameter length of %lu not equal to %lu", + error_setg(errp, "parameter length of %" G_GSIZE_FORMAT + " not equal to %zu", len, sizeof(finish->host_data)); return; } -- Gitee From 91ad2b5c2f807b70301f27d32fb1bd539873f9f5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 3 Jul 2024 10:37:23 +0200 Subject: [PATCH 109/115] target/i386: SEV: fix formatting of CPUID mismatch message commit 9b40d376f66640eb7b6080ca000c866dfe630dc7 upstream Fixes: 70943ad8e4d ("i386/sev: Add support for SNP CPUID validation", 2024-06-05) Signed-off-by: Paolo Bonzini Signed-off-by: PrithivishS --- target/i386/sev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 60654e686d..82e7fbc0bb 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -944,7 +944,7 @@ sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old, size_t i; if (old->count != new->count) { - error_report("SEV-SNP: CPUID validation failed due to count mismatch," + error_report("SEV-SNP: CPUID validation failed due to count mismatch, " "provided: %d, expected: %d", old->count, new->count); return; } @@ -956,8 +956,8 @@ sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old, new_func = &new->entries[i]; if (memcmp(old_func, new_func, sizeof(SnpCpuidFunc))) { - error_report("SEV-SNP: CPUID validation failed for function 0x%x, index: 0x%x" - "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x" + error_report("SEV-SNP: CPUID validation failed for function 0x%x, index: 0x%x, " + "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x, " "expected: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x", old_func->eax_in, old_func->ecx_in, old_func->eax, old_func->ebx, old_func->ecx, old_func->edx, -- Gitee From 28573e3ba8927c15e40e77a6dd3eabcdda1f7e1f Mon Sep 17 00:00:00 2001 From: Michal Privoznik Date: Mon, 24 Jun 2024 10:52:48 +0200 Subject: [PATCH 110/115] i386/sev: Fix error message in sev_get_capabilities() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ab5f4edf721fc0403d40a4db7e96808175157def upstream When a custom path is provided to sev-guest object and opening the path fails an error message is reported. But the error message still mentions DEFAULT_SEV_DEVICE ("/dev/sev") instead of the custom path. Fixes: 16dcf200dc951c1cde3e5b442457db5f690b8cf0 Signed-off-by: Michal Privoznik Reviewed-by: Philippe Mathieu-Daudé Link: https://lore.kernel.org/r/b4648905d399780063dc70851d3d6a3cd28719a5.1719218926.git.mprivozn@redhat.com Signed-off-by: Paolo Bonzini Signed-off-by: PrithivishS --- target/i386/sev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 82e7fbc0bb..0fce2e3b41 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -700,7 +700,7 @@ static SevCapability *sev_get_capabilities(Error **errp) fd = open(sev_device, O_RDWR); if (fd < 0) { error_setg_errno(errp, errno, "SEV: Failed to open %s", - DEFAULT_SEV_DEVICE); + sev_device); g_free(sev_device); return NULL; } -- Gitee From 49e5dc27569ab43e133965643e272c6043725238 Mon Sep 17 00:00:00 2001 From: Michal Privoznik Date: Mon, 24 Jun 2024 10:52:49 +0200 Subject: [PATCH 111/115] i386/sev: Fallback to the default SEV device if none provided in sev_get_capabilities() commit f4e5f302b3361f13349b2a44378e7cf578a8e012 upstream When management tools (e.g. libvirt) query QEMU capabilities, they start QEMU with a minimalistic configuration and issue various commands on monitor. One of the command issued is/might be "query-sev-capabilities" to learn values like cbitpos or reduced-phys-bits. But as of v9.0.0-1145-g16dcf200dc the monitor command returns an error instead. This creates a chicken-egg problem because in order to query those aforementioned values QEMU needs to be started with a 'sev-guest' object. But to start QEMU with the values must be known. I think it's safe to assume that the default path ("/dev/sev") provides the same data as user provided one. So fall back to it. Fixes: 16dcf200dc951c1cde3e5b442457db5f690b8cf0 Signed-off-by: Michal Privoznik Link: https://lore.kernel.org/r/157f93712c23818be193ce785f648f0060b33dee.1719218926.git.mprivozn@redhat.com Signed-off-by: Paolo Bonzini Signed-off-by: PrithivishS --- target/i386/sev.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 0fce2e3b41..29df012338 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -690,13 +690,13 @@ static SevCapability *sev_get_capabilities(Error **errp) } sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); - if (!sev_common) { - error_setg(errp, "SEV is not configured"); - return NULL; + if (sev_common) { + sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", + &error_abort); + } else { + sev_device = g_strdup(DEFAULT_SEV_DEVICE); } - sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", - &error_abort); fd = open(sev_device, O_RDWR); if (fd < 0) { error_setg_errno(errp, errno, "SEV: Failed to open %s", -- Gitee From 5150aaa7cc481e05c9f716246119c90b22e3f284 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 3 Jul 2024 11:12:06 +0200 Subject: [PATCH 112/115] target/i386: add support for masking CPUID features in confidential guests commit c28d8b097f6da0389d0e915e26ab210aaac38ba0 upstream Some CPUID features may be provided by KVM for some guests, independent of processor support, for example TSC deadline or TSC adjust. If these are not supported by the confidential computing firmware, however, the guest will fail to start. Add support for removing unsupported features from "-cpu host". Signed-off-by: Paolo Bonzini Signed-off-by: PrithivishS --- target/i386/confidential-guest.h | 24 ++++++++++++++++++++++++ target/i386/kvm/kvm.c | 5 +++++ 2 files changed, 29 insertions(+) diff --git a/target/i386/confidential-guest.h b/target/i386/confidential-guest.h index 532e172a60..7342d2843a 100644 --- a/target/i386/confidential-guest.h +++ b/target/i386/confidential-guest.h @@ -39,6 +39,8 @@ struct X86ConfidentialGuestClass { /* */ int (*kvm_type)(X86ConfidentialGuest *cg); + uint32_t (*mask_cpuid_features)(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, + int reg, uint32_t value); }; /** @@ -56,4 +58,26 @@ static inline int x86_confidential_guest_kvm_type(X86ConfidentialGuest *cg) return 0; } } + +/** + * x86_confidential_guest_mask_cpuid_features: + * + * Removes unsupported features from a confidential guest's CPUID values, returns + * the value with the bits removed. The bits removed should be those that KVM + * provides independent of host-supported CPUID features, but are not supported by + * the confidential computing firmware. + */ +static inline int x86_confidential_guest_mask_cpuid_features(X86ConfidentialGuest *cg, + uint32_t feature, uint32_t index, + int reg, uint32_t value) +{ + X86ConfidentialGuestClass *klass = X86_CONFIDENTIAL_GUEST_GET_CLASS(cg); + + if (klass->mask_cpuid_features) { + return klass->mask_cpuid_features(cg, feature, index, reg, value); + } else { + return value; + } +} + #endif diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 3fd2c2dc1a..0a5af95a1e 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -547,6 +547,11 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, ret |= 1U << KVM_HINTS_REALTIME; } + if (current_machine->cgs) { + ret = x86_confidential_guest_mask_cpuid_features( + X86_CONFIDENTIAL_GUEST(current_machine->cgs), + function, index, reg, ret); + } return ret; } -- Gitee From ae21b18af0c5f8ae7eeec391683180336f563db1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 3 Jul 2024 11:16:56 +0200 Subject: [PATCH 113/115] target/i386/SEV: implement mask_cpuid_features commit 188569c10d5dc6996bde90ce25645083e9661ecb upstream Drop features that are listed as "BitMask" in the PPR and currently not supported by AMD processors. The only ones that may become useful in the future are TSC deadline timer and x2APIC, everything else is not needed for SEV-SNP guests (e.g. VIRT_SSBD) or would require processor support (e.g. TSC_ADJUST). This allows running SEV-SNP guests with "-cpu host". [Backport Changes] In file target/i386/cpu.h, addition of macro CPUID_7_0_EBX_TSC_ADJUST was skipped since the macro already exists in current codebase. (Commit e6464174c2261 introduces it) Signed-off-by: Paolo Bonzini Signed-off-by: PrithivishS --- target/i386/cpu.h | 2 ++ target/i386/sev.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 19e8f67e4b..f003558e40 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1016,6 +1016,8 @@ uint64_t x86_cpu_get_supported_feature_word(X86CPU *cpu, FeatureWord w); #define CPUID_8000_0008_EBX_STIBP_ALWAYS_ON (1U << 17) /* Speculative Store Bypass Disable */ #define CPUID_8000_0008_EBX_AMD_SSBD (1U << 24) +/* Paravirtualized Speculative Store Bypass Disable MSR */ +#define CPUID_8000_0008_EBX_VIRT_SSBD (1U << 25) /* Predictive Store Forwarding Disable */ #define CPUID_8000_0008_EBX_AMD_PSFD (1U << 28) diff --git a/target/i386/sev.c b/target/i386/sev.c index 29df012338..99508160d3 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -1048,6 +1048,38 @@ out: return ret; } +static uint32_t +sev_snp_mask_cpuid_features(X86ConfidentialGuest *cg, uint32_t feature, uint32_t index, + int reg, uint32_t value) +{ + switch (feature) { + case 1: + if (reg == R_ECX) { + return value & ~CPUID_EXT_TSC_DEADLINE_TIMER; + } + break; + case 7: + if (index == 0 && reg == R_EBX) { + return value & ~CPUID_7_0_EBX_TSC_ADJUST; + } + if (index == 0 && reg == R_EDX) { + return value & ~(CPUID_7_0_EDX_SPEC_CTRL | + CPUID_7_0_EDX_STIBP | + CPUID_7_0_EDX_FLUSH_L1D | + CPUID_7_0_EDX_ARCH_CAPABILITIES | + CPUID_7_0_EDX_CORE_CAPABILITY | + CPUID_7_0_EDX_SPEC_CTRL_SSBD); + } + break; + case 0x80000008: + if (reg == R_EBX) { + return value & ~CPUID_8000_0008_EBX_VIRT_SSBD; + } + break; + } + return value; +} + static int sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, uint8_t *addr, size_t len) @@ -3760,6 +3792,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) klass->launch_finish = sev_snp_launch_finish; klass->launch_update_data = sev_snp_launch_update_data; klass->kvm_init = sev_snp_kvm_init; + x86_klass->mask_cpuid_features = sev_snp_mask_cpuid_features; x86_klass->kvm_type = sev_snp_kvm_type; object_class_property_add(oc, "policy", "uint64", -- Gitee From ad92c880c825cf66c625e88bbd7707e1156a352e Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Tue, 9 Jul 2024 23:10:05 -0500 Subject: [PATCH 114/115] i386/sev: Don't allow automatic fallback to legacy KVM_SEV*_INIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9d38d9dca2a81aaf5752d45d221021ef96d496cd upstream Currently if the 'legacy-vm-type' property of the sev-guest object is 'on', QEMU will attempt to use the newer KVM_SEV_INIT2 kernel interface in conjunction with the newer KVM_X86_SEV_VM and KVM_X86_SEV_ES_VM KVM VM types. This can lead to measurement changes if, for instance, an SEV guest was created on a host that originally had an older kernel that didn't support KVM_SEV_INIT2, but is booted on the same host later on after the host kernel was upgraded. Instead, if legacy-vm-type is 'off', QEMU should fail if the KVM_SEV_INIT2 interface is not provided by the current host kernel. Modify the fallback handling accordingly. In the future, VMSA features and other flags might be added to QEMU which will require legacy-vm-type to be 'off' because they will rely on the newer KVM_SEV_INIT2 interface. It may be difficult to convey to users what values of legacy-vm-type are compatible with which features/options, so as part of this rework, switch legacy-vm-type to a tri-state OnOffAuto option. 'auto' in this case will automatically switch to using the newer KVM_SEV_INIT2, but only if it is required to make use of new VMSA features or other options only available via KVM_SEV_INIT2. Defining 'auto' in this way would avoid inadvertantly breaking compatibility with older kernels since it would only be used in cases where users opt into newer features that are only available via KVM_SEV_INIT2 and newer kernels, and provide better default behavior than the legacy-vm-type=off behavior that was previously in place, so make it the default for 9.1+ machine types. Cc: Daniel P. Berrangé Cc: Paolo Bonzini cc: kvm@vger.kernel.org Signed-off-by: Michael Roth Reviewed-by: Daniel P. Berrangé Link: https://lore.kernel.org/r/20240710041005.83720-1-michael.roth@amd.com Signed-off-by: Paolo Bonzini Signed-off-by: PrithivishS --- hw/i386/pc.c | 2 +- qapi/qom.json | 18 ++++++---- target/i386/sev.c | 85 +++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 83 insertions(+), 22 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 18a1a95094..4e90e227e9 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -81,7 +81,7 @@ GlobalProperty pc_compat_9_0[] = { { TYPE_X86_CPU, "guest-phys-bits", "0" }, - { "sev-guest", "legacy-vm-type", "true" }, + { "sev-guest", "legacy-vm-type", "on" }, }; const size_t pc_compat_9_0_len = G_N_ELEMENTS(pc_compat_9_0); diff --git a/qapi/qom.json b/qapi/qom.json index c9bff57bbc..12e526aeb7 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -904,12 +904,16 @@ # @secret-file: the file guest owner's secret, only support on Hygon # CPUs (since 8.2) # @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM. -# The newer KVM_SEV_INIT2 interface syncs additional vCPU -# state when initializing the VMSA structures, which will -# result in a different guest measurement. Set this to -# maintain compatibility with older QEMU or kernel versions -# that rely on legacy KVM_SEV_INIT behavior. -# (default: false) (since 9.1) +# The newer KVM_SEV_INIT2 interface, from Linux >= 6.10, syncs +# additional vCPU state when initializing the VMSA structures, +# which will result in a different guest measurement. Set +# this to 'on' to force compatibility with older QEMU or kernel +# versions that rely on legacy KVM_SEV_INIT behavior. 'auto' +# will behave identically to 'on', but will automatically +# switch to using KVM_SEV_INIT2 if the user specifies any +# additional options that require it. If set to 'off', QEMU +# will require KVM_SEV_INIT2 unconditionally. +# (default: off) (since 9.1) # # Since: 2.12 ## @@ -922,7 +926,7 @@ '*user-id': 'str', '*secret-header-file': 'str', '*secret-file': 'str', - '*legacy-vm-type': 'bool' } } + '*legacy-vm-type': 'OnOffAuto' } } ## # @SevSnpGuestProperties: diff --git a/target/i386/sev.c b/target/i386/sev.c index 99508160d3..c1799143dc 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -152,7 +152,7 @@ struct SevGuestState { uint32_t policy; char *dh_cert_file; char *session_file; - bool legacy_vm_type; + OnOffAuto legacy_vm_type; char *user_id; char *secret_header_file; char *secret_file; @@ -1514,6 +1514,17 @@ sev_vm_state_change(void *opaque, bool running, RunState state) } } +/* + * This helper is to examine sev-guest properties and determine if any options + * have been set which rely on the newer KVM_SEV_INIT2 interface and associated + * KVM VM types. + */ +static bool sev_init2_required(SevGuestState *sev_guest) +{ + /* Currently no KVM_SEV_INIT2-specific options are exposed via QEMU */ + return false; +} + static inline bool check_blob_length(size_t value) { if (value > SEV_FW_BLOB_MAX_SIZE) { @@ -1626,14 +1637,39 @@ static int sev_kvm_type(X86ConfidentialGuest *cg) goto out; } + /* These are the only cases where legacy VM types can be used. */ + if (sev_guest->legacy_vm_type == ON_OFF_AUTO_ON || + (sev_guest->legacy_vm_type == ON_OFF_AUTO_AUTO && + !sev_init2_required(sev_guest))) { + sev_common->kvm_type = KVM_X86_DEFAULT_VM; + goto out; + } + + /* + * Newer VM types are required, either explicitly via legacy-vm-type=on, or + * implicitly via legacy-vm-type=auto along with additional sev-guest + * properties that require the newer VM types. + */ kvm_type = (sev_guest->policy & SEV_POLICY_ES) ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; - if (kvm_is_vm_type_supported(kvm_type) && !sev_guest->legacy_vm_type) { - sev_common->kvm_type = kvm_type; - } else { - sev_common->kvm_type = KVM_X86_DEFAULT_VM; + if (!kvm_is_vm_type_supported(kvm_type)) { + if (sev_guest->legacy_vm_type == ON_OFF_AUTO_AUTO) { + error_report("SEV: host kernel does not support requested %s VM type, which is required " + "for the set of options specified. To allow use of the legacy " + "KVM_X86_DEFAULT_VM VM type, please disable any options that are not " + "compatible with the legacy VM type, or upgrade your kernel.", + kvm_type == KVM_X86_SEV_VM ? "KVM_X86_SEV_VM" : "KVM_X86_SEV_ES_VM"); + } else { + error_report("SEV: host kernel does not support requested %s VM type. To allow use of " + "the legacy KVM_X86_DEFAULT_VM VM type, the 'legacy-vm-type' argument " + "must be set to 'on' or 'auto' for the sev-guest object.", + kvm_type == KVM_X86_SEV_VM ? "KVM_X86_SEV_VM" : "KVM_X86_SEV_ES_VM"); + } + + return -1; } + sev_common->kvm_type = kvm_type; out: return sev_common->kvm_type; } @@ -1751,14 +1787,24 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) } } } else { - if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) { + switch (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common))) { + case KVM_X86_DEFAULT_VM: cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT; ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error); - } else { + break; + case KVM_X86_SEV_VM: + case KVM_X86_SEV_ES_VM: + case KVM_X86_SNP_VM: { struct kvm_sev_init args = { 0 }; ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_INIT2, &args, &fw_error); + break; + } + default: + error_setg(errp, "%s: host kernel does not support the requested SEV configuration.", + __func__); + return -1; } } @@ -3504,14 +3550,23 @@ sev_guest_set_session_file(Object *obj, const char *value, Error **errp) SEV_GUEST(obj)->session_file = g_strdup(value); } -static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) +static void sev_guest_get_legacy_vm_type(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) { - return SEV_GUEST(obj)->legacy_vm_type; + SevGuestState *sev_guest = SEV_GUEST(obj); + OnOffAuto legacy_vm_type = sev_guest->legacy_vm_type; + + visit_type_OnOffAuto(v, name, &legacy_vm_type, errp); } -static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) +static void sev_guest_set_legacy_vm_type(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) { - SEV_GUEST(obj)->legacy_vm_type = value; + SevGuestState *sev_guest = SEV_GUEST(obj); + + visit_type_OnOffAuto(v, name, &sev_guest->legacy_vm_type, errp); } static void @@ -3552,9 +3607,9 @@ sev_guest_class_init(ObjectClass *oc, void *data) sev_guest_set_secret_file); object_class_property_set_description(oc, "secret-file", "file of the guest owner's secret"); - object_class_property_add_bool(oc, "legacy-vm-type", - sev_guest_get_legacy_vm_type, - sev_guest_set_legacy_vm_type); + object_class_property_add(oc, "legacy-vm-type", "OnOffAuto", + sev_guest_get_legacy_vm_type, + sev_guest_set_legacy_vm_type, NULL, NULL); object_class_property_set_description(oc, "legacy-vm-type", "use legacy VM type to maintain measurement compatibility with older QEMU or kernel versions."); } @@ -3570,6 +3625,8 @@ sev_guest_instance_init(Object *obj) object_property_add_uint32_ptr(obj, "policy", &sev_guest->policy, OBJ_PROP_FLAG_READWRITE); object_apply_compat_props(obj); + + sev_guest->legacy_vm_type = ON_OFF_AUTO_AUTO; } /* guest info specific sev/sev-es */ -- Gitee From 5a62543c6fda391aa2b865969b937e742d75f662 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 2 Aug 2024 01:43:37 +0200 Subject: [PATCH 115/115] target/i386: SEV: fix mismatch in vcek-disabled property name commit d4392415c328f83b2e30517a3561be523874f441 upstream. The vcek-disabled property of the sev-snp-guest object is misspelled vcek-required (which I suppose would use the opposite polarity) in the call to object_class_property_add_bool(). Fix it. Reported-by: Zixi Chen Reviewed-by: Pankaj Gupta Signed-off-by: Paolo Bonzini Signed-off-by: PrithivishS --- target/i386/sev.c | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index c1799143dc..75646550d2 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -3668,7 +3668,25 @@ sev_snp_guest_set_guest_visible_workarounds(Object *obj, const char *value, { SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); struct kvm_sev_snp_launch_start *start = &sev_snp_guest->kvm_start_conf; - g_autofree guchar *blob; +#if 0 +#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= 120100 + g_autofree guchar *blob ; +#else + g_autofree guchar *blob = NULL; +#endif +#endif + +#if (__GNUC__ > 12) || \ + (__GNUC__ == 12 && __GNUC_MINOR__ > 1) || \ + (__GNUC__ == 12 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ >= 0) + + g_autofree guchar *blob ; +#else + + g_autofree guchar *blob = NULL; + +#endif + gsize len; g_free(sev_snp_guest->guest_visible_workarounds); @@ -3814,7 +3832,24 @@ sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp) { SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; +#if 0 +#if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= 120100 g_autofree guchar *blob; +#else + g_autofree guchar *blob = NULL; +#endif +#endif + +#if (__GNUC__ > 12) || \ + (__GNUC__ == 12 && __GNUC_MINOR__ > 1) || \ + (__GNUC__ == 12 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ >= 0) + + g_autofree guchar *blob ; +#else + + g_autofree guchar *blob = NULL; + +#endif gsize len; g_free(sev_snp_guest->host_data); @@ -3867,7 +3902,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) object_class_property_add_bool(oc, "author-key-enabled", sev_snp_guest_get_author_key_enabled, sev_snp_guest_set_author_key_enabled); - object_class_property_add_bool(oc, "vcek-required", + object_class_property_add_bool(oc, "vcek-disabled", sev_snp_guest_get_vcek_disabled, sev_snp_guest_set_vcek_disabled); object_class_property_add_str(oc, "host-data", -- Gitee