diff --git a/BinDir.tar.gz b/BinDir.tar.gz index 0a73a153b39c915f5629cea943239196bdb049fe..326fcb51c3998db1344613bba4f6cdbf0f858e3d 100644 Binary files a/BinDir.tar.gz and b/BinDir.tar.gz differ diff --git a/Currently-while-kvm-and-qemu-can-not-handle-some-kvm.patch b/Currently-while-kvm-and-qemu-can-not-handle-some-kvm.patch new file mode 100644 index 0000000000000000000000000000000000000000..6b662075670f5b73ff590d750f671bb41313c1cc --- /dev/null +++ b/Currently-while-kvm-and-qemu-can-not-handle-some-kvm.patch @@ -0,0 +1,27 @@ +From 59f038d21c1901245ba0be417f6285cec465d6c1 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 9 Feb 2022 11:24:32 +0800 +Subject: [PATCH] Currently, while kvm and qemu can not handle some kvm exit, + qemu will do vm_stop, which will make vm in pause state. This action make vm + unrecoverable, so send guest panic to libvirt instead. + +--- + accel/kvm/kvm-all.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index e39a810a4e..33f4c6d547 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2993,7 +2993,7 @@ int kvm_cpu_exec(CPUState *cpu) + + if (ret < 0) { + cpu_dump_state(cpu, stderr, CPU_DUMP_CODE); +- vm_stop(RUN_STATE_INTERNAL_ERROR); ++ qemu_system_guest_panicked(cpu_get_crash_info(cpu)); + } + + qatomic_set(&cpu->exit_request, 0); +-- +2.27.0 + diff --git a/accel-kvm-Extract-common-KVM-vCPU-creation-parking-c.patch b/accel-kvm-Extract-common-KVM-vCPU-creation-parking-c.patch new file mode 100644 index 0000000000000000000000000000000000000000..d68e7f54f43f58b3be56a880de48e15b7ebd5a2f --- /dev/null +++ b/accel-kvm-Extract-common-KVM-vCPU-creation-parking-c.patch @@ -0,0 +1,147 @@ +From 6999ced63ca3bb05a1cbc4a667bd9fd27eeaeaee Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 9 Sep 2023 00:04:04 +0000 +Subject: [PATCH] accel/kvm: Extract common KVM vCPU {creation,parking} code + +KVM vCPU creation is done once during the initialization of the VM 
when Qemu +threads are spawned. This is common to all the architectures. If the architecture +supports vCPU hot-{un}plug then this KVM vCPU creation could be deferred to +later point as well. Some architectures might in any case create KVM vCPUs for +the yet-to-be plugged vCPUs (i.e. QOM Object & thread do not exist) during VM +init time and park them. + +Hot-unplug of vCPU results in destruction of the vCPU objects in QOM but +the KVM vCPU objects in the Host KVM are not destroyed and their representative +KVM vCPU objects in Qemu are parked. + +Signed-off-by: Salil Mehta +--- + accel/kvm/kvm-all.c | 61 ++++++++++++++++++++++++++++++++++---------- + include/sysemu/kvm.h | 2 ++ + 2 files changed, 49 insertions(+), 14 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index d900df93a4..6d503aa614 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -136,6 +136,7 @@ static QemuMutex kml_slots_lock; + #define kvm_slots_unlock() qemu_mutex_unlock(&kml_slots_lock) + + static void kvm_slot_init_dirty_bitmap(KVMSlot *mem); ++static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id); + + static inline void kvm_resample_fd_remove(int gsi) + { +@@ -324,11 +325,51 @@ err: + return ret; + } + ++void kvm_park_vcpu(CPUState *cpu) ++{ ++ unsigned long vcpu_id = cpu->cpu_index; ++ struct KVMParkedVcpu *vcpu; ++ ++ vcpu = g_malloc0(sizeof(*vcpu)); ++ vcpu->vcpu_id = vcpu_id; ++ vcpu->kvm_fd = cpu->kvm_fd; ++ QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); ++} ++ ++int kvm_create_vcpu(CPUState *cpu) ++{ ++ unsigned long vcpu_id = cpu->cpu_index; ++ KVMState *s = kvm_state; ++ int ret; ++ ++ DPRINTF("kvm_create_vcpu\n"); ++ ++ /* check if the KVM vCPU already exist but is parked */ ++ ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu)); ++ if (ret > 0) { ++ goto found; ++ } ++ ++ /* create a new KVM vcpu */ ++ ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id); ++ if (ret < 0) { ++ return ret; ++ } ++ ++found: ++ cpu->vcpu_dirty = 
true; ++ cpu->kvm_fd = ret; ++ cpu->kvm_state = s; ++ cpu->dirty_pages = 0; ++ cpu->throttle_us_per_full = 0; ++ ++ return 0; ++} ++ + static int do_kvm_destroy_vcpu(CPUState *cpu) + { + KVMState *s = kvm_state; + long mmap_size; +- struct KVMParkedVcpu *vcpu = NULL; + int ret = 0; + + DPRINTF("kvm_destroy_vcpu\n"); +@@ -357,10 +398,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu) + } + } + +- vcpu = g_malloc0(sizeof(*vcpu)); +- vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); +- vcpu->kvm_fd = cpu->kvm_fd; +- QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); ++ kvm_park_vcpu(cpu); + err: + return ret; + } +@@ -388,7 +426,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id) + } + } + +- return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id); ++ return -1; + } + + int kvm_init_vcpu(CPUState *cpu, Error **errp) +@@ -399,19 +437,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) + + trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); + +- ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu)); ++ ret = kvm_create_vcpu(cpu); + if (ret < 0) { +- error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)", ++ error_setg_errno(errp, -ret, ++ "kvm_init_vcpu: kvm_create_vcpu failed (%lu)", + kvm_arch_vcpu_id(cpu)); + goto err; + } + +- cpu->kvm_fd = ret; +- cpu->kvm_state = s; +- cpu->vcpu_dirty = true; +- cpu->dirty_pages = 0; +- cpu->throttle_us_per_full = 0; +- + mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); + if (mmap_size < 0) { + ret = mmap_size; +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index b46d6203b4..e534411ddc 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -434,6 +434,8 @@ void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len); + + int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr, + hwaddr *phys_addr); ++int kvm_create_vcpu(CPUState *cpu); ++void kvm_park_vcpu(CPUState *cpu); + + #endif /* NEED_CPU_H */ + +-- +2.27.0 + diff --git 
a/accel-kvm-Use-correct-id-for-parked-vcpu.patch b/accel-kvm-Use-correct-id-for-parked-vcpu.patch new file mode 100644 index 0000000000000000000000000000000000000000..ec759c7697e70a45b61b8c7ba2e27a2be4c35b22 --- /dev/null +++ b/accel-kvm-Use-correct-id-for-parked-vcpu.patch @@ -0,0 +1,32 @@ +From 9de26d69c52db67f48619ad20b8cb9d8ee71e42c Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 15:42:57 +0800 +Subject: [PATCH] accel/kvm: Use correct id for parked vcpu + +kvm_arch_vcpu_id is correct for all platforms. + +Signed-off-by: Keqian Zhu +--- + accel/kvm/kvm-all.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 6d503aa614..75a3075c14 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -327,11 +327,10 @@ err: + + void kvm_park_vcpu(CPUState *cpu) + { +- unsigned long vcpu_id = cpu->cpu_index; + struct KVMParkedVcpu *vcpu; + + vcpu = g_malloc0(sizeof(*vcpu)); +- vcpu->vcpu_id = vcpu_id; ++ vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); + vcpu->kvm_fd = cpu->kvm_fd; + QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); + } +-- +2.27.0 + diff --git a/acpi-cpu-Add-cpu_cppc-building-support.patch b/acpi-cpu-Add-cpu_cppc-building-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..2b045f296d183ddf356f8fbc54d8ddd34780f121 --- /dev/null +++ b/acpi-cpu-Add-cpu_cppc-building-support.patch @@ -0,0 +1,72 @@ +From c75a0102a1bb00190b07b06ede8b1f9fa0bdaa3c Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 2 Apr 2024 16:52:10 +0800 +Subject: [PATCH] acpi/cpu: Add cpu_cppc building support + +Signed-off-by: Keqian Zhu +--- + hw/acpi/cpu.c | 8 +++++++- + hw/i386/acpi-build.c | 2 +- + include/hw/acpi/cpu.h | 6 +++++- + 3 files changed, 13 insertions(+), 3 deletions(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index cf0c7e8538..c8c11e51c6 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -342,7 +342,9 @@ const VMStateDescription 
vmstate_cpu_hotplug = { + #define CPU_FW_EJECT_EVENT "CEJF" + + void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, +- build_madt_cpu_fn build_madt_cpu, hwaddr base_addr, ++ build_madt_cpu_fn build_madt_cpu, ++ build_cpu_cppc_fn build_cpu_cppc, ++ hwaddr base_addr, + const char *res_root, + const char *event_handler_method, + AmlRegionSpace rs) +@@ -668,6 +670,10 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + aml_append(dev, aml_name_decl("_UID", uid)); + } + ++ if (build_cpu_cppc) { ++ build_cpu_cppc(i, arch_ids->len, dev); ++ } ++ + method = aml_method("_STA", 0, AML_SERIALIZED); + aml_append(method, aml_return(aml_call1(CPU_STS_METHOD, uid))); + aml_append(dev, method); +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index db4ca8a66a..e10799ecc6 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -1545,7 +1545,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, + .smi_path = pm->smi_on_cpuhp ? 
"\\_SB.PCI0.SMI0.SMIC" : NULL, + .fw_unplugs_cpu = pm->smi_on_cpu_unplug, + }; +- build_cpus_aml(dsdt, machine, opts, pc_madt_cpu_entry, ++ build_cpus_aml(dsdt, machine, opts, pc_madt_cpu_entry, NULL, + pm->cpu_hp_io_base, "\\_SB.PCI0", "\\_GPE._E02", + AML_SYSTEM_IO); + } +diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h +index 76bc7eb251..b31a2e50d9 100644 +--- a/include/hw/acpi/cpu.h ++++ b/include/hw/acpi/cpu.h +@@ -59,8 +59,12 @@ typedef struct CPUHotplugFeatures { + typedef void (*build_madt_cpu_fn)(int uid, const CPUArchIdList *apic_ids, + GArray *entry, bool force_enabled); + ++typedef void (*build_cpu_cppc_fn)(int uid, int num_cpu, Aml *dev); ++ + void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, +- build_madt_cpu_fn build_madt_cpu, hwaddr base_addr, ++ build_madt_cpu_fn build_madt_cpu, ++ build_cpu_cppc_fn build_cpu_cppc, ++ hwaddr base_addr, + const char *res_root, + const char *event_handler_method, + AmlRegionSpace rs); +-- +2.27.0 + diff --git a/acpi-cpu-Fix-cpu_hotplug_hw_init.patch b/acpi-cpu-Fix-cpu_hotplug_hw_init.patch new file mode 100644 index 0000000000000000000000000000000000000000..bca3afd0faa559c7881fd251b8d164e90e1e5b9f --- /dev/null +++ b/acpi-cpu-Fix-cpu_hotplug_hw_init.patch @@ -0,0 +1,36 @@ +From 14c4062c4acc7d417d163276b65e59073ba18eeb Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 14:51:18 +0800 +Subject: [PATCH] acpi/cpu: Fix cpu_hotplug_hw_init() + +For the present but disabled vCPUs, they will be released after +cpu_hotplug_hw_init(), we should not assign it to AcpiCpuStatus. 
+ +Signed-off-by: Keqian Zhu +--- + hw/acpi/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index c922c380aa..b258396e01 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -229,7 +229,6 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, + for (i = 0; i < id_list->len; i++) { + struct CPUState *cpu = CPU(id_list->cpus[i].cpu); + if (qemu_present_cpu(cpu)) { +- state->devs[i].cpu = cpu; + state->devs[i].is_present = true; + } else { + if (qemu_persistent_cpu(cpu)) { +@@ -240,6 +239,7 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, + } + + if (qemu_enabled_cpu(cpu)) { ++ state->devs[i].cpu = cpu; + state->devs[i].is_enabled = true; + } else { + state->devs[i].is_enabled = false; +-- +2.27.0 + diff --git a/acpi-ged-Init-cpu-hotplug-only-when-machine-support-.patch b/acpi-ged-Init-cpu-hotplug-only-when-machine-support-.patch new file mode 100644 index 0000000000000000000000000000000000000000..514292717255b282761e77810dbdcf922f8230d6 --- /dev/null +++ b/acpi-ged-Init-cpu-hotplug-only-when-machine-support-.patch @@ -0,0 +1,47 @@ +From 6e17d32d6df25d4fac1a31da61d89e0bb9c8c7da Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 22:20:20 +0800 +Subject: [PATCH] acpi/ged: Init cpu hotplug only when machine support it + +Signed-off-by: Keqian Zhu +--- + hw/acpi/generic_event_device.c | 16 ++++++++++------ + 1 file changed, 10 insertions(+), 6 deletions(-) + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index 0266733a54..6e4f5f075f 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -403,6 +403,7 @@ static void acpi_ged_initfn(Object *obj) + AcpiGedState *s = ACPI_GED(dev); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + GEDState *ged_st = &s->ged_state; ++ MachineClass *mc; + + memory_region_init_io(&ged_st->evt, obj, &ged_evt_ops, ged_st, + TYPE_ACPI_GED, ACPI_GED_EVT_SEL_LEN); +@@ -427,12 +428,15 @@ static void 
acpi_ged_initfn(Object *obj) + TYPE_ACPI_GED "-regs", ACPI_GED_REG_COUNT); + sysbus_init_mmio(sbd, &ged_st->regs); + +- s->cpuhp.device = OBJECT(s); +- memory_region_init(&s->container_cpuhp, OBJECT(dev), "cpuhp container", +- ACPI_CPU_HOTPLUG_REG_LEN); +- sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container_cpuhp); +- cpu_hotplug_hw_init(&s->container_cpuhp, OBJECT(dev), +- &s->cpuhp_state, 0); ++ mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (mc->possible_cpu_arch_ids) { ++ s->cpuhp.device = OBJECT(s); ++ memory_region_init(&s->container_cpuhp, OBJECT(dev), "cpuhp container", ++ ACPI_CPU_HOTPLUG_REG_LEN); ++ sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container_cpuhp); ++ cpu_hotplug_hw_init(&s->container_cpuhp, OBJECT(dev), ++ &s->cpuhp_state, 0); ++ } + } + + static void acpi_ged_class_init(ObjectClass *class, void *data) +-- +2.27.0 + diff --git a/acpi-ged-Remove-cpuhp-field-of-ged.patch b/acpi-ged-Remove-cpuhp-field-of-ged.patch new file mode 100644 index 0000000000000000000000000000000000000000..760ad92d47604afba039502a7ff07598ac3d83fb --- /dev/null +++ b/acpi-ged-Remove-cpuhp-field-of-ged.patch @@ -0,0 +1,40 @@ +From 7af2722536b4b0d80f6c508066e8e77158869923 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 23:34:01 +0800 +Subject: [PATCH] acpi/ged: Remove cpuhp field of ged + +It's unused. 
+ +Signed-off-by: Keqian Zhu +--- + hw/acpi/generic_event_device.c | 1 - + include/hw/acpi/generic_event_device.h | 1 - + 2 files changed, 2 deletions(-) + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index 6e4f5f075f..4731a614a3 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -430,7 +430,6 @@ static void acpi_ged_initfn(Object *obj) + + mc = MACHINE_GET_CLASS(qdev_get_machine()); + if (mc->possible_cpu_arch_ids) { +- s->cpuhp.device = OBJECT(s); + memory_region_init(&s->container_cpuhp, OBJECT(dev), "cpuhp container", + ACPI_CPU_HOTPLUG_REG_LEN); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container_cpuhp); +diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h +index a803ea818e..90fc41cbb8 100644 +--- a/include/hw/acpi/generic_event_device.h ++++ b/include/hw/acpi/generic_event_device.h +@@ -110,7 +110,6 @@ struct AcpiGedState { + MemoryRegion container_memhp; + CPUHotplugState cpuhp_state; + MemoryRegion container_cpuhp; +- AcpiCpuHotplug cpuhp; + GEDState ged_state; + uint32_t ged_event_bitmap; + qemu_irq irq; +-- +2.27.0 + diff --git a/arm-acpi-Enable-ACPI-support-for-vcpu-hotplug.patch b/arm-acpi-Enable-ACPI-support-for-vcpu-hotplug.patch new file mode 100644 index 0000000000000000000000000000000000000000..0296a6428ac4fb1666e518b9ec80ca20f08fc8a6 --- /dev/null +++ b/arm-acpi-Enable-ACPI-support-for-vcpu-hotplug.patch @@ -0,0 +1,51 @@ +From 37aab238363c8242aa76853396c4f272b5508bca Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Mon, 8 Jun 2020 15:25:35 +0100 +Subject: [PATCH] arm/acpi: Enable ACPI support for vcpu hotplug + +ACPI is required to interface QEMU with the guest. Roughly falls into below +cases, + +1. Convey the possible vcpus config at the machine init time to the guest + using various DSDT tables like MADT etc. +2. Convey vcpu hotplug events to guest(using GED) +3. 
Assist in evaluation of various ACPI methods(like _EVT, _STA, _OST, _EJ0, + _MAT etc.) +4. Provides ACPI cpu hotplug state and 12 Byte memory mapped cpu hotplug + control register interface to the OSPM/guest corresponding to each possible + vcpu. The register interface consists of various R/W fields and their + handling operations. These are called whenever register fields or memory + regions are accessed(i.e. read or written) by OSPM whenever it evaluates + various ACPI methods. + +Note: a lot of this framework code is inherited from the changes already done for + x86 but still some minor changes are required to make it compatible with + ARM64. + +This patch enables the ACPI support for virtual cpu hotplug. ACPI changes +required will follow in subsequent patches. + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig +index 3ada335a24..c0a7d0bd58 100644 +--- a/hw/arm/Kconfig ++++ b/hw/arm/Kconfig +@@ -29,6 +29,7 @@ config ARM_VIRT + select ACPI_HW_REDUCED + select ACPI_APEI + select ACPI_VIOT ++ select ACPI_CPU_HOTPLUG + select VIRTIO_MEM_SUPPORTED + select ACPI_CXL + select ACPI_HMAT +-- +2.27.0 + diff --git a/arm-acpi-Fix-when-make-qemu-system-aarch64-at-x86_64.patch b/arm-acpi-Fix-when-make-qemu-system-aarch64-at-x86_64.patch new file mode 100644 index 0000000000000000000000000000000000000000..1dac436c9f2e12709c9f97fe198c9ffd7115265e --- /dev/null +++ b/arm-acpi-Fix-when-make-qemu-system-aarch64-at-x86_64.patch @@ -0,0 +1,98 @@ +From d269fb9a41abf5888a9bfeec2f8d1684b2d4dfb0 Mon Sep 17 00:00:00 2001 +From: saarloos <9090-90-90-9090@163.com> +Date: Sat, 30 Mar 2024 21:32:27 +0800 +Subject: [PATCH] arm/acpi: Fix when make qemu-system-aarch64 at x86_64 host + bios_tables_test fail reason: __aarch64__ macro let build_pptt at x86_64 and + aarch64 host build different 
function that let bios_tables_test fail. + +Signed-off-by: Yangzi Zhang +Signed-off-by: Yuan Zhang +--- + hw/acpi/aml-build.c | 5 +---- + hw/arm/virt-acpi-build.c | 2 +- + include/hw/acpi/aml-build.h | 5 +++-- + 3 files changed, 5 insertions(+), 7 deletions(-) + +diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c +index 714498165a..bf9c59f544 100644 +--- a/hw/acpi/aml-build.c ++++ b/hw/acpi/aml-build.c +@@ -2016,7 +2016,6 @@ static void build_processor_hierarchy_node(GArray *tbl, uint32_t flags, + } + } + +-#ifdef __aarch64__ + /* + * ACPI spec, Revision 6.3 + * 5.2.29.2 Cache Type Structure (Type 1) +@@ -2072,7 +2071,7 @@ static void build_cache_hierarchy_node(GArray *tbl, uint32_t next_level, + * ACPI spec, Revision 6.3 + * 5.2.29 Processor Properties Topology Table (PPTT) + */ +-void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, ++void build_pptt_arm(GArray *table_data, BIOSLinker *linker, MachineState *ms, + const char *oem_id, const char *oem_table_id) + { + MachineClass *mc = MACHINE_GET_CLASS(ms); +@@ -2172,7 +2171,6 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, + acpi_table_end(linker, &table); + } + +-#else + /* + * ACPI spec, Revision 6.3 + * 5.2.29 Processor Properties Topology Table (PPTT) +@@ -2248,7 +2246,6 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, + + acpi_table_end(linker, &table); + } +-#endif + + /* build rev1/rev3/rev5.1/rev6.0 FADT */ + void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 3cb50bdc65..48fc77fb83 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -1024,7 +1024,7 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) + + if (!vmc->no_cpu_topology) { + acpi_add_table(table_offsets, tables_blob); +- build_pptt(tables_blob, tables->linker, ms, ++ build_pptt_arm(tables_blob, tables->linker, ms, + vms->oem_id, 
vms->oem_table_id); + } + +diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h +index 200cb113de..7281c281f6 100644 +--- a/include/hw/acpi/aml-build.h ++++ b/include/hw/acpi/aml-build.h +@@ -221,7 +221,6 @@ struct AcpiBuildTables { + BIOSLinker *linker; + } AcpiBuildTables; + +-#ifdef __aarch64__ + /* Definitions of the hardcoded cache info*/ + + typedef enum { +@@ -266,7 +265,6 @@ struct offset_status { + uint32_t l1i_offset; + }; + +-#endif + + typedef + struct CrsRangeEntry { +@@ -542,6 +540,9 @@ void build_slit(GArray *table_data, BIOSLinker *linker, MachineState *ms, + void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, + const char *oem_id, const char *oem_table_id); + ++void build_pptt_arm(GArray *table_data, BIOSLinker *linker, MachineState *ms, ++ const char *oem_id, const char *oem_table_id); ++ + void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, + const char *oem_id, const char *oem_table_id); + +-- +2.27.0 + diff --git a/arm-cpu-Some-fixes-for-arm_cpu_unrealizefn.patch b/arm-cpu-Some-fixes-for-arm_cpu_unrealizefn.patch new file mode 100644 index 0000000000000000000000000000000000000000..1b70c456bf22b5738d1f5d172f3ccacd0fc58eb5 --- /dev/null +++ b/arm-cpu-Some-fixes-for-arm_cpu_unrealizefn.patch @@ -0,0 +1,78 @@ +From b394996c99c0af0de870a5d79fff69f01d504b0c Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 14:47:07 +0800 +Subject: [PATCH] arm/cpu: Some fixes for arm_cpu_unrealizefn() + +Some minor fixes for arm_cpu_unrealizefn(). 
+ +Signed-off-by: Keqian Zhu +--- + target/arm/cpu.c | 33 +++++++++++++++++++++------------ + 1 file changed, 21 insertions(+), 12 deletions(-) + +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 501f88eb2f..9dd61c10ea 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2418,6 +2418,7 @@ static void arm_cpu_unrealizefn(DeviceState *dev) + CPUState *cs = CPU(dev); + bool has_secure; + ++#ifndef CONFIG_USER_ONLY + has_secure = cpu->has_el3 || arm_feature(env, ARM_FEATURE_M_SECURITY); + + /* rock 'n' un-roll, whatever happened in the arm_cpu_realizefn cleanly */ +@@ -2433,30 +2434,38 @@ static void arm_cpu_unrealizefn(DeviceState *dev) + if (has_secure) { + cpu_address_space_destroy(cs, ARMASIdx_S); + } ++#endif + + destroy_cpreg_list(cpu); + arm_cpu_unregister_gdb_regs(cpu); + unregister_cp_regs_for_features(cpu); + ++#ifndef CONFIG_USER_ONLY ++ if (tcg_enabled() && cpu_isar_feature(aa64_rme, cpu)) { ++ arm_unregister_el_change_hooks(cpu); ++ } ++#endif ++ + if (cpu->sau_sregion && arm_feature(env, ARM_FEATURE_M_SECURITY)) { + g_free(env->sau.rbar); + g_free(env->sau.rlar); + } + + if (arm_feature(env, ARM_FEATURE_PMSA) && +- arm_feature(env, ARM_FEATURE_V7) && +- cpu->pmsav7_dregion) { +- if (arm_feature(env, ARM_FEATURE_V8)) { +- g_free(env->pmsav8.rbar[M_REG_NS]); +- g_free(env->pmsav8.rlar[M_REG_NS]); +- if (arm_feature(env, ARM_FEATURE_M_SECURITY)) { +- g_free(env->pmsav8.rbar[M_REG_S]); +- g_free(env->pmsav8.rlar[M_REG_S]); ++ arm_feature(env, ARM_FEATURE_V7)) { ++ if (cpu->pmsav7_dregion) { ++ if (arm_feature(env, ARM_FEATURE_V8)) { ++ g_free(env->pmsav8.rbar[M_REG_NS]); ++ g_free(env->pmsav8.rlar[M_REG_NS]); ++ if (arm_feature(env, ARM_FEATURE_M_SECURITY)) { ++ g_free(env->pmsav8.rbar[M_REG_S]); ++ g_free(env->pmsav8.rlar[M_REG_S]); ++ } ++ } else { ++ g_free(env->pmsav7.drbar); ++ g_free(env->pmsav7.drsr); ++ g_free(env->pmsav7.dracr); + } +- } else { +- g_free(env->pmsav7.drbar); +- g_free(env->pmsav7.drsr); +- g_free(env->pmsav7.dracr); + } 
+ if (cpu->pmsav8r_hdregion) { + g_free(env->pmsav8.hprbar); +-- +2.27.0 + diff --git a/arm-kvm-Set-psci-smccc-filter-only-with-vcpu-hotplug.patch b/arm-kvm-Set-psci-smccc-filter-only-with-vcpu-hotplug.patch new file mode 100644 index 0000000000000000000000000000000000000000..d457c6f33275472cb1e5a546a2822de4b4b979ee --- /dev/null +++ b/arm-kvm-Set-psci-smccc-filter-only-with-vcpu-hotplug.patch @@ -0,0 +1,72 @@ +From 85e8e1ee8560e587845142342f81b218e44cba6a Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 22:07:33 +0800 +Subject: [PATCH] arm/kvm: Set psci smccc filter only with vcpu hotplug + +The smccc filter mechanism is supported by newer Linux kernels, +don't try to do it unconditionally. + +Signed-off-by: Keqian Zhu +--- + hw/arm/virt.c | 4 +++- + target/arm/kvm.c | 21 ++++++++++++--------- + 2 files changed, 15 insertions(+), 10 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e60f3431f9..38b5d214a1 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2366,8 +2366,10 @@ static void machvirt_init(MachineState *machine) + finalize_gic_version(vms); + if (tcg_enabled() || hvf_enabled() || qtest_enabled() || + (vms->gic_version < VIRT_GIC_VERSION_3)) { +- machine->smp.max_cpus = smp_cpus; + mc->has_hotpluggable_cpus = false; ++ } ++ if (!mc->has_hotpluggable_cpus) { ++ machine->smp.max_cpus = smp_cpus; + warn_report("cpu hotplug feature has been disabled"); + } + +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 66caf9e5e7..19783d567f 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -259,6 +259,7 @@ int kvm_arch_get_default_type(MachineState *ms) + + int kvm_arch_init(MachineState *ms, KVMState *s) + { ++ MachineClass *mc = MACHINE_GET_CLASS(ms); + int ret = 0; + + /* For ARM interrupt delivery is always asynchronous, +@@ -316,15 +317,17 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + * filter in the Host KVM. This is required to support features like + * virtual CPU Hotplug on ARM platforms. 
+ */ +- if (kvm_arm_set_smccc_filter(PSCI_0_2_FN64_CPU_ON, +- KVM_SMCCC_FILTER_FWD_TO_USER)) { +- error_report("CPU On PSCI-to-user-space fwd filter install failed"); +- abort(); +- } +- if (kvm_arm_set_smccc_filter(PSCI_0_2_FN_CPU_OFF, +- KVM_SMCCC_FILTER_FWD_TO_USER)) { +- error_report("CPU Off PSCI-to-user-space fwd filter install failed"); +- abort(); ++ if (mc->has_hotpluggable_cpus && ms->smp.max_cpus > ms->smp.cpus) { ++ if (kvm_arm_set_smccc_filter(PSCI_0_2_FN64_CPU_ON, ++ KVM_SMCCC_FILTER_FWD_TO_USER)) { ++ error_report("CPU On PSCI-to-user-space fwd filter install failed"); ++ mc->has_hotpluggable_cpus = false; ++ } ++ if (kvm_arm_set_smccc_filter(PSCI_0_2_FN_CPU_OFF, ++ KVM_SMCCC_FILTER_FWD_TO_USER)) { ++ error_report("CPU Off PSCI-to-user-space fwd filter install failed"); ++ mc->has_hotpluggable_cpus = false; ++ } + } + + kvm_arm_init_debug(s); +-- +2.27.0 + diff --git a/arm-virt-Add-cpu-hotplug-events-to-GED-during-creati.patch b/arm-virt-Add-cpu-hotplug-events-to-GED-during-creati.patch new file mode 100644 index 0000000000000000000000000000000000000000..61f5f9718628156d742d697fe0736400ffe16faf --- /dev/null +++ b/arm-virt-Add-cpu-hotplug-events-to-GED-during-creati.patch @@ -0,0 +1,67 @@ +From f8914ec04d4d892520aa443eaf8018c80516adee Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sun, 6 Aug 2023 16:27:01 +0000 +Subject: [PATCH] arm/virt: Add cpu hotplug events to GED during creation + +Add CPU Hotplug event to the set of supported ged-events during the creation of +GED device during VM init. Also initialize the memory map for CPU Hotplug +control device used in event exchanges between Qemu/VMM and the guest. 
+ +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 5 ++++- + include/hw/arm/virt.h | 1 + + 2 files changed, 5 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 78ed3c4ba8..155000f22f 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -78,6 +78,7 @@ + #include "hw/mem/pc-dimm.h" + #include "hw/mem/nvdimm.h" + #include "hw/acpi/generic_event_device.h" ++#include "hw/acpi/cpu_hotplug.h" + #include "hw/virtio/virtio-md-pci.h" + #include "hw/virtio/virtio-iommu.h" + #include "hw/char/pl011.h" +@@ -157,6 +158,7 @@ static const MemMapEntry base_memmap[] = { + [VIRT_NVDIMM_ACPI] = { 0x09090000, NVDIMM_ACPI_IO_LEN}, + [VIRT_PVTIME] = { 0x090a0000, 0x00010000 }, + [VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 }, ++ [VIRT_CPUHP_ACPI] = { 0x090c0000, ACPI_CPU_HOTPLUG_REG_LEN}, + [VIRT_MMIO] = { 0x0a000000, 0x00000200 }, + [VIRT_CPUFREQ] = { 0x0b000000, 0x00010000 }, + /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */ +@@ -725,7 +727,7 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms) + DeviceState *dev; + MachineState *ms = MACHINE(vms); + int irq = vms->irqmap[VIRT_ACPI_GED]; +- uint32_t event = ACPI_GED_PWR_DOWN_EVT; ++ uint32_t event = ACPI_GED_PWR_DOWN_EVT | ACPI_GED_CPU_HOTPLUG_EVT; + + if (ms->ram_slots) { + event |= ACPI_GED_MEM_HOTPLUG_EVT; +@@ -741,6 +743,7 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms) + + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_ACPI_GED].base); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, vms->memmap[VIRT_PCDIMM_ACPI].base); ++ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 3, vms->memmap[VIRT_CPUHP_ACPI].base); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(vms->gic, irq)); + + return dev; +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index c2fde0522c..5de0185063 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -76,6 +76,7 @@ enum { + VIRT_PCDIMM_ACPI, + VIRT_ACPI_GED, + VIRT_NVDIMM_ACPI, ++ 
VIRT_CPUHP_ACPI, + VIRT_PVTIME, + VIRT_LOWMEMMAP_LAST, + }; +-- +2.27.0 + diff --git a/arm-virt-Add-update-basic-hot-un-plug-framework.patch b/arm-virt-Add-update-basic-hot-un-plug-framework.patch new file mode 100644 index 0000000000000000000000000000000000000000..ea7c3772eab061766e8df4f16ec75bfb6d399aff --- /dev/null +++ b/arm-virt-Add-update-basic-hot-un-plug-framework.patch @@ -0,0 +1,197 @@ +From 724ab355c047cfb3e970d9ea78577087568eb095 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Fri, 8 May 2020 18:40:19 +0100 +Subject: [PATCH] arm/virt: Add/update basic hot-(un)plug framework + +Add CPU hot-unplug hooks and update hotplug hooks with additional sanity checks +for use in hotplug paths. + +Note, Functional contents of the hooks(now left with TODO comment) shall be +gradually filled in the subsequent patches in an incremental approach to patch +and logic building which would be roughly as follows: +1. (Un-)wiring of interrupts between vCPU<->GIC +2. Sending events to Guest for hot-(un)plug so that guest can take appropriate + actions. +3. Notifying GIC about hot-(un)plug action so that vCPU could be (un-)stitched + to the GIC CPU interface. +4. Updating the Guest with Next boot info for this vCPU in the firmware. 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 104 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index bf385a469c..ed354be326 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -83,6 +83,7 @@ + #include "hw/virtio/virtio-iommu.h" + #include "hw/char/pl011.h" + #include "qemu/guest-random.h" ++#include "qapi/qmp/qdict.h" + + #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ + static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ +@@ -3083,12 +3084,23 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + { + VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); + MachineState *ms = MACHINE(hotplug_dev); ++ MachineClass *mc = MACHINE_GET_CLASS(ms); + ARMCPU *cpu = ARM_CPU(dev); + CPUState *cs = CPU(dev); + CPUArchId *cpu_slot; + int32_t min_cpuid = 0; + int32_t max_cpuid; + ++ if (dev->hotplugged && !vms->acpi_dev) { ++ error_setg(errp, "GED acpi device does not exists"); ++ return; ++ } ++ ++ if (dev->hotplugged && !mc->has_hotpluggable_cpus) { ++ error_setg(errp, "CPU hotplug not supported on this machine"); ++ return; ++ } ++ + /* sanity check the cpu */ + if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { + error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", +@@ -3137,6 +3149,22 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + } + virt_cpu_set_properties(OBJECT(cs), cpu_slot, errp); + ++ /* ++ * Fix the GIC for this new vCPU being plugged. The QOM CPU object for the ++ * new vCPU need to be updated in the corresponding QOM GICv3CPUState object ++ * We also need to re-wire the IRQs for this new CPU object. This update ++ * is limited to the QOM only and does not affects the KVM. Later has ++ * already been pre-sized with possible CPU at VM init time. 
This is a ++ * workaround to the constraints posed by ARM architecture w.r.t supporting ++ * CPU Hotplug. Specification does not exist for the later. ++ * This patch-up is required both for {cold,hot}-plugged vCPUs. Cold-inited ++ * vCPUs have their GIC state initialized during machvit_init(). ++ */ ++ if (vms->acpi_dev) { ++ /* TODO: update GIC about this hotplug change here */ ++ /* TODO: wire the GIC<->CPU irqs */ ++ } ++ + /* + * To give persistent presence view of vCPUs to the guest, ACPI might need + * to fake the presence of the vCPUs to the guest but keep them disabled. +@@ -3148,6 +3176,7 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) + { ++ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); + MachineState *ms = MACHINE(hotplug_dev); + CPUState *cs = CPU(dev); + CPUArchId *cpu_slot; +@@ -3156,10 +3185,81 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index); + cpu_slot->cpu = OBJECT(dev); + ++ /* ++ * Update the ACPI Hotplug state both for vCPUs being {hot,cold}-plugged. ++ * vCPUs can be cold-plugged using '-device' option. For vCPUs being hot ++ * plugged, guest is also notified. ++ */ ++ if (vms->acpi_dev) { ++ /* TODO: update acpi hotplug state. 
Send cpu hotplug event to guest */ ++ /* TODO: register cpu for reset & update F/W info for the next boot */ ++ } ++ + cs->disabled = false; + return; + } + ++static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); ++ ARMCPU *cpu = ARM_CPU(dev); ++ CPUState *cs = CPU(dev); ++ ++ if (!vms->acpi_dev || !dev->realized) { ++ error_setg(errp, "GED does not exists or device is not realized!"); ++ return; ++ } ++ ++ if (!mc->has_hotpluggable_cpus) { ++ error_setg(errp, "CPU hot(un)plug not supported on this machine"); ++ return; ++ } ++ ++ if (cs->cpu_index == first_cpu->cpu_index) { ++ error_setg(errp, "Boot CPU(id%d=%d:%d:%d:%d) hot-unplug not supported", ++ first_cpu->cpu_index, cpu->socket_id, cpu->cluster_id, ++ cpu->core_id, cpu->thread_id); ++ return; ++ } ++ ++ /* TODO: request cpu hotplug from guest */ ++ ++ return; ++} ++ ++static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, ++ Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); ++ MachineState *ms = MACHINE(hotplug_dev); ++ CPUState *cs = CPU(dev); ++ CPUArchId *cpu_slot; ++ ++ if (!vms->acpi_dev || !dev->realized) { ++ error_setg(errp, "GED does not exists or device is not realized!"); ++ return; ++ } ++ ++ cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index); ++ ++ /* TODO: update the acpi cpu hotplug state for cpu hot-unplug */ ++ ++ /* TODO: unwire the gic-cpu irqs here */ ++ /* TODO: update the GIC about this hot unplug change */ ++ ++ /* TODO: unregister cpu for reset & update F/W info for the next boot */ ++ ++ qobject_unref(dev->opts); ++ dev->opts = NULL; ++ ++ cpu_slot->cpu = NULL; ++ cs->disabled = true; ++ ++ return; ++} ++ + static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +@@ -3284,6 +3384,8 @@ static void 
virt_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev, + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_unplug_request(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), + errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ virt_cpu_unplug_request(hotplug_dev, dev, errp); + } else { + error_setg(errp, "device unplug request for unsupported device" + " type: %s", object_get_typename(OBJECT(dev))); +@@ -3297,6 +3399,8 @@ static void virt_machine_device_unplug_cb(HotplugHandler *hotplug_dev, + virt_dimm_unplug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_unplug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ virt_cpu_unplug(hotplug_dev, dev, errp); + } else { + error_setg(errp, "virt: device unplug for unsupported device" + " type: %s", object_get_typename(OBJECT(dev))); +-- +2.27.0 + diff --git a/arm-virt-Changes-to-un-wire-GICC-vCPU-IRQs-during-ho.patch b/arm-virt-Changes-to-un-wire-GICC-vCPU-IRQs-during-ho.patch new file mode 100644 index 0000000000000000000000000000000000000000..61c298d7bec0adbb7fc302343e5b09f94dd947b5 --- /dev/null +++ b/arm-virt-Changes-to-un-wire-GICC-vCPU-IRQs-during-ho.patch @@ -0,0 +1,221 @@ +From a68abeefcbd78daaf7179b922f6b9040b4b63101 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 9 May 2020 15:50:33 +0100 +Subject: [PATCH] arm/virt: Changes to (un)wire GICC<->vCPU IRQs during + hot-(un)plug + +Refactors the existing GIC create code to extract common code to wire the +vcpu<->gic interrupts. This function could be used with cold-plug case and also +used when vCPU is hot-plugged. It also introduces a new function to unwire the +vcpu<->gic interrupts for the vCPU hot-unplug cases. 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 138 ++++++++++++++++++++++++++++------------- + hw/core/gpio.c | 2 +- + include/hw/qdev-core.h | 2 + + 3 files changed, 99 insertions(+), 43 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index ed354be326..97bf4cca11 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -798,6 +798,99 @@ static void create_v2m(VirtMachineState *vms) + vms->msi_controller = VIRT_MSI_CTRL_GICV2M; + } + ++/* ++ * Mapping from the output timer irq lines from the CPU to the GIC PPI inputs ++ * we use for the virt board. ++ */ ++const int timer_irq[] = { ++ [GTIMER_PHYS] = ARCH_TIMER_NS_EL1_IRQ, ++ [GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ, ++ [GTIMER_HYP] = ARCH_TIMER_NS_EL2_IRQ, ++ [GTIMER_SEC] = ARCH_TIMER_S_EL1_IRQ, ++}; ++ ++static void unwire_gic_cpu_irqs(VirtMachineState *vms, CPUState *cs) ++{ ++ MachineState *ms = MACHINE(vms); ++ unsigned int max_cpus = ms->smp.max_cpus; ++ DeviceState *cpudev = DEVICE(cs); ++ DeviceState *gicdev = vms->gic; ++ int cpu = CPU(cs)->cpu_index; ++ int type = vms->gic_version; ++ int irq; ++ ++ for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { ++ qdev_disconnect_gpio_out_named(cpudev, NULL, irq); ++ } ++ ++ if (type != VIRT_GIC_VERSION_2) { ++ qdev_disconnect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", ++ 0); ++ } else if (vms->virt) { ++ qdev_disconnect_gpio_out_named(gicdev, SYSBUS_DEVICE_GPIO_IRQ, ++ cpu + 4 * max_cpus); ++ } ++ ++ /* ++ * RFC: Question: This currently does not takes care of intimating the ++ * devices which might be sitting on system bus. Do we need a ++ * sysbus_disconnect_irq() which also does the job of notification beside ++ * disconnection? 
++ */ ++ qdev_disconnect_gpio_out_named(cpudev, "pmu-interrupt", 0); ++ qdev_disconnect_gpio_out_named(gicdev, SYSBUS_DEVICE_GPIO_IRQ, cpu); ++ qdev_disconnect_gpio_out_named(gicdev, ++ SYSBUS_DEVICE_GPIO_IRQ, cpu + max_cpus); ++ qdev_disconnect_gpio_out_named(gicdev, SYSBUS_DEVICE_GPIO_IRQ, ++ cpu + 2 * max_cpus); ++ qdev_disconnect_gpio_out_named(gicdev, SYSBUS_DEVICE_GPIO_IRQ, ++ cpu + 3 * max_cpus); ++} ++ ++static void wire_gic_cpu_irqs(VirtMachineState *vms, CPUState *cs) ++{ ++ MachineState *ms = MACHINE(vms); ++ unsigned int max_cpus = ms->smp.max_cpus; ++ DeviceState *cpudev = DEVICE(cs); ++ DeviceState *gicdev = vms->gic; ++ int cpu = CPU(cs)->cpu_index; ++ int type = vms->gic_version; ++ SysBusDevice *gicbusdev; ++ int intidbase; ++ int irq; ++ ++ intidbase = NUM_IRQS + cpu * GIC_INTERNAL; ++ ++ for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { ++ qdev_connect_gpio_out(cpudev, irq, ++ qdev_get_gpio_in(gicdev, ++ intidbase + timer_irq[irq])); ++ } ++ ++ gicbusdev = SYS_BUS_DEVICE(gicdev); ++ if (type != VIRT_GIC_VERSION_2) { ++ qemu_irq qirq = qdev_get_gpio_in(gicdev, ++ intidbase + ARCH_GIC_MAINT_IRQ); ++ qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", ++ 0, qirq); ++ } else if (vms->virt) { ++ qemu_irq qirq = qdev_get_gpio_in(gicdev, ++ intidbase + ARCH_GIC_MAINT_IRQ); ++ sysbus_connect_irq(gicbusdev, cpu + 4 * max_cpus, qirq); ++ } ++ ++ qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, ++ qdev_get_gpio_in(gicdev, ++ intidbase + VIRTUAL_PMU_IRQ)); ++ sysbus_connect_irq(gicbusdev, cpu, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); ++ sysbus_connect_irq(gicbusdev, cpu + max_cpus, ++ qdev_get_gpio_in(cpudev, ARM_CPU_FIQ)); ++ sysbus_connect_irq(gicbusdev, cpu + 2 * max_cpus, ++ qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ)); ++ sysbus_connect_irq(gicbusdev, cpu + 3 * max_cpus, ++ qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ)); ++} ++ + static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + { + MachineState *ms = MACHINE(vms); +@@ 
-894,46 +987,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + * and the GIC's IRQ/FIQ/VIRQ/VFIQ interrupt outputs to the CPU's inputs. + */ + for (i = 0; i < smp_cpus; i++) { +- DeviceState *cpudev = DEVICE(qemu_get_cpu(i)); +- int intidbase = NUM_IRQS + i * GIC_INTERNAL; +- /* Mapping from the output timer irq lines from the CPU to the +- * GIC PPI inputs we use for the virt board. +- */ +- const int timer_irq[] = { +- [GTIMER_PHYS] = ARCH_TIMER_NS_EL1_IRQ, +- [GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ, +- [GTIMER_HYP] = ARCH_TIMER_NS_EL2_IRQ, +- [GTIMER_SEC] = ARCH_TIMER_S_EL1_IRQ, +- }; +- +- for (unsigned irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { +- qdev_connect_gpio_out(cpudev, irq, +- qdev_get_gpio_in(vms->gic, +- intidbase + timer_irq[irq])); +- } +- +- if (vms->gic_version != VIRT_GIC_VERSION_2) { +- qemu_irq irq = qdev_get_gpio_in(vms->gic, +- intidbase + ARCH_GIC_MAINT_IRQ); +- qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", +- 0, irq); +- } else if (vms->virt) { +- qemu_irq irq = qdev_get_gpio_in(vms->gic, +- intidbase + ARCH_GIC_MAINT_IRQ); +- sysbus_connect_irq(gicbusdev, i + 4 * max_cpus, irq); +- } +- +- qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, +- qdev_get_gpio_in(vms->gic, intidbase +- + VIRTUAL_PMU_IRQ)); +- +- sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); +- sysbus_connect_irq(gicbusdev, i + max_cpus, +- qdev_get_gpio_in(cpudev, ARM_CPU_FIQ)); +- sysbus_connect_irq(gicbusdev, i + 2 * max_cpus, +- qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ)); +- sysbus_connect_irq(gicbusdev, i + 3 * max_cpus, +- qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ)); ++ wire_gic_cpu_irqs(vms, qemu_get_cpu(i)); + } + + fdt_add_gic_node(vms); +@@ -3162,7 +3216,7 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + */ + if (vms->acpi_dev) { + /* TODO: update GIC about this hotplug change here */ +- /* TODO: wire the GIC<->CPU irqs */ ++ wire_gic_cpu_irqs(vms, cs); + } + + /* +@@ 
-3246,7 +3300,7 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + + /* TODO: update the acpi cpu hotplug state for cpu hot-unplug */ + +- /* TODO: unwire the gic-cpu irqs here */ ++ unwire_gic_cpu_irqs(vms, cs); + /* TODO: update the GIC about this hot unplug change */ + + /* TODO: unregister cpu for reset & update F/W info for the next boot */ +diff --git a/hw/core/gpio.c b/hw/core/gpio.c +index 80d07a6ec9..abb164d5c0 100644 +--- a/hw/core/gpio.c ++++ b/hw/core/gpio.c +@@ -143,7 +143,7 @@ qemu_irq qdev_get_gpio_out_connector(DeviceState *dev, const char *name, int n) + + /* disconnect a GPIO output, returning the disconnected input (if any) */ + +-static qemu_irq qdev_disconnect_gpio_out_named(DeviceState *dev, ++qemu_irq qdev_disconnect_gpio_out_named(DeviceState *dev, + const char *name, int n) + { + char *propname = g_strdup_printf("%s[%d]", +diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h +index 151d968238..2d3661d6cd 100644 +--- a/include/hw/qdev-core.h ++++ b/include/hw/qdev-core.h +@@ -739,6 +739,8 @@ qemu_irq qdev_get_gpio_out_connector(DeviceState *dev, const char *name, int n); + */ + qemu_irq qdev_intercept_gpio_out(DeviceState *dev, qemu_irq icpt, + const char *name, int n); ++qemu_irq qdev_disconnect_gpio_out_named(DeviceState *dev, ++ const char *name, int n); + + BusState *qdev_get_child_bus(DeviceState *dev, const char *name); + +-- +2.27.0 + diff --git a/arm-virt-Consider-has_ged-when-set-mc-has_hotpluggab.patch b/arm-virt-Consider-has_ged-when-set-mc-has_hotpluggab.patch new file mode 100644 index 0000000000000000000000000000000000000000..27ca6d7ab1b918bffda8a3e78beae1626d19d6fb --- /dev/null +++ b/arm-virt-Consider-has_ged-when-set-mc-has_hotpluggab.patch @@ -0,0 +1,73 @@ +From baa26f2fc075522f91c3e9a332fc4fa3f3b167bf Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 22:55:49 +0800 +Subject: [PATCH] arm/virt: Consider has_ged when set mc->has_hotpluggable_cpus + +Vcpu hotplug relies on 
ged device. + +Signed-off-by: Keqian Zhu +--- + hw/arm/virt.c | 26 ++++++++++++++++---------- + 1 file changed, 16 insertions(+), 10 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 38b5d214a1..00e57f2d75 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2357,6 +2357,7 @@ static void machvirt_init(MachineState *machine) + bool has_ged = !vmc->no_ged; + unsigned int smp_cpus = machine->smp.cpus; + unsigned int max_cpus = machine->smp.max_cpus; ++ ObjectClass *cpu_class; + + if (!cpu_type_valid(machine->cpu_type)) { + error_report("mach-virt: CPU type %s not supported", machine->cpu_type); +@@ -2364,14 +2365,6 @@ static void machvirt_init(MachineState *machine) + } + + finalize_gic_version(vms); +- if (tcg_enabled() || hvf_enabled() || qtest_enabled() || +- (vms->gic_version < VIRT_GIC_VERSION_3)) { +- mc->has_hotpluggable_cpus = false; +- } +- if (!mc->has_hotpluggable_cpus) { +- machine->smp.max_cpus = smp_cpus; +- warn_report("cpu hotplug feature has been disabled"); +- } + + possible_cpus = mc->possible_cpu_arch_ids(machine); + +@@ -2501,6 +2494,21 @@ static void machvirt_init(MachineState *machine) + create_fdt(vms); + qemu_log("cpu init start\n"); + ++ cpu_class = object_class_by_name(machine->cpu_type); ++ has_ged = has_ged && firmware_loaded && ++ virt_is_acpi_enabled(vms) && ++ !!object_class_dynamic_cast(cpu_class, TYPE_AARCH64_CPU); ++ if (tcg_enabled() || hvf_enabled() || qtest_enabled() || ++ (vms->gic_version < VIRT_GIC_VERSION_3) || !has_ged) { ++ mc->has_hotpluggable_cpus = false; ++ } ++ if (!mc->has_hotpluggable_cpus) { ++ if (machine->smp.max_cpus > smp_cpus) { ++ warn_report("cpu hotplug feature has been disabled"); ++ } ++ machine->smp.max_cpus = smp_cpus; ++ } ++ + notifier_list_init(&vms->cpuhp_notifiers); + possible_cpus = mc->possible_cpu_arch_ids(machine); + assert(possible_cpus->len == max_cpus); +@@ -2581,8 +2589,6 @@ static void machvirt_init(MachineState *machine) + + create_gic(vms, sysmem); + +- has_ged = has_ged 
&& aarch64 && firmware_loaded && +- virt_is_acpi_enabled(vms); + if (has_ged) { + vms->acpi_dev = create_acpi_ged(vms); + } +-- +2.27.0 + diff --git a/arm-virt-Create-GED-dev-before-disabled-CPU-Objs-are.patch b/arm-virt-Create-GED-dev-before-disabled-CPU-Objs-are.patch new file mode 100644 index 0000000000000000000000000000000000000000..d120fe42a27c540e4c58a345b6425c7278453f26 --- /dev/null +++ b/arm-virt-Create-GED-dev-before-disabled-CPU-Objs-are.patch @@ -0,0 +1,54 @@ +From 028d71744dfeedabfa67d629c71a6ed5e494cc68 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 29 Aug 2023 00:47:05 +0000 +Subject: [PATCH] arm/virt: Create GED dev before *disabled* CPU Objs are + destroyed + +ACPI CPU hotplug state (is_present=_STA.PRESENT, is_enabled=_STA.ENABLED) for +all the possible vCPUs MUST be initialized during machine init. This is done +during the creation of the GED device. VMM/Qemu MUST expose/fake the ACPI state +of the disabled vCPUs to the Guest kernel as 'present' (_STA.PRESENT) always +i.e. ACPI persistent. If the 'disabled' vCPU objects are destroyed before the +GED device has been created then their ACPI hotplug state might not get +initialized correctly as acpi_persistent flag is part of the CPUState. This will +expose wrong status of the unplugged vCPUs to the Guest kernel. + +Hence, moving the GED device creation before disabled vCPU objects get destroyed +as part of the post CPU init routine.
+ +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 155000f22f..818398e753 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2472,6 +2472,12 @@ static void machvirt_init(MachineState *machine) + + create_gic(vms, sysmem); + ++ has_ged = has_ged && aarch64 && firmware_loaded && ++ virt_is_acpi_enabled(vms); ++ if (has_ged) { ++ vms->acpi_dev = create_acpi_ged(vms); ++ } ++ + virt_cpu_post_init(vms, sysmem); + + fdt_add_pmu_nodes(vms); +@@ -2496,9 +2502,7 @@ static void machvirt_init(MachineState *machine) + + create_pcie(vms); + +- if (has_ged && aarch64 && firmware_loaded && virt_is_acpi_enabled(vms)) { +- vms->acpi_dev = create_acpi_ged(vms); +- } else { ++ if (!has_ged) { + create_gpio_devices(vms, VIRT_GPIO, sysmem); + } + +-- +2.27.0 + diff --git a/arm-virt-Fix-adjudgement-of-core_id-for-vcpu-hotplug.patch b/arm-virt-Fix-adjudgement-of-core_id-for-vcpu-hotplug.patch new file mode 100644 index 0000000000000000000000000000000000000000..30a11521e48cbfda6ec3bbf2d9861bf189398472 --- /dev/null +++ b/arm-virt-Fix-adjudgement-of-core_id-for-vcpu-hotplug.patch @@ -0,0 +1,47 @@ +From 00a78edf572783c18a1d4945758371c0f175e321 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 15:41:14 +0800 +Subject: [PATCH] arm/virt: Fix adjudgement of core_id for vcpu hotplugged + +The core_id should be between 0 and ms->smp.cores - 1.
+ +Signed-off-by: Keqian Zhu +--- + hw/arm/virt.c | 14 +++----------- + 1 file changed, 3 insertions(+), 11 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 934b0412ef..e60f3431f9 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3170,8 +3170,6 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + ARMCPU *cpu = ARM_CPU(dev); + CPUState *cs = CPU(dev); + CPUArchId *cpu_slot; +- int32_t min_cpuid = 0; +- int32_t max_cpuid; + + if (dev->hotplugged && !vms->acpi_dev) { + error_setg(errp, "GED acpi device does not exists"); +@@ -3196,15 +3194,9 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + return; + } + +- max_cpuid = ms->possible_cpus->len - 1; +- if (!dev->hotplugged) { +- min_cpuid = vms->acpi_dev ? ms->smp.cpus : 0; +- max_cpuid = vms->acpi_dev ? max_cpuid : ms->smp.cpus - 1; +- } +- +- if ((cpu->core_id < min_cpuid) || (cpu->core_id > max_cpuid)) { +- error_setg(errp, "Invalid core-id %d specified, correct range %d:%d", +- cpu->core_id, min_cpuid, max_cpuid); ++ if ((cpu->core_id < 0) || (cpu->core_id >= ms->smp.cores)) { ++ error_setg(errp, "Invalid core-id %d specified, correct range 0:%u", ++ cpu->core_id, ms->smp.cores - 1); + return; + } + +-- +2.27.0 + diff --git a/arm-virt-Init-PMU-at-host-for-all-possible-vcpus.patch b/arm-virt-Init-PMU-at-host-for-all-possible-vcpus.patch new file mode 100644 index 0000000000000000000000000000000000000000..89fd4ca49fe6115d3ed7d19eb2a265bbe462bc46 --- /dev/null +++ b/arm-virt-Init-PMU-at-host-for-all-possible-vcpus.patch @@ -0,0 +1,71 @@ +From c375e6fdc49f7d3d0232786e4cfd8b792379107c Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Wed, 6 May 2020 14:12:34 +0100 +Subject: [PATCH] arm/virt: Init PMU at host for all possible vcpus + +PMU for all possible vCPUs must be initialized at the VM initialization time. +Refactor existing code to accommodate possible vCPUs. This also assumes that all +processors being used are identical.
+ +Past discussion for reference: +Link: https://lists.gnu.org/archive/html/qemu-devel/2020-06/msg00131.html + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 12 ++++++++---- + include/hw/arm/virt.h | 1 + + 2 files changed, 9 insertions(+), 4 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 08ba255317..78ed3c4ba8 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2055,12 +2055,14 @@ static void finalize_gic_version(VirtMachineState *vms) + */ + static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + { ++ CPUArchIdList *possible_cpus = vms->parent.possible_cpus; + int max_cpus = MACHINE(vms)->smp.max_cpus; +- bool aarch64, pmu, steal_time; ++ bool aarch64, steal_time; + CPUState *cpu; ++ int n; + + aarch64 = object_property_get_bool(OBJECT(first_cpu), "aarch64", NULL); +- pmu = object_property_get_bool(OBJECT(first_cpu), "pmu", NULL); ++ vms->pmu = object_property_get_bool(OBJECT(first_cpu), "pmu", NULL); + steal_time = object_property_get_bool(OBJECT(first_cpu), + "kvm-steal-time", NULL); + +@@ -2087,8 +2089,10 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + memory_region_add_subregion(sysmem, pvtime_reg_base, pvtime); + } + +- CPU_FOREACH(cpu) { +- if (pmu) { ++ for (n = 0; n < possible_cpus->len; n++) { ++ cpu = qemu_get_possible_cpu(n); ++ ++ if (vms->pmu) { + assert(arm_feature(&ARM_CPU(cpu)->env, ARM_FEATURE_PMU)); + if (kvm_irqchip_in_kernel()) { + kvm_arm_pmu_set_irq(cpu, VIRTUAL_PMU_IRQ); +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index a6977bade5..c2fde0522c 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -155,6 +155,7 @@ struct VirtMachineState { + bool ras; + bool mte; + bool dtb_randomness; ++ bool pmu; + OnOffAuto acpi; + VirtGICType gic_version; + VirtIOMMUType iommu; +-- +2.27.0 + diff --git 
a/arm-virt-Make-ARM-vCPU-present-status-ACPI-persisten.patch b/arm-virt-Make-ARM-vCPU-present-status-ACPI-persisten.patch new file mode 100644 index 0000000000000000000000000000000000000000..c8e661145e7b1b42272971979d463d51a5ee7b4e --- /dev/null +++ b/arm-virt-Make-ARM-vCPU-present-status-ACPI-persisten.patch @@ -0,0 +1,97 @@ +From 3780dddd4fc8f0471525c50893e24846d1474692 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 8 Aug 2023 00:43:18 +0000 +Subject: [PATCH] arm/virt: Make ARM vCPU *present* status ACPI *persistent* + +ARM arch does not allow CPUs presence to be changed [1] after kernel has booted. +Hence, firmware/ACPI/Qemu must ensure persistent view of the vCPUs to the Guest +kernel even when they are not present in the QoM i.e. are unplugged or are +yet-to-be-plugged + +References: +[1] Check comment 5 in the bugzilla entry + Link: https://bugzilla.tianocore.org/show_bug.cgi?id=4481#c5 + +Signed-off-by: Salil Mehta +--- + cpu-common.c | 6 ++++++ + hw/arm/virt.c | 7 +++++++ + include/hw/core/cpu.h | 20 ++++++++++++++++++++ + 3 files changed, 33 insertions(+) + +diff --git a/cpu-common.c b/cpu-common.c +index d041a351ab..da52e45760 100644 +--- a/cpu-common.c ++++ b/cpu-common.c +@@ -128,6 +128,12 @@ bool qemu_enabled_cpu(CPUState *cpu) + return cpu && !cpu->disabled; + } + ++bool qemu_persistent_cpu(CPUState *cpu) ++{ ++ /* cpu state can be faked to the guest via acpi */ ++ return cpu->acpi_persistent; ++} ++ + uint64_t qemu_get_cpu_archid(int cpu_index) + { + MachineState *ms = MACHINE(qdev_get_machine()); +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 818398e753..91b2653c03 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3104,6 +3104,13 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + return; + } + virt_cpu_set_properties(OBJECT(cs), cpu_slot, errp); ++ ++ /* ++ * To give persistent presence view of vCPUs to the guest, ACPI might need ++ * to fake the presence of the vCPUs to the guest but keep them 
disabled. ++ * This shall be used during the init of ACPI Hotplug state and hot-unplug ++ */ ++ cs->acpi_persistent = true; + } + + static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index fdfb952259..0ca778eb75 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -550,6 +550,13 @@ struct CPUState { + * By default every CPUState is enabled as of now across all archs. + */ + bool disabled; ++ /* ++ * On certain architectures, to give persistent view of the 'presence' of ++ * vCPUs to the guest, ACPI might need to fake the 'presence' of the vCPUs ++ * but keep them ACPI disabled to the guest. This is done by returning ++ * _STA.PRES=True and _STA.Ena=False for the unplugged vCPUs in QEMU QoM. ++ */ ++ bool acpi_persistent; + /* TODO Move common fields from CPUArchState here. */ + int cpu_index; + int cluster_index; +@@ -957,6 +964,19 @@ bool qemu_present_cpu(CPUState *cpu); + */ + bool qemu_enabled_cpu(CPUState *cpu); + ++/** ++ * qemu_persistent_cpu: ++ * @cpu: The vCPU to check ++ * ++ * Checks if the vCPU state should always be reflected as *present* via ACPI ++ * to the Guest. By default, this is False on all architectures and has to be ++ * explicity set during initialization. 
++ * Returns: True if it is ACPI 'persistent' CPU ++ * ++ */ ++bool qemu_persistent_cpu(CPUState *cpu); ++ + /** + * qemu_get_cpu_archid: + * @cpu_index: possible vCPU for which arch-id needs to be retreived +-- +2.27.0 + diff --git a/arm-virt-Release-objects-for-disabled-possible-vCPUs.patch b/arm-virt-Release-objects-for-disabled-possible-vCPUs.patch new file mode 100644 index 0000000000000000000000000000000000000000..9a1198a9d63b5203f251134500fc03dc3847611a --- /dev/null +++ b/arm-virt-Release-objects-for-disabled-possible-vCPUs.patch @@ -0,0 +1,88 @@ +From 097e3b46a7eede0182a846f7b993e14d3eed83b7 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 9 Jun 2020 03:01:08 +0100 +Subject: [PATCH] arm/virt: Release objects for *disabled* possible vCPUs after + init + +During machvirt_init(), QOM ARMCPU objects are also pre-created along with the +corresponding KVM vCPUs in the host for all possible vCPUs. This is necessary +because of the architectural constraint, KVM restricts the deferred creation of +the KVM vCPUs and VGIC initialization/sizing after VM init. Hence, VGIC is +pre-sized with possible vCPUs. + +After initialization of the machine is complete, disabled possible KVM vCPUs are +then parked at the per-virt-machine list "kvm_parked_vcpus" and we release the +QOM ARMCPU objects for the disabled vCPUs. These shall be re-created at the time +when vCPU is hotplugged again. QOM ARMCPU object is then re-attached with +corresponding parked KVM vCPU. + +Alternatively, we could've never released the QOM CPU objects and kept on +reusing. This approach might require some modifications of qdevice_add() +interface to get old ARMCPU object instead of creating a new one for the hotplug +request. + +Each of the above approaches come with their own pros and cons. This prototype +uses the 1st approach. (suggestions are welcome!)
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 32 ++++++++++++++++++++++++++++++++ + 1 file changed, 32 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 91b2653c03..bf385a469c 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2060,6 +2060,7 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + { + CPUArchIdList *possible_cpus = vms->parent.possible_cpus; + int max_cpus = MACHINE(vms)->smp.max_cpus; ++ MachineState *ms = MACHINE(vms); + bool aarch64, steal_time; + CPUState *cpu; + int n; +@@ -2120,6 +2121,37 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + } + } + } ++ ++ if (kvm_enabled() || tcg_enabled()) { ++ for (n = 0; n < possible_cpus->len; n++) { ++ cpu = qemu_get_possible_cpu(n); ++ ++ /* ++ * Now, GIC has been sized with possible CPUs and we dont require ++ * disabled vCPU objects to be represented in the QOM. Release the ++ * disabled ARMCPU objects earlier used during init for pre-sizing. ++ * ++ * We fake to the guest through ACPI about the presence(_STA.PRES=1) ++ * of these non-existent vCPUs at VMM/qemu and present these as ++ * disabled vCPUs(_STA.ENA=0) so that they cant be used. These vCPUs ++ * can be later added to the guest through hotplug exchanges when ++ * ARMCPU objects are created back again using 'device_add' QMP ++ * command. ++ */ ++ /* ++ * RFC: Question: Other approach could've been to keep them forever ++ * and release it only once when qemu exits as part of finalize or ++ * when new vCPU is hotplugged. In the later old could be released ++ * for the newly created object for the same vCPU? 
++ */ ++ if (!qemu_enabled_cpu(cpu)) { ++ CPUArchId *cpu_slot; ++ cpu_slot = virt_find_cpu_slot(ms, cpu->cpu_index); ++ cpu_slot->cpu = NULL; ++ object_unref(OBJECT(cpu)); ++ } ++ } ++ } + } + + static void virt_cpu_set_properties(Object *cpuobj, const CPUArchId *cpu_slot, +-- +2.27.0 + diff --git a/arm-virt-Require-mc-has_hotpluggable_cpus-for-cold-p.patch b/arm-virt-Require-mc-has_hotpluggable_cpus-for-cold-p.patch new file mode 100644 index 0000000000000000000000000000000000000000..d64e1a07a1cdb9de75b21fe11f1cac4b340a596f --- /dev/null +++ b/arm-virt-Require-mc-has_hotpluggable_cpus-for-cold-p.patch @@ -0,0 +1,55 @@ +From 519699c61eeb980bb7d7f443eb95c0406aae82da Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 23:05:39 +0800 +Subject: [PATCH] arm/virt: Require mc->has_hotpluggable_cpus for cold-plugged + vcpu + +Cold-plugged vCPU also need mc->has_hotpluggable_cpus. + +Signed-off-by: Keqian Zhu +--- + hw/arm/virt.c | 21 +++++++++++---------- + 1 file changed, 11 insertions(+), 10 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 00e57f2d75..73b29c7f73 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3179,16 +3179,6 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + CPUState *cs = CPU(dev); + CPUArchId *cpu_slot; + +- if (dev->hotplugged && !vms->acpi_dev) { +- error_setg(errp, "GED acpi device does not exists"); +- return; +- } +- +- if (dev->hotplugged && !mc->has_hotpluggable_cpus) { +- error_setg(errp, "CPU hotplug not supported on this machine"); +- return; +- } +- + /* sanity check the cpu */ + if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { + error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", +@@ -3222,6 +3212,17 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + + cs->cpu_index = virt_get_cpu_id_from_cpu_topo(ms, dev); + ++ /* Except for cold-booted vCPUs, this should check presence of ACPI GED */ ++ if (cs->cpu_index >= ms->smp.cpus && 
!vms->acpi_dev) { ++ error_setg(errp, "GED acpi device does not exists"); ++ return; ++ } ++ ++ if (cs->cpu_index >= ms->smp.cpus && !mc->has_hotpluggable_cpus) { ++ error_setg(errp, "CPU [cold|hot]plug not supported on this machine"); ++ return; ++ } ++ + cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index); + if (qemu_present_cpu(CPU(cpu_slot->cpu))) { + error_setg(errp, "cpu(id%d=%d:%d:%d:%d) with arch-id %" PRIu64 " exist", +-- +2.27.0 + diff --git a/arm-virt-Update-the-guest-via-GED-about-CPU-hot-un-p.patch b/arm-virt-Update-the-guest-via-GED-about-CPU-hot-un-p.patch new file mode 100644 index 0000000000000000000000000000000000000000..a45f47dea256e42e6e8b8f1817f724c9cac6cb10 --- /dev/null +++ b/arm-virt-Update-the-guest-via-GED-about-CPU-hot-un-p.patch @@ -0,0 +1,123 @@ +From afb71c88d935349cdf9763e8f51f77334ab615ec Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Fri, 8 May 2020 18:54:10 +0100 +Subject: [PATCH] arm/virt: Update the guest(via GED) about CPU hot-(un)plug + events + +During any vCPU hot-(un)plug, running guest VM needs to be intimated about the +new vCPU being added or request the deletion of the vCPU which is already part +of the guest VM. This is done using the ACPI GED event which eventually gets +demultiplexed to a CPU hotplug event and further to specific hot-(un)plug event +of a particular vCPU. + +This change adds the ACPI calls to the existing hot-(un)plug hooks to trigger +ACPI GED events from QEMU to guest VM. 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 33 ++++++++++++++++++++++++++++++--- + 1 file changed, 30 insertions(+), 3 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 0312fa366d..60cd560ab9 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3256,6 +3256,7 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); + MachineState *ms = MACHINE(hotplug_dev); + CPUState *cs = CPU(dev); ++ Error *local_err = NULL; + CPUArchId *cpu_slot; + + /* insert the cold/hot-plugged vcpu in the slot */ +@@ -3268,12 +3269,20 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + * plugged, guest is also notified. + */ + if (vms->acpi_dev) { +- /* TODO: update acpi hotplug state. Send cpu hotplug event to guest */ ++ HotplugHandlerClass *hhc; ++ /* update acpi hotplug state and send cpu hotplug event to guest */ ++ hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev); ++ hhc->plug(HOTPLUG_HANDLER(vms->acpi_dev), dev, &local_err); ++ if (local_err) { ++ goto fail; ++ } + /* TODO: register cpu for reset & update F/W info for the next boot */ + } + + cs->disabled = false; + return; ++fail: ++ error_propagate(errp, local_err); + } + + static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev, +@@ -3281,8 +3290,10 @@ static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev, + { + MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); + VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); ++ HotplugHandlerClass *hhc; + ARMCPU *cpu = ARM_CPU(dev); + CPUState *cs = CPU(dev); ++ Error *local_err = NULL; + + if (!vms->acpi_dev || !dev->realized) { + error_setg(errp, "GED does not exists or device is not realized!"); +@@ -3301,9 +3312,16 @@ static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev, + return; + } + +- /* TODO: request cpu hotplug 
from guest */ ++ /* request cpu hotplug from guest */ ++ hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev); ++ hhc->unplug_request(HOTPLUG_HANDLER(vms->acpi_dev), dev, &local_err); ++ if (local_err) { ++ goto fail; ++ } + + return; ++fail: ++ error_propagate(errp, local_err); + } + + static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, +@@ -3311,7 +3329,9 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + { + VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); + MachineState *ms = MACHINE(hotplug_dev); ++ HotplugHandlerClass *hhc; + CPUState *cs = CPU(dev); ++ Error *local_err = NULL; + CPUArchId *cpu_slot; + + if (!vms->acpi_dev || !dev->realized) { +@@ -3321,7 +3341,12 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + + cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index); + +- /* TODO: update the acpi cpu hotplug state for cpu hot-unplug */ ++ /* update the acpi cpu hotplug state for cpu hot-unplug */ ++ hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev); ++ hhc->unplug(HOTPLUG_HANDLER(vms->acpi_dev), dev, &local_err); ++ if (local_err) { ++ goto fail; ++ } + + unwire_gic_cpu_irqs(vms, cs); + virt_update_gic(vms, cs); +@@ -3335,6 +3360,8 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + cs->disabled = true; + + return; ++fail: ++ error_propagate(errp, local_err); + } + + static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, +-- +2.27.0 + diff --git a/arm-virt-acpi-Build-CPUs-AML-with-CPU-Hotplug-suppor.patch b/arm-virt-acpi-Build-CPUs-AML-with-CPU-Hotplug-suppor.patch new file mode 100644 index 0000000000000000000000000000000000000000..cde5af36c19f45400e9c75b3755d57fcd19967ba --- /dev/null +++ b/arm-virt-acpi-Build-CPUs-AML-with-CPU-Hotplug-suppor.patch @@ -0,0 +1,43 @@ +From bea23b0f82cedbd860b66c7b9e1f6bb0ca85d1cf Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sun, 6 Aug 2023 17:05:30 +0000 +Subject: [PATCH] arm/virt/acpi: Build CPUs AML 
with CPU Hotplug support + +Support of vCPU Hotplug requires sequence of ACPI handshakes between Qemu and +Guest kernel when a vCPU is plugged or unplugged. Most of the AML code to +support these handshakes already exists. This AML need to be build during VM +init for ARM architecture as well if the GED support exists. + +Signed-off-by: Salil Mehta +--- + hw/arm/virt-acpi-build.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 084c8abc7c..d88f3cded1 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -937,7 +937,19 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + * the RTC ACPI device at all when using UEFI. + */ + scope = aml_scope("\\_SB"); +- acpi_dsdt_add_cpus(scope, vms); ++ /* if GED is enabled then cpus AML shall be added as part build_cpus_aml */ ++ if (vms->acpi_dev) { ++ CPUHotplugFeatures opts = { ++ .acpi_1_compatible = false, ++ .has_legacy_cphp = false ++ }; ++ ++ build_cpus_aml(scope, ms, opts, NULL, virt_acpi_dsdt_cpu_cppc, ++ memmap[VIRT_CPUHP_ACPI].base, ++ "\\_SB", NULL, AML_SYSTEM_MEMORY); ++ } else { ++ acpi_dsdt_add_cpus(scope, vms); ++ } + acpi_dsdt_add_uart(scope, &memmap[VIRT_UART], + (irqmap[VIRT_UART] + ARM_SPI_BASE)); + if (vmc->acpi_expose_flash) { +-- +2.27.0 + diff --git a/arm-virt-acpi-Factor-out-CPPC-building-from-DSDT-CPU.patch b/arm-virt-acpi-Factor-out-CPPC-building-from-DSDT-CPU.patch new file mode 100644 index 0000000000000000000000000000000000000000..1a599cbfe434992133a647dd0b1f0e278a649397 --- /dev/null +++ b/arm-virt-acpi-Factor-out-CPPC-building-from-DSDT-CPU.patch @@ -0,0 +1,76 @@ +From 2d5040ce21af5fc02a8588456be7316fcd5bc2a0 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 2 Apr 2024 16:36:38 +0800 +Subject: [PATCH] arm/virt/acpi: Factor out CPPC building from DSDT CPU aml + +When CPU hotplug is enabled, we will use build_cpus_aml instead of +acpi_dsdt_add_cpus, so 
factor out CPPC building to reuse it. + +Signed-off-by: Keqian Zhu +--- + hw/arm/virt-acpi-build.c | 34 ++++++++++++++++++++-------------- + 1 file changed, 20 insertions(+), 14 deletions(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 48fc77fb83..084c8abc7c 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -123,8 +123,23 @@ static void acpi_dsdt_add_cppc(Aml *dev, uint64_t cpu_base, int *regs_offset) + aml_append(dev, aml_name_decl("_CPC", cpc)); + } + +-static void acpi_dsdt_add_cpus(Aml *scope, VirtMachineState *vms, +- const MemMapEntry *cppc_memmap) ++static void virt_acpi_dsdt_cpu_cppc(int ncpu, int num_cpu, Aml *dev) { ++ VirtMachineState *vms = VIRT_MACHINE(qdev_get_machine()); ++ const MemMapEntry *cppc_memmap = &vms->memmap[VIRT_CPUFREQ]; ++ ++ /* ++ * Append _CPC and _PSD to support CPU frequence show ++ * Check CPPC available by DESIRED_PERF register ++ */ ++ if (cppc_regs_offset[DESIRED_PERF] != -1) { ++ acpi_dsdt_add_cppc(dev, ++ cppc_memmap->base + ncpu * CPPC_REG_PER_CPU_STRIDE, ++ cppc_regs_offset); ++ acpi_dsdt_add_psd(dev, num_cpu); ++ } ++} ++ ++static void acpi_dsdt_add_cpus(Aml *scope, VirtMachineState *vms) + { + MachineState *ms = MACHINE(vms); + uint16_t i; +@@ -134,18 +149,9 @@ static void acpi_dsdt_add_cpus(Aml *scope, VirtMachineState *vms, + aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0007"))); + aml_append(dev, aml_name_decl("_UID", aml_int(i))); + +- /* +- * Append _CPC and _PSD to support CPU frequence show +- * Check CPPC available by DESIRED_PERF register +- */ +- if (cppc_regs_offset[DESIRED_PERF] != -1) { +- acpi_dsdt_add_cppc(dev, +- cppc_memmap->base + i * CPPC_REG_PER_CPU_STRIDE, +- cppc_regs_offset); +- acpi_dsdt_add_psd(dev, ms->smp.cpus); +- } ++ virt_acpi_dsdt_cpu_cppc(i, ms->smp.cpus, dev); + +- aml_append(scope, dev); ++ aml_append(scope, dev); + } + } + +@@ -931,7 +937,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + * the 
RTC ACPI device at all when using UEFI. + */ + scope = aml_scope("\\_SB"); +- acpi_dsdt_add_cpus(scope, vms, &memmap[VIRT_CPUFREQ]); ++ acpi_dsdt_add_cpus(scope, vms); + acpi_dsdt_add_uart(scope, &memmap[VIRT_UART], + (irqmap[VIRT_UART] + ARM_SPI_BASE)); + if (vmc->acpi_expose_flash) { +-- +2.27.0 + diff --git a/arm-virt-acpi-Require-possible_cpu_arch_ids-for-buil.patch b/arm-virt-acpi-Require-possible_cpu_arch_ids-for-buil.patch new file mode 100644 index 0000000000000000000000000000000000000000..c323c440952a681f11f46ce4f1f3bcfbcb954f91 --- /dev/null +++ b/arm-virt-acpi-Require-possible_cpu_arch_ids-for-buil.patch @@ -0,0 +1,38 @@ +From 0bee56446962676992d11e5879f6fbac57e785e8 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 23:38:31 +0800 +Subject: [PATCH] arm/virt-acpi: Require possible_cpu_arch_ids for + build_cpus_aml() + +As the acpi_dev requires possible_cpu_arch_ids to support +vcpu hotplug. + +Signed-off-by: Keqian Zhu +--- + hw/arm/virt-acpi-build.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 590afcfa98..46642efac4 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -1003,6 +1003,7 @@ static void + build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + { + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); ++ MachineClass *mc = MACHINE_GET_CLASS(vms); + Aml *scope, *dsdt; + MachineState *ms = MACHINE(vms); + const MemMapEntry *memmap = vms->memmap; +@@ -1020,7 +1021,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + */ + scope = aml_scope("\\_SB"); + /* if GED is enabled then cpus AML shall be added as part build_cpus_aml */ +- if (vms->acpi_dev) { ++ if (mc->has_hotpluggable_cpus) { + CPUHotplugFeatures opts = { + .acpi_1_compatible = false, + .has_legacy_cphp = false +-- +2.27.0 + diff --git a/arm-virt-gicv3-Changes-to-pre-size-GIC-with-possible.patch 
b/arm-virt-gicv3-Changes-to-pre-size-GIC-with-possible.patch new file mode 100644 index 0000000000000000000000000000000000000000..b2f8c67fce6c0c005d1e5537abc3faddfd22f250 --- /dev/null +++ b/arm-virt-gicv3-Changes-to-pre-size-GIC-with-possible.patch @@ -0,0 +1,225 @@ +From fe61cbaf2dc92b062c8d147b05c3ce213734c24a Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Wed, 6 May 2020 02:20:23 +0100 +Subject: [PATCH] arm/virt,gicv3: Changes to pre-size GIC with possible vcpus + @machine init + +GIC needs to be pre-sized with possible vcpus at the initialization time. This +is necessary because Memory regions and resources associated with GICC/GICR +etc cannot be changed (add/del/modified) after VM has inited. Also, GIC_TYPER +needs to be initialized with mp_affinity and cpu interface number association. +This cannot be changed after GIC has initialized. + +Once all the cpu interfaces of the GIC has been inited it needs to be ensured +that any updates to the GICC during reset only takes place for the present +vcpus and not the disabled ones. Therefore, proper checks are required at +various places. 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Jean-Philippe Brucker +[changed the comment in arm_gicv3_icc_reset] +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 13 +++++++------ + hw/intc/arm_gicv3_common.c | 7 +++++-- + hw/intc/arm_gicv3_cpuif.c | 8 ++++++++ + hw/intc/arm_gicv3_kvm.c | 34 +++++++++++++++++++++++++++++++--- + include/hw/arm/virt.h | 2 +- + 5 files changed, 52 insertions(+), 12 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index f10d75366b..08ba255317 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -802,6 +802,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + const char *gictype; + int i; + unsigned int smp_cpus = ms->smp.cpus; ++ unsigned int max_cpus = ms->smp.max_cpus; + uint32_t nb_redist_regions = 0; + int revision; + +@@ -826,7 +827,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + } + vms->gic = qdev_new(gictype); + qdev_prop_set_uint32(vms->gic, "revision", revision); +- qdev_prop_set_uint32(vms->gic, "num-cpu", smp_cpus); ++ qdev_prop_set_uint32(vms->gic, "num-cpu", max_cpus); + /* Note that the num-irq property counts both internal and external + * interrupts; there are always 32 of the former (mandated by GIC spec). 
+ */ +@@ -838,7 +839,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + if (vms->gic_version != VIRT_GIC_VERSION_2) { + QList *redist_region_count; + uint32_t redist0_capacity = virt_redist_capacity(vms, VIRT_GIC_REDIST); +- uint32_t redist0_count = MIN(smp_cpus, redist0_capacity); ++ uint32_t redist0_count = MIN(max_cpus, redist0_capacity); + + nb_redist_regions = virt_gicv3_redist_region_count(vms); + +@@ -915,7 +916,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + } else if (vms->virt) { + qemu_irq irq = qdev_get_gpio_in(vms->gic, + intidbase + ARCH_GIC_MAINT_IRQ); +- sysbus_connect_irq(gicbusdev, i + 4 * smp_cpus, irq); ++ sysbus_connect_irq(gicbusdev, i + 4 * max_cpus, irq); + } + + qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, +@@ -923,11 +924,11 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + + VIRTUAL_PMU_IRQ)); + + sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); +- sysbus_connect_irq(gicbusdev, i + smp_cpus, ++ sysbus_connect_irq(gicbusdev, i + max_cpus, + qdev_get_gpio_in(cpudev, ARM_CPU_FIQ)); +- sysbus_connect_irq(gicbusdev, i + 2 * smp_cpus, ++ sysbus_connect_irq(gicbusdev, i + 2 * max_cpus, + qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ)); +- sysbus_connect_irq(gicbusdev, i + 3 * smp_cpus, ++ sysbus_connect_irq(gicbusdev, i + 3 * max_cpus, + qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ)); + } + +diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c +index 2ebf880ead..ebd99af610 100644 +--- a/hw/intc/arm_gicv3_common.c ++++ b/hw/intc/arm_gicv3_common.c +@@ -392,10 +392,13 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + s->cpu = g_new0(GICv3CPUState, s->num_cpu); + + for (i = 0; i < s->num_cpu; i++) { +- CPUState *cpu = qemu_get_cpu(i); ++ CPUState *cpu = qemu_get_possible_cpu(i); + uint64_t cpu_affid; + +- s->cpu[i].cpu = cpu; ++ if (qemu_enabled_cpu(cpu)) { ++ s->cpu[i].cpu = cpu; ++ } ++ + s->cpu[i].gic = s; + /* Store 
GICv3CPUState in CPUARMState gicv3state pointer */ + gicv3_set_gicv3state(cpu, &s->cpu[i]); +diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c +index ab1a00508e..0d0eb2f62f 100644 +--- a/hw/intc/arm_gicv3_cpuif.c ++++ b/hw/intc/arm_gicv3_cpuif.c +@@ -934,6 +934,10 @@ void gicv3_cpuif_update(GICv3CPUState *cs) + ARMCPU *cpu = ARM_CPU(cs->cpu); + CPUARMState *env = &cpu->env; + ++ if (!qemu_enabled_cpu(cs->cpu)) { ++ return; ++ } ++ + g_assert(qemu_mutex_iothread_locked()); + + trace_gicv3_cpuif_update(gicv3_redist_affid(cs), cs->hppi.irq, +@@ -1826,6 +1830,10 @@ static void icc_generate_sgi(CPUARMState *env, GICv3CPUState *cs, + for (i = 0; i < s->num_cpu; i++) { + GICv3CPUState *ocs = &s->cpu[i]; + ++ if (!qemu_enabled_cpu(ocs->cpu)) { ++ continue; ++ } ++ + if (irm) { + /* IRM == 1 : route to all CPUs except self */ + if (cs == ocs) { +diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c +index 77eb37e131..db06c75e2b 100644 +--- a/hw/intc/arm_gicv3_kvm.c ++++ b/hw/intc/arm_gicv3_kvm.c +@@ -24,6 +24,7 @@ + #include "hw/intc/arm_gicv3_common.h" + #include "qemu/error-report.h" + #include "qemu/module.h" ++#include "sysemu/cpus.h" + #include "sysemu/kvm.h" + #include "sysemu/runstate.h" + #include "kvm_arm.h" +@@ -458,6 +459,18 @@ static void kvm_arm_gicv3_put(GICv3State *s) + GICv3CPUState *c = &s->cpu[ncpu]; + int num_pri_bits; + ++ /* ++ * To support hotplug of vcpus we need to make sure all gic cpuif/GICC ++ * are initialized at machvirt init time. Once the init is done we ++ * release the ARMCPU object for disabled vcpus but this leg could hit ++ * during reset of GICC later as well i.e. after init has happened and ++ * all of the cases we want to make sure we dont acess the GICC for ++ * the disabled VCPUs. 
++ */ ++ if (!qemu_enabled_cpu(c->cpu)) { ++ continue; ++ } ++ + kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, true); + kvm_gicc_access(s, ICC_CTLR_EL1, ncpu, + &c->icc_ctlr_el1[GICV3_NS], true); +@@ -616,6 +629,11 @@ static void kvm_arm_gicv3_get(GICv3State *s) + GICv3CPUState *c = &s->cpu[ncpu]; + int num_pri_bits; + ++ /* don't access GICC for the disabled vCPUs. */ ++ if (!qemu_enabled_cpu(c->cpu)) { ++ continue; ++ } ++ + kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, false); + kvm_gicc_access(s, ICC_CTLR_EL1, ncpu, + &c->icc_ctlr_el1[GICV3_NS], false); +@@ -695,10 +713,19 @@ static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri) + return; + } + ++ /* ++ * This shall be called even when vcpu is being hotplugged or onlined and ++ * other vcpus might be running. Host kernel KVM code to handle device ++ * access of IOCTLs KVM_{GET|SET}_DEVICE_ATTR might fail due to inability to ++ * grab vcpu locks for all the vcpus. Hence, we need to pause all vcpus to ++ * facilitate locking within host. 
++ */ ++ pause_all_vcpus(); + /* Initialize to actual HW supported configuration */ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + KVM_VGIC_ATTR(ICC_CTLR_EL1, c->gicr_typer), + &c->icc_ctlr_el1[GICV3_NS], false, &error_abort); ++ resume_all_vcpus(); + + c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS]; + } +@@ -808,9 +835,10 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) + gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL); + + for (i = 0; i < s->num_cpu; i++) { +- ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i)); +- +- define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); ++ CPUState *cs = qemu_get_cpu(i); ++ if (qemu_enabled_cpu(cs)) { ++ define_arm_cp_regs(ARM_CPU(cs), gicv3_cpuif_reginfo); ++ } + } + + /* Try to create the device via the device control API */ +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 49d1ec8656..a6977bade5 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -208,7 +208,7 @@ static inline int virt_gicv3_redist_region_count(VirtMachineState *vms) + + assert(vms->gic_version != VIRT_GIC_VERSION_2); + +- return (MACHINE(vms)->smp.cpus > redist0_capacity && ++ return (MACHINE(vms)->smp.max_cpus > redist0_capacity && + vms->highmem_redists) ? 2 : 1; + } + +-- +2.27.0 + diff --git a/arm-virt-kvm-Pre-create-disabled-possible-vCPUs-mach.patch b/arm-virt-kvm-Pre-create-disabled-possible-vCPUs-mach.patch new file mode 100644 index 0000000000000000000000000000000000000000..b752e1fd85490dc2292692b29bc34652b29d460b --- /dev/null +++ b/arm-virt-kvm-Pre-create-disabled-possible-vCPUs-mach.patch @@ -0,0 +1,221 @@ +From 2669fd26cbc36e24ebfc844c240b45ad831701cc Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 5 May 2020 18:44:59 +0100 +Subject: [PATCH] arm/virt,kvm: Pre-create disabled possible vCPUs @machine + init + +In ARMv8 architecture, GIC needs all the vCPUs to be created and present when +it is initialized. This is because: +1. 
GICC and MPIDR association must be fixed at the VM initialization time. + This is represented by register GIC_TYPER(mp_afffinity, proc_num) +2. GICC(cpu interfaces), GICR(redistributors) etc all must be initialized + at the boot time as well. +3. Memory regions associated with GICR etc. cannot be changed(add/del/mod) + after VM has inited. + +This patch adds the support to pre-create all such possible vCPUs within the +host using the KVM interface as part of the virt machine initialization. These +vCPUs could later be attached to QOM/ACPI while they are actually hot plugged +and made present. + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Reported-by: Vishnu Pajjuri +[VP: Identified CPU stall issue & suggested probable fix] +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 53 +++++++++++++++++++++++++++++++++++++++++-- + include/hw/core/cpu.h | 1 + + target/arm/cpu64.c | 1 + + target/arm/kvm.c | 32 ++++++++++++++++++++++++++ + target/arm/kvm64.c | 9 +++++++- + target/arm/kvm_arm.h | 11 +++++++++ + 6 files changed, 104 insertions(+), 3 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 2f04bc7666..f10d75366b 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2389,8 +2389,10 @@ static void machvirt_init(MachineState *machine) + assert(possible_cpus->len == max_cpus); + for (n = 0; n < possible_cpus->len; n++) { + Object *cpuobj; ++ CPUState *cs; + + cpuobj = object_new(possible_cpus->cpus[n].type); ++ cs = CPU(cpuobj); + + aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL); + object_property_set_int(cpuobj, "socket-id", +@@ -2402,8 +2404,55 @@ static void machvirt_init(MachineState *machine) + object_property_set_int(cpuobj, "thread-id", + virt_get_thread_id(machine, n), NULL); + +- qdev_realize(DEVICE(cpuobj), NULL, &error_fatal); +- object_unref(cpuobj); ++ if (n < smp_cpus) { ++ qdev_realize(DEVICE(cpuobj), NULL, &error_fatal); ++ object_unref(cpuobj); ++ } else { ++ 
CPUArchId *cpu_slot; ++ ++ /* handling for vcpus which are yet to be hot-plugged */ ++ cs->cpu_index = n; ++ cpu_slot = virt_find_cpu_slot(machine, cs->cpu_index); ++ ++ /* ++ * ARM host vCPU features need to be fixed at the boot time. But as ++ * per current approach this CPU object will be destroyed during ++ * cpu_post_init(). During hotplug of vCPUs these properties are ++ * initialized again. ++ */ ++ virt_cpu_set_properties(cpuobj, cpu_slot, &error_fatal); ++ ++ /* ++ * For KVM, we shall be pre-creating the now disabled/un-plugged ++ * possbile host vcpus and park them till the time they are ++ * actually hot plugged. This is required to pre-size the host ++ * GICC and GICR with the all possible vcpus for this VM. ++ */ ++ if (kvm_enabled()) { ++ kvm_arm_create_host_vcpu(ARM_CPU(cs)); ++ } ++ /* ++ * Add disabled vCPU to CPU slot during the init phase of the virt ++ * machine ++ * 1. We need this ARMCPU object during the GIC init. This object ++ * will facilitate in pre-realizing the GIC. Any info like ++ * mp-affinity(required to derive gicr_type) etc. could still be ++ * fetched while preserving QOM abstraction akin to realized ++ * vCPUs. ++ * 2. Now, after initialization of the virt machine is complete we ++ * could use two approaches to deal with this ARMCPU object: ++ * (i) re-use this ARMCPU object during hotplug of this vCPU. ++ * OR ++ * (ii) defer release this ARMCPU object after gic has been ++ * initialized or during pre-plug phase when a vCPU is ++ * hotplugged. ++ * ++ * We will use the (ii) approach and release the ARMCPU objects ++ * after GIC and machine has been fully initialized during ++ * machine_init_done() phase. 
++ */ ++ cpu_slot->cpu = OBJECT(cs); ++ } + } + fdt_add_timer_nodes(vms); + fdt_add_cpu_nodes(vms); +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index c30636a936..fdfb952259 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -528,6 +528,7 @@ struct CPUState { + uint32_t kvm_fetch_index; + uint64_t dirty_pages; + int kvm_vcpu_stats_fd; ++ VMChangeStateEntry *vmcse; + + /* Use by accel-block: CPU is executing an ioctl() */ + QemuLockCnt in_ioctl_lock; +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index e226b60b72..5d28838175 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -859,6 +859,7 @@ static void aarch64_cpu_initfn(Object *obj) + * enabled explicitly + */ + cs->disabled = true; ++ cs->thread_id = 0; + } + + static void aarch64_cpu_finalizefn(Object *obj) +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index f59f4f81b2..70cf15b550 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -659,6 +659,38 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu) + write_list_to_cpustate(cpu); + } + ++void kvm_arm_create_host_vcpu(ARMCPU *cpu) ++{ ++ CPUState *cs = CPU(cpu); ++ unsigned long vcpu_id = cs->cpu_index; ++ int ret; ++ ++ ret = kvm_create_vcpu(cs); ++ if (ret < 0) { ++ error_report("Failed to create host vcpu %ld", vcpu_id); ++ abort(); ++ } ++ ++ /* ++ * Initialize the vCPU in the host. This will reset the sys regs ++ * for this vCPU and related registers like MPIDR_EL1 etc. also ++ * gets programmed during this call to host. These are referred ++ * later while setting device attributes of the GICR during GICv3 ++ * reset ++ */ ++ ret = kvm_arch_init_vcpu(cs); ++ if (ret < 0) { ++ error_report("Failed to initialize host vcpu %ld", vcpu_id); ++ abort(); ++ } ++ ++ /* ++ * park the created vCPU. shall be used during kvm_get_vcpu() when ++ * threads are created during realization of ARM vCPUs. 
++ */ ++ kvm_park_vcpu(cs); ++} ++ + /* + * Update KVM's MP_STATE based on what QEMU thinks it is + */ +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 3c175c93a7..03ce1e7525 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -562,7 +562,14 @@ int kvm_arch_init_vcpu(CPUState *cs) + return -EINVAL; + } + +- qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs); ++ /* ++ * Install VM change handler only when vCPU thread has been spawned ++ * i.e. vCPU is being realized ++ */ ++ if (cs->thread_id) { ++ cs->vmcse = qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, ++ cs); ++ } + + /* Determine init features for this CPU */ + memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 051a0da41c..31408499b3 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -163,6 +163,17 @@ void kvm_arm_cpu_post_load(ARMCPU *cpu); + */ + void kvm_arm_reset_vcpu(ARMCPU *cpu); + ++/** ++ * kvm_arm_create_host_vcpu: ++ * @cpu: ARMCPU ++ * ++ * Called at to pre create all possible kvm vCPUs within the the host at the ++ * virt machine init time. This will also init this pre-created vCPU and ++ * hence result in vCPU reset at host. These pre created and inited vCPUs ++ * shall be parked for use when ARM vCPUs are actually realized. 
++ */ ++void kvm_arm_create_host_vcpu(ARMCPU *cpu); ++ + /** + * kvm_arm_init_serror_injection: + * @cs: CPUState +-- +2.27.0 + diff --git a/arm-virt-target-arm-Add-new-ARMCPU-socket-cluster-co.patch b/arm-virt-target-arm-Add-new-ARMCPU-socket-cluster-co.patch new file mode 100644 index 0000000000000000000000000000000000000000..71f2ff037965e861699d5448b53f67381156c7bb --- /dev/null +++ b/arm-virt-target-arm-Add-new-ARMCPU-socket-cluster-co.patch @@ -0,0 +1,153 @@ +From c8e062285078e688e692214baf97b35246fc2552 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 5 May 2020 23:19:17 +0100 +Subject: [PATCH] arm/virt,target/arm: Add new ARMCPU + {socket,cluster,core,thread}-id property + +This shall be used to store user specified topology{socket,cluster,core,thread} +and shall be converted to a unique 'vcpu-id' which is used as slot-index during +hot(un)plug of vCPU. + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++ + target/arm/cpu.c | 4 +++ + target/arm/cpu.h | 4 +++ + 3 files changed, 71 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index f4c3d47f30..94481d45d4 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -227,6 +227,11 @@ static const char *valid_cpus[] = { + ARM_CPU_TYPE_NAME("max"), + }; + ++static int virt_get_socket_id(const MachineState *ms, int cpu_index); ++static int virt_get_cluster_id(const MachineState *ms, int cpu_index); ++static int virt_get_core_id(const MachineState *ms, int cpu_index); ++static int virt_get_thread_id(const MachineState *ms, int cpu_index); ++ + static bool cpu_type_valid(const char *cpu) + { + int i; +@@ -2264,6 +2269,14 @@ static void machvirt_init(MachineState *machine) + &error_fatal); + + aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL); ++ object_property_set_int(cpuobj, "socket-id", ++ virt_get_socket_id(machine, n), 
NULL); ++ object_property_set_int(cpuobj, "cluster-id", ++ virt_get_cluster_id(machine, n), NULL); ++ object_property_set_int(cpuobj, "core-id", ++ virt_get_core_id(machine, n), NULL); ++ object_property_set_int(cpuobj, "thread-id", ++ virt_get_thread_id(machine, n), NULL); + + if (!vms->secure) { + object_property_set_bool(cpuobj, "has_el3", false, NULL); +@@ -2750,10 +2763,59 @@ static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx) + return socket_id % ms->numa_state->num_nodes; + } + ++static int virt_get_socket_id(const MachineState *ms, int cpu_index) ++{ ++ assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len); ++ ++ return ms->possible_cpus->cpus[cpu_index].props.socket_id; ++} ++ ++static int virt_get_cluster_id(const MachineState *ms, int cpu_index) ++{ ++ assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len); ++ ++ return ms->possible_cpus->cpus[cpu_index].props.cluster_id; ++} ++ ++static int virt_get_core_id(const MachineState *ms, int cpu_index) ++{ ++ assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len); ++ ++ return ms->possible_cpus->cpus[cpu_index].props.core_id; ++} ++ ++static int virt_get_thread_id(const MachineState *ms, int cpu_index) ++{ ++ assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len); ++ ++ return ms->possible_cpus->cpus[cpu_index].props.thread_id; ++} ++ ++static int ++virt_get_cpu_id_from_cpu_topo(const MachineState *ms, DeviceState *dev) ++{ ++ int cpu_id, sock_vcpu_num, clus_vcpu_num, core_vcpu_num; ++ ARMCPU *cpu = ARM_CPU(dev); ++ ++ /* calculate total logical cpus across socket/cluster/core */ ++ sock_vcpu_num = cpu->socket_id * (ms->smp.threads * ms->smp.cores * ++ ms->smp.clusters); ++ clus_vcpu_num = cpu->cluster_id * (ms->smp.threads * ms->smp.cores); ++ core_vcpu_num = cpu->core_id * ms->smp.threads; ++ ++ /* get vcpu-id(logical cpu index) for this vcpu from this topology */ ++ cpu_id = (sock_vcpu_num + clus_vcpu_num + core_vcpu_num) + cpu->thread_id; ++ ++ 
assert(cpu_id >= 0 && cpu_id < ms->possible_cpus->len); ++ ++ return cpu_id; ++} ++ + static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + { + int n; + unsigned int max_cpus = ms->smp.max_cpus; ++ unsigned int smp_threads = ms->smp.threads; + VirtMachineState *vms = VIRT_MACHINE(ms); + MachineClass *mc = MACHINE_GET_CLASS(vms); + +@@ -2767,6 +2829,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + ms->possible_cpus->len = max_cpus; + for (n = 0; n < ms->possible_cpus->len; n++) { + ms->possible_cpus->cpus[n].type = ms->cpu_type; ++ ms->possible_cpus->cpus[n].vcpus_count = smp_threads; + ms->possible_cpus->cpus[n].arch_id = + virt_cpu_mp_affinity(vms, n); + +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index efb22a87f9..cce315c18a 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2422,6 +2422,10 @@ static Property arm_cpu_properties[] = { + DEFINE_PROP_UINT64("mp-affinity", ARMCPU, + mp_affinity, ARM64_AFFINITY_INVALID), + DEFINE_PROP_INT32("node-id", ARMCPU, node_id, CPU_UNSET_NUMA_NODE_ID), ++ DEFINE_PROP_INT32("socket-id", ARMCPU, socket_id, 0), ++ DEFINE_PROP_INT32("cluster-id", ARMCPU, cluster_id, 0), ++ DEFINE_PROP_INT32("core-id", ARMCPU, core_id, 0), ++ DEFINE_PROP_INT32("thread-id", ARMCPU, thread_id, 0), + DEFINE_PROP_INT32("core-count", ARMCPU, core_count, -1), + DEFINE_PROP_END_OF_LIST() + }; +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index a0282e0d28..145d3dbf13 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -1096,6 +1096,10 @@ struct ArchCPU { + QLIST_HEAD(, ARMELChangeHook) el_change_hooks; + + int32_t node_id; /* NUMA node this CPU belongs to */ ++ int32_t socket_id; ++ int32_t cluster_id; ++ int32_t core_id; ++ int32_t thread_id; + + /* Used to synchronize KVM and QEMU in-kernel device levels */ + uint8_t device_irq_level; +-- +2.27.0 + diff --git a/arm-virt-target-arm-Machine-init-time-change-common-.patch 
b/arm-virt-target-arm-Machine-init-time-change-common-.patch new file mode 100644 index 0000000000000000000000000000000000000000..d8199f7ce05614c00088072be19912fda08e3c13 --- /dev/null +++ b/arm-virt-target-arm-Machine-init-time-change-common-.patch @@ -0,0 +1,328 @@ +From 7cd2d7ef7bb7f6c6a97988d86b97922ff700ab06 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Wed, 6 May 2020 00:13:31 +0100 +Subject: [PATCH] arm/virt,target/arm: Machine init time change common to vCPU + {cold|hot}-plug + +Refactor and introduce the common logic required during the initialization of +both cold and hot plugged vCPUs. Also initialize the *disabled* state of the +vCPUs which shall be used further during init phases of various other components +like GIC, PMU, ACPI etc as part of the virt machine initialization. + +KVM vCPUs corresponding to unplugged/yet-to-be-plugged QOM CPUs are kept in +powered-off state in the KVM Host and do not run the guest code. Plugged vCPUs +are also kept in powered-off state but vCPU threads exist and is kept sleeping. + +TBD: +For the cold booted vCPUs, this change also exists in the arm_load_kernel() +in boot.c but for the hotplugged CPUs this change should still remain part of +the pre-plug phase. We are duplicating the powering-off of the cold booted CPUs. +Shall we remove the duplicate change from boot.c? 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Reported-by: Gavin Shan +[GS: pointed the assertion due to wrong range check] +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 149 ++++++++++++++++++++++++++++++++++++++++----- + target/arm/cpu.c | 7 +++ + target/arm/cpu64.c | 14 +++++ + 3 files changed, 156 insertions(+), 14 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 8f647422d8..2f04bc7666 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -227,6 +227,7 @@ static const char *valid_cpus[] = { + ARM_CPU_TYPE_NAME("max"), + }; + ++static CPUArchId *virt_find_cpu_slot(MachineState *ms, int vcpuid); + static int virt_get_socket_id(const MachineState *ms, int cpu_index); + static int virt_get_cluster_id(const MachineState *ms, int cpu_index); + static int virt_get_core_id(const MachineState *ms, int cpu_index); +@@ -2249,6 +2250,14 @@ static void machvirt_init(MachineState *machine) + exit(1); + } + ++ finalize_gic_version(vms); ++ if (tcg_enabled() || hvf_enabled() || qtest_enabled() || ++ (vms->gic_version < VIRT_GIC_VERSION_3)) { ++ machine->smp.max_cpus = smp_cpus; ++ mc->has_hotpluggable_cpus = false; ++ warn_report("cpu hotplug feature has been disabled"); ++ } ++ + possible_cpus = mc->possible_cpu_arch_ids(machine); + + /* +@@ -2275,11 +2284,6 @@ static void machvirt_init(MachineState *machine) + virt_set_memmap(vms, pa_bits); + } + +- /* We can probe only here because during property set +- * KVM is not available yet +- */ +- finalize_gic_version(vms); +- + sysmem = vms->sysmem = get_system_memory(); + + if (vms->secure) { +@@ -2385,17 +2389,9 @@ static void machvirt_init(MachineState *machine) + assert(possible_cpus->len == max_cpus); + for (n = 0; n < possible_cpus->len; n++) { + Object *cpuobj; +- CPUState *cs; +- +- if (n >= smp_cpus) { +- break; +- } + + cpuobj = object_new(possible_cpus->cpus[n].type); + +- cs = CPU(cpuobj); +- cs->cpu_index = n; +- + aarch64 &= 
object_property_get_bool(cpuobj, "aarch64", NULL); + object_property_set_int(cpuobj, "socket-id", + virt_get_socket_id(machine, n), NULL); +@@ -2902,6 +2898,50 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + return ms->possible_cpus; + } + ++static CPUArchId *virt_find_cpu_slot(MachineState *ms, int vcpuid) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(ms); ++ CPUArchId *found_cpu; ++ uint64_t mp_affinity; ++ ++ assert(vcpuid >= 0 && vcpuid < ms->possible_cpus->len); ++ ++ /* ++ * RFC: Question: ++ * TBD: Should mp-affinity be treated as MPIDR? ++ */ ++ mp_affinity = virt_cpu_mp_affinity(vms, vcpuid); ++ found_cpu = &ms->possible_cpus->cpus[vcpuid]; ++ ++ assert(found_cpu->arch_id == mp_affinity); ++ ++ /* ++ * RFC: Question: ++ * Slot-id is the index where vCPU with certain arch-id(=mpidr/ap-affinity) ++ * is plugged. For Host KVM, MPIDR for vCPU is derived using vcpu-id. ++ * As I understand, MPIDR and vcpu-id are property of vCPU but slot-id is ++ * more related to machine? Current code assumes slot-id and vcpu-id are ++ * same i.e. meaning of slot is bit vague. ++ * ++ * Q1: Is there any requirement to clearly represent slot and dissociate it ++ * from vcpu-id? ++ * Q2: Should we make MPIDR within host KVM user configurable? 
++ * ++ * +----+----+----+----+----+----+----+----+ ++ * MPIDR ||| Res | Aff2 | Aff1 | Aff0 | ++ * +----+----+----+----+----+----+----+----+ ++ * \ \ \ | | ++ * \ 8bit \ 8bit \ |4bit| ++ * \<------->\<------->\ |<-->| ++ * \ \ \| | ++ * +----+----+----+----+----+----+----+----+ ++ * VCPU-ID | Byte4 | Byte2 | Byte1 | Byte0 | ++ * +----+----+----+----+----+----+----+----+ ++ */ ++ ++ return found_cpu; ++} ++ + static void virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) + { +@@ -2945,6 +2985,81 @@ static void virt_memory_plug(HotplugHandler *hotplug_dev, + dev, &error_abort); + } + ++static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, ++ Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); ++ MachineState *ms = MACHINE(hotplug_dev); ++ ARMCPU *cpu = ARM_CPU(dev); ++ CPUState *cs = CPU(dev); ++ CPUArchId *cpu_slot; ++ int32_t min_cpuid = 0; ++ int32_t max_cpuid; ++ ++ /* sanity check the cpu */ ++ if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { ++ error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", ++ ms->cpu_type); ++ return; ++ } ++ ++ if ((cpu->thread_id < 0) || (cpu->thread_id >= ms->smp.threads)) { ++ error_setg(errp, "Invalid thread-id %u specified, correct range 0:%u", ++ cpu->thread_id, ms->smp.threads - 1); ++ return; ++ } ++ ++ max_cpuid = ms->possible_cpus->len - 1; ++ if (!dev->hotplugged) { ++ min_cpuid = vms->acpi_dev ? ms->smp.cpus : 0; ++ max_cpuid = vms->acpi_dev ? 
max_cpuid : ms->smp.cpus - 1; ++ } ++ ++ if ((cpu->core_id < min_cpuid) || (cpu->core_id > max_cpuid)) { ++ error_setg(errp, "Invalid core-id %d specified, correct range %d:%d", ++ cpu->core_id, min_cpuid, max_cpuid); ++ return; ++ } ++ ++ if ((cpu->cluster_id < 0) || (cpu->cluster_id >= ms->smp.clusters)) { ++ error_setg(errp, "Invalid cluster-id %u specified, correct range 0:%u", ++ cpu->cluster_id, ms->smp.clusters - 1); ++ return; ++ } ++ ++ if ((cpu->socket_id < 0) || (cpu->socket_id >= ms->smp.sockets)) { ++ error_setg(errp, "Invalid socket-id %u specified, correct range 0:%u", ++ cpu->socket_id, ms->smp.sockets - 1); ++ return; ++ } ++ ++ cs->cpu_index = virt_get_cpu_id_from_cpu_topo(ms, dev); ++ ++ cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index); ++ if (qemu_present_cpu(CPU(cpu_slot->cpu))) { ++ error_setg(errp, "cpu(id%d=%d:%d:%d:%d) with arch-id %" PRIu64 " exist", ++ cs->cpu_index, cpu->socket_id, cpu->cluster_id, cpu->core_id, ++ cpu->thread_id, cpu_slot->arch_id); ++ return; ++ } ++ virt_cpu_set_properties(OBJECT(cs), cpu_slot, errp); ++} ++ ++static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, ++ Error **errp) ++{ ++ MachineState *ms = MACHINE(hotplug_dev); ++ CPUState *cs = CPU(dev); ++ CPUArchId *cpu_slot; ++ ++ /* insert the cold/hot-plugged vcpu in the slot */ ++ cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index); ++ cpu_slot->cpu = OBJECT(dev); ++ ++ cs->disabled = false; ++ return; ++} ++ + static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +@@ -2987,6 +3102,8 @@ static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, + qlist_append_str(reserved_regions, resv_prop_str); + qdev_prop_set_array(dev, "reserved-regions", reserved_regions); + g_free(resv_prop_str); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ virt_cpu_pre_plug(hotplug_dev, dev, errp); + } + } + +@@ -3008,6 +3125,8 @@ static void virt_machine_device_plug_cb(HotplugHandler 
*hotplug_dev, + virt_memory_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ virt_cpu_plug(hotplug_dev, dev, errp); + } + + if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { +@@ -3092,7 +3211,8 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine, + if (device_is_dynamic_sysbus(mc, dev) || + object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) || + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI) || +- object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { ++ object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) || ++ object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + return HOTPLUG_HANDLER(machine); + } + return NULL; +@@ -3169,6 +3289,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + #endif + mc->get_default_cpu_node_id = virt_get_default_cpu_node_id; + mc->kvm_type = virt_kvm_type; ++ mc->has_hotpluggable_cpus = true; + assert(!mc->get_hotplug_handler); + mc->get_hotplug_handler = virt_machine_get_hotplug_handler; + hc->pre_plug = virt_machine_device_pre_plug_cb; +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index cce315c18a..18b8a79c8f 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2477,6 +2477,12 @@ static const struct TCGCPUOps arm_tcg_ops = { + }; + #endif /* CONFIG_TCG */ + ++static int64_t arm_cpu_get_arch_id(CPUState *cs) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ return cpu->mp_affinity; ++} ++ + static void arm_cpu_class_init(ObjectClass *oc, void *data) + { + ARMCPUClass *acc = ARM_CPU_CLASS(oc); +@@ -2495,6 +2501,7 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) + cc->class_by_name = arm_cpu_class_by_name; + cc->has_work = arm_cpu_has_work; + cc->dump_state = arm_cpu_dump_state; ++ cc->get_arch_id = arm_cpu_get_arch_id; + cc->set_pc = arm_cpu_set_pc; + cc->get_pc = arm_cpu_get_pc; + 
cc->gdb_read_register = arm_cpu_gdb_read_register; +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index 471014b5a9..e226b60b72 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -850,6 +850,17 @@ static void aarch64_cpu_set_aarch64(Object *obj, bool value, Error **errp) + } + } + ++static void aarch64_cpu_initfn(Object *obj) ++{ ++ CPUState *cs = CPU(obj); ++ ++ /* ++ * we start every ARM64 vcpu as disabled possible vCPU. It needs to be ++ * enabled explicitly ++ */ ++ cs->disabled = true; ++} ++ + static void aarch64_cpu_finalizefn(Object *obj) + { + } +@@ -862,7 +873,9 @@ static const gchar *aarch64_gdb_arch_name(CPUState *cs) + static void aarch64_cpu_class_init(ObjectClass *oc, void *data) + { + CPUClass *cc = CPU_CLASS(oc); ++ DeviceClass *dc = DEVICE_CLASS(oc); + ++ dc->user_creatable = true; + cc->gdb_read_register = aarch64_cpu_gdb_read_register; + cc->gdb_write_register = aarch64_cpu_gdb_write_register; + cc->gdb_num_core_regs = 34; +@@ -908,6 +921,7 @@ void aarch64_cpu_register(const ARMCPUInfo *info) + static const TypeInfo aarch64_cpu_type_info = { + .name = TYPE_AARCH64_CPU, + .parent = TYPE_ARM_CPU, ++ .instance_init = aarch64_cpu_initfn, + .instance_finalize = aarch64_cpu_finalizefn, + .abstract = true, + .class_init = aarch64_cpu_class_init, +-- +2.27.0 + diff --git a/arm-virt.c-Convey-local_err-when-set-psci-conduit.patch b/arm-virt.c-Convey-local_err-when-set-psci-conduit.patch new file mode 100644 index 0000000000000000000000000000000000000000..7a2b9ced7bc1c37c4c19bd08d7662f3b63342ee2 --- /dev/null +++ b/arm-virt.c-Convey-local_err-when-set-psci-conduit.patch @@ -0,0 +1,29 @@ +From 25438f2cdb13d07c1bd228fcf4223c21da368548 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 15:15:31 +0800 +Subject: [PATCH] arm/virt.c: Convey local_err when set psci-conduit + +Signed-off-by: Keqian Zhu +--- + hw/arm/virt.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c 
+index ed437ce0e8..934b0412ef 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2323,7 +2323,10 @@ static void virt_cpu_set_properties(Object *cpuobj, const CPUArchId *cpu_slot, + */ + if (vms->psci_conduit != QEMU_PSCI_CONDUIT_DISABLED) { + object_property_set_int(cpuobj, "psci-conduit", vms->psci_conduit, +- NULL); ++ &local_err); ++ if (local_err) { ++ goto out; ++ } + + /* Secondary CPUs start in PSCI powered-down state */ + if (CPU(cpuobj)->cpu_index > 0) { +-- +2.27.0 + diff --git a/arm64-Add-the-cpufreq-device-to-show-cpufreq-info-to.patch b/arm64-Add-the-cpufreq-device-to-show-cpufreq-info-to.patch new file mode 100644 index 0000000000000000000000000000000000000000..052ac56a4167e507ce6866b5e5c19ea33fdc75c8 --- /dev/null +++ b/arm64-Add-the-cpufreq-device-to-show-cpufreq-info-to.patch @@ -0,0 +1,615 @@ +From ebe05c34a66969e4cacc4d6c030dfe93ace89cb2 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Tue, 19 Mar 2024 14:35:55 +0800 +Subject: [PATCH] arm64: Add the cpufreq device to show cpufreq info to guest + +On ARM64 platform, cpu frequency is retrieved via ACPI CPPC. +A virtual cpufreq device based on ACPI CPPC is created to +present cpu frequency info to the guest. + +The default frequency is set to host cpu nominal frequency, +which is obtained from the host CPPC sysfs. Other performance +data are set to the same value, since we don't support guest +performance scaling here. + +Performance counters are also not emulated and they simply +return 1 if read, and the guest should fall back to using the desired +performance value as the current performance. + +Guest kernel version above 4.18 is required to make it work.
+ +This series is backported from: +https://patchwork.kernel.org/cover/11379943/ + +Signed-off-by: Ying Fang +Signed-off-by: Yanan Wang +Signed-off-by: Yuan Zhang +--- + configs/devices/aarch64-softmmu/default.mak | 1 + + hw/acpi/aml-build.c | 22 ++ + hw/acpi/cpufreq.c | 283 ++++++++++++++++++++ + hw/acpi/meson.build | 1 + + hw/arm/virt-acpi-build.c | 79 +++++- + hw/arm/virt.c | 13 + + hw/char/Kconfig | 4 + + include/hw/acpi/acpi-defs.h | 40 +++ + include/hw/acpi/aml-build.h | 3 + + include/hw/arm/virt.h | 1 + + 10 files changed, 444 insertions(+), 3 deletions(-) + create mode 100644 hw/acpi/cpufreq.c + +diff --git a/configs/devices/aarch64-softmmu/default.mak b/configs/devices/aarch64-softmmu/default.mak +index f82a04c27d..8d66d0f1af 100644 +--- a/configs/devices/aarch64-softmmu/default.mak ++++ b/configs/devices/aarch64-softmmu/default.mak +@@ -8,3 +8,4 @@ include ../arm-softmmu/default.mak + # CONFIG_XLNX_ZYNQMP_ARM=n + # CONFIG_XLNX_VERSAL=n + # CONFIG_SBSA_REF=n ++# CONFIG_CPUFREQ=n +diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c +index 2968df5562..714498165a 100644 +--- a/hw/acpi/aml-build.c ++++ b/hw/acpi/aml-build.c +@@ -1554,6 +1554,28 @@ Aml *aml_sleep(uint64_t msec) + return var; + } + ++/* ACPI 5.0b: 6.4.3.7 Generic Register Descriptor */ ++Aml *aml_generic_register(AmlRegionSpace rs, uint8_t reg_width, ++ uint8_t reg_offset, AmlAccessType type, uint64_t addr) ++{ ++ int i; ++ Aml *var = aml_alloc(); ++ build_append_byte(var->buf, 0x82); /* Generic Register Descriptor */ ++ build_append_byte(var->buf, 0x0C); /* Length, bits[7:0] value = 0x0C */ ++ build_append_byte(var->buf, 0); /* Length, bits[15:8] value = 0 */ ++ build_append_byte(var->buf, rs); /* Address Space ID */ ++ build_append_byte(var->buf, reg_width); /* Register Bit Width */ ++ build_append_byte(var->buf, reg_offset); /* Register Bit Offset */ ++ build_append_byte(var->buf, type); /* Access Size */ ++ ++ /* Register address */ ++ for (i = 0; i < 8; i++) { ++ 
build_append_byte(var->buf, extract64(addr, i * 8, 8)); ++ } ++ ++ return var; ++} ++ + static uint8_t Hex2Byte(const char *src) + { + int hi, lo; +diff --git a/hw/acpi/cpufreq.c b/hw/acpi/cpufreq.c +new file mode 100644 +index 0000000000..a84db490b3 +--- /dev/null ++++ b/hw/acpi/cpufreq.c +@@ -0,0 +1,283 @@ ++/* ++ * ACPI CPPC register device ++ * ++ * Support for showing CPU frequency in guest OS. ++ * ++ * Copyright (c) 2019 HUAWEI TECHNOLOGIES CO.,LTD. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ ++ * You should have received a copy of the GNU General Public License along ++ * with this program; if not, see . ++ */ ++ ++#include "qemu/osdep.h" ++#include "hw/sysbus.h" ++#include "chardev/char.h" ++#include "qemu/log.h" ++#include "trace.h" ++#include "qemu/option.h" ++#include "sysemu/sysemu.h" ++#include "hw/acpi/acpi-defs.h" ++#include "qemu/cutils.h" ++#include "qemu/error-report.h" ++#include "hw/boards.h" ++ ++#define TYPE_CPUFREQ "cpufreq" ++#define CPUFREQ(obj) OBJECT_CHECK(CpuhzState, (obj), TYPE_CPUFREQ) ++#define NOMINAL_FREQ_FILE "/sys/devices/system/cpu/cpu0/acpi_cppc/nominal_freq" ++#define CPU_MAX_FREQ_FILE "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq" ++#define HZ_MAX_LENGTH 1024 ++#define MAX_SUPPORT_SPACE 0x10000 ++ ++/* ++ * Since Hi1616 will not support CPPC, we simply use its nominal frequency as ++ * the default. 
++ */ ++#define DEFAULT_HZ 2400 ++ ++int cppc_regs_offset[CPPC_REG_COUNT] = { ++ [HIGHEST_PERF] = 0, ++ [NOMINAL_PERF] = 4, ++ [LOW_NON_LINEAR_PERF] = 8, ++ [LOWEST_PERF] = 12, ++ [GUARANTEED_PERF] = 16, ++ [DESIRED_PERF] = 20, ++ [MIN_PERF] = -1, ++ [MAX_PERF] = -1, ++ [PERF_REDUC_TOLERANCE] = -1, ++ [TIME_WINDOW] = -1, ++ [CTR_WRAP_TIME] = -1, ++ [REFERENCE_CTR] = 24, ++ [DELIVERED_CTR] = 32, ++ [PERF_LIMITED] = 40, ++ [ENABLE] = -1, ++ [AUTO_SEL_ENABLE] = -1, ++ [AUTO_ACT_WINDOW] = -1, ++ [ENERGY_PERF] = -1, ++ [REFERENCE_PERF] = -1, ++ [LOWEST_FREQ] = 44, ++ [NOMINAL_FREQ] = 48, ++}; ++ ++typedef struct CpuhzState { ++ SysBusDevice parent_obj; ++ ++ MemoryRegion iomem; ++ uint32_t HighestPerformance; ++ uint32_t NominalPerformance; ++ uint32_t LowestNonlinearPerformance; ++ uint32_t LowestPerformance; ++ uint32_t GuaranteedPerformance; ++ uint32_t DesiredPerformance; ++ uint64_t ReferencePerformanceCounter; ++ uint64_t DeliveredPerformanceCounter; ++ uint32_t PerformanceLimited; ++ uint32_t LowestFreq; ++ uint32_t NominalFreq; ++ uint32_t reg_size; ++} CpuhzState; ++ ++ ++static uint64_t cpufreq_read(void *opaque, hwaddr offset, unsigned size) ++{ ++ CpuhzState *s = (CpuhzState *)opaque; ++ uint64_t r; ++ uint64_t n; ++ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ unsigned int smp_cpus = ms->smp.cpus; ++ ++ if (offset >= smp_cpus * CPPC_REG_PER_CPU_STRIDE) { ++ warn_report("cpufreq_read: offset 0x%lx out of range", offset); ++ return 0; ++ } ++ ++ n = offset % CPPC_REG_PER_CPU_STRIDE; ++ switch (n) { ++ case 0: ++ r = s->HighestPerformance; ++ break; ++ case 4: ++ r = s->NominalPerformance; ++ break; ++ case 8: ++ r = s->LowestNonlinearPerformance; ++ break; ++ case 12: ++ r = s->LowestPerformance; ++ break; ++ case 16: ++ r = s->GuaranteedPerformance; ++ break; ++ case 20: ++ r = s->DesiredPerformance; ++ break; ++ /* ++ * We don't have real counters and it is hard to emulate, so always set the ++ * counter value to 1 to rely on Linux to use the 
DesiredPerformance value ++ * directly. ++ */ ++ case 24: ++ r = s->ReferencePerformanceCounter; ++ break; ++ /* ++ * Guest may still access the register by 32bit; add the process to ++ * eliminate unnecessary warnings. ++ */ ++ case 28: ++ r = s->ReferencePerformanceCounter >> 32; ++ break; ++ case 32: ++ r = s->DeliveredPerformanceCounter; ++ break; ++ case 36: ++ r = s->DeliveredPerformanceCounter >> 32; ++ break; ++ ++ case 40: ++ r = s->PerformanceLimited; ++ break; ++ case 44: ++ r = s->LowestFreq; ++ break; ++ case 48: ++ r = s->NominalFreq; ++ break; ++ default: ++ error_printf("cpufreq_read: Bad offset 0x%lx\n", offset); ++ r = 0; ++ break; ++ } ++ return r; ++} ++ ++static void cpufreq_write(void *opaque, hwaddr offset, ++ uint64_t value, unsigned size) ++{ ++ uint64_t n; ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ unsigned int smp_cpus = ms->smp.cpus; ++ ++ if (offset >= smp_cpus * CPPC_REG_PER_CPU_STRIDE) { ++ error_printf("cpufreq_write: offset 0x%lx out of range", offset); ++ return; ++ } ++ ++ n = offset % CPPC_REG_PER_CPU_STRIDE; ++ ++ switch (n) { ++ case 20: ++ break; ++ default: ++ error_printf("cpufreq_write: Bad offset 0x%lx\n", offset); ++ } ++} ++ ++static uint32_t CPPC_Read(const char *hostpath) ++{ ++ int fd; ++ char buffer[HZ_MAX_LENGTH] = { 0 }; ++ uint64_t hz; ++ int len; ++ const char *endptr = NULL; ++ int ret; ++ ++ fd = qemu_open_old(hostpath, O_RDONLY); ++ if (fd < 0) { ++ return 0; ++ } ++ ++ len = read(fd, buffer, HZ_MAX_LENGTH); ++ qemu_close(fd); ++ if (len <= 0) { ++ return 0; ++ } ++ ret = qemu_strtoul(buffer, &endptr, 0, &hz); ++ if (ret < 0) { ++ return 0; ++ } ++ return (uint32_t)hz; ++} ++ ++static const MemoryRegionOps cpufreq_ops = { ++ .read = cpufreq_read, ++ .write = cpufreq_write, ++ .endianness = DEVICE_NATIVE_ENDIAN, ++}; ++ ++static void hz_init(CpuhzState *s) ++{ ++ uint32_t hz; ++ ++ hz = CPPC_Read(NOMINAL_FREQ_FILE); ++ if (hz == 0) { ++ hz = CPPC_Read(CPU_MAX_FREQ_FILE); ++ if (hz == 0) { ++ hz = 
DEFAULT_HZ; ++ } else { ++ /* Value in CpuMaxFrequency is in KHz unit; convert to MHz */ ++ hz = hz / 1000; ++ } ++ } ++ ++ s->HighestPerformance = hz; ++ s->NominalPerformance = hz; ++ s->LowestNonlinearPerformance = hz; ++ s->LowestPerformance = hz; ++ s->GuaranteedPerformance = hz; ++ s->DesiredPerformance = hz; ++ s->ReferencePerformanceCounter = 1; ++ s->DeliveredPerformanceCounter = 1; ++ s->PerformanceLimited = 0; ++ s->LowestFreq = hz; ++ s->NominalFreq = hz; ++} ++ ++static void cpufreq_init(Object *obj) ++{ ++ SysBusDevice *sbd = SYS_BUS_DEVICE(obj); ++ CpuhzState *s = CPUFREQ(obj); ++ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ unsigned int smp_cpus = ms->smp.cpus; ++ ++ s->reg_size = smp_cpus * CPPC_REG_PER_CPU_STRIDE; ++ if (s->reg_size > MAX_SUPPORT_SPACE) { ++ error_report("Required space 0x%x excesses the max support 0x%x", ++ s->reg_size, MAX_SUPPORT_SPACE); ++ goto err_end; ++ } ++ ++ memory_region_init_io(&s->iomem, OBJECT(s), &cpufreq_ops, s, "cpufreq", ++ s->reg_size); ++ sysbus_init_mmio(sbd, &s->iomem); ++ hz_init(s); ++ return; ++ ++err_end: ++ /* Set desired perf register offset to -1 to indicate no support for CPPC */ ++ cppc_regs_offset[DESIRED_PERF] = -1; ++} ++ ++static const TypeInfo cpufreq_arm_info = { ++ .name = TYPE_CPUFREQ, ++ .parent = TYPE_SYS_BUS_DEVICE, ++ .instance_size = sizeof(CpuhzState), ++ .instance_init = cpufreq_init, ++}; ++ ++static void cpufreq_register_types(void) ++{ ++ type_register_static(&cpufreq_arm_info); ++} ++ ++type_init(cpufreq_register_types) +diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build +index fc1b952379..d36b10ea3c 100644 +--- a/hw/acpi/meson.build ++++ b/hw/acpi/meson.build +@@ -27,6 +27,7 @@ acpi_ss.add(when: 'CONFIG_ACPI_ICH9', if_true: files('ich9.c', 'ich9_tco.c')) + acpi_ss.add(when: 'CONFIG_ACPI_ERST', if_true: files('erst.c')) + acpi_ss.add(when: 'CONFIG_IPMI', if_true: files('ipmi.c'), if_false: files('ipmi-stub.c')) + acpi_ss.add(when: 'CONFIG_PC', if_false: 
files('acpi-x86-stub.c')) ++acpi_ss.add(when: 'CONFIG_CPUFREQ', if_true: files('cpufreq.c')) + if have_tpm + acpi_ss.add(files('tpm.c')) + endif +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 8bc35a483c..3cb50bdc65 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -63,7 +63,68 @@ + + #define ACPI_BUILD_TABLE_SIZE 0x20000 + +-static void acpi_dsdt_add_cpus(Aml *scope, VirtMachineState *vms) ++static void acpi_dsdt_add_psd(Aml *dev, int cpus) ++{ ++ Aml *pkg; ++ Aml *sub; ++ ++ sub = aml_package(5); ++ aml_append(sub, aml_int(5)); ++ aml_append(sub, aml_int(0)); ++ /* Assume all vCPUs belong to the same domain */ ++ aml_append(sub, aml_int(0)); ++ /* SW_ANY: OSPM coordinate, initiate on any processor */ ++ aml_append(sub, aml_int(0xFD)); ++ aml_append(sub, aml_int(cpus)); ++ ++ pkg = aml_package(1); ++ aml_append(pkg, sub); ++ ++ aml_append(dev, aml_name_decl("_PSD", pkg)); ++} ++ ++static void acpi_dsdt_add_cppc(Aml *dev, uint64_t cpu_base, int *regs_offset) ++{ ++ Aml *cpc; ++ int i; ++ ++ /* Use version 3 of CPPC table from ACPI 6.3 */ ++ cpc = aml_package(23); ++ aml_append(cpc, aml_int(23)); ++ aml_append(cpc, aml_int(3)); ++ ++ for (i = 0; i < CPPC_REG_COUNT; i++) { ++ Aml *res; ++ uint8_t reg_width; ++ uint8_t acc_type; ++ uint64_t addr; ++ ++ if (regs_offset[i] == -1) { ++ reg_width = 0; ++ acc_type = AML_ANY_ACC; ++ addr = 0; ++ } else { ++ addr = cpu_base + regs_offset[i]; ++ if (i == REFERENCE_CTR || i == DELIVERED_CTR) { ++ reg_width = 64; ++ acc_type = AML_QWORD_ACC; ++ } else { ++ reg_width = 32; ++ acc_type = AML_DWORD_ACC; ++ } ++ } ++ ++ res = aml_resource_template(); ++ aml_append(res, aml_generic_register(AML_SYSTEM_MEMORY, reg_width, 0, ++ acc_type, addr)); ++ aml_append(cpc, res); ++ } ++ ++ aml_append(dev, aml_name_decl("_CPC", cpc)); ++} ++ ++static void acpi_dsdt_add_cpus(Aml *scope, VirtMachineState *vms, ++ const MemMapEntry *cppc_memmap) + { + MachineState *ms = MACHINE(vms); + uint16_t i; 
+@@ -72,7 +133,19 @@ static void acpi_dsdt_add_cpus(Aml *scope, VirtMachineState *vms) + Aml *dev = aml_device("C%.03X", i); + aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0007"))); + aml_append(dev, aml_name_decl("_UID", aml_int(i))); +- aml_append(scope, dev); ++ ++ /* ++ * Append _CPC and _PSD to support CPU frequence show ++ * Check CPPC available by DESIRED_PERF register ++ */ ++ if (cppc_regs_offset[DESIRED_PERF] != -1) { ++ acpi_dsdt_add_cppc(dev, ++ cppc_memmap->base + i * CPPC_REG_PER_CPU_STRIDE, ++ cppc_regs_offset); ++ acpi_dsdt_add_psd(dev, ms->smp.cpus); ++ } ++ ++ aml_append(scope, dev); + } + } + +@@ -858,7 +931,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + * the RTC ACPI device at all when using UEFI. + */ + scope = aml_scope("\\_SB"); +- acpi_dsdt_add_cpus(scope, vms); ++ acpi_dsdt_add_cpus(scope, vms, &memmap[VIRT_CPUFREQ]); + acpi_dsdt_add_uart(scope, &memmap[VIRT_UART], + (irqmap[VIRT_UART] + ARM_SPI_BASE)); + if (vmc->acpi_expose_flash) { +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index b82bd1b8c8..c19cacec8b 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -157,6 +157,7 @@ static const MemMapEntry base_memmap[] = { + [VIRT_PVTIME] = { 0x090a0000, 0x00010000 }, + [VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 }, + [VIRT_MMIO] = { 0x0a000000, 0x00000200 }, ++ [VIRT_CPUFREQ] = { 0x0b000000, 0x00010000 }, + /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */ + [VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 }, + [VIRT_SECURE_MEM] = { 0x0e000000, 0x01000000 }, +@@ -980,6 +981,16 @@ static void create_uart(const VirtMachineState *vms, int uart, + g_free(nodename); + } + ++static void create_cpufreq(const VirtMachineState *vms, MemoryRegion *mem) ++{ ++ hwaddr base = vms->memmap[VIRT_CPUFREQ].base; ++ DeviceState *dev = qdev_new("cpufreq"); ++ SysBusDevice *s = SYS_BUS_DEVICE(dev); ++ ++ sysbus_realize_and_unref(s, &error_fatal); ++ memory_region_add_subregion(mem, base, 
sysbus_mmio_get_region(s, 0)); ++} ++ + static void create_rtc(const VirtMachineState *vms) + { + char *nodename; +@@ -2346,6 +2357,8 @@ static void machvirt_init(MachineState *machine) + + create_uart(vms, VIRT_UART, sysmem, serial_hd(0)); + ++ create_cpufreq(vms, sysmem); ++ + if (vms->secure) { + create_secure_ram(vms, secure_sysmem, secure_tag_sysmem); + create_uart(vms, VIRT_SECURE_UART, secure_sysmem, serial_hd(1)); +diff --git a/hw/char/Kconfig b/hw/char/Kconfig +index 6b6cf2fc1d..335a60c2c1 100644 +--- a/hw/char/Kconfig ++++ b/hw/char/Kconfig +@@ -71,3 +71,7 @@ config GOLDFISH_TTY + + config SHAKTI_UART + bool ++ ++config CPUFREQ ++ bool ++ default y +diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h +index 2b42e4192b..b1f389fb4b 100644 +--- a/include/hw/acpi/acpi-defs.h ++++ b/include/hw/acpi/acpi-defs.h +@@ -93,4 +93,44 @@ typedef struct AcpiFadtData { + #define ACPI_FADT_ARM_PSCI_COMPLIANT (1 << 0) + #define ACPI_FADT_ARM_PSCI_USE_HVC (1 << 1) + ++/* ++ * CPPC register definition from kernel header ++ * include/acpi/cppc_acpi.h ++ * The last element is newly added for easy use ++ */ ++enum cppc_regs { ++ HIGHEST_PERF, ++ NOMINAL_PERF, ++ LOW_NON_LINEAR_PERF, ++ LOWEST_PERF, ++ GUARANTEED_PERF, ++ DESIRED_PERF, ++ MIN_PERF, ++ MAX_PERF, ++ PERF_REDUC_TOLERANCE, ++ TIME_WINDOW, ++ CTR_WRAP_TIME, ++ REFERENCE_CTR, ++ DELIVERED_CTR, ++ PERF_LIMITED, ++ ENABLE, ++ AUTO_SEL_ENABLE, ++ AUTO_ACT_WINDOW, ++ ENERGY_PERF, ++ REFERENCE_PERF, ++ LOWEST_FREQ, ++ NOMINAL_FREQ, ++ CPPC_REG_COUNT, ++}; ++ ++#define CPPC_REG_PER_CPU_STRIDE 0x40 ++ ++/* ++ * Offset for each CPPC register; -1 for unavailable ++ * ++ * Offset for each CPPC register; -1 for unavailable ++ * The whole register space is unavailable if desired perf offset is -1. 
++ */ ++extern int cppc_regs_offset[CPPC_REG_COUNT]; ++ + #endif +diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h +index 84ded2ecd3..200cb113de 100644 +--- a/include/hw/acpi/aml-build.h ++++ b/include/hw/acpi/aml-build.h +@@ -429,6 +429,9 @@ Aml *aml_dma(AmlDmaType typ, AmlDmaBusMaster bm, AmlTransferSize sz, + uint8_t channel); + Aml *aml_sleep(uint64_t msec); + Aml *aml_i2c_serial_bus_device(uint16_t address, const char *resource_source); ++Aml *aml_generic_register(AmlRegionSpace rs, uint8_t reg_width, ++ uint8_t reg_offset, AmlAccessType type, ++ uint64_t addr); + + /* Block AML object primitives */ + Aml *aml_scope(const char *name_format, ...) G_GNUC_PRINTF(1, 2); +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index f69239850e..e944d434c4 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -60,6 +60,7 @@ enum { + VIRT_GIC_REDIST, + VIRT_SMMU, + VIRT_UART, ++ VIRT_CPUFREQ, + VIRT_MMIO, + VIRT_RTC, + VIRT_FW_CFG, +-- +2.27.0 + diff --git a/backup-memory-bakcup-hugepages-hugepages-files-maybe.patch b/backup-memory-bakcup-hugepages-hugepages-files-maybe.patch new file mode 100644 index 0000000000000000000000000000000000000000..9a08dc5ebb4da87c7704a272ce63b058a690df42 --- /dev/null +++ b/backup-memory-bakcup-hugepages-hugepages-files-maybe.patch @@ -0,0 +1,101 @@ +From c28455a0bac4bbf171d1f19e162557377a85e96c Mon Sep 17 00:00:00 2001 +From: Ming Yang +Date: Sat, 23 Mar 2024 16:32:46 +0800 +Subject: [PATCH] [backup] memory: bakcup hugepages: hugepages files maybe + leftover + +old info: +commit id: +3cb1b0ce091998532a30793e3272925da4e6f3aa +old messages: +hugepages: hugepages files maybe leftover + +Before qemu uses the hugepage memory directory /dev/hugepages/libvirt/qemu/xxx, +the directory may be deleted because the virtual machine was destroyed, +causing qemu to create files directly under /dev/hugepages/libvirt/qemu/.
+After the file is created, the file is not cleaned up by unlink, +and when the virtual machine is destroyed, libvirt will only clean up +/dev/hugepages/libvirt/qemu/xxx directory. After creating the hugepage file, +execute unlink to clean up the file to fix the problem. + +Signed-off-by: Jinhua Cao +Signed-off-by: Jiajie Li + +Signed-off-by: Ming Yang +--- + include/qemu/mmap-alloc.h | 4 ++++ + system/physmem.c | 9 ++++++++- + util/mmap-alloc.c | 22 ++++++++++++++++++++++ + 3 files changed, 34 insertions(+), 1 deletion(-) + +diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h +index 8344daaa03..63e4edfd2f 100644 +--- a/include/qemu/mmap-alloc.h ++++ b/include/qemu/mmap-alloc.h +@@ -1,6 +1,10 @@ + #ifndef QEMU_MMAP_ALLOC_H + #define QEMU_MMAP_ALLOC_H + ++#define HUGETLBFS_MAGIC 0x958458f6 ++ ++size_t qemu_fd_getfiletype(int fd); ++ + typedef enum { + QEMU_FS_TYPE_UNKNOWN = 0, + QEMU_FS_TYPE_TMPFS, +diff --git a/system/physmem.c b/system/physmem.c +index a63853a7bc..f14d64819b 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -1329,7 +1329,14 @@ static int file_ram_open(const char *path, + /* @path names a file that doesn't exist, create it */ + fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644); + if (fd >= 0) { +- *created = true; ++ info_report("open %s success \n", path); ++ /* if fd file type is HUGETLBFS_MAGIC, unlink it, */ ++ /* in case to prevent residue after qemu killed */ ++ if (qemu_fd_getfiletype(fd) == HUGETLBFS_MAGIC) { ++ unlink(path); ++ } else { ++ *created = true; ++ } + break; + } + } else if (errno == EISDIR) { +diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c +index ed14f9c64d..6890ad676c 100644 +--- a/util/mmap-alloc.c ++++ b/util/mmap-alloc.c +@@ -30,6 +30,28 @@ + #include + #endif + ++size_t qemu_fd_getfiletype(int fd) ++{ ++ struct statfs fs; ++ int ret; ++ ++ if (fd != -1) { ++ do { ++ ret = fstatfs(fd, &fs); ++ } while (ret != 0 && errno == EINTR); ++ ++ if (ret != 0) { ++ fprintf(stderr, "Couldn't fstatfs() fd: 
%s\n", ++ strerror(errno)); ++ return -1; ++ } ++ return fs.f_type; ++ } else { ++ fprintf(stderr, "fd is invalid \n"); ++ return -1; ++ } ++} ++ + QemuFsType qemu_fd_getfs(int fd) + { + #ifdef CONFIG_LINUX +-- +2.27.0 + diff --git a/block-disallow-block-jobs-when-there-is-a-BDRV_O_INA.patch b/block-disallow-block-jobs-when-there-is-a-BDRV_O_INA.patch new file mode 100644 index 0000000000000000000000000000000000000000..f2f7ad6a14474d6ae4b4558280f78047540e689f --- /dev/null +++ b/block-disallow-block-jobs-when-there-is-a-BDRV_O_INA.patch @@ -0,0 +1,47 @@ +From f9aef3909d23af6a33c604f59dccfcb764090f01 Mon Sep 17 00:00:00 2001 +From: WangJian +Date: Wed, 9 Feb 2022 11:29:15 +0800 +Subject: [PATCH] block: disallow block jobs when there is a BDRV_O_INACTIVE + flag + +Currently, migration will put a BDRV_O_INACTIVE flag +on bs's open_flags until another resume being called. In that case, +any IO from vm or block jobs will cause a qemu crash with an assert +'assert(!(bs->open_flags & BDRV_O_INACTIVE))' failure in bdrv_co_pwritev +function. we hereby disallow block jobs by faking a blocker. + +Signed-off-by: wangjian161 +--- + block.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/block.c b/block.c +index bfb0861ec6..b7cb963929 100644 +--- a/block.c ++++ b/block.c +@@ -7298,6 +7298,22 @@ bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) + bdrv_get_device_or_node_name(bs)); + return true; + } ++ ++ /* ++ * When migration puts a BDRV_O_INACTIVE flag on driver's open_flags, ++ * we fake a blocker that doesn't exist. From now on, block jobs ++ * will not be permitted. 
++ */ ++ if ((op == BLOCK_OP_TYPE_RESIZE || op == BLOCK_OP_TYPE_COMMIT_SOURCE || ++ op == BLOCK_OP_TYPE_MIRROR_SOURCE || op == BLOCK_OP_TYPE_MIRROR_TARGET) && ++ (bs->open_flags & BDRV_O_INACTIVE)) { ++ if (errp) { ++ error_setg(errp, "block device is in use by migration with" ++ " a driver BDRV_O_INACTIVE flag setted"); ++ } ++ return true; ++ } ++ + return false; + } + +-- +2.27.0 + diff --git a/bugfix-irq-Avoid-covering-object-refcount-of-qemu_ir.patch b/bugfix-irq-Avoid-covering-object-refcount-of-qemu_ir.patch new file mode 100644 index 0000000000000000000000000000000000000000..9d45a21f04698d0c4def05fce0fbe86fc84b8cf0 --- /dev/null +++ b/bugfix-irq-Avoid-covering-object-refcount-of-qemu_ir.patch @@ -0,0 +1,32 @@ +From 48a328ee1a5a71b7048e4591310471c759fc5af6 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Mon, 27 Jul 2020 20:39:07 +0800 +Subject: [PATCH] bugfix: irq: Avoid covering object refcount of qemu_irq + +Avoid covering object refcount of qemu_irq, otherwise it may cause a +memory leak.
+ +Signed-off-by: Keqian Zhu +--- + hw/core/irq.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/core/irq.c b/hw/core/irq.c +index 3f14e2dda7..df9b5dac9b 100644 +--- a/hw/core/irq.c ++++ b/hw/core/irq.c +@@ -110,7 +110,10 @@ void qemu_irq_intercept_in(qemu_irq *gpio_in, qemu_irq_handler handler, int n) + int i; + qemu_irq *old_irqs = qemu_allocate_irqs(NULL, NULL, n); + for (i = 0; i < n; i++) { +- *old_irqs[i] = *gpio_in[i]; ++ old_irqs[i]->handler = gpio_in[i]->handler; ++ old_irqs[i]->opaque = gpio_in[i]->opaque; ++ old_irqs[i]->n = gpio_in[i]->n; ++ + gpio_in[i]->handler = handler; + gpio_in[i]->opaque = &old_irqs[i]; + } +-- +2.27.0 + diff --git a/coro-support-live-patch-for-libcare.patch b/coro-support-live-patch-for-libcare.patch new file mode 100644 index 0000000000000000000000000000000000000000..71b83c2df433f74db003e6ceee10a067f7db39af --- /dev/null +++ b/coro-support-live-patch-for-libcare.patch @@ -0,0 +1,116 @@ +From c2b377814e7874811d7eb98462d5153e966281cf Mon Sep 17 00:00:00 2001 +From: Fei Xu +Date: Wed, 3 Apr 2024 18:05:25 +0800 +Subject: [PATCH] coro: support live patch for libcare + +Signed-off-by: Dawei Jiang +--- + include/qemu/coroutine_int.h | 3 ++- + util/coroutine-ucontext.c | 52 ++++++++++++++++++++++++++++++++++++ + util/qemu-coroutine.c | 4 +++ + 3 files changed, 58 insertions(+), 1 deletion(-) + +diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h +index 1da148552f..11b550a0fc 100644 +--- a/include/qemu/coroutine_int.h ++++ b/include/qemu/coroutine_int.h +@@ -73,5 +73,6 @@ Coroutine *qemu_coroutine_new(void); + void qemu_coroutine_delete(Coroutine *co); + CoroutineAction qemu_coroutine_switch(Coroutine *from, Coroutine *to, + CoroutineAction action); +- ++void qemu_coroutine_info_add(const Coroutine *co_); ++void qemu_coroutine_info_delete(const Coroutine *co_); + #endif +diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c +index 7b304c79d9..650c21846d 100644 +--- 
a/util/coroutine-ucontext.c ++++ b/util/coroutine-ucontext.c +@@ -80,6 +80,19 @@ union cc_arg { + int i[2]; + }; + ++/** ++ * coroutines list for libcare ++ */ ++struct CoroutineInformation { ++ sigjmp_buf *env; ++ QLIST_ENTRY(CoroutineInformation) next; ++}; ++ ++static QemuMutex coro_mtx; ++QLIST_HEAD(, CoroutineInformation) coro_info_list = QLIST_HEAD_INITIALIZER(pool); ++int coro_env_offset = offsetof(struct CoroutineInformation, env); ++int coro_next_offset = offsetof(struct CoroutineInformation, next); ++ + /* + * QEMU_ALWAYS_INLINE only does so if __OPTIMIZE__, so we cannot use it. + * always_inline is required to avoid TSan runtime fatal errors. +@@ -340,3 +353,42 @@ bool qemu_in_coroutine(void) + + return self && self->caller; + } ++ ++static void __attribute__((constructor)) coro_mutex_init(void) ++{ ++ qemu_mutex_init(&coro_mtx); ++} ++ ++void qemu_coroutine_info_add(const Coroutine *co_) ++{ ++ CoroutineUContext *co; ++ struct CoroutineInformation *coro_info; ++ ++ /* save coroutine env to coro_info_list */ ++ co = DO_UPCAST(CoroutineUContext, base, co_); ++ coro_info = g_malloc0(sizeof(struct CoroutineInformation)); ++ coro_info->env = &co->env; ++ ++ qemu_mutex_lock(&coro_mtx); ++ QLIST_INSERT_HEAD(&coro_info_list, coro_info, next); ++ qemu_mutex_unlock(&coro_mtx); ++} ++ ++void qemu_coroutine_info_delete(const Coroutine *co_) ++{ ++ CoroutineUContext *co; ++ struct CoroutineInformation *coro_info; ++ ++ /* Remove relative coroutine env info from coro_info_list */ ++ co = DO_UPCAST(CoroutineUContext, base, co_); ++ ++ qemu_mutex_lock(&coro_mtx); ++ QLIST_FOREACH(coro_info, &coro_info_list, next) { ++ if (coro_info->env == &co->env) { ++ QLIST_REMOVE(coro_info, next); ++ g_free(coro_info); ++ break; ++ } ++ } ++ qemu_mutex_unlock(&coro_mtx); ++} +diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c +index 5fd2dbaf8b..f550214484 100644 +--- a/util/qemu-coroutine.c ++++ b/util/qemu-coroutine.c +@@ -89,6 +89,8 @@ Coroutine 
*qemu_coroutine_create(CoroutineEntry *entry, void *opaque) + co = qemu_coroutine_new(); + } + ++ qemu_coroutine_info_add(co); ++ + co->entry = entry; + co->entry_arg = opaque; + QSIMPLEQ_INIT(&co->co_queue_wakeup); +@@ -99,6 +101,8 @@ static void coroutine_delete(Coroutine *co) + { + co->caller = NULL; + ++ qemu_coroutine_info_delete(co); ++ + if (IS_ENABLED(CONFIG_COROUTINE_POOL)) { + if (release_pool_size < qatomic_read(&pool_max_size) * 2) { + QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next); +-- +2.27.0 + diff --git a/cpu-add-Cortex-A72-processor-kvm-target-support.patch b/cpu-add-Cortex-A72-processor-kvm-target-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..2e35603acb11d3b44e0df0faa1e46e018aea0051 --- /dev/null +++ b/cpu-add-Cortex-A72-processor-kvm-target-support.patch @@ -0,0 +1,60 @@ +From 5853333c9513caea541701c95a4ac691bb97452f Mon Sep 17 00:00:00 2001 +From: Xu Yandong +Date: Tue, 19 Mar 2024 10:45:56 +0800 +Subject: [PATCH] cpu: add Cortex-A72 processor kvm target support + +The ARM Cortex-A72 is ARMv8-A micro-architecture, +add kvm target to ARM Cortex-A72 processor definition. 
+ +Signed-off-by: Xu Yandong +Signed-off-by: Mingwang Li +Signed-off-by: Yuan Zhang +--- + target/arm/cpu64.c | 2 +- + target/arm/kvm-consts.h | 3 +++ + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index 922eac3b61..471014b5a9 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -710,6 +710,7 @@ static void aarch64_a72_initfn(Object *obj) + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,cortex-a72"; ++ cpu->kvm_target = QEMU_KVM_ARM_TARGET_GENERIC_V8; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); +@@ -773,7 +774,6 @@ static void aarch64_kunpeng_920_initfn(Object *obj) + cpu->isar.id_aa64dfr0 = 0x110305408; + cpu->isar.id_aa64isar0 = 0x10211120; + cpu->isar.id_aa64mmfr0 = 0x101125; +- cpu->kvm_target = KVM_ARM_TARGET_GENERIC_V8; + } + + static void aarch64_host_initfn(Object *obj) +diff --git a/target/arm/kvm-consts.h b/target/arm/kvm-consts.h +index 7c6adc14f6..c034823170 100644 +--- a/target/arm/kvm-consts.h ++++ b/target/arm/kvm-consts.h +@@ -133,6 +133,8 @@ MISMATCH_CHECK(QEMU_PSCI_RET_DISABLED, PSCI_RET_DISABLED); + #define QEMU_KVM_ARM_TARGET_CORTEX_A57 2 + #define QEMU_KVM_ARM_TARGET_XGENE_POTENZA 3 + #define QEMU_KVM_ARM_TARGET_CORTEX_A53 4 ++/* Generic ARM v8 target */ ++#define QEMU_KVM_ARM_TARGET_GENERIC_V8 5 + + /* There's no kernel define for this: sentinel value which + * matches no KVM target value for either 64 or 32 bit +@@ -144,6 +146,7 @@ MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_FOUNDATION_V8, KVM_ARM_TARGET_FOUNDATION_V8); + MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A57, KVM_ARM_TARGET_CORTEX_A57); + MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_XGENE_POTENZA, KVM_ARM_TARGET_XGENE_POTENZA); + MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A53, KVM_ARM_TARGET_CORTEX_A53); ++MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_GENERIC_V8, KVM_ARM_TARGET_GENERIC_V8); + + #define CP_REG_ARM64 0x6000000000000000ULL + 
#define CP_REG_ARM_COPROC_MASK 0x000000000FFF0000 +-- +2.27.0 + diff --git a/cpu-add-Kunpeng-920-cpu-support.patch b/cpu-add-Kunpeng-920-cpu-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..fc9c4cc8f35dc3b53cf64f7f47b1135e9dc197e3 --- /dev/null +++ b/cpu-add-Kunpeng-920-cpu-support.patch @@ -0,0 +1,120 @@ +From e4ae54316651bf6af12de263da158c5ec4ed0401 Mon Sep 17 00:00:00 2001 +From: Xu Yandong +Date: Mon, 18 Mar 2024 17:31:31 +0800 +Subject: [PATCH] cpu: add Kunpeng-920 cpu support + +Add the Kunpeng-920 CPU model + +Signed-off-by: Xu Yandong +Signed-off-by: Mingwang Li +Signed-off-by: Yuan Zhang +--- + hw/arm/virt.c | 1 + + target/arm/cpu64.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 73 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index be2856c018..500a15aa5b 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -220,6 +220,7 @@ static const char *valid_cpus[] = { + #endif + ARM_CPU_TYPE_NAME("cortex-a53"), + ARM_CPU_TYPE_NAME("cortex-a57"), ++ ARM_CPU_TYPE_NAME("Kunpeng-920"), + ARM_CPU_TYPE_NAME("host"), + ARM_CPU_TYPE_NAME("max"), + }; +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index 1e9c6c85ae..922eac3b61 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -705,6 +705,77 @@ static void aarch64_a53_initfn(Object *obj) + define_cortex_a72_a57_a53_cp_reginfo(cpu); + } + ++static void aarch64_a72_initfn(Object *obj) ++{ ++ ARMCPU *cpu = ARM_CPU(obj); ++ ++ cpu->dtb_compatible = "arm,cortex-a72"; ++ set_feature(&cpu->env, ARM_FEATURE_V8); ++ set_feature(&cpu->env, ARM_FEATURE_NEON); ++ set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); ++ set_feature(&cpu->env, ARM_FEATURE_AARCH64); ++ set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); ++ set_feature(&cpu->env, ARM_FEATURE_EL2); ++ set_feature(&cpu->env, ARM_FEATURE_EL3); ++ set_feature(&cpu->env, ARM_FEATURE_PMU); ++ cpu->midr = 0x410fd083; ++ cpu->revidr = 0x00000000; ++ cpu->reset_fpsid = 0x41034080; ++ cpu->isar.mvfr0 
= 0x10110222; ++ cpu->isar.mvfr1 = 0x12111111; ++ cpu->isar.mvfr2 = 0x00000043; ++ cpu->ctr = 0x8444c004; ++ cpu->reset_sctlr = 0x00c50838; ++ cpu->isar.id_pfr0 = 0x00000131; ++ cpu->isar.id_pfr1 = 0x00011011; ++ cpu->isar.id_dfr0 = 0x03010066; ++ cpu->id_afr0 = 0x00000000; ++ cpu->isar.id_mmfr0 = 0x10201105; ++ cpu->isar.id_mmfr1 = 0x40000000; ++ cpu->isar.id_mmfr2 = 0x01260000; ++ cpu->isar.id_mmfr3 = 0x02102211; ++ cpu->isar.id_isar0 = 0x02101110; ++ cpu->isar.id_isar1 = 0x13112111; ++ cpu->isar.id_isar2 = 0x21232042; ++ cpu->isar.id_isar3 = 0x01112131; ++ cpu->isar.id_isar4 = 0x00011142; ++ cpu->isar.id_isar5 = 0x00011121; ++ cpu->isar.id_aa64pfr0 = 0x00002222; ++ cpu->isar.id_aa64dfr0 = 0x10305106; ++ cpu->isar.id_aa64isar0 = 0x00011120; ++ cpu->isar.id_aa64mmfr0 = 0x00001124; ++ cpu->isar.dbgdidr = 0x3516d000; ++ cpu->clidr = 0x0a200023; ++ cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ ++ cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ ++ cpu->ccsidr[2] = 0x707fe07a; /* 1MB L2 cache */ ++ cpu->dcz_blocksize = 4; /* 64 bytes */ ++ cpu->gic_num_lrs = 4; ++ cpu->gic_vpribits = 5; ++ cpu->gic_vprebits = 5; ++ define_cortex_a72_a57_a53_cp_reginfo(cpu); ++} ++ ++static void aarch64_kunpeng_920_initfn(Object *obj) ++{ ++ ARMCPU *cpu = ARM_CPU(obj); ++ ++ /* ++ * Hisilicon Kunpeng-920 CPU is similar to cortex-a72, ++ * so first initialize cpu data as cortex-a72, ++ * and then update the special register. 
++ */ ++ aarch64_a72_initfn(obj); ++ ++ cpu->midr = 0x480fd010; ++ cpu->ctr = 0x84448004; ++ cpu->isar.id_aa64pfr0 = 0x11001111; ++ cpu->isar.id_aa64dfr0 = 0x110305408; ++ cpu->isar.id_aa64isar0 = 0x10211120; ++ cpu->isar.id_aa64mmfr0 = 0x101125; ++ cpu->kvm_target = KVM_ARM_TARGET_GENERIC_V8; ++} ++ + static void aarch64_host_initfn(Object *obj) + { + #if defined(CONFIG_KVM) +@@ -744,6 +815,7 @@ static void aarch64_max_initfn(Object *obj) + static const ARMCPUInfo aarch64_cpus[] = { + { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, + { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, ++ { .name = "Kunpeng-920", .initfn = aarch64_kunpeng_920_initfn}, + { .name = "max", .initfn = aarch64_max_initfn }, + #if defined(CONFIG_KVM) || defined(CONFIG_HVF) + { .name = "host", .initfn = aarch64_host_initfn }, +-- +2.27.0 + diff --git a/cpu-features-fix-bug-for-memory-leakage.patch b/cpu-features-fix-bug-for-memory-leakage.patch new file mode 100644 index 0000000000000000000000000000000000000000..2e6793d462ca876a46fd6f377e4f7dd896d48e06 --- /dev/null +++ b/cpu-features-fix-bug-for-memory-leakage.patch @@ -0,0 +1,25 @@ +From 9ebad9c3020625df0a178e6a2d06eaae15ef767c Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 9 Feb 2022 12:51:19 +0800 +Subject: [PATCH] cpu/features: fix bug for memory leakage + +The strList is not freed after use. Fix it.
+--- + target/i386/cpu.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index fc61a84b1e..f94405c02b 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -5475,6 +5475,7 @@ static void x86_cpu_get_unavailable_features(Object *obj, Visitor *v, + + x86_cpu_list_feature_names(xc->filtered_features, &result); + visit_type_strList(v, "unavailable-features", &result, errp); ++ qapi_free_strList(result); + } + + /* Print all cpuid feature names in featureset +-- +2.27.0 + diff --git a/cpus-common-Add-common-CPU-utility-for-possible-vCPU.patch b/cpus-common-Add-common-CPU-utility-for-possible-vCPU.patch new file mode 100644 index 0000000000000000000000000000000000000000..e2148e867fe6d83f1c5b25d7c242b17f02dfd472 --- /dev/null +++ b/cpus-common-Add-common-CPU-utility-for-possible-vCPU.patch @@ -0,0 +1,144 @@ +From 444de91551c1e141a76bf3dae4cebee9dbd57b49 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Wed, 6 May 2020 02:48:49 +0100 +Subject: [PATCH] cpus-common: Add common CPU utility for possible vCPUs + +Adds various utility functions which might be required to fetch or check the +state of the possible vCPUs. This also introduces concept of *disabled* vCPUs, +which are part of the *possible* vCPUs but are not part of the *present* vCPU. +This state shall be used during machine init time to check the presence of +vcpus. 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + cpu-common.c | 31 +++++++++++++++++++++++++ + include/hw/core/cpu.h | 53 +++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 84 insertions(+) + +diff --git a/cpu-common.c b/cpu-common.c +index c81fd72d16..d041a351ab 100644 +--- a/cpu-common.c ++++ b/cpu-common.c +@@ -24,6 +24,7 @@ + #include "sysemu/cpus.h" + #include "qemu/lockable.h" + #include "trace/trace-root.h" ++#include "hw/boards.h" + + QemuMutex qemu_cpu_list_lock; + static QemuCond exclusive_cond; +@@ -107,6 +108,36 @@ void cpu_list_remove(CPUState *cpu) + cpu_list_generation_id++; + } + ++CPUState *qemu_get_possible_cpu(int index) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ const CPUArchIdList *possible_cpus = ms->possible_cpus; ++ ++ assert((index >= 0) && (index < possible_cpus->len)); ++ ++ return CPU(possible_cpus->cpus[index].cpu); ++} ++ ++bool qemu_present_cpu(CPUState *cpu) ++{ ++ return cpu; ++} ++ ++bool qemu_enabled_cpu(CPUState *cpu) ++{ ++ return cpu && !cpu->disabled; ++} ++ ++uint64_t qemu_get_cpu_archid(int cpu_index) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ const CPUArchIdList *possible_cpus = ms->possible_cpus; ++ ++ assert((cpu_index >= 0) && (cpu_index < possible_cpus->len)); ++ ++ return possible_cpus->cpus[cpu_index].arch_id; ++} ++ + CPUState *qemu_get_cpu(int index) + { + CPUState *cpu; +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index c0c8320413..c30636a936 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -538,6 +538,17 @@ struct CPUState { + GArray *plugin_mem_cbs; + #endif + ++ /* ++ * Some architectures do not allow *presence* of vCPUs to be changed ++ * after guest has booted using information specified by VMM/firmware ++ * via ACPI MADT at the boot time. 
Thus to enable vCPU hotplug on these ++ * architectures possible vCPU can have CPUState object in 'disabled' ++ * state or can also not have CPUState object at all. This is possible ++ * when vCPU Hotplug is supported and vCPUs are 'yet-to-be-plugged' in ++ * the QOM or have been hot-unplugged. ++ * By default every CPUState is enabled as of now across all archs. ++ */ ++ bool disabled; + /* TODO Move common fields from CPUArchState here. */ + int cpu_index; + int cluster_index; +@@ -913,6 +924,48 @@ static inline bool cpu_in_exclusive_context(const CPUState *cpu) + */ + CPUState *qemu_get_cpu(int index); + ++/** ++ * qemu_get_possible_cpu: ++ * @index: The CPUState@cpu_index value of the CPU to obtain. ++ * Input index MUST be in range [0, Max Possible CPUs) ++ * ++ * If CPUState object exists, then it gets a CPU matching ++ * @index in the possible CPU array. ++ * ++ * Returns: The possible CPU or %NULL if CPU does not exist. ++ */ ++CPUState *qemu_get_possible_cpu(int index); ++ ++/** ++ * qemu_present_cpu: ++ * @cpu: The vCPU to check ++ * ++ * Checks if the vCPU is amongst the present possible vcpus. ++ * ++ * Returns: True if it is present possible vCPU else false ++ */ ++bool qemu_present_cpu(CPUState *cpu); ++ ++/** ++ * qemu_enabled_cpu: ++ * @cpu: The vCPU to check ++ * ++ * Checks if the vCPU is enabled. ++ * ++ * Returns: True if it is 'enabled' else false ++ */ ++bool qemu_enabled_cpu(CPUState *cpu); ++ ++/** ++ * qemu_get_cpu_archid: ++ * @cpu_index: possible vCPU for which arch-id needs to be retrieved ++ * ++ * Fetches the vCPU arch-id from the present possible vCPUs. ++ * ++ * Returns: arch-id of the possible vCPU ++ */ ++uint64_t qemu_get_cpu_archid(int cpu_index); ++ + /** + * cpu_exists: + * @id: Guest-exposed CPU ID to lookup.
+-- +2.27.0 + diff --git a/doc-Update-multi-thread-compression-doc.patch b/doc-Update-multi-thread-compression-doc.patch new file mode 100644 index 0000000000000000000000000000000000000000..e1f1db086dbf9a31213839897a47546ce331db1d --- /dev/null +++ b/doc-Update-multi-thread-compression-doc.patch @@ -0,0 +1,86 @@ +From 55e5f8cafda3c7d4a91e9d58c7b3259476e0dab9 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Sat, 30 Jan 2021 16:36:47 +0800 +Subject: [PATCH] doc: Update multi-thread compression doc + +Modify the doc to fit the previous changes. + +Signed-off-by: Chuan Zheng +Signed-off-by: Zeyu Jin +Signed-off-by: Ying Fang +--- + docs/multi-thread-compression.txt | 31 ++++++++++++++++++------------- + 1 file changed, 18 insertions(+), 13 deletions(-) + +diff --git a/docs/multi-thread-compression.txt b/docs/multi-thread-compression.txt +index 95b1556f67..450e5de469 100644 +--- a/docs/multi-thread-compression.txt ++++ b/docs/multi-thread-compression.txt +@@ -33,14 +33,15 @@ thread compression can be used to accelerate the compression process. + + The decompression speed of Zlib is at least 4 times as quick as + compression, if the source and destination CPU have equal speed, +-keeping the compression thread count 4 times the decompression +-thread count can avoid resource waste. ++and you choose Zlib as the compression method, keeping the compression ++thread count 4 times the decompression thread count can avoid resource waste. + + Compression level can be used to control the compression speed and the +-compression ratio. High compression ratio will take more time, level 0 +-stands for no compression, level 1 stands for the best compression +-speed, and level 9 stands for the best compression ratio. Users can +-select a level number between 0 and 9. ++compression ratio. High compression ratio will take more time, ++level 1 stands for the best compression speed, and a higher level means a higher ++compression ratio.
For Zlib, users can select a level number between 0 and 9, ++where level 0 stands for no compression. For Zstd, users can select a ++level number between 1 and 22. + + + When to use the multiple thread compression in live migration +@@ -116,16 +117,19 @@ to support the multiple thread compression migration: + 2. Activate compression on the source: + {qemu} migrate_set_capability compress on + +-3. Set the compression thread count on source: ++3. Set the compression method: ++ {qemu} migrate_set_parameter compress_method zstd ++ ++4. Set the compression thread count on source: + {qemu} migrate_set_parameter compress-threads 12 + +-4. Set the compression level on the source: ++5. Set the compression level on the source: + {qemu} migrate_set_parameter compress-level 1 + +-5. Set the decompression thread count on destination: ++6. Set the decompression thread count on destination: + {qemu} migrate_set_parameter decompress-threads 3 + +-6. Start outgoing migration: ++7. Start outgoing migration: + {qemu} migrate -d tcp:destination.host:4444 + {qemu} info migrate + Capabilities: ... compress: on +@@ -136,6 +140,7 @@ The following are the default settings: + compress-threads: 8 + decompress-threads: 2 + compress-level: 1 (which means best speed) ++ compress_method: zlib + + So, only the first two steps are required to use the multiple + thread compression in migration. You can do more if the default +@@ -143,7 +148,7 @@ settings are not appropriate. + + TODO + ==== +-Some faster (de)compression method such as LZ4 and Quicklz can help +-to reduce the CPU consumption when doing (de)compression. If using +-these faster (de)compression method, less (de)compression threads ++Compared to Zlib, some faster (de)compression methods such as LZ4 ++and Quicklz can help to reduce the CPU consumption when doing (de)compression. ++If using these faster (de)compression methods, fewer (de)compression threads + are needed when doing the migration.
+-- +2.27.0 + diff --git a/docs-Add-generic-vhost-vdpa-device-documentation.patch b/docs-Add-generic-vhost-vdpa-device-documentation.patch new file mode 100644 index 0000000000000000000000000000000000000000..3480791dfabf4f6641a57b423c8185e5b74c63da --- /dev/null +++ b/docs-Add-generic-vhost-vdpa-device-documentation.patch @@ -0,0 +1,78 @@ +From 28ed79b98f08b5701dcaab7c6ad1015602b28e02 Mon Sep 17 00:00:00 2001 +From: libai +Date: Sat, 12 Nov 2022 22:40:13 +0800 +Subject: [PATCH] docs: Add generic vhost-vdpa device documentation + +Add the description of the generic vhost-vdpa device + +Signed-off-by: libai +--- + docs/system/device-emulation.rst | 1 + + .../devices/vhost-vdpa-generic-device.rst | 46 +++++++++++++++++++ + 2 files changed, 47 insertions(+) + create mode 100644 docs/system/devices/vhost-vdpa-generic-device.rst + +diff --git a/docs/system/device-emulation.rst b/docs/system/device-emulation.rst +index d1f3277cb0..e1b2d18fb1 100644 +--- a/docs/system/device-emulation.rst ++++ b/docs/system/device-emulation.rst +@@ -98,3 +98,4 @@ Emulated Devices + devices/canokey.rst + devices/usb-u2f.rst + devices/igb.rst ++ devices/vhost-vdpa-generic-device.rst +diff --git a/docs/system/devices/vhost-vdpa-generic-device.rst b/docs/system/devices/vhost-vdpa-generic-device.rst +new file mode 100644 +index 0000000000..25fbcac60e +--- /dev/null ++++ b/docs/system/devices/vhost-vdpa-generic-device.rst +@@ -0,0 +1,46 @@ ++ ++========================= ++vhost-vDPA generic device ++========================= ++ ++This document explains the usage of the vhost-vDPA generic device. ++ ++Description ++----------- ++ ++vDPA(virtio data path acceleration) device is a device that uses a datapath ++which complies with the virtio specifications with vendor specific control ++path. ++ ++QEMU provides two types of vhost-vDPA devices to enable the vDPA device, one ++is type sensitive which means QEMU needs to know the actual device type ++(e.g. 
net, blk, scsi) and another is called "vhost-vDPA generic device" which ++is type insensitive. ++ ++The vhost-vDPA generic device builds on the vhost-vdpa subsystem and virtio ++subsystem. It is quite small, but it can support any type of virtio device. ++ ++Examples ++-------- ++ ++Prepare the vhost-vDPA backends first: ++ ++:: ++ host# ls -l /dev/vhost-vdpa-* ++ crw------- 1 root root 236, 0 Nov 2 00:49 /dev/vhost-vdpa-0 ++ ++Start QEMU with virtio-mmio bus: ++ ++:: ++ host# qemu-system \ ++ -M microvm -m 512 -smp 2 -kernel ... -initrd ... \ ++ -device vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-0 \ ++ ... ++ ++Start QEMU with virtio-pci bus: ++ ++:: ++ host# qemu-system \ ++ -M pc -m 512 -smp 2 \ ++ -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-0 \ ++ ...\ +-- +2.27.0 + diff --git a/feature-Add-log-for-each-modules.patch b/feature-Add-log-for-each-modules.patch new file mode 100644 index 0000000000000000000000000000000000000000..477a2eaa6facf8366775e2c0c7a0f5c30e01b3c5 --- /dev/null +++ b/feature-Add-log-for-each-modules.patch @@ -0,0 +1,250 @@ +From 30cc47b6dd3e9ff4842eb1c2a918bbabfd8c593b Mon Sep 17 00:00:00 2001 +From: "wangxinxin.wang@huawei.com" +Date: Sun, 17 Mar 2024 15:44:28 +0800 +Subject: [PATCH] feature: Add log for each modules + +Add logs for each module.
+ +Signed-off-by: miaoyubo +Signed-off-by: Jingyi Wang +Signed-off-by: Yuan Zhang +--- + accel/kvm/kvm-all.c | 5 ++++- + hw/char/virtio-serial-bus.c | 5 +++++ + hw/pci/pci.c | 1 + + hw/usb/bus.c | 6 ++++++ + hw/usb/host-libusb.c | 5 +++++ + hw/virtio/virtio-scsi-pci.c | 3 +++ + monitor/qmp-cmds.c | 3 +++ + os-posix.c | 1 + + qapi/qmp-dispatch.c | 15 +++++++++++++++ + system/qdev-monitor.c | 5 +++++ + 10 files changed, 48 insertions(+), 1 deletion(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 33f4c6d547..d900df93a4 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -1834,7 +1834,10 @@ void kvm_irqchip_commit_routes(KVMState *s) + s->irq_routes->flags = 0; + trace_kvm_irqchip_commit_routes(); + ret = kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes); +- assert(ret == 0); ++ if (ret < 0) { ++ error_report("Set GSI routing failed: %m"); ++ abort(); ++ } + } + + static void kvm_add_routing_entry(KVMState *s, +diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c +index dd619f0731..44906057be 100644 +--- a/hw/char/virtio-serial-bus.c ++++ b/hw/char/virtio-serial-bus.c +@@ -257,6 +257,8 @@ static size_t send_control_event(VirtIOSerial *vser, uint32_t port_id, + virtio_stw_p(vdev, &cpkt.value, value); + + trace_virtio_serial_send_control_event(port_id, event, value); ++ qemu_log("virtio serial port %d send control message" ++ " event = %d, value = %d\n", port_id, event, value); + return send_control_msg(vser, &cpkt, sizeof(cpkt)); + } + +@@ -364,6 +366,9 @@ static void handle_control_message(VirtIOSerial *vser, void *buf, size_t len) + cpkt.value = virtio_lduw_p(vdev, &gcpkt->value); + + trace_virtio_serial_handle_control_message(cpkt.event, cpkt.value); ++ qemu_log("virtio serial port '%u' handle control message" ++ " event = %d, value = %d\n", ++ virtio_ldl_p(vdev, &gcpkt->id), cpkt.event, cpkt.value); + + if (cpkt.event == VIRTIO_CONSOLE_DEVICE_READY) { + if (!cpkt.value) { +diff --git a/hw/pci/pci.c b/hw/pci/pci.c 
+index c49417abb2..9da41088df 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -2411,6 +2411,7 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, + snprintf(name, sizeof(name), "%s.rom", + vmsd ? vmsd->name : object_get_typename(OBJECT(pdev))); + ++ qemu_log("add rom file: %s\n", name); + pdev->has_rom = true; + memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, pdev->romsize, + &error_fatal); +diff --git a/hw/usb/bus.c b/hw/usb/bus.c +index 92d6ed5626..20cd9b6e6f 100644 +--- a/hw/usb/bus.c ++++ b/hw/usb/bus.c +@@ -536,6 +536,10 @@ void usb_check_attach(USBDevice *dev, Error **errp) + bus->qbus.name, port->path, portspeed); + return; + } ++ ++ qemu_log("attach usb device \"%s\" (%s speed) to VM bus \"%s\", " ++ "port \"%s\" (%s speed)\n", dev->product_desc, devspeed, ++ bus->qbus.name, port->path, portspeed); + } + + void usb_device_attach(USBDevice *dev, Error **errp) +@@ -564,6 +568,8 @@ int usb_device_detach(USBDevice *dev) + + usb_detach(port); + dev->attached = false; ++ qemu_log("detach usb device \"%s\" from VM bus \"%s\", port \"%s\"\n", ++ dev->product_desc, bus->qbus.name, port->path); + return 0; + } + +diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c +index dba469c1ef..11a246ac72 100644 +--- a/hw/usb/host-libusb.c ++++ b/hw/usb/host-libusb.c +@@ -992,6 +992,8 @@ static int usb_host_open(USBHostDevice *s, libusb_device *dev, int hostfd) + + rc = libusb_open(dev, &s->dh); + if (rc != 0) { ++ qemu_log("libusb open usb device bus %d, device %d failed\n", ++ bus_num, addr); + goto fail; + } + } else { +@@ -1019,6 +1021,7 @@ static int usb_host_open(USBHostDevice *s, libusb_device *dev, int hostfd) + + libusb_get_device_descriptor(dev, &s->ddesc); + usb_host_get_port(s->dev, s->port, sizeof(s->port)); ++ qemu_log("open a host usb device on bus %d, device %d\n", bus_num, addr); + + usb_ep_init(udev); + usb_host_ep_update(s); +@@ -1146,6 +1149,8 @@ static int usb_host_close(USBHostDevice *s) + usb_device_detach(udev); + } + ++ 
qemu_log("begin to reset the usb device, bus : %d, device : %d\n", ++ s->bus_num, s->addr); + usb_host_release_interfaces(s); + libusb_reset_device(s->dh); + usb_host_attach_kernel(s); +diff --git a/hw/virtio/virtio-scsi-pci.c b/hw/virtio/virtio-scsi-pci.c +index e8e3442f38..e542d47162 100644 +--- a/hw/virtio/virtio-scsi-pci.c ++++ b/hw/virtio/virtio-scsi-pci.c +@@ -20,6 +20,7 @@ + #include "qemu/module.h" + #include "hw/virtio/virtio-pci.h" + #include "qom/object.h" ++#include "qemu/log.h" + + typedef struct VirtIOSCSIPCI VirtIOSCSIPCI; + +@@ -51,6 +52,8 @@ static void virtio_scsi_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) + VirtIOSCSIConf *conf = &dev->vdev.parent_obj.conf; + char *bus_name; + ++ qemu_log("virtio scsi HBA %s begin to initialize.\n", ++ !proxy->id ? "NULL" : proxy->id); + if (conf->num_queues == VIRTIO_SCSI_AUTO_NUM_QUEUES) { + conf->num_queues = + virtio_pci_optimal_num_queues(VIRTIO_SCSI_VQ_NUM_FIXED); +diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c +index b0f948d337..e78462b857 100644 +--- a/monitor/qmp-cmds.c ++++ b/monitor/qmp-cmds.c +@@ -32,6 +32,7 @@ + #include "hw/mem/memory-device.h" + #include "hw/intc/intc.h" + #include "hw/rdma/rdma.h" ++#include "qemu/log.h" + + NameInfo *qmp_query_name(Error **errp) + { +@@ -110,8 +111,10 @@ void qmp_cont(Error **errp) + } + + if (runstate_check(RUN_STATE_INMIGRATE)) { ++ qemu_log("qmp cont is received in migration\n"); + autostart = 1; + } else { ++ qemu_log("qmp cont is received and vm is started\n"); + vm_start(); + } + } +diff --git a/os-posix.c b/os-posix.c +index 52ef6990ff..8f70ee0534 100644 +--- a/os-posix.c ++++ b/os-posix.c +@@ -306,6 +306,7 @@ int os_mlock(void) + #ifdef HAVE_MLOCKALL + int ret = 0; + ++ qemu_log("do mlockall\n"); + ret = mlockall(MCL_CURRENT | MCL_FUTURE); + if (ret < 0) { + error_report("mlockall: %s", strerror(errno)); +diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c +index 7a215cbfd7..e33efd3740 100644 +--- a/qapi/qmp-dispatch.c ++++ 
b/qapi/qmp-dispatch.c +@@ -25,6 +25,7 @@ + #include "qemu/coroutine.h" + #include "qemu/main-loop.h" + #include "qemu/log.h" ++#include "qapi/qmp/qstring.h" + + Visitor *qobject_input_visitor_new_qmp(QObject *obj) + { +@@ -220,6 +221,20 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ + + assert(!(oob && qemu_in_coroutine())); + assert(monitor_cur() == NULL); ++ ++ json = qobject_to_json(QOBJECT(args)); ++ if (json) { ++ if ((strcmp(command, "query-block-jobs") != 0) ++ && (strcmp(command, "query-migrate") != 0) ++ && (strcmp(command, "query-blockstats") != 0) ++ && (strcmp(command, "query-balloon") != 0) ++ && (strcmp(command, "set_password") != 0)) { ++ qemu_log("qmp_cmd_name: %s, arguments: %s\n", ++ command, json->str); ++ } ++ g_string_free(json, true); ++ } ++ + if (!!(cmd->options & QCO_COROUTINE) == qemu_in_coroutine()) { + monitor_set_cur(qemu_coroutine_self(), cur_mon); + cmd->fn(args, &ret, &err); +diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c +index b10e483a9a..5b35704b5e 100644 +--- a/system/qdev-monitor.c ++++ b/system/qdev-monitor.c +@@ -644,6 +644,7 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts, + if (path != NULL) { + bus = qbus_find(path, errp); + if (!bus) { ++ qemu_log("can not find bus for %s\n", driver); + return NULL; + } + if (!object_dynamic_cast(OBJECT(bus), dc->bus_type)) { +@@ -714,6 +715,8 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts, + object_set_properties_from_keyval(&dev->parent_obj, dev->opts, from_json, + errp); + if (*errp) { ++ qemu_log("the bus %s -driver %s set property failed\n", ++ bus ? bus->name : "None", driver); + goto err_del_dev; + } + qemu_log("add qdev %s:%s success\n", driver, dev->id ? 
dev->id : "none"); +@@ -738,6 +741,8 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) + + ret = qdev_device_add_from_qdict(qdict, false, errp); + if (ret) { ++ qemu_log("add qdev %s:%s success\n", qemu_opt_get(opts, "driver"), ++ qemu_opts_id(opts) ? qemu_opts_id(opts) : "none"); + qemu_opts_del(opts); + } + qobject_unref(qdict); +-- +2.27.0 + diff --git a/feature-Add-logs-for-vm-start-and-destroy.patch b/feature-Add-logs-for-vm-start-and-destroy.patch new file mode 100644 index 0000000000000000000000000000000000000000..b3964bdf4efdb14f7e62d9f914938c5da7429296 --- /dev/null +++ b/feature-Add-logs-for-vm-start-and-destroy.patch @@ -0,0 +1,158 @@ +From 9a47271fb6c855ec92e087d59d65f3cc0c684725 Mon Sep 17 00:00:00 2001 +From: "wangxinxin.wang@huawei.com" +Date: Sun, 17 Mar 2024 15:04:09 +0800 +Subject: [PATCH] feature: Add logs for vm start and destroy + +Add QEMU_LOG for vm start and destroy + +Signed-off-by: miaoyubo +Signed-off-by: Jingyi Wang +Signed-off-by: Yuan Zhang +--- + hw/acpi/core.c | 4 ++++ + hw/core/reset.c | 2 ++ + system/main.c | 2 ++ + system/runstate.c | 2 ++ + system/vl.c | 6 ++++++ + 5 files changed, 16 insertions(+) + +diff --git a/hw/acpi/core.c b/hw/acpi/core.c +index ec5e127d17..b6241f70e9 100644 +--- a/hw/acpi/core.c ++++ b/hw/acpi/core.c +@@ -24,6 +24,7 @@ + #include "hw/acpi/acpi.h" + #include "hw/nvram/fw_cfg.h" + #include "qemu/config-file.h" ++#include "qemu/log.h" + #include "qapi/error.h" + #include "qapi/opts-visitor.h" + #include "qapi/qapi-events-run-state.h" +@@ -588,13 +589,16 @@ static void acpi_pm_cnt_write(void *opaque, hwaddr addr, uint64_t val, + uint16_t sus_typ = (val >> 10) & 7; + switch (sus_typ) { + case 0: /* soft power off */ ++ qemu_log("VM will be soft power off\n"); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + break; + case 1: ++ qemu_log("VM will be suspend state\n"); + qemu_system_suspend_request(); + break; + default: + if (sus_typ == ar->pm1.cnt.s4_val) { /* S4 request */ ++ 
qemu_log("VM will be S4 state\n"); + qapi_event_send_suspend_disk(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + } +diff --git a/hw/core/reset.c b/hw/core/reset.c +index d3263b613e..fa63bfedb7 100644 +--- a/hw/core/reset.c ++++ b/hw/core/reset.c +@@ -25,6 +25,7 @@ + + #include "qemu/osdep.h" + #include "qemu/queue.h" ++#include "qemu/log.h" + #include "sysemu/reset.h" + + /* reset/shutdown handler */ +@@ -75,6 +76,7 @@ void qemu_devices_reset(ShutdownCause reason) + { + QEMUResetEntry *re, *nre; + ++ qemu_log("reset all devices\n"); + /* reset all devices */ + QTAILQ_FOREACH_SAFE(re, &reset_handlers, entry, nre) { + if (reason == SHUTDOWN_CAUSE_SNAPSHOT_LOAD && +diff --git a/system/main.c b/system/main.c +index 9b91d21ea8..28bb283ebf 100644 +--- a/system/main.c ++++ b/system/main.c +@@ -23,6 +23,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/log.h" + #include "qemu-main.h" + #include "sysemu/sysemu.h" + +@@ -34,6 +35,7 @@ int qemu_default_main(void) + { + int status; + ++ qemu_log("qemu enter main_loop\n"); + status = qemu_main_loop(); + qemu_cleanup(status); + +diff --git a/system/runstate.c b/system/runstate.c +index 62e6db8d42..538c645326 100644 +--- a/system/runstate.c ++++ b/system/runstate.c +@@ -769,9 +769,11 @@ static bool main_loop_should_exit(int *status) + } + if (qemu_powerdown_requested()) { + qemu_system_powerdown(); ++ qemu_log("domain is power down by outside operation\n"); + } + if (qemu_vmstop_requested(&r)) { + vm_stop(r); ++ qemu_log("domain is stopped by outside operation\n"); + } + return false; + } +diff --git a/system/vl.c b/system/vl.c +index 2bcd9efb9a..165c3cae8a 100644 +--- a/system/vl.c ++++ b/system/vl.c +@@ -26,6 +26,7 @@ + #include "qemu/help-texts.h" + #include "qemu/datadir.h" + #include "qemu/units.h" ++#include "qemu/log.h" + #include "exec/cpu-common.h" + #include "exec/page-vary.h" + #include "hw/qdev-properties.h" +@@ -2633,6 +2634,7 @@ static void qemu_create_cli_devices(void) + } + + /* init 
generic devices */ ++ qemu_log("device init start\n"); + rom_set_order_override(FW_CFG_ORDER_OVERRIDE_DEVICE); + qemu_opts_foreach(qemu_find_opts("device"), + device_init_func, NULL, &error_fatal); +@@ -2778,6 +2780,7 @@ void qemu_init(int argc, char **argv) + + qemu_init_subsystems(); + ++ qemu_log("qemu pid is %d, options parsing start\n", getpid()); + /* first pass of option parsing */ + optind = 1; + while (optind < argc) { +@@ -2997,6 +3000,7 @@ void qemu_init(int argc, char **argv) + exit(0); + break; + case QEMU_OPTION_m: ++ qemu_log("memory options parse start\n"); + opts = qemu_opts_parse_noisily(qemu_find_opts("memory"), optarg, true); + if (opts == NULL) { + exit(1); +@@ -3714,6 +3718,7 @@ void qemu_init(int argc, char **argv) + */ + + machine_class = MACHINE_GET_CLASS(current_machine); ++ qemu_log("configure accelerator %s start\n", machine_class->name); + if (!qtest_enabled() && machine_class->deprecation_reason) { + warn_report("Machine type '%s' is deprecated: %s", + machine_class->name, machine_class->deprecation_reason); +@@ -3732,6 +3737,7 @@ void qemu_init(int argc, char **argv) + */ + migration_object_init(); + ++ qemu_log("machine init start\n"); + /* parse features once if machine provides default cpu_type */ + current_machine->cpu_type = machine_class->default_cpu_type; + if (cpu_option) { +-- +2.27.0 + diff --git a/fix-qemu-core-when-vhost-user-net-config-with-server.patch b/fix-qemu-core-when-vhost-user-net-config-with-server.patch new file mode 100644 index 0000000000000000000000000000000000000000..68ba7f2bc15cd6da54f26c3bf5886786bb609324 --- /dev/null +++ b/fix-qemu-core-when-vhost-user-net-config-with-server.patch @@ -0,0 +1,46 @@ +From 97335ac382e36db18a61d3891f1fafd15475822e Mon Sep 17 00:00:00 2001 +From: caojinhuahw +Date: Mon, 19 Dec 2022 12:35:50 +0000 +Subject: [PATCH] fix qemu-core when vhost-user-net config with server mode + +commit 3a223111d7 set default reconnect for vhost-user-net +device, if vhost-user-net config with 
server mode will +casuse the core when ovs client stop. +tcp_chr_disconnect ---> set tcp_char state disconnect +tcp_chr start reconnect ---> set tcp_char state connecting +tcp_char is listen ---> call tcp_chr_accept() +fun tcp_char_accept() set tcp_char state to connecting, but +current tcp_char state already is connecting, assert failed +in tcp_char_change_state() raise qemu core + assert(s->state == TCP_CHARDEV_STATE_DISCONNECTED) + +this commit check tcp_char mode, if tcp_char config with server +mode, dont set reconnect time for tcp_chr. + +fix: 3a223111d7 vhost-user: Add support reconnect vhost-user socket + +Signed-off-by: caojinhuahw +--- + chardev/char-socket.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index 9c60e15c8e..0c9ab069ae 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -347,6 +347,12 @@ static void tcp_chr_set_reconnect_time(Chardev *chr, + void qemu_chr_set_reconnect_time(Chardev *chr, int64_t reconnect_time) + { + ChardevClass *cc = CHARDEV_GET_CLASS(chr); ++ SocketChardev *s = SOCKET_CHARDEV(chr); ++ ++ /* if sock dev is listen, dont set reconnect time */ ++ if (s->is_listen) { ++ return; ++ } + + if (cc->chr_set_reconnect_time) { + cc->chr_set_reconnect_time(chr, reconnect_time); +-- +2.27.0 + diff --git a/freeclock-add-qmp-command-to-get-time-offset-of-vm-i.patch b/freeclock-add-qmp-command-to-get-time-offset-of-vm-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..ffbb8a8643d72a257a6914007a971d6d63695704 --- /dev/null +++ b/freeclock-add-qmp-command-to-get-time-offset-of-vm-i.patch @@ -0,0 +1,129 @@ +From 0a6baf4799dd6e70d7959002ea6ddb998eddbc6d Mon Sep 17 00:00:00 2001 +From: "shenghualong@huawei.com" +Date: Mon, 18 Mar 2024 15:53:43 +0800 +Subject: [PATCH] freeclock: add qmp command to get time offset of vm in + seconds + +When setting the system time in VM, a RTC_CHANGE event will be reported. 
+However, if libvirt is restarted while the event is be reporting, the +event will be lost and we will get the old time (not the time we set in +VM) after rebooting the VM. + +We save the delta time in QEMU and add a rtc-date-diff qmp to get the +delta time so that libvirt can get the latest time in VM according to +the qmp after libvirt is restarted. + +Signed-off-by: Peng Liang +Signed-off-by: zhangxinhao +Signed-off-by: Yuan Zhang +--- + hw/core/machine-qmp-cmds.c | 6 ++++++ + include/sysemu/rtc.h | 4 +++- + qapi/misc.json | 9 +++++++++ + qapi/pragma.json | 3 ++- + system/rtc.c | 11 +++++++++++ + 5 files changed, 31 insertions(+), 2 deletions(-) + +diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c +index 3860a50c3b..f1389ef644 100644 +--- a/hw/core/machine-qmp-cmds.c ++++ b/hw/core/machine-qmp-cmds.c +@@ -8,6 +8,7 @@ + */ + + #include "qemu/osdep.h" ++#include "sysemu/rtc.h" + #include "hw/acpi/vmgenid.h" + #include "hw/boards.h" + #include "hw/intc/intc.h" +@@ -373,6 +374,11 @@ HumanReadableText *qmp_x_query_irq(Error **errp) + return human_readable_text_from_str(buf); + } + ++int64_t qmp_query_rtc_date_diff(Error **errp) ++{ ++ return get_rtc_date_diff(); ++} ++ + GuidInfo *qmp_query_vm_generation_id(Error **errp) + { + GuidInfo *info; +diff --git a/include/sysemu/rtc.h b/include/sysemu/rtc.h +index 0fc8ad6fdf..3edae762d4 100644 +--- a/include/sysemu/rtc.h ++++ b/include/sysemu/rtc.h +@@ -54,5 +54,7 @@ void qemu_get_timedate(struct tm *tm, time_t offset); + * then this function will return 3600. 
+ */ + time_t qemu_timedate_diff(struct tm *tm); +- ++time_t get_rtc_date_diff(void); ++void set_rtc_date_diff(time_t diff); ++int64_t qmp_query_rtc_date_diff(Error **errp); + #endif +diff --git a/qapi/misc.json b/qapi/misc.json +index cda2effa81..1832d5f460 100644 +--- a/qapi/misc.json ++++ b/qapi/misc.json +@@ -550,6 +550,15 @@ + 'returns': ['CommandLineOptionInfo'], + 'allow-preconfig': true} + ++## ++# @query-rtc-date-diff: ++# ++# get vm's time offset ++# ++# Since: 2.8 ++## ++{ 'command': 'query-rtc-date-diff', 'returns': 'int64' } ++ + ## + # @RTC_CHANGE: + # +diff --git a/qapi/pragma.json b/qapi/pragma.json +index 0aa4eeddd3..7a07b44bb1 100644 +--- a/qapi/pragma.json ++++ b/qapi/pragma.json +@@ -30,7 +30,8 @@ + 'qom-get', + 'query-tpm-models', + 'query-tpm-types', +- 'ringbuf-read' ], ++ 'ringbuf-read', ++ 'query-rtc-date-diff'], + # Externally visible types whose member names may use uppercase + 'member-name-exceptions': [ # visible in: + 'ACPISlotType', # query-acpi-ospm-status +diff --git a/system/rtc.c b/system/rtc.c +index 4904581abe..e16b5fffc5 100644 +--- a/system/rtc.c ++++ b/system/rtc.c +@@ -44,6 +44,7 @@ static time_t rtc_ref_start_datetime; + static int rtc_realtime_clock_offset; /* used only with QEMU_CLOCK_REALTIME */ + static int rtc_host_datetime_offset = -1; /* valid & used only with + RTC_BASE_DATETIME */ ++static time_t rtc_date_diff = 0; + QEMUClockType rtc_clock; + /***********************************************************/ + /* RTC reference time/date access */ +@@ -108,6 +109,16 @@ time_t qemu_timedate_diff(struct tm *tm) + return seconds - qemu_ref_timedate(QEMU_CLOCK_HOST); + } + ++time_t get_rtc_date_diff(void) ++{ ++ return rtc_date_diff; ++} ++ ++void set_rtc_date_diff(time_t diff) ++{ ++ rtc_date_diff = diff; ++} ++ + static void configure_rtc_base_datetime(const char *startdate) + { + time_t rtc_start_datetime; +-- +2.27.0 + diff --git a/freeclock-set-rtc_date_diff-for-X86.patch b/freeclock-set-rtc_date_diff-for-X86.patch new 
file mode 100644 index 0000000000000000000000000000000000000000..4711551f99a870a82f292cc3e9ba39e6f695c163 --- /dev/null +++ b/freeclock-set-rtc_date_diff-for-X86.patch @@ -0,0 +1,31 @@ +From 0a0010fe0656a63e82aea495ab0a59145d3b5750 Mon Sep 17 00:00:00 2001 +From: "shenghualong@huawei.com" +Date: Thu, 21 Mar 2024 12:26:38 +0800 +Subject: [PATCH] freeclock: set rtc_date_diff for X86 + +Set rtc_date_diff in mc146818rtc. + +Signed-off-by: l00500761 +Signed-off-by: zhangxinhao +Signed-off-by: Yuan Zhang +--- + hw/rtc/mc146818rtc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c +index 2d391a8396..e61c76d060 100644 +--- a/hw/rtc/mc146818rtc.c ++++ b/hw/rtc/mc146818rtc.c +@@ -606,7 +606,8 @@ static void rtc_set_time(MC146818RtcState *s) + s->base_rtc = mktimegm(&tm); + s->last_update = qemu_clock_get_ns(rtc_clock); + +- qapi_event_send_rtc_change(qemu_timedate_diff(&tm), qom_path); ++ set_rtc_date_diff(qemu_timedate_diff(&tm)); ++ qapi_event_send_rtc_change(get_rtc_date_diff(), qom_path); + } + + static void rtc_set_cmos(MC146818RtcState *s, const struct tm *tm) +-- +2.27.0 + diff --git a/freeclock-set-rtc_date_diff-for-arm.patch b/freeclock-set-rtc_date_diff-for-arm.patch new file mode 100644 index 0000000000000000000000000000000000000000..8c6b15ab26be27990b2d8028fc647dd48fca9312 --- /dev/null +++ b/freeclock-set-rtc_date_diff-for-arm.patch @@ -0,0 +1,31 @@ +From 156be254a48d1d9b7aadcbfa4423485c592bc75d Mon Sep 17 00:00:00 2001 +From: "shenghualong@huawei.com" +Date: Thu, 21 Mar 2024 11:21:14 +0800 +Subject: [PATCH] freeclock: set rtc_date_diff for arm + +Set rtc_date_diff in pl031. 
+ +Signed-off-by: Peng Liang +Signed-off-by: zhangxinhao +Signed-off-by: Yuan Zhang +--- + hw/rtc/pl031.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/rtc/pl031.c b/hw/rtc/pl031.c +index b01d0e75d1..f2e6baebba 100644 +--- a/hw/rtc/pl031.c ++++ b/hw/rtc/pl031.c +@@ -144,7 +144,8 @@ static void pl031_write(void * opaque, hwaddr offset, + s->tick_offset += value - pl031_get_count(s); + + qemu_get_timedate(&tm, s->tick_offset); +- qapi_event_send_rtc_change(qemu_timedate_diff(&tm), qom_path); ++ set_rtc_date_diff(qemu_timedate_diff(&tm)); ++ qapi_event_send_rtc_change(get_rtc_date_diff(), qom_path); + + pl031_set_alarm(s); + break; +-- +2.27.0 + diff --git a/hw-acpi-ACPI-AML-Changes-to-reflect-the-correct-_STA.patch b/hw-acpi-ACPI-AML-Changes-to-reflect-the-correct-_STA.patch new file mode 100644 index 0000000000000000000000000000000000000000..34fa1c91b9d0e375f7f85a4477b3d897b63d936f --- /dev/null +++ b/hw-acpi-ACPI-AML-Changes-to-reflect-the-correct-_STA.patch @@ -0,0 +1,187 @@ +From 19a8fbccbc997110f472df308813ad2d7738065c Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Mon, 14 Nov 2022 02:25:28 +0000 +Subject: [PATCH] hw/acpi: ACPI/AML Changes to reflect the correct + _STA.{PRES,ENA} Bits to Guest + +ACPI AML changes to properly reflect the _STA.PRES and _STA.ENA Bits to the +guest during initialzation, when CPUs are hotplugged and after CPUs are +hot-unplugged. 
+ +Signed-off-by: Salil Mehta +--- + hw/acpi/cpu.c | 49 +++++++++++++++++++++++++++++++--- + hw/acpi/generic_event_device.c | 11 ++++++++ + include/hw/acpi/cpu.h | 2 ++ + 3 files changed, 58 insertions(+), 4 deletions(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index c8c11e51c6..991f1d4181 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -64,10 +64,11 @@ static uint64_t cpu_hotplug_rd(void *opaque, hwaddr addr, unsigned size) + cdev = &cpu_st->devs[cpu_st->selector]; + switch (addr) { + case ACPI_CPU_FLAGS_OFFSET_RW: /* pack and return is_* fields */ +- val |= cdev->cpu ? 1 : 0; ++ val |= cdev->is_enabled ? 1 : 0; + val |= cdev->is_inserting ? 2 : 0; + val |= cdev->is_removing ? 4 : 0; + val |= cdev->fw_remove ? 16 : 0; ++ val |= cdev->is_present ? 32 : 0; + trace_cpuhp_acpi_read_flags(cpu_st->selector, val); + break; + case ACPI_CPU_CMD_DATA_OFFSET_RW: +@@ -229,7 +230,21 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, + struct CPUState *cpu = CPU(id_list->cpus[i].cpu); + if (qemu_present_cpu(cpu)) { + state->devs[i].cpu = cpu; ++ state->devs[i].is_present = true; ++ } else { ++ if (qemu_persistent_cpu(cpu)) { ++ state->devs[i].is_present = true; ++ } else { ++ state->devs[i].is_present = false; ++ } + } ++ ++ if (qemu_enabled_cpu(cpu)) { ++ state->devs[i].is_enabled = true; ++ } else { ++ state->devs[i].is_enabled = false; ++ } ++ + state->devs[i].arch_id = id_list->cpus[i].arch_id; + } + memory_region_init_io(&state->ctrl_reg, owner, &cpu_hotplug_ops, state, +@@ -262,6 +277,8 @@ void acpi_cpu_plug_cb(HotplugHandler *hotplug_dev, + } + + cdev->cpu = CPU(dev); ++ cdev->is_present = true; ++ cdev->is_enabled = true; + if (dev->hotplugged) { + cdev->is_inserting = true; + acpi_send_event(DEVICE(hotplug_dev), ACPI_CPU_HOTPLUG_STATUS); +@@ -293,6 +310,11 @@ void acpi_cpu_unplug_cb(CPUHotplugState *cpu_st, + return; + } + ++ cdev->is_enabled = false; ++ if (!qemu_persistent_cpu(CPU(dev))) { ++ cdev->is_present = false; ++ } ++ + cdev->cpu = NULL; 
+ } + +@@ -303,6 +325,8 @@ static const VMStateDescription vmstate_cpuhp_sts = { + .fields = (VMStateField[]) { + VMSTATE_BOOL(is_inserting, AcpiCpuStatus), + VMSTATE_BOOL(is_removing, AcpiCpuStatus), ++ VMSTATE_BOOL(is_present, AcpiCpuStatus), ++ VMSTATE_BOOL(is_enabled, AcpiCpuStatus), + VMSTATE_UINT32(ost_event, AcpiCpuStatus), + VMSTATE_UINT32(ost_status, AcpiCpuStatus), + VMSTATE_END_OF_LIST() +@@ -340,6 +364,7 @@ const VMStateDescription vmstate_cpu_hotplug = { + #define CPU_REMOVE_EVENT "CRMV" + #define CPU_EJECT_EVENT "CEJ0" + #define CPU_FW_EJECT_EVENT "CEJF" ++#define CPU_PRESENT "CPRS" + + void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + build_madt_cpu_fn build_madt_cpu, +@@ -400,7 +425,9 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + aml_append(field, aml_named_field(CPU_EJECT_EVENT, 1)); + /* tell firmware to do device eject, write only */ + aml_append(field, aml_named_field(CPU_FW_EJECT_EVENT, 1)); +- aml_append(field, aml_reserved_field(3)); ++ /* 1 if present, read only */ ++ aml_append(field, aml_named_field(CPU_PRESENT, 1)); ++ aml_append(field, aml_reserved_field(2)); + aml_append(field, aml_named_field(CPU_COMMAND, 8)); + aml_append(cpu_ctrl_dev, field); + +@@ -430,6 +457,7 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + Aml *ctrl_lock = aml_name("%s.%s", cphp_res_path, CPU_LOCK); + Aml *cpu_selector = aml_name("%s.%s", cphp_res_path, CPU_SELECTOR); + Aml *is_enabled = aml_name("%s.%s", cphp_res_path, CPU_ENABLED); ++ Aml *is_present = aml_name("%s.%s", cphp_res_path, CPU_PRESENT); + Aml *cpu_cmd = aml_name("%s.%s", cphp_res_path, CPU_COMMAND); + Aml *cpu_data = aml_name("%s.%s", cphp_res_path, CPU_DATA); + Aml *ins_evt = aml_name("%s.%s", cphp_res_path, CPU_INSERT_EVENT); +@@ -458,13 +486,26 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + { + Aml *idx = aml_arg(0); + Aml *sta = aml_local(0); ++ Aml 
*ifctx2; ++ Aml *else_ctx; + + aml_append(method, aml_acquire(ctrl_lock, 0xFFFF)); + aml_append(method, aml_store(idx, cpu_selector)); + aml_append(method, aml_store(zero, sta)); +- ifctx = aml_if(aml_equal(is_enabled, one)); ++ ifctx = aml_if(aml_equal(is_present, one)); + { +- aml_append(ifctx, aml_store(aml_int(0xF), sta)); ++ ifctx2 = aml_if(aml_equal(is_enabled, one)); ++ { ++ /* cpu is present and enabled */ ++ aml_append(ifctx2, aml_store(aml_int(0xF), sta)); ++ } ++ aml_append(ifctx, ifctx2); ++ else_ctx = aml_else(); ++ { ++ /* cpu is present but disabled */ ++ aml_append(else_ctx, aml_store(aml_int(0xD), sta)); ++ } ++ aml_append(ifctx, else_ctx); + } + aml_append(method, ifctx); + aml_append(method, aml_release(ctrl_lock)); +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index d2fa1d0e4a..b84602b238 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -319,6 +319,16 @@ static const VMStateDescription vmstate_memhp_state = { + } + }; + ++static const VMStateDescription vmstate_cpuhp_state = { ++ .name = "acpi-ged/cpuhp", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .fields = (VMStateField[]) { ++ VMSTATE_CPU_HOTPLUG(cpuhp_state, AcpiGedState), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ + static const VMStateDescription vmstate_ged_state = { + .name = "acpi-ged-state", + .version_id = 1, +@@ -367,6 +377,7 @@ static const VMStateDescription vmstate_acpi_ged = { + }, + .subsections = (const VMStateDescription * []) { + &vmstate_memhp_state, ++ &vmstate_cpuhp_state, + &vmstate_ghes_state, + NULL + } +diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h +index b31a2e50d9..fced952152 100644 +--- a/include/hw/acpi/cpu.h ++++ b/include/hw/acpi/cpu.h +@@ -23,6 +23,8 @@ typedef struct AcpiCpuStatus { + uint64_t arch_id; + bool is_inserting; + bool is_removing; ++ bool is_present; ++ bool is_enabled; + bool fw_remove; + uint32_t ost_event; + uint32_t ost_status; +-- +2.27.0 + diff --git 
a/hw-acpi-Add-ACPI-CPU-hotplug-init-stub.patch b/hw-acpi-Add-ACPI-CPU-hotplug-init-stub.patch new file mode 100644 index 0000000000000000000000000000000000000000..072d4eb9bac4ea0a553e14b6e2ce85a6961cc19b --- /dev/null +++ b/hw-acpi-Add-ACPI-CPU-hotplug-init-stub.patch @@ -0,0 +1,34 @@ +From e442d0f8670dc4218ab4beebe645e369f925410d Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 19 Aug 2023 00:26:20 +0000 +Subject: [PATCH] hw/acpi: Add ACPI CPU hotplug init stub + +ACPI CPU hotplug related initialization should only happend if ACPI_CPU_HOTPLUG +support has been enabled for particular architecture. Add cpu_hotplug_hw_init() +stub to avoid compilation break. + +Signed-off-by: Salil Mehta +--- + hw/acpi/acpi-cpu-hotplug-stub.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/acpi/acpi-cpu-hotplug-stub.c b/hw/acpi/acpi-cpu-hotplug-stub.c +index 3fc4b14c26..c6c61bb9cd 100644 +--- a/hw/acpi/acpi-cpu-hotplug-stub.c ++++ b/hw/acpi/acpi-cpu-hotplug-stub.c +@@ -19,6 +19,12 @@ void legacy_acpi_cpu_hotplug_init(MemoryRegion *parent, Object *owner, + return; + } + ++void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, ++ CPUHotplugState *state, hwaddr base_addr) ++{ ++ return; ++} ++ + void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, ACPIOSTInfoList ***list) + { + return; +-- +2.27.0 + diff --git a/hw-acpi-Init-GED-framework-with-cpu-hotplug-events.patch b/hw-acpi-Init-GED-framework-with-cpu-hotplug-events.patch new file mode 100644 index 0000000000000000000000000000000000000000..191328fd094c386d672ca9c32341105fa1cc0b1a --- /dev/null +++ b/hw-acpi-Init-GED-framework-with-cpu-hotplug-events.patch @@ -0,0 +1,81 @@ +From de1c8d6be3de67ff9854e9b008a000e1898aaacb Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Mon, 8 Jun 2020 21:50:08 +0100 +Subject: [PATCH] hw/acpi: Init GED framework with cpu hotplug events + +ACPI GED(as described in the ACPI 6.2 spec) can be used to generate ACPI events +when OSPM/guest receives an interrupt listed in the _CRS 
object of GED. OSPM +then maps or demultiplexes the event by evaluating _EVT method. + +This change adds the support of cpu hotplug event initialization in the +existing GED framework. + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/acpi/generic_event_device.c | 8 ++++++++ + include/hw/acpi/generic_event_device.h | 5 +++++ + 2 files changed, 13 insertions(+) + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index a3d31631fe..d2fa1d0e4a 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -25,6 +25,7 @@ static const uint32_t ged_supported_events[] = { + ACPI_GED_MEM_HOTPLUG_EVT, + ACPI_GED_PWR_DOWN_EVT, + ACPI_GED_NVDIMM_HOTPLUG_EVT, ++ ACPI_GED_CPU_HOTPLUG_EVT, + }; + + /* +@@ -400,6 +401,13 @@ static void acpi_ged_initfn(Object *obj) + memory_region_init_io(&ged_st->regs, obj, &ged_regs_ops, ged_st, + TYPE_ACPI_GED "-regs", ACPI_GED_REG_COUNT); + sysbus_init_mmio(sbd, &ged_st->regs); ++ ++ s->cpuhp.device = OBJECT(s); ++ memory_region_init(&s->container_cpuhp, OBJECT(dev), "cpuhp container", ++ ACPI_CPU_HOTPLUG_REG_LEN); ++ sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container_cpuhp); ++ cpu_hotplug_hw_init(&s->container_cpuhp, OBJECT(dev), ++ &s->cpuhp_state, 0); + } + + static void acpi_ged_class_init(ObjectClass *class, void *data) +diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h +index ba84ce0214..a803ea818e 100644 +--- a/include/hw/acpi/generic_event_device.h ++++ b/include/hw/acpi/generic_event_device.h +@@ -60,6 +60,7 @@ + #define HW_ACPI_GENERIC_EVENT_DEVICE_H + + #include "hw/sysbus.h" ++#include "hw/acpi/cpu_hotplug.h" + #include "hw/acpi/memory_hotplug.h" + #include "hw/acpi/ghes.h" + #include "qom/object.h" +@@ -95,6 +96,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(AcpiGedState, ACPI_GED) + #define ACPI_GED_MEM_HOTPLUG_EVT 0x1 + #define 
ACPI_GED_PWR_DOWN_EVT 0x2 + #define ACPI_GED_NVDIMM_HOTPLUG_EVT 0x4 ++#define ACPI_GED_CPU_HOTPLUG_EVT 0x8 + + typedef struct GEDState { + MemoryRegion evt; +@@ -106,6 +108,9 @@ struct AcpiGedState { + SysBusDevice parent_obj; + MemHotplugState memhp_state; + MemoryRegion container_memhp; ++ CPUHotplugState cpuhp_state; ++ MemoryRegion container_cpuhp; ++ AcpiCpuHotplug cpuhp; + GEDState ged_state; + uint32_t ged_event_bitmap; + qemu_irq irq; +-- +2.27.0 + diff --git a/hw-acpi-Make-_MAT-method-optional.patch b/hw-acpi-Make-_MAT-method-optional.patch new file mode 100644 index 0000000000000000000000000000000000000000..5695a1981c6efbe5ed71981f97dcf81d8eee5e8e --- /dev/null +++ b/hw-acpi-Make-_MAT-method-optional.patch @@ -0,0 +1,41 @@ +From e9b0d476172e872bf695780a9ffa8072faeb3cd0 Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Mon, 25 Apr 2022 17:40:57 +0100 +Subject: [PATCH] hw/acpi: Make _MAT method optional + +The GICC interface on arm64 vCPUs is statically defined in the MADT, and +doesn't require a _MAT entry. Although the GICC is indicated as present +by the MADT entry, it can only be used from vCPU sysregs, which aren't +accessible until hot-add. 
+ +Co-developed-by: Jean-Philippe Brucker +Signed-off-by: Jean-Philippe Brucker +Co-developed-by: Jonathan Cameron +Signed-off-by: Jonathan Cameron +Signed-off-by: Salil Mehta +--- + hw/acpi/cpu.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index 991f1d4181..c922c380aa 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -720,9 +720,11 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + aml_append(dev, method); + + /* build _MAT object */ +- build_madt_cpu(i, arch_ids, madt_buf, true); /* set enabled flag */ +- aml_append(dev, aml_name_decl("_MAT", +- aml_buffer(madt_buf->len, (uint8_t *)madt_buf->data))); ++ if (build_madt_cpu) { ++ build_madt_cpu(i, arch_ids, madt_buf, true); /* set enabled flag */ ++ aml_append(dev, aml_name_decl("_MAT", ++ aml_buffer(madt_buf->len, (uint8_t *)madt_buf->data))); ++ } + g_array_free(madt_buf, true); + + if (CPU(arch_ids->cpus[i].cpu) != first_cpu) { +-- +2.27.0 + diff --git a/hw-acpi-Move-CPU-ctrl-dev-MMIO-region-len-macro-to-c.patch b/hw-acpi-Move-CPU-ctrl-dev-MMIO-region-len-macro-to-c.patch new file mode 100644 index 0000000000000000000000000000000000000000..9bfb91e15162c90365c621927205f857269bc4c3 --- /dev/null +++ b/hw-acpi-Move-CPU-ctrl-dev-MMIO-region-len-macro-to-c.patch @@ -0,0 +1,52 @@ +From fd6e7e7278e1c0fb08e0a09d9e22157e11b36ece Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sun, 20 Aug 2023 17:11:04 +0000 +Subject: [PATCH] hw/acpi: Move CPU ctrl-dev MMIO region len macro to common + header file + +CPU ctrl-dev MMIO region length could be used in ACPI GED (common ACPI code +across architectures) and various other architecture specific places. To make +these code places independent of compilation order, ACPI_CPU_HOTPLUG_REG_LEN +macro should be moved to a header file. 
+ +Signed-off-by: Salil Mehta +--- + hw/acpi/cpu.c | 2 +- + include/hw/acpi/cpu_hotplug.h | 2 ++ + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index 011d2c6c2d..4b24a25003 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -1,13 +1,13 @@ + #include "qemu/osdep.h" + #include "migration/vmstate.h" + #include "hw/acpi/cpu.h" ++#include "hw/acpi/cpu_hotplug.h" + #include "hw/core/cpu.h" + #include "qapi/error.h" + #include "qapi/qapi-events-acpi.h" + #include "trace.h" + #include "sysemu/numa.h" + +-#define ACPI_CPU_HOTPLUG_REG_LEN 12 + #define ACPI_CPU_SELECTOR_OFFSET_WR 0 + #define ACPI_CPU_FLAGS_OFFSET_RW 4 + #define ACPI_CPU_CMD_OFFSET_WR 5 +diff --git a/include/hw/acpi/cpu_hotplug.h b/include/hw/acpi/cpu_hotplug.h +index 3b932abbbb..48b291e45e 100644 +--- a/include/hw/acpi/cpu_hotplug.h ++++ b/include/hw/acpi/cpu_hotplug.h +@@ -19,6 +19,8 @@ + #include "hw/hotplug.h" + #include "hw/acpi/cpu.h" + ++#define ACPI_CPU_HOTPLUG_REG_LEN 12 ++ + typedef struct AcpiCpuHotplug { + Object *device; + MemoryRegion io; +-- +2.27.0 + diff --git a/hw-acpi-Update-ACPI-GED-framework-to-support-vCPU-Ho.patch b/hw-acpi-Update-ACPI-GED-framework-to-support-vCPU-Ho.patch new file mode 100644 index 0000000000000000000000000000000000000000..1b2a0a023e0ed032379dd39051804d267edad0f1 --- /dev/null +++ b/hw-acpi-Update-ACPI-GED-framework-to-support-vCPU-Ho.patch @@ -0,0 +1,77 @@ +From 0bdb1861985704af9b82e35053b5ab99f7880eb6 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Thu, 7 May 2020 21:30:09 +0100 +Subject: [PATCH] hw/acpi: Update ACPI GED framework to support vCPU Hotplug + +ACPI GED shall be used to convey to the guest kernel about any CPU hot-(un)plug +events. Therefore, existing ACPI GED framework inside QEMU needs to be enhanced +to support CPU hotplug state and events. 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/acpi/generic_event_device.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index ad252e6a91..0266733a54 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -12,6 +12,7 @@ + #include "qemu/osdep.h" + #include "qapi/error.h" + #include "hw/acpi/acpi.h" ++#include "hw/acpi/cpu.h" + #include "hw/acpi/generic_event_device.h" + #include "hw/irq.h" + #include "hw/mem/pc-dimm.h" +@@ -239,6 +240,8 @@ static void acpi_ged_device_plug_cb(HotplugHandler *hotplug_dev, + } else { + acpi_memory_plug_cb(hotplug_dev, &s->memhp_state, dev, errp); + } ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ acpi_cpu_plug_cb(hotplug_dev, &s->cpuhp_state, dev, errp); + } else { + error_setg(errp, "virt: device plug request for unsupported device" + " type: %s", object_get_typename(OBJECT(dev))); +@@ -253,6 +256,8 @@ static void acpi_ged_unplug_request_cb(HotplugHandler *hotplug_dev, + if ((object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) && + !(object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)))) { + acpi_memory_unplug_request_cb(hotplug_dev, &s->memhp_state, dev, errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ acpi_cpu_unplug_request_cb(hotplug_dev, &s->cpuhp_state, dev, errp); + } else { + error_setg(errp, "acpi: device unplug request for unsupported device" + " type: %s", object_get_typename(OBJECT(dev))); +@@ -266,6 +271,8 @@ static void acpi_ged_unplug_cb(HotplugHandler *hotplug_dev, + + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + acpi_memory_unplug_cb(&s->memhp_state, dev, errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ acpi_cpu_unplug_cb(&s->cpuhp_state, dev, errp); + } else { + error_setg(errp, "acpi: device unplug for unsupported device" + " type: %s", 
object_get_typename(OBJECT(dev))); +@@ -277,6 +284,7 @@ static void acpi_ged_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list) + AcpiGedState *s = ACPI_GED(adev); + + acpi_memory_ospm_status(&s->memhp_state, list); ++ acpi_cpu_ospm_status(&s->cpuhp_state, list); + } + + static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev) +@@ -291,6 +299,8 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev) + sel = ACPI_GED_PWR_DOWN_EVT; + } else if (ev & ACPI_NVDIMM_HOTPLUG_STATUS) { + sel = ACPI_GED_NVDIMM_HOTPLUG_EVT; ++ } else if (ev & ACPI_CPU_HOTPLUG_STATUS) { ++ sel = ACPI_GED_CPU_HOTPLUG_EVT; + } else { + /* Unknown event. Return without generating interrupt. */ + warn_report("GED: Unsupported event %d. No irq injected", ev); +-- +2.27.0 + diff --git a/hw-acpi-Update-CPUs-AML-with-cpu-ctrl-dev-change.patch b/hw-acpi-Update-CPUs-AML-with-cpu-ctrl-dev-change.patch new file mode 100644 index 0000000000000000000000000000000000000000..91d4c4d78e6922d07c7981c2a886c70d07c45d2d --- /dev/null +++ b/hw-acpi-Update-CPUs-AML-with-cpu-ctrl-dev-change.patch @@ -0,0 +1,118 @@ +From 06059c960d863c21c7d9cf4829ad2078692ed9e1 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Fri, 8 May 2020 13:27:57 +0100 +Subject: [PATCH] hw/acpi: Update CPUs AML with cpu-(ctrl)dev change + +CPUs Control device(\\_SB.PCI0) register interface for the x86 arch is based on +PCI and is IO port based and hence existing cpus AML code assumes _CRS objects +would evaluate to a system resource which describes IO Port address. But on ARM +arch CPUs control device(\\_SB.PRES) register interface is memory-mapped hence +_CRS object should evaluate to system resource which describes memory-mapped +base address. + +This cpus AML code change updates the existing inerface of the build cpus AML +function to accept both IO/MEMORY type regions and update the _CRS object +correspondingly. 
+ +NOTE: Beside above CPU scan shall be triggered when OSPM evaluates _EVT method + part of the GED framework which is covered in subsequent patch. + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/acpi/cpu.c | 23 ++++++++++++++++------- + hw/i386/acpi-build.c | 3 ++- + include/hw/acpi/cpu.h | 5 +++-- + 3 files changed, 21 insertions(+), 10 deletions(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index cabeb4e86b..cf0c7e8538 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -342,9 +342,10 @@ const VMStateDescription vmstate_cpu_hotplug = { + #define CPU_FW_EJECT_EVENT "CEJF" + + void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, +- build_madt_cpu_fn build_madt_cpu, hwaddr io_base, ++ build_madt_cpu_fn build_madt_cpu, hwaddr base_addr, + const char *res_root, +- const char *event_handler_method) ++ const char *event_handler_method, ++ AmlRegionSpace rs) + { + Aml *ifctx; + Aml *field; +@@ -369,13 +370,19 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + aml_append(cpu_ctrl_dev, aml_mutex(CPU_LOCK, 0)); + + crs = aml_resource_template(); +- aml_append(crs, aml_io(AML_DECODE16, io_base, io_base, 1, ++ if (rs == AML_SYSTEM_IO) { ++ aml_append(crs, aml_io(AML_DECODE16, base_addr, base_addr, 1, + ACPI_CPU_HOTPLUG_REG_LEN)); ++ } else { ++ aml_append(crs, aml_memory32_fixed(base_addr, ++ ACPI_CPU_HOTPLUG_REG_LEN, AML_READ_WRITE)); ++ } ++ + aml_append(cpu_ctrl_dev, aml_name_decl("_CRS", crs)); + + /* declare CPU hotplug MMIO region with related access fields */ + aml_append(cpu_ctrl_dev, +- aml_operation_region("PRST", AML_SYSTEM_IO, aml_int(io_base), ++ aml_operation_region("PRST", rs, aml_int(base_addr), + ACPI_CPU_HOTPLUG_REG_LEN)); + + field = aml_field("PRST", AML_BYTE_ACC, AML_NOLOCK, +@@ -699,9 +706,11 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + 
aml_append(sb_scope, cpus_dev); + aml_append(table, sb_scope); + +- method = aml_method(event_handler_method, 0, AML_NOTSERIALIZED); +- aml_append(method, aml_call0("\\_SB.CPUS." CPU_SCAN_METHOD)); +- aml_append(table, method); ++ if (event_handler_method) { ++ method = aml_method(event_handler_method, 0, AML_NOTSERIALIZED); ++ aml_append(method, aml_call0("\\_SB.CPUS." CPU_SCAN_METHOD)); ++ aml_append(table, method); ++ } + + g_free(cphp_res_path); + } +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index 80db183b78..db4ca8a66a 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -1546,7 +1546,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, + .fw_unplugs_cpu = pm->smi_on_cpu_unplug, + }; + build_cpus_aml(dsdt, machine, opts, pc_madt_cpu_entry, +- pm->cpu_hp_io_base, "\\_SB.PCI0", "\\_GPE._E02"); ++ pm->cpu_hp_io_base, "\\_SB.PCI0", "\\_GPE._E02", ++ AML_SYSTEM_IO); + } + + if (pcms->memhp_io_base && nr_mem) { +diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h +index 209e1773f8..76bc7eb251 100644 +--- a/include/hw/acpi/cpu.h ++++ b/include/hw/acpi/cpu.h +@@ -60,9 +60,10 @@ typedef void (*build_madt_cpu_fn)(int uid, const CPUArchIdList *apic_ids, + GArray *entry, bool force_enabled); + + void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, +- build_madt_cpu_fn build_madt_cpu, hwaddr io_base, ++ build_madt_cpu_fn build_madt_cpu, hwaddr base_addr, + const char *res_root, +- const char *event_handler_method); ++ const char *event_handler_method, ++ AmlRegionSpace rs); + + void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, ACPIOSTInfoList ***list); + +-- +2.27.0 + diff --git a/hw-acpi-Update-GED-_EVT-method-AML-with-cpu-scan.patch b/hw-acpi-Update-GED-_EVT-method-AML-with-cpu-scan.patch new file mode 100644 index 0000000000000000000000000000000000000000..a2dceaf31fbf7503d09b1410428b17ffe03f6338 --- /dev/null +++ b/hw-acpi-Update-GED-_EVT-method-AML-with-cpu-scan.patch @@ -0,0 +1,53 @@ +From 
cfdb0f24431ae0f5115f905a1411509c01a50e88 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 9 Jun 2020 00:50:36 +0100 +Subject: [PATCH] hw/acpi: Update GED _EVT method AML with cpu scan + +OSPM evaluates _EVT method to map the event. The cpu hotplug event eventually +results in start of the cpu scan. Scan figures out the cpu and the kind of +event(plug/unplug) and notifies it back to the guest. + +The change in this patch updates the GED AML _EVT method with the call to +\\_SB.CPUS.CSCN which will do above. + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/acpi/generic_event_device.c | 4 ++++ + include/hw/acpi/cpu_hotplug.h | 2 ++ + 2 files changed, 6 insertions(+) + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index b84602b238..ad252e6a91 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -108,6 +108,10 @@ void build_ged_aml(Aml *table, const char *name, HotplugHandler *hotplug_dev, + aml_append(if_ctx, aml_call0(MEMORY_DEVICES_CONTAINER "." + MEMORY_SLOT_SCAN_METHOD)); + break; ++ case ACPI_GED_CPU_HOTPLUG_EVT: ++ aml_append(if_ctx, aml_call0(ACPI_CPU_CONTAINER "." 
++ ACPI_CPU_SCAN_METHOD)); ++ break; + case ACPI_GED_PWR_DOWN_EVT: + aml_append(if_ctx, + aml_notify(aml_name(ACPI_POWER_BUTTON_DEVICE), +diff --git a/include/hw/acpi/cpu_hotplug.h b/include/hw/acpi/cpu_hotplug.h +index 48b291e45e..ef631750b4 100644 +--- a/include/hw/acpi/cpu_hotplug.h ++++ b/include/hw/acpi/cpu_hotplug.h +@@ -20,6 +20,8 @@ + #include "hw/acpi/cpu.h" + + #define ACPI_CPU_HOTPLUG_REG_LEN 12 ++#define ACPI_CPU_SCAN_METHOD "CSCN" ++#define ACPI_CPU_CONTAINER "\\_SB.CPUS" + + typedef struct AcpiCpuHotplug { + Object *device; +-- +2.27.0 + diff --git a/hw-acpi-Use-qemu_present_cpu-API-in-ACPI-CPU-hotplug.patch b/hw-acpi-Use-qemu_present_cpu-API-in-ACPI-CPU-hotplug.patch new file mode 100644 index 0000000000000000000000000000000000000000..93772f029c4a74fcc623b2cd8138ac2063e63049 --- /dev/null +++ b/hw-acpi-Use-qemu_present_cpu-API-in-ACPI-CPU-hotplug.patch @@ -0,0 +1,37 @@ +From 576a2a88625978f1befde11f0823f32bbc54cad1 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Mon, 28 Aug 2023 20:00:08 +0000 +Subject: [PATCH] hw/acpi: Use qemu_present_cpu() API in ACPI CPU hotplug init + +ACPI CPU Hotplug code assumes a virtual CPU is unplugged if the CPUState object +is absent in the list of ths possible CPUs(CPUArchIdList *possible_cpus) +maintained on per-machine basis. Use the earlier introduced qemu_present_cpu() +API to check this state. + +This change should have no bearing on the functionality of any architecture and +is mere a representational change. 
+ +Signed-off-by: Salil Mehta +--- + hw/acpi/cpu.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index 4b24a25003..cabeb4e86b 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -226,7 +226,10 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, + state->dev_count = id_list->len; + state->devs = g_new0(typeof(*state->devs), state->dev_count); + for (i = 0; i < id_list->len; i++) { +- state->devs[i].cpu = CPU(id_list->cpus[i].cpu); ++ struct CPUState *cpu = CPU(id_list->cpus[i].cpu); ++ if (qemu_present_cpu(cpu)) { ++ state->devs[i].cpu = cpu; ++ } + state->devs[i].arch_id = id_list->cpus[i].arch_id; + } + memory_region_init_io(&state->ctrl_reg, owner, &cpu_hotplug_ops, state, +-- +2.27.0 + diff --git a/hw-arm-Changes-required-for-reset-and-to-support-nex.patch b/hw-arm-Changes-required-for-reset-and-to-support-nex.patch new file mode 100644 index 0000000000000000000000000000000000000000..66816ac14e0fd5e975d63b1786c03028908f8569 --- /dev/null +++ b/hw-arm-Changes-required-for-reset-and-to-support-nex.patch @@ -0,0 +1,111 @@ +From 3e5f043c493fa4765c5637bec66be2bd620bc53f Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 9 May 2020 18:10:24 +0100 +Subject: [PATCH] hw/arm: Changes required for reset and to support next boot + +Updates the firmware config with the next boot cpus information and also +registers the reset callback to be called when guest reboots to reset the cpu. 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/boot.c | 2 +- + hw/arm/virt.c | 18 +++++++++++++++--- + include/hw/arm/boot.h | 2 ++ + include/hw/arm/virt.h | 1 + + 4 files changed, 19 insertions(+), 4 deletions(-) + +diff --git a/hw/arm/boot.c b/hw/arm/boot.c +index d1671e1d42..345c7cfa19 100644 +--- a/hw/arm/boot.c ++++ b/hw/arm/boot.c +@@ -683,7 +683,7 @@ fail: + return -1; + } + +-static void do_cpu_reset(void *opaque) ++void do_cpu_reset(void *opaque) + { + ARMCPU *cpu = opaque; + CPUState *cs = CPU(cpu); +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 60cd560ab9..eedff8e525 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -46,6 +46,8 @@ + #include "sysemu/device_tree.h" + #include "sysemu/numa.h" + #include "sysemu/runstate.h" ++#include "sysemu/reset.h" ++#include "sysemu/sysemu.h" + #include "sysemu/tpm.h" + #include "sysemu/tcg.h" + #include "sysemu/kvm.h" +@@ -1453,7 +1455,7 @@ static FWCfgState *create_fw_cfg(const VirtMachineState *vms, AddressSpace *as) + char *nodename; + + fw_cfg = fw_cfg_init_mem_wide(base + 8, base, 8, base + 16, as); +- fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)ms->smp.cpus); ++ fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, vms->boot_cpus); + + nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base); + qemu_fdt_add_subnode(ms->fdt, nodename); +@@ -3276,7 +3278,13 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + if (local_err) { + goto fail; + } +- /* TODO: register cpu for reset & update F/W info for the next boot */ ++ /* register this cpu for reset & update F/W info for the next boot */ ++ qemu_register_reset(do_cpu_reset, ARM_CPU(cs)); ++ } ++ ++ vms->boot_cpus++; ++ if (vms->fw_cfg) { ++ fw_cfg_modify_i16(vms->fw_cfg, FW_CFG_NB_CPUS, vms->boot_cpus); + } + + cs->disabled = false; +@@ -3351,7 +3359,11 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + 
unwire_gic_cpu_irqs(vms, cs); + virt_update_gic(vms, cs); + +- /* TODO: unregister cpu for reset & update F/W info for the next boot */ ++ qemu_unregister_reset(do_cpu_reset, ARM_CPU(cs)); ++ vms->boot_cpus--; ++ if (vms->fw_cfg) { ++ fw_cfg_modify_i16(vms->fw_cfg, FW_CFG_NB_CPUS, vms->boot_cpus); ++ } + + qobject_unref(dev->opts); + dev->opts = NULL; +diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h +index 80c492d742..f81326a1dc 100644 +--- a/include/hw/arm/boot.h ++++ b/include/hw/arm/boot.h +@@ -178,6 +178,8 @@ AddressSpace *arm_boot_address_space(ARMCPU *cpu, + int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo, + hwaddr addr_limit, AddressSpace *as, MachineState *ms); + ++void do_cpu_reset(void *opaque); ++ + /* Write a secure board setup routine with a dummy handler for SMCs */ + void arm_write_secure_board_setup_dummy_smc(ARMCPU *cpu, + const struct arm_boot_info *info, +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 069c9f2a09..ae0f5beb26 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -167,6 +167,7 @@ struct VirtMachineState { + MemMapEntry *memmap; + char *pciehb_nodename; + const int *irqmap; ++ uint16_t boot_cpus; + int fdt_size; + uint32_t clock_phandle; + uint32_t gic_phandle; +-- +2.27.0 + diff --git a/hw-arm-MADT-Tbl-change-to-size-the-guest-with-possib.patch b/hw-arm-MADT-Tbl-change-to-size-the-guest-with-possib.patch new file mode 100644 index 0000000000000000000000000000000000000000..75abc4e58ce1f30527638b0c62f94c50b033911b --- /dev/null +++ b/hw-arm-MADT-Tbl-change-to-size-the-guest-with-possib.patch @@ -0,0 +1,98 @@ +From 8e1b8d624128523654786953b381557c82654a57 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Wed, 6 May 2020 18:03:11 +0100 +Subject: [PATCH] hw/arm: MADT Tbl change to size the guest with possible vCPUs + +Changes required during building of MADT Table by QEMU to accomodate disabled +possible vCPUs. 
This info shall be used by the guest kernel to size up its +resources during boot time. This pre-sizing of the guest kernel done on +possible vCPUs will facilitate hotplug of the disabled vCPUs. + +This change also caters ACPI MADT GIC CPU Interface flag related changes +recently introduced in the UEFI ACPI 6.5 Specification which allows deferred +virtual CPU online'ing in the Guest Kernel. + +Link: https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#gic-cpu-interface-gicc-structure + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt-acpi-build.c | 36 ++++++++++++++++++++++++++++++------ + 1 file changed, 30 insertions(+), 6 deletions(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index d88f3cded1..2870c1ec5a 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -779,6 +779,29 @@ static void build_append_gicr(GArray *table_data, uint64_t base, uint32_t size) + build_append_int_noprefix(table_data, size, 4); /* Discovery Range Length */ + } + ++static uint32_t virt_acpi_get_gicc_flags(CPUState *cpu) ++{ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ ++ /* can only exist in 'enabled' state */ ++ if (!mc->has_hotpluggable_cpus) { ++ return 1; ++ } ++ ++ /* ++ * ARM GIC CPU Interface can be 'online-capable' or 'enabled' at boot ++ * We MUST set 'online-capable' Bit for all hotpluggable CPUs except the ++ * first/boot CPU. Cold-booted CPUs without 'Id' can also be unplugged. ++ * Though as-of-now this is only used as a debugging feature. ++ * ++ * UEFI ACPI Specification 6.5 ++ * Section: 5.2.12.14. GIC CPU Interface (GICC) Structure ++ * Table: 5.37 GICC CPU Interface Flags ++ * Link: https://uefi.org/specs/ACPI/6.5 ++ */ ++ return cpu && !cpu->cpu_index ? 
1 : (1 << 3); ++} ++ + static void + build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + { +@@ -805,12 +828,13 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + build_append_int_noprefix(table_data, vms->gic_version, 1); + build_append_int_noprefix(table_data, 0, 3); /* Reserved */ + +- for (i = 0; i < MACHINE(vms)->smp.cpus; i++) { +- ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(i)); ++ for (i = 0; i < MACHINE(vms)->smp.max_cpus; i++) { ++ CPUState *cpu = qemu_get_possible_cpu(i); + uint64_t physical_base_address = 0, gich = 0, gicv = 0; + uint32_t vgic_interrupt = vms->virt ? ARCH_GIC_MAINT_IRQ : 0; +- uint32_t pmu_interrupt = arm_feature(&armcpu->env, ARM_FEATURE_PMU) ? +- VIRTUAL_PMU_IRQ : 0; ++ uint32_t pmu_interrupt = vms->pmu ? VIRTUAL_PMU_IRQ : 0; ++ uint32_t flags = virt_acpi_get_gicc_flags(cpu); ++ uint64_t mpidr = qemu_get_cpu_archid(i); + + if (vms->gic_version == VIRT_GIC_VERSION_2) { + physical_base_address = memmap[VIRT_GIC_CPU].base; +@@ -825,7 +849,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + build_append_int_noprefix(table_data, i, 4); /* GIC ID */ + build_append_int_noprefix(table_data, i, 4); /* ACPI Processor UID */ + /* Flags */ +- build_append_int_noprefix(table_data, 1, 4); /* Enabled */ ++ build_append_int_noprefix(table_data, flags, 4); + /* Parking Protocol Version */ + build_append_int_noprefix(table_data, 0, 4); + /* Performance Interrupt GSIV */ +@@ -839,7 +863,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + build_append_int_noprefix(table_data, vgic_interrupt, 4); + build_append_int_noprefix(table_data, 0, 8); /* GICR Base Address*/ + /* MPIDR */ +- build_append_int_noprefix(table_data, armcpu->mp_affinity, 8); ++ build_append_int_noprefix(table_data, mpidr, 8); + /* Processor Power Efficiency Class */ + build_append_int_noprefix(table_data, 0, 1); + /* Reserved */ +-- +2.27.0 + diff --git 
a/hw-arm-Support-hotplug-capability-check-using-_OSC-m.patch b/hw-arm-Support-hotplug-capability-check-using-_OSC-m.patch new file mode 100644 index 0000000000000000000000000000000000000000..87e236b6f83accff9b839e2c363e47949feb1f9b --- /dev/null +++ b/hw-arm-Support-hotplug-capability-check-using-_OSC-m.patch @@ -0,0 +1,128 @@ +From c5dfec0bfd78f7e8f84a527a1aa73896f69b2367 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Thu, 10 Aug 2023 01:15:31 +0000 +Subject: [PATCH] hw/arm: Support hotplug capability check using _OSC method + +Physical CPU hotplug results in (un)setting of ACPI _STA.Present bit. AARCH64 +platforms do not support physical CPU hotplug. Virtual CPU hotplug support being +implemented toggles ACPI _STA.Enabled Bit to achieve hotplug functionality. This +is not same as physical CPU hotplug support. + +In future, if ARM architecture supports physical CPU hotplug then the current +design of virtual CPU hotplug can be used unchanged. Hence, there is a need for +firmware/VMM/Qemu to support evaluation of platform wide capabilitiy related to +the *type* of CPU hotplug support present on the platform. OSPM might need this +during boot time to correctly initialize the CPUs and other related components +in the kernel. + +NOTE: This implementation will be improved to add the support of *query* in the +subsequent versions. This is very minimal support to assist kernel. 
+ +ASL for the implemented _OSC method: + +Method (_OSC, 4, NotSerialized) // _OSC: Operating System Capabilities +{ + CreateDWordField (Arg3, Zero, CDW1) + If ((Arg0 == ToUUID ("0811b06e-4a27-44f9-8d60-3cbbc22e7b48") /* Platform-wide Capabilities */)) + { + CreateDWordField (Arg3, 0x04, CDW2) + Local0 = CDW2 /* \_SB_._OSC.CDW2 */ + If ((Arg1 != One)) + { + CDW1 |= 0x08 + } + + Local0 &= 0x00800000 + If ((CDW2 != Local0)) + { + CDW1 |= 0x10 + } + + CDW2 = Local0 + } + Else + { + CDW1 |= 0x04 + } + + Return (Arg3) +} + +Signed-off-by: Salil Mehta +--- + hw/arm/virt-acpi-build.c | 52 ++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 52 insertions(+) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 2870c1ec5a..c402e102c4 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -940,6 +940,55 @@ static void build_fadt_rev6(GArray *table_data, BIOSLinker *linker, + build_fadt(table_data, linker, &fadt, vms->oem_id, vms->oem_table_id); + } + ++static void build_virt_osc_method(Aml *scope, VirtMachineState *vms) ++{ ++ Aml *if_uuid, *else_uuid, *if_rev, *if_caps_masked, *method; ++ Aml *a_cdw1 = aml_name("CDW1"); ++ Aml *a_cdw2 = aml_local(0); ++ ++ method = aml_method("_OSC", 4, AML_NOTSERIALIZED); ++ aml_append(method, aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1")); ++ ++ /* match UUID */ ++ if_uuid = aml_if(aml_equal( ++ aml_arg(0), aml_touuid("0811B06E-4A27-44F9-8D60-3CBBC22E7B48"))); ++ ++ aml_append(if_uuid, aml_create_dword_field(aml_arg(3), aml_int(4), "CDW2")); ++ aml_append(if_uuid, aml_store(aml_name("CDW2"), a_cdw2)); ++ ++ /* check unknown revision in arg(1) */ ++ if_rev = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(1)))); ++ /* set revision error bits, DWORD1 Bit[3] */ ++ aml_append(if_rev, aml_or(a_cdw1, aml_int(0x08), a_cdw1)); ++ aml_append(if_uuid, if_rev); ++ ++ /* ++ * check support for vCPU hotplug type(=enabled) platform-wide capability ++ * in DWORD2 as sepcified in the below ACPI 
Specification ECR, ++ * # https://bugzilla.tianocore.org/show_bug.cgi?id=4481 ++ */ ++ if (vms->acpi_dev) { ++ aml_append(if_uuid, aml_and(a_cdw2, aml_int(0x800000), a_cdw2)); ++ /* check if OSPM specified hotplug capability bits were masked */ ++ if_caps_masked = aml_if(aml_lnot(aml_equal(aml_name("CDW2"), a_cdw2))); ++ aml_append(if_caps_masked, aml_or(a_cdw1, aml_int(0x10), a_cdw1)); ++ aml_append(if_uuid, if_caps_masked); ++ } ++ aml_append(if_uuid, aml_store(a_cdw2, aml_name("CDW2"))); ++ ++ aml_append(method, if_uuid); ++ else_uuid = aml_else(); ++ ++ /* set unrecognized UUID error bits, DWORD1 Bit[2] */ ++ aml_append(else_uuid, aml_or(a_cdw1, aml_int(4), a_cdw1)); ++ aml_append(method, else_uuid); ++ ++ aml_append(method, aml_return(aml_arg(3))); ++ aml_append(scope, method); ++ ++ return; ++} ++ + /* DSDT */ + static void + build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) +@@ -974,6 +1023,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + } else { + acpi_dsdt_add_cpus(scope, vms); + } ++ ++ build_virt_osc_method(scope, vms); ++ + acpi_dsdt_add_uart(scope, &memmap[VIRT_UART], + (irqmap[VIRT_UART] + ARM_SPI_BASE)); + if (vmc->acpi_expose_flash) { +-- +2.27.0 + diff --git a/hw-arm-gicv3-Changes-to-update-GIC-with-vCPU-hot-plu.patch b/hw-arm-gicv3-Changes-to-update-GIC-with-vCPU-hot-plu.patch new file mode 100644 index 0000000000000000000000000000000000000000..f8097e1973b31092363f156c4ee2df9372884906 --- /dev/null +++ b/hw-arm-gicv3-Changes-to-update-GIC-with-vCPU-hot-plu.patch @@ -0,0 +1,267 @@ +From 8ad397f33f8b7d82c0ef72608ef8dc3e0ecba1c2 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 9 May 2020 14:38:38 +0100 +Subject: [PATCH] hw/arm,gicv3: Changes to update GIC with vCPU hot-plug + notification + +vCPU hot-(un)plug events MUST be notified to the GIC. Introduce a notfication +mechanism to update any such events to GIC so that it can update its vCPU to GIC +CPU interface association. 
+ +This is required to implement a workaround to the limitations posed by the ARM +architecture. For details about the constraints and workarounds please check +below slides: + +Link: https://kvm-forum.qemu.org/2023/talk/9SMPDQ/ + +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 27 +++++++++++++-- + hw/intc/arm_gicv3_common.c | 54 +++++++++++++++++++++++++++++- + hw/intc/arm_gicv3_cpuif_common.c | 5 +++ + hw/intc/gicv3_internal.h | 1 + + include/hw/arm/virt.h | 1 + + include/hw/intc/arm_gicv3_common.h | 22 ++++++++++++ + 6 files changed, 107 insertions(+), 3 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 97bf4cca11..0312fa366d 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -750,6 +750,16 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms) + return dev; + } + ++static void virt_add_gic_cpuhp_notifier(VirtMachineState *vms) ++{ ++ MachineClass *mc = MACHINE_GET_CLASS(vms); ++ ++ if (mc->has_hotpluggable_cpus) { ++ Notifier *cpuhp_notifier = gicv3_cpuhp_notifier(vms->gic); ++ notifier_list_add(&vms->cpuhp_notifiers, cpuhp_notifier); ++ } ++} ++ + static void create_its(VirtMachineState *vms) + { + const char *itsclass = its_class_name(); +@@ -997,6 +1007,9 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + } else if (vms->gic_version == VIRT_GIC_VERSION_2) { + create_v2m(vms); + } ++ ++ /* add GIC CPU hot(un)plug update notifier */ ++ virt_add_gic_cpuhp_notifier(vms); + } + + static void create_uart(const VirtMachineState *vms, int uart, +@@ -2481,6 +2494,8 @@ static void machvirt_init(MachineState *machine) + create_fdt(vms); + qemu_log("cpu init start\n"); + ++ notifier_list_init(&vms->cpuhp_notifiers); ++ possible_cpus = mc->possible_cpu_arch_ids(machine); + assert(possible_cpus->len == max_cpus); + for (n = 0; n < possible_cpus->len; n++) { + Object *cpuobj; +@@ -3133,6 +3148,14 @@ static void 
virt_memory_plug(HotplugHandler *hotplug_dev, + dev, &error_abort); + } + ++static void virt_update_gic(VirtMachineState *vms, CPUState *cs) ++{ ++ GICv3CPUHotplugInfo gic_info = { .gic = vms->gic, .cpu = cs }; ++ ++ /* notify gic to stitch GICC to this new cpu */ ++ notifier_list_notify(&vms->cpuhp_notifiers, &gic_info); ++} ++ + static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) + { +@@ -3215,7 +3238,7 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + * vCPUs have their GIC state initialized during machvit_init(). + */ + if (vms->acpi_dev) { +- /* TODO: update GIC about this hotplug change here */ ++ virt_update_gic(vms, cs); + wire_gic_cpu_irqs(vms, cs); + } + +@@ -3301,7 +3324,7 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + /* TODO: update the acpi cpu hotplug state for cpu hot-unplug */ + + unwire_gic_cpu_irqs(vms, cs); +- /* TODO: update the GIC about this hot unplug change */ ++ virt_update_gic(vms, cs); + + /* TODO: unregister cpu for reset & update F/W info for the next boot */ + +diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c +index ebd99af610..fc87fa9369 100644 +--- a/hw/intc/arm_gicv3_common.c ++++ b/hw/intc/arm_gicv3_common.c +@@ -33,7 +33,6 @@ + #include "hw/arm/linux-boot-if.h" + #include "sysemu/kvm.h" + +- + static void gicv3_gicd_no_migration_shift_bug_post_load(GICv3State *cs) + { + if (cs->gicd_no_migration_shift_bug) { +@@ -322,6 +321,56 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, + } + } + ++static int arm_gicv3_get_proc_num(GICv3State *s, CPUState *cpu) ++{ ++ uint64_t mp_affinity; ++ uint64_t gicr_typer; ++ uint64_t cpu_affid; ++ int i; ++ ++ mp_affinity = object_property_get_uint(OBJECT(cpu), "mp-affinity", NULL); ++ /* match the cpu mp-affinity to get the gic cpuif number */ ++ for (i = 0; i < s->num_cpu; i++) { ++ gicr_typer = s->cpu[i].gicr_typer; ++ cpu_affid = (gicr_typer >> 32) 
& 0xFFFFFF; ++ if (cpu_affid == mp_affinity) { ++ return i; ++ } ++ } ++ ++ return -1; ++} ++ ++static void arm_gicv3_cpu_update_notifier(Notifier *notifier, void * data) ++{ ++ GICv3CPUHotplugInfo *gic_info = (GICv3CPUHotplugInfo *)data; ++ CPUState *cpu = gic_info->cpu; ++ int gic_cpuif_num; ++ GICv3State *s; ++ ++ s = ARM_GICV3_COMMON(gic_info->gic); ++ ++ /* this shall get us mapped gicv3 cpuif corresponding to mpidr */ ++ gic_cpuif_num = arm_gicv3_get_proc_num(s, cpu); ++ if (gic_cpuif_num < 0) { ++ error_report("Failed to associate cpu %d with any GIC cpuif", ++ cpu->cpu_index); ++ abort(); ++ } ++ ++ /* check if update is for vcpu hot-unplug */ ++ if (qemu_enabled_cpu(cpu)) { ++ s->cpu[gic_cpuif_num].cpu = NULL; ++ return; ++ } ++ ++ /* re-stitch the gic cpuif to this new cpu */ ++ gicv3_set_gicv3state(cpu, &s->cpu[gic_cpuif_num]); ++ gicv3_set_cpustate(&s->cpu[gic_cpuif_num], cpu); ++ ++ /* TODO: initialize the registers info for this newly added cpu */ ++} ++ + static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + { + GICv3State *s = ARM_GICV3_COMMON(dev); +@@ -444,6 +493,8 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + s->cpu[cpuidx - 1].gicr_typer |= GICR_TYPER_LAST; + } + ++ s->cpu_update_notifier.notify = arm_gicv3_cpu_update_notifier; ++ + s->itslist = g_ptr_array_new(); + } + +@@ -451,6 +502,7 @@ static void arm_gicv3_finalize(Object *obj) + { + GICv3State *s = ARM_GICV3_COMMON(obj); + ++ notifier_remove(&s->cpu_update_notifier); + g_free(s->redist_region_count); + } + +diff --git a/hw/intc/arm_gicv3_cpuif_common.c b/hw/intc/arm_gicv3_cpuif_common.c +index ff1239f65d..381cf2754b 100644 +--- a/hw/intc/arm_gicv3_cpuif_common.c ++++ b/hw/intc/arm_gicv3_cpuif_common.c +@@ -20,3 +20,8 @@ void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s) + + env->gicv3state = (void *)s; + }; ++ ++void gicv3_set_cpustate(GICv3CPUState *s, CPUState *cpu) ++{ ++ s->cpu = cpu; ++} +diff --git a/hw/intc/gicv3_internal.h 
b/hw/intc/gicv3_internal.h +index 29d5cdc1b6..9d4c1209bd 100644 +--- a/hw/intc/gicv3_internal.h ++++ b/hw/intc/gicv3_internal.h +@@ -848,5 +848,6 @@ static inline void gicv3_cache_all_target_cpustates(GICv3State *s) + } + + void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s); ++void gicv3_set_cpustate(GICv3CPUState *s, CPUState *cpu); + + #endif /* QEMU_ARM_GICV3_INTERNAL_H */ +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 5de0185063..069c9f2a09 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -180,6 +180,7 @@ struct VirtMachineState { + PCIBus *bus; + char *oem_id; + char *oem_table_id; ++ NotifierList cpuhp_notifiers; + }; + + #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) +diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h +index 4e2fb518e7..97a48f44b9 100644 +--- a/include/hw/intc/arm_gicv3_common.h ++++ b/include/hw/intc/arm_gicv3_common.h +@@ -280,6 +280,7 @@ struct GICv3State { + GICv3CPUState *gicd_irouter_target[GICV3_MAXIRQ]; + uint32_t gicd_nsacr[DIV_ROUND_UP(GICV3_MAXIRQ, 16)]; + ++ Notifier cpu_update_notifier; + GICv3CPUState *cpu; + /* List of all ITSes connected to this GIC */ + GPtrArray *itslist; +@@ -328,6 +329,27 @@ struct ARMGICv3CommonClass { + + void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, + const MemoryRegionOps *ops); ++/** ++ * Structure used by GICv3 CPU hotplug notifier ++ */ ++typedef struct GICv3CPUHotplugInfo { ++ DeviceState *gic; /* GICv3State */ ++ CPUState *cpu; ++} GICv3CPUHotplugInfo; ++ ++/** ++ * gicv3_cpuhp_notifier ++ * ++ * Returns CPU hotplug notifier which could be used to update GIC about any ++ * CPU hot(un)plug events. 
++ * ++ * Returns: Notifier initialized with CPU Hot(un)plug update function ++ */ ++static inline Notifier *gicv3_cpuhp_notifier(DeviceState *dev) ++{ ++ GICv3State *s = ARM_GICV3_COMMON(dev); ++ return &s->cpu_update_notifier; ++} + + /** + * gicv3_class_name +-- +2.27.0 + diff --git a/hw-arm-virt-Expose-cold-booted-CPUs-as-MADT-GICC-Ena.patch b/hw-arm-virt-Expose-cold-booted-CPUs-as-MADT-GICC-Ena.patch new file mode 100644 index 0000000000000000000000000000000000000000..1c124e92aa8a31f4573ac5b9098d39c925ec6fa3 --- /dev/null +++ b/hw-arm-virt-Expose-cold-booted-CPUs-as-MADT-GICC-Ena.patch @@ -0,0 +1,107 @@ +From 837b04877be49b930a2d437f55e2ae15ff820421 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 23 Sep 2023 22:31:49 +0000 +Subject: [PATCH] hw/arm/virt: Expose cold-booted CPUs as MADT GICC Enabled + +Hotpluggable CPUs MUST be exposed as 'online-capable' as per the new change. But +cold booted CPUs if made 'online-capable' during boot time might not get +detected in the legacy OS. Hence, can cause compatibility problems. + +Original Change Link: https://bugzilla.tianocore.org/show_bug.cgi?id=3706 + +Specification change might take time and hence disabling the support of +unplugging any cold booted CPUs to preserve the compatibility with legacy OS. + +Signed-off-by: Salil Mehta +--- + hw/arm/virt-acpi-build.c | 19 ++++++++++++++----- + hw/arm/virt.c | 16 ++++++++++++++++ + include/hw/core/cpu.h | 2 ++ + 3 files changed, 32 insertions(+), 5 deletions(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index c402e102c4..590afcfa98 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -789,17 +789,26 @@ static uint32_t virt_acpi_get_gicc_flags(CPUState *cpu) + } + + /* +- * ARM GIC CPU Interface can be 'online-capable' or 'enabled' at boot +- * We MUST set 'online-capable' Bit for all hotpluggable CPUs except the +- * first/boot CPU. Cold-booted CPUs without 'Id' can also be unplugged. 
+- * Though as-of-now this is only used as a debugging feature. ++ * ARM GIC CPU Interface can be 'online-capable' or 'enabled' at boot. We ++ * MUST set 'online-capable' bit for all hotpluggable CPUs. ++ * Change Link: https://bugzilla.tianocore.org/show_bug.cgi?id=3706 + * + * UEFI ACPI Specification 6.5 + * Section: 5.2.12.14. GIC CPU Interface (GICC) Structure + * Table: 5.37 GICC CPU Interface Flags + * Link: https://uefi.org/specs/ACPI/6.5 ++ * ++ * Cold-booted CPUs, except for the first/boot CPU, SHOULD be allowed to be ++ * hot(un)plug as well but for this to happen these MUST have ++ * 'online-capable' bit set. Later creates compatibility problem with legacy ++ * OS as it might ignore online-capable' bits during boot time and hence ++ * some CPUs might not get detected. To fix this MADT GIC CPU interface flag ++ * should be allowed to have both bits set i.e. 'online-capable' and ++ * 'Enabled' bits together. This change will require UEFI ACPI standard ++ * change. Till this happens exposing all cold-booted CPUs as 'enabled' only ++ * + */ +- return cpu && !cpu->cpu_index ? 1 : (1 << 3); ++ return cpu && cpu->cold_booted ? 1 : (1 << 3); + } + + static void +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index eedff8e525..ed437ce0e8 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3250,6 +3250,10 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + * This shall be used during the init of ACPI Hotplug state and hot-unplug + */ + cs->acpi_persistent = true; ++ ++ if (!dev->hotplugged) { ++ cs->cold_booted = true; ++ } + } + + static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, +@@ -3313,6 +3317,18 @@ static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev, + return; + } + ++ /* ++ * UEFI ACPI standard change is required to make both 'enabled' and the ++ * 'online-capable' bit co-exist instead of being mutually exclusive. ++ * check virt_acpi_get_gicc_flags() for more details. 
++ * ++ * Disable the unplugging of cold-booted vCPUs as a temporary mitigation. ++ */ ++ if (cs->cold_booted) { ++ error_setg(errp, "Hot-unplug of cold-booted CPU not supported!"); ++ return; ++ } ++ + if (cs->cpu_index == first_cpu->cpu_index) { + error_setg(errp, "Boot CPU(id%d=%d:%d:%d:%d) hot-unplug not supported", + first_cpu->cpu_index, cpu->socket_id, cpu->cluster_id, +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index 6dbe163548..ee04ee44c2 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -565,6 +565,8 @@ struct CPUState { + uint32_t halted; + int32_t exception_index; + ++ bool cold_booted; ++ + AccelCPUState *accel; + /* shared by kvm and hvf */ + bool vcpu_dirty; +-- +2.27.0 + diff --git a/hw-arm-virt-Move-setting-of-common-CPU-properties-in.patch b/hw-arm-virt-Move-setting-of-common-CPU-properties-in.patch new file mode 100644 index 0000000000000000000000000000000000000000..ec3b72f02c9fe269e68623de66654906734e83f4 --- /dev/null +++ b/hw-arm-virt-Move-setting-of-common-CPU-properties-in.patch @@ -0,0 +1,311 @@ +From 8daa90ad502b79e232377f831f67df456a743304 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 26 Aug 2023 01:29:37 +0000 +Subject: [PATCH] hw/arm/virt: Move setting of common CPU properties in a + function + +Factor out CPU properties code common for {hot,cold}-plugged CPUs. This allows +code reuse. 
+ +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 220 ++++++++++++++++++++++++++---------------- + include/hw/arm/virt.h | 4 + + 2 files changed, 140 insertions(+), 84 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 94481d45d4..8f647422d8 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2113,16 +2113,130 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + } + } + ++static void virt_cpu_set_properties(Object *cpuobj, const CPUArchId *cpu_slot, ++ Error **errp) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ VirtMachineState *vms = VIRT_MACHINE(ms); ++ Error *local_err = NULL; ++ VirtMachineClass *vmc; ++ ++ vmc = VIRT_MACHINE_GET_CLASS(ms); ++ ++ /* now, set the cpu object property values */ ++ numa_cpu_pre_plug(cpu_slot, DEVICE(cpuobj), &local_err); ++ if (local_err) { ++ goto out; ++ } ++ ++ object_property_set_int(cpuobj, "mp-affinity", cpu_slot->arch_id, NULL); ++ ++ if (!vms->secure) { ++ object_property_set_bool(cpuobj, "has_el3", false, NULL); ++ } ++ ++ if (!vms->virt && object_property_find(cpuobj, "has_el2")) { ++ object_property_set_bool(cpuobj, "has_el2", false, NULL); ++ } ++ ++ if (vmc->kvm_no_adjvtime && ++ object_property_find(cpuobj, "kvm-no-adjvtime")) { ++ object_property_set_bool(cpuobj, "kvm-no-adjvtime", true, NULL); ++ } ++ ++ if (vmc->no_kvm_steal_time && ++ object_property_find(cpuobj, "kvm-steal-time")) { ++ object_property_set_bool(cpuobj, "kvm-steal-time", false, NULL); ++ } ++ ++ if (vmc->no_pmu && object_property_find(cpuobj, "pmu")) { ++ object_property_set_bool(cpuobj, "pmu", false, NULL); ++ } ++ ++ if (vmc->no_tcg_lpa2 && object_property_find(cpuobj, "lpa2")) { ++ object_property_set_bool(cpuobj, "lpa2", false, NULL); ++ } ++ ++ if (object_property_find(cpuobj, "reset-cbar")) { ++ object_property_set_int(cpuobj, "reset-cbar", ++ vms->memmap[VIRT_CPUPERIPHS].base, ++ &local_err); ++ if (local_err) { ++ goto out; ++ } ++ } ++ ++ /* link already initialized 
{secure,tag}-memory regions to this cpu */ ++ object_property_set_link(cpuobj, "memory", OBJECT(vms->sysmem), &local_err); ++ if (local_err) { ++ goto out; ++ } ++ ++ if (vms->secure) { ++ object_property_set_link(cpuobj, "secure-memory", ++ OBJECT(vms->secure_sysmem), &local_err); ++ if (local_err) { ++ goto out; ++ } ++ } ++ ++ if (vms->mte) { ++ if (!object_property_find(cpuobj, "tag-memory")) { ++ error_setg(&local_err, "MTE requested, but not supported " ++ "by the guest CPU"); ++ if (local_err) { ++ goto out; ++ } ++ } ++ ++ object_property_set_link(cpuobj, "tag-memory", OBJECT(vms->tag_sysmem), ++ &local_err); ++ if (local_err) { ++ goto out; ++ } ++ ++ if (vms->secure) { ++ object_property_set_link(cpuobj, "secure-tag-memory", ++ OBJECT(vms->secure_tag_sysmem), ++ &local_err); ++ if (local_err) { ++ goto out; ++ } ++ } ++ } ++ ++ /* ++ * RFC: Question: this must only be called for the hotplugged cpus. For the ++ * cold booted secondary cpus this is being taken care in arm_load_kernel() ++ * in boot.c. Perhaps we should remove that code now? 
++ */ ++ if (vms->psci_conduit != QEMU_PSCI_CONDUIT_DISABLED) { ++ object_property_set_int(cpuobj, "psci-conduit", vms->psci_conduit, ++ NULL); ++ ++ /* Secondary CPUs start in PSCI powered-down state */ ++ if (CPU(cpuobj)->cpu_index > 0) { ++ object_property_set_bool(cpuobj, "start-powered-off", true, NULL); ++ } ++ } ++ ++out: ++ if (local_err) { ++ error_propagate(errp, local_err); ++ } ++ return; ++} ++ + static void machvirt_init(MachineState *machine) + { + VirtMachineState *vms = VIRT_MACHINE(machine); + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(machine); + MachineClass *mc = MACHINE_GET_CLASS(machine); + const CPUArchIdList *possible_cpus; +- MemoryRegion *sysmem = get_system_memory(); ++ MemoryRegion *secure_tag_sysmem = NULL; + MemoryRegion *secure_sysmem = NULL; + MemoryRegion *tag_sysmem = NULL; +- MemoryRegion *secure_tag_sysmem = NULL; ++ MemoryRegion *sysmem; + int n, virt_max_cpus; + bool firmware_loaded; + bool aarch64 = true; +@@ -2166,6 +2280,8 @@ static void machvirt_init(MachineState *machine) + */ + finalize_gic_version(vms); + ++ sysmem = vms->sysmem = get_system_memory(); ++ + if (vms->secure) { + /* + * The Secure view of the world is the same as the NonSecure, +@@ -2173,7 +2289,7 @@ static void machvirt_init(MachineState *machine) + * containing the system memory at low priority; any secure-only + * devices go in at higher priority and take precedence. 
+ */ +- secure_sysmem = g_new(MemoryRegion, 1); ++ secure_sysmem = vms->secure_sysmem = g_new(MemoryRegion, 1); + memory_region_init(secure_sysmem, OBJECT(machine), "secure-memory", + UINT64_MAX); + memory_region_add_subregion_overlap(secure_sysmem, 0, sysmem, -1); +@@ -2246,6 +2362,23 @@ static void machvirt_init(MachineState *machine) + exit(1); + } + ++ if (vms->mte) { ++ /* Create the memory region only once, but link to all cpus later */ ++ tag_sysmem = vms->tag_sysmem = g_new(MemoryRegion, 1); ++ memory_region_init(tag_sysmem, OBJECT(machine), ++ "tag-memory", UINT64_MAX / 32); ++ ++ if (vms->secure) { ++ secure_tag_sysmem = vms->secure_tag_sysmem = g_new(MemoryRegion, 1); ++ memory_region_init(secure_tag_sysmem, OBJECT(machine), ++ "secure-tag-memory", UINT64_MAX / 32); ++ ++ /* As with ram, secure-tag takes precedence over tag. */ ++ memory_region_add_subregion_overlap(secure_tag_sysmem, 0, ++ tag_sysmem, -1); ++ } ++ } ++ + create_fdt(vms); + qemu_log("cpu init start\n"); + +@@ -2259,15 +2392,10 @@ static void machvirt_init(MachineState *machine) + } + + cpuobj = object_new(possible_cpus->cpus[n].type); +- object_property_set_int(cpuobj, "mp-affinity", +- possible_cpus->cpus[n].arch_id, NULL); + + cs = CPU(cpuobj); + cs->cpu_index = n; + +- numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], DEVICE(cpuobj), +- &error_fatal); +- + aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL); + object_property_set_int(cpuobj, "socket-id", + virt_get_socket_id(machine, n), NULL); +@@ -2278,82 +2406,6 @@ static void machvirt_init(MachineState *machine) + object_property_set_int(cpuobj, "thread-id", + virt_get_thread_id(machine, n), NULL); + +- if (!vms->secure) { +- object_property_set_bool(cpuobj, "has_el3", false, NULL); +- } +- +- if (!vms->virt && object_property_find(cpuobj, "has_el2")) { +- object_property_set_bool(cpuobj, "has_el2", false, NULL); +- } +- +- if (vmc->kvm_no_adjvtime && +- object_property_find(cpuobj, "kvm-no-adjvtime")) { +- 
object_property_set_bool(cpuobj, "kvm-no-adjvtime", true, NULL); +- } +- +- if (vmc->no_kvm_steal_time && +- object_property_find(cpuobj, "kvm-steal-time")) { +- object_property_set_bool(cpuobj, "kvm-steal-time", false, NULL); +- } +- +- if (vmc->no_pmu && object_property_find(cpuobj, "pmu")) { +- object_property_set_bool(cpuobj, "pmu", false, NULL); +- } +- +- if (vmc->no_tcg_lpa2 && object_property_find(cpuobj, "lpa2")) { +- object_property_set_bool(cpuobj, "lpa2", false, NULL); +- } +- +- if (object_property_find(cpuobj, "reset-cbar")) { +- object_property_set_int(cpuobj, "reset-cbar", +- vms->memmap[VIRT_CPUPERIPHS].base, +- &error_abort); +- } +- +- object_property_set_link(cpuobj, "memory", OBJECT(sysmem), +- &error_abort); +- if (vms->secure) { +- object_property_set_link(cpuobj, "secure-memory", +- OBJECT(secure_sysmem), &error_abort); +- } +- +- if (vms->mte) { +- /* Create the memory region only once, but link to all cpus. */ +- if (!tag_sysmem) { +- /* +- * The property exists only if MemTag is supported. +- * If it is, we must allocate the ram to back that up. +- */ +- if (!object_property_find(cpuobj, "tag-memory")) { +- error_report("MTE requested, but not supported " +- "by the guest CPU"); +- exit(1); +- } +- +- tag_sysmem = g_new(MemoryRegion, 1); +- memory_region_init(tag_sysmem, OBJECT(machine), +- "tag-memory", UINT64_MAX / 32); +- +- if (vms->secure) { +- secure_tag_sysmem = g_new(MemoryRegion, 1); +- memory_region_init(secure_tag_sysmem, OBJECT(machine), +- "secure-tag-memory", UINT64_MAX / 32); +- +- /* As with ram, secure-tag takes precedence over tag. 
*/ +- memory_region_add_subregion_overlap(secure_tag_sysmem, 0, +- tag_sysmem, -1); +- } +- } +- +- object_property_set_link(cpuobj, "tag-memory", OBJECT(tag_sysmem), +- &error_abort); +- if (vms->secure) { +- object_property_set_link(cpuobj, "secure-tag-memory", +- OBJECT(secure_tag_sysmem), +- &error_abort); +- } +- } +- + qdev_realize(DEVICE(cpuobj), NULL, &error_fatal); + object_unref(cpuobj); + } +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index e944d434c4..49d1ec8656 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -139,6 +139,10 @@ struct VirtMachineState { + DeviceState *platform_bus_dev; + FWCfgState *fw_cfg; + PFlashCFI01 *flash[2]; ++ MemoryRegion *sysmem; ++ MemoryRegion *secure_sysmem; ++ MemoryRegion *tag_sysmem; ++ MemoryRegion *secure_tag_sysmem; + bool secure; + bool highmem; + bool highmem_compact; +-- +2.27.0 + diff --git a/hw-arm64-add-vcpu-cache-info-support.patch b/hw-arm64-add-vcpu-cache-info-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..1e09d4fa537e3ed220bfb67e95685c60c64ee016 --- /dev/null +++ b/hw-arm64-add-vcpu-cache-info-support.patch @@ -0,0 +1,352 @@ +From 7d3d37d3af4278aee627952d6a81b63dec6ac62b Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Sun, 17 Mar 2024 18:56:09 +0800 +Subject: [PATCH] hw/arm64: add vcpu cache info support + +Support VCPU Cache info by dtb and PPTT table, including L1, L2 and L3 Cache. 
+ +Signed-off-by: zhanghailiang +Signed-off-by: Honghao +Signed-off-by: Ying Fang +Signed-off-by: Yanan Wang +Signed-off-by: Yuan Zhang +--- + hw/acpi/aml-build.c | 158 ++++++++++++++++++++++++++++++++++++ + hw/arm/virt.c | 72 ++++++++++++++++ + include/hw/acpi/aml-build.h | 47 +++++++++++ + 3 files changed, 277 insertions(+) + +diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c +index af66bde0f5..2968df5562 100644 +--- a/hw/acpi/aml-build.c ++++ b/hw/acpi/aml-build.c +@@ -1994,6 +1994,163 @@ static void build_processor_hierarchy_node(GArray *tbl, uint32_t flags, + } + } + ++#ifdef __aarch64__ ++/* ++ * ACPI spec, Revision 6.3 ++ * 5.2.29.2 Cache Type Structure (Type 1) ++ */ ++static void build_cache_hierarchy_node(GArray *tbl, uint32_t next_level, ++ uint32_t cache_type) ++{ ++ build_append_byte(tbl, 1); ++ build_append_byte(tbl, 24); ++ build_append_int_noprefix(tbl, 0, 2); ++ build_append_int_noprefix(tbl, 127, 4); ++ build_append_int_noprefix(tbl, next_level, 4); ++ ++ switch (cache_type) { ++ case ARM_L1D_CACHE: /* L1 dcache info */ ++ build_append_int_noprefix(tbl, ARM_L1DCACHE_SIZE, 4); ++ build_append_int_noprefix(tbl, ARM_L1DCACHE_SETS, 4); ++ build_append_byte(tbl, ARM_L1DCACHE_ASSOCIATIVITY); ++ build_append_byte(tbl, ARM_L1DCACHE_ATTRIBUTES); ++ build_append_int_noprefix(tbl, ARM_L1DCACHE_LINE_SIZE, 2); ++ break; ++ case ARM_L1I_CACHE: /* L1 icache info */ ++ build_append_int_noprefix(tbl, ARM_L1ICACHE_SIZE, 4); ++ build_append_int_noprefix(tbl, ARM_L1ICACHE_SETS, 4); ++ build_append_byte(tbl, ARM_L1ICACHE_ASSOCIATIVITY); ++ build_append_byte(tbl, ARM_L1ICACHE_ATTRIBUTES); ++ build_append_int_noprefix(tbl, ARM_L1ICACHE_LINE_SIZE, 2); ++ break; ++ case ARM_L2_CACHE: /* L2 cache info */ ++ build_append_int_noprefix(tbl, ARM_L2CACHE_SIZE, 4); ++ build_append_int_noprefix(tbl, ARM_L2CACHE_SETS, 4); ++ build_append_byte(tbl, ARM_L2CACHE_ASSOCIATIVITY); ++ build_append_byte(tbl, ARM_L2CACHE_ATTRIBUTES); ++ build_append_int_noprefix(tbl, 
ARM_L2CACHE_LINE_SIZE, 2); ++ break; ++ case ARM_L3_CACHE: /* L3 cache info */ ++ build_append_int_noprefix(tbl, ARM_L3CACHE_SIZE, 4); ++ build_append_int_noprefix(tbl, ARM_L3CACHE_SETS, 4); ++ build_append_byte(tbl, ARM_L3CACHE_ASSOCIATIVITY); ++ build_append_byte(tbl, ARM_L3CACHE_ATTRIBUTES); ++ build_append_int_noprefix(tbl, ARM_L3CACHE_LINE_SIZE, 2); ++ break; ++ default: ++ build_append_int_noprefix(tbl, 0, 4); ++ build_append_int_noprefix(tbl, 0, 4); ++ build_append_byte(tbl, 0); ++ build_append_byte(tbl, 0); ++ build_append_int_noprefix(tbl, 0, 2); ++ } ++} ++ ++/* ++ * ACPI spec, Revision 6.3 ++ * 5.2.29 Processor Properties Topology Table (PPTT) ++ */ ++void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, ++ const char *oem_id, const char *oem_table_id) ++{ ++ MachineClass *mc = MACHINE_GET_CLASS(ms); ++ GQueue *list = g_queue_new(); ++ guint pptt_start = table_data->len; ++ guint parent_offset; ++ guint length, i; ++ int uid = 0; ++ int socket; ++ AcpiTable table = { .sig = "PPTT", .rev = 2, ++ .oem_id = oem_id, .oem_table_id = oem_table_id }; ++ ++ acpi_table_begin(&table, table_data); ++ ++ for (socket = 0; socket < ms->smp.sockets; socket++) { ++ uint32_t l3_cache_offset = table_data->len - pptt_start; ++ build_cache_hierarchy_node(table_data, 0, ARM_L3_CACHE); ++ ++ g_queue_push_tail(list, ++ GUINT_TO_POINTER(table_data->len - pptt_start)); ++ build_processor_hierarchy_node( ++ table_data, ++ /* ++ * Physical package - represents the boundary ++ * of a physical package ++ */ ++ (1 << 0), ++ 0, socket, &l3_cache_offset, 1); ++ } ++ ++ if (mc->smp_props.clusters_supported) { ++ length = g_queue_get_length(list); ++ for (i = 0; i < length; i++) { ++ int cluster; ++ ++ parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); ++ for (cluster = 0; cluster < ms->smp.clusters; cluster++) { ++ g_queue_push_tail(list, ++ GUINT_TO_POINTER(table_data->len - pptt_start)); ++ build_processor_hierarchy_node( ++ table_data, ++ (0 << 0), /* not a 
physical package */ ++ parent_offset, cluster, NULL, 0); ++ } ++ } ++ } ++ ++ length = g_queue_get_length(list); ++ for (i = 0; i < length; i++) { ++ int core; ++ ++ parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); ++ for (core = 0; core < ms->smp.cores; core++) { ++ uint32_t priv_rsrc[3] = {}; ++ priv_rsrc[0] = table_data->len - pptt_start; /* L2 cache offset */ ++ build_cache_hierarchy_node(table_data, 0, ARM_L2_CACHE); ++ ++ priv_rsrc[1] = table_data->len - pptt_start; /* L1 dcache offset */ ++ build_cache_hierarchy_node(table_data, priv_rsrc[0], ARM_L1D_CACHE); ++ ++ priv_rsrc[2] = table_data->len - pptt_start; /* L1 icache offset */ ++ build_cache_hierarchy_node(table_data, priv_rsrc[0], ARM_L1I_CACHE); ++ ++ if (ms->smp.threads > 1) { ++ g_queue_push_tail(list, ++ GUINT_TO_POINTER(table_data->len - pptt_start)); ++ build_processor_hierarchy_node( ++ table_data, ++ (0 << 0), /* not a physical package */ ++ parent_offset, core, priv_rsrc, 3); ++ } else { ++ build_processor_hierarchy_node( ++ table_data, ++ (1 << 1) | /* ACPI Processor ID valid */ ++ (1 << 3), /* Node is a Leaf */ ++ parent_offset, uid++, priv_rsrc, 3); ++ } ++ } ++ } ++ ++ length = g_queue_get_length(list); ++ for (i = 0; i < length; i++) { ++ int thread; ++ ++ parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); ++ for (thread = 0; thread < ms->smp.threads; thread++) { ++ build_processor_hierarchy_node( ++ table_data, ++ (1 << 1) | /* ACPI Processor ID valid */ ++ (1 << 2) | /* Processor is a Thread */ ++ (1 << 3), /* Node is a Leaf */ ++ parent_offset, uid++, NULL, 0); ++ } ++ } ++ ++ g_queue_free(list); ++ acpi_table_end(linker, &table); ++} ++ ++#else + /* + * ACPI spec, Revision 6.3 + * 5.2.29 Processor Properties Topology Table (PPTT) +@@ -2069,6 +2226,7 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, + + acpi_table_end(linker, &table); + } ++#endif + + /* build rev1/rev3/rev5.1/rev6.0 FADT */ + void build_fadt(GArray *tbl, BIOSLinker *linker, 
const AcpiFadtData *f, +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 500a15aa5b..b82bd1b8c8 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -379,6 +379,72 @@ static void fdt_add_timer_nodes(const VirtMachineState *vms) + INTID_TO_PPI(ARCH_TIMER_NS_EL2_IRQ), irqflags); + } + ++static void fdt_add_l3cache_nodes(const VirtMachineState *vms) ++{ ++ int i; ++ const MachineState *ms = MACHINE(vms); ++ int cpus_per_socket = ms->smp.clusters * ms->smp.cores * ms->smp.threads; ++ int sockets = (ms->smp.cpus + cpus_per_socket - 1) / cpus_per_socket; ++ ++ for (i = 0; i < sockets; i++) { ++ char *nodename = g_strdup_printf("/cpus/l3-cache%d", i); ++ ++ qemu_fdt_add_subnode(ms->fdt, nodename); ++ qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", "cache"); ++ qemu_fdt_setprop_string(ms->fdt, nodename, "cache-unified", "true"); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-level", 3); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-size", 0x2000000); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-line-size", 128); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-sets", 2048); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", ++ qemu_fdt_alloc_phandle(ms->fdt)); ++ g_free(nodename); ++ } ++} ++ ++static void fdt_add_l2cache_nodes(const VirtMachineState *vms) ++{ ++ const MachineState *ms = MACHINE(vms); ++ int cpus_per_socket = ms->smp.clusters * ms->smp.cores * ms->smp.threads; ++ int cpu; ++ ++ for (cpu = 0; cpu < ms->smp.cpus; cpu++) { ++ char *next_path = g_strdup_printf("/cpus/l3-cache%d", ++ cpu / cpus_per_socket); ++ char *nodename = g_strdup_printf("/cpus/l2-cache%d", cpu); ++ ++ qemu_fdt_add_subnode(ms->fdt, nodename); ++ qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", "cache"); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-size", 0x80000); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-line-size", 64); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-sets", 1024); ++ qemu_fdt_setprop_phandle(ms->fdt, nodename, 
"next-level-cache", ++ next_path); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", ++ qemu_fdt_alloc_phandle(ms->fdt)); ++ ++ g_free(next_path); ++ g_free(nodename); ++ } ++} ++ ++static void fdt_add_l1cache_prop(const VirtMachineState *vms, ++ char *nodename, int cpu) ++{ ++ const MachineState *ms = MACHINE(vms); ++ char *cachename = g_strdup_printf("/cpus/l2-cache%d", cpu); ++ ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "d-cache-size", 0x10000); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "d-cache-line-size", 64); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "d-cache-sets", 256); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "i-cache-size", 0x10000); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "i-cache-line-size", 64); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "i-cache-sets", 256); ++ qemu_fdt_setprop_phandle(ms->fdt, nodename, "next-level-cache", ++ cachename); ++ g_free(cachename); ++} ++ + static void fdt_add_cpu_nodes(const VirtMachineState *vms) + { + int cpu; +@@ -413,6 +479,11 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) + qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#address-cells", addr_cells); + qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#size-cells", 0x0); + ++ if (!vmc->no_cpu_topology) { ++ fdt_add_l3cache_nodes(vms); ++ fdt_add_l2cache_nodes(vms); ++ } ++ + for (cpu = smp_cpus - 1; cpu >= 0; cpu--) { + char *nodename = g_strdup_printf("/cpus/cpu@%d", cpu); + ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu)); +@@ -442,6 +513,7 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) + } + + if (!vmc->no_cpu_topology) { ++ fdt_add_l1cache_prop(vms, nodename, cpu); + qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", + qemu_fdt_alloc_phandle(ms->fdt)); + } +diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h +index ff2a310270..84ded2ecd3 100644 +--- a/include/hw/acpi/aml-build.h ++++ b/include/hw/acpi/aml-build.h +@@ -221,6 +221,53 @@ struct AcpiBuildTables { + BIOSLinker *linker; + } AcpiBuildTables; + ++#ifdef 
__aarch64__ ++/* Definitions of the hardcoded cache info*/ ++ ++typedef enum { ++ ARM_L1D_CACHE, ++ ARM_L1I_CACHE, ++ ARM_L2_CACHE, ++ ARM_L3_CACHE ++} ArmCacheType; ++ ++/* L1 data cache: */ ++#define ARM_L1DCACHE_SIZE 65536 ++#define ARM_L1DCACHE_SETS 256 ++#define ARM_L1DCACHE_ASSOCIATIVITY 4 ++#define ARM_L1DCACHE_ATTRIBUTES 2 ++#define ARM_L1DCACHE_LINE_SIZE 64 ++ ++/* L1 instruction cache: */ ++#define ARM_L1ICACHE_SIZE 65536 ++#define ARM_L1ICACHE_SETS 256 ++#define ARM_L1ICACHE_ASSOCIATIVITY 4 ++#define ARM_L1ICACHE_ATTRIBUTES 4 ++#define ARM_L1ICACHE_LINE_SIZE 64 ++ ++/* Level 2 unified cache: */ ++#define ARM_L2CACHE_SIZE 524288 ++#define ARM_L2CACHE_SETS 1024 ++#define ARM_L2CACHE_ASSOCIATIVITY 8 ++#define ARM_L2CACHE_ATTRIBUTES 10 ++#define ARM_L2CACHE_LINE_SIZE 64 ++ ++/* Level 3 unified cache: */ ++#define ARM_L3CACHE_SIZE 33554432 ++#define ARM_L3CACHE_SETS 2048 ++#define ARM_L3CACHE_ASSOCIATIVITY 15 ++#define ARM_L3CACHE_ATTRIBUTES 10 ++#define ARM_L3CACHE_LINE_SIZE 128 ++ ++struct offset_status { ++ uint32_t parent; ++ uint32_t l2_offset; ++ uint32_t l1d_offset; ++ uint32_t l1i_offset; ++}; ++ ++#endif ++ + typedef + struct CrsRangeEntry { + uint64_t base; +-- +2.27.0 + diff --git a/hw-intc-arm-gicv3-Changes-required-to-re-init-the-vC.patch b/hw-intc-arm-gicv3-Changes-required-to-re-init-the-vC.patch new file mode 100644 index 0000000000000000000000000000000000000000..d551fad998c56d23a82c912ab300dcf261bf381d --- /dev/null +++ b/hw-intc-arm-gicv3-Changes-required-to-re-init-the-vC.patch @@ -0,0 +1,403 @@ +From 4e0a4443b7c36608fc30dcaaf0db120220111dd2 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 9 May 2020 15:26:27 +0100 +Subject: [PATCH] hw/intc/arm-gicv3*: Changes required to (re)init the vCPU + register info + +vCPU register info needs to be re-initialized each time vCPU is hot-plugged. +This has to be done both for emulation/TCG and KVM case. This is done in +context to the GIC update notification for any vCPU hot-(un)plug events. 
This +change adds that support and re-factors existing to maximize the code re-use. + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/intc/arm_gicv3.c | 1 + + hw/intc/arm_gicv3_common.c | 7 +- + hw/intc/arm_gicv3_cpuif.c | 257 +++++++++++++++-------------- + hw/intc/arm_gicv3_kvm.c | 7 +- + hw/intc/gicv3_internal.h | 1 + + include/hw/intc/arm_gicv3_common.h | 1 + + 6 files changed, 150 insertions(+), 124 deletions(-) + +diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c +index 0b8f79a122..e1c7c8c4bc 100644 +--- a/hw/intc/arm_gicv3.c ++++ b/hw/intc/arm_gicv3.c +@@ -410,6 +410,7 @@ static void arm_gicv3_class_init(ObjectClass *klass, void *data) + ARMGICv3Class *agc = ARM_GICV3_CLASS(klass); + + agcc->post_load = arm_gicv3_post_load; ++ agcc->init_cpu_reginfo = gicv3_init_cpu_reginfo; + device_class_set_parent_realize(dc, arm_gic_realize, &agc->parent_realize); + } + +diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c +index fc87fa9369..d051024a30 100644 +--- a/hw/intc/arm_gicv3_common.c ++++ b/hw/intc/arm_gicv3_common.c +@@ -345,10 +345,12 @@ static void arm_gicv3_cpu_update_notifier(Notifier *notifier, void * data) + { + GICv3CPUHotplugInfo *gic_info = (GICv3CPUHotplugInfo *)data; + CPUState *cpu = gic_info->cpu; ++ ARMGICv3CommonClass *c; + int gic_cpuif_num; + GICv3State *s; + + s = ARM_GICV3_COMMON(gic_info->gic); ++ c = ARM_GICV3_COMMON_GET_CLASS(s); + + /* this shall get us mapped gicv3 cpuif corresponding to mpidr */ + gic_cpuif_num = arm_gicv3_get_proc_num(s, cpu); +@@ -368,7 +370,10 @@ static void arm_gicv3_cpu_update_notifier(Notifier *notifier, void * data) + gicv3_set_gicv3state(cpu, &s->cpu[gic_cpuif_num]); + gicv3_set_cpustate(&s->cpu[gic_cpuif_num], cpu); + +- /* TODO: initialize the registers info for this newly added cpu */ ++ /* initialize the registers info for this newly added cpu */ ++ if (c->init_cpu_reginfo) { ++ 
c->init_cpu_reginfo(cpu); ++ } + } + + static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) +diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c +index 0d0eb2f62f..a013510074 100644 +--- a/hw/intc/arm_gicv3_cpuif.c ++++ b/hw/intc/arm_gicv3_cpuif.c +@@ -2782,6 +2782,127 @@ static const ARMCPRegInfo gicv3_cpuif_ich_apxr23_reginfo[] = { + }, + }; + ++void gicv3_init_cpu_reginfo(CPUState *cs) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ GICv3CPUState *gcs = icc_cs_from_env(&cpu->env); ++ ++ /* ++ * If the CPU doesn't define a GICv3 configuration, probably because ++ * in real hardware it doesn't have one, then we use default values ++ * matching the one used by most Arm CPUs. This applies to: ++ * cpu->gic_num_lrs ++ * cpu->gic_vpribits ++ * cpu->gic_vprebits ++ * cpu->gic_pribits ++ */ ++ ++ /* ++ * Note that we can't just use the GICv3CPUState as an opaque pointer ++ * in define_arm_cp_regs_with_opaque(), because when we're called back ++ * it might be with code translated by CPU 0 but run by CPU 1, in ++ * which case we'd get the wrong value. ++ * So instead we define the regs with no ri->opaque info, and ++ * get back to the GICv3CPUState from the CPUARMState. ++ */ ++ define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); ++ ++ /* ++ * The CPU implementation specifies the number of supported ++ * bits of physical priority. For backwards compatibility ++ * of migration, we have a compat property that forces use ++ * of 8 priority bits regardless of what the CPU really has. ++ */ ++ if (gcs->gic->force_8bit_prio) { ++ gcs->pribits = 8; ++ } else { ++ gcs->pribits = cpu->gic_pribits ?: 5; ++ } ++ ++ /* ++ * The GICv3 has separate ID register fields for virtual priority ++ * and preemption bit values, but only a single ID register field ++ * for the physical priority bits. The preemption bit count is ++ * always the same as the priority bit count, except that 8 bits ++ * of priority means 7 preemption bits. 
We precalculate the ++ * preemption bits because it simplifies the code and makes the ++ * parallels between the virtual and physical bits of the GIC ++ * a bit clearer. ++ */ ++ gcs->prebits = gcs->pribits; ++ if (gcs->prebits == 8) { ++ gcs->prebits--; ++ } ++ /* ++ * Check that CPU code defining pribits didn't violate ++ * architectural constraints our implementation relies on. ++ */ ++ g_assert(gcs->pribits >= 4 && gcs->pribits <= 8); ++ ++ /* ++ * gicv3_cpuif_reginfo[] defines ICC_AP*R0_EL1; add definitions ++ * for ICC_AP*R{1,2,3}_EL1 if the prebits value requires them. ++ */ ++ if (gcs->prebits >= 6) { ++ define_arm_cp_regs(cpu, gicv3_cpuif_icc_apxr1_reginfo); ++ } ++ if (gcs->prebits == 7) { ++ define_arm_cp_regs(cpu, gicv3_cpuif_icc_apxr23_reginfo); ++ } ++ ++ if (arm_feature(&cpu->env, ARM_FEATURE_EL2)) { ++ int j; ++ ++ gcs->num_list_regs = cpu->gic_num_lrs ?: 4; ++ gcs->vpribits = cpu->gic_vpribits ?: 5; ++ gcs->vprebits = cpu->gic_vprebits ?: 5; ++ ++ /* ++ * Check against architectural constraints: getting these ++ * wrong would be a bug in the CPU code defining these, ++ * and the implementation relies on them holding. ++ */ ++ g_assert(gcs->vprebits <= gcs->vpribits); ++ g_assert(gcs->vprebits >= 5 && gcs->vprebits <= 7); ++ g_assert(gcs->vpribits >= 5 && gcs->vpribits <= 8); ++ ++ define_arm_cp_regs(cpu, gicv3_cpuif_hcr_reginfo); ++ ++ for (j = 0; j < gcs->num_list_regs; j++) { ++ /* ++ * Note that the AArch64 LRs are 64-bit; the AArch32 LRs ++ * are split into two cp15 regs, LR (the low part, with the ++ * same encoding as the AArch64 LR) and LRC (the high part). 
++ */ ++ ARMCPRegInfo lr_regset[] = { ++ { .name = "ICH_LRn_EL2", .state = ARM_CP_STATE_BOTH, ++ .opc0 = 3, .opc1 = 4, .crn = 12, ++ .crm = 12 + (j >> 3), .opc2 = j & 7, ++ .type = ARM_CP_IO | ARM_CP_NO_RAW, ++ .access = PL2_RW, ++ .readfn = ich_lr_read, ++ .writefn = ich_lr_write, ++ }, ++ { .name = "ICH_LRCn_EL2", .state = ARM_CP_STATE_AA32, ++ .cp = 15, .opc1 = 4, .crn = 12, ++ .crm = 14 + (j >> 3), .opc2 = j & 7, ++ .type = ARM_CP_IO | ARM_CP_NO_RAW, ++ .access = PL2_RW, ++ .readfn = ich_lr_read, ++ .writefn = ich_lr_write, ++ }, ++ }; ++ define_arm_cp_regs(cpu, lr_regset); ++ } ++ if (gcs->vprebits >= 6) { ++ define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr1_reginfo); ++ } ++ if (gcs->vprebits == 7) { ++ define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr23_reginfo); ++ } ++ } ++} ++ + static void gicv3_cpuif_el_change_hook(ARMCPU *cpu, void *opaque) + { + GICv3CPUState *cs = opaque; +@@ -2804,131 +2925,23 @@ void gicv3_init_cpuif(GICv3State *s) + + for (i = 0; i < s->num_cpu; i++) { + ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i)); +- GICv3CPUState *cs = &s->cpu[i]; +- +- /* +- * If the CPU doesn't define a GICv3 configuration, probably because +- * in real hardware it doesn't have one, then we use default values +- * matching the one used by most Arm CPUs. This applies to: +- * cpu->gic_num_lrs +- * cpu->gic_vpribits +- * cpu->gic_vprebits +- * cpu->gic_pribits +- */ +- +- /* Note that we can't just use the GICv3CPUState as an opaque pointer +- * in define_arm_cp_regs_with_opaque(), because when we're called back +- * it might be with code translated by CPU 0 but run by CPU 1, in +- * which case we'd get the wrong value. +- * So instead we define the regs with no ri->opaque info, and +- * get back to the GICv3CPUState from the CPUARMState. +- * +- * These CP regs callbacks can be called from either TCG or HVF code. +- */ +- define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); +- +- /* +- * The CPU implementation specifies the number of supported +- * bits of physical priority. 
For backwards compatibility +- * of migration, we have a compat property that forces use +- * of 8 priority bits regardless of what the CPU really has. +- */ +- if (s->force_8bit_prio) { +- cs->pribits = 8; +- } else { +- cs->pribits = cpu->gic_pribits ?: 5; +- } +- +- /* +- * The GICv3 has separate ID register fields for virtual priority +- * and preemption bit values, but only a single ID register field +- * for the physical priority bits. The preemption bit count is +- * always the same as the priority bit count, except that 8 bits +- * of priority means 7 preemption bits. We precalculate the +- * preemption bits because it simplifies the code and makes the +- * parallels between the virtual and physical bits of the GIC +- * a bit clearer. +- */ +- cs->prebits = cs->pribits; +- if (cs->prebits == 8) { +- cs->prebits--; +- } +- /* +- * Check that CPU code defining pribits didn't violate +- * architectural constraints our implementation relies on. +- */ +- g_assert(cs->pribits >= 4 && cs->pribits <= 8); + +- /* +- * gicv3_cpuif_reginfo[] defines ICC_AP*R0_EL1; add definitions +- * for ICC_AP*R{1,2,3}_EL1 if the prebits value requires them. +- */ +- if (cs->prebits >= 6) { +- define_arm_cp_regs(cpu, gicv3_cpuif_icc_apxr1_reginfo); +- } +- if (cs->prebits == 7) { +- define_arm_cp_regs(cpu, gicv3_cpuif_icc_apxr23_reginfo); +- } +- +- if (arm_feature(&cpu->env, ARM_FEATURE_EL2)) { +- int j; +- +- cs->num_list_regs = cpu->gic_num_lrs ?: 4; +- cs->vpribits = cpu->gic_vpribits ?: 5; +- cs->vprebits = cpu->gic_vprebits ?: 5; +- +- /* Check against architectural constraints: getting these +- * wrong would be a bug in the CPU code defining these, +- * and the implementation relies on them holding. 
+- */ +- g_assert(cs->vprebits <= cs->vpribits); +- g_assert(cs->vprebits >= 5 && cs->vprebits <= 7); +- g_assert(cs->vpribits >= 5 && cs->vpribits <= 8); +- +- define_arm_cp_regs(cpu, gicv3_cpuif_hcr_reginfo); +- +- for (j = 0; j < cs->num_list_regs; j++) { +- /* Note that the AArch64 LRs are 64-bit; the AArch32 LRs +- * are split into two cp15 regs, LR (the low part, with the +- * same encoding as the AArch64 LR) and LRC (the high part). ++ if (qemu_enabled_cpu(CPU(cpu))) { ++ GICv3CPUState *cs = icc_cs_from_env(&cpu->env); ++ gicv3_init_cpu_reginfo(CPU(cpu)); ++ if (tcg_enabled() || qtest_enabled()) { ++ /* ++ * We can only trap EL changes with TCG. However the GIC ++ * interrupt state only changes on EL changes involving EL2 or ++ * EL3, so for the non-TCG case this is OK, as EL2 and EL3 can't ++ * exist. + */ +- ARMCPRegInfo lr_regset[] = { +- { .name = "ICH_LRn_EL2", .state = ARM_CP_STATE_BOTH, +- .opc0 = 3, .opc1 = 4, .crn = 12, +- .crm = 12 + (j >> 3), .opc2 = j & 7, +- .type = ARM_CP_IO | ARM_CP_NO_RAW, +- .access = PL2_RW, +- .readfn = ich_lr_read, +- .writefn = ich_lr_write, +- }, +- { .name = "ICH_LRCn_EL2", .state = ARM_CP_STATE_AA32, +- .cp = 15, .opc1 = 4, .crn = 12, +- .crm = 14 + (j >> 3), .opc2 = j & 7, +- .type = ARM_CP_IO | ARM_CP_NO_RAW, +- .access = PL2_RW, +- .readfn = ich_lr_read, +- .writefn = ich_lr_write, +- }, +- }; +- define_arm_cp_regs(cpu, lr_regset); +- } +- if (cs->vprebits >= 6) { +- define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr1_reginfo); +- } +- if (cs->vprebits == 7) { +- define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr23_reginfo); ++ arm_register_el_change_hook(cpu, gicv3_cpuif_el_change_hook, ++ cs); ++ } else { ++ assert(!arm_feature(&cpu->env, ARM_FEATURE_EL2)); ++ assert(!arm_feature(&cpu->env, ARM_FEATURE_EL3)); + } + } +- if (tcg_enabled() || qtest_enabled()) { +- /* +- * We can only trap EL changes with TCG. 
However the GIC interrupt +- * state only changes on EL changes involving EL2 or EL3, so for +- * the non-TCG case this is OK, as EL2 and EL3 can't exist. +- */ +- arm_register_el_change_hook(cpu, gicv3_cpuif_el_change_hook, cs); +- } else { +- assert(!arm_feature(&cpu->env, ARM_FEATURE_EL2)); +- assert(!arm_feature(&cpu->env, ARM_FEATURE_EL3)); +- } + } + } +diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c +index db06c75e2b..dd2a60fa20 100644 +--- a/hw/intc/arm_gicv3_kvm.c ++++ b/hw/intc/arm_gicv3_kvm.c +@@ -804,6 +804,10 @@ static void vm_change_state_handler(void *opaque, bool running, + } + } + ++static void kvm_gicv3_init_cpu_reginfo(CPUState *cs) ++{ ++ define_arm_cp_regs(ARM_CPU(cs), gicv3_cpuif_reginfo); ++} + + static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) + { +@@ -837,7 +841,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) + for (i = 0; i < s->num_cpu; i++) { + CPUState *cs = qemu_get_cpu(i); + if (qemu_enabled_cpu(cs)) { +- define_arm_cp_regs(ARM_CPU(cs), gicv3_cpuif_reginfo); ++ kvm_gicv3_init_cpu_reginfo(cs); + } + } + +@@ -925,6 +929,7 @@ static void kvm_arm_gicv3_class_init(ObjectClass *klass, void *data) + + agcc->pre_save = kvm_arm_gicv3_get; + agcc->post_load = kvm_arm_gicv3_put; ++ agcc->init_cpu_reginfo = kvm_gicv3_init_cpu_reginfo; + device_class_set_parent_realize(dc, kvm_arm_gicv3_realize, + &kgc->parent_realize); + resettable_class_set_parent_phases(rc, NULL, kvm_arm_gicv3_reset_hold, NULL, +diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h +index 9d4c1209bd..0bed0f6e2a 100644 +--- a/hw/intc/gicv3_internal.h ++++ b/hw/intc/gicv3_internal.h +@@ -709,6 +709,7 @@ void gicv3_redist_vinvall(GICv3CPUState *cs, uint64_t vptaddr); + + void gicv3_redist_send_sgi(GICv3CPUState *cs, int grp, int irq, bool ns); + void gicv3_init_cpuif(GICv3State *s); ++void gicv3_init_cpu_reginfo(CPUState *cs); + + /** + * gicv3_cpuif_update: +diff --git a/include/hw/intc/arm_gicv3_common.h 
b/include/hw/intc/arm_gicv3_common.h +index 97a48f44b9..b5f8ba17ff 100644 +--- a/include/hw/intc/arm_gicv3_common.h ++++ b/include/hw/intc/arm_gicv3_common.h +@@ -325,6 +325,7 @@ struct ARMGICv3CommonClass { + + void (*pre_save)(GICv3State *s); + void (*post_load)(GICv3State *s); ++ void (*init_cpu_reginfo)(CPUState *cs); + }; + + void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, +-- +2.27.0 + diff --git a/hw-net-rocker_of_dpa-fix-double-free-bug-of-rocker-d.patch b/hw-net-rocker_of_dpa-fix-double-free-bug-of-rocker-d.patch new file mode 100644 index 0000000000000000000000000000000000000000..052c6cbe2e167250833171c371bc737f6351ddf8 --- /dev/null +++ b/hw-net-rocker_of_dpa-fix-double-free-bug-of-rocker-d.patch @@ -0,0 +1,40 @@ +From c3f204e02eacdd3e9ec6ac55396ccc7f115ad63e Mon Sep 17 00:00:00 2001 +From: Qiang Ning +Date: Mon, 12 Jul 2021 17:30:45 +0800 +Subject: [PATCH] hw/net/rocker_of_dpa: fix double free bug of rocker device + +The of_dpa_cmd_add_l2_flood function of the rocker device +releases the memory of group->l2_flood.group_ids before +applying for new memory. If the l2_group configured by +the guest does not match the input group->l2_flood.group_ids, +the err_out branch is redirected to release the memory of the +group->l2_flood.group_ids branch. The pointer is not set to +NULL after the memory is freed. When the guest accesses the +of_dpa_cmd_add_l2_flood function again, the memory of +group->l2_flood.group_ids is released again. As a result, +the memory is double free. + +Fix that by setting group->l2_flood.group_ids to NULL after free. 
+ +Signed-off-by: Jiajie Li +Signed-off-by: Qiang Ning +Signed-off-by: Yan Wang +--- + hw/net/rocker/rocker_of_dpa.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c +index 5e16056be6..c25438cccc 100644 +--- a/hw/net/rocker/rocker_of_dpa.c ++++ b/hw/net/rocker/rocker_of_dpa.c +@@ -2070,6 +2070,7 @@ static int of_dpa_cmd_add_l2_flood(OfDpa *of_dpa, OfDpaGroup *group, + err_out: + group->l2_flood.group_count = 0; + g_free(group->l2_flood.group_ids); ++ group->l2_flood.group_ids = NULL; + g_free(tlvs); + + return err; +-- +2.27.0 + diff --git a/hw-usb-reduce-the-vpcu-cost-of-UHCI-when-VNC-disconn.patch b/hw-usb-reduce-the-vpcu-cost-of-UHCI-when-VNC-disconn.patch new file mode 100644 index 0000000000000000000000000000000000000000..72d0c2a4fc5211a27de4ee2c0def9ee28b956973 --- /dev/null +++ b/hw-usb-reduce-the-vpcu-cost-of-UHCI-when-VNC-disconn.patch @@ -0,0 +1,459 @@ +From dc7e40b2841132b0bc43d25c2c31f41ae3fa2c68 Mon Sep 17 00:00:00 2001 +From: eillon +Date: Tue, 8 Feb 2022 22:43:59 -0500 +Subject: [PATCH] hw/usb: reduce the vpcu cost of UHCI when VNC disconnect + +Reduce the vcpu cost by setting a lower FRAME_TIMER_FREQ of the UHCI +when VNC client disconnected. This can reduce about 3% cost of +vcpu thread.
+ +Signed-off-by: eillon +--- + hw/usb/core.c | 5 ++-- + hw/usb/desc.c | 7 +++-- + hw/usb/dev-hid.c | 2 +- + hw/usb/hcd-uhci.c | 63 ++++++++++++++++++++++++++++++++++------ + hw/usb/hcd-uhci.h | 1 + + hw/usb/host-libusb.c | 32 ++++++++++++++++++++ + include/hw/usb.h | 1 + + include/qemu/timer.h | 28 ++++++++++++++++++ + ui/vnc.c | 4 +++ + util/qemu-timer.c | 69 ++++++++++++++++++++++++++++++++++++++++++++ + 10 files changed, 197 insertions(+), 15 deletions(-) + +diff --git a/hw/usb/core.c b/hw/usb/core.c +index 975f76250a..51b36126ca 100644 +--- a/hw/usb/core.c ++++ b/hw/usb/core.c +@@ -87,7 +87,7 @@ void usb_device_reset(USBDevice *dev) + return; + } + usb_device_handle_reset(dev); +- dev->remote_wakeup = 0; ++ dev->remote_wakeup &= ~USB_DEVICE_REMOTE_WAKEUP; + dev->addr = 0; + dev->state = USB_STATE_DEFAULT; + } +@@ -105,7 +105,8 @@ void usb_wakeup(USBEndpoint *ep, unsigned int stream) + */ + return; + } +- if (dev->remote_wakeup && dev->port && dev->port->ops->wakeup) { ++ if ((dev->remote_wakeup & USB_DEVICE_REMOTE_WAKEUP) ++ && dev->port && dev->port->ops->wakeup) { + dev->port->ops->wakeup(dev->port); + } + if (bus->ops->wakeup_endpoint) { +diff --git a/hw/usb/desc.c b/hw/usb/desc.c +index f2bdc05a95..333f73fff1 100644 +--- a/hw/usb/desc.c ++++ b/hw/usb/desc.c +@@ -752,7 +752,7 @@ int usb_desc_handle_control(USBDevice *dev, USBPacket *p, + if (config->bmAttributes & USB_CFG_ATT_SELFPOWER) { + data[0] |= 1 << USB_DEVICE_SELF_POWERED; + } +- if (dev->remote_wakeup) { ++ if (dev->remote_wakeup & USB_DEVICE_REMOTE_WAKEUP) { + data[0] |= 1 << USB_DEVICE_REMOTE_WAKEUP; + } + data[1] = 0x00; +@@ -762,14 +762,15 @@ int usb_desc_handle_control(USBDevice *dev, USBPacket *p, + } + case DeviceOutRequest | USB_REQ_CLEAR_FEATURE: + if (value == USB_DEVICE_REMOTE_WAKEUP) { +- dev->remote_wakeup = 0; ++ dev->remote_wakeup &= ~USB_DEVICE_REMOTE_WAKEUP; + ret = 0; + } + trace_usb_clear_device_feature(dev->addr, value, ret); + break; + case DeviceOutRequest | 
USB_REQ_SET_FEATURE: ++ dev->remote_wakeup |= USB_DEVICE_REMOTE_WAKEUP_IS_SUPPORTED; + if (value == USB_DEVICE_REMOTE_WAKEUP) { +- dev->remote_wakeup = 1; ++ dev->remote_wakeup |= USB_DEVICE_REMOTE_WAKEUP; + ret = 0; + } + trace_usb_set_device_feature(dev->addr, value, ret); +diff --git a/hw/usb/dev-hid.c b/hw/usb/dev-hid.c +index bdd6d1ffaf..cc68d1ce9e 100644 +--- a/hw/usb/dev-hid.c ++++ b/hw/usb/dev-hid.c +@@ -745,7 +745,7 @@ static int usb_ptr_post_load(void *opaque, int version_id) + { + USBHIDState *s = opaque; + +- if (s->dev.remote_wakeup) { ++ if (s->dev.remote_wakeup & USB_DEVICE_REMOTE_WAKEUP) { + hid_pointer_activate(&s->hid); + } + return 0; +diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c +index 6975966c3f..a92581ff5f 100644 +--- a/hw/usb/hcd-uhci.c ++++ b/hw/usb/hcd-uhci.c +@@ -44,6 +44,8 @@ + #include "hcd-uhci.h" + + #define FRAME_TIMER_FREQ 1000 ++#define FRAME_TIMER_FREQ_LAZY 10 ++#define USB_DEVICE_NEED_NORMAL_FREQ "QEMU USB Tablet" + + #define FRAME_MAX_LOOPS 256 + +@@ -109,6 +111,22 @@ static void uhci_async_cancel(UHCIAsync *async); + static void uhci_queue_fill(UHCIQueue *q, UHCI_TD *td); + static void uhci_resume(void *opaque); + ++static int64_t uhci_frame_timer_freq = FRAME_TIMER_FREQ_LAZY; ++ ++static void uhci_set_frame_freq(int freq) ++{ ++ if (freq <= 0) { ++ return; ++ } ++ ++ uhci_frame_timer_freq = freq; ++} ++ ++static qemu_usb_controller qemu_uhci = { ++ .name = "uhci", ++ .qemu_set_freq = uhci_set_frame_freq, ++}; ++ + static inline int32_t uhci_queue_token(UHCI_TD *td) + { + if ((td->token & (0xf << 15)) == 0) { +@@ -351,7 +369,7 @@ static int uhci_post_load(void *opaque, int version_id) + + if (version_id < 2) { + s->expire_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + +- (NANOSECONDS_PER_SECOND / FRAME_TIMER_FREQ); ++ (NANOSECONDS_PER_SECOND / uhci_frame_timer_freq); + } + return 0; + } +@@ -392,8 +410,29 @@ static void uhci_port_write(void *opaque, hwaddr addr, + if ((val & UHCI_CMD_RS) && !(s->cmd & UHCI_CMD_RS)) { + /* 
start frame processing */ + trace_usb_uhci_schedule_start(); +- s->expire_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + +- (NANOSECONDS_PER_SECOND / FRAME_TIMER_FREQ); ++ ++ /* ++ * If the frequency of frame_timer is too slow, Guest OS (Win2012) would become ++ * blue-screen after hotplugging some vcpus. ++ * If this USB device support the remote-wakeup, the UHCI controller ++ * will enter global suspend mode when there is no input for several seconds. ++ * In this case, Qemu will delete the frame_timer. Since the frame_timer has been deleted, ++ * there is no influence to the performance of Vms. So, we can change the frequency to 1000. ++ * After that the frequency will be safe when we trigger the frame_timer again. ++ * Excepting this, there are two ways to change the frequency: ++ * 1)VNC connect/disconnect;2)attach/detach USB device. ++ */ ++ if ((uhci_frame_timer_freq != FRAME_TIMER_FREQ) ++ && (s->ports[0].port.dev) ++ && (!memcmp(s->ports[0].port.dev->product_desc, ++ USB_DEVICE_NEED_NORMAL_FREQ, strlen(USB_DEVICE_NEED_NORMAL_FREQ))) ++ && (s->ports[0].port.dev->remote_wakeup & USB_DEVICE_REMOTE_WAKEUP_IS_SUPPORTED)) { ++ qemu_log("turn up the frequency of UHCI controller to %d\n", FRAME_TIMER_FREQ); ++ uhci_frame_timer_freq = FRAME_TIMER_FREQ; ++ } ++ ++ s->frame_time = NANOSECONDS_PER_SECOND / FRAME_TIMER_FREQ; ++ s->expire_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + s->frame_time; + timer_mod(s->frame_timer, s->expire_time); + s->status &= ~UHCI_STS_HCHALTED; + } else if (!(val & UHCI_CMD_RS)) { +@@ -1083,7 +1122,6 @@ static void uhci_frame_timer(void *opaque) + UHCIState *s = opaque; + uint64_t t_now, t_last_run; + int i, frames; +- const uint64_t frame_t = NANOSECONDS_PER_SECOND / FRAME_TIMER_FREQ; + + s->completions_only = false; + qemu_bh_cancel(s->bh); +@@ -1099,14 +1137,14 @@ static void uhci_frame_timer(void *opaque) + } + + /* We still store expire_time in our state, for migration */ +- t_last_run = s->expire_time - frame_t; ++ t_last_run = 
s->expire_time - s->frame_time; + t_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + + /* Process up to MAX_FRAMES_PER_TICK frames */ +- frames = (t_now - t_last_run) / frame_t; ++ frames = (t_now - t_last_run) / s->frame_time; + if (frames > s->maxframes) { + int skipped = frames - s->maxframes; +- s->expire_time += skipped * frame_t; ++ s->expire_time += skipped * s->frame_time; + s->frnum = (s->frnum + skipped) & 0x7ff; + frames -= skipped; + } +@@ -1123,7 +1161,7 @@ static void uhci_frame_timer(void *opaque) + /* The spec says frnum is the frame currently being processed, and + * the guest must look at frnum - 1 on interrupt, so inc frnum now */ + s->frnum = (s->frnum + 1) & 0x7ff; +- s->expire_time += frame_t; ++ s->expire_time += s->frame_time; + } + + /* Complete the previous frame(s) */ +@@ -1134,7 +1172,12 @@ static void uhci_frame_timer(void *opaque) + } + s->pending_int_mask = 0; + +- timer_mod(s->frame_timer, t_now + frame_t); ++ /* expire_time is calculated from last frame_time, we should calculate it ++ * according to new frame_time which equals to ++ * NANOSECONDS_PER_SECOND / uhci_frame_timer_freq */ ++ s->expire_time -= s->frame_time - NANOSECONDS_PER_SECOND / uhci_frame_timer_freq; ++ s->frame_time = NANOSECONDS_PER_SECOND / uhci_frame_timer_freq; ++ timer_mod(s->frame_timer, t_now + s->frame_time); + } + + static const MemoryRegionOps uhci_ioport_ops = { +@@ -1195,8 +1238,10 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp) + s->bh = qemu_bh_new_guarded(uhci_bh, s, &DEVICE(dev)->mem_reentrancy_guard); + s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, uhci_frame_timer, s); + s->num_ports_vmstate = NB_PORTS; ++ s->frame_time = NANOSECONDS_PER_SECOND / uhci_frame_timer_freq; + QTAILQ_INIT(&s->queues); + ++ qemu_register_usb_controller(&qemu_uhci, QEMU_USB_CONTROLLER_UHCI); + memory_region_init_io(&s->io_bar, OBJECT(s), &uhci_ioport_ops, s, + "uhci", 0x20); + +diff --git a/hw/usb/hcd-uhci.h b/hw/usb/hcd-uhci.h +index 
69f8b40c49..0918719911 100644 +--- a/hw/usb/hcd-uhci.h ++++ b/hw/usb/hcd-uhci.h +@@ -50,6 +50,7 @@ typedef struct UHCIState { + uint16_t status; + uint16_t intr; /* interrupt enable register */ + uint16_t frnum; /* frame number */ ++ uint64_t frame_time; /* frame time in ns */ + uint32_t fl_base_addr; /* frame list base address */ + uint8_t sof_timing; + uint8_t status2; /* bit 0 and 1 are used to generate UHCI_STS_USBINT */ +diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c +index d7060a42d5..dba469c1ef 100644 +--- a/hw/usb/host-libusb.c ++++ b/hw/usb/host-libusb.c +@@ -945,6 +945,30 @@ static void usb_host_ep_update(USBHostDevice *s) + libusb_free_config_descriptor(conf); + } + ++static unsigned int usb_get_controller_type(int speed) ++{ ++ unsigned int type = MAX_USB_CONTROLLER_TYPES; ++ ++ switch (speed) { ++ case USB_SPEED_SUPER: ++ type = QEMU_USB_CONTROLLER_XHCI; ++ break; ++ case USB_SPEED_HIGH: ++ type = QEMU_USB_CONTROLLER_EHCI; ++ break; ++ case USB_SPEED_FULL: ++ type = QEMU_USB_CONTROLLER_UHCI; ++ break; ++ case USB_SPEED_LOW: ++ type = QEMU_USB_CONTROLLER_OHCI; ++ break; ++ default: ++ break; ++ } ++ ++ return type; ++} ++ + static int usb_host_open(USBHostDevice *s, libusb_device *dev, int hostfd) + { + USBDevice *udev = USB_DEVICE(s); +@@ -1054,6 +1078,12 @@ static int usb_host_open(USBHostDevice *s, libusb_device *dev, int hostfd) + } + + trace_usb_host_open_success(bus_num, addr); ++ ++ /* change ehci frame time freq when USB passthrough */ ++ qemu_log("usb host speed is %d\n", udev->speed); ++ qemu_timer_set_mode(QEMU_TIMER_USB_NORMAL_MODE, ++ usb_get_controller_type(udev->speed)); ++ + return 0; + + fail: +@@ -1129,6 +1159,8 @@ static int usb_host_close(USBHostDevice *s) + } + + usb_host_auto_check(NULL); ++ qemu_timer_set_mode(QEMU_TIMER_USB_LAZY_MODE, ++ usb_get_controller_type(udev->speed)); + return 0; + } + +diff --git a/include/hw/usb.h b/include/hw/usb.h +index 32c23a5ca2..911179158d 100644 +--- a/include/hw/usb.h ++++ 
b/include/hw/usb.h +@@ -142,6 +142,7 @@ + + #define USB_DEVICE_SELF_POWERED 0 + #define USB_DEVICE_REMOTE_WAKEUP 1 ++#define USB_DEVICE_REMOTE_WAKEUP_IS_SUPPORTED 2 + + #define USB_DT_DEVICE 0x01 + #define USB_DT_CONFIG 0x02 +diff --git a/include/qemu/timer.h b/include/qemu/timer.h +index 9a366e551f..475c2a3f18 100644 +--- a/include/qemu/timer.h ++++ b/include/qemu/timer.h +@@ -91,6 +91,34 @@ struct QEMUTimer { + int scale; + }; + ++#define QEMU_USB_NORMAL_FREQ 1000 ++#define QEMU_USB_LAZY_FREQ 10 ++#define MAX_USB_CONTROLLER_TYPES 4 ++#define QEMU_USB_CONTROLLER_OHCI 0 ++#define QEMU_USB_CONTROLLER_UHCI 1 ++#define QEMU_USB_CONTROLLER_EHCI 2 ++#define QEMU_USB_CONTROLLER_XHCI 3 ++ ++typedef void (*QEMUSetFreqHandler) (int freq); ++ ++typedef struct qemu_usb_controller { ++ const char *name; ++ QEMUSetFreqHandler qemu_set_freq; ++} qemu_usb_controller; ++ ++typedef qemu_usb_controller* qemu_usb_controller_ptr; ++ ++enum qemu_timer_mode { ++ QEMU_TIMER_USB_NORMAL_MODE = 1 << 0, /* Set when VNC connect or ++ * with usb dev passthrough ++ */ ++ QEMU_TIMER_USB_LAZY_MODE = 1 << 1, /* Set when VNC disconnect */ ++}; ++ ++int qemu_register_usb_controller(qemu_usb_controller_ptr controller, ++ unsigned int type); ++int qemu_timer_set_mode(enum qemu_timer_mode mode, unsigned int type); ++ + extern QEMUTimerListGroup main_loop_tlg; + + /* +diff --git a/ui/vnc.c b/ui/vnc.c +index 4f23a0fa79..5dd77e73cb 100644 +--- a/ui/vnc.c ++++ b/ui/vnc.c +@@ -1365,6 +1365,8 @@ void vnc_disconnect_finish(VncState *vs) + g_free(vs->zrle); + g_free(vs->tight); + g_free(vs); ++ ++ qemu_timer_set_mode(QEMU_TIMER_USB_LAZY_MODE, QEMU_USB_CONTROLLER_UHCI); + } + + size_t vnc_client_io_error(VncState *vs, ssize_t ret, Error *err) +@@ -3341,6 +3343,8 @@ static void vnc_connect(VncDisplay *vd, QIOChannelSocket *sioc, + } + } + } ++ ++ qemu_timer_set_mode(QEMU_TIMER_USB_NORMAL_MODE, QEMU_USB_CONTROLLER_UHCI); + } + + void vnc_start_protocol(VncState *vs) +diff --git a/util/qemu-timer.c 
b/util/qemu-timer.c +index 6a0de33dd2..dc891cc557 100644 +--- a/util/qemu-timer.c ++++ b/util/qemu-timer.c +@@ -23,6 +23,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/log.h" + #include "qemu/main-loop.h" + #include "qemu/timer.h" + #include "qemu/lockable.h" +@@ -75,6 +76,74 @@ struct QEMUTimerList { + QemuEvent timers_done_ev; + }; + ++typedef struct qemu_controller_timer_state { ++ qemu_usb_controller_ptr controller; ++ int refs; ++} controller_timer_state; ++ ++typedef controller_timer_state* controller_timer_state_ptr; ++ ++static controller_timer_state uhci_timer_state = { ++ .controller = NULL, ++ .refs = 0, ++}; ++ ++static controller_timer_state_ptr \ ++ qemu_usb_controller_tab[MAX_USB_CONTROLLER_TYPES] = {NULL, ++ &uhci_timer_state, ++ NULL, NULL}; ++ ++int qemu_register_usb_controller(qemu_usb_controller_ptr controller, ++ unsigned int type) ++{ ++ if (type != QEMU_USB_CONTROLLER_UHCI) { ++ return 0; ++ } ++ ++ /* for companion EHCI controller will create three UHCI controllers, ++ * we init it only once. 
++ */ ++ if (!qemu_usb_controller_tab[type]->controller) { ++ qemu_log("the usb controller (%d) registed frame handler\n", type); ++ qemu_usb_controller_tab[type]->controller = controller; ++ } ++ ++ return 0; ++} ++ ++int qemu_timer_set_mode(enum qemu_timer_mode mode, unsigned int type) ++{ ++ if (type != QEMU_USB_CONTROLLER_UHCI) { ++ qemu_log("the usb controller (%d) no need change frame frep\n", type); ++ return 0; ++ } ++ ++ if (!qemu_usb_controller_tab[type]->controller) { ++ qemu_log("the usb controller (%d) not registed yet\n", type); ++ return 0; ++ } ++ ++ if (mode == QEMU_TIMER_USB_NORMAL_MODE) { ++ if (qemu_usb_controller_tab[type]->refs++ > 0) { ++ return 0; ++ } ++ qemu_usb_controller_tab[type]->controller-> ++ qemu_set_freq(QEMU_USB_NORMAL_FREQ); ++ qemu_log("Set the controller (%d) of freq %d HZ,\n", ++ type, QEMU_USB_NORMAL_FREQ); ++ } else { ++ if (--qemu_usb_controller_tab[type]->refs > 0) { ++ return 0; ++ } ++ qemu_usb_controller_tab[type]->controller-> ++ qemu_set_freq(QEMU_USB_LAZY_FREQ); ++ qemu_log("Set the controller(type:%d) of freq %d HZ,\n", ++ type, QEMU_USB_LAZY_FREQ); ++ } ++ ++ return 0; ++} ++ + /** + * qemu_clock_ptr: + * @type: type of clock +-- +2.27.0 + diff --git a/i386-cache-passthrough-Update-AMD-8000_001D.EAX-25-1.patch b/i386-cache-passthrough-Update-AMD-8000_001D.EAX-25-1.patch new file mode 100644 index 0000000000000000000000000000000000000000..d3a119f754fc931c7f1b0fe716e91fe81cf5b3c6 --- /dev/null +++ b/i386-cache-passthrough-Update-AMD-8000_001D.EAX-25-1.patch @@ -0,0 +1,65 @@ +From ff43e9201aba8f4047e6fd5edb93a4861cc8fed2 Mon Sep 17 00:00:00 2001 +From: Yanan Wang +Date: Thu, 28 Mar 2024 18:57:56 +0800 +Subject: [PATCH] i386: cache passthrough: Update AMD 8000_001D.EAX[25:14] + based on vCPU topo + +On AMD target, when host cache passthrough is disabled we will +emulate the guest caches with default values and initialize the +shared cpu list of the caches based on vCPU topology. 
However +when host cache passthrough is enabled, the shared cpu list is +consistent with host regardless what the vCPU topology is. + +For example, when cache passthrough is enabled, running a guest +with vThreads=1 on a host with pThreads=2, we will get that there +are every *two* logical vCPUs sharing a L1/L2 cache, which is not +consistent with the vCPU topology (vThreads=1). + +So let's reinitialize BITs[25:14] of AMD CPUID 8000_001D.EAX +based on the actual vCPU topology instead of host pCPU topology. + +Signed-off-by: Yanan Wang +Signed-off-by: Yuan Zhang +--- + target/i386/cpu.c | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index f94405c02b..491cf40cc7 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6597,9 +6597,31 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + } + break; + case 0x8000001D: ++ /* Populate AMD Processor Cache Information */ + *eax = 0; + if (cpu->cache_info_passthrough) { + x86_cpu_get_cache_cpuid(index, count, eax, ebx, ecx, edx); ++ ++ /* ++ * Clear BITs[25:14] and then update them based on the guest ++ * vCPU topology, like what we do in encode_cache_cpuid8000001d ++ * when cache_info_passthrough is not enabled. 
++ */ ++ *eax &= ~0x03FFC000; ++ switch (count) { ++ case 0: /* L1 dcache info */ ++ case 1: /* L1 icache info */ ++ case 2: /* L2 cache info */ ++ *eax |= ((topo_info.threads_per_core - 1) << 14); ++ break; ++ case 3: /* L3 cache info */ ++ *eax |= ((topo_info.cores_per_die * ++ topo_info.threads_per_core - 1) << 14); ++ break; ++ default: /* end of info */ ++ *eax = *ebx = *ecx = *edx = 0; ++ break; ++ } + break; + } + switch (count) { +-- +2.27.0 + diff --git a/i6300esb-watchdog-bugfix-Add-a-runstate-transition.patch b/i6300esb-watchdog-bugfix-Add-a-runstate-transition.patch new file mode 100644 index 0000000000000000000000000000000000000000..133577de23a77bfa7915097451864fb1ef639af6 --- /dev/null +++ b/i6300esb-watchdog-bugfix-Add-a-runstate-transition.patch @@ -0,0 +1,42 @@ +From 06fc5eb48668a1c83e6a4e76c1a71403917b1835 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Fri, 11 Feb 2022 20:33:47 +0800 +Subject: [PATCH] i6300esb watchdog: bugfix: Add a runstate transition + +QEMU will abort() for the reasons now: + + invalid runstate transition: 'prelaunch' -> 'postmigrate' + Aborted + +This happens when: + |<- watchdog timeout happened, then sets reset_requested to + | SHUTDOWN_CAUSE_GUEST_RESET; + |<- hot-migration thread sets vm state to RUN_STATE_FINISH_MIGRATE + | before the last time of migration; + |<- main thread gets the change of reset_requested and triggers + | reset, then sets vm state to RUN_STATE_PRELAUNCH; + |<- hot-migration thread sets vm state to RUN_STATE_POSTMIGRATE. + +Then 'prelaunch' -> 'postmigrate' runstate transition will happen. +It is legal so add this transition to runstate_transitions_def. 
+ +Signed-off-by: Jinhua Cao +--- + system/runstate.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/system/runstate.c b/system/runstate.c +index ea9d6c2a32..9d3f627fee 100644 +--- a/system/runstate.c ++++ b/system/runstate.c +@@ -116,6 +116,7 @@ static const RunStateTransition runstate_transitions_def[] = { + { RUN_STATE_PRELAUNCH, RUN_STATE_RUNNING }, + { RUN_STATE_PRELAUNCH, RUN_STATE_FINISH_MIGRATE }, + { RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE }, ++ { RUN_STATE_PRELAUNCH, RUN_STATE_POSTMIGRATE }, + + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_RUNNING }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_PAUSED }, +-- +2.27.0 + diff --git a/ide-ahci-add-check-to-avoid-null-dereference-CVE-201.patch b/ide-ahci-add-check-to-avoid-null-dereference-CVE-201.patch new file mode 100644 index 0000000000000000000000000000000000000000..5aaf01663e077c1336c79a115a433a15c53002e8 --- /dev/null +++ b/ide-ahci-add-check-to-avoid-null-dereference-CVE-201.patch @@ -0,0 +1,42 @@ +From 6689eebbb520dc75bc65e0914c4e05e40a4efc1d Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Mon, 21 Jun 2021 09:22:35 +0800 +Subject: [PATCH] ide: ahci: add check to avoid null dereference + (CVE-2019-12067) + +Fix CVE-2019-12067 + +AHCI emulator while committing DMA buffer in ahci_commit_buf() +may do a NULL dereference if the command header 'ad->cur_cmd' +is null. Add check to avoid it. 
+ +Reported-by: Bugs SysSec +Signed-off-by: Prasad J Pandit + +Signed-off-by: Jiajie Li +Signed-off-by: Yan Wang +Signed-off-by: Adttil +--- + hw/ide/ahci.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c +index afdc44b8e0..8062e1743c 100644 +--- a/hw/ide/ahci.c ++++ b/hw/ide/ahci.c +@@ -1519,8 +1519,10 @@ static void ahci_commit_buf(const IDEDMA *dma, uint32_t tx_bytes) + { + AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); + +- tx_bytes += le32_to_cpu(ad->cur_cmd->status); +- ad->cur_cmd->status = cpu_to_le32(tx_bytes); ++ if (ad->cur_cmd) { ++ tx_bytes += le32_to_cpu(ad->cur_cmd->status); ++ ad->cur_cmd->status = cpu_to_le32(tx_bytes); ++ } + } + + static int ahci_dma_rw_buf(const IDEDMA *dma, bool is_write) +-- +2.27.0 + diff --git a/intc-gicv3-Fixes-for-vcpu-hotplug.patch b/intc-gicv3-Fixes-for-vcpu-hotplug.patch new file mode 100644 index 0000000000000000000000000000000000000000..5241a57ddaccffa65e13ff8defd9ffbad9750bc1 --- /dev/null +++ b/intc-gicv3-Fixes-for-vcpu-hotplug.patch @@ -0,0 +1,70 @@ +From 343b61303152b06f9e1ba6d09a405faeaa3fcc98 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 22:12:58 +0800 +Subject: [PATCH] intc/gicv3: Fixes for vcpu hotplug + +1. Some types of machine don't support possible_cpus +callback. +2. The cpu_update_notifier is registered only when the machine +supports vcpu hotplug, so calling notifier_remove() unconditi- +onally is wrong.
+ +Signed-off-by: Keqian Zhu +--- + cpu-common.c | 4 ++++ + hw/intc/arm_gicv3_common.c | 9 +++++++-- + 2 files changed, 11 insertions(+), 2 deletions(-) + +diff --git a/cpu-common.c b/cpu-common.c +index da52e45760..54e63b3f77 100644 +--- a/cpu-common.c ++++ b/cpu-common.c +@@ -113,6 +113,10 @@ CPUState *qemu_get_possible_cpu(int index) + MachineState *ms = MACHINE(qdev_get_machine()); + const CPUArchIdList *possible_cpus = ms->possible_cpus; + ++ if (possible_cpus == NULL) { ++ return qemu_get_cpu(index); ++ } ++ + assert((index >= 0) && (index < possible_cpus->len)); + + return CPU(possible_cpus->cpus[index].cpu); +diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c +index d051024a30..5667d9f40b 100644 +--- a/hw/intc/arm_gicv3_common.c ++++ b/hw/intc/arm_gicv3_common.c +@@ -25,6 +25,7 @@ + #include "qapi/error.h" + #include "qemu/module.h" + #include "qemu/error-report.h" ++#include "hw/boards.h" + #include "hw/core/cpu.h" + #include "hw/intc/arm_gicv3_common.h" + #include "hw/qdev-properties.h" +@@ -446,7 +447,7 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + s->cpu = g_new0(GICv3CPUState, s->num_cpu); + + for (i = 0; i < s->num_cpu; i++) { +- CPUState *cpu = qemu_get_possible_cpu(i); ++ CPUState *cpu = qemu_get_possible_cpu(i) ? 
: qemu_get_cpu(i); + uint64_t cpu_affid; + + if (qemu_enabled_cpu(cpu)) { +@@ -506,8 +507,12 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + static void arm_gicv3_finalize(Object *obj) + { + GICv3State *s = ARM_GICV3_COMMON(obj); ++ Object *ms = qdev_get_machine(); ++ MachineClass *mc = MACHINE_GET_CLASS(ms); + +- notifier_remove(&s->cpu_update_notifier); ++ if (mc->has_hotpluggable_cpus) { ++ notifier_remove(&s->cpu_update_notifier); ++ } + g_free(s->redist_region_count); + } + +-- +2.27.0 + diff --git a/iotests-adapt-to-output-change-for-recently-introduc.patch b/iotests-adapt-to-output-change-for-recently-introduc.patch new file mode 100644 index 0000000000000000000000000000000000000000..85946c303baf315c19a0762442ba38951a2be831 --- /dev/null +++ b/iotests-adapt-to-output-change-for-recently-introduc.patch @@ -0,0 +1,62 @@ +From 7212ca27f0dc957f83fe29858430ee2927e0175c Mon Sep 17 00:00:00 2001 +From: root +Date: Mon, 25 Mar 2024 21:31:32 +0800 +Subject: [PATCH] =?UTF-8?q?iotests:=20adapt=20to=20output=20change=20for?= + =?UTF-8?q?=20recently=20introduced=20'detached=20hea=E2=80=A6?= +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 39a94d7c34ce9d222fa9c0c99a14e20a567456d7 + +…der' field + +Failure was noticed when running the tests for the qcow2 image format. + +Fixes: 0bd779e ("crypto: Introduce 'detached-header' field in QCryptoBlockInfoLUKS") +Signed-off-by: Fiona Ebner +Message-ID: <20240216101415.293769-1-f.ebner@proxmox.com> +Reviewed-by: Daniel P.
Berrangé +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +Signed-off-by: Gao Jiazhen +--- + tests/qemu-iotests/198.out | 2 ++ + tests/qemu-iotests/206.out | 1 + + 2 files changed, 3 insertions(+) + +diff --git a/tests/qemu-iotests/198.out b/tests/qemu-iotests/198.out +index 805494916f..62fb73fa3e 100644 +--- a/tests/qemu-iotests/198.out ++++ b/tests/qemu-iotests/198.out +@@ -39,6 +39,7 @@ Format specific information: + compression type: COMPRESSION_TYPE + encrypt: + ivgen alg: plain64 ++ detached header: false + hash alg: sha256 + cipher alg: aes-256 + uuid: 00000000-0000-0000-0000-000000000000 +@@ -84,6 +85,7 @@ Format specific information: + compression type: COMPRESSION_TYPE + encrypt: + ivgen alg: plain64 ++ detached header: false + hash alg: sha256 + cipher alg: aes-256 + uuid: 00000000-0000-0000-0000-000000000000 +diff --git a/tests/qemu-iotests/206.out b/tests/qemu-iotests/206.out +index 7e95694777..979f00f9bf 100644 +--- a/tests/qemu-iotests/206.out ++++ b/tests/qemu-iotests/206.out +@@ -114,6 +114,7 @@ Format specific information: + refcount bits: 16 + encrypt: + ivgen alg: plain64 ++ detached header: false + hash alg: sha1 + cipher alg: aes-128 + uuid: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX +-- +2.27.0 + diff --git a/linux-headers-update-against-5.10-and-manual-clear-v.patch b/linux-headers-update-against-5.10-and-manual-clear-v.patch new file mode 100644 index 0000000000000000000000000000000000000000..911475272126f45878b57fb991ca7551761dc565 --- /dev/null +++ b/linux-headers-update-against-5.10-and-manual-clear-v.patch @@ -0,0 +1,82 @@ +From 2ccd1ec0d18070727ad9b9647da6b6937f16de2a Mon Sep 17 00:00:00 2001 +From: Zenghui Yu +Date: Sat, 8 May 2021 17:31:03 +0800 +Subject: [PATCH] linux-headers: update against 5.10 and manual clear vfio + dirty log series + +The new capability VFIO_DIRTY_LOG_MANUAL_CLEAR and the new ioctl +VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR and +VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP have been introduced in +the kernel, 
update the header to add them. + +Signed-off-by: Zenghui Yu +Signed-off-by: Kunkun Jiang +--- + linux-headers/linux/vfio.h | 36 +++++++++++++++++++++++++++++++++++- + 1 file changed, 35 insertions(+), 1 deletion(-) + +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index 8e175ece31..956154e509 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -56,6 +56,16 @@ + */ + #define VFIO_UPDATE_VADDR 10 + ++/* ++ * The vfio_iommu driver may support user clears dirty log manually, which means ++ * dirty log can be requested to not cleared automatically after dirty log is ++ * copied to userspace, it's user's duty to clear dirty log. ++ * ++ * Note: please refer to VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR and ++ * VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP. ++ */ ++#define VFIO_DIRTY_LOG_MANUAL_CLEAR 11 ++ + /* + * The IOCTL interface is designed for extensibility by embedding the + * structure length (argsz) and flags into structures passed between +@@ -1651,8 +1661,30 @@ struct vfio_iommu_type1_dma_unmap { + * actual bitmap. If dirty pages logging is not enabled, an error will be + * returned. + * +- * Only one of the flags _START, _STOP and _GET may be specified at a time. ++ * The VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR flag is almost same as ++ * VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP, except that it requires underlying ++ * dirty bitmap is not cleared automatically. The user can clear it manually by ++ * calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP flag set. + * ++ * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP flag set, ++ * instructs the IOMMU driver to clear the dirty status of pages in a bitmap ++ * for IOMMU container for a given IOVA range. The user must specify the IOVA ++ * range, the bitmap and the pgsize through the structure ++ * vfio_iommu_type1_dirty_bitmap_get in the data[] portion. 
This interface ++ * supports clearing a bitmap of the smallest supported pgsize only and can be ++ * modified in future to clear a bitmap of any specified supported pgsize. The ++ * user must provide a memory area for the bitmap memory and specify its size ++ * in bitmap.size. One bit is used to represent one page consecutively starting ++ * from iova offset. The user should provide page size in bitmap.pgsize field. ++ * A bit set in the bitmap indicates that the page at that offset from iova is ++ * cleared the dirty status, and dirty tracking is re-enabled for that page. The ++ * caller must set argsz to a value including the size of structure ++ * vfio_iommu_dirty_bitmap_get, but excluding the size of the actual bitmap. If ++ * dirty pages logging is not enabled, an error will be returned. Note: user ++ * should clear dirty log before handling corresponding dirty pages. ++ * ++ * Only one of the flags _START, _STOP, _GET, _GET_NOCLEAR, and _CLEAR may be ++ * specified at a time. + */ + struct vfio_iommu_type1_dirty_bitmap { + __u32 argsz; +@@ -1660,6 +1692,8 @@ struct vfio_iommu_type1_dirty_bitmap { + #define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0) + #define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1) + #define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2) ++#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR (1 << 3) ++#define VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP (1 << 4) + __u8 data[]; + }; + +-- +2.27.0 + diff --git a/log-Add-log-at-boot-cpu-init-for-aarch64.patch b/log-Add-log-at-boot-cpu-init-for-aarch64.patch new file mode 100644 index 0000000000000000000000000000000000000000..73ddaae920abb4e64793dfd332ba4ba34c3a9b81 --- /dev/null +++ b/log-Add-log-at-boot-cpu-init-for-aarch64.patch @@ -0,0 +1,68 @@ +From 16c4b8946903985e3dfd470d0e04b79d473505bc Mon Sep 17 00:00:00 2001 +From: "wanghaibin.wang" +Date: Sun, 17 Mar 2024 15:53:57 +0800 +Subject: [PATCH] log: Add log at boot & cpu init for aarch64 + +Add log at boot & cpu init for aarch64 + 
+Signed-off-by: miaoyubo +Signed-off-by: Jingyi Wang +Signed-off-by: Yuan Zhang +--- + hw/arm/boot.c | 4 ++++ + hw/arm/virt.c | 3 +++ + 2 files changed, 7 insertions(+) + +diff --git a/hw/arm/boot.c b/hw/arm/boot.c +index 84ea6a807a..d1671e1d42 100644 +--- a/hw/arm/boot.c ++++ b/hw/arm/boot.c +@@ -11,6 +11,7 @@ + #include "qemu/datadir.h" + #include "qemu/error-report.h" + #include "qapi/error.h" ++#include "qemu/log.h" + #include + #include "hw/arm/boot.h" + #include "hw/arm/linux-boot-if.h" +@@ -1226,6 +1227,9 @@ void arm_load_kernel(ARMCPU *cpu, MachineState *ms, struct arm_boot_info *info) + * doesn't support secure. + */ + assert(!(info->secure_board_setup && kvm_enabled())); ++ ++ qemu_log("load the kernel\n"); ++ + info->kernel_filename = ms->kernel_filename; + info->kernel_cmdline = ms->kernel_cmdline; + info->initrd_filename = ms->initrd_filename; +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index c19cacec8b..f4c3d47f30 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -32,6 +32,7 @@ + #include "qemu/datadir.h" + #include "qemu/units.h" + #include "qemu/option.h" ++#include "qemu/log.h" + #include "monitor/qdev.h" + #include "hw/sysbus.h" + #include "hw/arm/boot.h" +@@ -1020,6 +1021,7 @@ static void virt_powerdown_req(Notifier *n, void *opaque) + { + VirtMachineState *s = container_of(n, VirtMachineState, powerdown_notifier); + ++ qemu_log("send powerdown to vm.\n"); + if (s->acpi_dev) { + acpi_send_event(s->acpi_dev, ACPI_POWER_DOWN_STATUS); + } else { +@@ -2240,6 +2242,7 @@ static void machvirt_init(MachineState *machine) + } + + create_fdt(vms); ++ qemu_log("cpu init start\n"); + + assert(possible_cpus->len == max_cpus); + for (n = 0; n < possible_cpus->len; n++) { +-- +2.27.0 + diff --git a/memory-backup-Modify-the-VM-s-physical-bits-value-se.patch b/memory-backup-Modify-the-VM-s-physical-bits-value-se.patch new file mode 100644 index 0000000000000000000000000000000000000000..a1c270bc32a6c14c946175df7231cad51d4173c2 --- /dev/null +++ 
b/memory-backup-Modify-the-VM-s-physical-bits-value-se.patch @@ -0,0 +1,126 @@ +From 65435e107fc8eee37c61a3a7d1adebd013ad466f Mon Sep 17 00:00:00 2001 +From: Ming Yang +Date: Sat, 23 Mar 2024 16:18:03 +0800 +Subject: [PATCH] memory: [backup] Modify the VM's physical bits value set + policy. + +backup code from qemu-6.2 to qemu-8.2 +old info: +commit id : +a09c3928b33b0c53831bd9eeb56f8171c26057bc +messages: +target-i386: Modify the VM's physical bits value set policy. + +To resolve the problem that a VM with large memory capacity fails +to be live migrated, determine whether the VM is a large memory +capacity based on the memory size (4 TB). If yes, set the bus width +of the VM address to 46 bits. If no, set the bus width to 42 bits. + +Signed-off-by: Jinhua Cao +Signed-off-by: Jiajie Li + +Signed-off-by: Ming Yang +--- + target/i386/cpu.c | 20 +++++++++++++++++++- + target/i386/cpu.h | 6 ++++++ + target/i386/host-cpu.c | 13 +++++++------ + 3 files changed, 32 insertions(+), 7 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index a66e5a357b..fc61a84b1e 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -7666,6 +7666,24 @@ static void x86_cpu_set_pc(CPUState *cs, vaddr value) + cpu->env.eip = value; + } + ++ ++/* At present, we check the vm is *LARGE* or not, i.e. whether ++ * the memory size is more than 4T or not. ++ */ ++const uint64_t large_vm_mem_size = 0x40000000000UL; ++void x86_cpu_adjuest_by_ram_size(ram_addr_t ram_size, X86CPU *cpu) ++{ ++ /* If there is not a large vm, we set the phys_bits to 42 bits, ++ * otherwise, we increase the phys_bits to 46 bits. 
++ */ ++ if (ram_size < large_vm_mem_size) { ++ cpu->phys_bits = DEFAULT_VM_CPU_PHYS_BITS; ++ } else { ++ cpu->phys_bits = LARGE_VM_CPU_PHYS_BITS; ++ cpu->fill_mtrr_mask = true; ++ } ++} ++ + static vaddr x86_cpu_get_pc(CPUState *cs) + { + X86CPU *cpu = X86_CPU(cs); +@@ -7868,7 +7886,7 @@ static Property x86_cpu_properties[] = { + DEFINE_PROP_UINT32("phys-bits", X86CPU, phys_bits, 0), + DEFINE_PROP_BOOL("host-phys-bits", X86CPU, host_phys_bits, false), + DEFINE_PROP_UINT8("host-phys-bits-limit", X86CPU, host_phys_bits_limit, 0), +- DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true), ++ DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, false), + DEFINE_PROP_UINT32("level-func7", X86CPU, env.cpuid_level_func7, + UINT32_MAX), + DEFINE_PROP_UINT32("level", X86CPU, env.cpuid_level, UINT32_MAX), +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index ef987f344c..6993552cd9 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -24,6 +24,7 @@ + #include "cpu-qom.h" + #include "kvm/hyperv-proto.h" + #include "exec/cpu-defs.h" ++#include "exec/cpu-common.h" + #include "qapi/qapi-types-common.h" + #include "qemu/cpu-float.h" + #include "qemu/timer.h" +@@ -2081,6 +2082,11 @@ struct X86CPUClass { + extern const VMStateDescription vmstate_x86_cpu; + #endif + ++#define DEFAULT_VM_CPU_PHYS_BITS 42 ++#define LARGE_VM_CPU_PHYS_BITS 46 ++ ++void x86_cpu_adjuest_by_ram_size(ram_addr_t ram_size, X86CPU *cpu); ++ + int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request); + + int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cpu, +diff --git a/target/i386/host-cpu.c b/target/i386/host-cpu.c +index 92ecb7254b..07738bf857 100644 +--- a/target/i386/host-cpu.c ++++ b/target/i386/host-cpu.c +@@ -13,6 +13,7 @@ + #include "qapi/error.h" + #include "qemu/error-report.h" + #include "sysemu/sysemu.h" ++#include "hw/boards.h" + + /* Note: Only safe for use on x86(-64) hosts */ + static uint32_t host_cpu_phys_bits(void) +@@ -57,14 +58,14 @@ 
static uint32_t host_cpu_adjust_phys_bits(X86CPU *cpu) + uint32_t phys_bits = cpu->phys_bits; + static bool warned; + +- /* +- * Print a warning if the user set it to a value that's not the +- * host value. +- */ +- if (phys_bits != host_phys_bits && phys_bits != 0 && ++ /* adjust x86 cpu phys_bits according to ram_size. */ ++ x86_cpu_adjuest_by_ram_size(current_machine->ram_size, cpu); ++ ++ /* Print a warning if the host value less than the user set. */ ++ if (phys_bits > host_phys_bits && phys_bits != 0 && + !warned) { + warn_report("Host physical bits (%u)" +- " does not match phys-bits property (%u)", ++ " less than phys-bits property (%u)", + host_phys_bits, phys_bits); + warned = true; + } +-- +2.27.0 + diff --git a/migration-Add-compress_level-sanity-check.patch b/migration-Add-compress_level-sanity-check.patch new file mode 100644 index 0000000000000000000000000000000000000000..011547f9bad7579ceef2360d39122d40dab5ee4d --- /dev/null +++ b/migration-Add-compress_level-sanity-check.patch @@ -0,0 +1,68 @@ +From 79863c5ccdd4c635657d2e32e91bc02aa49655e0 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Sat, 30 Jan 2021 16:23:15 +0800 +Subject: [PATCH] migration: Add compress_level sanity check + +Zlib compression has level from 1 to 9. However Zstd compression has level +from 1 to 22 (level >= 20 not recommended). Let's do sanity check here +to make sure a valid compress_level is given by user. 
+ +Signed-off-by: Chuan Zheng +Signed-off-by: Zeyu Jin +Signed-off-by: Ying Fang +--- + migration/options.c | 32 ++++++++++++++++++++++++++++---- + 1 file changed, 28 insertions(+), 4 deletions(-) + +diff --git a/migration/options.c b/migration/options.c +index 6aaee702dc..9b68962a65 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -1065,16 +1065,40 @@ void migrate_params_init(MigrationParameters *params) + params->has_mode = true; + } + ++static bool compress_level_check(MigrationParameters *params, Error **errp) ++{ ++ switch (params->compress_method) { ++ case COMPRESS_METHOD_ZLIB: ++ if (params->compress_level > 9 || params->compress_level < 1) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", ++ "a value in the range of 0 to 9 for Zlib method"); ++ return false; ++ } ++ break; ++#ifdef CONFIG_ZSTD ++ case COMPRESS_METHOD_ZSTD: ++ if (params->compress_level > 19 || params->compress_level < 1) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", ++ "a value in the range of 1 to 19 for Zstd method"); ++ return false; ++ } ++ break; ++#endif ++ default: ++ error_setg(errp, "Checking compress_level failed for unknown reason"); ++ return false; ++ } ++ ++ return true; ++} ++ + /* + * Check whether the parameters are valid. Error will be put into errp + * (if provided). Return true if valid, otherwise false. 
+ */ + bool migrate_params_check(MigrationParameters *params, Error **errp) + { +- if (params->has_compress_level && +- (params->compress_level > 9)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", +- "a value between 0 and 9"); ++ if (params->has_compress_level && !compress_level_check(params, errp)) { + return false; + } + +-- +2.27.0 + diff --git a/migration-Add-multi-thread-compress-method.patch b/migration-Add-multi-thread-compress-method.patch new file mode 100644 index 0000000000000000000000000000000000000000..1c2782dc3107c988df2ceb796c16172dfc0e38e4 --- /dev/null +++ b/migration-Add-multi-thread-compress-method.patch @@ -0,0 +1,292 @@ +From c2402b63ecb10b9a25695b710f2664dbcbc01ec4 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Sat, 30 Jan 2021 14:57:54 +0800 +Subject: [PATCH] migration: Add multi-thread compress method + +A multi-thread compress method parameter is added to hold the method we +are going to use. By default the 'zlib' method is used to maintain the +compatibility as before. 
+ +Signed-off-by: Chuan Zheng +Signed-off-by: Zeyu Jin +Signed-off-by: Ying Fang +--- + hw/core/qdev-properties-system.c | 11 +++++++++++ + include/hw/qdev-properties.h | 4 ++++ + migration/migration-hmp-cmds.c | 13 +++++++++++++ + migration/options.c | 15 +++++++++++++++ + monitor/hmp-cmds.c | 1 + + qapi/migration.json | 32 ++++++++++++++++++++++++++++++-- + util/oslib-posix.c | 2 +- + 7 files changed, 75 insertions(+), 3 deletions(-) + +diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c +index f2e2718c74..cd5571fcfb 100644 +--- a/hw/core/qdev-properties-system.c ++++ b/hw/core/qdev-properties-system.c +@@ -1202,6 +1202,17 @@ const PropertyInfo qdev_prop_uuid = { + .set_default_value = set_default_uuid_auto, + }; + ++/* --- CompressMethod --- */ ++const PropertyInfo qdev_prop_compress_method = { ++ .name = "CompressMethod", ++ .description = "multi-thread compression method, " ++ "zlib", ++ .enum_table = &CompressMethod_lookup, ++ .get = qdev_propinfo_get_enum, ++ .set = qdev_propinfo_set_enum, ++ .set_default_value = qdev_propinfo_set_default_value_enum, ++}; ++ + /* --- s390 cpu entitlement policy --- */ + + QEMU_BUILD_BUG_ON(sizeof(CpuS390Entitlement) != sizeof(int)); +diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h +index 25743a29a0..63602c2c74 100644 +--- a/include/hw/qdev-properties.h ++++ b/include/hw/qdev-properties.h +@@ -60,6 +60,7 @@ extern const PropertyInfo qdev_prop_int64; + extern const PropertyInfo qdev_prop_size; + extern const PropertyInfo qdev_prop_string; + extern const PropertyInfo qdev_prop_on_off_auto; ++extern const PropertyInfo qdev_prop_compress_method; + extern const PropertyInfo qdev_prop_size32; + extern const PropertyInfo qdev_prop_array; + extern const PropertyInfo qdev_prop_link; +@@ -168,6 +169,9 @@ extern const PropertyInfo qdev_prop_link; + DEFINE_PROP(_n, _s, _f, qdev_prop_string, char*) + #define DEFINE_PROP_ON_OFF_AUTO(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, 
qdev_prop_on_off_auto, OnOffAuto) ++#define DEFINE_PROP_COMPRESS_METHOD(_n, _s, _f, _d) \ ++ DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_compress_method, \ ++ CompressMethod) + #define DEFINE_PROP_SIZE32(_n, _s, _f, _d) \ + DEFINE_PROP_UNSIGNED(_n, _s, _f, _d, qdev_prop_size32, uint32_t) + +diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c +index 86ae832176..261ec1e35c 100644 +--- a/migration/migration-hmp-cmds.c ++++ b/migration/migration-hmp-cmds.c +@@ -22,6 +22,7 @@ + #include "qapi/qapi-commands-migration.h" + #include "qapi/qapi-visit-migration.h" + #include "qapi/qmp/qdict.h" ++#include "qapi/qapi-visit-migration.h" + #include "qapi/string-input-visitor.h" + #include "qapi/string-output-visitor.h" + #include "qemu/cutils.h" +@@ -291,6 +292,9 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) + MigrationParameter_str(MIGRATION_PARAMETER_DECOMPRESS_THREADS), + params->decompress_threads); + assert(params->has_throttle_trigger_threshold); ++ monitor_printf(mon, "%s: %s\n", ++ MigrationParameter_str(MIGRATION_PARAMETER_COMPRESS_METHOD), ++ CompressMethod_str(params->compress_method)); + monitor_printf(mon, "%s: %u\n", + MigrationParameter_str(MIGRATION_PARAMETER_THROTTLE_TRIGGER_THRESHOLD), + params->throttle_trigger_threshold); +@@ -519,6 +523,7 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + MigrateSetParameters *p = g_new0(MigrateSetParameters, 1); + uint64_t valuebw = 0; + uint64_t cache_size; ++ CompressMethod compress_method; + Error *err = NULL; + int val, ret; + +@@ -544,6 +549,14 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + p->has_decompress_threads = true; + visit_type_uint8(v, param, &p->decompress_threads, &err); + break; ++ case MIGRATION_PARAMETER_COMPRESS_METHOD: ++ p->has_compress_method = true; ++ visit_type_CompressMethod(v, param, &compress_method, &err); ++ if (err) { ++ break; ++ } ++ p->compress_method = compress_method; ++ break; + case 
MIGRATION_PARAMETER_THROTTLE_TRIGGER_THRESHOLD: + p->has_throttle_trigger_threshold = true; + visit_type_uint8(v, param, &p->throttle_trigger_threshold, &err); +diff --git a/migration/options.c b/migration/options.c +index 8d8ec73ad9..af7ea7b346 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -47,6 +47,7 @@ + #define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 + /*0: means nocompress, 1: best speed, ... 9: best compress ratio */ + #define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 ++#define DEFAULT_MIGRATE_COMPRESS_METHOD COMPRESS_METHOD_ZLIB + /* Define default autoconverge cpu throttle migration parameters */ + #define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50 + #define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20 +@@ -113,6 +114,9 @@ Property migration_properties[] = { + DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, + parameters.decompress_threads, + DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), ++ DEFINE_PROP_COMPRESS_METHOD("compress-method", MigrationState, ++ parameters.compress_method, ++ DEFAULT_MIGRATE_COMPRESS_METHOD), + DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, + parameters.throttle_trigger_threshold, + DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD), +@@ -953,6 +957,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + params->compress_wait_thread = s->parameters.compress_wait_thread; + params->has_decompress_threads = true; + params->decompress_threads = s->parameters.decompress_threads; ++ params->has_compress_method = true; ++ params->compress_method = s->parameters.compress_method; + params->has_throttle_trigger_threshold = true; + params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold; + params->has_cpu_throttle_initial = true; +@@ -1025,6 +1031,7 @@ void migrate_params_init(MigrationParameters *params) + params->has_compress_threads = true; + params->has_compress_wait_thread = true; + params->has_decompress_threads = true; ++ params->has_compress_method = true; + 
params->has_throttle_trigger_threshold = true; + params->has_cpu_throttle_initial = true; + params->has_cpu_throttle_increment = true; +@@ -1259,6 +1266,10 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + dest->decompress_threads = params->decompress_threads; + } + ++ if (params->has_compress_method) { ++ dest->compress_method = params->compress_method; ++ } ++ + if (params->has_throttle_trigger_threshold) { + dest->throttle_trigger_threshold = params->throttle_trigger_threshold; + } +@@ -1380,6 +1391,10 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + s->parameters.decompress_threads = params->decompress_threads; + } + ++ if (params->has_compress_method) { ++ s->parameters.compress_method = params->compress_method; ++ } ++ + if (params->has_throttle_trigger_threshold) { + s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold; + } +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 871898ac46..5bb3c9cd46 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -24,6 +24,7 @@ + #include "qapi/qapi-commands-control.h" + #include "qapi/qapi-commands-misc.h" + #include "qapi/qmp/qdict.h" ++#include "qapi/qapi-visit-migration.h" + #include "qemu/cutils.h" + #include "hw/intc/intc.h" + #include "qemu/log.h" +diff --git a/qapi/migration.json b/qapi/migration.json +index eb2f883513..cafaa5ccb3 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -708,6 +708,19 @@ + 'bitmaps': [ 'BitmapMigrationBitmapAlias' ] + } } + ++## ++# @CompressMethod: ++# ++# An enumeration of multi-thread compression methods. ++# ++# @zlib: use zlib compression method. ++# ++# Since: 5.0 ++# ++## ++{ 'enum': 'CompressMethod', ++ 'data': [ 'zlib' ] } ++ + ## + # @MigrationParameter: + # +@@ -746,6 +759,9 @@ + # fast as compression, so set the decompress-threads to the number + # about 1/4 of compress-threads is adequate. + # ++# @compress-method: Which multi-thread compression method to use. 
++# Defaults to zlib. (Since 5.0) ++# + # @throttle-trigger-threshold: The ratio of bytes_dirty_period and + # bytes_xfer_period to trigger throttling. It is expressed as + # percentage. The default value is 50. (Since 5.0) +@@ -892,6 +908,7 @@ + { 'name': 'compress-level', 'features': [ 'deprecated' ] }, + { 'name': 'compress-threads', 'features': [ 'deprecated' ] }, + { 'name': 'decompress-threads', 'features': [ 'deprecated' ] }, ++ { 'name': 'compress-method', 'features': [ 'deprecated' ] }, + { 'name': 'compress-wait-thread', 'features': [ 'deprecated' ] }, + 'throttle-trigger-threshold', + 'cpu-throttle-initial', 'cpu-throttle-increment', +@@ -935,6 +952,9 @@ + # + # @decompress-threads: decompression thread count + # ++# @compress-method: Set compression method to use in multi-thread compression. ++# Defaults to zlib. (Since 5.0) ++# + # @throttle-trigger-threshold: The ratio of bytes_dirty_period and + # bytes_xfer_period to trigger throttling. It is expressed as + # percentage. The default value is 50. (Since 5.0) +@@ -1066,8 +1086,9 @@ + # + # @deprecated: Member @block-incremental is deprecated. Use + # blockdev-mirror with NBD instead. Members @compress-level, +-# @compress-threads, @decompress-threads and @compress-wait-thread +-# are deprecated because @compression is deprecated. ++# @compress-threads, @decompress-threads, @compress-method ++# and @compress-wait-thread are deprecated because ++# @compression is deprecated. + # + # @unstable: Members @x-checkpoint-delay and @x-vcpu-dirty-limit-period + # are experimental. 
+@@ -1090,6 +1111,8 @@ + 'features': [ 'deprecated' ] }, + '*decompress-threads': { 'type': 'uint8', + 'features': [ 'deprecated' ] }, ++ '*compress-method': { 'type': 'CompressMethod', ++ 'features': [ 'deprecated' ] }, + '*throttle-trigger-threshold': 'uint8', + '*cpu-throttle-initial': 'uint8', + '*cpu-throttle-increment': 'uint8', +@@ -1161,6 +1184,9 @@ + # + # @decompress-threads: decompression thread count + # ++# @compress-method: Which multi-thread compression method to use. ++# Defaults to zlib. (Since 5.0) ++# + # @throttle-trigger-threshold: The ratio of bytes_dirty_period and + # bytes_xfer_period to trigger throttling. It is expressed as + # percentage. The default value is 50. (Since 5.0) +@@ -1315,6 +1341,8 @@ + 'features': [ 'deprecated' ] }, + '*decompress-threads': { 'type': 'uint8', + 'features': [ 'deprecated' ] }, ++ '*compress-method': { 'type': 'CompressMethod', ++ 'features': [ 'deprecated' ] }, + '*throttle-trigger-threshold': 'uint8', + '*cpu-throttle-initial': 'uint8', + '*cpu-throttle-increment': 'uint8', +diff --git a/util/oslib-posix.c b/util/oslib-posix.c +index 9ca3fee2b8..43af077fed 100644 +--- a/util/oslib-posix.c ++++ b/util/oslib-posix.c +@@ -346,7 +346,7 @@ static void *do_touch_pages(void *arg) + } + qemu_mutex_unlock(&page_mutex); + +- while (started_num_threads != memset_args->context.num_threads) { ++ while (started_num_threads != memset_args->context->num_threads) { + smp_mb(); + } + +-- +2.27.0 + diff --git a/migration-Add-multi-thread-compress-ops.patch b/migration-Add-multi-thread-compress-ops.patch new file mode 100644 index 0000000000000000000000000000000000000000..dee278f8e65ab15824e9b624fbedfff6c177f8a4 --- /dev/null +++ b/migration-Add-multi-thread-compress-ops.patch @@ -0,0 +1,493 @@ +From 5896dedf32c7e4417bd7f3e889ca41a34b06f5db Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Sat, 30 Jan 2021 15:57:31 +0800 +Subject: [PATCH] migration: Add multi-thread compress ops + +Add the MigrationCompressOps and 
MigrationDecompressOps structures to make +the compression method configurable for multi-thread compression migration. + +Signed-off-by: Chuan Zheng +Signed-off-by: Zeyu Jin +Signed-off-by: Ying Fang +--- + migration/options.c | 9 ++ + migration/options.h | 1 + + migration/ram-compress.c | 261 ++++++++++++++++++++++++++------------- + migration/ram-compress.h | 31 ++++- + migration/ram.c | 4 +- + 5 files changed, 215 insertions(+), 91 deletions(-) + +diff --git a/migration/options.c b/migration/options.c +index af7ea7b346..6aaee702dc 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -799,6 +799,15 @@ int migrate_decompress_threads(void) + return s->parameters.decompress_threads; + } + ++CompressMethod migrate_compress_method(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.compress_method; ++} ++ + uint64_t migrate_downtime_limit(void) + { + MigrationState *s = migrate_get_current(); +diff --git a/migration/options.h b/migration/options.h +index 246c160aee..9aca5e41ad 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -78,6 +78,7 @@ uint8_t migrate_cpu_throttle_increment(void); + uint8_t migrate_cpu_throttle_initial(void); + bool migrate_cpu_throttle_tailslow(void); + int migrate_decompress_threads(void); ++CompressMethod migrate_compress_method(void); + uint64_t migrate_downtime_limit(void); + uint8_t migrate_max_cpu_throttle(void); + uint64_t migrate_max_bandwidth(void); +diff --git a/migration/ram-compress.c b/migration/ram-compress.c +index 2be344acbc..6e37b22492 100644 +--- a/migration/ram-compress.c ++++ b/migration/ram-compress.c +@@ -65,26 +65,167 @@ static QemuThread *compress_threads; + static QemuMutex comp_done_lock; + static QemuCond comp_done_cond; + +-struct DecompressParam { +- bool done; +- bool quit; +- QemuMutex mutex; +- QemuCond cond; +- void *des; +- uint8_t *compbuf; +- int len; +- z_stream stream; +-}; +-typedef struct DecompressParam DecompressParam; +- + static 
QEMUFile *decomp_file; + static DecompressParam *decomp_param; + static QemuThread *decompress_threads; ++MigrationCompressOps *compress_ops; ++MigrationDecompressOps *decompress_ops; + static QemuMutex decomp_done_lock; + static QemuCond decomp_done_cond; + + static CompressResult do_compress_ram_page(CompressParam *param, RAMBlock *block); + ++static int zlib_save_setup(CompressParam *param) ++{ ++ if (deflateInit(&param->stream, ++ migrate_compress_level()) != Z_OK) { ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static ssize_t zlib_compress_data(CompressParam *param, size_t size) ++{ ++ int err; ++ uint8_t *dest = NULL; ++ z_stream *stream = &param->stream; ++ uint8_t *p = param->originbuf; ++ QEMUFile *f = f = param->file; ++ ssize_t blen = qemu_put_compress_start(f, &dest); ++ ++ if (blen < compressBound(size)) { ++ return -1; ++ } ++ ++ err = deflateReset(stream); ++ if (err != Z_OK) { ++ return -1; ++ } ++ ++ stream->avail_in = size; ++ stream->next_in = p; ++ stream->avail_out = blen; ++ stream->next_out = dest; ++ ++ err = deflate(stream, Z_FINISH); ++ if (err != Z_STREAM_END) { ++ return -1; ++ } ++ ++ blen = stream->next_out - dest; ++ if (blen < 0) { ++ return -1; ++ } ++ ++ qemu_put_compress_end(f, blen); ++ return blen + sizeof(int32_t); ++} ++ ++static void zlib_save_cleanup(CompressParam *param) ++{ ++ deflateEnd(&param->stream); ++} ++ ++static int zlib_load_setup(DecompressParam *param) ++{ ++ if (inflateInit(&param->stream) != Z_OK) { ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static int ++zlib_decompress_data(DecompressParam *param, uint8_t *dest, size_t size) ++{ ++ int err; ++ ++ z_stream *stream = &param->stream; ++ ++ err = inflateReset(stream); ++ if (err != Z_OK) { ++ return -1; ++ } ++ ++ stream->avail_in = param->len; ++ stream->next_in = param->compbuf; ++ stream->avail_out = size; ++ stream->next_out = dest; ++ ++ err = inflate(stream, Z_NO_FLUSH); ++ if (err != Z_STREAM_END) { ++ return -1; ++ } ++ ++ return stream->total_out; ++} ++ ++static void 
zlib_load_cleanup(DecompressParam *param) ++{ ++ inflateEnd(&param->stream); ++} ++ ++static int zlib_check_len(int len) ++{ ++ return len < 0 || len > compressBound(TARGET_PAGE_SIZE); ++} ++ ++static int set_compress_ops(void) ++{ ++ compress_ops = g_new0(MigrationCompressOps, 1); ++ ++ switch (migrate_compress_method()) { ++ case COMPRESS_METHOD_ZLIB: ++ compress_ops->save_setup = zlib_save_setup; ++ compress_ops->save_cleanup = zlib_save_cleanup; ++ compress_ops->compress_data = zlib_compress_data; ++ break; ++ default: ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static int set_decompress_ops(void) ++{ ++ decompress_ops = g_new0(MigrationDecompressOps, 1); ++ ++ switch (migrate_compress_method()) { ++ case COMPRESS_METHOD_ZLIB: ++ decompress_ops->load_setup = zlib_load_setup; ++ decompress_ops->load_cleanup = zlib_load_cleanup; ++ decompress_ops->decompress_data = zlib_decompress_data; ++ decompress_ops->check_len = zlib_check_len; ++ break; ++ default: ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static void clean_compress_ops(void) ++{ ++ compress_ops->save_setup = NULL; ++ compress_ops->save_cleanup = NULL; ++ compress_ops->compress_data = NULL; ++ ++ g_free(compress_ops); ++ compress_ops = NULL; ++} ++ ++static void clean_decompress_ops(void) ++{ ++ decompress_ops->load_setup = NULL; ++ decompress_ops->load_cleanup = NULL; ++ decompress_ops->decompress_data = NULL; ++ ++ g_free(decompress_ops); ++ decompress_ops = NULL; ++} ++ + static void *do_data_compress(void *opaque) + { + CompressParam *param = opaque; +@@ -141,7 +282,7 @@ void compress_threads_save_cleanup(void) + qemu_thread_join(compress_threads + i); + qemu_mutex_destroy(&comp_param[i].mutex); + qemu_cond_destroy(&comp_param[i].cond); +- deflateEnd(&comp_param[i].stream); ++ compress_ops->save_cleanup(&comp_param[i]); + g_free(comp_param[i].originbuf); + qemu_fclose(comp_param[i].file); + comp_param[i].file = NULL; +@@ -152,6 +293,7 @@ void compress_threads_save_cleanup(void) + g_free(comp_param); + 
compress_threads = NULL; + comp_param = NULL; ++ clean_compress_ops(); + } + + int compress_threads_save_setup(void) +@@ -161,6 +303,12 @@ int compress_threads_save_setup(void) + if (!migrate_compress()) { + return 0; + } ++ ++ if (set_compress_ops() < 0) { ++ clean_compress_ops(); ++ return -1; ++ } ++ + thread_count = migrate_compress_threads(); + compress_threads = g_new0(QemuThread, thread_count); + comp_param = g_new0(CompressParam, thread_count); +@@ -172,8 +320,7 @@ int compress_threads_save_setup(void) + goto exit; + } + +- if (deflateInit(&comp_param[i].stream, +- migrate_compress_level()) != Z_OK) { ++ if (compress_ops->save_setup(&comp_param[i]) < 0) { + g_free(comp_param[i].originbuf); + goto exit; + } +@@ -198,50 +345,6 @@ exit: + return -1; + } + +-/* +- * Compress size bytes of data start at p and store the compressed +- * data to the buffer of f. +- * +- * Since the file is dummy file with empty_ops, return -1 if f has no space to +- * save the compressed data. +- */ +-static ssize_t qemu_put_compression_data(CompressParam *param, size_t size) +-{ +- int err; +- uint8_t *dest = NULL; +- z_stream *stream = &param->stream; +- uint8_t *p = param->originbuf; +- QEMUFile *f = f = param->file; +- ssize_t blen = qemu_put_compress_start(f, &dest); +- +- if (blen < compressBound(size)) { +- return -1; +- } +- +- err = deflateReset(stream); +- if (err != Z_OK) { +- return -1; +- } +- +- stream->avail_in = size; +- stream->next_in = p; +- stream->avail_out = blen; +- stream->next_out = dest; +- +- err = deflate(stream, Z_FINISH); +- if (err != Z_STREAM_END) { +- return -1; +- } +- +- blen = stream->next_out - dest; +- if (blen < 0) { +- return -1; +- } +- +- qemu_put_compress_end(f, blen); +- return blen + sizeof(int32_t); +-} +- + static CompressResult do_compress_ram_page(CompressParam *param, RAMBlock *block) + { + uint8_t *p = block->host + (param->offset & TARGET_PAGE_MASK); +@@ -260,7 +363,7 @@ static CompressResult do_compress_ram_page(CompressParam *param, 
RAMBlock *block + * decompression + */ + memcpy(param->originbuf, p, page_size); +- ret = qemu_put_compression_data(param, page_size); ++ ret = compress_ops->compress_data(param, page_size); + if (ret < 0) { + qemu_file_set_error(migrate_get_current()->to_dst_file, ret); + error_report("compressed data failed!"); +@@ -356,32 +459,6 @@ bool compress_page_with_multi_thread(RAMBlock *block, ram_addr_t offset, + } + } + +-/* return the size after decompression, or negative value on error */ +-static int +-qemu_uncompress_data(DecompressParam *param, uint8_t *dest, size_t pagesize) +-{ +- int err; +- +- z_stream *stream = &param->stream; +- +- err = inflateReset(stream); +- if (err != Z_OK) { +- return -1; +- } +- +- stream->avail_in = param->len; +- stream->next_in = param->compbuf; +- stream->avail_out = pagesize; +- stream->next_out = dest; +- +- err = inflate(stream, Z_NO_FLUSH); +- if (err != Z_STREAM_END) { +- return -1; +- } +- +- return stream->total_out; +-} +- + static void *do_data_decompress(void *opaque) + { + DecompressParam *param = opaque; +@@ -398,7 +475,7 @@ static void *do_data_decompress(void *opaque) + + pagesize = qemu_target_page_size(); + +- ret = qemu_uncompress_data(param, des, pagesize); ++ ret = decompress_ops->decompress_data(param, des, pagesize); + if (ret < 0 && migrate_get_current()->decompress_error_check) { + error_report("decompress data failed"); + qemu_file_set_error(decomp_file, ret); +@@ -466,7 +543,7 @@ void compress_threads_load_cleanup(void) + qemu_thread_join(decompress_threads + i); + qemu_mutex_destroy(&decomp_param[i].mutex); + qemu_cond_destroy(&decomp_param[i].cond); +- inflateEnd(&decomp_param[i].stream); ++ decompress_ops->load_cleanup(&decomp_param[i]); + g_free(decomp_param[i].compbuf); + decomp_param[i].compbuf = NULL; + } +@@ -475,6 +552,7 @@ void compress_threads_load_cleanup(void) + decompress_threads = NULL; + decomp_param = NULL; + decomp_file = NULL; ++ clean_decompress_ops(); + } + + int 
compress_threads_load_setup(QEMUFile *f) +@@ -485,6 +563,11 @@ int compress_threads_load_setup(QEMUFile *f) + return 0; + } + ++ if (set_decompress_ops() < 0) { ++ clean_decompress_ops(); ++ return -1; ++ } ++ + /* + * set compression_counters memory to zero for a new migration + */ +@@ -497,7 +580,7 @@ int compress_threads_load_setup(QEMUFile *f) + qemu_cond_init(&decomp_done_cond); + decomp_file = f; + for (i = 0; i < thread_count; i++) { +- if (inflateInit(&decomp_param[i].stream) != Z_OK) { ++ if (decompress_ops->load_setup(&decomp_param[i]) < 0) { + goto exit; + } + +diff --git a/migration/ram-compress.h b/migration/ram-compress.h +index 0d89a2f55e..daf241987f 100644 +--- a/migration/ram-compress.h ++++ b/migration/ram-compress.h +@@ -39,6 +39,20 @@ enum CompressResult { + }; + typedef enum CompressResult CompressResult; + ++struct DecompressParam { ++ bool done; ++ bool quit; ++ QemuMutex mutex; ++ QemuCond cond; ++ void *des; ++ uint8_t *compbuf; ++ int len; ++ ++ /* for zlib compression */ ++ z_stream stream; ++}; ++typedef struct DecompressParam DecompressParam; ++ + struct CompressParam { + bool done; + bool quit; +@@ -51,11 +65,26 @@ struct CompressParam { + ram_addr_t offset; + + /* internally used fields */ +- z_stream stream; + uint8_t *originbuf; ++ ++ /* for zlib compression */ ++ z_stream stream; + }; + typedef struct CompressParam CompressParam; + ++typedef struct { ++ int (*save_setup)(CompressParam *param); ++ void (*save_cleanup)(CompressParam *param); ++ ssize_t (*compress_data)(CompressParam *param, size_t size); ++} MigrationCompressOps; ++ ++typedef struct { ++ int (*load_setup)(DecompressParam *param); ++ void (*load_cleanup)(DecompressParam *param); ++ int (*decompress_data)(DecompressParam *param, uint8_t *dest, size_t size); ++ int (*check_len)(int len); ++} MigrationDecompressOps; ++ + void compress_threads_save_cleanup(void); + int compress_threads_save_setup(void); + +diff --git a/migration/ram.c b/migration/ram.c +index 
8c7886ab79..f9b2b9b985 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -96,6 +96,8 @@ + + XBZRLECacheStats xbzrle_counters; + ++extern MigrationDecompressOps *decompress_ops; ++ + /* used by the search for pages to send */ + struct PageSearchStatus { + /* The migration channel used for a specific host page */ +@@ -3979,7 +3981,7 @@ static int ram_load_precopy(QEMUFile *f) + + case RAM_SAVE_FLAG_COMPRESS_PAGE: + len = qemu_get_be32(f); +- if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) { ++ if (decompress_ops->check_len(len)) { + error_report("Invalid compressed data length: %d", len); + ret = -EINVAL; + break; +-- +2.27.0 + diff --git a/migration-Add-zstd-support-in-multi-thread-compressi.patch b/migration-Add-zstd-support-in-multi-thread-compressi.patch new file mode 100644 index 0000000000000000000000000000000000000000..856e86a6bc0143e297bf1328f57338340d19f70d --- /dev/null +++ b/migration-Add-zstd-support-in-multi-thread-compressi.patch @@ -0,0 +1,229 @@ +From 8c9603270184d8dadf64ec6de263268e846f8c18 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Sat, 30 Jan 2021 16:15:10 +0800 +Subject: [PATCH] migration: Add zstd support in multi-thread compression + +This patch enables zstd option in multi-thread compression. 
+ +Signed-off-by: Chuan Zheng +Signed-off-by: Zeyu Jin +Signed-off-by: Ying Fang +--- + hw/core/qdev-properties-system.c | 2 +- + migration/ram-compress.c | 112 +++++++++++++++++++++++++++++++ + migration/ram-compress.h | 15 +++++ + qapi/migration.json | 3 +- + 4 files changed, 130 insertions(+), 2 deletions(-) + +diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c +index cd5571fcfb..c581d46f2e 100644 +--- a/hw/core/qdev-properties-system.c ++++ b/hw/core/qdev-properties-system.c +@@ -1206,7 +1206,7 @@ const PropertyInfo qdev_prop_uuid = { + const PropertyInfo qdev_prop_compress_method = { + .name = "CompressMethod", + .description = "multi-thread compression method, " +- "zlib", ++ "zlib/zstd", + .enum_table = &CompressMethod_lookup, + .get = qdev_propinfo_get_enum, + .set = qdev_propinfo_set_enum, +diff --git a/migration/ram-compress.c b/migration/ram-compress.c +index 6e37b22492..74703f0ec4 100644 +--- a/migration/ram-compress.c ++++ b/migration/ram-compress.c +@@ -171,6 +171,103 @@ static int zlib_check_len(int len) + return len < 0 || len > compressBound(TARGET_PAGE_SIZE); + } + ++#ifdef CONFIG_ZSTD ++static int zstd_save_setup(CompressParam *param) ++{ ++ int res; ++ param->zstd_cs = ZSTD_createCStream(); ++ if (!param->zstd_cs) { ++ return -1; ++ } ++ res = ZSTD_initCStream(param->zstd_cs, migrate_compress_level()); ++ if (ZSTD_isError(res)) { ++ return -1; ++ } ++ return 0; ++} ++static void zstd_save_cleanup(CompressParam *param) ++{ ++ ZSTD_freeCStream(param->zstd_cs); ++ param->zstd_cs = NULL; ++} ++static ssize_t zstd_compress_data(CompressParam *param, size_t size) ++{ ++ int ret; ++ uint8_t *dest = NULL; ++ uint8_t *p = param->originbuf; ++ QEMUFile *f = f = param->file; ++ ssize_t blen = qemu_put_compress_start(f, &dest); ++ if (blen < ZSTD_compressBound(size)) { ++ return -1; ++ } ++ param->out.dst = dest; ++ param->out.size = blen; ++ param->out.pos = 0; ++ param->in.src = p; ++ param->in.size = size; ++ param->in.pos = 
0; ++ do { ++ ret = ZSTD_compressStream2(param->zstd_cs, &param->out, ++ &param->in, ZSTD_e_end); ++ } while (ret > 0 && (param->in.size - param->in.pos > 0) ++ && (param->out.size - param->out.pos > 0)); ++ if (ret > 0 && (param->in.size - param->in.pos > 0)) { ++ return -1; ++ } ++ if (ZSTD_isError(ret)) { ++ return -1; ++ } ++ blen = param->out.pos; ++ qemu_put_compress_end(f, blen); ++ return blen + sizeof(int32_t); ++} ++ ++static int zstd_load_setup(DecompressParam *param) ++{ ++ int ret; ++ param->zstd_ds = ZSTD_createDStream(); ++ if (!param->zstd_ds) { ++ return -1; ++ } ++ ret = ZSTD_initDStream(param->zstd_ds); ++ if (ZSTD_isError(ret)) { ++ return -1; ++ } ++ return 0; ++} ++static void zstd_load_cleanup(DecompressParam *param) ++{ ++ ZSTD_freeDStream(param->zstd_ds); ++ param->zstd_ds = NULL; ++} ++static int ++zstd_decompress_data(DecompressParam *param, uint8_t *dest, size_t size) ++{ ++ int ret; ++ param->out.dst = dest; ++ param->out.size = size; ++ param->out.pos = 0; ++ param->in.src = param->compbuf; ++ param->in.size = param->len; ++ param->in.pos = 0; ++ do { ++ ret = ZSTD_decompressStream(param->zstd_ds, &param->out, &param->in); ++ } while (ret > 0 && (param->in.size - param->in.pos > 0) ++ && (param->out.size - param->out.pos > 0)); ++ if (ret > 0 && (param->in.size - param->in.pos > 0)) { ++ return -1; ++ } ++ if (ZSTD_isError(ret)) { ++ return -1; ++ } ++ return ret; ++} ++static int zstd_check_len(int len) ++{ ++ return len < 0 || len > ZSTD_compressBound(TARGET_PAGE_SIZE); ++} ++#endif ++ + static int set_compress_ops(void) + { + compress_ops = g_new0(MigrationCompressOps, 1); +@@ -181,6 +278,13 @@ static int set_compress_ops(void) + compress_ops->save_cleanup = zlib_save_cleanup; + compress_ops->compress_data = zlib_compress_data; + break; ++#ifdef CONFIG_ZSTD ++ case COMPRESS_METHOD_ZSTD: ++ compress_ops->save_setup = zstd_save_setup; ++ compress_ops->save_cleanup = zstd_save_cleanup; ++ compress_ops->compress_data = zstd_compress_data; ++ break;
++#endif + default: + return -1; + } +@@ -199,6 +303,14 @@ static int set_decompress_ops(void) + decompress_ops->decompress_data = zlib_decompress_data; + decompress_ops->check_len = zlib_check_len; + break; ++#ifdef CONFIG_ZSTD ++ case COMPRESS_METHOD_ZSTD: ++ decompress_ops->load_setup = zstd_load_setup; ++ decompress_ops->load_cleanup = zstd_load_cleanup; ++ decompress_ops->decompress_data = zstd_decompress_data; ++ decompress_ops->check_len = zstd_check_len; ++ break; ++#endif + default: + return -1; + } +diff --git a/migration/ram-compress.h b/migration/ram-compress.h +index daf241987f..e8700eb36f 100644 +--- a/migration/ram-compress.h ++++ b/migration/ram-compress.h +@@ -29,6 +29,10 @@ + #ifndef QEMU_MIGRATION_COMPRESS_H + #define QEMU_MIGRATION_COMPRESS_H + ++#ifdef CONFIG_ZSTD ++#include ++#include ++#endif + #include "qemu-file.h" + #include "qapi/qapi-types-migration.h" + +@@ -50,6 +54,11 @@ struct DecompressParam { + + /* for zlib compression */ + z_stream stream; ++#ifdef CONFIG_ZSTD ++ ZSTD_DStream *zstd_ds; ++ ZSTD_inBuffer in; ++ ZSTD_outBuffer out; ++#endif + }; + typedef struct DecompressParam DecompressParam; + +@@ -69,6 +78,12 @@ struct CompressParam { + + /* for zlib compression */ + z_stream stream; ++ ++#ifdef CONFIG_ZSTD ++ ZSTD_CStream *zstd_cs; ++ ZSTD_inBuffer in; ++ ZSTD_outBuffer out; ++#endif + }; + typedef struct CompressParam CompressParam; + +diff --git a/qapi/migration.json b/qapi/migration.json +index cafaa5ccb3..29af841f4e 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -714,12 +714,13 @@ + # An enumeration of multi-thread compression methods. + # + # @zlib: use zlib compression method. ++# @zstd: use zstd compression method. 
+ # + # Since: 5.0 + # + ## + { 'enum': 'CompressMethod', +- 'data': [ 'zlib' ] } ++ 'data': [ 'zlib', { 'name': 'zstd', 'if': 'CONFIG_ZSTD' } ] } + + ## + # @MigrationParameter: +-- +2.27.0 + diff --git a/migration-Refactoring-multi-thread-compress-migratio.patch b/migration-Refactoring-multi-thread-compress-migratio.patch new file mode 100644 index 0000000000000000000000000000000000000000..7f2a5acbd929af6feaeb19c57d612730bf9f36f1 --- /dev/null +++ b/migration-Refactoring-multi-thread-compress-migratio.patch @@ -0,0 +1,330 @@ +From cf6f31249817380e91cbc4e55b189216645fac18 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Sat, 30 Jan 2021 15:21:17 +0800 +Subject: [PATCH] migration: Refactoring multi-thread compress migration + +Code refactor for the compression procedure which includes: + +1. Move qemu_compress_data and qemu_put_compression_data from qemu-file.c to +ram.c, for the reason that most part of the code logical has nothing to do +with qemu-file. Besides, the decompression code is located at ram.c only. + +2. Simplify the function input arguments for compression and decompression. +Wrap the input into the param structure which already exists. This change also +makes the function much more flexible for other compression methods. 
+ +Signed-off-by: Chuan Zheng +Signed-off-by: Zeyu Jin +Signed-off-by: Ying Fang +--- + migration/meson.build | 4 +- + migration/migration-hmp-cmds.c | 1 - + migration/qemu-file.c | 61 +++++------------------- + migration/qemu-file.h | 4 +- + migration/ram-compress.c | 87 ++++++++++++++++++++++++---------- + 5 files changed, 77 insertions(+), 80 deletions(-) + +diff --git a/migration/meson.build b/migration/meson.build +index 92b1cc4297..d9b46ef0df 100644 +--- a/migration/meson.build ++++ b/migration/meson.build +@@ -22,7 +22,6 @@ system_ss.add(files( + 'migration.c', + 'multifd.c', + 'multifd-zlib.c', +- 'ram-compress.c', + 'options.c', + 'postcopy-ram.c', + 'savevm.c', +@@ -43,4 +42,5 @@ system_ss.add(when: zstd, if_true: files('multifd-zstd.c')) + + specific_ss.add(when: 'CONFIG_SYSTEM_ONLY', + if_true: files('ram.c', +- 'target.c')) ++ 'target.c', ++ 'ram-compress.c')) +diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c +index 261ec1e35c..1fa6a5f478 100644 +--- a/migration/migration-hmp-cmds.c ++++ b/migration/migration-hmp-cmds.c +@@ -22,7 +22,6 @@ + #include "qapi/qapi-commands-migration.h" + #include "qapi/qapi-visit-migration.h" + #include "qapi/qmp/qdict.h" +-#include "qapi/qapi-visit-migration.h" + #include "qapi/string-input-visitor.h" + #include "qapi/string-output-visitor.h" + #include "qemu/cutils.h" +diff --git a/migration/qemu-file.c b/migration/qemu-file.c +index 94231ff295..bd1dbc3db1 100644 +--- a/migration/qemu-file.c ++++ b/migration/qemu-file.c +@@ -669,55 +669,6 @@ uint64_t qemu_get_be64(QEMUFile *f) + return v; + } + +-/* return the size after compression, or negative value on error */ +-static int qemu_compress_data(z_stream *stream, uint8_t *dest, size_t dest_len, +- const uint8_t *source, size_t source_len) +-{ +- int err; +- +- err = deflateReset(stream); +- if (err != Z_OK) { +- return -1; +- } +- +- stream->avail_in = source_len; +- stream->next_in = (uint8_t *)source; +- stream->avail_out = dest_len; +- 
stream->next_out = dest; +- +- err = deflate(stream, Z_FINISH); +- if (err != Z_STREAM_END) { +- return -1; +- } +- +- return stream->next_out - dest; +-} +- +-/* Compress size bytes of data start at p and store the compressed +- * data to the buffer of f. +- * +- * Since the file is dummy file with empty_ops, return -1 if f has no space to +- * save the compressed data. +- */ +-ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream, +- const uint8_t *p, size_t size) +-{ +- ssize_t blen = IO_BUF_SIZE - f->buf_index - sizeof(int32_t); +- +- if (blen < compressBound(size)) { +- return -1; +- } +- +- blen = qemu_compress_data(stream, f->buf + f->buf_index + sizeof(int32_t), +- blen, p, size); +- if (blen < 0) { +- return -1; +- } +- +- qemu_put_be32(f, blen); +- add_buf_to_iovec(f, blen); +- return blen + sizeof(int32_t); +-} + + /* Put the data in the buffer of f_src to the buffer of f_des, and + * then reset the buf_index of f_src to 0. +@@ -834,3 +785,15 @@ int qemu_file_get_to_fd(QEMUFile *f, int fd, size_t size) + + return 0; + } ++ ++ssize_t qemu_put_compress_start(QEMUFile *f, uint8_t **dest_ptr) ++{ ++ *dest_ptr = f->buf + f->buf_index + sizeof(int32_t); ++ return IO_BUF_SIZE - f->buf_index - sizeof(int32_t); ++} ++ ++void qemu_put_compress_end(QEMUFile *f, unsigned int v) ++{ ++ qemu_put_be32(f, v); ++ add_buf_to_iovec(f, v); ++} +diff --git a/migration/qemu-file.h b/migration/qemu-file.h +index 8aec9fabf7..8afa95732b 100644 +--- a/migration/qemu-file.h ++++ b/migration/qemu-file.h +@@ -54,8 +54,8 @@ void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size, + + size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset); + size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size); +-ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream, +- const uint8_t *p, size_t size); ++ssize_t qemu_put_compress_start(QEMUFile *f, uint8_t **dest_ptr); ++void 
qemu_put_compress_end(QEMUFile *f, unsigned int v); + int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src); + bool qemu_file_buffer_empty(QEMUFile *file); + +diff --git a/migration/ram-compress.c b/migration/ram-compress.c +index fa4388f6a6..2be344acbc 100644 +--- a/migration/ram-compress.c ++++ b/migration/ram-compress.c +@@ -28,7 +28,6 @@ + + #include "qemu/osdep.h" + #include "qemu/cutils.h" +- + #include "ram-compress.h" + + #include "qemu/error-report.h" +@@ -40,6 +39,7 @@ + #include "exec/ramblock.h" + #include "ram.h" + #include "migration-stats.h" ++#include "exec/ram_addr.h" + + static struct { + int64_t pages; +@@ -83,28 +83,22 @@ static QemuThread *decompress_threads; + static QemuMutex decomp_done_lock; + static QemuCond decomp_done_cond; + +-static CompressResult do_compress_ram_page(QEMUFile *f, z_stream *stream, +- RAMBlock *block, ram_addr_t offset, +- uint8_t *source_buf); ++static CompressResult do_compress_ram_page(CompressParam *param, RAMBlock *block); + + static void *do_data_compress(void *opaque) + { + CompressParam *param = opaque; + RAMBlock *block; +- ram_addr_t offset; + CompressResult result; + + qemu_mutex_lock(¶m->mutex); + while (!param->quit) { + if (param->trigger) { + block = param->block; +- offset = param->offset; + param->trigger = false; + qemu_mutex_unlock(¶m->mutex); + +- result = do_compress_ram_page(param->file, ¶m->stream, +- block, offset, param->originbuf); +- ++ result = do_compress_ram_page(param, block); + qemu_mutex_lock(&comp_done_lock); + param->done = true; + param->result = result; +@@ -204,15 +198,57 @@ exit: + return -1; + } + +-static CompressResult do_compress_ram_page(QEMUFile *f, z_stream *stream, +- RAMBlock *block, ram_addr_t offset, +- uint8_t *source_buf) ++/* ++ * Compress size bytes of data start at p and store the compressed ++ * data to the buffer of f. ++ * ++ * Since the file is dummy file with empty_ops, return -1 if f has no space to ++ * save the compressed data. 
++ */ ++static ssize_t qemu_put_compression_data(CompressParam *param, size_t size) ++{ ++ int err; ++ uint8_t *dest = NULL; ++ z_stream *stream = ¶m->stream; ++ uint8_t *p = param->originbuf; ++ QEMUFile *f = f = param->file; ++ ssize_t blen = qemu_put_compress_start(f, &dest); ++ ++ if (blen < compressBound(size)) { ++ return -1; ++ } ++ ++ err = deflateReset(stream); ++ if (err != Z_OK) { ++ return -1; ++ } ++ ++ stream->avail_in = size; ++ stream->next_in = p; ++ stream->avail_out = blen; ++ stream->next_out = dest; ++ ++ err = deflate(stream, Z_FINISH); ++ if (err != Z_STREAM_END) { ++ return -1; ++ } ++ ++ blen = stream->next_out - dest; ++ if (blen < 0) { ++ return -1; ++ } ++ ++ qemu_put_compress_end(f, blen); ++ return blen + sizeof(int32_t); ++} ++ ++static CompressResult do_compress_ram_page(CompressParam *param, RAMBlock *block) + { +- uint8_t *p = block->host + offset; ++ uint8_t *p = block->host + (param->offset & TARGET_PAGE_MASK); + size_t page_size = qemu_target_page_size(); + int ret; + +- assert(qemu_file_buffer_empty(f)); ++ assert(qemu_file_buffer_empty(param->file)); + + if (buffer_is_zero(p, page_size)) { + return RES_ZEROPAGE; +@@ -223,12 +259,12 @@ static CompressResult do_compress_ram_page(QEMUFile *f, z_stream *stream, + * so that we can catch up the error during compression and + * decompression + */ +- memcpy(source_buf, p, page_size); +- ret = qemu_put_compression_data(f, stream, source_buf, page_size); ++ memcpy(param->originbuf, p, page_size); ++ ret = qemu_put_compression_data(param, page_size); + if (ret < 0) { + qemu_file_set_error(migrate_get_current()->to_dst_file, ret); + error_report("compressed data failed!"); +- qemu_fflush(f); ++ qemu_fflush(param->file); + return RES_NONE; + } + return RES_COMPRESS; +@@ -322,19 +358,20 @@ bool compress_page_with_multi_thread(RAMBlock *block, ram_addr_t offset, + + /* return the size after decompression, or negative value on error */ + static int +-qemu_uncompress_data(z_stream *stream, 
uint8_t *dest, size_t dest_len, +- const uint8_t *source, size_t source_len) ++qemu_uncompress_data(DecompressParam *param, uint8_t *dest, size_t pagesize) + { + int err; + ++ z_stream *stream = &param->stream; ++ + err = inflateReset(stream); + if (err != Z_OK) { + return -1; + } + +- stream->avail_in = source_len; +- stream->next_in = (uint8_t *)source; +- stream->avail_out = dest_len; ++ stream->avail_in = param->len; ++ stream->next_in = param->compbuf; ++ stream->avail_out = pagesize; + stream->next_out = dest; + + err = inflate(stream, Z_NO_FLUSH); +@@ -350,20 +387,18 @@ static void *do_data_decompress(void *opaque) + DecompressParam *param = opaque; + unsigned long pagesize; + uint8_t *des; +- int len, ret; ++ int ret; + + qemu_mutex_lock(&param->mutex); + while (!param->quit) { + if (param->des) { + des = param->des; +- len = param->len; + param->des = 0; + qemu_mutex_unlock(&param->mutex); + + pagesize = qemu_target_page_size(); + +- ret = qemu_uncompress_data(&param->stream, des, pagesize, +- param->compbuf, len); ++ ret = qemu_uncompress_data(param, des, pagesize); + if (ret < 0 && migrate_get_current()->decompress_error_check) { + error_report("decompress data failed"); + qemu_file_set_error(decomp_file, ret); +-- +2.27.0 + diff --git a/migration-Skip-only-empty-block-devicesi.patch b/migration-Skip-only-empty-block-devicesi.patch new file mode 100644 index 0000000000000000000000000000000000000000..6787e2920d09b68bf6eb42dc58517a26e0bbae1d --- /dev/null +++ b/migration-Skip-only-empty-block-devicesi.patch @@ -0,0 +1,86 @@ +From 4506b31c0fff0b7a69ec4c7e264715ed70df75a8 Mon Sep 17 00:00:00 2001 +From: gaojiazhen +Date: Mon, 25 Mar 2024 22:13:43 +0800 +Subject: [PATCH] migration: Skip only empty block devicesi +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 2e128776dc56f502c2ee41750afe83938f389528 + +The block .save_setup() handler calls a helper routine +init_blk_migration() which builds a list of block
devices to take into +account for migration. When one device is found to be empty (sectors +== 0), the loop exits and all the remaining devices are ignored. This +is a regression introduced when bdrv_iterate() was removed. + +Change that by skipping only empty devices. + +Cc: Markus Armbruster +Cc: qemu-stable +Suggested-by: Kevin Wolf +Fixes: fea68bb ("block: Eliminate bdrv_iterate(), use bdrv_next()") +Signed-off-by: Cédric Le Goater +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Kevin Wolf +Link: https://lore.kernel.org/r/20240312120431.550054-1-clg@redhat.com +[peterx: fix "Suggested-by:"] +Signed-off-by: Peter Xu +Signed-off-by: Gao Jiazhen +--- + migration/block.c | 5 ++++- + tests/qemu-iotests/198.out | 2 -- + tests/qemu-iotests/206.out | 1 - + 3 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/migration/block.c b/migration/block.c +index a15f9bddcb..710ef6f490 100644 +--- a/migration/block.c ++++ b/migration/block.c +@@ -409,7 +409,10 @@ static int init_blk_migration(QEMUFile *f) + } + + sectors = bdrv_nb_sectors(bs); +- if (sectors <= 0) { ++ if (sectors == 0) { ++ continue; ++ } ++ if (sectors < 0) { + ret = sectors; + bdrv_next_cleanup(&it); + goto out; +diff --git a/tests/qemu-iotests/198.out b/tests/qemu-iotests/198.out +index 62fb73fa3e..805494916f 100644 +--- a/tests/qemu-iotests/198.out ++++ b/tests/qemu-iotests/198.out +@@ -39,7 +39,6 @@ Format specific information: + compression type: COMPRESSION_TYPE + encrypt: + ivgen alg: plain64 +- detached header: false + hash alg: sha256 + cipher alg: aes-256 + uuid: 00000000-0000-0000-0000-000000000000 +@@ -85,7 +84,6 @@ Format specific information: + compression type: COMPRESSION_TYPE + encrypt: + ivgen alg: plain64 +- detached header: false + hash alg: sha256 + cipher alg: aes-256 + uuid: 00000000-0000-0000-0000-000000000000 +diff --git a/tests/qemu-iotests/206.out b/tests/qemu-iotests/206.out +index 979f00f9bf..7e95694777 100644 +--- a/tests/qemu-iotests/206.out ++++ 
b/tests/qemu-iotests/206.out +@@ -114,7 +114,6 @@ Format specific information: + refcount bits: 16 + encrypt: + ivgen alg: plain64 +- detached header: false + hash alg: sha1 + cipher alg: aes-128 + uuid: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX +-- +2.27.0 + diff --git a/migration-report-migration-related-thread-pid-to-lib.patch b/migration-report-migration-related-thread-pid-to-lib.patch new file mode 100644 index 0000000000000000000000000000000000000000..9fd6fbb9a543dac44c6f9921620bb1112df267f4 --- /dev/null +++ b/migration-report-migration-related-thread-pid-to-lib.patch @@ -0,0 +1,54 @@ +From 7caa5d818e0fa0e1cee2513f2fde4e81f8b5cc13 Mon Sep 17 00:00:00 2001 +From: zhengchuan +Date: Mon, 5 Dec 2022 20:52:25 +0800 +Subject: [PATCH] migration: report migration related thread pid to libvirt + +in order to control migration thread cgroup, +we need to report migration related thread pid to libvirt + +Signed-off-by:zhengchuan +--- + migration/migration.c | 3 +++ + qapi/migration.json | 12 ++++++++++++ + 2 files changed, 15 insertions(+) + +diff --git a/migration/migration.c b/migration/migration.c +index 3ce04b2aaf..7c2fdde26b 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3299,6 +3299,9 @@ static void *migration_thread(void *opaque) + MigThrError thr_error; + bool urgent = false; + ++ /* report migration thread pid to libvirt */ ++ qapi_event_send_migration_pid(qemu_get_thread_id()); ++ + thread = migration_threads_add("live_migration", qemu_get_thread_id()); + + rcu_register_thread(); +diff --git a/qapi/migration.json b/qapi/migration.json +index 29af841f4e..b442d0d878 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -1447,6 +1447,18 @@ + { 'event': 'MIGRATION_PASS', + 'data': { 'pass': 'int' } } + ++## ++# @MIGRATION_PID: ++# ++# Emitted when migration thread appear ++# ++# @pid: pid of migration thread ++# ++# Since: EulerOS Virtual ++## ++{ 'event': 'MIGRATION_PID', ++ 'data': { 'pid': 'int' } } ++ + ## + # @COLOMessage: + # +-- 
+2.27.0 + diff --git a/migration-report-multiFd-related-thread-pid-to-libvi.patch b/migration-report-multiFd-related-thread-pid-to-libvi.patch new file mode 100644 index 0000000000000000000000000000000000000000..77730286d76ec64da9e5f1dd78a62ca88b2e852b --- /dev/null +++ b/migration-report-multiFd-related-thread-pid-to-libvi.patch @@ -0,0 +1,62 @@ +From e387eaeef8845993a437ad19eaf988fb101d3fdd Mon Sep 17 00:00:00 2001 +From: zhengchuan +Date: Mon, 5 Dec 2022 20:56:35 +0800 +Subject: [PATCH] migration: report multiFd related thread pid to libvirt + +report multiFd related thread pid to libvirt in order to +pin multiFd thread to different cpu. + +Signed-off-by:zhengchuan +--- + migration/multifd.c | 4 ++++ + qapi/migration.json | 12 ++++++++++++ + 2 files changed, 16 insertions(+) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 409460684f..7d373a245e 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -17,6 +17,7 @@ + #include "exec/ramblock.h" + #include "qemu/error-report.h" + #include "qapi/error.h" ++#include "qapi/qapi-events-migration.h" + #include "ram.h" + #include "migration.h" + #include "migration-stats.h" +@@ -657,6 +658,9 @@ static void *multifd_send_thread(void *opaque) + + thread = migration_threads_add(p->name, qemu_get_thread_id()); + ++ /* report multifd thread pid to libvirt */ ++ qapi_event_send_migration_multifd_pid(qemu_get_thread_id()); ++ + trace_multifd_send_thread_start(p->id); + rcu_register_thread(); + +diff --git a/qapi/migration.json b/qapi/migration.json +index b442d0d878..5d0855a1d8 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -1447,6 +1447,18 @@ + { 'event': 'MIGRATION_PASS', + 'data': { 'pass': 'int' } } + ++## ++# @MIGRATION_MULTIFD_PID: ++# ++# Emitted when multifd thread appear ++# ++# @pid: pid of multifd thread ++# ++# Since: EulerOS Virtual ++## ++{ 'event': 'MIGRATION_MULTIFD_PID', ++ 'data': { 'pid': 'int' } } ++ + ## + # @MIGRATION_PID: + # +-- +2.27.0 + diff --git 
a/migration-skip-cache_drop-for-bios-bootloader-and-nv.patch b/migration-skip-cache_drop-for-bios-bootloader-and-nv.patch new file mode 100644 index 0000000000000000000000000000000000000000..7ee3f088a239688d68c7aaa807781fbd2664dc72 --- /dev/null +++ b/migration-skip-cache_drop-for-bios-bootloader-and-nv.patch @@ -0,0 +1,47 @@ +From dfb9372702b2fb994392b8a6e8a39964c2656ae6 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 9 Feb 2022 08:49:41 +0800 +Subject: [PATCH] migration: skip cache_drop for bios bootloader and nvram + template + +Qemu enabled page cache dropping for raw device on the destination host +during shared storage migration. +However, fsync may take 300ms to multiple seconds to return in multiple-migration +scene, because all domains in a host share bios bootloader file, skip cache_drop +for bios bootloader and nvram template to avoid downtime increase. +--- + block.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/block.c b/block.c +index b7cb963929..3bfd4be6b4 100644 +--- a/block.c ++++ b/block.c +@@ -68,6 +68,9 @@ + + #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ + ++#define DEFAULT_BIOS_BOOT_LOADER_DIR "/usr/share/edk2" ++#define DEFAULT_NVRAM_TEMPLATE_DIR "/var/lib/libvirt/qemu/nvram" ++ + /* Protected by BQL */ + static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = + QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); +@@ -7017,7 +7020,13 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) + assert(!(bs->open_flags & BDRV_O_INACTIVE)); + assert_bdrv_graph_readable(); + +- if (bs->drv->bdrv_co_invalidate_cache) { ++ /* ++ * It's not necessary for bios bootloader and nvram template to drop cache ++ * when migration, skip this step for them to avoid dowtime increase.
++ */ ++ if (bs->drv->bdrv_co_invalidate_cache && ++ !strstr(bs->filename, DEFAULT_BIOS_BOOT_LOADER_DIR) && ++ !strstr(bs->filename, DEFAULT_NVRAM_TEMPLATE_DIR)) { + bs->drv->bdrv_co_invalidate_cache(bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); +-- +2.27.0 + diff --git a/monitor-Discard-BLOCK_IO_ERROR-event-when-VM-reboote.patch b/monitor-Discard-BLOCK_IO_ERROR-event-when-VM-reboote.patch new file mode 100644 index 0000000000000000000000000000000000000000..6bda51123bb31527b02f67c633a8adcb3e210118 --- /dev/null +++ b/monitor-Discard-BLOCK_IO_ERROR-event-when-VM-reboote.patch @@ -0,0 +1,96 @@ +From a344d8636168ba5f034a908d3394ef88d36133dd Mon Sep 17 00:00:00 2001 +From: Yan Wang +Date: Thu, 10 Feb 2022 11:18:13 +0800 +Subject: [PATCH] monitor: Discard BLOCK_IO_ERROR event when VM rebooted + +Throttled event like QAPI_EVENT_BLOCK_IO_ERROR may be queued +to limit event rate. Event may be delivered when VM is rebooted +if the event was queued in the *monitor_qapi_event_state* hash table. +This may cause the VM to pause and other related problems, +such as seabios being blocked during virtio-scsi initialization: + vring_add_buf(vq, sg, out_num, in_num, 0, 0); + vring_kick(vp, vq, 1); + ------------> VM paused here <----------- + /* Wait for reply */ + while (!vring_more_used(vq)) usleep(5); + +Signed-off-by: Yan Wang +--- + include/monitor/monitor.h | 2 ++ + monitor/monitor.c | 29 +++++++++++++++++++++++++++++ + system/runstate.c | 1 + + 3 files changed, 32 insertions(+) + +diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h +index 965f5d5450..60079086a8 100644 +--- a/include/monitor/monitor.h ++++ b/include/monitor/monitor.h +@@ -63,4 +63,6 @@ void monitor_register_hmp_info_hrt(const char *name, + int error_vprintf_unless_qmp(const char *fmt, va_list ap) G_GNUC_PRINTF(1, 0); + int error_printf_unless_qmp(const char *fmt, ...)
G_GNUC_PRINTF(1, 2); + ++void monitor_qapi_event_discard_io_error(void); ++ + #endif /* MONITOR_H */ +diff --git a/monitor/monitor.c b/monitor/monitor.c +index e540c1334a..8d59a76612 100644 +--- a/monitor/monitor.c ++++ b/monitor/monitor.c +@@ -34,6 +34,8 @@ + #include "qemu/option.h" + #include "sysemu/qtest.h" + #include "trace.h" ++#include "qemu/log.h" ++#include "qapi/qmp/qobject.h" + + /* + * To prevent flooding clients, events can be throttled. The +@@ -787,6 +789,33 @@ int monitor_init_opts(QemuOpts *opts, Error **errp) + return ret; + } + ++void monitor_qapi_event_discard_io_error(void) ++{ ++ GHashTableIter event_iter; ++ MonitorQAPIEventState *evstate; ++ gpointer key, value; ++ GString *json; ++ ++ qemu_mutex_lock(&monitor_lock); ++ g_hash_table_iter_init(&event_iter, monitor_qapi_event_state); ++ while (g_hash_table_iter_next(&event_iter, &key, &value)) { ++ evstate = key; ++ /* Only QAPI_EVENT_BLOCK_IO_ERROR is discarded */ ++ if (evstate->event == QAPI_EVENT_BLOCK_IO_ERROR) { ++ g_hash_table_iter_remove(&event_iter); ++ json = qobject_to_json(QOBJECT(evstate->qdict)); ++ qemu_log(" %s event discarded\n", json->str); ++ timer_del(evstate->timer); ++ timer_free(evstate->timer); ++ qobject_unref(evstate->data); ++ qobject_unref(evstate->qdict); ++ g_string_free(json, true); ++ g_free(evstate); ++ } ++ } ++ qemu_mutex_unlock(&monitor_lock); ++} ++ + QemuOptsList qemu_mon_opts = { + .name = "mon", + .implied_opt_name = "chardev", +diff --git a/system/runstate.c b/system/runstate.c +index 9d3f627fee..62e6db8d42 100644 +--- a/system/runstate.c ++++ b/system/runstate.c +@@ -503,6 +503,7 @@ void qemu_system_reset(ShutdownCause reason) + qapi_event_send_reset(shutdown_caused_by_guest(reason), reason); + } + cpu_synchronize_all_post_reset(); ++ monitor_qapi_event_discard_io_error(); + } + + /* +-- +2.27.0 + diff --git a/monitor-qmp-drop-inflight-rsp-if-qmp-client-broken.patch b/monitor-qmp-drop-inflight-rsp-if-qmp-client-broken.patch new file mode 100644 index 
0000000000000000000000000000000000000000..8dc8fc1219659e8010693e153e50fbbb4d13301c --- /dev/null +++ b/monitor-qmp-drop-inflight-rsp-if-qmp-client-broken.patch @@ -0,0 +1,111 @@ +From c6b183a4c3c63454dea39be26b0fb773ec04887e Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 9 Feb 2022 14:13:05 +0800 +Subject: [PATCH] monitor/qmp: drop inflight rsp if qmp client broken + +If libvirt restart while qemu is handle qmp message, libvirt will +reconnect qemu monitor socket, and query status of qemu by qmp. +But qemu may return last qmp respond to new connect socket, and libvirt +recv unexpected respond, So libvirt think qemu is abnormal, and will +kill qemu. + +This patch add qmp connect id, while reconnect id will change. While +respond to libvirt, judge if id is same, if not, drop this respond. +--- + monitor/monitor-internal.h | 1 + + monitor/qmp.c | 19 +++++++++++-------- + 2 files changed, 12 insertions(+), 8 deletions(-) + +diff --git a/monitor/monitor-internal.h b/monitor/monitor-internal.h +index 252de85681..d7842fa464 100644 +--- a/monitor/monitor-internal.h ++++ b/monitor/monitor-internal.h +@@ -144,6 +144,7 @@ typedef struct { + const QmpCommandList *commands; + bool capab_offered[QMP_CAPABILITY__MAX]; /* capabilities offered */ + bool capab[QMP_CAPABILITY__MAX]; /* offered and accepted */ ++ uint64_t qmp_client_id; /*qmp client id, update if peer disconnect */ + /* + * Protects qmp request/response queue. + * Take monitor_lock first when you need both. +diff --git a/monitor/qmp.c b/monitor/qmp.c +index 6eee450fe4..8f7671c5f1 100644 +--- a/monitor/qmp.c ++++ b/monitor/qmp.c +@@ -149,18 +149,19 @@ void qmp_send_response(MonitorQMP *mon, const QDict *rsp) + * Null @rsp can only happen for commands with QCO_NO_SUCCESS_RESP. + * Nothing is emitted then. 
+ */ +-static void monitor_qmp_respond(MonitorQMP *mon, QDict *rsp) ++static void monitor_qmp_respond(MonitorQMP *mon, QDict *rsp, uint64_t req_client_id) + { +- if (rsp) { +- qmp_send_response(mon, rsp); ++ if (!rsp || (mon->qmp_client_id != req_client_id)) { ++ return; + } ++ qmp_send_response(mon, rsp); + } + + /* + * Runs outside of coroutine context for OOB commands, but in + * coroutine context for everything else. + */ +-static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req) ++static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req, uint64_t req_client_id) + { + QDict *rsp; + QDict *error; +@@ -180,7 +181,7 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req) + } + } + +- monitor_qmp_respond(mon, rsp); ++ monitor_qmp_respond(mon, rsp, req_client_id); + qobject_unref(rsp); + } + +@@ -340,13 +341,13 @@ void coroutine_fn monitor_qmp_dispatcher_co(void *data) + trace_monitor_qmp_cmd_in_band(id_json->str); + g_string_free(id_json, true); + } +- monitor_qmp_dispatch(mon, req_obj->req); ++ monitor_qmp_dispatch(mon, req_obj->req, mon->qmp_client_id); + } else { + assert(req_obj->err); + trace_monitor_qmp_err_in_band(error_get_pretty(req_obj->err)); + rsp = qmp_error_response(req_obj->err); + req_obj->err = NULL; +- monitor_qmp_respond(mon, rsp); ++ monitor_qmp_respond(mon, rsp, mon->qmp_client_id); + qobject_unref(rsp); + } + +@@ -402,7 +403,7 @@ static void handle_qmp_command(void *opaque, QObject *req, Error *err) + trace_monitor_qmp_cmd_out_of_band(id_json->str); + g_string_free(id_json, true); + } +- monitor_qmp_dispatch(mon, req); ++ monitor_qmp_dispatch(mon, req, mon->qmp_client_id); + qobject_unref(req); + return; + } +@@ -486,6 +487,7 @@ static void monitor_qmp_event(void *opaque, QEMUChrEvent event) + mon_refcount++; + break; + case CHR_EVENT_CLOSED: ++ mon->qmp_client_id++; + /* + * Note: this is only useful when the output of the chardev + * backend is still open. 
For example, when the backend is +@@ -539,6 +541,7 @@ void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp) + } + qemu_chr_fe_set_echo(&mon->common.chr, true); + ++ mon->qmp_client_id = 1; + /* Note: we run QMP monitor in I/O thread when @chr supports that */ + monitor_data_init(&mon->common, true, false, + qemu_chr_has_feature(chr, QEMU_CHAR_FEATURE_GCONTEXT)); +-- +2.27.0 + diff --git a/nbd-server.c-fix-invalid-read-after-client-was-alrea.patch b/nbd-server.c-fix-invalid-read-after-client-was-alrea.patch new file mode 100644 index 0000000000000000000000000000000000000000..350339ecb2ab482936e5a700347cf01556c04051 --- /dev/null +++ b/nbd-server.c-fix-invalid-read-after-client-was-alrea.patch @@ -0,0 +1,45 @@ +From 81b4091eee81fe3871d836b1a684e27828cdc2be Mon Sep 17 00:00:00 2001 +From: WangJian +Date: Wed, 9 Feb 2022 10:42:33 +0800 +Subject: [PATCH] nbd/server.c: fix invalid read after client was already free + +In the process of NBD equipment pressurization, executing QEMU NBD will +lead to the failure of IO distribution and go to NBD_ Out process of trip(). +If two or more IO go to the out process, client NBD will release in nbd_request_put(). +The user after free problem that is read again in close(). +Through the NBD_ Save the value of client > closing before the out process in trip +to solve the use after free problem. 
+ +Signed-off-by: wangjian161 +--- + nbd/server.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/nbd/server.c b/nbd/server.c +index 895cf0a752..e8baed9705 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -2939,6 +2939,7 @@ static coroutine_fn void nbd_trip(void *opaque) + NBDRequestData *req; + NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */ + int ret; ++ bool client_closing; + Error *local_err = NULL; + + trace_nbd_trip(); +@@ -3023,8 +3024,11 @@ disconnect: + if (local_err) { + error_reportf_err(local_err, "Disconnect client, due to: "); + } ++ client_closing = client->closing; + nbd_request_put(req); +- client_close(client, true); ++ if (!client_closing) { ++ client_close(client, true); ++ } + nbd_client_put(client); + } + +-- +2.27.0 + diff --git a/net-dump.c-Suppress-spurious-compiler-warning.patch b/net-dump.c-Suppress-spurious-compiler-warning.patch new file mode 100644 index 0000000000000000000000000000000000000000..c35ee6f95844981753fcfd9b57510d7131817685 --- /dev/null +++ b/net-dump.c-Suppress-spurious-compiler-warning.patch @@ -0,0 +1,51 @@ +From 6999f07558308ee6b7d63e46ca554a0b702948d6 Mon Sep 17 00:00:00 2001 +From: liuxiangdong +Date: Tue, 8 Feb 2022 15:10:25 +0800 +Subject: [PATCH] net/dump.c: Suppress spurious compiler warning +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Compiling with gcc version 11.2.0 (Ubuntu 11.2.0-13ubuntu1) results in +a (spurious) warning: + + In function ‘dump_receive_iov’, + inlined from ‘filter_dump_receive_iov’ at ../net/dump.c:157:5: + ../net/dump.c:89:9: error: ‘writev’ specified size 18446744073709551600 +exceeds maximum object size 9223372036854775807 [-Werror=stringop-overflow=] + 89 | if (writev(s->fd, dumpiov, cnt + 1) != sizeof(hdr) + caplen) { + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + In file included from /home/ptomsich/qemu/include/qemu/osdep.h:108, + from ../net/dump.c:25: + ../net/dump.c: In function 
‘filter_dump_receive_iov’: + /usr/include/x86_64-linux-gnu/sys/uio.h:52:16: note: in a call to function +‘writev’ declared with attribute ‘read_only (2, 3)’ + 52 | extern ssize_t writev (int __fd, const struct iovec *__iovec, int +__count) + | ^~~~~~ + cc1: all warnings being treated as errors + +This change helps that version of GCC to understand what is going on +and suppresses this warning. + +Signed-off-by: Philipp Tomsich +--- + net/dump.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/dump.c b/net/dump.c +index 16073f2458..d880a7e299 100644 +--- a/net/dump.c ++++ b/net/dump.c +@@ -87,7 +87,7 @@ static ssize_t dump_receive_iov(DumpState *s, const struct iovec *iov, int cnt, + dumpiov[0].iov_len = sizeof(hdr); + cnt = iov_copy(&dumpiov[1], cnt, iov, cnt, offset, caplen); + +- if (writev(s->fd, dumpiov, cnt + 1) != sizeof(hdr) + caplen) { ++ if (writev(s->fd, &dumpiov[0], cnt + 1) != sizeof(hdr) + caplen) { + error_report("network dump write error - stopping dump"); + close(s->fd); + s->fd = -1; +-- +2.27.0 + diff --git a/net-eepro100-validate-various-address-valuesi-CVE-20.patch b/net-eepro100-validate-various-address-valuesi-CVE-20.patch new file mode 100644 index 0000000000000000000000000000000000000000..dc259d62847299da04ed6a498ce88bdbc9896931 --- /dev/null +++ b/net-eepro100-validate-various-address-valuesi-CVE-20.patch @@ -0,0 +1,58 @@ +From 6e6215b3ad0c8eac918bca9e2b5bb661e27f2fed Mon Sep 17 00:00:00 2001 +From: zhouli57 +Date: Sat, 18 Dec 2021 09:39:57 +0800 +Subject: [PATCH] net: eepro100: validate various address + valuesi(CVE-2021-20255) + +fix CVE-2021-20255 + +patch link: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg06098.html + +fix CVE-2021-20255, sync patch from ostms platform. 
+ +Signed-off-by: zhouli57 +Signed-off-by: Yan Wang +--- + hw/net/eepro100.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c +index 69e1c4bb89..f6204ec059 100644 +--- a/hw/net/eepro100.c ++++ b/hw/net/eepro100.c +@@ -279,6 +279,9 @@ typedef struct { + /* Quasi static device properties (no need to save them). */ + uint16_t stats_size; + bool has_extended_tcb_support; ++ ++ /* Flag to avoid recursions. */ ++ bool busy; + } EEPRO100State; + + /* Word indices in EEPROM. */ +@@ -844,6 +847,14 @@ static void action_command(EEPRO100State *s) + Therefore we limit the number of iterations. */ + unsigned max_loop_count = 16; + ++ if (s->busy) { ++ /* Prevent recursions. */ ++ logout("recursion in %s:%u\n", __FILE__, __LINE__); ++ return; ++ } ++ ++ s->busy = true; ++ + for (;;) { + bool bit_el; + bool bit_s; +@@ -940,6 +951,7 @@ static void action_command(EEPRO100State *s) + } + TRACE(OTHER, logout("CU list empty\n")); + /* List is empty. Now CU is idle or suspended. */ ++ s->busy = false; + } + + static void eepro100_cu_command(EEPRO100State * s, uint8_t val) +-- +2.27.0 + diff --git a/oslib-posix-optimise-vm-startup-time-for-1G-hugepage.patch b/oslib-posix-optimise-vm-startup-time-for-1G-hugepage.patch new file mode 100644 index 0000000000000000000000000000000000000000..d25778669c3421fb77c4a787d5852d1f708738e8 --- /dev/null +++ b/oslib-posix-optimise-vm-startup-time-for-1G-hugepage.patch @@ -0,0 +1,57 @@ +From b6c45f5ea5d1a379ac0a507cf59345c573b27cc8 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 9 Feb 2022 14:21:39 +0800 +Subject: [PATCH] oslib-posix: optimise vm startup time for 1G hugepage + +It takes quite a long time to clear 1G-hugepage, which makes glibc +pthread_create quite slow. +Create touch_pages threads in advance, and then handle the touch_pages +callback. Only read lock is held here. 
+--- + util/oslib-posix.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/util/oslib-posix.c b/util/oslib-posix.c +index e86fd64e09..9ca3fee2b8 100644 +--- a/util/oslib-posix.c ++++ b/util/oslib-posix.c +@@ -88,6 +88,8 @@ static QemuMutex sigbus_mutex; + static QemuMutex page_mutex; + static QemuCond page_cond; + ++static int started_num_threads; ++ + int qemu_get_thread_id(void) + { + #if defined(__linux__) +@@ -344,6 +346,10 @@ static void *do_touch_pages(void *arg) + } + qemu_mutex_unlock(&page_mutex); + ++ while (started_num_threads != memset_args->context.num_threads) { ++ smp_mb(); ++ } ++ + /* unblock SIGBUS */ + sigemptyset(&set); + sigaddset(&set, SIGBUS); +@@ -448,7 +454,7 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages, + context.threads = g_new0(MemsetThread, context.num_threads); + numpages_per_thread = numpages / context.num_threads; + leftover = numpages % context.num_threads; +- for (i = 0; i < context.num_threads; i++) { ++ for (i = 0, started_num_threads = 0; i < context.num_threads; i++) { + context.threads[i].addr = addr; + context.threads[i].numpages = numpages_per_thread + (i < leftover); + context.threads[i].hpagesize = hpagesize; +@@ -464,6 +470,7 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages, + QEMU_THREAD_JOINABLE); + } + addr += context.threads[i].numpages * hpagesize; ++ started_num_threads++; + } + + if (!use_madv_populate_write) { +-- +2.27.0 + diff --git a/pcie-Add-pcie-root-port-fast-plug-unplug-feature.patch b/pcie-Add-pcie-root-port-fast-plug-unplug-feature.patch new file mode 100644 index 0000000000000000000000000000000000000000..d86d8ab72d3851993421c2d3e778a5508d351177 --- /dev/null +++ b/pcie-Add-pcie-root-port-fast-plug-unplug-feature.patch @@ -0,0 +1,99 @@ +From 3c4b4c4fc3c71b375490233bb9209763d7094ee9 Mon Sep 17 00:00:00 2001 +From: Yan Wang +Date: Tue, 8 Feb 2022 16:10:31 +0800 +Subject: [PATCH] pcie: Add pcie-root-port fast plug/unplug 
feature + +If a device is plugged in the pcie-root-port when VM kernel is +booting, the kernel may wrongly disable the device. +This bug was brought in by two patches of the linux kernel: + +https://patchwork.kernel.org/patch/10575355/ +https://patchwork.kernel.org/patch/10766219/ + +VM runtime like kata uses this feature to boot microVM, +so we must fix it up. We hack into the pcie native hotplug +patch so that hotplug/unplug will work under this circumstance. + +Signed-off-by: Ying Fang +Signed-off-by: Yan Wang +--- + hw/core/machine.c | 2 ++ + hw/pci-bridge/gen_pcie_root_port.c | 2 ++ + hw/pci/pcie.c | 13 ++++++++++++- + include/hw/pci/pcie_port.h | 3 +++ + 4 files changed, 19 insertions(+), 1 deletion(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 0c17398141..965682619b 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -160,6 +160,8 @@ const size_t hw_compat_4_0_len = G_N_ELEMENTS(hw_compat_4_0); + GlobalProperty hw_compat_3_1[] = { + { "pcie-root-port", "x-speed", "2_5" }, + { "pcie-root-port", "x-width", "1" }, ++ { "pcie-root-port", "fast-plug", "0" }, ++ { "pcie-root-port", "fast-unplug", "0" }, + { "memory-backend-file", "x-use-canonical-path-for-ramblock-id", "true" }, + { "memory-backend-memfd", "x-use-canonical-path-for-ramblock-id", "true" }, + { "tpm-crb", "ppi", "false" }, +diff --git a/hw/pci-bridge/gen_pcie_root_port.c b/hw/pci-bridge/gen_pcie_root_port.c +index 1ce4e7beba..1e1ab5bb19 100644 +--- a/hw/pci-bridge/gen_pcie_root_port.c ++++ b/hw/pci-bridge/gen_pcie_root_port.c +@@ -145,6 +145,8 @@ static Property gen_rp_props[] = { + speed, PCIE_LINK_SPEED_16), + DEFINE_PROP_PCIE_LINK_WIDTH("x-width", PCIESlot, + width, PCIE_LINK_WIDTH_32), ++ DEFINE_PROP_UINT8("fast-plug", PCIESlot, fast_plug, 0), ++ DEFINE_PROP_UINT8("fast-unplug", PCIESlot, fast_unplug, 0), + DEFINE_PROP_END_OF_LIST() + }; + +diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c +index dccf204451..04fbd794a8 100644 +--- a/hw/pci/pcie.c ++++ b/hw/pci/pcie.c +@@ 
-555,6 +555,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, + uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; + uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); + uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL); ++ PCIESlot *s = PCIE_SLOT(hotplug_pdev); + + /* Check if hot-unplug is disabled on the slot */ + if ((sltcap & PCI_EXP_SLTCAP_HPC) == 0) { +@@ -600,7 +601,17 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, + return; + } + +- pcie_cap_slot_push_attention_button(hotplug_pdev); ++ if ((pci_dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) && s->fast_plug) { ++ pci_word_test_and_clear_mask(pci_dev->config + pci_dev->exp.exp_cap + PCI_EXP_LNKSTA, ++ PCI_EXP_LNKSTA_DLLLA); ++ } ++ ++ if (s->fast_unplug) { ++ pcie_cap_slot_event(hotplug_pdev, ++ PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP); ++ } else { ++ pcie_cap_slot_push_attention_button(hotplug_pdev); ++ } + } + + /* pci express slot for pci express root/downstream port +diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h +index 90e6cf45b8..7148a0959b 100644 +--- a/include/hw/pci/pcie_port.h ++++ b/include/hw/pci/pcie_port.h +@@ -56,6 +56,9 @@ struct PCIESlot { + uint8_t chassis; + uint16_t slot; + ++ uint8_t fast_plug; ++ uint8_t fast_unplug; ++ + PCIExpLinkSpeed speed; + PCIExpLinkWidth width; + +-- +2.27.0 + diff --git a/pcie-Compat-with-devices-which-do-not-support-Link-W.patch b/pcie-Compat-with-devices-which-do-not-support-Link-W.patch new file mode 100644 index 0000000000000000000000000000000000000000..9897182af472d0d12369ed8b945d7b380730df9b --- /dev/null +++ b/pcie-Compat-with-devices-which-do-not-support-Link-W.patch @@ -0,0 +1,50 @@ +From 6c72e65d57dc2a7d811f76a126a9a006abd0ab75 Mon Sep 17 00:00:00 2001 +From: fangying +Date: Wed, 18 Mar 2020 12:51:33 +0800 +Subject: [PATCH] pcie: Compat with devices which do not support Link Width, + such as ioh3420 + +We hack into PCI_EXP_LNKCAP to support device fast 
plug/unplug +for pcie-root-port. However some devices like ioh3420 do not +support it, so PCI_EXP_LNKCAP is not set for such devices. + +Signed-off-by: Ying Fang +Signed-off-by: Yan Wang +--- + hw/pci/pcie.c | 13 ++++++------- + 1 file changed, 6 insertions(+), 7 deletions(-) + +diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c +index 6db0cf69cd..dccf204451 100644 +--- a/hw/pci/pcie.c ++++ b/hw/pci/pcie.c +@@ -97,13 +97,6 @@ static void pcie_cap_fill_slot_lnk(PCIDevice *dev) + return; + } + +- /* Clear and fill LNKCAP from what was configured above */ +- pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP, +- PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS); +- pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, +- QEMU_PCI_EXP_LNKCAP_MLW(s->width) | +- QEMU_PCI_EXP_LNKCAP_MLS(s->speed)); +- + /* + * Link bandwidth notification is required for all root ports and + * downstream ports supporting links wider than x1 or multiple link +@@ -111,6 +104,12 @@ static void pcie_cap_fill_slot_lnk(PCIDevice *dev) + */ + if (s->width > QEMU_PCI_EXP_LNK_X1 || + s->speed > QEMU_PCI_EXP_LNK_2_5GT) { ++ /* Clear and fill LNKCAP from what was configured above */ ++ pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP, ++ PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS); ++ pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, ++ QEMU_PCI_EXP_LNKCAP_MLW(s->width) | ++ QEMU_PCI_EXP_LNKCAP_MLS(s->speed)); + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, + PCI_EXP_LNKCAP_LBNC); + } +-- +2.27.0 + diff --git a/physmem-gdbstub-Common-helping-funcs-changes-to-unre.patch b/physmem-gdbstub-Common-helping-funcs-changes-to-unre.patch new file mode 100644 index 0000000000000000000000000000000000000000..696a9a49357b61cd9d645c657c5aa38830276e88 --- /dev/null +++ b/physmem-gdbstub-Common-helping-funcs-changes-to-unre.patch @@ -0,0 +1,127 @@ +From 8fa5af7de07d9bc2535ea8fab087d509795e3579 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sun, 6 Aug 2023 22:12:52 +0000 +Subject: [PATCH] physmem,gdbstub: Common 
helping funcs/changes to *unrealize* + vCPU + +Supporting vCPU Hotplug for ARM arch also means introducing new functionality of +unrealizing the ARMCPU. This requires some new common functions. + +Defining them as part of architecture independent change so that this code could +be reused by other interested parties. + +Signed-off-by: Salil Mehta +--- + gdbstub/gdbstub.c | 6 ++++++ + include/exec/cpu-common.h | 8 ++++++++ + include/exec/gdbstub.h | 1 + + include/hw/core/cpu.h | 1 + + system/physmem.c | 25 +++++++++++++++++++++++++ + 5 files changed, 41 insertions(+) + +diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c +index 46d752bbc2..f16006d2a8 100644 +--- a/gdbstub/gdbstub.c ++++ b/gdbstub/gdbstub.c +@@ -582,6 +582,12 @@ void gdb_register_coprocessor(CPUState *cpu, + } + } + ++void gdb_unregister_coprocessor_all(CPUState *cpu) ++{ ++ g_array_free(cpu->gdb_regs, true); ++ cpu->gdb_regs = NULL; ++} ++ + static void gdb_process_breakpoint_remove_all(GDBProcess *p) + { + CPUState *cpu = gdb_get_first_cpu_in_process(p); +diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h +index 41115d8919..2a3d4aa1c8 100644 +--- a/include/exec/cpu-common.h ++++ b/include/exec/cpu-common.h +@@ -139,6 +139,14 @@ size_t qemu_ram_pagesize_largest(void); + */ + void cpu_address_space_init(CPUState *cpu, int asidx, + const char *prefix, MemoryRegion *mr); ++/** ++ * cpu_address_space_destroy: ++ * @cpu: CPU for which address space needs to be destroyed ++ * @asidx: integer index of this address space ++ * ++ * Note that with KVM only one address space is supported. 
++ */ ++void cpu_address_space_destroy(CPUState *cpu, int asidx); + + void cpu_physical_memory_rw(hwaddr addr, void *buf, + hwaddr len, bool is_write); +diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h +index d8a3c56fa2..d123b838c2 100644 +--- a/include/exec/gdbstub.h ++++ b/include/exec/gdbstub.h +@@ -39,6 +39,7 @@ typedef int (*gdb_set_reg_cb)(CPUArchState *env, uint8_t *buf, int reg); + void gdb_register_coprocessor(CPUState *cpu, + gdb_get_reg_cb get_reg, gdb_set_reg_cb set_reg, + int num_regs, const char *xml, int g_pos); ++void gdb_unregister_coprocessor_all(CPUState *cpu); + + /** + * gdbserver_start: start the gdb server +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index 0ca778eb75..6dbe163548 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -496,6 +496,7 @@ struct CPUState { + QSIMPLEQ_HEAD(, qemu_work_item) work_list; + + CPUAddressSpace *cpu_ases; ++ int cpu_ases_ref_count; + int num_ases; + AddressSpace *as; + MemoryRegion *memory; +diff --git a/system/physmem.c b/system/physmem.c +index 247c252e53..299174ad91 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -761,6 +761,7 @@ void cpu_address_space_init(CPUState *cpu, int asidx, + + if (!cpu->cpu_ases) { + cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases); ++ cpu->cpu_ases_ref_count = cpu->num_ases; + } + + newas = &cpu->cpu_ases[asidx]; +@@ -774,6 +775,30 @@ void cpu_address_space_init(CPUState *cpu, int asidx, + } + } + ++void cpu_address_space_destroy(CPUState *cpu, int asidx) ++{ ++ CPUAddressSpace *cpuas; ++ ++ assert(asidx < cpu->num_ases); ++ assert(asidx == 0 || !kvm_enabled()); ++ assert(cpu->cpu_ases); ++ ++ cpuas = &cpu->cpu_ases[asidx]; ++ if (tcg_enabled()) { ++ memory_listener_unregister(&cpuas->tcg_as_listener); ++ } ++ ++ address_space_destroy(cpuas->as); ++ g_free_rcu(cpuas->as, rcu); ++ ++ if (cpu->cpu_ases_ref_count == 1) { ++ g_free(cpu->cpu_ases); ++ cpu->cpu_ases = NULL; ++ } ++ ++ cpu->cpu_ases_ref_count--; ++} ++ + 
AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx) + { + /* Return the AddressSpace corresponding to the specified index */ +-- +2.27.0 + diff --git a/pl011-reset-read-FIFO-when-UARTTIMSC-0-UARTICR-0xfff.patch b/pl011-reset-read-FIFO-when-UARTTIMSC-0-UARTICR-0xfff.patch new file mode 100644 index 0000000000000000000000000000000000000000..d879b781bc47781c33814d8be006901a62fa5c80 --- /dev/null +++ b/pl011-reset-read-FIFO-when-UARTTIMSC-0-UARTICR-0xfff.patch @@ -0,0 +1,42 @@ +From e730214f4485ad444d8a1db9a284da53f407e8da Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Mon, 29 Jul 2019 16:16:35 +0800 +Subject: [PATCH] pl011: reset read FIFO when UARTTIMSC=0 & UARTICR=0xffff + +We can enable ACPI when AArch64 Linux is booted with QEMU and UEFI (AAVMF). +When VM is booting and the SBSA driver has not initialized, writing data +that exceeds 32 bytes will cause the read FIFO full and proceeding data will +be lost. The serial port appears to be stuck in this abnormal situation. + +A hack to reset read FIFO when UARTTIMSC=0 & UARTICR=0xffff appears to +resolve the issue. 
+ +The question is fully discussed at +https://www.spinics.net/lists/linux-serial/msg23163.html + +Signed-off-by: Haibin Wang +Reviewed-by: Shannon Zhao +Reviewed-by: Ying Fang +Signed-off-by: Yan Wang +--- + hw/char/pl011.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/char/pl011.c b/hw/char/pl011.c +index 58edeb9ddb..bc65d778d2 100644 +--- a/hw/char/pl011.c ++++ b/hw/char/pl011.c +@@ -314,6 +314,10 @@ static void pl011_write(void *opaque, hwaddr offset, + case 17: /* UARTICR */ + s->int_level &= ~value; + pl011_update(s); ++ if (!s->int_enabled && !s->int_level) { ++ s->read_count = 0; ++ s->read_pos = 0; ++ } + break; + case 18: /* UARTDMACR */ + s->dmacr = value; +-- +2.27.0 + diff --git a/pl031-support-rtc-timer-property-for-pl031.patch b/pl031-support-rtc-timer-property-for-pl031.patch new file mode 100644 index 0000000000000000000000000000000000000000..48f4d1bad790b6041e6696cd3912e6c77fdeab8e --- /dev/null +++ b/pl031-support-rtc-timer-property-for-pl031.patch @@ -0,0 +1,71 @@ +From 8e30e81c4268103d502587de565842b9632a7965 Mon Sep 17 00:00:00 2001 +From: Jinhao Gao +Date: Tue, 15 Feb 2022 17:02:08 +0800 +Subject: [PATCH] pl031: support rtc-timer property for pl031 + +This patch adds the rtc-timer property for pl031, we can get the +rtc time (UTC) through qmp command "qom-get date" with this property. 
+ +Signed-off-by: Haibin Wang +Reviewed-by: Shannon Zhao +Reviewed-by: Ying Fang +Signed-off-by: Keqian Zhu +Signed-off-by: Jinhao Gao +Signed-off-by: Yuan Zhang +--- + hw/rtc/pl031.c | 25 +++++++++++++++++++++++++ + 1 file changed, 25 insertions(+) + +diff --git a/hw/rtc/pl031.c b/hw/rtc/pl031.c +index f2e6baebba..57e9a35616 100644 +--- a/hw/rtc/pl031.c ++++ b/hw/rtc/pl031.c +@@ -63,6 +63,15 @@ static uint32_t pl031_get_count(PL031State *s) + return s->tick_offset + now / NANOSECONDS_PER_SECOND; + } + ++static void pl031_get_date(Object *obj, struct tm *current_tm, Error **errp) ++{ ++ PL031State *s = PL031(obj); ++ time_t ti = pl031_get_count(s); ++ ++ /* Changed to UTC time */ ++ gmtime_r(&ti, current_tm); ++} ++ + static void pl031_set_alarm(PL031State *s) + { + uint32_t ticks; +@@ -202,6 +211,20 @@ static void pl031_init(Object *obj) + qemu_clock_get_ns(rtc_clock) / NANOSECONDS_PER_SECOND; + + s->timer = timer_new_ns(rtc_clock, pl031_interrupt, s); ++ object_property_add_tm(OBJECT(s), "date", pl031_get_date); ++} ++ ++static void pl031_realize(DeviceState *d, Error **errp) ++{ ++ object_property_add_alias(qdev_get_machine(), "rtc-time", ++ OBJECT(d), "date"); ++} ++ ++static void pl031_unrealize(DeviceState *d) ++{ ++ if (object_property_find(qdev_get_machine(), "rtc-time")) { ++ object_property_del(qdev_get_machine(), "rtc-time"); ++ } + } + + static void pl031_finalize(Object *obj) +@@ -338,6 +361,8 @@ static void pl031_class_init(ObjectClass *klass, void *data) + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_pl031; ++ dc->realize = pl031_realize; ++ dc->unrealize = pl031_unrealize; + device_class_set_props(dc, pl031_properties); + } + +-- +2.27.0 + diff --git a/ps2-fix-oob-in-ps2-kbd.patch b/ps2-fix-oob-in-ps2-kbd.patch new file mode 100644 index 0000000000000000000000000000000000000000..0d0f4dea78e884ab9f376659daca9b6717a01661 --- /dev/null +++ b/ps2-fix-oob-in-ps2-kbd.patch @@ -0,0 +1,35 @@ +From 0a54d68547df3f276dc242b52d54e8549d0a84a0 
Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 9 Feb 2022 11:21:28 +0800 +Subject: [PATCH] ps2: fix oob in ps2 kbd + +fix oob in ps2 kbd +--- + hw/input/ps2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/input/ps2.c b/hw/input/ps2.c +index c8fd23cf36..b647561069 100644 +--- a/hw/input/ps2.c ++++ b/hw/input/ps2.c +@@ -167,7 +167,7 @@ void ps2_queue_noirq(PS2State *s, int b) + } + + q->data[q->wptr] = b; +- if (++q->wptr == PS2_BUFFER_SIZE) { ++ if (++q->wptr >= PS2_BUFFER_SIZE) { + q->wptr = 0; + } + q->count++; +@@ -557,7 +557,7 @@ uint32_t ps2_read_data(PS2State *s) + val = q->data[index]; + } else { + val = q->data[q->rptr]; +- if (++q->rptr == PS2_BUFFER_SIZE) { ++ if (++q->rptr >= PS2_BUFFER_SIZE) { + q->rptr = 0; + } + q->count--; +-- +2.27.0 + diff --git a/qdev-monitors-Fix-reundant-error_setg-of-qdev_add_de.patch b/qdev-monitors-Fix-reundant-error_setg-of-qdev_add_de.patch new file mode 100644 index 0000000000000000000000000000000000000000..4aa28bb0c5b9bed2bb8cc181af9fe5c046e5068e --- /dev/null +++ b/qdev-monitors-Fix-reundant-error_setg-of-qdev_add_de.patch @@ -0,0 +1,31 @@ +From 172d79d8ebb343fa144987d2c50d90655d5aa5f9 Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Thu, 29 Jul 2021 15:24:48 +0800 +Subject: [PATCH] qdev/monitors: Fix reundant error_setg of qdev_add_device + +There is an extra log "error_setg" in qdev_add_device(). When +hot-plugging a device, if the corresponding bus doesn't exist, it +will trigger an assertion "assert(*errp == NULL)". 
+ +Fixes: 515a7970490 (log: Add some logs on VM runtime path) +Signed-off-by: Kunkun Jiang +Signed-off-by: Yan Wang +--- + system/qdev-monitor.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c +index c885175b66..b10e483a9a 100644 +--- a/system/qdev-monitor.c ++++ b/system/qdev-monitor.c +@@ -644,7 +644,6 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts, + if (path != NULL) { + bus = qbus_find(path, errp); + if (!bus) { +- error_setg(errp, "can not find bus for %s", driver); + return NULL; + } + if (!object_dynamic_cast(OBJECT(bus), dc->bus_type)) { +-- +2.27.0 + diff --git a/qemu-nbd-make-native-as-the-default-aio-mode.patch b/qemu-nbd-make-native-as-the-default-aio-mode.patch new file mode 100644 index 0000000000000000000000000000000000000000..23dae5662eeb47f75517140f9c70a8e4dee707e0 --- /dev/null +++ b/qemu-nbd-make-native-as-the-default-aio-mode.patch @@ -0,0 +1,35 @@ +From 0e610831d584d9485eb0655168d08d8234bbb555 Mon Sep 17 00:00:00 2001 +From: WangJian +Date: Wed, 9 Feb 2022 10:48:58 +0800 +Subject: [PATCH] qemu-nbd: make native as the default aio mode + +When the file system is dealing with multithreading concurrent writing to a file, +the performance will be degraded because of the lock. +At present, the default AIO mode of QEMU NBD is threads. In the case of large blocks, +because IO is divided into small pieces and multiple queues, it will become multithreading +concurrent writing the same file. Due to the file system, the performance will be greatly reduced. +If you change to native mode, this problem will not exist. 
+ +Signed-off-by: wangjian161 +--- + qemu-nbd.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/qemu-nbd.c b/qemu-nbd.c +index 186e6468b1..acccf2977f 100644 +--- a/qemu-nbd.c ++++ b/qemu-nbd.c +@@ -843,6 +843,10 @@ int main(int argc, char **argv) + trace_init_file(); + qemu_set_log(LOG_TRACE, &error_fatal); + ++ if (!seen_aio && (flags & BDRV_O_NOCACHE)) { ++ flags |= BDRV_O_NATIVE_AIO; ++ } ++ + socket_activation = check_socket_activation(); + if (socket_activation == 0) { + if (!sockpath) { +-- +2.27.0 + diff --git a/qemu-nbd-set-timeout-to-qemu-nbd-socket.patch b/qemu-nbd-set-timeout-to-qemu-nbd-socket.patch new file mode 100644 index 0000000000000000000000000000000000000000..bc41eac5641004223996c683b2af1792fd826737 --- /dev/null +++ b/qemu-nbd-set-timeout-to-qemu-nbd-socket.patch @@ -0,0 +1,42 @@ +From d6aa08ac3693be3e08f2c8d3ad5a356ea6e9dead Mon Sep 17 00:00:00 2001 +From: WangJian +Date: Wed, 9 Feb 2022 10:55:08 +0800 +Subject: [PATCH] qemu-nbd: set timeout to qemu-nbd socket + +In case of insufficient memory and kill-9, +the NBD socket cannot be processed and stuck all the time. 
+ +Signed-off-by: wangjian161 +--- + nbd/client.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/nbd/client.c b/nbd/client.c +index 29ffc609a4..987dde43c7 100644 +--- a/nbd/client.c ++++ b/nbd/client.c +@@ -24,6 +24,8 @@ + #include "nbd-internal.h" + #include "qemu/cutils.h" + ++#define NBD_TIMEOUT_SECONDS 30 ++ + /* Definitions for opaque data types */ + + static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); +@@ -1310,6 +1312,12 @@ int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info, + } + } + ++ if (ioctl(fd, NBD_SET_TIMEOUT, NBD_TIMEOUT_SECONDS) < 0) { ++ int serrno = errno; ++ error_setg(errp, "Failed setting timeout"); ++ return -serrno; ++ } ++ + trace_nbd_init_finish(); + + return 0; +-- +2.27.0 + diff --git a/qemu.spec b/qemu.spec index 85ae9ef0296b042d5436105c29ca55d8f48d5acc..e8b6e62f08b632159b3bf2a58188daad10e61cba 100644 --- a/qemu.spec +++ b/qemu.spec @@ -3,7 +3,7 @@ Name: qemu Version: 8.2.0 -Release: 3 +Release: 6 Epoch: 11 Summary: QEMU is a generic and open source machine emulator and virtualizer License: GPLv2 and BSD and MIT and CC-BY-SA-4.0 @@ -89,6 +89,153 @@ Patch0072: target-loongarch-kvm-Enable-LSX-LASX-extension.patch Patch0073: target-loongarch-Fix-qtest-test-hmp-error-when-KVM-o.patch Patch0074: loongarch-Change-the-UEFI-loading-mode-to-loongarch.patch Patch0075: disable-keyring-option.patch +Patch0076: virtio-net-correctly-copy-vnet-header-when-flushing-.patch +Patch0077: ui-clipboard-mark-type-as-not-available-when-there-i.patch +Patch0078: memory-backup-Modify-the-VM-s-physical-bits-value-se.patch +Patch0079: backup-memory-bakcup-hugepages-hugepages-files-maybe.patch +Patch0080: block-disallow-block-jobs-when-there-is-a-BDRV_O_INA.patch +Patch0081: travis-ci-Rename-SOFTMMU-SYSTEM.patch +Patch0082: iotests-adapt-to-output-change-for-recently-introduc.patch +Patch0083: migration-Skip-only-empty-block-devicesi.patch +Patch0084: vhost-cancel-migration-when-vhost-user-restarted-dur.patch 
+Patch0085: Currently-while-kvm-and-qemu-can-not-handle-some-kvm.patch +Patch0086: ps2-fix-oob-in-ps2-kbd.patch +Patch0087: monitor-qmp-drop-inflight-rsp-if-qmp-client-broken.patch +Patch0088: oslib-posix-optimise-vm-startup-time-for-1G-hugepage.patch +Patch0089: migration-skip-cache_drop-for-bios-bootloader-and-nv.patch +Patch0090: migration-Add-multi-thread-compress-method.patch +Patch0091: migration-Refactoring-multi-thread-compress-migratio.patch +Patch0092: migration-Add-multi-thread-compress-ops.patch +Patch0093: migration-Add-zstd-support-in-multi-thread-compressi.patch +Patch0094: migration-Add-compress_level-sanity-check.patch +Patch0095: doc-Update-multi-thread-compression-doc.patch +Patch0096: cpu-features-fix-bug-for-memory-leakage.patch +Patch0097: migration-report-migration-related-thread-pid-to-lib.patch +Patch0098: migration-report-multiFd-related-thread-pid-to-libvi.patch +Patch0099: virtio-check-descriptor-numbers.patch +Patch0100: virtio-bugfix-add-rcu_read_lock-when-vring_avail_idx.patch +Patch0101: virtio-print-the-guest-virtio_net-features-that-host.patch +Patch0102: virtio-bugfix-check-the-value-of-caches-before-acces.patch +Patch0103: virtio-scsi-bugfix-fix-qemu-crash-for-hotplug-scsi-d.patch +Patch0104: nbd-server.c-fix-invalid-read-after-client-was-alrea.patch +Patch0105: qemu-nbd-make-native-as-the-default-aio-mode.patch +Patch0106: qemu-nbd-set-timeout-to-qemu-nbd-socket.patch +Patch0107: qdev-monitors-Fix-reundant-error_setg-of-qdev_add_de.patch +Patch0108: pcie-Compat-with-devices-which-do-not-support-Link-W.patch +Patch0109: pcie-Add-pcie-root-port-fast-plug-unplug-feature.patch +Patch0110: net-dump.c-Suppress-spurious-compiler-warning.patch +Patch0111: hw-net-rocker_of_dpa-fix-double-free-bug-of-rocker-d.patch +Patch0112: i6300esb-watchdog-bugfix-Add-a-runstate-transition.patch +Patch0113: vhost-user-Set-the-acked_features-to-vm-s-featrue.patch +Patch0114: vhost-user-Add-support-reconnect-vhost-user-socket.patch +Patch0115: 
fix-qemu-core-when-vhost-user-net-config-with-server.patch +Patch0116: vhost-user-quit-infinite-loop-while-used-memslots-is.patch +Patch0117: vhost-user-add-vhost_set_mem_table-when-vm-load_setu.patch +Patch0118: vhost-user-add-unregister_savevm-when-vhost-user-cle.patch +Patch0119: monitor-Discard-BLOCK_IO_ERROR-event-when-VM-reboote.patch +Patch0120: virtio-net-bugfix-do-not-delete-netdev-before-virtio.patch +Patch0121: virtio-net-fix-max-vring-buf-size-when-set-ring-num.patch +Patch0122: virtio-net-set-the-max-of-queue-size-to-4096.patch +Patch0123: virtio-net-update-the-default-and-max-of-rx-tx_queue.patch +Patch0124: hw-usb-reduce-the-vpcu-cost-of-UHCI-when-VNC-disconn.patch +Patch0125: vhost-vdpa-add-VHOST_BACKEND_F_BYTEMAPLOG.patch +Patch0126: vhost-vdpa-add-migration-log-ops-for-VhostOps.patch +Patch0127: vhost-introduce-bytemap-for-vhost-backend-logging.patch +Patch0128: vhost-add-vhost_dev_suspend-resume_op.patch +Patch0129: vhost-implement-vhost-vdpa-suspend-resume.patch +Patch0130: vhost-implement-vhost_vdpa_device_suspend-resume.patch +Patch0131: vhost-implement-savevm_handler-for-vdpa-device.patch +Patch0132: vhost-implement-post-resume-bh.patch +Patch0133: vhost-implement-migration-state-notifier-for-vdpa-de.patch +Patch0134: vdpa-implement-vdpa-device-migration.patch +Patch0135: vdpa-move-memory-listener-to-the-realize-stage.patch +Patch0136: vdpa-support-vdpa-device-suspend-resume.patch +Patch0137: vdpa-suspend-function-return-0-when-the-vdpa-device-.patch +Patch0138: vdpa-correct-param-passed-in-when-unregister-save.patch +Patch0139: vdpa-don-t-suspend-resume-device-when-vdpa-device-no.patch +Patch0140: docs-Add-generic-vhost-vdpa-device-documentation.patch +Patch0141: vdpa-set-vring-enable-only-if-the-vring-address-has-.patch +Patch0142: ide-ahci-add-check-to-avoid-null-dereference-CVE-201.patch +Patch0143: net-eepro100-validate-various-address-valuesi-CVE-20.patch +Patch0144: cpu-add-Kunpeng-920-cpu-support.patch +Patch0145: 
cpu-add-Cortex-A72-processor-kvm-target-support.patch +Patch0146: tests-virt-Allow-changes-to-PPTT-test-table.patch +Patch0147: hw-arm64-add-vcpu-cache-info-support.patch +Patch0148: arm64-Add-the-cpufreq-device-to-show-cpufreq-info-to.patch +Patch0149: tests-virt-Update-expected-ACPI-tables-for-virt-test.patch +Patch0150: pl011-reset-read-FIFO-when-UARTTIMSC-0-UARTICR-0xfff.patch +Patch0151: shadow_dev-introduce-shadow-dev-for-virtio-net-devic.patch +Patch0152: tests-Disable-filemonitor-testcase.patch +Patch0153: freeclock-add-qmp-command-to-get-time-offset-of-vm-i.patch +Patch0154: freeclock-set-rtc_date_diff-for-arm.patch +Patch0155: freeclock-set-rtc_date_diff-for-X86.patch +Patch0156: i386-cache-passthrough-Update-AMD-8000_001D.EAX-25-1.patch +Patch0157: bugfix-irq-Avoid-covering-object-refcount-of-qemu_ir.patch +Patch0158: log-Add-log-at-boot-cpu-init-for-aarch64.patch +Patch0159: feature-Add-log-for-each-modules.patch +Patch0160: feature-Add-logs-for-vm-start-and-destroy.patch +Patch0161: pl031-support-rtc-timer-property-for-pl031.patch +Patch0162: arm-acpi-Fix-when-make-qemu-system-aarch64-at-x86_64.patch +Patch0163: linux-headers-update-against-5.10-and-manual-clear-v.patch +Patch0164: vfio-Maintain-DMA-mapping-range-for-the-container.patch +Patch0165: vfio-migration-Add-support-for-manual-clear-vfio-dir.patch +Patch0166: arm-virt-target-arm-Add-new-ARMCPU-socket-cluster-co.patch +Patch0167: cpus-common-Add-common-CPU-utility-for-possible-vCPU.patch +Patch0168: hw-arm-virt-Move-setting-of-common-CPU-properties-in.patch +Patch0169: arm-virt-target-arm-Machine-init-time-change-common-.patch +Patch0170: accel-kvm-Extract-common-KVM-vCPU-creation-parking-c.patch +Patch0171: arm-virt-kvm-Pre-create-disabled-possible-vCPUs-mach.patch +Patch0172: arm-virt-gicv3-Changes-to-pre-size-GIC-with-possible.patch +Patch0173: arm-virt-Init-PMU-at-host-for-all-possible-vcpus.patch +Patch0174: hw-acpi-Move-CPU-ctrl-dev-MMIO-region-len-macro-to-c.patch +Patch0175: 
arm-acpi-Enable-ACPI-support-for-vcpu-hotplug.patch +Patch0176: hw-acpi-Add-ACPI-CPU-hotplug-init-stub.patch +Patch0177: hw-acpi-Use-qemu_present_cpu-API-in-ACPI-CPU-hotplug.patch +Patch0178: hw-acpi-Init-GED-framework-with-cpu-hotplug-events.patch +Patch0179: arm-virt-Add-cpu-hotplug-events-to-GED-during-creati.patch +Patch0180: arm-virt-Create-GED-dev-before-disabled-CPU-Objs-are.patch +Patch0181: hw-acpi-Update-CPUs-AML-with-cpu-ctrl-dev-change.patch +Patch0182: arm-virt-acpi-Factor-out-CPPC-building-from-DSDT-CPU.patch +Patch0183: acpi-cpu-Add-cpu_cppc-building-support.patch +Patch0184: tests-acpi-bios-tables-test-Allow-changes-to-virt-DS.patch +Patch0185: arm-virt-acpi-Build-CPUs-AML-with-CPU-Hotplug-suppor.patch +Patch0186: arm-virt-Make-ARM-vCPU-present-status-ACPI-persisten.patch +Patch0187: hw-acpi-ACPI-AML-Changes-to-reflect-the-correct-_STA.patch +Patch0188: hw-acpi-Update-GED-_EVT-method-AML-with-cpu-scan.patch +Patch0189: hw-arm-MADT-Tbl-change-to-size-the-guest-with-possib.patch +Patch0190: hw-acpi-Make-_MAT-method-optional.patch +Patch0191: arm-virt-Release-objects-for-disabled-possible-vCPUs.patch +Patch0192: hw-acpi-Update-ACPI-GED-framework-to-support-vCPU-Ho.patch +Patch0193: arm-virt-Add-update-basic-hot-un-plug-framework.patch +Patch0194: arm-virt-Changes-to-un-wire-GICC-vCPU-IRQs-during-ho.patch +Patch0195: hw-arm-gicv3-Changes-to-update-GIC-with-vCPU-hot-plu.patch +Patch0196: hw-intc-arm-gicv3-Changes-required-to-re-init-the-vC.patch +Patch0197: arm-virt-Update-the-guest-via-GED-about-CPU-hot-un-p.patch +Patch0198: hw-arm-Changes-required-for-reset-and-to-support-nex.patch +Patch0199: physmem-gdbstub-Common-helping-funcs-changes-to-unre.patch +Patch0200: target-arm-Add-support-of-unrealize-ARMCPU-during-vC.patch +Patch0201: target-arm-kvm-Write-CPU-state-back-to-KVM-on-reset.patch +Patch0202: target-arm-kvm-tcg-Register-Handle-SMCCC-hypercall-e.patch +Patch0203: hw-arm-Support-hotplug-capability-check-using-_OSC-m.patch +Patch0204: 
tcg-mttcg-enable-threads-to-unregister-in-tcg_ctxs.patch +Patch0205: hw-arm-virt-Expose-cold-booted-CPUs-as-MADT-GICC-Ena.patch +Patch0206: system-physmem-Fix-possible-double-free-when-destroy.patch +Patch0207: arm-cpu-Some-fixes-for-arm_cpu_unrealizefn.patch +Patch0208: acpi-cpu-Fix-cpu_hotplug_hw_init.patch +Patch0209: system-cpus-Fix-pause_all_vcpus-under-concurrent-env.patch +Patch0210: system-cpus-Fix-resume_all_vcpus-under-vCPU-hotplug-.patch +Patch0211: arm-virt.c-Convey-local_err-when-set-psci-conduit.patch +Patch0212: arm-virt-Fix-adjudgement-of-core_id-for-vcpu-hotplug.patch +Patch0213: accel-kvm-Use-correct-id-for-parked-vcpu.patch +Patch0214: arm-kvm-Set-psci-smccc-filter-only-with-vcpu-hotplug.patch +Patch0215: intc-gicv3-Fixes-for-vcpu-hotplug.patch +Patch0216: acpi-ged-Init-cpu-hotplug-only-when-machine-support-.patch +Patch0217: acpi-ged-Remove-cpuhp-field-of-ged.patch +Patch0218: arm-virt-acpi-Require-possible_cpu_arch_ids-for-buil.patch +Patch0219: arm-virt-Consider-has_ged-when-set-mc-has_hotpluggab.patch +Patch0220: arm-virt-Require-mc-has_hotpluggable_cpus-for-cold-p.patch +Patch0221: tests-acpi-Update-expected-ACPI-tables-for-vcpu-hotp.patch +Patch0222: coro-support-live-patch-for-libcare.patch BuildRequires: flex BuildRequires: gcc @@ -303,7 +450,7 @@ qemubuilddir="build" tar xf %{SOURCE4} cd BinDir/ -\cp -r -a . 
../ +\cp -r -a * ../ cd ../ ./configure \ @@ -686,6 +833,159 @@ getent passwd qemu >/dev/null || \ %endif %changelog +* Wed Apr 10 2024 Jiabo Feng - 11:8.2.0-6 +- coro: support live patch for libcare +- tests/acpi: Update expected ACPI tables for vcpu hotplug(Update BinDir) +- arm/virt: Require mc->has_hotpluggable_cpus for cold-plugged vcpu +- arm/virt: Consider has_ged when set mc->has_hotpluggable_cpus +- arm/virt-acpi: Require possible_cpu_arch_ids for build_cpus_aml() +- acpi/ged: Remove cpuhp field of ged +- acpi/ged: Init cpu hotplug only when machine support it +- intc/gicv3: Fixes for vcpu hotplug +- arm/kvm: Set psci smccc filter only with vcpu hotplug +- accel/kvm: Use correct id for parked vcpu +- arm/virt: Fix adjudgement of core_id for vcpu hotplugged +- arm/virt.c: Convey local_err when set psci-conduit +- system/cpus: Fix resume_all_vcpus() under vCPU hotplug condition +- system/cpus: Fix pause_all_vcpus() under concurrent environment +- acpi/cpu: Fix cpu_hotplug_hw_init() +- arm/cpu: Some fixes for arm_cpu_unrealizefn() +- system/physmem: Fix possible double free when destroy cpu as +- hw/arm/virt: Expose cold-booted CPUs as MADT GICC Enabled +- tcg/mttcg: enable threads to unregister in tcg_ctxs[] +- hw/arm: Support hotplug capability check using _OSC method +- target/arm/kvm,tcg: Register/Handle SMCCC hypercall exits to VMM/Qemu +- target/arm/kvm: Write CPU state back to KVM on reset +- target/arm: Add support of *unrealize* ARMCPU during vCPU Hot-unplug +- physmem,gdbstub: Common helping funcs/changes to *unrealize* vCPU +- hw/arm: Changes required for reset and to support next boot +- arm/virt: Update the guest(via GED) about CPU hot-(un)plug events +- hw/intc/arm-gicv3*: Changes required to (re)init the vCPU register info +- hw/arm,gicv3: Changes to update GIC with vCPU hot-plug notification +- arm/virt: Changes to (un)wire GICC<->vCPU IRQs during hot-(un)plug +- arm/virt: Add/update basic hot-(un)plug framework +- hw/acpi: Update ACPI GED 
framework to support vCPU Hotplug +- arm/virt: Release objects for *disabled* possible vCPUs after init +- hw/acpi: Make _MAT method optional +- hw/arm: MADT Tbl change to size the guest with possible vCPUs +- hw/acpi: Update GED _EVT method AML with cpu scan +- hw/acpi: ACPI/AML Changes to reflect the correct _STA.{PRES,ENA} Bits to Guest +- arm/virt: Make ARM vCPU *present* status ACPI *persistent* +- arm/virt/acpi: Build CPUs AML with CPU Hotplug support +- tests/acpi/bios-tables-test: Allow changes to virt/DSDT file +- acpi/cpu: Add cpu_cppc building support +- arm/virt/acpi: Factor out CPPC building from DSDT CPU aml +- hw/acpi: Update CPUs AML with cpu-(ctrl)dev change +- arm/virt: Create GED dev before *disabled* CPU Objs are destroyed +- arm/virt: Add cpu hotplug events to GED during creation +- hw/acpi: Init GED framework with cpu hotplug events +- hw/acpi: Use qemu_present_cpu() API in ACPI CPU hotplug init +- hw/acpi: Add ACPI CPU hotplug init stub +- arm/acpi: Enable ACPI support for vcpu hotplug +- hw/acpi: Move CPU ctrl-dev MMIO region len macro to common header file +- arm/virt: Init PMU at host for all possible vcpus +- arm/virt,gicv3: Changes to pre-size GIC with possible vcpus @machine init +- arm/virt,kvm: Pre-create disabled possible vCPUs @machine init +- accel/kvm: Extract common KVM vCPU {creation,parking} code +- arm/virt,target/arm: Machine init time change common to vCPU {cold|hot}-plug +- hw/arm/virt: Move setting of common CPU properties in a function +- cpus-common: Add common CPU utility for possible vCPUs +- arm/virt,target/arm: Add new ARMCPU {socket,cluster,core,thread}-id property + +* Sun Apr 7 2024 Jiabo Feng - 11:8.2.0-5 +- vfio/migration: Add support for manual clear vfio dirty log +- vfio: Maintain DMA mapping range for the container +- linux-headers: update against 5.10 and manual clear vfio dirty log series +- arm/acpi: Fix when make qemu-system-aarch64 at x86_64 host bios_tables_test fail reason: __aarch64__ macro let 
build_pptt at x86_64 and aarch64 host build different function that let bios_tables_test fail. +- pl031: support rtc-timer property for pl031 +- feature: Add logs for vm start and destroy +- feature: Add log for each modules +- log: Add log at boot & cpu init for aarch64 +- bugfix: irq: Avoid covering object refcount of qemu_irq +- i386: cache passthrough: Update AMD 8000_001D.EAX[25:14] based on vCPU topo +- freeclock: set rtc_date_diff for X86 +- freeclock: set rtc_date_diff for arm +- freeclock: add qmp command to get time offset of vm in seconds +- tests: Disable filemonitor testcase +- shadow_dev: introduce shadow dev for virtio-net device +- pl011: reset read FIFO when UARTTIMSC=0 & UARTICR=0xffff +- tests: virt: Update expected ACPI tables for virt test(Update BinDir) +- arm64: Add the cpufreq device to show cpufreq info to guest +- hw/arm64: add vcpu cache info support +- tests: virt: Allow changes to PPTT test table +- cpu: add Cortex-A72 processor kvm target support +- cpu: add Kunpeng-920 cpu support +- net: eepro100: validate various address values (CVE-2021-20255) +- ide: ahci: add check to avoid null dereference (CVE-2019-12067) +- vdpa: set vring enable only if the vring address has already been set +- docs: Add generic vhost-vdpa device documentation +- vdpa: don't suspend/resume device when vdpa device not started +- vdpa: correct param passed in when unregister save +- vdpa: suspend function return 0 when the vdpa device is stopped +- vdpa: support vdpa device suspend/resume +- vdpa: move memory listener to the realize stage +- vdpa: implement vdpa device migration +- vhost: implement migration state notifier for vdpa device +- vhost: implement post resume bh +- vhost: implement savevm_handler for vdpa device +- vhost: implement vhost_vdpa_device_suspend/resume +- vhost: implement vhost-vdpa suspend/resume +- vhost: add vhost_dev_suspend/resume_op +- vhost: introduce bytemap for vhost backend logging +- vhost-vdpa: add migration log ops for 
VhostOps +- vhost-vdpa: add VHOST_BACKEND_F_BYTEMAPLOG +- hw/usb: reduce the vcpu cost of UHCI when VNC disconnect +- virtio-net: update the default and max of rx/tx_queue_size +- virtio-net: set the max of queue size to 4096 +- virtio-net: fix max vring buf size when set ring num +- virtio-net: bugfix: do not delete netdev before virtio net +- monitor: Discard BLOCK_IO_ERROR event when VM rebooted +- vhost-user: add unregister_savevm when vhost-user cleanup +- vhost-user: add vhost_set_mem_table when vm load_setup at destination +- vhost-user: quit infinite loop while used memslots is more than the backend limit +- fix qemu-core when vhost-user-net config with server mode +- vhost-user: Add support reconnect vhost-user socket +- vhost-user: Set the acked_features to vm's feature +- i6300esb watchdog: bugfix: Add a runstate transition +- hw/net/rocker_of_dpa: fix double free bug of rocker device +- net/dump.c: Suppress spurious compiler warning +- pcie: Add pcie-root-port fast plug/unplug feature +- pcie: Compat with devices which do not support Link Width, such as ioh3420 +- qdev/monitors: Fix redundant error_setg of qdev_add_device +- qemu-nbd: set timeout to qemu-nbd socket +- qemu-nbd: make native as the default aio mode +- nbd/server.c: fix invalid read after client was already free +- virtio-scsi: bugfix: fix qemu crash for hotplug scsi disk with dataplane +- virtio: bugfix: check the value of caches before accessing it +- virtio: print the guest virtio_net features that host does not support +- virtio: bugfix: add rcu_read_lock when vring_avail_idx is called +- virtio: check descriptor numbers +- migration: report multiFd related thread pid to libvirt +- migration: report migration related thread pid to libvirt +- cpu/features: fix bug for memory leakage +- doc: Update multi-thread compression doc +- migration: Add compress_level sanity check +- migration: Add zstd support in multi-thread compression +- migration: Add multi-thread compress ops +- migration: 
Refactoring multi-thread compress migration +- migration: Add multi-thread compress method +- migration: skip cache_drop for bios bootloader and nvram template +- oslib-posix: optimise vm startup time for 1G hugepage +- monitor/qmp: drop inflight rsp if qmp client broken +- ps2: fix oob in ps2 kbd +- Currently, while kvm and qemu can not handle some kvm exit, qemu will do vm_stop, which will make vm in pause state. This action make vm unrecoverable, so send guest panic to libvirt instead. +- vhost: cancel migration when vhost-user restarted during migration + +* Mon Apr 1 2024 Jiabo Feng - 11:8.2.0-4 +- migration: Skip only empty block devices +- iotests: adapt to output change for recently introduced 'detached header' field +- travis-ci: Rename SOFTMMU -> SYSTEM +- block: disallow block jobs when there is a BDRV_O_INACTIVE flag +- [backup] memory: backup hugepages: hugepages files maybe leftover +- memory: [backup] Modify the VM's physical bits value set policy. +- ui/clipboard: mark type as not available when there is no data (CVE-2023-6683) +- virtio-net: correctly copy vnet header when flushing TX (CVE-2023-6693) + * Wed Mar 27 2024 Jiabo Feng - 11:8.2.0-3 - disable keyring option - loongarch: Change the UEFI loading mode to loongarch diff --git a/shadow_dev-introduce-shadow-dev-for-virtio-net-devic.patch b/shadow_dev-introduce-shadow-dev-for-virtio-net-devic.patch new file mode 100644 index 0000000000000000000000000000000000000000..3ba1e54d8bafed805bdc0212dcb6add9bf99fa87 --- /dev/null +++ b/shadow_dev-introduce-shadow-dev-for-virtio-net-devic.patch @@ -0,0 +1,196 @@ +From c4829aa6fce007c995b21cfbd86de0473263c19a Mon Sep 17 00:00:00 2001 +From: Dongxu Sun +Date: Sat, 30 Mar 2024 12:49:05 +0800 +Subject: [PATCH] shadow_dev: introduce shadow dev for virtio-net device + +for virtio net devices, create the shadow device for vlpi +bypass inject supported. 
+ +Signed-off-by: Wang Haibin +Signed-off-by: Yu Zenghui +Signed-off-by: Chen Qun +Signed-off-by: KunKun Jiang +Signed-off-by: Dongxu Sun +Signed-off-by: Yuan Zhang +--- + hw/virtio/virtio-pci.c | 32 ++++++++++++++++++++++++++ + include/sysemu/kvm.h | 5 +++++ + linux-headers/linux/kvm.h | 13 +++++++++++ + target/arm/kvm.c | 47 +++++++++++++++++++++++++++++++++++++++ + 4 files changed, 97 insertions(+) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index 134a8eaef6..f8adb0520a 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -922,18 +922,44 @@ undo: + } + return ret; + } ++ ++#ifdef __aarch64__ ++int __attribute__((weak)) kvm_create_shadow_device(PCIDevice *dev) ++{ ++ return 0; ++} ++ ++int __attribute__((weak)) kvm_delete_shadow_device(PCIDevice *dev) ++{ ++ return 0; ++} ++#endif ++ + static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs) + { + int queue_no; + int ret = 0; + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + ++#ifdef __aarch64__ ++ if (!strcmp(vdev->name, "virtio-net")) { ++ kvm_create_shadow_device(&proxy->pci_dev); ++ } ++#endif ++ + for (queue_no = 0; queue_no < nvqs; queue_no++) { + if (!virtio_queue_get_num(vdev, queue_no)) { + return -1; + } + ret = kvm_virtio_pci_vector_use_one(proxy, queue_no); + } ++ ++#ifdef __aarch64__ ++ if (!strcmp(vdev->name, "virtio-net") && ret != 0) { ++ kvm_delete_shadow_device(&proxy->pci_dev); ++ } ++#endif ++ + return ret; + } + +@@ -976,6 +1002,12 @@ static void kvm_virtio_pci_vector_vq_release(VirtIOPCIProxy *proxy, int nvqs) + } + kvm_virtio_pci_vector_release_one(proxy, queue_no); + } ++ ++#ifdef __aarch64__ ++ if (!strcmp(vdev->name, "virtio-net")) { ++ kvm_delete_shadow_device(&proxy->pci_dev); ++ } ++#endif + } + + static void kvm_virtio_pci_vector_config_release(VirtIOPCIProxy *proxy) +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index d614878164..b46d6203b4 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ 
-538,4 +538,9 @@ bool kvm_arch_cpu_check_are_resettable(void); + bool kvm_dirty_ring_enabled(void); + + uint32_t kvm_dirty_ring_size(void); ++ ++#ifdef __aarch64__ ++int kvm_create_shadow_device(PCIDevice *dev); ++int kvm_delete_shadow_device(PCIDevice *dev); ++#endif + #endif +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 549fea3a97..56f6b2583f 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1198,6 +1198,8 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 + #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 + ++#define KVM_CAP_ARM_VIRT_MSI_BYPASS 799 ++ + #ifdef KVM_CAP_IRQ_ROUTING + + struct kvm_irq_routing_irqchip { +@@ -1524,6 +1526,17 @@ struct kvm_s390_ucas_mapping { + #define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config) + #define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data) + #define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data) ++ ++#ifdef __aarch64__ ++struct kvm_master_dev_info ++{ ++ __u32 nvectors; /* number of msi vectors */ ++ struct kvm_msi msi[0]; ++}; ++#define KVM_CREATE_SHADOW_DEV _IOW(KVMIO, 0xf0, struct kvm_master_dev_info) ++#define KVM_DEL_SHADOW_DEV _IOW(KVMIO, 0xf1, __u32) ++#endif ++ + /* Available with KVM_CAP_PIT_STATE2 */ + #define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2) + #define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 7903e2ddde..f59f4f81b2 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -26,6 +26,8 @@ + #include "trace.h" + #include "internals.h" + #include "hw/pci/pci.h" ++#include "hw/pci/msi.h" ++#include "hw/pci/msix.h" + #include "exec/memattrs.h" + #include "exec/address-spaces.h" + #include "hw/boards.h" +@@ -1053,6 +1055,51 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, + return 0; + } + ++int kvm_create_shadow_device(PCIDevice *dev) ++{ ++ KVMState *s = kvm_state; ++ struct 
kvm_master_dev_info *mdi; ++ MSIMessage msg; ++ uint32_t vector, nvectors = msix_nr_vectors_allocated(dev); ++ uint32_t request_id; ++ int ret; ++ ++ if (!kvm_vm_check_extension(s, KVM_CAP_ARM_VIRT_MSI_BYPASS) || !nvectors) { ++ return 0; ++ } ++ ++ mdi = g_malloc0(sizeof(uint32_t) + sizeof(struct kvm_msi) * nvectors); ++ mdi->nvectors = nvectors; ++ request_id = pci_requester_id(dev); ++ ++ for (vector = 0; vector < nvectors; vector++) { ++ msg = msix_get_message(dev, vector); ++ mdi->msi[vector].address_lo = extract64(msg.address, 0, 32); ++ mdi->msi[vector].address_hi = extract64(msg.address, 32, 32); ++ mdi->msi[vector].data = le32_to_cpu(msg.data); ++ mdi->msi[vector].flags = KVM_MSI_VALID_DEVID; ++ mdi->msi[vector].devid = request_id; ++ memset(mdi->msi[vector].pad, 0, sizeof(mdi->msi[vector].pad)); ++ } ++ ++ ret = kvm_vm_ioctl(s, KVM_CREATE_SHADOW_DEV, mdi); ++ g_free(mdi); ++ return ret; ++} ++ ++int kvm_delete_shadow_device(PCIDevice *dev) ++{ ++ KVMState *s = kvm_state; ++ uint32_t request_id, nvectors = msix_nr_vectors_allocated(dev); ++ ++ if (!kvm_vm_check_extension(s, KVM_CAP_ARM_VIRT_MSI_BYPASS) || !nvectors) { ++ return 0; ++ } ++ ++ request_id = pci_requester_id(dev); ++ return kvm_vm_ioctl(s, KVM_DEL_SHADOW_DEV, &request_id); ++} ++ + int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, + int vector, PCIDevice *dev) + { +-- +2.27.0 + diff --git a/system-cpus-Fix-pause_all_vcpus-under-concurrent-env.patch b/system-cpus-Fix-pause_all_vcpus-under-concurrent-env.patch new file mode 100644 index 0000000000000000000000000000000000000000..4c1b707d2a181fcc357d04385781add6b274a8f2 --- /dev/null +++ b/system-cpus-Fix-pause_all_vcpus-under-concurrent-env.patch @@ -0,0 +1,91 @@ +From 401e145800134d0310d613f48c4962a108b8ddda Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Sun, 17 Mar 2024 16:37:03 +0800 +Subject: [PATCH] system/cpus: Fix pause_all_vcpus() under concurrent + environment + +Both main loop thread and vCPU thread are allowed to 
call +pause_all_vcpus(), and in general resume_all_vcpus() is called +after it. Two issues live in pause_all_vcpus(): + +1. There is possibility that during thread T1 waits on +qemu_pause_cond with bql unlocked, other thread has called +pause_all_vcpus() and resume_all_vcpus(), then thread T1 will +stuck, because the condition all_vcpus_paused() is always false. + +2. After all_vcpus_paused() has been checked as true, we will +unlock bql to relock replay_mutex. During the bql was unlocked, +the vcpu's state may has been changed by other thread, so we +must retry. + +Signed-off-by: Keqian Zhu +--- + system/cpus.c | 29 ++++++++++++++++++++++++----- + 1 file changed, 24 insertions(+), 5 deletions(-) + +diff --git a/system/cpus.c b/system/cpus.c +index a444a747f0..7c5369fa9c 100644 +--- a/system/cpus.c ++++ b/system/cpus.c +@@ -551,12 +551,14 @@ static bool all_vcpus_paused(void) + return true; + } + +-void pause_all_vcpus(void) ++static void request_pause_all_vcpus(void) + { + CPUState *cpu; + +- qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false); + CPU_FOREACH(cpu) { ++ if (cpu->stopped) { ++ continue; ++ } + if (qemu_cpu_is_self(cpu)) { + qemu_cpu_stop(cpu, true); + } else { +@@ -564,6 +566,14 @@ void pause_all_vcpus(void) + qemu_cpu_kick(cpu); + } + } ++} ++ ++void pause_all_vcpus(void) ++{ ++ qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false); ++ ++retry: ++ request_pause_all_vcpus(); + + /* We need to drop the replay_lock so any vCPU threads woken up + * can finish their replay tasks +@@ -572,14 +582,23 @@ void pause_all_vcpus(void) + + while (!all_vcpus_paused()) { + qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex); +- CPU_FOREACH(cpu) { +- qemu_cpu_kick(cpu); +- } ++ /* During we waited on qemu_pause_cond the bql was unlocked, ++ * the vcpu's state may has been changed by other thread, so ++ * we must request the pause state on all vcpus again. 
++ */ ++ request_pause_all_vcpus(); + } + + qemu_mutex_unlock_iothread(); + replay_mutex_lock(); + qemu_mutex_lock_iothread(); ++ ++ /* During the bql was unlocked, the vcpu's state may has been ++ * changed by other thread, so we must retry. ++ */ ++ if (!all_vcpus_paused()) { ++ goto retry; ++ } + } + + void cpu_resume(CPUState *cpu) +-- +2.27.0 + diff --git a/system-cpus-Fix-resume_all_vcpus-under-vCPU-hotplug-.patch b/system-cpus-Fix-resume_all_vcpus-under-vCPU-hotplug-.patch new file mode 100644 index 0000000000000000000000000000000000000000..f4008abba6fee9de7707a9cdbf1ad33373a58270 --- /dev/null +++ b/system-cpus-Fix-resume_all_vcpus-under-vCPU-hotplug-.patch @@ -0,0 +1,43 @@ +From a29922f76c9b5064ddd2e686fa725b96c435e889 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Sun, 17 Mar 2024 16:37:04 +0800 +Subject: [PATCH] system/cpus: Fix resume_all_vcpus() under vCPU hotplug + condition + +For vCPU being hotplugged, qemu_init_vcpu() is called. In this +function, we set vcpu state as stopped, and then wait vcpu thread +to be created. + +As the vcpu state is stopped, it will inform us it has been created +and then wait on halt_cond. After we has realized vcpu object, we +will resume the vcpu thread. + +However, during we wait vcpu thread to be created, the bql is +unlocked, and other thread is allowed to call resume_all_vcpus(), +which will resume the un-realized vcpu. + +This fixes the issue by filter out un-realized vcpu during +resume_all_vcpus(). 
+ +Signed-off-by: Keqian Zhu +--- + system/cpus.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/system/cpus.c b/system/cpus.c +index 7c5369fa9c..f2289e9545 100644 +--- a/system/cpus.c ++++ b/system/cpus.c +@@ -618,6 +618,9 @@ void resume_all_vcpus(void) + + qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true); + CPU_FOREACH(cpu) { ++ if (!object_property_get_bool(OBJECT(cpu), "realized", &error_abort)) { ++ continue; ++ } + cpu_resume(cpu); + } + } +-- +2.27.0 + diff --git a/system-physmem-Fix-possible-double-free-when-destroy.patch b/system-physmem-Fix-possible-double-free-when-destroy.patch new file mode 100644 index 0000000000000000000000000000000000000000..a2f3853bc110515d6d30dcb404fee28aef1339eb --- /dev/null +++ b/system-physmem-Fix-possible-double-free-when-destroy.patch @@ -0,0 +1,64 @@ +From 5f7464524d0fb2c25c9bacfb550df92bef9bb3bf Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 14:11:05 +0800 +Subject: [PATCH] system/physmem: Fix possible double free when destroy cpu as + +address_space_destroy() and g_free_rcu() both operate cpuas->as +at rcu thread context asynchronously, each one is a rcu task +that have different callback (the first callback is do_address_ +space_destroy() and the second callback is g_free()). + +It's possible that while the first task is pending and the second +task overwrites the rcu callback (as the second task operates on +the same object). Then the g_free will be called twice on cpuas->as. + +Signed-off-by: Keqian Zhu +--- + include/exec/memory.h | 1 + + system/memory.c | 3 +++ + system/physmem.c | 2 +- + 3 files changed, 5 insertions(+), 1 deletion(-) + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index e131c2682c..91c42c9a6a 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -1114,6 +1114,7 @@ struct AddressSpace { + struct rcu_head rcu; + char *name; + MemoryRegion *root; ++ bool free_in_rcu; + + /* Accessed via RCU. 
*/ + struct FlatView *current_map; +diff --git a/system/memory.c b/system/memory.c +index 798b6c0a17..fb817e54bc 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -3130,6 +3130,9 @@ static void do_address_space_destroy(AddressSpace *as) + g_free(as->name); + g_free(as->ioeventfds); + memory_region_unref(as->root); ++ if (as->free_in_rcu) { ++ g_free(as); ++ } + } + + void address_space_destroy(AddressSpace *as) +diff --git a/system/physmem.c b/system/physmem.c +index 299174ad91..cbe838f203 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -788,8 +788,8 @@ void cpu_address_space_destroy(CPUState *cpu, int asidx) + memory_listener_unregister(&cpuas->tcg_as_listener); + } + ++ cpuas->as->free_in_rcu = true; + address_space_destroy(cpuas->as); +- g_free_rcu(cpuas->as, rcu); + + if (cpu->cpu_ases_ref_count == 1) { + g_free(cpu->cpu_ases); +-- +2.27.0 + diff --git a/target-arm-Add-support-of-unrealize-ARMCPU-during-vC.patch b/target-arm-Add-support-of-unrealize-ARMCPU-during-vC.patch new file mode 100644 index 0000000000000000000000000000000000000000..023fe7f49df7f1fa140ac9c9a7ea7dda1511f453 --- /dev/null +++ b/target-arm-Add-support-of-unrealize-ARMCPU-during-vC.patch @@ -0,0 +1,294 @@ +From b311feda2078e7ee8f060531d4d061beccbc2f77 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 9 May 2020 20:13:10 +0100 +Subject: [PATCH] target/arm: Add support of *unrealize* ARMCPU during vCPU + Hot-unplug + +vCPU Hot-unplug will result in QOM CPU object unrealization which will do away +with all the vCPU thread creations, allocations, registrations that happened +as part of the realization process. This change introduces the ARM CPU unrealize +function taking care of exactly that. + +Note, initialized KVM vCPUs are not destroyed in host KVM but their Qemu context +is parked at the QEMU KVM layer. 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Reported-by: Vishnu Pajjuri +[VP: Identified CPU stall issue & suggested probable fix] +Signed-off-by: Salil Mehta +--- + target/arm/cpu.c | 101 +++++++++++++++++++++++++++++++++++++++++ + target/arm/cpu.h | 14 ++++++ + target/arm/gdbstub.c | 6 +++ + target/arm/helper.c | 25 ++++++++++ + target/arm/internals.h | 3 ++ + target/arm/kvm64.c | 4 ++ + 6 files changed, 153 insertions(+) + +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 18b8a79c8f..501f88eb2f 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -142,6 +142,16 @@ void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, + QLIST_INSERT_HEAD(&cpu->pre_el_change_hooks, entry, node); + } + ++void arm_unregister_pre_el_change_hooks(ARMCPU *cpu) ++{ ++ ARMELChangeHook *entry, *next; ++ ++ QLIST_FOREACH_SAFE(entry, &cpu->pre_el_change_hooks, node, next) { ++ QLIST_REMOVE(entry, node); ++ g_free(entry); ++ } ++} ++ + void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, + void *opaque) + { +@@ -153,6 +163,16 @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, + QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node); + } + ++void arm_unregister_el_change_hooks(ARMCPU *cpu) ++{ ++ ARMELChangeHook *entry, *next; ++ ++ QLIST_FOREACH_SAFE(entry, &cpu->el_change_hooks, node, next) { ++ QLIST_REMOVE(entry, node); ++ g_free(entry); ++ } ++} ++ + static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque) + { + /* Reset a single ARMCPRegInfo register */ +@@ -2390,6 +2410,85 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) + acc->parent_realize(dev, errp); + } + ++static void arm_cpu_unrealizefn(DeviceState *dev) ++{ ++ ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev); ++ ARMCPU *cpu = ARM_CPU(dev); ++ CPUARMState *env = &cpu->env; ++ CPUState *cs = CPU(dev); ++ bool has_secure; ++ ++ has_secure = cpu->has_el3 || 
arm_feature(env, ARM_FEATURE_M_SECURITY); ++ ++ /* rock 'n' un-roll, whatever happened in the arm_cpu_realizefn cleanly */ ++ cpu_address_space_destroy(cs, ARMASIdx_NS); ++ ++ if (cpu->tag_memory != NULL) { ++ cpu_address_space_destroy(cs, ARMASIdx_TagNS); ++ if (has_secure) { ++ cpu_address_space_destroy(cs, ARMASIdx_TagS); ++ } ++ } ++ ++ if (has_secure) { ++ cpu_address_space_destroy(cs, ARMASIdx_S); ++ } ++ ++ destroy_cpreg_list(cpu); ++ arm_cpu_unregister_gdb_regs(cpu); ++ unregister_cp_regs_for_features(cpu); ++ ++ if (cpu->sau_sregion && arm_feature(env, ARM_FEATURE_M_SECURITY)) { ++ g_free(env->sau.rbar); ++ g_free(env->sau.rlar); ++ } ++ ++ if (arm_feature(env, ARM_FEATURE_PMSA) && ++ arm_feature(env, ARM_FEATURE_V7) && ++ cpu->pmsav7_dregion) { ++ if (arm_feature(env, ARM_FEATURE_V8)) { ++ g_free(env->pmsav8.rbar[M_REG_NS]); ++ g_free(env->pmsav8.rlar[M_REG_NS]); ++ if (arm_feature(env, ARM_FEATURE_M_SECURITY)) { ++ g_free(env->pmsav8.rbar[M_REG_S]); ++ g_free(env->pmsav8.rlar[M_REG_S]); ++ } ++ } else { ++ g_free(env->pmsav7.drbar); ++ g_free(env->pmsav7.drsr); ++ g_free(env->pmsav7.dracr); ++ } ++ if (cpu->pmsav8r_hdregion) { ++ g_free(env->pmsav8.hprbar); ++ g_free(env->pmsav8.hprlar); ++ } ++ } ++ ++ if (arm_feature(env, ARM_FEATURE_PMU)) { ++ if (!kvm_enabled()) { ++ arm_unregister_pre_el_change_hooks(cpu); ++ arm_unregister_el_change_hooks(cpu); ++ } ++ ++#ifndef CONFIG_USER_ONLY ++ if (cpu->pmu_timer) { ++ timer_del(cpu->pmu_timer); ++ } ++#endif ++ } ++ ++ cpu_remove_sync(CPU(dev)); ++ acc->parent_unrealize(dev); ++ ++#ifndef CONFIG_USER_ONLY ++ timer_del(cpu->gt_timer[GTIMER_PHYS]); ++ timer_del(cpu->gt_timer[GTIMER_VIRT]); ++ timer_del(cpu->gt_timer[GTIMER_HYP]); ++ timer_del(cpu->gt_timer[GTIMER_SEC]); ++ timer_del(cpu->gt_timer[GTIMER_HYPVIRT]); ++#endif ++} ++ + static ObjectClass *arm_cpu_class_by_name(const char *cpu_model) + { + ObjectClass *oc; +@@ -2492,6 +2591,8 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) + + 
device_class_set_parent_realize(dc, arm_cpu_realizefn, + &acc->parent_realize); ++ device_class_set_parent_unrealize(dc, arm_cpu_unrealizefn, ++ &acc->parent_unrealize); + + device_class_set_props(dc, arm_cpu_properties); + +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 145d3dbf13..c51a0e3467 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -1138,6 +1138,7 @@ struct ARMCPUClass { + + const ARMCPUInfo *info; + DeviceRealize parent_realize; ++ DeviceUnrealize parent_unrealize; + ResettablePhases parent_phases; + }; + +@@ -3359,6 +3360,13 @@ static inline AddressSpace *arm_addressspace(CPUState *cs, MemTxAttrs attrs) + */ + void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, + void *opaque); ++/** ++ * arm_unregister_pre_el_change_hooks: ++ * unregister all pre EL change hook functions. Generally called during ++ * unrealize'ing leg ++ */ ++void arm_unregister_pre_el_change_hooks(ARMCPU *cpu); ++ + /** + * arm_register_el_change_hook: + * Register a hook function which will be called immediately after this +@@ -3371,6 +3379,12 @@ void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, + */ + void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, void + *opaque); ++/** ++ * arm_unregister_el_change_hooks: ++ * unregister all EL change hook functions.
Generally called during ++ * unrealize'ing leg ++ */ ++void arm_unregister_el_change_hooks(ARMCPU *cpu); + + /** + * arm_rebuild_hflags: +diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c +index 28f546a5ff..5ba1e28e34 100644 +--- a/target/arm/gdbstub.c ++++ b/target/arm/gdbstub.c +@@ -553,3 +553,9 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu) + } + #endif /* CONFIG_TCG */ + } ++ ++void arm_cpu_unregister_gdb_regs(ARMCPU *cpu) ++{ ++ CPUState *cs = CPU(cpu); ++ gdb_unregister_coprocessor_all(cs); ++} +diff --git a/target/arm/helper.c b/target/arm/helper.c +index 2746d3fdac..e47498828c 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -263,6 +263,19 @@ void init_cpreg_list(ARMCPU *cpu) + g_list_free(keys); + } + ++void destroy_cpreg_list(ARMCPU *cpu) ++{ ++ assert(cpu->cpreg_indexes); ++ assert(cpu->cpreg_values); ++ assert(cpu->cpreg_vmstate_indexes); ++ assert(cpu->cpreg_vmstate_values); ++ ++ g_free(cpu->cpreg_indexes); ++ g_free(cpu->cpreg_values); ++ g_free(cpu->cpreg_vmstate_indexes); ++ g_free(cpu->cpreg_vmstate_values); ++} ++ + /* + * Some registers are not accessible from AArch32 EL3 if SCR.NS == 0. + */ +@@ -9438,6 +9451,18 @@ void register_cp_regs_for_features(ARMCPU *cpu) + #endif + } + ++void unregister_cp_regs_for_features(ARMCPU *cpu) ++{ ++ CPUARMState *env = &cpu->env; ++ if (arm_feature(env, ARM_FEATURE_M)) { ++ /* M profile has no coprocessor registers */ ++ return; ++ } ++ ++ /* empty it all. unregister all the coprocessor registers */ ++ g_hash_table_remove_all(cpu->cp_regs); ++} ++ + /* Sort alphabetically by type name, except for "any". 
*/ + static gint arm_cpu_list_compare(gconstpointer a, gconstpointer b) + { +diff --git a/target/arm/internals.h b/target/arm/internals.h +index 143d57c0fe..c3a7682f05 100644 +--- a/target/arm/internals.h ++++ b/target/arm/internals.h +@@ -187,9 +187,12 @@ void arm_cpu_register(const ARMCPUInfo *info); + void aarch64_cpu_register(const ARMCPUInfo *info); + + void register_cp_regs_for_features(ARMCPU *cpu); ++void unregister_cp_regs_for_features(ARMCPU *cpu); + void init_cpreg_list(ARMCPU *cpu); ++void destroy_cpreg_list(ARMCPU *cpu); + + void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu); ++void arm_cpu_unregister_gdb_regs(ARMCPU *cpu); + void arm_translate_init(void); + + void arm_restore_state_to_opc(CPUState *cs, +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 03ce1e7525..9c3a35d63a 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -647,6 +647,10 @@ int kvm_arch_init_vcpu(CPUState *cs) + + int kvm_arch_destroy_vcpu(CPUState *cs) + { ++ if (cs->thread_id) { ++ qemu_del_vm_change_state_handler(cs->vmcse); ++ } ++ + return 0; + } + +-- +2.27.0 + diff --git a/target-arm-kvm-Write-CPU-state-back-to-KVM-on-reset.patch b/target-arm-kvm-Write-CPU-state-back-to-KVM-on-reset.patch new file mode 100644 index 0000000000000000000000000000000000000000..0b4922cc702a47bb45f2fa001c86719a8bb9c0c9 --- /dev/null +++ b/target-arm-kvm-Write-CPU-state-back-to-KVM-on-reset.patch @@ -0,0 +1,50 @@ +From a079801cd3ae6484cad6826f20bcf4ecc7e97ead Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Wed, 5 May 2021 15:43:27 +0200 +Subject: [PATCH] target/arm/kvm: Write CPU state back to KVM on reset + +When a KVM vCPU is reset following a PSCI CPU_ON call, its power state +is not synchronized with KVM at the moment. Because the vCPU is not +marked dirty, we miss the call to kvm_arch_put_registers() that writes +to KVM's MP_STATE. Force mp_state synchronization. 
+ +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: Salil Mehta +--- + target/arm/kvm.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 70cf15b550..aca652621f 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -636,11 +636,12 @@ void kvm_arm_cpu_post_load(ARMCPU *cpu) + void kvm_arm_reset_vcpu(ARMCPU *cpu) + { + int ret; ++ CPUState *cs = CPU(cpu); + + /* Re-init VCPU so that all registers are set to + * their respective reset values. + */ +- ret = kvm_arm_vcpu_init(CPU(cpu)); ++ ret = kvm_arm_vcpu_init(cs); + if (ret < 0) { + fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret)); + abort(); +@@ -657,6 +658,12 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu) + * for the same reason we do so in kvm_arch_get_registers(). + */ + write_list_to_cpustate(cpu); ++ ++ /* ++ * Ensure we call kvm_arch_put_registers(). The vCPU isn't marked dirty if ++ * it was parked in KVM and is now booting from a PSCI CPU_ON call. 
++ */ ++ cs->vcpu_dirty = true; + } + + void kvm_arm_create_host_vcpu(ARMCPU *cpu) +-- +2.27.0 + diff --git a/target-arm-kvm-tcg-Register-Handle-SMCCC-hypercall-e.patch b/target-arm-kvm-tcg-Register-Handle-SMCCC-hypercall-e.patch new file mode 100644 index 0000000000000000000000000000000000000000..1b4bfbb2d6aa800e60bee7e96d429be813c6271c --- /dev/null +++ b/target-arm-kvm-tcg-Register-Handle-SMCCC-hypercall-e.patch @@ -0,0 +1,407 @@ +From 9c4a7c44c3c9e89c6aeab85b00c72a09a0c13940 Mon Sep 17 00:00:00 2001 +From: Author Salil Mehta +Date: Sat, 27 May 2023 22:13:13 +0200 +Subject: [PATCH] target/arm/kvm,tcg: Register/Handle SMCCC hypercall exits to + VMM/Qemu + +Add registration and Handling of HVC/SMC hypercall exits to VMM + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Jean-Philippe Brucker +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: Salil Mehta +--- + target/arm/arm-powerctl.c | 51 +++++++++++++++++++++++++++++------- + target/arm/helper.c | 2 +- + target/arm/internals.h | 11 -------- + target/arm/kvm.c | 52 +++++++++++++++++++++++++++++++++++++ + target/arm/kvm64.c | 46 +++++++++++++++++++++++++++++--- + target/arm/kvm_arm.h | 13 ++++++++++ + target/arm/meson.build | 1 + + target/arm/{tcg => }/psci.c | 8 ++++++ + target/arm/tcg/meson.build | 4 --- + 9 files changed, 160 insertions(+), 28 deletions(-) + rename target/arm/{tcg => }/psci.c (97%) + +diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c +index c078849403..fb19b04189 100644 +--- a/target/arm/arm-powerctl.c ++++ b/target/arm/arm-powerctl.c +@@ -16,6 +16,7 @@ + #include "qemu/log.h" + #include "qemu/main-loop.h" + #include "sysemu/tcg.h" ++#include "hw/boards.h" + + #ifndef DEBUG_ARM_POWERCTL + #define DEBUG_ARM_POWERCTL 0 +@@ -28,18 +29,37 @@ + } \ + } while (0) + ++static CPUArchId *arm_get_archid_by_id(uint64_t id) ++{ ++ int n; ++ CPUArchId *arch_id; ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ ++ /* ++ * At this point disabled CPUs don't 
have a CPUState, but their CPUArchId ++ * exists. ++ * ++ * TODO: Is arch_id == mp_affinity? This needs work. ++ */ ++ for (n = 0; n < ms->possible_cpus->len; n++) { ++ arch_id = &ms->possible_cpus->cpus[n]; ++ ++ if (arch_id->arch_id == id) { ++ return arch_id; ++ } ++ } ++ return NULL; ++} ++ + CPUState *arm_get_cpu_by_id(uint64_t id) + { +- CPUState *cpu; ++ CPUArchId *arch_id; + + DPRINTF("cpu %" PRId64 "\n", id); + +- CPU_FOREACH(cpu) { +- ARMCPU *armcpu = ARM_CPU(cpu); +- +- if (armcpu->mp_affinity == id) { +- return cpu; +- } ++ arch_id = arm_get_archid_by_id(id); ++ if (arch_id && arch_id->cpu) { ++ return CPU(arch_id->cpu); + } + + qemu_log_mask(LOG_GUEST_ERROR, +@@ -97,6 +117,7 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id, + { + CPUState *target_cpu_state; + ARMCPU *target_cpu; ++ CPUArchId *arch_id; + struct CpuOnInfo *info; + + assert(qemu_mutex_iothread_locked()); +@@ -117,12 +138,24 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id, + } + + /* Retrieve the cpu we are powering up */ +- target_cpu_state = arm_get_cpu_by_id(cpuid); +- if (!target_cpu_state) { ++ arch_id = arm_get_archid_by_id(cpuid); ++ if (!arch_id) { + /* The cpu was not found */ + return QEMU_ARM_POWERCTL_INVALID_PARAM; + } + ++ target_cpu_state = CPU(arch_id->cpu); ++ if (!qemu_enabled_cpu(target_cpu_state)) { ++ /* ++ * The cpu is not plugged in or disabled. 
We should return appropriate ++ * value as introduced in DEN0022E PSCI 1.2 issue E ++ */ ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "[ARM]%s: Denying attempt to online removed/disabled " ++ "CPU%" PRId64"\n", __func__, cpuid); ++ return QEMU_ARM_POWERCTL_IS_OFF; ++ } ++ + target_cpu = ARM_CPU(target_cpu_state); + if (target_cpu->power_state == PSCI_ON) { + qemu_log_mask(LOG_GUEST_ERROR, +diff --git a/target/arm/helper.c b/target/arm/helper.c +index e47498828c..793aa89cc6 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -11346,7 +11346,7 @@ void arm_cpu_do_interrupt(CPUState *cs) + env->exception.syndrome); + } + +- if (tcg_enabled() && arm_is_psci_call(cpu, cs->exception_index)) { ++ if (arm_is_psci_call(cpu, cs->exception_index)) { + arm_handle_psci_call(cpu); + qemu_log_mask(CPU_LOG_INT, "...handled as PSCI call\n"); + return; +diff --git a/target/arm/internals.h b/target/arm/internals.h +index c3a7682f05..20b9c1da38 100644 +--- a/target/arm/internals.h ++++ b/target/arm/internals.h +@@ -314,21 +314,10 @@ vaddr arm_adjust_watchpoint_address(CPUState *cs, vaddr addr, int len); + /* Callback function for when a watchpoint or breakpoint triggers. */ + void arm_debug_excp_handler(CPUState *cs); + +-#if defined(CONFIG_USER_ONLY) || !defined(CONFIG_TCG) +-static inline bool arm_is_psci_call(ARMCPU *cpu, int excp_type) +-{ +- return false; +-} +-static inline void arm_handle_psci_call(ARMCPU *cpu) +-{ +- g_assert_not_reached(); +-} +-#else + /* Return true if the r0/x0 value indicates that this SMC/HVC is a PSCI call. 
*/ + bool arm_is_psci_call(ARMCPU *cpu, int excp_type); + /* Actually handle a PSCI call */ + void arm_handle_psci_call(ARMCPU *cpu); +-#endif + + /** + * arm_clear_exclusive: clear the exclusive monitor +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index aca652621f..66caf9e5e7 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -260,6 +260,7 @@ int kvm_arch_get_default_type(MachineState *ms) + int kvm_arch_init(MachineState *ms, KVMState *s) + { + int ret = 0; ++ + /* For ARM interrupt delivery is always asynchronous, + * whether we are using an in-kernel VGIC or not. + */ +@@ -310,6 +311,22 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + } + } + ++ /* ++ * To be able to handle PSCI CPU ON calls in QEMU, we need to install SMCCC ++ * filter in the Host KVM. This is required to support features like ++ * virtual CPU Hotplug on ARM platforms. ++ */ ++ if (kvm_arm_set_smccc_filter(PSCI_0_2_FN64_CPU_ON, ++ KVM_SMCCC_FILTER_FWD_TO_USER)) { ++ error_report("CPU On PSCI-to-user-space fwd filter install failed"); ++ abort(); ++ } ++ if (kvm_arm_set_smccc_filter(PSCI_0_2_FN_CPU_OFF, ++ KVM_SMCCC_FILTER_FWD_TO_USER)) { ++ error_report("CPU Off PSCI-to-user-space fwd filter install failed"); ++ abort(); ++ } ++ + kvm_arm_init_debug(s); + + return ret; +@@ -966,6 +983,38 @@ static int kvm_arm_handle_dabt_nisv(CPUState *cs, uint64_t esr_iss, + return -1; + } + ++static int kvm_arm_handle_hypercall(CPUState *cs, struct kvm_run *run) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ CPUARMState *env = &cpu->env; ++ ++ kvm_cpu_synchronize_state(cs); ++ ++ /* ++ * hard coding immediate to 0 as we dont expect non-zero value as of now ++ * This might change in future versions. Hence, KVM_GET_ONE_REG could be ++ * used in such cases but it must be enhanced then only synchronize will ++ * also fetch ESR_EL2 value. 
++ */ ++ if (run->hypercall.flags == KVM_HYPERCALL_EXIT_SMC) { ++ cs->exception_index = EXCP_SMC; ++ env->exception.syndrome = syn_aa64_smc(0); ++ } else { ++ cs->exception_index = EXCP_HVC; ++ env->exception.syndrome = syn_aa64_hvc(0); ++ } ++ env->exception.target_el = 1; ++ qemu_mutex_lock_iothread(); ++ arm_cpu_do_interrupt(cs); ++ qemu_mutex_unlock_iothread(); ++ ++ /* ++ * For PSCI, exit the kvm_run loop and process the work. Especially ++ * important if this was a CPU_OFF command and we can't return to the guest. ++ */ ++ return EXCP_INTERRUPT; ++} ++ + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + { + int ret = 0; +@@ -981,6 +1030,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + ret = kvm_arm_handle_dabt_nisv(cs, run->arm_nisv.esr_iss, + run->arm_nisv.fault_ipa); + break; ++ case KVM_EXIT_HYPERCALL: ++ ret = kvm_arm_handle_hypercall(cs, run); ++ break; + default: + qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n", + __func__, run->exit_reason); +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 9c3a35d63a..00b257bb4b 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -111,6 +111,25 @@ bool kvm_arm_hw_debug_active(CPUState *cs) + return ((cur_hw_wps > 0) || (cur_hw_bps > 0)); + } + ++static bool kvm_arm_set_vm_attr(struct kvm_device_attr *attr, const char *name) ++{ ++ int err; ++ ++ err = kvm_vm_ioctl(kvm_state, KVM_HAS_DEVICE_ATTR, attr); ++ if (err != 0) { ++ error_report("%s: KVM_HAS_DEVICE_ATTR: %s", name, strerror(-err)); ++ return false; ++ } ++ ++ err = kvm_vm_ioctl(kvm_state, KVM_SET_DEVICE_ATTR, attr); ++ if (err != 0) { ++ error_report("%s: KVM_SET_DEVICE_ATTR: %s", name, strerror(-err)); ++ return false; ++ } ++ ++ return true; ++} ++ + static bool kvm_arm_set_device_attr(CPUState *cs, struct kvm_device_attr *attr, + const char *name) + { +@@ -181,6 +200,28 @@ void kvm_arm_pvtime_init(CPUState *cs, uint64_t ipa) + } + } + ++int kvm_arm_set_smccc_filter(uint64_t func, uint8_t 
faction) ++{ ++ struct kvm_smccc_filter filter = { ++ .base = func, ++ .nr_functions = 1, ++ .action = faction, ++ }; ++ struct kvm_device_attr attr = { ++ .group = KVM_ARM_VM_SMCCC_CTRL, ++ .attr = KVM_ARM_VM_SMCCC_FILTER, ++ .flags = 0, ++ .addr = (uintptr_t)&filter, ++ }; ++ ++ if (!kvm_arm_set_vm_attr(&attr, "SMCCC Filter")) { ++ error_report("failed to set SMCCC filter in KVM Host"); ++ return -1; ++ } ++ ++ return 0; ++} ++ + static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id) + { + uint64_t ret; +@@ -629,9 +670,8 @@ int kvm_arch_init_vcpu(CPUState *cs) + } + + /* +- * When KVM is in use, PSCI is emulated in-kernel and not by qemu. +- * Currently KVM has its own idea about MPIDR assignment, so we +- * override our defaults with what we get from KVM. ++ * KVM may emulate PSCI in-kernel. Currently KVM has its own idea about ++ * MPIDR assignment, so we override our defaults with what we get from KVM. + */ + ret = kvm_get_one_reg(cs, ARM64_SYS_REG(ARM_CPU_ID_MPIDR), &mpidr); + if (ret) { +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 31408499b3..bf4df54c96 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -388,6 +388,15 @@ void kvm_arm_pvtime_init(CPUState *cs, uint64_t ipa); + + int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level); + ++/** ++ * kvm_arm_set_smccc_filter ++ * @func: SMCCC function ID the filter applies to (a single function) ++ * @faction: SMCCC filter action(handle, deny, fwd-to-user) to be deployed ++ * ++ * Sets the ARMs SMC-CC filter in KVM Host for selective hypercall exits ++ */ ++int kvm_arm_set_smccc_filter(uint64_t func, uint8_t faction); ++ + #else + + /* +@@ -462,6 +471,10 @@ static inline uint32_t kvm_arm_sve_get_vls(CPUState *cs) + g_assert_not_reached(); + } + ++static inline int kvm_arm_set_smccc_filter(uint64_t func, uint8_t faction) ++{ ++ g_assert_not_reached(); ++} + #endif + + /** +diff --git a/target/arm/meson.build b/target/arm/meson.build +index 5d04a8e94f..d1dd4932ed 100644 +--- a/target/arm/meson.build ++++
b/target/arm/meson.build +@@ -23,6 +23,7 @@ arm_system_ss.add(files( + 'arm-qmp-cmds.c', + 'cortex-regs.c', + 'machine.c', ++ 'psci.c', + 'ptw.c', + )) + +diff --git a/target/arm/tcg/psci.c b/target/arm/psci.c +similarity index 97% +rename from target/arm/tcg/psci.c +rename to target/arm/psci.c +index 6c1239bb96..a8690a16af 100644 +--- a/target/arm/tcg/psci.c ++++ b/target/arm/psci.c +@@ -21,7 +21,9 @@ + #include "exec/helper-proto.h" + #include "kvm-consts.h" + #include "qemu/main-loop.h" ++#include "qemu/error-report.h" + #include "sysemu/runstate.h" ++#include "sysemu/tcg.h" + #include "internals.h" + #include "arm-powerctl.h" + +@@ -157,6 +159,11 @@ void arm_handle_psci_call(ARMCPU *cpu) + case QEMU_PSCI_0_1_FN_CPU_SUSPEND: + case QEMU_PSCI_0_2_FN_CPU_SUSPEND: + case QEMU_PSCI_0_2_FN64_CPU_SUSPEND: ++ if (!tcg_enabled()) { ++ warn_report("CPU suspend not supported in non-tcg mode"); ++ break; ++ } ++#ifdef CONFIG_TCG + /* Affinity levels are not supported in QEMU */ + if (param[1] & 0xfffe0000) { + ret = QEMU_PSCI_RET_INVALID_PARAMS; +@@ -169,6 +176,7 @@ void arm_handle_psci_call(ARMCPU *cpu) + env->regs[0] = 0; + } + helper_wfi(env, 4); ++#endif + break; + case QEMU_PSCI_1_0_FN_PSCI_FEATURES: + switch (param[1]) { +diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build +index 6fca38f2cc..ad3cfcb3bd 100644 +--- a/target/arm/tcg/meson.build ++++ b/target/arm/tcg/meson.build +@@ -51,7 +51,3 @@ arm_ss.add(when: 'TARGET_AARCH64', if_true: files( + 'sme_helper.c', + 'sve_helper.c', + )) +- +-arm_system_ss.add(files( +- 'psci.c', +-)) +-- +2.27.0 + diff --git a/tcg-mttcg-enable-threads-to-unregister-in-tcg_ctxs.patch b/tcg-mttcg-enable-threads-to-unregister-in-tcg_ctxs.patch new file mode 100644 index 0000000000000000000000000000000000000000..28e2cf523f3eabc362a585cc6ac48cef43396ed8 --- /dev/null +++ b/tcg-mttcg-enable-threads-to-unregister-in-tcg_ctxs.patch @@ -0,0 +1,98 @@ +From f797e2713a94b48de59324d00c851d89f4438fc0 Mon Sep 17 00:00:00 2001 +From: 
Miguel Luis +Date: Fri, 3 Feb 2023 12:33:41 -0100 +Subject: [PATCH] tcg/mttcg: enable threads to unregister in tcg_ctxs[] + +[This patch is just for reference. It has problems as it does not take care of +the TranslationBlocks and their assigned regions during CPU unrealize] + +When using TCG acceleration in a multi-threaded context each vCPU has its own +thread registered in tcg_ctxs[] upon creation and tcg_cur_ctxs stores the current +number of threads that got created. Although, the lack of a mechanism to +unregister these threads is a problem when exercising vCPU hotplug/unplug +due to the fact that tcg_cur_ctxs gets incremented everytime a vCPU gets +hotplugged but never gets decremented everytime a vCPU gets unplugged, therefore +breaking the assert stating tcg_cur_ctxs < tcg_max_ctxs after a certain amount +of vCPU hotplugs. + +Suggested-by: Salil Mehta +[SM: Check Things To Do Section, https://lore.kernel.org/all/20200613213629.21984-1-salil.mehta@huawei.com/] +Signed-off-by: Miguel Luis +--- + accel/tcg/tcg-accel-ops-mttcg.c | 1 + + include/tcg/startup.h | 5 +++++ + tcg/tcg.c | 23 +++++++++++++++++++++++ + 3 files changed, 29 insertions(+) + +diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c +index fac80095bb..73866990ce 100644 +--- a/accel/tcg/tcg-accel-ops-mttcg.c ++++ b/accel/tcg/tcg-accel-ops-mttcg.c +@@ -122,6 +122,7 @@ static void *mttcg_cpu_thread_fn(void *arg) + qemu_mutex_unlock_iothread(); + rcu_remove_force_rcu_notifier(&force_rcu.notifier); + rcu_unregister_thread(); ++ tcg_unregister_thread(); + return NULL; + } + +diff --git a/include/tcg/startup.h b/include/tcg/startup.h +index f71305765c..c6cb1d92a7 100644 +--- a/include/tcg/startup.h ++++ b/include/tcg/startup.h +@@ -45,6 +45,11 @@ void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus); + */ + void tcg_register_thread(void); + ++/** ++ * tcg_unregister_thread: Unregister this thread with the TCG runtime ++ */ ++void tcg_unregister_thread(void); ++ + /** +
* tcg_prologue_init(): Generate the code for the TCG prologue + * +diff --git a/tcg/tcg.c b/tcg/tcg.c +index 896a36caeb..61fcf8597d 100644 +--- a/tcg/tcg.c ++++ b/tcg/tcg.c +@@ -764,6 +764,14 @@ static void alloc_tcg_plugin_context(TCGContext *s) + #endif + } + ++static void free_tcg_plugin_context(TCGContext *s) ++{ ++#ifdef CONFIG_PLUGIN ++ g_ptr_array_unref(s->plugin_tb->insns); ++ g_free(s->plugin_tb); ++#endif ++} ++ + /* + * All TCG threads except the parent (i.e. the one that called tcg_context_init + * and registered the target's TCG globals) must register with this function +@@ -814,6 +822,21 @@ void tcg_register_thread(void) + + tcg_ctx = s; + } ++ ++void tcg_unregister_thread(void) ++{ ++ TCGContext *s = tcg_ctx; ++ unsigned int n; ++ ++ /* Unclaim an entry in tcg_ctxs */ ++ n = qatomic_fetch_dec(&tcg_cur_ctxs); ++ g_assert(n > 1); ++ qatomic_store_release(&tcg_ctxs[n - 1], 0); ++ ++ free_tcg_plugin_context(s); ++ ++ g_free(s); ++} + #endif /* !CONFIG_USER_ONLY */ + + /* pool based memory allocation */ +-- +2.27.0 + diff --git a/tests-Disable-filemonitor-testcase.patch b/tests-Disable-filemonitor-testcase.patch new file mode 100644 index 0000000000000000000000000000000000000000..a12778e836e6f24397d8258a73dc509120cb9d7b --- /dev/null +++ b/tests-Disable-filemonitor-testcase.patch @@ -0,0 +1,32 @@ +From bad33579c56b73d56e0b220c98faad7893609b85 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Mon, 18 Mar 2024 10:21:04 +0800 +Subject: [PATCH] tests: Disable filemonitor testcase + +Since filemonitor testcase requires that host kernel being a LTS version, +we cannot guarantee that on OBS system. Lets disable it by default. 
+ +Signed-off-by: Ying Fang +Signed-off-by: Jinhao Gao +Signed-off-by: Yuan Zhang +--- + tests/unit/meson.build | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/tests/unit/meson.build b/tests/unit/meson.build +index a05d471090..598ba41bb9 100644 +--- a/tests/unit/meson.build ++++ b/tests/unit/meson.build +@@ -142,9 +142,6 @@ if have_system + 'test-vmstate': [migration, io], + 'test-yank': ['socket-helpers.c', qom, io, chardev] + } +- if config_host_data.get('CONFIG_INOTIFY1') +- tests += {'test-util-filemonitor': []} +- endif + + # Some tests: test-char, test-qdev-global-props, and test-qga, + # are not runnable under TSan due to a known issue. +-- +2.27.0 + diff --git a/tests-acpi-Update-expected-ACPI-tables-for-vcpu-hotp.patch b/tests-acpi-Update-expected-ACPI-tables-for-vcpu-hotp.patch new file mode 100644 index 0000000000000000000000000000000000000000..a6cd45895335b8bbfdcba149b7c30692b9afb0e7 --- /dev/null +++ b/tests-acpi-Update-expected-ACPI-tables-for-vcpu-hotp.patch @@ -0,0 +1,62 @@ +From cecec52ca38fa98a821c2a833e71a5fae1cc735d Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 2 Apr 2024 20:10:51 +0800 +Subject: [PATCH] tests/acpi: Update expected ACPI tables for vcpu hotplug + +Update the ACPI tables for vcpu hotplug. 
+ +Signed-off-by: Keqian Zhu +--- + tests/qtest/bios-tables-test-allowed-diff.h | 40 ------------------ + 1 files changed, 40 deletions(-) + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index c7406e395a..dfb8523c8b 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1,41 +1 @@ + /* List of comma-separated changed AML files to ignore */ +-"tests/data/acpi/pc/DSDT", +-"tests/data/acpi/pc/DSDT.acpierst", +-"tests/data/acpi/pc/DSDT.acpihmat", +-"tests/data/acpi/pc/DSDT.bridge", +-"tests/data/acpi/pc/DSDT.cphp", +-"tests/data/acpi/pc/DSDT.dimmpxm", +-"tests/data/acpi/pc/DSDT.hpbridge", +-"tests/data/acpi/pc/DSDT.hpbrroot", +-"tests/data/acpi/pc/DSDT.ipmikcs", +-"tests/data/acpi/pc/DSDT.memhp", +-"tests/data/acpi/pc/DSDT.nohpet", +-"tests/data/acpi/pc/DSDT.numamem", +-"tests/data/acpi/pc/DSDT.roothp", +-"tests/data/acpi/q35/DSDT", +-"tests/data/acpi/q35/DSDT.acpierst", +-"tests/data/acpi/q35/DSDT.acpihmat", +-"tests/data/acpi/q35/DSDT.acpihmat-noinitiator", +-"tests/data/acpi/q35/DSDT.applesmc", +-"tests/data/acpi/q35/DSDT.bridge", +-"tests/data/acpi/q35/DSDT.cphp", +-"tests/data/acpi/q35/DSDT.cxl", +-"tests/data/acpi/q35/DSDT.dimmpxm", +-"tests/data/acpi/q35/DSDT.ipmibt", +-"tests/data/acpi/q35/DSDT.ipmismbus", +-"tests/data/acpi/q35/DSDT.ivrs", +-"tests/data/acpi/q35/DSDT.memhp", +-"tests/data/acpi/q35/DSDT.mmio64", +-"tests/data/acpi/q35/DSDT.multi-bridge", +-"tests/data/acpi/q35/DSDT.noacpihp", +-"tests/data/acpi/q35/DSDT.nohpet", +-"tests/data/acpi/q35/DSDT.numamem", +-"tests/data/acpi/q35/DSDT.pvpanic-isa", +-"tests/data/acpi/q35/DSDT.tis.tpm12", +-"tests/data/acpi/q35/DSDT.tis.tpm2", +-"tests/data/acpi/q35/DSDT.viot", +-"tests/data/acpi/virt/DSDT", +-"tests/data/acpi/virt/DSDT.acpihmatvirt", +-"tests/data/acpi/virt/DSDT.memhp", +-"tests/data/acpi/virt/DSDT.pxb", +-"tests/data/acpi/virt/DSDT.topology", +\ No newline at end of file +-- +2.27.0 + 
diff --git a/tests-acpi-bios-tables-test-Allow-changes-to-virt-DS.patch b/tests-acpi-bios-tables-test-Allow-changes-to-virt-DS.patch new file mode 100644 index 0000000000000000000000000000000000000000..67d9c444d4f5069d6eeb2a4ee6f4e4cce374ea88 --- /dev/null +++ b/tests-acpi-bios-tables-test-Allow-changes-to-virt-DS.patch @@ -0,0 +1,62 @@ +From 6cfe9afcaceb7d9fb7d54f08b2362fc654b54d12 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 2 Apr 2024 17:23:18 +0800 +Subject: [PATCH] tests/acpi/bios-tables-test: Allow changes to virt/DSDT file + +Prepare to change of cpu aml. + +Signed-off-by: Keqian Zhu +--- + tests/qtest/bios-tables-test-allowed-diff.h | 40 +++++++++++++++++++++ + 1 file changed, 40 insertions(+) + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index dfb8523c8b..c7406e395a 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1 +1,41 @@ + /* List of comma-separated changed AML files to ignore */ ++"tests/data/acpi/pc/DSDT", ++"tests/data/acpi/pc/DSDT.acpierst", ++"tests/data/acpi/pc/DSDT.acpihmat", ++"tests/data/acpi/pc/DSDT.bridge", ++"tests/data/acpi/pc/DSDT.cphp", ++"tests/data/acpi/pc/DSDT.dimmpxm", ++"tests/data/acpi/pc/DSDT.hpbridge", ++"tests/data/acpi/pc/DSDT.hpbrroot", ++"tests/data/acpi/pc/DSDT.ipmikcs", ++"tests/data/acpi/pc/DSDT.memhp", ++"tests/data/acpi/pc/DSDT.nohpet", ++"tests/data/acpi/pc/DSDT.numamem", ++"tests/data/acpi/pc/DSDT.roothp", ++"tests/data/acpi/q35/DSDT", ++"tests/data/acpi/q35/DSDT.acpierst", ++"tests/data/acpi/q35/DSDT.acpihmat", ++"tests/data/acpi/q35/DSDT.acpihmat-noinitiator", ++"tests/data/acpi/q35/DSDT.applesmc", ++"tests/data/acpi/q35/DSDT.bridge", ++"tests/data/acpi/q35/DSDT.cphp", ++"tests/data/acpi/q35/DSDT.cxl", ++"tests/data/acpi/q35/DSDT.dimmpxm", ++"tests/data/acpi/q35/DSDT.ipmibt", ++"tests/data/acpi/q35/DSDT.ipmismbus", ++"tests/data/acpi/q35/DSDT.ivrs", 
++"tests/data/acpi/q35/DSDT.memhp", ++"tests/data/acpi/q35/DSDT.mmio64", ++"tests/data/acpi/q35/DSDT.multi-bridge", ++"tests/data/acpi/q35/DSDT.noacpihp", ++"tests/data/acpi/q35/DSDT.nohpet", ++"tests/data/acpi/q35/DSDT.numamem", ++"tests/data/acpi/q35/DSDT.pvpanic-isa", ++"tests/data/acpi/q35/DSDT.tis.tpm12", ++"tests/data/acpi/q35/DSDT.tis.tpm2", ++"tests/data/acpi/q35/DSDT.viot", ++"tests/data/acpi/virt/DSDT", ++"tests/data/acpi/virt/DSDT.acpihmatvirt", ++"tests/data/acpi/virt/DSDT.memhp", ++"tests/data/acpi/virt/DSDT.pxb", ++"tests/data/acpi/virt/DSDT.topology", +\ No newline at end of file +-- +2.27.0 + diff --git a/tests-virt-Allow-changes-to-PPTT-test-table.patch b/tests-virt-Allow-changes-to-PPTT-test-table.patch new file mode 100644 index 0000000000000000000000000000000000000000..b9cb5f6893a4ad9a7e80437b9a778abdb259bfae --- /dev/null +++ b/tests-virt-Allow-changes-to-PPTT-test-table.patch @@ -0,0 +1,25 @@ +From 3402740cb4f6d6b9baabfde0a7667b4990b010a5 Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Sat, 30 Mar 2024 19:21:59 +0800 +Subject: [PATCH] tests: virt: Allow changes to PPTT test table + +Allow changes to test/data/acpi/virt/PPTT*, prepare to change the +building policy of the cluster topology. 
+ +Signed-off-by: Kunkun Jiang +--- + tests/qtest/bios-tables-test-allowed-diff.h | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index dfb8523c8b..18d02a710d 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1 +1,4 @@ + /* List of comma-separated changed AML files to ignore */ ++"tests/data/acpi/virt/PPTT", ++"tests/data/acpi/virt/PPTT.acpihmatvirt", ++"tests/data/acpi/virt/PPTT.topology", +-- +2.27.0 + diff --git a/tests-virt-Update-expected-ACPI-tables-for-virt-test.patch b/tests-virt-Update-expected-ACPI-tables-for-virt-test.patch new file mode 100644 index 0000000000000000000000000000000000000000..6cb5a114d1d72fba550886fe5f58ea0ba1954ffd --- /dev/null +++ b/tests-virt-Update-expected-ACPI-tables-for-virt-test.patch @@ -0,0 +1,25 @@ +From b062e2f182af4c44fbd3a03eda9c934686037032 Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Sat, 30 Mar 2024 20:16:32 +0800 +Subject: [PATCH] tests: virt: Update expected ACPI tables for virt test + +Update the ACPI tables according to the acpi aml_build change, also +empty bios-tables-test-allowed-diff.h. 
+ +Signed-off-by: Kunkun Jiang +--- + tests/qtest/bios-tables-test-allowed-diff.h | 3 --- + 1 files changed, 3 deletions(-) + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index 18d02a710d..dfb8523c8b 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1,4 +1 @@ + /* List of comma-separated changed AML files to ignore */ +-"tests/data/acpi/virt/PPTT", +-"tests/data/acpi/virt/PPTT.acpihmatvirt", +-"tests/data/acpi/virt/PPTT.topology", +-- +2.27.0 + diff --git a/travis-ci-Rename-SOFTMMU-SYSTEM.patch b/travis-ci-Rename-SOFTMMU-SYSTEM.patch new file mode 100644 index 0000000000000000000000000000000000000000..fa788135e89c9c9c09946848f47064de448e0545 --- /dev/null +++ b/travis-ci-Rename-SOFTMMU-SYSTEM.patch @@ -0,0 +1,67 @@ +From c03415f3b75e6a37c7eb392ef62bf92b94267b4d Mon Sep 17 00:00:00 2001 +From: gaojiazhen +Date: Mon, 25 Mar 2024 17:26:52 +0800 +Subject: [PATCH] travis-ci: Rename SOFTMMU -> SYSTEM +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from 47833f817cc597db124c690bd14600bb5d00e824 + +Since we *might* have user emulation with softmmu, +rename MAIN_SOFTMMU_TARGETS as MAIN_SYSTEM_TARGETS +to express 'system emulation targets'. 
+ +Signed-off-by: Philippe Mathieu-Daudé +Message-ID: <20240313213339.82071-3-philmd@linaro.org> +Reviewed-by: Thomas Huth +Reviewed-by: Richard Henderson +Signed-off-by: Thomas Huth +Signed-off-by: Gao Jiazhen +--- + .travis.yml | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/.travis.yml b/.travis.yml +index 76859d48da..597d151b80 100644 +--- a/.travis.yml ++++ b/.travis.yml +@@ -35,7 +35,7 @@ env: + - TEST_BUILD_CMD="" + - TEST_CMD="make check V=1" + # This is broadly a list of "mainline" system targets which have support across the major distros +- - MAIN_SOFTMMU_TARGETS="aarch64-softmmu,mips64-softmmu,ppc64-softmmu,riscv64-softmmu,s390x-softmmu,x86_64-softmmu" ++ - MAIN_SYSTEM_TARGETS="aarch64-softmmu,mips64-softmmu,ppc64-softmmu,riscv64-softmmu,s390x-softmmu,x86_64-softmmu" + - CCACHE_SLOPPINESS="include_file_ctime,include_file_mtime" + - CCACHE_MAXSIZE=1G + - G_MESSAGES_DEBUG=error +@@ -114,7 +114,7 @@ jobs: + env: + - TEST_CMD="make check check-tcg V=1" + - CONFIG="--disable-containers --enable-fdt=system +- --target-list=${MAIN_SOFTMMU_TARGETS} --cxx=/bin/false" ++ --target-list=${MAIN_SYSTEM_TARGETS} --cxx=/bin/false" + - UNRELIABLE=true + + - name: "[ppc64] GCC check-tcg" +@@ -185,7 +185,7 @@ jobs: + env: + - TEST_CMD="make check check-tcg V=1" + - CONFIG="--disable-containers --enable-fdt=system +- --target-list=${MAIN_SOFTMMU_TARGETS},s390x-linux-user" ++ --target-list=${MAIN_SYSTEM_TARGETS},s390x-linux-user" + - UNRELIABLE=true + script: + - BUILD_RC=0 && make -j${JOBS} || BUILD_RC=$? 
+@@ -226,7 +226,7 @@ jobs: + - genisoimage + env: + - CONFIG="--disable-containers --enable-fdt=system --audio-drv-list=sdl +- --disable-user --target-list-exclude=${MAIN_SOFTMMU_TARGETS}" ++ --disable-user --target-list-exclude=${MAIN_SYSTEM_TARGETS}" + + - name: "[s390x] GCC (user)" + arch: s390x +-- +2.27.0 + diff --git a/ui-clipboard-mark-type-as-not-available-when-there-i.patch b/ui-clipboard-mark-type-as-not-available-when-there-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..843479a9c61b25ad1857352b4dafae1c5c2f4feb --- /dev/null +++ b/ui-clipboard-mark-type-as-not-available-when-there-i.patch @@ -0,0 +1,89 @@ +From 855f7f30de962f79393f0b9f8b0355b996d72de7 Mon Sep 17 00:00:00 2001 +From: Fiona Ebner +Date: Wed, 24 Jan 2024 11:57:48 +0100 +Subject: [PATCH] ui/clipboard: mark type as not available when there is no + data (CVE-2023-6683) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +With VNC, a client can send a non-extended VNC_MSG_CLIENT_CUT_TEXT +message with len=0. In qemu_clipboard_set_data(), the clipboard info +will be updated setting data to NULL (because g_memdup(data, size) +returns NULL when size is 0). If the client does not set the +VNC_ENCODING_CLIPBOARD_EXT feature when setting up the encodings, then +the 'request' callback for the clipboard peer is not initialized. +Later, because data is NULL, qemu_clipboard_request() can be reached +via vdagent_chr_write() and vdagent_clipboard_recv_request() and +there, the clipboard owner's 'request' callback will be attempted to +be called, but that is a NULL pointer. + +In particular, this can happen when using the KRDC (22.12.3) VNC +client. + +Another scenario leading to the same issue is with two clients (say +noVNC and KRDC): + +The noVNC client sets the extension VNC_FEATURE_CLIPBOARD_EXT and +initializes its cbpeer. + +The KRDC client does not, but triggers a vnc_client_cut_text() (note +it's not the _ext variant)). 
There, a new clipboard info with it as +the 'owner' is created and via qemu_clipboard_set_data() is called, +which in turn calls qemu_clipboard_update() with that info. + +In qemu_clipboard_update(), the notifier for the noVNC client will be +called, i.e. vnc_clipboard_notify() and also set vs->cbinfo for the +noVNC client. The 'owner' in that clipboard info is the clipboard peer +for the KRDC client, which did not initialize the 'request' function. +That sounds correct to me, it is the owner of that clipboard info. + +Then when noVNC sends a VNC_MSG_CLIENT_CUT_TEXT message (it did set +the VNC_FEATURE_CLIPBOARD_EXT feature correctly, so a check for it +passes), that clipboard info is passed to qemu_clipboard_request() and +the original segfault still happens. + +Fix the issue by handling updates with size 0 differently. In +particular, mark in the clipboard info that the type is not available. + +While at it, switch to g_memdup2(), because g_memdup() is deprecated. + +Cc: qemu-stable@nongnu.org +Fixes: CVE-2023-6683 +Reported-by: Markus Frank +Suggested-by: Marc-André Lureau +Signed-off-by: Fiona Ebner +Reviewed-by: Marc-André Lureau +Tested-by: Markus Frank +Message-ID: <20240124105749.204610-1-f.ebner@proxmox.com> +Signed-off-by: liuxiangdong +--- + ui/clipboard.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/ui/clipboard.c b/ui/clipboard.c +index 3d14bffaf8..b3f6fa3c9e 100644 +--- a/ui/clipboard.c ++++ b/ui/clipboard.c +@@ -163,9 +163,15 @@ void qemu_clipboard_set_data(QemuClipboardPeer *peer, + } + + g_free(info->types[type].data); +- info->types[type].data = g_memdup(data, size); +- info->types[type].size = size; +- info->types[type].available = true; ++ if (size) { ++ info->types[type].data = g_memdup2(data, size); ++ info->types[type].size = size; ++ info->types[type].available = true; ++ } else { ++ info->types[type].data = NULL; ++ info->types[type].size = 0; ++ info->types[type].available = false; ++ } + + if 
(update) { + qemu_clipboard_update(info); +-- +2.27.0 + diff --git a/vdpa-correct-param-passed-in-when-unregister-save.patch b/vdpa-correct-param-passed-in-when-unregister-save.patch new file mode 100644 index 0000000000000000000000000000000000000000..9f3aa089a764098f3f976cb99fc5e47a56c775a8 --- /dev/null +++ b/vdpa-correct-param-passed-in-when-unregister-save.patch @@ -0,0 +1,30 @@ +From 5714aaddcbc313e63da435a253d9d472984d7b49 Mon Sep 17 00:00:00 2001 +From: libai +Date: Thu, 14 Dec 2023 11:22:54 +0800 +Subject: [PATCH] vdpa: correct param passed in when unregister save + +The idstr passed in the unregister_savevm function is inconsisten +with the idstr passed in when register_savevm_live registration. +Needs to be modified, otherwise migration will fail after hotunplug +all vdpa devices. + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-mig.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +index b889dd4715..1d299019da 100644 +--- a/hw/virtio/vdpa-dev-mig.c ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -404,6 +404,6 @@ void vdpa_migration_register(VhostVdpaDevice *vdev) + void vdpa_migration_unregister(VhostVdpaDevice *vdev) + { + migration_remove_notifier(&vdev->migration_state); +- unregister_savevm(VMSTATE_IF(&vdev->parent_obj.parent_obj), "vdpa", DEVICE(vdev)); ++ unregister_savevm(NULL, "vdpa", DEVICE(vdev)); + qemu_del_vm_change_state_handler(vdev->vmstate); + } +-- +2.27.0 + diff --git a/vdpa-don-t-suspend-resume-device-when-vdpa-device-no.patch b/vdpa-don-t-suspend-resume-device-when-vdpa-device-no.patch new file mode 100644 index 0000000000000000000000000000000000000000..4a9ed108a970b2c447c9aaa0a8de42f7009300f4 --- /dev/null +++ b/vdpa-don-t-suspend-resume-device-when-vdpa-device-no.patch @@ -0,0 +1,67 @@ +From b82f02e93d5efa2ea62dd135c508cb707fdd35a7 Mon Sep 17 00:00:00 2001 +From: libai +Date: Tue, 19 Dec 2023 20:32:00 +0800 +Subject: [PATCH] vdpa: don't suspend/resume device when vdpa 
device not + started + +When vdpa device not started, we don't need to suspend vdpa device +and send vdpa device state information. Therefore, add the suspended +flag of vdpa device to distinguish whether the device is suspended and +use it to determine whether the device needs to resume in dest qemu. + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-mig.c | 23 +++++++++++++++-------- + 1 file changed, 15 insertions(+), 8 deletions(-) + +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +index 1d299019da..887c96a201 100644 +--- a/hw/virtio/vdpa-dev-mig.c ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -294,10 +294,13 @@ static int vdpa_save_complete_precopy(QEMUFile *f, void *opaque) + int ret; + + qemu_put_be64(f, VDPA_MIG_FLAG_DEV_CONFIG_STATE); +- ret = vhost_vdpa_dev_buffer_save(hdev, f); +- if (ret) { +- error_report("Save vdpa device buffer failed: %d\n", ret); +- return ret; ++ qemu_put_be16(f, (uint16_t)vdev->suspended); ++ if (vdev->suspended) { ++ ret = vhost_vdpa_dev_buffer_save(hdev, f); ++ if (ret) { ++ error_report("Save vdpa device buffer failed: %d\n", ret); ++ return ret; ++ } + } + qemu_put_be64(f, VDPA_MIG_FLAG_END_OF_STATE); + +@@ -311,6 +314,7 @@ static int vdpa_load_state(QEMUFile *f, void *opaque, int version_id) + + int ret; + uint64_t data; ++ uint16_t suspended; + + data = qemu_get_be64(f); + while (data != VDPA_MIG_FLAG_END_OF_STATE) { +@@ -323,10 +327,13 @@ static int vdpa_load_state(QEMUFile *f, void *opaque, int version_id) + return -EINVAL; + } + } else if (data == VDPA_MIG_FLAG_DEV_CONFIG_STATE) { +- ret = vhost_vdpa_dev_buffer_load(hdev, f); +- if (ret) { +- error_report("fail to restore device buffer.\n"); +- return ret; ++ suspended = qemu_get_be16(f); ++ if (suspended) { ++ ret = vhost_vdpa_dev_buffer_load(hdev, f); ++ if (ret) { ++ error_report("fail to restore device buffer.\n"); ++ return ret; ++ } + } + } + +-- +2.27.0 + diff --git a/vdpa-implement-vdpa-device-migration.patch b/vdpa-implement-vdpa-device-migration.patch 
new file mode 100644 index 0000000000000000000000000000000000000000..295cdcdc24869ad704f467190303b456a14136e7 --- /dev/null +++ b/vdpa-implement-vdpa-device-migration.patch @@ -0,0 +1,75 @@ +From 4688e12c57a34801010abf2a4cf528fcef3b9ec0 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:59:56 +0800 +Subject: [PATCH] vdpa: implement vdpa device migration + +Integrate the live migration code, call the registered live +migration function, and open the vdpa live migration prototype + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c +index f22d5d5bc0..6af78a4229 100644 +--- a/hw/virtio/vdpa-dev.c ++++ b/hw/virtio/vdpa-dev.c +@@ -28,6 +28,8 @@ + #include "hw/virtio/vdpa-dev.h" + #include "sysemu/sysemu.h" + #include "sysemu/runstate.h" ++#include "hw/virtio/vdpa-dev-mig.h" ++#include "migration/migration.h" + + static void + vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +@@ -154,6 +156,8 @@ static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) + vhost_vdpa_device_dummy_handle_output); + } + ++ vdpa_migration_register(v); ++ + return; + + free_config: +@@ -173,6 +177,7 @@ static void vhost_vdpa_device_unrealize(DeviceState *dev) + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); + int i; + ++ vdpa_migration_unregister(s); + virtio_set_status(vdev, 0); + + for (i = 0; i < s->num_queues; i++) { +@@ -308,6 +313,7 @@ static void vhost_vdpa_device_stop(VirtIODevice *vdev) + static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) + { + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); ++ MigrationState *ms = migrate_get_current(); + bool should_start = virtio_device_started(vdev, status); + Error *local_err = NULL; + int ret; +@@ -320,6 +326,11 @@ static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) + return; + } + ++ if (ms->state == RUN_STATE_PAUSED || ++ 
ms->state == RUN_STATE_RESTORE_VM) { ++ return; ++ } ++ + if (should_start) { + ret = vhost_vdpa_device_start(vdev, &local_err); + if (ret < 0) { +@@ -338,7 +349,7 @@ static Property vhost_vdpa_device_properties[] = { + + static const VMStateDescription vmstate_vhost_vdpa_device = { + .name = "vhost-vdpa-device", +- .unmigratable = 1, ++ .unmigratable = 0, + .minimum_version_id = 1, + .version_id = 1, + .fields = (VMStateField[]) { +-- +2.27.0 + diff --git a/vdpa-move-memory-listener-to-the-realize-stage.patch b/vdpa-move-memory-listener-to-the-realize-stage.patch new file mode 100644 index 0000000000000000000000000000000000000000..56137c61c50fca095cf11049bf57e53340f0bb5e --- /dev/null +++ b/vdpa-move-memory-listener-to-the-realize-stage.patch @@ -0,0 +1,91 @@ +From 587f42300488af4478d7aa1b62e2b351155621db Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 16:01:16 +0800 +Subject: [PATCH] vdpa: move memory listener to the realize stage + +Move the memory listener registration of vdpa from the start stage +to the realize stage. Avoid that in the start phase, the memory +listener callback function has not yet been processed. 
+ +Signed-off-by: libai +--- + hw/virtio/vdpa-dev.c | 4 ++++ + hw/virtio/vhost-vdpa.c | 5 ----- + 2 files changed, 4 insertions(+), 5 deletions(-) + +diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c +index 6af78a4229..877bf7464f 100644 +--- a/hw/virtio/vdpa-dev.c ++++ b/hw/virtio/vdpa-dev.c +@@ -30,6 +30,7 @@ + #include "sysemu/runstate.h" + #include "hw/virtio/vdpa-dev-mig.h" + #include "migration/migration.h" ++#include "exec/address-spaces.h" + + static void + vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +@@ -125,6 +126,7 @@ static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) + goto free_vqs; + } + ++ memory_listener_register(&v->vdpa.listener, &address_space_memory); + v->config_size = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_CONFIG_SIZE, + errp); +@@ -163,6 +165,7 @@ static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) + free_config: + g_free(v->config); + vhost_cleanup: ++ memory_listener_unregister(&v->vdpa.listener); + vhost_dev_cleanup(&v->dev); + free_vqs: + g_free(vqs); +@@ -188,6 +191,7 @@ static void vhost_vdpa_device_unrealize(DeviceState *dev) + + g_free(s->config); + g_free(s->dev.vqs); ++ memory_listener_unregister(&s->vdpa.listener); + vhost_dev_cleanup(&s->dev); + qemu_close(s->vhostfd); + s->vhostfd = -1; +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 063e941544..30408f2069 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -1320,8 +1320,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) + "IOMMU and try again"); + return -1; + } +- memory_listener_register(&v->listener, dev->vdev->dma_as); +- + return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); + } + +@@ -1515,7 +1513,6 @@ static bool vhost_vdpa_force_iommu(struct vhost_dev *dev) + + static int vhost_vdpa_suspend_device(struct vhost_dev *dev) + { +- struct vhost_vdpa *v = dev->opaque; + int ret; + + vhost_vdpa_svqs_stop(dev); +@@ 
-1526,7 +1523,6 @@ static int vhost_vdpa_suspend_device(struct vhost_dev *dev) + } + + ret = vhost_vdpa_call(dev, VHOST_VDPA_SUSPEND, NULL); +- memory_listener_unregister(&v->listener); + return ret; + } + +@@ -1548,7 +1544,6 @@ static int vhost_vdpa_resume_device(struct vhost_dev *dev) + return 0; + } + +- memory_listener_register(&v->listener, &address_space_memory); + return vhost_vdpa_call(dev, VHOST_VDPA_RESUME, NULL); + } + +-- +2.27.0 + diff --git a/vdpa-set-vring-enable-only-if-the-vring-address-has-.patch b/vdpa-set-vring-enable-only-if-the-vring-address-has-.patch new file mode 100644 index 0000000000000000000000000000000000000000..8f18cb554fd2d0cd90ef726a20b52ffb6219f98c --- /dev/null +++ b/vdpa-set-vring-enable-only-if-the-vring-address-has-.patch @@ -0,0 +1,38 @@ +From 0f515ff831f46ef34cd83aa145e547e48d8b3b56 Mon Sep 17 00:00:00 2001 +From: libai +Date: Thu, 14 Dec 2023 11:05:52 +0800 +Subject: [PATCH] vdpa: set vring enable only if the vring address has already + been set + +Currently, vhost-vdpa does not determine the status of each vring when +performing the enable operation on vring. When the vBIOS(EDK2) is running, +the driver will not enable all vrings. In this case, setting all vrings +to enable is isconsistent with the actual situation. + +Add logic when enabling vring, make a judement on the vring status. If the +vring address is not set, the vring will not enabled. 
+ +Signed-off-by: libai +--- + hw/virtio/vhost-vdpa.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 30408f2069..d49826845f 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -890,6 +890,11 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx) + .index = idx, + .num = 1, + }; ++ hwaddr addr = virtio_queue_get_desc_addr(dev->vdev, idx); ++ if (addr == 0) { ++ return 0; ++ } ++ + int r = vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); + + trace_vhost_vdpa_set_vring_ready(dev, idx, r); +-- +2.27.0 + diff --git a/vdpa-support-vdpa-device-suspend-resume.patch b/vdpa-support-vdpa-device-suspend-resume.patch new file mode 100644 index 0000000000000000000000000000000000000000..7e8ff653152a49d9410c2479427368f9a50c07d2 --- /dev/null +++ b/vdpa-support-vdpa-device-suspend-resume.patch @@ -0,0 +1,120 @@ +From e58b48ab2bb679f4c661301019d6f94bd39f93e5 Mon Sep 17 00:00:00 2001 +From: libai +Date: Tue, 19 Dec 2023 20:18:03 +0800 +Subject: [PATCH] vdpa: support vdpa device suspend/resume + +only implement suspend and resume interface used for migration. The +current implementation still has bugs when suspend/resume a virtual +machine. Fix it. 
+ +Fixes: 4c5a9a0703 (""vhost: implement vhost_vdpa_device_suspend/resume) + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-mig.c | 16 +++++++++++----- + hw/virtio/vdpa-dev.c | 8 +------- + include/hw/virtio/vdpa-dev.h | 1 + + 3 files changed, 13 insertions(+), 12 deletions(-) + +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +index 9b47e3ed45..8b13f89c85 100644 +--- a/hw/virtio/vdpa-dev-mig.c ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -143,6 +143,7 @@ static int vhost_vdpa_device_suspend(VhostVdpaDevice *vdpa) + } + + vdpa->started = false; ++ vdpa->suspended = true; + + ret = vhost_dev_suspend(&vdpa->dev, vdev, false); + if (ret) { +@@ -165,6 +166,7 @@ set_guest_notifiers_fail: + } + + suspend_fail: ++ vdpa->suspended = false; + vdpa->started = true; + return ret; + } +@@ -201,6 +203,7 @@ static int vhost_vdpa_device_resume(VhostVdpaDevice *vdpa) + goto err_guest_notifiers; + } + vdpa->started = true; ++ vdpa->suspended = false; + + /* + * guest_notifier_mask/pending not used yet, so just unmask +@@ -241,7 +244,7 @@ static void vdpa_dev_vmstate_change(void *opaque, bool running, RunState state) + MigrationIncomingState *mis = migration_incoming_get_current(); + + if (!running) { +- if (ms->state == RUN_STATE_PAUSED) { ++ if (ms->state == MIGRATION_STATUS_ACTIVE || state == RUN_STATE_PAUSED) { + ret = vhost_vdpa_device_suspend(vdpa); + if (ret) { + error_report("suspend vdpa device failed: %d\n", ret); +@@ -251,16 +254,19 @@ static void vdpa_dev_vmstate_change(void *opaque, bool running, RunState state) + } + } + } else { +- if (ms->state == RUN_STATE_RESTORE_VM) { ++ if (vdpa->suspended) { + ret = vhost_vdpa_device_resume(vdpa); + if (ret) { +- error_report("migration dest resume device failed, abort!\n"); +- exit(EXIT_FAILURE); ++ error_report("vhost vdpa device resume failed: %d\n", ret); + } + } + + if (mis->state == RUN_STATE_RESTORE_VM) { +- vhost_vdpa_call(hdev, VHOST_VDPA_RESUME, NULL); ++ ret = vhost_vdpa_call(hdev, VHOST_VDPA_RESUME, NULL); 
++ if (ret) { ++ error_report("migration dest resume device failed: %d\n", ret); ++ exit(EXIT_FAILURE); ++ } + /* post resume */ + mis->bh = qemu_bh_new(vdpa_dev_migration_handle_incoming_bh, + hdev); +diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c +index 877bf7464f..91e71847b0 100644 +--- a/hw/virtio/vdpa-dev.c ++++ b/hw/virtio/vdpa-dev.c +@@ -317,7 +317,6 @@ static void vhost_vdpa_device_stop(VirtIODevice *vdev) + static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) + { + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); +- MigrationState *ms = migrate_get_current(); + bool should_start = virtio_device_started(vdev, status); + Error *local_err = NULL; + int ret; +@@ -326,12 +325,7 @@ static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) + should_start = false; + } + +- if (s->started == should_start) { +- return; +- } +- +- if (ms->state == RUN_STATE_PAUSED || +- ms->state == RUN_STATE_RESTORE_VM) { ++ if (s->started == should_start || s->suspended) { + return; + } + +diff --git a/include/hw/virtio/vdpa-dev.h b/include/hw/virtio/vdpa-dev.h +index 20f50c76c6..60e9c3f3fe 100644 +--- a/include/hw/virtio/vdpa-dev.h ++++ b/include/hw/virtio/vdpa-dev.h +@@ -37,6 +37,7 @@ struct VhostVdpaDevice { + int config_size; + uint16_t queue_size; + bool started; ++ bool suspended; + int (*post_init)(VhostVdpaDevice *v, Error **errp); + VMChangeStateEntry *vmstate; + Notifier migration_state; +-- +2.27.0 + diff --git a/vdpa-suspend-function-return-0-when-the-vdpa-device-.patch b/vdpa-suspend-function-return-0-when-the-vdpa-device-.patch new file mode 100644 index 0000000000000000000000000000000000000000..69f13f0993b2229d4fac841eb52be1d6f5552d55 --- /dev/null +++ b/vdpa-suspend-function-return-0-when-the-vdpa-device-.patch @@ -0,0 +1,45 @@ +From a78602118043eb9923996504d5b2e1b14a1ec38d Mon Sep 17 00:00:00 2001 +From: libai +Date: Thu, 21 Dec 2023 11:03:37 +0800 +Subject: [PATCH] vdpa: suspend function return 0 when the vdpa 
device is + stopped + +When vhost vdpa device is stopped(vdpa->started is false), suspend +operation do nothing and return success, instead of return failure. + +The same goes for resume function. + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-mig.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +index 8b13f89c85..b889dd4715 100644 +--- a/hw/virtio/vdpa-dev-mig.c ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -134,8 +134,8 @@ static int vhost_vdpa_device_suspend(VhostVdpaDevice *vdpa) + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + +- if (!vdpa->started) { +- return -EFAULT; ++ if (!vdpa->started || vdpa->suspended) { ++ return 0; + } + + if (!k->set_guest_notifiers) { +@@ -178,6 +178,10 @@ static int vhost_vdpa_device_resume(VhostVdpaDevice *vdpa) + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int i, ret; + ++ if (vdpa->started || !vdpa->suspended) { ++ return 0; ++ } ++ + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers\n"); + return -ENOSYS; +-- +2.27.0 + diff --git a/vfio-Maintain-DMA-mapping-range-for-the-container.patch b/vfio-Maintain-DMA-mapping-range-for-the-container.patch new file mode 100644 index 0000000000000000000000000000000000000000..5898ae86434982c5e54d5d3eaf152cb96ba1ff98 --- /dev/null +++ b/vfio-Maintain-DMA-mapping-range-for-the-container.patch @@ -0,0 +1,204 @@ +From bd2d81775edf285149346bf793d9b71236d7cf34 Mon Sep 17 00:00:00 2001 +From: Zenghui Yu +Date: Sat, 8 May 2021 17:31:04 +0800 +Subject: [PATCH] vfio: Maintain DMA mapping range for the container + +When synchronizing dirty bitmap from kernel VFIO we do it in a +per-iova-range fashion and we allocate the userspace bitmap for each of the +ioctl. 
This patch introduces `struct VFIODMARange` to describe a range of +the given DMA mapping with respect to a VFIO_IOMMU_MAP_DMA operation, and +make the bitmap cache of this range be persistent so that we don't need to +g_try_malloc0() every time. Note that the new structure is almost a copy of +`struct vfio_iommu_type1_dma_map` but only internally used by QEMU. + +More importantly, the cached per-iova-range dirty bitmap will be further +used when we want to add support for the CLEAR_BITMAP and this cached +bitmap will be used to guarantee we don't clear any unknown dirty bits +otherwise that can be a severe data loss issue for migration code. + +It's pretty intuitive to maintain a bitmap per container since we perform +log_sync at this granule. But I don't know how to deal with things like +memory hot-{un}plug, sparse DMA mappings, etc. Suggestions welcome. + +* yet something to-do: + - can't work with guest viommu + - no locks + - etc + +[ The idea and even the commit message are largely inherited from kvm side. + See commit 9f4bf4baa8b820c7930e23c9566c9493db7e1d25. 
] + +Signed-off-by: Zenghui Yu +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 9 +++++-- + hw/vfio/container.c | 49 +++++++++++++++++++++++++++++++++++ + include/hw/vfio/vfio-common.h | 12 +++++++++ + 3 files changed, 68 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index e70fdf5e0c..564e933135 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1156,6 +1156,7 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + vfio_devices_all_device_dirty_tracking(container); + uint64_t dirty_pages; + VFIOBitmap vbmap; ++ VFIODMARange *qrange; + int ret; + + if (!container->dirty_pages_supported && !all_device_dirty_tracking) { +@@ -1165,10 +1166,16 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + return 0; + } + ++ qrange = vfio_lookup_match_range(container, iova, size); ++ /* the same as vfio_dma_unmap() */ ++ assert(qrange); ++ + ret = vfio_bitmap_alloc(&vbmap, size); + if (ret) { + return ret; + } ++ g_free(vbmap.bitmap); ++ vbmap.bitmap = qrange->bitmap; + + if (all_device_dirty_tracking) { + ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova, size); +@@ -1186,8 +1193,6 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + trace_vfio_get_dirty_bitmap(container->fd, iova, size, vbmap.size, + ram_addr, dirty_pages); + out: +- g_free(vbmap.bitmap); +- + return ret; + } + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 242010036a..9a176a0d33 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -112,6 +112,29 @@ unmap_exit: + return ret; + } + ++VFIODMARange *vfio_lookup_match_range(VFIOContainer *container, ++ hwaddr start_addr, hwaddr size) ++{ ++ VFIODMARange *qrange; ++ ++ QLIST_FOREACH(qrange, &container->dma_list, next) { ++ if (qrange->iova == start_addr && qrange->size == size) { ++ return qrange; ++ } ++ } ++ return NULL; ++} ++ ++void vfio_dma_range_init_dirty_bitmap(VFIODMARange *qrange) ++{ ++ uint64_t pages, size; ++ 
++ pages = REAL_HOST_PAGE_ALIGN(qrange->size) / qemu_real_host_page_size(); ++ size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / BITS_PER_BYTE; ++ ++ qrange->bitmap = g_malloc0(size); ++} ++ + /* + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 + */ +@@ -124,6 +147,7 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, + .iova = iova, + .size = size, + }; ++ VFIODMARange *qrange; + bool need_dirty_sync = false; + int ret; + +@@ -136,6 +160,22 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, + need_dirty_sync = true; + } + ++ /* ++ * unregister the DMA range ++ * ++ * It seems that the memory layer will give us the same section as the one ++ * used in region_add(). Otherwise it'll be complicated to manipulate the ++ * bitmap across region_{add,del}. Is there any guarantee? ++ * ++ * But there is really not such a restriction on the kernel interface ++ * (VFIO_IOMMU_DIRTY_PAGES_FLAG_{UN}MAP_DMA, etc). ++ */ ++ qrange = vfio_lookup_match_range(container, iova, size); ++ assert(qrange); ++ g_free(qrange->bitmap); ++ QLIST_REMOVE(qrange, next); ++ g_free(qrange); ++ + while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) { + /* + * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c +@@ -180,6 +220,14 @@ int vfio_dma_map(VFIOContainer *container, hwaddr iova, + .iova = iova, + .size = size, + }; ++ VFIODMARange *qrange; ++ ++ qrange = g_malloc0(sizeof(*qrange)); ++ qrange->iova = iova; ++ qrange->size = size; ++ QLIST_INSERT_HEAD(&container->dma_list, qrange, next); ++ /* XXX allocate the dirty bitmap on demand */ ++ vfio_dma_range_init_dirty_bitmap(qrange); + + if (!readonly) { + map.flags |= VFIO_DMA_MAP_FLAG_WRITE; +@@ -552,6 +600,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container->iova_ranges = NULL; + QLIST_INIT(&container->giommu_list); + QLIST_INIT(&container->vrdl_list); ++ QLIST_INIT(&container->dma_list); + + ret = vfio_init_container(container, 
group->fd, errp); + if (ret) { +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index a4a22accb9..b131d04c9c 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -80,6 +80,14 @@ typedef struct VFIOAddressSpace { + + struct VFIOGroup; + ++typedef struct VFIODMARange { ++ QLIST_ENTRY(VFIODMARange) next; ++ hwaddr iova; ++ size_t size; ++ void *vaddr; /* unused */ ++ unsigned long *bitmap; /* dirty bitmap cache for this range */ ++} VFIODMARange; ++ + typedef struct VFIOContainer { + VFIOAddressSpace *space; + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ +@@ -97,6 +105,7 @@ typedef struct VFIOContainer { + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; + QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; ++ QLIST_HEAD(, VFIODMARange) dma_list; + QLIST_ENTRY(VFIOContainer) next; + QLIST_HEAD(, VFIODevice) device_list; + GList *iova_ranges; +@@ -212,6 +221,9 @@ void vfio_put_address_space(VFIOAddressSpace *space); + bool vfio_devices_all_running_and_saving(VFIOContainer *container); + + /* container->fd */ ++VFIODMARange *vfio_lookup_match_range(VFIOContainer *container, ++ hwaddr start_addr, hwaddr size); ++void vfio_dma_range_init_dirty_bitmap(VFIODMARange *qrange); + int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, + ram_addr_t size, IOMMUTLBEntry *iotlb); + int vfio_dma_map(VFIOContainer *container, hwaddr iova, +-- +2.27.0 + diff --git a/vfio-migration-Add-support-for-manual-clear-vfio-dir.patch b/vfio-migration-Add-support-for-manual-clear-vfio-dir.patch new file mode 100644 index 0000000000000000000000000000000000000000..06e5781624f27a44b87089188713bc3a9b345261 --- /dev/null +++ b/vfio-migration-Add-support-for-manual-clear-vfio-dir.patch @@ -0,0 +1,229 @@ +From 24c3ff779f35b40967d195e4764d4cb605c1a304 Mon Sep 17 00:00:00 2001 +From: Zenghui Yu +Date: Sat, 8 May 2021 17:31:05 +0800 +Subject: [PATCH] vfio/migration: Add support for manual clear 
vfio dirty log + +The new capability VFIO_DIRTY_LOG_MANUAL_CLEAR and the new ioctl +VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR and +VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP have been introduced in +the kernel, tweak the userspace side to use them. + +Check if the kernel supports VFIO_DIRTY_LOG_MANUAL_CLEAR and +provide the log_clear() hook for vfio_memory_listener. If the +kernel supports it, deliever the clear message to kernel. + +Signed-off-by: Zenghui Yu +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 136 ++++++++++++++++++++++++++++++++++ + hw/vfio/container.c | 13 +++- + include/hw/vfio/vfio-common.h | 1 + + 3 files changed, 148 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 564e933135..e08b147b3d 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1344,6 +1344,141 @@ static void vfio_listener_log_sync(MemoryListener *listener, + } + } + ++/* ++ * I'm not sure if there's any alignment requirement for the CLEAR_BITMAP ++ * ioctl. But copy from kvm side and align {start, size} with 64 pages. ++ * ++ * I think the code can be simplified a lot if no alignment requirement. ++ */ ++#define VFIO_CLEAR_LOG_SHIFT 6 ++#define VFIO_CLEAR_LOG_ALIGN (qemu_real_host_page_size() << VFIO_CLEAR_LOG_SHIFT) ++#define VFIO_CLEAR_LOG_MASK (-VFIO_CLEAR_LOG_ALIGN) ++ ++static int vfio_log_clear_one_range(VFIOContainer *container,VFIODMARange *qrange, ++ uint64_t start, uint64_t size) ++{ ++ struct vfio_iommu_type1_dirty_bitmap *dbitmap; ++ struct vfio_iommu_type1_dirty_bitmap_get *range; ++ ++ dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range)); ++ ++ dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range); ++ dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP; ++ range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data; ++ ++ /* ++ * Now let's deal with the actual bitmap, which is almost the same ++ * as the kvm side. 
++ */ ++ uint64_t end, bmap_start, start_delta, bmap_npages; ++ unsigned long *bmap_clear = NULL, psize = qemu_real_host_page_size(); ++ int ret; ++ ++ bmap_start = start & VFIO_CLEAR_LOG_MASK; ++ start_delta = start - bmap_start; ++ bmap_start /= psize; ++ ++ bmap_npages = DIV_ROUND_UP(size + start_delta, VFIO_CLEAR_LOG_ALIGN) ++ << VFIO_CLEAR_LOG_SHIFT; ++ end = qrange->size / psize; ++ if (bmap_npages > end - bmap_start) { ++ bmap_npages = end - bmap_start; ++ } ++ start_delta /= psize; ++ ++ if (start_delta) { ++ bmap_clear = bitmap_new(bmap_npages); ++ bitmap_copy_with_src_offset(bmap_clear, qrange->bitmap, ++ bmap_start, start_delta + size / psize); ++ bitmap_clear(bmap_clear, 0, start_delta); ++ range->bitmap.data = (__u64 *)bmap_clear; ++ } else { ++ range->bitmap.data = (__u64 *)(qrange->bitmap + BIT_WORD(bmap_start)); ++ } ++ ++ range->iova = qrange->iova + bmap_start * psize; ++ range->size = bmap_npages * psize; ++ range->bitmap.size = ROUND_UP(bmap_npages, sizeof(__u64) * BITS_PER_BYTE) / ++ BITS_PER_BYTE; ++ range->bitmap.pgsize = qemu_real_host_page_size(); ++ ++ ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap); ++ if (ret) { ++ error_report("Failed to clear dirty log for iova: 0x%"PRIx64 ++ " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova, ++ (uint64_t)range->size, errno); ++ goto err_out; ++ } ++ ++ bitmap_clear(qrange->bitmap, bmap_start + start_delta, size / psize); ++err_out: ++ g_free(bmap_clear); ++ g_free(dbitmap); ++ return 0; ++} ++ ++static int vfio_physical_log_clear(VFIOContainer *container, ++ MemoryRegionSection *section) ++{ ++ uint64_t start, size, offset, count; ++ VFIODMARange *qrange; ++ int ret = 0; ++ ++ if (!container->dirty_log_manual_clear) { ++ /* No need to do explicit clear */ ++ return ret; ++ } ++ ++ start = section->offset_within_address_space; ++ size = int128_get64(section->size); ++ ++ if (!size) { ++ return ret; ++ } ++ ++ QLIST_FOREACH(qrange, &container->dma_list, next) { ++ /* ++ * Discard ranges 
that do not overlap the section (e.g., the ++ * Memory BAR regions of the device) ++ */ ++ if (qrange->iova > start + size - 1 || ++ start > qrange->iova + qrange->size - 1) { ++ continue; ++ } ++ ++ if (start >= qrange->iova) { ++ /* The range starts before section or is aligned to it. */ ++ offset = start - qrange->iova; ++ count = MIN(qrange->size - offset, size); ++ } else { ++ /* The range starts after section. */ ++ offset = 0; ++ count = MIN(qrange->size, size - (qrange->iova - start)); ++ } ++ ret = vfio_log_clear_one_range(container, qrange, offset, count); ++ if (ret < 0) { ++ break; ++ } ++ } ++ ++ return ret; ++} ++ ++static void vfio_listener_log_clear(MemoryListener *listener, ++ MemoryRegionSection *section) ++{ ++ VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ ++ if (vfio_listener_skipped_section(section) || ++ !container->dirty_pages_supported) { ++ return; ++ } ++ ++ if (vfio_devices_all_dirty_tracking(container)) { ++ vfio_physical_log_clear(container, section); ++ } ++} ++ + const MemoryListener vfio_memory_listener = { + .name = "vfio", + .region_add = vfio_listener_region_add, +@@ -1351,6 +1486,7 @@ const MemoryListener vfio_memory_listener = { + .log_global_start = vfio_listener_log_global_start, + .log_global_stop = vfio_listener_log_global_stop, + .log_sync = vfio_listener_log_sync, ++ .log_clear = vfio_listener_log_clear, + }; + + void vfio_reset_handler(void *opaque) +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 9a176a0d33..d8b9117f4f 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -285,7 +285,9 @@ int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, + dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range)); + + dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range); +- dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; ++ dbitmap->flags = container->dirty_log_manual_clear ? 
++ VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR : ++ VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; + range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data; + range->iova = iova; + range->size = size; +@@ -409,7 +411,7 @@ static int vfio_get_iommu_type(VFIOContainer *container, + static int vfio_init_container(VFIOContainer *container, int group_fd, + Error **errp) + { +- int iommu_type, ret; ++ int iommu_type, dirty_log_manual_clear, ret; + + iommu_type = vfio_get_iommu_type(container, errp); + if (iommu_type < 0) { +@@ -438,6 +440,13 @@ static int vfio_init_container(VFIOContainer *container, int group_fd, + } + + container->iommu_type = iommu_type; ++ ++ dirty_log_manual_clear = ioctl(container->fd, VFIO_CHECK_EXTENSION, ++ VFIO_DIRTY_LOG_MANUAL_CLEAR); ++ if (dirty_log_manual_clear) { ++ container->dirty_log_manual_clear = dirty_log_manual_clear; ++ } ++ + return 0; + } + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index b131d04c9c..fd9828d50b 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -97,6 +97,7 @@ typedef struct VFIOContainer { + Error *error; + bool initialized; + bool dirty_pages_supported; ++ bool dirty_log_manual_clear; + uint64_t dirty_pgsizes; + uint64_t max_dirty_bitmap_size; + unsigned long pgsizes; +-- +2.27.0 + diff --git a/vhost-add-vhost_dev_suspend-resume_op.patch b/vhost-add-vhost_dev_suspend-resume_op.patch new file mode 100644 index 0000000000000000000000000000000000000000..c400fb2e0705786be85cdc2dffd9246b2af30b0c --- /dev/null +++ b/vhost-add-vhost_dev_suspend-resume_op.patch @@ -0,0 +1,38 @@ +From b0a62a84bd1c6ad5d4c11463371fcf267b56d902 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:13:41 +0800 +Subject: [PATCH] vhost: add vhost_dev_suspend/resume_op + +Introduce new vhost interface to support vhost device suspend & resume + +Signed-off-by: libai +--- + include/hw/virtio/vhost-backend.h | 5 +++++ + 1 file changed, 5 insertions(+) + +diff 
--git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h +index 71b02e4a12..84b8fa1075 100644 +--- a/include/hw/virtio/vhost-backend.h ++++ b/include/hw/virtio/vhost-backend.h +@@ -155,6 +155,9 @@ typedef int (*vhost_set_device_state_fd_op)(struct vhost_dev *dev, + Error **errp); + typedef int (*vhost_check_device_state_op)(struct vhost_dev *dev, Error **errp); + ++typedef int (*vhost_dev_suspend_op)(struct vhost_dev *dev); ++typedef int (*vhost_dev_resume_op)(struct vhost_dev *dev); ++ + typedef struct VhostOps { + VhostBackendType backend_type; + vhost_backend_init vhost_backend_init; +@@ -208,6 +211,8 @@ typedef struct VhostOps { + vhost_supports_device_state_op vhost_supports_device_state; + vhost_set_device_state_fd_op vhost_set_device_state_fd; + vhost_check_device_state_op vhost_check_device_state; ++ vhost_dev_suspend_op vhost_dev_suspend; ++ vhost_dev_resume_op vhost_dev_resume; + } VhostOps; + + int vhost_backend_update_device_iotlb(struct vhost_dev *dev, +-- +2.27.0 + diff --git a/vhost-cancel-migration-when-vhost-user-restarted-dur.patch b/vhost-cancel-migration-when-vhost-user-restarted-dur.patch new file mode 100644 index 0000000000000000000000000000000000000000..b551e4b014b4707df959b2acb42517ca6c5e3d31 --- /dev/null +++ b/vhost-cancel-migration-when-vhost-user-restarted-dur.patch @@ -0,0 +1,86 @@ +From 302401ee7eb437712b69caff44ce684c88573dc6 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Mon, 29 Jul 2019 16:22:12 +0800 +Subject: [PATCH] vhost: cancel migration when vhost-user restarted during + migration + +Qemu will abort when vhost-user process is restarted during migration +when vhost_log_global_start/stop is called. The reason is clear that +vhost_dev_set_log returns -1 because network connection is temporarily +lost. Let's cancel migration and report it to user in this abnormal +situation.
+ +Signed-off-by: Ying Fang +--- + hw/virtio/vhost.c | 9 +++++++-- + migration/migration.c | 2 +- + migration/migration.h | 1 + + 3 files changed, 9 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 2c9ac79468..a8adc149ad 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -26,6 +26,7 @@ + #include "hw/mem/memory-device.h" + #include "migration/blocker.h" + #include "migration/qemu-file-types.h" ++#include "migration/migration.h" + #include "sysemu/dma.h" + #include "trace.h" + +@@ -1047,20 +1048,24 @@ check_dev_state: + static void vhost_log_global_start(MemoryListener *listener) + { + int r; ++ Error *errp = NULL; + + r = vhost_migration_log(listener, true); + if (r < 0) { +- abort(); ++ error_setg(&errp, "Failed to start vhost migration log"); ++ migrate_fd_error(migrate_get_current(), errp); + } + } + + static void vhost_log_global_stop(MemoryListener *listener) + { + int r; ++ Error *errp = NULL; + + r = vhost_migration_log(listener, false); + if (r < 0) { +- abort(); ++ error_setg(&errp, "Failed to stop vhost migration log"); ++ migrate_fd_error(migrate_get_current(), errp); + } + } + +diff --git a/migration/migration.c b/migration/migration.c +index 3ce04b2aaf..71a03b3248 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1377,7 +1377,7 @@ static void migrate_error_free(MigrationState *s) + } + } + +-static void migrate_fd_error(MigrationState *s, const Error *error) ++void migrate_fd_error(MigrationState *s, const Error *error) + { + trace_migrate_fd_error(error_get_pretty(error)); + assert(s->to_dst_file == NULL); +diff --git a/migration/migration.h b/migration/migration.h +index cf2c9c88e0..6aafa04314 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -482,6 +482,7 @@ bool migration_has_all_channels(void); + + uint64_t migrate_max_downtime(void); + ++void migrate_fd_error(MigrationState *s, const Error *error); + void migrate_set_error(MigrationState *s, const Error 
*error); + bool migrate_has_error(MigrationState *s); + +-- +2.27.0 + diff --git a/vhost-implement-migration-state-notifier-for-vdpa-de.patch b/vhost-implement-migration-state-notifier-for-vdpa-de.patch new file mode 100644 index 0000000000000000000000000000000000000000..16d939032c5859f2e774abe3242356feb6dd057d --- /dev/null +++ b/vhost-implement-migration-state-notifier-for-vdpa-de.patch @@ -0,0 +1,87 @@ +From 3ef6dc341d6921a95564e9089f41ddbd79cd2a94 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:55:53 +0800 +Subject: [PATCH] vhost: implement migration state notifier for vdpa device + +Register migration state notifier to support triggered by +migration exceptions + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-mig.c | 29 +++++++++++++++++++++++++++++ + include/hw/virtio/vdpa-dev.h | 1 + + 2 files changed, 30 insertions(+) + +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +index 1872f11f3f..9b47e3ed45 100644 +--- a/hw/virtio/vdpa-dev-mig.c ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -23,6 +23,7 @@ + #include "hw/virtio/virtio-bus.h" + #include "migration/register.h" + #include "migration/migration.h" ++#include "migration/misc.h" + #include "qemu/error-report.h" + #include "hw/virtio/vdpa-dev-mig.h" + #include "migration/qemu-file-types.h" +@@ -354,6 +355,31 @@ static SaveVMHandlers savevm_vdpa_handlers = { + .load_setup = vdpa_load_setup, + }; + ++static void vdpa_migration_state_notifier(Notifier *notifier, void *data) ++{ ++ MigrationState *s = data; ++ VhostVdpaDevice *vdev = container_of(notifier, ++ VhostVdpaDevice, ++ migration_state); ++ struct vhost_dev *hdev = &vdev->dev; ++ int ret; ++ ++ switch (s->state) { ++ case MIGRATION_STATUS_CANCELLING: ++ case MIGRATION_STATUS_CANCELLED: ++ case MIGRATION_STATUS_FAILED: ++ ret = vhost_vdpa_set_mig_state(hdev, VDPA_DEVICE_CANCEL); ++ if (ret) { ++ error_report("Failed to set state CANCEL\n"); ++ } ++ ++ break; ++ case MIGRATION_STATUS_COMPLETED: ++ default: ++ break; ++ } ++} ++ + 
void vdpa_migration_register(VhostVdpaDevice *vdev) + { + vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev), +@@ -361,10 +387,13 @@ void vdpa_migration_register(VhostVdpaDevice *vdev) + DEVICE(vdev)); + register_savevm_live("vdpa", -1, 1, + &savevm_vdpa_handlers, DEVICE(vdev)); ++ vdev->migration_state.notify = vdpa_migration_state_notifier; ++ migration_add_notifier(&vdev->migration_state, vdpa_migration_state_notifier); + } + + void vdpa_migration_unregister(VhostVdpaDevice *vdev) + { ++ migration_remove_notifier(&vdev->migration_state); + unregister_savevm(VMSTATE_IF(&vdev->parent_obj.parent_obj), "vdpa", DEVICE(vdev)); + qemu_del_vm_change_state_handler(vdev->vmstate); + } +diff --git a/include/hw/virtio/vdpa-dev.h b/include/hw/virtio/vdpa-dev.h +index 43cbcef81b..20f50c76c6 100644 +--- a/include/hw/virtio/vdpa-dev.h ++++ b/include/hw/virtio/vdpa-dev.h +@@ -39,6 +39,7 @@ struct VhostVdpaDevice { + bool started; + int (*post_init)(VhostVdpaDevice *v, Error **errp); + VMChangeStateEntry *vmstate; ++ Notifier migration_state; + }; + + #endif +-- +2.27.0 + diff --git a/vhost-implement-post-resume-bh.patch b/vhost-implement-post-resume-bh.patch new file mode 100644 index 0000000000000000000000000000000000000000..1da1164301cc664adbbcd67dc05d6ba9d66ff6dd --- /dev/null +++ b/vhost-implement-post-resume-bh.patch @@ -0,0 +1,57 @@ +From 229737ca91d4e81b4a14143da9981bd59b80a539 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:57:35 +0800 +Subject: [PATCH] vhost: implement post resume bh + +Set vdpa device mig state to post start when vm post start + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-mig.c | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +index 662d4a29dc..1872f11f3f 100644 +--- a/hw/virtio/vdpa-dev-mig.c ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -26,6 +26,7 @@ + #include "qemu/error-report.h" + #include "hw/virtio/vdpa-dev-mig.h" + #include 
"migration/qemu-file-types.h" ++#include "qemu/main-loop.h" + + /* + * Flags used as delimiter: +@@ -218,6 +219,18 @@ err_host_notifiers: + return ret; + } + ++static void vdpa_dev_migration_handle_incoming_bh(void *opaque) ++{ ++ struct vhost_dev *hdev = opaque; ++ int ret; ++ ++ /* Post start device, unsupport rollback if failed! */ ++ ret = vhost_vdpa_set_mig_state(hdev, VDPA_DEVICE_POST_START); ++ if (ret) { ++ error_report("Failed to set state: POST_START\n"); ++ } ++} ++ + static void vdpa_dev_vmstate_change(void *opaque, bool running, RunState state) + { + VhostVdpaDevice *vdpa = VHOST_VDPA_DEVICE(opaque); +@@ -247,6 +260,10 @@ static void vdpa_dev_vmstate_change(void *opaque, bool running, RunState state) + + if (mis->state == RUN_STATE_RESTORE_VM) { + vhost_vdpa_call(hdev, VHOST_VDPA_RESUME, NULL); ++ /* post resume */ ++ mis->bh = qemu_bh_new(vdpa_dev_migration_handle_incoming_bh, ++ hdev); ++ qemu_bh_schedule(mis->bh); + } + } + } +-- +2.27.0 + diff --git a/vhost-implement-savevm_handler-for-vdpa-device.patch b/vhost-implement-savevm_handler-for-vdpa-device.patch new file mode 100644 index 0000000000000000000000000000000000000000..721636ac0092081615e33da62edb6717e39b63e6 --- /dev/null +++ b/vhost-implement-savevm_handler-for-vdpa-device.patch @@ -0,0 +1,270 @@ +From 556aaa9632862505548d5083d369e92590fb2087 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:53:28 +0800 +Subject: [PATCH] vhost: implement savevm_handler for vdpa device + +Register savevm_handler ops for vdpa devices to support migration:x + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-mig.c | 175 +++++++++++++++++++++++++++++++ + include/hw/virtio/vdpa-dev-mig.h | 13 +++ + linux-headers/linux/vhost.h | 9 ++ + 3 files changed, 197 insertions(+) + +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +index 1d2bed2571..662d4a29dc 100644 +--- a/hw/virtio/vdpa-dev-mig.c ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -21,9 +21,21 @@ + #include "hw/virtio/vhost.h" + #include 
"hw/virtio/vdpa-dev.h" + #include "hw/virtio/virtio-bus.h" ++#include "migration/register.h" + #include "migration/migration.h" + #include "qemu/error-report.h" + #include "hw/virtio/vdpa-dev-mig.h" ++#include "migration/qemu-file-types.h" ++ ++/* ++ * Flags used as delimiter: ++ * 0xffffffff => MSB 32-bit all 1s ++ * 0xef10 => emulated (virtual) function IO ++ * 0x0000 => 16-bits reserved for flags ++ */ ++#define VDPA_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) ++#define VDPA_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) ++#define VDPA_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) + + static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request, + void *arg) +@@ -39,6 +51,80 @@ static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request, + return ioctl(fd, request, arg); + } + ++static int vhost_vdpa_set_mig_state(struct vhost_dev *dev, uint8_t state) ++{ ++ return vhost_vdpa_call(dev, VHOST_VDPA_SET_MIG_STATE, &state); ++} ++ ++static int vhost_vdpa_dev_buffer_size(struct vhost_dev *dev, uint32_t *size) ++{ ++ return vhost_vdpa_call(dev, VHOST_GET_DEV_BUFFER_SIZE, size); ++} ++ ++static int vhost_vdpa_dev_buffer_save(struct vhost_dev *dev, QEMUFile *f) ++{ ++ struct vhost_vdpa_config *config; ++ unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); ++ uint32_t buffer_size = 0; ++ int ret; ++ ++ ret = vhost_vdpa_dev_buffer_size(dev, &buffer_size); ++ if (ret) { ++ error_report("get dev buffer size failed: %d\n", ret); ++ return ret; ++ } ++ ++ qemu_put_be32(f, buffer_size); ++ ++ config = g_malloc(buffer_size + config_size); ++ config->off = 0; ++ config->len = buffer_size; ++ ++ ret = vhost_vdpa_call(dev, VHOST_GET_DEV_BUFFER, config); ++ if (ret) { ++ error_report("get dev buffer failed: %d\n", ret); ++ goto free; ++ } ++ ++ qemu_put_buffer(f, config->buf, buffer_size); ++free: ++ g_free(config); ++ ++ return ret; ++} ++ ++static int vhost_vdpa_dev_buffer_load(struct vhost_dev *dev, QEMUFile *f) ++{ ++ 
struct vhost_vdpa_config *config; ++ unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); ++ uint32_t buffer_size, recv_size; ++ int ret; ++ ++ buffer_size = qemu_get_be32(f); ++ ++ config = g_malloc(buffer_size + config_size); ++ config->off = 0; ++ config->len = buffer_size; ++ ++ recv_size = qemu_get_buffer(f, config->buf, buffer_size); ++ if (recv_size != buffer_size) { ++ error_report("read dev mig buffer failed, buffer_size: %u, " ++ "recv_size: %u\n", buffer_size, recv_size); ++ ret = -EINVAL; ++ goto free; ++ } ++ ++ ret = vhost_vdpa_call(dev, VHOST_SET_DEV_BUFFER, config); ++ if (ret) { ++ error_report("set dev buffer failed: %d\n", ret); ++ } ++ ++free: ++ g_free(config); ++ ++ return ret; ++} ++ + static int vhost_vdpa_device_suspend(VhostVdpaDevice *vdpa) + { + VirtIODevice *vdev = VIRTIO_DEVICE(vdpa); +@@ -165,14 +251,103 @@ static void vdpa_dev_vmstate_change(void *opaque, bool running, RunState state) + } + } + ++static int vdpa_save_setup(QEMUFile *f, void *opaque) ++{ ++ qemu_put_be64(f, VDPA_MIG_FLAG_DEV_SETUP_STATE); ++ qemu_put_be64(f, VDPA_MIG_FLAG_END_OF_STATE); ++ ++ return qemu_file_get_error(f); ++} ++ ++static int vdpa_save_complete_precopy(QEMUFile *f, void *opaque) ++{ ++ VhostVdpaDevice *vdev = VHOST_VDPA_DEVICE(opaque); ++ struct vhost_dev *hdev = &vdev->dev; ++ int ret; ++ ++ qemu_put_be64(f, VDPA_MIG_FLAG_DEV_CONFIG_STATE); ++ ret = vhost_vdpa_dev_buffer_save(hdev, f); ++ if (ret) { ++ error_report("Save vdpa device buffer failed: %d\n", ret); ++ return ret; ++ } ++ qemu_put_be64(f, VDPA_MIG_FLAG_END_OF_STATE); ++ ++ return qemu_file_get_error(f); ++} ++ ++static int vdpa_load_state(QEMUFile *f, void *opaque, int version_id) ++{ ++ VhostVdpaDevice *vdev = VHOST_VDPA_DEVICE(opaque); ++ struct vhost_dev *hdev = &vdev->dev; ++ ++ int ret; ++ uint64_t data; ++ ++ data = qemu_get_be64(f); ++ while (data != VDPA_MIG_FLAG_END_OF_STATE) { ++ if (data == VDPA_MIG_FLAG_DEV_SETUP_STATE) { ++ data = qemu_get_be64(f); ++ if (data 
== VDPA_MIG_FLAG_END_OF_STATE) { ++ return 0; ++ } else { ++ error_report("SETUP STATE: EOS not found 0x%lx\n", data); ++ return -EINVAL; ++ } ++ } else if (data == VDPA_MIG_FLAG_DEV_CONFIG_STATE) { ++ ret = vhost_vdpa_dev_buffer_load(hdev, f); ++ if (ret) { ++ error_report("fail to restore device buffer.\n"); ++ return ret; ++ } ++ } ++ ++ ret = qemu_file_get_error(f); ++ if (ret) { ++ error_report("qemu file error: %d\n", ret); ++ return ret; ++ } ++ data = qemu_get_be64(f); ++ } ++ ++ return 0; ++} ++ ++static int vdpa_load_setup(QEMUFile *f, void *opaque) ++{ ++ VhostVdpaDevice *v = VHOST_VDPA_DEVICE(opaque); ++ struct vhost_dev *hdev = &v->dev; ++ int ret = 0; ++ ++ ret = vhost_vdpa_set_mig_state(hdev, VDPA_DEVICE_PRE_START); ++ if (ret) { ++ error_report("pre start device failed: %d\n", ret); ++ goto out; ++ } ++ ++ return qemu_file_get_error(f); ++out: ++ return ret; ++} ++ ++static SaveVMHandlers savevm_vdpa_handlers = { ++ .save_setup = vdpa_save_setup, ++ .save_live_complete_precopy = vdpa_save_complete_precopy, ++ .load_state = vdpa_load_state, ++ .load_setup = vdpa_load_setup, ++}; ++ + void vdpa_migration_register(VhostVdpaDevice *vdev) + { + vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev), + vdpa_dev_vmstate_change, + DEVICE(vdev)); ++ register_savevm_live("vdpa", -1, 1, ++ &savevm_vdpa_handlers, DEVICE(vdev)); + } + + void vdpa_migration_unregister(VhostVdpaDevice *vdev) + { ++ unregister_savevm(VMSTATE_IF(&vdev->parent_obj.parent_obj), "vdpa", DEVICE(vdev)); + qemu_del_vm_change_state_handler(vdev->vmstate); + } +diff --git a/include/hw/virtio/vdpa-dev-mig.h b/include/hw/virtio/vdpa-dev-mig.h +index 89665ca747..adc1d657f7 100644 +--- a/include/hw/virtio/vdpa-dev-mig.h ++++ b/include/hw/virtio/vdpa-dev-mig.h +@@ -9,6 +9,19 @@ + + #include "hw/virtio/vdpa-dev.h" + ++enum { ++ VDPA_DEVICE_START, ++ VDPA_DEVICE_STOP, ++ VDPA_DEVICE_PRE_START, ++ VDPA_DEVICE_PRE_STOP, ++ VDPA_DEVICE_CANCEL, ++ VDPA_DEVICE_POST_START, ++ 
VDPA_DEVICE_START_ASYNC, ++ VDPA_DEVICE_STOP_ASYNC, ++ VDPA_DEVICE_PRE_START_ASYNC, ++ VDPA_DEVICE_QUERY_OP_STATE, ++}; ++ + void vdpa_migration_register(VhostVdpaDevice *vdev); + + void vdpa_migration_unregister(VhostVdpaDevice *vdev); +diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h +index 19dc7fd36c..a08e980a1e 100644 +--- a/linux-headers/linux/vhost.h ++++ b/linux-headers/linux/vhost.h +@@ -231,4 +231,13 @@ + */ + #define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ + struct vhost_vring_state) ++ ++/* set and get device buffer */ ++#define VHOST_GET_DEV_BUFFER _IOR(VHOST_VIRTIO, 0xb0, struct vhost_vdpa_config) ++#define VHOST_SET_DEV_BUFFER _IOW(VHOST_VIRTIO, 0xb1, struct vhost_vdpa_config) ++#define VHOST_GET_DEV_BUFFER_SIZE _IOR(VHOST_VIRTIO, 0xb3, __u32) ++ ++/* set device migration state */ ++#define VHOST_VDPA_SET_MIG_STATE _IOW(VHOST_VIRTIO, 0xb2, __u8) ++ + #endif +-- +2.27.0 + diff --git a/vhost-implement-vhost-vdpa-suspend-resume.patch b/vhost-implement-vhost-vdpa-suspend-resume.patch new file mode 100644 index 0000000000000000000000000000000000000000..620b963673bb22390b6f2d4bb36bbdfd3c060c53 --- /dev/null +++ b/vhost-implement-vhost-vdpa-suspend-resume.patch @@ -0,0 +1,80 @@ +From a7f9a67ee98a5261f7639619055034f40bccfef0 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:22:20 +0800 +Subject: [PATCH] vhost: implement vhost-vdpa suspend/resume + +vhost-vdpa implements the vhost_dev_suspend interface, +which will be called during the shutdown phase of the +live migration source virtual machine to suspend the +device but not reset the device information. + +vhost-vdpa implements the vhost_dev_resume interface. +If the live migration fails, it will be called during +the startup phase of the source virtual machine. +Enable the device but set the status, etc.
+ +Signed-off-by: libai +--- + hw/virtio/vhost-vdpa.c | 41 +++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 41 insertions(+) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 037a9c6e4c..063e941544 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -1513,6 +1513,45 @@ static bool vhost_vdpa_force_iommu(struct vhost_dev *dev) + return true; + } + ++static int vhost_vdpa_suspend_device(struct vhost_dev *dev) ++{ ++ struct vhost_vdpa *v = dev->opaque; ++ int ret; ++ ++ vhost_vdpa_svqs_stop(dev); ++ vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); ++ ++ if (dev->vq_index + dev->nvqs != dev->vq_index_end) { ++ return 0; ++ } ++ ++ ret = vhost_vdpa_call(dev, VHOST_VDPA_SUSPEND, NULL); ++ memory_listener_unregister(&v->listener); ++ return ret; ++} ++ ++static int vhost_vdpa_resume_device(struct vhost_dev *dev) ++{ ++ struct vhost_vdpa *v = dev->opaque; ++ bool ok; ++ ++ vhost_vdpa_host_notifiers_init(dev); ++ ok = vhost_vdpa_svqs_start(dev); ++ if (unlikely(!ok)) { ++ return -1; ++ } ++ for (int i = 0; i < v->dev->nvqs; ++i) { ++ vhost_vdpa_set_vring_ready(v, v->dev->vq_index + i); ++ } ++ ++ if (dev->vq_index + dev->nvqs != dev->vq_index_end) { ++ return 0; ++ } ++ ++ memory_listener_register(&v->listener, &address_space_memory); ++ return vhost_vdpa_call(dev, VHOST_VDPA_RESUME, NULL); ++} ++ + static int vhost_vdpa_log_sync(struct vhost_dev *dev) + { + struct vhost_vdpa *v = dev->opaque; +@@ -1559,4 +1598,6 @@ const VhostOps vdpa_ops = { + .vhost_log_sync = vhost_vdpa_log_sync, + .vhost_set_config_call = vhost_vdpa_set_config_call, + .vhost_reset_status = vhost_vdpa_reset_status, ++ .vhost_dev_suspend = vhost_vdpa_suspend_device, ++ .vhost_dev_resume = vhost_vdpa_resume_device, + }; +-- +2.27.0 + diff --git a/vhost-implement-vhost_vdpa_device_suspend-resume.patch b/vhost-implement-vhost_vdpa_device_suspend-resume.patch new file mode 100644 index 
0000000000000000000000000000000000000000..f9ef199773c3fe3c136ad0fc6d5d29b67b7ef2c9 --- /dev/null +++ b/vhost-implement-vhost_vdpa_device_suspend-resume.patch @@ -0,0 +1,447 @@ +From 4c5a9a0703e227186639124f09cdf7214e40ea7d Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:27:34 +0800 +Subject: [PATCH] vhost: implement vhost_vdpa_device_suspend/resume + +Implement vhost device suspend & resume interface + +Signed-off-by: jiangdongxu +Signed-off-by: fangyi +Signed-off-by: libai +--- + hw/virtio/meson.build | 2 +- + hw/virtio/vdpa-dev-mig.c | 178 +++++++++++++++++++++++++++++++ + hw/virtio/vhost.c | 138 ++++++++++++++++++++++++ + include/hw/virtio/vdpa-dev-mig.h | 16 +++ + include/hw/virtio/vdpa-dev.h | 1 + + include/hw/virtio/vhost.h | 3 + + migration/migration.c | 3 +- + migration/migration.h | 2 + + 8 files changed, 340 insertions(+), 3 deletions(-) + create mode 100644 hw/virtio/vdpa-dev-mig.c + create mode 100644 include/hw/virtio/vdpa-dev-mig.h + +diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build +index c0055a7832..596651d113 100644 +--- a/hw/virtio/meson.build ++++ b/hw/virtio/meson.build +@@ -5,7 +5,7 @@ system_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c') + system_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) + system_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK_COMMON', if_true: files('vhost-vsock-common.c')) + system_virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c')) +-system_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) ++system_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c', 'vdpa-dev-mig.c')) + + specific_virtio_ss = ss.source_set() + specific_virtio_ss.add(files('virtio.c')) +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +new file mode 100644 +index 0000000000..1d2bed2571 +--- /dev/null ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -0,0 +1,178 @@ ++/* ++ * Copyright (c) Huawei 
Technologies Co., Ltd. 2023. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ ++ * You should have received a copy of the GNU General Public License along ++ * with this program; if not, see . ++ */ ++ ++#include ++#include ++#include "qemu/osdep.h" ++#include "hw/virtio/vhost.h" ++#include "hw/virtio/vdpa-dev.h" ++#include "hw/virtio/virtio-bus.h" ++#include "migration/migration.h" ++#include "qemu/error-report.h" ++#include "hw/virtio/vdpa-dev-mig.h" ++ ++static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request, ++ void *arg) ++{ ++ struct vhost_vdpa *v = dev->opaque; ++ int fd = v->device_fd; ++ ++ if (dev->vhost_ops->backend_type != VHOST_BACKEND_TYPE_VDPA) { ++ error_report("backend type isn't VDPA. 
Operation not permitted!\n"); ++ return -EPERM; ++ } ++ ++ return ioctl(fd, request, arg); ++} ++ ++static int vhost_vdpa_device_suspend(VhostVdpaDevice *vdpa) ++{ ++ VirtIODevice *vdev = VIRTIO_DEVICE(vdpa); ++ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); ++ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); ++ int ret; ++ ++ if (!vdpa->started) { ++ return -EFAULT; ++ } ++ ++ if (!k->set_guest_notifiers) { ++ return -EFAULT; ++ } ++ ++ vdpa->started = false; ++ ++ ret = vhost_dev_suspend(&vdpa->dev, vdev, false); ++ if (ret) { ++ goto suspend_fail; ++ } ++ ++ ret = k->set_guest_notifiers(qbus->parent, vdpa->dev.nvqs, false); ++ if (ret < 0) { ++ error_report("vhost guest notifier cleanup failed: %d\n", ret); ++ goto set_guest_notifiers_fail; ++ } ++ ++ vhost_dev_disable_notifiers(&vdpa->dev, vdev); ++ return ret; ++ ++set_guest_notifiers_fail: ++ ret = k->set_guest_notifiers(qbus->parent, vdpa->dev.nvqs, true); ++ if (ret) { ++ error_report("vhost guest notifier restore failed: %d\n", ret); ++ } ++ ++suspend_fail: ++ vdpa->started = true; ++ return ret; ++} ++ ++static int vhost_vdpa_device_resume(VhostVdpaDevice *vdpa) ++{ ++ VirtIODevice *vdev = VIRTIO_DEVICE(vdpa); ++ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); ++ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); ++ int i, ret; ++ ++ if (!k->set_guest_notifiers) { ++ error_report("binding does not support guest notifiers\n"); ++ return -ENOSYS; ++ } ++ ++ ret = vhost_dev_enable_notifiers(&vdpa->dev, vdev); ++ if (ret < 0) { ++ error_report("Error enabling host notifiers: %d\n", ret); ++ return ret; ++ } ++ ++ ret = k->set_guest_notifiers(qbus->parent, vdpa->dev.nvqs, true); ++ if (ret < 0) { ++ error_report("Error binding guest notifier: %d\n", ret); ++ goto err_host_notifiers; ++ } ++ ++ vdpa->dev.acked_features = vdev->guest_features; ++ ++ ret = vhost_dev_resume(&vdpa->dev, vdev, false); ++ if (ret < 0) { ++ error_report("Error starting vhost: %d\n", ret); ++ goto err_guest_notifiers; ++ 
} ++ vdpa->started = true; ++ ++ /* ++ * guest_notifier_mask/pending not used yet, so just unmask ++ * everything here. virtio-pci will do the right thing by ++ * enabling/disabling irqfd. ++ */ ++ for (i = 0; i < vdpa->dev.nvqs; i++) { ++ vhost_virtqueue_mask(&vdpa->dev, vdev, i, false); ++ } ++ ++ return ret; ++ ++err_guest_notifiers: ++ k->set_guest_notifiers(qbus->parent, vdpa->dev.nvqs, false); ++err_host_notifiers: ++ vhost_dev_disable_notifiers(&vdpa->dev, vdev); ++ return ret; ++} ++ ++static void vdpa_dev_vmstate_change(void *opaque, bool running, RunState state) ++{ ++ VhostVdpaDevice *vdpa = VHOST_VDPA_DEVICE(opaque); ++ struct vhost_dev *hdev = &vdpa->dev; ++ int ret; ++ MigrationState *ms = migrate_get_current(); ++ MigrationIncomingState *mis = migration_incoming_get_current(); ++ ++ if (!running) { ++ if (ms->state == RUN_STATE_PAUSED) { ++ ret = vhost_vdpa_device_suspend(vdpa); ++ if (ret) { ++ error_report("suspend vdpa device failed: %d\n", ret); ++ if (ms->migration_thread_running) { ++ migrate_fd_cancel(ms); ++ } ++ } ++ } ++ } else { ++ if (ms->state == RUN_STATE_RESTORE_VM) { ++ ret = vhost_vdpa_device_resume(vdpa); ++ if (ret) { ++ error_report("migration dest resume device failed, abort!\n"); ++ exit(EXIT_FAILURE); ++ } ++ } ++ ++ if (mis->state == RUN_STATE_RESTORE_VM) { ++ vhost_vdpa_call(hdev, VHOST_VDPA_RESUME, NULL); ++ } ++ } ++} ++ ++void vdpa_migration_register(VhostVdpaDevice *vdev) ++{ ++ vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev), ++ vdpa_dev_vmstate_change, ++ DEVICE(vdev)); ++} ++ ++void vdpa_migration_unregister(VhostVdpaDevice *vdev) ++{ ++ qemu_del_vm_change_state_handler(vdev->vmstate); ++} +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 438182d850..d073a6d5a5 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -2492,3 +2492,141 @@ bool used_memslots_is_exceeded(void) + { + return used_memslots_exceeded; + } ++ ++int vhost_dev_resume(struct vhost_dev *hdev, VirtIODevice *vdev, bool 
vrings) ++{ ++ int i, r; ++ EventNotifier *e = &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier; ++ ++ /* should only be called after backend is connected */ ++ if (!hdev->vhost_ops) { ++ error_report("Missing vhost_ops! Operation not permitted!\n"); ++ return -EPERM; ++ } ++ ++ vdev->vhost_started = true; ++ hdev->started = true; ++ hdev->vdev = vdev; ++ ++ if (vhost_dev_has_iommu(hdev)) { ++ memory_listener_register(&hdev->iommu_listener, vdev->dma_as); ++ } ++ ++ r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem); ++ if (r < 0) { ++ VHOST_OPS_DEBUG(r, "vhost_set_mem_table failed"); ++ goto fail_mem; ++ } ++ for (i = 0; i < hdev->nvqs; ++i) { ++ r = vhost_virtqueue_start(hdev, ++ vdev, ++ hdev->vqs + i, ++ hdev->vq_index + i); ++ if (r < 0) { ++ goto fail_vq; ++ } ++ } ++ ++ r = event_notifier_init(e, 0); ++ if (r < 0) { ++ return r; ++ } ++ event_notifier_test_and_clear(e); ++ if (!vdev->use_guest_notifier_mask) { ++ vhost_config_mask(hdev, vdev, true); ++ } ++ if (vrings) { ++ r = vhost_dev_set_vring_enable(hdev, true); ++ if (r) { ++ goto fail_vq; ++ } ++ } ++ if (hdev->vhost_ops->vhost_dev_resume) { ++ r = hdev->vhost_ops->vhost_dev_resume(hdev); ++ if (r) { ++ goto fail_start; ++ } ++ } ++ if (vhost_dev_has_iommu(hdev)) { ++ hdev->vhost_ops->vhost_set_iotlb_callback(hdev, true); ++ ++ /* ++ * Update used ring information for IOTLB to work correctly, ++ * vhost-kernel code requires for this. 
++ */ ++ for (i = 0; i < hdev->nvqs; ++i) { ++ struct vhost_virtqueue *vq = hdev->vqs + i; ++ vhost_device_iotlb_miss(hdev, vq->used_phys, true); ++ } ++ } ++ vhost_start_config_intr(hdev); ++ return 0; ++fail_start: ++ if (vrings) { ++ vhost_dev_set_vring_enable(hdev, false); ++ } ++fail_vq: ++ while (--i >= 0) { ++ vhost_virtqueue_stop(hdev, ++ vdev, ++ hdev->vqs + i, ++ hdev->vq_index + i); ++ } ++ ++fail_mem: ++ vdev->vhost_started = false; ++ hdev->started = false; ++ return r; ++} ++ ++int vhost_dev_suspend(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) ++{ ++ int i; ++ int ret = 0; ++ EventNotifier *e = &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier; ++ ++ /* should only be called after backend is connected */ ++ if (!hdev->vhost_ops) { ++ error_report("Missing vhost_ops! Operation not permitted!\n"); ++ return -EPERM; ++ } ++ ++ event_notifier_test_and_clear(e); ++ event_notifier_test_and_clear(&vdev->config_notifier); ++ ++ if (hdev->vhost_ops->vhost_dev_suspend) { ++ ret = hdev->vhost_ops->vhost_dev_suspend(hdev); ++ if (ret) { ++ goto fail_suspend; ++ } ++ } ++ if (vrings) { ++ ret = vhost_dev_set_vring_enable(hdev, false); ++ if (ret) { ++ goto fail_suspend; ++ } ++ } ++ for (i = 0; i < hdev->nvqs; ++i) { ++ vhost_virtqueue_stop(hdev, ++ vdev, ++ hdev->vqs + i, ++ hdev->vq_index + i); ++ } ++ ++ if (vhost_dev_has_iommu(hdev)) { ++ hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false); ++ memory_listener_unregister(&hdev->iommu_listener); ++ } ++ vhost_stop_config_intr(hdev); ++ vhost_log_put(hdev, true); ++ hdev->started = false; ++ vdev->vhost_started = false; ++ hdev->vdev = NULL; ++ ++ return ret; ++ ++fail_suspend: ++ event_notifier_test_and_clear(e); ++ ++ return ret; ++} +diff --git a/include/hw/virtio/vdpa-dev-mig.h b/include/hw/virtio/vdpa-dev-mig.h +new file mode 100644 +index 0000000000..89665ca747 +--- /dev/null ++++ b/include/hw/virtio/vdpa-dev-mig.h +@@ -0,0 +1,16 @@ ++/* ++ * Vhost Vdpa Device Migration Header ++ 
* ++ * Copyright (c) Huawei Technologies Co., Ltd. 2023. All Rights Reserved. ++ */ ++ ++#ifndef _VHOST_VDPA_MIGRATION_H ++#define _VHOST_VDPA_MIGRATION_H ++ ++#include "hw/virtio/vdpa-dev.h" ++ ++void vdpa_migration_register(VhostVdpaDevice *vdev); ++ ++void vdpa_migration_unregister(VhostVdpaDevice *vdev); ++ ++#endif /* _VHOST_VDPA_MIGRATION_H */ +diff --git a/include/hw/virtio/vdpa-dev.h b/include/hw/virtio/vdpa-dev.h +index 4dbf98195c..43cbcef81b 100644 +--- a/include/hw/virtio/vdpa-dev.h ++++ b/include/hw/virtio/vdpa-dev.h +@@ -38,6 +38,7 @@ struct VhostVdpaDevice { + uint16_t queue_size; + bool started; + int (*post_init)(VhostVdpaDevice *v, Error **errp); ++ VMChangeStateEntry *vmstate; + }; + + #endif +diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h +index 6ae86833e3..9ca5819deb 100644 +--- a/include/hw/virtio/vhost.h ++++ b/include/hw/virtio/vhost.h +@@ -466,4 +466,7 @@ int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); + */ + int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); + ++int vhost_dev_resume(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); ++int vhost_dev_suspend(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); ++ + #endif +diff --git a/migration/migration.c b/migration/migration.c +index 23d9233bbe..dce22c2da5 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -99,7 +99,6 @@ static bool migration_object_check(MigrationState *ms, Error **errp); + static int migration_maybe_pause(MigrationState *s, + int *current_active_state, + int new_state); +-static void migrate_fd_cancel(MigrationState *s); + static bool close_return_path_on_source(MigrationState *s); + + static void migration_downtime_start(MigrationState *s) +@@ -1386,7 +1385,7 @@ void migrate_fd_error(MigrationState *s, const Error *error) + migrate_set_error(s, error); + } + +-static void migrate_fd_cancel(MigrationState *s) ++void migrate_fd_cancel(MigrationState *s) + { + 
int old_state ; + +diff --git a/migration/migration.h b/migration/migration.h +index 6aafa04314..2f26c9509b 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -551,4 +551,6 @@ void migration_rp_kick(MigrationState *s); + + int migration_stop_vm(RunState state); + ++void migrate_fd_cancel(MigrationState *s); ++ + #endif +-- +2.27.0 + diff --git a/vhost-introduce-bytemap-for-vhost-backend-logging.patch b/vhost-introduce-bytemap-for-vhost-backend-logging.patch new file mode 100644 index 0000000000000000000000000000000000000000..7293b3b13a637d96422a85a17e2fe52cea5cf825 --- /dev/null +++ b/vhost-introduce-bytemap-for-vhost-backend-logging.patch @@ -0,0 +1,304 @@ +From 962acd498b11ae5ccc040d76ec89990add119dec Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:09:26 +0800 +Subject: [PATCH] vhost: introduce bytemap for vhost backend logging + +As vhost backend may use bytemap for logging, when get log_size +of vhost device, check whether vhost device support VHOST_BACKEND_F_BYTEMAPLOG. +If vhost device support, use bytemap for logging. + +By the way, add log_resize func pointer check and vhost_log_sync return +value check. 
+ +Signed-off-by: libai +--- + hw/virtio/vhost.c | 89 ++++++++++++++++++++++++++++++++++++--- + include/exec/memory.h | 9 ++++ + include/exec/ram_addr.h | 44 +++++++++++++++++++ + include/hw/virtio/vhost.h | 1 + + system/physmem.c | 11 +++++ + 5 files changed, 148 insertions(+), 6 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 038ac37dd0..438182d850 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -29,6 +29,7 @@ + #include "migration/migration.h" + #include "sysemu/dma.h" + #include "trace.h" ++#include "qapi/qapi-commands-migration.h" + + /* enabled until disconnected backend stabilizes */ + #define _VHOST_DEBUG 1 +@@ -44,6 +45,11 @@ + do { } while (0) + #endif + ++static inline bool vhost_bytemap_log_support(struct vhost_dev *dev) ++{ ++ return (dev->backend_cap & BIT_ULL(VHOST_BACKEND_F_BYTEMAPLOG)); ++} ++ + static struct vhost_log *vhost_log; + static struct vhost_log *vhost_log_shm; + +@@ -232,12 +238,40 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, + return 0; + } + ++static int vhost_sync_dirty_bytemap(struct vhost_dev *dev, ++ MemoryRegionSection *section) ++{ ++ unsigned long *bytemap = dev->log->log; ++ return memory_section_set_dirty_bytemap(section, bytemap); ++} ++ + static void vhost_log_sync(MemoryListener *listener, + MemoryRegionSection *section) + { + struct vhost_dev *dev = container_of(listener, struct vhost_dev, + memory_listener); +- vhost_sync_dirty_bitmap(dev, section, 0x0, ~0x0ULL); ++ MigrationState *ms = migrate_get_current(); ++ ++ if (!dev->log_enabled || !dev->started) { ++ return; ++ } ++ ++ if (dev->vhost_ops->vhost_log_sync) { ++ int r = dev->vhost_ops->vhost_log_sync(dev); ++ if (r < 0) { ++ error_report("Failed to sync dirty log: 0x%x\n", r); ++ if (migration_is_running(ms->state)) { ++ qmp_migrate_cancel(NULL); ++ } ++ return; ++ } ++ } ++ ++ if (vhost_bytemap_log_support(dev)) { ++ vhost_sync_dirty_bytemap(dev, section); ++ } else { ++ vhost_sync_dirty_bitmap(dev, section, 
0x0, ~0x0ULL); ++ } + } + + static void vhost_log_sync_range(struct vhost_dev *dev, +@@ -247,7 +281,11 @@ static void vhost_log_sync_range(struct vhost_dev *dev, + /* FIXME: this is N^2 in number of sections */ + for (i = 0; i < dev->n_mem_sections; ++i) { + MemoryRegionSection *section = &dev->mem_sections[i]; +- vhost_sync_dirty_bitmap(dev, section, first, last); ++ if (vhost_bytemap_log_support(dev)) { ++ vhost_sync_dirty_bytemap(dev, section); ++ } else { ++ vhost_sync_dirty_bitmap(dev, section, first, last); ++ } + } + } + +@@ -255,11 +293,19 @@ static uint64_t vhost_get_log_size(struct vhost_dev *dev) + { + uint64_t log_size = 0; + int i; ++ uint64_t vhost_log_chunk_size; ++ ++ if (vhost_bytemap_log_support(dev)) { ++ vhost_log_chunk_size = VHOST_LOG_CHUNK_BYTES; ++ } else { ++ vhost_log_chunk_size = VHOST_LOG_CHUNK; ++ } ++ + for (i = 0; i < dev->mem->nregions; ++i) { + struct vhost_memory_region *reg = dev->mem->regions + i; + uint64_t last = range_get_last(reg->guest_phys_addr, + reg->memory_size); +- log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1); ++ log_size = MAX(log_size, last / vhost_log_chunk_size + 1); + } + return log_size; + } +@@ -377,12 +423,21 @@ static bool vhost_dev_log_is_shared(struct vhost_dev *dev) + dev->vhost_ops->vhost_requires_shm_log(dev); + } + +-static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) ++static inline int vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) + { + struct vhost_log *log = vhost_log_get(size, vhost_dev_log_is_shared(dev)); +- uint64_t log_base = (uintptr_t)log->log; ++ uint64_t log_base; ++ int log_fd; + int r; + ++ if (!log) { ++ r = -ENOMEM; ++ goto out; ++ } ++ ++ log_base = (uint64_t)log->log; ++ log_fd = log_fd; ++ + /* inform backend of log switching, this must be done before + releasing the current log, to ensure no logging is lost */ + r = dev->vhost_ops->vhost_set_log_base(dev, log_base, log); +@@ -390,9 +445,19 @@ static inline void vhost_dev_log_resize(struct 
vhost_dev *dev, uint64_t size) + VHOST_OPS_DEBUG(r, "vhost_set_log_base failed"); + } + ++ if (dev->vhost_ops->vhost_set_log_size) { ++ r = dev->vhost_ops->vhost_set_log_size(dev, size, dev->log); ++ if (r < 0) { ++ VHOST_OPS_DEBUG(r, "vhost_set_log_size failed"); ++ } ++ } ++ + vhost_log_put(dev, true); + dev->log = log; + dev->log_size = size; ++ ++out: ++ return r; + } + + static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr, +@@ -1018,7 +1083,11 @@ static int vhost_migration_log(MemoryListener *listener, bool enable) + } + vhost_log_put(dev, false); + } else { +- vhost_dev_log_resize(dev, vhost_get_log_size(dev)); ++ r = vhost_dev_log_resize(dev, vhost_get_log_size(dev)); ++ if ( r < 0 ) { ++ return r; ++ } ++ + r = vhost_dev_set_log(dev, true); + if (r < 0) { + goto check_dev_state; +@@ -2057,6 +2126,14 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) + VHOST_OPS_DEBUG(r, "vhost_set_log_base failed"); + goto fail_log; + } ++ ++ if (hdev->vhost_ops->vhost_set_log_size) { ++ r = hdev->vhost_ops->vhost_set_log_size(hdev, hdev->log_size, hdev->log); ++ if (r < 0) { ++ VHOST_OPS_DEBUG(r, "vhost_set_log_size failed"); ++ goto fail_log; ++ } ++ } + } + if (vrings) { + r = vhost_dev_set_vring_enable(hdev, true); +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 831f7c996d..e131c2682c 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -2594,6 +2594,15 @@ MemTxResult memory_region_dispatch_write(MemoryRegion *mr, + MemOp op, + MemTxAttrs attrs); + ++/** ++ * memory_section_set_dirty_bytemap: Mark a range of bytes as dirty for a memory section ++ * using a bytemap ++ * ++ * @section: the memory section being dirtied. ++ * @bytemap: bytemap that stores dirty page range information. 
++ */ ++int64_t memory_section_set_dirty_bytemap(MemoryRegionSection *section, unsigned long *bytemap); ++ + /** + * address_space_init: initializes an address space + * +diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h +index 90676093f5..ef6988b445 100644 +--- a/include/exec/ram_addr.h ++++ b/include/exec/ram_addr.h +@@ -535,5 +535,49 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb, + + return num_dirty; + } ++ ++#define BYTES_PER_LONG (sizeof(unsigned long)) ++#define BYTE_WORD(nr) ((nr) / BYTES_PER_LONG) ++#define BYTES_TO_LONGS(nr) DIV_ROUND_UP(nr, BYTES_PER_LONG) ++ ++static inline int64_t _set_dirty_bytemap_atomic(unsigned long *bytemap, unsigned long cur_pfn) ++{ ++ char *byte_of_long = (char *)bytemap; ++ int i; ++ int64_t dirty_num = 0; ++ ++ for (i = 0; i < BYTES_PER_LONG; i++) { ++ if (byte_of_long[i]) { ++ cpu_physical_memory_set_dirty_range((cur_pfn + i) << TARGET_PAGE_BITS, ++ TARGET_PAGE_SIZE, ++ 1 << DIRTY_MEMORY_MIGRATION); ++ /* Per byte ops, no need to atomic_xchg */ ++ byte_of_long[i] = 0; ++ dirty_num++; ++ } ++ } ++ ++ return dirty_num; ++} ++ ++static inline int64_t cpu_physical_memory_set_dirty_bytemap(unsigned long *bytemap, ++ ram_addr_t start, ++ ram_addr_t pages) ++{ ++ unsigned long i; ++ unsigned long len = BYTES_TO_LONGS(pages); ++ unsigned long pfn = (start >> TARGET_PAGE_BITS) / ++ BYTES_PER_LONG * BYTES_PER_LONG; ++ int64_t dirty_mig_bits = 0; ++ ++ for (i = 0; i < len; i++) { ++ if (bytemap[i]) { ++ dirty_mig_bits += _set_dirty_bytemap_atomic(&bytemap[i], ++ pfn + BYTES_PER_LONG * i); ++ } ++ } ++ ++ return dirty_mig_bits; ++} + #endif + #endif +diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h +index 444ca0ad42..6ae86833e3 100644 +--- a/include/hw/virtio/vhost.h ++++ b/include/hw/virtio/vhost.h +@@ -43,6 +43,7 @@ typedef unsigned long vhost_log_chunk_t; + #define VHOST_LOG_PAGE 0x1000 + #define VHOST_LOG_BITS (8 * sizeof(vhost_log_chunk_t)) + #define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * 
VHOST_LOG_BITS) ++#define VHOST_LOG_CHUNK_BYTES (VHOST_LOG_PAGE * sizeof(vhost_log_chunk_t)) + #define VHOST_INVALID_FEATURE_BIT (0xff) + #define VHOST_QUEUE_NUM_CONFIG_INR 0 + +diff --git a/system/physmem.c b/system/physmem.c +index f14d64819b..247c252e53 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -2602,6 +2602,17 @@ static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr, + cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask); + } + ++int64_t memory_section_set_dirty_bytemap(MemoryRegionSection *section, unsigned long *bytemap) ++{ ++ ram_addr_t start = section->offset_within_region + ++ memory_region_get_ram_addr(section->mr); ++ ram_addr_t pages = int128_get64(section->size) >> TARGET_PAGE_BITS; ++ ++ hwaddr idx = BYTE_WORD( ++ section->offset_within_address_space >> TARGET_PAGE_BITS); ++ return cpu_physical_memory_set_dirty_bytemap(bytemap + idx, start, pages); ++} ++ + void memory_region_flush_rom_device(MemoryRegion *mr, hwaddr addr, hwaddr size) + { + /* +-- +2.27.0 + diff --git a/vhost-user-Add-support-reconnect-vhost-user-socket.patch b/vhost-user-Add-support-reconnect-vhost-user-socket.patch new file mode 100644 index 0000000000000000000000000000000000000000..eec650e70d8b06bb45a20ad0504e803153bfa805 --- /dev/null +++ b/vhost-user-Add-support-reconnect-vhost-user-socket.patch @@ -0,0 +1,168 @@ +From 0bc608ab4117818b32d2a1aaf2d4f5c2aeb54af7 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Fri, 11 Feb 2022 18:05:47 +0800 +Subject: [PATCH] vhost-user: Add support reconnect vhost-user socket + +Add support reconnect vhost-user socket, the reconnect time +is set to be 3 seconds. 
+ +Signed-off-by: Jinhua Cao +--- + chardev/char-socket.c | 19 ++++++++++++++++++- + hw/net/vhost_net.c | 4 +++- + hw/virtio/vhost-user.c | 6 ++++++ + include/chardev/char.h | 16 ++++++++++++++++ + net/vhost-user.c | 3 +++ + 5 files changed, 46 insertions(+), 2 deletions(-) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index 034840593d..9c60e15c8e 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -337,6 +337,22 @@ static GSource *tcp_chr_add_watch(Chardev *chr, GIOCondition cond) + return qio_channel_create_watch(s->ioc, cond); + } + ++static void tcp_chr_set_reconnect_time(Chardev *chr, ++ int64_t reconnect_time) ++{ ++ SocketChardev *s = SOCKET_CHARDEV(chr); ++ s->reconnect_time = reconnect_time; ++} ++ ++void qemu_chr_set_reconnect_time(Chardev *chr, int64_t reconnect_time) ++{ ++ ChardevClass *cc = CHARDEV_GET_CLASS(chr); ++ ++ if (cc->chr_set_reconnect_time) { ++ cc->chr_set_reconnect_time(chr, reconnect_time); ++ } ++} ++ + static void remove_hup_source(SocketChardev *s) + { + if (s->hup_source != NULL) { +@@ -537,7 +553,7 @@ static int tcp_chr_sync_read(Chardev *chr, const uint8_t *buf, int len) + if (s->state != TCP_CHARDEV_STATE_DISCONNECTED) { + qio_channel_set_blocking(s->ioc, false, NULL); + } +- if (size == 0) { ++ if (size == 0 && chr->chr_for_flag != CHR_FOR_VHOST_USER) { + /* connection closed */ + tcp_chr_disconnect(chr); + } +@@ -1543,6 +1559,7 @@ static void char_socket_class_init(ObjectClass *oc, void *data) + cc->set_msgfds = tcp_set_msgfds; + cc->chr_add_client = tcp_chr_add_client; + cc->chr_add_watch = tcp_chr_add_watch; ++ cc->chr_set_reconnect_time = tcp_chr_set_reconnect_time; + cc->chr_update_read_handler = tcp_chr_update_read_handler; + + object_class_property_add(oc, "addr", "SocketAddress", +diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c +index 1b08b02477..e48c373b14 100644 +--- a/hw/net/vhost_net.c ++++ b/hw/net/vhost_net.c +@@ -459,7 +459,9 @@ int vhost_net_start(VirtIODevice *dev, 
NetClientState *ncs, + peer = qemu_get_peer(ncs, n->max_queue_pairs); + } + +- if (peer->vring_enable) { ++ /* ovs needs to restore all states of vring */ ++ if (peer->vring_enable || ++ ncs[i].peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { + /* restore vring enable state */ + r = vhost_set_vring_enable(peer, peer->vring_enable); + +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index f214df804b..05e14e1eff 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -2126,9 +2126,15 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, + struct vhost_user *u; + VhostUserState *vus = (VhostUserState *) opaque; + int err; ++ Chardev *chr; + + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); + ++ chr = qemu_chr_fe_get_driver(((VhostUserState *)opaque)->chr); ++ if (chr) { ++ chr->chr_for_flag = CHR_FOR_VHOST_USER; ++ } ++ + u = g_new0(struct vhost_user, 1); + u->user = vus; + u->dev = dev; +diff --git a/include/chardev/char.h b/include/chardev/char.h +index 01df55f9e8..f8bd469466 100644 +--- a/include/chardev/char.h ++++ b/include/chardev/char.h +@@ -14,6 +14,8 @@ + #define IAC_SB 250 + #define IAC 255 + ++#define CHR_FOR_VHOST_USER 0x32a1 ++ + /* character device */ + typedef struct CharBackend CharBackend; + +@@ -70,6 +72,7 @@ struct Chardev { + GSource *gsource; + GMainContext *gcontext; + DECLARE_BITMAP(features, QEMU_CHAR_FEATURE_LAST); ++ int chr_for_flag; + }; + + /** +@@ -227,6 +230,16 @@ int qemu_chr_write(Chardev *s, const uint8_t *buf, int len, bool write_all); + #define qemu_chr_write_all(s, buf, len) qemu_chr_write(s, buf, len, true) + int qemu_chr_wait_connected(Chardev *chr, Error **errp); + ++/** ++ * @qemu_chr_set_reconnect_time: ++ * ++ * Set reconnect time for char disconnect. ++ * Currently, only vhost user will call it. 
++ * ++ * @reconnect_time the reconnect_time to be set ++ */ ++void qemu_chr_set_reconnect_time(Chardev *chr, int64_t reconnect_time); ++ + #define TYPE_CHARDEV "chardev" + OBJECT_DECLARE_TYPE(Chardev, ChardevClass, CHARDEV) + +@@ -306,6 +319,9 @@ struct ChardevClass { + + /* handle various events */ + void (*chr_be_event)(Chardev *s, QEMUChrEvent event); ++ ++ /* set reconnect time */ ++ void (*chr_set_reconnect_time)(Chardev *chr, int64_t reconnect_time); + }; + + Chardev *qemu_chardev_new(const char *id, const char *typename, +diff --git a/net/vhost-user.c b/net/vhost-user.c +index 12555518e8..51fa8c678f 100644 +--- a/net/vhost-user.c ++++ b/net/vhost-user.c +@@ -21,6 +21,8 @@ + #include "qemu/option.h" + #include "trace.h" + ++#define VHOST_USER_RECONNECT_TIME (3) ++ + typedef struct NetVhostUserState { + NetClientState nc; + CharBackend chr; /* only queue index 0 */ +@@ -292,6 +294,7 @@ static void net_vhost_user_event(void *opaque, QEMUChrEvent event) + trace_vhost_user_event(chr->label, event); + switch (event) { + case CHR_EVENT_OPENED: ++ qemu_chr_set_reconnect_time(chr, VHOST_USER_RECONNECT_TIME); + if (vhost_user_start(queues, ncs, s->vhost_user) < 0) { + qemu_chr_fe_disconnect(&s->chr); + return; +-- +2.27.0 + diff --git a/vhost-user-Set-the-acked_features-to-vm-s-featrue.patch b/vhost-user-Set-the-acked_features-to-vm-s-featrue.patch new file mode 100644 index 0000000000000000000000000000000000000000..24c2670561fc19f9c6c4e856c74c7bf9799c3f75 --- /dev/null +++ b/vhost-user-Set-the-acked_features-to-vm-s-featrue.patch @@ -0,0 +1,96 @@ +From 0154183e118169be5945cb5ebec2b79379071591 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Fri, 11 Feb 2022 18:49:21 +0800 +Subject: [PATCH] vhost-user: Set the acked_features to vm's featrue + +Fix the problem when vm restart, the ovs restart and lead to the net +unreachable. The soluation is set the acked_features to vm's featrue +just the same as guest virtio-net mod load. 
+ +Signed-off-by: Jinhua Cao +--- + hw/net/vhost_net.c | 58 +++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 57 insertions(+), 1 deletion(-) + +diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c +index e8e1661646..1b08b02477 100644 +--- a/hw/net/vhost_net.c ++++ b/hw/net/vhost_net.c +@@ -167,9 +167,26 @@ static int vhost_net_get_fd(NetClientState *backend) + } + } + ++static uint64_t vhost_get_mask_features(const int *feature_bits, uint64_t features) ++{ ++ const int *bit = feature_bits; ++ uint64_t out_features = 0; ++ ++ while (*bit != VHOST_INVALID_FEATURE_BIT) { ++ uint64_t bit_mask = (1ULL << *bit); ++ if (features & bit_mask) { ++ out_features |= bit_mask; ++ } ++ bit++; ++ } ++ return out_features; ++} ++ + struct vhost_net *vhost_net_init(VhostNetOptions *options) + { + int r; ++ VirtIONet *n; ++ VirtIODevice *vdev; + bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL; + struct vhost_net *net = g_new0(struct vhost_net, 1); + uint64_t features = 0; +@@ -195,7 +212,46 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) + net->backend = r; + net->dev.protocol_features = 0; + } else { +- net->dev.backend_features = 0; ++ /* for ovs restart when vm start. ++ * Normal situation: ++ * 1.vm start. ++ * 2.vhost_net_init init ok, then dev.acked_features is 0x40000000. ++ * 3.guest virtio-net mod load. qemu will call virtio_net_set_features set ++ * dev.acked_features to 0x40408000. ++ * 4.feature set to ovs's vhostuser(0x40408000). ++ * 5.ovs restart. ++ * 6.vhost_user_stop will save net->dev.acked_features(0x40408000) to ++ * VhostUserState's acked_features(0x40408000). ++ * 7.restart ok. ++ * 8.vhost_net_init fun call vhost_user_get_acked_features get the save ++ * features, and set to net->dev.acked_features. ++ * Abnormal situation: ++ * 1.vm start. ++ * 2.vhost_net_init init ok, then dev.acked_features is 0x40000000. ++ * 3.ovs restart. 
++ * 4.vhost_user_stop will save net->dev.acked_features(0x40000000) to ++ * VhostUserState's acked_features(0x40000000). ++ * 5.guest virtio-net mod load. qemu will call virtio_net_set_features set ++ * dev.acked_features to 0x40408000. ++ * 6.restart ok. ++ * 7.vhost_net_init fun call vhost_user_get_acked_features get the save ++ * features(0x40000000), and set to net->dev.acked_features(0x40000000). ++ * 8.feature set to ovs's vhostuser(0x40000000). ++ * ++ * in abnormal situation, qemu set the wrong features to ovs's vhostuser, ++ * then the vm's network will be down. ++ * in abnormal situation, we found it just lost the guest feartures in ++ * acked_features, so hear we set the acked_features to vm's featrue ++ * just the same as guest virtio-net mod load. ++ */ ++ if (options->net_backend->peer) { ++ n = qemu_get_nic_opaque(options->net_backend->peer); ++ vdev = VIRTIO_DEVICE(n); ++ net->dev.backend_features = vhost_get_mask_features(vhost_net_get_feature_bits(net), ++ vdev->guest_features); ++ } else { ++ net->dev.backend_features = 0; ++ } + net->dev.protocol_features = 0; + net->backend = -1; + +-- +2.27.0 + diff --git a/vhost-user-add-unregister_savevm-when-vhost-user-cle.patch b/vhost-user-add-unregister_savevm-when-vhost-user-cle.patch new file mode 100644 index 0000000000000000000000000000000000000000..558464ce14870d63b86b4685410cff74235a2865 --- /dev/null +++ b/vhost-user-add-unregister_savevm-when-vhost-user-cle.patch @@ -0,0 +1,32 @@ +From c65ff10063a6c599b88cba27fd70a72e2e0cc0ff Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 20:21:33 +0800 +Subject: [PATCH] vhost-user: add unregister_savevm when vhost-user cleanup + +commit 12cf5e9ece ("vhost-user: add vhost_set_mem_table +when vm load_setup at destination") only register savevm +handler but not unregister it, which will cause the +number of handers increase when vhost-user devices hotplug, +so this commit add unregister_savevm when vhost-user cleanup. 
+ +Fixes: 12cf5e9ece ("vhost-user: add vhost_set_mem_table when vm load_setup at destination") +Signed-off-by: Jinhua Cao +--- + hw/virtio/vhost-user.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index 6739dfc98e..e589ee3572 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -2310,6 +2310,7 @@ static int vhost_user_backend_cleanup(struct vhost_dev *dev) + u->region_rb_len = 0; + g_free(u); + dev->opaque = 0; ++ unregister_savevm(NULL, "vhost-user", dev); + + return 0; + } +-- +2.27.0 + diff --git a/vhost-user-add-vhost_set_mem_table-when-vm-load_setu.patch b/vhost-user-add-vhost_set_mem_table-when-vm-load_setu.patch new file mode 100644 index 0000000000000000000000000000000000000000..795185b2a1d522411e6b1bb5b127e56048afa7c1 --- /dev/null +++ b/vhost-user-add-vhost_set_mem_table-when-vm-load_setu.patch @@ -0,0 +1,130 @@ +From 12cf5e9ece9cb0825f14ca80f6b1c5d1eb95c3e5 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Fri, 11 Feb 2022 18:59:34 +0800 +Subject: [PATCH] vhost-user: add vhost_set_mem_table when vm load_setup at + destination + +When migrate huge vm, packages lost are 90+. + +During the load_setup of the destination vm, pass the +vm mem structure to ovs, the netcard could be enabled +when the migration finish state shifting. 
+ +Signed-off-by: Jinhua Cao +--- + hw/virtio/vhost-user.c | 24 ++++++++++++++++++++++++ + tests/qtest/vhost-user-test.c | 35 ++++++++++++++++++----------------- + 2 files changed, 42 insertions(+), 17 deletions(-) + +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index f214df804b..6739dfc98e 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -28,6 +28,7 @@ + #include "sysemu/cryptodev.h" + #include "migration/migration.h" + #include "migration/postcopy-ram.h" ++#include "migration/register.h" + #include "trace.h" + #include "exec/ramblock.h" + +@@ -2119,6 +2120,28 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, + return 0; + } + ++static int vhost_user_load_setup(QEMUFile *f, void *opaque) ++{ ++ struct vhost_dev *hdev = opaque; ++ int r; ++ ++ if (hdev->vhost_ops && hdev->vhost_ops->vhost_set_mem_table) { ++ r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem); ++ if (r < 0) { ++ qemu_log("error: vhost_set_mem_table failed: %s(%d)\n", ++ strerror(errno), errno); ++ return r; ++ } else { ++ qemu_log("info: vhost_set_mem_table OK\n"); ++ } ++ } ++ return 0; ++} ++ ++SaveVMHandlers savevm_vhost_user_handlers = { ++ .load_setup = vhost_user_load_setup, ++}; ++ + static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, + Error **errp) + { +@@ -2255,6 +2278,7 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, + + u->postcopy_notifier.notify = vhost_user_postcopy_notifier; + postcopy_add_notifier(&u->postcopy_notifier); ++ register_savevm_live("vhost-user", -1, 1, &savevm_vhost_user_handlers, dev); + + return 0; + } +diff --git a/tests/qtest/vhost-user-test.c b/tests/qtest/vhost-user-test.c +index d4e437265f..fadf3f0f2e 100644 +--- a/tests/qtest/vhost-user-test.c ++++ b/tests/qtest/vhost-user-test.c +@@ -799,6 +799,23 @@ static void test_read_guest_mem(void *obj, void *arg, QGuestAllocator *alloc) + read_guest_mem_server(global_qtest, server); + } + ++static void 
wait_for_rings_started(TestServer *s, size_t count) ++{ ++ gint64 end_time; ++ ++ g_mutex_lock(&s->data_mutex); ++ end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND; ++ while (ctpop64(s->rings) != count) { ++ if (!g_cond_wait_until(&s->data_cond, &s->data_mutex, end_time)) { ++ /* timeout has passed */ ++ g_assert_cmpint(ctpop64(s->rings), ==, count); ++ break; ++ } ++ } ++ ++ g_mutex_unlock(&s->data_mutex); ++} ++ + static void test_migrate(void *obj, void *arg, QGuestAllocator *alloc) + { + TestServer *s = arg; +@@ -869,6 +886,7 @@ static void test_migrate(void *obj, void *arg, QGuestAllocator *alloc) + qtest_qmp_eventwait(to, "RESUME"); + + g_assert(wait_for_fds(dest)); ++ wait_for_rings_started(dest, 2); + read_guest_mem_server(to, dest); + + g_source_destroy(source); +@@ -880,23 +898,6 @@ static void test_migrate(void *obj, void *arg, QGuestAllocator *alloc) + g_string_free(dest_cmdline, true); + } + +-static void wait_for_rings_started(TestServer *s, size_t count) +-{ +- gint64 end_time; +- +- g_mutex_lock(&s->data_mutex); +- end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND; +- while (ctpop64(s->rings) != count) { +- if (!g_cond_wait_until(&s->data_cond, &s->data_mutex, end_time)) { +- /* timeout has passed */ +- g_assert_cmpint(ctpop64(s->rings), ==, count); +- break; +- } +- } +- +- g_mutex_unlock(&s->data_mutex); +-} +- + static inline void test_server_connect(TestServer *server) + { + test_server_create_chr(server, ",reconnect=1"); +-- +2.27.0 + diff --git a/vhost-user-quit-infinite-loop-while-used-memslots-is.patch b/vhost-user-quit-infinite-loop-while-used-memslots-is.patch new file mode 100644 index 0000000000000000000000000000000000000000..c62111f584f741e6d64f9d08f77939bf3c525814 --- /dev/null +++ b/vhost-user-quit-infinite-loop-while-used-memslots-is.patch @@ -0,0 +1,89 @@ +From 90d4333d4bbde45a10892bf9004979d239d39e28 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Fri, 11 Feb 2022 19:24:30 +0800 +Subject: [PATCH] vhost-user: 
quit infinite loop while used memslots is more + than the backend limit + +When used memslots is more than the backend limit, +the vhost-user netcard would attach fail and quit +infinite loop. + +Signed-off-by: Jinhua Cao +--- + hw/virtio/vhost.c | 10 ++++++++++ + include/hw/virtio/vhost.h | 1 + + net/vhost-user.c | 5 +++++ + 3 files changed, 16 insertions(+) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index a8adc149ad..038ac37dd0 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -56,6 +56,8 @@ static unsigned int used_shared_memslots; + static QLIST_HEAD(, vhost_dev) vhost_devices = + QLIST_HEAD_INITIALIZER(vhost_devices); + ++bool used_memslots_exceeded; ++ + unsigned int vhost_get_max_memslots(void) + { + unsigned int max = UINT_MAX; +@@ -1569,8 +1571,11 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, + error_setg(errp, "vhost backend memory slots limit (%d) is less" + " than current number of used (%d) and reserved (%d)" + " memory slots for memory devices.", limit, used, reserved); ++ used_memslots_exceeded = true; + r = -EINVAL; + goto fail_busyloop; ++ } else { ++ used_memslots_exceeded = false; + } + + return 0; +@@ -2405,3 +2410,8 @@ fail: + + return ret; + } ++ ++bool used_memslots_is_exceeded(void) ++{ ++ return used_memslots_exceeded; ++} +diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h +index 02477788df..444ca0ad42 100644 +--- a/include/hw/virtio/vhost.h ++++ b/include/hw/virtio/vhost.h +@@ -340,6 +340,7 @@ int vhost_dev_set_inflight(struct vhost_dev *dev, + struct vhost_inflight *inflight); + int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size, + struct vhost_inflight *inflight); ++bool used_memslots_is_exceeded(void); + bool vhost_dev_has_iommu(struct vhost_dev *dev); + + #ifdef CONFIG_VHOST +diff --git a/net/vhost-user.c b/net/vhost-user.c +index 51fa8c678f..86fd5056ab 100644 +--- a/net/vhost-user.c ++++ b/net/vhost-user.c +@@ -20,6 +20,7 @@ + #include "qemu/error-report.h" + 
#include "qemu/option.h" + #include "trace.h" ++#include "include/hw/virtio/vhost.h" + + #define VHOST_USER_RECONNECT_TIME (3) + +@@ -373,6 +374,10 @@ static int net_vhost_user_init(NetClientState *peer, const char *device, + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, + net_vhost_user_event, NULL, nc0->name, NULL, + true); ++ if (used_memslots_is_exceeded()) { ++ error_report("used memslots exceeded the backend limit, quit loop"); ++ goto err; ++ } + } while (!s->started); + + assert(s->vhost_net); +-- +2.27.0 + diff --git a/vhost-vdpa-add-VHOST_BACKEND_F_BYTEMAPLOG.patch b/vhost-vdpa-add-VHOST_BACKEND_F_BYTEMAPLOG.patch new file mode 100644 index 0000000000000000000000000000000000000000..583fc50067bd42d94ccb891dbc358b4c9470613d --- /dev/null +++ b/vhost-vdpa-add-VHOST_BACKEND_F_BYTEMAPLOG.patch @@ -0,0 +1,49 @@ +From 3fe9a15feba924675ffcc5b797185091cfb8a007 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 14:49:53 +0800 +Subject: [PATCH] vhost-vdpa: add VHOST_BACKEND_F_BYTEMAPLOG + +support VHOST_BACKEND_F_BYTEMAPLOG to support vhost +device bytemap logging. 
+ +Signed-off-by: libai +--- + hw/virtio/vhost-vdpa.c | 9 +++++---- + include/standard-headers/linux/vhost_types.h | 2 ++ + 2 files changed, 7 insertions(+), 4 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 819b2d811a..ce8ff7f417 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -829,10 +829,11 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, + static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) + { + uint64_t features; +- uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | +- 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | +- 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID | +- 0x1ULL << VHOST_BACKEND_F_SUSPEND; ++ uint64_t f = BIT_ULL(VHOST_BACKEND_F_IOTLB_MSG_V2) | ++ BIT_ULL(VHOST_BACKEND_F_IOTLB_BATCH) | ++ BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID) | ++ BIT_ULL(VHOST_BACKEND_F_SUSPEND) | ++ BIT_ULL(VHOST_BACKEND_F_BYTEMAPLOG); + int r; + + if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { +diff --git a/include/standard-headers/linux/vhost_types.h b/include/standard-headers/linux/vhost_types.h +index fd54044936..46fc53cd83 100644 +--- a/include/standard-headers/linux/vhost_types.h ++++ b/include/standard-headers/linux/vhost_types.h +@@ -192,5 +192,7 @@ struct vhost_vdpa_iova_range { + #define VHOST_BACKEND_F_DESC_ASID 0x7 + /* IOTLB don't flush memory mapping across device reset */ + #define VHOST_BACKEND_F_IOTLB_PERSIST 0x8 ++/* device can use bytemap log */ ++#define VHOST_BACKEND_F_BYTEMAPLOG 0x3f + + #endif +-- +2.27.0 + diff --git a/vhost-vdpa-add-migration-log-ops-for-VhostOps.patch b/vhost-vdpa-add-migration-log-ops-for-VhostOps.patch new file mode 100644 index 0000000000000000000000000000000000000000..83b2612fbff63dda4649c055176a43423092c1d6 --- /dev/null +++ b/vhost-vdpa-add-migration-log-ops-for-VhostOps.patch @@ -0,0 +1,127 @@ +From 3bc7a4e430e01fd90b427bf74a904664eda9ece6 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:04:25 +0800 +Subject: [PATCH] vhost-vdpa: 
add migration log ops for VhostOps + +Implement vhost_set_log_size for setting buffer size for logging. +Implement vhost_set_log_fd to specify an eventfd to signal on log write. +Implement vhost_log_sync for getting dirtymap logged by vhost backend. + +Signed-off-by: libai +--- + hw/virtio/vhost-vdpa.c | 37 +++++++++++++++++++++++++++++++ + include/hw/virtio/vhost-backend.h | 8 +++++++ + linux-headers/linux/vhost.h | 4 ++++ + 3 files changed, 49 insertions(+) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index ce8ff7f417..037a9c6e4c 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -1355,6 +1355,30 @@ static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, + return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base); + } + ++static int vhost_vdpa_set_log_fd(struct vhost_dev *dev, int fd, ++ struct vhost_log *log) ++{ ++ struct vhost_vdpa *v = dev->opaque; ++ if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) { ++ return 0; ++ } ++ ++ return vhost_vdpa_call(dev, VHOST_SET_LOG_FD, &fd); ++} ++ ++static int vhost_vdpa_set_log_size(struct vhost_dev *dev, uint64_t size, ++ struct vhost_log *log) ++{ ++ struct vhost_vdpa *v = dev->opaque; ++ uint64_t logsize = size * sizeof(*(log->log)); ++ ++ if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) { ++ return 0; ++ } ++ ++ return vhost_vdpa_call(dev, VHOST_SET_LOG_SIZE, &logsize); ++} ++ + static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev, + struct vhost_vring_addr *addr) + { +@@ -1489,11 +1513,23 @@ static bool vhost_vdpa_force_iommu(struct vhost_dev *dev) + return true; + } + ++static int vhost_vdpa_log_sync(struct vhost_dev *dev) ++{ ++ struct vhost_vdpa *v = dev->opaque; ++ if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) { ++ return 0; ++ } ++ ++ return vhost_vdpa_call(dev, VHOST_LOG_SYNC, NULL); ++} ++ + const VhostOps vdpa_ops = { + .backend_type = VHOST_BACKEND_TYPE_VDPA, + .vhost_backend_init = vhost_vdpa_init, + .vhost_backend_cleanup 
= vhost_vdpa_cleanup, + .vhost_set_log_base = vhost_vdpa_set_log_base, ++ .vhost_set_log_size = vhost_vdpa_set_log_size, ++ .vhost_set_log_fd = vhost_vdpa_set_log_fd, + .vhost_set_vring_addr = vhost_vdpa_set_vring_addr, + .vhost_set_vring_num = vhost_vdpa_set_vring_num, + .vhost_set_vring_base = vhost_vdpa_set_vring_base, +@@ -1520,6 +1556,7 @@ const VhostOps vdpa_ops = { + .vhost_get_device_id = vhost_vdpa_get_device_id, + .vhost_vq_get_addr = vhost_vdpa_vq_get_addr, + .vhost_force_iommu = vhost_vdpa_force_iommu, ++ .vhost_log_sync = vhost_vdpa_log_sync, + .vhost_set_config_call = vhost_vdpa_set_config_call, + .vhost_reset_status = vhost_vdpa_reset_status, + }; +diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h +index a86d103f82..71b02e4a12 100644 +--- a/include/hw/virtio/vhost-backend.h ++++ b/include/hw/virtio/vhost-backend.h +@@ -65,6 +65,11 @@ typedef int (*vhost_scsi_get_abi_version_op)(struct vhost_dev *dev, + int *version); + typedef int (*vhost_set_log_base_op)(struct vhost_dev *dev, uint64_t base, + struct vhost_log *log); ++typedef int (*vhost_set_log_size_op)(struct vhost_dev *dev, uint64_t size, ++ struct vhost_log *log); ++typedef int (*vhost_set_log_fd_op)(struct vhost_dev *dev, int fd, ++ struct vhost_log *log); ++typedef int (*vhost_log_sync_op)(struct vhost_dev *dev); + typedef int (*vhost_set_mem_table_op)(struct vhost_dev *dev, + struct vhost_memory *mem); + typedef int (*vhost_set_vring_addr_op)(struct vhost_dev *dev, +@@ -162,6 +167,9 @@ typedef struct VhostOps { + vhost_scsi_clear_endpoint_op vhost_scsi_clear_endpoint; + vhost_scsi_get_abi_version_op vhost_scsi_get_abi_version; + vhost_set_log_base_op vhost_set_log_base; ++ vhost_set_log_size_op vhost_set_log_size; ++ vhost_set_log_fd_op vhost_set_log_fd; ++ vhost_log_sync_op vhost_log_sync; + vhost_set_mem_table_op vhost_set_mem_table; + vhost_set_vring_addr_op vhost_set_vring_addr; + vhost_set_vring_endian_op vhost_set_vring_endian; +diff --git 
a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h +index 649560c685..19dc7fd36c 100644 +--- a/linux-headers/linux/vhost.h ++++ b/linux-headers/linux/vhost.h +@@ -43,6 +43,10 @@ + * The bit is set using an atomic 32 bit operation. */ + /* Set base address for logging. */ + #define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) ++/* Set buffer size for logging */ ++#define VHOST_SET_LOG_SIZE _IOW(VHOST_VIRTIO, 0x05, __u64) ++/* Logging sync */ ++#define VHOST_LOG_SYNC _IO(VHOST_VIRTIO, 0x06) + /* Specify an eventfd file descriptor to signal on log write. */ + #define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) + /* By default, a device gets one vhost_worker that its virtqueues share. This +-- +2.27.0 + diff --git a/virtio-bugfix-add-rcu_read_lock-when-vring_avail_idx.patch b/virtio-bugfix-add-rcu_read_lock-when-vring_avail_idx.patch new file mode 100644 index 0000000000000000000000000000000000000000..1d476ac2edd6745c975e35694679cbe38c8e3cd2 --- /dev/null +++ b/virtio-bugfix-add-rcu_read_lock-when-vring_avail_idx.patch @@ -0,0 +1,38 @@ +From 7b4a9547e68147291e68258db9415ef5a20fe06b Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 11:16:26 +0800 +Subject: [PATCH] virtio: bugfix: add rcu_read_lock when vring_avail_idx is + called + +vring_avail_idx() should be called within rcu_read_lock(), +or it may get NULL caches in vring_get_region_caches() and +trigger assert(). + +Signed-off-by: Jinhua Cao +--- + hw/virtio/virtio.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 27ceab92be..ec09d515c2 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2801,6 +2801,7 @@ static void check_vring_avail_num(VirtIODevice *vdev, int index) + { + uint16_t nheads; + ++ rcu_read_lock(); + /* Check it isn't doing strange things with descriptor numbers. 
*/ + nheads = vring_avail_idx(&vdev->vq[index]) - vdev->vq[index].last_avail_idx; + if (nheads > vdev->vq[index].vring.num) { +@@ -2811,6 +2812,7 @@ static void check_vring_avail_num(VirtIODevice *vdev, int index) + vring_avail_idx(&vdev->vq[index]), + vdev->vq[index].last_avail_idx, nheads); + } ++ rcu_read_unlock(); + } + + int virtio_save(VirtIODevice *vdev, QEMUFile *f) +-- +2.27.0 + diff --git a/virtio-bugfix-check-the-value-of-caches-before-acces.patch b/virtio-bugfix-check-the-value-of-caches-before-acces.patch new file mode 100644 index 0000000000000000000000000000000000000000..56d0513fecb8f477e785bed17a1d59ab51ca42f4 --- /dev/null +++ b/virtio-bugfix-check-the-value-of-caches-before-acces.patch @@ -0,0 +1,42 @@ +From f6b3e8ea39d00d25ab979f7b24842dc24e263ed8 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 14:37:52 +0800 +Subject: [PATCH] virtio: bugfix: check the value of caches before accessing it + +Vring caches may be NULL in check_vring_avail_num() if +virtio_reset() is called at the same time, such as when +the virtual machine starts. +So check it before accessing it in vring_avail_idx(). + +Signed-off-by: Jinhua Cao +--- + hw/virtio/virtio.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 1f78b74c00..d93ea62723 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2800,8 +2800,19 @@ static const VMStateDescription vmstate_virtio = { + static void check_vring_avail_num(VirtIODevice *vdev, int index) + { + uint16_t nheads; ++ VRingMemoryRegionCaches *caches; + + rcu_read_lock(); ++ caches = qatomic_rcu_read(&vdev->vq[index].vring.caches); ++ if (caches == NULL) { ++ /* ++ * caches may be NULL if virtio_reset is called at the same time, ++ * such as when the virtual machine starts. ++ */ ++ rcu_read_unlock(); ++ return; ++ } ++ + /* Check it isn't doing strange things with descriptor numbers. 
*/ + nheads = vring_avail_idx(&vdev->vq[index]) - vdev->vq[index].last_avail_idx; + if (nheads > vdev->vq[index].vring.num) { +-- +2.27.0 + diff --git a/virtio-check-descriptor-numbers.patch b/virtio-check-descriptor-numbers.patch new file mode 100644 index 0000000000000000000000000000000000000000..f20748d899e42c610ff9653c39b99a331e919ba8 --- /dev/null +++ b/virtio-check-descriptor-numbers.patch @@ -0,0 +1,52 @@ +From b57e956ea522b487081d1c94aa2e4af6a3314d20 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 11:09:36 +0800 +Subject: [PATCH] virtio: check descriptor numbers + +Check if the vring num is normal in virtio_save(), and add a LOG +if the vm pushed a wrong vring num down through writing IO Port. + +Signed-off-by: Jinhua Cao +--- + hw/virtio/virtio.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index a9aa0c4f66..27ceab92be 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2797,6 +2797,22 @@ static const VMStateDescription vmstate_virtio = { + } + }; + ++static void check_vring_avail_num(VirtIODevice *vdev, int index) ++{ ++ uint16_t nheads; ++ ++ /* Check it isn't doing strange things with descriptor numbers. 
*/ ++ nheads = vring_avail_idx(&vdev->vq[index]) - vdev->vq[index].last_avail_idx; ++ if (nheads > vdev->vq[index].vring.num) { ++ qemu_log("VQ %d size 0x%x Guest index 0x%x " ++ "inconsistent with Host index 0x%x: " ++ "delta 0x%x\n", ++ index, vdev->vq[index].vring.num, ++ vring_avail_idx(&vdev->vq[index]), ++ vdev->vq[index].last_avail_idx, nheads); ++ } ++} ++ + int virtio_save(VirtIODevice *vdev, QEMUFile *f) + { + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); +@@ -2827,6 +2843,8 @@ int virtio_save(VirtIODevice *vdev, QEMUFile *f) + if (vdev->vq[i].vring.num == 0) + break; + ++ check_vring_avail_num(vdev, i); ++ + qemu_put_be32(f, vdev->vq[i].vring.num); + if (k->has_variable_vring_alignment) { + qemu_put_be32(f, vdev->vq[i].vring.align); +-- +2.27.0 + diff --git a/virtio-net-bugfix-do-not-delete-netdev-before-virtio.patch b/virtio-net-bugfix-do-not-delete-netdev-before-virtio.patch new file mode 100644 index 0000000000000000000000000000000000000000..e33cf68d855ec417f92778a3c5cea4b5da32e4d7 --- /dev/null +++ b/virtio-net-bugfix-do-not-delete-netdev-before-virtio.patch @@ -0,0 +1,38 @@ +From 3cd74fd83d58aa88f9a006980c73844d6b79d1fb Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 10:31:38 +0800 +Subject: [PATCH] virtio-net: bugfix: do not delete netdev before virtio net + +For the vhost-user net-card, it is allowed to delete its +network backend while the virtio-net device still exists. +However, when the status of the device changes in guest, +QEMU will check whether the network backend exists, otherwise +it will crash. +So do not allow deleting the network backend directly +without first deleting the virtio-net device. 
+ +Signed-off-by: Jinhua Cao +--- + net/net.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/net/net.c b/net/net.c +index 0520bc1681..bcd3d7e04c 100644 +--- a/net/net.c ++++ b/net/net.c +@@ -1322,6 +1322,12 @@ void qmp_netdev_del(const char *id, Error **errp) + return; + } + ++ if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER && nc->peer) { ++ error_setg(errp, "Device '%s' is a netdev for vhostuser," ++ "please delete the peer front-end device (virtio-net) first.", id); ++ return; ++ } ++ + qemu_del_net_client(nc); + + /* +-- +2.27.0 + diff --git a/virtio-net-correctly-copy-vnet-header-when-flushing-.patch b/virtio-net-correctly-copy-vnet-header-when-flushing-.patch new file mode 100644 index 0000000000000000000000000000000000000000..b381e1024885a043e43e688f319cd4b44cc34a4c --- /dev/null +++ b/virtio-net-correctly-copy-vnet-header-when-flushing-.patch @@ -0,0 +1,72 @@ +From 912641a75955a75f37ab8695a0753b1571762717 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Tue, 2 Jan 2024 11:29:01 +0800 +Subject: [PATCH] virtio-net: correctly copy vnet header when flushing TX + (CVE-2023-6693) + +When HASH_REPORT is negotiated, the guest_hdr_len might be larger than +the size of the mergeable rx buffer header. Using +virtio_net_hdr_mrg_rxbuf during the header swap might lead a stack +overflow in this case. Fixing this by using virtio_net_hdr_v1_hash +instead. 
+ +Reported-by: Xiao Lei +Cc: Yuri Benditovich +Cc: qemu-stable@nongnu.org +Cc: Mauro Matteo Cascella +Fixes: CVE-2023-6693 +Fixes: e22f0603fb2f ("virtio-net: reference implementation of hash report") +Reviewed-by: Michael Tokarev +Signed-off-by: Jason Wang +--- + hw/net/virtio-net.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 80c56f0cfc..73024babd4 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -674,6 +674,11 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, + + n->mergeable_rx_bufs = mergeable_rx_bufs; + ++ /* ++ * Note: when extending the vnet header, please make sure to ++ * change the vnet header copying logic in virtio_net_flush_tx() ++ * as well. ++ */ + if (version_1) { + n->guest_hdr_len = hash_report ? + sizeof(struct virtio_net_hdr_v1_hash) : +@@ -2693,7 +2698,7 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q) + ssize_t ret; + unsigned int out_num; + struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg; +- struct virtio_net_hdr_mrg_rxbuf mhdr; ++ struct virtio_net_hdr_v1_hash vhdr; + + elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement)); + if (!elem) { +@@ -2710,7 +2715,7 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q) + } + + if (n->has_vnet_hdr) { +- if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) < ++ if (iov_to_buf(out_sg, out_num, 0, &vhdr, n->guest_hdr_len) < + n->guest_hdr_len) { + virtio_error(vdev, "virtio-net header incorrect"); + virtqueue_detach_element(q->tx_vq, elem, 0); +@@ -2718,8 +2723,8 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q) + return -EINVAL; + } + if (n->needs_vnet_hdr_swap) { +- virtio_net_hdr_swap(vdev, (void *) &mhdr); +- sg2[0].iov_base = &mhdr; ++ virtio_net_hdr_swap(vdev, (void *) &vhdr); ++ sg2[0].iov_base = &vhdr; + sg2[0].iov_len = n->guest_hdr_len; + out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, + out_sg, out_num, 
+-- +2.27.0 + diff --git a/virtio-net-fix-max-vring-buf-size-when-set-ring-num.patch b/virtio-net-fix-max-vring-buf-size-when-set-ring-num.patch new file mode 100644 index 0000000000000000000000000000000000000000..fb492879329a7e56b5fee12ccd67e36f15798987 --- /dev/null +++ b/virtio-net-fix-max-vring-buf-size-when-set-ring-num.patch @@ -0,0 +1,52 @@ +From 4321c9f8b85c6a4c1549399aa11e351b66bd1879 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 10:48:27 +0800 +Subject: [PATCH] virtio-net: fix max vring buf size when set ring num + +Set the max vring buf size of virtio-net devices to 4096 + +Signed-off-by: Jinhua Cao +--- + hw/virtio/virtio.c | 9 +++++++-- + include/hw/virtio/virtio.h | 1 + + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index d93ea62723..267c1e6fd0 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2196,12 +2196,17 @@ void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc, + + void virtio_queue_set_num(VirtIODevice *vdev, int n, int num) + { ++ int vq_max_size = VIRTQUEUE_MAX_SIZE; ++ ++ if (!strcmp(vdev->name, "virtio-net")) { ++ vq_max_size = VIRTIO_NET_VQ_MAX_SIZE; ++ } ++ + /* Don't allow guest to flip queue between existent and + * nonexistent states, or to set it to an invalid size. 
+ */ + if (!!num != !!vdev->vq[n].vring.num || +- num > VIRTQUEUE_MAX_SIZE || +- num < 0) { ++ num > vq_max_size || num < 0) { + return; + } + vdev->vq[n].vring.num = num; +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index 7c35bb841b..e612441357 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -60,6 +60,7 @@ size_t virtio_get_config_size(const VirtIOConfigSizeParams *params, + typedef struct VirtQueue VirtQueue; + + #define VIRTQUEUE_MAX_SIZE 1024 ++#define VIRTIO_NET_VQ_MAX_SIZE (4096) + + typedef struct VirtQueueElement + { +-- +2.27.0 + diff --git a/virtio-net-set-the-max-of-queue-size-to-4096.patch b/virtio-net-set-the-max-of-queue-size-to-4096.patch new file mode 100644 index 0000000000000000000000000000000000000000..6e3e067ffaaf7ebad67639c9db209050a29f943c --- /dev/null +++ b/virtio-net-set-the-max-of-queue-size-to-4096.patch @@ -0,0 +1,58 @@ +From 58fe483bf5824db177843675629ed955051078fd Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Sat, 12 Feb 2022 17:22:38 +0800 +Subject: [PATCH] virtio-net: set the max of queue size to 4096 + +set the max of virtio-net queue size to 4096. Now the +queue_size of virtio-net is set by rx_queue_size and +tx_queue_size + +Signed-off-by: Jinhua Cao +--- + hw/net/virtio-net.c | 5 +++-- + hw/virtio/virtio.c | 2 +- + 2 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 7f69a4b842..0ae2ddc002 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -710,6 +710,7 @@ static int virtio_net_max_tx_queue_size(VirtIONet *n) + + switch(peer->info->type) { + case NET_CLIENT_DRIVER_VHOST_USER: ++ return VIRTIO_NET_VQ_MAX_SIZE; + case NET_CLIENT_DRIVER_VHOST_VDPA: + return VIRTQUEUE_MAX_SIZE; + default: +@@ -3638,12 +3639,12 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) + * help from us (using virtio 1 and up). 
+ */ + if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE || +- n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE || ++ n->net_conf.rx_queue_size > VIRTIO_NET_VQ_MAX_SIZE || + !is_power_of_2(n->net_conf.rx_queue_size)) { + error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), " + "must be a power of 2 between %d and %d.", + n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE, +- VIRTQUEUE_MAX_SIZE); ++ VIRTIO_NET_VQ_MAX_SIZE); + virtio_cleanup(vdev); + return; + } +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 267c1e6fd0..d00effe4d5 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2338,7 +2338,7 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, + break; + } + +- if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) { ++ if (i == VIRTIO_QUEUE_MAX) { + qemu_log("unacceptable queue_size (%d) or num (%d)\n", + queue_size, i); + abort(); +-- +2.27.0 + diff --git a/virtio-net-update-the-default-and-max-of-rx-tx_queue.patch b/virtio-net-update-the-default-and-max-of-rx-tx_queue.patch new file mode 100644 index 0000000000000000000000000000000000000000..9817a2280cf9b883781794f0a4cc442c01bca77d --- /dev/null +++ b/virtio-net-update-the-default-and-max-of-rx-tx_queue.patch @@ -0,0 +1,110 @@ +From c2221815b79be9847c4729709809779b4b0550a7 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 17:28:49 +0800 +Subject: [PATCH] virtio-net: update the default and max of rx/tx_queue_size + +Set the max of tx_queue_size to 4096 even if the backends +are not vhost-user. + +Set the default of rx/tx_queue_size to 2048 if the backends +are vhost-user, otherwise to 4096. 
+ +Signed-off-by: Jinhua Cao +--- + hw/net/virtio-net.c | 43 ++++++++++++++++++++++++++++++++----------- + 1 file changed, 32 insertions(+), 11 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 0ae2ddc002..523d01746d 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -50,12 +50,11 @@ + #define VIRTIO_NET_VM_VERSION 11 + + /* previously fixed value */ +-#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256 +-#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256 ++#define VIRTIO_NET_VHOST_USER_DEFAULT_SIZE 2048 + + /* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */ +-#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE +-#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE ++#define VIRTIO_NET_RX_QUEUE_MIN_SIZE 256 ++#define VIRTIO_NET_TX_QUEUE_MIN_SIZE 256 + + #define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */ + +@@ -696,6 +695,28 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, + } + } + ++static void virtio_net_set_default_queue_size(VirtIONet *n) ++{ ++ NetClientState *peer = n->nic_conf.peers.ncs[0]; ++ ++ /* Default value is 0 if not set */ ++ if (n->net_conf.rx_queue_size == 0) { ++ if (peer && peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { ++ n->net_conf.rx_queue_size = VIRTIO_NET_VHOST_USER_DEFAULT_SIZE; ++ } else { ++ n->net_conf.rx_queue_size = VIRTIO_NET_VQ_MAX_SIZE; ++ } ++ } ++ ++ if (n->net_conf.tx_queue_size == 0) { ++ if (peer && peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { ++ n->net_conf.tx_queue_size = VIRTIO_NET_VHOST_USER_DEFAULT_SIZE; ++ } else { ++ n->net_conf.tx_queue_size = VIRTIO_NET_VQ_MAX_SIZE; ++ } ++ } ++} ++ + static int virtio_net_max_tx_queue_size(VirtIONet *n) + { + NetClientState *peer = n->nic_conf.peers.ncs[0]; +@@ -705,16 +726,16 @@ static int virtio_net_max_tx_queue_size(VirtIONet *n) + * size. 
+ */ + if (!peer) { +- return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; ++ return VIRTIO_NET_VQ_MAX_SIZE; + } + + switch(peer->info->type) { + case NET_CLIENT_DRIVER_VHOST_USER: + return VIRTIO_NET_VQ_MAX_SIZE; + case NET_CLIENT_DRIVER_VHOST_VDPA: +- return VIRTQUEUE_MAX_SIZE; ++ return VIRTIO_NET_VQ_MAX_SIZE; + default: +- return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; ++ return VIRTIO_NET_VQ_MAX_SIZE; + }; + } + +@@ -3633,6 +3654,8 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) + virtio_net_set_config_size(n, n->host_features); + virtio_init(vdev, VIRTIO_ID_NET, n->config_size); + ++ virtio_net_set_default_queue_size(n); ++ + /* + * We set a lower limit on RX queue size to what it always was. + * Guests that want a smaller ring can always resize it without +@@ -3934,10 +3957,8 @@ static Property virtio_net_properties[] = { + TX_TIMER_INTERVAL), + DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), + DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), +- DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, +- VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), +- DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, +- VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE), ++ DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, 0), ++ DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, 0), + DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), + DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, + true), +-- +2.27.0 + diff --git a/virtio-print-the-guest-virtio_net-features-that-host.patch b/virtio-print-the-guest-virtio_net-features-that-host.patch new file mode 100644 index 0000000000000000000000000000000000000000..15157bb695f275763d5a9cd3eeb8538d86c4a109 --- /dev/null +++ b/virtio-print-the-guest-virtio_net-features-that-host.patch @@ -0,0 +1,112 @@ +From b24730e9abe34898483fa62b24c26abb9d98570c Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 14:16:17 
+0800 +Subject: [PATCH] virtio: print the guest virtio_net features that host does + not support + +print the guest virtio_net features that host does not support + +For example: +Please check host config, because host does not support required feature bits 0x1983 +virtio_net_feature: csum, guest_csum, guest_tso4, guest_tso6, host_tso4, host_tso6 +Features 0xef99a3 unsupported. Allowed features: 0x40ff8024 + +Signed-off-by: Jinhua Cao +--- + hw/net/virtio-net.c | 41 ++++++++++++++++++++++++++++++++++++++ + hw/virtio/virtio.c | 7 +++++++ + include/hw/virtio/virtio.h | 1 + + 3 files changed, 49 insertions(+) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 80c56f0cfc..7f69a4b842 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3952,6 +3952,46 @@ static Property virtio_net_properties[] = { + DEFINE_PROP_END_OF_LIST(), + }; + ++static void virtio_net_print_features(uint64_t features) ++{ ++ Property *props = virtio_net_properties; ++ int feature_cnt = 0; ++ ++ if (!features) { ++ return; ++ } ++ printf("virtio_net_feature: "); ++ ++ for (; features && props->name; props++) { ++ /* The bitnr of property may be default(0) besides 'csum' property. */ ++ if (props->bitnr == 0 && strcmp(props->name, "csum")) { ++ continue; ++ } ++ ++ /* Features only support 64bit. */ ++ if (props->bitnr > 63) { ++ continue; ++ } ++ ++ if (virtio_has_feature(features, props->bitnr)) { ++ virtio_clear_feature(&features, props->bitnr); ++ if (feature_cnt != 0) { ++ printf(", "); ++ } ++ printf("%s", props->name); ++ feature_cnt++; ++ } ++ } ++ ++ if (features) { ++ if (feature_cnt != 0) { ++ printf(", "); ++ } ++ printf("unkown bits 0x%." 
PRIx64, features); ++ } ++ printf("\n"); ++} ++ + static void virtio_net_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); +@@ -3966,6 +4006,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data) + vdc->set_config = virtio_net_set_config; + vdc->get_features = virtio_net_get_features; + vdc->set_features = virtio_net_set_features; ++ vdc->print_features = virtio_net_print_features; + vdc->bad_features = virtio_net_bad_features; + vdc->reset = virtio_net_reset; + vdc->queue_reset = virtio_net_queue_reset; +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index ec09d515c2..1f78b74c00 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2905,6 +2905,13 @@ static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val) + { + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); + bool bad = (val & ~(vdev->host_features)) != 0; ++ uint64_t feat = val & ~(vdev->host_features); ++ ++ if (bad && k->print_features) { ++ qemu_log("error: Please check host config, "\ ++ "because host does not support required feature bits 0x%" PRIx64 "\n", feat); ++ k->print_features(feat); ++ } + + val &= vdev->host_features; + if (k->set_features) { +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index c8f72850bc..7c35bb841b 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -182,6 +182,7 @@ struct VirtioDeviceClass { + int (*validate_features)(VirtIODevice *vdev); + void (*get_config)(VirtIODevice *vdev, uint8_t *config); + void (*set_config)(VirtIODevice *vdev, const uint8_t *config); ++ void (*print_features)(uint64_t features); + void (*reset)(VirtIODevice *vdev); + void (*set_status)(VirtIODevice *vdev, uint8_t val); + /* Device must validate queue_index. 
*/ +-- +2.27.0 + diff --git a/virtio-scsi-bugfix-fix-qemu-crash-for-hotplug-scsi-d.patch b/virtio-scsi-bugfix-fix-qemu-crash-for-hotplug-scsi-d.patch new file mode 100644 index 0000000000000000000000000000000000000000..f78f3d7eac329e114005a901c5a1ce37ceb77dba --- /dev/null +++ b/virtio-scsi-bugfix-fix-qemu-crash-for-hotplug-scsi-d.patch @@ -0,0 +1,37 @@ +From 4e5de00fb124d82f9c4ce2ac433ed3d691783c01 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Wed, 9 Feb 2022 19:58:21 +0800 +Subject: [PATCH] virtio-scsi: bugfix: fix qemu crash for hotplug scsi disk + with dataplane + +The vm will trigger a disk sweep operation after plugging +a controller whose io type is iothread. If a scsi disk is +attached immediately, the SCSI inquiry request in vm will trigger +the assert in virtio_scsi_ctx_check(), which is called by +virtio_scsi_handle_cmd_req_prepare(). + +Add a check in virtio_scsi_handle_cmd_req_prepare() and +complete the request with VIRTIO_SCSI_S_BAD_TARGET if the +device has not been realized yet. + +Signed-off-by: Jinhua Cao +--- + hw/scsi/virtio-scsi.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 9c751bf296..bc7feb404a 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -781,7 +781,7 @@ static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req) + req->req.cmd.tag, req->req.cmd.cdb[0]); + + d = virtio_scsi_device_get(s, req->req.cmd.lun); +- if (!d) { ++ if (!d || !d->qdev.realized) { + req->resp.cmd.response = VIRTIO_SCSI_S_BAD_TARGET; + virtio_scsi_complete_cmd_req(req); + return -ENOENT; +-- +2.27.0 +