diff --git a/80-kvm.rules b/80-kvm.rules new file mode 100644 index 0000000000000000000000000000000000000000..c2f7317aacca3ee63cfaa322f87472d62193ffa7 --- /dev/null +++ b/80-kvm.rules @@ -0,0 +1 @@ +KERNEL=="kvm", GROUP="kvm", MODE="0660" diff --git a/BinDir.tar.gz b/BinDir.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..326fcb51c3998db1344613bba4f6cdbf0f858e3d Binary files /dev/null and b/BinDir.tar.gz differ diff --git a/Currently-while-kvm-and-qemu-can-not-handle-some-kvm.patch b/Currently-while-kvm-and-qemu-can-not-handle-some-kvm.patch new file mode 100644 index 0000000000000000000000000000000000000000..6b662075670f5b73ff590d750f671bb41313c1cc --- /dev/null +++ b/Currently-while-kvm-and-qemu-can-not-handle-some-kvm.patch @@ -0,0 +1,27 @@ +From 59f038d21c1901245ba0be417f6285cec465d6c1 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 9 Feb 2022 11:24:32 +0800 +Subject: [PATCH] Currently, while kvm and qemu can not handle some kvm exit, + qemu will do vm_stop, which will make vm in pause state. This action make vm + unrecoverable, so send guest panic to libvirt instead. 
+ +--- + accel/kvm/kvm-all.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index e39a810a4e..33f4c6d547 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2993,7 +2993,7 @@ int kvm_cpu_exec(CPUState *cpu) + + if (ret < 0) { + cpu_dump_state(cpu, stderr, CPU_DUMP_CODE); +- vm_stop(RUN_STATE_INTERNAL_ERROR); ++ qemu_system_guest_panicked(cpu_get_crash_info(cpu)); + } + + qatomic_set(&cpu->exit_request, 0); +-- +2.27.0 + diff --git a/Revert-file-posix-Remove-unused-s-discard_zeroes.patch b/Revert-file-posix-Remove-unused-s-discard_zeroes.patch new file mode 100644 index 0000000000000000000000000000000000000000..bad52a205a9db5a1555133fd6e81a3c67841105f --- /dev/null +++ b/Revert-file-posix-Remove-unused-s-discard_zeroes.patch @@ -0,0 +1,53 @@ +From db37bc0d85e141a666dd287cdc562a47f29b4343 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Mon, 18 Mar 2024 10:01:28 +0800 +Subject: [PATCH] Revert "file-posix: Remove unused s->discard_zeroes" + +This reverts commit a7ca2eb488ff149c898f43abe103f8bd8e3ca3c4. 
+--- + block/file-posix.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/block/file-posix.c b/block/file-posix.c +index b862406c71..01ae5fd88c 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -158,6 +158,7 @@ typedef struct BDRVRawState { + + bool has_discard:1; + bool has_write_zeroes:1; ++ bool discard_zeroes:1; + bool use_linux_aio:1; + bool use_linux_io_uring:1; + int page_cache_inconsistent; /* errno from fdatasync failure */ +@@ -765,6 +766,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, + ret = -EINVAL; + goto fail; + } else { ++ s->discard_zeroes = true; + s->has_fallocate = true; + } + } else { +@@ -790,12 +792,19 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, + #endif + + if (S_ISBLK(st.st_mode)) { ++#ifdef BLKDISCARDZEROES ++ unsigned int arg; ++ if (ioctl(s->fd, BLKDISCARDZEROES, &arg) == 0 && arg) { ++ s->discard_zeroes = true; ++ } ++#endif + #ifdef __linux__ + /* On Linux 3.10, BLKDISCARD leaves stale data in the page cache. Do + * not rely on the contents of discarded blocks unless using O_DIRECT. + * Same for BLKZEROOUT. + */ + if (!(bs->open_flags & BDRV_O_NOCACHE)) { ++ s->discard_zeroes = false; + s->has_write_zeroes = false; + } + #endif +-- +2.27.0 + diff --git a/accel-kvm-Extract-common-KVM-vCPU-creation-parking-c.patch b/accel-kvm-Extract-common-KVM-vCPU-creation-parking-c.patch new file mode 100644 index 0000000000000000000000000000000000000000..d68e7f54f43f58b3be56a880de48e15b7ebd5a2f --- /dev/null +++ b/accel-kvm-Extract-common-KVM-vCPU-creation-parking-c.patch @@ -0,0 +1,147 @@ +From 6999ced63ca3bb05a1cbc4a667bd9fd27eeaeaee Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 9 Sep 2023 00:04:04 +0000 +Subject: [PATCH] accel/kvm: Extract common KVM vCPU {creation,parking} code + +KVM vCPU creation is done once during the initialization of the VM when Qemu +threads are spawned. This is common to all the architectures. 
If the architecture +supports vCPU hot-{un}plug then this KVM vCPU creation could be deferred to +later point as well. Some architectures might in any case create KVM vCPUs for +the yet-to-be plugged vCPUs (i.e. QoM Object & thread does not exists) during VM +init time and park them. + +Hot-unplug of vCPU results in destruction of the vCPU objects in QOM but +the KVM vCPU objects in the Host KVM are not destroyed and their representative +KVM vCPU objects in Qemu are parked. + +Signed-off-by: Salil Mehta +--- + accel/kvm/kvm-all.c | 61 ++++++++++++++++++++++++++++++++++---------- + include/sysemu/kvm.h | 2 ++ + 2 files changed, 49 insertions(+), 14 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index d900df93a4..6d503aa614 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -136,6 +136,7 @@ static QemuMutex kml_slots_lock; + #define kvm_slots_unlock() qemu_mutex_unlock(&kml_slots_lock) + + static void kvm_slot_init_dirty_bitmap(KVMSlot *mem); ++static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id); + + static inline void kvm_resample_fd_remove(int gsi) + { +@@ -324,11 +325,51 @@ err: + return ret; + } + ++void kvm_park_vcpu(CPUState *cpu) ++{ ++ unsigned long vcpu_id = cpu->cpu_index; ++ struct KVMParkedVcpu *vcpu; ++ ++ vcpu = g_malloc0(sizeof(*vcpu)); ++ vcpu->vcpu_id = vcpu_id; ++ vcpu->kvm_fd = cpu->kvm_fd; ++ QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); ++} ++ ++int kvm_create_vcpu(CPUState *cpu) ++{ ++ unsigned long vcpu_id = cpu->cpu_index; ++ KVMState *s = kvm_state; ++ int ret; ++ ++ DPRINTF("kvm_create_vcpu\n"); ++ ++ /* check if the KVM vCPU already exist but is parked */ ++ ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu)); ++ if (ret > 0) { ++ goto found; ++ } ++ ++ /* create a new KVM vcpu */ ++ ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id); ++ if (ret < 0) { ++ return ret; ++ } ++ ++found: ++ cpu->vcpu_dirty = true; ++ cpu->kvm_fd = ret; ++ cpu->kvm_state = s; ++ cpu->dirty_pages = 0; 
++ cpu->throttle_us_per_full = 0; ++ ++ return 0; ++} ++ + static int do_kvm_destroy_vcpu(CPUState *cpu) + { + KVMState *s = kvm_state; + long mmap_size; +- struct KVMParkedVcpu *vcpu = NULL; + int ret = 0; + + DPRINTF("kvm_destroy_vcpu\n"); +@@ -357,10 +398,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu) + } + } + +- vcpu = g_malloc0(sizeof(*vcpu)); +- vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); +- vcpu->kvm_fd = cpu->kvm_fd; +- QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); ++ kvm_park_vcpu(cpu); + err: + return ret; + } +@@ -388,7 +426,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id) + } + } + +- return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id); ++ return -1; + } + + int kvm_init_vcpu(CPUState *cpu, Error **errp) +@@ -399,19 +437,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) + + trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); + +- ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu)); ++ ret = kvm_create_vcpu(cpu); + if (ret < 0) { +- error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)", ++ error_setg_errno(errp, -ret, ++ "kvm_init_vcpu: kvm_create_vcpu failed (%lu)", + kvm_arch_vcpu_id(cpu)); + goto err; + } + +- cpu->kvm_fd = ret; +- cpu->kvm_state = s; +- cpu->vcpu_dirty = true; +- cpu->dirty_pages = 0; +- cpu->throttle_us_per_full = 0; +- + mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); + if (mmap_size < 0) { + ret = mmap_size; +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index b46d6203b4..e534411ddc 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -434,6 +434,8 @@ void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len); + + int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr, + hwaddr *phys_addr); ++int kvm_create_vcpu(CPUState *cpu); ++void kvm_park_vcpu(CPUState *cpu); + + #endif /* NEED_CPU_H */ + +-- +2.27.0 + diff --git a/accel-kvm-Use-correct-id-for-parked-vcpu.patch b/accel-kvm-Use-correct-id-for-parked-vcpu.patch new file 
mode 100644 index 0000000000000000000000000000000000000000..ec759c7697e70a45b61b8c7ba2e27a2be4c35b22 --- /dev/null +++ b/accel-kvm-Use-correct-id-for-parked-vcpu.patch @@ -0,0 +1,32 @@ +From 9de26d69c52db67f48619ad20b8cb9d8ee71e42c Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 15:42:57 +0800 +Subject: [PATCH] accel/kvm: Use correct id for parked vcpu + +kvm_arch_vcpu_id is correct for all platform. + +Signed-off-by: Keqian Zhu +--- + accel/kvm/kvm-all.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 6d503aa614..75a3075c14 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -327,11 +327,10 @@ err: + + void kvm_park_vcpu(CPUState *cpu) + { +- unsigned long vcpu_id = cpu->cpu_index; + struct KVMParkedVcpu *vcpu; + + vcpu = g_malloc0(sizeof(*vcpu)); +- vcpu->vcpu_id = vcpu_id; ++ vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); + vcpu->kvm_fd = cpu->kvm_fd; + QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); + } +-- +2.27.0 + diff --git a/acpi-cpu-Add-cpu_cppc-building-support.patch b/acpi-cpu-Add-cpu_cppc-building-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..2b045f296d183ddf356f8fbc54d8ddd34780f121 --- /dev/null +++ b/acpi-cpu-Add-cpu_cppc-building-support.patch @@ -0,0 +1,72 @@ +From c75a0102a1bb00190b07b06ede8b1f9fa0bdaa3c Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 2 Apr 2024 16:52:10 +0800 +Subject: [PATCH] acpi/cpu: Add cpu_cppc building support + +Signed-off-by: Keqian Zhu +--- + hw/acpi/cpu.c | 8 +++++++- + hw/i386/acpi-build.c | 2 +- + include/hw/acpi/cpu.h | 6 +++++- + 3 files changed, 13 insertions(+), 3 deletions(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index cf0c7e8538..c8c11e51c6 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -342,7 +342,9 @@ const VMStateDescription vmstate_cpu_hotplug = { + #define CPU_FW_EJECT_EVENT "CEJF" + + void build_cpus_aml(Aml *table, MachineState *machine, 
CPUHotplugFeatures opts, +- build_madt_cpu_fn build_madt_cpu, hwaddr base_addr, ++ build_madt_cpu_fn build_madt_cpu, ++ build_cpu_cppc_fn build_cpu_cppc, ++ hwaddr base_addr, + const char *res_root, + const char *event_handler_method, + AmlRegionSpace rs) +@@ -668,6 +670,10 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + aml_append(dev, aml_name_decl("_UID", uid)); + } + ++ if (build_cpu_cppc) { ++ build_cpu_cppc(i, arch_ids->len, dev); ++ } ++ + method = aml_method("_STA", 0, AML_SERIALIZED); + aml_append(method, aml_return(aml_call1(CPU_STS_METHOD, uid))); + aml_append(dev, method); +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index db4ca8a66a..e10799ecc6 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -1545,7 +1545,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, + .smi_path = pm->smi_on_cpuhp ? "\\_SB.PCI0.SMI0.SMIC" : NULL, + .fw_unplugs_cpu = pm->smi_on_cpu_unplug, + }; +- build_cpus_aml(dsdt, machine, opts, pc_madt_cpu_entry, ++ build_cpus_aml(dsdt, machine, opts, pc_madt_cpu_entry, NULL, + pm->cpu_hp_io_base, "\\_SB.PCI0", "\\_GPE._E02", + AML_SYSTEM_IO); + } +diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h +index 76bc7eb251..b31a2e50d9 100644 +--- a/include/hw/acpi/cpu.h ++++ b/include/hw/acpi/cpu.h +@@ -59,8 +59,12 @@ typedef struct CPUHotplugFeatures { + typedef void (*build_madt_cpu_fn)(int uid, const CPUArchIdList *apic_ids, + GArray *entry, bool force_enabled); + ++typedef void (*build_cpu_cppc_fn)(int uid, int num_cpu, Aml *dev); ++ + void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, +- build_madt_cpu_fn build_madt_cpu, hwaddr base_addr, ++ build_madt_cpu_fn build_madt_cpu, ++ build_cpu_cppc_fn build_cpu_cppc, ++ hwaddr base_addr, + const char *res_root, + const char *event_handler_method, + AmlRegionSpace rs); +-- +2.27.0 + diff --git a/acpi-cpu-Fix-cpu_hotplug_hw_init.patch b/acpi-cpu-Fix-cpu_hotplug_hw_init.patch new file mode 
100644 index 0000000000000000000000000000000000000000..bca3afd0faa559c7881fd251b8d164e90e1e5b9f --- /dev/null +++ b/acpi-cpu-Fix-cpu_hotplug_hw_init.patch @@ -0,0 +1,36 @@ +From 14c4062c4acc7d417d163276b65e59073ba18eeb Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 14:51:18 +0800 +Subject: [PATCH] acpi/cpu: Fix cpu_hotplug_hw_init() + +Present but disabled vCPUs will be released after +cpu_hotplug_hw_init(), so we should not assign them to AcpiCpuStatus. + +Signed-off-by: Keqian Zhu +--- + hw/acpi/cpu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index c922c380aa..b258396e01 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -229,7 +229,6 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, + for (i = 0; i < id_list->len; i++) { + struct CPUState *cpu = CPU(id_list->cpus[i].cpu); + if (qemu_present_cpu(cpu)) { +- state->devs[i].cpu = cpu; + state->devs[i].is_present = true; + } else { + if (qemu_persistent_cpu(cpu)) { +@@ -240,6 +239,7 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, + } + + if (qemu_enabled_cpu(cpu)) { ++ state->devs[i].cpu = cpu; + state->devs[i].is_enabled = true; + } else { + state->devs[i].is_enabled = false; +-- +2.27.0 + diff --git a/acpi-cpu-Fix-detection-of-present-cpu.patch b/acpi-cpu-Fix-detection-of-present-cpu.patch new file mode 100644 index 0000000000000000000000000000000000000000..6bd4b47ef8fd3088c13f2d57c6833a0675513113 --- /dev/null +++ b/acpi-cpu-Fix-detection-of-present-cpu.patch @@ -0,0 +1,34 @@ +From c2eb1176fe06f359a8102bbacb54760c9c1d5aae Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Sun, 28 Apr 2024 12:50:09 +0800 +Subject: [PATCH] acpi/cpu: Fix detection of present cpu + +When qemu_present_cpu() is false, it means the cpu object is +null, and calling qemu_persistent_cpu() on it will +cause a null pointer access. 
+ +Signed-off-by: Keqian Zhu +--- + hw/acpi/cpu.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index b258396e01..292e1daca2 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -231,11 +231,7 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, + if (qemu_present_cpu(cpu)) { + state->devs[i].is_present = true; + } else { +- if (qemu_persistent_cpu(cpu)) { +- state->devs[i].is_present = true; +- } else { +- state->devs[i].is_present = false; +- } ++ state->devs[i].is_present = false; + } + + if (qemu_enabled_cpu(cpu)) { +-- +2.27.0 + diff --git a/acpi-ged-Init-cpu-hotplug-only-when-machine-support-.patch b/acpi-ged-Init-cpu-hotplug-only-when-machine-support-.patch new file mode 100644 index 0000000000000000000000000000000000000000..514292717255b282761e77810dbdcf922f8230d6 --- /dev/null +++ b/acpi-ged-Init-cpu-hotplug-only-when-machine-support-.patch @@ -0,0 +1,47 @@ +From 6e17d32d6df25d4fac1a31da61d89e0bb9c8c7da Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 22:20:20 +0800 +Subject: [PATCH] acpi/ged: Init cpu hotplug only when machine support it + +Signed-off-by: Keqian Zhu +--- + hw/acpi/generic_event_device.c | 16 ++++++++++------ + 1 file changed, 10 insertions(+), 6 deletions(-) + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index 0266733a54..6e4f5f075f 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -403,6 +403,7 @@ static void acpi_ged_initfn(Object *obj) + AcpiGedState *s = ACPI_GED(dev); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + GEDState *ged_st = &s->ged_state; ++ MachineClass *mc; + + memory_region_init_io(&ged_st->evt, obj, &ged_evt_ops, ged_st, + TYPE_ACPI_GED, ACPI_GED_EVT_SEL_LEN); +@@ -427,12 +428,15 @@ static void acpi_ged_initfn(Object *obj) + TYPE_ACPI_GED "-regs", ACPI_GED_REG_COUNT); + sysbus_init_mmio(sbd, &ged_st->regs); + +- s->cpuhp.device = OBJECT(s); +- 
memory_region_init(&s->container_cpuhp, OBJECT(dev), "cpuhp container", +- ACPI_CPU_HOTPLUG_REG_LEN); +- sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container_cpuhp); +- cpu_hotplug_hw_init(&s->container_cpuhp, OBJECT(dev), +- &s->cpuhp_state, 0); ++ mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (mc->possible_cpu_arch_ids) { ++ s->cpuhp.device = OBJECT(s); ++ memory_region_init(&s->container_cpuhp, OBJECT(dev), "cpuhp container", ++ ACPI_CPU_HOTPLUG_REG_LEN); ++ sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container_cpuhp); ++ cpu_hotplug_hw_init(&s->container_cpuhp, OBJECT(dev), ++ &s->cpuhp_state, 0); ++ } + } + + static void acpi_ged_class_init(ObjectClass *class, void *data) +-- +2.27.0 + diff --git a/acpi-ged-Remove-cpuhp-field-of-ged.patch b/acpi-ged-Remove-cpuhp-field-of-ged.patch new file mode 100644 index 0000000000000000000000000000000000000000..760ad92d47604afba039502a7ff07598ac3d83fb --- /dev/null +++ b/acpi-ged-Remove-cpuhp-field-of-ged.patch @@ -0,0 +1,40 @@ +From 7af2722536b4b0d80f6c508066e8e77158869923 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 23:34:01 +0800 +Subject: [PATCH] acpi/ged: Remove cpuhp field of ged + +It's unused. 
+ +Signed-off-by: Keqian Zhu +--- + hw/acpi/generic_event_device.c | 1 - + include/hw/acpi/generic_event_device.h | 1 - + 2 files changed, 2 deletions(-) + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index 6e4f5f075f..4731a614a3 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -430,7 +430,6 @@ static void acpi_ged_initfn(Object *obj) + + mc = MACHINE_GET_CLASS(qdev_get_machine()); + if (mc->possible_cpu_arch_ids) { +- s->cpuhp.device = OBJECT(s); + memory_region_init(&s->container_cpuhp, OBJECT(dev), "cpuhp container", + ACPI_CPU_HOTPLUG_REG_LEN); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container_cpuhp); +diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h +index a803ea818e..90fc41cbb8 100644 +--- a/include/hw/acpi/generic_event_device.h ++++ b/include/hw/acpi/generic_event_device.h +@@ -110,7 +110,6 @@ struct AcpiGedState { + MemoryRegion container_memhp; + CPUHotplugState cpuhp_state; + MemoryRegion container_cpuhp; +- AcpiCpuHotplug cpuhp; + GEDState ged_state; + uint32_t ged_event_bitmap; + qemu_irq irq; +-- +2.27.0 + diff --git a/arm-acpi-Enable-ACPI-support-for-vcpu-hotplug.patch b/arm-acpi-Enable-ACPI-support-for-vcpu-hotplug.patch new file mode 100644 index 0000000000000000000000000000000000000000..0296a6428ac4fb1666e518b9ec80ca20f08fc8a6 --- /dev/null +++ b/arm-acpi-Enable-ACPI-support-for-vcpu-hotplug.patch @@ -0,0 +1,51 @@ +From 37aab238363c8242aa76853396c4f272b5508bca Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Mon, 8 Jun 2020 15:25:35 +0100 +Subject: [PATCH] arm/acpi: Enable ACPI support for vcpu hotplug + +ACPI is required to interface QEMU with the guest. Roughly falls into below +cases, + +1. Convey the possible vcpus config at the machine init time to the guest + using various DSDT tables like MADT etc. +2. Convey vcpu hotplug events to guest(using GED) +3. 
Assist in evaluation of various ACPI methods (like _EVT, _STA, _OST, _EJ0, + _MAT etc.) +4. Provides ACPI cpu hotplug state and 12 Byte memory mapped cpu hotplug + control register interface to the OSPM/guest corresponding to each possible + vcpu. The register interface consists of various R/W fields and their + handling operations. These are called whenever register fields or memory + regions are accessed (i.e. read or written) by OSPM whenever it evaluates + various ACPI methods. + +Note: a lot of this framework code is inherited from the changes already done for + x86 but still some minor changes are required to make it compatible with + ARM64. + +This patch enables the ACPI support for virtual cpu hotplug. ACPI changes +required will follow in subsequent patches. + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig +index 3ada335a24..c0a7d0bd58 100644 +--- a/hw/arm/Kconfig ++++ b/hw/arm/Kconfig +@@ -29,6 +29,7 @@ config ARM_VIRT + select ACPI_HW_REDUCED + select ACPI_APEI + select ACPI_VIOT ++ select ACPI_CPU_HOTPLUG + select VIRTIO_MEM_SUPPORTED + select ACPI_CXL + select ACPI_HMAT +-- +2.27.0 + diff --git a/arm-acpi-Fix-when-make-qemu-system-aarch64-at-x86_64.patch b/arm-acpi-Fix-when-make-qemu-system-aarch64-at-x86_64.patch new file mode 100644 index 0000000000000000000000000000000000000000..1dac436c9f2e12709c9f97fe198c9ffd7115265e --- /dev/null +++ b/arm-acpi-Fix-when-make-qemu-system-aarch64-at-x86_64.patch @@ -0,0 +1,98 @@ +From d269fb9a41abf5888a9bfeec2f8d1684b2d4dfb0 Mon Sep 17 00:00:00 2001 +From: saarloos <9090-90-90-9090@163.com> +Date: Sat, 30 Mar 2024 21:32:27 +0800 +Subject: [PATCH] arm/acpi: Fix when make qemu-system-aarch64 at x86_64 host + bios_tables_test fail reason: __aarch64__ macro let build_pptt at x86_64 and + aarch64 host build different 
function that let bios_tables_test fail. + +Signed-off-by: Yangzi Zhang +Signed-off-by: Yuan Zhang +--- + hw/acpi/aml-build.c | 5 +---- + hw/arm/virt-acpi-build.c | 2 +- + include/hw/acpi/aml-build.h | 5 +++-- + 3 files changed, 5 insertions(+), 7 deletions(-) + +diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c +index 714498165a..bf9c59f544 100644 +--- a/hw/acpi/aml-build.c ++++ b/hw/acpi/aml-build.c +@@ -2016,7 +2016,6 @@ static void build_processor_hierarchy_node(GArray *tbl, uint32_t flags, + } + } + +-#ifdef __aarch64__ + /* + * ACPI spec, Revision 6.3 + * 5.2.29.2 Cache Type Structure (Type 1) +@@ -2072,7 +2071,7 @@ static void build_cache_hierarchy_node(GArray *tbl, uint32_t next_level, + * ACPI spec, Revision 6.3 + * 5.2.29 Processor Properties Topology Table (PPTT) + */ +-void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, ++void build_pptt_arm(GArray *table_data, BIOSLinker *linker, MachineState *ms, + const char *oem_id, const char *oem_table_id) + { + MachineClass *mc = MACHINE_GET_CLASS(ms); +@@ -2172,7 +2171,6 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, + acpi_table_end(linker, &table); + } + +-#else + /* + * ACPI spec, Revision 6.3 + * 5.2.29 Processor Properties Topology Table (PPTT) +@@ -2248,7 +2246,6 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, + + acpi_table_end(linker, &table); + } +-#endif + + /* build rev1/rev3/rev5.1/rev6.0 FADT */ + void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 3cb50bdc65..48fc77fb83 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -1024,7 +1024,7 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) + + if (!vmc->no_cpu_topology) { + acpi_add_table(table_offsets, tables_blob); +- build_pptt(tables_blob, tables->linker, ms, ++ build_pptt_arm(tables_blob, tables->linker, ms, + vms->oem_id, 
vms->oem_table_id); + } + +diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h +index 200cb113de..7281c281f6 100644 +--- a/include/hw/acpi/aml-build.h ++++ b/include/hw/acpi/aml-build.h +@@ -221,7 +221,6 @@ struct AcpiBuildTables { + BIOSLinker *linker; + } AcpiBuildTables; + +-#ifdef __aarch64__ + /* Definitions of the hardcoded cache info*/ + + typedef enum { +@@ -266,7 +265,6 @@ struct offset_status { + uint32_t l1i_offset; + }; + +-#endif + + typedef + struct CrsRangeEntry { +@@ -542,6 +540,9 @@ void build_slit(GArray *table_data, BIOSLinker *linker, MachineState *ms, + void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, + const char *oem_id, const char *oem_table_id); + ++void build_pptt_arm(GArray *table_data, BIOSLinker *linker, MachineState *ms, ++ const char *oem_id, const char *oem_table_id); ++ + void build_fadt(GArray *tbl, BIOSLinker *linker, const AcpiFadtData *f, + const char *oem_id, const char *oem_table_id); + +-- +2.27.0 + diff --git a/arm-cpu-Some-fixes-for-arm_cpu_unrealizefn.patch b/arm-cpu-Some-fixes-for-arm_cpu_unrealizefn.patch new file mode 100644 index 0000000000000000000000000000000000000000..1b70c456bf22b5738d1f5d172f3ccacd0fc58eb5 --- /dev/null +++ b/arm-cpu-Some-fixes-for-arm_cpu_unrealizefn.patch @@ -0,0 +1,78 @@ +From b394996c99c0af0de870a5d79fff69f01d504b0c Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 14:47:07 +0800 +Subject: [PATCH] arm/cpu: Some fixes for arm_cpu_unrealizefn() + +Some minor fixes for arm_cpu_unrealizefn(). 
+ +Signed-off-by: Keqian Zhu +--- + target/arm/cpu.c | 33 +++++++++++++++++++++------------ + 1 file changed, 21 insertions(+), 12 deletions(-) + +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 501f88eb2f..9dd61c10ea 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2418,6 +2418,7 @@ static void arm_cpu_unrealizefn(DeviceState *dev) + CPUState *cs = CPU(dev); + bool has_secure; + ++#ifndef CONFIG_USER_ONLY + has_secure = cpu->has_el3 || arm_feature(env, ARM_FEATURE_M_SECURITY); + + /* rock 'n' un-roll, whatever happened in the arm_cpu_realizefn cleanly */ +@@ -2433,30 +2434,38 @@ static void arm_cpu_unrealizefn(DeviceState *dev) + if (has_secure) { + cpu_address_space_destroy(cs, ARMASIdx_S); + } ++#endif + + destroy_cpreg_list(cpu); + arm_cpu_unregister_gdb_regs(cpu); + unregister_cp_regs_for_features(cpu); + ++#ifndef CONFIG_USER_ONLY ++ if (tcg_enabled() && cpu_isar_feature(aa64_rme, cpu)) { ++ arm_unregister_el_change_hooks(cpu); ++ } ++#endif ++ + if (cpu->sau_sregion && arm_feature(env, ARM_FEATURE_M_SECURITY)) { + g_free(env->sau.rbar); + g_free(env->sau.rlar); + } + + if (arm_feature(env, ARM_FEATURE_PMSA) && +- arm_feature(env, ARM_FEATURE_V7) && +- cpu->pmsav7_dregion) { +- if (arm_feature(env, ARM_FEATURE_V8)) { +- g_free(env->pmsav8.rbar[M_REG_NS]); +- g_free(env->pmsav8.rlar[M_REG_NS]); +- if (arm_feature(env, ARM_FEATURE_M_SECURITY)) { +- g_free(env->pmsav8.rbar[M_REG_S]); +- g_free(env->pmsav8.rlar[M_REG_S]); ++ arm_feature(env, ARM_FEATURE_V7)) { ++ if (cpu->pmsav7_dregion) { ++ if (arm_feature(env, ARM_FEATURE_V8)) { ++ g_free(env->pmsav8.rbar[M_REG_NS]); ++ g_free(env->pmsav8.rlar[M_REG_NS]); ++ if (arm_feature(env, ARM_FEATURE_M_SECURITY)) { ++ g_free(env->pmsav8.rbar[M_REG_S]); ++ g_free(env->pmsav8.rlar[M_REG_S]); ++ } ++ } else { ++ g_free(env->pmsav7.drbar); ++ g_free(env->pmsav7.drsr); ++ g_free(env->pmsav7.dracr); + } +- } else { +- g_free(env->pmsav7.drbar); +- g_free(env->pmsav7.drsr); +- g_free(env->pmsav7.dracr); + } 
+ if (cpu->pmsav8r_hdregion) { + g_free(env->pmsav8.hprbar); +-- +2.27.0 + diff --git a/arm-kvm-Set-psci-smccc-filter-only-with-vcpu-hotplug.patch b/arm-kvm-Set-psci-smccc-filter-only-with-vcpu-hotplug.patch new file mode 100644 index 0000000000000000000000000000000000000000..d457c6f33275472cb1e5a546a2822de4b4b979ee --- /dev/null +++ b/arm-kvm-Set-psci-smccc-filter-only-with-vcpu-hotplug.patch @@ -0,0 +1,72 @@ +From 85e8e1ee8560e587845142342f81b218e44cba6a Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 22:07:33 +0800 +Subject: [PATCH] arm/kvm: Set psci smccc filter only with vcpu hotplug + +The smccc filter mechanism is only supported by newer Linux kernels, so +don't try to install it unconditionally. + +Signed-off-by: Keqian Zhu +--- + hw/arm/virt.c | 4 +++- + target/arm/kvm.c | 21 ++++++++++++--------- + 2 files changed, 15 insertions(+), 10 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e60f3431f9..38b5d214a1 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2366,8 +2366,10 @@ static void machvirt_init(MachineState *machine) + finalize_gic_version(vms); + if (tcg_enabled() || hvf_enabled() || qtest_enabled() || + (vms->gic_version < VIRT_GIC_VERSION_3)) { +- machine->smp.max_cpus = smp_cpus; + mc->has_hotpluggable_cpus = false; ++ } ++ if (!mc->has_hotpluggable_cpus) { ++ machine->smp.max_cpus = smp_cpus; + warn_report("cpu hotplug feature has been disabled"); + } + +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 66caf9e5e7..19783d567f 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -259,6 +259,7 @@ int kvm_arch_get_default_type(MachineState *ms) + + int kvm_arch_init(MachineState *ms, KVMState *s) + { ++ MachineClass *mc = MACHINE_GET_CLASS(ms); + int ret = 0; + + /* For ARM interrupt delivery is always asynchronous, +@@ -316,15 +317,17 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + * filter in the Host KVM. This is required to support features like + * virtual CPU Hotplug on ARM platforms. 
+ */ +- if (kvm_arm_set_smccc_filter(PSCI_0_2_FN64_CPU_ON, +- KVM_SMCCC_FILTER_FWD_TO_USER)) { +- error_report("CPU On PSCI-to-user-space fwd filter install failed"); +- abort(); +- } +- if (kvm_arm_set_smccc_filter(PSCI_0_2_FN_CPU_OFF, +- KVM_SMCCC_FILTER_FWD_TO_USER)) { +- error_report("CPU Off PSCI-to-user-space fwd filter install failed"); +- abort(); ++ if (mc->has_hotpluggable_cpus && ms->smp.max_cpus > ms->smp.cpus) { ++ if (kvm_arm_set_smccc_filter(PSCI_0_2_FN64_CPU_ON, ++ KVM_SMCCC_FILTER_FWD_TO_USER)) { ++ error_report("CPU On PSCI-to-user-space fwd filter install failed"); ++ mc->has_hotpluggable_cpus = false; ++ } ++ if (kvm_arm_set_smccc_filter(PSCI_0_2_FN_CPU_OFF, ++ KVM_SMCCC_FILTER_FWD_TO_USER)) { ++ error_report("CPU Off PSCI-to-user-space fwd filter install failed"); ++ mc->has_hotpluggable_cpus = false; ++ } + } + + kvm_arm_init_debug(s); +-- +2.27.0 + diff --git a/arm-virt-Add-cpu-hotplug-events-to-GED-during-creati.patch b/arm-virt-Add-cpu-hotplug-events-to-GED-during-creati.patch new file mode 100644 index 0000000000000000000000000000000000000000..61f5f9718628156d742d697fe0736400ffe16faf --- /dev/null +++ b/arm-virt-Add-cpu-hotplug-events-to-GED-during-creati.patch @@ -0,0 +1,67 @@ +From f8914ec04d4d892520aa443eaf8018c80516adee Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sun, 6 Aug 2023 16:27:01 +0000 +Subject: [PATCH] arm/virt: Add cpu hotplug events to GED during creation + +Add CPU Hotplug event to the set of supported ged-events during the creation of +GED device during VM init. Also initialize the memory map for CPU Hotplug +control device used in event exchanges between Qemu/VMM and the guest. 
+ +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 5 ++++- + include/hw/arm/virt.h | 1 + + 2 files changed, 5 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 78ed3c4ba8..155000f22f 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -78,6 +78,7 @@ + #include "hw/mem/pc-dimm.h" + #include "hw/mem/nvdimm.h" + #include "hw/acpi/generic_event_device.h" ++#include "hw/acpi/cpu_hotplug.h" + #include "hw/virtio/virtio-md-pci.h" + #include "hw/virtio/virtio-iommu.h" + #include "hw/char/pl011.h" +@@ -157,6 +158,7 @@ static const MemMapEntry base_memmap[] = { + [VIRT_NVDIMM_ACPI] = { 0x09090000, NVDIMM_ACPI_IO_LEN}, + [VIRT_PVTIME] = { 0x090a0000, 0x00010000 }, + [VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 }, ++ [VIRT_CPUHP_ACPI] = { 0x090c0000, ACPI_CPU_HOTPLUG_REG_LEN}, + [VIRT_MMIO] = { 0x0a000000, 0x00000200 }, + [VIRT_CPUFREQ] = { 0x0b000000, 0x00010000 }, + /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */ +@@ -725,7 +727,7 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms) + DeviceState *dev; + MachineState *ms = MACHINE(vms); + int irq = vms->irqmap[VIRT_ACPI_GED]; +- uint32_t event = ACPI_GED_PWR_DOWN_EVT; ++ uint32_t event = ACPI_GED_PWR_DOWN_EVT | ACPI_GED_CPU_HOTPLUG_EVT; + + if (ms->ram_slots) { + event |= ACPI_GED_MEM_HOTPLUG_EVT; +@@ -741,6 +743,7 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms) + + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_ACPI_GED].base); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, vms->memmap[VIRT_PCDIMM_ACPI].base); ++ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 3, vms->memmap[VIRT_CPUHP_ACPI].base); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(vms->gic, irq)); + + return dev; +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index c2fde0522c..5de0185063 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -76,6 +76,7 @@ enum { + VIRT_PCDIMM_ACPI, + VIRT_ACPI_GED, + VIRT_NVDIMM_ACPI, ++ 
VIRT_CPUHP_ACPI, + VIRT_PVTIME, + VIRT_LOWMEMMAP_LAST, + }; +-- +2.27.0 + diff --git a/arm-virt-Add-update-basic-hot-un-plug-framework.patch b/arm-virt-Add-update-basic-hot-un-plug-framework.patch new file mode 100644 index 0000000000000000000000000000000000000000..ea7c3772eab061766e8df4f16ec75bfb6d399aff --- /dev/null +++ b/arm-virt-Add-update-basic-hot-un-plug-framework.patch @@ -0,0 +1,197 @@ +From 724ab355c047cfb3e970d9ea78577087568eb095 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Fri, 8 May 2020 18:40:19 +0100 +Subject: [PATCH] arm/virt: Add/update basic hot-(un)plug framework + +Add CPU hot-unplug hooks and update hotplug hooks with additional sanity checks +for use in hotplug paths. + +Note, Functional contents of the hooks(now left with TODO comment) shall be +gradually filled in the subsequent patches in an incremental approach to patch +and logic building which would be roughly as follows: +1. (Un-)wiring of interrupts between vCPU<->GIC +2. Sending events to Guest for hot-(un)plug so that guest can take appropriate + actions. +3. Notifying GIC about hot-(un)plug action so that vCPU could be (un-)stitched + to the GIC CPU interface. +4. Updating the Guest with Next boot info for this vCPU in the firmware. 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 104 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index bf385a469c..ed354be326 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -83,6 +83,7 @@ + #include "hw/virtio/virtio-iommu.h" + #include "hw/char/pl011.h" + #include "qemu/guest-random.h" ++#include "qapi/qmp/qdict.h" + + #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ + static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ +@@ -3083,12 +3084,23 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + { + VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); + MachineState *ms = MACHINE(hotplug_dev); ++ MachineClass *mc = MACHINE_GET_CLASS(ms); + ARMCPU *cpu = ARM_CPU(dev); + CPUState *cs = CPU(dev); + CPUArchId *cpu_slot; + int32_t min_cpuid = 0; + int32_t max_cpuid; + ++ if (dev->hotplugged && !vms->acpi_dev) { ++ error_setg(errp, "GED acpi device does not exists"); ++ return; ++ } ++ ++ if (dev->hotplugged && !mc->has_hotpluggable_cpus) { ++ error_setg(errp, "CPU hotplug not supported on this machine"); ++ return; ++ } ++ + /* sanity check the cpu */ + if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { + error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", +@@ -3137,6 +3149,22 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + } + virt_cpu_set_properties(OBJECT(cs), cpu_slot, errp); + ++ /* ++ * Fix the GIC for this new vCPU being plugged. The QOM CPU object for the ++ * new vCPU need to be updated in the corresponding QOM GICv3CPUState object ++ * We also need to re-wire the IRQs for this new CPU object. This update ++ * is limited to the QOM only and does not affects the KVM. Later has ++ * already been pre-sized with possible CPU at VM init time. 
This is a ++ * workaround to the constraints posed by ARM architecture w.r.t supporting ++ * CPU Hotplug. Specification does not exist for the later. ++ * This patch-up is required both for {cold,hot}-plugged vCPUs. Cold-inited ++ * vCPUs have their GIC state initialized during machvit_init(). ++ */ ++ if (vms->acpi_dev) { ++ /* TODO: update GIC about this hotplug change here */ ++ /* TODO: wire the GIC<->CPU irqs */ ++ } ++ + /* + * To give persistent presence view of vCPUs to the guest, ACPI might need + * to fake the presence of the vCPUs to the guest but keep them disabled. +@@ -3148,6 +3176,7 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) + { ++ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); + MachineState *ms = MACHINE(hotplug_dev); + CPUState *cs = CPU(dev); + CPUArchId *cpu_slot; +@@ -3156,10 +3185,81 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index); + cpu_slot->cpu = OBJECT(dev); + ++ /* ++ * Update the ACPI Hotplug state both for vCPUs being {hot,cold}-plugged. ++ * vCPUs can be cold-plugged using '-device' option. For vCPUs being hot ++ * plugged, guest is also notified. ++ */ ++ if (vms->acpi_dev) { ++ /* TODO: update acpi hotplug state. 
Send cpu hotplug event to guest */ ++ /* TODO: register cpu for reset & update F/W info for the next boot */ ++ } ++ + cs->disabled = false; + return; + } + ++static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); ++ ARMCPU *cpu = ARM_CPU(dev); ++ CPUState *cs = CPU(dev); ++ ++ if (!vms->acpi_dev || !dev->realized) { ++ error_setg(errp, "GED does not exists or device is not realized!"); ++ return; ++ } ++ ++ if (!mc->has_hotpluggable_cpus) { ++ error_setg(errp, "CPU hot(un)plug not supported on this machine"); ++ return; ++ } ++ ++ if (cs->cpu_index == first_cpu->cpu_index) { ++ error_setg(errp, "Boot CPU(id%d=%d:%d:%d:%d) hot-unplug not supported", ++ first_cpu->cpu_index, cpu->socket_id, cpu->cluster_id, ++ cpu->core_id, cpu->thread_id); ++ return; ++ } ++ ++ /* TODO: request cpu hotplug from guest */ ++ ++ return; ++} ++ ++static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, ++ Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); ++ MachineState *ms = MACHINE(hotplug_dev); ++ CPUState *cs = CPU(dev); ++ CPUArchId *cpu_slot; ++ ++ if (!vms->acpi_dev || !dev->realized) { ++ error_setg(errp, "GED does not exists or device is not realized!"); ++ return; ++ } ++ ++ cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index); ++ ++ /* TODO: update the acpi cpu hotplug state for cpu hot-unplug */ ++ ++ /* TODO: unwire the gic-cpu irqs here */ ++ /* TODO: update the GIC about this hot unplug change */ ++ ++ /* TODO: unregister cpu for reset & update F/W info for the next boot */ ++ ++ qobject_unref(dev->opts); ++ dev->opts = NULL; ++ ++ cpu_slot->cpu = NULL; ++ cs->disabled = true; ++ ++ return; ++} ++ + static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +@@ -3284,6 +3384,8 @@ static void 
virt_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev, + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_unplug_request(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), + errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ virt_cpu_unplug_request(hotplug_dev, dev, errp); + } else { + error_setg(errp, "device unplug request for unsupported device" + " type: %s", object_get_typename(OBJECT(dev))); +@@ -3297,6 +3399,8 @@ static void virt_machine_device_unplug_cb(HotplugHandler *hotplug_dev, + virt_dimm_unplug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_unplug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ virt_cpu_unplug(hotplug_dev, dev, errp); + } else { + error_setg(errp, "virt: device unplug for unsupported device" + " type: %s", object_get_typename(OBJECT(dev))); +-- +2.27.0 + diff --git a/arm-virt-Changes-to-un-wire-GICC-vCPU-IRQs-during-ho.patch b/arm-virt-Changes-to-un-wire-GICC-vCPU-IRQs-during-ho.patch new file mode 100644 index 0000000000000000000000000000000000000000..61c298d7bec0adbb7fc302343e5b09f94dd947b5 --- /dev/null +++ b/arm-virt-Changes-to-un-wire-GICC-vCPU-IRQs-during-ho.patch @@ -0,0 +1,221 @@ +From a68abeefcbd78daaf7179b922f6b9040b4b63101 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 9 May 2020 15:50:33 +0100 +Subject: [PATCH] arm/virt: Changes to (un)wire GICC<->vCPU IRQs during + hot-(un)plug + +Refactors the existing GIC create code to extract common code to wire the +vcpu<->gic interrupts. This function could be used with cold-plug case and also +used when vCPU is hot-plugged. It also introduces a new function to unwire the +vcpu<->gic interrupts for the vCPU hot-unplug cases. 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 138 ++++++++++++++++++++++++++++------------- + hw/core/gpio.c | 2 +- + include/hw/qdev-core.h | 2 + + 3 files changed, 99 insertions(+), 43 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index ed354be326..97bf4cca11 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -798,6 +798,99 @@ static void create_v2m(VirtMachineState *vms) + vms->msi_controller = VIRT_MSI_CTRL_GICV2M; + } + ++/* ++ * Mapping from the output timer irq lines from the CPU to the GIC PPI inputs ++ * we use for the virt board. ++ */ ++const int timer_irq[] = { ++ [GTIMER_PHYS] = ARCH_TIMER_NS_EL1_IRQ, ++ [GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ, ++ [GTIMER_HYP] = ARCH_TIMER_NS_EL2_IRQ, ++ [GTIMER_SEC] = ARCH_TIMER_S_EL1_IRQ, ++}; ++ ++static void unwire_gic_cpu_irqs(VirtMachineState *vms, CPUState *cs) ++{ ++ MachineState *ms = MACHINE(vms); ++ unsigned int max_cpus = ms->smp.max_cpus; ++ DeviceState *cpudev = DEVICE(cs); ++ DeviceState *gicdev = vms->gic; ++ int cpu = CPU(cs)->cpu_index; ++ int type = vms->gic_version; ++ int irq; ++ ++ for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { ++ qdev_disconnect_gpio_out_named(cpudev, NULL, irq); ++ } ++ ++ if (type != VIRT_GIC_VERSION_2) { ++ qdev_disconnect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", ++ 0); ++ } else if (vms->virt) { ++ qdev_disconnect_gpio_out_named(gicdev, SYSBUS_DEVICE_GPIO_IRQ, ++ cpu + 4 * max_cpus); ++ } ++ ++ /* ++ * RFC: Question: This currently does not takes care of intimating the ++ * devices which might be sitting on system bus. Do we need a ++ * sysbus_disconnect_irq() which also does the job of notification beside ++ * disconnection? 
++ */ ++ qdev_disconnect_gpio_out_named(cpudev, "pmu-interrupt", 0); ++ qdev_disconnect_gpio_out_named(gicdev, SYSBUS_DEVICE_GPIO_IRQ, cpu); ++ qdev_disconnect_gpio_out_named(gicdev, ++ SYSBUS_DEVICE_GPIO_IRQ, cpu + max_cpus); ++ qdev_disconnect_gpio_out_named(gicdev, SYSBUS_DEVICE_GPIO_IRQ, ++ cpu + 2 * max_cpus); ++ qdev_disconnect_gpio_out_named(gicdev, SYSBUS_DEVICE_GPIO_IRQ, ++ cpu + 3 * max_cpus); ++} ++ ++static void wire_gic_cpu_irqs(VirtMachineState *vms, CPUState *cs) ++{ ++ MachineState *ms = MACHINE(vms); ++ unsigned int max_cpus = ms->smp.max_cpus; ++ DeviceState *cpudev = DEVICE(cs); ++ DeviceState *gicdev = vms->gic; ++ int cpu = CPU(cs)->cpu_index; ++ int type = vms->gic_version; ++ SysBusDevice *gicbusdev; ++ int intidbase; ++ int irq; ++ ++ intidbase = NUM_IRQS + cpu * GIC_INTERNAL; ++ ++ for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { ++ qdev_connect_gpio_out(cpudev, irq, ++ qdev_get_gpio_in(gicdev, ++ intidbase + timer_irq[irq])); ++ } ++ ++ gicbusdev = SYS_BUS_DEVICE(gicdev); ++ if (type != VIRT_GIC_VERSION_2) { ++ qemu_irq qirq = qdev_get_gpio_in(gicdev, ++ intidbase + ARCH_GIC_MAINT_IRQ); ++ qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", ++ 0, qirq); ++ } else if (vms->virt) { ++ qemu_irq qirq = qdev_get_gpio_in(gicdev, ++ intidbase + ARCH_GIC_MAINT_IRQ); ++ sysbus_connect_irq(gicbusdev, cpu + 4 * max_cpus, qirq); ++ } ++ ++ qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, ++ qdev_get_gpio_in(gicdev, ++ intidbase + VIRTUAL_PMU_IRQ)); ++ sysbus_connect_irq(gicbusdev, cpu, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); ++ sysbus_connect_irq(gicbusdev, cpu + max_cpus, ++ qdev_get_gpio_in(cpudev, ARM_CPU_FIQ)); ++ sysbus_connect_irq(gicbusdev, cpu + 2 * max_cpus, ++ qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ)); ++ sysbus_connect_irq(gicbusdev, cpu + 3 * max_cpus, ++ qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ)); ++} ++ + static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + { + MachineState *ms = MACHINE(vms); +@@ 
-894,46 +987,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + * and the GIC's IRQ/FIQ/VIRQ/VFIQ interrupt outputs to the CPU's inputs. + */ + for (i = 0; i < smp_cpus; i++) { +- DeviceState *cpudev = DEVICE(qemu_get_cpu(i)); +- int intidbase = NUM_IRQS + i * GIC_INTERNAL; +- /* Mapping from the output timer irq lines from the CPU to the +- * GIC PPI inputs we use for the virt board. +- */ +- const int timer_irq[] = { +- [GTIMER_PHYS] = ARCH_TIMER_NS_EL1_IRQ, +- [GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ, +- [GTIMER_HYP] = ARCH_TIMER_NS_EL2_IRQ, +- [GTIMER_SEC] = ARCH_TIMER_S_EL1_IRQ, +- }; +- +- for (unsigned irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { +- qdev_connect_gpio_out(cpudev, irq, +- qdev_get_gpio_in(vms->gic, +- intidbase + timer_irq[irq])); +- } +- +- if (vms->gic_version != VIRT_GIC_VERSION_2) { +- qemu_irq irq = qdev_get_gpio_in(vms->gic, +- intidbase + ARCH_GIC_MAINT_IRQ); +- qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", +- 0, irq); +- } else if (vms->virt) { +- qemu_irq irq = qdev_get_gpio_in(vms->gic, +- intidbase + ARCH_GIC_MAINT_IRQ); +- sysbus_connect_irq(gicbusdev, i + 4 * max_cpus, irq); +- } +- +- qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, +- qdev_get_gpio_in(vms->gic, intidbase +- + VIRTUAL_PMU_IRQ)); +- +- sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); +- sysbus_connect_irq(gicbusdev, i + max_cpus, +- qdev_get_gpio_in(cpudev, ARM_CPU_FIQ)); +- sysbus_connect_irq(gicbusdev, i + 2 * max_cpus, +- qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ)); +- sysbus_connect_irq(gicbusdev, i + 3 * max_cpus, +- qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ)); ++ wire_gic_cpu_irqs(vms, qemu_get_cpu(i)); + } + + fdt_add_gic_node(vms); +@@ -3162,7 +3216,7 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + */ + if (vms->acpi_dev) { + /* TODO: update GIC about this hotplug change here */ +- /* TODO: wire the GIC<->CPU irqs */ ++ wire_gic_cpu_irqs(vms, cs); + } + + /* +@@ 
-3246,7 +3300,7 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + + /* TODO: update the acpi cpu hotplug state for cpu hot-unplug */ + +- /* TODO: unwire the gic-cpu irqs here */ ++ unwire_gic_cpu_irqs(vms, cs); + /* TODO: update the GIC about this hot unplug change */ + + /* TODO: unregister cpu for reset & update F/W info for the next boot */ +diff --git a/hw/core/gpio.c b/hw/core/gpio.c +index 80d07a6ec9..abb164d5c0 100644 +--- a/hw/core/gpio.c ++++ b/hw/core/gpio.c +@@ -143,7 +143,7 @@ qemu_irq qdev_get_gpio_out_connector(DeviceState *dev, const char *name, int n) + + /* disconnect a GPIO output, returning the disconnected input (if any) */ + +-static qemu_irq qdev_disconnect_gpio_out_named(DeviceState *dev, ++qemu_irq qdev_disconnect_gpio_out_named(DeviceState *dev, + const char *name, int n) + { + char *propname = g_strdup_printf("%s[%d]", +diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h +index 151d968238..2d3661d6cd 100644 +--- a/include/hw/qdev-core.h ++++ b/include/hw/qdev-core.h +@@ -739,6 +739,8 @@ qemu_irq qdev_get_gpio_out_connector(DeviceState *dev, const char *name, int n); + */ + qemu_irq qdev_intercept_gpio_out(DeviceState *dev, qemu_irq icpt, + const char *name, int n); ++qemu_irq qdev_disconnect_gpio_out_named(DeviceState *dev, ++ const char *name, int n); + + BusState *qdev_get_child_bus(DeviceState *dev, const char *name); + +-- +2.27.0 + diff --git a/arm-virt-Consider-has_ged-when-set-mc-has_hotpluggab.patch b/arm-virt-Consider-has_ged-when-set-mc-has_hotpluggab.patch new file mode 100644 index 0000000000000000000000000000000000000000..27ca6d7ab1b918bffda8a3e78beae1626d19d6fb --- /dev/null +++ b/arm-virt-Consider-has_ged-when-set-mc-has_hotpluggab.patch @@ -0,0 +1,73 @@ +From baa26f2fc075522f91c3e9a332fc4fa3f3b167bf Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 22:55:49 +0800 +Subject: [PATCH] arm/virt: Consider has_ged when set mc->has_hotpluggable_cpus + +Vcpu hotplug relies on 
ged device. + +Signed-off-by: Keqian Zhu +--- + hw/arm/virt.c | 26 ++++++++++++++++---------- + 1 file changed, 16 insertions(+), 10 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 38b5d214a1..00e57f2d75 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2357,6 +2357,7 @@ static void machvirt_init(MachineState *machine) + bool has_ged = !vmc->no_ged; + unsigned int smp_cpus = machine->smp.cpus; + unsigned int max_cpus = machine->smp.max_cpus; ++ ObjectClass *cpu_class; + + if (!cpu_type_valid(machine->cpu_type)) { + error_report("mach-virt: CPU type %s not supported", machine->cpu_type); +@@ -2364,14 +2365,6 @@ static void machvirt_init(MachineState *machine) + } + + finalize_gic_version(vms); +- if (tcg_enabled() || hvf_enabled() || qtest_enabled() || +- (vms->gic_version < VIRT_GIC_VERSION_3)) { +- mc->has_hotpluggable_cpus = false; +- } +- if (!mc->has_hotpluggable_cpus) { +- machine->smp.max_cpus = smp_cpus; +- warn_report("cpu hotplug feature has been disabled"); +- } + + possible_cpus = mc->possible_cpu_arch_ids(machine); + +@@ -2501,6 +2494,21 @@ static void machvirt_init(MachineState *machine) + create_fdt(vms); + qemu_log("cpu init start\n"); + ++ cpu_class = object_class_by_name(machine->cpu_type); ++ has_ged = has_ged && firmware_loaded && ++ virt_is_acpi_enabled(vms) && ++ !!object_class_dynamic_cast(cpu_class, TYPE_AARCH64_CPU); ++ if (tcg_enabled() || hvf_enabled() || qtest_enabled() || ++ (vms->gic_version < VIRT_GIC_VERSION_3) || !has_ged) { ++ mc->has_hotpluggable_cpus = false; ++ } ++ if (!mc->has_hotpluggable_cpus) { ++ if (machine->smp.max_cpus > smp_cpus) { ++ warn_report("cpu hotplug feature has been disabled"); ++ } ++ machine->smp.max_cpus = smp_cpus; ++ } ++ + notifier_list_init(&vms->cpuhp_notifiers); + possible_cpus = mc->possible_cpu_arch_ids(machine); + assert(possible_cpus->len == max_cpus); +@@ -2581,8 +2589,6 @@ static void machvirt_init(MachineState *machine) + + create_gic(vms, sysmem); + +- has_ged = has_ged 
&& aarch64 && firmware_loaded && +- virt_is_acpi_enabled(vms); + if (has_ged) { + vms->acpi_dev = create_acpi_ged(vms); + } +-- +2.27.0 + diff --git a/arm-virt-Create-GED-dev-before-disabled-CPU-Objs-are.patch b/arm-virt-Create-GED-dev-before-disabled-CPU-Objs-are.patch new file mode 100644 index 0000000000000000000000000000000000000000..d120fe42a27c540e4c58a345b6425c7278453f26 --- /dev/null +++ b/arm-virt-Create-GED-dev-before-disabled-CPU-Objs-are.patch @@ -0,0 +1,54 @@ +From 028d71744dfeedabfa67d629c71a6ed5e494cc68 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 29 Aug 2023 00:47:05 +0000 +Subject: [PATCH] arm/virt: Create GED dev before *disabled* CPU Objs are + destroyed + +ACPI CPU hotplug state (is_present=_STA.PRESENT, is_enabled=_STA.ENABLED) for +all the possible vCPUs MUST be initialized during machine init. This is done +during the creation of the GED device. VMM/Qemu MUST expose/fake the ACPI state +of the disabled vCPUs to the Guest kernel as 'present' (_STA.PRESENT) always +i.e. ACPI persistent. If the 'disabled' vCPU objects are destroyed before the +GED device has been created then their ACPI hotplug state might not get +initialized correctly as acpi_persistent flag is part of the CPUState. This will +expose wrong status of the unplugged vCPUs to the Guest kernel. + +Hence, moving the GED device creation before disabled vCPU objects get destroyed +as part of the post CPU init routine. 
+ +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 155000f22f..818398e753 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2472,6 +2472,12 @@ static void machvirt_init(MachineState *machine) + + create_gic(vms, sysmem); + ++ has_ged = has_ged && aarch64 && firmware_loaded && ++ virt_is_acpi_enabled(vms); ++ if (has_ged) { ++ vms->acpi_dev = create_acpi_ged(vms); ++ } ++ + virt_cpu_post_init(vms, sysmem); + + fdt_add_pmu_nodes(vms); +@@ -2496,9 +2502,7 @@ static void machvirt_init(MachineState *machine) + + create_pcie(vms); + +- if (has_ged && aarch64 && firmware_loaded && virt_is_acpi_enabled(vms)) { +- vms->acpi_dev = create_acpi_ged(vms); +- } else { ++ if (!has_ged) { + create_gpio_devices(vms, VIRT_GPIO, sysmem); + } + +-- +2.27.0 + diff --git a/arm-virt-Don-t-modify-smp.max_cpus-when-vcpu-hotplug.patch b/arm-virt-Don-t-modify-smp.max_cpus-when-vcpu-hotplug.patch new file mode 100644 index 0000000000000000000000000000000000000000..3347c5836b9f4527ec96abf6ea6ea0f2d76facf1 --- /dev/null +++ b/arm-virt-Don-t-modify-smp.max_cpus-when-vcpu-hotplug.patch @@ -0,0 +1,152 @@ +From 52909d74ec37e851df3762a6eab1d7a6eeb89fba Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Sun, 28 Apr 2024 12:56:47 +0800 +Subject: [PATCH] arm/virt: Don't modify smp.max_cpus when vcpu hotplug + disabled + +smp.max_cpus has already been used to create possible_cpus, so +we must not change it after that. + +We should use smp.cpus when creating CPUs and the ACPI tables if vcpu +hotplug is disabled, instead of changing smp.max_cpus to smp.cpus +and using it everywhere. 
+ +Signed-off-by: Keqian Zhu +--- + hw/arm/virt-acpi-build.c | 8 +++++++- + hw/arm/virt.c | 24 ++++++++++++++++++++++-- + include/hw/arm/virt.h | 8 +++++++- + 3 files changed, 36 insertions(+), 4 deletions(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 99296fc6d8..179600d4fe 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -814,9 +814,15 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + { + int i; + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); ++ MachineState *ms = MACHINE(vms); + const MemMapEntry *memmap = vms->memmap; + AcpiTable table = { .sig = "APIC", .rev = 4, .oem_id = vms->oem_id, + .oem_table_id = vms->oem_table_id }; ++ unsigned int max_cpus = ms->smp.max_cpus; ++ ++ if (!vms->cpu_hotplug_enabled) { ++ max_cpus = ms->smp.cpus; ++ } + + acpi_table_begin(&table, table_data); + /* Local Interrupt Controller Address */ +@@ -835,7 +841,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + build_append_int_noprefix(table_data, vms->gic_version, 1); + build_append_int_noprefix(table_data, 0, 3); /* Reserved */ + +- for (i = 0; i < MACHINE(vms)->smp.max_cpus; i++) { ++ for (i = 0; i < max_cpus; i++) { + CPUState *cpu = qemu_get_possible_cpu(i); + uint64_t physical_base_address = 0, gich = 0, gicv = 0; + uint32_t vgic_interrupt = vms->virt ? 
ARCH_GIC_MAINT_IRQ : 0; +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index e4473354d4..507b09d96c 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -831,6 +831,10 @@ static void unwire_gic_cpu_irqs(VirtMachineState *vms, CPUState *cs) + int type = vms->gic_version; + int irq; + ++ if (!vms->cpu_hotplug_enabled) { ++ max_cpus = ms->smp.cpus; ++ } ++ + for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { + qdev_disconnect_gpio_out_named(cpudev, NULL, irq); + } +@@ -871,6 +875,10 @@ static void wire_gic_cpu_irqs(VirtMachineState *vms, CPUState *cs) + int intidbase; + int irq; + ++ if (!vms->cpu_hotplug_enabled) { ++ max_cpus = ms->smp.cpus; ++ } ++ + intidbase = NUM_IRQS + cpu * GIC_INTERNAL; + + for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { +@@ -915,6 +923,10 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + uint32_t nb_redist_regions = 0; + int revision; + ++ if (!vms->cpu_hotplug_enabled) { ++ max_cpus = ms->smp.cpus; ++ } ++ + if (vms->gic_version == VIRT_GIC_VERSION_2) { + gictype = gic_class_name(); + } else { +@@ -2165,6 +2177,9 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + + for (n = 0; n < possible_cpus->len; n++) { + cpu = qemu_get_possible_cpu(n); ++ if (!qemu_present_cpu(cpu)) { ++ continue; ++ } + + if (vms->pmu) { + assert(arm_feature(&ARM_CPU(cpu)->env, ARM_FEATURE_PMU)); +@@ -2195,6 +2210,9 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + if (kvm_enabled() || tcg_enabled()) { + for (n = 0; n < possible_cpus->len; n++) { + cpu = qemu_get_possible_cpu(n); ++ if (!qemu_present_cpu(cpu)) { ++ continue; ++ } + + /* + * Now, GIC has been sized with possible CPUs and we dont require +@@ -2511,16 +2529,18 @@ static void machvirt_init(MachineState *machine) + if (machine->smp.max_cpus > smp_cpus) { + warn_report("cpu hotplug feature has been disabled"); + } +- machine->smp.max_cpus = smp_cpus; + } + + notifier_list_init(&vms->cpuhp_notifiers); +- possible_cpus = 
mc->possible_cpu_arch_ids(machine); + assert(possible_cpus->len == max_cpus); + for (n = 0; n < possible_cpus->len; n++) { + Object *cpuobj; + CPUState *cs; + ++ if (!vms->cpu_hotplug_enabled && n >= smp_cpus) { ++ break; ++ } ++ + cpuobj = object_new(possible_cpus->cpus[n].type); + cs = CPU(cpuobj); + +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 138531f9c1..7a734f07f7 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -210,10 +210,16 @@ static uint32_t virt_redist_capacity(VirtMachineState *vms, int region) + static inline int virt_gicv3_redist_region_count(VirtMachineState *vms) + { + uint32_t redist0_capacity = virt_redist_capacity(vms, VIRT_GIC_REDIST); ++ MachineState *ms = MACHINE(vms); ++ unsigned int max_cpus = ms->smp.max_cpus; ++ ++ if (!vms->cpu_hotplug_enabled) { ++ max_cpus = ms->smp.cpus; ++ } + + assert(vms->gic_version != VIRT_GIC_VERSION_2); + +- return (MACHINE(vms)->smp.max_cpus > redist0_capacity && ++ return (max_cpus > redist0_capacity && + vms->highmem_redists) ? 2 : 1; + } + +-- +2.27.0 + diff --git a/arm-virt-Fix-adjudgement-of-core_id-for-vcpu-hotplug.patch b/arm-virt-Fix-adjudgement-of-core_id-for-vcpu-hotplug.patch new file mode 100644 index 0000000000000000000000000000000000000000..30a11521e48cbfda6ec3bbf2d9861bf189398472 --- /dev/null +++ b/arm-virt-Fix-adjudgement-of-core_id-for-vcpu-hotplug.patch @@ -0,0 +1,47 @@ +From 00a78edf572783c18a1d4945758371c0f175e321 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 15:41:14 +0800 +Subject: [PATCH] arm/virt: Fix adjudgement of core_id for vcpu hotplugged + +The core_id should between 0 and ms->smp.cores - 1. 
+ +Signed-off-by: Keqian Zhu +--- + hw/arm/virt.c | 14 +++----------- + 1 file changed, 3 insertions(+), 11 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 934b0412ef..e60f3431f9 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3170,8 +3170,6 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + ARMCPU *cpu = ARM_CPU(dev); + CPUState *cs = CPU(dev); + CPUArchId *cpu_slot; +- int32_t min_cpuid = 0; +- int32_t max_cpuid; + + if (dev->hotplugged && !vms->acpi_dev) { + error_setg(errp, "GED acpi device does not exists"); +@@ -3196,15 +3194,9 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + return; + } + +- max_cpuid = ms->possible_cpus->len - 1; +- if (!dev->hotplugged) { +- min_cpuid = vms->acpi_dev ? ms->smp.cpus : 0; +- max_cpuid = vms->acpi_dev ? max_cpuid : ms->smp.cpus - 1; +- } +- +- if ((cpu->core_id < min_cpuid) || (cpu->core_id > max_cpuid)) { +- error_setg(errp, "Invalid core-id %d specified, correct range %d:%d", +- cpu->core_id, min_cpuid, max_cpuid); ++ if ((cpu->core_id < 0) || (cpu->core_id >= ms->smp.cores)) { ++ error_setg(errp, "Invalid core-id %d specified, correct range 0:%u", ++ cpu->core_id, ms->smp.cores - 1); + return; + } + +-- +2.27.0 + diff --git a/arm-virt-Init-PMU-at-host-for-all-possible-vcpus.patch b/arm-virt-Init-PMU-at-host-for-all-possible-vcpus.patch new file mode 100644 index 0000000000000000000000000000000000000000..89fd4ca49fe6115d3ed7d19eb2a265bbe462bc46 --- /dev/null +++ b/arm-virt-Init-PMU-at-host-for-all-possible-vcpus.patch @@ -0,0 +1,71 @@ +From c375e6fdc49f7d3d0232786e4cfd8b792379107c Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Wed, 6 May 2020 14:12:34 +0100 +Subject: [PATCH] arm/virt: Init PMU at host for all possible vcpus + +PMU for all possible vCPUs must be initialized at the VM initialization time. +Refactor existing code to accommodate possible vCPUs. This also assumes that all +processors being used are identical. 
+ +Past discussion for reference: +Link: https://lists.gnu.org/archive/html/qemu-devel/2020-06/msg00131.html + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 12 ++++++++---- + include/hw/arm/virt.h | 1 + + 2 files changed, 9 insertions(+), 4 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 08ba255317..78ed3c4ba8 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2055,12 +2055,14 @@ static void finalize_gic_version(VirtMachineState *vms) + */ + static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + { ++ CPUArchIdList *possible_cpus = vms->parent.possible_cpus; + int max_cpus = MACHINE(vms)->smp.max_cpus; +- bool aarch64, pmu, steal_time; ++ bool aarch64, steal_time; + CPUState *cpu; ++ int n; + + aarch64 = object_property_get_bool(OBJECT(first_cpu), "aarch64", NULL); +- pmu = object_property_get_bool(OBJECT(first_cpu), "pmu", NULL); ++ vms->pmu = object_property_get_bool(OBJECT(first_cpu), "pmu", NULL); + steal_time = object_property_get_bool(OBJECT(first_cpu), + "kvm-steal-time", NULL); + +@@ -2087,8 +2089,10 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + memory_region_add_subregion(sysmem, pvtime_reg_base, pvtime); + } + +- CPU_FOREACH(cpu) { +- if (pmu) { ++ for (n = 0; n < possible_cpus->len; n++) { ++ cpu = qemu_get_possible_cpu(n); ++ ++ if (vms->pmu) { + assert(arm_feature(&ARM_CPU(cpu)->env, ARM_FEATURE_PMU)); + if (kvm_irqchip_in_kernel()) { + kvm_arm_pmu_set_irq(cpu, VIRTUAL_PMU_IRQ); +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index a6977bade5..c2fde0522c 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -155,6 +155,7 @@ struct VirtMachineState { + bool ras; + bool mte; + bool dtb_randomness; ++ bool pmu; + OnOffAuto acpi; + VirtGICType gic_version; + VirtIOMMUType iommu; +-- +2.27.0 + diff --git 
a/arm-virt-Make-ARM-vCPU-present-status-ACPI-persisten.patch b/arm-virt-Make-ARM-vCPU-present-status-ACPI-persisten.patch new file mode 100644 index 0000000000000000000000000000000000000000..c8e661145e7b1b42272971979d463d51a5ee7b4e --- /dev/null +++ b/arm-virt-Make-ARM-vCPU-present-status-ACPI-persisten.patch @@ -0,0 +1,97 @@ +From 3780dddd4fc8f0471525c50893e24846d1474692 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 8 Aug 2023 00:43:18 +0000 +Subject: [PATCH] arm/virt: Make ARM vCPU *present* status ACPI *persistent* + +ARM arch does not allow CPUs presence to be changed [1] after kernel has booted. +Hence, firmware/ACPI/Qemu must ensure persistent view of the vCPUs to the Guest +kernel even when they are not present in the QoM i.e. are unplugged or are +yet-to-be-plugged + +References: +[1] Check comment 5 in the bugzilla entry + Link: https://bugzilla.tianocore.org/show_bug.cgi?id=4481#c5 + +Signed-off-by: Salil Mehta +--- + cpu-common.c | 6 ++++++ + hw/arm/virt.c | 7 +++++++ + include/hw/core/cpu.h | 20 ++++++++++++++++++++ + 3 files changed, 33 insertions(+) + +diff --git a/cpu-common.c b/cpu-common.c +index d041a351ab..da52e45760 100644 +--- a/cpu-common.c ++++ b/cpu-common.c +@@ -128,6 +128,12 @@ bool qemu_enabled_cpu(CPUState *cpu) + return cpu && !cpu->disabled; + } + ++bool qemu_persistent_cpu(CPUState *cpu) ++{ ++ /* cpu state can be faked to the guest via acpi */ ++ return cpu->acpi_persistent; ++} ++ + uint64_t qemu_get_cpu_archid(int cpu_index) + { + MachineState *ms = MACHINE(qdev_get_machine()); +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 818398e753..91b2653c03 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3104,6 +3104,13 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + return; + } + virt_cpu_set_properties(OBJECT(cs), cpu_slot, errp); ++ ++ /* ++ * To give persistent presence view of vCPUs to the guest, ACPI might need ++ * to fake the presence of the vCPUs to the guest but keep them 
disabled. ++ * This shall be used during the init of ACPI Hotplug state and hot-unplug ++ */ ++ cs->acpi_persistent = true; + } + + static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index fdfb952259..0ca778eb75 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -550,6 +550,13 @@ struct CPUState { + * By default every CPUState is enabled as of now across all archs. + */ + bool disabled; ++ /* ++ * On certain architectures, to give persistent view of the 'presence' of ++ * vCPUs to the guest, ACPI might need to fake the 'presence' of the vCPUs ++ * but keep them ACPI disabled to the guest. This is done by returning ++ * _STA.PRES=True and _STA.Ena=False for the unplugged vCPUs in QEMU QoM. ++ */ ++ bool acpi_persistent; + /* TODO Move common fields from CPUArchState here. */ + int cpu_index; + int cluster_index; +@@ -957,6 +964,19 @@ bool qemu_present_cpu(CPUState *cpu); + */ + bool qemu_enabled_cpu(CPUState *cpu); + ++/** ++ * qemu_persistent_cpu: ++ * @cpu: The vCPU to check ++ * ++ * Checks if the vCPU state should always be reflected as *present* via ACPI ++ * to the Guest. By default, this is False on all architectures and has to be ++ * explicity set during initialization. 
++ * ++ * Returns: True if it is ACPI 'persistent' CPU ++ * ++ */ ++bool qemu_persistent_cpu(CPUState *cpu); ++ + /** + * qemu_get_cpu_archid: + * @cpu_index: possible vCPU for which arch-id needs to be retreived +-- +2.27.0 + diff --git a/arm-virt-Release-objects-for-disabled-possible-vCPUs.patch b/arm-virt-Release-objects-for-disabled-possible-vCPUs.patch new file mode 100644 index 0000000000000000000000000000000000000000..9a1198a9d63b5203f251134500fc03dc3847611a --- /dev/null +++ b/arm-virt-Release-objects-for-disabled-possible-vCPUs.patch @@ -0,0 +1,88 @@ +From 097e3b46a7eede0182a846f7b993e14d3eed83b7 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 9 Jun 2020 03:01:08 +0100 +Subject: [PATCH] arm/virt: Release objects for *disabled* possible vCPUs after + init + +During machvirt_init(), QOM ARMCPU objects are also pre-created along with the +corresponding KVM vCPUs in the host for all possible vCPUs. This is necessary +because of the architectural constraint, KVM restricts the deferred creation of +the KVM vCPUs and VGIC initialization/sizing after VM init. Hence, VGIC is +pre-sized with possible vCPUs. + +After initialization of the machine is complete, disabled possible KVM vCPUs are +then parked at the per-virt-machine list "kvm_parked_vcpus" and we release the +QOM ARMCPU objects for the disabled vCPUs. These shall be re-created at the time +when vCPU is hotplugged again. QOM ARMCPU object is then re-attached with +corresponding parked KVM vCPU. + +Alternatively, we could've never released the QOM CPU objects and kept on +reusing. This approach might require some modifications of qdevice_add() +interface to get old ARMCPU object instead of creating a new one for the hotplug +request. + +Each of the above approaches comes with its own pros and cons. This prototype +uses the 1st approach. (suggestions are welcome!) 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 32 ++++++++++++++++++++++++++++++++ + 1 file changed, 32 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 91b2653c03..bf385a469c 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2060,6 +2060,7 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + { + CPUArchIdList *possible_cpus = vms->parent.possible_cpus; + int max_cpus = MACHINE(vms)->smp.max_cpus; ++ MachineState *ms = MACHINE(vms); + bool aarch64, steal_time; + CPUState *cpu; + int n; +@@ -2120,6 +2121,37 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + } + } + } ++ ++ if (kvm_enabled() || tcg_enabled()) { ++ for (n = 0; n < possible_cpus->len; n++) { ++ cpu = qemu_get_possible_cpu(n); ++ ++ /* ++ * Now, GIC has been sized with possible CPUs and we dont require ++ * disabled vCPU objects to be represented in the QOM. Release the ++ * disabled ARMCPU objects earlier used during init for pre-sizing. ++ * ++ * We fake to the guest through ACPI about the presence(_STA.PRES=1) ++ * of these non-existent vCPUs at VMM/qemu and present these as ++ * disabled vCPUs(_STA.ENA=0) so that they cant be used. These vCPUs ++ * can be later added to the guest through hotplug exchanges when ++ * ARMCPU objects are created back again using 'device_add' QMP ++ * command. ++ */ ++ /* ++ * RFC: Question: Other approach could've been to keep them forever ++ * and release it only once when qemu exits as part of finalize or ++ * when new vCPU is hotplugged. In the later old could be released ++ * for the newly created object for the same vCPU? 
++ */ ++ if (!qemu_enabled_cpu(cpu)) { ++ CPUArchId *cpu_slot; ++ cpu_slot = virt_find_cpu_slot(ms, cpu->cpu_index); ++ cpu_slot->cpu = NULL; ++ object_unref(OBJECT(cpu)); ++ } ++ } ++ } + } + + static void virt_cpu_set_properties(Object *cpuobj, const CPUArchId *cpu_slot, +-- +2.27.0 + diff --git a/arm-virt-Require-mc-has_hotpluggable_cpus-for-cold-p.patch b/arm-virt-Require-mc-has_hotpluggable_cpus-for-cold-p.patch new file mode 100644 index 0000000000000000000000000000000000000000..d64e1a07a1cdb9de75b21fe11f1cac4b340a596f --- /dev/null +++ b/arm-virt-Require-mc-has_hotpluggable_cpus-for-cold-p.patch @@ -0,0 +1,55 @@ +From 519699c61eeb980bb7d7f443eb95c0406aae82da Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 23:05:39 +0800 +Subject: [PATCH] arm/virt: Require mc->has_hotpluggable_cpus for cold-plugged + vcpu + +Cold-plugged vCPU also need mc->has_hotpluggable_cpus. + +Signed-off-by: Keqian Zhu +--- + hw/arm/virt.c | 21 +++++++++++---------- + 1 file changed, 11 insertions(+), 10 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 00e57f2d75..73b29c7f73 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3179,16 +3179,6 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + CPUState *cs = CPU(dev); + CPUArchId *cpu_slot; + +- if (dev->hotplugged && !vms->acpi_dev) { +- error_setg(errp, "GED acpi device does not exists"); +- return; +- } +- +- if (dev->hotplugged && !mc->has_hotpluggable_cpus) { +- error_setg(errp, "CPU hotplug not supported on this machine"); +- return; +- } +- + /* sanity check the cpu */ + if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { + error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", +@@ -3222,6 +3212,17 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + + cs->cpu_index = virt_get_cpu_id_from_cpu_topo(ms, dev); + ++ /* Except for cold-booted vCPUs, this should check presence of ACPI GED */ ++ if (cs->cpu_index >= ms->smp.cpus && 
!vms->acpi_dev) { ++ error_setg(errp, "GED acpi device does not exists"); ++ return; ++ } ++ ++ if (cs->cpu_index >= ms->smp.cpus && !mc->has_hotpluggable_cpus) { ++ error_setg(errp, "CPU [cold|hot]plug not supported on this machine"); ++ return; ++ } ++ + cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index); + if (qemu_present_cpu(CPU(cpu_slot->cpu))) { + error_setg(errp, "cpu(id%d=%d:%d:%d:%d) with arch-id %" PRIu64 " exist", +-- +2.27.0 + diff --git a/arm-virt-Update-the-guest-via-GED-about-CPU-hot-un-p.patch b/arm-virt-Update-the-guest-via-GED-about-CPU-hot-un-p.patch new file mode 100644 index 0000000000000000000000000000000000000000..a45f47dea256e42e6e8b8f1817f724c9cac6cb10 --- /dev/null +++ b/arm-virt-Update-the-guest-via-GED-about-CPU-hot-un-p.patch @@ -0,0 +1,123 @@ +From afb71c88d935349cdf9763e8f51f77334ab615ec Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Fri, 8 May 2020 18:54:10 +0100 +Subject: [PATCH] arm/virt: Update the guest(via GED) about CPU hot-(un)plug + events + +During any vCPU hot-(un)plug, running guest VM needs to be intimated about the +new vCPU being added or request the deletion of the vCPU which is already part +of the guest VM. This is done using the ACPI GED event which eventually gets +demultiplexed to a CPU hotplug event and further to specific hot-(un)plug event +of a particular vCPU. + +This change adds the ACPI calls to the existing hot-(un)plug hooks to trigger +ACPI GED events from QEMU to guest VM. 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 33 ++++++++++++++++++++++++++++++--- + 1 file changed, 30 insertions(+), 3 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 0312fa366d..60cd560ab9 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3256,6 +3256,7 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); + MachineState *ms = MACHINE(hotplug_dev); + CPUState *cs = CPU(dev); ++ Error *local_err = NULL; + CPUArchId *cpu_slot; + + /* insert the cold/hot-plugged vcpu in the slot */ +@@ -3268,12 +3269,20 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + * plugged, guest is also notified. + */ + if (vms->acpi_dev) { +- /* TODO: update acpi hotplug state. Send cpu hotplug event to guest */ ++ HotplugHandlerClass *hhc; ++ /* update acpi hotplug state and send cpu hotplug event to guest */ ++ hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev); ++ hhc->plug(HOTPLUG_HANDLER(vms->acpi_dev), dev, &local_err); ++ if (local_err) { ++ goto fail; ++ } + /* TODO: register cpu for reset & update F/W info for the next boot */ + } + + cs->disabled = false; + return; ++fail: ++ error_propagate(errp, local_err); + } + + static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev, +@@ -3281,8 +3290,10 @@ static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev, + { + MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); + VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); ++ HotplugHandlerClass *hhc; + ARMCPU *cpu = ARM_CPU(dev); + CPUState *cs = CPU(dev); ++ Error *local_err = NULL; + + if (!vms->acpi_dev || !dev->realized) { + error_setg(errp, "GED does not exists or device is not realized!"); +@@ -3301,9 +3312,16 @@ static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev, + return; + } + +- /* TODO: request cpu hotplug 
from guest */ ++ /* request cpu hotplug from guest */ ++ hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev); ++ hhc->unplug_request(HOTPLUG_HANDLER(vms->acpi_dev), dev, &local_err); ++ if (local_err) { ++ goto fail; ++ } + + return; ++fail: ++ error_propagate(errp, local_err); + } + + static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, +@@ -3311,7 +3329,9 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + { + VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); + MachineState *ms = MACHINE(hotplug_dev); ++ HotplugHandlerClass *hhc; + CPUState *cs = CPU(dev); ++ Error *local_err = NULL; + CPUArchId *cpu_slot; + + if (!vms->acpi_dev || !dev->realized) { +@@ -3321,7 +3341,12 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + + cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index); + +- /* TODO: update the acpi cpu hotplug state for cpu hot-unplug */ ++ /* update the acpi cpu hotplug state for cpu hot-unplug */ ++ hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev); ++ hhc->unplug(HOTPLUG_HANDLER(vms->acpi_dev), dev, &local_err); ++ if (local_err) { ++ goto fail; ++ } + + unwire_gic_cpu_irqs(vms, cs); + virt_update_gic(vms, cs); +@@ -3335,6 +3360,8 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + cs->disabled = true; + + return; ++fail: ++ error_propagate(errp, local_err); + } + + static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, +-- +2.27.0 + diff --git a/arm-virt-Use-max_cpus-to-calculate-redist1_count.patch b/arm-virt-Use-max_cpus-to-calculate-redist1_count.patch new file mode 100644 index 0000000000000000000000000000000000000000..2a40a2ace8b01d0d7897c1c1f61e248995cd7780 --- /dev/null +++ b/arm-virt-Use-max_cpus-to-calculate-redist1_count.patch @@ -0,0 +1,29 @@ +From 4a3d9e9dc874f6825b8b5f18a4dece1609d48d2f Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Mon, 15 Apr 2024 22:40:29 +0800 +Subject: [PATCH] arm/virt: Use max_cpus to calculate 
redist1_count + +When cpu hotplug is enabled, the redist1_count should +include all possible cpus. + +Signed-off-by: Keqian Zhu +--- + hw/arm/virt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 44931355d6..e4473354d4 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -959,7 +959,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2); + + qlist_append_int(redist_region_count, +- MIN(smp_cpus - redist0_count, redist1_capacity)); ++ MIN(max_cpus - redist0_count, redist1_capacity)); + } + qdev_prop_set_array(vms->gic, "redist-region-count", + redist_region_count); +-- +2.41.0 + diff --git a/arm-virt-Use-separate-filed-to-identify-cpu-hotplug-.patch b/arm-virt-Use-separate-filed-to-identify-cpu-hotplug-.patch new file mode 100644 index 0000000000000000000000000000000000000000..1c5d0e75a15ba0adf3a36d38e29f44d5437ad658 --- /dev/null +++ b/arm-virt-Use-separate-filed-to-identify-cpu-hotplug-.patch @@ -0,0 +1,226 @@ +From 0ec1c95eea8c68243919ee4f8cd28b9a97dfc2f0 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Mon, 15 Apr 2024 22:37:53 +0800 +Subject: [PATCH] arm/virt: Use separate filed to identify cpu-hotplug enable + +The mc->has_hotpluggable_cpus should not be modified after +machine class init. 
+ +Signed-off-by: Keqian Zhu +--- + accel/kvm/kvm-all.c | 6 ++++++ + hw/arm/virt-acpi-build.c | 13 +++++-------- + hw/arm/virt.c | 20 +++++++++++++------- + include/hw/arm/virt.h | 1 + + include/sysemu/kvm.h | 2 ++ + include/sysemu/kvm_int.h | 1 + + target/arm/kvm.c | 7 +++---- + 7 files changed, 31 insertions(+), 19 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 75a3075c14..b791aad1d6 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -3603,6 +3603,11 @@ bool kvm_kernel_irqchip_split(void) + return kvm_state->kernel_irqchip_split == ON_OFF_AUTO_ON; + } + ++bool kvm_smccc_filter_enabled(void) ++{ ++ return kvm_state->kvm_smccc_filter_enabled; ++} ++ + static void kvm_get_dirty_ring_size(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +@@ -3648,6 +3653,7 @@ static void kvm_accel_instance_init(Object *obj) + /* KVM dirty ring is by default off */ + s->kvm_dirty_ring_size = 0; + s->kvm_dirty_ring_with_bitmap = false; ++ s->kvm_smccc_filter_enabled = false; + s->kvm_eager_split_size = 0; + s->notify_vmexit = NOTIFY_VMEXIT_OPTION_RUN; + s->notify_window = 0; +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 46642efac4..99296fc6d8 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -779,12 +779,10 @@ static void build_append_gicr(GArray *table_data, uint64_t base, uint32_t size) + build_append_int_noprefix(table_data, size, 4); /* Discovery Range Length */ + } + +-static uint32_t virt_acpi_get_gicc_flags(CPUState *cpu) ++static uint32_t virt_acpi_get_gicc_flags(CPUState *cpu, VirtMachineState *vms) + { +- MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); +- + /* can only exist in 'enabled' state */ +- if (!mc->has_hotpluggable_cpus) { ++ if (!vms->cpu_hotplug_enabled) { + return 1; + } + +@@ -842,7 +840,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + uint64_t physical_base_address = 0, gich = 0, gicv = 0; + 
uint32_t vgic_interrupt = vms->virt ? ARCH_GIC_MAINT_IRQ : 0; + uint32_t pmu_interrupt = vms->pmu ? VIRTUAL_PMU_IRQ : 0; +- uint32_t flags = virt_acpi_get_gicc_flags(cpu); ++ uint32_t flags = virt_acpi_get_gicc_flags(cpu, vms); + uint64_t mpidr = qemu_get_cpu_archid(i); + + if (vms->gic_version == VIRT_GIC_VERSION_2) { +@@ -1003,7 +1001,6 @@ static void + build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + { + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); +- MachineClass *mc = MACHINE_GET_CLASS(vms); + Aml *scope, *dsdt; + MachineState *ms = MACHINE(vms); + const MemMapEntry *memmap = vms->memmap; +@@ -1020,8 +1017,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + * the RTC ACPI device at all when using UEFI. + */ + scope = aml_scope("\\_SB"); +- /* if GED is enabled then cpus AML shall be added as part build_cpus_aml */ +- if (mc->has_hotpluggable_cpus) { ++ ++ if (vms->cpu_hotplug_enabled) { + CPUHotplugFeatures opts = { + .acpi_1_compatible = false, + .has_legacy_cphp = false +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 73b29c7f73..44931355d6 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -756,7 +756,7 @@ static void virt_add_gic_cpuhp_notifier(VirtMachineState *vms) + { + MachineClass *mc = MACHINE_GET_CLASS(vms); + +- if (mc->has_hotpluggable_cpus) { ++ if (mc->has_hotpluggable_cpus && vms->gic_version >= VIRT_GIC_VERSION_3) { + Notifier *cpuhp_notifier = gicv3_cpuhp_notifier(vms->gic); + notifier_list_add(&vms->cpuhp_notifiers, cpuhp_notifier); + } +@@ -2498,11 +2498,16 @@ static void machvirt_init(MachineState *machine) + has_ged = has_ged && firmware_loaded && + virt_is_acpi_enabled(vms) && + !!object_class_dynamic_cast(cpu_class, TYPE_AARCH64_CPU); ++ + if (tcg_enabled() || hvf_enabled() || qtest_enabled() || ++ (kvm_enabled() && !kvm_smccc_filter_enabled()) || + (vms->gic_version < VIRT_GIC_VERSION_3) || !has_ged) { +- mc->has_hotpluggable_cpus = false; ++ vms->cpu_hotplug_enabled = 
false; ++ } else { ++ vms->cpu_hotplug_enabled = true; + } +- if (!mc->has_hotpluggable_cpus) { ++ ++ if (!vms->cpu_hotplug_enabled) { + if (machine->smp.max_cpus > smp_cpus) { + warn_report("cpu hotplug feature has been disabled"); + } +@@ -3174,7 +3179,6 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + { + VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); + MachineState *ms = MACHINE(hotplug_dev); +- MachineClass *mc = MACHINE_GET_CLASS(ms); + ARMCPU *cpu = ARM_CPU(dev); + CPUState *cs = CPU(dev); + CPUArchId *cpu_slot; +@@ -3218,7 +3222,7 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + return; + } + +- if (cs->cpu_index >= ms->smp.cpus && !mc->has_hotpluggable_cpus) { ++ if (cs->cpu_index >= ms->smp.cpus && !vms->cpu_hotplug_enabled) { + error_setg(errp, "CPU [cold|hot]plug not supported on this machine"); + return; + } +@@ -3304,7 +3308,6 @@ fail: + static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +- MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); + VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); + HotplugHandlerClass *hhc; + ARMCPU *cpu = ARM_CPU(dev); +@@ -3316,7 +3319,7 @@ static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev, + return; + } + +- if (!mc->has_hotpluggable_cpus) { ++ if (!vms->cpu_hotplug_enabled) { + error_setg(errp, "CPU hot(un)plug not supported on this machine"); + return; + } +@@ -3780,6 +3783,9 @@ static void virt_instance_init(Object *obj) + /* EL2 is also disabled by default, for similar reasons */ + vms->virt = false; + ++ /* CPU hotplug is enabled by default */ ++ vms->cpu_hotplug_enabled = true; ++ + /* High memory is enabled by default */ + vms->highmem = true; + vms->highmem_compact = !vmc->no_highmem_compact; +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index ae0f5beb26..138531f9c1 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -153,6 +153,7 @@ 
struct VirtMachineState { + bool its; + bool tcg_its; + bool virt; ++ bool cpu_hotplug_enabled; + bool ras; + bool mte; + bool dtb_randomness; +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index e534411ddc..cfa77cc15b 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -492,6 +492,8 @@ bool kvm_kernel_irqchip_allowed(void); + bool kvm_kernel_irqchip_required(void); + bool kvm_kernel_irqchip_split(void); + ++bool kvm_smccc_filter_enabled(void); ++ + /** + * kvm_arch_irqchip_create: + * @KVMState: The KVMState pointer +diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h +index fd846394be..b2d2c59477 100644 +--- a/include/sysemu/kvm_int.h ++++ b/include/sysemu/kvm_int.h +@@ -112,6 +112,7 @@ struct KVMState + uint64_t kvm_dirty_ring_bytes; /* Size of the per-vcpu dirty ring */ + uint32_t kvm_dirty_ring_size; /* Number of dirty GFNs per ring */ + bool kvm_dirty_ring_with_bitmap; ++ bool kvm_smccc_filter_enabled; + uint64_t kvm_eager_split_size; /* Eager Page Splitting chunk size */ + struct KVMDirtyRingReaper reaper; + NotifyVmexitOption notify_vmexit; +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 19783d567f..12c1b4b328 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -321,12 +321,11 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + if (kvm_arm_set_smccc_filter(PSCI_0_2_FN64_CPU_ON, + KVM_SMCCC_FILTER_FWD_TO_USER)) { + error_report("CPU On PSCI-to-user-space fwd filter install failed"); +- mc->has_hotpluggable_cpus = false; +- } +- if (kvm_arm_set_smccc_filter(PSCI_0_2_FN_CPU_OFF, ++ } else if (kvm_arm_set_smccc_filter(PSCI_0_2_FN_CPU_OFF, + KVM_SMCCC_FILTER_FWD_TO_USER)) { + error_report("CPU Off PSCI-to-user-space fwd filter install failed"); +- mc->has_hotpluggable_cpus = false; ++ } else { ++ s->kvm_smccc_filter_enabled = true; + } + } + +-- +2.41.0 + diff --git a/arm-virt-acpi-Build-CPUs-AML-with-CPU-Hotplug-suppor.patch b/arm-virt-acpi-Build-CPUs-AML-with-CPU-Hotplug-suppor.patch new file mode 
100644 index 0000000000000000000000000000000000000000..cde5af36c19f45400e9c75b3755d57fcd19967ba --- /dev/null +++ b/arm-virt-acpi-Build-CPUs-AML-with-CPU-Hotplug-suppor.patch @@ -0,0 +1,43 @@ +From bea23b0f82cedbd860b66c7b9e1f6bb0ca85d1cf Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sun, 6 Aug 2023 17:05:30 +0000 +Subject: [PATCH] arm/virt/acpi: Build CPUs AML with CPU Hotplug support + +Support of vCPU Hotplug requires a sequence of ACPI handshakes between Qemu and +Guest kernel when a vCPU is plugged or unplugged. Most of the AML code to +support these handshakes already exists. This AML needs to be built during VM +init for ARM architecture as well if the GED support exists. + +Signed-off-by: Salil Mehta +--- + hw/arm/virt-acpi-build.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 084c8abc7c..d88f3cded1 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -937,7 +937,19 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + * the RTC ACPI device at all when using UEFI. 
+ */ + scope = aml_scope("\\_SB"); +- acpi_dsdt_add_cpus(scope, vms); ++ /* if GED is enabled then cpus AML shall be added as part build_cpus_aml */ ++ if (vms->acpi_dev) { ++ CPUHotplugFeatures opts = { ++ .acpi_1_compatible = false, ++ .has_legacy_cphp = false ++ }; ++ ++ build_cpus_aml(scope, ms, opts, NULL, virt_acpi_dsdt_cpu_cppc, ++ memmap[VIRT_CPUHP_ACPI].base, ++ "\\_SB", NULL, AML_SYSTEM_MEMORY); ++ } else { ++ acpi_dsdt_add_cpus(scope, vms); ++ } + acpi_dsdt_add_uart(scope, &memmap[VIRT_UART], + (irqmap[VIRT_UART] + ARM_SPI_BASE)); + if (vmc->acpi_expose_flash) { +-- +2.27.0 + diff --git a/arm-virt-acpi-Extend-cpufreq-to-support-max_cpus.patch b/arm-virt-acpi-Extend-cpufreq-to-support-max_cpus.patch new file mode 100644 index 0000000000000000000000000000000000000000..b4efd1f14b68b65373b69f4f2508cd507873b1d2 --- /dev/null +++ b/arm-virt-acpi-Extend-cpufreq-to-support-max_cpus.patch @@ -0,0 +1,66 @@ +From fb27704692362d151eb191f0c687ded09b04e04c Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Sun, 28 Apr 2024 14:14:07 +0800 +Subject: [PATCH] arm/virt/acpi: Extend cpufreq to support max_cpus + +We support vcpu hotplug now, so extend memory region size to +allow hotplugged CPU access cpufreq space. 
+ +Signed-off-by: Keqian Zhu +--- + hw/acpi/cpufreq.c | 15 ++++++--------- + 1 file changed, 6 insertions(+), 9 deletions(-) + +diff --git a/hw/acpi/cpufreq.c b/hw/acpi/cpufreq.c +index a84db490b3..a76f7b8fa2 100644 +--- a/hw/acpi/cpufreq.c ++++ b/hw/acpi/cpufreq.c +@@ -83,6 +83,7 @@ typedef struct CpuhzState { + uint32_t PerformanceLimited; + uint32_t LowestFreq; + uint32_t NominalFreq; ++ uint32_t num_cpu; + uint32_t reg_size; + } CpuhzState; + +@@ -93,10 +94,7 @@ static uint64_t cpufreq_read(void *opaque, hwaddr offset, unsigned size) + uint64_t r; + uint64_t n; + +- MachineState *ms = MACHINE(qdev_get_machine()); +- unsigned int smp_cpus = ms->smp.cpus; +- +- if (offset >= smp_cpus * CPPC_REG_PER_CPU_STRIDE) { ++ if (offset >= s->num_cpu * CPPC_REG_PER_CPU_STRIDE) { + warn_report("cpufreq_read: offset 0x%lx out of range", offset); + return 0; + } +@@ -163,11 +161,10 @@ static uint64_t cpufreq_read(void *opaque, hwaddr offset, unsigned size) + static void cpufreq_write(void *opaque, hwaddr offset, + uint64_t value, unsigned size) + { ++ CpuhzState *s = CPUFREQ(opaque); + uint64_t n; +- MachineState *ms = MACHINE(qdev_get_machine()); +- unsigned int smp_cpus = ms->smp.cpus; + +- if (offset >= smp_cpus * CPPC_REG_PER_CPU_STRIDE) { ++ if (offset >= s->num_cpu * CPPC_REG_PER_CPU_STRIDE) { + error_printf("cpufreq_write: offset 0x%lx out of range", offset); + return; + } +@@ -248,9 +245,9 @@ static void cpufreq_init(Object *obj) + CpuhzState *s = CPUFREQ(obj); + + MachineState *ms = MACHINE(qdev_get_machine()); +- unsigned int smp_cpus = ms->smp.cpus; ++ s->num_cpu = ms->smp.max_cpus; + +- s->reg_size = smp_cpus * CPPC_REG_PER_CPU_STRIDE; ++ s->reg_size = s->num_cpu * CPPC_REG_PER_CPU_STRIDE; + if (s->reg_size > MAX_SUPPORT_SPACE) { + error_report("Required space 0x%x excesses the max support 0x%x", + s->reg_size, MAX_SUPPORT_SPACE); +-- +2.27.0 + diff --git a/arm-virt-acpi-Factor-out-CPPC-building-from-DSDT-CPU.patch 
b/arm-virt-acpi-Factor-out-CPPC-building-from-DSDT-CPU.patch new file mode 100644 index 0000000000000000000000000000000000000000..1a599cbfe434992133a647dd0b1f0e278a649397 --- /dev/null +++ b/arm-virt-acpi-Factor-out-CPPC-building-from-DSDT-CPU.patch @@ -0,0 +1,76 @@ +From 2d5040ce21af5fc02a8588456be7316fcd5bc2a0 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 2 Apr 2024 16:36:38 +0800 +Subject: [PATCH] arm/virt/acpi: Factor out CPPC building from DSDT CPU aml + +When CPU hotplug is enabled, we will use build_cpus_aml instead of +acpi_dsdt_add_cpus, so factor out CPPC building to reuse it. + +Signed-off-by: Keqian Zhu +--- + hw/arm/virt-acpi-build.c | 34 ++++++++++++++++++++-------------- + 1 file changed, 20 insertions(+), 14 deletions(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 48fc77fb83..084c8abc7c 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -123,8 +123,23 @@ static void acpi_dsdt_add_cppc(Aml *dev, uint64_t cpu_base, int *regs_offset) + aml_append(dev, aml_name_decl("_CPC", cpc)); + } + +-static void acpi_dsdt_add_cpus(Aml *scope, VirtMachineState *vms, +- const MemMapEntry *cppc_memmap) ++static void virt_acpi_dsdt_cpu_cppc(int ncpu, int num_cpu, Aml *dev) { ++ VirtMachineState *vms = VIRT_MACHINE(qdev_get_machine()); ++ const MemMapEntry *cppc_memmap = &vms->memmap[VIRT_CPUFREQ]; ++ ++ /* ++ * Append _CPC and _PSD to support CPU frequence show ++ * Check CPPC available by DESIRED_PERF register ++ */ ++ if (cppc_regs_offset[DESIRED_PERF] != -1) { ++ acpi_dsdt_add_cppc(dev, ++ cppc_memmap->base + ncpu * CPPC_REG_PER_CPU_STRIDE, ++ cppc_regs_offset); ++ acpi_dsdt_add_psd(dev, num_cpu); ++ } ++} ++ ++static void acpi_dsdt_add_cpus(Aml *scope, VirtMachineState *vms) + { + MachineState *ms = MACHINE(vms); + uint16_t i; +@@ -134,18 +149,9 @@ static void acpi_dsdt_add_cpus(Aml *scope, VirtMachineState *vms, + aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0007"))); + aml_append(dev, 
aml_name_decl("_UID", aml_int(i))); + +- /* +- * Append _CPC and _PSD to support CPU frequence show +- * Check CPPC available by DESIRED_PERF register +- */ +- if (cppc_regs_offset[DESIRED_PERF] != -1) { +- acpi_dsdt_add_cppc(dev, +- cppc_memmap->base + i * CPPC_REG_PER_CPU_STRIDE, +- cppc_regs_offset); +- acpi_dsdt_add_psd(dev, ms->smp.cpus); +- } ++ virt_acpi_dsdt_cpu_cppc(i, ms->smp.cpus, dev); + +- aml_append(scope, dev); ++ aml_append(scope, dev); + } + } + +@@ -931,7 +937,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + * the RTC ACPI device at all when using UEFI. + */ + scope = aml_scope("\\_SB"); +- acpi_dsdt_add_cpus(scope, vms, &memmap[VIRT_CPUFREQ]); ++ acpi_dsdt_add_cpus(scope, vms); + acpi_dsdt_add_uart(scope, &memmap[VIRT_UART], + (irqmap[VIRT_UART] + ARM_SPI_BASE)); + if (vmc->acpi_expose_flash) { +-- +2.27.0 + diff --git a/arm-virt-acpi-Require-possible_cpu_arch_ids-for-buil.patch b/arm-virt-acpi-Require-possible_cpu_arch_ids-for-buil.patch new file mode 100644 index 0000000000000000000000000000000000000000..c323c440952a681f11f46ce4f1f3bcfbcb954f91 --- /dev/null +++ b/arm-virt-acpi-Require-possible_cpu_arch_ids-for-buil.patch @@ -0,0 +1,38 @@ +From 0bee56446962676992d11e5879f6fbac57e785e8 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 23:38:31 +0800 +Subject: [PATCH] arm/virt-acpi: Require possible_cpu_arch_ids for + build_cpus_aml() + +As the acpi_dev requires possible_cpu_arch_ids to support +vcpu hotplug. 
+ +Signed-off-by: Keqian Zhu +--- + hw/arm/virt-acpi-build.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 590afcfa98..46642efac4 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -1003,6 +1003,7 @@ static void + build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + { + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); ++ MachineClass *mc = MACHINE_GET_CLASS(vms); + Aml *scope, *dsdt; + MachineState *ms = MACHINE(vms); + const MemMapEntry *memmap = vms->memmap; +@@ -1020,7 +1021,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + */ + scope = aml_scope("\\_SB"); + /* if GED is enabled then cpus AML shall be added as part build_cpus_aml */ +- if (vms->acpi_dev) { ++ if (mc->has_hotpluggable_cpus) { + CPUHotplugFeatures opts = { + .acpi_1_compatible = false, + .has_legacy_cphp = false +-- +2.27.0 + diff --git a/arm-virt-gicv3-Changes-to-pre-size-GIC-with-possible.patch b/arm-virt-gicv3-Changes-to-pre-size-GIC-with-possible.patch new file mode 100644 index 0000000000000000000000000000000000000000..b2f8c67fce6c0c005d1e5537abc3faddfd22f250 --- /dev/null +++ b/arm-virt-gicv3-Changes-to-pre-size-GIC-with-possible.patch @@ -0,0 +1,225 @@ +From fe61cbaf2dc92b062c8d147b05c3ce213734c24a Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Wed, 6 May 2020 02:20:23 +0100 +Subject: [PATCH] arm/virt,gicv3: Changes to pre-size GIC with possible vcpus + @machine init + +GIC needs to be pre-sized with possible vcpus at the initialization time. This +is necessary because Memory regions and resources associated with GICC/GICR +etc cannot be changed (add/del/modified) after VM has inited. Also, GIC_TYPER +needs to be initialized with mp_affinity and cpu interface number association. +This cannot be changed after GIC has initialized. 
+ +Once all the cpu interfaces of the GIC has been inited it needs to be ensured +that any updates to the GICC during reset only takes place for the present +vcpus and not the disabled ones. Therefore, proper checks are required at +various places. + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Jean-Philippe Brucker +[changed the comment in arm_gicv3_icc_reset] +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 13 +++++++------ + hw/intc/arm_gicv3_common.c | 7 +++++-- + hw/intc/arm_gicv3_cpuif.c | 8 ++++++++ + hw/intc/arm_gicv3_kvm.c | 34 +++++++++++++++++++++++++++++++--- + include/hw/arm/virt.h | 2 +- + 5 files changed, 52 insertions(+), 12 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index f10d75366b..08ba255317 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -802,6 +802,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + const char *gictype; + int i; + unsigned int smp_cpus = ms->smp.cpus; ++ unsigned int max_cpus = ms->smp.max_cpus; + uint32_t nb_redist_regions = 0; + int revision; + +@@ -826,7 +827,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + } + vms->gic = qdev_new(gictype); + qdev_prop_set_uint32(vms->gic, "revision", revision); +- qdev_prop_set_uint32(vms->gic, "num-cpu", smp_cpus); ++ qdev_prop_set_uint32(vms->gic, "num-cpu", max_cpus); + /* Note that the num-irq property counts both internal and external + * interrupts; there are always 32 of the former (mandated by GIC spec). 
+ */ +@@ -838,7 +839,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + if (vms->gic_version != VIRT_GIC_VERSION_2) { + QList *redist_region_count; + uint32_t redist0_capacity = virt_redist_capacity(vms, VIRT_GIC_REDIST); +- uint32_t redist0_count = MIN(smp_cpus, redist0_capacity); ++ uint32_t redist0_count = MIN(max_cpus, redist0_capacity); + + nb_redist_regions = virt_gicv3_redist_region_count(vms); + +@@ -915,7 +916,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + } else if (vms->virt) { + qemu_irq irq = qdev_get_gpio_in(vms->gic, + intidbase + ARCH_GIC_MAINT_IRQ); +- sysbus_connect_irq(gicbusdev, i + 4 * smp_cpus, irq); ++ sysbus_connect_irq(gicbusdev, i + 4 * max_cpus, irq); + } + + qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, +@@ -923,11 +924,11 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + + VIRTUAL_PMU_IRQ)); + + sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); +- sysbus_connect_irq(gicbusdev, i + smp_cpus, ++ sysbus_connect_irq(gicbusdev, i + max_cpus, + qdev_get_gpio_in(cpudev, ARM_CPU_FIQ)); +- sysbus_connect_irq(gicbusdev, i + 2 * smp_cpus, ++ sysbus_connect_irq(gicbusdev, i + 2 * max_cpus, + qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ)); +- sysbus_connect_irq(gicbusdev, i + 3 * smp_cpus, ++ sysbus_connect_irq(gicbusdev, i + 3 * max_cpus, + qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ)); + } + +diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c +index 2ebf880ead..ebd99af610 100644 +--- a/hw/intc/arm_gicv3_common.c ++++ b/hw/intc/arm_gicv3_common.c +@@ -392,10 +392,13 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + s->cpu = g_new0(GICv3CPUState, s->num_cpu); + + for (i = 0; i < s->num_cpu; i++) { +- CPUState *cpu = qemu_get_cpu(i); ++ CPUState *cpu = qemu_get_possible_cpu(i); + uint64_t cpu_affid; + +- s->cpu[i].cpu = cpu; ++ if (qemu_enabled_cpu(cpu)) { ++ s->cpu[i].cpu = cpu; ++ } ++ + s->cpu[i].gic = s; + /* Store 
GICv3CPUState in CPUARMState gicv3state pointer */ + gicv3_set_gicv3state(cpu, &s->cpu[i]); +diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c +index ab1a00508e..0d0eb2f62f 100644 +--- a/hw/intc/arm_gicv3_cpuif.c ++++ b/hw/intc/arm_gicv3_cpuif.c +@@ -934,6 +934,10 @@ void gicv3_cpuif_update(GICv3CPUState *cs) + ARMCPU *cpu = ARM_CPU(cs->cpu); + CPUARMState *env = &cpu->env; + ++ if (!qemu_enabled_cpu(cs->cpu)) { ++ return; ++ } ++ + g_assert(qemu_mutex_iothread_locked()); + + trace_gicv3_cpuif_update(gicv3_redist_affid(cs), cs->hppi.irq, +@@ -1826,6 +1830,10 @@ static void icc_generate_sgi(CPUARMState *env, GICv3CPUState *cs, + for (i = 0; i < s->num_cpu; i++) { + GICv3CPUState *ocs = &s->cpu[i]; + ++ if (!qemu_enabled_cpu(ocs->cpu)) { ++ continue; ++ } ++ + if (irm) { + /* IRM == 1 : route to all CPUs except self */ + if (cs == ocs) { +diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c +index 77eb37e131..db06c75e2b 100644 +--- a/hw/intc/arm_gicv3_kvm.c ++++ b/hw/intc/arm_gicv3_kvm.c +@@ -24,6 +24,7 @@ + #include "hw/intc/arm_gicv3_common.h" + #include "qemu/error-report.h" + #include "qemu/module.h" ++#include "sysemu/cpus.h" + #include "sysemu/kvm.h" + #include "sysemu/runstate.h" + #include "kvm_arm.h" +@@ -458,6 +459,18 @@ static void kvm_arm_gicv3_put(GICv3State *s) + GICv3CPUState *c = &s->cpu[ncpu]; + int num_pri_bits; + ++ /* ++ * To support hotplug of vcpus we need to make sure all gic cpuif/GICC ++ * are initialized at machvirt init time. Once the init is done we ++ * release the ARMCPU object for disabled vcpus but this leg could hit ++ * during reset of GICC later as well i.e. after init has happened and ++ * all of the cases we want to make sure we don't access the GICC for ++ * the disabled VCPUs. 
++ */ ++ if (!qemu_enabled_cpu(c->cpu)) { ++ continue; ++ } ++ + kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, true); + kvm_gicc_access(s, ICC_CTLR_EL1, ncpu, + &c->icc_ctlr_el1[GICV3_NS], true); +@@ -616,6 +629,11 @@ static void kvm_arm_gicv3_get(GICv3State *s) + GICv3CPUState *c = &s->cpu[ncpu]; + int num_pri_bits; + ++ /* don't access GICC for the disabled vCPUs. */ ++ if (!qemu_enabled_cpu(c->cpu)) { ++ continue; ++ } ++ + kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, false); + kvm_gicc_access(s, ICC_CTLR_EL1, ncpu, + &c->icc_ctlr_el1[GICV3_NS], false); +@@ -695,10 +713,19 @@ static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri) + return; + } + ++ /* ++ * This shall be called even when vcpu is being hotplugged or onlined and ++ * other vcpus might be running. Host kernel KVM code to handle device ++ * access of IOCTLs KVM_{GET|SET}_DEVICE_ATTR might fail due to inability to ++ * grab vcpu locks for all the vcpus. Hence, we need to pause all vcpus to ++ * facilitate locking within host. 
++ */ ++ pause_all_vcpus(); + /* Initialize to actual HW supported configuration */ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + KVM_VGIC_ATTR(ICC_CTLR_EL1, c->gicr_typer), + &c->icc_ctlr_el1[GICV3_NS], false, &error_abort); ++ resume_all_vcpus(); + + c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS]; + } +@@ -808,9 +835,10 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) + gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL); + + for (i = 0; i < s->num_cpu; i++) { +- ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i)); +- +- define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); ++ CPUState *cs = qemu_get_cpu(i); ++ if (qemu_enabled_cpu(cs)) { ++ define_arm_cp_regs(ARM_CPU(cs), gicv3_cpuif_reginfo); ++ } + } + + /* Try to create the device via the device control API */ +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 49d1ec8656..a6977bade5 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -208,7 +208,7 @@ static inline int virt_gicv3_redist_region_count(VirtMachineState *vms) + + assert(vms->gic_version != VIRT_GIC_VERSION_2); + +- return (MACHINE(vms)->smp.cpus > redist0_capacity && ++ return (MACHINE(vms)->smp.max_cpus > redist0_capacity && + vms->highmem_redists) ? 2 : 1; + } + +-- +2.27.0 + diff --git a/arm-virt-kvm-Pre-create-disabled-possible-vCPUs-mach.patch b/arm-virt-kvm-Pre-create-disabled-possible-vCPUs-mach.patch new file mode 100644 index 0000000000000000000000000000000000000000..b752e1fd85490dc2292692b29bc34652b29d460b --- /dev/null +++ b/arm-virt-kvm-Pre-create-disabled-possible-vCPUs-mach.patch @@ -0,0 +1,221 @@ +From 2669fd26cbc36e24ebfc844c240b45ad831701cc Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 5 May 2020 18:44:59 +0100 +Subject: [PATCH] arm/virt,kvm: Pre-create disabled possible vCPUs @machine + init + +In ARMv8 architecture, GIC needs all the vCPUs to be created and present when +it is initialized. This is because: +1. 
GICC and MPIDR association must be fixed at the VM initialization time. + This is represented by register GIC_TYPER(mp_afffinity, proc_num) +2. GICC(cpu interfaces), GICR(redistributors) etc all must be initialized + at the boot time as well. +3. Memory regions associated with GICR etc. cannot be changed(add/del/mod) + after VM has inited. + +This patch adds the support to pre-create all such possible vCPUs within the +host using the KVM interface as part of the virt machine initialization. These +vCPUs could later be attached to QOM/ACPI while they are actually hot plugged +and made present. + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Reported-by: Vishnu Pajjuri +[VP: Identified CPU stall issue & suggested probable fix] +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 53 +++++++++++++++++++++++++++++++++++++++++-- + include/hw/core/cpu.h | 1 + + target/arm/cpu64.c | 1 + + target/arm/kvm.c | 32 ++++++++++++++++++++++++++ + target/arm/kvm64.c | 9 +++++++- + target/arm/kvm_arm.h | 11 +++++++++ + 6 files changed, 104 insertions(+), 3 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 2f04bc7666..f10d75366b 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2389,8 +2389,10 @@ static void machvirt_init(MachineState *machine) + assert(possible_cpus->len == max_cpus); + for (n = 0; n < possible_cpus->len; n++) { + Object *cpuobj; ++ CPUState *cs; + + cpuobj = object_new(possible_cpus->cpus[n].type); ++ cs = CPU(cpuobj); + + aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL); + object_property_set_int(cpuobj, "socket-id", +@@ -2402,8 +2404,55 @@ static void machvirt_init(MachineState *machine) + object_property_set_int(cpuobj, "thread-id", + virt_get_thread_id(machine, n), NULL); + +- qdev_realize(DEVICE(cpuobj), NULL, &error_fatal); +- object_unref(cpuobj); ++ if (n < smp_cpus) { ++ qdev_realize(DEVICE(cpuobj), NULL, &error_fatal); ++ object_unref(cpuobj); ++ } else { ++ 
CPUArchId *cpu_slot; ++ ++ /* handling for vcpus which are yet to be hot-plugged */ ++ cs->cpu_index = n; ++ cpu_slot = virt_find_cpu_slot(machine, cs->cpu_index); ++ ++ /* ++ * ARM host vCPU features need to be fixed at the boot time. But as ++ * per current approach this CPU object will be destroyed during ++ * cpu_post_init(). During hotplug of vCPUs these properties are ++ * initialized again. ++ */ ++ virt_cpu_set_properties(cpuobj, cpu_slot, &error_fatal); ++ ++ /* ++ * For KVM, we shall be pre-creating the now disabled/un-plugged ++ * possible host vcpus and park them till the time they are ++ * actually hot plugged. This is required to pre-size the host ++ * GICC and GICR with the all possible vcpus for this VM. ++ */ ++ if (kvm_enabled()) { ++ kvm_arm_create_host_vcpu(ARM_CPU(cs)); ++ } ++ /* ++ * Add disabled vCPU to CPU slot during the init phase of the virt ++ * machine ++ * 1. We need this ARMCPU object during the GIC init. This object ++ * will facilitate in pre-realizing the GIC. Any info like ++ * mp-affinity(required to derive gicr_type) etc. could still be ++ * fetched while preserving QOM abstraction akin to realized ++ * vCPUs. ++ * 2. Now, after initialization of the virt machine is complete we ++ * could use two approaches to deal with this ARMCPU object: ++ * (i) re-use this ARMCPU object during hotplug of this vCPU. ++ * OR ++ * (ii) defer release this ARMCPU object after gic has been ++ * initialized or during pre-plug phase when a vCPU is ++ * hotplugged. ++ * ++ * We will use the (ii) approach and release the ARMCPU objects ++ * after GIC and machine has been fully initialized during ++ * machine_init_done() phase. 
++ */ ++ cpu_slot->cpu = OBJECT(cs); ++ } + } + fdt_add_timer_nodes(vms); + fdt_add_cpu_nodes(vms); +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index c30636a936..fdfb952259 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -528,6 +528,7 @@ struct CPUState { + uint32_t kvm_fetch_index; + uint64_t dirty_pages; + int kvm_vcpu_stats_fd; ++ VMChangeStateEntry *vmcse; + + /* Use by accel-block: CPU is executing an ioctl() */ + QemuLockCnt in_ioctl_lock; +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index e226b60b72..5d28838175 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -859,6 +859,7 @@ static void aarch64_cpu_initfn(Object *obj) + * enabled explicitly + */ + cs->disabled = true; ++ cs->thread_id = 0; + } + + static void aarch64_cpu_finalizefn(Object *obj) +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index f59f4f81b2..70cf15b550 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -659,6 +659,38 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu) + write_list_to_cpustate(cpu); + } + ++void kvm_arm_create_host_vcpu(ARMCPU *cpu) ++{ ++ CPUState *cs = CPU(cpu); ++ unsigned long vcpu_id = cs->cpu_index; ++ int ret; ++ ++ ret = kvm_create_vcpu(cs); ++ if (ret < 0) { ++ error_report("Failed to create host vcpu %ld", vcpu_id); ++ abort(); ++ } ++ ++ /* ++ * Initialize the vCPU in the host. This will reset the sys regs ++ * for this vCPU and related registers like MPIDR_EL1 etc. also ++ * gets programmed during this call to host. These are referred ++ * later while setting device attributes of the GICR during GICv3 ++ * reset ++ */ ++ ret = kvm_arch_init_vcpu(cs); ++ if (ret < 0) { ++ error_report("Failed to initialize host vcpu %ld", vcpu_id); ++ abort(); ++ } ++ ++ /* ++ * park the created vCPU. shall be used during kvm_get_vcpu() when ++ * threads are created during realization of ARM vCPUs. 
++ */ ++ kvm_park_vcpu(cs); ++} ++ + /* + * Update KVM's MP_STATE based on what QEMU thinks it is + */ +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 3c175c93a7..03ce1e7525 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -562,7 +562,14 @@ int kvm_arch_init_vcpu(CPUState *cs) + return -EINVAL; + } + +- qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs); ++ /* ++ * Install VM change handler only when vCPU thread has been spawned ++ * i.e. vCPU is being realized ++ */ ++ if (cs->thread_id) { ++ cs->vmcse = qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, ++ cs); ++ } + + /* Determine init features for this CPU */ + memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 051a0da41c..31408499b3 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -163,6 +163,17 @@ void kvm_arm_cpu_post_load(ARMCPU *cpu); + */ + void kvm_arm_reset_vcpu(ARMCPU *cpu); + ++/** ++ * kvm_arm_create_host_vcpu: ++ * @cpu: ARMCPU ++ * ++ * Called to pre-create all possible kvm vCPUs within the host at the ++ * virt machine init time. This will also init this pre-created vCPU and ++ * hence result in vCPU reset at host. These pre created and inited vCPUs ++ * shall be parked for use when ARM vCPUs are actually realized. 
++ */ ++void kvm_arm_create_host_vcpu(ARMCPU *cpu); ++ + /** + * kvm_arm_init_serror_injection: + * @cs: CPUState +-- +2.27.0 + diff --git a/arm-virt-target-arm-Add-new-ARMCPU-socket-cluster-co.patch b/arm-virt-target-arm-Add-new-ARMCPU-socket-cluster-co.patch new file mode 100644 index 0000000000000000000000000000000000000000..71f2ff037965e861699d5448b53f67381156c7bb --- /dev/null +++ b/arm-virt-target-arm-Add-new-ARMCPU-socket-cluster-co.patch @@ -0,0 +1,153 @@ +From c8e062285078e688e692214baf97b35246fc2552 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 5 May 2020 23:19:17 +0100 +Subject: [PATCH] arm/virt,target/arm: Add new ARMCPU + {socket,cluster,core,thread}-id property + +This shall be used to store user specified topology{socket,cluster,core,thread} +and shall be converted to a unique 'vcpu-id' which is used as slot-index during +hot(un)plug of vCPU. + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++ + target/arm/cpu.c | 4 +++ + target/arm/cpu.h | 4 +++ + 3 files changed, 71 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index f4c3d47f30..94481d45d4 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -227,6 +227,11 @@ static const char *valid_cpus[] = { + ARM_CPU_TYPE_NAME("max"), + }; + ++static int virt_get_socket_id(const MachineState *ms, int cpu_index); ++static int virt_get_cluster_id(const MachineState *ms, int cpu_index); ++static int virt_get_core_id(const MachineState *ms, int cpu_index); ++static int virt_get_thread_id(const MachineState *ms, int cpu_index); ++ + static bool cpu_type_valid(const char *cpu) + { + int i; +@@ -2264,6 +2269,14 @@ static void machvirt_init(MachineState *machine) + &error_fatal); + + aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL); ++ object_property_set_int(cpuobj, "socket-id", ++ virt_get_socket_id(machine, n), 
NULL); ++ object_property_set_int(cpuobj, "cluster-id", ++ virt_get_cluster_id(machine, n), NULL); ++ object_property_set_int(cpuobj, "core-id", ++ virt_get_core_id(machine, n), NULL); ++ object_property_set_int(cpuobj, "thread-id", ++ virt_get_thread_id(machine, n), NULL); + + if (!vms->secure) { + object_property_set_bool(cpuobj, "has_el3", false, NULL); +@@ -2750,10 +2763,59 @@ static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx) + return socket_id % ms->numa_state->num_nodes; + } + ++static int virt_get_socket_id(const MachineState *ms, int cpu_index) ++{ ++ assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len); ++ ++ return ms->possible_cpus->cpus[cpu_index].props.socket_id; ++} ++ ++static int virt_get_cluster_id(const MachineState *ms, int cpu_index) ++{ ++ assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len); ++ ++ return ms->possible_cpus->cpus[cpu_index].props.cluster_id; ++} ++ ++static int virt_get_core_id(const MachineState *ms, int cpu_index) ++{ ++ assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len); ++ ++ return ms->possible_cpus->cpus[cpu_index].props.core_id; ++} ++ ++static int virt_get_thread_id(const MachineState *ms, int cpu_index) ++{ ++ assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len); ++ ++ return ms->possible_cpus->cpus[cpu_index].props.thread_id; ++} ++ ++static int ++virt_get_cpu_id_from_cpu_topo(const MachineState *ms, DeviceState *dev) ++{ ++ int cpu_id, sock_vcpu_num, clus_vcpu_num, core_vcpu_num; ++ ARMCPU *cpu = ARM_CPU(dev); ++ ++ /* calculate total logical cpus across socket/cluster/core */ ++ sock_vcpu_num = cpu->socket_id * (ms->smp.threads * ms->smp.cores * ++ ms->smp.clusters); ++ clus_vcpu_num = cpu->cluster_id * (ms->smp.threads * ms->smp.cores); ++ core_vcpu_num = cpu->core_id * ms->smp.threads; ++ ++ /* get vcpu-id(logical cpu index) for this vcpu from this topology */ ++ cpu_id = (sock_vcpu_num + clus_vcpu_num + core_vcpu_num) + cpu->thread_id; ++ ++ 
assert(cpu_id >= 0 && cpu_id < ms->possible_cpus->len); ++ ++ return cpu_id; ++} ++ + static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + { + int n; + unsigned int max_cpus = ms->smp.max_cpus; ++ unsigned int smp_threads = ms->smp.threads; + VirtMachineState *vms = VIRT_MACHINE(ms); + MachineClass *mc = MACHINE_GET_CLASS(vms); + +@@ -2767,6 +2829,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + ms->possible_cpus->len = max_cpus; + for (n = 0; n < ms->possible_cpus->len; n++) { + ms->possible_cpus->cpus[n].type = ms->cpu_type; ++ ms->possible_cpus->cpus[n].vcpus_count = smp_threads; + ms->possible_cpus->cpus[n].arch_id = + virt_cpu_mp_affinity(vms, n); + +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index efb22a87f9..cce315c18a 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2422,6 +2422,10 @@ static Property arm_cpu_properties[] = { + DEFINE_PROP_UINT64("mp-affinity", ARMCPU, + mp_affinity, ARM64_AFFINITY_INVALID), + DEFINE_PROP_INT32("node-id", ARMCPU, node_id, CPU_UNSET_NUMA_NODE_ID), ++ DEFINE_PROP_INT32("socket-id", ARMCPU, socket_id, 0), ++ DEFINE_PROP_INT32("cluster-id", ARMCPU, cluster_id, 0), ++ DEFINE_PROP_INT32("core-id", ARMCPU, core_id, 0), ++ DEFINE_PROP_INT32("thread-id", ARMCPU, thread_id, 0), + DEFINE_PROP_INT32("core-count", ARMCPU, core_count, -1), + DEFINE_PROP_END_OF_LIST() + }; +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index a0282e0d28..145d3dbf13 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -1096,6 +1096,10 @@ struct ArchCPU { + QLIST_HEAD(, ARMELChangeHook) el_change_hooks; + + int32_t node_id; /* NUMA node this CPU belongs to */ ++ int32_t socket_id; ++ int32_t cluster_id; ++ int32_t core_id; ++ int32_t thread_id; + + /* Used to synchronize KVM and QEMU in-kernel device levels */ + uint8_t device_irq_level; +-- +2.27.0 + diff --git a/arm-virt-target-arm-Machine-init-time-change-common-.patch 
b/arm-virt-target-arm-Machine-init-time-change-common-.patch new file mode 100644 index 0000000000000000000000000000000000000000..d8199f7ce05614c00088072be19912fda08e3c13 --- /dev/null +++ b/arm-virt-target-arm-Machine-init-time-change-common-.patch @@ -0,0 +1,328 @@ +From 7cd2d7ef7bb7f6c6a97988d86b97922ff700ab06 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Wed, 6 May 2020 00:13:31 +0100 +Subject: [PATCH] arm/virt,target/arm: Machine init time change common to vCPU + {cold|hot}-plug + +Refactor and introduce the common logic required during the initialization of +both cold and hot plugged vCPUs. Also initialize the *disabled* state of the +vCPUs which shall be used further during init phases of various other components +like GIC, PMU, ACPI etc as part of the virt machine initialization. + +KVM vCPUs corresponding to unplugged/yet-to-be-plugged QOM CPUs are kept in +powered-off state in the KVM Host and do not run the guest code. Plugged vCPUs +are also kept in powered-off state but vCPU threads exist and is kept sleeping. + +TBD: +For the cold booted vCPUs, this change also exists in the arm_load_kernel() +in boot.c but for the hotplugged CPUs this change should still remain part of +the pre-plug phase. We are duplicating the powering-off of the cold booted CPUs. +Shall we remove the duplicate change from boot.c? 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Reported-by: Gavin Shan +[GS: pointed the assertion due to wrong range check] +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 149 ++++++++++++++++++++++++++++++++++++++++----- + target/arm/cpu.c | 7 +++ + target/arm/cpu64.c | 14 +++++ + 3 files changed, 156 insertions(+), 14 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 8f647422d8..2f04bc7666 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -227,6 +227,7 @@ static const char *valid_cpus[] = { + ARM_CPU_TYPE_NAME("max"), + }; + ++static CPUArchId *virt_find_cpu_slot(MachineState *ms, int vcpuid); + static int virt_get_socket_id(const MachineState *ms, int cpu_index); + static int virt_get_cluster_id(const MachineState *ms, int cpu_index); + static int virt_get_core_id(const MachineState *ms, int cpu_index); +@@ -2249,6 +2250,14 @@ static void machvirt_init(MachineState *machine) + exit(1); + } + ++ finalize_gic_version(vms); ++ if (tcg_enabled() || hvf_enabled() || qtest_enabled() || ++ (vms->gic_version < VIRT_GIC_VERSION_3)) { ++ machine->smp.max_cpus = smp_cpus; ++ mc->has_hotpluggable_cpus = false; ++ warn_report("cpu hotplug feature has been disabled"); ++ } ++ + possible_cpus = mc->possible_cpu_arch_ids(machine); + + /* +@@ -2275,11 +2284,6 @@ static void machvirt_init(MachineState *machine) + virt_set_memmap(vms, pa_bits); + } + +- /* We can probe only here because during property set +- * KVM is not available yet +- */ +- finalize_gic_version(vms); +- + sysmem = vms->sysmem = get_system_memory(); + + if (vms->secure) { +@@ -2385,17 +2389,9 @@ static void machvirt_init(MachineState *machine) + assert(possible_cpus->len == max_cpus); + for (n = 0; n < possible_cpus->len; n++) { + Object *cpuobj; +- CPUState *cs; +- +- if (n >= smp_cpus) { +- break; +- } + + cpuobj = object_new(possible_cpus->cpus[n].type); + +- cs = CPU(cpuobj); +- cs->cpu_index = n; +- + aarch64 &= 
object_property_get_bool(cpuobj, "aarch64", NULL); + object_property_set_int(cpuobj, "socket-id", + virt_get_socket_id(machine, n), NULL); +@@ -2902,6 +2898,50 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + return ms->possible_cpus; + } + ++static CPUArchId *virt_find_cpu_slot(MachineState *ms, int vcpuid) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(ms); ++ CPUArchId *found_cpu; ++ uint64_t mp_affinity; ++ ++ assert(vcpuid >= 0 && vcpuid < ms->possible_cpus->len); ++ ++ /* ++ * RFC: Question: ++ * TBD: Should mp-affinity be treated as MPIDR? ++ */ ++ mp_affinity = virt_cpu_mp_affinity(vms, vcpuid); ++ found_cpu = &ms->possible_cpus->cpus[vcpuid]; ++ ++ assert(found_cpu->arch_id == mp_affinity); ++ ++ /* ++ * RFC: Question: ++ * Slot-id is the index where vCPU with certain arch-id(=mpidr/ap-affinity) ++ * is plugged. For Host KVM, MPIDR for vCPU is derived using vcpu-id. ++ * As I understand, MPIDR and vcpu-id are property of vCPU but slot-id is ++ * more related to machine? Current code assumes slot-id and vcpu-id are ++ * same i.e. meaning of slot is bit vague. ++ * ++ * Q1: Is there any requirement to clearly represent slot and dissociate it ++ * from vcpu-id? ++ * Q2: Should we make MPIDR within host KVM user configurable? 
++ * ++ * +----+----+----+----+----+----+----+----+ ++ * MPIDR ||| Res | Aff2 | Aff1 | Aff0 | ++ * +----+----+----+----+----+----+----+----+ ++ * \ \ \ | | ++ * \ 8bit \ 8bit \ |4bit| ++ * \<------->\<------->\ |<-->| ++ * \ \ \| | ++ * +----+----+----+----+----+----+----+----+ ++ * VCPU-ID | Byte4 | Byte2 | Byte1 | Byte0 | ++ * +----+----+----+----+----+----+----+----+ ++ */ ++ ++ return found_cpu; ++} ++ + static void virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) + { +@@ -2945,6 +2985,81 @@ static void virt_memory_plug(HotplugHandler *hotplug_dev, + dev, &error_abort); + } + ++static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, ++ Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); ++ MachineState *ms = MACHINE(hotplug_dev); ++ ARMCPU *cpu = ARM_CPU(dev); ++ CPUState *cs = CPU(dev); ++ CPUArchId *cpu_slot; ++ int32_t min_cpuid = 0; ++ int32_t max_cpuid; ++ ++ /* sanity check the cpu */ ++ if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { ++ error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", ++ ms->cpu_type); ++ return; ++ } ++ ++ if ((cpu->thread_id < 0) || (cpu->thread_id >= ms->smp.threads)) { ++ error_setg(errp, "Invalid thread-id %u specified, correct range 0:%u", ++ cpu->thread_id, ms->smp.threads - 1); ++ return; ++ } ++ ++ max_cpuid = ms->possible_cpus->len - 1; ++ if (!dev->hotplugged) { ++ min_cpuid = vms->acpi_dev ? ms->smp.cpus : 0; ++ max_cpuid = vms->acpi_dev ? 
max_cpuid : ms->smp.cpus - 1; ++ } ++ ++ if ((cpu->core_id < min_cpuid) || (cpu->core_id > max_cpuid)) { ++ error_setg(errp, "Invalid core-id %d specified, correct range %d:%d", ++ cpu->core_id, min_cpuid, max_cpuid); ++ return; ++ } ++ ++ if ((cpu->cluster_id < 0) || (cpu->cluster_id >= ms->smp.clusters)) { ++ error_setg(errp, "Invalid cluster-id %u specified, correct range 0:%u", ++ cpu->cluster_id, ms->smp.clusters - 1); ++ return; ++ } ++ ++ if ((cpu->socket_id < 0) || (cpu->socket_id >= ms->smp.sockets)) { ++ error_setg(errp, "Invalid socket-id %u specified, correct range 0:%u", ++ cpu->socket_id, ms->smp.sockets - 1); ++ return; ++ } ++ ++ cs->cpu_index = virt_get_cpu_id_from_cpu_topo(ms, dev); ++ ++ cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index); ++ if (qemu_present_cpu(CPU(cpu_slot->cpu))) { ++ error_setg(errp, "cpu(id%d=%d:%d:%d:%d) with arch-id %" PRIu64 " exist", ++ cs->cpu_index, cpu->socket_id, cpu->cluster_id, cpu->core_id, ++ cpu->thread_id, cpu_slot->arch_id); ++ return; ++ } ++ virt_cpu_set_properties(OBJECT(cs), cpu_slot, errp); ++} ++ ++static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, ++ Error **errp) ++{ ++ MachineState *ms = MACHINE(hotplug_dev); ++ CPUState *cs = CPU(dev); ++ CPUArchId *cpu_slot; ++ ++ /* insert the cold/hot-plugged vcpu in the slot */ ++ cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index); ++ cpu_slot->cpu = OBJECT(dev); ++ ++ cs->disabled = false; ++ return; ++} ++ + static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +@@ -2987,6 +3102,8 @@ static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, + qlist_append_str(reserved_regions, resv_prop_str); + qdev_prop_set_array(dev, "reserved-regions", reserved_regions); + g_free(resv_prop_str); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ virt_cpu_pre_plug(hotplug_dev, dev, errp); + } + } + +@@ -3008,6 +3125,8 @@ static void virt_machine_device_plug_cb(HotplugHandler 
*hotplug_dev, + virt_memory_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ virt_cpu_plug(hotplug_dev, dev, errp); + } + + if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { +@@ -3092,7 +3211,8 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine, + if (device_is_dynamic_sysbus(mc, dev) || + object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) || + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI) || +- object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { ++ object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) || ++ object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + return HOTPLUG_HANDLER(machine); + } + return NULL; +@@ -3169,6 +3289,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + #endif + mc->get_default_cpu_node_id = virt_get_default_cpu_node_id; + mc->kvm_type = virt_kvm_type; ++ mc->has_hotpluggable_cpus = true; + assert(!mc->get_hotplug_handler); + mc->get_hotplug_handler = virt_machine_get_hotplug_handler; + hc->pre_plug = virt_machine_device_pre_plug_cb; +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index cce315c18a..18b8a79c8f 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -2477,6 +2477,12 @@ static const struct TCGCPUOps arm_tcg_ops = { + }; + #endif /* CONFIG_TCG */ + ++static int64_t arm_cpu_get_arch_id(CPUState *cs) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ return cpu->mp_affinity; ++} ++ + static void arm_cpu_class_init(ObjectClass *oc, void *data) + { + ARMCPUClass *acc = ARM_CPU_CLASS(oc); +@@ -2495,6 +2501,7 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) + cc->class_by_name = arm_cpu_class_by_name; + cc->has_work = arm_cpu_has_work; + cc->dump_state = arm_cpu_dump_state; ++ cc->get_arch_id = arm_cpu_get_arch_id; + cc->set_pc = arm_cpu_set_pc; + cc->get_pc = arm_cpu_get_pc; + 
cc->gdb_read_register = arm_cpu_gdb_read_register; +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index 471014b5a9..e226b60b72 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -850,6 +850,17 @@ static void aarch64_cpu_set_aarch64(Object *obj, bool value, Error **errp) + } + } + ++static void aarch64_cpu_initfn(Object *obj) ++{ ++ CPUState *cs = CPU(obj); ++ ++ /* ++ * we start every ARM64 vcpu as disabled possible vCPU. It needs to be ++ * enabled explicitly ++ */ ++ cs->disabled = true; ++} ++ + static void aarch64_cpu_finalizefn(Object *obj) + { + } +@@ -862,7 +873,9 @@ static const gchar *aarch64_gdb_arch_name(CPUState *cs) + static void aarch64_cpu_class_init(ObjectClass *oc, void *data) + { + CPUClass *cc = CPU_CLASS(oc); ++ DeviceClass *dc = DEVICE_CLASS(oc); + ++ dc->user_creatable = true; + cc->gdb_read_register = aarch64_cpu_gdb_read_register; + cc->gdb_write_register = aarch64_cpu_gdb_write_register; + cc->gdb_num_core_regs = 34; +@@ -908,6 +921,7 @@ void aarch64_cpu_register(const ARMCPUInfo *info) + static const TypeInfo aarch64_cpu_type_info = { + .name = TYPE_AARCH64_CPU, + .parent = TYPE_ARM_CPU, ++ .instance_init = aarch64_cpu_initfn, + .instance_finalize = aarch64_cpu_finalizefn, + .abstract = true, + .class_init = aarch64_cpu_class_init, +-- +2.27.0 + diff --git a/arm-virt.c-Convey-local_err-when-set-psci-conduit.patch b/arm-virt.c-Convey-local_err-when-set-psci-conduit.patch new file mode 100644 index 0000000000000000000000000000000000000000..7a2b9ced7bc1c37c4c19bd08d7662f3b63342ee2 --- /dev/null +++ b/arm-virt.c-Convey-local_err-when-set-psci-conduit.patch @@ -0,0 +1,29 @@ +From 25438f2cdb13d07c1bd228fcf4223c21da368548 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 15:15:31 +0800 +Subject: [PATCH] arm/virt.c: Convey local_err when set psci-conduit + +Signed-off-by: Keqian Zhu +--- + hw/arm/virt.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c 
+index ed437ce0e8..934b0412ef 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2323,7 +2323,10 @@ static void virt_cpu_set_properties(Object *cpuobj, const CPUArchId *cpu_slot, + */ + if (vms->psci_conduit != QEMU_PSCI_CONDUIT_DISABLED) { + object_property_set_int(cpuobj, "psci-conduit", vms->psci_conduit, +- NULL); ++ &local_err); ++ if (local_err) { ++ goto out; ++ } + + /* Secondary CPUs start in PSCI powered-down state */ + if (CPU(cpuobj)->cpu_index > 0) { +-- +2.27.0 + diff --git a/arm64-Add-the-cpufreq-device-to-show-cpufreq-info-to.patch b/arm64-Add-the-cpufreq-device-to-show-cpufreq-info-to.patch new file mode 100644 index 0000000000000000000000000000000000000000..052ac56a4167e507ce6866b5e5c19ea33fdc75c8 --- /dev/null +++ b/arm64-Add-the-cpufreq-device-to-show-cpufreq-info-to.patch @@ -0,0 +1,615 @@ +From ebe05c34a66969e4cacc4d6c030dfe93ace89cb2 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Tue, 19 Mar 2024 14:35:55 +0800 +Subject: [PATCH] arm64: Add the cpufreq device to show cpufreq info to guest + +On ARM64 platform, cpu frequency is retrieved via ACPI CPPC. +A virtual cpufreq device based on ACPI CPPC is created to +present cpu frequency info to the guest. + +The default frequency is set to host cpu nominal frequency, +which is obtained from the host CPPC sysfs. Other performance +data are set to the same value, since we don't support guest +performance scaling here. + +Performance counters are also not emulated and they simply +return 1 if read, and guest should fallback to use desired +performance value as the current performance. + +Guest kernel version above 4.18 is required to make it work. 
+ +This series is backported from: +https://patchwork.kernel.org/cover/11379943/ + +Signed-off-by: Ying Fang +Signed-off-by: Yanan Wang +Signed-off-by: Yuan Zhang +--- + configs/devices/aarch64-softmmu/default.mak | 1 + + hw/acpi/aml-build.c | 22 ++ + hw/acpi/cpufreq.c | 283 ++++++++++++++++++++ + hw/acpi/meson.build | 1 + + hw/arm/virt-acpi-build.c | 79 +++++- + hw/arm/virt.c | 13 + + hw/char/Kconfig | 4 + + include/hw/acpi/acpi-defs.h | 40 +++ + include/hw/acpi/aml-build.h | 3 + + include/hw/arm/virt.h | 1 + + 10 files changed, 444 insertions(+), 3 deletions(-) + create mode 100644 hw/acpi/cpufreq.c + +diff --git a/configs/devices/aarch64-softmmu/default.mak b/configs/devices/aarch64-softmmu/default.mak +index f82a04c27d..8d66d0f1af 100644 +--- a/configs/devices/aarch64-softmmu/default.mak ++++ b/configs/devices/aarch64-softmmu/default.mak +@@ -8,3 +8,4 @@ include ../arm-softmmu/default.mak + # CONFIG_XLNX_ZYNQMP_ARM=n + # CONFIG_XLNX_VERSAL=n + # CONFIG_SBSA_REF=n ++# CONFIG_CPUFREQ=n +diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c +index 2968df5562..714498165a 100644 +--- a/hw/acpi/aml-build.c ++++ b/hw/acpi/aml-build.c +@@ -1554,6 +1554,28 @@ Aml *aml_sleep(uint64_t msec) + return var; + } + ++/* ACPI 5.0b: 6.4.3.7 Generic Register Descriptor */ ++Aml *aml_generic_register(AmlRegionSpace rs, uint8_t reg_width, ++ uint8_t reg_offset, AmlAccessType type, uint64_t addr) ++{ ++ int i; ++ Aml *var = aml_alloc(); ++ build_append_byte(var->buf, 0x82); /* Generic Register Descriptor */ ++ build_append_byte(var->buf, 0x0C); /* Length, bits[7:0] value = 0x0C */ ++ build_append_byte(var->buf, 0); /* Length, bits[15:8] value = 0 */ ++ build_append_byte(var->buf, rs); /* Address Space ID */ ++ build_append_byte(var->buf, reg_width); /* Register Bit Width */ ++ build_append_byte(var->buf, reg_offset); /* Register Bit Offset */ ++ build_append_byte(var->buf, type); /* Access Size */ ++ ++ /* Register address */ ++ for (i = 0; i < 8; i++) { ++ 
build_append_byte(var->buf, extract64(addr, i * 8, 8)); ++ } ++ ++ return var; ++} ++ + static uint8_t Hex2Byte(const char *src) + { + int hi, lo; +diff --git a/hw/acpi/cpufreq.c b/hw/acpi/cpufreq.c +new file mode 100644 +index 0000000000..a84db490b3 +--- /dev/null ++++ b/hw/acpi/cpufreq.c +@@ -0,0 +1,283 @@ ++/* ++ * ACPI CPPC register device ++ * ++ * Support for showing CPU frequency in guest OS. ++ * ++ * Copyright (c) 2019 HUAWEI TECHNOLOGIES CO.,LTD. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ ++ * You should have received a copy of the GNU General Public License along ++ * with this program; if not, see . ++ */ ++ ++#include "qemu/osdep.h" ++#include "hw/sysbus.h" ++#include "chardev/char.h" ++#include "qemu/log.h" ++#include "trace.h" ++#include "qemu/option.h" ++#include "sysemu/sysemu.h" ++#include "hw/acpi/acpi-defs.h" ++#include "qemu/cutils.h" ++#include "qemu/error-report.h" ++#include "hw/boards.h" ++ ++#define TYPE_CPUFREQ "cpufreq" ++#define CPUFREQ(obj) OBJECT_CHECK(CpuhzState, (obj), TYPE_CPUFREQ) ++#define NOMINAL_FREQ_FILE "/sys/devices/system/cpu/cpu0/acpi_cppc/nominal_freq" ++#define CPU_MAX_FREQ_FILE "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq" ++#define HZ_MAX_LENGTH 1024 ++#define MAX_SUPPORT_SPACE 0x10000 ++ ++/* ++ * Since Hi1616 will not support CPPC, we simply use its nominal frequency as ++ * the default. 
++ */ ++#define DEFAULT_HZ 2400 ++ ++int cppc_regs_offset[CPPC_REG_COUNT] = { ++ [HIGHEST_PERF] = 0, ++ [NOMINAL_PERF] = 4, ++ [LOW_NON_LINEAR_PERF] = 8, ++ [LOWEST_PERF] = 12, ++ [GUARANTEED_PERF] = 16, ++ [DESIRED_PERF] = 20, ++ [MIN_PERF] = -1, ++ [MAX_PERF] = -1, ++ [PERF_REDUC_TOLERANCE] = -1, ++ [TIME_WINDOW] = -1, ++ [CTR_WRAP_TIME] = -1, ++ [REFERENCE_CTR] = 24, ++ [DELIVERED_CTR] = 32, ++ [PERF_LIMITED] = 40, ++ [ENABLE] = -1, ++ [AUTO_SEL_ENABLE] = -1, ++ [AUTO_ACT_WINDOW] = -1, ++ [ENERGY_PERF] = -1, ++ [REFERENCE_PERF] = -1, ++ [LOWEST_FREQ] = 44, ++ [NOMINAL_FREQ] = 48, ++}; ++ ++typedef struct CpuhzState { ++ SysBusDevice parent_obj; ++ ++ MemoryRegion iomem; ++ uint32_t HighestPerformance; ++ uint32_t NominalPerformance; ++ uint32_t LowestNonlinearPerformance; ++ uint32_t LowestPerformance; ++ uint32_t GuaranteedPerformance; ++ uint32_t DesiredPerformance; ++ uint64_t ReferencePerformanceCounter; ++ uint64_t DeliveredPerformanceCounter; ++ uint32_t PerformanceLimited; ++ uint32_t LowestFreq; ++ uint32_t NominalFreq; ++ uint32_t reg_size; ++} CpuhzState; ++ ++ ++static uint64_t cpufreq_read(void *opaque, hwaddr offset, unsigned size) ++{ ++ CpuhzState *s = (CpuhzState *)opaque; ++ uint64_t r; ++ uint64_t n; ++ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ unsigned int smp_cpus = ms->smp.cpus; ++ ++ if (offset >= smp_cpus * CPPC_REG_PER_CPU_STRIDE) { ++ warn_report("cpufreq_read: offset 0x%lx out of range", offset); ++ return 0; ++ } ++ ++ n = offset % CPPC_REG_PER_CPU_STRIDE; ++ switch (n) { ++ case 0: ++ r = s->HighestPerformance; ++ break; ++ case 4: ++ r = s->NominalPerformance; ++ break; ++ case 8: ++ r = s->LowestNonlinearPerformance; ++ break; ++ case 12: ++ r = s->LowestPerformance; ++ break; ++ case 16: ++ r = s->GuaranteedPerformance; ++ break; ++ case 20: ++ r = s->DesiredPerformance; ++ break; ++ /* ++ * We don't have real counters and it is hard to emulate, so always set the ++ * counter value to 1 to rely on Linux to use the 
DesiredPerformance value ++ * directly. ++ */ ++ case 24: ++ r = s->ReferencePerformanceCounter; ++ break; ++ /* ++ * Guest may still access the register by 32bit; add the process to ++ * eliminate unnecessary warnings. ++ */ ++ case 28: ++ r = s->ReferencePerformanceCounter >> 32; ++ break; ++ case 32: ++ r = s->DeliveredPerformanceCounter; ++ break; ++ case 36: ++ r = s->DeliveredPerformanceCounter >> 32; ++ break; ++ ++ case 40: ++ r = s->PerformanceLimited; ++ break; ++ case 44: ++ r = s->LowestFreq; ++ break; ++ case 48: ++ r = s->NominalFreq; ++ break; ++ default: ++ error_printf("cpufreq_read: Bad offset 0x%lx\n", offset); ++ r = 0; ++ break; ++ } ++ return r; ++} ++ ++static void cpufreq_write(void *opaque, hwaddr offset, ++ uint64_t value, unsigned size) ++{ ++ uint64_t n; ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ unsigned int smp_cpus = ms->smp.cpus; ++ ++ if (offset >= smp_cpus * CPPC_REG_PER_CPU_STRIDE) { ++ error_printf("cpufreq_write: offset 0x%lx out of range", offset); ++ return; ++ } ++ ++ n = offset % CPPC_REG_PER_CPU_STRIDE; ++ ++ switch (n) { ++ case 20: ++ break; ++ default: ++ error_printf("cpufreq_write: Bad offset 0x%lx\n", offset); ++ } ++} ++ ++static uint32_t CPPC_Read(const char *hostpath) ++{ ++ int fd; ++ char buffer[HZ_MAX_LENGTH] = { 0 }; ++ uint64_t hz; ++ int len; ++ const char *endptr = NULL; ++ int ret; ++ ++ fd = qemu_open_old(hostpath, O_RDONLY); ++ if (fd < 0) { ++ return 0; ++ } ++ ++ len = read(fd, buffer, HZ_MAX_LENGTH); ++ qemu_close(fd); ++ if (len <= 0) { ++ return 0; ++ } ++ ret = qemu_strtoul(buffer, &endptr, 0, &hz); ++ if (ret < 0) { ++ return 0; ++ } ++ return (uint32_t)hz; ++} ++ ++static const MemoryRegionOps cpufreq_ops = { ++ .read = cpufreq_read, ++ .write = cpufreq_write, ++ .endianness = DEVICE_NATIVE_ENDIAN, ++}; ++ ++static void hz_init(CpuhzState *s) ++{ ++ uint32_t hz; ++ ++ hz = CPPC_Read(NOMINAL_FREQ_FILE); ++ if (hz == 0) { ++ hz = CPPC_Read(CPU_MAX_FREQ_FILE); ++ if (hz == 0) { ++ hz = 
DEFAULT_HZ; ++ } else { ++ /* Value in CpuMaxFrequency is in KHz unit; convert to MHz */ ++ hz = hz / 1000; ++ } ++ } ++ ++ s->HighestPerformance = hz; ++ s->NominalPerformance = hz; ++ s->LowestNonlinearPerformance = hz; ++ s->LowestPerformance = hz; ++ s->GuaranteedPerformance = hz; ++ s->DesiredPerformance = hz; ++ s->ReferencePerformanceCounter = 1; ++ s->DeliveredPerformanceCounter = 1; ++ s->PerformanceLimited = 0; ++ s->LowestFreq = hz; ++ s->NominalFreq = hz; ++} ++ ++static void cpufreq_init(Object *obj) ++{ ++ SysBusDevice *sbd = SYS_BUS_DEVICE(obj); ++ CpuhzState *s = CPUFREQ(obj); ++ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ unsigned int smp_cpus = ms->smp.cpus; ++ ++ s->reg_size = smp_cpus * CPPC_REG_PER_CPU_STRIDE; ++ if (s->reg_size > MAX_SUPPORT_SPACE) { ++ error_report("Required space 0x%x excesses the max support 0x%x", ++ s->reg_size, MAX_SUPPORT_SPACE); ++ goto err_end; ++ } ++ ++ memory_region_init_io(&s->iomem, OBJECT(s), &cpufreq_ops, s, "cpufreq", ++ s->reg_size); ++ sysbus_init_mmio(sbd, &s->iomem); ++ hz_init(s); ++ return; ++ ++err_end: ++ /* Set desired perf register offset to -1 to indicate no support for CPPC */ ++ cppc_regs_offset[DESIRED_PERF] = -1; ++} ++ ++static const TypeInfo cpufreq_arm_info = { ++ .name = TYPE_CPUFREQ, ++ .parent = TYPE_SYS_BUS_DEVICE, ++ .instance_size = sizeof(CpuhzState), ++ .instance_init = cpufreq_init, ++}; ++ ++static void cpufreq_register_types(void) ++{ ++ type_register_static(&cpufreq_arm_info); ++} ++ ++type_init(cpufreq_register_types) +diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build +index fc1b952379..d36b10ea3c 100644 +--- a/hw/acpi/meson.build ++++ b/hw/acpi/meson.build +@@ -27,6 +27,7 @@ acpi_ss.add(when: 'CONFIG_ACPI_ICH9', if_true: files('ich9.c', 'ich9_tco.c')) + acpi_ss.add(when: 'CONFIG_ACPI_ERST', if_true: files('erst.c')) + acpi_ss.add(when: 'CONFIG_IPMI', if_true: files('ipmi.c'), if_false: files('ipmi-stub.c')) + acpi_ss.add(when: 'CONFIG_PC', if_false: 
files('acpi-x86-stub.c')) ++acpi_ss.add(when: 'CONFIG_CPUFREQ', if_true: files('cpufreq.c')) + if have_tpm + acpi_ss.add(files('tpm.c')) + endif +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 8bc35a483c..3cb50bdc65 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -63,7 +63,68 @@ + + #define ACPI_BUILD_TABLE_SIZE 0x20000 + +-static void acpi_dsdt_add_cpus(Aml *scope, VirtMachineState *vms) ++static void acpi_dsdt_add_psd(Aml *dev, int cpus) ++{ ++ Aml *pkg; ++ Aml *sub; ++ ++ sub = aml_package(5); ++ aml_append(sub, aml_int(5)); ++ aml_append(sub, aml_int(0)); ++ /* Assume all vCPUs belong to the same domain */ ++ aml_append(sub, aml_int(0)); ++ /* SW_ANY: OSPM coordinate, initiate on any processor */ ++ aml_append(sub, aml_int(0xFD)); ++ aml_append(sub, aml_int(cpus)); ++ ++ pkg = aml_package(1); ++ aml_append(pkg, sub); ++ ++ aml_append(dev, aml_name_decl("_PSD", pkg)); ++} ++ ++static void acpi_dsdt_add_cppc(Aml *dev, uint64_t cpu_base, int *regs_offset) ++{ ++ Aml *cpc; ++ int i; ++ ++ /* Use version 3 of CPPC table from ACPI 6.3 */ ++ cpc = aml_package(23); ++ aml_append(cpc, aml_int(23)); ++ aml_append(cpc, aml_int(3)); ++ ++ for (i = 0; i < CPPC_REG_COUNT; i++) { ++ Aml *res; ++ uint8_t reg_width; ++ uint8_t acc_type; ++ uint64_t addr; ++ ++ if (regs_offset[i] == -1) { ++ reg_width = 0; ++ acc_type = AML_ANY_ACC; ++ addr = 0; ++ } else { ++ addr = cpu_base + regs_offset[i]; ++ if (i == REFERENCE_CTR || i == DELIVERED_CTR) { ++ reg_width = 64; ++ acc_type = AML_QWORD_ACC; ++ } else { ++ reg_width = 32; ++ acc_type = AML_DWORD_ACC; ++ } ++ } ++ ++ res = aml_resource_template(); ++ aml_append(res, aml_generic_register(AML_SYSTEM_MEMORY, reg_width, 0, ++ acc_type, addr)); ++ aml_append(cpc, res); ++ } ++ ++ aml_append(dev, aml_name_decl("_CPC", cpc)); ++} ++ ++static void acpi_dsdt_add_cpus(Aml *scope, VirtMachineState *vms, ++ const MemMapEntry *cppc_memmap) + { + MachineState *ms = MACHINE(vms); + uint16_t i; 
+@@ -72,7 +133,19 @@ static void acpi_dsdt_add_cpus(Aml *scope, VirtMachineState *vms) + Aml *dev = aml_device("C%.03X", i); + aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0007"))); + aml_append(dev, aml_name_decl("_UID", aml_int(i))); +- aml_append(scope, dev); ++ ++ /* ++ * Append _CPC and _PSD to support CPU frequence show ++ * Check CPPC available by DESIRED_PERF register ++ */ ++ if (cppc_regs_offset[DESIRED_PERF] != -1) { ++ acpi_dsdt_add_cppc(dev, ++ cppc_memmap->base + i * CPPC_REG_PER_CPU_STRIDE, ++ cppc_regs_offset); ++ acpi_dsdt_add_psd(dev, ms->smp.cpus); ++ } ++ ++ aml_append(scope, dev); + } + } + +@@ -858,7 +931,7 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + * the RTC ACPI device at all when using UEFI. + */ + scope = aml_scope("\\_SB"); +- acpi_dsdt_add_cpus(scope, vms); ++ acpi_dsdt_add_cpus(scope, vms, &memmap[VIRT_CPUFREQ]); + acpi_dsdt_add_uart(scope, &memmap[VIRT_UART], + (irqmap[VIRT_UART] + ARM_SPI_BASE)); + if (vmc->acpi_expose_flash) { +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index b82bd1b8c8..c19cacec8b 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -157,6 +157,7 @@ static const MemMapEntry base_memmap[] = { + [VIRT_PVTIME] = { 0x090a0000, 0x00010000 }, + [VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 }, + [VIRT_MMIO] = { 0x0a000000, 0x00000200 }, ++ [VIRT_CPUFREQ] = { 0x0b000000, 0x00010000 }, + /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */ + [VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 }, + [VIRT_SECURE_MEM] = { 0x0e000000, 0x01000000 }, +@@ -980,6 +981,16 @@ static void create_uart(const VirtMachineState *vms, int uart, + g_free(nodename); + } + ++static void create_cpufreq(const VirtMachineState *vms, MemoryRegion *mem) ++{ ++ hwaddr base = vms->memmap[VIRT_CPUFREQ].base; ++ DeviceState *dev = qdev_new("cpufreq"); ++ SysBusDevice *s = SYS_BUS_DEVICE(dev); ++ ++ sysbus_realize_and_unref(s, &error_fatal); ++ memory_region_add_subregion(mem, base, 
sysbus_mmio_get_region(s, 0)); ++} ++ + static void create_rtc(const VirtMachineState *vms) + { + char *nodename; +@@ -2346,6 +2357,8 @@ static void machvirt_init(MachineState *machine) + + create_uart(vms, VIRT_UART, sysmem, serial_hd(0)); + ++ create_cpufreq(vms, sysmem); ++ + if (vms->secure) { + create_secure_ram(vms, secure_sysmem, secure_tag_sysmem); + create_uart(vms, VIRT_SECURE_UART, secure_sysmem, serial_hd(1)); +diff --git a/hw/char/Kconfig b/hw/char/Kconfig +index 6b6cf2fc1d..335a60c2c1 100644 +--- a/hw/char/Kconfig ++++ b/hw/char/Kconfig +@@ -71,3 +71,7 @@ config GOLDFISH_TTY + + config SHAKTI_UART + bool ++ ++config CPUFREQ ++ bool ++ default y +diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h +index 2b42e4192b..b1f389fb4b 100644 +--- a/include/hw/acpi/acpi-defs.h ++++ b/include/hw/acpi/acpi-defs.h +@@ -93,4 +93,44 @@ typedef struct AcpiFadtData { + #define ACPI_FADT_ARM_PSCI_COMPLIANT (1 << 0) + #define ACPI_FADT_ARM_PSCI_USE_HVC (1 << 1) + ++/* ++ * CPPC register definition from kernel header ++ * include/acpi/cppc_acpi.h ++ * The last element is newly added for easy use ++ */ ++enum cppc_regs { ++ HIGHEST_PERF, ++ NOMINAL_PERF, ++ LOW_NON_LINEAR_PERF, ++ LOWEST_PERF, ++ GUARANTEED_PERF, ++ DESIRED_PERF, ++ MIN_PERF, ++ MAX_PERF, ++ PERF_REDUC_TOLERANCE, ++ TIME_WINDOW, ++ CTR_WRAP_TIME, ++ REFERENCE_CTR, ++ DELIVERED_CTR, ++ PERF_LIMITED, ++ ENABLE, ++ AUTO_SEL_ENABLE, ++ AUTO_ACT_WINDOW, ++ ENERGY_PERF, ++ REFERENCE_PERF, ++ LOWEST_FREQ, ++ NOMINAL_FREQ, ++ CPPC_REG_COUNT, ++}; ++ ++#define CPPC_REG_PER_CPU_STRIDE 0x40 ++ ++/* ++ * Offset for each CPPC register; -1 for unavailable ++ * ++ * Offset for each CPPC register; -1 for unavailable ++ * The whole register space is unavailable if desired perf offset is -1. 
++ */ ++extern int cppc_regs_offset[CPPC_REG_COUNT]; ++ + #endif +diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h +index 84ded2ecd3..200cb113de 100644 +--- a/include/hw/acpi/aml-build.h ++++ b/include/hw/acpi/aml-build.h +@@ -429,6 +429,9 @@ Aml *aml_dma(AmlDmaType typ, AmlDmaBusMaster bm, AmlTransferSize sz, + uint8_t channel); + Aml *aml_sleep(uint64_t msec); + Aml *aml_i2c_serial_bus_device(uint16_t address, const char *resource_source); ++Aml *aml_generic_register(AmlRegionSpace rs, uint8_t reg_width, ++ uint8_t reg_offset, AmlAccessType type, ++ uint64_t addr); + + /* Block AML object primitives */ + Aml *aml_scope(const char *name_format, ...) G_GNUC_PRINTF(1, 2); +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index f69239850e..e944d434c4 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -60,6 +60,7 @@ enum { + VIRT_GIC_REDIST, + VIRT_SMMU, + VIRT_UART, ++ VIRT_CPUFREQ, + VIRT_MMIO, + VIRT_RTC, + VIRT_FW_CFG, +-- +2.27.0 + diff --git a/backup-memory-bakcup-hugepages-hugepages-files-maybe.patch b/backup-memory-bakcup-hugepages-hugepages-files-maybe.patch new file mode 100644 index 0000000000000000000000000000000000000000..9a08dc5ebb4da87c7704a272ce63b058a690df42 --- /dev/null +++ b/backup-memory-bakcup-hugepages-hugepages-files-maybe.patch @@ -0,0 +1,101 @@ +From c28455a0bac4bbf171d1f19e162557377a85e96c Mon Sep 17 00:00:00 2001 +From: Ming Yang +Date: Sat, 23 Mar 2024 16:32:46 +0800 +Subject: [PATCH] [backup] memory: bakcup hugepages: hugepages files maybe + leftover + +old info: +commit id: +3cb1b0ce091998532a30793e3272925da4e6f3aa +old messages: +hugepages: hugepages files maybe leftover + +Before qemu uses the hugepage memory directory /dev/hugepages/libvirt/qemu/xxx, +The directory may be deleted because of the destroy virtual machine. +Cause qemu to create files directly under /dev/hugepages/libvirt/qemu/. 
+After the file is created, the file is not cleaned up by unlink, +and when the virtual machine is destroyed, libvirt will only clean up +/dev/hugepages/libvirt/qemu/xxx directory. After creating the hugepage file, +execute unlink to clean up the file to fix the problem. + +Signed-off-by: Jinhua Cao +Signed-off-by: Jiajie Li + +Signed-off-by: Ming Yang +--- + include/qemu/mmap-alloc.h | 4 ++++ + system/physmem.c | 9 ++++++++- + util/mmap-alloc.c | 22 ++++++++++++++++++++++ + 3 files changed, 34 insertions(+), 1 deletion(-) + +diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h +index 8344daaa03..63e4edfd2f 100644 +--- a/include/qemu/mmap-alloc.h ++++ b/include/qemu/mmap-alloc.h +@@ -1,6 +1,10 @@ + #ifndef QEMU_MMAP_ALLOC_H + #define QEMU_MMAP_ALLOC_H + ++#define HUGETLBFS_MAGIC 0x958458f6 ++ ++size_t qemu_fd_getfiletype(int fd); ++ + typedef enum { + QEMU_FS_TYPE_UNKNOWN = 0, + QEMU_FS_TYPE_TMPFS, +diff --git a/system/physmem.c b/system/physmem.c +index a63853a7bc..f14d64819b 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -1329,7 +1329,14 @@ static int file_ram_open(const char *path, + /* @path names a file that doesn't exist, create it */ + fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644); + if (fd >= 0) { +- *created = true; ++ info_report("open %s success \n", path); ++ /* if fd file type is HUGETLBFS_MAGIC, unlink it, */ ++ /* in case to prevent residue after qemu killed */ ++ if (qemu_fd_getfiletype(fd) == HUGETLBFS_MAGIC) { ++ unlink(path); ++ } else { ++ *created = true; ++ } + break; + } + } else if (errno == EISDIR) { +diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c +index ed14f9c64d..6890ad676c 100644 +--- a/util/mmap-alloc.c ++++ b/util/mmap-alloc.c +@@ -30,6 +30,28 @@ + #include + #endif + ++size_t qemu_fd_getfiletype(int fd) ++{ ++ struct statfs fs; ++ int ret; ++ ++ if (fd != -1) { ++ do { ++ ret = fstatfs(fd, &fs); ++ } while (ret != 0 && errno == EINTR); ++ ++ if (ret != 0) { ++ fprintf(stderr, "Couldn't fstatfs() fd: 
%s\n", ++ strerror(errno)); ++ return -1; ++ } ++ return fs.f_type; ++ } else { ++ fprintf(stderr, "fd is invalid \n"); ++ return -1; ++ } ++} ++ + QemuFsType qemu_fd_getfs(int fd) + { + #ifdef CONFIG_LINUX +-- +2.27.0 + diff --git a/blkio-Respect-memory-alignment-for-bounce-buffer-all.patch b/blkio-Respect-memory-alignment-for-bounce-buffer-all.patch new file mode 100644 index 0000000000000000000000000000000000000000..3acc7ea338d5f9b2baa6298881417ec07f2cfcec --- /dev/null +++ b/blkio-Respect-memory-alignment-for-bounce-buffer-all.patch @@ -0,0 +1,47 @@ +From c93d512dddb00e3eed2ce9484c55f5f1fbb54c8b Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Tue, 27 Feb 2024 19:02:52 +0800 +Subject: [PATCH] blkio: Respect memory-alignment for bounce buffer allocations + +cheery-pick from 10b2393e5e7f4c1d633f1ac8578465681c333efb + +blkio_alloc_mem_region() requires that the requested buffer size is a +multiple of the memory-alignment property. If it isn't, the allocation +fails with a return value of -EINVAL. + +Fix the call in blkio_resize_bounce_pool() to make sure the requested +size is properly aligned. + +I observed this problem with vhost-vdpa, which requires page aligned +memory. As the virtio-blk device behind it still had 512 byte blocks, we +got bs->bl.request_alignment = 512, but actually any request that needed +a bounce buffer and was not aligned to 4k would fail without this fix. 
+ +Suggested-by: Stefano Garzarella +Signed-off-by: Kevin Wolf +Message-ID: <20240131173140.42398-1-kwolf@redhat.com> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Stefano Garzarella +Signed-off-by: Kevin Wolf +Signed-off-by: dinglimin +--- + block/blkio.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/block/blkio.c b/block/blkio.c +index 0a0a6c0f5f..b989617608 100644 +--- a/block/blkio.c ++++ b/block/blkio.c +@@ -89,6 +89,9 @@ static int blkio_resize_bounce_pool(BDRVBlkioState *s, int64_t bytes) + /* Pad size to reduce frequency of resize calls */ + bytes += 128 * 1024; + ++ /* Align the pool size to avoid blkio_alloc_mem_region() failure */ ++ bytes = QEMU_ALIGN_UP(bytes, s->mem_region_alignment); ++ + WITH_QEMU_LOCK_GUARD(&s->blkio_lock) { + int ret; + +-- +2.27.0 + diff --git a/block-Add-error-retry-param-setting.patch b/block-Add-error-retry-param-setting.patch new file mode 100644 index 0000000000000000000000000000000000000000..6399cd5072dc92a60ba03eacafd83500c7b7efe8 --- /dev/null +++ b/block-Add-error-retry-param-setting.patch @@ -0,0 +1,232 @@ +From d777d1585603aa7599ae8bac4492fafdf1e4b109 Mon Sep 17 00:00:00 2001 +From: yexiao +Date: Thu, 21 Jan 2021 15:46:50 +0800 +Subject: [PATCH] block: Add error retry param setting + +Add "retry_interval" and "retry_timeout" parameter for drive and device +option. These parameter are valid only when werror/rerror=retry. + +eg. 
-device device_name,drive=drive_id,rerror=retry,retry_interval=1000,retry_timeout=5000 + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +Signed-off-by: Alex Chen +--- + block/block-backend.c | 13 ++++-- + blockdev.c | 50 +++++++++++++++++++++ + hw/block/block.c | 10 +++++ + include/hw/block/block.h | 7 ++- + include/sysemu/block-backend-common.h | 3 ++ + include/sysemu/block-backend-global-state.h | 2 + + 6 files changed, 81 insertions(+), 4 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 919699bb70..85d732de7e 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -33,9 +33,6 @@ + + #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ + +-/* block backend default retry interval */ +-#define BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL 1000 +- + typedef struct BlockBackendAioNotifier { + void (*attached_aio_context)(AioContext *new_context, void *opaque); + void (*detach_aio_context)(void *opaque); +@@ -2149,6 +2146,16 @@ void blk_drain_all(void) + bdrv_drain_all_end(); + } + ++void blk_set_on_error_retry_interval(BlockBackend *blk, int64_t interval) ++{ ++ blk->retry_interval = interval; ++} ++ ++void blk_set_on_error_retry_timeout(BlockBackend *blk, int64_t timeout) ++{ ++ blk->retry_timeout = timeout; ++} ++ + static bool blk_error_retry_timeout(BlockBackend *blk) + { + /* No timeout set, infinite retries. 
*/ +diff --git a/blockdev.c b/blockdev.c +index 2817f73fad..6a229e77a5 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -484,6 +484,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, + const char *buf; + int bdrv_flags = 0; + int on_read_error, on_write_error; ++ int64_t retry_interval, retry_timeout; + OnOffAuto account_invalid, account_failed; + bool writethrough, read_only; + BlockBackend *blk; +@@ -576,6 +577,10 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, + } + } + ++ retry_interval = qemu_opt_get_number(opts, "retry_interval", ++ BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL); ++ retry_timeout = qemu_opt_get_number(opts, "retry_timeout", 0); ++ + if (snapshot) { + bdrv_flags |= BDRV_O_SNAPSHOT; + } +@@ -639,6 +644,11 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, + + blk_set_enable_write_cache(blk, !writethrough); + blk_set_on_error(blk, on_read_error, on_write_error); ++ if (on_read_error == BLOCKDEV_ON_ERROR_RETRY || ++ on_write_error == BLOCKDEV_ON_ERROR_RETRY) { ++ blk_set_on_error_retry_interval(blk, retry_interval); ++ blk_set_on_error_retry_timeout(blk, retry_timeout); ++ } + + if (!monitor_add_blk(blk, id, errp)) { + blk_unref(blk); +@@ -773,6 +783,14 @@ QemuOptsList qemu_legacy_drive_opts = { + .name = "werror", + .type = QEMU_OPT_STRING, + .help = "write error action", ++ },{ ++ .name = "retry_interval", ++ .type = QEMU_OPT_NUMBER, ++ .help = "interval for retry action in millisecond", ++ },{ ++ .name = "retry_timeout", ++ .type = QEMU_OPT_NUMBER, ++ .help = "timeout for retry action in millisecond", + },{ + .name = "copy-on-read", + .type = QEMU_OPT_BOOL, +@@ -795,6 +813,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type, + BlockInterfaceType type; + int max_devs, bus_id, unit_id, index; + const char *werror, *rerror; ++ int64_t retry_interval, retry_timeout; + bool read_only = false; + bool copy_on_read; + const char *filename; +@@ -1013,6 +1032,29 @@ 
DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type, + qdict_put_str(bs_opts, "rerror", rerror); + } + ++ if (qemu_opt_find(legacy_opts, "retry_interval")) { ++ if ((werror == NULL || strcmp(werror, "retry")) && ++ (rerror == NULL || strcmp(rerror, "retry"))) { ++ error_setg(errp, "retry_interval is only supported " ++ "by werror/rerror=retry"); ++ goto fail; ++ } ++ retry_interval = qemu_opt_get_number(legacy_opts, "retry_interval", ++ BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL); ++ qdict_put_int(bs_opts, "retry_interval", retry_interval); ++ } ++ ++ if (qemu_opt_find(legacy_opts, "retry_timeout")) { ++ if ((werror == NULL || strcmp(werror, "retry")) && ++ (rerror == NULL || strcmp(rerror, "retry"))) { ++ error_setg(errp, "retry_timeout is only supported " ++ "by werror/rerror=retry"); ++ goto fail; ++ } ++ retry_timeout = qemu_opt_get_number(legacy_opts, "retry_timeout", 0); ++ qdict_put_int(bs_opts, "retry_timeout", retry_timeout); ++ } ++ + /* Actual block device init: Functionality shared with blockdev-add */ + blk = blockdev_init(filename, bs_opts, errp); + bs_opts = NULL; +@@ -3794,6 +3836,14 @@ QemuOptsList qemu_common_drive_opts = { + .name = "werror", + .type = QEMU_OPT_STRING, + .help = "write error action", ++ },{ ++ .name = "retry_interval", ++ .type = QEMU_OPT_NUMBER, ++ .help = "interval for retry action in millisecond", ++ },{ ++ .name = "retry_timeout", ++ .type = QEMU_OPT_NUMBER, ++ .help = "timeout for retry action in millisecond", + },{ + .name = BDRV_OPT_READ_ONLY, + .type = QEMU_OPT_BOOL, +diff --git a/hw/block/block.c b/hw/block/block.c +index 9f52ee6e72..6bece87709 100644 +--- a/hw/block/block.c ++++ b/hw/block/block.c +@@ -239,6 +239,16 @@ bool blkconf_apply_backend_options(BlockConf *conf, bool readonly, + blk_set_enable_write_cache(blk, wce); + blk_set_on_error(blk, rerror, werror); + ++ if (rerror == BLOCKDEV_ON_ERROR_RETRY || ++ werror == BLOCKDEV_ON_ERROR_RETRY) { ++ if (conf->retry_interval >= 0) { ++ 
blk_set_on_error_retry_interval(blk, conf->retry_interval); ++ } ++ if (conf->retry_timeout >= 0) { ++ blk_set_on_error_retry_timeout(blk, conf->retry_timeout); ++ } ++ } ++ + block_acct_setup(blk_get_stats(blk), conf->account_invalid, + conf->account_failed); + return true; +diff --git a/include/hw/block/block.h b/include/hw/block/block.h +index 15fff66435..fb8c0df4a5 100644 +--- a/include/hw/block/block.h ++++ b/include/hw/block/block.h +@@ -34,6 +34,8 @@ typedef struct BlockConf { + OnOffAuto account_invalid, account_failed; + BlockdevOnError rerror; + BlockdevOnError werror; ++ int64_t retry_interval; ++ int64_t retry_timeout; + } BlockConf; + + static inline unsigned int get_physical_block_exp(BlockConf *conf) +@@ -84,7 +86,10 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) + DEFINE_PROP_BLOCKDEV_ON_ERROR("rerror", _state, _conf.rerror, \ + BLOCKDEV_ON_ERROR_AUTO), \ + DEFINE_PROP_BLOCKDEV_ON_ERROR("werror", _state, _conf.werror, \ +- BLOCKDEV_ON_ERROR_AUTO) ++ BLOCKDEV_ON_ERROR_AUTO), \ ++ DEFINE_PROP_INT64("retry_interval", _state, _conf.retry_interval, \ ++ -1), \ ++ DEFINE_PROP_INT64("retry_timeout", _state, _conf.retry_timeout, -1) + + /* Backend access helpers */ + +diff --git a/include/sysemu/block-backend-common.h b/include/sysemu/block-backend-common.h +index b76df8834a..5a1cdac9c4 100644 +--- a/include/sysemu/block-backend-common.h ++++ b/include/sysemu/block-backend-common.h +@@ -16,6 +16,9 @@ + #include "qemu/iov.h" + #include "block/throttle-groups.h" + ++/* block backend default retry interval */ ++#define BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL 1000 ++ + /* + * TODO Have to include block/block.h for a bunch of block layer + * types. 
Unfortunately, this pulls in the whole BlockDriverState +diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h +index 7f59fd411d..d56592c22e 100644 +--- a/include/sysemu/block-backend-global-state.h ++++ b/include/sysemu/block-backend-global-state.h +@@ -84,6 +84,8 @@ int blk_commit_all(void); + bool blk_in_drain(BlockBackend *blk); + void blk_drain(BlockBackend *blk); + void blk_drain_all(void); ++void blk_set_on_error_retry_interval(BlockBackend *blk, int64_t interval); ++void blk_set_on_error_retry_timeout(BlockBackend *blk, int64_t timeout); + void blk_error_retry_reset_timeout(BlockBackend *blk); + void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, + BlockdevOnError on_write_error); +-- +2.27.0 + diff --git a/block-Add-sanity-check-when-setting-retry-parameters.patch b/block-Add-sanity-check-when-setting-retry-parameters.patch new file mode 100644 index 0000000000000000000000000000000000000000..27c253e367c4ff62202478dc4b5a3460e11f0ef3 --- /dev/null +++ b/block-Add-sanity-check-when-setting-retry-parameters.patch @@ -0,0 +1,156 @@ +From e880fc334edb8d07593679cf0c6a9af810c51d0d Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 18 Mar 2021 19:45:11 +0800 +Subject: [PATCH] block: Add sanity check when setting retry parameters + +Add sanity check when setting retry parameters to avoid invalid retry +configuration. 
+ +Signed-off-by: Jiahui Cen +Signed-off-by: Alex Chen +--- + hw/core/qdev-prop-internal.h | 2 ++ + hw/core/qdev-properties-system.c | 45 +++++++++++++++++++++++++++++ + hw/core/qdev-properties.c | 4 +-- + include/hw/block/block.h | 7 +++-- + include/hw/qdev-properties-system.h | 8 +++++ + 5 files changed, 61 insertions(+), 5 deletions(-) + +diff --git a/hw/core/qdev-prop-internal.h b/hw/core/qdev-prop-internal.h +index d7b77844fe..68b1b9d10c 100644 +--- a/hw/core/qdev-prop-internal.h ++++ b/hw/core/qdev-prop-internal.h +@@ -22,6 +22,8 @@ void qdev_propinfo_set_default_value_uint(ObjectProperty *op, + + void qdev_propinfo_get_int32(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp); ++void qdev_propinfo_get_int64(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp); + void qdev_propinfo_get_size32(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp); + +diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c +index 1473ab3d5e..f2e2718c74 100644 +--- a/hw/core/qdev-properties-system.c ++++ b/hw/core/qdev-properties-system.c +@@ -635,6 +635,51 @@ const PropertyInfo qdev_prop_blockdev_on_error = { + .set_default_value = qdev_propinfo_set_default_value_enum, + }; + ++static void set_retry_time(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ DeviceState *dev = DEVICE(obj); ++ Property *prop = opaque; ++ int64_t value, *ptr = object_field_prop_ptr(obj, prop); ++ Error *local_err = NULL; ++ ++ if (dev->realized) { ++ qdev_prop_set_after_realize(dev, name, errp); ++ return; ++ } ++ ++ visit_type_int64(v, name, &value, &local_err); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ return; ++ } ++ ++ /* value should not be negative */ ++ if (value < 0) { ++ error_setg(errp, QERR_PROPERTY_VALUE_OUT_OF_RANGE, ++ dev->id ? 
: "", name, (int64_t)value, 0L, LONG_MAX); ++ return; ++ } ++ ++ *ptr = value; ++} ++ ++const PropertyInfo qdev_prop_blockdev_retry_interval = { ++ .name = "BlockdevRetryInterval", ++ .description = "Interval for retry error handling policy", ++ .get = qdev_propinfo_get_int64, ++ .set = set_retry_time, ++ .set_default_value = qdev_propinfo_set_default_value_int, ++}; ++ ++const PropertyInfo qdev_prop_blockdev_retry_timeout = { ++ .name = "BlockdevRetryTimeout", ++ .description = "Timeout for retry error handling policy", ++ .get = qdev_propinfo_get_int64, ++ .set = set_retry_time, ++ .set_default_value = qdev_propinfo_set_default_value_int, ++}; ++ + /* --- BIOS CHS translation */ + + QEMU_BUILD_BUG_ON(sizeof(BiosAtaTranslation) != sizeof(int)); +diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c +index 840006e953..19b7450b4d 100644 +--- a/hw/core/qdev-properties.c ++++ b/hw/core/qdev-properties.c +@@ -398,7 +398,7 @@ static void set_uint64(Object *obj, Visitor *v, const char *name, + visit_type_uint64(v, name, ptr, errp); + } + +-static void get_int64(Object *obj, Visitor *v, const char *name, ++void qdev_propinfo_get_int64(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) + { + Property *prop = opaque; +@@ -425,7 +425,7 @@ const PropertyInfo qdev_prop_uint64 = { + + const PropertyInfo qdev_prop_int64 = { + .name = "int64", +- .get = get_int64, ++ .get = qdev_propinfo_get_int64, + .set = set_int64, + .set_default_value = qdev_propinfo_set_default_value_int, + }; +diff --git a/include/hw/block/block.h b/include/hw/block/block.h +index fb8c0df4a5..844e87495a 100644 +--- a/include/hw/block/block.h ++++ b/include/hw/block/block.h +@@ -87,9 +87,10 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) + BLOCKDEV_ON_ERROR_AUTO), \ + DEFINE_PROP_BLOCKDEV_ON_ERROR("werror", _state, _conf.werror, \ + BLOCKDEV_ON_ERROR_AUTO), \ +- DEFINE_PROP_INT64("retry_interval", _state, _conf.retry_interval, \ +- -1), \ +- 
DEFINE_PROP_INT64("retry_timeout", _state, _conf.retry_timeout, -1) ++ DEFINE_PROP_BLOCKDEV_RETRY_INTERVAL("retry_interval", _state, \ ++ _conf.retry_interval, 1000), \ ++ DEFINE_PROP_BLOCKDEV_RETRY_TIMEOUT("retry_timeout", _state, \ ++ _conf.retry_timeout, 0) + + /* Backend access helpers */ + +diff --git a/include/hw/qdev-properties-system.h b/include/hw/qdev-properties-system.h +index 91f7a2452d..7cf27e51b9 100644 +--- a/include/hw/qdev-properties-system.h ++++ b/include/hw/qdev-properties-system.h +@@ -10,6 +10,8 @@ extern const PropertyInfo qdev_prop_multifd_compression; + extern const PropertyInfo qdev_prop_mig_mode; + extern const PropertyInfo qdev_prop_losttickpolicy; + extern const PropertyInfo qdev_prop_blockdev_on_error; ++extern const PropertyInfo qdev_prop_blockdev_retry_interval; ++extern const PropertyInfo qdev_prop_blockdev_retry_timeout; + extern const PropertyInfo qdev_prop_bios_chs_trans; + extern const PropertyInfo qdev_prop_fdc_drive_type; + extern const PropertyInfo qdev_prop_drive; +@@ -52,6 +54,12 @@ extern const PropertyInfo qdev_prop_cpus390entitlement; + #define DEFINE_PROP_BLOCKDEV_ON_ERROR(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_blockdev_on_error, \ + BlockdevOnError) ++#define DEFINE_PROP_BLOCKDEV_RETRY_INTERVAL(_n, _s, _f, _d) \ ++ DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_blockdev_retry_interval, \ ++ int64_t) ++#define DEFINE_PROP_BLOCKDEV_RETRY_TIMEOUT(_n, _s, _f, _d) \ ++ DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_blockdev_retry_timeout, \ ++ int64_t) + #define DEFINE_PROP_BIOS_CHS_TRANS(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_bios_chs_trans, int) + #define DEFINE_PROP_BLOCKSIZE(_n, _s, _f) \ +-- +2.27.0 + diff --git a/block-backend-Add-device-specific-retry-callback.patch b/block-backend-Add-device-specific-retry-callback.patch new file mode 100644 index 0000000000000000000000000000000000000000..c912ea20e8d1f2bcce46d2719232aff75456b9e4 --- /dev/null +++ 
b/block-backend-Add-device-specific-retry-callback.patch @@ -0,0 +1,54 @@ +From 94580294f0fda3c715caa19f4b33718212c9c531 Mon Sep 17 00:00:00 2001 +From: yexiao +Date: Thu, 21 Jan 2021 15:46:47 +0800 +Subject: [PATCH] block-backend: Add device specific retry callback + +Add retry_request_cb in BlockDevOps to do device specific retry action. +Backend's timer would be registered only when the backend is set 'retry' +on errors and the device supports retry action. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +Signed-off-by: Alex Chen +--- + block/block-backend.c | 8 ++++++++ + include/sysemu/block-backend-common.h | 4 ++++ + 2 files changed, 12 insertions(+) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 2f56cc8382..7e25d5a058 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -1123,6 +1123,14 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, + blk->dev_ops = ops; + blk->dev_opaque = opaque; + ++ if ((blk->on_read_error == BLOCKDEV_ON_ERROR_RETRY || ++ blk->on_write_error == BLOCKDEV_ON_ERROR_RETRY) && ++ ops->retry_request_cb) { ++ blk->retry_timer = aio_timer_new(blk->ctx, QEMU_CLOCK_REALTIME, ++ SCALE_MS, ops->retry_request_cb, ++ opaque); ++ } ++ + /* Are we currently quiesced? Should we enforce this right now? */ + if (qatomic_read(&blk->quiesce_counter) && ops && ops->drained_begin) { + ops->drained_begin(opaque); +diff --git a/include/sysemu/block-backend-common.h b/include/sysemu/block-backend-common.h +index 780cea7305..b76df8834a 100644 +--- a/include/sysemu/block-backend-common.h ++++ b/include/sysemu/block-backend-common.h +@@ -71,6 +71,10 @@ typedef struct BlockDevOps { + * Is the device still busy? + */ + bool (*drained_poll)(void *opaque); ++ /* ++ * Runs when retrying failed requests. ++ */ ++ void (*retry_request_cb)(void *opaque); + + /* + * I/O API functions. These functions are thread-safe. 
+-- +2.27.0 + diff --git a/block-backend-Add-timeout-support-for-retry.patch b/block-backend-Add-timeout-support-for-retry.patch new file mode 100644 index 0000000000000000000000000000000000000000..8a35cd27449e352d4d4edc0280ae3edee484b2c8 --- /dev/null +++ b/block-backend-Add-timeout-support-for-retry.patch @@ -0,0 +1,75 @@ +From b4bb154e6587b6d3fef819efcced803e309c4e05 Mon Sep 17 00:00:00 2001 +From: yexiao +Date: Thu, 21 Jan 2021 15:46:49 +0800 +Subject: [PATCH] block-backend: Add timeout support for retry + +Retry should only be triggered when timeout is not reached, so let's check +timeout before retry. Device should also reset retry_start_time after +successful retry. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +Signed-off-by: Alex Chen +--- + block/block-backend.c | 25 ++++++++++++++++++++- + include/sysemu/block-backend-global-state.h | 1 + + 2 files changed, 25 insertions(+), 1 deletion(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index e62808fc03..919699bb70 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -2149,6 +2149,29 @@ void blk_drain_all(void) + bdrv_drain_all_end(); + } + ++static bool blk_error_retry_timeout(BlockBackend *blk) ++{ ++ /* No timeout set, infinite retries. */ ++ if (!blk->retry_timeout) { ++ return false; ++ } ++ ++ /* The first time an error occurs. 
*/ ++ if (!blk->retry_start_time) { ++ blk->retry_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); ++ return false; ++ } ++ ++ return qemu_clock_get_ms(QEMU_CLOCK_REALTIME) > (blk->retry_start_time + ++ blk->retry_timeout); ++} ++ ++void blk_error_retry_reset_timeout(BlockBackend *blk) ++{ ++ if (blk->retry_timer && blk->retry_start_time) ++ blk->retry_start_time = 0; ++} ++ + void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, + BlockdevOnError on_write_error) + { +@@ -2180,7 +2203,7 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, + case BLOCKDEV_ON_ERROR_IGNORE: + return BLOCK_ERROR_ACTION_IGNORE; + case BLOCKDEV_ON_ERROR_RETRY: +- return (blk->retry_timer) ? ++ return (blk->retry_timer && !blk_error_retry_timeout(blk)) ? + BLOCK_ERROR_ACTION_RETRY : BLOCK_ERROR_ACTION_REPORT; + case BLOCKDEV_ON_ERROR_AUTO: + default: +diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h +index 49c12b0fa9..7f59fd411d 100644 +--- a/include/sysemu/block-backend-global-state.h ++++ b/include/sysemu/block-backend-global-state.h +@@ -84,6 +84,7 @@ int blk_commit_all(void); + bool blk_in_drain(BlockBackend *blk); + void blk_drain(BlockBackend *blk); + void blk_drain_all(void); ++void blk_error_retry_reset_timeout(BlockBackend *blk); + void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, + BlockdevOnError on_write_error); + bool blk_supports_write_perm(BlockBackend *blk); +-- +2.27.0 + diff --git a/block-backend-Enable-retry-action-on-errors.patch b/block-backend-Enable-retry-action-on-errors.patch new file mode 100644 index 0000000000000000000000000000000000000000..6581ac1b362f71773e5dae8806fd73e00ee2289f --- /dev/null +++ b/block-backend-Enable-retry-action-on-errors.patch @@ -0,0 +1,43 @@ +From 7bcf4385f518580509990ff71c8209505c887abc Mon Sep 17 00:00:00 2001 +From: yexiao +Date: Thu, 21 Jan 2021 15:46:48 +0800 +Subject: [PATCH] block-backend: Enable retry action on 
errors + +Enable retry action when backend's retry timer is available. It would +trigger the timer to do device specific retry action. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +Signed-off-by: Alex Chen +--- + block/block-backend.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 7e25d5a058..e62808fc03 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -2179,6 +2179,9 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, + return BLOCK_ERROR_ACTION_REPORT; + case BLOCKDEV_ON_ERROR_IGNORE: + return BLOCK_ERROR_ACTION_IGNORE; ++ case BLOCKDEV_ON_ERROR_RETRY: ++ return (blk->retry_timer) ? ++ BLOCK_ERROR_ACTION_RETRY : BLOCK_ERROR_ACTION_REPORT; + case BLOCKDEV_ON_ERROR_AUTO: + default: + abort(); +@@ -2227,6 +2230,10 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action, + qemu_system_vmstop_request_prepare(); + send_qmp_error_event(blk, action, is_read, error); + qemu_system_vmstop_request(RUN_STATE_IO_ERROR); ++ } else if (action == BLOCK_ERROR_ACTION_RETRY) { ++ timer_mod(blk->retry_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + ++ blk->retry_interval); ++ send_qmp_error_event(blk, action, is_read, error); + } else { + send_qmp_error_event(blk, action, is_read, error); + } +-- +2.27.0 + diff --git a/block-backend-Introduce-retry-timer.patch b/block-backend-Introduce-retry-timer.patch new file mode 100644 index 0000000000000000000000000000000000000000..e085a3eb1cbd90656a2ba816a17ae48ed693cefc --- /dev/null +++ b/block-backend-Introduce-retry-timer.patch @@ -0,0 +1,70 @@ +From 9567fce96050342f393f546d3c5131118c3cad7c Mon Sep 17 00:00:00 2001 +From: yexiao +Date: Thu, 21 Jan 2021 15:46:46 +0800 +Subject: [PATCH] block-backend: Introduce retry timer + +Add a timer to regularly trigger retry on errors. 
+ +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +Signed-off-by: Alex Chen +--- + block/block-backend.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/block/block-backend.c b/block/block-backend.c +index ec21148806..2f56cc8382 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -33,6 +33,9 @@ + + #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ + ++/* block backend default retry interval */ ++#define BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL 1000 ++ + typedef struct BlockBackendAioNotifier { + void (*attached_aio_context)(AioContext *new_context, void *opaque); + void (*detach_aio_context)(void *opaque); +@@ -92,6 +95,15 @@ struct BlockBackend { + * Accessed with atomic ops. + */ + unsigned int in_flight; ++ ++ /* Timer for retry on errors. */ ++ QEMUTimer *retry_timer; ++ /* Interval in ms to trigger next retry. */ ++ int64_t retry_interval; ++ /* Start time of the first error. Used to check timeout. */ ++ int64_t retry_start_time; ++ /* Retry timeout. 0 represents infinite retry. 
*/ ++ int64_t retry_timeout; + }; + + typedef struct BlockBackendAIOCB { +@@ -368,6 +380,11 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) + blk->on_read_error = BLOCKDEV_ON_ERROR_REPORT; + blk->on_write_error = BLOCKDEV_ON_ERROR_ENOSPC; + ++ blk->retry_timer = NULL; ++ blk->retry_interval = BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL; ++ blk->retry_start_time = 0; ++ blk->retry_timeout = 0; ++ + block_acct_init(&blk->stats); + + qemu_mutex_init(&blk->queued_requests_lock); +@@ -508,6 +525,10 @@ static void blk_delete(BlockBackend *blk) + QTAILQ_REMOVE(&block_backends, blk, link); + drive_info_del(blk->legacy_dinfo); + block_acct_cleanup(&blk->stats); ++ if (blk->retry_timer) { ++ timer_del(blk->retry_timer); ++ timer_free(blk->retry_timer); ++ } + g_free(blk); + } + +-- +2.27.0 + diff --git a/block-backend-Stop-retrying-when-draining.patch b/block-backend-Stop-retrying-when-draining.patch new file mode 100644 index 0000000000000000000000000000000000000000..d2996b16ade4968d08bf9ee2cedb690aed5e48ce --- /dev/null +++ b/block-backend-Stop-retrying-when-draining.patch @@ -0,0 +1,38 @@ +From bbac66be575c76216c18d68c558e0dc80a078f68 Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 25 Feb 2021 18:03:57 +0800 +Subject: [PATCH] block-backend: Stop retrying when draining + +Retrying failed requests when draining would make the draining hung. So it +is better not to trigger the retry timer when draining. And after the +virtual devices go back to work, they would retry those queued requests. 
+ +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +Signed-off-by: Alex Chen +--- + block/block-backend.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/block/block-backend.c b/block/block-backend.c +index 85d732de7e..bfbbb18af1 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -2261,9 +2261,11 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action, + send_qmp_error_event(blk, action, is_read, error); + qemu_system_vmstop_request(RUN_STATE_IO_ERROR); + } else if (action == BLOCK_ERROR_ACTION_RETRY) { +- timer_mod(blk->retry_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + +- blk->retry_interval); +- send_qmp_error_event(blk, action, is_read, error); ++ if (!blk->quiesce_counter) { ++ timer_mod(blk->retry_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + ++ blk->retry_interval); ++ send_qmp_error_event(blk, action, is_read, error); ++ } + } else { + send_qmp_error_event(blk, action, is_read, error); + } +-- +2.27.0 + diff --git a/block-bugfix-Don-t-pause-vm-when-NOSPACE-EIO-happene.patch b/block-bugfix-Don-t-pause-vm-when-NOSPACE-EIO-happene.patch new file mode 100644 index 0000000000000000000000000000000000000000..d082d283e83399d2d8655fc9c2137f2f329ad56c --- /dev/null +++ b/block-bugfix-Don-t-pause-vm-when-NOSPACE-EIO-happene.patch @@ -0,0 +1,33 @@ +From ea0feb8a262383582416283ad1af1819c1e0e22a Mon Sep 17 00:00:00 2001 +From: WangJian +Date: Wed, 9 Feb 2022 16:10:22 +0800 +Subject: [PATCH] block: bugfix: Don't pause vm when NOSPACE EIO happened + +When backend disk is FULL and disk IO type is 'dataplane', +QEMU will pause the vm, and this may cause endless-loop in +QEMU main thread if we do the snapshot merge now. + +When backend disk is FULL, only reporting an error rather +than pausing the virtual machine. 
+ +Signed-off-by: wangjian161 +--- + blockdev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/blockdev.c b/blockdev.c +index bc2099e9da..455ae8606d 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -557,7 +557,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, + qdict_put_str(bs_opts, "driver", buf); + } + +- on_write_error = BLOCKDEV_ON_ERROR_ENOSPC; ++ on_write_error = BLOCKDEV_ON_ERROR_REPORT; + if ((buf = qemu_opt_get(opts, "werror")) != NULL) { + on_write_error = parse_block_error_action(buf, 0, &error); + if (error) { +-- +2.27.0 + diff --git a/block-disallow-block-jobs-when-there-is-a-BDRV_O_INA.patch b/block-disallow-block-jobs-when-there-is-a-BDRV_O_INA.patch new file mode 100644 index 0000000000000000000000000000000000000000..f2f7ad6a14474d6ae4b4558280f78047540e689f --- /dev/null +++ b/block-disallow-block-jobs-when-there-is-a-BDRV_O_INA.patch @@ -0,0 +1,47 @@ +From f9aef3909d23af6a33c604f59dccfcb764090f01 Mon Sep 17 00:00:00 2001 +From: WangJian +Date: Wed, 9 Feb 2022 11:29:15 +0800 +Subject: [PATCH] block: disallow block jobs when there is a BDRV_O_INACTIVE + flag + +Currently, migration will put a BDRV_O_INACTIVE flag +on bs's open_flags until another resume being called. In that case, +any IO from vm or block jobs will cause a qemu crash with an assert +'assert(!(bs->open_flags & BDRV_O_INACTIVE))' failure in bdrv_co_pwritev +function. we hereby disallow block jobs by faking a blocker. + +Signed-off-by: wangjian161 +--- + block.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/block.c b/block.c +index bfb0861ec6..b7cb963929 100644 +--- a/block.c ++++ b/block.c +@@ -7298,6 +7298,22 @@ bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) + bdrv_get_device_or_node_name(bs)); + return true; + } ++ ++ /* ++ * When migration puts a BDRV_O_INACTIVE flag on driver's open_flags, ++ * we fake a blocker that doesn't exist. 
From now on, block jobs ++ * will not be permitted. ++ */ ++ if ((op == BLOCK_OP_TYPE_RESIZE || op == BLOCK_OP_TYPE_COMMIT_SOURCE || ++ op == BLOCK_OP_TYPE_MIRROR_SOURCE || op == BLOCK_OP_TYPE_MIRROR_TARGET) && ++ (bs->open_flags & BDRV_O_INACTIVE)) { ++ if (errp) { ++ error_setg(errp, "block device is in use by migration with" ++ " a driver BDRV_O_INACTIVE flag setted"); ++ } ++ return true; ++ } ++ + return false; + } + +-- +2.27.0 + diff --git a/block-enable-cache-mode-of-empty-cdrom.patch b/block-enable-cache-mode-of-empty-cdrom.patch new file mode 100644 index 0000000000000000000000000000000000000000..d7aae757e53e7c9f84a576ccf2dfd1476bab2acb --- /dev/null +++ b/block-enable-cache-mode-of-empty-cdrom.patch @@ -0,0 +1,49 @@ +From 652325f9a04143ffabf5e9a418253a05e927ec37 Mon Sep 17 00:00:00 2001 +From: WangJian +Date: Wed, 9 Feb 2022 11:18:21 +0800 +Subject: [PATCH] block: enable cache mode of empty cdrom + +enable cache mode even if cdrom is empty + +Signed-off-by: wangjian161 +--- + blockdev.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/blockdev.c b/blockdev.c +index c91f49e7b6..bc2099e9da 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -493,6 +493,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, + QDict *interval_dict = NULL; + QList *interval_list = NULL; + const char *id; ++ const char *cache; + BlockdevDetectZeroesOptions detect_zeroes = + BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF; + const char *throttling_group = NULL; +@@ -580,6 +581,21 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, + + read_only = qemu_opt_get_bool(opts, BDRV_OPT_READ_ONLY, false); + ++ if (!file || !*file) { ++ cache = qdict_get_try_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH); ++ if (cache && !strcmp(cache, "on")) { ++ bdrv_flags |= BDRV_O_NO_FLUSH; ++ } ++ ++ cache = qdict_get_try_str(bs_opts, BDRV_OPT_CACHE_DIRECT); ++ if (cache && !strcmp(cache, "on")) { ++ bdrv_flags |= BDRV_O_NOCACHE; ++ } ++ ++ qdict_del(bs_opts, 
BDRV_OPT_CACHE_NO_FLUSH); ++ qdict_del(bs_opts, BDRV_OPT_CACHE_DIRECT); ++ } ++ + /* init */ + if ((!file || !*file) && !qdict_size(bs_opts)) { + BlockBackendRootState *blk_rs; +-- +2.27.0 + diff --git a/block-mirror-fix-file-system-went-to-read-only-after.patch b/block-mirror-fix-file-system-went-to-read-only-after.patch new file mode 100644 index 0000000000000000000000000000000000000000..b36f8cd870663ea2bb2c4a84d4d70f0527915ee5 --- /dev/null +++ b/block-mirror-fix-file-system-went-to-read-only-after.patch @@ -0,0 +1,32 @@ +From 6203b11d2a900c60d2ee3c3a980d2c385050eb62 Mon Sep 17 00:00:00 2001 +From: yexiao +Date: Thu, 10 Feb 2022 21:37:49 +0800 +Subject: [PATCH] block/mirror: fix file-system went to read-only after + block-mirror + +config vm disk with prdm, keep the disk writing data continuously +during block-mirror, the file-system will went to read-only after +block-mirror, fix it. + +Signed-off-by: caojinhua +Signed-off-by: jiangdongxu +--- + block/mirror.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/mirror.c b/block/mirror.c +index cd9d3ad4a8..20b3e8e5d8 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -1774,7 +1774,7 @@ static BlockJob *mirror_start_job( + * reads on the top, while disabling it in the intermediate nodes, and make + * the backing chain writable. 
*/ + mirror_top_bs = bdrv_new_open_driver(&bdrv_mirror_top, filter_node_name, +- BDRV_O_RDWR, errp); ++ BDRV_O_RDWR | BDRV_O_NOCACHE, errp); + if (mirror_top_bs == NULL) { + return NULL; + } +-- +2.27.0 + diff --git a/block-virtio-blk-Fix-memory-leak-from-virtio_blk_zon.patch b/block-virtio-blk-Fix-memory-leak-from-virtio_blk_zon.patch new file mode 100644 index 0000000000000000000000000000000000000000..8acce11890dc45ec4be509e207a1b65c6f0111f3 --- /dev/null +++ b/block-virtio-blk-Fix-memory-leak-from-virtio_blk_zon.patch @@ -0,0 +1,49 @@ +From b54d853396820150735294107e2e3d060724de04 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Mon, 8 Apr 2024 14:39:43 +0800 +Subject: [PATCH] block/virtio-blk: Fix memory leak from virtio_blk_zone_report + +cheery-pick from bbdf9023665f409113cb07b463732861af63fb47 + +This modification ensures that in scenarios where the buffer size is +insufficient for a zone report, the function will now properly set an +error status and proceed to a cleanup label, instead of merely +returning. 
+ +The following ASAN log reveals it: + +==1767400==ERROR: LeakSanitizer: detected memory leaks +Direct leak of 312 byte(s) in 1 object(s) allocated from: + #0 0x64ac7b3280cd in malloc llvm/compiler-rt/lib/asan/asan_malloc_linux.cpp:129:3 + #1 0x735b02fb9738 in g_malloc (/lib/x86_64-linux-gnu/libglib-2.0.so.0+0x5e738) + #2 0x64ac7d23be96 in virtqueue_split_pop hw/virtio/virtio.c:1612:12 + #3 0x64ac7d23728a in virtqueue_pop hw/virtio/virtio.c:1783:16 + #4 0x64ac7cfcaacd in virtio_blk_get_request hw/block/virtio-blk.c:228:27 + #5 0x64ac7cfca7c7 in virtio_blk_handle_vq hw/block/virtio-blk.c:1123:23 + #6 0x64ac7cfecb95 in virtio_blk_handle_output hw/block/virtio-blk.c:1157:5 + +Signed-off-by: Zheyu Ma +Message-id: 20240404120040.1951466-1-zheyuma97@gmail.com +Signed-off-by: Stefan Hajnoczi +Signed-off-by: qihao_yewu +--- + hw/block/virtio-blk.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index 1ebc9188c0..2eb096a6dc 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -790,7 +790,8 @@ static void virtio_blk_handle_zone_report(VirtIOBlockReq *req, + sizeof(struct virtio_blk_zone_report) + + sizeof(struct virtio_blk_zone_descriptor)) { + virtio_error(vdev, "in buffer too small for zone report"); +- return; ++ err_status = VIRTIO_BLK_S_ZONE_INVALID_CMD; ++ goto out; + } + + /* start byte offset of the zone report */ +-- +2.27.0 + diff --git a/bugfix-fix-eventfds-may-double-free-when-vm_id-reuse.patch b/bugfix-fix-eventfds-may-double-free-when-vm_id-reuse.patch new file mode 100644 index 0000000000000000000000000000000000000000..1160489609bce25c0fceffb2c8d98c04ba2283a3 --- /dev/null +++ b/bugfix-fix-eventfds-may-double-free-when-vm_id-reuse.patch @@ -0,0 +1,49 @@ +From 6588c017de54bab8a11509d43e2ddabf065cfa50 Mon Sep 17 00:00:00 2001 +From: jiangdongxu +Date: Thu, 10 Feb 2022 21:50:28 +0800 +Subject: [PATCH] bugfix: fix eventfds may double free when vm_id reused in + ivshmem + +As 
the ivshmem Server-Client Protol describes, when a +client disconnects from the server, server sends disconnect +notifications to the other clients. And the other clients +will free the eventfds of the disconnected client according +to the client ID. If the client ID is reused, the eventfds +may be double freed. + +It will be solved by setting eventfds to NULL after freeing +and allocating memory for it when it's used. + +Signed-off-by: Peng Liang +Signed-off-by: jiangdongxu +Signed-off-by: Adttil +--- + hw/misc/ivshmem.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c +index 0447888029..ad9a3c546e 100644 +--- a/hw/misc/ivshmem.c ++++ b/hw/misc/ivshmem.c +@@ -400,6 +400,7 @@ static void close_peer_eventfds(IVShmemState *s, int posn) + } + + g_free(s->peers[posn].eventfds); ++ s->peers[posn].eventfds = NULL; + s->peers[posn].nb_eventfds = 0; + } + +@@ -533,6 +534,10 @@ static void process_msg_connect(IVShmemState *s, uint16_t posn, int fd, + close(fd); + return; + } ++ if (peer->eventfds == NULL) { ++ peer->eventfds = g_new0(EventNotifier, s->vectors); ++ peer->nb_eventfds = 0; ++ } + vector = peer->nb_eventfds++; + + IVSHMEM_DPRINTF("eventfds[%d][%d] = %d\n", posn, vector, fd); +-- +2.27.0 + diff --git a/bugfix-fix-possible-memory-leak.patch b/bugfix-fix-possible-memory-leak.patch new file mode 100644 index 0000000000000000000000000000000000000000..34d88766ee9e88a8345ff2c247e51e7045a3e159 --- /dev/null +++ b/bugfix-fix-possible-memory-leak.patch @@ -0,0 +1,98 @@ +From e6a20580801314e9d47682d7b8d8161c030eab04 Mon Sep 17 00:00:00 2001 +From: jiangdongxu +Date: Thu, 10 Feb 2022 22:12:50 +0800 +Subject: [PATCH] bugfix: fix possible memory leak + +Signed-off-by: caojinhua +Signed-off-by: jiangdongxu +Signed-off-by: Adttil +--- + migration/savevm.c | 2 ++ + qga/main.c | 18 +++++++++++++----- + 2 files changed, 15 insertions(+), 5 deletions(-) + +diff --git a/migration/savevm.c b/migration/savevm.c +index 
eec5503a42..477a19719f 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1553,6 +1553,7 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, + ret = vmstate_save(f, se, vmdesc); + if (ret) { + qemu_file_set_error(f, ret); ++ json_writer_free(vmdesc); + return ret; + } + +@@ -1572,6 +1573,7 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, + migrate_set_error(ms, local_err); + error_report_err(local_err); + qemu_file_set_error(f, ret); ++ json_writer_free(vmdesc); + return ret; + } + } +diff --git a/qga/main.c b/qga/main.c +index 8668b9f3d3..c4dcbb86be 100644 +--- a/qga/main.c ++++ b/qga/main.c +@@ -1399,7 +1399,7 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation) + if (g_mkdir_with_parents(config->state_dir, S_IRWXU) == -1) { + g_critical("unable to create (an ancestor of) the state directory" + " '%s': %s", config->state_dir, strerror(errno)); +- return NULL; ++ goto failed; + } + #endif + +@@ -1424,7 +1424,7 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation) + if (!log_file) { + g_critical("unable to open specified log file: %s", + strerror(errno)); +- return NULL; ++ goto failed; + } + s->log_file = log_file; + } +@@ -1435,7 +1435,7 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation) + s->pstate_filepath, + ga_is_frozen(s))) { + g_critical("failed to load persistent state"); +- return NULL; ++ goto failed; + } + + if (config->allowedrpcs) { +@@ -1465,7 +1465,7 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation) + #ifndef _WIN32 + if (!register_signal_handlers()) { + g_critical("failed to register signal handlers"); +- return NULL; ++ goto failed; + } + #endif + +@@ -1478,12 +1478,20 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation) + s->wakeup_event = CreateEvent(NULL, TRUE, FALSE, TEXT("WakeUp")); + if (s->wakeup_event == NULL) { + g_critical("CreateEvent failed"); +- return NULL; ++ goto 
failed; + } + #endif + + ga_state = s; + return s; ++failed: ++ g_free(s->pstate_filepath); ++ g_free(s->state_filepath_isfrozen); ++ if (s->log_file) { ++ fclose(s->log_file); ++ } ++ g_free(s); ++ return NULL; + } + + static void cleanup_agent(GAState *s) +-- +2.27.0 + diff --git a/bugfix-fix-some-illegal-memory-access-and-memory-lea.patch b/bugfix-fix-some-illegal-memory-access-and-memory-lea.patch new file mode 100644 index 0000000000000000000000000000000000000000..18c983974ba07088dcb79920b2b84926f35bef45 --- /dev/null +++ b/bugfix-fix-some-illegal-memory-access-and-memory-lea.patch @@ -0,0 +1,27 @@ +From 35054aa25a0d7758a35d75e3298555b502e37b0f Mon Sep 17 00:00:00 2001 +From: jiangdongxu +Date: Thu, 10 Feb 2022 21:32:37 +0800 +Subject: [PATCH] bugfix: fix some illegal memory access and memory leak + +Signed-off-by: yuxiating +Signed-off-by: jiangdongxu +Signed-off-by: Adttil +--- + util/range.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/util/range.c b/util/range.c +index f3f40098d5..2ea640662b 100644 +--- a/util/range.c ++++ b/util/range.c +@@ -61,6 +61,7 @@ GList *range_list_insert(GList *list, Range *data) + range_extend(l->data, l->next->data); + g_free(l->next->data); + new_l = g_list_delete_link(list, l->next); ++ l->next = NULL; + assert(new_l == list); + } + +-- +2.27.0 + diff --git a/bugfix-irq-Avoid-covering-object-refcount-of-qemu_ir.patch b/bugfix-irq-Avoid-covering-object-refcount-of-qemu_ir.patch new file mode 100644 index 0000000000000000000000000000000000000000..9d45a21f04698d0c4def05fce0fbe86fc84b8cf0 --- /dev/null +++ b/bugfix-irq-Avoid-covering-object-refcount-of-qemu_ir.patch @@ -0,0 +1,32 @@ +From 48a328ee1a5a71b7048e4591310471c759fc5af6 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Mon, 27 Jul 2020 20:39:07 +0800 +Subject: [PATCH] bugfix: irq: Avoid covering object refcount of qemu_irq + +Avoid covering object refcount of qemu_irq, otherwise it may causes +memory leak. 
+ +Signed-off-by: Keqian Zhu +--- + hw/core/irq.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/core/irq.c b/hw/core/irq.c +index 3f14e2dda7..df9b5dac9b 100644 +--- a/hw/core/irq.c ++++ b/hw/core/irq.c +@@ -110,7 +110,10 @@ void qemu_irq_intercept_in(qemu_irq *gpio_in, qemu_irq_handler handler, int n) + int i; + qemu_irq *old_irqs = qemu_allocate_irqs(NULL, NULL, n); + for (i = 0; i < n; i++) { +- *old_irqs[i] = *gpio_in[i]; ++ old_irqs[i]->handler = gpio_in[i]->handler; ++ old_irqs[i]->opaque = gpio_in[i]->opaque; ++ old_irqs[i]->n = gpio_in[i]->n; ++ + gpio_in[i]->handler = handler; + gpio_in[i]->opaque = &old_irqs[i]; + } +-- +2.27.0 + diff --git a/chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch b/chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch new file mode 100644 index 0000000000000000000000000000000000000000..f494ea7f0f43d02b856ea9f1b3d7836a40e7d202 --- /dev/null +++ b/chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch @@ -0,0 +1,91 @@ +From 2d0d05b7d5925f71d7ddd4df9f1ac12add453298 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Thu, 7 Mar 2024 10:39:23 +0800 +Subject: [PATCH] chardev/char-socket: Fix TLS io channels sending too much + data to the backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from 462945cd22d2bcd233401ed3aa167d83a8e35b05 + +Commit ffda5db65a ("io/channel-tls: fix handling of bigger read buffers") +changed the behavior of the TLS io channels to schedule a second reading +attempt if there is still incoming data pending. This caused a regression +with backends like the sclpconsole that check in their read function that +the sender does not try to write more bytes to it than the device can +currently handle. 
+ +The problem can be reproduced like this: + + 1) In one terminal, do this: + + mkdir qemu-pki + cd qemu-pki + openssl genrsa 2048 > ca-key.pem + openssl req -new -x509 -nodes -days 365000 -key ca-key.pem -out ca-cert.pem + # enter some dummy value for the cert + openssl genrsa 2048 > server-key.pem + openssl req -new -x509 -nodes -days 365000 -key server-key.pem \ + -out server-cert.pem + # enter some other dummy values for the cert + + gnutls-serv --echo --x509cafile ca-cert.pem --x509keyfile server-key.pem \ + --x509certfile server-cert.pem -p 8338 + + 2) In another terminal, do this: + + wget https://download.fedoraproject.org/pub/fedora-secondary/releases/39/Cloud/s390x/images/Fedora-Cloud-Base-39-1.5.s390x.qcow2 + + qemu-system-s390x -nographic -nodefaults \ + -hda Fedora-Cloud-Base-39-1.5.s390x.qcow2 \ + -object tls-creds-x509,id=tls0,endpoint=client,verify-peer=false,dir=$PWD/qemu-pki \ + -chardev socket,id=tls_chardev,host=localhost,port=8338,tls-creds=tls0 \ + -device sclpconsole,chardev=tls_chardev,id=tls_serial + +QEMU then aborts after a second or two with: + + qemu-system-s390x: ../hw/char/sclpconsole.c:73: chr_read: Assertion + `size <= SIZE_BUFFER_VT220 - scon->iov_data_len' failed. + Aborted (core dumped) + +It looks like the second read does not trigger the chr_can_read() function +to be called before the second read, which should normally always be done +before sending bytes to a character device to see how much it can handle, +so the s->max_size in tcp_chr_read() still contains the old value from the +previous read. Let's make sure that we use the up-to-date value by calling +tcp_chr_read_poll() again here. + +Fixes: ffda5db65a ("io/channel-tls: fix handling of bigger read buffers") +Buglink: https://issues.redhat.com/browse/RHEL-24614 +Reviewed-by: "Daniel P. 
Berrangé" +Message-ID: <20240229104339.42574-1-thuth@redhat.com> +Reviewed-by: Antoine Damhet +Tested-by: Antoine Damhet +Reviewed-by: Marc-André Lureau +Signed-off-by: Thomas Huth +Signed-off-by: qihao_yewu +--- + chardev/char-socket.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index 73947da188..034840593d 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -492,9 +492,9 @@ static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) + s->max_size <= 0) { + return TRUE; + } +- len = sizeof(buf); +- if (len > s->max_size) { +- len = s->max_size; ++ len = tcp_chr_read_poll(opaque); ++ if (len > sizeof(buf)) { ++ len = sizeof(buf); + } + size = tcp_chr_recv(chr, (void *)buf, len); + if (size == 0 || (size == -1 && errno != EAGAIN)) { +-- +2.27.0 + diff --git a/configure-Add-linux-header-compile-support-for-Loong.patch b/configure-Add-linux-header-compile-support-for-Loong.patch new file mode 100644 index 0000000000000000000000000000000000000000..cc73eaf6537c25051767305ad40c3cd75dabb9b0 --- /dev/null +++ b/configure-Add-linux-header-compile-support-for-Loong.patch @@ -0,0 +1,40 @@ +From b21a705562867cc9dcbf0012ffa200caad8458ba Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Tue, 16 Jan 2024 09:39:52 +0800 +Subject: [PATCH] configure: Add linux header compile support for LoongArch +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When compiling qemu with system KVM mode for LoongArch, header files +in directory linux-headers/asm-loongarch should be used firstly. +Otherwise it fails to find kvm.h on system with old glibc, since +latest kernel header files are not installed. + +This patch adds linux_arch definition for LoongArch system so that +header files in directory linux-headers/asm-loongarch can be included. 
+ +Fixes: 714b03c125 ("target/loongarch: Add loongarch kvm into meson build") +Signed-off-by: Bibo Mao +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240116013952.264474-1-maobibo@loongson.cn> +Signed-off-by: Philippe Mathieu-Daudé +--- + configure | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/configure b/configure +index bdda912f36..6036de83a4 100755 +--- a/configure ++++ b/configure +@@ -445,6 +445,7 @@ case "$cpu" in + loongarch*) + cpu=loongarch64 + host_arch=loongarch64 ++ linux_arch=loongarch + ;; + + mips64*) +-- +2.27.0 + diff --git a/coro-support-live-patch-for-libcare.patch b/coro-support-live-patch-for-libcare.patch new file mode 100644 index 0000000000000000000000000000000000000000..71b83c2df433f74db003e6ceee10a067f7db39af --- /dev/null +++ b/coro-support-live-patch-for-libcare.patch @@ -0,0 +1,116 @@ +From c2b377814e7874811d7eb98462d5153e966281cf Mon Sep 17 00:00:00 2001 +From: Fei Xu +Date: Wed, 3 Apr 2024 18:05:25 +0800 +Subject: [PATCH] coro: support live patch for libcare + +Signed-off-by: Dawei Jiang +--- + include/qemu/coroutine_int.h | 3 ++- + util/coroutine-ucontext.c | 52 ++++++++++++++++++++++++++++++++++++ + util/qemu-coroutine.c | 4 +++ + 3 files changed, 58 insertions(+), 1 deletion(-) + +diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h +index 1da148552f..11b550a0fc 100644 +--- a/include/qemu/coroutine_int.h ++++ b/include/qemu/coroutine_int.h +@@ -73,5 +73,6 @@ Coroutine *qemu_coroutine_new(void); + void qemu_coroutine_delete(Coroutine *co); + CoroutineAction qemu_coroutine_switch(Coroutine *from, Coroutine *to, + CoroutineAction action); +- ++void qemu_coroutine_info_add(const Coroutine *co_); ++void qemu_coroutine_info_delete(const Coroutine *co_); + #endif +diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c +index 7b304c79d9..650c21846d 100644 +--- a/util/coroutine-ucontext.c ++++ b/util/coroutine-ucontext.c +@@ -80,6 +80,19 @@ union cc_arg { + int i[2]; + }; + ++/** ++ * 
coroutines list for libcare ++ */ ++struct CoroutineInformation { ++ sigjmp_buf *env; ++ QLIST_ENTRY(CoroutineInformation) next; ++}; ++ ++static QemuMutex coro_mtx; ++QLIST_HEAD(, CoroutineInformation) coro_info_list = QLIST_HEAD_INITIALIZER(pool); ++int coro_env_offset = offsetof(struct CoroutineInformation, env); ++int coro_next_offset = offsetof(struct CoroutineInformation, next); ++ + /* + * QEMU_ALWAYS_INLINE only does so if __OPTIMIZE__, so we cannot use it. + * always_inline is required to avoid TSan runtime fatal errors. +@@ -340,3 +353,42 @@ bool qemu_in_coroutine(void) + + return self && self->caller; + } ++ ++static void __attribute__((constructor)) coro_mutex_init(void) ++{ ++ qemu_mutex_init(&coro_mtx); ++} ++ ++void qemu_coroutine_info_add(const Coroutine *co_) ++{ ++ CoroutineUContext *co; ++ struct CoroutineInformation *coro_info; ++ ++ /* save coroutine env to coro_info_list */ ++ co = DO_UPCAST(CoroutineUContext, base, co_); ++ coro_info = g_malloc0(sizeof(struct CoroutineInformation)); ++ coro_info->env = &co->env; ++ ++ qemu_mutex_lock(&coro_mtx); ++ QLIST_INSERT_HEAD(&coro_info_list, coro_info, next); ++ qemu_mutex_unlock(&coro_mtx); ++} ++ ++void qemu_coroutine_info_delete(const Coroutine *co_) ++{ ++ CoroutineUContext *co; ++ struct CoroutineInformation *coro_info; ++ ++ /* Remove relative coroutine env info from coro_info_list */ ++ co = DO_UPCAST(CoroutineUContext, base, co_); ++ ++ qemu_mutex_lock(&coro_mtx); ++ QLIST_FOREACH(coro_info, &coro_info_list, next) { ++ if (coro_info->env == &co->env) { ++ QLIST_REMOVE(coro_info, next); ++ g_free(coro_info); ++ break; ++ } ++ } ++ qemu_mutex_unlock(&coro_mtx); ++} +diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c +index 5fd2dbaf8b..f550214484 100644 +--- a/util/qemu-coroutine.c ++++ b/util/qemu-coroutine.c +@@ -89,6 +89,8 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque) + co = qemu_coroutine_new(); + } + ++ qemu_coroutine_info_add(co); ++ + co->entry = entry; + 
co->entry_arg = opaque; + QSIMPLEQ_INIT(&co->co_queue_wakeup); +@@ -99,6 +101,8 @@ static void coroutine_delete(Coroutine *co) + { + co->caller = NULL; + ++ qemu_coroutine_info_delete(co); ++ + if (IS_ENABLED(CONFIG_COROUTINE_POOL)) { + if (release_pool_size < qatomic_read(&pool_max_size) * 2) { + QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next); +-- +2.27.0 + diff --git a/cpu-add-Cortex-A72-processor-kvm-target-support.patch b/cpu-add-Cortex-A72-processor-kvm-target-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..2e35603acb11d3b44e0df0faa1e46e018aea0051 --- /dev/null +++ b/cpu-add-Cortex-A72-processor-kvm-target-support.patch @@ -0,0 +1,60 @@ +From 5853333c9513caea541701c95a4ac691bb97452f Mon Sep 17 00:00:00 2001 +From: Xu Yandong +Date: Tue, 19 Mar 2024 10:45:56 +0800 +Subject: [PATCH] cpu: add Cortex-A72 processor kvm target support + +The ARM Cortex-A72 is ARMv8-A micro-architecture, +add kvm target to ARM Cortex-A72 processor definition. + +Signed-off-by: Xu Yandong +Signed-off-by: Mingwang Li +Signed-off-by: Yuan Zhang +--- + target/arm/cpu64.c | 2 +- + target/arm/kvm-consts.h | 3 +++ + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index 922eac3b61..471014b5a9 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -710,6 +710,7 @@ static void aarch64_a72_initfn(Object *obj) + ARMCPU *cpu = ARM_CPU(obj); + + cpu->dtb_compatible = "arm,cortex-a72"; ++ cpu->kvm_target = QEMU_KVM_ARM_TARGET_GENERIC_V8; + set_feature(&cpu->env, ARM_FEATURE_V8); + set_feature(&cpu->env, ARM_FEATURE_NEON); + set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); +@@ -773,7 +774,6 @@ static void aarch64_kunpeng_920_initfn(Object *obj) + cpu->isar.id_aa64dfr0 = 0x110305408; + cpu->isar.id_aa64isar0 = 0x10211120; + cpu->isar.id_aa64mmfr0 = 0x101125; +- cpu->kvm_target = KVM_ARM_TARGET_GENERIC_V8; + } + + static void aarch64_host_initfn(Object *obj) +diff --git 
a/target/arm/kvm-consts.h b/target/arm/kvm-consts.h +index 7c6adc14f6..c034823170 100644 +--- a/target/arm/kvm-consts.h ++++ b/target/arm/kvm-consts.h +@@ -133,6 +133,8 @@ MISMATCH_CHECK(QEMU_PSCI_RET_DISABLED, PSCI_RET_DISABLED); + #define QEMU_KVM_ARM_TARGET_CORTEX_A57 2 + #define QEMU_KVM_ARM_TARGET_XGENE_POTENZA 3 + #define QEMU_KVM_ARM_TARGET_CORTEX_A53 4 ++/* Generic ARM v8 target */ ++#define QEMU_KVM_ARM_TARGET_GENERIC_V8 5 + + /* There's no kernel define for this: sentinel value which + * matches no KVM target value for either 64 or 32 bit +@@ -144,6 +146,7 @@ MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_FOUNDATION_V8, KVM_ARM_TARGET_FOUNDATION_V8); + MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A57, KVM_ARM_TARGET_CORTEX_A57); + MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_XGENE_POTENZA, KVM_ARM_TARGET_XGENE_POTENZA); + MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_CORTEX_A53, KVM_ARM_TARGET_CORTEX_A53); ++MISMATCH_CHECK(QEMU_KVM_ARM_TARGET_GENERIC_V8, KVM_ARM_TARGET_GENERIC_V8); + + #define CP_REG_ARM64 0x6000000000000000ULL + #define CP_REG_ARM_COPROC_MASK 0x000000000FFF0000 +-- +2.27.0 + diff --git a/cpu-add-Kunpeng-920-cpu-support.patch b/cpu-add-Kunpeng-920-cpu-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..fc9c4cc8f35dc3b53cf64f7f47b1135e9dc197e3 --- /dev/null +++ b/cpu-add-Kunpeng-920-cpu-support.patch @@ -0,0 +1,120 @@ +From e4ae54316651bf6af12de263da158c5ec4ed0401 Mon Sep 17 00:00:00 2001 +From: Xu Yandong +Date: Mon, 18 Mar 2024 17:31:31 +0800 +Subject: [PATCH] cpu: add Kunpeng-920 cpu support + +Add the Kunpeng-920 CPU model + +Signed-off-by: Xu Yandong +Signed-off-by: Mingwang Li +Signed-off-by: Yuan Zhang +--- + hw/arm/virt.c | 1 + + target/arm/cpu64.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 73 insertions(+) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index be2856c018..500a15aa5b 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -220,6 +220,7 @@ static const char *valid_cpus[] = { + #endif + 
ARM_CPU_TYPE_NAME("cortex-a53"), + ARM_CPU_TYPE_NAME("cortex-a57"), ++ ARM_CPU_TYPE_NAME("Kunpeng-920"), + ARM_CPU_TYPE_NAME("host"), + ARM_CPU_TYPE_NAME("max"), + }; +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index 1e9c6c85ae..922eac3b61 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -705,6 +705,77 @@ static void aarch64_a53_initfn(Object *obj) + define_cortex_a72_a57_a53_cp_reginfo(cpu); + } + ++static void aarch64_a72_initfn(Object *obj) ++{ ++ ARMCPU *cpu = ARM_CPU(obj); ++ ++ cpu->dtb_compatible = "arm,cortex-a72"; ++ set_feature(&cpu->env, ARM_FEATURE_V8); ++ set_feature(&cpu->env, ARM_FEATURE_NEON); ++ set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); ++ set_feature(&cpu->env, ARM_FEATURE_AARCH64); ++ set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); ++ set_feature(&cpu->env, ARM_FEATURE_EL2); ++ set_feature(&cpu->env, ARM_FEATURE_EL3); ++ set_feature(&cpu->env, ARM_FEATURE_PMU); ++ cpu->midr = 0x410fd083; ++ cpu->revidr = 0x00000000; ++ cpu->reset_fpsid = 0x41034080; ++ cpu->isar.mvfr0 = 0x10110222; ++ cpu->isar.mvfr1 = 0x12111111; ++ cpu->isar.mvfr2 = 0x00000043; ++ cpu->ctr = 0x8444c004; ++ cpu->reset_sctlr = 0x00c50838; ++ cpu->isar.id_pfr0 = 0x00000131; ++ cpu->isar.id_pfr1 = 0x00011011; ++ cpu->isar.id_dfr0 = 0x03010066; ++ cpu->id_afr0 = 0x00000000; ++ cpu->isar.id_mmfr0 = 0x10201105; ++ cpu->isar.id_mmfr1 = 0x40000000; ++ cpu->isar.id_mmfr2 = 0x01260000; ++ cpu->isar.id_mmfr3 = 0x02102211; ++ cpu->isar.id_isar0 = 0x02101110; ++ cpu->isar.id_isar1 = 0x13112111; ++ cpu->isar.id_isar2 = 0x21232042; ++ cpu->isar.id_isar3 = 0x01112131; ++ cpu->isar.id_isar4 = 0x00011142; ++ cpu->isar.id_isar5 = 0x00011121; ++ cpu->isar.id_aa64pfr0 = 0x00002222; ++ cpu->isar.id_aa64dfr0 = 0x10305106; ++ cpu->isar.id_aa64isar0 = 0x00011120; ++ cpu->isar.id_aa64mmfr0 = 0x00001124; ++ cpu->isar.dbgdidr = 0x3516d000; ++ cpu->clidr = 0x0a200023; ++ cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */ ++ cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */ ++ 
cpu->ccsidr[2] = 0x707fe07a; /* 1MB L2 cache */ ++ cpu->dcz_blocksize = 4; /* 64 bytes */ ++ cpu->gic_num_lrs = 4; ++ cpu->gic_vpribits = 5; ++ cpu->gic_vprebits = 5; ++ define_cortex_a72_a57_a53_cp_reginfo(cpu); ++} ++ ++static void aarch64_kunpeng_920_initfn(Object *obj) ++{ ++ ARMCPU *cpu = ARM_CPU(obj); ++ ++ /* ++ * Hisilicon Kunpeng-920 CPU is similar to cortex-a72, ++ * so first initialize cpu data as cortex-a72, ++ * and then update the special register. ++ */ ++ aarch64_a72_initfn(obj); ++ ++ cpu->midr = 0x480fd010; ++ cpu->ctr = 0x84448004; ++ cpu->isar.id_aa64pfr0 = 0x11001111; ++ cpu->isar.id_aa64dfr0 = 0x110305408; ++ cpu->isar.id_aa64isar0 = 0x10211120; ++ cpu->isar.id_aa64mmfr0 = 0x101125; ++ cpu->kvm_target = KVM_ARM_TARGET_GENERIC_V8; ++} ++ + static void aarch64_host_initfn(Object *obj) + { + #if defined(CONFIG_KVM) +@@ -744,6 +815,7 @@ static void aarch64_max_initfn(Object *obj) + static const ARMCPUInfo aarch64_cpus[] = { + { .name = "cortex-a57", .initfn = aarch64_a57_initfn }, + { .name = "cortex-a53", .initfn = aarch64_a53_initfn }, ++ { .name = "Kunpeng-920", .initfn = aarch64_kunpeng_920_initfn}, + { .name = "max", .initfn = aarch64_max_initfn }, + #if defined(CONFIG_KVM) || defined(CONFIG_HVF) + { .name = "host", .initfn = aarch64_host_initfn }, +-- +2.27.0 + diff --git a/cpu-features-fix-bug-for-memory-leakage.patch b/cpu-features-fix-bug-for-memory-leakage.patch new file mode 100644 index 0000000000000000000000000000000000000000..2e6793d462ca876a46fd6f377e4f7dd896d48e06 --- /dev/null +++ b/cpu-features-fix-bug-for-memory-leakage.patch @@ -0,0 +1,25 @@ +From 9ebad9c3020625df0a178e6a2d06eaae15ef767c Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 9 Feb 2022 12:51:19 +0800 +Subject: [PATCH] cpu/features: fix bug for memory leakage + +strList hash not free after used, Fix it. 
+--- + target/i386/cpu.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index fc61a84b1e..f94405c02b 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -5475,6 +5475,7 @@ static void x86_cpu_get_unavailable_features(Object *obj, Visitor *v, + + x86_cpu_list_feature_names(xc->filtered_features, &result); + visit_type_strList(v, "unavailable-features", &result, errp); ++ qapi_free_strList(result); + } + + /* Print all cpuid feature names in featureset +-- +2.27.0 + diff --git a/cpus-common-Add-common-CPU-utility-for-possible-vCPU.patch b/cpus-common-Add-common-CPU-utility-for-possible-vCPU.patch new file mode 100644 index 0000000000000000000000000000000000000000..e2148e867fe6d83f1c5b25d7c242b17f02dfd472 --- /dev/null +++ b/cpus-common-Add-common-CPU-utility-for-possible-vCPU.patch @@ -0,0 +1,144 @@ +From 444de91551c1e141a76bf3dae4cebee9dbd57b49 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Wed, 6 May 2020 02:48:49 +0100 +Subject: [PATCH] cpus-common: Add common CPU utility for possible vCPUs + +Adds various utility functions which might be required to fetch or check the +state of the possible vCPUs. This also introduces concept of *disabled* vCPUs, +which are part of the *possible* vCPUs but are not part of the *present* vCPU. +This state shall be used during machine init time to check the presence of +vcpus. 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + cpu-common.c | 31 +++++++++++++++++++++++++ + include/hw/core/cpu.h | 53 +++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 84 insertions(+) + +diff --git a/cpu-common.c b/cpu-common.c +index c81fd72d16..d041a351ab 100644 +--- a/cpu-common.c ++++ b/cpu-common.c +@@ -24,6 +24,7 @@ + #include "sysemu/cpus.h" + #include "qemu/lockable.h" + #include "trace/trace-root.h" ++#include "hw/boards.h" + + QemuMutex qemu_cpu_list_lock; + static QemuCond exclusive_cond; +@@ -107,6 +108,36 @@ void cpu_list_remove(CPUState *cpu) + cpu_list_generation_id++; + } + ++CPUState *qemu_get_possible_cpu(int index) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ const CPUArchIdList *possible_cpus = ms->possible_cpus; ++ ++ assert((index >= 0) && (index < possible_cpus->len)); ++ ++ return CPU(possible_cpus->cpus[index].cpu); ++} ++ ++bool qemu_present_cpu(CPUState *cpu) ++{ ++ return cpu; ++} ++ ++bool qemu_enabled_cpu(CPUState *cpu) ++{ ++ return cpu && !cpu->disabled; ++} ++ ++uint64_t qemu_get_cpu_archid(int cpu_index) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ const CPUArchIdList *possible_cpus = ms->possible_cpus; ++ ++ assert((cpu_index >= 0) && (cpu_index < possible_cpus->len)); ++ ++ return possible_cpus->cpus[cpu_index].arch_id; ++} ++ + CPUState *qemu_get_cpu(int index) + { + CPUState *cpu; +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index c0c8320413..c30636a936 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -538,6 +538,17 @@ struct CPUState { + GArray *plugin_mem_cbs; + #endif + ++ /* ++ * Some architectures do not allow *presence* of vCPUs to be changed ++ * after guest has booted using information specified by VMM/firmware ++ * via ACPI MADT at the boot time. 
Thus to enable vCPU hotplug on these ++ * architectures possible vCPU can have CPUState object in 'disabled' ++ * state or can also not have CPUState object at all. This is possible ++ * when vCPU Hotplug is supported and vCPUs are 'yet-to-be-plugged' in ++ * the QOM or have been hot-unplugged. ++ * By default every CPUState is enabled as of now across all archs. ++ */ ++ bool disabled; + /* TODO Move common fields from CPUArchState here. */ + int cpu_index; + int cluster_index; +@@ -913,6 +924,48 @@ static inline bool cpu_in_exclusive_context(const CPUState *cpu) + */ + CPUState *qemu_get_cpu(int index); + ++/** ++ * qemu_get_possible_cpu: ++ * @index: The CPUState@cpu_index value of the CPU to obtain. ++ * Input index MUST be in range [0, Max Possible CPUs) ++ * ++ * If CPUState object exists,then it gets a CPU matching ++ * @index in the possible CPU array. ++ * ++ * Returns: The possible CPU or %NULL if CPU does not exist. ++ */ ++CPUState *qemu_get_possible_cpu(int index); ++ ++/** ++ * qemu_present_cpu: ++ * @cpu: The vCPU to check ++ * ++ * Checks if the vCPU is amongst the present possible vcpus. ++ * ++ * Returns: True if it is present possible vCPU else false ++ */ ++bool qemu_present_cpu(CPUState *cpu); ++ ++/** ++ * qemu_enabled_cpu: ++ * @cpu: The vCPU to check ++ * ++ * Checks if the vCPU is enabled. ++ * ++ * Returns: True if it is 'enabled' else false ++ */ ++bool qemu_enabled_cpu(CPUState *cpu); ++ ++/** ++ * qemu_get_cpu_archid: ++ * @cpu_index: possible vCPU for which arch-id needs to be retrieved ++ * ++ * Fetches the vCPU arch-id from the present possible vCPUs. ++ * ++ * Returns: arch-id of the possible vCPU ++ */ ++uint64_t qemu_get_cpu_archid(int cpu_index); ++ + /** + * cpu_exists: + * @id: Guest-exposed CPU ID to lookup. 
+-- +2.27.0 + diff --git a/disable-keyring-option.patch b/disable-keyring-option.patch new file mode 100644 index 0000000000000000000000000000000000000000..a33b320bbc596ceb0491ddc0d75208c94012f859 --- /dev/null +++ b/disable-keyring-option.patch @@ -0,0 +1,28 @@ +From fe771abc365ba0cb62dd1726f1aa5274f1807876 Mon Sep 17 00:00:00 2001 +From: Jiabo Feng +Date: Sat, 30 Mar 2024 16:24:45 +0800 +Subject: [PATCH] disable keyring option + +Due to the default prohibition of some syscall(e.g. add_key) in the Docker compilation environment, the testcases in test-crypto-secret.c cannot pass. + +Signed-off-by: Jiabo Feng +--- + meson_options.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/meson_options.txt b/meson_options.txt +index c9baeda639..cf9706c411 100644 +--- a/meson_options.txt ++++ b/meson_options.txt +@@ -121,7 +121,7 @@ option('avx512f', type: 'feature', value: 'disabled', + description: 'AVX512F optimizations') + option('avx512bw', type: 'feature', value: 'auto', + description: 'AVX512BW optimizations') +-option('keyring', type: 'feature', value: 'auto', ++option('keyring', type: 'feature', value: 'disabled', + description: 'Linux keyring support') + option('libkeyutils', type: 'feature', value: 'auto', + description: 'Linux keyutils support') +-- +2.41.0.windows.1 + diff --git a/doc-Update-multi-thread-compression-doc.patch b/doc-Update-multi-thread-compression-doc.patch new file mode 100644 index 0000000000000000000000000000000000000000..e1f1db086dbf9a31213839897a47546ce331db1d --- /dev/null +++ b/doc-Update-multi-thread-compression-doc.patch @@ -0,0 +1,86 @@ +From 55e5f8cafda3c7d4a91e9d58c7b3259476e0dab9 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Sat, 30 Jan 2021 16:36:47 +0800 +Subject: [PATCH] doc: Update multi-thread compression doc + +Modify the doc to fit the previous changes. 
+ +Signed-off-by: Chuan Zheng +Signed-off-by: Zeyu Jin +Signed-off-by: Ying Fang +--- + docs/multi-thread-compression.txt | 31 ++++++++++++++++++------------- + 1 file changed, 18 insertions(+), 13 deletions(-) + +diff --git a/docs/multi-thread-compression.txt b/docs/multi-thread-compression.txt +index 95b1556f67..450e5de469 100644 +--- a/docs/multi-thread-compression.txt ++++ b/docs/multi-thread-compression.txt +@@ -33,14 +33,15 @@ thread compression can be used to accelerate the compression process. + + The decompression speed of Zlib is at least 4 times as quick as + compression, if the source and destination CPU have equal speed, +-keeping the compression thread count 4 times the decompression +-thread count can avoid resource waste. ++and you choose Zlib as compression method, keeping the compression ++thread count 4 times the decompression thread count can avoid resource waste. + + Compression level can be used to control the compression speed and the +-compression ratio. High compression ratio will take more time, level 0 +-stands for no compression, level 1 stands for the best compression +-speed, and level 9 stands for the best compression ratio. Users can +-select a level number between 0 and 9. ++compression ratio. High compression ratio will take more time, ++level 1 stands for the best compression speed, and higher level means higher ++compression ratio. For Zlib, users can select a level number between 0 and 9, ++where level 0 stands for no compression. For Zstd, users can select a ++level number between 1 and 22. + + + When to use the multiple thread compression in live migration +@@ -116,16 +117,19 @@ to support the multiple thread compression migration: + 2. Activate compression on the source: + {qemu} migrate_set_capability compress on + +-3. Set the compression method: ++ {qemu} migrate_set_parameter compress_method zstd ++ ++4. 
Set the compression thread count on source: + {qemu} migrate_set_parameter compress-threads 12 + +-4. Set the compression level on the source: ++5. Set the compression level on the source: + {qemu} migrate_set_parameter compress-level 1 + +-5. Set the decompression thread count on destination: ++6. Set the decompression thread count on destination: + {qemu} migrate_set_parameter decompress-threads 3 + +-6. Start outgoing migration: ++7. Start outgoing migration: + {qemu} migrate -d tcp:destination.host:4444 + {qemu} info migrate + Capabilities: ... compress: on +@@ -136,6 +140,7 @@ The following are the default settings: + compress-threads: 8 + decompress-threads: 2 + compress-level: 1 (which means best speed) ++ compress_method: zlib + + So, only the first two steps are required to use the multiple + thread compression in migration. You can do more if the default +@@ -143,7 +148,7 @@ settings are not appropriate. + + TODO + ==== +-Some faster (de)compression method such as LZ4 and Quicklz can help +-to reduce the CPU consumption when doing (de)compression. If using +-these faster (de)compression method, less (de)compression threads ++Comparing to Zlib, Some faster (de)compression method such as LZ4 ++and Quicklz can help to reduce the CPU consumption when doing (de)compression. ++If using these faster (de)compression method, less (de)compression threads + are needed when doing the migration. 
+-- +2.27.0 + diff --git a/docs-Add-generic-vhost-vdpa-device-documentation.patch b/docs-Add-generic-vhost-vdpa-device-documentation.patch new file mode 100644 index 0000000000000000000000000000000000000000..3480791dfabf4f6641a57b423c8185e5b74c63da --- /dev/null +++ b/docs-Add-generic-vhost-vdpa-device-documentation.patch @@ -0,0 +1,78 @@ +From 28ed79b98f08b5701dcaab7c6ad1015602b28e02 Mon Sep 17 00:00:00 2001 +From: libai +Date: Sat, 12 Nov 2022 22:40:13 +0800 +Subject: [PATCH] docs: Add generic vhost-vdpa device documentation + +Add the description of the generic vhost-vdpa device + +Signed-off-by: libai +--- + docs/system/device-emulation.rst | 1 + + .../devices/vhost-vdpa-generic-device.rst | 46 +++++++++++++++++++ + 2 files changed, 47 insertions(+) + create mode 100644 docs/system/devices/vhost-vdpa-generic-device.rst + +diff --git a/docs/system/device-emulation.rst b/docs/system/device-emulation.rst +index d1f3277cb0..e1b2d18fb1 100644 +--- a/docs/system/device-emulation.rst ++++ b/docs/system/device-emulation.rst +@@ -98,3 +98,4 @@ Emulated Devices + devices/canokey.rst + devices/usb-u2f.rst + devices/igb.rst ++ devices/vhost-vdpa-generic-device.rst +diff --git a/docs/system/devices/vhost-vdpa-generic-device.rst b/docs/system/devices/vhost-vdpa-generic-device.rst +new file mode 100644 +index 0000000000..25fbcac60e +--- /dev/null ++++ b/docs/system/devices/vhost-vdpa-generic-device.rst +@@ -0,0 +1,46 @@ ++ ++========================= ++vhost-vDPA generic device ++========================= ++ ++This document explains the usage of the vhost-vDPA generic device. ++ ++Description ++----------- ++ ++vDPA(virtio data path acceleration) device is a device that uses a datapath ++which complies with the virtio specifications with vendor specific control ++path. ++ ++QEMU provides two types of vhost-vDPA devices to enable the vDPA device, one ++is type sensitive which means QEMU needs to know the actual device type ++(e.g. 
net, blk, scsi) and another is called "vhost-vDPA generic device" which ++is type insensitive ++ ++The vhost-vDPA generic device builds on the vhost-vdpa subsystem and virtio ++subsystem. It is quite small, but it can support any type of virtio device. ++ ++Examples ++-------- ++ ++Prepare the vhost-vDPA backends first: ++ ++:: ++ host# ls -l /dev/vhost-vdpa-* ++ crw------- 1 root root 236, 0 Nov 2 00:49 /dev/vhost-vdpa-0 ++ ++Start QEMU with virtio-mmio bus: ++ ++:: ++ host# qemu-system \ ++ -M microvm -m 512 -smp 2 -kernel ... -initrd ... \ ++ -device vhost-vdpa-device,vhostdev=/dev/vhost-vdpa-0 \ ++ ... ++ ++Start QEMU with virtio-pci bus: ++ ++:: ++ host# qemu-system \ ++ -M pc -m 512 -smp 2 \ ++ -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-0 \ ++ ...\ +-- +2.27.0 + diff --git a/feature-Add-log-for-each-modules.patch b/feature-Add-log-for-each-modules.patch new file mode 100644 index 0000000000000000000000000000000000000000..477a2eaa6facf8366775e2c0c7a0f5c30e01b3c5 --- /dev/null +++ b/feature-Add-log-for-each-modules.patch @@ -0,0 +1,250 @@ +From 30cc47b6dd3e9ff4842eb1c2a918bbabfd8c593b Mon Sep 17 00:00:00 2001 +From: "wangxinxin.wang@huawei.com" +Date: Sun, 17 Mar 2024 15:44:28 +0800 +Subject: [PATCH] feature: Add log for each modules + +add log for each modules. 
+ +Signed-off-by: miaoyubo +Signed-off-by: Jingyi Wang +Signed-off-by: Yuan Zhang +--- + accel/kvm/kvm-all.c | 5 ++++- + hw/char/virtio-serial-bus.c | 5 +++++ + hw/pci/pci.c | 1 + + hw/usb/bus.c | 6 ++++++ + hw/usb/host-libusb.c | 5 +++++ + hw/virtio/virtio-scsi-pci.c | 3 +++ + monitor/qmp-cmds.c | 3 +++ + os-posix.c | 1 + + qapi/qmp-dispatch.c | 15 +++++++++++++++ + system/qdev-monitor.c | 5 +++++ + 10 files changed, 48 insertions(+), 1 deletion(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 33f4c6d547..d900df93a4 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -1834,7 +1834,10 @@ void kvm_irqchip_commit_routes(KVMState *s) + s->irq_routes->flags = 0; + trace_kvm_irqchip_commit_routes(); + ret = kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes); +- assert(ret == 0); ++ if (ret < 0) { ++ error_report("Set GSI routing failed: %m"); ++ abort(); ++ } + } + + static void kvm_add_routing_entry(KVMState *s, +diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c +index dd619f0731..44906057be 100644 +--- a/hw/char/virtio-serial-bus.c ++++ b/hw/char/virtio-serial-bus.c +@@ -257,6 +257,8 @@ static size_t send_control_event(VirtIOSerial *vser, uint32_t port_id, + virtio_stw_p(vdev, &cpkt.value, value); + + trace_virtio_serial_send_control_event(port_id, event, value); ++ qemu_log("virtio serial port %d send control message" ++ " event = %d, value = %d\n", port_id, event, value); + return send_control_msg(vser, &cpkt, sizeof(cpkt)); + } + +@@ -364,6 +366,9 @@ static void handle_control_message(VirtIOSerial *vser, void *buf, size_t len) + cpkt.value = virtio_lduw_p(vdev, &gcpkt->value); + + trace_virtio_serial_handle_control_message(cpkt.event, cpkt.value); ++ qemu_log("virtio serial port '%u' handle control message" ++ " event = %d, value = %d\n", ++ virtio_ldl_p(vdev, &gcpkt->id), cpkt.event, cpkt.value); + + if (cpkt.event == VIRTIO_CONSOLE_DEVICE_READY) { + if (!cpkt.value) { +diff --git a/hw/pci/pci.c b/hw/pci/pci.c 
+index c49417abb2..9da41088df 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -2411,6 +2411,7 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, + snprintf(name, sizeof(name), "%s.rom", + vmsd ? vmsd->name : object_get_typename(OBJECT(pdev))); + ++ qemu_log("add rom file: %s\n", name); + pdev->has_rom = true; + memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, pdev->romsize, + &error_fatal); +diff --git a/hw/usb/bus.c b/hw/usb/bus.c +index 92d6ed5626..20cd9b6e6f 100644 +--- a/hw/usb/bus.c ++++ b/hw/usb/bus.c +@@ -536,6 +536,10 @@ void usb_check_attach(USBDevice *dev, Error **errp) + bus->qbus.name, port->path, portspeed); + return; + } ++ ++ qemu_log("attach usb device \"%s\" (%s speed) to VM bus \"%s\", " ++ "port \"%s\" (%s speed)\n", dev->product_desc, devspeed, ++ bus->qbus.name, port->path, portspeed); + } + + void usb_device_attach(USBDevice *dev, Error **errp) +@@ -564,6 +568,8 @@ int usb_device_detach(USBDevice *dev) + + usb_detach(port); + dev->attached = false; ++ qemu_log("detach usb device \"%s\" from VM bus \"%s\", port \"%s\"\n", ++ dev->product_desc, bus->qbus.name, port->path); + return 0; + } + +diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c +index dba469c1ef..11a246ac72 100644 +--- a/hw/usb/host-libusb.c ++++ b/hw/usb/host-libusb.c +@@ -992,6 +992,8 @@ static int usb_host_open(USBHostDevice *s, libusb_device *dev, int hostfd) + + rc = libusb_open(dev, &s->dh); + if (rc != 0) { ++ qemu_log("libusb open usb device bus %d, device %d failed\n", ++ bus_num, addr); + goto fail; + } + } else { +@@ -1019,6 +1021,7 @@ static int usb_host_open(USBHostDevice *s, libusb_device *dev, int hostfd) + + libusb_get_device_descriptor(dev, &s->ddesc); + usb_host_get_port(s->dev, s->port, sizeof(s->port)); ++ qemu_log("open a host usb device on bus %d, device %d\n", bus_num, addr); + + usb_ep_init(udev); + usb_host_ep_update(s); +@@ -1146,6 +1149,8 @@ static int usb_host_close(USBHostDevice *s) + usb_device_detach(udev); + } + ++ 
qemu_log("begin to reset the usb device, bus : %d, device : %d\n", ++ s->bus_num, s->addr); + usb_host_release_interfaces(s); + libusb_reset_device(s->dh); + usb_host_attach_kernel(s); +diff --git a/hw/virtio/virtio-scsi-pci.c b/hw/virtio/virtio-scsi-pci.c +index e8e3442f38..e542d47162 100644 +--- a/hw/virtio/virtio-scsi-pci.c ++++ b/hw/virtio/virtio-scsi-pci.c +@@ -20,6 +20,7 @@ + #include "qemu/module.h" + #include "hw/virtio/virtio-pci.h" + #include "qom/object.h" ++#include "qemu/log.h" + + typedef struct VirtIOSCSIPCI VirtIOSCSIPCI; + +@@ -51,6 +52,8 @@ static void virtio_scsi_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) + VirtIOSCSIConf *conf = &dev->vdev.parent_obj.conf; + char *bus_name; + ++ qemu_log("virtio scsi HBA %s begin to initialize.\n", ++ !proxy->id ? "NULL" : proxy->id); + if (conf->num_queues == VIRTIO_SCSI_AUTO_NUM_QUEUES) { + conf->num_queues = + virtio_pci_optimal_num_queues(VIRTIO_SCSI_VQ_NUM_FIXED); +diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c +index b0f948d337..e78462b857 100644 +--- a/monitor/qmp-cmds.c ++++ b/monitor/qmp-cmds.c +@@ -32,6 +32,7 @@ + #include "hw/mem/memory-device.h" + #include "hw/intc/intc.h" + #include "hw/rdma/rdma.h" ++#include "qemu/log.h" + + NameInfo *qmp_query_name(Error **errp) + { +@@ -110,8 +111,10 @@ void qmp_cont(Error **errp) + } + + if (runstate_check(RUN_STATE_INMIGRATE)) { ++ qemu_log("qmp cont is received in migration\n"); + autostart = 1; + } else { ++ qemu_log("qmp cont is received and vm is started\n"); + vm_start(); + } + } +diff --git a/os-posix.c b/os-posix.c +index 52ef6990ff..8f70ee0534 100644 +--- a/os-posix.c ++++ b/os-posix.c +@@ -306,6 +306,7 @@ int os_mlock(void) + #ifdef HAVE_MLOCKALL + int ret = 0; + ++ qemu_log("do mlockall\n"); + ret = mlockall(MCL_CURRENT | MCL_FUTURE); + if (ret < 0) { + error_report("mlockall: %s", strerror(errno)); +diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c +index 7a215cbfd7..e33efd3740 100644 +--- a/qapi/qmp-dispatch.c ++++ 
b/qapi/qmp-dispatch.c +@@ -25,6 +25,7 @@ + #include "qemu/coroutine.h" + #include "qemu/main-loop.h" + #include "qemu/log.h" ++#include "qapi/qmp/qstring.h" + + Visitor *qobject_input_visitor_new_qmp(QObject *obj) + { +@@ -220,6 +221,20 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ + + assert(!(oob && qemu_in_coroutine())); + assert(monitor_cur() == NULL); ++ ++ json = qobject_to_json(QOBJECT(args)); ++ if (json) { ++ if ((strcmp(command, "query-block-jobs") != 0) ++ && (strcmp(command, "query-migrate") != 0) ++ && (strcmp(command, "query-blockstats") != 0) ++ && (strcmp(command, "query-balloon") != 0) ++ && (strcmp(command, "set_password") != 0)) { ++ qemu_log("qmp_cmd_name: %s, arguments: %s\n", ++ command, json->str); ++ } ++ g_string_free(json, true); ++ } ++ + if (!!(cmd->options & QCO_COROUTINE) == qemu_in_coroutine()) { + monitor_set_cur(qemu_coroutine_self(), cur_mon); + cmd->fn(args, &ret, &err); +diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c +index b10e483a9a..5b35704b5e 100644 +--- a/system/qdev-monitor.c ++++ b/system/qdev-monitor.c +@@ -644,6 +644,7 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts, + if (path != NULL) { + bus = qbus_find(path, errp); + if (!bus) { ++ qemu_log("can not find bus for %s\n", driver); + return NULL; + } + if (!object_dynamic_cast(OBJECT(bus), dc->bus_type)) { +@@ -714,6 +715,8 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts, + object_set_properties_from_keyval(&dev->parent_obj, dev->opts, from_json, + errp); + if (*errp) { ++ qemu_log("the bus %s -driver %s set property failed\n", ++ bus ? bus->name : "None", driver); + goto err_del_dev; + } + qemu_log("add qdev %s:%s success\n", driver, dev->id ? 
dev->id : "none"); +@@ -738,6 +741,8 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp) + + ret = qdev_device_add_from_qdict(qdict, false, errp); + if (ret) { ++ qemu_log("add qdev %s:%s success\n", qemu_opt_get(opts, "driver"), ++ qemu_opts_id(opts) ? qemu_opts_id(opts) : "none"); + qemu_opts_del(opts); + } + qobject_unref(qdict); +-- +2.27.0 + diff --git a/feature-Add-logs-for-vm-start-and-destroy.patch b/feature-Add-logs-for-vm-start-and-destroy.patch new file mode 100644 index 0000000000000000000000000000000000000000..b3964bdf4efdb14f7e62d9f914938c5da7429296 --- /dev/null +++ b/feature-Add-logs-for-vm-start-and-destroy.patch @@ -0,0 +1,158 @@ +From 9a47271fb6c855ec92e087d59d65f3cc0c684725 Mon Sep 17 00:00:00 2001 +From: "wangxinxin.wang@huawei.com" +Date: Sun, 17 Mar 2024 15:04:09 +0800 +Subject: [PATCH] feature: Add logs for vm start and destroy + +Add QEMU_LOG for vm start and destroy + +Signed-off-by: miaoyubo +Signed-off-by: Jingyi Wang +Signed-off-by: Yuan Zhang +--- + hw/acpi/core.c | 4 ++++ + hw/core/reset.c | 2 ++ + system/main.c | 2 ++ + system/runstate.c | 2 ++ + system/vl.c | 6 ++++++ + 5 files changed, 16 insertions(+) + +diff --git a/hw/acpi/core.c b/hw/acpi/core.c +index ec5e127d17..b6241f70e9 100644 +--- a/hw/acpi/core.c ++++ b/hw/acpi/core.c +@@ -24,6 +24,7 @@ + #include "hw/acpi/acpi.h" + #include "hw/nvram/fw_cfg.h" + #include "qemu/config-file.h" ++#include "qemu/log.h" + #include "qapi/error.h" + #include "qapi/opts-visitor.h" + #include "qapi/qapi-events-run-state.h" +@@ -588,13 +589,16 @@ static void acpi_pm_cnt_write(void *opaque, hwaddr addr, uint64_t val, + uint16_t sus_typ = (val >> 10) & 7; + switch (sus_typ) { + case 0: /* soft power off */ ++ qemu_log("VM will be soft power off\n"); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + break; + case 1: ++ qemu_log("VM will be suspend state\n"); + qemu_system_suspend_request(); + break; + default: + if (sus_typ == ar->pm1.cnt.s4_val) { /* S4 request */ ++ 
qemu_log("VM will be S4 state\n"); + qapi_event_send_suspend_disk(); + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + } +diff --git a/hw/core/reset.c b/hw/core/reset.c +index d3263b613e..fa63bfedb7 100644 +--- a/hw/core/reset.c ++++ b/hw/core/reset.c +@@ -25,6 +25,7 @@ + + #include "qemu/osdep.h" + #include "qemu/queue.h" ++#include "qemu/log.h" + #include "sysemu/reset.h" + + /* reset/shutdown handler */ +@@ -75,6 +76,7 @@ void qemu_devices_reset(ShutdownCause reason) + { + QEMUResetEntry *re, *nre; + ++ qemu_log("reset all devices\n"); + /* reset all devices */ + QTAILQ_FOREACH_SAFE(re, &reset_handlers, entry, nre) { + if (reason == SHUTDOWN_CAUSE_SNAPSHOT_LOAD && +diff --git a/system/main.c b/system/main.c +index 9b91d21ea8..28bb283ebf 100644 +--- a/system/main.c ++++ b/system/main.c +@@ -23,6 +23,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/log.h" + #include "qemu-main.h" + #include "sysemu/sysemu.h" + +@@ -34,6 +35,7 @@ int qemu_default_main(void) + { + int status; + ++ qemu_log("qemu enter main_loop\n"); + status = qemu_main_loop(); + qemu_cleanup(status); + +diff --git a/system/runstate.c b/system/runstate.c +index 62e6db8d42..538c645326 100644 +--- a/system/runstate.c ++++ b/system/runstate.c +@@ -769,9 +769,11 @@ static bool main_loop_should_exit(int *status) + } + if (qemu_powerdown_requested()) { + qemu_system_powerdown(); ++ qemu_log("domain is power down by outside operation\n"); + } + if (qemu_vmstop_requested(&r)) { + vm_stop(r); ++ qemu_log("domain is stopped by outside operation\n"); + } + return false; + } +diff --git a/system/vl.c b/system/vl.c +index 2bcd9efb9a..165c3cae8a 100644 +--- a/system/vl.c ++++ b/system/vl.c +@@ -26,6 +26,7 @@ + #include "qemu/help-texts.h" + #include "qemu/datadir.h" + #include "qemu/units.h" ++#include "qemu/log.h" + #include "exec/cpu-common.h" + #include "exec/page-vary.h" + #include "hw/qdev-properties.h" +@@ -2633,6 +2634,7 @@ static void qemu_create_cli_devices(void) + } + + /* init 
generic devices */ ++ qemu_log("device init start\n"); + rom_set_order_override(FW_CFG_ORDER_OVERRIDE_DEVICE); + qemu_opts_foreach(qemu_find_opts("device"), + device_init_func, NULL, &error_fatal); +@@ -2778,6 +2780,7 @@ void qemu_init(int argc, char **argv) + + qemu_init_subsystems(); + ++ qemu_log("qemu pid is %d, options parsing start\n", getpid()); + /* first pass of option parsing */ + optind = 1; + while (optind < argc) { +@@ -2997,6 +3000,7 @@ void qemu_init(int argc, char **argv) + exit(0); + break; + case QEMU_OPTION_m: ++ qemu_log("memory options parse start\n"); + opts = qemu_opts_parse_noisily(qemu_find_opts("memory"), optarg, true); + if (opts == NULL) { + exit(1); +@@ -3714,6 +3718,7 @@ void qemu_init(int argc, char **argv) + */ + + machine_class = MACHINE_GET_CLASS(current_machine); ++ qemu_log("configure accelerator %s start\n", machine_class->name); + if (!qtest_enabled() && machine_class->deprecation_reason) { + warn_report("Machine type '%s' is deprecated: %s", + machine_class->name, machine_class->deprecation_reason); +@@ -3732,6 +3737,7 @@ void qemu_init(int argc, char **argv) + */ + migration_object_init(); + ++ qemu_log("machine init start\n"); + /* parse features once if machine provides default cpu_type */ + current_machine->cpu_type = machine_class->default_cpu_type; + if (cpu_option) { +-- +2.27.0 + diff --git a/fix-qemu-core-when-vhost-user-net-config-with-server.patch b/fix-qemu-core-when-vhost-user-net-config-with-server.patch new file mode 100644 index 0000000000000000000000000000000000000000..68ba7f2bc15cd6da54f26c3bf5886786bb609324 --- /dev/null +++ b/fix-qemu-core-when-vhost-user-net-config-with-server.patch @@ -0,0 +1,46 @@ +From 97335ac382e36db18a61d3891f1fafd15475822e Mon Sep 17 00:00:00 2001 +From: caojinhuahw +Date: Mon, 19 Dec 2022 12:35:50 +0000 +Subject: [PATCH] fix qemu-core when vhost-user-net config with server mode + +commit 3a223111d7 set default reconnect for vhost-user-net +device, if vhost-user-net config with 
server mode will +cause the core when ovs client stop. +tcp_chr_disconnect ---> set tcp_char state disconnect +tcp_chr start reconnect ---> set tcp_char state connecting +tcp_char is listen ---> call tcp_chr_accept() +fun tcp_char_accept() set tcp_char state to connecting, but +current tcp_char state already is connecting, assert failed +in tcp_char_change_state() raise qemu core + assert(s->state == TCP_CHARDEV_STATE_DISCONNECTED) + +this commit check tcp_char mode, if tcp_char config with server +mode, don't set reconnect time for tcp_chr. + +fix: 3a223111d7 vhost-user: Add support reconnect vhost-user socket + +Signed-off-by: caojinhuahw +--- + chardev/char-socket.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index 9c60e15c8e..0c9ab069ae 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -347,6 +347,12 @@ static void tcp_chr_set_reconnect_time(Chardev *chr, + void qemu_chr_set_reconnect_time(Chardev *chr, int64_t reconnect_time) + { + ChardevClass *cc = CHARDEV_GET_CLASS(chr); ++ SocketChardev *s = SOCKET_CHARDEV(chr); ++ ++ /* if sock dev is listen, dont set reconnect time */ ++ if (s->is_listen) { ++ return; ++ } + + if (cc->chr_set_reconnect_time) { + cc->chr_set_reconnect_time(chr, reconnect_time); +-- +2.27.0 + diff --git a/freeclock-add-qmp-command-to-get-time-offset-of-vm-i.patch b/freeclock-add-qmp-command-to-get-time-offset-of-vm-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..ffbb8a8643d72a257a6914007a971d6d63695704 --- /dev/null +++ b/freeclock-add-qmp-command-to-get-time-offset-of-vm-i.patch @@ -0,0 +1,129 @@ +From 0a6baf4799dd6e70d7959002ea6ddb998eddbc6d Mon Sep 17 00:00:00 2001 +From: "shenghualong@huawei.com" +Date: Mon, 18 Mar 2024 15:53:43 +0800 +Subject: [PATCH] freeclock: add qmp command to get time offset of vm in + seconds + +When setting the system time in VM, a RTC_CHANGE event will be reported. 
+However, if libvirt is restarted while the event is being reported, the +event will be lost and we will get the old time (not the time we set in +VM) after rebooting the VM. + +We save the delta time in QEMU and add a rtc-date-diff qmp to get the +delta time so that libvirt can get the latest time in VM according to +the qmp after libvirt is restarted. + +Signed-off-by: Peng Liang +Signed-off-by: zhangxinhao +Signed-off-by: Yuan Zhang +--- + hw/core/machine-qmp-cmds.c | 6 ++++++ + include/sysemu/rtc.h | 4 +++- + qapi/misc.json | 9 +++++++++ + qapi/pragma.json | 3 ++- + system/rtc.c | 11 +++++++++++ + 5 files changed, 31 insertions(+), 2 deletions(-) + +diff --git a/hw/core/machine-qmp-cmds.c b/hw/core/machine-qmp-cmds.c +index 3860a50c3b..f1389ef644 100644 +--- a/hw/core/machine-qmp-cmds.c ++++ b/hw/core/machine-qmp-cmds.c +@@ -8,6 +8,7 @@ + */ + + #include "qemu/osdep.h" ++#include "sysemu/rtc.h" + #include "hw/acpi/vmgenid.h" + #include "hw/boards.h" + #include "hw/intc/intc.h" +@@ -373,6 +374,11 @@ HumanReadableText *qmp_x_query_irq(Error **errp) + return human_readable_text_from_str(buf); + } + ++int64_t qmp_query_rtc_date_diff(Error **errp) ++{ ++ return get_rtc_date_diff(); ++} ++ + GuidInfo *qmp_query_vm_generation_id(Error **errp) + { + GuidInfo *info; +diff --git a/include/sysemu/rtc.h b/include/sysemu/rtc.h +index 0fc8ad6fdf..3edae762d4 100644 +--- a/include/sysemu/rtc.h ++++ b/include/sysemu/rtc.h +@@ -54,5 +54,7 @@ void qemu_get_timedate(struct tm *tm, time_t offset); + * then this function will return 3600. 
+ */ + time_t qemu_timedate_diff(struct tm *tm); +- ++time_t get_rtc_date_diff(void); ++void set_rtc_date_diff(time_t diff); ++int64_t qmp_query_rtc_date_diff(Error **errp); + #endif +diff --git a/qapi/misc.json b/qapi/misc.json +index cda2effa81..1832d5f460 100644 +--- a/qapi/misc.json ++++ b/qapi/misc.json +@@ -550,6 +550,15 @@ + 'returns': ['CommandLineOptionInfo'], + 'allow-preconfig': true} + ++## ++# @query-rtc-date-diff: ++# ++# get vm's time offset ++# ++# Since: 2.8 ++## ++{ 'command': 'query-rtc-date-diff', 'returns': 'int64' } ++ + ## + # @RTC_CHANGE: + # +diff --git a/qapi/pragma.json b/qapi/pragma.json +index 0aa4eeddd3..7a07b44bb1 100644 +--- a/qapi/pragma.json ++++ b/qapi/pragma.json +@@ -30,7 +30,8 @@ + 'qom-get', + 'query-tpm-models', + 'query-tpm-types', +- 'ringbuf-read' ], ++ 'ringbuf-read', ++ 'query-rtc-date-diff'], + # Externally visible types whose member names may use uppercase + 'member-name-exceptions': [ # visible in: + 'ACPISlotType', # query-acpi-ospm-status +diff --git a/system/rtc.c b/system/rtc.c +index 4904581abe..e16b5fffc5 100644 +--- a/system/rtc.c ++++ b/system/rtc.c +@@ -44,6 +44,7 @@ static time_t rtc_ref_start_datetime; + static int rtc_realtime_clock_offset; /* used only with QEMU_CLOCK_REALTIME */ + static int rtc_host_datetime_offset = -1; /* valid & used only with + RTC_BASE_DATETIME */ ++static time_t rtc_date_diff = 0; + QEMUClockType rtc_clock; + /***********************************************************/ + /* RTC reference time/date access */ +@@ -108,6 +109,16 @@ time_t qemu_timedate_diff(struct tm *tm) + return seconds - qemu_ref_timedate(QEMU_CLOCK_HOST); + } + ++time_t get_rtc_date_diff(void) ++{ ++ return rtc_date_diff; ++} ++ ++void set_rtc_date_diff(time_t diff) ++{ ++ rtc_date_diff = diff; ++} ++ + static void configure_rtc_base_datetime(const char *startdate) + { + time_t rtc_start_datetime; +-- +2.27.0 + diff --git a/freeclock-set-rtc_date_diff-for-X86.patch b/freeclock-set-rtc_date_diff-for-X86.patch new 
file mode 100644 index 0000000000000000000000000000000000000000..4711551f99a870a82f292cc3e9ba39e6f695c163 --- /dev/null +++ b/freeclock-set-rtc_date_diff-for-X86.patch @@ -0,0 +1,31 @@ +From 0a0010fe0656a63e82aea495ab0a59145d3b5750 Mon Sep 17 00:00:00 2001 +From: "shenghualong@huawei.com" +Date: Thu, 21 Mar 2024 12:26:38 +0800 +Subject: [PATCH] freeclock: set rtc_date_diff for X86 + +Set rtc_date_diff in mc146818rtc. + +Signed-off-by: l00500761 +Signed-off-by: zhangxinhao +Signed-off-by: Yuan Zhang +--- + hw/rtc/mc146818rtc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/rtc/mc146818rtc.c b/hw/rtc/mc146818rtc.c +index 2d391a8396..e61c76d060 100644 +--- a/hw/rtc/mc146818rtc.c ++++ b/hw/rtc/mc146818rtc.c +@@ -606,7 +606,8 @@ static void rtc_set_time(MC146818RtcState *s) + s->base_rtc = mktimegm(&tm); + s->last_update = qemu_clock_get_ns(rtc_clock); + +- qapi_event_send_rtc_change(qemu_timedate_diff(&tm), qom_path); ++ set_rtc_date_diff(qemu_timedate_diff(&tm)); ++ qapi_event_send_rtc_change(get_rtc_date_diff(), qom_path); + } + + static void rtc_set_cmos(MC146818RtcState *s, const struct tm *tm) +-- +2.27.0 + diff --git a/freeclock-set-rtc_date_diff-for-arm.patch b/freeclock-set-rtc_date_diff-for-arm.patch new file mode 100644 index 0000000000000000000000000000000000000000..8c6b15ab26be27990b2d8028fc647dd48fca9312 --- /dev/null +++ b/freeclock-set-rtc_date_diff-for-arm.patch @@ -0,0 +1,31 @@ +From 156be254a48d1d9b7aadcbfa4423485c592bc75d Mon Sep 17 00:00:00 2001 +From: "shenghualong@huawei.com" +Date: Thu, 21 Mar 2024 11:21:14 +0800 +Subject: [PATCH] freeclock: set rtc_date_diff for arm + +Set rtc_date_diff in pl031. 
+ +Signed-off-by: Peng Liang +Signed-off-by: zhangxinhao +Signed-off-by: Yuan Zhang +--- + hw/rtc/pl031.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/rtc/pl031.c b/hw/rtc/pl031.c +index b01d0e75d1..f2e6baebba 100644 +--- a/hw/rtc/pl031.c ++++ b/hw/rtc/pl031.c +@@ -144,7 +144,8 @@ static void pl031_write(void * opaque, hwaddr offset, + s->tick_offset += value - pl031_get_count(s); + + qemu_get_timedate(&tm, s->tick_offset); +- qapi_event_send_rtc_change(qemu_timedate_diff(&tm), qom_path); ++ set_rtc_date_diff(qemu_timedate_diff(&tm)); ++ qapi_event_send_rtc_change(get_rtc_date_diff(), qom_path); + + pl031_set_alarm(s); + break; +-- +2.27.0 + diff --git a/hw-acpi-ACPI-AML-Changes-to-reflect-the-correct-_STA.patch b/hw-acpi-ACPI-AML-Changes-to-reflect-the-correct-_STA.patch new file mode 100644 index 0000000000000000000000000000000000000000..34fa1c91b9d0e375f7f85a4477b3d897b63d936f --- /dev/null +++ b/hw-acpi-ACPI-AML-Changes-to-reflect-the-correct-_STA.patch @@ -0,0 +1,187 @@ +From 19a8fbccbc997110f472df308813ad2d7738065c Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Mon, 14 Nov 2022 02:25:28 +0000 +Subject: [PATCH] hw/acpi: ACPI/AML Changes to reflect the correct + _STA.{PRES,ENA} Bits to Guest + +ACPI AML changes to properly reflect the _STA.PRES and _STA.ENA Bits to the +guest during initialization, when CPUs are hotplugged and after CPUs are +hot-unplugged. 
+ +Signed-off-by: Salil Mehta +--- + hw/acpi/cpu.c | 49 +++++++++++++++++++++++++++++++--- + hw/acpi/generic_event_device.c | 11 ++++++++ + include/hw/acpi/cpu.h | 2 ++ + 3 files changed, 58 insertions(+), 4 deletions(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index c8c11e51c6..991f1d4181 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -64,10 +64,11 @@ static uint64_t cpu_hotplug_rd(void *opaque, hwaddr addr, unsigned size) + cdev = &cpu_st->devs[cpu_st->selector]; + switch (addr) { + case ACPI_CPU_FLAGS_OFFSET_RW: /* pack and return is_* fields */ +- val |= cdev->cpu ? 1 : 0; ++ val |= cdev->is_enabled ? 1 : 0; + val |= cdev->is_inserting ? 2 : 0; + val |= cdev->is_removing ? 4 : 0; + val |= cdev->fw_remove ? 16 : 0; ++ val |= cdev->is_present ? 32 : 0; + trace_cpuhp_acpi_read_flags(cpu_st->selector, val); + break; + case ACPI_CPU_CMD_DATA_OFFSET_RW: +@@ -229,7 +230,21 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, + struct CPUState *cpu = CPU(id_list->cpus[i].cpu); + if (qemu_present_cpu(cpu)) { + state->devs[i].cpu = cpu; ++ state->devs[i].is_present = true; ++ } else { ++ if (qemu_persistent_cpu(cpu)) { ++ state->devs[i].is_present = true; ++ } else { ++ state->devs[i].is_present = false; ++ } + } ++ ++ if (qemu_enabled_cpu(cpu)) { ++ state->devs[i].is_enabled = true; ++ } else { ++ state->devs[i].is_enabled = false; ++ } ++ + state->devs[i].arch_id = id_list->cpus[i].arch_id; + } + memory_region_init_io(&state->ctrl_reg, owner, &cpu_hotplug_ops, state, +@@ -262,6 +277,8 @@ void acpi_cpu_plug_cb(HotplugHandler *hotplug_dev, + } + + cdev->cpu = CPU(dev); ++ cdev->is_present = true; ++ cdev->is_enabled = true; + if (dev->hotplugged) { + cdev->is_inserting = true; + acpi_send_event(DEVICE(hotplug_dev), ACPI_CPU_HOTPLUG_STATUS); +@@ -293,6 +310,11 @@ void acpi_cpu_unplug_cb(CPUHotplugState *cpu_st, + return; + } + ++ cdev->is_enabled = false; ++ if (!qemu_persistent_cpu(CPU(dev))) { ++ cdev->is_present = false; ++ } ++ + cdev->cpu = NULL; 
+ } + +@@ -303,6 +325,8 @@ static const VMStateDescription vmstate_cpuhp_sts = { + .fields = (VMStateField[]) { + VMSTATE_BOOL(is_inserting, AcpiCpuStatus), + VMSTATE_BOOL(is_removing, AcpiCpuStatus), ++ VMSTATE_BOOL(is_present, AcpiCpuStatus), ++ VMSTATE_BOOL(is_enabled, AcpiCpuStatus), + VMSTATE_UINT32(ost_event, AcpiCpuStatus), + VMSTATE_UINT32(ost_status, AcpiCpuStatus), + VMSTATE_END_OF_LIST() +@@ -340,6 +364,7 @@ const VMStateDescription vmstate_cpu_hotplug = { + #define CPU_REMOVE_EVENT "CRMV" + #define CPU_EJECT_EVENT "CEJ0" + #define CPU_FW_EJECT_EVENT "CEJF" ++#define CPU_PRESENT "CPRS" + + void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + build_madt_cpu_fn build_madt_cpu, +@@ -400,7 +425,9 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + aml_append(field, aml_named_field(CPU_EJECT_EVENT, 1)); + /* tell firmware to do device eject, write only */ + aml_append(field, aml_named_field(CPU_FW_EJECT_EVENT, 1)); +- aml_append(field, aml_reserved_field(3)); ++ /* 1 if present, read only */ ++ aml_append(field, aml_named_field(CPU_PRESENT, 1)); ++ aml_append(field, aml_reserved_field(2)); + aml_append(field, aml_named_field(CPU_COMMAND, 8)); + aml_append(cpu_ctrl_dev, field); + +@@ -430,6 +457,7 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + Aml *ctrl_lock = aml_name("%s.%s", cphp_res_path, CPU_LOCK); + Aml *cpu_selector = aml_name("%s.%s", cphp_res_path, CPU_SELECTOR); + Aml *is_enabled = aml_name("%s.%s", cphp_res_path, CPU_ENABLED); ++ Aml *is_present = aml_name("%s.%s", cphp_res_path, CPU_PRESENT); + Aml *cpu_cmd = aml_name("%s.%s", cphp_res_path, CPU_COMMAND); + Aml *cpu_data = aml_name("%s.%s", cphp_res_path, CPU_DATA); + Aml *ins_evt = aml_name("%s.%s", cphp_res_path, CPU_INSERT_EVENT); +@@ -458,13 +486,26 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + { + Aml *idx = aml_arg(0); + Aml *sta = aml_local(0); ++ Aml 
*ifctx2; ++ Aml *else_ctx; + + aml_append(method, aml_acquire(ctrl_lock, 0xFFFF)); + aml_append(method, aml_store(idx, cpu_selector)); + aml_append(method, aml_store(zero, sta)); +- ifctx = aml_if(aml_equal(is_enabled, one)); ++ ifctx = aml_if(aml_equal(is_present, one)); + { +- aml_append(ifctx, aml_store(aml_int(0xF), sta)); ++ ifctx2 = aml_if(aml_equal(is_enabled, one)); ++ { ++ /* cpu is present and enabled */ ++ aml_append(ifctx2, aml_store(aml_int(0xF), sta)); ++ } ++ aml_append(ifctx, ifctx2); ++ else_ctx = aml_else(); ++ { ++ /* cpu is present but disabled */ ++ aml_append(else_ctx, aml_store(aml_int(0xD), sta)); ++ } ++ aml_append(ifctx, else_ctx); + } + aml_append(method, ifctx); + aml_append(method, aml_release(ctrl_lock)); +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index d2fa1d0e4a..b84602b238 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -319,6 +319,16 @@ static const VMStateDescription vmstate_memhp_state = { + } + }; + ++static const VMStateDescription vmstate_cpuhp_state = { ++ .name = "acpi-ged/cpuhp", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .fields = (VMStateField[]) { ++ VMSTATE_CPU_HOTPLUG(cpuhp_state, AcpiGedState), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ + static const VMStateDescription vmstate_ged_state = { + .name = "acpi-ged-state", + .version_id = 1, +@@ -367,6 +377,7 @@ static const VMStateDescription vmstate_acpi_ged = { + }, + .subsections = (const VMStateDescription * []) { + &vmstate_memhp_state, ++ &vmstate_cpuhp_state, + &vmstate_ghes_state, + NULL + } +diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h +index b31a2e50d9..fced952152 100644 +--- a/include/hw/acpi/cpu.h ++++ b/include/hw/acpi/cpu.h +@@ -23,6 +23,8 @@ typedef struct AcpiCpuStatus { + uint64_t arch_id; + bool is_inserting; + bool is_removing; ++ bool is_present; ++ bool is_enabled; + bool fw_remove; + uint32_t ost_event; + uint32_t ost_status; +-- +2.27.0 + diff --git 
a/hw-acpi-Add-ACPI-CPU-hotplug-init-stub.patch b/hw-acpi-Add-ACPI-CPU-hotplug-init-stub.patch new file mode 100644 index 0000000000000000000000000000000000000000..072d4eb9bac4ea0a553e14b6e2ce85a6961cc19b --- /dev/null +++ b/hw-acpi-Add-ACPI-CPU-hotplug-init-stub.patch @@ -0,0 +1,34 @@ +From e442d0f8670dc4218ab4beebe645e369f925410d Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 19 Aug 2023 00:26:20 +0000 +Subject: [PATCH] hw/acpi: Add ACPI CPU hotplug init stub + +ACPI CPU hotplug related initialization should only happen if ACPI_CPU_HOTPLUG +support has been enabled for a particular architecture. Add cpu_hotplug_hw_init() +stub to avoid compilation break. + +Signed-off-by: Salil Mehta +--- + hw/acpi/acpi-cpu-hotplug-stub.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/acpi/acpi-cpu-hotplug-stub.c b/hw/acpi/acpi-cpu-hotplug-stub.c +index 3fc4b14c26..c6c61bb9cd 100644 +--- a/hw/acpi/acpi-cpu-hotplug-stub.c ++++ b/hw/acpi/acpi-cpu-hotplug-stub.c +@@ -19,6 +19,12 @@ void legacy_acpi_cpu_hotplug_init(MemoryRegion *parent, Object *owner, + return; + } + ++void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, ++ CPUHotplugState *state, hwaddr base_addr) ++{ ++ return; ++} ++ + void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, ACPIOSTInfoList ***list) + { + return; +-- +2.27.0 + diff --git a/hw-acpi-Init-GED-framework-with-cpu-hotplug-events.patch b/hw-acpi-Init-GED-framework-with-cpu-hotplug-events.patch new file mode 100644 index 0000000000000000000000000000000000000000..191328fd094c386d672ca9c32341105fa1cc0b1a --- /dev/null +++ b/hw-acpi-Init-GED-framework-with-cpu-hotplug-events.patch @@ -0,0 +1,81 @@ +From de1c8d6be3de67ff9854e9b008a000e1898aaacb Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Mon, 8 Jun 2020 21:50:08 +0100 +Subject: [PATCH] hw/acpi: Init GED framework with cpu hotplug events + +ACPI GED(as described in the ACPI 6.2 spec) can be used to generate ACPI events +when OSPM/guest receives an interrupt listed in the _CRS 
object of GED. OSPM +then maps or demultiplexes the event by evaluating _EVT method. + +This change adds the support of cpu hotplug event initialization in the +existing GED framework. + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/acpi/generic_event_device.c | 8 ++++++++ + include/hw/acpi/generic_event_device.h | 5 +++++ + 2 files changed, 13 insertions(+) + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index a3d31631fe..d2fa1d0e4a 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -25,6 +25,7 @@ static const uint32_t ged_supported_events[] = { + ACPI_GED_MEM_HOTPLUG_EVT, + ACPI_GED_PWR_DOWN_EVT, + ACPI_GED_NVDIMM_HOTPLUG_EVT, ++ ACPI_GED_CPU_HOTPLUG_EVT, + }; + + /* +@@ -400,6 +401,13 @@ static void acpi_ged_initfn(Object *obj) + memory_region_init_io(&ged_st->regs, obj, &ged_regs_ops, ged_st, + TYPE_ACPI_GED "-regs", ACPI_GED_REG_COUNT); + sysbus_init_mmio(sbd, &ged_st->regs); ++ ++ s->cpuhp.device = OBJECT(s); ++ memory_region_init(&s->container_cpuhp, OBJECT(dev), "cpuhp container", ++ ACPI_CPU_HOTPLUG_REG_LEN); ++ sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container_cpuhp); ++ cpu_hotplug_hw_init(&s->container_cpuhp, OBJECT(dev), ++ &s->cpuhp_state, 0); + } + + static void acpi_ged_class_init(ObjectClass *class, void *data) +diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h +index ba84ce0214..a803ea818e 100644 +--- a/include/hw/acpi/generic_event_device.h ++++ b/include/hw/acpi/generic_event_device.h +@@ -60,6 +60,7 @@ + #define HW_ACPI_GENERIC_EVENT_DEVICE_H + + #include "hw/sysbus.h" ++#include "hw/acpi/cpu_hotplug.h" + #include "hw/acpi/memory_hotplug.h" + #include "hw/acpi/ghes.h" + #include "qom/object.h" +@@ -95,6 +96,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(AcpiGedState, ACPI_GED) + #define ACPI_GED_MEM_HOTPLUG_EVT 0x1 + #define 
ACPI_GED_PWR_DOWN_EVT 0x2 + #define ACPI_GED_NVDIMM_HOTPLUG_EVT 0x4 ++#define ACPI_GED_CPU_HOTPLUG_EVT 0x8 + + typedef struct GEDState { + MemoryRegion evt; +@@ -106,6 +108,9 @@ struct AcpiGedState { + SysBusDevice parent_obj; + MemHotplugState memhp_state; + MemoryRegion container_memhp; ++ CPUHotplugState cpuhp_state; ++ MemoryRegion container_cpuhp; ++ AcpiCpuHotplug cpuhp; + GEDState ged_state; + uint32_t ged_event_bitmap; + qemu_irq irq; +-- +2.27.0 + diff --git a/hw-acpi-Make-_MAT-method-optional.patch b/hw-acpi-Make-_MAT-method-optional.patch new file mode 100644 index 0000000000000000000000000000000000000000..5695a1981c6efbe5ed71981f97dcf81d8eee5e8e --- /dev/null +++ b/hw-acpi-Make-_MAT-method-optional.patch @@ -0,0 +1,41 @@ +From e9b0d476172e872bf695780a9ffa8072faeb3cd0 Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Mon, 25 Apr 2022 17:40:57 +0100 +Subject: [PATCH] hw/acpi: Make _MAT method optional + +The GICC interface on arm64 vCPUs is statically defined in the MADT, and +doesn't require a _MAT entry. Although the GICC is indicated as present +by the MADT entry, it can only be used from vCPU sysregs, which aren't +accessible until hot-add. 
+ +Co-developed-by: Jean-Philippe Brucker +Signed-off-by: Jean-Philippe Brucker +Co-developed-by: Jonathan Cameron +Signed-off-by: Jonathan Cameron +Signed-off-by: Salil Mehta +--- + hw/acpi/cpu.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index 991f1d4181..c922c380aa 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -720,9 +720,11 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + aml_append(dev, method); + + /* build _MAT object */ +- build_madt_cpu(i, arch_ids, madt_buf, true); /* set enabled flag */ +- aml_append(dev, aml_name_decl("_MAT", +- aml_buffer(madt_buf->len, (uint8_t *)madt_buf->data))); ++ if (build_madt_cpu) { ++ build_madt_cpu(i, arch_ids, madt_buf, true); /* set enabled flag */ ++ aml_append(dev, aml_name_decl("_MAT", ++ aml_buffer(madt_buf->len, (uint8_t *)madt_buf->data))); ++ } + g_array_free(madt_buf, true); + + if (CPU(arch_ids->cpus[i].cpu) != first_cpu) { +-- +2.27.0 + diff --git a/hw-acpi-Move-CPU-ctrl-dev-MMIO-region-len-macro-to-c.patch b/hw-acpi-Move-CPU-ctrl-dev-MMIO-region-len-macro-to-c.patch new file mode 100644 index 0000000000000000000000000000000000000000..9bfb91e15162c90365c621927205f857269bc4c3 --- /dev/null +++ b/hw-acpi-Move-CPU-ctrl-dev-MMIO-region-len-macro-to-c.patch @@ -0,0 +1,52 @@ +From fd6e7e7278e1c0fb08e0a09d9e22157e11b36ece Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sun, 20 Aug 2023 17:11:04 +0000 +Subject: [PATCH] hw/acpi: Move CPU ctrl-dev MMIO region len macro to common + header file + +CPU ctrl-dev MMIO region length could be used in ACPI GED (common ACPI code +across architectures) and various other architecture specific places. To make +these code places independent of compilation order, ACPI_CPU_HOTPLUG_REG_LEN +macro should be moved to a header file. 
+ +Signed-off-by: Salil Mehta +--- + hw/acpi/cpu.c | 2 +- + include/hw/acpi/cpu_hotplug.h | 2 ++ + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index 011d2c6c2d..4b24a25003 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -1,13 +1,13 @@ + #include "qemu/osdep.h" + #include "migration/vmstate.h" + #include "hw/acpi/cpu.h" ++#include "hw/acpi/cpu_hotplug.h" + #include "hw/core/cpu.h" + #include "qapi/error.h" + #include "qapi/qapi-events-acpi.h" + #include "trace.h" + #include "sysemu/numa.h" + +-#define ACPI_CPU_HOTPLUG_REG_LEN 12 + #define ACPI_CPU_SELECTOR_OFFSET_WR 0 + #define ACPI_CPU_FLAGS_OFFSET_RW 4 + #define ACPI_CPU_CMD_OFFSET_WR 5 +diff --git a/include/hw/acpi/cpu_hotplug.h b/include/hw/acpi/cpu_hotplug.h +index 3b932abbbb..48b291e45e 100644 +--- a/include/hw/acpi/cpu_hotplug.h ++++ b/include/hw/acpi/cpu_hotplug.h +@@ -19,6 +19,8 @@ + #include "hw/hotplug.h" + #include "hw/acpi/cpu.h" + ++#define ACPI_CPU_HOTPLUG_REG_LEN 12 ++ + typedef struct AcpiCpuHotplug { + Object *device; + MemoryRegion io; +-- +2.27.0 + diff --git a/hw-acpi-Update-ACPI-GED-framework-to-support-vCPU-Ho.patch b/hw-acpi-Update-ACPI-GED-framework-to-support-vCPU-Ho.patch new file mode 100644 index 0000000000000000000000000000000000000000..1b2a0a023e0ed032379dd39051804d267edad0f1 --- /dev/null +++ b/hw-acpi-Update-ACPI-GED-framework-to-support-vCPU-Ho.patch @@ -0,0 +1,77 @@ +From 0bdb1861985704af9b82e35053b5ab99f7880eb6 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Thu, 7 May 2020 21:30:09 +0100 +Subject: [PATCH] hw/acpi: Update ACPI GED framework to support vCPU Hotplug + +ACPI GED shall be used to convey to the guest kernel about any CPU hot-(un)plug +events. Therefore, existing ACPI GED framework inside QEMU needs to be enhanced +to support CPU hotplug state and events. 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/acpi/generic_event_device.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index ad252e6a91..0266733a54 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -12,6 +12,7 @@ + #include "qemu/osdep.h" + #include "qapi/error.h" + #include "hw/acpi/acpi.h" ++#include "hw/acpi/cpu.h" + #include "hw/acpi/generic_event_device.h" + #include "hw/irq.h" + #include "hw/mem/pc-dimm.h" +@@ -239,6 +240,8 @@ static void acpi_ged_device_plug_cb(HotplugHandler *hotplug_dev, + } else { + acpi_memory_plug_cb(hotplug_dev, &s->memhp_state, dev, errp); + } ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ acpi_cpu_plug_cb(hotplug_dev, &s->cpuhp_state, dev, errp); + } else { + error_setg(errp, "virt: device plug request for unsupported device" + " type: %s", object_get_typename(OBJECT(dev))); +@@ -253,6 +256,8 @@ static void acpi_ged_unplug_request_cb(HotplugHandler *hotplug_dev, + if ((object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) && + !(object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)))) { + acpi_memory_unplug_request_cb(hotplug_dev, &s->memhp_state, dev, errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ acpi_cpu_unplug_request_cb(hotplug_dev, &s->cpuhp_state, dev, errp); + } else { + error_setg(errp, "acpi: device unplug request for unsupported device" + " type: %s", object_get_typename(OBJECT(dev))); +@@ -266,6 +271,8 @@ static void acpi_ged_unplug_cb(HotplugHandler *hotplug_dev, + + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + acpi_memory_unplug_cb(&s->memhp_state, dev, errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ acpi_cpu_unplug_cb(&s->cpuhp_state, dev, errp); + } else { + error_setg(errp, "acpi: device unplug for unsupported device" + " type: %s", 
object_get_typename(OBJECT(dev))); +@@ -277,6 +284,7 @@ static void acpi_ged_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list) + AcpiGedState *s = ACPI_GED(adev); + + acpi_memory_ospm_status(&s->memhp_state, list); ++ acpi_cpu_ospm_status(&s->cpuhp_state, list); + } + + static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev) +@@ -291,6 +299,8 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev) + sel = ACPI_GED_PWR_DOWN_EVT; + } else if (ev & ACPI_NVDIMM_HOTPLUG_STATUS) { + sel = ACPI_GED_NVDIMM_HOTPLUG_EVT; ++ } else if (ev & ACPI_CPU_HOTPLUG_STATUS) { ++ sel = ACPI_GED_CPU_HOTPLUG_EVT; + } else { + /* Unknown event. Return without generating interrupt. */ + warn_report("GED: Unsupported event %d. No irq injected", ev); +-- +2.27.0 + diff --git a/hw-acpi-Update-CPUs-AML-with-cpu-ctrl-dev-change.patch b/hw-acpi-Update-CPUs-AML-with-cpu-ctrl-dev-change.patch new file mode 100644 index 0000000000000000000000000000000000000000..91d4c4d78e6922d07c7981c2a886c70d07c45d2d --- /dev/null +++ b/hw-acpi-Update-CPUs-AML-with-cpu-ctrl-dev-change.patch @@ -0,0 +1,118 @@ +From 06059c960d863c21c7d9cf4829ad2078692ed9e1 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Fri, 8 May 2020 13:27:57 +0100 +Subject: [PATCH] hw/acpi: Update CPUs AML with cpu-(ctrl)dev change + +CPUs Control device(\\_SB.PCI0) register interface for the x86 arch is based on +PCI and is IO port based and hence existing cpus AML code assumes _CRS objects +would evaluate to a system resource which describes IO Port address. But on ARM +arch CPUs control device(\\_SB.PRES) register interface is memory-mapped hence +_CRS object should evaluate to system resource which describes memory-mapped +base address. + +This cpus AML code change updates the existing inerface of the build cpus AML +function to accept both IO/MEMORY type regions and update the _CRS object +correspondingly. 
+ +NOTE: Beside above CPU scan shall be triggered when OSPM evaluates _EVT method + part of the GED framework which is covered in subsequent patch. + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/acpi/cpu.c | 23 ++++++++++++++++------- + hw/i386/acpi-build.c | 3 ++- + include/hw/acpi/cpu.h | 5 +++-- + 3 files changed, 21 insertions(+), 10 deletions(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index cabeb4e86b..cf0c7e8538 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -342,9 +342,10 @@ const VMStateDescription vmstate_cpu_hotplug = { + #define CPU_FW_EJECT_EVENT "CEJF" + + void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, +- build_madt_cpu_fn build_madt_cpu, hwaddr io_base, ++ build_madt_cpu_fn build_madt_cpu, hwaddr base_addr, + const char *res_root, +- const char *event_handler_method) ++ const char *event_handler_method, ++ AmlRegionSpace rs) + { + Aml *ifctx; + Aml *field; +@@ -369,13 +370,19 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + aml_append(cpu_ctrl_dev, aml_mutex(CPU_LOCK, 0)); + + crs = aml_resource_template(); +- aml_append(crs, aml_io(AML_DECODE16, io_base, io_base, 1, ++ if (rs == AML_SYSTEM_IO) { ++ aml_append(crs, aml_io(AML_DECODE16, base_addr, base_addr, 1, + ACPI_CPU_HOTPLUG_REG_LEN)); ++ } else { ++ aml_append(crs, aml_memory32_fixed(base_addr, ++ ACPI_CPU_HOTPLUG_REG_LEN, AML_READ_WRITE)); ++ } ++ + aml_append(cpu_ctrl_dev, aml_name_decl("_CRS", crs)); + + /* declare CPU hotplug MMIO region with related access fields */ + aml_append(cpu_ctrl_dev, +- aml_operation_region("PRST", AML_SYSTEM_IO, aml_int(io_base), ++ aml_operation_region("PRST", rs, aml_int(base_addr), + ACPI_CPU_HOTPLUG_REG_LEN)); + + field = aml_field("PRST", AML_BYTE_ACC, AML_NOLOCK, +@@ -699,9 +706,11 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + 
aml_append(sb_scope, cpus_dev); + aml_append(table, sb_scope); + +- method = aml_method(event_handler_method, 0, AML_NOTSERIALIZED); +- aml_append(method, aml_call0("\\_SB.CPUS." CPU_SCAN_METHOD)); +- aml_append(table, method); ++ if (event_handler_method) { ++ method = aml_method(event_handler_method, 0, AML_NOTSERIALIZED); ++ aml_append(method, aml_call0("\\_SB.CPUS." CPU_SCAN_METHOD)); ++ aml_append(table, method); ++ } + + g_free(cphp_res_path); + } +diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c +index 80db183b78..db4ca8a66a 100644 +--- a/hw/i386/acpi-build.c ++++ b/hw/i386/acpi-build.c +@@ -1546,7 +1546,8 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, + .fw_unplugs_cpu = pm->smi_on_cpu_unplug, + }; + build_cpus_aml(dsdt, machine, opts, pc_madt_cpu_entry, +- pm->cpu_hp_io_base, "\\_SB.PCI0", "\\_GPE._E02"); ++ pm->cpu_hp_io_base, "\\_SB.PCI0", "\\_GPE._E02", ++ AML_SYSTEM_IO); + } + + if (pcms->memhp_io_base && nr_mem) { +diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h +index 209e1773f8..76bc7eb251 100644 +--- a/include/hw/acpi/cpu.h ++++ b/include/hw/acpi/cpu.h +@@ -60,9 +60,10 @@ typedef void (*build_madt_cpu_fn)(int uid, const CPUArchIdList *apic_ids, + GArray *entry, bool force_enabled); + + void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, +- build_madt_cpu_fn build_madt_cpu, hwaddr io_base, ++ build_madt_cpu_fn build_madt_cpu, hwaddr base_addr, + const char *res_root, +- const char *event_handler_method); ++ const char *event_handler_method, ++ AmlRegionSpace rs); + + void acpi_cpu_ospm_status(CPUHotplugState *cpu_st, ACPIOSTInfoList ***list); + +-- +2.27.0 + diff --git a/hw-acpi-Update-GED-_EVT-method-AML-with-cpu-scan.patch b/hw-acpi-Update-GED-_EVT-method-AML-with-cpu-scan.patch new file mode 100644 index 0000000000000000000000000000000000000000..a2dceaf31fbf7503d09b1410428b17ffe03f6338 --- /dev/null +++ b/hw-acpi-Update-GED-_EVT-method-AML-with-cpu-scan.patch @@ -0,0 +1,53 @@ +From 
cfdb0f24431ae0f5115f905a1411509c01a50e88 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Tue, 9 Jun 2020 00:50:36 +0100 +Subject: [PATCH] hw/acpi: Update GED _EVT method AML with cpu scan + +OSPM evaluates _EVT method to map the event. The cpu hotplug event eventually +results in start of the cpu scan. Scan figures out the cpu and the kind of +event(plug/unplug) and notifies it back to the guest. + +The change in this patch updates the GED AML _EVT method with the call to +\\_SB.CPUS.CSCN which will do above. + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/acpi/generic_event_device.c | 4 ++++ + include/hw/acpi/cpu_hotplug.h | 2 ++ + 2 files changed, 6 insertions(+) + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index b84602b238..ad252e6a91 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -108,6 +108,10 @@ void build_ged_aml(Aml *table, const char *name, HotplugHandler *hotplug_dev, + aml_append(if_ctx, aml_call0(MEMORY_DEVICES_CONTAINER "." + MEMORY_SLOT_SCAN_METHOD)); + break; ++ case ACPI_GED_CPU_HOTPLUG_EVT: ++ aml_append(if_ctx, aml_call0(ACPI_CPU_CONTAINER "." 
++ ACPI_CPU_SCAN_METHOD)); ++ break; + case ACPI_GED_PWR_DOWN_EVT: + aml_append(if_ctx, + aml_notify(aml_name(ACPI_POWER_BUTTON_DEVICE), +diff --git a/include/hw/acpi/cpu_hotplug.h b/include/hw/acpi/cpu_hotplug.h +index 48b291e45e..ef631750b4 100644 +--- a/include/hw/acpi/cpu_hotplug.h ++++ b/include/hw/acpi/cpu_hotplug.h +@@ -20,6 +20,8 @@ + #include "hw/acpi/cpu.h" + + #define ACPI_CPU_HOTPLUG_REG_LEN 12 ++#define ACPI_CPU_SCAN_METHOD "CSCN" ++#define ACPI_CPU_CONTAINER "\\_SB.CPUS" + + typedef struct AcpiCpuHotplug { + Object *device; +-- +2.27.0 + diff --git a/hw-acpi-Use-qemu_present_cpu-API-in-ACPI-CPU-hotplug.patch b/hw-acpi-Use-qemu_present_cpu-API-in-ACPI-CPU-hotplug.patch new file mode 100644 index 0000000000000000000000000000000000000000..93772f029c4a74fcc623b2cd8138ac2063e63049 --- /dev/null +++ b/hw-acpi-Use-qemu_present_cpu-API-in-ACPI-CPU-hotplug.patch @@ -0,0 +1,37 @@ +From 576a2a88625978f1befde11f0823f32bbc54cad1 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Mon, 28 Aug 2023 20:00:08 +0000 +Subject: [PATCH] hw/acpi: Use qemu_present_cpu() API in ACPI CPU hotplug init + +ACPI CPU Hotplug code assumes a virtual CPU is unplugged if the CPUState object +is absent in the list of the possible CPUs(CPUArchIdList *possible_cpus) +maintained on per-machine basis. Use the earlier introduced qemu_present_cpu() +API to check this state. + +This change should have no bearing on the functionality of any architecture and +is merely a representational change. 
+ +Signed-off-by: Salil Mehta +--- + hw/acpi/cpu.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index 4b24a25003..cabeb4e86b 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -226,7 +226,10 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, + state->dev_count = id_list->len; + state->devs = g_new0(typeof(*state->devs), state->dev_count); + for (i = 0; i < id_list->len; i++) { +- state->devs[i].cpu = CPU(id_list->cpus[i].cpu); ++ struct CPUState *cpu = CPU(id_list->cpus[i].cpu); ++ if (qemu_present_cpu(cpu)) { ++ state->devs[i].cpu = cpu; ++ } + state->devs[i].arch_id = id_list->cpus[i].arch_id; + } + memory_region_init_io(&state->ctrl_reg, owner, &cpu_hotplug_ops, state, +-- +2.27.0 + diff --git a/hw-acpi-cpu-Use-CPUState-typedef.patch b/hw-acpi-cpu-Use-CPUState-typedef.patch new file mode 100644 index 0000000000000000000000000000000000000000..06f9df398f89a68317504335a9e84d38859724ff --- /dev/null +++ b/hw-acpi-cpu-Use-CPUState-typedef.patch @@ -0,0 +1,34 @@ +From 105ea4d8301791bbb5a76df1f527fb5df439c565 Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Tue, 27 Feb 2024 16:01:50 +0800 +Subject: [PATCH] hw/acpi/cpu: Use CPUState typedef +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from b8492bd430ecc1ceb80cac19b46870d423f1e854 +QEMU coding style recommend using structure typedefs: +https://www.qemu.org/docs/master/devel/style.html#typedefs + +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: dinglimin +--- + include/hw/acpi/cpu.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h +index bc901660fb..209e1773f8 100644 +--- a/include/hw/acpi/cpu.h ++++ b/include/hw/acpi/cpu.h +@@ -19,7 +19,7 @@ + #include "hw/hotplug.h" + + typedef struct AcpiCpuStatus { +- struct CPUState *cpu; ++ CPUState *cpu; + uint64_t arch_id; + bool is_inserting; + bool is_removing; +-- 
+2.27.0 + diff --git a/hw-arm-Changes-required-for-reset-and-to-support-nex.patch b/hw-arm-Changes-required-for-reset-and-to-support-nex.patch new file mode 100644 index 0000000000000000000000000000000000000000..66816ac14e0fd5e975d63b1786c03028908f8569 --- /dev/null +++ b/hw-arm-Changes-required-for-reset-and-to-support-nex.patch @@ -0,0 +1,111 @@ +From 3e5f043c493fa4765c5637bec66be2bd620bc53f Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 9 May 2020 18:10:24 +0100 +Subject: [PATCH] hw/arm: Changes required for reset and to support next boot + +Updates the firmware config with the next boot cpus information and also +registers the reset callback to be called when guest reboots to reset the cpu. + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/boot.c | 2 +- + hw/arm/virt.c | 18 +++++++++++++++--- + include/hw/arm/boot.h | 2 ++ + include/hw/arm/virt.h | 1 + + 4 files changed, 19 insertions(+), 4 deletions(-) + +diff --git a/hw/arm/boot.c b/hw/arm/boot.c +index d1671e1d42..345c7cfa19 100644 +--- a/hw/arm/boot.c ++++ b/hw/arm/boot.c +@@ -683,7 +683,7 @@ fail: + return -1; + } + +-static void do_cpu_reset(void *opaque) ++void do_cpu_reset(void *opaque) + { + ARMCPU *cpu = opaque; + CPUState *cs = CPU(cpu); +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 60cd560ab9..eedff8e525 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -46,6 +46,8 @@ + #include "sysemu/device_tree.h" + #include "sysemu/numa.h" + #include "sysemu/runstate.h" ++#include "sysemu/reset.h" ++#include "sysemu/sysemu.h" + #include "sysemu/tpm.h" + #include "sysemu/tcg.h" + #include "sysemu/kvm.h" +@@ -1453,7 +1455,7 @@ static FWCfgState *create_fw_cfg(const VirtMachineState *vms, AddressSpace *as) + char *nodename; + + fw_cfg = fw_cfg_init_mem_wide(base + 8, base, 8, base + 16, as); +- fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)ms->smp.cpus); ++ fw_cfg_add_i16(fw_cfg, 
FW_CFG_NB_CPUS, vms->boot_cpus); + + nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base); + qemu_fdt_add_subnode(ms->fdt, nodename); +@@ -3276,7 +3278,13 @@ static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + if (local_err) { + goto fail; + } +- /* TODO: register cpu for reset & update F/W info for the next boot */ ++ /* register this cpu for reset & update F/W info for the next boot */ ++ qemu_register_reset(do_cpu_reset, ARM_CPU(cs)); ++ } ++ ++ vms->boot_cpus++; ++ if (vms->fw_cfg) { ++ fw_cfg_modify_i16(vms->fw_cfg, FW_CFG_NB_CPUS, vms->boot_cpus); + } + + cs->disabled = false; +@@ -3351,7 +3359,11 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + unwire_gic_cpu_irqs(vms, cs); + virt_update_gic(vms, cs); + +- /* TODO: unregister cpu for reset & update F/W info for the next boot */ ++ qemu_unregister_reset(do_cpu_reset, ARM_CPU(cs)); ++ vms->boot_cpus--; ++ if (vms->fw_cfg) { ++ fw_cfg_modify_i16(vms->fw_cfg, FW_CFG_NB_CPUS, vms->boot_cpus); ++ } + + qobject_unref(dev->opts); + dev->opts = NULL; +diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h +index 80c492d742..f81326a1dc 100644 +--- a/include/hw/arm/boot.h ++++ b/include/hw/arm/boot.h +@@ -178,6 +178,8 @@ AddressSpace *arm_boot_address_space(ARMCPU *cpu, + int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo, + hwaddr addr_limit, AddressSpace *as, MachineState *ms); + ++void do_cpu_reset(void *opaque); ++ + /* Write a secure board setup routine with a dummy handler for SMCs */ + void arm_write_secure_board_setup_dummy_smc(ARMCPU *cpu, + const struct arm_boot_info *info, +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 069c9f2a09..ae0f5beb26 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -167,6 +167,7 @@ struct VirtMachineState { + MemMapEntry *memmap; + char *pciehb_nodename; + const int *irqmap; ++ uint16_t boot_cpus; + int fdt_size; + uint32_t clock_phandle; + uint32_t gic_phandle; +-- +2.27.0 
+ diff --git a/hw-arm-MADT-Tbl-change-to-size-the-guest-with-possib.patch b/hw-arm-MADT-Tbl-change-to-size-the-guest-with-possib.patch new file mode 100644 index 0000000000000000000000000000000000000000..75abc4e58ce1f30527638b0c62f94c50b033911b --- /dev/null +++ b/hw-arm-MADT-Tbl-change-to-size-the-guest-with-possib.patch @@ -0,0 +1,98 @@ +From 8e1b8d624128523654786953b381557c82654a57 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Wed, 6 May 2020 18:03:11 +0100 +Subject: [PATCH] hw/arm: MADT Tbl change to size the guest with possible vCPUs + +Changes required during building of MADT Table by QEMU to accommodate disabled +possible vCPUs. This info shall be used by the guest kernel to size up its +resources during boot time. This pre-sizing of the guest kernel done on +possible vCPUs will facilitate hotplug of the disabled vCPUs. + +This change also caters ACPI MADT GIC CPU Interface flag related changes +recently introduced in the UEFI ACPI 6.5 Specification which allows deferred +virtual CPU online'ing in the Guest Kernel. 
+ +Link: https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#gic-cpu-interface-gicc-structure + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt-acpi-build.c | 36 ++++++++++++++++++++++++++++++------ + 1 file changed, 30 insertions(+), 6 deletions(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index d88f3cded1..2870c1ec5a 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -779,6 +779,29 @@ static void build_append_gicr(GArray *table_data, uint64_t base, uint32_t size) + build_append_int_noprefix(table_data, size, 4); /* Discovery Range Length */ + } + ++static uint32_t virt_acpi_get_gicc_flags(CPUState *cpu) ++{ ++ MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ ++ /* can only exist in 'enabled' state */ ++ if (!mc->has_hotpluggable_cpus) { ++ return 1; ++ } ++ ++ /* ++ * ARM GIC CPU Interface can be 'online-capable' or 'enabled' at boot ++ * We MUST set 'online-capable' Bit for all hotpluggable CPUs except the ++ * first/boot CPU. Cold-booted CPUs without 'Id' can also be unplugged. ++ * Though as-of-now this is only used as a debugging feature. ++ * ++ * UEFI ACPI Specification 6.5 ++ * Section: 5.2.12.14. GIC CPU Interface (GICC) Structure ++ * Table: 5.37 GICC CPU Interface Flags ++ * Link: https://uefi.org/specs/ACPI/6.5 ++ */ ++ return cpu && !cpu->cpu_index ? 
1 : (1 << 3); ++} ++ + static void + build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + { +@@ -805,12 +828,13 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + build_append_int_noprefix(table_data, vms->gic_version, 1); + build_append_int_noprefix(table_data, 0, 3); /* Reserved */ + +- for (i = 0; i < MACHINE(vms)->smp.cpus; i++) { +- ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(i)); ++ for (i = 0; i < MACHINE(vms)->smp.max_cpus; i++) { ++ CPUState *cpu = qemu_get_possible_cpu(i); + uint64_t physical_base_address = 0, gich = 0, gicv = 0; + uint32_t vgic_interrupt = vms->virt ? ARCH_GIC_MAINT_IRQ : 0; +- uint32_t pmu_interrupt = arm_feature(&armcpu->env, ARM_FEATURE_PMU) ? +- VIRTUAL_PMU_IRQ : 0; ++ uint32_t pmu_interrupt = vms->pmu ? VIRTUAL_PMU_IRQ : 0; ++ uint32_t flags = virt_acpi_get_gicc_flags(cpu); ++ uint64_t mpidr = qemu_get_cpu_archid(i); + + if (vms->gic_version == VIRT_GIC_VERSION_2) { + physical_base_address = memmap[VIRT_GIC_CPU].base; +@@ -825,7 +849,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + build_append_int_noprefix(table_data, i, 4); /* GIC ID */ + build_append_int_noprefix(table_data, i, 4); /* ACPI Processor UID */ + /* Flags */ +- build_append_int_noprefix(table_data, 1, 4); /* Enabled */ ++ build_append_int_noprefix(table_data, flags, 4); + /* Parking Protocol Version */ + build_append_int_noprefix(table_data, 0, 4); + /* Performance Interrupt GSIV */ +@@ -839,7 +863,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + build_append_int_noprefix(table_data, vgic_interrupt, 4); + build_append_int_noprefix(table_data, 0, 8); /* GICR Base Address*/ + /* MPIDR */ +- build_append_int_noprefix(table_data, armcpu->mp_affinity, 8); ++ build_append_int_noprefix(table_data, mpidr, 8); + /* Processor Power Efficiency Class */ + build_append_int_noprefix(table_data, 0, 1); + /* Reserved */ +-- +2.27.0 + diff --git 
a/hw-arm-Support-hotplug-capability-check-using-_OSC-m.patch b/hw-arm-Support-hotplug-capability-check-using-_OSC-m.patch new file mode 100644 index 0000000000000000000000000000000000000000..87e236b6f83accff9b839e2c363e47949feb1f9b --- /dev/null +++ b/hw-arm-Support-hotplug-capability-check-using-_OSC-m.patch @@ -0,0 +1,128 @@ +From c5dfec0bfd78f7e8f84a527a1aa73896f69b2367 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Thu, 10 Aug 2023 01:15:31 +0000 +Subject: [PATCH] hw/arm: Support hotplug capability check using _OSC method + +Physical CPU hotplug results in (un)setting of ACPI _STA.Present bit. AARCH64 +platforms do not support physical CPU hotplug. Virtual CPU hotplug support being +implemented toggles ACPI _STA.Enabled Bit to achieve hotplug functionality. This +is not same as physical CPU hotplug support. + +In future, if ARM architecture supports physical CPU hotplug then the current +design of virtual CPU hotplug can be used unchanged. Hence, there is a need for +firmware/VMM/Qemu to support evaluation of platform wide capabilitiy related to +the *type* of CPU hotplug support present on the platform. OSPM might need this +during boot time to correctly initialize the CPUs and other related components +in the kernel. + +NOTE: This implementation will be improved to add the support of *query* in the +subsequent versions. This is very minimal support to assist kernel. 
+ +ASL for the implemented _OSC method: + +Method (_OSC, 4, NotSerialized) // _OSC: Operating System Capabilities +{ + CreateDWordField (Arg3, Zero, CDW1) + If ((Arg0 == ToUUID ("0811b06e-4a27-44f9-8d60-3cbbc22e7b48") /* Platform-wide Capabilities */)) + { + CreateDWordField (Arg3, 0x04, CDW2) + Local0 = CDW2 /* \_SB_._OSC.CDW2 */ + If ((Arg1 != One)) + { + CDW1 |= 0x08 + } + + Local0 &= 0x00800000 + If ((CDW2 != Local0)) + { + CDW1 |= 0x10 + } + + CDW2 = Local0 + } + Else + { + CDW1 |= 0x04 + } + + Return (Arg3) +} + +Signed-off-by: Salil Mehta +--- + hw/arm/virt-acpi-build.c | 52 ++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 52 insertions(+) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 2870c1ec5a..c402e102c4 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -940,6 +940,55 @@ static void build_fadt_rev6(GArray *table_data, BIOSLinker *linker, + build_fadt(table_data, linker, &fadt, vms->oem_id, vms->oem_table_id); + } + ++static void build_virt_osc_method(Aml *scope, VirtMachineState *vms) ++{ ++ Aml *if_uuid, *else_uuid, *if_rev, *if_caps_masked, *method; ++ Aml *a_cdw1 = aml_name("CDW1"); ++ Aml *a_cdw2 = aml_local(0); ++ ++ method = aml_method("_OSC", 4, AML_NOTSERIALIZED); ++ aml_append(method, aml_create_dword_field(aml_arg(3), aml_int(0), "CDW1")); ++ ++ /* match UUID */ ++ if_uuid = aml_if(aml_equal( ++ aml_arg(0), aml_touuid("0811B06E-4A27-44F9-8D60-3CBBC22E7B48"))); ++ ++ aml_append(if_uuid, aml_create_dword_field(aml_arg(3), aml_int(4), "CDW2")); ++ aml_append(if_uuid, aml_store(aml_name("CDW2"), a_cdw2)); ++ ++ /* check unknown revision in arg(1) */ ++ if_rev = aml_if(aml_lnot(aml_equal(aml_arg(1), aml_int(1)))); ++ /* set revision error bits, DWORD1 Bit[3] */ ++ aml_append(if_rev, aml_or(a_cdw1, aml_int(0x08), a_cdw1)); ++ aml_append(if_uuid, if_rev); ++ ++ /* ++ * check support for vCPU hotplug type(=enabled) platform-wide capability ++ * in DWORD2 as sepcified in the below ACPI 
Specification ECR, ++ * # https://bugzilla.tianocore.org/show_bug.cgi?id=4481 ++ */ ++ if (vms->acpi_dev) { ++ aml_append(if_uuid, aml_and(a_cdw2, aml_int(0x800000), a_cdw2)); ++ /* check if OSPM specified hotplug capability bits were masked */ ++ if_caps_masked = aml_if(aml_lnot(aml_equal(aml_name("CDW2"), a_cdw2))); ++ aml_append(if_caps_masked, aml_or(a_cdw1, aml_int(0x10), a_cdw1)); ++ aml_append(if_uuid, if_caps_masked); ++ } ++ aml_append(if_uuid, aml_store(a_cdw2, aml_name("CDW2"))); ++ ++ aml_append(method, if_uuid); ++ else_uuid = aml_else(); ++ ++ /* set unrecognized UUID error bits, DWORD1 Bit[2] */ ++ aml_append(else_uuid, aml_or(a_cdw1, aml_int(4), a_cdw1)); ++ aml_append(method, else_uuid); ++ ++ aml_append(method, aml_return(aml_arg(3))); ++ aml_append(scope, method); ++ ++ return; ++} ++ + /* DSDT */ + static void + build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) +@@ -974,6 +1023,9 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + } else { + acpi_dsdt_add_cpus(scope, vms); + } ++ ++ build_virt_osc_method(scope, vms); ++ + acpi_dsdt_add_uart(scope, &memmap[VIRT_UART], + (irqmap[VIRT_UART] + ARM_SPI_BASE)); + if (vmc->acpi_expose_flash) { +-- +2.27.0 + diff --git a/hw-arm-gicv3-Changes-to-update-GIC-with-vCPU-hot-plu.patch b/hw-arm-gicv3-Changes-to-update-GIC-with-vCPU-hot-plu.patch new file mode 100644 index 0000000000000000000000000000000000000000..f8097e1973b31092363f156c4ee2df9372884906 --- /dev/null +++ b/hw-arm-gicv3-Changes-to-update-GIC-with-vCPU-hot-plu.patch @@ -0,0 +1,267 @@ +From 8ad397f33f8b7d82c0ef72608ef8dc3e0ecba1c2 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 9 May 2020 14:38:38 +0100 +Subject: [PATCH] hw/arm,gicv3: Changes to update GIC with vCPU hot-plug + notification + +vCPU hot-(un)plug events MUST be notified to the GIC. Introduce a notfication +mechanism to update any such events to GIC so that it can update its vCPU to GIC +CPU interface association. 
+ +This is required to implement a workaround to the limitations posed by the ARM +architecture. For details about the constraints and workarounds please check +below slides: + +Link: https://kvm-forum.qemu.org/2023/talk/9SMPDQ/ + +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 27 +++++++++++++-- + hw/intc/arm_gicv3_common.c | 54 +++++++++++++++++++++++++++++- + hw/intc/arm_gicv3_cpuif_common.c | 5 +++ + hw/intc/gicv3_internal.h | 1 + + include/hw/arm/virt.h | 1 + + include/hw/intc/arm_gicv3_common.h | 22 ++++++++++++ + 6 files changed, 107 insertions(+), 3 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 97bf4cca11..0312fa366d 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -750,6 +750,16 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms) + return dev; + } + ++static void virt_add_gic_cpuhp_notifier(VirtMachineState *vms) ++{ ++ MachineClass *mc = MACHINE_GET_CLASS(vms); ++ ++ if (mc->has_hotpluggable_cpus) { ++ Notifier *cpuhp_notifier = gicv3_cpuhp_notifier(vms->gic); ++ notifier_list_add(&vms->cpuhp_notifiers, cpuhp_notifier); ++ } ++} ++ + static void create_its(VirtMachineState *vms) + { + const char *itsclass = its_class_name(); +@@ -997,6 +1007,9 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + } else if (vms->gic_version == VIRT_GIC_VERSION_2) { + create_v2m(vms); + } ++ ++ /* add GIC CPU hot(un)plug update notifier */ ++ virt_add_gic_cpuhp_notifier(vms); + } + + static void create_uart(const VirtMachineState *vms, int uart, +@@ -2481,6 +2494,8 @@ static void machvirt_init(MachineState *machine) + create_fdt(vms); + qemu_log("cpu init start\n"); + ++ notifier_list_init(&vms->cpuhp_notifiers); ++ possible_cpus = mc->possible_cpu_arch_ids(machine); + assert(possible_cpus->len == max_cpus); + for (n = 0; n < possible_cpus->len; n++) { + Object *cpuobj; +@@ -3133,6 +3148,14 @@ static void 
virt_memory_plug(HotplugHandler *hotplug_dev, + dev, &error_abort); + } + ++static void virt_update_gic(VirtMachineState *vms, CPUState *cs) ++{ ++ GICv3CPUHotplugInfo gic_info = { .gic = vms->gic, .cpu = cs }; ++ ++ /* notify gic to stitch GICC to this new cpu */ ++ notifier_list_notify(&vms->cpuhp_notifiers, &gic_info); ++} ++ + static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) + { +@@ -3215,7 +3238,7 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + * vCPUs have their GIC state initialized during machvit_init(). + */ + if (vms->acpi_dev) { +- /* TODO: update GIC about this hotplug change here */ ++ virt_update_gic(vms, cs); + wire_gic_cpu_irqs(vms, cs); + } + +@@ -3301,7 +3324,7 @@ static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, + /* TODO: update the acpi cpu hotplug state for cpu hot-unplug */ + + unwire_gic_cpu_irqs(vms, cs); +- /* TODO: update the GIC about this hot unplug change */ ++ virt_update_gic(vms, cs); + + /* TODO: unregister cpu for reset & update F/W info for the next boot */ + +diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c +index ebd99af610..fc87fa9369 100644 +--- a/hw/intc/arm_gicv3_common.c ++++ b/hw/intc/arm_gicv3_common.c +@@ -33,7 +33,6 @@ + #include "hw/arm/linux-boot-if.h" + #include "sysemu/kvm.h" + +- + static void gicv3_gicd_no_migration_shift_bug_post_load(GICv3State *cs) + { + if (cs->gicd_no_migration_shift_bug) { +@@ -322,6 +321,56 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, + } + } + ++static int arm_gicv3_get_proc_num(GICv3State *s, CPUState *cpu) ++{ ++ uint64_t mp_affinity; ++ uint64_t gicr_typer; ++ uint64_t cpu_affid; ++ int i; ++ ++ mp_affinity = object_property_get_uint(OBJECT(cpu), "mp-affinity", NULL); ++ /* match the cpu mp-affinity to get the gic cpuif number */ ++ for (i = 0; i < s->num_cpu; i++) { ++ gicr_typer = s->cpu[i].gicr_typer; ++ cpu_affid = (gicr_typer >> 32) 
& 0xFFFFFF; ++ if (cpu_affid == mp_affinity) { ++ return i; ++ } ++ } ++ ++ return -1; ++} ++ ++static void arm_gicv3_cpu_update_notifier(Notifier *notifier, void * data) ++{ ++ GICv3CPUHotplugInfo *gic_info = (GICv3CPUHotplugInfo *)data; ++ CPUState *cpu = gic_info->cpu; ++ int gic_cpuif_num; ++ GICv3State *s; ++ ++ s = ARM_GICV3_COMMON(gic_info->gic); ++ ++ /* this shall get us mapped gicv3 cpuif corresponding to mpidr */ ++ gic_cpuif_num = arm_gicv3_get_proc_num(s, cpu); ++ if (gic_cpuif_num < 0) { ++ error_report("Failed to associate cpu %d with any GIC cpuif", ++ cpu->cpu_index); ++ abort(); ++ } ++ ++ /* check if update is for vcpu hot-unplug */ ++ if (qemu_enabled_cpu(cpu)) { ++ s->cpu[gic_cpuif_num].cpu = NULL; ++ return; ++ } ++ ++ /* re-stitch the gic cpuif to this new cpu */ ++ gicv3_set_gicv3state(cpu, &s->cpu[gic_cpuif_num]); ++ gicv3_set_cpustate(&s->cpu[gic_cpuif_num], cpu); ++ ++ /* TODO: initialize the registers info for this newly added cpu */ ++} ++ + static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + { + GICv3State *s = ARM_GICV3_COMMON(dev); +@@ -444,6 +493,8 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + s->cpu[cpuidx - 1].gicr_typer |= GICR_TYPER_LAST; + } + ++ s->cpu_update_notifier.notify = arm_gicv3_cpu_update_notifier; ++ + s->itslist = g_ptr_array_new(); + } + +@@ -451,6 +502,7 @@ static void arm_gicv3_finalize(Object *obj) + { + GICv3State *s = ARM_GICV3_COMMON(obj); + ++ notifier_remove(&s->cpu_update_notifier); + g_free(s->redist_region_count); + } + +diff --git a/hw/intc/arm_gicv3_cpuif_common.c b/hw/intc/arm_gicv3_cpuif_common.c +index ff1239f65d..381cf2754b 100644 +--- a/hw/intc/arm_gicv3_cpuif_common.c ++++ b/hw/intc/arm_gicv3_cpuif_common.c +@@ -20,3 +20,8 @@ void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s) + + env->gicv3state = (void *)s; + }; ++ ++void gicv3_set_cpustate(GICv3CPUState *s, CPUState *cpu) ++{ ++ s->cpu = cpu; ++} +diff --git a/hw/intc/gicv3_internal.h 
b/hw/intc/gicv3_internal.h +index 29d5cdc1b6..9d4c1209bd 100644 +--- a/hw/intc/gicv3_internal.h ++++ b/hw/intc/gicv3_internal.h +@@ -848,5 +848,6 @@ static inline void gicv3_cache_all_target_cpustates(GICv3State *s) + } + + void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s); ++void gicv3_set_cpustate(GICv3CPUState *s, CPUState *cpu); + + #endif /* QEMU_ARM_GICV3_INTERNAL_H */ +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index 5de0185063..069c9f2a09 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -180,6 +180,7 @@ struct VirtMachineState { + PCIBus *bus; + char *oem_id; + char *oem_table_id; ++ NotifierList cpuhp_notifiers; + }; + + #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) +diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h +index 4e2fb518e7..97a48f44b9 100644 +--- a/include/hw/intc/arm_gicv3_common.h ++++ b/include/hw/intc/arm_gicv3_common.h +@@ -280,6 +280,7 @@ struct GICv3State { + GICv3CPUState *gicd_irouter_target[GICV3_MAXIRQ]; + uint32_t gicd_nsacr[DIV_ROUND_UP(GICV3_MAXIRQ, 16)]; + ++ Notifier cpu_update_notifier; + GICv3CPUState *cpu; + /* List of all ITSes connected to this GIC */ + GPtrArray *itslist; +@@ -328,6 +329,27 @@ struct ARMGICv3CommonClass { + + void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, + const MemoryRegionOps *ops); ++/** ++ * Structure used by GICv3 CPU hotplug notifier ++ */ ++typedef struct GICv3CPUHotplugInfo { ++ DeviceState *gic; /* GICv3State */ ++ CPUState *cpu; ++} GICv3CPUHotplugInfo; ++ ++/** ++ * gicv3_cpuhp_notifier ++ * ++ * Returns CPU hotplug notifier which could be used to update GIC about any ++ * CPU hot(un)plug events. 
++ * ++ * Returns: Notifier initialized with CPU Hot(un)plug update function ++ */ ++static inline Notifier *gicv3_cpuhp_notifier(DeviceState *dev) ++{ ++ GICv3State *s = ARM_GICV3_COMMON(dev); ++ return &s->cpu_update_notifier; ++} + + /** + * gicv3_class_name +-- +2.27.0 + diff --git a/hw-arm-virt-Expose-cold-booted-CPUs-as-MADT-GICC-Ena.patch b/hw-arm-virt-Expose-cold-booted-CPUs-as-MADT-GICC-Ena.patch new file mode 100644 index 0000000000000000000000000000000000000000..1c124e92aa8a31f4573ac5b9098d39c925ec6fa3 --- /dev/null +++ b/hw-arm-virt-Expose-cold-booted-CPUs-as-MADT-GICC-Ena.patch @@ -0,0 +1,107 @@ +From 837b04877be49b930a2d437f55e2ae15ff820421 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 23 Sep 2023 22:31:49 +0000 +Subject: [PATCH] hw/arm/virt: Expose cold-booted CPUs as MADT GICC Enabled + +Hotpluggable CPUs MUST be exposed as 'online-capable' as per the new change. But +cold booted CPUs if made 'online-capable' during boot time might not get +detected in the legacy OS. Hence, can cause compatibility problems. + +Original Change Link: https://bugzilla.tianocore.org/show_bug.cgi?id=3706 + +Specification change might take time and hence disabling the support of +unplugging any cold booted CPUs to preserve the compatibility with legacy OS. + +Signed-off-by: Salil Mehta +--- + hw/arm/virt-acpi-build.c | 19 ++++++++++++++----- + hw/arm/virt.c | 16 ++++++++++++++++ + include/hw/core/cpu.h | 2 ++ + 3 files changed, 32 insertions(+), 5 deletions(-) + +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index c402e102c4..590afcfa98 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -789,17 +789,26 @@ static uint32_t virt_acpi_get_gicc_flags(CPUState *cpu) + } + + /* +- * ARM GIC CPU Interface can be 'online-capable' or 'enabled' at boot +- * We MUST set 'online-capable' Bit for all hotpluggable CPUs except the +- * first/boot CPU. Cold-booted CPUs without 'Id' can also be unplugged. 
+- * Though as-of-now this is only used as a debugging feature. ++ * ARM GIC CPU Interface can be 'online-capable' or 'enabled' at boot. We ++ * MUST set 'online-capable' bit for all hotpluggable CPUs. ++ * Change Link: https://bugzilla.tianocore.org/show_bug.cgi?id=3706 + * + * UEFI ACPI Specification 6.5 + * Section: 5.2.12.14. GIC CPU Interface (GICC) Structure + * Table: 5.37 GICC CPU Interface Flags + * Link: https://uefi.org/specs/ACPI/6.5 ++ * ++ * Cold-booted CPUs, except for the first/boot CPU, SHOULD be allowed to be ++ * hot(un)plug as well but for this to happen these MUST have ++ * 'online-capable' bit set. Later creates compatibility problem with legacy ++ * OS as it might ignore online-capable' bits during boot time and hence ++ * some CPUs might not get detected. To fix this MADT GIC CPU interface flag ++ * should be allowed to have both bits set i.e. 'online-capable' and ++ * 'Enabled' bits together. This change will require UEFI ACPI standard ++ * change. Till this happens exposing all cold-booted CPUs as 'enabled' only ++ * + */ +- return cpu && !cpu->cpu_index ? 1 : (1 << 3); ++ return cpu && cpu->cold_booted ? 1 : (1 << 3); + } + + static void +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index eedff8e525..ed437ce0e8 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3250,6 +3250,10 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + * This shall be used during the init of ACPI Hotplug state and hot-unplug + */ + cs->acpi_persistent = true; ++ ++ if (!dev->hotplugged) { ++ cs->cold_booted = true; ++ } + } + + static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, +@@ -3313,6 +3317,18 @@ static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev, + return; + } + ++ /* ++ * UEFI ACPI standard change is required to make both 'enabled' and the ++ * 'online-capable' bit co-exist instead of being mutually exclusive. ++ * check virt_acpi_get_gicc_flags() for more details. 
++ * ++ * Disable the unplugging of cold-booted vCPUs as a temporary mitigation. ++ */ ++ if (cs->cold_booted) { ++ error_setg(errp, "Hot-unplug of cold-booted CPU not supported!"); ++ return; ++ } ++ + if (cs->cpu_index == first_cpu->cpu_index) { + error_setg(errp, "Boot CPU(id%d=%d:%d:%d:%d) hot-unplug not supported", + first_cpu->cpu_index, cpu->socket_id, cpu->cluster_id, +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index 6dbe163548..ee04ee44c2 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -565,6 +565,8 @@ struct CPUState { + uint32_t halted; + int32_t exception_index; + ++ bool cold_booted; ++ + AccelCPUState *accel; + /* shared by kvm and hvf */ + bool vcpu_dirty; +-- +2.27.0 + diff --git a/hw-arm-virt-Move-setting-of-common-CPU-properties-in.patch b/hw-arm-virt-Move-setting-of-common-CPU-properties-in.patch new file mode 100644 index 0000000000000000000000000000000000000000..ec3b72f02c9fe269e68623de66654906734e83f4 --- /dev/null +++ b/hw-arm-virt-Move-setting-of-common-CPU-properties-in.patch @@ -0,0 +1,311 @@ +From 8daa90ad502b79e232377f831f67df456a743304 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 26 Aug 2023 01:29:37 +0000 +Subject: [PATCH] hw/arm/virt: Move setting of common CPU properties in a + function + +Factor out CPU properties code common for {hot,cold}-plugged CPUs. This allows +code reuse. 
+ +Signed-off-by: Salil Mehta +--- + hw/arm/virt.c | 220 ++++++++++++++++++++++++++---------------- + include/hw/arm/virt.h | 4 + + 2 files changed, 140 insertions(+), 84 deletions(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 94481d45d4..8f647422d8 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -2113,16 +2113,130 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + } + } + ++static void virt_cpu_set_properties(Object *cpuobj, const CPUArchId *cpu_slot, ++ Error **errp) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ VirtMachineState *vms = VIRT_MACHINE(ms); ++ Error *local_err = NULL; ++ VirtMachineClass *vmc; ++ ++ vmc = VIRT_MACHINE_GET_CLASS(ms); ++ ++ /* now, set the cpu object property values */ ++ numa_cpu_pre_plug(cpu_slot, DEVICE(cpuobj), &local_err); ++ if (local_err) { ++ goto out; ++ } ++ ++ object_property_set_int(cpuobj, "mp-affinity", cpu_slot->arch_id, NULL); ++ ++ if (!vms->secure) { ++ object_property_set_bool(cpuobj, "has_el3", false, NULL); ++ } ++ ++ if (!vms->virt && object_property_find(cpuobj, "has_el2")) { ++ object_property_set_bool(cpuobj, "has_el2", false, NULL); ++ } ++ ++ if (vmc->kvm_no_adjvtime && ++ object_property_find(cpuobj, "kvm-no-adjvtime")) { ++ object_property_set_bool(cpuobj, "kvm-no-adjvtime", true, NULL); ++ } ++ ++ if (vmc->no_kvm_steal_time && ++ object_property_find(cpuobj, "kvm-steal-time")) { ++ object_property_set_bool(cpuobj, "kvm-steal-time", false, NULL); ++ } ++ ++ if (vmc->no_pmu && object_property_find(cpuobj, "pmu")) { ++ object_property_set_bool(cpuobj, "pmu", false, NULL); ++ } ++ ++ if (vmc->no_tcg_lpa2 && object_property_find(cpuobj, "lpa2")) { ++ object_property_set_bool(cpuobj, "lpa2", false, NULL); ++ } ++ ++ if (object_property_find(cpuobj, "reset-cbar")) { ++ object_property_set_int(cpuobj, "reset-cbar", ++ vms->memmap[VIRT_CPUPERIPHS].base, ++ &local_err); ++ if (local_err) { ++ goto out; ++ } ++ } ++ ++ /* link already initialized 
{secure,tag}-memory regions to this cpu */ ++ object_property_set_link(cpuobj, "memory", OBJECT(vms->sysmem), &local_err); ++ if (local_err) { ++ goto out; ++ } ++ ++ if (vms->secure) { ++ object_property_set_link(cpuobj, "secure-memory", ++ OBJECT(vms->secure_sysmem), &local_err); ++ if (local_err) { ++ goto out; ++ } ++ } ++ ++ if (vms->mte) { ++ if (!object_property_find(cpuobj, "tag-memory")) { ++ error_setg(&local_err, "MTE requested, but not supported " ++ "by the guest CPU"); ++ if (local_err) { ++ goto out; ++ } ++ } ++ ++ object_property_set_link(cpuobj, "tag-memory", OBJECT(vms->tag_sysmem), ++ &local_err); ++ if (local_err) { ++ goto out; ++ } ++ ++ if (vms->secure) { ++ object_property_set_link(cpuobj, "secure-tag-memory", ++ OBJECT(vms->secure_tag_sysmem), ++ &local_err); ++ if (local_err) { ++ goto out; ++ } ++ } ++ } ++ ++ /* ++ * RFC: Question: this must only be called for the hotplugged cpus. For the ++ * cold booted secondary cpus this is being taken care in arm_load_kernel() ++ * in boot.c. Perhaps we should remove that code now? 
++ */ ++ if (vms->psci_conduit != QEMU_PSCI_CONDUIT_DISABLED) { ++ object_property_set_int(cpuobj, "psci-conduit", vms->psci_conduit, ++ NULL); ++ ++ /* Secondary CPUs start in PSCI powered-down state */ ++ if (CPU(cpuobj)->cpu_index > 0) { ++ object_property_set_bool(cpuobj, "start-powered-off", true, NULL); ++ } ++ } ++ ++out: ++ if (local_err) { ++ error_propagate(errp, local_err); ++ } ++ return; ++} ++ + static void machvirt_init(MachineState *machine) + { + VirtMachineState *vms = VIRT_MACHINE(machine); + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(machine); + MachineClass *mc = MACHINE_GET_CLASS(machine); + const CPUArchIdList *possible_cpus; +- MemoryRegion *sysmem = get_system_memory(); ++ MemoryRegion *secure_tag_sysmem = NULL; + MemoryRegion *secure_sysmem = NULL; + MemoryRegion *tag_sysmem = NULL; +- MemoryRegion *secure_tag_sysmem = NULL; ++ MemoryRegion *sysmem; + int n, virt_max_cpus; + bool firmware_loaded; + bool aarch64 = true; +@@ -2166,6 +2280,8 @@ static void machvirt_init(MachineState *machine) + */ + finalize_gic_version(vms); + ++ sysmem = vms->sysmem = get_system_memory(); ++ + if (vms->secure) { + /* + * The Secure view of the world is the same as the NonSecure, +@@ -2173,7 +2289,7 @@ static void machvirt_init(MachineState *machine) + * containing the system memory at low priority; any secure-only + * devices go in at higher priority and take precedence. 
+ */ +- secure_sysmem = g_new(MemoryRegion, 1); ++ secure_sysmem = vms->secure_sysmem = g_new(MemoryRegion, 1); + memory_region_init(secure_sysmem, OBJECT(machine), "secure-memory", + UINT64_MAX); + memory_region_add_subregion_overlap(secure_sysmem, 0, sysmem, -1); +@@ -2246,6 +2362,23 @@ static void machvirt_init(MachineState *machine) + exit(1); + } + ++ if (vms->mte) { ++ /* Create the memory region only once, but link to all cpus later */ ++ tag_sysmem = vms->tag_sysmem = g_new(MemoryRegion, 1); ++ memory_region_init(tag_sysmem, OBJECT(machine), ++ "tag-memory", UINT64_MAX / 32); ++ ++ if (vms->secure) { ++ secure_tag_sysmem = vms->secure_tag_sysmem = g_new(MemoryRegion, 1); ++ memory_region_init(secure_tag_sysmem, OBJECT(machine), ++ "secure-tag-memory", UINT64_MAX / 32); ++ ++ /* As with ram, secure-tag takes precedence over tag. */ ++ memory_region_add_subregion_overlap(secure_tag_sysmem, 0, ++ tag_sysmem, -1); ++ } ++ } ++ + create_fdt(vms); + qemu_log("cpu init start\n"); + +@@ -2259,15 +2392,10 @@ static void machvirt_init(MachineState *machine) + } + + cpuobj = object_new(possible_cpus->cpus[n].type); +- object_property_set_int(cpuobj, "mp-affinity", +- possible_cpus->cpus[n].arch_id, NULL); + + cs = CPU(cpuobj); + cs->cpu_index = n; + +- numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], DEVICE(cpuobj), +- &error_fatal); +- + aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL); + object_property_set_int(cpuobj, "socket-id", + virt_get_socket_id(machine, n), NULL); +@@ -2278,82 +2406,6 @@ static void machvirt_init(MachineState *machine) + object_property_set_int(cpuobj, "thread-id", + virt_get_thread_id(machine, n), NULL); + +- if (!vms->secure) { +- object_property_set_bool(cpuobj, "has_el3", false, NULL); +- } +- +- if (!vms->virt && object_property_find(cpuobj, "has_el2")) { +- object_property_set_bool(cpuobj, "has_el2", false, NULL); +- } +- +- if (vmc->kvm_no_adjvtime && +- object_property_find(cpuobj, "kvm-no-adjvtime")) { +- 
object_property_set_bool(cpuobj, "kvm-no-adjvtime", true, NULL); +- } +- +- if (vmc->no_kvm_steal_time && +- object_property_find(cpuobj, "kvm-steal-time")) { +- object_property_set_bool(cpuobj, "kvm-steal-time", false, NULL); +- } +- +- if (vmc->no_pmu && object_property_find(cpuobj, "pmu")) { +- object_property_set_bool(cpuobj, "pmu", false, NULL); +- } +- +- if (vmc->no_tcg_lpa2 && object_property_find(cpuobj, "lpa2")) { +- object_property_set_bool(cpuobj, "lpa2", false, NULL); +- } +- +- if (object_property_find(cpuobj, "reset-cbar")) { +- object_property_set_int(cpuobj, "reset-cbar", +- vms->memmap[VIRT_CPUPERIPHS].base, +- &error_abort); +- } +- +- object_property_set_link(cpuobj, "memory", OBJECT(sysmem), +- &error_abort); +- if (vms->secure) { +- object_property_set_link(cpuobj, "secure-memory", +- OBJECT(secure_sysmem), &error_abort); +- } +- +- if (vms->mte) { +- /* Create the memory region only once, but link to all cpus. */ +- if (!tag_sysmem) { +- /* +- * The property exists only if MemTag is supported. +- * If it is, we must allocate the ram to back that up. +- */ +- if (!object_property_find(cpuobj, "tag-memory")) { +- error_report("MTE requested, but not supported " +- "by the guest CPU"); +- exit(1); +- } +- +- tag_sysmem = g_new(MemoryRegion, 1); +- memory_region_init(tag_sysmem, OBJECT(machine), +- "tag-memory", UINT64_MAX / 32); +- +- if (vms->secure) { +- secure_tag_sysmem = g_new(MemoryRegion, 1); +- memory_region_init(secure_tag_sysmem, OBJECT(machine), +- "secure-tag-memory", UINT64_MAX / 32); +- +- /* As with ram, secure-tag takes precedence over tag. 
*/ +- memory_region_add_subregion_overlap(secure_tag_sysmem, 0, +- tag_sysmem, -1); +- } +- } +- +- object_property_set_link(cpuobj, "tag-memory", OBJECT(tag_sysmem), +- &error_abort); +- if (vms->secure) { +- object_property_set_link(cpuobj, "secure-tag-memory", +- OBJECT(secure_tag_sysmem), +- &error_abort); +- } +- } +- + qdev_realize(DEVICE(cpuobj), NULL, &error_fatal); + object_unref(cpuobj); + } +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index e944d434c4..49d1ec8656 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -139,6 +139,10 @@ struct VirtMachineState { + DeviceState *platform_bus_dev; + FWCfgState *fw_cfg; + PFlashCFI01 *flash[2]; ++ MemoryRegion *sysmem; ++ MemoryRegion *secure_sysmem; ++ MemoryRegion *tag_sysmem; ++ MemoryRegion *secure_tag_sysmem; + bool secure; + bool highmem; + bool highmem_compact; +-- +2.27.0 + diff --git a/hw-arm64-add-vcpu-cache-info-support.patch b/hw-arm64-add-vcpu-cache-info-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..1e09d4fa537e3ed220bfb67e95685c60c64ee016 --- /dev/null +++ b/hw-arm64-add-vcpu-cache-info-support.patch @@ -0,0 +1,352 @@ +From 7d3d37d3af4278aee627952d6a81b63dec6ac62b Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Sun, 17 Mar 2024 18:56:09 +0800 +Subject: [PATCH] hw/arm64: add vcpu cache info support + +Support VCPU Cache info by dtb and PPTT table, including L1, L2 and L3 Cache. 
+ +Signed-off-by: zhanghailiang +Signed-off-by: Honghao +Signed-off-by: Ying Fang +Signed-off-by: Yanan Wang +Signed-off-by: Yuan Zhang +--- + hw/acpi/aml-build.c | 158 ++++++++++++++++++++++++++++++++++++ + hw/arm/virt.c | 72 ++++++++++++++++ + include/hw/acpi/aml-build.h | 47 +++++++++++ + 3 files changed, 277 insertions(+) + +diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c +index af66bde0f5..2968df5562 100644 +--- a/hw/acpi/aml-build.c ++++ b/hw/acpi/aml-build.c +@@ -1994,6 +1994,163 @@ static void build_processor_hierarchy_node(GArray *tbl, uint32_t flags, + } + } + ++#ifdef __aarch64__ ++/* ++ * ACPI spec, Revision 6.3 ++ * 5.2.29.2 Cache Type Structure (Type 1) ++ */ ++static void build_cache_hierarchy_node(GArray *tbl, uint32_t next_level, ++ uint32_t cache_type) ++{ ++ build_append_byte(tbl, 1); ++ build_append_byte(tbl, 24); ++ build_append_int_noprefix(tbl, 0, 2); ++ build_append_int_noprefix(tbl, 127, 4); ++ build_append_int_noprefix(tbl, next_level, 4); ++ ++ switch (cache_type) { ++ case ARM_L1D_CACHE: /* L1 dcache info */ ++ build_append_int_noprefix(tbl, ARM_L1DCACHE_SIZE, 4); ++ build_append_int_noprefix(tbl, ARM_L1DCACHE_SETS, 4); ++ build_append_byte(tbl, ARM_L1DCACHE_ASSOCIATIVITY); ++ build_append_byte(tbl, ARM_L1DCACHE_ATTRIBUTES); ++ build_append_int_noprefix(tbl, ARM_L1DCACHE_LINE_SIZE, 2); ++ break; ++ case ARM_L1I_CACHE: /* L1 icache info */ ++ build_append_int_noprefix(tbl, ARM_L1ICACHE_SIZE, 4); ++ build_append_int_noprefix(tbl, ARM_L1ICACHE_SETS, 4); ++ build_append_byte(tbl, ARM_L1ICACHE_ASSOCIATIVITY); ++ build_append_byte(tbl, ARM_L1ICACHE_ATTRIBUTES); ++ build_append_int_noprefix(tbl, ARM_L1ICACHE_LINE_SIZE, 2); ++ break; ++ case ARM_L2_CACHE: /* L2 cache info */ ++ build_append_int_noprefix(tbl, ARM_L2CACHE_SIZE, 4); ++ build_append_int_noprefix(tbl, ARM_L2CACHE_SETS, 4); ++ build_append_byte(tbl, ARM_L2CACHE_ASSOCIATIVITY); ++ build_append_byte(tbl, ARM_L2CACHE_ATTRIBUTES); ++ build_append_int_noprefix(tbl, 
ARM_L2CACHE_LINE_SIZE, 2); ++ break; ++ case ARM_L3_CACHE: /* L3 cache info */ ++ build_append_int_noprefix(tbl, ARM_L3CACHE_SIZE, 4); ++ build_append_int_noprefix(tbl, ARM_L3CACHE_SETS, 4); ++ build_append_byte(tbl, ARM_L3CACHE_ASSOCIATIVITY); ++ build_append_byte(tbl, ARM_L3CACHE_ATTRIBUTES); ++ build_append_int_noprefix(tbl, ARM_L3CACHE_LINE_SIZE, 2); ++ break; ++ default: ++ build_append_int_noprefix(tbl, 0, 4); ++ build_append_int_noprefix(tbl, 0, 4); ++ build_append_byte(tbl, 0); ++ build_append_byte(tbl, 0); ++ build_append_int_noprefix(tbl, 0, 2); ++ } ++} ++ ++/* ++ * ACPI spec, Revision 6.3 ++ * 5.2.29 Processor Properties Topology Table (PPTT) ++ */ ++void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, ++ const char *oem_id, const char *oem_table_id) ++{ ++ MachineClass *mc = MACHINE_GET_CLASS(ms); ++ GQueue *list = g_queue_new(); ++ guint pptt_start = table_data->len; ++ guint parent_offset; ++ guint length, i; ++ int uid = 0; ++ int socket; ++ AcpiTable table = { .sig = "PPTT", .rev = 2, ++ .oem_id = oem_id, .oem_table_id = oem_table_id }; ++ ++ acpi_table_begin(&table, table_data); ++ ++ for (socket = 0; socket < ms->smp.sockets; socket++) { ++ uint32_t l3_cache_offset = table_data->len - pptt_start; ++ build_cache_hierarchy_node(table_data, 0, ARM_L3_CACHE); ++ ++ g_queue_push_tail(list, ++ GUINT_TO_POINTER(table_data->len - pptt_start)); ++ build_processor_hierarchy_node( ++ table_data, ++ /* ++ * Physical package - represents the boundary ++ * of a physical package ++ */ ++ (1 << 0), ++ 0, socket, &l3_cache_offset, 1); ++ } ++ ++ if (mc->smp_props.clusters_supported) { ++ length = g_queue_get_length(list); ++ for (i = 0; i < length; i++) { ++ int cluster; ++ ++ parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); ++ for (cluster = 0; cluster < ms->smp.clusters; cluster++) { ++ g_queue_push_tail(list, ++ GUINT_TO_POINTER(table_data->len - pptt_start)); ++ build_processor_hierarchy_node( ++ table_data, ++ (0 << 0), /* not a 
physical package */ ++ parent_offset, cluster, NULL, 0); ++ } ++ } ++ } ++ ++ length = g_queue_get_length(list); ++ for (i = 0; i < length; i++) { ++ int core; ++ ++ parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); ++ for (core = 0; core < ms->smp.cores; core++) { ++ uint32_t priv_rsrc[3] = {}; ++ priv_rsrc[0] = table_data->len - pptt_start; /* L2 cache offset */ ++ build_cache_hierarchy_node(table_data, 0, ARM_L2_CACHE); ++ ++ priv_rsrc[1] = table_data->len - pptt_start; /* L1 dcache offset */ ++ build_cache_hierarchy_node(table_data, priv_rsrc[0], ARM_L1D_CACHE); ++ ++ priv_rsrc[2] = table_data->len - pptt_start; /* L1 icache offset */ ++ build_cache_hierarchy_node(table_data, priv_rsrc[0], ARM_L1I_CACHE); ++ ++ if (ms->smp.threads > 1) { ++ g_queue_push_tail(list, ++ GUINT_TO_POINTER(table_data->len - pptt_start)); ++ build_processor_hierarchy_node( ++ table_data, ++ (0 << 0), /* not a physical package */ ++ parent_offset, core, priv_rsrc, 3); ++ } else { ++ build_processor_hierarchy_node( ++ table_data, ++ (1 << 1) | /* ACPI Processor ID valid */ ++ (1 << 3), /* Node is a Leaf */ ++ parent_offset, uid++, priv_rsrc, 3); ++ } ++ } ++ } ++ ++ length = g_queue_get_length(list); ++ for (i = 0; i < length; i++) { ++ int thread; ++ ++ parent_offset = GPOINTER_TO_UINT(g_queue_pop_head(list)); ++ for (thread = 0; thread < ms->smp.threads; thread++) { ++ build_processor_hierarchy_node( ++ table_data, ++ (1 << 1) | /* ACPI Processor ID valid */ ++ (1 << 2) | /* Processor is a Thread */ ++ (1 << 3), /* Node is a Leaf */ ++ parent_offset, uid++, NULL, 0); ++ } ++ } ++ ++ g_queue_free(list); ++ acpi_table_end(linker, &table); ++} ++ ++#else + /* + * ACPI spec, Revision 6.3 + * 5.2.29 Processor Properties Topology Table (PPTT) +@@ -2069,6 +2226,7 @@ void build_pptt(GArray *table_data, BIOSLinker *linker, MachineState *ms, + + acpi_table_end(linker, &table); + } ++#endif + + /* build rev1/rev3/rev5.1/rev6.0 FADT */ + void build_fadt(GArray *tbl, BIOSLinker *linker, 
const AcpiFadtData *f, +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 500a15aa5b..b82bd1b8c8 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -379,6 +379,72 @@ static void fdt_add_timer_nodes(const VirtMachineState *vms) + INTID_TO_PPI(ARCH_TIMER_NS_EL2_IRQ), irqflags); + } + ++static void fdt_add_l3cache_nodes(const VirtMachineState *vms) ++{ ++ int i; ++ const MachineState *ms = MACHINE(vms); ++ int cpus_per_socket = ms->smp.clusters * ms->smp.cores * ms->smp.threads; ++ int sockets = (ms->smp.cpus + cpus_per_socket - 1) / cpus_per_socket; ++ ++ for (i = 0; i < sockets; i++) { ++ char *nodename = g_strdup_printf("/cpus/l3-cache%d", i); ++ ++ qemu_fdt_add_subnode(ms->fdt, nodename); ++ qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", "cache"); ++ qemu_fdt_setprop_string(ms->fdt, nodename, "cache-unified", "true"); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-level", 3); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-size", 0x2000000); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-line-size", 128); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-sets", 2048); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", ++ qemu_fdt_alloc_phandle(ms->fdt)); ++ g_free(nodename); ++ } ++} ++ ++static void fdt_add_l2cache_nodes(const VirtMachineState *vms) ++{ ++ const MachineState *ms = MACHINE(vms); ++ int cpus_per_socket = ms->smp.clusters * ms->smp.cores * ms->smp.threads; ++ int cpu; ++ ++ for (cpu = 0; cpu < ms->smp.cpus; cpu++) { ++ char *next_path = g_strdup_printf("/cpus/l3-cache%d", ++ cpu / cpus_per_socket); ++ char *nodename = g_strdup_printf("/cpus/l2-cache%d", cpu); ++ ++ qemu_fdt_add_subnode(ms->fdt, nodename); ++ qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", "cache"); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-size", 0x80000); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-line-size", 64); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "cache-sets", 1024); ++ qemu_fdt_setprop_phandle(ms->fdt, nodename, 
"next-level-cache", ++ next_path); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", ++ qemu_fdt_alloc_phandle(ms->fdt)); ++ ++ g_free(next_path); ++ g_free(nodename); ++ } ++} ++ ++static void fdt_add_l1cache_prop(const VirtMachineState *vms, ++ char *nodename, int cpu) ++{ ++ const MachineState *ms = MACHINE(vms); ++ char *cachename = g_strdup_printf("/cpus/l2-cache%d", cpu); ++ ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "d-cache-size", 0x10000); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "d-cache-line-size", 64); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "d-cache-sets", 256); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "i-cache-size", 0x10000); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "i-cache-line-size", 64); ++ qemu_fdt_setprop_cell(ms->fdt, nodename, "i-cache-sets", 256); ++ qemu_fdt_setprop_phandle(ms->fdt, nodename, "next-level-cache", ++ cachename); ++ g_free(cachename); ++} ++ + static void fdt_add_cpu_nodes(const VirtMachineState *vms) + { + int cpu; +@@ -413,6 +479,11 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) + qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#address-cells", addr_cells); + qemu_fdt_setprop_cell(ms->fdt, "/cpus", "#size-cells", 0x0); + ++ if (!vmc->no_cpu_topology) { ++ fdt_add_l3cache_nodes(vms); ++ fdt_add_l2cache_nodes(vms); ++ } ++ + for (cpu = smp_cpus - 1; cpu >= 0; cpu--) { + char *nodename = g_strdup_printf("/cpus/cpu@%d", cpu); + ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(cpu)); +@@ -442,6 +513,7 @@ static void fdt_add_cpu_nodes(const VirtMachineState *vms) + } + + if (!vmc->no_cpu_topology) { ++ fdt_add_l1cache_prop(vms, nodename, cpu); + qemu_fdt_setprop_cell(ms->fdt, nodename, "phandle", + qemu_fdt_alloc_phandle(ms->fdt)); + } +diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h +index ff2a310270..84ded2ecd3 100644 +--- a/include/hw/acpi/aml-build.h ++++ b/include/hw/acpi/aml-build.h +@@ -221,6 +221,53 @@ struct AcpiBuildTables { + BIOSLinker *linker; + } AcpiBuildTables; + ++#ifdef 
__aarch64__ ++/* Definitions of the hardcoded cache info*/ ++ ++typedef enum { ++ ARM_L1D_CACHE, ++ ARM_L1I_CACHE, ++ ARM_L2_CACHE, ++ ARM_L3_CACHE ++} ArmCacheType; ++ ++/* L1 data cache: */ ++#define ARM_L1DCACHE_SIZE 65536 ++#define ARM_L1DCACHE_SETS 256 ++#define ARM_L1DCACHE_ASSOCIATIVITY 4 ++#define ARM_L1DCACHE_ATTRIBUTES 2 ++#define ARM_L1DCACHE_LINE_SIZE 64 ++ ++/* L1 instruction cache: */ ++#define ARM_L1ICACHE_SIZE 65536 ++#define ARM_L1ICACHE_SETS 256 ++#define ARM_L1ICACHE_ASSOCIATIVITY 4 ++#define ARM_L1ICACHE_ATTRIBUTES 4 ++#define ARM_L1ICACHE_LINE_SIZE 64 ++ ++/* Level 2 unified cache: */ ++#define ARM_L2CACHE_SIZE 524288 ++#define ARM_L2CACHE_SETS 1024 ++#define ARM_L2CACHE_ASSOCIATIVITY 8 ++#define ARM_L2CACHE_ATTRIBUTES 10 ++#define ARM_L2CACHE_LINE_SIZE 64 ++ ++/* Level 3 unified cache: */ ++#define ARM_L3CACHE_SIZE 33554432 ++#define ARM_L3CACHE_SETS 2048 ++#define ARM_L3CACHE_ASSOCIATIVITY 15 ++#define ARM_L3CACHE_ATTRIBUTES 10 ++#define ARM_L3CACHE_LINE_SIZE 128 ++ ++struct offset_status { ++ uint32_t parent; ++ uint32_t l2_offset; ++ uint32_t l1d_offset; ++ uint32_t l1i_offset; ++}; ++ ++#endif ++ + typedef + struct CrsRangeEntry { + uint64_t base; +-- +2.27.0 + diff --git a/hw-char-virtio-serial-bus-Protect-from-DMA-re-entran.patch b/hw-char-virtio-serial-bus-Protect-from-DMA-re-entran.patch new file mode 100644 index 0000000000000000000000000000000000000000..2008350d4590acdec21769fdb53102371500287a --- /dev/null +++ b/hw-char-virtio-serial-bus-Protect-from-DMA-re-entran.patch @@ -0,0 +1,42 @@ +From fa62831c301fa2a1d4226e0fefdeb6b7a280fca6 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Thu, 4 Apr 2024 20:56:35 +0200 +Subject: [PATCH] hw/char/virtio-serial-bus: Protect from DMA re-entrancy + bugs(CVE-2024-3446) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Replace qemu_bh_new_guarded() by virtio_bh_new_guarded() +so the bus and device use the same guard. 
Otherwise the +DMA-reentrancy protection can be bypassed. + +Fixes: CVE-2024-3446 +Cc: qemu-stable@nongnu.org +Suggested-by: Alexander Bulekov +Reviewed-by: Gerd Hoffmann +Acked-by: Michael S. Tsirkin +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Michael S. Tsirkin +Message-Id: <20240409105537.18308-4-philmd@linaro.org> +--- + hw/char/virtio-serial-bus.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c +index 44906057be..096214b11b 100644 +--- a/hw/char/virtio-serial-bus.c ++++ b/hw/char/virtio-serial-bus.c +@@ -990,8 +990,7 @@ static void virtser_port_device_realize(DeviceState *dev, Error **errp) + return; + } + +- port->bh = qemu_bh_new_guarded(flush_queued_data_bh, port, +- &dev->mem_reentrancy_guard); ++ port->bh = virtio_bh_new_guarded(dev, flush_queued_data_bh, port); + port->elem = NULL; + } + +-- +2.27.0 + diff --git a/hw-cxl-cxl-host-Fix-missing-ERRP_GUARD-in-cxl_fixed_.patch b/hw-cxl-cxl-host-Fix-missing-ERRP_GUARD-in-cxl_fixed_.patch new file mode 100644 index 0000000000000000000000000000000000000000..ceb933b88035c1b6fca5c3128fc46223e139dc6b --- /dev/null +++ b/hw-cxl-cxl-host-Fix-missing-ERRP_GUARD-in-cxl_fixed_.patch @@ -0,0 +1,79 @@ +From 66d91f8cb6c9668744cf0acda4402f75c5e533e0 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Tue, 19 Mar 2024 14:36:46 +0800 +Subject: [PATCH] hw/cxl/cxl-host: Fix missing ERRP_GUARD() in + cxl_fixed_memory_window_config() + +cherry-pick from 2a0e0a35002db7ac64f4e82ea2a4ad2fb6d934b0 + +As the comment in qapi/error, dereferencing @errp requires +ERRP_GUARD(): + +* = Why, when and how to use ERRP_GUARD() = +* +* Without ERRP_GUARD(), use of the @errp parameter is restricted: +* - It must not be dereferenced, because it may be null. +... +* ERRP_GUARD() lifts these restrictions. +* +* To use ERRP_GUARD(), add it right at the beginning of the function. 
+* @errp can then be used without worrying about the argument being +* NULL or &error_fatal. +* +* Using it when it's not needed is safe, but please avoid cluttering +* the source with useless code. + +But in cxl_fixed_memory_window_config(), @errp is dereferenced in 2 +places without ERRP_GUARD(): + +fw->enc_int_ways = cxl_interleave_ways_enc(fw->num_targets, errp); +if (*errp) { + return; +} + +and + +fw->enc_int_gran = + cxl_interleave_granularity_enc(object->interleave_granularity, + errp); +if (*errp) { + return; +} + +For the above 2 places, we check "*errp", because neither function +returns a suitable error code. And since machine_set_cfmw() - the caller +of cxl_fixed_memory_window_config() - doesn't get the NULL @errp +parameter as the "set" method of object property, +cxl_fixed_memory_window_config() hasn't triggered the bug that +dereferencing the NULL @errp. + +To follow the requirement of @errp, add missing ERRP_GUARD() in +cxl_fixed_memory_window_config(). + +Suggested-by: Markus Armbruster +Signed-off-by: Zhao Liu +Reviewed-by: Markus Armbruster +Message-Id: <20240223085653.1255438-2-zhao1.liu@linux.intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Acked-by: Jonathan Cameron +Signed-off-by: qihao_yewu +--- + hw/cxl/cxl-host.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c +index 2aa776c79c..c5f5fcfd64 100644 +--- a/hw/cxl/cxl-host.c ++++ b/hw/cxl/cxl-host.c +@@ -26,6 +26,7 @@ static void cxl_fixed_memory_window_config(CXLState *cxl_state, + CXLFixedMemoryWindowOptions *object, + Error **errp) + { ++ ERRP_GUARD(); + g_autofree CXLFixedWindow *fw = g_malloc0(sizeof(*fw)); + strList *target; + int i; +-- +2.27.0 + diff --git a/hw-display-macfb-Fix-missing-ERRP_GUARD-in-macfb_nub.patch b/hw-display-macfb-Fix-missing-ERRP_GUARD-in-macfb_nub.patch new file mode 100644 index 0000000000000000000000000000000000000000..ba043b631bb37466c47d20e091ce68c900cdafb3 --- /dev/null +++ b/hw-display-macfb-Fix-missing-ERRP_GUARD-in-macfb_nub.patch @@ -0,0 +1,68 @@ +From c9ee283913cc9df8998a21544a68ac1d2f86aa49 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Tue, 19 Mar 2024 15:07:51 +0800 +Subject: [PATCH] hw/display/macfb: Fix missing ERRP_GUARD() in + macfb_nubus_realize() + +cherry-pick from 5aa4a6417b0f7acbfd7f4c21dca26293bc3d9348 + +As the comment in qapi/error, dereferencing @errp requires +ERRP_GUARD(): + +* = Why, when and how to use ERRP_GUARD() = +* +* Without ERRP_GUARD(), use of the @errp parameter is restricted: +* - It must not be dereferenced, because it may be null. +... +* ERRP_GUARD() lifts these restrictions. +* +* To use ERRP_GUARD(), add it right at the beginning of the function. +* @errp can then be used without worrying about the argument being +* NULL or &error_fatal. +* +* Using it when it's not needed is safe, but please avoid cluttering +* the source with useless code. + +But in macfb_nubus_realize(), @errp is dereferenced without +ERRP_GUARD(): + +ndc->parent_realize(dev, errp); +if (*errp) { + return; +} + +Here we check *errp, because the ndc->parent_realize(), as a +DeviceClass.realize() callback, returns void. 
And since +macfb_nubus_realize(), also as a DeviceClass.realize(), doesn't get the +NULL @errp parameter, it hasn't triggered the bug that dereferencing the +NULL @errp. + +To follow the requirement of @errp, add missing ERRP_GUARD() in +macfb_nubus_realize(). + +Suggested-by: Markus Armbruster +Signed-off-by: Zhao Liu +Reviewed-by: Markus Armbruster +Message-Id: <20240223085653.1255438-3-zhao1.liu@linux.intel.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Signed-off-by: qihao_yewu +--- + hw/display/macfb.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/display/macfb.c b/hw/display/macfb.c +index d61541ccb5..170da35757 100644 +--- a/hw/display/macfb.c ++++ b/hw/display/macfb.c +@@ -714,6 +714,7 @@ static void macfb_nubus_set_irq(void *opaque, int n, int level) + + static void macfb_nubus_realize(DeviceState *dev, Error **errp) + { ++ ERRP_GUARD(); + NubusDevice *nd = NUBUS_DEVICE(dev); + MacfbNubusState *s = NUBUS_MACFB(dev); + MacfbNubusDeviceClass *ndc = NUBUS_MACFB_GET_CLASS(dev); +-- +2.27.0 + diff --git a/hw-display-virtio-gpu-Protect-from-DMA-re-entrancy-b.patch b/hw-display-virtio-gpu-Protect-from-DMA-re-entrancy-b.patch new file mode 100644 index 0000000000000000000000000000000000000000..6ba7daca6cc386215a7180e045d4b9f5a67b0510 --- /dev/null +++ b/hw-display-virtio-gpu-Protect-from-DMA-re-entrancy-b.patch @@ -0,0 +1,142 @@ +From e72177cc2b3a4425c4be5ca8cc12bc99e63e2788 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Thu, 4 Apr 2024 20:56:27 +0200 +Subject: [PATCH] hw/display/virtio-gpu: Protect from DMA re-entrancy + bugs(CVE-2024-3446) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Replace qemu_bh_new_guarded() by virtio_bh_new_guarded() +so the bus and device use the same guard. 
Otherwise the +DMA-reentrancy protection can be bypassed: + + $ cat << EOF | qemu-system-i386 -display none -nodefaults \ + -machine q35,accel=qtest \ + -m 512M \ + -device virtio-gpu \ + -qtest stdio + outl 0xcf8 0x80000820 + outl 0xcfc 0xe0004000 + outl 0xcf8 0x80000804 + outw 0xcfc 0x06 + write 0xe0004030 0x4 0x024000e0 + write 0xe0004028 0x1 0xff + write 0xe0004020 0x4 0x00009300 + write 0xe000401c 0x1 0x01 + write 0x101 0x1 0x04 + write 0x103 0x1 0x1c + write 0x9301c8 0x1 0x18 + write 0x105 0x1 0x1c + write 0x107 0x1 0x1c + write 0x109 0x1 0x1c + write 0x10b 0x1 0x00 + write 0x10d 0x1 0x00 + write 0x10f 0x1 0x00 + write 0x111 0x1 0x00 + write 0x113 0x1 0x00 + write 0x115 0x1 0x00 + write 0x117 0x1 0x00 + write 0x119 0x1 0x00 + write 0x11b 0x1 0x00 + write 0x11d 0x1 0x00 + write 0x11f 0x1 0x00 + write 0x121 0x1 0x00 + write 0x123 0x1 0x00 + write 0x125 0x1 0x00 + write 0x127 0x1 0x00 + write 0x129 0x1 0x00 + write 0x12b 0x1 0x00 + write 0x12d 0x1 0x00 + write 0x12f 0x1 0x00 + write 0x131 0x1 0x00 + write 0x133 0x1 0x00 + write 0x135 0x1 0x00 + write 0x137 0x1 0x00 + write 0x139 0x1 0x00 + write 0xe0007003 0x1 0x00 + EOF + ... 
+ ================================================================= + ==276099==ERROR: AddressSanitizer: heap-use-after-free on address 0x60d000011178 + at pc 0x562cc3b736c7 bp 0x7ffed49dee60 sp 0x7ffed49dee58 + READ of size 8 at 0x60d000011178 thread T0 + #0 0x562cc3b736c6 in virtio_gpu_ctrl_response hw/display/virtio-gpu.c:180:42 + #1 0x562cc3b7c40b in virtio_gpu_ctrl_response_nodata hw/display/virtio-gpu.c:192:5 + #2 0x562cc3b7c40b in virtio_gpu_simple_process_cmd hw/display/virtio-gpu.c:1015:13 + #3 0x562cc3b82873 in virtio_gpu_process_cmdq hw/display/virtio-gpu.c:1050:9 + #4 0x562cc4a85514 in aio_bh_call util/async.c:169:5 + #5 0x562cc4a85c52 in aio_bh_poll util/async.c:216:13 + #6 0x562cc4a1a79b in aio_dispatch util/aio-posix.c:423:5 + #7 0x562cc4a8a2da in aio_ctx_dispatch util/async.c:358:5 + #8 0x7f36840547a8 in g_main_context_dispatch (/lib/x86_64-linux-gnu/libglib-2.0.so.0+0x547a8) + #9 0x562cc4a8b753 in glib_pollfds_poll util/main-loop.c:290:9 + #10 0x562cc4a8b753 in os_host_main_loop_wait util/main-loop.c:313:5 + #11 0x562cc4a8b753 in main_loop_wait util/main-loop.c:592:11 + #12 0x562cc3938186 in qemu_main_loop system/runstate.c:782:9 + #13 0x562cc43b7af5 in qemu_default_main system/main.c:37:14 + #14 0x7f3683a6c189 in __libc_start_call_main csu/../sysdeps/nptl/libc_start_call_main.h:58:16 + #15 0x7f3683a6c244 in __libc_start_main csu/../csu/libc-start.c:381:3 + #16 0x562cc2a58ac0 in _start (qemu-system-i386+0x231bac0) + + 0x60d000011178 is located 56 bytes inside of 136-byte region [0x60d000011140,0x60d0000111c8) + freed by thread T0 here: + #0 0x562cc2adb662 in __interceptor_free (qemu-system-i386+0x239e662) + #1 0x562cc3b86b21 in virtio_gpu_reset hw/display/virtio-gpu.c:1524:9 + #2 0x562cc416e20e in virtio_reset hw/virtio/virtio.c:2145:9 + #3 0x562cc37c5644 in virtio_pci_reset hw/virtio/virtio-pci.c:2249:5 + #4 0x562cc4233758 in memory_region_write_accessor system/memory.c:497:5 + #5 0x562cc4232eea in access_with_adjusted_size system/memory.c:573:18 
+ + previously allocated by thread T0 here: + #0 0x562cc2adb90e in malloc (qemu-system-i386+0x239e90e) + #1 0x7f368405a678 in g_malloc (/lib/x86_64-linux-gnu/libglib-2.0.so.0+0x5a678) + #2 0x562cc4163ffc in virtqueue_split_pop hw/virtio/virtio.c:1612:12 + #3 0x562cc4163ffc in virtqueue_pop hw/virtio/virtio.c:1783:16 + #4 0x562cc3b91a95 in virtio_gpu_handle_ctrl hw/display/virtio-gpu.c:1112:15 + #5 0x562cc4a85514 in aio_bh_call util/async.c:169:5 + #6 0x562cc4a85c52 in aio_bh_poll util/async.c:216:13 + #7 0x562cc4a1a79b in aio_dispatch util/aio-posix.c:423:5 + + SUMMARY: AddressSanitizer: heap-use-after-free hw/display/virtio-gpu.c:180:42 in virtio_gpu_ctrl_response + +With this change, the same reproducer triggers: + + qemu-system-i386: warning: Blocked re-entrant IO on MemoryRegion: virtio-pci-common-virtio-gpu at addr: 0x6 + +Fixes: CVE-2024-3446 +Cc: qemu-stable@nongnu.org +Reported-by: Alexander Bulekov +Reported-by: Yongkang Jia +Reported-by: Xiao Lei +Reported-by: Yiming Tao +Buglink: https://bugs.launchpad.net/qemu/+bug/1888606 +Reviewed-by: Gerd Hoffmann +Acked-by: Michael S. Tsirkin +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Michael S. 
Tsirkin +Message-Id: <20240409105537.18308-3-philmd@linaro.org> +--- + hw/display/virtio-gpu.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c +index b02d1e3a4c..a714638822 100644 +--- a/hw/display/virtio-gpu.c ++++ b/hw/display/virtio-gpu.c +@@ -1456,10 +1456,8 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error **errp) + + g->ctrl_vq = virtio_get_queue(vdev, 0); + g->cursor_vq = virtio_get_queue(vdev, 1); +- g->ctrl_bh = qemu_bh_new_guarded(virtio_gpu_ctrl_bh, g, +- &qdev->mem_reentrancy_guard); +- g->cursor_bh = qemu_bh_new_guarded(virtio_gpu_cursor_bh, g, +- &qdev->mem_reentrancy_guard); ++ g->ctrl_bh = virtio_bh_new_guarded(qdev, virtio_gpu_ctrl_bh, g); ++ g->cursor_bh = virtio_bh_new_guarded(qdev, virtio_gpu_cursor_bh, g); + g->reset_bh = qemu_bh_new(virtio_gpu_reset_bh, g); + qemu_cond_init(&g->reset_cond); + QTAILQ_INIT(&g->reslist); +-- +2.27.0 + diff --git a/hw-i2c-smbus_slave-Add-object-path-on-error-prints.patch b/hw-i2c-smbus_slave-Add-object-path-on-error-prints.patch new file mode 100644 index 0000000000000000000000000000000000000000..aa63deb3fd944ed52bfc5e662c862ca45868ddd7 --- /dev/null +++ b/hw-i2c-smbus_slave-Add-object-path-on-error-prints.patch @@ -0,0 +1,49 @@ +From f8ed9dd954fbd558d549c7c2e2ab7322107218a1 Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Tue, 27 Feb 2024 17:40:21 +0800 +Subject: [PATCH] hw/i2c/smbus_slave: Add object path on error prints +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from fcc8299e29816c9b6f8d9766254fce6e8a50ee52 + +The current logging doesn't tell us which specific smbus device is an +error state. 
+ +Signed-off-by: Joe Komlodi +Reviewed-by: Peter Maydell +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240202204847.2062798-3-komlodi@google.com> +Signed-off-by: Philippe Mathieu-Daudé + +Signed-off-by: dinglimin +--- + hw/i2c/smbus_slave.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/hw/i2c/smbus_slave.c b/hw/i2c/smbus_slave.c +index 2ef2c7c5f6..b35516a404 100644 +--- a/hw/i2c/smbus_slave.c ++++ b/hw/i2c/smbus_slave.c +@@ -25,11 +25,15 @@ + #define DPRINTF(fmt, ...) \ + do { printf("smbus(%02x): " fmt , dev->i2c.address, ## __VA_ARGS__); } while (0) + #define BADF(fmt, ...) \ +-do { fprintf(stderr, "smbus: error: " fmt , ## __VA_ARGS__); exit(1);} while (0) ++do { g_autofree char *qom_path = object_get_canonical_path(OBJECT(dev)); \ ++ fprintf(stderr, "%s: smbus: error: " fmt , qom_path, ## __VA_ARGS__); \ ++ exit(1); } while (0) + #else + #define DPRINTF(fmt, ...) do {} while(0) + #define BADF(fmt, ...) \ +-do { fprintf(stderr, "smbus: error: " fmt , ## __VA_ARGS__);} while (0) ++do { g_autofree char *qom_path = object_get_canonical_path(OBJECT(dev)); \ ++ fprintf(stderr, "%s: smbus: error: " fmt , qom_path, ## __VA_ARGS__); \ ++ } while (0) + #endif + + enum { +-- +2.27.0 + diff --git a/hw-intc-arm-gicv3-Changes-required-to-re-init-the-vC.patch b/hw-intc-arm-gicv3-Changes-required-to-re-init-the-vC.patch new file mode 100644 index 0000000000000000000000000000000000000000..d551fad998c56d23a82c912ab300dcf261bf381d --- /dev/null +++ b/hw-intc-arm-gicv3-Changes-required-to-re-init-the-vC.patch @@ -0,0 +1,403 @@ +From 4e0a4443b7c36608fc30dcaaf0db120220111dd2 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 9 May 2020 15:26:27 +0100 +Subject: [PATCH] hw/intc/arm-gicv3*: Changes required to (re)init the vCPU + register info + +vCPU register info needs to be re-initialized each time vCPU is hot-plugged. +This has to be done both for emulation/TCG and KVM case. 
This is done in +context to the GIC update notification for any vCPU hot-(un)plug events. This +change adds that support and re-factors existing to maximize the code re-use. + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +--- + hw/intc/arm_gicv3.c | 1 + + hw/intc/arm_gicv3_common.c | 7 +- + hw/intc/arm_gicv3_cpuif.c | 257 +++++++++++++++-------------- + hw/intc/arm_gicv3_kvm.c | 7 +- + hw/intc/gicv3_internal.h | 1 + + include/hw/intc/arm_gicv3_common.h | 1 + + 6 files changed, 150 insertions(+), 124 deletions(-) + +diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c +index 0b8f79a122..e1c7c8c4bc 100644 +--- a/hw/intc/arm_gicv3.c ++++ b/hw/intc/arm_gicv3.c +@@ -410,6 +410,7 @@ static void arm_gicv3_class_init(ObjectClass *klass, void *data) + ARMGICv3Class *agc = ARM_GICV3_CLASS(klass); + + agcc->post_load = arm_gicv3_post_load; ++ agcc->init_cpu_reginfo = gicv3_init_cpu_reginfo; + device_class_set_parent_realize(dc, arm_gic_realize, &agc->parent_realize); + } + +diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c +index fc87fa9369..d051024a30 100644 +--- a/hw/intc/arm_gicv3_common.c ++++ b/hw/intc/arm_gicv3_common.c +@@ -345,10 +345,12 @@ static void arm_gicv3_cpu_update_notifier(Notifier *notifier, void * data) + { + GICv3CPUHotplugInfo *gic_info = (GICv3CPUHotplugInfo *)data; + CPUState *cpu = gic_info->cpu; ++ ARMGICv3CommonClass *c; + int gic_cpuif_num; + GICv3State *s; + + s = ARM_GICV3_COMMON(gic_info->gic); ++ c = ARM_GICV3_COMMON_GET_CLASS(s); + + /* this shall get us mapped gicv3 cpuif corresponding to mpidr */ + gic_cpuif_num = arm_gicv3_get_proc_num(s, cpu); +@@ -368,7 +370,10 @@ static void arm_gicv3_cpu_update_notifier(Notifier *notifier, void * data) + gicv3_set_gicv3state(cpu, &s->cpu[gic_cpuif_num]); + gicv3_set_cpustate(&s->cpu[gic_cpuif_num], cpu); + +- /* TODO: initialize the registers info for this newly added cpu */ ++ /* 
initialize the registers info for this newly added cpu */ ++ if (c->init_cpu_reginfo) { ++ c->init_cpu_reginfo(cpu); ++ } + } + + static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) +diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c +index 0d0eb2f62f..a013510074 100644 +--- a/hw/intc/arm_gicv3_cpuif.c ++++ b/hw/intc/arm_gicv3_cpuif.c +@@ -2782,6 +2782,127 @@ static const ARMCPRegInfo gicv3_cpuif_ich_apxr23_reginfo[] = { + }, + }; + ++void gicv3_init_cpu_reginfo(CPUState *cs) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ GICv3CPUState *gcs = icc_cs_from_env(&cpu->env); ++ ++ /* ++ * If the CPU doesn't define a GICv3 configuration, probably because ++ * in real hardware it doesn't have one, then we use default values ++ * matching the one used by most Arm CPUs. This applies to: ++ * cpu->gic_num_lrs ++ * cpu->gic_vpribits ++ * cpu->gic_vprebits ++ * cpu->gic_pribits ++ */ ++ ++ /* ++ * Note that we can't just use the GICv3CPUState as an opaque pointer ++ * in define_arm_cp_regs_with_opaque(), because when we're called back ++ * it might be with code translated by CPU 0 but run by CPU 1, in ++ * which case we'd get the wrong value. ++ * So instead we define the regs with no ri->opaque info, and ++ * get back to the GICv3CPUState from the CPUARMState. ++ */ ++ define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); ++ ++ /* ++ * The CPU implementation specifies the number of supported ++ * bits of physical priority. For backwards compatibility ++ * of migration, we have a compat property that forces use ++ * of 8 priority bits regardless of what the CPU really has. ++ */ ++ if (gcs->gic->force_8bit_prio) { ++ gcs->pribits = 8; ++ } else { ++ gcs->pribits = cpu->gic_pribits ?: 5; ++ } ++ ++ /* ++ * The GICv3 has separate ID register fields for virtual priority ++ * and preemption bit values, but only a single ID register field ++ * for the physical priority bits. 
The preemption bit count is ++ * always the same as the priority bit count, except that 8 bits ++ * of priority means 7 preemption bits. We precalculate the ++ * preemption bits because it simplifies the code and makes the ++ * parallels between the virtual and physical bits of the GIC ++ * a bit clearer. ++ */ ++ gcs->prebits = gcs->pribits; ++ if (gcs->prebits == 8) { ++ gcs->prebits--; ++ } ++ /* ++ * Check that CPU code defining pribits didn't violate ++ * architectural constraints our implementation relies on. ++ */ ++ g_assert(gcs->pribits >= 4 && gcs->pribits <= 8); ++ ++ /* ++ * gicv3_cpuif_reginfo[] defines ICC_AP*R0_EL1; add definitions ++ * for ICC_AP*R{1,2,3}_EL1 if the prebits value requires them. ++ */ ++ if (gcs->prebits >= 6) { ++ define_arm_cp_regs(cpu, gicv3_cpuif_icc_apxr1_reginfo); ++ } ++ if (gcs->prebits == 7) { ++ define_arm_cp_regs(cpu, gicv3_cpuif_icc_apxr23_reginfo); ++ } ++ ++ if (arm_feature(&cpu->env, ARM_FEATURE_EL2)) { ++ int j; ++ ++ gcs->num_list_regs = cpu->gic_num_lrs ?: 4; ++ gcs->vpribits = cpu->gic_vpribits ?: 5; ++ gcs->vprebits = cpu->gic_vprebits ?: 5; ++ ++ /* ++ * Check against architectural constraints: getting these ++ * wrong would be a bug in the CPU code defining these, ++ * and the implementation relies on them holding. ++ */ ++ g_assert(gcs->vprebits <= gcs->vpribits); ++ g_assert(gcs->vprebits >= 5 && gcs->vprebits <= 7); ++ g_assert(gcs->vpribits >= 5 && gcs->vpribits <= 8); ++ ++ define_arm_cp_regs(cpu, gicv3_cpuif_hcr_reginfo); ++ ++ for (j = 0; j < gcs->num_list_regs; j++) { ++ /* ++ * Note that the AArch64 LRs are 64-bit; the AArch32 LRs ++ * are split into two cp15 regs, LR (the low part, with the ++ * same encoding as the AArch64 LR) and LRC (the high part). 
++ */ ++ ARMCPRegInfo lr_regset[] = { ++ { .name = "ICH_LRn_EL2", .state = ARM_CP_STATE_BOTH, ++ .opc0 = 3, .opc1 = 4, .crn = 12, ++ .crm = 12 + (j >> 3), .opc2 = j & 7, ++ .type = ARM_CP_IO | ARM_CP_NO_RAW, ++ .access = PL2_RW, ++ .readfn = ich_lr_read, ++ .writefn = ich_lr_write, ++ }, ++ { .name = "ICH_LRCn_EL2", .state = ARM_CP_STATE_AA32, ++ .cp = 15, .opc1 = 4, .crn = 12, ++ .crm = 14 + (j >> 3), .opc2 = j & 7, ++ .type = ARM_CP_IO | ARM_CP_NO_RAW, ++ .access = PL2_RW, ++ .readfn = ich_lr_read, ++ .writefn = ich_lr_write, ++ }, ++ }; ++ define_arm_cp_regs(cpu, lr_regset); ++ } ++ if (gcs->vprebits >= 6) { ++ define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr1_reginfo); ++ } ++ if (gcs->vprebits == 7) { ++ define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr23_reginfo); ++ } ++ } ++} ++ + static void gicv3_cpuif_el_change_hook(ARMCPU *cpu, void *opaque) + { + GICv3CPUState *cs = opaque; +@@ -2804,131 +2925,23 @@ void gicv3_init_cpuif(GICv3State *s) + + for (i = 0; i < s->num_cpu; i++) { + ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i)); +- GICv3CPUState *cs = &s->cpu[i]; +- +- /* +- * If the CPU doesn't define a GICv3 configuration, probably because +- * in real hardware it doesn't have one, then we use default values +- * matching the one used by most Arm CPUs. This applies to: +- * cpu->gic_num_lrs +- * cpu->gic_vpribits +- * cpu->gic_vprebits +- * cpu->gic_pribits +- */ +- +- /* Note that we can't just use the GICv3CPUState as an opaque pointer +- * in define_arm_cp_regs_with_opaque(), because when we're called back +- * it might be with code translated by CPU 0 but run by CPU 1, in +- * which case we'd get the wrong value. +- * So instead we define the regs with no ri->opaque info, and +- * get back to the GICv3CPUState from the CPUARMState. +- * +- * These CP regs callbacks can be called from either TCG or HVF code. +- */ +- define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); +- +- /* +- * The CPU implementation specifies the number of supported +- * bits of physical priority. 
For backwards compatibility +- * of migration, we have a compat property that forces use +- * of 8 priority bits regardless of what the CPU really has. +- */ +- if (s->force_8bit_prio) { +- cs->pribits = 8; +- } else { +- cs->pribits = cpu->gic_pribits ?: 5; +- } +- +- /* +- * The GICv3 has separate ID register fields for virtual priority +- * and preemption bit values, but only a single ID register field +- * for the physical priority bits. The preemption bit count is +- * always the same as the priority bit count, except that 8 bits +- * of priority means 7 preemption bits. We precalculate the +- * preemption bits because it simplifies the code and makes the +- * parallels between the virtual and physical bits of the GIC +- * a bit clearer. +- */ +- cs->prebits = cs->pribits; +- if (cs->prebits == 8) { +- cs->prebits--; +- } +- /* +- * Check that CPU code defining pribits didn't violate +- * architectural constraints our implementation relies on. +- */ +- g_assert(cs->pribits >= 4 && cs->pribits <= 8); + +- /* +- * gicv3_cpuif_reginfo[] defines ICC_AP*R0_EL1; add definitions +- * for ICC_AP*R{1,2,3}_EL1 if the prebits value requires them. +- */ +- if (cs->prebits >= 6) { +- define_arm_cp_regs(cpu, gicv3_cpuif_icc_apxr1_reginfo); +- } +- if (cs->prebits == 7) { +- define_arm_cp_regs(cpu, gicv3_cpuif_icc_apxr23_reginfo); +- } +- +- if (arm_feature(&cpu->env, ARM_FEATURE_EL2)) { +- int j; +- +- cs->num_list_regs = cpu->gic_num_lrs ?: 4; +- cs->vpribits = cpu->gic_vpribits ?: 5; +- cs->vprebits = cpu->gic_vprebits ?: 5; +- +- /* Check against architectural constraints: getting these +- * wrong would be a bug in the CPU code defining these, +- * and the implementation relies on them holding. 
+- */ +- g_assert(cs->vprebits <= cs->vpribits); +- g_assert(cs->vprebits >= 5 && cs->vprebits <= 7); +- g_assert(cs->vpribits >= 5 && cs->vpribits <= 8); +- +- define_arm_cp_regs(cpu, gicv3_cpuif_hcr_reginfo); +- +- for (j = 0; j < cs->num_list_regs; j++) { +- /* Note that the AArch64 LRs are 64-bit; the AArch32 LRs +- * are split into two cp15 regs, LR (the low part, with the +- * same encoding as the AArch64 LR) and LRC (the high part). ++ if (qemu_enabled_cpu(CPU(cpu))) { ++ GICv3CPUState *cs = icc_cs_from_env(&cpu->env); ++ gicv3_init_cpu_reginfo(CPU(cpu)); ++ if (tcg_enabled() || qtest_enabled()) { ++ /* ++ * We can only trap EL changes with TCG. However the GIC ++ * interrupt state only changes on EL changes involving EL2 or ++ * EL3, so for the non-TCG case this is OK, as EL2 and EL3 can't ++ * exist. + */ +- ARMCPRegInfo lr_regset[] = { +- { .name = "ICH_LRn_EL2", .state = ARM_CP_STATE_BOTH, +- .opc0 = 3, .opc1 = 4, .crn = 12, +- .crm = 12 + (j >> 3), .opc2 = j & 7, +- .type = ARM_CP_IO | ARM_CP_NO_RAW, +- .access = PL2_RW, +- .readfn = ich_lr_read, +- .writefn = ich_lr_write, +- }, +- { .name = "ICH_LRCn_EL2", .state = ARM_CP_STATE_AA32, +- .cp = 15, .opc1 = 4, .crn = 12, +- .crm = 14 + (j >> 3), .opc2 = j & 7, +- .type = ARM_CP_IO | ARM_CP_NO_RAW, +- .access = PL2_RW, +- .readfn = ich_lr_read, +- .writefn = ich_lr_write, +- }, +- }; +- define_arm_cp_regs(cpu, lr_regset); +- } +- if (cs->vprebits >= 6) { +- define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr1_reginfo); +- } +- if (cs->vprebits == 7) { +- define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr23_reginfo); ++ arm_register_el_change_hook(cpu, gicv3_cpuif_el_change_hook, ++ cs); ++ } else { ++ assert(!arm_feature(&cpu->env, ARM_FEATURE_EL2)); ++ assert(!arm_feature(&cpu->env, ARM_FEATURE_EL3)); + } + } +- if (tcg_enabled() || qtest_enabled()) { +- /* +- * We can only trap EL changes with TCG. 
However the GIC interrupt +- * state only changes on EL changes involving EL2 or EL3, so for +- * the non-TCG case this is OK, as EL2 and EL3 can't exist. +- */ +- arm_register_el_change_hook(cpu, gicv3_cpuif_el_change_hook, cs); +- } else { +- assert(!arm_feature(&cpu->env, ARM_FEATURE_EL2)); +- assert(!arm_feature(&cpu->env, ARM_FEATURE_EL3)); +- } + } + } +diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c +index db06c75e2b..dd2a60fa20 100644 +--- a/hw/intc/arm_gicv3_kvm.c ++++ b/hw/intc/arm_gicv3_kvm.c +@@ -804,6 +804,10 @@ static void vm_change_state_handler(void *opaque, bool running, + } + } + ++static void kvm_gicv3_init_cpu_reginfo(CPUState *cs) ++{ ++ define_arm_cp_regs(ARM_CPU(cs), gicv3_cpuif_reginfo); ++} + + static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) + { +@@ -837,7 +841,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) + for (i = 0; i < s->num_cpu; i++) { + CPUState *cs = qemu_get_cpu(i); + if (qemu_enabled_cpu(cs)) { +- define_arm_cp_regs(ARM_CPU(cs), gicv3_cpuif_reginfo); ++ kvm_gicv3_init_cpu_reginfo(cs); + } + } + +@@ -925,6 +929,7 @@ static void kvm_arm_gicv3_class_init(ObjectClass *klass, void *data) + + agcc->pre_save = kvm_arm_gicv3_get; + agcc->post_load = kvm_arm_gicv3_put; ++ agcc->init_cpu_reginfo = kvm_gicv3_init_cpu_reginfo; + device_class_set_parent_realize(dc, kvm_arm_gicv3_realize, + &kgc->parent_realize); + resettable_class_set_parent_phases(rc, NULL, kvm_arm_gicv3_reset_hold, NULL, +diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h +index 9d4c1209bd..0bed0f6e2a 100644 +--- a/hw/intc/gicv3_internal.h ++++ b/hw/intc/gicv3_internal.h +@@ -709,6 +709,7 @@ void gicv3_redist_vinvall(GICv3CPUState *cs, uint64_t vptaddr); + + void gicv3_redist_send_sgi(GICv3CPUState *cs, int grp, int irq, bool ns); + void gicv3_init_cpuif(GICv3State *s); ++void gicv3_init_cpu_reginfo(CPUState *cs); + + /** + * gicv3_cpuif_update: +diff --git a/include/hw/intc/arm_gicv3_common.h 
b/include/hw/intc/arm_gicv3_common.h +index 97a48f44b9..b5f8ba17ff 100644 +--- a/include/hw/intc/arm_gicv3_common.h ++++ b/include/hw/intc/arm_gicv3_common.h +@@ -325,6 +325,7 @@ struct ARMGICv3CommonClass { + + void (*pre_save)(GICv3State *s); + void (*post_load)(GICv3State *s); ++ void (*init_cpu_reginfo)(CPUState *cs); + }; + + void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, +-- +2.27.0 + diff --git a/hw-intc-loongarch_extioi-Add-dynamic-cpu-number-supp.patch b/hw-intc-loongarch_extioi-Add-dynamic-cpu-number-supp.patch new file mode 100644 index 0000000000000000000000000000000000000000..0d3c57b4e3e803ef673888d6e34af009ed660a11 --- /dev/null +++ b/hw-intc-loongarch_extioi-Add-dynamic-cpu-number-supp.patch @@ -0,0 +1,297 @@ +From 4440ab99f7f7b04ef79f6b35b8330edf7fe66002 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Fri, 15 Dec 2023 11:07:36 +0800 +Subject: [PATCH] hw/intc/loongarch_extioi: Add dynamic cpu number support + +On LoongArch physical machine, one extioi interrupt controller only +supports 4 cpus. With processor more than 4 cpus, there are multiple +extioi interrupt controllers; if interrupts need to be routed to +other cpus, they are forwarded from extioi node0 to other extioi nodes. + +On virt machine model, there is simple extioi interrupt device model. +All cpus can access register of extioi interrupt controller, however +interrupt can only be route to 4 vcpu for compatible with old kernel. + +This patch adds dynamic cpu number support about extioi interrupt. +With old kernel legacy extioi model is used, however kernel can detect +and choose new route method in future, so that interrupt can be routed to +all vcpus. 
+ +confict: + +++<<<<<<< HEAD + + .fields = (VMStateField[]) { +++======= ++ .fields = (const VMStateField[]) { ++ VMSTATE_UINT32_ARRAY(coreisr, ExtIOICore, EXTIOI_IRQS_GROUP_COUNT), ++ VMSTATE_END_OF_LIST() ++ } ++ }; ++ ++ static const VMStateDescription vmstate_loongarch_extioi = { ++ .name = TYPE_LOONGARCH_EXTIOI, ++ .version_id = 2, ++ .minimum_version_id = 2, ++ .fields = (const VMStateField[]) { +++>>>>>>> hw/intc/loongarch_extioi: Add dynamic cpu number support + +solve: + +save: hw/intc/loongarch_extioi: Add dynamic cpu number support + +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20231215100333.3933632-4-maobibo@loongson.cn> +Signed-off-by: Song Gao +--- + hw/intc/loongarch_extioi.c | 109 +++++++++++++++++++---------- + hw/loongarch/virt.c | 3 +- + include/hw/intc/loongarch_extioi.h | 11 ++- + 3 files changed, 82 insertions(+), 41 deletions(-) + +diff --git a/hw/intc/loongarch_extioi.c b/hw/intc/loongarch_extioi.c +index 77b4776958..28802bf3ef 100644 +--- a/hw/intc/loongarch_extioi.c ++++ b/hw/intc/loongarch_extioi.c +@@ -8,6 +8,7 @@ + #include "qemu/osdep.h" + #include "qemu/module.h" + #include "qemu/log.h" ++#include "qapi/error.h" + #include "hw/irq.h" + #include "hw/sysbus.h" + #include "hw/loongarch/virt.h" +@@ -32,23 +33,23 @@ static void extioi_update_irq(LoongArchExtIOI *s, int irq, int level) + if (((s->enable[irq_index]) & irq_mask) == 0) { + return; + } +- s->coreisr[cpu][irq_index] |= irq_mask; +- found = find_first_bit(s->sw_isr[cpu][ipnum], EXTIOI_IRQS); +- set_bit(irq, s->sw_isr[cpu][ipnum]); ++ s->cpu[cpu].coreisr[irq_index] |= irq_mask; ++ found = find_first_bit(s->cpu[cpu].sw_isr[ipnum], EXTIOI_IRQS); ++ set_bit(irq, s->cpu[cpu].sw_isr[ipnum]); + if (found < EXTIOI_IRQS) { + /* other irq is handling, need not update parent irq level */ + return; + } + } else { +- s->coreisr[cpu][irq_index] &= ~irq_mask; +- clear_bit(irq, s->sw_isr[cpu][ipnum]); +- found = find_first_bit(s->sw_isr[cpu][ipnum], EXTIOI_IRQS); ++ 
s->cpu[cpu].coreisr[irq_index] &= ~irq_mask; ++ clear_bit(irq, s->cpu[cpu].sw_isr[ipnum]); ++ found = find_first_bit(s->cpu[cpu].sw_isr[ipnum], EXTIOI_IRQS); + if (found < EXTIOI_IRQS) { + /* other irq is handling, need not update parent irq level */ + return; + } + } +- qemu_set_irq(s->parent_irq[cpu][ipnum], level); ++ qemu_set_irq(s->cpu[cpu].parent_irq[ipnum], level); + } + + static void extioi_setirq(void *opaque, int irq, int level) +@@ -96,7 +97,7 @@ static MemTxResult extioi_readw(void *opaque, hwaddr addr, uint64_t *data, + index = (offset - EXTIOI_COREISR_START) >> 2; + /* using attrs to get current cpu index */ + cpu = attrs.requester_id; +- *data = s->coreisr[cpu][index]; ++ *data = s->cpu[cpu].coreisr[index]; + break; + case EXTIOI_COREMAP_START ... EXTIOI_COREMAP_END - 1: + index = (offset - EXTIOI_COREMAP_START) >> 2; +@@ -189,8 +190,8 @@ static MemTxResult extioi_writew(void *opaque, hwaddr addr, + index = (offset - EXTIOI_COREISR_START) >> 2; + /* using attrs to get current cpu index */ + cpu = attrs.requester_id; +- old_data = s->coreisr[cpu][index]; +- s->coreisr[cpu][index] = old_data & ~val; ++ old_data = s->cpu[cpu].coreisr[index]; ++ s->cpu[cpu].coreisr[index] = old_data & ~val; + /* write 1 to clear interrupt */ + old_data &= val; + irq = ctz32(old_data); +@@ -248,14 +249,61 @@ static const MemoryRegionOps extioi_ops = { + .endianness = DEVICE_LITTLE_ENDIAN, + }; + +-static const VMStateDescription vmstate_loongarch_extioi = { +- .name = TYPE_LOONGARCH_EXTIOI, ++static void loongarch_extioi_realize(DeviceState *dev, Error **errp) ++{ ++ LoongArchExtIOI *s = LOONGARCH_EXTIOI(dev); ++ SysBusDevice *sbd = SYS_BUS_DEVICE(dev); ++ int i, pin; ++ ++ if (s->num_cpu == 0) { ++ error_setg(errp, "num-cpu must be at least 1"); ++ return; ++ } ++ ++ for (i = 0; i < EXTIOI_IRQS; i++) { ++ sysbus_init_irq(sbd, &s->irq[i]); ++ } ++ ++ qdev_init_gpio_in(dev, extioi_setirq, EXTIOI_IRQS); ++ memory_region_init_io(&s->extioi_system_mem, OBJECT(s), &extioi_ops, 
++ s, "extioi_system_mem", 0x900); ++ sysbus_init_mmio(sbd, &s->extioi_system_mem); ++ s->cpu = g_new0(ExtIOICore, s->num_cpu); ++ if (s->cpu == NULL) { ++ error_setg(errp, "Memory allocation for ExtIOICore faile"); ++ return; ++ } ++ ++ for (i = 0; i < s->num_cpu; i++) { ++ for (pin = 0; pin < LS3A_INTC_IP; pin++) { ++ qdev_init_gpio_out(dev, &s->cpu[i].parent_irq[pin], 1); ++ } ++ } ++} ++ ++static void loongarch_extioi_finalize(Object *obj) ++{ ++ LoongArchExtIOI *s = LOONGARCH_EXTIOI(obj); ++ ++ g_free(s->cpu); ++} ++ ++static const VMStateDescription vmstate_extioi_core = { ++ .name = "extioi-core", + .version_id = 1, + .minimum_version_id = 1, +- .fields = (VMStateField[]) { ++ .fields = (const VMStateField[]) { ++ VMSTATE_UINT32_ARRAY(coreisr, ExtIOICore, EXTIOI_IRQS_GROUP_COUNT), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ ++static const VMStateDescription vmstate_loongarch_extioi = { ++ .name = TYPE_LOONGARCH_EXTIOI, ++ .version_id = 2, ++ .minimum_version_id = 2, ++ .fields = (const VMStateField[]) { + VMSTATE_UINT32_ARRAY(bounce, LoongArchExtIOI, EXTIOI_IRQS_GROUP_COUNT), +- VMSTATE_UINT32_2DARRAY(coreisr, LoongArchExtIOI, EXTIOI_CPUS, +- EXTIOI_IRQS_GROUP_COUNT), + VMSTATE_UINT32_ARRAY(nodetype, LoongArchExtIOI, + EXTIOI_IRQS_NODETYPE_COUNT / 2), + VMSTATE_UINT32_ARRAY(enable, LoongArchExtIOI, EXTIOI_IRQS / 32), +@@ -265,45 +313,32 @@ static const VMStateDescription vmstate_loongarch_extioi = { + VMSTATE_UINT8_ARRAY(sw_ipmap, LoongArchExtIOI, EXTIOI_IRQS_IPMAP_SIZE), + VMSTATE_UINT8_ARRAY(sw_coremap, LoongArchExtIOI, EXTIOI_IRQS), + ++ VMSTATE_STRUCT_VARRAY_POINTER_UINT32(cpu, LoongArchExtIOI, num_cpu, ++ vmstate_extioi_core, ExtIOICore), + VMSTATE_END_OF_LIST() + } + }; + +-static void loongarch_extioi_instance_init(Object *obj) +-{ +- SysBusDevice *dev = SYS_BUS_DEVICE(obj); +- LoongArchExtIOI *s = LOONGARCH_EXTIOI(obj); +- int i, cpu, pin; +- +- for (i = 0; i < EXTIOI_IRQS; i++) { +- sysbus_init_irq(dev, &s->irq[i]); +- } +- +- 
qdev_init_gpio_in(DEVICE(obj), extioi_setirq, EXTIOI_IRQS); +- +- for (cpu = 0; cpu < EXTIOI_CPUS; cpu++) { +- for (pin = 0; pin < LS3A_INTC_IP; pin++) { +- qdev_init_gpio_out(DEVICE(obj), &s->parent_irq[cpu][pin], 1); +- } +- } +- memory_region_init_io(&s->extioi_system_mem, OBJECT(s), &extioi_ops, +- s, "extioi_system_mem", 0x900); +- sysbus_init_mmio(dev, &s->extioi_system_mem); +-} ++static Property extioi_properties[] = { ++ DEFINE_PROP_UINT32("num-cpu", LoongArchExtIOI, num_cpu, 1), ++ DEFINE_PROP_END_OF_LIST(), ++}; + + static void loongarch_extioi_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); + ++ dc->realize = loongarch_extioi_realize; ++ device_class_set_props(dc, extioi_properties); + dc->vmsd = &vmstate_loongarch_extioi; + } + + static const TypeInfo loongarch_extioi_info = { + .name = TYPE_LOONGARCH_EXTIOI, + .parent = TYPE_SYS_BUS_DEVICE, +- .instance_init = loongarch_extioi_instance_init, + .instance_size = sizeof(struct LoongArchExtIOI), + .class_init = loongarch_extioi_class_init, ++ .instance_finalize = loongarch_extioi_finalize, + }; + + static void loongarch_extioi_register_types(void) +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 13d19b6da3..c9a680e61a 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -582,6 +582,7 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + + /* Create EXTIOI device */ + extioi = qdev_new(TYPE_LOONGARCH_EXTIOI); ++ qdev_prop_set_uint32(extioi, "num-cpu", ms->smp.cpus); + sysbus_realize_and_unref(SYS_BUS_DEVICE(extioi), &error_fatal); + memory_region_add_subregion(&lams->system_iocsr, APIC_BASE, + sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 0)); +@@ -590,7 +591,7 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + * connect ext irq to the cpu irq + * cpu_pin[9:2] <= intc_pin[7:0] + */ +- for (cpu = 0; cpu < MIN(ms->smp.cpus, EXTIOI_CPUS); cpu++) { ++ for (cpu = 0; cpu < ms->smp.cpus; cpu++) { + cpudev = 
DEVICE(qemu_get_cpu(cpu)); + for (pin = 0; pin < LS3A_INTC_IP; pin++) { + qdev_connect_gpio_out(extioi, (cpu * 8 + pin), +diff --git a/include/hw/intc/loongarch_extioi.h b/include/hw/intc/loongarch_extioi.h +index 110e5e8873..a0a46b888c 100644 +--- a/include/hw/intc/loongarch_extioi.h ++++ b/include/hw/intc/loongarch_extioi.h +@@ -40,24 +40,29 @@ + #define EXTIOI_COREMAP_START (0xC00 - APIC_OFFSET) + #define EXTIOI_COREMAP_END (0xD00 - APIC_OFFSET) + ++typedef struct ExtIOICore { ++ uint32_t coreisr[EXTIOI_IRQS_GROUP_COUNT]; ++ DECLARE_BITMAP(sw_isr[LS3A_INTC_IP], EXTIOI_IRQS); ++ qemu_irq parent_irq[LS3A_INTC_IP]; ++} ExtIOICore; ++ + #define TYPE_LOONGARCH_EXTIOI "loongarch.extioi" + OBJECT_DECLARE_SIMPLE_TYPE(LoongArchExtIOI, LOONGARCH_EXTIOI) + struct LoongArchExtIOI { + SysBusDevice parent_obj; ++ uint32_t num_cpu; + /* hardware state */ + uint32_t nodetype[EXTIOI_IRQS_NODETYPE_COUNT / 2]; + uint32_t bounce[EXTIOI_IRQS_GROUP_COUNT]; + uint32_t isr[EXTIOI_IRQS / 32]; +- uint32_t coreisr[EXTIOI_CPUS][EXTIOI_IRQS_GROUP_COUNT]; + uint32_t enable[EXTIOI_IRQS / 32]; + uint32_t ipmap[EXTIOI_IRQS_IPMAP_SIZE / 4]; + uint32_t coremap[EXTIOI_IRQS / 4]; + uint32_t sw_pending[EXTIOI_IRQS / 32]; +- DECLARE_BITMAP(sw_isr[EXTIOI_CPUS][LS3A_INTC_IP], EXTIOI_IRQS); + uint8_t sw_ipmap[EXTIOI_IRQS_IPMAP_SIZE]; + uint8_t sw_coremap[EXTIOI_IRQS]; +- qemu_irq parent_irq[EXTIOI_CPUS][LS3A_INTC_IP]; + qemu_irq irq[EXTIOI_IRQS]; ++ ExtIOICore *cpu; + MemoryRegion extioi_system_mem; + }; + #endif /* LOONGARCH_EXTIOI_H */ +-- +2.27.0 + diff --git a/hw-intc-loongarch_extioi-Add-virt-extension-support.patch b/hw-intc-loongarch_extioi-Add-virt-extension-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..e02d5afcf8f7bf23790de1713829abbdd9b51d73 --- /dev/null +++ b/hw-intc-loongarch_extioi-Add-virt-extension-support.patch @@ -0,0 +1,434 @@ +From 04aef27ede108edd63d288dd3bb395e22a603f42 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Mon, 11 Mar 2024 15:01:31 
+0800 +Subject: [PATCH] hw/intc/loongarch_extioi: Add virt extension support + +With hardware extioi, irq can be routed to four vcpus with hardware +extioi. This patch adds virt extension support, so that irq can +be routed to 256 vcpus. + +Signed-off-by: Song Gao +Signed-off-by: Bibo Mao +--- + hw/intc/loongarch_extioi.c | 88 ++++++++++++++++++++- + hw/loongarch/virt.c | 122 ++++++++++++++++++++++++++--- + include/hw/intc/loongarch_extioi.h | 21 +++++ + include/hw/loongarch/virt.h | 3 + + target/loongarch/cpu.h | 1 + + 5 files changed, 220 insertions(+), 15 deletions(-) + +diff --git a/hw/intc/loongarch_extioi.c b/hw/intc/loongarch_extioi.c +index bdfa3b481e..fa23e247ca 100644 +--- a/hw/intc/loongarch_extioi.c ++++ b/hw/intc/loongarch_extioi.c +@@ -143,15 +143,17 @@ static inline void extioi_update_sw_coremap(LoongArchExtIOI *s, int irq, + + for (i = 0; i < 4; i++) { + cpu = val & 0xff; +- cpu = ctz32(cpu); +- cpu = (cpu >= 4) ? 
0 : cpu; ++ } + val = val >> 8; + + if (s->sw_coremap[irq + i] == cpu) { + continue; + } + +- if (notify && test_bit(irq, (unsigned long *)s->isr)) { ++ if (notify && test_bit(irq + i, (unsigned long *)s->isr)) { + /* + * lower irq at old cpu and raise irq at new cpu + */ +@@ -265,6 +267,61 @@ static const MemoryRegionOps extioi_ops = { + .endianness = DEVICE_LITTLE_ENDIAN, + }; + ++static MemTxResult extioi_virt_readw(void *opaque, hwaddr addr, uint64_t *data, ++ unsigned size, MemTxAttrs attrs) ++{ ++ LoongArchExtIOI *s = LOONGARCH_EXTIOI(opaque); ++ ++ switch (addr) { ++ case EXTIOI_VIRT_FEATURES: ++ *data = s->features; ++ break; ++ case EXTIOI_VIRT_CONFIG: ++ *data = s->status; ++ break; ++ default: ++ break; ++ } ++ ++ return MEMTX_OK; ++} ++ ++static MemTxResult extioi_virt_writew(void *opaque, hwaddr addr, ++ uint64_t val, unsigned size, ++ MemTxAttrs attrs) ++{ ++ LoongArchExtIOI *s = LOONGARCH_EXTIOI(opaque); ++ ++ switch (addr) { ++ case EXTIOI_VIRT_FEATURES: ++ return MEMTX_ACCESS_ERROR; ++ ++ case EXTIOI_VIRT_CONFIG: ++ /* ++ * extioi features can only be set at disabled status ++ */ ++ if ((s->status & BIT(EXTIOI_ENABLE)) && val) { ++ return MEMTX_ACCESS_ERROR; ++ } ++ ++ s->status = val & s->features; ++ break; ++ default: ++ break; ++ } ++ return MEMTX_OK; ++} ++ ++static const MemoryRegionOps extioi_virt_ops = { ++ .read_with_attrs = extioi_virt_readw, ++ .write_with_attrs = extioi_virt_writew, ++ .impl.min_access_size = 4, ++ .impl.max_access_size = 4, ++ .valid.min_access_size = 4, ++ .valid.max_access_size = 8, ++ .endianness = DEVICE_LITTLE_ENDIAN, ++}; ++ + static void loongarch_extioi_realize(DeviceState *dev, Error **errp) + { + LoongArchExtIOI *s = LOONGARCH_EXTIOI(dev); +@@ -284,6 +341,16 @@ static void loongarch_extioi_realize(DeviceState *dev, Error **errp) + memory_region_init_io(&s->extioi_system_mem, OBJECT(s), &extioi_ops, + s, "extioi_system_mem", 0x900); + sysbus_init_mmio(sbd, &s->extioi_system_mem); ++ ++ if (s->features & 
BIT(EXTIOI_HAS_VIRT_EXTENSION)) { ++ memory_region_init_io(&s->virt_extend, OBJECT(s), &extioi_virt_ops, ++ s, "extioi_virt", EXTIOI_VIRT_SIZE); ++ sysbus_init_mmio(sbd, &s->virt_extend); ++ s->features |= EXTIOI_VIRT_HAS_FEATURES; ++ } else { ++ s->status |= BIT(EXTIOI_ENABLE); ++ } ++ + s->cpu = g_new0(ExtIOICore, s->num_cpu); + if (s->cpu == NULL) { + error_setg(errp, "Memory allocation for ExtIOICore faile"); +@@ -304,6 +371,16 @@ static void loongarch_extioi_finalize(Object *obj) + g_free(s->cpu); + } + ++static void loongarch_extioi_reset(DeviceState *d) ++{ ++ LoongArchExtIOI *s = LOONGARCH_EXTIOI(d); ++ ++ /* use legacy interrupt routing method by default */ ++ if (s->features & BIT(EXTIOI_HAS_VIRT_EXTENSION)) { ++ s->status = 0; ++ } ++} ++ + static int vmstate_extioi_post_load(void *opaque, int version_id) + { + LoongArchExtIOI *s = LOONGARCH_EXTIOI(opaque); +@@ -347,12 +424,16 @@ static const VMStateDescription vmstate_loongarch_extioi = { + + VMSTATE_STRUCT_VARRAY_POINTER_UINT32(cpu, LoongArchExtIOI, num_cpu, + vmstate_extioi_core, ExtIOICore), ++ VMSTATE_UINT32(features, LoongArchExtIOI), ++ VMSTATE_UINT32(status, LoongArchExtIOI), + VMSTATE_END_OF_LIST() + } + }; + + static Property extioi_properties[] = { + DEFINE_PROP_UINT32("num-cpu", LoongArchExtIOI, num_cpu, 1), ++ DEFINE_PROP_BIT("has-virtualization-extension", LoongArchExtIOI, features, ++ EXTIOI_HAS_VIRT_EXTENSION, 0), + DEFINE_PROP_END_OF_LIST(), + }; + +@@ -361,6 +442,7 @@ static void loongarch_extioi_class_init(ObjectClass *klass, void *data) + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = loongarch_extioi_realize; ++ dc->reset = loongarch_extioi_reset; + device_class_set_props(dc, extioi_properties); + dc->vmsd = &vmstate_loongarch_extioi; + } +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 6ef40fa24a..01e59f3a95 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -15,6 +15,8 @@ + #include "sysemu/runstate.h" + #include "sysemu/reset.h" + #include 
"sysemu/rtc.h" ++#include "sysemu/tcg.h" ++#include "sysemu/kvm.h" + #include "hw/loongarch/virt.h" + #include "exec/address-spaces.h" + #include "hw/irq.h" +@@ -54,6 +56,31 @@ struct loaderparams { + const char *initrd_filename; + }; + ++static bool virt_is_veiointc_enabled(LoongArchMachineState *lams) ++{ ++ if (lams->veiointc == ON_OFF_AUTO_OFF) { ++ return false; ++ } ++ return true; ++} ++ ++static void virt_get_veiointc(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ LoongArchMachineState *lams = LOONGARCH_MACHINE(obj); ++ OnOffAuto veiointc = lams->veiointc; ++ ++ visit_type_OnOffAuto(v, name, &veiointc, errp); ++} ++ ++static void virt_set_veiointc(Object *obj, Visitor *v, const char *name, ++ void *opaque, Error **errp) ++{ ++ LoongArchMachineState *lams = LOONGARCH_MACHINE(obj); ++ ++ visit_type_OnOffAuto(v, name, &lams->veiointc, errp); ++} ++ + static PFlashCFI01 *virt_flash_create1(LoongArchMachineState *lams, + const char *name, + const char *alias_prop_name) +@@ -618,9 +645,18 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + /* Create EXTIOI device */ + extioi = qdev_new(TYPE_LOONGARCH_EXTIOI); + qdev_prop_set_uint32(extioi, "num-cpu", ms->smp.cpus); ++ if (virt_is_veiointc_enabled(lams)) { ++ qdev_prop_set_bit(extioi, "has-virtualization-extension", true); ++ } + sysbus_realize_and_unref(SYS_BUS_DEVICE(extioi), &error_fatal); ++ + memory_region_add_subregion(&lams->system_iocsr, APIC_BASE, + sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 0)); ++ if (virt_is_veiointc_enabled(lams)) { ++ memory_region_add_subregion(&lams->system_iocsr, EXTIOI_VIRT_BASE, ++ sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 1)); ++ } ++ lams->extioi = extioi; + + /* + * connect ext irq to the cpu irq +@@ -780,32 +816,87 @@ static void loongarch_direct_kernel_boot(LoongArchMachineState *lams, + } + } + +-static void loongarch_qemu_write(void *opaque, hwaddr addr, +- uint64_t val, unsigned size) ++static MemTxResult 
loongarch_qemu_write(void *opaque, hwaddr addr, uint64_t val, ++ unsigned size, MemTxAttrs attrs) + { ++ LoongArchMachineState *lams = LOONGARCH_MACHINE(opaque); ++ uint64_t features; ++ ++ switch (addr) { ++ case MISC_FUNC_REG: ++ if (!virt_is_veiointc_enabled(lams)) { ++ return MEMTX_OK; ++ } ++ ++ features = address_space_ldl(&lams->as_iocsr, ++ EXTIOI_VIRT_BASE + EXTIOI_VIRT_CONFIG, ++ attrs, NULL); ++ if (val & BIT_ULL(IOCSRM_EXTIOI_EN)) { ++ features |= BIT(EXTIOI_ENABLE); ++ } ++ if (val & BIT_ULL(IOCSRM_EXTIOI_INT_ENCODE)) { ++ features |= BIT(EXTIOI_ENABLE_INT_ENCODE); ++ } ++ ++ address_space_stl(&lams->as_iocsr, ++ EXTIOI_VIRT_BASE + EXTIOI_VIRT_CONFIG, ++ features, attrs, NULL); ++ } ++ ++ return MEMTX_OK; + } + +-static uint64_t loongarch_qemu_read(void *opaque, hwaddr addr, unsigned size) ++static MemTxResult loongarch_qemu_read(void *opaque, hwaddr addr, ++ uint64_t *data, ++ unsigned size, MemTxAttrs attrs) + { ++ LoongArchMachineState *lams = LOONGARCH_MACHINE(opaque); ++ uint64_t ret = 0; ++ int features; ++ + switch (addr) { + case VERSION_REG: +- return 0x11ULL; ++ ret = 0x11ULL; ++ break; + case FEATURE_REG: +- return 1ULL << IOCSRF_MSI | 1ULL << IOCSRF_EXTIOI | +- 1ULL << IOCSRF_CSRIPI; ++ ret = 1ULL << IOCSRF_MSI | 1ULL << IOCSRF_EXTIOI | ++ 1ULL << IOCSRF_CSRIPI; ++ if (kvm_enabled()) { ++ ret |= 1ULL << IOCSRF_VM; ++ } ++ break; + case VENDOR_REG: +- return 0x6e6f73676e6f6f4cULL; /* "Loongson" */ ++ ret = 0x6e6f73676e6f6f4cULL; /* "Loongson" */ ++ break; + case CPUNAME_REG: +- return 0x303030354133ULL; /* "3A5000" */ ++ ret = 0x303030354133ULL; /* "3A5000" */ ++ break; + case MISC_FUNC_REG: +- return 1ULL << IOCSRM_EXTIOI_EN; ++ if (!virt_is_veiointc_enabled(lams)) { ++ ret |= BIT_ULL(IOCSRM_EXTIOI_EN); ++ break; ++ } ++ ++ features = address_space_ldl(&lams->as_iocsr, ++ EXTIOI_VIRT_BASE + EXTIOI_VIRT_CONFIG, ++ attrs, NULL); ++ if (features & BIT(EXTIOI_ENABLE)) { ++ ret |= BIT_ULL(IOCSRM_EXTIOI_EN); ++ } ++ ++ if (features & 
BIT(EXTIOI_ENABLE_INT_ENCODE)) { ++ ret |= BIT_ULL(IOCSRM_EXTIOI_INT_ENCODE); ++ } ++ break; + } +- return 0ULL; ++ ++ *data = ret; ++ return MEMTX_OK; + } + + static const MemoryRegionOps loongarch_qemu_ops = { +- .read = loongarch_qemu_read, +- .write = loongarch_qemu_write, ++ .read_with_attrs = loongarch_qemu_read, ++ .write_with_attrs = loongarch_qemu_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, +@@ -1010,6 +1101,9 @@ static void loongarch_machine_initfn(Object *obj) + { + LoongArchMachineState *lams = LOONGARCH_MACHINE(obj); + ++ if (tcg_enabled()) { ++ lams->veiointc = ON_OFF_AUTO_OFF; ++ } + lams->acpi = ON_OFF_AUTO_AUTO; + lams->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6); + lams->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8); +@@ -1197,6 +1291,10 @@ static void loongarch_class_init(ObjectClass *oc, void *data) + NULL, NULL); + object_class_property_set_description(oc, "acpi", + "Enable ACPI"); ++ object_class_property_add(oc, "v-eiointc", "OnOffAuto", ++ virt_get_veiointc, virt_set_veiointc, NULL, NULL); ++ object_class_property_set_description(oc, "v-eiointc", ++ "Enable Virt Extend I/O Interrupt Controller"); + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE); + #ifdef CONFIG_TPM + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS); +diff --git a/include/hw/intc/loongarch_extioi.h b/include/hw/intc/loongarch_extioi.h +index a0a46b888c..98f348c49d 100644 +--- a/include/hw/intc/loongarch_extioi.h ++++ b/include/hw/intc/loongarch_extioi.h +@@ -40,6 +40,24 @@ + #define EXTIOI_COREMAP_START (0xC00 - APIC_OFFSET) + #define EXTIOI_COREMAP_END (0xD00 - APIC_OFFSET) + ++#define EXTIOI_VIRT_BASE (0x40000000) ++#define EXTIOI_VIRT_SIZE (0x1000) ++#define EXTIOI_VIRT_FEATURES (0x0) ++#define EXTIOI_HAS_VIRT_EXTENSION (0) ++#define EXTIOI_HAS_ENABLE_OPTION (1) ++#define EXTIOI_HAS_INT_ENCODE (2) ++#define EXTIOI_HAS_CPU_ENCODE (3) ++#define EXTIOI_VIRT_HAS_FEATURES (BIT(EXTIOI_HAS_VIRT_EXTENSION) \ ++ 
| BIT(EXTIOI_HAS_ENABLE_OPTION)\ ++ | BIT(EXTIOI_HAS_INT_ENCODE) \ ++ | BIT(EXTIOI_HAS_CPU_ENCODE)) ++#define EXTIOI_VIRT_CONFIG (0x4) ++#define EXTIOI_ENABLE (1) ++#define EXTIOI_ENABLE_INT_ENCODE (2) ++#define EXTIOI_ENABLE_CPU_ENCODE (3) ++#define EXTIOI_VIRT_COREMAP_START (0x40) ++#define EXTIOI_VIRT_COREMAP_END (0x240) ++ + typedef struct ExtIOICore { + uint32_t coreisr[EXTIOI_IRQS_GROUP_COUNT]; + DECLARE_BITMAP(sw_isr[LS3A_INTC_IP], EXTIOI_IRQS); +@@ -51,6 +69,8 @@ OBJECT_DECLARE_SIMPLE_TYPE(LoongArchExtIOI, LOONGARCH_EXTIOI) + struct LoongArchExtIOI { + SysBusDevice parent_obj; + uint32_t num_cpu; ++ uint32_t features; ++ uint32_t status; + /* hardware state */ + uint32_t nodetype[EXTIOI_IRQS_NODETYPE_COUNT / 2]; + uint32_t bounce[EXTIOI_IRQS_GROUP_COUNT]; +@@ -64,5 +84,6 @@ struct LoongArchExtIOI { + qemu_irq irq[EXTIOI_IRQS]; + ExtIOICore *cpu; + MemoryRegion extioi_system_mem; ++ MemoryRegion virt_extend; + }; + #endif /* LOONGARCH_EXTIOI_H */ +diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h +index 252f7df7f4..99447fd1d6 100644 +--- a/include/hw/loongarch/virt.h ++++ b/include/hw/loongarch/virt.h +@@ -45,16 +45,19 @@ struct LoongArchMachineState { + Notifier machine_done; + Notifier powerdown_notifier; + OnOffAuto acpi; ++ OnOffAuto veiointc; + char *oem_id; + char *oem_table_id; + DeviceState *acpi_ged; + int fdt_size; + DeviceState *platform_bus_dev; ++ DeviceState *extioi; + PCIBus *pci_bus; + PFlashCFI01 *flash[2]; + MemoryRegion system_iocsr; + MemoryRegion iocsr_mem; + AddressSpace as_iocsr; ++ int features; + }; + + #define TYPE_LOONGARCH_MACHINE MACHINE_TYPE_NAME("virt") +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index 4aba8aba4c..4749d41c8c 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -36,6 +36,7 @@ + #define CPUNAME_REG 0x20 + #define MISC_FUNC_REG 0x420 + #define IOCSRM_EXTIOI_EN 48 ++#define IOCSRM_EXTIOI_INT_ENCODE 49 + + #define IOCSR_MEM_SIZE 0x428 + +-- +2.33.0 + diff 
--git a/hw-intc-loongarch_extioi-Add-vmstate-post_load-suppo.patch b/hw-intc-loongarch_extioi-Add-vmstate-post_load-suppo.patch new file mode 100644 index 0000000000000000000000000000000000000000..03a378fe9aa98382c1b3b24780fe6bc39466d756 --- /dev/null +++ b/hw-intc-loongarch_extioi-Add-vmstate-post_load-suppo.patch @@ -0,0 +1,193 @@ +From db8c355d923c218c5ca373c4acd5d13493152889 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Fri, 15 Dec 2023 17:42:58 +0800 +Subject: [PATCH] hw/intc/loongarch_extioi: Add vmstate post_load support + +There are elements sw_ipmap and sw_coremap, which are used to speed +up irq injection flow. They are saved and restored in vmstate during +migration, indeed they can be calculated from hw registers. Here +post_load is added to get sw_ipmap and sw_coremap from extioi hw +state. + +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20231215100333.3933632-5-maobibo@loongson.cn> +Signed-off-by: Song Gao +--- + hw/intc/loongarch_extioi.c | 120 +++++++++++++++++++++++-------------- + 1 file changed, 76 insertions(+), 44 deletions(-) + +diff --git a/hw/intc/loongarch_extioi.c b/hw/intc/loongarch_extioi.c +index 28802bf3ef..bdfa3b481e 100644 +--- a/hw/intc/loongarch_extioi.c ++++ b/hw/intc/loongarch_extioi.c +@@ -130,12 +130,66 @@ static inline void extioi_enable_irq(LoongArchExtIOI *s, int index,\ + } + } + ++static inline void extioi_update_sw_coremap(LoongArchExtIOI *s, int irq, ++ uint64_t val, bool notify) ++{ ++ int i, cpu; ++ ++ /* ++ * loongarch only support little endian, ++ * so we paresd the value with little endian. ++ */ ++ val = cpu_to_le64(val); ++ ++ for (i = 0; i < 4; i++) { ++ cpu = val & 0xff; ++ cpu = ctz32(cpu); ++ cpu = (cpu >= 4) ? 
0 : cpu; ++ val = val >> 8; ++ ++ if (s->sw_coremap[irq + i] == cpu) { ++ continue; ++ } ++ ++ if (notify && test_bit(irq, (unsigned long *)s->isr)) { ++ /* ++ * lower irq at old cpu and raise irq at new cpu ++ */ ++ extioi_update_irq(s, irq + i, 0); ++ s->sw_coremap[irq + i] = cpu; ++ extioi_update_irq(s, irq + i, 1); ++ } else { ++ s->sw_coremap[irq + i] = cpu; ++ } ++ } ++} ++ ++static inline void extioi_update_sw_ipmap(LoongArchExtIOI *s, int index, ++ uint64_t val) ++{ ++ int i; ++ uint8_t ipnum; ++ ++ /* ++ * loongarch only support little endian, ++ * so we paresd the value with little endian. ++ */ ++ val = cpu_to_le64(val); ++ for (i = 0; i < 4; i++) { ++ ipnum = val & 0xff; ++ ipnum = ctz32(ipnum); ++ ipnum = (ipnum >= 4) ? 0 : ipnum; ++ s->sw_ipmap[index * 4 + i] = ipnum; ++ val = val >> 8; ++ } ++} ++ + static MemTxResult extioi_writew(void *opaque, hwaddr addr, + uint64_t val, unsigned size, + MemTxAttrs attrs) + { + LoongArchExtIOI *s = LOONGARCH_EXTIOI(opaque); +- int i, cpu, index, old_data, irq; ++ int cpu, index, old_data, irq; + uint32_t offset; + + trace_loongarch_extioi_writew(addr, val); +@@ -153,20 +207,7 @@ static MemTxResult extioi_writew(void *opaque, hwaddr addr, + */ + index = (offset - EXTIOI_IPMAP_START) >> 2; + s->ipmap[index] = val; +- /* +- * loongarch only support little endian, +- * so we paresd the value with little endian. +- */ +- val = cpu_to_le64(val); +- for (i = 0; i < 4; i++) { +- uint8_t ipnum; +- ipnum = val & 0xff; +- ipnum = ctz32(ipnum); +- ipnum = (ipnum >= 4) ? 0 : ipnum; +- s->sw_ipmap[index * 4 + i] = ipnum; +- val = val >> 8; +- } +- ++ extioi_update_sw_ipmap(s, index, val); + break; + case EXTIOI_ENABLE_START ... 
EXTIOI_ENABLE_END - 1: + index = (offset - EXTIOI_ENABLE_START) >> 2; +@@ -205,33 +246,8 @@ static MemTxResult extioi_writew(void *opaque, hwaddr addr, + irq = offset - EXTIOI_COREMAP_START; + index = irq / 4; + s->coremap[index] = val; +- /* +- * loongarch only support little endian, +- * so we paresd the value with little endian. +- */ +- val = cpu_to_le64(val); +- +- for (i = 0; i < 4; i++) { +- cpu = val & 0xff; +- cpu = ctz32(cpu); +- cpu = (cpu >= 4) ? 0 : cpu; +- val = val >> 8; +- +- if (s->sw_coremap[irq + i] == cpu) { +- continue; +- } +- +- if (test_bit(irq, (unsigned long *)s->isr)) { +- /* +- * lower irq at old cpu and raise irq at new cpu +- */ +- extioi_update_irq(s, irq + i, 0); +- s->sw_coremap[irq + i] = cpu; +- extioi_update_irq(s, irq + i, 1); +- } else { +- s->sw_coremap[irq + i] = cpu; +- } +- } ++ ++ extioi_update_sw_coremap(s, irq, val, true); + break; + default: + break; +@@ -288,6 +304,23 @@ static void loongarch_extioi_finalize(Object *obj) + g_free(s->cpu); + } + ++static int vmstate_extioi_post_load(void *opaque, int version_id) ++{ ++ LoongArchExtIOI *s = LOONGARCH_EXTIOI(opaque); ++ int i, start_irq; ++ ++ for (i = 0; i < (EXTIOI_IRQS / 4); i++) { ++ start_irq = i * 4; ++ extioi_update_sw_coremap(s, start_irq, s->coremap[i], false); ++ } ++ ++ for (i = 0; i < (EXTIOI_IRQS_IPMAP_SIZE / 4); i++) { ++ extioi_update_sw_ipmap(s, i, s->ipmap[i]); ++ } ++ ++ return 0; ++} ++ + static const VMStateDescription vmstate_extioi_core = { + .name = "extioi-core", + .version_id = 1, +@@ -302,6 +335,7 @@ static const VMStateDescription vmstate_loongarch_extioi = { + .name = TYPE_LOONGARCH_EXTIOI, + .version_id = 2, + .minimum_version_id = 2, ++ .post_load = vmstate_extioi_post_load, + .fields = (const VMStateField[]) { + VMSTATE_UINT32_ARRAY(bounce, LoongArchExtIOI, EXTIOI_IRQS_GROUP_COUNT), + VMSTATE_UINT32_ARRAY(nodetype, LoongArchExtIOI, +@@ -310,8 +344,6 @@ static const VMStateDescription vmstate_loongarch_extioi = { + VMSTATE_UINT32_ARRAY(isr, 
LoongArchExtIOI, EXTIOI_IRQS / 32), + VMSTATE_UINT32_ARRAY(ipmap, LoongArchExtIOI, EXTIOI_IRQS_IPMAP_SIZE / 4), + VMSTATE_UINT32_ARRAY(coremap, LoongArchExtIOI, EXTIOI_IRQS / 4), +- VMSTATE_UINT8_ARRAY(sw_ipmap, LoongArchExtIOI, EXTIOI_IRQS_IPMAP_SIZE), +- VMSTATE_UINT8_ARRAY(sw_coremap, LoongArchExtIOI, EXTIOI_IRQS), + + VMSTATE_STRUCT_VARRAY_POINTER_UINT32(cpu, LoongArchExtIOI, num_cpu, + vmstate_extioi_core, ExtIOICore), +-- +2.27.0 + diff --git a/hw-intc-loongarch_ipi-Use-MemTxAttrs-interface-for-i.patch b/hw-intc-loongarch_ipi-Use-MemTxAttrs-interface-for-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..8309aa9b0b440f5eadfad2fa9d3bee075fe64158 --- /dev/null +++ b/hw-intc-loongarch_ipi-Use-MemTxAttrs-interface-for-i.patch @@ -0,0 +1,286 @@ +From b8f53cfa91e86d5163318f8ade1cca18e94f3eb7 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Wed, 13 Dec 2023 12:12:01 +0800 +Subject: [PATCH] hw/intc/loongarch_ipi: Use MemTxAttrs interface for ipi ops + +There are two interface pairs for MemoryRegionOps, read/write and +read_with_attrs/write_with_attrs. The later is better for ipi device +emulation since initial cpu can be parsed from attrs.requester_id. + +And requester_id can be overrided for IOCSR_IPI_SEND and mail_send +function when it is to forward message to another vcpu. 
+ +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20231215100333.3933632-2-maobibo@loongson.cn> +Signed-off-by: Song Gao +--- + hw/intc/loongarch_ipi.c | 136 +++++++++++++++++++++++----------------- + 1 file changed, 77 insertions(+), 59 deletions(-) + +diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c +index 67858b521c..221246c5cb 100644 +--- a/hw/intc/loongarch_ipi.c ++++ b/hw/intc/loongarch_ipi.c +@@ -17,14 +17,16 @@ + #include "target/loongarch/internals.h" + #include "trace.h" + +-static void loongarch_ipi_writel(void *, hwaddr, uint64_t, unsigned); +- +-static uint64_t loongarch_ipi_readl(void *opaque, hwaddr addr, unsigned size) ++static MemTxResult loongarch_ipi_readl(void *opaque, hwaddr addr, ++ uint64_t *data, ++ unsigned size, MemTxAttrs attrs) + { +- IPICore *s = opaque; ++ IPICore *s; ++ LoongArchIPI *ipi = opaque; + uint64_t ret = 0; + int index = 0; + ++ s = &ipi->ipi_core; + addr &= 0xff; + switch (addr) { + case CORE_STATUS_OFF: +@@ -49,10 +51,12 @@ static uint64_t loongarch_ipi_readl(void *opaque, hwaddr addr, unsigned size) + } + + trace_loongarch_ipi_read(size, (uint64_t)addr, ret); +- return ret; ++ *data = ret; ++ return MEMTX_OK; + } + +-static void send_ipi_data(CPULoongArchState *env, uint64_t val, hwaddr addr) ++static void send_ipi_data(CPULoongArchState *env, uint64_t val, hwaddr addr, ++ MemTxAttrs attrs) + { + int i, mask = 0, data = 0; + +@@ -62,7 +66,7 @@ static void send_ipi_data(CPULoongArchState *env, uint64_t val, hwaddr addr) + */ + if ((val >> 27) & 0xf) { + data = address_space_ldl(&env->address_space_iocsr, addr, +- MEMTXATTRS_UNSPECIFIED, NULL); ++ attrs, NULL); + for (i = 0; i < 4; i++) { + /* get mask for byte writing */ + if (val & (0x1 << (27 + i))) { +@@ -74,7 +78,7 @@ static void send_ipi_data(CPULoongArchState *env, uint64_t val, hwaddr addr) + data &= mask; + data |= (val >> 32) & ~mask; + address_space_stl(&env->address_space_iocsr, addr, +- data, MEMTXATTRS_UNSPECIFIED, NULL); ++ data, 
attrs, NULL); + } + + static int archid_cmp(const void *a, const void *b) +@@ -103,80 +107,72 @@ static CPUState *ipi_getcpu(int arch_id) + CPUArchId *archid; + + archid = find_cpu_by_archid(machine, arch_id); +- return CPU(archid->cpu); +-} +- +-static void ipi_send(uint64_t val) +-{ +- uint32_t cpuid; +- uint8_t vector; +- CPUState *cs; +- LoongArchCPU *cpu; +- LoongArchIPI *s; +- +- cpuid = extract32(val, 16, 10); +- if (cpuid >= LOONGARCH_MAX_CPUS) { +- trace_loongarch_ipi_unsupported_cpuid("IOCSR_IPI_SEND", cpuid); +- return; ++ if (archid) { ++ return CPU(archid->cpu); + } + +- /* IPI status vector */ +- vector = extract8(val, 0, 5); +- +- cs = ipi_getcpu(cpuid); +- cpu = LOONGARCH_CPU(cs); +- s = LOONGARCH_IPI(cpu->env.ipistate); +- loongarch_ipi_writel(&s->ipi_core, CORE_SET_OFF, BIT(vector), 4); ++ return NULL; + } + +-static void mail_send(uint64_t val) ++static MemTxResult mail_send(uint64_t val, MemTxAttrs attrs) + { + uint32_t cpuid; + hwaddr addr; +- CPULoongArchState *env; + CPUState *cs; +- LoongArchCPU *cpu; + + cpuid = extract32(val, 16, 10); + if (cpuid >= LOONGARCH_MAX_CPUS) { + trace_loongarch_ipi_unsupported_cpuid("IOCSR_MAIL_SEND", cpuid); +- return; ++ return MEMTX_DECODE_ERROR; + } + +- addr = 0x1020 + (val & 0x1c); + cs = ipi_getcpu(cpuid); +- cpu = LOONGARCH_CPU(cs); +- env = &cpu->env; +- send_ipi_data(env, val, addr); ++ if (cs == NULL) { ++ return MEMTX_DECODE_ERROR; ++ } ++ ++ /* override requester_id */ ++ addr = SMP_IPI_MAILBOX + CORE_BUF_20 + (val & 0x1c); ++ attrs.requester_id = cs->cpu_index; ++ send_ipi_data(&LOONGARCH_CPU(cs)->env, val, addr, attrs); ++ return MEMTX_OK; + } + +-static void any_send(uint64_t val) ++static MemTxResult any_send(uint64_t val, MemTxAttrs attrs) + { + uint32_t cpuid; + hwaddr addr; +- CPULoongArchState *env; + CPUState *cs; +- LoongArchCPU *cpu; + + cpuid = extract32(val, 16, 10); + if (cpuid >= LOONGARCH_MAX_CPUS) { + trace_loongarch_ipi_unsupported_cpuid("IOCSR_ANY_SEND", cpuid); +- return; ++ 
return MEMTX_DECODE_ERROR; + } + +- addr = val & 0xffff; + cs = ipi_getcpu(cpuid); +- cpu = LOONGARCH_CPU(cs); +- env = &cpu->env; +- send_ipi_data(env, val, addr); ++ if (cs == NULL) { ++ return MEMTX_DECODE_ERROR; ++ } ++ ++ /* override requester_id */ ++ addr = val & 0xffff; ++ attrs.requester_id = cs->cpu_index; ++ send_ipi_data(&LOONGARCH_CPU(cs)->env, val, addr, attrs); ++ return MEMTX_OK; + } + +-static void loongarch_ipi_writel(void *opaque, hwaddr addr, uint64_t val, +- unsigned size) ++static MemTxResult loongarch_ipi_writel(void *opaque, hwaddr addr, uint64_t val, ++ unsigned size, MemTxAttrs attrs) + { +- IPICore *s = opaque; ++ LoongArchIPI *ipi = opaque; ++ IPICore *s; + int index = 0; ++ uint32_t cpuid; ++ uint8_t vector; ++ CPUState *cs; + ++ s = &ipi->ipi_core; + addr &= 0xff; + trace_loongarch_ipi_write(size, (uint64_t)addr, val); + switch (addr) { +@@ -203,17 +199,35 @@ static void loongarch_ipi_writel(void *opaque, hwaddr addr, uint64_t val, + s->buf[index] = val; + break; + case IOCSR_IPI_SEND: +- ipi_send(val); ++ cpuid = extract32(val, 16, 10); ++ if (cpuid >= LOONGARCH_MAX_CPUS) { ++ trace_loongarch_ipi_unsupported_cpuid("IOCSR_IPI_SEND", cpuid); ++ return MEMTX_DECODE_ERROR; ++ } ++ ++ /* IPI status vector */ ++ vector = extract8(val, 0, 5); ++ cs = ipi_getcpu(cpuid); ++ if (cs == NULL) { ++ return MEMTX_DECODE_ERROR; ++ } ++ ++ /* override requester_id */ ++ attrs.requester_id = cs->cpu_index; ++ ipi = LOONGARCH_IPI(LOONGARCH_CPU(cs)->env.ipistate); ++ loongarch_ipi_writel(ipi, CORE_SET_OFF, BIT(vector), 4, attrs); + break; + default: + qemu_log_mask(LOG_UNIMP, "invalid write: %x", (uint32_t)addr); + break; + } ++ ++ return MEMTX_OK; + } + + static const MemoryRegionOps loongarch_ipi_ops = { +- .read = loongarch_ipi_readl, +- .write = loongarch_ipi_writel, ++ .read_with_attrs = loongarch_ipi_readl, ++ .write_with_attrs = loongarch_ipi_writel, + .impl.min_access_size = 4, + .impl.max_access_size = 4, + .valid.min_access_size = 4, +@@ 
-222,24 +236,28 @@ static const MemoryRegionOps loongarch_ipi_ops = { + }; + + /* mail send and any send only support writeq */ +-static void loongarch_ipi_writeq(void *opaque, hwaddr addr, uint64_t val, +- unsigned size) ++static MemTxResult loongarch_ipi_writeq(void *opaque, hwaddr addr, uint64_t val, ++ unsigned size, MemTxAttrs attrs) + { ++ MemTxResult ret = MEMTX_OK; ++ + addr &= 0xfff; + switch (addr) { + case MAIL_SEND_OFFSET: +- mail_send(val); ++ ret = mail_send(val, attrs); + break; + case ANY_SEND_OFFSET: +- any_send(val); ++ ret = any_send(val, attrs); + break; + default: + break; + } ++ ++ return ret; + } + + static const MemoryRegionOps loongarch_ipi64_ops = { +- .write = loongarch_ipi_writeq, ++ .write_with_attrs = loongarch_ipi_writeq, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .valid.min_access_size = 8, +@@ -253,7 +271,7 @@ static void loongarch_ipi_init(Object *obj) + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + + memory_region_init_io(&s->ipi_iocsr_mem, obj, &loongarch_ipi_ops, +- &s->ipi_core, "loongarch_ipi_iocsr", 0x48); ++ s, "loongarch_ipi_iocsr", 0x48); + + /* loongarch_ipi_iocsr performs re-entrant IO through ipi_send */ + s->ipi_iocsr_mem.disable_reentrancy_guard = true; +@@ -261,7 +279,7 @@ static void loongarch_ipi_init(Object *obj) + sysbus_init_mmio(sbd, &s->ipi_iocsr_mem); + + memory_region_init_io(&s->ipi64_iocsr_mem, obj, &loongarch_ipi64_ops, +- &s->ipi_core, "loongarch_ipi64_iocsr", 0x118); ++ s, "loongarch_ipi64_iocsr", 0x118); + sysbus_init_mmio(sbd, &s->ipi64_iocsr_mem); + qdev_init_gpio_out(DEVICE(obj), &s->ipi_core.irq, 1); + } +-- +2.27.0 + diff --git a/hw-isa-vt82c686-Keep-track-of-PIRQ-PINT-pins-separat.patch b/hw-isa-vt82c686-Keep-track-of-PIRQ-PINT-pins-separat.patch new file mode 100644 index 0000000000000000000000000000000000000000..7a3eab33573785ccf086284fcd4f8e5ff98f2e70 --- /dev/null +++ b/hw-isa-vt82c686-Keep-track-of-PIRQ-PINT-pins-separat.patch @@ -0,0 +1,49 @@ +From 
74817cbc4ccb4e3b0f6d7b464b5707d3fbc5f686 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Tue, 23 Apr 2024 10:40:32 +0800 +Subject: [PATCH] hw/isa/vt82c686: Keep track of PIRQ/PINT pins separately +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from f33274265a242df5d9fdb00915fe72fbb1b2a3c4 + +Move calculation of mask after the switch which sets the function +number for PIRQ/PINT pins to make sure the state of these pins are +kept track of separately and IRQ is raised if any of them is active. + +Cc: qemu-stable@nongnu.org +Fixes: 7e01bd80c1 hw/isa/vt82c686: Bring back via_isa_set_irq() +Signed-off-by: BALATON Zoltan +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20240410222543.0EA534E6005@zero.eik.bme.hu> +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: qihao_yewu +--- + hw/isa/vt82c686.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c +index 9c2333a277..0334431219 100644 +--- a/hw/isa/vt82c686.c ++++ b/hw/isa/vt82c686.c +@@ -613,7 +613,7 @@ void via_isa_set_irq(PCIDevice *d, int pin, int level) + ViaISAState *s = VIA_ISA(pci_get_function_0(d)); + uint8_t irq = d->config[PCI_INTERRUPT_LINE], max_irq = 15; + int f = PCI_FUNC(d->devfn); +- uint16_t mask = BIT(f); ++ uint16_t mask; + + switch (f) { + case 0: /* PIRQ/PINT inputs */ +@@ -628,6 +628,7 @@ void via_isa_set_irq(PCIDevice *d, int pin, int level) + } + + /* Keep track of the state of all sources */ ++ mask = BIT(f); + if (level) { + s->irq_state[0] |= mask; + } else { +-- +2.27.0 + diff --git a/hw-loongarch-virt-Align-high-memory-base-address-wit.patch b/hw-loongarch-virt-Align-high-memory-base-address-wit.patch new file mode 100644 index 0000000000000000000000000000000000000000..5dc5ce306046135f7a644e3c26ab88ded6a14320 --- /dev/null +++ b/hw-loongarch-virt-Align-high-memory-base-address-wit.patch @@ -0,0 +1,39 @@ +From 70e7ffec16e91138309ad3f76588cbd10c084394 Mon Sep 17 00:00:00 
2001 +From: Bibo Mao +Date: Mon, 27 Nov 2023 12:02:31 +0800 +Subject: [PATCH] hw/loongarch/virt: Align high memory base address with super + page size + +With LoongArch virt machine, there is low memory space with region +0--0x10000000, and high memory space with started from 0x90000000. +High memory space is aligned with 256M, it will be better if it is +aligned with 1G, which is super page aligned for 4K page size. + +Currently linux kernel and uefi bios has no limitation with high +memory base address, it is ok to set high memory base address +with 0x80000000. + +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20231127040231.4123715-1-maobibo@loongson.cn> +Signed-off-by: Song Gao +--- + include/hw/loongarch/virt.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h +index 674f4655e0..db0831b471 100644 +--- a/include/hw/loongarch/virt.h ++++ b/include/hw/loongarch/virt.h +@@ -25,7 +25,7 @@ + + #define VIRT_LOWMEM_BASE 0 + #define VIRT_LOWMEM_SIZE 0x10000000 +-#define VIRT_HIGHMEM_BASE 0x90000000 ++#define VIRT_HIGHMEM_BASE 0x80000000 + #define VIRT_GED_EVT_ADDR 0x100e0000 + #define VIRT_GED_MEM_ADDR (VIRT_GED_EVT_ADDR + ACPI_GED_EVT_SEL_LEN) + #define VIRT_GED_REG_ADDR (VIRT_GED_MEM_ADDR + MEMORY_HOTPLUG_IO_LEN) +-- +2.27.0 + diff --git a/hw-loongarch-virt-Set-iocsr-address-space-per-board-.patch b/hw-loongarch-virt-Set-iocsr-address-space-per-board-.patch new file mode 100644 index 0000000000000000000000000000000000000000..cd8ec42fdca10e12f964d76dbadf25294ac364b2 --- /dev/null +++ b/hw-loongarch-virt-Set-iocsr-address-space-per-board-.patch @@ -0,0 +1,558 @@ +From 43100bba2bfd9de0c3bab7c3e815b02faa69242d Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Wed, 13 Dec 2023 12:13:14 +0800 +Subject: [PATCH] hw/loongarch/virt: Set iocsr address space per-board rather + than percpu + +LoongArch system has iocsr address space, most iocsr registers are +per-board, however some iocsr 
register spaces banked for percpu such +as ipi mailbox and extioi interrupt status. For banked iocsr space, +each cpu has the same iocsr space, but separate data. + +This patch changes iocsr address space per-board rather percpu, +for iocsr registers specified for cpu, MemTxAttrs.requester_id +can be parsed for the cpu. With this patches, the total address space +on board will be simple, only iocsr address space and system memory, +rather than the number of cpu and system memory. + +confict: + ++<<<<<<< HEAD + + .version_id = 1, + + .minimum_version_id = 1, + + .fields = (VMStateField[]) { + + VMSTATE_STRUCT(ipi_core, LoongArchIPI, 0, vmstate_ipi_core, IPICore), +++======= ++ .version_id = 2, ++ .minimum_version_id = 2, ++ .fields = (const VMStateField[]) { ++ VMSTATE_STRUCT_VARRAY_POINTER_UINT32(cpu, LoongArchIPI, num_cpu, ++ vmstate_ipi_core, IPICore), +++>>>>>>> hw/loongarch/virt: Set iocsr address space per-board rather than percpu + +solve: +save: hw/loongarch/virt: Set iocsr address space per-board rather than percpu + +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20231215100333.3933632-3-maobibo@loongson.cn> +Signed-off-by: Song Gao +--- + hw/intc/loongarch_extioi.c | 3 - + hw/intc/loongarch_ipi.c | 63 +++++++++++++++----- + hw/loongarch/virt.c | 91 +++++++++++++++++++++-------- + include/hw/intc/loongarch_extioi.h | 1 - + include/hw/intc/loongarch_ipi.h | 3 +- + include/hw/loongarch/virt.h | 3 + + target/loongarch/cpu.c | 48 --------------- + target/loongarch/cpu.h | 4 +- + target/loongarch/kvm/kvm.c | 2 +- + target/loongarch/tcg/iocsr_helper.c | 16 ++--- + 10 files changed, 129 insertions(+), 105 deletions(-) + +diff --git a/hw/intc/loongarch_extioi.c b/hw/intc/loongarch_extioi.c +index 24fb3af8cc..77b4776958 100644 +--- a/hw/intc/loongarch_extioi.c ++++ b/hw/intc/loongarch_extioi.c +@@ -282,9 +282,6 @@ static void loongarch_extioi_instance_init(Object *obj) + qdev_init_gpio_in(DEVICE(obj), extioi_setirq, EXTIOI_IRQS); + + for (cpu = 0; cpu 
< EXTIOI_CPUS; cpu++) { +- memory_region_init_io(&s->extioi_iocsr_mem[cpu], OBJECT(s), &extioi_ops, +- s, "extioi_iocsr", 0x900); +- sysbus_init_mmio(dev, &s->extioi_iocsr_mem[cpu]); + for (pin = 0; pin < LS3A_INTC_IP; pin++) { + qdev_init_gpio_out(DEVICE(obj), &s->parent_irq[cpu][pin], 1); + } +diff --git a/hw/intc/loongarch_ipi.c b/hw/intc/loongarch_ipi.c +index 221246c5cb..e228669aa5 100644 +--- a/hw/intc/loongarch_ipi.c ++++ b/hw/intc/loongarch_ipi.c +@@ -9,6 +9,7 @@ + #include "hw/sysbus.h" + #include "hw/intc/loongarch_ipi.h" + #include "hw/irq.h" ++#include "hw/qdev-properties.h" + #include "qapi/error.h" + #include "qemu/log.h" + #include "exec/address-spaces.h" +@@ -26,7 +27,7 @@ static MemTxResult loongarch_ipi_readl(void *opaque, hwaddr addr, + uint64_t ret = 0; + int index = 0; + +- s = &ipi->ipi_core; ++ s = &ipi->cpu[attrs.requester_id]; + addr &= 0xff; + switch (addr) { + case CORE_STATUS_OFF: +@@ -65,7 +66,7 @@ static void send_ipi_data(CPULoongArchState *env, uint64_t val, hwaddr addr, + * if the mask is 0, we need not to do anything. 
+ */ + if ((val >> 27) & 0xf) { +- data = address_space_ldl(&env->address_space_iocsr, addr, ++ data = address_space_ldl(env->address_space_iocsr, addr, + attrs, NULL); + for (i = 0; i < 4; i++) { + /* get mask for byte writing */ +@@ -77,7 +78,7 @@ static void send_ipi_data(CPULoongArchState *env, uint64_t val, hwaddr addr, + + data &= mask; + data |= (val >> 32) & ~mask; +- address_space_stl(&env->address_space_iocsr, addr, ++ address_space_stl(env->address_space_iocsr, addr, + data, attrs, NULL); + } + +@@ -172,7 +173,7 @@ static MemTxResult loongarch_ipi_writel(void *opaque, hwaddr addr, uint64_t val, + uint8_t vector; + CPUState *cs; + +- s = &ipi->ipi_core; ++ s = &ipi->cpu[attrs.requester_id]; + addr &= 0xff; + trace_loongarch_ipi_write(size, (uint64_t)addr, val); + switch (addr) { +@@ -214,7 +215,6 @@ static MemTxResult loongarch_ipi_writel(void *opaque, hwaddr addr, uint64_t val, + + /* override requester_id */ + attrs.requester_id = cs->cpu_index; +- ipi = LOONGARCH_IPI(LOONGARCH_CPU(cs)->env.ipistate); + loongarch_ipi_writel(ipi, CORE_SET_OFF, BIT(vector), 4, attrs); + break; + default: +@@ -265,12 +265,18 @@ static const MemoryRegionOps loongarch_ipi64_ops = { + .endianness = DEVICE_LITTLE_ENDIAN, + }; + +-static void loongarch_ipi_init(Object *obj) ++static void loongarch_ipi_realize(DeviceState *dev, Error **errp) + { +- LoongArchIPI *s = LOONGARCH_IPI(obj); +- SysBusDevice *sbd = SYS_BUS_DEVICE(obj); ++ LoongArchIPI *s = LOONGARCH_IPI(dev); ++ SysBusDevice *sbd = SYS_BUS_DEVICE(dev); ++ int i; ++ ++ if (s->num_cpu == 0) { ++ error_setg(errp, "num-cpu must be at least 1"); ++ return; ++ } + +- memory_region_init_io(&s->ipi_iocsr_mem, obj, &loongarch_ipi_ops, ++ memory_region_init_io(&s->ipi_iocsr_mem, OBJECT(dev), &loongarch_ipi_ops, + s, "loongarch_ipi_iocsr", 0x48); + + /* loongarch_ipi_iocsr performs re-entrant IO through ipi_send */ +@@ -278,10 +284,20 @@ static void loongarch_ipi_init(Object *obj) + + sysbus_init_mmio(sbd, &s->ipi_iocsr_mem); + 
+- memory_region_init_io(&s->ipi64_iocsr_mem, obj, &loongarch_ipi64_ops, ++ memory_region_init_io(&s->ipi64_iocsr_mem, OBJECT(dev), ++ &loongarch_ipi64_ops, + s, "loongarch_ipi64_iocsr", 0x118); + sysbus_init_mmio(sbd, &s->ipi64_iocsr_mem); +- qdev_init_gpio_out(DEVICE(obj), &s->ipi_core.irq, 1); ++ ++ s->cpu = g_new0(IPICore, s->num_cpu); ++ if (s->cpu == NULL) { ++ error_setg(errp, "Memory allocation for ExtIOICore faile"); ++ return; ++ } ++ ++ for (i = 0; i < s->num_cpu; i++) { ++ qdev_init_gpio_out(dev, &s->cpu[i].irq, 1); ++ } + } + + static const VMStateDescription vmstate_ipi_core = { +@@ -300,27 +316,42 @@ static const VMStateDescription vmstate_ipi_core = { + + static const VMStateDescription vmstate_loongarch_ipi = { + .name = TYPE_LOONGARCH_IPI, +- .version_id = 1, +- .minimum_version_id = 1, +- .fields = (VMStateField[]) { +- VMSTATE_STRUCT(ipi_core, LoongArchIPI, 0, vmstate_ipi_core, IPICore), ++ .version_id = 2, ++ .minimum_version_id = 2, ++ .fields = (const VMStateField[]) { ++ VMSTATE_STRUCT_VARRAY_POINTER_UINT32(cpu, LoongArchIPI, num_cpu, ++ vmstate_ipi_core, IPICore), + VMSTATE_END_OF_LIST() + } + }; + ++static Property ipi_properties[] = { ++ DEFINE_PROP_UINT32("num-cpu", LoongArchIPI, num_cpu, 1), ++ DEFINE_PROP_END_OF_LIST(), ++}; ++ + static void loongarch_ipi_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); + ++ dc->realize = loongarch_ipi_realize; ++ device_class_set_props(dc, ipi_properties); + dc->vmsd = &vmstate_loongarch_ipi; + } + ++static void loongarch_ipi_finalize(Object *obj) ++{ ++ LoongArchIPI *s = LOONGARCH_IPI(obj); ++ ++ g_free(s->cpu); ++} ++ + static const TypeInfo loongarch_ipi_info = { + .name = TYPE_LOONGARCH_IPI, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(LoongArchIPI), +- .instance_init = loongarch_ipi_init, + .class_init = loongarch_ipi_class_init, ++ .instance_finalize = loongarch_ipi_finalize, + }; + + static void loongarch_ipi_register_types(void) +diff --git 
a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index 4b7dc67a2d..13d19b6da3 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -535,9 +535,6 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + CPUState *cpu_state; + int cpu, pin, i, start, num; + +- extioi = qdev_new(TYPE_LOONGARCH_EXTIOI); +- sysbus_realize_and_unref(SYS_BUS_DEVICE(extioi), &error_fatal); +- + /* + * The connection of interrupts: + * +-----+ +---------+ +-------+ +@@ -559,36 +556,36 @@ static void loongarch_irq_init(LoongArchMachineState *lams) + * | UARTs | | Devices | | Devices | + * +--------+ +---------+ +---------+ + */ ++ ++ /* Create IPI device */ ++ ipi = qdev_new(TYPE_LOONGARCH_IPI); ++ qdev_prop_set_uint32(ipi, "num-cpu", ms->smp.cpus); ++ sysbus_realize_and_unref(SYS_BUS_DEVICE(ipi), &error_fatal); ++ ++ /* IPI iocsr memory region */ ++ memory_region_add_subregion(&lams->system_iocsr, SMP_IPI_MAILBOX, ++ sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 0)); ++ memory_region_add_subregion(&lams->system_iocsr, MAIL_SEND_ADDR, ++ sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), 1)); ++ + for (cpu = 0; cpu < ms->smp.cpus; cpu++) { + cpu_state = qemu_get_cpu(cpu); + cpudev = DEVICE(cpu_state); + lacpu = LOONGARCH_CPU(cpu_state); + env = &(lacpu->env); +- +- ipi = qdev_new(TYPE_LOONGARCH_IPI); +- sysbus_realize_and_unref(SYS_BUS_DEVICE(ipi), &error_fatal); ++ env->address_space_iocsr = &lams->as_iocsr; + + /* connect ipi irq to cpu irq */ +- qdev_connect_gpio_out(ipi, 0, qdev_get_gpio_in(cpudev, IRQ_IPI)); +- /* IPI iocsr memory region */ +- memory_region_add_subregion(&env->system_iocsr, SMP_IPI_MAILBOX, +- sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), +- 0)); +- memory_region_add_subregion(&env->system_iocsr, MAIL_SEND_ADDR, +- sysbus_mmio_get_region(SYS_BUS_DEVICE(ipi), +- 1)); +- /* +- * extioi iocsr memory region +- * only one extioi is added on loongarch virt machine +- * external device interrupt can only be routed to cpu 0-3 +- */ +- if (cpu < EXTIOI_CPUS) +- 
memory_region_add_subregion(&env->system_iocsr, APIC_BASE, +- sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), +- cpu)); ++ qdev_connect_gpio_out(ipi, cpu, qdev_get_gpio_in(cpudev, IRQ_IPI)); + env->ipistate = ipi; + } + ++ /* Create EXTIOI device */ ++ extioi = qdev_new(TYPE_LOONGARCH_EXTIOI); ++ sysbus_realize_and_unref(SYS_BUS_DEVICE(extioi), &error_fatal); ++ memory_region_add_subregion(&lams->system_iocsr, APIC_BASE, ++ sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 0)); ++ + /* + * connect ext irq to the cpu irq + * cpu_pin[9:2] <= intc_pin[7:0] +@@ -733,6 +730,43 @@ static void loongarch_direct_kernel_boot(LoongArchMachineState *lams, + } + } + ++static void loongarch_qemu_write(void *opaque, hwaddr addr, ++ uint64_t val, unsigned size) ++{ ++} ++ ++static uint64_t loongarch_qemu_read(void *opaque, hwaddr addr, unsigned size) ++{ ++ switch (addr) { ++ case VERSION_REG: ++ return 0x11ULL; ++ case FEATURE_REG: ++ return 1ULL << IOCSRF_MSI | 1ULL << IOCSRF_EXTIOI | ++ 1ULL << IOCSRF_CSRIPI; ++ case VENDOR_REG: ++ return 0x6e6f73676e6f6f4cULL; /* "Loongson" */ ++ case CPUNAME_REG: ++ return 0x303030354133ULL; /* "3A5000" */ ++ case MISC_FUNC_REG: ++ return 1ULL << IOCSRM_EXTIOI_EN; ++ } ++ return 0ULL; ++} ++ ++static const MemoryRegionOps loongarch_qemu_ops = { ++ .read = loongarch_qemu_read, ++ .write = loongarch_qemu_write, ++ .endianness = DEVICE_LITTLE_ENDIAN, ++ .valid = { ++ .min_access_size = 4, ++ .max_access_size = 8, ++ }, ++ .impl = { ++ .min_access_size = 8, ++ .max_access_size = 8, ++ }, ++}; ++ + static void loongarch_init(MachineState *machine) + { + LoongArchCPU *lacpu; +@@ -761,8 +795,17 @@ static void loongarch_init(MachineState *machine) + exit(1); + } + create_fdt(lams); +- /* Init CPUs */ + ++ /* Create IOCSR space */ ++ memory_region_init_io(&lams->system_iocsr, OBJECT(machine), NULL, ++ machine, "iocsr", UINT64_MAX); ++ address_space_init(&lams->as_iocsr, &lams->system_iocsr, "IOCSR"); ++ memory_region_init_io(&lams->iocsr_mem, 
OBJECT(machine), ++ &loongarch_qemu_ops, ++ machine, "iocsr_misc", 0x428); ++ memory_region_add_subregion(&lams->system_iocsr, 0, &lams->iocsr_mem); ++ ++ /* Init CPUs */ + possible_cpus = mc->possible_cpu_arch_ids(machine); + for (i = 0; i < possible_cpus->len; i++) { + cpu = cpu_create(machine->cpu_type); +diff --git a/include/hw/intc/loongarch_extioi.h b/include/hw/intc/loongarch_extioi.h +index fbdef9a7b3..110e5e8873 100644 +--- a/include/hw/intc/loongarch_extioi.h ++++ b/include/hw/intc/loongarch_extioi.h +@@ -58,7 +58,6 @@ struct LoongArchExtIOI { + uint8_t sw_coremap[EXTIOI_IRQS]; + qemu_irq parent_irq[EXTIOI_CPUS][LS3A_INTC_IP]; + qemu_irq irq[EXTIOI_IRQS]; +- MemoryRegion extioi_iocsr_mem[EXTIOI_CPUS]; + MemoryRegion extioi_system_mem; + }; + #endif /* LOONGARCH_EXTIOI_H */ +diff --git a/include/hw/intc/loongarch_ipi.h b/include/hw/intc/loongarch_ipi.h +index 6c6194786e..1c1e834849 100644 +--- a/include/hw/intc/loongarch_ipi.h ++++ b/include/hw/intc/loongarch_ipi.h +@@ -47,7 +47,8 @@ struct LoongArchIPI { + SysBusDevice parent_obj; + MemoryRegion ipi_iocsr_mem; + MemoryRegion ipi64_iocsr_mem; +- IPICore ipi_core; ++ uint32_t num_cpu; ++ IPICore *cpu; + }; + + #endif +diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h +index db0831b471..6ef9a92394 100644 +--- a/include/hw/loongarch/virt.h ++++ b/include/hw/loongarch/virt.h +@@ -50,6 +50,9 @@ struct LoongArchMachineState { + DeviceState *platform_bus_dev; + PCIBus *pci_bus; + PFlashCFI01 *flash; ++ MemoryRegion system_iocsr; ++ MemoryRegion iocsr_mem; ++ AddressSpace as_iocsr; + }; + + #define TYPE_LOONGARCH_MACHINE MACHINE_TYPE_NAME("virt") +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index 413414392b..6611d137a1 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -602,47 +602,6 @@ static void loongarch_cpu_realizefn(DeviceState *dev, Error **errp) + lacc->parent_realize(dev, errp); + } + +-#ifndef CONFIG_USER_ONLY +-static void 
loongarch_qemu_write(void *opaque, hwaddr addr, +- uint64_t val, unsigned size) +-{ +- qemu_log_mask(LOG_UNIMP, "[%s]: Unimplemented reg 0x%" HWADDR_PRIx "\n", +- __func__, addr); +-} +- +-static uint64_t loongarch_qemu_read(void *opaque, hwaddr addr, unsigned size) +-{ +- switch (addr) { +- case VERSION_REG: +- return 0x11ULL; +- case FEATURE_REG: +- return 1ULL << IOCSRF_MSI | 1ULL << IOCSRF_EXTIOI | +- 1ULL << IOCSRF_CSRIPI; +- case VENDOR_REG: +- return 0x6e6f73676e6f6f4cULL; /* "Loongson" */ +- case CPUNAME_REG: +- return 0x303030354133ULL; /* "3A5000" */ +- case MISC_FUNC_REG: +- return 1ULL << IOCSRM_EXTIOI_EN; +- } +- return 0ULL; +-} +- +-static const MemoryRegionOps loongarch_qemu_ops = { +- .read = loongarch_qemu_read, +- .write = loongarch_qemu_write, +- .endianness = DEVICE_LITTLE_ENDIAN, +- .valid = { +- .min_access_size = 4, +- .max_access_size = 8, +- }, +- .impl = { +- .min_access_size = 8, +- .max_access_size = 8, +- }, +-}; +-#endif +- + static bool loongarch_get_lsx(Object *obj, Error **errp) + { + LoongArchCPU *cpu = LOONGARCH_CPU(obj); +@@ -713,19 +672,12 @@ static void loongarch_cpu_init(Object *obj) + { + #ifndef CONFIG_USER_ONLY + LoongArchCPU *cpu = LOONGARCH_CPU(obj); +- CPULoongArchState *env = &cpu->env; + + qdev_init_gpio_in(DEVICE(cpu), loongarch_cpu_set_irq, N_IRQS); + #ifdef CONFIG_TCG + timer_init_ns(&cpu->timer, QEMU_CLOCK_VIRTUAL, + &loongarch_constant_timer_cb, cpu); + #endif +- memory_region_init_io(&env->system_iocsr, OBJECT(cpu), NULL, +- env, "iocsr", UINT64_MAX); +- address_space_init(&env->address_space_iocsr, &env->system_iocsr, "IOCSR"); +- memory_region_init_io(&env->iocsr_mem, OBJECT(cpu), &loongarch_qemu_ops, +- NULL, "iocsr_misc", 0x428); +- memory_region_add_subregion(&env->system_iocsr, 0, &env->iocsr_mem); + #endif + } + +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index 8ebd6fa1a7..4aba8aba4c 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -355,9 +355,7 @@ typedef struct 
CPUArchState { + #ifndef CONFIG_USER_ONLY + LoongArchTLB tlb[LOONGARCH_TLB_MAX]; + +- AddressSpace address_space_iocsr; +- MemoryRegion system_iocsr; +- MemoryRegion iocsr_mem; ++ AddressSpace *address_space_iocsr; + bool load_elf; + uint64_t elf_address; + uint32_t mp_state; +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index bd33ec2114..84bcdf5f86 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -733,7 +733,7 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + trace_kvm_arch_handle_exit(run->exit_reason); + switch (run->exit_reason) { + case KVM_EXIT_LOONGARCH_IOCSR: +- address_space_rw(&env->address_space_iocsr, ++ address_space_rw(env->address_space_iocsr, + run->iocsr_io.phys_addr, + attrs, + run->iocsr_io.data, +diff --git a/target/loongarch/tcg/iocsr_helper.c b/target/loongarch/tcg/iocsr_helper.c +index 6cd01d5f09..b6916f53d2 100644 +--- a/target/loongarch/tcg/iocsr_helper.c ++++ b/target/loongarch/tcg/iocsr_helper.c +@@ -17,52 +17,52 @@ + + uint64_t helper_iocsrrd_b(CPULoongArchState *env, target_ulong r_addr) + { +- return address_space_ldub(&env->address_space_iocsr, r_addr, ++ return address_space_ldub(env->address_space_iocsr, r_addr, + GET_MEMTXATTRS(env), NULL); + } + + uint64_t helper_iocsrrd_h(CPULoongArchState *env, target_ulong r_addr) + { +- return address_space_lduw(&env->address_space_iocsr, r_addr, ++ return address_space_lduw(env->address_space_iocsr, r_addr, + GET_MEMTXATTRS(env), NULL); + } + + uint64_t helper_iocsrrd_w(CPULoongArchState *env, target_ulong r_addr) + { +- return address_space_ldl(&env->address_space_iocsr, r_addr, ++ return address_space_ldl(env->address_space_iocsr, r_addr, + GET_MEMTXATTRS(env), NULL); + } + + uint64_t helper_iocsrrd_d(CPULoongArchState *env, target_ulong r_addr) + { +- return address_space_ldq(&env->address_space_iocsr, r_addr, ++ return address_space_ldq(env->address_space_iocsr, r_addr, + GET_MEMTXATTRS(env), NULL); + } + + void 
helper_iocsrwr_b(CPULoongArchState *env, target_ulong w_addr, + target_ulong val) + { +- address_space_stb(&env->address_space_iocsr, w_addr, ++ address_space_stb(env->address_space_iocsr, w_addr, + val, GET_MEMTXATTRS(env), NULL); + } + + void helper_iocsrwr_h(CPULoongArchState *env, target_ulong w_addr, + target_ulong val) + { +- address_space_stw(&env->address_space_iocsr, w_addr, ++ address_space_stw(env->address_space_iocsr, w_addr, + val, GET_MEMTXATTRS(env), NULL); + } + + void helper_iocsrwr_w(CPULoongArchState *env, target_ulong w_addr, + target_ulong val) + { +- address_space_stl(&env->address_space_iocsr, w_addr, ++ address_space_stl(env->address_space_iocsr, w_addr, + val, GET_MEMTXATTRS(env), NULL); + } + + void helper_iocsrwr_d(CPULoongArchState *env, target_ulong w_addr, + target_ulong val) + { +- address_space_stq(&env->address_space_iocsr, w_addr, ++ address_space_stq(env->address_space_iocsr, w_addr, + val, GET_MEMTXATTRS(env), NULL); + } +-- +2.27.0 + diff --git a/hw-net-net_tx_pkt-Fix-overrun-in-update_sctp_checksu.patch b/hw-net-net_tx_pkt-Fix-overrun-in-update_sctp_checksu.patch new file mode 100644 index 0000000000000000000000000000000000000000..e0659827143f2b0d4c8651ed7c1e6161f6fba139 --- /dev/null +++ b/hw-net-net_tx_pkt-Fix-overrun-in-update_sctp_checksu.patch @@ -0,0 +1,71 @@ +From c23034c79ad8632388bc00dd4268e429638eee9e Mon Sep 17 00:00:00 2001 +From: qihao +Date: Thu, 18 Apr 2024 14:45:15 +0800 +Subject: [PATCH] hw/net/net_tx_pkt: Fix overrun in update_sctp_checksum() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 83ddb3dbba2ee0f1767442ae6ee665058aeb1093 + +If a fragmented packet size is too short, do not try to +calculate its checksum. 
+ +Reproduced using: + + $ cat << EOF | qemu-system-i386 -display none -nodefaults \ + -machine q35,accel=qtest -m 32M \ + -device igb,netdev=net0 \ + -netdev user,id=net0 \ + -qtest stdio + outl 0xcf8 0x80000810 + outl 0xcfc 0xe0000000 + outl 0xcf8 0x80000804 + outw 0xcfc 0x06 + write 0xe0000403 0x1 0x02 + writel 0xe0003808 0xffffffff + write 0xe000381a 0x1 0x5b + write 0xe000381b 0x1 0x00 + EOF + Assertion failed: (offset == 0), function iov_from_buf_full, file util/iov.c, line 39. + #1 0x5575e81e952a in iov_from_buf_full qemu/util/iov.c:39:5 + #2 0x5575e6500768 in net_tx_pkt_update_sctp_checksum qemu/hw/net/net_tx_pkt.c:144:9 + #3 0x5575e659f3e1 in igb_setup_tx_offloads qemu/hw/net/igb_core.c:478:11 + #4 0x5575e659f3e1 in igb_tx_pkt_send qemu/hw/net/igb_core.c:552:10 + #5 0x5575e659f3e1 in igb_process_tx_desc qemu/hw/net/igb_core.c:671:17 + #6 0x5575e659f3e1 in igb_start_xmit qemu/hw/net/igb_core.c:903:9 + #7 0x5575e659f3e1 in igb_set_tdt qemu/hw/net/igb_core.c:2812:5 + #8 0x5575e657d6a4 in igb_core_write qemu/hw/net/igb_core.c:4248:9 + +Fixes: CVE-2024-3567 +Cc: qemu-stable@nongnu.org +Reported-by: Zheyu Ma +Fixes: f199b13bc1 ("igb: Implement Tx SCTP CSO") +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2273 +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Akihiko Odaki +Acked-by: Jason Wang +Message-Id: <20240410070459.49112-1-philmd@linaro.org> +Signed-off-by: qihao_yewu +--- + hw/net/net_tx_pkt.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c +index 2e5f58b3c9..d40d508a11 100644 +--- a/hw/net/net_tx_pkt.c ++++ b/hw/net/net_tx_pkt.c +@@ -141,6 +141,10 @@ bool net_tx_pkt_update_sctp_checksum(struct NetTxPkt *pkt) + uint32_t csum = 0; + struct iovec *pl_start_frag = pkt->vec + NET_TX_PKT_PL_START_FRAG; + ++ if (iov_size(pl_start_frag, pkt->payload_frags) < 8 + sizeof(csum)) { ++ return false; ++ } ++ + if (iov_from_buf(pl_start_frag, pkt->payload_frags, 8, &csum, sizeof(csum)) < sizeof(csum)) { 
+ return false; + } +-- +2.27.0 + diff --git a/hw-net-rocker_of_dpa-fix-double-free-bug-of-rocker-d.patch b/hw-net-rocker_of_dpa-fix-double-free-bug-of-rocker-d.patch new file mode 100644 index 0000000000000000000000000000000000000000..052c6cbe2e167250833171c371bc737f6351ddf8 --- /dev/null +++ b/hw-net-rocker_of_dpa-fix-double-free-bug-of-rocker-d.patch @@ -0,0 +1,40 @@ +From c3f204e02eacdd3e9ec6ac55396ccc7f115ad63e Mon Sep 17 00:00:00 2001 +From: Qiang Ning +Date: Mon, 12 Jul 2021 17:30:45 +0800 +Subject: [PATCH] hw/net/rocker_of_dpa: fix double free bug of rocker device + +The of_dpa_cmd_add_l2_flood function of the rocker device +releases the memory of group->l2_flood.group_ids before +applying for new memory. If the l2_group configured by +the guest does not match the input group->l2_flood.group_ids, +the err_out branch is redirected to release the memory of the +group->l2_flood.group_ids branch. The pointer is not set to +NULL after the memory is freed. When the guest accesses the +of_dpa_cmd_add_l2_flood function again, the memory of +group->l2_flood.group_ids is released again. As a result, +the memory is double free. + +Fix that by setting group->l2_flood.group_ids to NULL after free. 
+ +Signed-off-by: Jiajie Li +Signed-off-by: Qiang Ning +Signed-off-by: Yan Wang +--- + hw/net/rocker/rocker_of_dpa.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c +index 5e16056be6..c25438cccc 100644 +--- a/hw/net/rocker/rocker_of_dpa.c ++++ b/hw/net/rocker/rocker_of_dpa.c +@@ -2070,6 +2070,7 @@ static int of_dpa_cmd_add_l2_flood(OfDpa *of_dpa, OfDpaGroup *group, + err_out: + group->l2_flood.group_count = 0; + g_free(group->l2_flood.group_ids); ++ group->l2_flood.group_ids = NULL; + g_free(tlvs); + + return err; +-- +2.27.0 + diff --git a/hw-net-virtio-net-fix-qemu-set-used-ring-flag-even-v.patch b/hw-net-virtio-net-fix-qemu-set-used-ring-flag-even-v.patch new file mode 100644 index 0000000000000000000000000000000000000000..8789267286651d8dbfdc94fe845ec0eb198cf421 --- /dev/null +++ b/hw-net-virtio-net-fix-qemu-set-used-ring-flag-even-v.patch @@ -0,0 +1,73 @@ +From 7e18fd22e9c0b5b28462455f60c508d5341e0230 Mon Sep 17 00:00:00 2001 +From: qihao +Date: Wed, 3 Apr 2024 16:34:39 +0800 +Subject: [PATCH] hw/net/virtio-net: fix qemu set used ring flag even vhost + started +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 4c54f5bc8e1d38f15cc35b6a6932d8fbe219c692 + +When vhost-user or vhost-kernel is handling virtio net datapath, +QEMU should not touch used ring. + +But with vhost-user socket reconnect scenario, in a very rare case +(has pending kick event). 
VRING_USED_F_NO_NOTIFY is set by QEMU in +following code path: + + #0 virtio_queue_split_set_notification (vq=0x7ff5f4c920a8, enable=0) at ../hw/virtio/virtio.c:511 + #1 0x0000559d6dbf033b in virtio_queue_set_notification (vq=0x7ff5f4c920a8, enable=0) at ../hw/virtio/virtio.c:576 + #2 0x0000559d6dbbbdbc in virtio_net_handle_tx_bh (vdev=0x559d703a6aa0, vq=0x7ff5f4c920a8) at ../hw/net/virtio-net.c:2801 + #3 0x0000559d6dbf4791 in virtio_queue_notify_vq (vq=0x7ff5f4c920a8) at ../hw/virtio/virtio.c:2248 + #4 0x0000559d6dbf79da in virtio_queue_host_notifier_read (n=0x7ff5f4c9211c) at ../hw/virtio/virtio.c:3525 + #5 0x0000559d6d9a5814 in virtio_bus_cleanup_host_notifier (bus=0x559d703a6a20, n=1) at ../hw/virtio/virtio-bus.c:321 + #6 0x0000559d6dbf83c9 in virtio_device_stop_ioeventfd_impl (vdev=0x559d703a6aa0) at ../hw/virtio/virtio.c:3774 + #7 0x0000559d6d9a55c8 in virtio_bus_stop_ioeventfd (bus=0x559d703a6a20) at ../hw/virtio/virtio-bus.c:259 + #8 0x0000559d6d9a53e8 in virtio_bus_grab_ioeventfd (bus=0x559d703a6a20) at ../hw/virtio/virtio-bus.c:199 + #9 0x0000559d6dbf841c in virtio_device_grab_ioeventfd (vdev=0x559d703a6aa0) at ../hw/virtio/virtio.c:3783 + #10 0x0000559d6d9bde18 in vhost_dev_enable_notifiers (hdev=0x559d707edd70, vdev=0x559d703a6aa0) at ../hw/virtio/vhost.c:1592 + #11 0x0000559d6d89a0b8 in vhost_net_start_one (net=0x559d707edd70, dev=0x559d703a6aa0) at ../hw/net/vhost_net.c:266 + #12 0x0000559d6d89a6df in vhost_net_start (dev=0x559d703a6aa0, ncs=0x559d7048d890, data_queue_pairs=31, cvq=0) at ../hw/net/vhost_net.c:412 + #13 0x0000559d6dbb5b89 in virtio_net_vhost_status (n=0x559d703a6aa0, status=15 '\017') at ../hw/net/virtio-net.c:311 + #14 0x0000559d6dbb5e34 in virtio_net_set_status (vdev=0x559d703a6aa0, status=15 '\017') at ../hw/net/virtio-net.c:392 + #15 0x0000559d6dbb60d8 in virtio_net_set_link_status (nc=0x559d7048d890) at ../hw/net/virtio-net.c:455 + #16 0x0000559d6da64863 in qmp_set_link (name=0x559d6f0b83d0 "hostnet1", up=true, 
errp=0x7ffdd76569f0) at ../net/net.c:1459 + #17 0x0000559d6da7226e in net_vhost_user_event (opaque=0x559d6f0b83d0, event=CHR_EVENT_OPENED) at ../net/vhost-user.c:301 + #18 0x0000559d6ddc7f63 in chr_be_event (s=0x559d6f2ffea0, event=CHR_EVENT_OPENED) at ../chardev/char.c:62 + #19 0x0000559d6ddc7fdc in qemu_chr_be_event (s=0x559d6f2ffea0, event=CHR_EVENT_OPENED) at ../chardev/char.c:82 + +This issue causes guest kernel stop kicking device and traffic stop. + +Add vhost_started check in virtio_net_handle_tx_bh to fix this wrong +VRING_USED_F_NO_NOTIFY set. + +Signed-off-by: Yajun Wu +Reviewed-by: Jiri Pirko +Acked-by: Michael S. Tsirkin +Message-ID: <20240402045109.97729-1-yajunw@nvidia.com> +[PMD: Use unlikely()] +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: qihao_yewu +--- + hw/net/virtio-net.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 9559b3386a..c0a54f2d61 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -2831,6 +2831,10 @@ static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) + VirtIONet *n = VIRTIO_NET(vdev); + VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))]; + ++ if (unlikely(n->vhost_started)) { ++ return; ++ } ++ + if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) { + virtio_net_drop_tx_queue_data(vdev, vq); + return; +-- +2.27.0 + diff --git a/hw-nvme-Use-pcie_sriov_num_vfs-CVE-2024-26328.patch b/hw-nvme-Use-pcie_sriov_num_vfs-CVE-2024-26328.patch new file mode 100644 index 0000000000000000000000000000000000000000..7f546cd40b083c26f25eb974614a24e3c989ab35 --- /dev/null +++ b/hw-nvme-Use-pcie_sriov_num_vfs-CVE-2024-26328.patch @@ -0,0 +1,85 @@ +From 6a32c9764439093fe4b53f87059c35761d711e39 Mon Sep 17 00:00:00 2001 +From: Akihiko Odaki +Date: Wed, 28 Feb 2024 20:33:12 +0900 +Subject: [PATCH] hw/nvme: Use pcie_sriov_num_vfs() (CVE-2024-26328) + +nvme_sriov_pre_write_ctrl() used to directly inspect SR-IOV +configurations to know the 
number of VFs being disabled due to SR-IOV +configuration writes, but the logic was flawed and resulted in +out-of-bound memory access. + +It assumed PCI_SRIOV_NUM_VF always has the number of currently enabled +VFs, but it actually doesn't in the following cases: +- PCI_SRIOV_NUM_VF has been set but PCI_SRIOV_CTRL_VFE has never been. +- PCI_SRIOV_NUM_VF was written after PCI_SRIOV_CTRL_VFE was set. +- VFs were only partially enabled because of realization failure. + +It is a responsibility of pcie_sriov to interpret SR-IOV configurations +and pcie_sriov does it correctly, so use pcie_sriov_num_vfs(), which it +provides, to get the number of enabled VFs before and after SR-IOV +configuration writes. + +Cc: qemu-stable@nongnu.org +Fixes: CVE-2024-26328 +Fixes: 11871f53ef8e ("hw/nvme: Add support for the Virtualization Management command") +Suggested-by: Michael S. Tsirkin +Signed-off-by: Akihiko Odaki +Message-Id: <20240228-reuse-v8-1-282660281e60@daynix.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +--- + hw/nvme/ctrl.c | 26 ++++++++------------------ + 1 file changed, 8 insertions(+), 18 deletions(-) + +diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c +index f026245d1e..7a56e7b79b 100644 +--- a/hw/nvme/ctrl.c ++++ b/hw/nvme/ctrl.c +@@ -8466,36 +8466,26 @@ static void nvme_pci_reset(DeviceState *qdev) + nvme_ctrl_reset(n, NVME_RESET_FUNCTION); + } + +-static void nvme_sriov_pre_write_ctrl(PCIDevice *dev, uint32_t address, +- uint32_t val, int len) ++static void nvme_sriov_post_write_config(PCIDevice *dev, uint16_t old_num_vfs) + { + NvmeCtrl *n = NVME(dev); + NvmeSecCtrlEntry *sctrl; +- uint16_t sriov_cap = dev->exp.sriov_cap; +- uint32_t off = address - sriov_cap; +- int i, num_vfs; ++ int i; + +- if (!sriov_cap) { +- return; +- } +- +- if (range_covers_byte(off, len, PCI_SRIOV_CTRL)) { +- if (!(val & PCI_SRIOV_CTRL_VFE)) { +- num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); +- for (i = 0; i < num_vfs; i++) { +- sctrl = &n->sec_ctrl_list.sec[i]; +- nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false); +- } +- } ++ for (i = pcie_sriov_num_vfs(dev); i < old_num_vfs; i++) { ++ sctrl = &n->sec_ctrl_list.sec[i]; ++ nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false); + } + } + + static void nvme_pci_write_config(PCIDevice *dev, uint32_t address, + uint32_t val, int len) + { +- nvme_sriov_pre_write_ctrl(dev, address, val, len); ++ uint16_t old_num_vfs = pcie_sriov_num_vfs(dev); ++ + pci_default_write_config(dev, address, val, len); + pcie_cap_flr_write_config(dev, address, val, len); ++ nvme_sriov_post_write_config(dev, old_num_vfs); + } + + static const VMStateDescription nvme_vmstate = { +-- +2.27.0 + diff --git a/hw-nvme-fix-Werror-maybe-uninitialized.patch b/hw-nvme-fix-Werror-maybe-uninitialized.patch new file mode 100644 index 0000000000000000000000000000000000000000..29b8449f0ab4a8c37a778b45ccfb5040f50e2f3d --- /dev/null +++ b/hw-nvme-fix-Werror-maybe-uninitialized.patch @@ -0,0 +1,38 @@ +From 
2fc8029b9e274a0dbedc55b6b114b29e003b32ab Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E5=88=98=E5=A9=A720201110?= + +Date: Mon, 8 Apr 2024 04:32:11 -0400 +Subject: [PATCH] hw/nvme: fix -Werror=maybe-uninitialized +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +../hw/nvme/ctrl.c:6081:21: error: ‘result’ may be used uninitialized [-Werror=maybe-uninitialized] + +It's not obvious that 'result' is set in all code paths. When &result is +a returned argument, it's even less clear. + +Looking at various assignments, 0 seems to be a suitable default value. + +Signed-off-by: Marc-André Lureau +Signed-off-by: Liu Jing +--- + hw/nvme/ctrl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c +index 7a56e7b79b..237b5c8871 100644 +--- a/hw/nvme/ctrl.c ++++ b/hw/nvme/ctrl.c +@@ -5882,7 +5882,7 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeRequest *req) + uint32_t dw10 = le32_to_cpu(cmd->cdw10); + uint32_t dw11 = le32_to_cpu(cmd->cdw11); + uint32_t nsid = le32_to_cpu(cmd->nsid); +- uint32_t result; ++ uint32_t result = 0; + uint8_t fid = NVME_GETSETFEAT_FID(dw10); + NvmeGetFeatureSelect sel = NVME_GETFEAT_SELECT(dw10); + uint16_t iv; +-- +2.27.0 + diff --git a/hw-scsi-scsi-generic-Fix-io_timeout-property-not-app.patch b/hw-scsi-scsi-generic-Fix-io_timeout-property-not-app.patch new file mode 100644 index 0000000000000000000000000000000000000000..9f9b7abaabd8c367e22e6ba41a29a0c0cc8f9fc1 --- /dev/null +++ b/hw-scsi-scsi-generic-Fix-io_timeout-property-not-app.patch @@ -0,0 +1,48 @@ +From a57cbe41cd8b2d8bc31eac33ee74a3ac058d67dd Mon Sep 17 00:00:00 2001 +From: qihao +Date: Thu, 28 Mar 2024 15:24:25 +0800 +Subject: [PATCH] hw/scsi/scsi-generic: Fix io_timeout property not applying +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 7c7a9f578e4fb1adff7ac8d9acaaaedb87474e76 + +The io_timeout property, introduced in c9b6609 
(part of 6.0) is +silently overwritten by the hardcoded default value of 30 seconds +(DEFAULT_IO_TIMEOUT) in scsi_generic_realize because that function is +being called after the properties have already been applied. + +The property definition already has a default value which is applied +correctly when no value is explicitly set, so we can just remove the +code which overrides the io_timeout completely. + +This has been tested by stracing SG_IO operations with the io_timeout +property set and unset and now sets the timeout field in the ioctl +request to the proper value. + +Fixes: c9b6609b69facad ("scsi: make io_timeout configurable") +Signed-off-by: Lorenz Brun +Message-ID: <20240315145831.2531695-1-lorenz@brun.one> +Reviewed-by: Alex Bennée +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: qihao_yewu +--- + hw/scsi/scsi-generic.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c +index 22efcd09a6..12fdd8e748 100644 +--- a/hw/scsi/scsi-generic.c ++++ b/hw/scsi/scsi-generic.c +@@ -782,7 +782,6 @@ static void scsi_generic_realize(SCSIDevice *s, Error **errp) + + /* Only used by scsi-block, but initialize it nevertheless to be clean. 
*/ + s->default_scsi_version = -1; +- s->io_timeout = DEFAULT_IO_TIMEOUT; + scsi_generic_read_device_inquiry(s); + } + +-- +2.27.0 + diff --git a/hw-sd-sdhci-Do-not-update-TRNMOD-when-Command-Inhibi.patch b/hw-sd-sdhci-Do-not-update-TRNMOD-when-Command-Inhibi.patch new file mode 100644 index 0000000000000000000000000000000000000000..19a5a6a7f9d663b893b0c03c327892b14da03749 --- /dev/null +++ b/hw-sd-sdhci-Do-not-update-TRNMOD-when-Command-Inhibi.patch @@ -0,0 +1,135 @@ +From b628859b936c6d6348d2af9e6b6d2887c697b9b7 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Tue, 9 Apr 2024 16:19:27 +0200 +Subject: [PATCH] hw/sd/sdhci: Do not update TRNMOD when Command Inhibit (DAT) + is set(CVE-2024-3447) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Per "SD Host Controller Standard Specification Version 3.00": + + * 2.2.5 Transfer Mode Register (Offset 00Ch) + + Writes to this register shall be ignored when the Command + Inhibit (DAT) in the Present State register is 1. 
+ +Do not update the TRNMOD register when Command Inhibit (DAT) +bit is set to avoid the present-status register going out of +sync, leading to malicious guest using DMA mode and overflowing +the FIFO buffer: + + $ cat << EOF | qemu-system-i386 \ + -display none -nographic -nodefaults \ + -machine accel=qtest -m 512M \ + -device sdhci-pci,sd-spec-version=3 \ + -device sd-card,drive=mydrive \ + -drive if=none,index=0,file=null-co://,format=raw,id=mydrive \ + -qtest stdio + outl 0xcf8 0x80001013 + outl 0xcfc 0x91 + outl 0xcf8 0x80001001 + outl 0xcfc 0x06000000 + write 0x9100002c 0x1 0x05 + write 0x91000058 0x1 0x16 + write 0x91000005 0x1 0x04 + write 0x91000028 0x1 0x08 + write 0x16 0x1 0x21 + write 0x19 0x1 0x20 + write 0x9100000c 0x1 0x01 + write 0x9100000e 0x1 0x20 + write 0x9100000f 0x1 0x00 + write 0x9100000c 0x1 0x00 + write 0x91000020 0x1 0x00 + EOF + +Stack trace (part): +================================================================= +==89993==ERROR: AddressSanitizer: heap-buffer-overflow on address +0x615000029900 at pc 0x55d5f885700d bp 0x7ffc1e1e9470 sp 0x7ffc1e1e9468 +WRITE of size 1 at 0x615000029900 thread T0 + #0 0x55d5f885700c in sdhci_write_dataport hw/sd/sdhci.c:564:39 + #1 0x55d5f8849150 in sdhci_write hw/sd/sdhci.c:1223:13 + #2 0x55d5fa01db63 in memory_region_write_accessor system/memory.c:497:5 + #3 0x55d5fa01d245 in access_with_adjusted_size system/memory.c:573:18 + #4 0x55d5fa01b1a9 in memory_region_dispatch_write system/memory.c:1521:16 + #5 0x55d5fa09f5c9 in flatview_write_continue system/physmem.c:2711:23 + #6 0x55d5fa08f78b in flatview_write system/physmem.c:2753:12 + #7 0x55d5fa08f258 in address_space_write system/physmem.c:2860:18 + ... 
+0x615000029900 is located 0 bytes to the right of 512-byte region +[0x615000029700,0x615000029900) allocated by thread T0 here: + #0 0x55d5f7237b27 in __interceptor_calloc + #1 0x7f9e36dd4c50 in g_malloc0 + #2 0x55d5f88672f7 in sdhci_pci_realize hw/sd/sdhci-pci.c:36:5 + #3 0x55d5f844b582 in pci_qdev_realize hw/pci/pci.c:2092:9 + #4 0x55d5fa2ee74b in device_set_realized hw/core/qdev.c:510:13 + #5 0x55d5fa325bfb in property_set_bool qom/object.c:2358:5 + #6 0x55d5fa31ea45 in object_property_set qom/object.c:1472:5 + #7 0x55d5fa332509 in object_property_set_qobject om/qom-qobject.c:28:10 + #8 0x55d5fa31f6ed in object_property_set_bool qom/object.c:1541:15 + #9 0x55d5fa2e2948 in qdev_realize hw/core/qdev.c:292:12 + #10 0x55d5f8eed3f1 in qdev_device_add_from_qdict system/qdev-monitor.c:719:10 + #11 0x55d5f8eef7ff in qdev_device_add system/qdev-monitor.c:738:11 + #12 0x55d5f8f211f0 in device_init_func system/vl.c:1200:11 + #13 0x55d5fad0877d in qemu_opts_foreach util/qemu-option.c:1135:14 + #14 0x55d5f8f0df9c in qemu_create_cli_devices system/vl.c:2638:5 + #15 0x55d5f8f0db24 in qmp_x_exit_preconfig system/vl.c:2706:5 + #16 0x55d5f8f14dc0 in qemu_init system/vl.c:3737:9 + ... +SUMMARY: AddressSanitizer: heap-buffer-overflow hw/sd/sdhci.c:564:39 +in sdhci_write_dataport + +Add assertions to ensure the fifo_buffer[] is not overflowed by +malicious accesses to the Buffer Data Port register. 
+ +Fixes: CVE-2024-3447 +Cc: qemu-stable@nongnu.org +Fixes: d7dfca0807 ("hw/sdhci: introduce standard SD host controller") +Buglink: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=58813 +Reported-by: Alexander Bulekov +Reported-by: Chuhong Yuan +Signed-off-by: Peter Maydell +Message-Id: +Signed-off-by: Philippe Mathieu-Daudé +Message-Id: <20240409145524.27913-1-philmd@linaro.org> +--- + hw/sd/sdhci.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c +index 40473b0db0..e95ea34895 100644 +--- a/hw/sd/sdhci.c ++++ b/hw/sd/sdhci.c +@@ -473,6 +473,7 @@ static uint32_t sdhci_read_dataport(SDHCIState *s, unsigned size) + } + + for (i = 0; i < size; i++) { ++ assert(s->data_count < s->buf_maxsz); + value |= s->fifo_buffer[s->data_count] << i * 8; + s->data_count++; + /* check if we've read all valid data (blksize bytes) from buffer */ +@@ -561,6 +562,7 @@ static void sdhci_write_dataport(SDHCIState *s, uint32_t value, unsigned size) + } + + for (i = 0; i < size; i++) { ++ assert(s->data_count < s->buf_maxsz); + s->fifo_buffer[s->data_count] = value & 0xFF; + s->data_count++; + value >>= 8; +@@ -1208,6 +1210,12 @@ sdhci_write(void *opaque, hwaddr offset, uint64_t val, unsigned size) + if (!(s->capareg & R_SDHC_CAPAB_SDMA_MASK)) { + value &= ~SDHC_TRNS_DMA; + } ++ ++ /* TRNMOD writes are inhibited while Command Inhibit (DAT) is true */ ++ if (s->prnsts & SDHC_DATA_INHIBIT) { ++ mask |= 0xffff; ++ } ++ + MASKED_WRITE(s->trnmod, mask, value & SDHC_TRNMOD_MASK); + MASKED_WRITE(s->cmdreg, mask >> 16, value >> 16); + +-- +2.27.0 + diff --git a/hw-usb-Style-cleanup.patch b/hw-usb-Style-cleanup.patch new file mode 100644 index 0000000000000000000000000000000000000000..e2c333b373d8dea2b293fd4e8de1764e8ed3d1a2 --- /dev/null +++ b/hw-usb-Style-cleanup.patch @@ -0,0 +1,66 @@ +From f06b930da5d2acf70d142f1212ef4ee09d643b21 Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Tue, 27 Feb 2024 16:18:43 +0800 +Subject: [PATCH] hw/usb: Style 
cleanup +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 455177ffc457098b0103d2a09cb7ba5e260dfcdd + +We are going to modify these lines, fix their style +in order to avoid checkpatch.pl warning. + +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Richard Henderson +Signed-off-by: Michael Tokarev +Signed-off-by: dinglimin +--- + hw/usb/hcd-ehci.c | 3 ++- + hw/usb/hcd-uhci.c | 6 ++++-- + 2 files changed, 6 insertions(+), 3 deletions(-) + +diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c +index 19b4534c20..7b093acd98 100644 +--- a/hw/usb/hcd-ehci.c ++++ b/hw/usb/hcd-ehci.c +@@ -1086,8 +1086,9 @@ static void ehci_opreg_write(void *ptr, hwaddr addr, + case CONFIGFLAG: + val &= 0x1; + if (val) { +- for(i = 0; i < NB_PORTS; i++) ++ for (i = 0; i < NB_PORTS; i++) { + handle_port_owner_write(s, i, 0); ++ } + } + break; + +diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c +index 77baaa7a6b..6975966c3f 100644 +--- a/hw/usb/hcd-uhci.c ++++ b/hw/usb/hcd-uhci.c +@@ -457,8 +457,9 @@ static void uhci_port_write(void *opaque, hwaddr addr, + int n; + + n = (addr >> 1) & 7; +- if (n >= NB_PORTS) ++ if (n >= NB_PORTS) { + return; ++ } + port = &s->ports[n]; + dev = port->port.dev; + if (dev && dev->attached) { +@@ -513,8 +514,9 @@ static uint64_t uhci_port_read(void *opaque, hwaddr addr, unsigned size) + UHCIPort *port; + int n; + n = (addr >> 1) & 7; +- if (n >= NB_PORTS) ++ if (n >= NB_PORTS) { + goto read_default; ++ } + port = &s->ports[n]; + val = port->ctrl; + } +-- +2.27.0 + diff --git a/hw-usb-reduce-the-vpcu-cost-of-UHCI-when-VNC-disconn.patch b/hw-usb-reduce-the-vpcu-cost-of-UHCI-when-VNC-disconn.patch new file mode 100644 index 0000000000000000000000000000000000000000..72d0c2a4fc5211a27de4ee2c0def9ee28b956973 --- /dev/null +++ b/hw-usb-reduce-the-vpcu-cost-of-UHCI-when-VNC-disconn.patch @@ -0,0 +1,459 @@ +From dc7e40b2841132b0bc43d25c2c31f41ae3fa2c68 Mon Sep 17 00:00:00 2001 +From: eillon +Date: Tue, 8 
Feb 2022 22:43:59 -0500 +Subject: [PATCH] hw/usb: reduce the vcpu cost of UHCI when VNC disconnect + +Reduce the vcpu cost by setting a lower FRAME_TIMER_FREQ of the UHCI +when the VNC client is disconnected. This can reduce about 3% cost of +vcpu thread. + +Signed-off-by: eillon +--- + hw/usb/core.c | 5 ++-- + hw/usb/desc.c | 7 +++-- + hw/usb/dev-hid.c | 2 +- + hw/usb/hcd-uhci.c | 63 ++++++++++++++++++++++++++++++++++------ + hw/usb/hcd-uhci.h | 1 + + hw/usb/host-libusb.c | 32 ++++++++++++++++++++ + include/hw/usb.h | 1 + + include/qemu/timer.h | 28 ++++++++++++++++++ + ui/vnc.c | 4 +++ + util/qemu-timer.c | 69 ++++++++++++++++++++++++++++++++++++++++++++ + 10 files changed, 197 insertions(+), 15 deletions(-) + +diff --git a/hw/usb/core.c b/hw/usb/core.c +index 975f76250a..51b36126ca 100644 +--- a/hw/usb/core.c ++++ b/hw/usb/core.c +@@ -87,7 +87,7 @@ void usb_device_reset(USBDevice *dev) + return; + } + usb_device_handle_reset(dev); +- dev->remote_wakeup = 0; ++ dev->remote_wakeup &= ~USB_DEVICE_REMOTE_WAKEUP; + dev->addr = 0; + dev->state = USB_STATE_DEFAULT; + } +@@ -105,7 +105,8 @@ void usb_wakeup(USBEndpoint *ep, unsigned int stream) + */ + return; + } +- if (dev->remote_wakeup && dev->port && dev->port->ops->wakeup) { ++ if ((dev->remote_wakeup & USB_DEVICE_REMOTE_WAKEUP) ++ && dev->port && dev->port->ops->wakeup) { + dev->port->ops->wakeup(dev->port); + } + if (bus->ops->wakeup_endpoint) { +diff --git a/hw/usb/desc.c b/hw/usb/desc.c +index f2bdc05a95..333f73fff1 100644 +--- a/hw/usb/desc.c ++++ b/hw/usb/desc.c +@@ -752,7 +752,7 @@ int usb_desc_handle_control(USBDevice *dev, USBPacket *p, + if (config->bmAttributes & USB_CFG_ATT_SELFPOWER) { + data[0] |= 1 << USB_DEVICE_SELF_POWERED; + } +- if (dev->remote_wakeup) { ++ if (dev->remote_wakeup & USB_DEVICE_REMOTE_WAKEUP) { + data[0] |= 1 << USB_DEVICE_REMOTE_WAKEUP; + } + data[1] = 0x00; +@@ -762,14 +762,15 @@ int usb_desc_handle_control(USBDevice *dev, USBPacket *p, + } + case DeviceOutRequest | USB_REQ_CLEAR_FEATURE: + 
if (value == USB_DEVICE_REMOTE_WAKEUP) { +- dev->remote_wakeup = 0; ++ dev->remote_wakeup &= ~USB_DEVICE_REMOTE_WAKEUP; + ret = 0; + } + trace_usb_clear_device_feature(dev->addr, value, ret); + break; + case DeviceOutRequest | USB_REQ_SET_FEATURE: ++ dev->remote_wakeup |= USB_DEVICE_REMOTE_WAKEUP_IS_SUPPORTED; + if (value == USB_DEVICE_REMOTE_WAKEUP) { +- dev->remote_wakeup = 1; ++ dev->remote_wakeup |= USB_DEVICE_REMOTE_WAKEUP; + ret = 0; + } + trace_usb_set_device_feature(dev->addr, value, ret); +diff --git a/hw/usb/dev-hid.c b/hw/usb/dev-hid.c +index bdd6d1ffaf..cc68d1ce9e 100644 +--- a/hw/usb/dev-hid.c ++++ b/hw/usb/dev-hid.c +@@ -745,7 +745,7 @@ static int usb_ptr_post_load(void *opaque, int version_id) + { + USBHIDState *s = opaque; + +- if (s->dev.remote_wakeup) { ++ if (s->dev.remote_wakeup & USB_DEVICE_REMOTE_WAKEUP) { + hid_pointer_activate(&s->hid); + } + return 0; +diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c +index 6975966c3f..a92581ff5f 100644 +--- a/hw/usb/hcd-uhci.c ++++ b/hw/usb/hcd-uhci.c +@@ -44,6 +44,8 @@ + #include "hcd-uhci.h" + + #define FRAME_TIMER_FREQ 1000 ++#define FRAME_TIMER_FREQ_LAZY 10 ++#define USB_DEVICE_NEED_NORMAL_FREQ "QEMU USB Tablet" + + #define FRAME_MAX_LOOPS 256 + +@@ -109,6 +111,22 @@ static void uhci_async_cancel(UHCIAsync *async); + static void uhci_queue_fill(UHCIQueue *q, UHCI_TD *td); + static void uhci_resume(void *opaque); + ++static int64_t uhci_frame_timer_freq = FRAME_TIMER_FREQ_LAZY; ++ ++static void uhci_set_frame_freq(int freq) ++{ ++ if (freq <= 0) { ++ return; ++ } ++ ++ uhci_frame_timer_freq = freq; ++} ++ ++static qemu_usb_controller qemu_uhci = { ++ .name = "uhci", ++ .qemu_set_freq = uhci_set_frame_freq, ++}; ++ + static inline int32_t uhci_queue_token(UHCI_TD *td) + { + if ((td->token & (0xf << 15)) == 0) { +@@ -351,7 +369,7 @@ static int uhci_post_load(void *opaque, int version_id) + + if (version_id < 2) { + s->expire_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + +- (NANOSECONDS_PER_SECOND / 
FRAME_TIMER_FREQ); ++ (NANOSECONDS_PER_SECOND / uhci_frame_timer_freq); + } + return 0; + } +@@ -392,8 +410,29 @@ static void uhci_port_write(void *opaque, hwaddr addr, + if ((val & UHCI_CMD_RS) && !(s->cmd & UHCI_CMD_RS)) { + /* start frame processing */ + trace_usb_uhci_schedule_start(); +- s->expire_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + +- (NANOSECONDS_PER_SECOND / FRAME_TIMER_FREQ); ++ ++ /* ++ * If the frequency of frame_timer is too slow, Guest OS (Win2012) would become ++ * blue-screen after hotplugging some vcpus. ++ * If this USB device support the remote-wakeup, the UHCI controller ++ * will enter global suspend mode when there is no input for several seconds. ++ * In this case, Qemu will delete the frame_timer. Since the frame_timer has been deleted, ++ * there is no influence to the performance of Vms. So, we can change the frequency to 1000. ++ * After that the frequency will be safe when we trigger the frame_timer again. ++ * Excepting this, there are two ways to change the frequency: ++ * 1)VNC connect/disconnect;2)attach/detach USB device. 
++ */ ++ if ((uhci_frame_timer_freq != FRAME_TIMER_FREQ) ++ && (s->ports[0].port.dev) ++ && (!memcmp(s->ports[0].port.dev->product_desc, ++ USB_DEVICE_NEED_NORMAL_FREQ, strlen(USB_DEVICE_NEED_NORMAL_FREQ))) ++ && (s->ports[0].port.dev->remote_wakeup & USB_DEVICE_REMOTE_WAKEUP_IS_SUPPORTED)) { ++ qemu_log("turn up the frequency of UHCI controller to %d\n", FRAME_TIMER_FREQ); ++ uhci_frame_timer_freq = FRAME_TIMER_FREQ; ++ } ++ ++ s->frame_time = NANOSECONDS_PER_SECOND / FRAME_TIMER_FREQ; ++ s->expire_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + s->frame_time; + timer_mod(s->frame_timer, s->expire_time); + s->status &= ~UHCI_STS_HCHALTED; + } else if (!(val & UHCI_CMD_RS)) { +@@ -1083,7 +1122,6 @@ static void uhci_frame_timer(void *opaque) + UHCIState *s = opaque; + uint64_t t_now, t_last_run; + int i, frames; +- const uint64_t frame_t = NANOSECONDS_PER_SECOND / FRAME_TIMER_FREQ; + + s->completions_only = false; + qemu_bh_cancel(s->bh); +@@ -1099,14 +1137,14 @@ static void uhci_frame_timer(void *opaque) + } + + /* We still store expire_time in our state, for migration */ +- t_last_run = s->expire_time - frame_t; ++ t_last_run = s->expire_time - s->frame_time; + t_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + + /* Process up to MAX_FRAMES_PER_TICK frames */ +- frames = (t_now - t_last_run) / frame_t; ++ frames = (t_now - t_last_run) / s->frame_time; + if (frames > s->maxframes) { + int skipped = frames - s->maxframes; +- s->expire_time += skipped * frame_t; ++ s->expire_time += skipped * s->frame_time; + s->frnum = (s->frnum + skipped) & 0x7ff; + frames -= skipped; + } +@@ -1123,7 +1161,7 @@ static void uhci_frame_timer(void *opaque) + /* The spec says frnum is the frame currently being processed, and + * the guest must look at frnum - 1 on interrupt, so inc frnum now */ + s->frnum = (s->frnum + 1) & 0x7ff; +- s->expire_time += frame_t; ++ s->expire_time += s->frame_time; + } + + /* Complete the previous frame(s) */ +@@ -1134,7 +1172,12 @@ static void 
uhci_frame_timer(void *opaque) + } + s->pending_int_mask = 0; + +- timer_mod(s->frame_timer, t_now + frame_t); ++ /* expire_time is calculated from last frame_time, we should calculate it ++ * according to new frame_time which equals to ++ * NANOSECONDS_PER_SECOND / uhci_frame_timer_freq */ ++ s->expire_time -= s->frame_time - NANOSECONDS_PER_SECOND / uhci_frame_timer_freq; ++ s->frame_time = NANOSECONDS_PER_SECOND / uhci_frame_timer_freq; ++ timer_mod(s->frame_timer, t_now + s->frame_time); + } + + static const MemoryRegionOps uhci_ioport_ops = { +@@ -1195,8 +1238,10 @@ void usb_uhci_common_realize(PCIDevice *dev, Error **errp) + s->bh = qemu_bh_new_guarded(uhci_bh, s, &DEVICE(dev)->mem_reentrancy_guard); + s->frame_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, uhci_frame_timer, s); + s->num_ports_vmstate = NB_PORTS; ++ s->frame_time = NANOSECONDS_PER_SECOND / uhci_frame_timer_freq; + QTAILQ_INIT(&s->queues); + ++ qemu_register_usb_controller(&qemu_uhci, QEMU_USB_CONTROLLER_UHCI); + memory_region_init_io(&s->io_bar, OBJECT(s), &uhci_ioport_ops, s, + "uhci", 0x20); + +diff --git a/hw/usb/hcd-uhci.h b/hw/usb/hcd-uhci.h +index 69f8b40c49..0918719911 100644 +--- a/hw/usb/hcd-uhci.h ++++ b/hw/usb/hcd-uhci.h +@@ -50,6 +50,7 @@ typedef struct UHCIState { + uint16_t status; + uint16_t intr; /* interrupt enable register */ + uint16_t frnum; /* frame number */ ++ uint64_t frame_time; /* frame time in ns */ + uint32_t fl_base_addr; /* frame list base address */ + uint8_t sof_timing; + uint8_t status2; /* bit 0 and 1 are used to generate UHCI_STS_USBINT */ +diff --git a/hw/usb/host-libusb.c b/hw/usb/host-libusb.c +index d7060a42d5..dba469c1ef 100644 +--- a/hw/usb/host-libusb.c ++++ b/hw/usb/host-libusb.c +@@ -945,6 +945,30 @@ static void usb_host_ep_update(USBHostDevice *s) + libusb_free_config_descriptor(conf); + } + ++static unsigned int usb_get_controller_type(int speed) ++{ ++ unsigned int type = MAX_USB_CONTROLLER_TYPES; ++ ++ switch (speed) { ++ case USB_SPEED_SUPER: ++ type 
= QEMU_USB_CONTROLLER_XHCI; ++ break; ++ case USB_SPEED_HIGH: ++ type = QEMU_USB_CONTROLLER_EHCI; ++ break; ++ case USB_SPEED_FULL: ++ type = QEMU_USB_CONTROLLER_UHCI; ++ break; ++ case USB_SPEED_LOW: ++ type = QEMU_USB_CONTROLLER_OHCI; ++ break; ++ default: ++ break; ++ } ++ ++ return type; ++} ++ + static int usb_host_open(USBHostDevice *s, libusb_device *dev, int hostfd) + { + USBDevice *udev = USB_DEVICE(s); +@@ -1054,6 +1078,12 @@ static int usb_host_open(USBHostDevice *s, libusb_device *dev, int hostfd) + } + + trace_usb_host_open_success(bus_num, addr); ++ ++ /* change ehci frame time freq when USB passthrough */ ++ qemu_log("usb host speed is %d\n", udev->speed); ++ qemu_timer_set_mode(QEMU_TIMER_USB_NORMAL_MODE, ++ usb_get_controller_type(udev->speed)); ++ + return 0; + + fail: +@@ -1129,6 +1159,8 @@ static int usb_host_close(USBHostDevice *s) + } + + usb_host_auto_check(NULL); ++ qemu_timer_set_mode(QEMU_TIMER_USB_LAZY_MODE, ++ usb_get_controller_type(udev->speed)); + return 0; + } + +diff --git a/include/hw/usb.h b/include/hw/usb.h +index 32c23a5ca2..911179158d 100644 +--- a/include/hw/usb.h ++++ b/include/hw/usb.h +@@ -142,6 +142,7 @@ + + #define USB_DEVICE_SELF_POWERED 0 + #define USB_DEVICE_REMOTE_WAKEUP 1 ++#define USB_DEVICE_REMOTE_WAKEUP_IS_SUPPORTED 2 + + #define USB_DT_DEVICE 0x01 + #define USB_DT_CONFIG 0x02 +diff --git a/include/qemu/timer.h b/include/qemu/timer.h +index 9a366e551f..475c2a3f18 100644 +--- a/include/qemu/timer.h ++++ b/include/qemu/timer.h +@@ -91,6 +91,34 @@ struct QEMUTimer { + int scale; + }; + ++#define QEMU_USB_NORMAL_FREQ 1000 ++#define QEMU_USB_LAZY_FREQ 10 ++#define MAX_USB_CONTROLLER_TYPES 4 ++#define QEMU_USB_CONTROLLER_OHCI 0 ++#define QEMU_USB_CONTROLLER_UHCI 1 ++#define QEMU_USB_CONTROLLER_EHCI 2 ++#define QEMU_USB_CONTROLLER_XHCI 3 ++ ++typedef void (*QEMUSetFreqHandler) (int freq); ++ ++typedef struct qemu_usb_controller { ++ const char *name; ++ QEMUSetFreqHandler qemu_set_freq; ++} qemu_usb_controller; ++ 
++typedef qemu_usb_controller* qemu_usb_controller_ptr; ++ ++enum qemu_timer_mode { ++ QEMU_TIMER_USB_NORMAL_MODE = 1 << 0, /* Set when VNC connect or ++ * with usb dev passthrough ++ */ ++ QEMU_TIMER_USB_LAZY_MODE = 1 << 1, /* Set when VNC disconnect */ ++}; ++ ++int qemu_register_usb_controller(qemu_usb_controller_ptr controller, ++ unsigned int type); ++int qemu_timer_set_mode(enum qemu_timer_mode mode, unsigned int type); ++ + extern QEMUTimerListGroup main_loop_tlg; + + /* +diff --git a/ui/vnc.c b/ui/vnc.c +index 4f23a0fa79..5dd77e73cb 100644 +--- a/ui/vnc.c ++++ b/ui/vnc.c +@@ -1365,6 +1365,8 @@ void vnc_disconnect_finish(VncState *vs) + g_free(vs->zrle); + g_free(vs->tight); + g_free(vs); ++ ++ qemu_timer_set_mode(QEMU_TIMER_USB_LAZY_MODE, QEMU_USB_CONTROLLER_UHCI); + } + + size_t vnc_client_io_error(VncState *vs, ssize_t ret, Error *err) +@@ -3341,6 +3343,8 @@ static void vnc_connect(VncDisplay *vd, QIOChannelSocket *sioc, + } + } + } ++ ++ qemu_timer_set_mode(QEMU_TIMER_USB_NORMAL_MODE, QEMU_USB_CONTROLLER_UHCI); + } + + void vnc_start_protocol(VncState *vs) +diff --git a/util/qemu-timer.c b/util/qemu-timer.c +index 6a0de33dd2..dc891cc557 100644 +--- a/util/qemu-timer.c ++++ b/util/qemu-timer.c +@@ -23,6 +23,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/log.h" + #include "qemu/main-loop.h" + #include "qemu/timer.h" + #include "qemu/lockable.h" +@@ -75,6 +76,74 @@ struct QEMUTimerList { + QemuEvent timers_done_ev; + }; + ++typedef struct qemu_controller_timer_state { ++ qemu_usb_controller_ptr controller; ++ int refs; ++} controller_timer_state; ++ ++typedef controller_timer_state* controller_timer_state_ptr; ++ ++static controller_timer_state uhci_timer_state = { ++ .controller = NULL, ++ .refs = 0, ++}; ++ ++static controller_timer_state_ptr \ ++ qemu_usb_controller_tab[MAX_USB_CONTROLLER_TYPES] = {NULL, ++ &uhci_timer_state, ++ NULL, NULL}; ++ ++int qemu_register_usb_controller(qemu_usb_controller_ptr controller, ++ unsigned int type) ++{ ++ if 
(type != QEMU_USB_CONTROLLER_UHCI) { ++ return 0; ++ } ++ ++ /* for companion EHCI controller will create three UHCI controllers, ++ * we init it only once. ++ */ ++ if (!qemu_usb_controller_tab[type]->controller) { ++ qemu_log("the usb controller (%d) registed frame handler\n", type); ++ qemu_usb_controller_tab[type]->controller = controller; ++ } ++ ++ return 0; ++} ++ ++int qemu_timer_set_mode(enum qemu_timer_mode mode, unsigned int type) ++{ ++ if (type != QEMU_USB_CONTROLLER_UHCI) { ++ qemu_log("the usb controller (%d) no need change frame frep\n", type); ++ return 0; ++ } ++ ++ if (!qemu_usb_controller_tab[type]->controller) { ++ qemu_log("the usb controller (%d) not registed yet\n", type); ++ return 0; ++ } ++ ++ if (mode == QEMU_TIMER_USB_NORMAL_MODE) { ++ if (qemu_usb_controller_tab[type]->refs++ > 0) { ++ return 0; ++ } ++ qemu_usb_controller_tab[type]->controller-> ++ qemu_set_freq(QEMU_USB_NORMAL_FREQ); ++ qemu_log("Set the controller (%d) of freq %d HZ,\n", ++ type, QEMU_USB_NORMAL_FREQ); ++ } else { ++ if (--qemu_usb_controller_tab[type]->refs > 0) { ++ return 0; ++ } ++ qemu_usb_controller_tab[type]->controller-> ++ qemu_set_freq(QEMU_USB_LAZY_FREQ); ++ qemu_log("Set the controller(type:%d) of freq %d HZ,\n", ++ type, QEMU_USB_LAZY_FREQ); ++ } ++ ++ return 0; ++} ++ + /** + * qemu_clock_ptr: + * @type: type of clock +-- +2.27.0 + diff --git a/hw-virtio-Introduce-virtio_bh_new_guarded-helper.patch b/hw-virtio-Introduce-virtio_bh_new_guarded-helper.patch new file mode 100644 index 0000000000000000000000000000000000000000..6355641a134dd31a44259e069840adc62d19c53a --- /dev/null +++ b/hw-virtio-Introduce-virtio_bh_new_guarded-helper.patch @@ -0,0 +1,67 @@ +From 8c1ad2043705184da00d39250402a70f403d14a7 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Thu, 4 Apr 2024 20:56:11 +0200 +Subject: [PATCH] hw/virtio: Introduce virtio_bh_new_guarded() helper +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +Introduce virtio_bh_new_guarded(), similar to qemu_bh_new_guarded() +but using the transport memory guard, instead of the device one +(there can only be one virtio device per virtio bus). + +Inspired-by: Gerd Hoffmann +Reviewed-by: Gerd Hoffmann +Acked-by: Michael S. Tsirkin +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Michael S. Tsirkin +Message-Id: <20240409105537.18308-2-philmd@linaro.org> +--- + hw/virtio/virtio.c | 10 ++++++++++ + include/hw/virtio/virtio.h | 7 +++++++ + 2 files changed, 17 insertions(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index d00effe4d5..202aae868e 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -4148,3 +4148,13 @@ static void virtio_register_types(void) + } + + type_init(virtio_register_types) ++ ++QEMUBH *virtio_bh_new_guarded_full(DeviceState *dev, ++ QEMUBHFunc *cb, void *opaque, ++ const char *name) ++{ ++ DeviceState *transport = qdev_get_parent_bus(dev)->parent; ++ ++ return qemu_bh_new_full(cb, opaque, name, ++ &transport->mem_reentrancy_guard); ++} +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index e612441357..60494aed62 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -22,6 +22,7 @@ + #include "standard-headers/linux/virtio_config.h" + #include "standard-headers/linux/virtio_ring.h" + #include "qom/object.h" ++#include "block/aio.h" + + /* + * A guest should never accept this. 
It implies negotiation is broken +@@ -510,4 +511,10 @@ static inline bool virtio_device_disabled(VirtIODevice *vdev) + bool virtio_legacy_allowed(VirtIODevice *vdev); + bool virtio_legacy_check_disabled(VirtIODevice *vdev); + ++QEMUBH *virtio_bh_new_guarded_full(DeviceState *dev, ++ QEMUBHFunc *cb, void *opaque, ++ const char *name); ++#define virtio_bh_new_guarded(dev, cb, opaque) \ ++ virtio_bh_new_guarded_full((dev), (cb), (opaque), (stringify(cb))) ++ + #endif +-- +2.27.0 + diff --git a/hw-virtio-virtio-crypto-Protect-from-DMA-re-entrancy.patch b/hw-virtio-virtio-crypto-Protect-from-DMA-re-entrancy.patch new file mode 100644 index 0000000000000000000000000000000000000000..3b4fc6c0d7ba401daf46c8326b9414f42054ae59 --- /dev/null +++ b/hw-virtio-virtio-crypto-Protect-from-DMA-re-entrancy.patch @@ -0,0 +1,43 @@ +From edb30c972ba68b03cc5febefc880698573a17b04 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= +Date: Thu, 4 Apr 2024 20:56:41 +0200 +Subject: [PATCH] hw/virtio/virtio-crypto: Protect from DMA re-entrancy + bugs(CVE-2024-3446) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Replace qemu_bh_new_guarded() by virtio_bh_new_guarded() +so the bus and device use the same guard. Otherwise the +DMA-reentrancy protection can be bypassed. + +Fixes: CVE-2024-3446 +Cc: qemu-stable@nongnu.org +Suggested-by: Alexander Bulekov +Reviewed-by: Gerd Hoffmann +Acked-by: Michael S. Tsirkin +Signed-off-by: Philippe Mathieu-Daudé +Reviewed-by: Michael S. 
Tsirkin +Message-Id: <20240409105537.18308-5-philmd@linaro.org> +--- + hw/virtio/virtio-crypto.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c +index 0e2cc8d5a8..4aaced74be 100644 +--- a/hw/virtio/virtio-crypto.c ++++ b/hw/virtio/virtio-crypto.c +@@ -1080,8 +1080,8 @@ static void virtio_crypto_device_realize(DeviceState *dev, Error **errp) + vcrypto->vqs[i].dataq = + virtio_add_queue(vdev, 1024, virtio_crypto_handle_dataq_bh); + vcrypto->vqs[i].dataq_bh = +- qemu_bh_new_guarded(virtio_crypto_dataq_bh, &vcrypto->vqs[i], +- &dev->mem_reentrancy_guard); ++ virtio_bh_new_guarded(dev, virtio_crypto_dataq_bh, ++ &vcrypto->vqs[i]); + vcrypto->vqs[i].vcrypto = vcrypto; + } + +-- +2.27.0 + diff --git a/i386-cache-passthrough-Update-AMD-8000_001D.EAX-25-1.patch b/i386-cache-passthrough-Update-AMD-8000_001D.EAX-25-1.patch new file mode 100644 index 0000000000000000000000000000000000000000..d3a119f754fc931c7f1b0fe716e91fe81cf5b3c6 --- /dev/null +++ b/i386-cache-passthrough-Update-AMD-8000_001D.EAX-25-1.patch @@ -0,0 +1,65 @@ +From ff43e9201aba8f4047e6fd5edb93a4861cc8fed2 Mon Sep 17 00:00:00 2001 +From: Yanan Wang +Date: Thu, 28 Mar 2024 18:57:56 +0800 +Subject: [PATCH] i386: cache passthrough: Update AMD 8000_001D.EAX[25:14] + based on vCPU topo + +On AMD target, when host cache passthrough is disabled we will +emulate the guest caches with default values and initialize the +shared cpu list of the caches based on vCPU topology. However +when host cache passthrough is enabled, the shared cpu list is +consistent with host regardless what the vCPU topology is. + +For example, when cache passthrough is enabled, running a guest +with vThreads=1 on a host with pThreads=2, we will get that there +are every *two* logical vCPUs sharing a L1/L2 cache, which is not +consistent with the vCPU topology (vThreads=1). 
+ +So let's reinitialize BITs[25:14] of AMD CPUID 8000_001D.EAX +based on the actual vCPU topology instead of host pCPU topology. + +Signed-off-by: Yanan Wang +Signed-off-by: Yuan Zhang +--- + target/i386/cpu.c | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index f94405c02b..491cf40cc7 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6597,9 +6597,31 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + } + break; + case 0x8000001D: ++ /* Populate AMD Processor Cache Information */ + *eax = 0; + if (cpu->cache_info_passthrough) { + x86_cpu_get_cache_cpuid(index, count, eax, ebx, ecx, edx); ++ ++ /* ++ * Clear BITs[25:14] and then update them based on the guest ++ * vCPU topology, like what we do in encode_cache_cpuid8000001d ++ * when cache_info_passthrough is not enabled. ++ */ ++ *eax &= ~0x03FFC000; ++ switch (count) { ++ case 0: /* L1 dcache info */ ++ case 1: /* L1 icache info */ ++ case 2: /* L2 cache info */ ++ *eax |= ((topo_info.threads_per_core - 1) << 14); ++ break; ++ case 3: /* L3 cache info */ ++ *eax |= ((topo_info.cores_per_die * ++ topo_info.threads_per_core - 1) << 14); ++ break; ++ default: /* end of info */ ++ *eax = *ebx = *ecx = *edx = 0; ++ break; ++ } + break; + } + switch (count) { +-- +2.27.0 + diff --git a/i386-cpu-Clear-FEAT_XSAVE_XSS_LO-HI-leafs-when-CPUID.patch b/i386-cpu-Clear-FEAT_XSAVE_XSS_LO-HI-leafs-when-CPUID.patch new file mode 100644 index 0000000000000000000000000000000000000000..8549070247aefc0f6091388411438386d580b2ec --- /dev/null +++ b/i386-cpu-Clear-FEAT_XSAVE_XSS_LO-HI-leafs-when-CPUID.patch @@ -0,0 +1,38 @@ +From c952c9acfab98a83122b4e6d406f4a7a0dfe871f Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Mon, 15 Jan 2024 04:13:24 -0500 +Subject: [PATCH] i386/cpu: Clear FEAT_XSAVE_XSS_LO/HI leafs when + CPUID_EXT_XSAVE is not available + +commit 81f5cad3858f27623b1b14467926032d229b76cc upstream. 
+ +Leaf FEAT_XSAVE_XSS_LO and FEAT_XSAVE_XSS_HI also need to be cleared +when CPUID_EXT_XSAVE is not set. + +Fixes: 301e90675c3f ("target/i386: Enable support for XSAVES based features") +Signed-off-by: Xiaoyao Li +Reviewed-by: Yang Weijiang +Message-ID: <20240115091325.1904229-2-xiaoyao.li@intel.com> +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index cd16cb893d..8b9ef218d3 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6927,6 +6927,8 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) + if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { + env->features[FEAT_XSAVE_XCR0_LO] = 0; + env->features[FEAT_XSAVE_XCR0_HI] = 0; ++ env->features[FEAT_XSAVE_XSS_LO] = 0; ++ env->features[FEAT_XSAVE_XSS_HI] = 0; + return; + } + +-- +2.27.0 + diff --git a/i386-cpu-Mask-with-XCR0-XSS-mask-for-FEAT_XSAVE_XCR0.patch b/i386-cpu-Mask-with-XCR0-XSS-mask-for-FEAT_XSAVE_XCR0.patch new file mode 100644 index 0000000000000000000000000000000000000000..612d46547a6e221c90f8f826f78226593f529373 --- /dev/null +++ b/i386-cpu-Mask-with-XCR0-XSS-mask-for-FEAT_XSAVE_XCR0.patch @@ -0,0 +1,42 @@ +From 26ddb3428182503b28ac87cad7543eb241a9d353 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Mon, 15 Jan 2024 04:13:25 -0500 +Subject: [PATCH] i386/cpu: Mask with XCR0/XSS mask for FEAT_XSAVE_XCR0_HI and + FEAT_XSAVE_XSS_HI leafs + +commit a11a365159b944e05be76f3ec3b98c8b38cb70fd upstream. + +The value of FEAT_XSAVE_XCR0_HI leaf and FEAT_XSAVE_XSS_HI leaf also +need to be masked by XCR0 and XSS mask respectively, to make it +logically correct. 
+ +Fixes: 301e90675c3f ("target/i386: Enable support for XSAVES based features") +Signed-off-by: Xiaoyao Li +Reviewed-by: Yang Weijiang +Message-ID: <20240115091325.1904229-3-xiaoyao.li@intel.com> +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/cpu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 8b9ef218d3..a66e5a357b 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6947,9 +6947,9 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) + } + + env->features[FEAT_XSAVE_XCR0_LO] = mask & CPUID_XSTATE_XCR0_MASK; +- env->features[FEAT_XSAVE_XCR0_HI] = mask >> 32; ++ env->features[FEAT_XSAVE_XCR0_HI] = (mask & CPUID_XSTATE_XCR0_MASK) >> 32; + env->features[FEAT_XSAVE_XSS_LO] = mask & CPUID_XSTATE_XSS_MASK; +- env->features[FEAT_XSAVE_XSS_HI] = mask >> 32; ++ env->features[FEAT_XSAVE_XSS_HI] = (mask & CPUID_XSTATE_XSS_MASK) >> 32; + } + + /***** Steps involved on loading and filtering CPUID data +-- +2.27.0 + diff --git a/i386-cpuid-Decrease-cpuid_i-when-skipping-CPUID-leaf.patch b/i386-cpuid-Decrease-cpuid_i-when-skipping-CPUID-leaf.patch new file mode 100644 index 0000000000000000000000000000000000000000..37ee5b31f618e36974d8a9a7efd8e0d928eada52 --- /dev/null +++ b/i386-cpuid-Decrease-cpuid_i-when-skipping-CPUID-leaf.patch @@ -0,0 +1,38 @@ +From 576170252c3cbd79ed918f688d088f1ccd15602a Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 24 Jan 2024 21:40:14 -0500 +Subject: [PATCH] i386/cpuid: Decrease cpuid_i when skipping CPUID leaf 1F + +commit 10f92799af8ba3c3cef2352adcd4780f13fbab31 upstream. + +Existing code misses a decrement of cpuid_i when skip leaf 0x1F. +There's a blank CPUID entry(with leaf, subleaf as 0, and all fields +stuffed 0s) left in the CPUID array. + +It conflicts with correct CPUID leaf 0. 
+ +Signed-off-by: Xiaoyao Li +Reviewed-by:Yang Weijiang +Message-ID: <20240125024016.2521244-2-xiaoyao.li@intel.com> +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/kvm/kvm.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 4ce80555b4..e68eb8f5e6 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -1914,6 +1914,7 @@ int kvm_arch_init_vcpu(CPUState *cs) + } + case 0x1f: + if (env->nr_dies < 2) { ++ cpuid_i--; + break; + } + /* fallthrough */ +-- +2.27.0 + diff --git a/i386-cpuid-Move-leaf-7-to-correct-group.patch b/i386-cpuid-Move-leaf-7-to-correct-group.patch new file mode 100644 index 0000000000000000000000000000000000000000..fd217f55e0aae3a2a205dd6523a336e7b70cf453 --- /dev/null +++ b/i386-cpuid-Move-leaf-7-to-correct-group.patch @@ -0,0 +1,50 @@ +From bf3d3ecf9ff5808d1f03e83a363c8295f7abad76 Mon Sep 17 00:00:00 2001 +From: Xiaoyao Li +Date: Wed, 24 Jan 2024 21:40:16 -0500 +Subject: [PATCH] i386/cpuid: Move leaf 7 to correct group + +commit 0729857c707535847d7fe31d3d91eb8b2a118e3c upstream. + +CPUID leaf 7 was grouped together with SGX leaf 0x12 by commit +b9edbadefb9e ("i386: Propagate SGX CPUID sub-leafs to KVM") by mistake. + +SGX leaf 0x12 has its specific logic to check if subleaf (starting from 2) +is valid or not by checking the bit 0:3 of corresponding EAX is 1 or +not. + +Leaf 7 follows the logic that EAX of subleaf 0 enumerates the maximum +valid subleaf. 
+ +Fixes: b9edbadefb9e ("i386: Propagate SGX CPUID sub-leafs to KVM") +Signed-off-by: Xiaoyao Li +Message-ID: <20240125024016.2521244-4-xiaoyao.li@intel.com> +Cc: qemu-stable@nongnu.org +Signed-off-by: Paolo Bonzini +Signed-off-by: Jason Zeng +--- + target/i386/kvm/kvm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index e68eb8f5e6..a0bc9ea7b1 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -1955,7 +1955,6 @@ int kvm_arch_init_vcpu(CPUState *cs) + c = &cpuid_data.entries[cpuid_i++]; + } + break; +- case 0x7: + case 0x12: + for (j = 0; ; j++) { + c->function = i; +@@ -1975,6 +1974,7 @@ int kvm_arch_init_vcpu(CPUState *cs) + c = &cpuid_data.entries[cpuid_i++]; + } + break; ++ case 0x7: + case 0x14: + case 0x1d: + case 0x1e: { +-- +2.27.0 + diff --git a/i6300esb-watchdog-bugfix-Add-a-runstate-transition.patch b/i6300esb-watchdog-bugfix-Add-a-runstate-transition.patch new file mode 100644 index 0000000000000000000000000000000000000000..133577de23a77bfa7915097451864fb1ef639af6 --- /dev/null +++ b/i6300esb-watchdog-bugfix-Add-a-runstate-transition.patch @@ -0,0 +1,42 @@ +From 06fc5eb48668a1c83e6a4e76c1a71403917b1835 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Fri, 11 Feb 2022 20:33:47 +0800 +Subject: [PATCH] i6300esb watchdog: bugfix: Add a runstate transition + +QEMU will abort() for the reasons now: + + invalid runstate transition: 'prelaunch' -> 'postmigrate' + Aborted + +This happens when: + |<- watchdog timeout happened, then sets reset_requested to + | SHUTDOWN_CAUSE_GUEST_RESET; + |<- hot-migration thread sets vm state to RUN_STATE_FINISH_MIGRATE + | before the last time of migration; + |<- main thread gets the change of reset_requested and triggers + | reset, then sets vm state to RUN_STATE_PRELAUNCH; + |<- hot-migration thread sets vm state to RUN_STATE_POSTMIGRATE. + +Then 'prelaunch' -> 'postmigrate' runstate transition will happen. 
+It is legal so add this transition to runstate_transitions_def. + +Signed-off-by: Jinhua Cao +--- + system/runstate.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/system/runstate.c b/system/runstate.c +index ea9d6c2a32..9d3f627fee 100644 +--- a/system/runstate.c ++++ b/system/runstate.c +@@ -116,6 +116,7 @@ static const RunStateTransition runstate_transitions_def[] = { + { RUN_STATE_PRELAUNCH, RUN_STATE_RUNNING }, + { RUN_STATE_PRELAUNCH, RUN_STATE_FINISH_MIGRATE }, + { RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE }, ++ { RUN_STATE_PRELAUNCH, RUN_STATE_POSTMIGRATE }, + + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_RUNNING }, + { RUN_STATE_FINISH_MIGRATE, RUN_STATE_PAUSED }, +-- +2.27.0 + diff --git a/ide-ahci-add-check-to-avoid-null-dereference-CVE-201.patch b/ide-ahci-add-check-to-avoid-null-dereference-CVE-201.patch new file mode 100644 index 0000000000000000000000000000000000000000..5aaf01663e077c1336c79a115a433a15c53002e8 --- /dev/null +++ b/ide-ahci-add-check-to-avoid-null-dereference-CVE-201.patch @@ -0,0 +1,42 @@ +From 6689eebbb520dc75bc65e0914c4e05e40a4efc1d Mon Sep 17 00:00:00 2001 +From: Prasad J Pandit +Date: Mon, 21 Jun 2021 09:22:35 +0800 +Subject: [PATCH] ide: ahci: add check to avoid null dereference + (CVE-2019-12067) + +Fix CVE-2019-12067 + +AHCI emulator while committing DMA buffer in ahci_commit_buf() +may do a NULL dereference if the command header 'ad->cur_cmd' +is null. Add check to avoid it. 
+ +Reported-by: Bugs SysSec +Signed-off-by: Prasad J Pandit + +Signed-off-by: Jiajie Li +Signed-off-by: Yan Wang +Signed-off-by: Adttil +--- + hw/ide/ahci.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c +index afdc44b8e0..8062e1743c 100644 +--- a/hw/ide/ahci.c ++++ b/hw/ide/ahci.c +@@ -1519,8 +1519,10 @@ static void ahci_commit_buf(const IDEDMA *dma, uint32_t tx_bytes) + { + AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); + +- tx_bytes += le32_to_cpu(ad->cur_cmd->status); +- ad->cur_cmd->status = cpu_to_le32(tx_bytes); ++ if (ad->cur_cmd) { ++ tx_bytes += le32_to_cpu(ad->cur_cmd->status); ++ ad->cur_cmd->status = cpu_to_le32(tx_bytes); ++ } + } + + static int ahci_dma_rw_buf(const IDEDMA *dma, bool is_write) +-- +2.27.0 + diff --git a/include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch b/include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch new file mode 100644 index 0000000000000000000000000000000000000000..1e47ca26def6601174a9c693596606e4dd05e99b --- /dev/null +++ b/include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch @@ -0,0 +1,36 @@ +From 56bfcb77a384419dbd09ca37075a3cf4ba2e9f19 Mon Sep 17 00:00:00 2001 +From: Elen Avan +Date: Fri, 22 Dec 2023 22:17:21 +0300 +Subject: [PATCH] include/ui/rect.h: fix qemu_rect_init() mis-assignment +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Elen Avan +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2051 +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2050 +Fixes: a200d53b1fde "virtio-gpu: replace PIXMAN for region/rect test" +Cc: qemu-stable@nongnu.org +Reviewed-by: Michael Tokarev +Reviewed-by: Marc-AndrĂ© Lureau +Signed-off-by: Michael Tokarev +--- + include/ui/rect.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/ui/rect.h b/include/ui/rect.h +index 94898f92d0..68f05d78a8 100644 +--- a/include/ui/rect.h ++++ b/include/ui/rect.h +@@ 
-19,7 +19,7 @@ static inline void qemu_rect_init(QemuRect *rect, + uint16_t width, uint16_t height) + { + rect->x = x; +- rect->y = x; ++ rect->y = y; + rect->width = width; + rect->height = height; + } +-- +2.27.0 + diff --git a/intc-gicv3-Fixes-for-vcpu-hotplug.patch b/intc-gicv3-Fixes-for-vcpu-hotplug.patch new file mode 100644 index 0000000000000000000000000000000000000000..5241a57ddaccffa65e13ff8defd9ffbad9750bc1 --- /dev/null +++ b/intc-gicv3-Fixes-for-vcpu-hotplug.patch @@ -0,0 +1,70 @@ +From 343b61303152b06f9e1ba6d09a405faeaa3fcc98 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 22:12:58 +0800 +Subject: [PATCH] intc/gicv3: Fixes for vcpu hotplug + +1. Some types of machine don't support possible_cpus +callback. +2. The cpu_update_notifier is register only when machine +support vcpu hotplug, so do notifier_remove() unconditi- +onally is wrong. + +Signed-off-by: Keqian Zhu +--- + cpu-common.c | 4 ++++ + hw/intc/arm_gicv3_common.c | 9 +++++++-- + 2 files changed, 11 insertions(+), 2 deletions(-) + +diff --git a/cpu-common.c b/cpu-common.c +index da52e45760..54e63b3f77 100644 +--- a/cpu-common.c ++++ b/cpu-common.c +@@ -113,6 +113,10 @@ CPUState *qemu_get_possible_cpu(int index) + MachineState *ms = MACHINE(qdev_get_machine()); + const CPUArchIdList *possible_cpus = ms->possible_cpus; + ++ if (possible_cpus == NULL) { ++ return qemu_get_cpu(index); ++ } ++ + assert((index >= 0) && (index < possible_cpus->len)); + + return CPU(possible_cpus->cpus[index].cpu); +diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c +index d051024a30..5667d9f40b 100644 +--- a/hw/intc/arm_gicv3_common.c ++++ b/hw/intc/arm_gicv3_common.c +@@ -25,6 +25,7 @@ + #include "qapi/error.h" + #include "qemu/module.h" + #include "qemu/error-report.h" ++#include "hw/boards.h" + #include "hw/core/cpu.h" + #include "hw/intc/arm_gicv3_common.h" + #include "hw/qdev-properties.h" +@@ -446,7 +447,7 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error 
**errp) + s->cpu = g_new0(GICv3CPUState, s->num_cpu); + + for (i = 0; i < s->num_cpu; i++) { +- CPUState *cpu = qemu_get_possible_cpu(i); ++ CPUState *cpu = qemu_get_possible_cpu(i) ? : qemu_get_cpu(i); + uint64_t cpu_affid; + + if (qemu_enabled_cpu(cpu)) { +@@ -506,8 +507,12 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + static void arm_gicv3_finalize(Object *obj) + { + GICv3State *s = ARM_GICV3_COMMON(obj); ++ Object *ms = qdev_get_machine(); ++ MachineClass *mc = MACHINE_GET_CLASS(ms); + +- notifier_remove(&s->cpu_update_notifier); ++ if (mc->has_hotpluggable_cpus) { ++ notifier_remove(&s->cpu_update_notifier); ++ } + g_free(s->redist_region_count); + } + +-- +2.27.0 + diff --git a/iotests-adapt-to-output-change-for-recently-introduc.patch b/iotests-adapt-to-output-change-for-recently-introduc.patch new file mode 100644 index 0000000000000000000000000000000000000000..85946c303baf315c19a0762442ba38951a2be831 --- /dev/null +++ b/iotests-adapt-to-output-change-for-recently-introduc.patch @@ -0,0 +1,62 @@ +From 7212ca27f0dc957f83fe29858430ee2927e0175c Mon Sep 17 00:00:00 2001 +From: root +Date: Mon, 25 Mar 2024 21:31:32 +0800 +Subject: [PATCH] =?UTF-8?q?iotests:=20adapt=20to=20output=20change=20for?= + =?UTF-8?q?=20recently=20introduced=20'detached=20hea=E2=80=A6?= +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cheery-pick from 39a94d7c34ce9d222fa9c0c99a14e20a567456d7 + +…der' field + +Failure was noticed when running the tests for the qcow2 image format. + +Fixes: 0bd779e ("crypto: Introduce 'detached-header' field in QCryptoBlockInfoLUKS") +Signed-off-by: Fiona Ebner +Message-ID: <20240216101415.293769-1-f.ebner@proxmox.com> +Reviewed-by: Daniel P. 
BerrangĂ© +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +Signed-off-by: Gao Jiazhen +--- + tests/qemu-iotests/198.out | 2 ++ + tests/qemu-iotests/206.out | 1 + + 2 files changed, 3 insertions(+) + +diff --git a/tests/qemu-iotests/198.out b/tests/qemu-iotests/198.out +index 805494916f..62fb73fa3e 100644 +--- a/tests/qemu-iotests/198.out ++++ b/tests/qemu-iotests/198.out +@@ -39,6 +39,7 @@ Format specific information: + compression type: COMPRESSION_TYPE + encrypt: + ivgen alg: plain64 ++ detached header: false + hash alg: sha256 + cipher alg: aes-256 + uuid: 00000000-0000-0000-0000-000000000000 +@@ -84,6 +85,7 @@ Format specific information: + compression type: COMPRESSION_TYPE + encrypt: + ivgen alg: plain64 ++ detached header: false + hash alg: sha256 + cipher alg: aes-256 + uuid: 00000000-0000-0000-0000-000000000000 +diff --git a/tests/qemu-iotests/206.out b/tests/qemu-iotests/206.out +index 7e95694777..979f00f9bf 100644 +--- a/tests/qemu-iotests/206.out ++++ b/tests/qemu-iotests/206.out +@@ -114,6 +114,7 @@ Format specific information: + refcount bits: 16 + encrypt: + ivgen alg: plain64 ++ detached header: false + hash alg: sha1 + cipher alg: aes-128 + uuid: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX +-- +2.27.0 + diff --git a/kvm-arm-Fix-SVE-related-logic-for-vcpu-hotplug-featu.patch b/kvm-arm-Fix-SVE-related-logic-for-vcpu-hotplug-featu.patch new file mode 100644 index 0000000000000000000000000000000000000000..c7c540706516d78212ea21f2455b3a0cac832ef2 --- /dev/null +++ b/kvm-arm-Fix-SVE-related-logic-for-vcpu-hotplug-featu.patch @@ -0,0 +1,42 @@ +From 1228f5c7cfcb78b19f163551aae0612602ac2d7d Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Sun, 28 Apr 2024 13:01:48 +0800 +Subject: [PATCH] kvm/arm: Fix SVE related logic for vcpu hotplug feature + +1. Must finalize SVE setting before kvm_arch_init_vcpu(). +2. Must not finalize KVM SVE repeatly for hotplugged vcpu. 
+ +Signed-off-by: Keqian Zhu +--- + target/arm/kvm.c | 1 + + target/arm/kvm64.c | 2 +- + 2 files changed, 2 insertions(+), 1 deletion(-) + +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 12c1b4b328..1ceb72a1c1 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -704,6 +704,7 @@ void kvm_arm_create_host_vcpu(ARMCPU *cpu) + * later while setting device attributes of the GICR during GICv3 + * reset + */ ++ arm_cpu_finalize_features(cpu, &error_abort); + ret = kvm_arch_init_vcpu(cs); + if (ret < 0) { + error_report("Failed to initialize host vcpu %ld", vcpu_id); +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 00b257bb4b..615e8bbbdf 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -647,7 +647,7 @@ int kvm_arch_init_vcpu(CPUState *cs) + return ret; + } + +- if (cpu_isar_feature(aa64_sve, cpu)) { ++ if (cpu_isar_feature(aa64_sve, cpu) && !DEVICE(cpu)->hotplugged) { + ret = kvm_arm_sve_set_vls(cs); + if (ret) { + return ret; +-- +2.27.0 + diff --git a/kvm-arm-Fix-compatibility-of-cold-plug-CPU-with-SVE.patch b/kvm-arm-Fix-compatibility-of-cold-plug-CPU-with-SVE.patch new file mode 100644 index 0000000000000000000000000000000000000000..d248b18c61ef7113cd2ea16cd650db007632b943 --- /dev/null +++ b/kvm-arm-Fix-compatibility-of-cold-plug-CPU-with-SVE.patch @@ -0,0 +1,63 @@ +From baacc5ed528a5259286622482a01e3e848aed57e Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Mon, 29 Apr 2024 17:14:47 +0800 +Subject: [PATCH] kvm/arm: Fix compatibility of cold-plug CPU with SVE + +For arm virt machine, besides hotplugged vcpu, the kvm state of +coldplugged CPU is also pre-inited and thus SVE is finalized. + +And a flag in ARMCPU state and skip finalize SVE again. 
+ +Signed-off-by: Keqian Zhu +--- + hw/arm/virt.c | 5 +++++ + target/arm/cpu.h | 3 +++ + target/arm/kvm64.c | 2 +- + 3 files changed, 9 insertions(+), 1 deletion(-) + +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index 507b09d96c..dfe4d9e129 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -3282,6 +3282,11 @@ static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + if (!dev->hotplugged) { + cs->cold_booted = true; + } ++#ifdef CONFIG_KVM ++ if (cs->cpu_index >= ms->smp.cpus) { ++ cpu->kvm_sve_finalized = true; ++ } ++#endif + } + + static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index c51a0e3467..a5ba7f2a26 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -971,6 +971,9 @@ struct ArchCPU { + + /* KVM steal time */ + OnOffAuto kvm_steal_time; ++ ++ /* KVM SVE has been finalized for this CPU */ ++ bool kvm_sve_finalized; + #endif /* CONFIG_KVM */ + + /* Uniprocessor system with MP extensions */ +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 615e8bbbdf..8f01d485b0 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -647,7 +647,7 @@ int kvm_arch_init_vcpu(CPUState *cs) + return ret; + } + +- if (cpu_isar_feature(aa64_sve, cpu) && !DEVICE(cpu)->hotplugged) { ++ if (cpu_isar_feature(aa64_sve, cpu) && !cpu->kvm_sve_finalized) { + ret = kvm_arm_sve_set_vls(cs); + if (ret) { + return ret; +-- +2.27.0 + diff --git a/linux-headers-Synchronize-linux-headers-from-linux-v.patch b/linux-headers-Synchronize-linux-headers-from-linux-v.patch new file mode 100644 index 0000000000000000000000000000000000000000..53c7260544c09873445f131d7dfd368d13217c7c --- /dev/null +++ b/linux-headers-Synchronize-linux-headers-from-linux-v.patch @@ -0,0 +1,57 @@ +From 280cba84e3eaed10f095f0c88dab27b7799558e5 Mon Sep 17 00:00:00 2001 +From: Tianrui Zhao +Date: Fri, 5 Jan 2024 15:57:56 +0800 +Subject: [PATCH] linux-headers: Synchronize linux headers from linux + 
v6.7.0-rc8 + +Use the scripts/update-linux-headers.sh to synchronize linux +headers from linux v6.7.0-rc8. We mainly want to add the +loongarch linux headers and then add the loongarch kvm support +based on it. + +Signed-off-by: Tianrui Zhao +Acked-by: Song Gao +Message-Id: <20240105075804.1228596-2-zhaotianrui@loongson.cn> +Signed-off-by: Song Gao +--- + include/standard-headers/linux/fuse.h | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h +index 6b9793842c..fc0dcd10ae 100644 +--- a/include/standard-headers/linux/fuse.h ++++ b/include/standard-headers/linux/fuse.h +@@ -209,7 +209,7 @@ + * - add FUSE_HAS_EXPIRE_ONLY + * + * 7.39 +- * - add FUSE_DIRECT_IO_RELAX ++ * - add FUSE_DIRECT_IO_ALLOW_MMAP + * - add FUSE_STATX and related structures + */ + +@@ -405,8 +405,7 @@ struct fuse_file_lock { + * FUSE_CREATE_SUPP_GROUP: add supplementary group info to create, mkdir, + * symlink and mknod (single group that matches parent) + * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation +- * FUSE_DIRECT_IO_RELAX: relax restrictions in FOPEN_DIRECT_IO mode, for now +- * allow shared mmap ++ * FUSE_DIRECT_IO_ALLOW_MMAP: allow shared mmap in FOPEN_DIRECT_IO mode. 
+ */ + #define FUSE_ASYNC_READ (1 << 0) + #define FUSE_POSIX_LOCKS (1 << 1) +@@ -445,7 +444,10 @@ struct fuse_file_lock { + #define FUSE_HAS_INODE_DAX (1ULL << 33) + #define FUSE_CREATE_SUPP_GROUP (1ULL << 34) + #define FUSE_HAS_EXPIRE_ONLY (1ULL << 35) +-#define FUSE_DIRECT_IO_RELAX (1ULL << 36) ++#define FUSE_DIRECT_IO_ALLOW_MMAP (1ULL << 36) ++ ++/* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ ++#define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP + + /** + * CUSE INIT request/reply flags +-- +2.27.0 + diff --git a/linux-headers-Update-to-Linux-v6.7-rc5.patch b/linux-headers-Update-to-Linux-v6.7-rc5.patch new file mode 100644 index 0000000000000000000000000000000000000000..4a740ab691ca0dd4f91840ac3f817d9751e9a126 --- /dev/null +++ b/linux-headers-Update-to-Linux-v6.7-rc5.patch @@ -0,0 +1,1109 @@ +From 9904eb7d4559baca2da713346cd505a80af7e776 Mon Sep 17 00:00:00 2001 +From: Daniel Henrique Barboza +Date: Mon, 18 Dec 2023 17:43:18 -0300 +Subject: [PATCH] linux-headers: Update to Linux v6.7-rc5 + +We'll add a new RISC-V linux-header file, but first let's update all +headers. + +Headers for 'asm-loongarch' were added in this update. 
+ +Signed-off-by: Daniel Henrique Barboza +Acked-by: Alistair Francis +Message-ID: <20231218204321.75757-2-dbarboza@ventanamicro.com> +Signed-off-by: Alistair Francis +--- + include/standard-headers/drm/drm_fourcc.h | 2 + + include/standard-headers/linux/pci_regs.h | 24 ++- + include/standard-headers/linux/vhost_types.h | 7 + + .../standard-headers/linux/virtio_config.h | 5 + + include/standard-headers/linux/virtio_pci.h | 11 ++ + linux-headers/asm-arm64/kvm.h | 32 ++++ + linux-headers/asm-generic/unistd.h | 14 +- + linux-headers/asm-loongarch/bitsperlong.h | 1 + + linux-headers/asm-loongarch/kvm.h | 108 +++++++++++ + linux-headers/asm-loongarch/mman.h | 1 + + linux-headers/asm-loongarch/unistd.h | 5 + + linux-headers/asm-mips/unistd_n32.h | 4 + + linux-headers/asm-mips/unistd_n64.h | 4 + + linux-headers/asm-mips/unistd_o32.h | 4 + + linux-headers/asm-powerpc/unistd_32.h | 4 + + linux-headers/asm-powerpc/unistd_64.h | 4 + + linux-headers/asm-riscv/kvm.h | 12 ++ + linux-headers/asm-s390/unistd_32.h | 4 + + linux-headers/asm-s390/unistd_64.h | 4 + + linux-headers/asm-x86/unistd_32.h | 4 + + linux-headers/asm-x86/unistd_64.h | 3 + + linux-headers/asm-x86/unistd_x32.h | 3 + + linux-headers/linux/iommufd.h | 180 +++++++++++++++++- + linux-headers/linux/kvm.h | 11 ++ + linux-headers/linux/psp-sev.h | 1 + + linux-headers/linux/stddef.h | 9 +- + linux-headers/linux/userfaultfd.h | 9 +- + linux-headers/linux/vfio.h | 47 +++-- + linux-headers/linux/vhost.h | 8 + + 29 files changed, 498 insertions(+), 27 deletions(-) + create mode 100644 linux-headers/asm-loongarch/bitsperlong.h + create mode 100644 linux-headers/asm-loongarch/kvm.h + create mode 100644 linux-headers/asm-loongarch/mman.h + create mode 100644 linux-headers/asm-loongarch/unistd.h + +diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h +index 72279f4d25..3afb70160f 100644 +--- a/include/standard-headers/drm/drm_fourcc.h ++++ 
b/include/standard-headers/drm/drm_fourcc.h +@@ -322,6 +322,8 @@ extern "C" { + * index 1 = Cr:Cb plane, [39:0] Cr1:Cb1:Cr0:Cb0 little endian + */ + #define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5') /* 2x2 subsampled Cr:Cb plane */ ++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0') /* 2x1 subsampled Cr:Cb plane */ ++#define DRM_FORMAT_NV30 fourcc_code('N', 'V', '3', '0') /* non-subsampled Cr:Cb plane */ + + /* + * 2 plane YCbCr MSB aligned +diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h +index e5f558d964..a39193213f 100644 +--- a/include/standard-headers/linux/pci_regs.h ++++ b/include/standard-headers/linux/pci_regs.h +@@ -80,6 +80,7 @@ + #define PCI_HEADER_TYPE_NORMAL 0 + #define PCI_HEADER_TYPE_BRIDGE 1 + #define PCI_HEADER_TYPE_CARDBUS 2 ++#define PCI_HEADER_TYPE_MFD 0x80 /* Multi-Function Device (possible) */ + + #define PCI_BIST 0x0f /* 8 bits */ + #define PCI_BIST_CODE_MASK 0x0f /* Return result */ +@@ -637,6 +638,7 @@ + #define PCI_EXP_RTCAP 0x1e /* Root Capabilities */ + #define PCI_EXP_RTCAP_CRSVIS 0x0001 /* CRS Software Visibility capability */ + #define PCI_EXP_RTSTA 0x20 /* Root Status */ ++#define PCI_EXP_RTSTA_PME_RQ_ID 0x0000ffff /* PME Requester ID */ + #define PCI_EXP_RTSTA_PME 0x00010000 /* PME status */ + #define PCI_EXP_RTSTA_PENDING 0x00020000 /* PME pending */ + /* +@@ -930,12 +932,13 @@ + + /* Process Address Space ID */ + #define PCI_PASID_CAP 0x04 /* PASID feature register */ +-#define PCI_PASID_CAP_EXEC 0x02 /* Exec permissions Supported */ +-#define PCI_PASID_CAP_PRIV 0x04 /* Privilege Mode Supported */ ++#define PCI_PASID_CAP_EXEC 0x0002 /* Exec permissions Supported */ ++#define PCI_PASID_CAP_PRIV 0x0004 /* Privilege Mode Supported */ ++#define PCI_PASID_CAP_WIDTH 0x1f00 + #define PCI_PASID_CTRL 0x06 /* PASID control register */ +-#define PCI_PASID_CTRL_ENABLE 0x01 /* Enable bit */ +-#define PCI_PASID_CTRL_EXEC 0x02 /* Exec permissions Enable */ +-#define 
PCI_PASID_CTRL_PRIV 0x04 /* Privilege Mode Enable */ ++#define PCI_PASID_CTRL_ENABLE 0x0001 /* Enable bit */ ++#define PCI_PASID_CTRL_EXEC 0x0002 /* Exec permissions Enable */ ++#define PCI_PASID_CTRL_PRIV 0x0004 /* Privilege Mode Enable */ + #define PCI_EXT_CAP_PASID_SIZEOF 8 + + /* Single Root I/O Virtualization */ +@@ -975,6 +978,8 @@ + #define PCI_LTR_VALUE_MASK 0x000003ff + #define PCI_LTR_SCALE_MASK 0x00001c00 + #define PCI_LTR_SCALE_SHIFT 10 ++#define PCI_LTR_NOSNOOP_VALUE 0x03ff0000 /* Max No-Snoop Latency Value */ ++#define PCI_LTR_NOSNOOP_SCALE 0x1c000000 /* Scale for Max Value */ + #define PCI_EXT_CAP_LTR_SIZEOF 8 + + /* Access Control Service */ +@@ -1042,9 +1047,16 @@ + #define PCI_EXP_DPC_STATUS 0x08 /* DPC Status */ + #define PCI_EXP_DPC_STATUS_TRIGGER 0x0001 /* Trigger Status */ + #define PCI_EXP_DPC_STATUS_TRIGGER_RSN 0x0006 /* Trigger Reason */ ++#define PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR 0x0000 /* Uncorrectable error */ ++#define PCI_EXP_DPC_STATUS_TRIGGER_RSN_NFE 0x0002 /* Rcvd ERR_NONFATAL */ ++#define PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE 0x0004 /* Rcvd ERR_FATAL */ ++#define PCI_EXP_DPC_STATUS_TRIGGER_RSN_IN_EXT 0x0006 /* Reason in Trig Reason Extension field */ + #define PCI_EXP_DPC_STATUS_INTERRUPT 0x0008 /* Interrupt Status */ + #define PCI_EXP_DPC_RP_BUSY 0x0010 /* Root Port Busy */ + #define PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT 0x0060 /* Trig Reason Extension */ ++#define PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO 0x0000 /* RP PIO error */ ++#define PCI_EXP_DPC_STATUS_TRIGGER_RSN_SW_TRIGGER 0x0020 /* DPC SW Trigger bit */ ++#define PCI_EXP_DPC_RP_PIO_FEP 0x1f00 /* RP PIO First Err Ptr */ + + #define PCI_EXP_DPC_SOURCE_ID 0x0A /* DPC Source Identifier */ + +@@ -1088,6 +1100,8 @@ + #define PCI_L1SS_CTL1_LTR_L12_TH_VALUE 0x03ff0000 /* LTR_L1.2_THRESHOLD_Value */ + #define PCI_L1SS_CTL1_LTR_L12_TH_SCALE 0xe0000000 /* LTR_L1.2_THRESHOLD_Scale */ + #define PCI_L1SS_CTL2 0x0c /* Control 2 Register */ ++#define PCI_L1SS_CTL2_T_PWR_ON_SCALE 0x00000003 /* 
T_POWER_ON Scale */ ++#define PCI_L1SS_CTL2_T_PWR_ON_VALUE 0x000000f8 /* T_POWER_ON Value */ + + /* Designated Vendor-Specific (DVSEC, PCI_EXT_CAP_ID_DVSEC) */ + #define PCI_DVSEC_HEADER1 0x4 /* Designated Vendor-Specific Header1 */ +diff --git a/include/standard-headers/linux/vhost_types.h b/include/standard-headers/linux/vhost_types.h +index 5ad07e134a..fd54044936 100644 +--- a/include/standard-headers/linux/vhost_types.h ++++ b/include/standard-headers/linux/vhost_types.h +@@ -185,5 +185,12 @@ struct vhost_vdpa_iova_range { + * DRIVER_OK + */ + #define VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK 0x6 ++/* Device may expose the virtqueue's descriptor area, driver area and ++ * device area to a different group for ASID binding than where its ++ * buffers may reside. Requires VHOST_BACKEND_F_IOTLB_ASID. ++ */ ++#define VHOST_BACKEND_F_DESC_ASID 0x7 ++/* IOTLB don't flush memory mapping across device reset */ ++#define VHOST_BACKEND_F_IOTLB_PERSIST 0x8 + + #endif +diff --git a/include/standard-headers/linux/virtio_config.h b/include/standard-headers/linux/virtio_config.h +index 8a7d0dc8b0..bfd1ca643e 100644 +--- a/include/standard-headers/linux/virtio_config.h ++++ b/include/standard-headers/linux/virtio_config.h +@@ -103,6 +103,11 @@ + */ + #define VIRTIO_F_NOTIFICATION_DATA 38 + ++/* This feature indicates that the driver uses the data provided by the device ++ * as a virtqueue identifier in available buffer notifications. ++ */ ++#define VIRTIO_F_NOTIF_CONFIG_DATA 39 ++ + /* + * This feature indicates that the driver can reset a queue individually. 
+ */ +diff --git a/include/standard-headers/linux/virtio_pci.h b/include/standard-headers/linux/virtio_pci.h +index be912cfc95..b7fdfd0668 100644 +--- a/include/standard-headers/linux/virtio_pci.h ++++ b/include/standard-headers/linux/virtio_pci.h +@@ -166,6 +166,17 @@ struct virtio_pci_common_cfg { + uint32_t queue_used_hi; /* read-write */ + }; + ++/* ++ * Warning: do not use sizeof on this: use offsetofend for ++ * specific fields you need. ++ */ ++struct virtio_pci_modern_common_cfg { ++ struct virtio_pci_common_cfg cfg; ++ ++ uint16_t queue_notify_data; /* read-write */ ++ uint16_t queue_reset; /* read-write */ ++}; ++ + /* Fields in VIRTIO_PCI_CAP_PCI_CFG: */ + struct virtio_pci_cfg_cap { + struct virtio_pci_cap cap; +diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h +index 38e5957526..c59ea55cd8 100644 +--- a/linux-headers/asm-arm64/kvm.h ++++ b/linux-headers/asm-arm64/kvm.h +@@ -491,6 +491,38 @@ struct kvm_smccc_filter { + #define KVM_HYPERCALL_EXIT_SMC (1U << 0) + #define KVM_HYPERCALL_EXIT_16BIT (1U << 1) + ++/* ++ * Get feature ID registers userspace writable mask. ++ * ++ * From DDI0487J.a, D19.2.66 ("ID_AA64MMFR2_EL1, AArch64 Memory Model ++ * Feature Register 2"): ++ * ++ * "The Feature ID space is defined as the System register space in ++ * AArch64 with op0==3, op1=={0, 1, 3}, CRn==0, CRm=={0-7}, ++ * op2=={0-7}." ++ * ++ * This covers all currently known R/O registers that indicate ++ * anything useful feature wise, including the ID registers. ++ * ++ * If we ever need to introduce a new range, it will be described as ++ * such in the range field. 
++ */ ++#define KVM_ARM_FEATURE_ID_RANGE_IDX(op0, op1, crn, crm, op2) \ ++ ({ \ ++ __u64 __op1 = (op1) & 3; \ ++ __op1 -= (__op1 == 3); \ ++ (__op1 << 6 | ((crm) & 7) << 3 | (op2)); \ ++ }) ++ ++#define KVM_ARM_FEATURE_ID_RANGE 0 ++#define KVM_ARM_FEATURE_ID_RANGE_SIZE (3 * 8 * 8) ++ ++struct reg_mask_range { ++ __u64 addr; /* Pointer to mask array */ ++ __u32 range; /* Requested range */ ++ __u32 reserved[13]; ++}; ++ + #endif + + #endif /* __ARM_KVM_H__ */ +diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h +index abe087c53b..756b013fb8 100644 +--- a/linux-headers/asm-generic/unistd.h ++++ b/linux-headers/asm-generic/unistd.h +@@ -71,7 +71,7 @@ __SYSCALL(__NR_fremovexattr, sys_fremovexattr) + #define __NR_getcwd 17 + __SYSCALL(__NR_getcwd, sys_getcwd) + #define __NR_lookup_dcookie 18 +-__SC_COMP(__NR_lookup_dcookie, sys_lookup_dcookie, compat_sys_lookup_dcookie) ++__SYSCALL(__NR_lookup_dcookie, sys_ni_syscall) + #define __NR_eventfd2 19 + __SYSCALL(__NR_eventfd2, sys_eventfd2) + #define __NR_epoll_create1 20 +@@ -816,15 +816,21 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease) + __SYSCALL(__NR_futex_waitv, sys_futex_waitv) + #define __NR_set_mempolicy_home_node 450 + __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) +- + #define __NR_cachestat 451 + __SYSCALL(__NR_cachestat, sys_cachestat) +- + #define __NR_fchmodat2 452 + __SYSCALL(__NR_fchmodat2, sys_fchmodat2) ++#define __NR_map_shadow_stack 453 ++__SYSCALL(__NR_map_shadow_stack, sys_map_shadow_stack) ++#define __NR_futex_wake 454 ++__SYSCALL(__NR_futex_wake, sys_futex_wake) ++#define __NR_futex_wait 455 ++__SYSCALL(__NR_futex_wait, sys_futex_wait) ++#define __NR_futex_requeue 456 ++__SYSCALL(__NR_futex_requeue, sys_futex_requeue) + + #undef __NR_syscalls +-#define __NR_syscalls 453 ++#define __NR_syscalls 457 + + /* + * 32 bit systems traditionally used different +diff --git a/linux-headers/asm-loongarch/bitsperlong.h 
b/linux-headers/asm-loongarch/bitsperlong.h +new file mode 100644 +index 0000000000..6dc0bb0c13 +--- /dev/null ++++ b/linux-headers/asm-loongarch/bitsperlong.h +@@ -0,0 +1 @@ ++#include +diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h +new file mode 100644 +index 0000000000..c6ad2ee610 +--- /dev/null ++++ b/linux-headers/asm-loongarch/kvm.h +@@ -0,0 +1,108 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited ++ */ ++ ++#ifndef __UAPI_ASM_LOONGARCH_KVM_H ++#define __UAPI_ASM_LOONGARCH_KVM_H ++ ++#include ++ ++/* ++ * KVM LoongArch specific structures and definitions. ++ * ++ * Some parts derived from the x86 version of this file. ++ */ ++ ++#define __KVM_HAVE_READONLY_MEM ++ ++#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 ++#define KVM_DIRTY_LOG_PAGE_OFFSET 64 ++ ++/* ++ * for KVM_GET_REGS and KVM_SET_REGS ++ */ ++struct kvm_regs { ++ /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */ ++ __u64 gpr[32]; ++ __u64 pc; ++}; ++ ++/* ++ * for KVM_GET_FPU and KVM_SET_FPU ++ */ ++struct kvm_fpu { ++ __u32 fcsr; ++ __u64 fcc; /* 8x8 */ ++ struct kvm_fpureg { ++ __u64 val64[4]; ++ } fpr[32]; ++}; ++ ++/* ++ * For LoongArch, we use KVM_SET_ONE_REG and KVM_GET_ONE_REG to access various ++ * registers. The id field is broken down as follows: ++ * ++ * bits[63..52] - As per linux/kvm.h ++ * bits[51..32] - Must be zero. ++ * bits[31..16] - Register set. ++ * ++ * Register set = 0: GP registers from kvm_regs (see definitions below). ++ * ++ * Register set = 1: CSR registers. ++ * ++ * Register set = 2: KVM specific registers (see definitions below). ++ * ++ * Register set = 3: FPU / SIMD registers (see definitions below). ++ * ++ * Other sets registers may be added in the future. Each set would ++ * have its own identifier in bits[31..16]. 
++ */ ++ ++#define KVM_REG_LOONGARCH_GPR (KVM_REG_LOONGARCH | 0x00000ULL) ++#define KVM_REG_LOONGARCH_CSR (KVM_REG_LOONGARCH | 0x10000ULL) ++#define KVM_REG_LOONGARCH_KVM (KVM_REG_LOONGARCH | 0x20000ULL) ++#define KVM_REG_LOONGARCH_FPSIMD (KVM_REG_LOONGARCH | 0x30000ULL) ++#define KVM_REG_LOONGARCH_CPUCFG (KVM_REG_LOONGARCH | 0x40000ULL) ++#define KVM_REG_LOONGARCH_MASK (KVM_REG_LOONGARCH | 0x70000ULL) ++#define KVM_CSR_IDX_MASK 0x7fff ++#define KVM_CPUCFG_IDX_MASK 0x7fff ++ ++/* ++ * KVM_REG_LOONGARCH_KVM - KVM specific control registers. ++ */ ++ ++#define KVM_REG_LOONGARCH_COUNTER (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 1) ++#define KVM_REG_LOONGARCH_VCPU_RESET (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 2) ++ ++#define LOONGARCH_REG_SHIFT 3 ++#define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT)) ++#define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG) ++#define KVM_IOC_CPUCFG(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG) ++ ++struct kvm_debug_exit_arch { ++}; ++ ++/* for KVM_SET_GUEST_DEBUG */ ++struct kvm_guest_debug_arch { ++}; ++ ++/* definition of registers in kvm_run */ ++struct kvm_sync_regs { ++}; ++ ++/* dummy definition */ ++struct kvm_sregs { ++}; ++ ++struct kvm_iocsr_entry { ++ __u32 addr; ++ __u32 pad; ++ __u64 data; ++}; ++ ++#define KVM_NR_IRQCHIPS 1 ++#define KVM_IRQCHIP_NUM_PINS 64 ++#define KVM_MAX_CORES 256 ++ ++#endif /* __UAPI_ASM_LOONGARCH_KVM_H */ +diff --git a/linux-headers/asm-loongarch/mman.h b/linux-headers/asm-loongarch/mman.h +new file mode 100644 +index 0000000000..8eebf89f5a +--- /dev/null ++++ b/linux-headers/asm-loongarch/mman.h +@@ -0,0 +1 @@ ++#include +diff --git a/linux-headers/asm-loongarch/unistd.h b/linux-headers/asm-loongarch/unistd.h +new file mode 100644 +index 0000000000..fcb668984f +--- /dev/null ++++ b/linux-headers/asm-loongarch/unistd.h +@@ -0,0 +1,5 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#define __ARCH_WANT_SYS_CLONE 
++#define __ARCH_WANT_SYS_CLONE3 ++ ++#include +diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h +index 46d8500654..994b6f008f 100644 +--- a/linux-headers/asm-mips/unistd_n32.h ++++ b/linux-headers/asm-mips/unistd_n32.h +@@ -381,5 +381,9 @@ + #define __NR_set_mempolicy_home_node (__NR_Linux + 450) + #define __NR_cachestat (__NR_Linux + 451) + #define __NR_fchmodat2 (__NR_Linux + 452) ++#define __NR_map_shadow_stack (__NR_Linux + 453) ++#define __NR_futex_wake (__NR_Linux + 454) ++#define __NR_futex_wait (__NR_Linux + 455) ++#define __NR_futex_requeue (__NR_Linux + 456) + + #endif /* _ASM_UNISTD_N32_H */ +diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h +index c2f7ac673b..41dcf5877a 100644 +--- a/linux-headers/asm-mips/unistd_n64.h ++++ b/linux-headers/asm-mips/unistd_n64.h +@@ -357,5 +357,9 @@ + #define __NR_set_mempolicy_home_node (__NR_Linux + 450) + #define __NR_cachestat (__NR_Linux + 451) + #define __NR_fchmodat2 (__NR_Linux + 452) ++#define __NR_map_shadow_stack (__NR_Linux + 453) ++#define __NR_futex_wake (__NR_Linux + 454) ++#define __NR_futex_wait (__NR_Linux + 455) ++#define __NR_futex_requeue (__NR_Linux + 456) + + #endif /* _ASM_UNISTD_N64_H */ +diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h +index 757c68f2ad..ae9d334d96 100644 +--- a/linux-headers/asm-mips/unistd_o32.h ++++ b/linux-headers/asm-mips/unistd_o32.h +@@ -427,5 +427,9 @@ + #define __NR_set_mempolicy_home_node (__NR_Linux + 450) + #define __NR_cachestat (__NR_Linux + 451) + #define __NR_fchmodat2 (__NR_Linux + 452) ++#define __NR_map_shadow_stack (__NR_Linux + 453) ++#define __NR_futex_wake (__NR_Linux + 454) ++#define __NR_futex_wait (__NR_Linux + 455) ++#define __NR_futex_requeue (__NR_Linux + 456) + + #endif /* _ASM_UNISTD_O32_H */ +diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h +index 8ef94bbac1..b9b23d66d7 100644 +--- 
a/linux-headers/asm-powerpc/unistd_32.h ++++ b/linux-headers/asm-powerpc/unistd_32.h +@@ -434,6 +434,10 @@ + #define __NR_set_mempolicy_home_node 450 + #define __NR_cachestat 451 + #define __NR_fchmodat2 452 ++#define __NR_map_shadow_stack 453 ++#define __NR_futex_wake 454 ++#define __NR_futex_wait 455 ++#define __NR_futex_requeue 456 + + + #endif /* _ASM_UNISTD_32_H */ +diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h +index 0e7ee43e88..cbb4b3e8f7 100644 +--- a/linux-headers/asm-powerpc/unistd_64.h ++++ b/linux-headers/asm-powerpc/unistd_64.h +@@ -406,6 +406,10 @@ + #define __NR_set_mempolicy_home_node 450 + #define __NR_cachestat 451 + #define __NR_fchmodat2 452 ++#define __NR_map_shadow_stack 453 ++#define __NR_futex_wake 454 ++#define __NR_futex_wait 455 ++#define __NR_futex_requeue 456 + + + #endif /* _ASM_UNISTD_64_H */ +diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h +index 992c5e4071..60d3b21dea 100644 +--- a/linux-headers/asm-riscv/kvm.h ++++ b/linux-headers/asm-riscv/kvm.h +@@ -80,6 +80,7 @@ struct kvm_riscv_csr { + unsigned long sip; + unsigned long satp; + unsigned long scounteren; ++ unsigned long senvcfg; + }; + + /* AIA CSR registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ +@@ -93,6 +94,11 @@ struct kvm_riscv_aia_csr { + unsigned long iprio2h; + }; + ++/* Smstateen CSR for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ ++struct kvm_riscv_smstateen_csr { ++ unsigned long sstateen0; ++}; ++ + /* TIMER registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ + struct kvm_riscv_timer { + __u64 frequency; +@@ -131,6 +137,8 @@ enum KVM_RISCV_ISA_EXT_ID { + KVM_RISCV_ISA_EXT_ZICSR, + KVM_RISCV_ISA_EXT_ZIFENCEI, + KVM_RISCV_ISA_EXT_ZIHPM, ++ KVM_RISCV_ISA_EXT_SMSTATEEN, ++ KVM_RISCV_ISA_EXT_ZICOND, + KVM_RISCV_ISA_EXT_MAX, + }; + +@@ -148,6 +156,7 @@ enum KVM_RISCV_SBI_EXT_ID { + KVM_RISCV_SBI_EXT_PMU, + KVM_RISCV_SBI_EXT_EXPERIMENTAL, + KVM_RISCV_SBI_EXT_VENDOR, ++ KVM_RISCV_SBI_EXT_DBCN, + 
KVM_RISCV_SBI_EXT_MAX, + }; + +@@ -178,10 +187,13 @@ enum KVM_RISCV_SBI_EXT_ID { + #define KVM_REG_RISCV_CSR (0x03 << KVM_REG_RISCV_TYPE_SHIFT) + #define KVM_REG_RISCV_CSR_GENERAL (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT) + #define KVM_REG_RISCV_CSR_AIA (0x1 << KVM_REG_RISCV_SUBTYPE_SHIFT) ++#define KVM_REG_RISCV_CSR_SMSTATEEN (0x2 << KVM_REG_RISCV_SUBTYPE_SHIFT) + #define KVM_REG_RISCV_CSR_REG(name) \ + (offsetof(struct kvm_riscv_csr, name) / sizeof(unsigned long)) + #define KVM_REG_RISCV_CSR_AIA_REG(name) \ + (offsetof(struct kvm_riscv_aia_csr, name) / sizeof(unsigned long)) ++#define KVM_REG_RISCV_CSR_SMSTATEEN_REG(name) \ ++ (offsetof(struct kvm_riscv_smstateen_csr, name) / sizeof(unsigned long)) + + /* Timer registers are mapped as type 4 */ + #define KVM_REG_RISCV_TIMER (0x04 << KVM_REG_RISCV_TYPE_SHIFT) +diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h +index 716fa368ca..c093e6d5f9 100644 +--- a/linux-headers/asm-s390/unistd_32.h ++++ b/linux-headers/asm-s390/unistd_32.h +@@ -425,5 +425,9 @@ + #define __NR_set_mempolicy_home_node 450 + #define __NR_cachestat 451 + #define __NR_fchmodat2 452 ++#define __NR_map_shadow_stack 453 ++#define __NR_futex_wake 454 ++#define __NR_futex_wait 455 ++#define __NR_futex_requeue 456 + + #endif /* _ASM_S390_UNISTD_32_H */ +diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h +index b2a11b1d13..114c0569a4 100644 +--- a/linux-headers/asm-s390/unistd_64.h ++++ b/linux-headers/asm-s390/unistd_64.h +@@ -373,5 +373,9 @@ + #define __NR_set_mempolicy_home_node 450 + #define __NR_cachestat 451 + #define __NR_fchmodat2 452 ++#define __NR_map_shadow_stack 453 ++#define __NR_futex_wake 454 ++#define __NR_futex_wait 455 ++#define __NR_futex_requeue 456 + + #endif /* _ASM_S390_UNISTD_64_H */ +diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h +index d749ad1c24..329649c377 100644 +--- a/linux-headers/asm-x86/unistd_32.h ++++ 
b/linux-headers/asm-x86/unistd_32.h +@@ -443,6 +443,10 @@ + #define __NR_set_mempolicy_home_node 450 + #define __NR_cachestat 451 + #define __NR_fchmodat2 452 ++#define __NR_map_shadow_stack 453 ++#define __NR_futex_wake 454 ++#define __NR_futex_wait 455 ++#define __NR_futex_requeue 456 + + + #endif /* _ASM_UNISTD_32_H */ +diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h +index cea67282eb..4583606ce6 100644 +--- a/linux-headers/asm-x86/unistd_64.h ++++ b/linux-headers/asm-x86/unistd_64.h +@@ -366,6 +366,9 @@ + #define __NR_cachestat 451 + #define __NR_fchmodat2 452 + #define __NR_map_shadow_stack 453 ++#define __NR_futex_wake 454 ++#define __NR_futex_wait 455 ++#define __NR_futex_requeue 456 + + + #endif /* _ASM_UNISTD_64_H */ +diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h +index 5b2e79bf4c..146d74d8e4 100644 +--- a/linux-headers/asm-x86/unistd_x32.h ++++ b/linux-headers/asm-x86/unistd_x32.h +@@ -318,6 +318,9 @@ + #define __NR_set_mempolicy_home_node (__X32_SYSCALL_BIT + 450) + #define __NR_cachestat (__X32_SYSCALL_BIT + 451) + #define __NR_fchmodat2 (__X32_SYSCALL_BIT + 452) ++#define __NR_futex_wake (__X32_SYSCALL_BIT + 454) ++#define __NR_futex_wait (__X32_SYSCALL_BIT + 455) ++#define __NR_futex_requeue (__X32_SYSCALL_BIT + 456) + #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) + #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) + #define __NR_ioctl (__X32_SYSCALL_BIT + 514) +diff --git a/linux-headers/linux/iommufd.h b/linux-headers/linux/iommufd.h +index 218bf7ac98..806d98d09c 100644 +--- a/linux-headers/linux/iommufd.h ++++ b/linux-headers/linux/iommufd.h +@@ -47,6 +47,8 @@ enum { + IOMMUFD_CMD_VFIO_IOAS, + IOMMUFD_CMD_HWPT_ALLOC, + IOMMUFD_CMD_GET_HW_INFO, ++ IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING, ++ IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP, + }; + + /** +@@ -347,20 +349,86 @@ struct iommu_vfio_ioas { + }; + #define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS) + ++/** ++ * enum 
iommufd_hwpt_alloc_flags - Flags for HWPT allocation ++ * @IOMMU_HWPT_ALLOC_NEST_PARENT: If set, allocate a HWPT that can serve as ++ * the parent HWPT in a nesting configuration. ++ * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is ++ * enforced on device attachment ++ */ ++enum iommufd_hwpt_alloc_flags { ++ IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0, ++ IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1, ++}; ++ ++/** ++ * enum iommu_hwpt_vtd_s1_flags - Intel VT-d stage-1 page table ++ * entry attributes ++ * @IOMMU_VTD_S1_SRE: Supervisor request ++ * @IOMMU_VTD_S1_EAFE: Extended access enable ++ * @IOMMU_VTD_S1_WPE: Write protect enable ++ */ ++enum iommu_hwpt_vtd_s1_flags { ++ IOMMU_VTD_S1_SRE = 1 << 0, ++ IOMMU_VTD_S1_EAFE = 1 << 1, ++ IOMMU_VTD_S1_WPE = 1 << 2, ++}; ++ ++/** ++ * struct iommu_hwpt_vtd_s1 - Intel VT-d stage-1 page table ++ * info (IOMMU_HWPT_DATA_VTD_S1) ++ * @flags: Combination of enum iommu_hwpt_vtd_s1_flags ++ * @pgtbl_addr: The base address of the stage-1 page table. 
++ * @addr_width: The address width of the stage-1 page table ++ * @__reserved: Must be 0 ++ */ ++struct iommu_hwpt_vtd_s1 { ++ __aligned_u64 flags; ++ __aligned_u64 pgtbl_addr; ++ __u32 addr_width; ++ __u32 __reserved; ++}; ++ ++/** ++ * enum iommu_hwpt_data_type - IOMMU HWPT Data Type ++ * @IOMMU_HWPT_DATA_NONE: no data ++ * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table ++ */ ++enum iommu_hwpt_data_type { ++ IOMMU_HWPT_DATA_NONE, ++ IOMMU_HWPT_DATA_VTD_S1, ++}; ++ + /** + * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC) + * @size: sizeof(struct iommu_hwpt_alloc) +- * @flags: Must be 0 ++ * @flags: Combination of enum iommufd_hwpt_alloc_flags + * @dev_id: The device to allocate this HWPT for +- * @pt_id: The IOAS to connect this HWPT to ++ * @pt_id: The IOAS or HWPT to connect this HWPT to + * @out_hwpt_id: The ID of the new HWPT + * @__reserved: Must be 0 ++ * @data_type: One of enum iommu_hwpt_data_type ++ * @data_len: Length of the type specific data ++ * @data_uptr: User pointer to the type specific data + * + * Explicitly allocate a hardware page table object. This is the same object + * type that is returned by iommufd_device_attach() and represents the + * underlying iommu driver's iommu_domain kernel object. + * +- * A HWPT will be created with the IOVA mappings from the given IOAS. ++ * A kernel-managed HWPT will be created with the mappings from the given ++ * IOAS via the @pt_id. The @data_type for this allocation must be set to ++ * IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a ++ * nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags. ++ * ++ * A user-managed nested HWPT will be created from a given parent HWPT via ++ * @pt_id, in which the parent HWPT must be allocated previously via the ++ * same ioctl from a given IOAS (@pt_id). In this case, the @data_type ++ * must be set to a pre-defined type corresponding to an I/O page table ++ * type supported by the underlying IOMMU hardware. 
++ * ++ * If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and ++ * @data_uptr should be zero. Otherwise, both @data_len and @data_uptr ++ * must be given. + */ + struct iommu_hwpt_alloc { + __u32 size; +@@ -369,13 +437,26 @@ struct iommu_hwpt_alloc { + __u32 pt_id; + __u32 out_hwpt_id; + __u32 __reserved; ++ __u32 data_type; ++ __u32 data_len; ++ __aligned_u64 data_uptr; + }; + #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC) + ++/** ++ * enum iommu_hw_info_vtd_flags - Flags for VT-d hw_info ++ * @IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17: If set, disallow read-only mappings ++ * on a nested_parent domain. ++ * https://www.intel.com/content/www/us/en/content-details/772415/content-details.html ++ */ ++enum iommu_hw_info_vtd_flags { ++ IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 = 1 << 0, ++}; ++ + /** + * struct iommu_hw_info_vtd - Intel VT-d hardware information + * +- * @flags: Must be 0 ++ * @flags: Combination of enum iommu_hw_info_vtd_flags + * @__reserved: Must be 0 + * + * @cap_reg: Value of Intel VT-d capability register defined in VT-d spec +@@ -404,6 +485,20 @@ enum iommu_hw_info_type { + IOMMU_HW_INFO_TYPE_INTEL_VTD, + }; + ++/** ++ * enum iommufd_hw_capabilities ++ * @IOMMU_HW_CAP_DIRTY_TRACKING: IOMMU hardware support for dirty tracking ++ * If available, it means the following APIs ++ * are supported: ++ * ++ * IOMMU_HWPT_GET_DIRTY_BITMAP ++ * IOMMU_HWPT_SET_DIRTY_TRACKING ++ * ++ */ ++enum iommufd_hw_capabilities { ++ IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0, ++}; ++ + /** + * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO) + * @size: sizeof(struct iommu_hw_info) +@@ -415,6 +510,8 @@ enum iommu_hw_info_type { + * the iommu type specific hardware information data + * @out_data_type: Output the iommu hardware info type as defined in the enum + * iommu_hw_info_type. ++ * @out_capabilities: Output the generic iommu capability info type as defined ++ * in the enum iommu_hw_capabilities. 
+ * @__reserved: Must be 0 + * + * Query an iommu type specific hardware information data from an iommu behind +@@ -439,6 +536,81 @@ struct iommu_hw_info { + __aligned_u64 data_uptr; + __u32 out_data_type; + __u32 __reserved; ++ __aligned_u64 out_capabilities; + }; + #define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO) ++ ++/* ++ * enum iommufd_hwpt_set_dirty_tracking_flags - Flags for steering dirty ++ * tracking ++ * @IOMMU_HWPT_DIRTY_TRACKING_ENABLE: Enable dirty tracking ++ */ ++enum iommufd_hwpt_set_dirty_tracking_flags { ++ IOMMU_HWPT_DIRTY_TRACKING_ENABLE = 1, ++}; ++ ++/** ++ * struct iommu_hwpt_set_dirty_tracking - ioctl(IOMMU_HWPT_SET_DIRTY_TRACKING) ++ * @size: sizeof(struct iommu_hwpt_set_dirty_tracking) ++ * @flags: Combination of enum iommufd_hwpt_set_dirty_tracking_flags ++ * @hwpt_id: HW pagetable ID that represents the IOMMU domain ++ * @__reserved: Must be 0 ++ * ++ * Toggle dirty tracking on an HW pagetable. ++ */ ++struct iommu_hwpt_set_dirty_tracking { ++ __u32 size; ++ __u32 flags; ++ __u32 hwpt_id; ++ __u32 __reserved; ++}; ++#define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \ ++ IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING) ++ ++/** ++ * enum iommufd_hwpt_get_dirty_bitmap_flags - Flags for getting dirty bits ++ * @IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR: Just read the PTEs without clearing ++ * any dirty bits metadata. This flag ++ * can be passed in the expectation ++ * where the next operation is an unmap ++ * of the same IOVA range. 
++ * ++ */ ++enum iommufd_hwpt_get_dirty_bitmap_flags { ++ IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR = 1, ++}; ++ ++/** ++ * struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP) ++ * @size: sizeof(struct iommu_hwpt_get_dirty_bitmap) ++ * @hwpt_id: HW pagetable ID that represents the IOMMU domain ++ * @flags: Combination of enum iommufd_hwpt_get_dirty_bitmap_flags ++ * @__reserved: Must be 0 ++ * @iova: base IOVA of the bitmap first bit ++ * @length: IOVA range size ++ * @page_size: page size granularity of each bit in the bitmap ++ * @data: bitmap where to set the dirty bits. The bitmap bits each ++ * represent a page_size which you deviate from an arbitrary iova. ++ * ++ * Checking a given IOVA is dirty: ++ * ++ * data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64)) ++ * ++ * Walk the IOMMU pagetables for a given IOVA range to return a bitmap ++ * with the dirty IOVAs. In doing so it will also by default clear any ++ * dirty bit metadata set in the IOPTE. ++ */ ++struct iommu_hwpt_get_dirty_bitmap { ++ __u32 size; ++ __u32 hwpt_id; ++ __u32 flags; ++ __u32 __reserved; ++ __aligned_u64 iova; ++ __aligned_u64 length; ++ __aligned_u64 page_size; ++ __aligned_u64 data; ++}; ++#define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \ ++ IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP) ++ + #endif +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 0d74ee999a..549fea3a97 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -264,6 +264,7 @@ struct kvm_xen_exit { + #define KVM_EXIT_RISCV_SBI 35 + #define KVM_EXIT_RISCV_CSR 36 + #define KVM_EXIT_NOTIFY 37 ++#define KVM_EXIT_LOONGARCH_IOCSR 38 + + /* For KVM_EXIT_INTERNAL_ERROR */ + /* Emulate instruction failed. 
*/ +@@ -336,6 +337,13 @@ struct kvm_run { + __u32 len; + __u8 is_write; + } mmio; ++ /* KVM_EXIT_LOONGARCH_IOCSR */ ++ struct { ++ __u64 phys_addr; ++ __u8 data[8]; ++ __u32 len; ++ __u8 is_write; ++ } iocsr_io; + /* KVM_EXIT_HYPERCALL */ + struct { + __u64 nr; +@@ -1188,6 +1196,7 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_COUNTER_OFFSET 227 + #define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228 + #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 ++#define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 + + #ifdef KVM_CAP_IRQ_ROUTING + +@@ -1358,6 +1367,7 @@ struct kvm_dirty_tlb { + #define KVM_REG_ARM64 0x6000000000000000ULL + #define KVM_REG_MIPS 0x7000000000000000ULL + #define KVM_REG_RISCV 0x8000000000000000ULL ++#define KVM_REG_LOONGARCH 0x9000000000000000ULL + + #define KVM_REG_SIZE_SHIFT 52 + #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL +@@ -1558,6 +1568,7 @@ struct kvm_s390_ucas_mapping { + #define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags) + /* Available with KVM_CAP_COUNTER_OFFSET */ + #define KVM_ARM_SET_COUNTER_OFFSET _IOW(KVMIO, 0xb5, struct kvm_arm_counter_offset) ++#define KVM_ARM_GET_REG_WRITABLE_MASKS _IOR(KVMIO, 0xb6, struct reg_mask_range) + + /* ioctl for vm fd */ + #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) +diff --git a/linux-headers/linux/psp-sev.h b/linux-headers/linux/psp-sev.h +index 12ccb70099..bcb21339ee 100644 +--- a/linux-headers/linux/psp-sev.h ++++ b/linux-headers/linux/psp-sev.h +@@ -68,6 +68,7 @@ typedef enum { + SEV_RET_INVALID_PARAM, + SEV_RET_RESOURCE_LIMIT, + SEV_RET_SECURE_DATA_INVALID, ++ SEV_RET_INVALID_KEY = 0x27, + SEV_RET_MAX, + } sev_ret_code; + +diff --git a/linux-headers/linux/stddef.h b/linux-headers/linux/stddef.h +index 9bb07083ac..bf9749dd14 100644 +--- a/linux-headers/linux/stddef.h ++++ b/linux-headers/linux/stddef.h +@@ -27,8 +27,13 @@ + union { \ + struct { MEMBERS } ATTRS; \ + struct TAG { MEMBERS } ATTRS NAME; \ +- } ++ } ATTRS + ++#ifdef __cplusplus ++/* 
sizeof(struct{}) is 1 in C++, not 0, can't use C version of the macro. */ ++#define __DECLARE_FLEX_ARRAY(T, member) \ ++ T member[0] ++#else + /** + * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union + * +@@ -49,3 +54,5 @@ + #ifndef __counted_by + #define __counted_by(m) + #endif ++ ++#endif /* _LINUX_STDDEF_H */ +diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h +index 59978fbaae..953c75feda 100644 +--- a/linux-headers/linux/userfaultfd.h ++++ b/linux-headers/linux/userfaultfd.h +@@ -40,7 +40,8 @@ + UFFD_FEATURE_EXACT_ADDRESS | \ + UFFD_FEATURE_WP_HUGETLBFS_SHMEM | \ + UFFD_FEATURE_WP_UNPOPULATED | \ +- UFFD_FEATURE_POISON) ++ UFFD_FEATURE_POISON | \ ++ UFFD_FEATURE_WP_ASYNC) + #define UFFD_API_IOCTLS \ + ((__u64)1 << _UFFDIO_REGISTER | \ + (__u64)1 << _UFFDIO_UNREGISTER | \ +@@ -216,6 +217,11 @@ struct uffdio_api { + * (i.e. empty ptes). This will be the default behavior for shmem + * & hugetlbfs, so this flag only affects anonymous memory behavior + * when userfault write-protection mode is registered. ++ * ++ * UFFD_FEATURE_WP_ASYNC indicates that userfaultfd write-protection ++ * asynchronous mode is supported in which the write fault is ++ * automatically resolved and write-protection is un-set. ++ * It implies UFFD_FEATURE_WP_UNPOPULATED. 
+ */ + #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) + #define UFFD_FEATURE_EVENT_FORK (1<<1) +@@ -232,6 +238,7 @@ struct uffdio_api { + #define UFFD_FEATURE_WP_HUGETLBFS_SHMEM (1<<12) + #define UFFD_FEATURE_WP_UNPOPULATED (1<<13) + #define UFFD_FEATURE_POISON (1<<14) ++#define UFFD_FEATURE_WP_ASYNC (1<<15) + __u64 features; + + __u64 ioctls; +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index acf72b4999..8e175ece31 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -277,8 +277,8 @@ struct vfio_region_info { + #define VFIO_REGION_INFO_FLAG_CAPS (1 << 3) /* Info supports caps */ + __u32 index; /* Region index */ + __u32 cap_offset; /* Offset within info struct of first cap */ +- __u64 size; /* Region size (bytes) */ +- __u64 offset; /* Region offset from start of device fd */ ++ __aligned_u64 size; /* Region size (bytes) */ ++ __aligned_u64 offset; /* Region offset from start of device fd */ + }; + #define VFIO_DEVICE_GET_REGION_INFO _IO(VFIO_TYPE, VFIO_BASE + 8) + +@@ -294,8 +294,8 @@ struct vfio_region_info { + #define VFIO_REGION_INFO_CAP_SPARSE_MMAP 1 + + struct vfio_region_sparse_mmap_area { +- __u64 offset; /* Offset of mmap'able area within region */ +- __u64 size; /* Size of mmap'able area */ ++ __aligned_u64 offset; /* Offset of mmap'able area within region */ ++ __aligned_u64 size; /* Size of mmap'able area */ + }; + + struct vfio_region_info_cap_sparse_mmap { +@@ -450,9 +450,9 @@ struct vfio_device_migration_info { + VFIO_DEVICE_STATE_V1_RESUMING) + + __u32 reserved; +- __u64 pending_bytes; +- __u64 data_offset; +- __u64 data_size; ++ __aligned_u64 pending_bytes; ++ __aligned_u64 data_offset; ++ __aligned_u64 data_size; + }; + + /* +@@ -476,7 +476,7 @@ struct vfio_device_migration_info { + + struct vfio_region_info_cap_nvlink2_ssatgt { + struct vfio_info_cap_header header; +- __u64 tgt; ++ __aligned_u64 tgt; + }; + + /* +@@ -816,7 +816,7 @@ struct vfio_device_gfx_plane_info { + __u32 drm_plane_type; /* 
type of plane: DRM_PLANE_TYPE_* */ + /* out */ + __u32 drm_format; /* drm format of plane */ +- __u64 drm_format_mod; /* tiled mode */ ++ __aligned_u64 drm_format_mod; /* tiled mode */ + __u32 width; /* width of plane */ + __u32 height; /* height of plane */ + __u32 stride; /* stride of plane */ +@@ -829,6 +829,7 @@ struct vfio_device_gfx_plane_info { + __u32 region_index; /* region index */ + __u32 dmabuf_id; /* dma-buf id */ + }; ++ __u32 reserved; + }; + + #define VFIO_DEVICE_QUERY_GFX_PLANE _IO(VFIO_TYPE, VFIO_BASE + 14) +@@ -863,9 +864,10 @@ struct vfio_device_ioeventfd { + #define VFIO_DEVICE_IOEVENTFD_32 (1 << 2) /* 4-byte write */ + #define VFIO_DEVICE_IOEVENTFD_64 (1 << 3) /* 8-byte write */ + #define VFIO_DEVICE_IOEVENTFD_SIZE_MASK (0xf) +- __u64 offset; /* device fd offset of write */ +- __u64 data; /* data to be written */ ++ __aligned_u64 offset; /* device fd offset of write */ ++ __aligned_u64 data; /* data to be written */ + __s32 fd; /* -1 for de-assignment */ ++ __u32 reserved; + }; + + #define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16) +@@ -1434,6 +1436,27 @@ struct vfio_device_feature_mig_data_size { + + #define VFIO_DEVICE_FEATURE_MIG_DATA_SIZE 9 + ++/** ++ * Upon VFIO_DEVICE_FEATURE_SET, set or clear the BUS mastering for the device ++ * based on the operation specified in op flag. ++ * ++ * The functionality is incorporated for devices that needs bus master control, ++ * but the in-band device interface lacks the support. Consequently, it is not ++ * applicable to PCI devices, as bus master control for PCI devices is managed ++ * in-band through the configuration space. At present, this feature is supported ++ * only for CDX devices. ++ * When the device's BUS MASTER setting is configured as CLEAR, it will result in ++ * blocking all incoming DMA requests from the device. On the other hand, configuring ++ * the device's BUS MASTER setting as SET (enable) will grant the device the ++ * capability to perform DMA to the host memory. 
++ */ ++struct vfio_device_feature_bus_master { ++ __u32 op; ++#define VFIO_DEVICE_FEATURE_CLEAR_MASTER 0 /* Clear Bus Master */ ++#define VFIO_DEVICE_FEATURE_SET_MASTER 1 /* Set Bus Master */ ++}; ++#define VFIO_DEVICE_FEATURE_BUS_MASTER 10 ++ + /* -------- API for Type1 VFIO IOMMU -------- */ + + /** +@@ -1449,7 +1472,7 @@ struct vfio_iommu_type1_info { + __u32 flags; + #define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */ + #define VFIO_IOMMU_INFO_CAPS (1 << 1) /* Info supports caps */ +- __u64 iova_pgsizes; /* Bitmap of supported page sizes */ ++ __aligned_u64 iova_pgsizes; /* Bitmap of supported page sizes */ + __u32 cap_offset; /* Offset within info struct of first cap */ + __u32 pad; + }; +diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h +index f5c48b61ab..649560c685 100644 +--- a/linux-headers/linux/vhost.h ++++ b/linux-headers/linux/vhost.h +@@ -219,4 +219,12 @@ + */ + #define VHOST_VDPA_RESUME _IO(VHOST_VIRTIO, 0x7E) + ++/* Get the group for the descriptor table including driver & device areas ++ * of a virtqueue: read index, write group in num. ++ * The virtqueue index is stored in the index field of vhost_vring_state. ++ * The group ID of the descriptor table for this specific virtqueue ++ * is returned via num field of vhost_vring_state. 
++ */ ++#define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ ++ struct vhost_vring_state) + #endif +-- +2.27.0 + diff --git a/linux-headers-update-against-5.10-and-manual-clear-v.patch b/linux-headers-update-against-5.10-and-manual-clear-v.patch new file mode 100644 index 0000000000000000000000000000000000000000..911475272126f45878b57fb991ca7551761dc565 --- /dev/null +++ b/linux-headers-update-against-5.10-and-manual-clear-v.patch @@ -0,0 +1,82 @@ +From 2ccd1ec0d18070727ad9b9647da6b6937f16de2a Mon Sep 17 00:00:00 2001 +From: Zenghui Yu +Date: Sat, 8 May 2021 17:31:03 +0800 +Subject: [PATCH] linux-headers: update against 5.10 and manual clear vfio + dirty log series + +The new capability VFIO_DIRTY_LOG_MANUAL_CLEAR and the new ioctl +VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR and +VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP have been introduced in +the kernel, update the header to add them. + +Signed-off-by: Zenghui Yu +Signed-off-by: Kunkun Jiang +--- + linux-headers/linux/vfio.h | 36 +++++++++++++++++++++++++++++++++++- + 1 file changed, 35 insertions(+), 1 deletion(-) + +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index 8e175ece31..956154e509 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -56,6 +56,16 @@ + */ + #define VFIO_UPDATE_VADDR 10 + ++/* ++ * The vfio_iommu driver may support user clears dirty log manually, which means ++ * dirty log can be requested to not cleared automatically after dirty log is ++ * copied to userspace, it's user's duty to clear dirty log. ++ * ++ * Note: please refer to VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR and ++ * VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP. ++ */ ++#define VFIO_DIRTY_LOG_MANUAL_CLEAR 11 ++ + /* + * The IOCTL interface is designed for extensibility by embedding the + * structure length (argsz) and flags into structures passed between +@@ -1651,8 +1661,30 @@ struct vfio_iommu_type1_dma_unmap { + * actual bitmap. 
If dirty pages logging is not enabled, an error will be + * returned. + * +- * Only one of the flags _START, _STOP and _GET may be specified at a time. ++ * The VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR flag is almost same as ++ * VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP, except that it requires underlying ++ * dirty bitmap is not cleared automatically. The user can clear it manually by ++ * calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP flag set. + * ++ * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP flag set, ++ * instructs the IOMMU driver to clear the dirty status of pages in a bitmap ++ * for IOMMU container for a given IOVA range. The user must specify the IOVA ++ * range, the bitmap and the pgsize through the structure ++ * vfio_iommu_type1_dirty_bitmap_get in the data[] portion. This interface ++ * supports clearing a bitmap of the smallest supported pgsize only and can be ++ * modified in future to clear a bitmap of any specified supported pgsize. The ++ * user must provide a memory area for the bitmap memory and specify its size ++ * in bitmap.size. One bit is used to represent one page consecutively starting ++ * from iova offset. The user should provide page size in bitmap.pgsize field. ++ * A bit set in the bitmap indicates that the page at that offset from iova is ++ * cleared the dirty status, and dirty tracking is re-enabled for that page. The ++ * caller must set argsz to a value including the size of structure ++ * vfio_iommu_dirty_bitmap_get, but excluing the size of the actual bitmap. If ++ * dirty pages logging is not enabled, an error will be returned. Note: user ++ * should clear dirty log before handle corresponding dirty pages. ++ * ++ * Only one of the flags _START, _STOP, _GET, _GET_NOCLEAR_, and _CLEAR may be ++ * specified at a time. 
+ */ + struct vfio_iommu_type1_dirty_bitmap { + __u32 argsz; +@@ -1660,6 +1692,8 @@ struct vfio_iommu_type1_dirty_bitmap { + #define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0) + #define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1) + #define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2) ++#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR (1 << 3) ++#define VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP (1 << 4) + __u8 data[]; + }; + +-- +2.27.0 + diff --git a/log-Add-log-at-boot-cpu-init-for-aarch64.patch b/log-Add-log-at-boot-cpu-init-for-aarch64.patch new file mode 100644 index 0000000000000000000000000000000000000000..73ddaae920abb4e64793dfd332ba4ba34c3a9b81 --- /dev/null +++ b/log-Add-log-at-boot-cpu-init-for-aarch64.patch @@ -0,0 +1,68 @@ +From 16c4b8946903985e3dfd470d0e04b79d473505bc Mon Sep 17 00:00:00 2001 +From: "wanghaibin.wang" +Date: Sun, 17 Mar 2024 15:53:57 +0800 +Subject: [PATCH] log: Add log at boot & cpu init for aarch64 + +Add log at boot & cpu init for aarch64 + +Signed-off-by: miaoyubo +Signed-off-by: Jingyi Wang +Signed-off-by: Yuan Zhang +--- + hw/arm/boot.c | 4 ++++ + hw/arm/virt.c | 3 +++ + 2 files changed, 7 insertions(+) + +diff --git a/hw/arm/boot.c b/hw/arm/boot.c +index 84ea6a807a..d1671e1d42 100644 +--- a/hw/arm/boot.c ++++ b/hw/arm/boot.c +@@ -11,6 +11,7 @@ + #include "qemu/datadir.h" + #include "qemu/error-report.h" + #include "qapi/error.h" ++#include "qemu/log.h" + #include + #include "hw/arm/boot.h" + #include "hw/arm/linux-boot-if.h" +@@ -1226,6 +1227,9 @@ void arm_load_kernel(ARMCPU *cpu, MachineState *ms, struct arm_boot_info *info) + * doesn't support secure. 
+ */ + assert(!(info->secure_board_setup && kvm_enabled())); ++ ++ qemu_log("load the kernel\n"); ++ + info->kernel_filename = ms->kernel_filename; + info->kernel_cmdline = ms->kernel_cmdline; + info->initrd_filename = ms->initrd_filename; +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index c19cacec8b..f4c3d47f30 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -32,6 +32,7 @@ + #include "qemu/datadir.h" + #include "qemu/units.h" + #include "qemu/option.h" ++#include "qemu/log.h" + #include "monitor/qdev.h" + #include "hw/sysbus.h" + #include "hw/arm/boot.h" +@@ -1020,6 +1021,7 @@ static void virt_powerdown_req(Notifier *n, void *opaque) + { + VirtMachineState *s = container_of(n, VirtMachineState, powerdown_notifier); + ++ qemu_log("send powerdown to vm.\n"); + if (s->acpi_dev) { + acpi_send_event(s->acpi_dev, ACPI_POWER_DOWN_STATUS); + } else { +@@ -2240,6 +2242,7 @@ static void machvirt_init(MachineState *machine) + } + + create_fdt(vms); ++ qemu_log("cpu init start\n"); + + assert(possible_cpus->len == max_cpus); + for (n = 0; n < possible_cpus->len; n++) { +-- +2.27.0 + diff --git a/log-Add-some-logs-on-VM-runtime-path.patch b/log-Add-some-logs-on-VM-runtime-path.patch new file mode 100644 index 0000000000000000000000000000000000000000..b72b9bd9a8ec1fa44eab197f4a5864c0b93e5e40 --- /dev/null +++ b/log-Add-some-logs-on-VM-runtime-path.patch @@ -0,0 +1,171 @@ +From 9d683f1ea8961d89cececf1fdc3345663744067f Mon Sep 17 00:00:00 2001 +From: Yan Wang +Date: Tue, 8 Feb 2022 15:48:01 +0800 +Subject: [PATCH] log: Add some logs on VM runtime path + +Add logs on VM runtime path, to make it easier to do trouble shooting. 
+ +Signed-off-by: Ying Fang +Signed-off-by: Yan Wang +Signed-off-by: Adttil +--- + hw/virtio/virtio-pci.c | 2 ++ + hw/virtio/virtio.c | 14 ++++++++++++-- + monitor/monitor.c | 9 +++++++++ + qapi/qmp-dispatch.c | 15 +++++++++++++++ + system/qdev-monitor.c | 4 +++- + 5 files changed, 41 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index e433879542..134a8eaef6 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -2082,7 +2082,9 @@ static void virtio_pci_device_unplugged(DeviceState *d) + VirtIOPCIProxy *proxy = VIRTIO_PCI(d); + bool modern = virtio_pci_modern(proxy); + bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY; ++ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + ++ qemu_log("unplug device name: %s\n", !vdev ? "NULL" : vdev->name); + virtio_pci_stop_ioeventfd(proxy); + + if (modern) { +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 3a160f86ed..a9aa0c4f66 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2048,7 +2048,14 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) + k->set_status(vdev, val); + } + vdev->status = val; +- ++ if (val) { ++ qemu_log("%s device status is %d that means %s\n", ++ vdev->name, val, ++ (val & VIRTIO_CONFIG_S_DRIVER_OK) ? "DRIVER OK" : ++ (val & VIRTIO_CONFIG_S_DRIVER) ? "DRIVER" : ++ (val & VIRTIO_CONFIG_S_ACKNOWLEDGE) ? "ACKNOWLEDGE" : ++ (val & VIRTIO_CONFIG_S_FAILED) ? 
"FAILED" : "UNKNOWN"); ++ } + return 0; + } + +@@ -2326,8 +2333,11 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, + break; + } + +- if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) ++ if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) { ++ qemu_log("unacceptable queue_size (%d) or num (%d)\n", ++ queue_size, i); + abort(); ++ } + + vdev->vq[i].vring.num = queue_size; + vdev->vq[i].vring.num_default = queue_size; +diff --git a/monitor/monitor.c b/monitor/monitor.c +index 01ede1babd..e540c1334a 100644 +--- a/monitor/monitor.c ++++ b/monitor/monitor.c +@@ -29,6 +29,7 @@ + #include "qapi/qapi-emit-events.h" + #include "qapi/qapi-visit-control.h" + #include "qapi/qmp/qdict.h" ++#include "qapi/qmp/qjson.h" + #include "qemu/error-report.h" + #include "qemu/option.h" + #include "sysemu/qtest.h" +@@ -338,6 +339,7 @@ static void monitor_qapi_event_emit(QAPIEvent event, QDict *qdict) + { + Monitor *mon; + MonitorQMP *qmp_mon; ++ GString *json; + + trace_monitor_protocol_event_emit(event, qdict); + QTAILQ_FOREACH(mon, &mon_list, entry) { +@@ -348,6 +350,13 @@ static void monitor_qapi_event_emit(QAPIEvent event, QDict *qdict) + qmp_mon = container_of(mon, MonitorQMP, common); + if (qmp_mon->commands != &qmp_cap_negotiation_commands) { + qmp_send_response(qmp_mon, qdict); ++ json = qobject_to_json(QOBJECT(qdict)); ++ if (json) { ++ if (!strstr(json->str, "RTC_CHANGE")) { ++ qemu_log("%s\n", json->str); ++ } ++ g_string_free(json, true); ++ } + } + } + } +diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c +index 555528b6bb..7a215cbfd7 100644 +--- a/qapi/qmp-dispatch.c ++++ b/qapi/qmp-dispatch.c +@@ -24,6 +24,7 @@ + #include "qapi/qmp/qbool.h" + #include "qemu/coroutine.h" + #include "qemu/main-loop.h" ++#include "qemu/log.h" + + Visitor *qobject_input_visitor_new_qmp(QObject *obj) + { +@@ -146,6 +147,7 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ + QObject *id; + QObject *ret = NULL; + QDict 
*rsp = NULL; ++ GString *json; + + dict = qobject_to(QDict, request); + if (!dict) { +@@ -203,6 +205,19 @@ QDict *coroutine_mixed_fn qmp_dispatch(const QmpCommandList *cmds, QObject *requ + qobject_ref(args); + } + ++ json = qobject_to_json(QOBJECT(args)); ++ if (json) { ++ if ((strcmp(command, "query-block-jobs") != 0) ++ && (strcmp(command, "query-migrate") != 0) ++ && (strcmp(command, "query-blockstats") != 0) ++ && (strcmp(command, "query-balloon") != 0) ++ && (strcmp(command, "set_password") != 0)) { ++ qemu_log("qmp_cmd_name: %s, arguments: %s\n", ++ command, json->str); ++ } ++ g_string_free(json, true); ++ } ++ + assert(!(oob && qemu_in_coroutine())); + assert(monitor_cur() == NULL); + if (!!(cmd->options & QCO_COROUTINE) == qemu_in_coroutine()) { +diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c +index a13db763e5..c885175b66 100644 +--- a/system/qdev-monitor.c ++++ b/system/qdev-monitor.c +@@ -36,6 +36,7 @@ + #include "qemu/option.h" + #include "qemu/qemu-print.h" + #include "qemu/option_int.h" ++#include "qemu/log.h" + #include "sysemu/block-backend.h" + #include "migration/misc.h" + #include "migration/migration.h" +@@ -643,6 +644,7 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts, + if (path != NULL) { + bus = qbus_find(path, errp); + if (!bus) { ++ error_setg(errp, "can not find bus for %s", driver); + return NULL; + } + if (!object_dynamic_cast(OBJECT(bus), dc->bus_type)) { +@@ -715,7 +717,7 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts, + if (*errp) { + goto err_del_dev; + } +- ++ qemu_log("add qdev %s:%s success\n", driver, dev->id ? 
dev->id : "none"); + if (!qdev_realize(dev, bus, errp)) { + goto err_del_dev; + } +-- +2.27.0 + diff --git a/loongarch-Change-the-UEFI-loading-mode-to-loongarch.patch b/loongarch-Change-the-UEFI-loading-mode-to-loongarch.patch new file mode 100644 index 0000000000000000000000000000000000000000..09be6fe85fc6ebdabf86c767d224315bd996dc6d --- /dev/null +++ b/loongarch-Change-the-UEFI-loading-mode-to-loongarch.patch @@ -0,0 +1,287 @@ +From 4a5a9bef6eff5837dcccd216172957d8470b6245 Mon Sep 17 00:00:00 2001 +From: Xianglai Li +Date: Mon, 19 Feb 2024 18:34:14 +0800 +Subject: [PATCH] loongarch: Change the UEFI loading mode to loongarch +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The UEFI loading mode in loongarch is very different +from that in other architectures:loongarch's UEFI code +is in rom, while other architectures' UEFI code is in flash. + +loongarch UEFI can be loaded as follows: +-machine virt,pflash=pflash0-format +-bios ./QEMU_EFI.fd + +Other architectures load UEFI using the following methods: +-machine virt,pflash0=pflash0-format,pflash1=pflash1-format + +loongarch's UEFI loading method makes qemu and libvirt incompatible +when using NVRAM, and the cost of loongarch's current loading method +far outweighs the benefits, so we decided to use the same UEFI loading +scheme as other architectures. 
+ +Cc: Andrea Bolognani +Cc: maobibo@loongson.cn +Cc: Philippe Mathieu-Daudé +Cc: Song Gao +Cc: zhaotianrui@loongson.cn +Signed-off-by: Xianglai Li +Tested-by: Andrea Bolognani +Reviewed-by: Song Gao +Message-Id: <0bd892aa9b88e0f4cc904cb70efd0251fc1cde29.1708336919.git.lixianglai@loongson.cn> +Signed-off-by: Song Gao +--- + hw/loongarch/acpi-build.c | 29 +++++++++-- + hw/loongarch/virt.c | 101 ++++++++++++++++++++++++++---------- + include/hw/loongarch/virt.h | 10 ++-- + 3 files changed, 107 insertions(+), 33 deletions(-) + +diff --git a/hw/loongarch/acpi-build.c b/hw/loongarch/acpi-build.c +index ae292fc543..f990405d04 100644 +--- a/hw/loongarch/acpi-build.c ++++ b/hw/loongarch/acpi-build.c +@@ -314,16 +314,39 @@ static void build_pci_device_aml(Aml *scope, LoongArchMachineState *lams) + static void build_flash_aml(Aml *scope, LoongArchMachineState *lams) + { + Aml *dev, *crs; ++ MemoryRegion *flash_mem; + +- hwaddr flash_base = VIRT_FLASH_BASE; +- hwaddr flash_size = VIRT_FLASH_SIZE; ++ hwaddr flash0_base; ++ hwaddr flash0_size; ++ ++ hwaddr flash1_base; ++ hwaddr flash1_size; ++ ++ flash_mem = pflash_cfi01_get_memory(lams->flash[0]); ++ flash0_base = flash_mem->addr; ++ flash0_size = memory_region_size(flash_mem); ++ ++ flash_mem = pflash_cfi01_get_memory(lams->flash[1]); ++ flash1_base = flash_mem->addr; ++ flash1_size = memory_region_size(flash_mem); + + dev = aml_device("FLS0"); + aml_append(dev, aml_name_decl("_HID", aml_string("LNRO0015"))); + aml_append(dev, aml_name_decl("_UID", aml_int(0))); + + crs = aml_resource_template(); +- aml_append(crs, aml_memory32_fixed(flash_base, flash_size, AML_READ_WRITE)); ++ aml_append(crs, aml_memory32_fixed(flash0_base, flash0_size, ++ AML_READ_WRITE)); ++ aml_append(dev, aml_name_decl("_CRS", crs)); ++ aml_append(scope, dev); ++ ++ dev = aml_device("FLS1"); ++ aml_append(dev, aml_name_decl("_HID", aml_string("LNRO0015"))); ++ aml_append(dev, aml_name_decl("_UID", aml_int(1))); ++ ++ crs = aml_resource_template(); ++
aml_append(crs, aml_memory32_fixed(flash1_base, flash1_size, ++ AML_READ_WRITE)); + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(scope, dev); + } +diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c +index c9a680e61a..6ef40fa24a 100644 +--- a/hw/loongarch/virt.c ++++ b/hw/loongarch/virt.c +@@ -54,7 +54,9 @@ struct loaderparams { + const char *initrd_filename; + }; + +-static void virt_flash_create(LoongArchMachineState *lams) ++static PFlashCFI01 *virt_flash_create1(LoongArchMachineState *lams, ++ const char *name, ++ const char *alias_prop_name) + { + DeviceState *dev = qdev_new(TYPE_PFLASH_CFI01); + +@@ -66,45 +68,78 @@ static void virt_flash_create(LoongArchMachineState *lams) + qdev_prop_set_uint16(dev, "id1", 0x18); + qdev_prop_set_uint16(dev, "id2", 0x00); + qdev_prop_set_uint16(dev, "id3", 0x00); +- qdev_prop_set_string(dev, "name", "virt.flash"); +- object_property_add_child(OBJECT(lams), "virt.flash", OBJECT(dev)); +- object_property_add_alias(OBJECT(lams), "pflash", ++ qdev_prop_set_string(dev, "name", name); ++ object_property_add_child(OBJECT(lams), name, OBJECT(dev)); ++ object_property_add_alias(OBJECT(lams), alias_prop_name, + OBJECT(dev), "drive"); ++ return PFLASH_CFI01(dev); ++} + +- lams->flash = PFLASH_CFI01(dev); ++static void virt_flash_create(LoongArchMachineState *lams) ++{ ++ lams->flash[0] = virt_flash_create1(lams, "virt.flash0", "pflash0"); ++ lams->flash[1] = virt_flash_create1(lams, "virt.flash1", "pflash1"); + } + +-static void virt_flash_map(LoongArchMachineState *lams, +- MemoryRegion *sysmem) ++static void virt_flash_map1(PFlashCFI01 *flash, ++ hwaddr base, hwaddr size, ++ MemoryRegion *sysmem) + { +- PFlashCFI01 *flash = lams->flash; + DeviceState *dev = DEVICE(flash); +- hwaddr base = VIRT_FLASH_BASE; +- hwaddr size = VIRT_FLASH_SIZE; ++ BlockBackend *blk; ++ hwaddr real_size = size; ++ ++ blk = pflash_cfi01_get_blk(flash); ++ if (blk) { ++ real_size = blk_getlength(blk); ++ assert(real_size && real_size <= size); 
++ } + +- assert(QEMU_IS_ALIGNED(size, VIRT_FLASH_SECTOR_SIZE)); +- assert(size / VIRT_FLASH_SECTOR_SIZE <= UINT32_MAX); ++ assert(QEMU_IS_ALIGNED(real_size, VIRT_FLASH_SECTOR_SIZE)); ++ assert(real_size / VIRT_FLASH_SECTOR_SIZE <= UINT32_MAX); + +- qdev_prop_set_uint32(dev, "num-blocks", size / VIRT_FLASH_SECTOR_SIZE); ++ qdev_prop_set_uint32(dev, "num-blocks", real_size / VIRT_FLASH_SECTOR_SIZE); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + memory_region_add_subregion(sysmem, base, + sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0)); ++} + ++static void virt_flash_map(LoongArchMachineState *lams, ++ MemoryRegion *sysmem) ++{ ++ PFlashCFI01 *flash0 = lams->flash[0]; ++ PFlashCFI01 *flash1 = lams->flash[1]; ++ ++ virt_flash_map1(flash0, VIRT_FLASH0_BASE, VIRT_FLASH0_SIZE, sysmem); ++ virt_flash_map1(flash1, VIRT_FLASH1_BASE, VIRT_FLASH1_SIZE, sysmem); + } + + static void fdt_add_flash_node(LoongArchMachineState *lams) + { + MachineState *ms = MACHINE(lams); + char *nodename; ++ MemoryRegion *flash_mem; ++ ++ hwaddr flash0_base; ++ hwaddr flash0_size; + +- hwaddr flash_base = VIRT_FLASH_BASE; +- hwaddr flash_size = VIRT_FLASH_SIZE; ++ hwaddr flash1_base; ++ hwaddr flash1_size; + +- nodename = g_strdup_printf("/flash@%" PRIx64, flash_base); ++ flash_mem = pflash_cfi01_get_memory(lams->flash[0]); ++ flash0_base = flash_mem->addr; ++ flash0_size = memory_region_size(flash_mem); ++ ++ flash_mem = pflash_cfi01_get_memory(lams->flash[1]); ++ flash1_base = flash_mem->addr; ++ flash1_size = memory_region_size(flash_mem); ++ ++ nodename = g_strdup_printf("/flash@%" PRIx64, flash0_base); + qemu_fdt_add_subnode(ms->fdt, nodename); + qemu_fdt_setprop_string(ms->fdt, nodename, "compatible", "cfi-flash"); + qemu_fdt_setprop_sized_cells(ms->fdt, nodename, "reg", +- 2, flash_base, 2, flash_size); ++ 2, flash0_base, 2, flash0_size, ++ 2, flash1_base, 2, flash1_size); + qemu_fdt_setprop_cell(ms->fdt, nodename, "bank-width", 4); + g_free(nodename); + } +@@ -639,12 
+674,32 @@ static void loongarch_firmware_init(LoongArchMachineState *lams) + { + char *filename = MACHINE(lams)->firmware; + char *bios_name = NULL; +- int bios_size; ++ int bios_size, i; ++ BlockBackend *pflash_blk0; ++ MemoryRegion *mr; + + lams->bios_loaded = false; + ++ /* Map legacy -drive if=pflash to machine properties */ ++ for (i = 0; i < ARRAY_SIZE(lams->flash); i++) { ++ pflash_cfi01_legacy_drive(lams->flash[i], ++ drive_get(IF_PFLASH, 0, i)); ++ } ++ + virt_flash_map(lams, get_system_memory()); + ++ pflash_blk0 = pflash_cfi01_get_blk(lams->flash[0]); ++ ++ if (pflash_blk0) { ++ if (filename) { ++ error_report("cannot use both '-bios' and '-drive if=pflash'" ++ "options at once"); ++ exit(1); ++ } ++ lams->bios_loaded = true; ++ return; ++ } ++ + if (filename) { + bios_name = qemu_find_file(QEMU_FILE_TYPE_BIOS, filename); + if (!bios_name) { +@@ -652,21 +707,15 @@ static void loongarch_firmware_init(LoongArchMachineState *lams) + exit(1); + } + +- bios_size = load_image_targphys(bios_name, VIRT_BIOS_BASE, VIRT_BIOS_SIZE); ++ mr = sysbus_mmio_get_region(SYS_BUS_DEVICE(lams->flash[0]), 0); ++ bios_size = load_image_mr(bios_name, mr); + if (bios_size < 0) { + error_report("Could not load ROM image '%s'", bios_name); + exit(1); + } +- + g_free(bios_name); +- +- memory_region_init_ram(&lams->bios, NULL, "loongarch.bios", +- VIRT_BIOS_SIZE, &error_fatal); +- memory_region_set_readonly(&lams->bios, true); +- memory_region_add_subregion(get_system_memory(), VIRT_BIOS_BASE, &lams->bios); + lams->bios_loaded = true; + } +- + } + + static void reset_load_elf(void *opaque) +diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h +index 6ef9a92394..252f7df7f4 100644 +--- a/include/hw/loongarch/virt.h ++++ b/include/hw/loongarch/virt.h +@@ -18,10 +18,12 @@ + + #define VIRT_FWCFG_BASE 0x1e020000UL + #define VIRT_BIOS_BASE 0x1c000000UL +-#define VIRT_BIOS_SIZE (4 * MiB) ++#define VIRT_BIOS_SIZE (16 * MiB) + #define VIRT_FLASH_SECTOR_SIZE (128 * KiB) 
+-#define VIRT_FLASH_BASE 0x1d000000UL +-#define VIRT_FLASH_SIZE (16 * MiB) ++#define VIRT_FLASH0_BASE VIRT_BIOS_BASE ++#define VIRT_FLASH0_SIZE VIRT_BIOS_SIZE ++#define VIRT_FLASH1_BASE 0x1d000000UL ++#define VIRT_FLASH1_SIZE (16 * MiB) + + #define VIRT_LOWMEM_BASE 0 + #define VIRT_LOWMEM_SIZE 0x10000000 +@@ -49,7 +51,7 @@ struct LoongArchMachineState { + int fdt_size; + DeviceState *platform_bus_dev; + PCIBus *pci_bus; +- PFlashCFI01 *flash; ++ PFlashCFI01 *flash[2]; + MemoryRegion system_iocsr; + MemoryRegion iocsr_mem; + AddressSpace as_iocsr; +-- +2.27.0 + diff --git a/memory-backup-Modify-the-VM-s-physical-bits-value-se.patch b/memory-backup-Modify-the-VM-s-physical-bits-value-se.patch new file mode 100644 index 0000000000000000000000000000000000000000..a1c270bc32a6c14c946175df7231cad51d4173c2 --- /dev/null +++ b/memory-backup-Modify-the-VM-s-physical-bits-value-se.patch @@ -0,0 +1,126 @@ +From 65435e107fc8eee37c61a3a7d1adebd013ad466f Mon Sep 17 00:00:00 2001 +From: Ming Yang +Date: Sat, 23 Mar 2024 16:18:03 +0800 +Subject: [PATCH] memory: [backup] Modify the VM's physical bits value set + policy. + +backup code from qemu-6.2 to qemu-8.2 +old info: +commit id : +a09c3928b33b0c53831bd9eeb56f8171c26057bc +messages: +target-i386: Modify the VM's physical bits value set policy. + +To resolve the problem that a VM with large memory capacity fails +to be live migrated, determine whether the VM is a large memory +capacity based on the memory size (4 TB). If yes, set the bus width +of the VM address to 46 bits. If no, set the bus width to 42 bits. 
+ +Signed-off-by: Jinhua Cao +Signed-off-by: Jiajie Li + +Signed-off-by: Ming Yang +--- + target/i386/cpu.c | 20 +++++++++++++++++++- + target/i386/cpu.h | 6 ++++++ + target/i386/host-cpu.c | 13 +++++++------ + 3 files changed, 32 insertions(+), 7 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index a66e5a357b..fc61a84b1e 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -7666,6 +7666,24 @@ static void x86_cpu_set_pc(CPUState *cs, vaddr value) + cpu->env.eip = value; + } + ++ ++/* At present, we check the vm is *LARGE* or not, i.e. whether ++ * the memory size is more than 4T or not. ++ */ ++const uint64_t large_vm_mem_size = 0x40000000000UL; ++void x86_cpu_adjuest_by_ram_size(ram_addr_t ram_size, X86CPU *cpu) ++{ ++ /* If there is not a large vm, we set the phys_bits to 42 bits, ++ * otherwise, we increase the phys_bits to 46 bits. ++ */ ++ if (ram_size < large_vm_mem_size) { ++ cpu->phys_bits = DEFAULT_VM_CPU_PHYS_BITS; ++ } else { ++ cpu->phys_bits = LARGE_VM_CPU_PHYS_BITS; ++ cpu->fill_mtrr_mask = true; ++ } ++} ++ + static vaddr x86_cpu_get_pc(CPUState *cs) + { + X86CPU *cpu = X86_CPU(cs); +@@ -7868,7 +7886,7 @@ static Property x86_cpu_properties[] = { + DEFINE_PROP_UINT32("phys-bits", X86CPU, phys_bits, 0), + DEFINE_PROP_BOOL("host-phys-bits", X86CPU, host_phys_bits, false), + DEFINE_PROP_UINT8("host-phys-bits-limit", X86CPU, host_phys_bits_limit, 0), +- DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true), ++ DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, false), + DEFINE_PROP_UINT32("level-func7", X86CPU, env.cpuid_level_func7, + UINT32_MAX), + DEFINE_PROP_UINT32("level", X86CPU, env.cpuid_level, UINT32_MAX), +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index ef987f344c..6993552cd9 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -24,6 +24,7 @@ + #include "cpu-qom.h" + #include "kvm/hyperv-proto.h" + #include "exec/cpu-defs.h" ++#include "exec/cpu-common.h" + #include 
"qapi/qapi-types-common.h" + #include "qemu/cpu-float.h" + #include "qemu/timer.h" +@@ -2081,6 +2082,11 @@ struct X86CPUClass { + extern const VMStateDescription vmstate_x86_cpu; + #endif + ++#define DEFAULT_VM_CPU_PHYS_BITS 42 ++#define LARGE_VM_CPU_PHYS_BITS 46 ++ ++void x86_cpu_adjuest_by_ram_size(ram_addr_t ram_size, X86CPU *cpu); ++ + int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request); + + int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cpu, +diff --git a/target/i386/host-cpu.c b/target/i386/host-cpu.c +index 92ecb7254b..07738bf857 100644 +--- a/target/i386/host-cpu.c ++++ b/target/i386/host-cpu.c +@@ -13,6 +13,7 @@ + #include "qapi/error.h" + #include "qemu/error-report.h" + #include "sysemu/sysemu.h" ++#include "hw/boards.h" + + /* Note: Only safe for use on x86(-64) hosts */ + static uint32_t host_cpu_phys_bits(void) +@@ -57,14 +58,14 @@ static uint32_t host_cpu_adjust_phys_bits(X86CPU *cpu) + uint32_t phys_bits = cpu->phys_bits; + static bool warned; + +- /* +- * Print a warning if the user set it to a value that's not the +- * host value. +- */ +- if (phys_bits != host_phys_bits && phys_bits != 0 && ++ /* adjust x86 cpu phys_bits according to ram_size. */ ++ x86_cpu_adjuest_by_ram_size(current_machine->ram_size, cpu); ++ ++ /* Print a warning if the host value less than the user set. 
*/ ++ if (phys_bits > host_phys_bits && phys_bits != 0 && + !warned) { + warn_report("Host physical bits (%u)" +- " does not match phys-bits property (%u)", ++ " less than phys-bits property (%u)", + host_phys_bits, phys_bits); + warned = true; + } +-- +2.27.0 + diff --git a/migration-Add-compress_level-sanity-check.patch b/migration-Add-compress_level-sanity-check.patch new file mode 100644 index 0000000000000000000000000000000000000000..011547f9bad7579ceef2360d39122d40dab5ee4d --- /dev/null +++ b/migration-Add-compress_level-sanity-check.patch @@ -0,0 +1,68 @@ +From 79863c5ccdd4c635657d2e32e91bc02aa49655e0 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Sat, 30 Jan 2021 16:23:15 +0800 +Subject: [PATCH] migration: Add compress_level sanity check + +Zlib compression has level from 1 to 9. However Zstd compression has level +from 1 to 22 (level >= 20 not recommended). Let's do sanity check here +to make sure a valid compress_level is given by user. + +Signed-off-by: Chuan Zheng +Signed-off-by: Zeyu Jin +Signed-off-by: Ying Fang +--- + migration/options.c | 32 ++++++++++++++++++++++++++++---- + 1 file changed, 28 insertions(+), 4 deletions(-) + +diff --git a/migration/options.c b/migration/options.c +index 6aaee702dc..9b68962a65 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -1065,16 +1065,40 @@ void migrate_params_init(MigrationParameters *params) + params->has_mode = true; + } + ++static bool compress_level_check(MigrationParameters *params, Error **errp) ++{ ++ switch (params->compress_method) { ++ case COMPRESS_METHOD_ZLIB: ++ if (params->compress_level > 9 || params->compress_level < 1) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", ++ "a value in the range of 0 to 9 for Zlib method"); ++ return false; ++ } ++ break; ++#ifdef CONFIG_ZSTD ++ case COMPRESS_METHOD_ZSTD: ++ if (params->compress_level > 19 || params->compress_level < 1) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", ++ "a value in the
range of 1 to 19 for Zstd method"); ++ return false; ++ } ++ break; ++#endif ++ default: ++ error_setg(errp, "Checking compress_level failed for unknown reason"); ++ return false; ++ } ++ ++ return true; ++} ++ + /* + * Check whether the parameters are valid. Error will be put into errp + * (if provided). Return true if valid, otherwise false. + */ + bool migrate_params_check(MigrationParameters *params, Error **errp) + { +- if (params->has_compress_level && +- (params->compress_level > 9)) { +- error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level", +- "a value between 0 and 9"); ++ if (params->has_compress_level && !compress_level_check(params, errp)) { + return false; + } + +-- +2.27.0 + diff --git a/migration-Add-multi-thread-compress-method.patch b/migration-Add-multi-thread-compress-method.patch new file mode 100644 index 0000000000000000000000000000000000000000..1c2782dc3107c988df2ceb796c16172dfc0e38e4 --- /dev/null +++ b/migration-Add-multi-thread-compress-method.patch @@ -0,0 +1,292 @@ +From c2402b63ecb10b9a25695b710f2664dbcbc01ec4 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Sat, 30 Jan 2021 14:57:54 +0800 +Subject: [PATCH] migration: Add multi-thread compress method + +A multi-thread compress method parameter is added to hold the method we +are going to use. By default the 'zlib' method is used to maintain the +compatibility as before. 
+ +Signed-off-by: Chuan Zheng +Signed-off-by: Zeyu Jin +Signed-off-by: Ying Fang +--- + hw/core/qdev-properties-system.c | 11 +++++++++++ + include/hw/qdev-properties.h | 4 ++++ + migration/migration-hmp-cmds.c | 13 +++++++++++++ + migration/options.c | 15 +++++++++++++++ + monitor/hmp-cmds.c | 1 + + qapi/migration.json | 32 ++++++++++++++++++++++++++++++-- + util/oslib-posix.c | 2 +- + 7 files changed, 75 insertions(+), 3 deletions(-) + +diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c +index f2e2718c74..cd5571fcfb 100644 +--- a/hw/core/qdev-properties-system.c ++++ b/hw/core/qdev-properties-system.c +@@ -1202,6 +1202,17 @@ const PropertyInfo qdev_prop_uuid = { + .set_default_value = set_default_uuid_auto, + }; + ++/* --- CompressMethod --- */ ++const PropertyInfo qdev_prop_compress_method = { ++ .name = "CompressMethod", ++ .description = "multi-thread compression method, " ++ "zlib", ++ .enum_table = &CompressMethod_lookup, ++ .get = qdev_propinfo_get_enum, ++ .set = qdev_propinfo_set_enum, ++ .set_default_value = qdev_propinfo_set_default_value_enum, ++}; ++ + /* --- s390 cpu entitlement policy --- */ + + QEMU_BUILD_BUG_ON(sizeof(CpuS390Entitlement) != sizeof(int)); +diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h +index 25743a29a0..63602c2c74 100644 +--- a/include/hw/qdev-properties.h ++++ b/include/hw/qdev-properties.h +@@ -60,6 +60,7 @@ extern const PropertyInfo qdev_prop_int64; + extern const PropertyInfo qdev_prop_size; + extern const PropertyInfo qdev_prop_string; + extern const PropertyInfo qdev_prop_on_off_auto; ++extern const PropertyInfo qdev_prop_compress_method; + extern const PropertyInfo qdev_prop_size32; + extern const PropertyInfo qdev_prop_array; + extern const PropertyInfo qdev_prop_link; +@@ -168,6 +169,9 @@ extern const PropertyInfo qdev_prop_link; + DEFINE_PROP(_n, _s, _f, qdev_prop_string, char*) + #define DEFINE_PROP_ON_OFF_AUTO(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, 
qdev_prop_on_off_auto, OnOffAuto) ++#define DEFINE_PROP_COMPRESS_METHOD(_n, _s, _f, _d) \ ++ DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_compress_method, \ ++ CompressMethod) + #define DEFINE_PROP_SIZE32(_n, _s, _f, _d) \ + DEFINE_PROP_UNSIGNED(_n, _s, _f, _d, qdev_prop_size32, uint32_t) + +diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c +index 86ae832176..261ec1e35c 100644 +--- a/migration/migration-hmp-cmds.c ++++ b/migration/migration-hmp-cmds.c +@@ -22,6 +22,7 @@ + #include "qapi/qapi-commands-migration.h" + #include "qapi/qapi-visit-migration.h" + #include "qapi/qmp/qdict.h" ++#include "qapi/qapi-visit-migration.h" + #include "qapi/string-input-visitor.h" + #include "qapi/string-output-visitor.h" + #include "qemu/cutils.h" +@@ -291,6 +292,9 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) + MigrationParameter_str(MIGRATION_PARAMETER_DECOMPRESS_THREADS), + params->decompress_threads); + assert(params->has_throttle_trigger_threshold); ++ monitor_printf(mon, "%s: %s\n", ++ MigrationParameter_str(MIGRATION_PARAMETER_COMPRESS_METHOD), ++ CompressMethod_str(params->compress_method)); + monitor_printf(mon, "%s: %u\n", + MigrationParameter_str(MIGRATION_PARAMETER_THROTTLE_TRIGGER_THRESHOLD), + params->throttle_trigger_threshold); +@@ -519,6 +523,7 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + MigrateSetParameters *p = g_new0(MigrateSetParameters, 1); + uint64_t valuebw = 0; + uint64_t cache_size; ++ CompressMethod compress_method; + Error *err = NULL; + int val, ret; + +@@ -544,6 +549,14 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + p->has_decompress_threads = true; + visit_type_uint8(v, param, &p->decompress_threads, &err); + break; ++ case MIGRATION_PARAMETER_COMPRESS_METHOD: ++ p->has_compress_method = true; ++ visit_type_CompressMethod(v, param, &compress_method, &err); ++ if (err) { ++ break; ++ } ++ p->compress_method = compress_method; ++ break; + case 
MIGRATION_PARAMETER_THROTTLE_TRIGGER_THRESHOLD: + p->has_throttle_trigger_threshold = true; + visit_type_uint8(v, param, &p->throttle_trigger_threshold, &err); +diff --git a/migration/options.c b/migration/options.c +index 8d8ec73ad9..af7ea7b346 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -47,6 +47,7 @@ + #define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 + /*0: means nocompress, 1: best speed, ... 9: best compress ratio */ + #define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 ++#define DEFAULT_MIGRATE_COMPRESS_METHOD COMPRESS_METHOD_ZLIB + /* Define default autoconverge cpu throttle migration parameters */ + #define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50 + #define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20 +@@ -113,6 +114,9 @@ Property migration_properties[] = { + DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, + parameters.decompress_threads, + DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), ++ DEFINE_PROP_COMPRESS_METHOD("compress-method", MigrationState, ++ parameters.compress_method, ++ DEFAULT_MIGRATE_COMPRESS_METHOD), + DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, + parameters.throttle_trigger_threshold, + DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD), +@@ -953,6 +957,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + params->compress_wait_thread = s->parameters.compress_wait_thread; + params->has_decompress_threads = true; + params->decompress_threads = s->parameters.decompress_threads; ++ params->has_compress_method = true; ++ params->compress_method = s->parameters.compress_method; + params->has_throttle_trigger_threshold = true; + params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold; + params->has_cpu_throttle_initial = true; +@@ -1025,6 +1031,7 @@ void migrate_params_init(MigrationParameters *params) + params->has_compress_threads = true; + params->has_compress_wait_thread = true; + params->has_decompress_threads = true; ++ params->has_compress_method = true; + 
params->has_throttle_trigger_threshold = true; + params->has_cpu_throttle_initial = true; + params->has_cpu_throttle_increment = true; +@@ -1259,6 +1266,10 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + dest->decompress_threads = params->decompress_threads; + } + ++ if (params->has_compress_method) { ++ dest->compress_method = params->compress_method; ++ } ++ + if (params->has_throttle_trigger_threshold) { + dest->throttle_trigger_threshold = params->throttle_trigger_threshold; + } +@@ -1380,6 +1391,10 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + s->parameters.decompress_threads = params->decompress_threads; + } + ++ if (params->has_compress_method) { ++ s->parameters.compress_method = params->compress_method; ++ } ++ + if (params->has_throttle_trigger_threshold) { + s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold; + } +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 871898ac46..5bb3c9cd46 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -24,6 +24,7 @@ + #include "qapi/qapi-commands-control.h" + #include "qapi/qapi-commands-misc.h" + #include "qapi/qmp/qdict.h" ++#include "qapi/qapi-visit-migration.h" + #include "qemu/cutils.h" + #include "hw/intc/intc.h" + #include "qemu/log.h" +diff --git a/qapi/migration.json b/qapi/migration.json +index eb2f883513..cafaa5ccb3 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -708,6 +708,19 @@ + 'bitmaps': [ 'BitmapMigrationBitmapAlias' ] + } } + ++## ++# @CompressMethod: ++# ++# An enumeration of multi-thread compression methods. ++# ++# @zlib: use zlib compression method. ++# ++# Since: 5.0 ++# ++## ++{ 'enum': 'CompressMethod', ++ 'data': [ 'zlib' ] } ++ + ## + # @MigrationParameter: + # +@@ -746,6 +759,9 @@ + # fast as compression, so set the decompress-threads to the number + # about 1/4 of compress-threads is adequate. + # ++# @compress-method: Which multi-thread compression method to use. 
++# Defaults to none. (Since 5.0) ++# + # @throttle-trigger-threshold: The ratio of bytes_dirty_period and + # bytes_xfer_period to trigger throttling. It is expressed as + # percentage. The default value is 50. (Since 5.0) +@@ -892,6 +908,7 @@ + { 'name': 'compress-level', 'features': [ 'deprecated' ] }, + { 'name': 'compress-threads', 'features': [ 'deprecated' ] }, + { 'name': 'decompress-threads', 'features': [ 'deprecated' ] }, ++ { 'name': 'compress-method', 'features': [ 'deprecated' ] }, + { 'name': 'compress-wait-thread', 'features': [ 'deprecated' ] }, + 'throttle-trigger-threshold', + 'cpu-throttle-initial', 'cpu-throttle-increment', +@@ -935,6 +952,9 @@ + # + # @decompress-threads: decompression thread count + # ++# @compress-method: Set compression method to use in multi-thread compression. ++# Defaults to none. (Since 5.0) ++# + # @throttle-trigger-threshold: The ratio of bytes_dirty_period and + # bytes_xfer_period to trigger throttling. It is expressed as + # percentage. The default value is 50. (Since 5.0) +@@ -1066,8 +1086,9 @@ + # + # @deprecated: Member @block-incremental is deprecated. Use + # blockdev-mirror with NBD instead. Members @compress-level, +-# @compress-threads, @decompress-threads and @compress-wait-thread +-# are deprecated because @compression is deprecated. ++# @compress-threads, @decompress-threads, @compress-method ++# and @compress-wait-thread are deprecated because ++# @compression is deprecated. + # + # @unstable: Members @x-checkpoint-delay and @x-vcpu-dirty-limit-period + # are experimental. 
+@@ -1090,6 +1111,8 @@ + 'features': [ 'deprecated' ] }, + '*decompress-threads': { 'type': 'uint8', + 'features': [ 'deprecated' ] }, ++ '*compress-method': { 'type': 'CompressMethod', ++ 'features': [ 'deprecated' ] }, + '*throttle-trigger-threshold': 'uint8', + '*cpu-throttle-initial': 'uint8', + '*cpu-throttle-increment': 'uint8', +@@ -1161,6 +1184,9 @@ + # + # @decompress-threads: decompression thread count + # ++# @compress-method: Which multi-thread compression method to use. ++# Defaults to none. (Since 5.0) ++# + # @throttle-trigger-threshold: The ratio of bytes_dirty_period and + # bytes_xfer_period to trigger throttling. It is expressed as + # percentage. The default value is 50. (Since 5.0) +@@ -1315,6 +1341,8 @@ + 'features': [ 'deprecated' ] }, + '*decompress-threads': { 'type': 'uint8', + 'features': [ 'deprecated' ] }, ++ '*compress-method': { 'type': 'CompressMethod', ++ 'features': [ 'deprecated' ] }, + '*throttle-trigger-threshold': 'uint8', + '*cpu-throttle-initial': 'uint8', + '*cpu-throttle-increment': 'uint8', +diff --git a/util/oslib-posix.c b/util/oslib-posix.c +index 9ca3fee2b8..43af077fed 100644 +--- a/util/oslib-posix.c ++++ b/util/oslib-posix.c +@@ -346,7 +346,7 @@ static void *do_touch_pages(void *arg) + } + qemu_mutex_unlock(&page_mutex); + +- while (started_num_threads != memset_args->context.num_threads) { ++ while (started_num_threads != memset_args->context->num_threads) { + smp_mb(); + } + +-- +2.27.0 + diff --git a/migration-Add-multi-thread-compress-ops.patch b/migration-Add-multi-thread-compress-ops.patch new file mode 100644 index 0000000000000000000000000000000000000000..dee278f8e65ab15824e9b624fbedfff6c177f8a4 --- /dev/null +++ b/migration-Add-multi-thread-compress-ops.patch @@ -0,0 +1,493 @@ +From 5896dedf32c7e4417bd7f3e889ca41a34b06f5db Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Sat, 30 Jan 2021 15:57:31 +0800 +Subject: [PATCH] migration: Add multi-thread compress ops + +Add the MigrationCompressOps and 
MigrationDecompressOps structures to make +the compression method configurable for multi-thread compression migration. + +Signed-off-by: Chuan Zheng +Signed-off-by: Zeyu Jin +Signed-off-by: Ying Fang +--- + migration/options.c | 9 ++ + migration/options.h | 1 + + migration/ram-compress.c | 261 ++++++++++++++++++++++++++------------- + migration/ram-compress.h | 31 ++++- + migration/ram.c | 4 +- + 5 files changed, 215 insertions(+), 91 deletions(-) + +diff --git a/migration/options.c b/migration/options.c +index af7ea7b346..6aaee702dc 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -799,6 +799,15 @@ int migrate_decompress_threads(void) + return s->parameters.decompress_threads; + } + ++CompressMethod migrate_compress_method(void) ++{ ++ MigrationState *s; ++ ++ s = migrate_get_current(); ++ ++ return s->parameters.compress_method; ++} ++ + uint64_t migrate_downtime_limit(void) + { + MigrationState *s = migrate_get_current(); +diff --git a/migration/options.h b/migration/options.h +index 246c160aee..9aca5e41ad 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -78,6 +78,7 @@ uint8_t migrate_cpu_throttle_increment(void); + uint8_t migrate_cpu_throttle_initial(void); + bool migrate_cpu_throttle_tailslow(void); + int migrate_decompress_threads(void); ++CompressMethod migrate_compress_method(void); + uint64_t migrate_downtime_limit(void); + uint8_t migrate_max_cpu_throttle(void); + uint64_t migrate_max_bandwidth(void); +diff --git a/migration/ram-compress.c b/migration/ram-compress.c +index 2be344acbc..6e37b22492 100644 +--- a/migration/ram-compress.c ++++ b/migration/ram-compress.c +@@ -65,26 +65,167 @@ static QemuThread *compress_threads; + static QemuMutex comp_done_lock; + static QemuCond comp_done_cond; + +-struct DecompressParam { +- bool done; +- bool quit; +- QemuMutex mutex; +- QemuCond cond; +- void *des; +- uint8_t *compbuf; +- int len; +- z_stream stream; +-}; +-typedef struct DecompressParam DecompressParam; +- + static 
QEMUFile *decomp_file; + static DecompressParam *decomp_param; + static QemuThread *decompress_threads; ++MigrationCompressOps *compress_ops; ++MigrationDecompressOps *decompress_ops; + static QemuMutex decomp_done_lock; + static QemuCond decomp_done_cond; + + static CompressResult do_compress_ram_page(CompressParam *param, RAMBlock *block); + ++static int zlib_save_setup(CompressParam *param) ++{ ++ if (deflateInit(¶m->stream, ++ migrate_compress_level()) != Z_OK) { ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static ssize_t zlib_compress_data(CompressParam *param, size_t size) ++{ ++ int err; ++ uint8_t *dest = NULL; ++ z_stream *stream = ¶m->stream; ++ uint8_t *p = param->originbuf; ++ QEMUFile *f = f = param->file; ++ ssize_t blen = qemu_put_compress_start(f, &dest); ++ ++ if (blen < compressBound(size)) { ++ return -1; ++ } ++ ++ err = deflateReset(stream); ++ if (err != Z_OK) { ++ return -1; ++ } ++ ++ stream->avail_in = size; ++ stream->next_in = p; ++ stream->avail_out = blen; ++ stream->next_out = dest; ++ ++ err = deflate(stream, Z_FINISH); ++ if (err != Z_STREAM_END) { ++ return -1; ++ } ++ ++ blen = stream->next_out - dest; ++ if (blen < 0) { ++ return -1; ++ } ++ ++ qemu_put_compress_end(f, blen); ++ return blen + sizeof(int32_t); ++} ++ ++static void zlib_save_cleanup(CompressParam *param) ++{ ++ deflateEnd(¶m->stream); ++} ++ ++static int zlib_load_setup(DecompressParam *param) ++{ ++ if (inflateInit(¶m->stream) != Z_OK) { ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static int ++zlib_decompress_data(DecompressParam *param, uint8_t *dest, size_t size) ++{ ++ int err; ++ ++ z_stream *stream = ¶m->stream; ++ ++ err = inflateReset(stream); ++ if (err != Z_OK) { ++ return -1; ++ } ++ ++ stream->avail_in = param->len; ++ stream->next_in = param->compbuf; ++ stream->avail_out = size; ++ stream->next_out = dest; ++ ++ err = inflate(stream, Z_NO_FLUSH); ++ if (err != Z_STREAM_END) { ++ return -1; ++ } ++ ++ return stream->total_out; ++} ++ ++static void 
zlib_load_cleanup(DecompressParam *param) ++{ ++ inflateEnd(¶m->stream); ++} ++ ++static int zlib_check_len(int len) ++{ ++ return len < 0 || len > compressBound(TARGET_PAGE_SIZE); ++} ++ ++static int set_compress_ops(void) ++{ ++ compress_ops = g_new0(MigrationCompressOps, 1); ++ ++ switch (migrate_compress_method()) { ++ case COMPRESS_METHOD_ZLIB: ++ compress_ops->save_setup = zlib_save_setup; ++ compress_ops->save_cleanup = zlib_save_cleanup; ++ compress_ops->compress_data = zlib_compress_data; ++ break; ++ default: ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static int set_decompress_ops(void) ++{ ++ decompress_ops = g_new0(MigrationDecompressOps, 1); ++ ++ switch (migrate_compress_method()) { ++ case COMPRESS_METHOD_ZLIB: ++ decompress_ops->load_setup = zlib_load_setup; ++ decompress_ops->load_cleanup = zlib_load_cleanup; ++ decompress_ops->decompress_data = zlib_decompress_data; ++ decompress_ops->check_len = zlib_check_len; ++ break; ++ default: ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static void clean_compress_ops(void) ++{ ++ compress_ops->save_setup = NULL; ++ compress_ops->save_cleanup = NULL; ++ compress_ops->compress_data = NULL; ++ ++ g_free(compress_ops); ++ compress_ops = NULL; ++} ++ ++static void clean_decompress_ops(void) ++{ ++ decompress_ops->load_setup = NULL; ++ decompress_ops->load_cleanup = NULL; ++ decompress_ops->decompress_data = NULL; ++ ++ g_free(decompress_ops); ++ decompress_ops = NULL; ++} ++ + static void *do_data_compress(void *opaque) + { + CompressParam *param = opaque; +@@ -141,7 +282,7 @@ void compress_threads_save_cleanup(void) + qemu_thread_join(compress_threads + i); + qemu_mutex_destroy(&comp_param[i].mutex); + qemu_cond_destroy(&comp_param[i].cond); +- deflateEnd(&comp_param[i].stream); ++ compress_ops->save_cleanup(&comp_param[i]); + g_free(comp_param[i].originbuf); + qemu_fclose(comp_param[i].file); + comp_param[i].file = NULL; +@@ -152,6 +293,7 @@ void compress_threads_save_cleanup(void) + g_free(comp_param); + 
compress_threads = NULL; + comp_param = NULL; ++ clean_compress_ops(); + } + + int compress_threads_save_setup(void) +@@ -161,6 +303,12 @@ int compress_threads_save_setup(void) + if (!migrate_compress()) { + return 0; + } ++ ++ if (set_compress_ops() < 0) { ++ clean_compress_ops(); ++ return -1; ++ } ++ + thread_count = migrate_compress_threads(); + compress_threads = g_new0(QemuThread, thread_count); + comp_param = g_new0(CompressParam, thread_count); +@@ -172,8 +320,7 @@ int compress_threads_save_setup(void) + goto exit; + } + +- if (deflateInit(&comp_param[i].stream, +- migrate_compress_level()) != Z_OK) { ++ if (compress_ops->save_setup(&comp_param[i]) < 0) { + g_free(comp_param[i].originbuf); + goto exit; + } +@@ -198,50 +345,6 @@ exit: + return -1; + } + +-/* +- * Compress size bytes of data start at p and store the compressed +- * data to the buffer of f. +- * +- * Since the file is dummy file with empty_ops, return -1 if f has no space to +- * save the compressed data. +- */ +-static ssize_t qemu_put_compression_data(CompressParam *param, size_t size) +-{ +- int err; +- uint8_t *dest = NULL; +- z_stream *stream = ¶m->stream; +- uint8_t *p = param->originbuf; +- QEMUFile *f = f = param->file; +- ssize_t blen = qemu_put_compress_start(f, &dest); +- +- if (blen < compressBound(size)) { +- return -1; +- } +- +- err = deflateReset(stream); +- if (err != Z_OK) { +- return -1; +- } +- +- stream->avail_in = size; +- stream->next_in = p; +- stream->avail_out = blen; +- stream->next_out = dest; +- +- err = deflate(stream, Z_FINISH); +- if (err != Z_STREAM_END) { +- return -1; +- } +- +- blen = stream->next_out - dest; +- if (blen < 0) { +- return -1; +- } +- +- qemu_put_compress_end(f, blen); +- return blen + sizeof(int32_t); +-} +- + static CompressResult do_compress_ram_page(CompressParam *param, RAMBlock *block) + { + uint8_t *p = block->host + (param->offset & TARGET_PAGE_MASK); +@@ -260,7 +363,7 @@ static CompressResult do_compress_ram_page(CompressParam *param, 
RAMBlock *block + * decompression + */ + memcpy(param->originbuf, p, page_size); +- ret = qemu_put_compression_data(param, page_size); ++ ret = compress_ops->compress_data(param, page_size); + if (ret < 0) { + qemu_file_set_error(migrate_get_current()->to_dst_file, ret); + error_report("compressed data failed!"); +@@ -356,32 +459,6 @@ bool compress_page_with_multi_thread(RAMBlock *block, ram_addr_t offset, + } + } + +-/* return the size after decompression, or negative value on error */ +-static int +-qemu_uncompress_data(DecompressParam *param, uint8_t *dest, size_t pagesize) +-{ +- int err; +- +- z_stream *stream = ¶m->stream; +- +- err = inflateReset(stream); +- if (err != Z_OK) { +- return -1; +- } +- +- stream->avail_in = param->len; +- stream->next_in = param->compbuf; +- stream->avail_out = pagesize; +- stream->next_out = dest; +- +- err = inflate(stream, Z_NO_FLUSH); +- if (err != Z_STREAM_END) { +- return -1; +- } +- +- return stream->total_out; +-} +- + static void *do_data_decompress(void *opaque) + { + DecompressParam *param = opaque; +@@ -398,7 +475,7 @@ static void *do_data_decompress(void *opaque) + + pagesize = qemu_target_page_size(); + +- ret = qemu_uncompress_data(param, des, pagesize); ++ ret = decompress_ops->decompress_data(param, des, pagesize); + if (ret < 0 && migrate_get_current()->decompress_error_check) { + error_report("decompress data failed"); + qemu_file_set_error(decomp_file, ret); +@@ -466,7 +543,7 @@ void compress_threads_load_cleanup(void) + qemu_thread_join(decompress_threads + i); + qemu_mutex_destroy(&decomp_param[i].mutex); + qemu_cond_destroy(&decomp_param[i].cond); +- inflateEnd(&decomp_param[i].stream); ++ decompress_ops->load_cleanup(&decomp_param[i]); + g_free(decomp_param[i].compbuf); + decomp_param[i].compbuf = NULL; + } +@@ -475,6 +552,7 @@ void compress_threads_load_cleanup(void) + decompress_threads = NULL; + decomp_param = NULL; + decomp_file = NULL; ++ clean_decompress_ops(); + } + + int 
compress_threads_load_setup(QEMUFile *f) +@@ -485,6 +563,11 @@ int compress_threads_load_setup(QEMUFile *f) + return 0; + } + ++ if (set_decompress_ops() < 0) { ++ clean_decompress_ops(); ++ return -1; ++ } ++ + /* + * set compression_counters memory to zero for a new migration + */ +@@ -497,7 +580,7 @@ int compress_threads_load_setup(QEMUFile *f) + qemu_cond_init(&decomp_done_cond); + decomp_file = f; + for (i = 0; i < thread_count; i++) { +- if (inflateInit(&decomp_param[i].stream) != Z_OK) { ++ if (decompress_ops->load_setup(&decomp_param[i]) < 0) { + goto exit; + } + +diff --git a/migration/ram-compress.h b/migration/ram-compress.h +index 0d89a2f55e..daf241987f 100644 +--- a/migration/ram-compress.h ++++ b/migration/ram-compress.h +@@ -39,6 +39,20 @@ enum CompressResult { + }; + typedef enum CompressResult CompressResult; + ++struct DecompressParam { ++ bool done; ++ bool quit; ++ QemuMutex mutex; ++ QemuCond cond; ++ void *des; ++ uint8_t *compbuf; ++ int len; ++ ++ /* for zlib compression */ ++ z_stream stream; ++}; ++typedef struct DecompressParam DecompressParam; ++ + struct CompressParam { + bool done; + bool quit; +@@ -51,11 +65,26 @@ struct CompressParam { + ram_addr_t offset; + + /* internally used fields */ +- z_stream stream; + uint8_t *originbuf; ++ ++ /* for zlib compression */ ++ z_stream stream; + }; + typedef struct CompressParam CompressParam; + ++typedef struct { ++ int (*save_setup)(CompressParam *param); ++ void (*save_cleanup)(CompressParam *param); ++ ssize_t (*compress_data)(CompressParam *param, size_t size); ++} MigrationCompressOps; ++ ++typedef struct { ++ int (*load_setup)(DecompressParam *param); ++ void (*load_cleanup)(DecompressParam *param); ++ int (*decompress_data)(DecompressParam *param, uint8_t *dest, size_t size); ++ int (*check_len)(int len); ++} MigrationDecompressOps; ++ + void compress_threads_save_cleanup(void); + int compress_threads_save_setup(void); + +diff --git a/migration/ram.c b/migration/ram.c +index 
8c7886ab79..f9b2b9b985 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -96,6 +96,8 @@ + + XBZRLECacheStats xbzrle_counters; + ++extern MigrationDecompressOps *decompress_ops; ++ + /* used by the search for pages to send */ + struct PageSearchStatus { + /* The migration channel used for a specific host page */ +@@ -3979,7 +3981,7 @@ static int ram_load_precopy(QEMUFile *f) + + case RAM_SAVE_FLAG_COMPRESS_PAGE: + len = qemu_get_be32(f); +- if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) { ++ if (decompress_ops->check_len(len)) { + error_report("Invalid compressed data length: %d", len); + ret = -EINVAL; + break; +-- +2.27.0 + diff --git a/migration-Add-zstd-support-in-multi-thread-compressi.patch b/migration-Add-zstd-support-in-multi-thread-compressi.patch new file mode 100644 index 0000000000000000000000000000000000000000..856e86a6bc0143e297bf1328f57338340d19f70d --- /dev/null +++ b/migration-Add-zstd-support-in-multi-thread-compressi.patch @@ -0,0 +1,229 @@ +From 8c9603270184d8dadf64ec6de263268e846f8c18 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Sat, 30 Jan 2021 16:15:10 +0800 +Subject: [PATCH] migration: Add zstd support in multi-thread compression + +This patch enables zstd option in multi-thread compression. 
+ +Signed-off-by: Chuan Zheng +Signed-off-by: Zeyu Jin +Signed-off-by: Ying Fang +--- + hw/core/qdev-properties-system.c | 2 +- + migration/ram-compress.c | 112 +++++++++++++++++++++++++++++++ + migration/ram-compress.h | 15 +++++ + qapi/migration.json | 3 +- + 4 files changed, 130 insertions(+), 2 deletions(-) + +diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c +index cd5571fcfb..c581d46f2e 100644 +--- a/hw/core/qdev-properties-system.c ++++ b/hw/core/qdev-properties-system.c +@@ -1206,7 +1206,7 @@ const PropertyInfo qdev_prop_uuid = { + const PropertyInfo qdev_prop_compress_method = { + .name = "CompressMethod", + .description = "multi-thread compression method, " +- "zlib", ++ "zlib/zstd", + .enum_table = &CompressMethod_lookup, + .get = qdev_propinfo_get_enum, + .set = qdev_propinfo_set_enum, +diff --git a/migration/ram-compress.c b/migration/ram-compress.c +index 6e37b22492..74703f0ec4 100644 +--- a/migration/ram-compress.c ++++ b/migration/ram-compress.c +@@ -171,6 +171,103 @@ static int zlib_check_len(int len) + return len < 0 || len > compressBound(TARGET_PAGE_SIZE); + } + ++#ifdef CONFIG_ZSTD ++static int zstd_save_setup(CompressParam *param) ++{ ++ int res; ++ param->zstd_cs = ZSTD_createCStream(); ++ if (!param->zstd_cs) { ++ return -1; ++ } ++ res = ZSTD_initCStream(param->zstd_cs, migrate_compress_level()); ++ if (ZSTD_isError(res)) { ++ return -1; ++ } ++ return 0; ++} ++static void zstd_save_cleanup(CompressParam *param) ++{ ++ ZSTD_freeCStream(param->zstd_cs); ++ param->zstd_cs = NULL; ++} ++static ssize_t zstd_compress_data(CompressParam *param, size_t size) ++{ ++ int ret; ++ uint8_t *dest = NULL; ++ uint8_t *p = param->originbuf; ++ QEMUFile *f = f = param->file; ++ ssize_t blen = qemu_put_compress_start(f, &dest); ++ if (blen < ZSTD_compressBound(size)) { ++ return -1; ++ } ++ param->out.dst = dest; ++ param->out.size = blen; ++ param->out.pos = 0; ++ param->in.src = p; ++ param->in.size = size; ++ param->in.pos = 
0; ++ do { ++ ret = ZSTD_compressStream2(param->zstd_cs, ¶m->out, ++ ¶m->in, ZSTD_e_end); ++ } while (ret > 0 && (param->in.size - param->in.pos > 0) ++ && (param->out.size - param->out.pos > 0)); ++ if (ret > 0 && (param->in.size - param->in.pos > 0)) { ++ return -1; ++ } ++ if (ZSTD_isError(ret)) { ++ return -1; ++ } ++ blen = param->out.pos; ++ qemu_put_compress_end(f, blen); ++ return blen + sizeof(int32_t); ++} ++ ++static int zstd_load_setup(DecompressParam *param) ++{ ++ int ret; ++ param->zstd_ds = ZSTD_createDStream(); ++ if (!param->zstd_ds) { ++ return -1; ++ } ++ ret = ZSTD_initDStream(param->zstd_ds); ++ if (ZSTD_isError(ret)) { ++ return -1; ++ } ++ return 0; ++} ++static void zstd_load_cleanup(DecompressParam *param) ++{ ++ ZSTD_freeDStream(param->zstd_ds); ++ param->zstd_ds = NULL; ++} ++static int ++zstd_decompress_data(DecompressParam *param, uint8_t *dest, size_t size) ++{ ++ int ret; ++ param->out.dst = dest; ++ param->out.size = size; ++ param->out.pos = 0; ++ param->in.src = param->compbuf; ++ param->in.size = param->len; ++ param->in.pos = 0; ++ do { ++ ret = ZSTD_decompressStream(param->zstd_ds, ¶m->out, ¶m->in); ++ } while (ret > 0 && (param->in.size - param->in.pos > 0) ++ && (param->out.size - param->out.pos > 0)); ++ if (ret > 0 && (param->in.size - param->in.pos > 0)) { ++ return -1; ++ } ++ if (ZSTD_isError(ret)) { ++ return -1; ++ } ++ return ret; ++} ++static int zstd_check_len(int len) ++{ ++ return len < 0 || len > ZSTD_compressBound(TARGET_PAGE_SIZE); ++} ++#endif ++ + static int set_compress_ops(void) + { + compress_ops = g_new0(MigrationCompressOps, 1); +@@ -181,6 +278,13 @@ static int set_compress_ops(void) + compress_ops->save_cleanup = zlib_save_cleanup; + compress_ops->compress_data = zlib_compress_data; + break; ++#ifdef CONFIG_ZSTD ++ case COMPRESS_METHOD_ZSTD: ++ compress_ops->save_setup = zstd_save_setup; ++ compress_ops->save_cleanup = zstd_save_cleanup; ++ compress_ops->compress_data = zstd_compress_data; ++ break; 
++#endif + default: + return -1; + } +@@ -199,6 +303,14 @@ static int set_decompress_ops(void) + decompress_ops->decompress_data = zlib_decompress_data; + decompress_ops->check_len = zlib_check_len; + break; ++#ifdef CONFIG_ZSTD ++ case COMPRESS_METHOD_ZSTD: ++ decompress_ops->load_setup = zstd_load_setup; ++ decompress_ops->load_cleanup = zstd_load_cleanup; ++ decompress_ops->decompress_data = zstd_decompress_data; ++ decompress_ops->check_len = zstd_check_len; ++ break; ++#endif + default: + return -1; + } +diff --git a/migration/ram-compress.h b/migration/ram-compress.h +index daf241987f..e8700eb36f 100644 +--- a/migration/ram-compress.h ++++ b/migration/ram-compress.h +@@ -29,6 +29,10 @@ + #ifndef QEMU_MIGRATION_COMPRESS_H + #define QEMU_MIGRATION_COMPRESS_H + ++#ifdef CONFIG_ZSTD ++#include ++#include ++#endif + #include "qemu-file.h" + #include "qapi/qapi-types-migration.h" + +@@ -50,6 +54,11 @@ struct DecompressParam { + + /* for zlib compression */ + z_stream stream; ++#ifdef CONFIG_ZSTD ++ ZSTD_DStream *zstd_ds; ++ ZSTD_inBuffer in; ++ ZSTD_outBuffer out; ++#endif + }; + typedef struct DecompressParam DecompressParam; + +@@ -69,6 +78,12 @@ struct CompressParam { + + /* for zlib compression */ + z_stream stream; ++ ++#ifdef CONFIG_ZSTD ++ ZSTD_CStream *zstd_cs; ++ ZSTD_inBuffer in; ++ ZSTD_outBuffer out; ++#endif + }; + typedef struct CompressParam CompressParam; + +diff --git a/qapi/migration.json b/qapi/migration.json +index cafaa5ccb3..29af841f4e 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -714,12 +714,13 @@ + # An enumeration of multi-thread compression methods. + # + # @zlib: use zlib compression method. ++# @zstd: use zstd compression method. 
+ # + # Since: 5.0 + # + ## + { 'enum': 'CompressMethod', +- 'data': [ 'zlib' ] } ++ 'data': [ 'zlib', { 'name': 'zstd', 'if': 'CONFIG_ZSTD' } ] } + + ## + # @MigrationParameter: +-- +2.27.0 + diff --git a/migration-Refactoring-multi-thread-compress-migratio.patch b/migration-Refactoring-multi-thread-compress-migratio.patch new file mode 100644 index 0000000000000000000000000000000000000000..7f2a5acbd929af6feaeb19c57d612730bf9f36f1 --- /dev/null +++ b/migration-Refactoring-multi-thread-compress-migratio.patch @@ -0,0 +1,330 @@ +From cf6f31249817380e91cbc4e55b189216645fac18 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Sat, 30 Jan 2021 15:21:17 +0800 +Subject: [PATCH] migration: Refactoring multi-thread compress migration + +Code refactor for the compression procedure which includes: + +1. Move qemu_compress_data and qemu_put_compression_data from qemu-file.c to +ram.c, for the reason that most part of the code logical has nothing to do +with qemu-file. Besides, the decompression code is located at ram.c only. + +2. Simplify the function input arguments for compression and decompression. +Wrap the input into the param structure which already exists. This change also +makes the function much more flexible for other compression methods. 
+ +Signed-off-by: Chuan Zheng +Signed-off-by: Zeyu Jin +Signed-off-by: Ying Fang +--- + migration/meson.build | 4 +- + migration/migration-hmp-cmds.c | 1 - + migration/qemu-file.c | 61 +++++------------------- + migration/qemu-file.h | 4 +- + migration/ram-compress.c | 87 ++++++++++++++++++++++++---------- + 5 files changed, 77 insertions(+), 80 deletions(-) + +diff --git a/migration/meson.build b/migration/meson.build +index 92b1cc4297..d9b46ef0df 100644 +--- a/migration/meson.build ++++ b/migration/meson.build +@@ -22,7 +22,6 @@ system_ss.add(files( + 'migration.c', + 'multifd.c', + 'multifd-zlib.c', +- 'ram-compress.c', + 'options.c', + 'postcopy-ram.c', + 'savevm.c', +@@ -43,4 +42,5 @@ system_ss.add(when: zstd, if_true: files('multifd-zstd.c')) + + specific_ss.add(when: 'CONFIG_SYSTEM_ONLY', + if_true: files('ram.c', +- 'target.c')) ++ 'target.c', ++ 'ram-compress.c')) +diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c +index 261ec1e35c..1fa6a5f478 100644 +--- a/migration/migration-hmp-cmds.c ++++ b/migration/migration-hmp-cmds.c +@@ -22,7 +22,6 @@ + #include "qapi/qapi-commands-migration.h" + #include "qapi/qapi-visit-migration.h" + #include "qapi/qmp/qdict.h" +-#include "qapi/qapi-visit-migration.h" + #include "qapi/string-input-visitor.h" + #include "qapi/string-output-visitor.h" + #include "qemu/cutils.h" +diff --git a/migration/qemu-file.c b/migration/qemu-file.c +index 94231ff295..bd1dbc3db1 100644 +--- a/migration/qemu-file.c ++++ b/migration/qemu-file.c +@@ -669,55 +669,6 @@ uint64_t qemu_get_be64(QEMUFile *f) + return v; + } + +-/* return the size after compression, or negative value on error */ +-static int qemu_compress_data(z_stream *stream, uint8_t *dest, size_t dest_len, +- const uint8_t *source, size_t source_len) +-{ +- int err; +- +- err = deflateReset(stream); +- if (err != Z_OK) { +- return -1; +- } +- +- stream->avail_in = source_len; +- stream->next_in = (uint8_t *)source; +- stream->avail_out = dest_len; +- 
stream->next_out = dest; +- +- err = deflate(stream, Z_FINISH); +- if (err != Z_STREAM_END) { +- return -1; +- } +- +- return stream->next_out - dest; +-} +- +-/* Compress size bytes of data start at p and store the compressed +- * data to the buffer of f. +- * +- * Since the file is dummy file with empty_ops, return -1 if f has no space to +- * save the compressed data. +- */ +-ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream, +- const uint8_t *p, size_t size) +-{ +- ssize_t blen = IO_BUF_SIZE - f->buf_index - sizeof(int32_t); +- +- if (blen < compressBound(size)) { +- return -1; +- } +- +- blen = qemu_compress_data(stream, f->buf + f->buf_index + sizeof(int32_t), +- blen, p, size); +- if (blen < 0) { +- return -1; +- } +- +- qemu_put_be32(f, blen); +- add_buf_to_iovec(f, blen); +- return blen + sizeof(int32_t); +-} + + /* Put the data in the buffer of f_src to the buffer of f_des, and + * then reset the buf_index of f_src to 0. +@@ -834,3 +785,15 @@ int qemu_file_get_to_fd(QEMUFile *f, int fd, size_t size) + + return 0; + } ++ ++ssize_t qemu_put_compress_start(QEMUFile *f, uint8_t **dest_ptr) ++{ ++ *dest_ptr = f->buf + f->buf_index + sizeof(int32_t); ++ return IO_BUF_SIZE - f->buf_index - sizeof(int32_t); ++} ++ ++void qemu_put_compress_end(QEMUFile *f, unsigned int v) ++{ ++ qemu_put_be32(f, v); ++ add_buf_to_iovec(f, v); ++} +diff --git a/migration/qemu-file.h b/migration/qemu-file.h +index 8aec9fabf7..8afa95732b 100644 +--- a/migration/qemu-file.h ++++ b/migration/qemu-file.h +@@ -54,8 +54,8 @@ void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size, + + size_t coroutine_mixed_fn qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset); + size_t coroutine_mixed_fn qemu_get_buffer_in_place(QEMUFile *f, uint8_t **buf, size_t size); +-ssize_t qemu_put_compression_data(QEMUFile *f, z_stream *stream, +- const uint8_t *p, size_t size); ++ssize_t qemu_put_compress_start(QEMUFile *f, uint8_t **dest_ptr); ++void 
qemu_put_compress_end(QEMUFile *f, unsigned int v); + int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src); + bool qemu_file_buffer_empty(QEMUFile *file); + +diff --git a/migration/ram-compress.c b/migration/ram-compress.c +index fa4388f6a6..2be344acbc 100644 +--- a/migration/ram-compress.c ++++ b/migration/ram-compress.c +@@ -28,7 +28,6 @@ + + #include "qemu/osdep.h" + #include "qemu/cutils.h" +- + #include "ram-compress.h" + + #include "qemu/error-report.h" +@@ -40,6 +39,7 @@ + #include "exec/ramblock.h" + #include "ram.h" + #include "migration-stats.h" ++#include "exec/ram_addr.h" + + static struct { + int64_t pages; +@@ -83,28 +83,22 @@ static QemuThread *decompress_threads; + static QemuMutex decomp_done_lock; + static QemuCond decomp_done_cond; + +-static CompressResult do_compress_ram_page(QEMUFile *f, z_stream *stream, +- RAMBlock *block, ram_addr_t offset, +- uint8_t *source_buf); ++static CompressResult do_compress_ram_page(CompressParam *param, RAMBlock *block); + + static void *do_data_compress(void *opaque) + { + CompressParam *param = opaque; + RAMBlock *block; +- ram_addr_t offset; + CompressResult result; + + qemu_mutex_lock(¶m->mutex); + while (!param->quit) { + if (param->trigger) { + block = param->block; +- offset = param->offset; + param->trigger = false; + qemu_mutex_unlock(¶m->mutex); + +- result = do_compress_ram_page(param->file, ¶m->stream, +- block, offset, param->originbuf); +- ++ result = do_compress_ram_page(param, block); + qemu_mutex_lock(&comp_done_lock); + param->done = true; + param->result = result; +@@ -204,15 +198,57 @@ exit: + return -1; + } + +-static CompressResult do_compress_ram_page(QEMUFile *f, z_stream *stream, +- RAMBlock *block, ram_addr_t offset, +- uint8_t *source_buf) ++/* ++ * Compress size bytes of data start at p and store the compressed ++ * data to the buffer of f. ++ * ++ * Since the file is dummy file with empty_ops, return -1 if f has no space to ++ * save the compressed data. 
++ */ ++static ssize_t qemu_put_compression_data(CompressParam *param, size_t size) ++{ ++ int err; ++ uint8_t *dest = NULL; ++ z_stream *stream = ¶m->stream; ++ uint8_t *p = param->originbuf; ++ QEMUFile *f = f = param->file; ++ ssize_t blen = qemu_put_compress_start(f, &dest); ++ ++ if (blen < compressBound(size)) { ++ return -1; ++ } ++ ++ err = deflateReset(stream); ++ if (err != Z_OK) { ++ return -1; ++ } ++ ++ stream->avail_in = size; ++ stream->next_in = p; ++ stream->avail_out = blen; ++ stream->next_out = dest; ++ ++ err = deflate(stream, Z_FINISH); ++ if (err != Z_STREAM_END) { ++ return -1; ++ } ++ ++ blen = stream->next_out - dest; ++ if (blen < 0) { ++ return -1; ++ } ++ ++ qemu_put_compress_end(f, blen); ++ return blen + sizeof(int32_t); ++} ++ ++static CompressResult do_compress_ram_page(CompressParam *param, RAMBlock *block) + { +- uint8_t *p = block->host + offset; ++ uint8_t *p = block->host + (param->offset & TARGET_PAGE_MASK); + size_t page_size = qemu_target_page_size(); + int ret; + +- assert(qemu_file_buffer_empty(f)); ++ assert(qemu_file_buffer_empty(param->file)); + + if (buffer_is_zero(p, page_size)) { + return RES_ZEROPAGE; +@@ -223,12 +259,12 @@ static CompressResult do_compress_ram_page(QEMUFile *f, z_stream *stream, + * so that we can catch up the error during compression and + * decompression + */ +- memcpy(source_buf, p, page_size); +- ret = qemu_put_compression_data(f, stream, source_buf, page_size); ++ memcpy(param->originbuf, p, page_size); ++ ret = qemu_put_compression_data(param, page_size); + if (ret < 0) { + qemu_file_set_error(migrate_get_current()->to_dst_file, ret); + error_report("compressed data failed!"); +- qemu_fflush(f); ++ qemu_fflush(param->file); + return RES_NONE; + } + return RES_COMPRESS; +@@ -322,19 +358,20 @@ bool compress_page_with_multi_thread(RAMBlock *block, ram_addr_t offset, + + /* return the size after decompression, or negative value on error */ + static int +-qemu_uncompress_data(z_stream *stream, 
uint8_t *dest, size_t dest_len, +- const uint8_t *source, size_t source_len) ++qemu_uncompress_data(DecompressParam *param, uint8_t *dest, size_t pagesize) + { + int err; + ++ z_stream *stream = &param->stream; ++ + err = inflateReset(stream); + if (err != Z_OK) { + return -1; + } + +- stream->avail_in = source_len; +- stream->next_in = (uint8_t *)source; +- stream->avail_out = dest_len; ++ stream->avail_in = param->len; ++ stream->next_in = param->compbuf; ++ stream->avail_out = pagesize; + stream->next_out = dest; + + err = inflate(stream, Z_NO_FLUSH); +@@ -350,20 +387,18 @@ static void *do_data_decompress(void *opaque) + DecompressParam *param = opaque; + unsigned long pagesize; + uint8_t *des; +- int len, ret; ++ int ret; + + qemu_mutex_lock(&param->mutex); + while (!param->quit) { + if (param->des) { + des = param->des; +- len = param->len; + param->des = 0; + qemu_mutex_unlock(&param->mutex); + + pagesize = qemu_target_page_size(); + +- ret = qemu_uncompress_data(&param->stream, des, pagesize, +- param->compbuf, len); ++ ret = qemu_uncompress_data(param, des, pagesize); + if (ret < 0 && migrate_get_current()->decompress_error_check) { + error_report("decompress data failed"); + qemu_file_set_error(decomp_file, ret); +-- +2.27.0 + diff --git a/migration-Skip-only-empty-block-devicesi.patch b/migration-Skip-only-empty-block-devicesi.patch new file mode 100644 index 0000000000000000000000000000000000000000..6787e2920d09b68bf6eb42dc58517a26e0bbae1d --- /dev/null +++ b/migration-Skip-only-empty-block-devicesi.patch @@ -0,0 +1,86 @@ +From 4506b31c0fff0b7a69ec4c7e264715ed70df75a8 Mon Sep 17 00:00:00 2001 +From: gaojiazhen +Date: Mon, 25 Mar 2024 22:13:43 +0800 +Subject: [PATCH] migration: Skip only empty block devicesi +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 2e128776dc56f502c2ee41750afe83938f389528 + +The block .save_setup() handler calls a helper routine +init_blk_migration() which builds a list of block 
devices to take into +account for migration. When one device is found to be empty (sectors +== 0), the loop exits and all the remaining devices are ignored. This +is a regression introduced when bdrv_iterate() was removed. + +Change that by skipping only empty devices. + +Cc: Markus Armbruster +Cc: qemu-stable +Suggested-by: Kevin Wolf +Fixes: fea68bb ("block: Eliminate bdrv_iterate(), use bdrv_next()") +Signed-off-by: Cédric Le Goater +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Kevin Wolf +Link: https://lore.kernel.org/r/20240312120431.550054-1-clg@redhat.com +[peterx: fix "Suggested-by:"] +Signed-off-by: Peter Xu +Signed-off-by: Gao Jiazhen +--- + migration/block.c | 5 ++++- + tests/qemu-iotests/198.out | 2 -- + tests/qemu-iotests/206.out | 1 - + 3 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/migration/block.c b/migration/block.c +index a15f9bddcb..710ef6f490 100644 +--- a/migration/block.c ++++ b/migration/block.c +@@ -409,7 +409,10 @@ static int init_blk_migration(QEMUFile *f) + } + + sectors = bdrv_nb_sectors(bs); +- if (sectors <= 0) { ++ if (sectors == 0) { ++ continue; ++ } ++ if (sectors < 0) { + ret = sectors; + bdrv_next_cleanup(&it); + goto out; +diff --git a/tests/qemu-iotests/198.out b/tests/qemu-iotests/198.out +index 62fb73fa3e..805494916f 100644 +--- a/tests/qemu-iotests/198.out ++++ b/tests/qemu-iotests/198.out +@@ -39,7 +39,6 @@ Format specific information: + compression type: COMPRESSION_TYPE + encrypt: + ivgen alg: plain64 +- detached header: false + hash alg: sha256 + cipher alg: aes-256 + uuid: 00000000-0000-0000-0000-000000000000 +@@ -85,7 +84,6 @@ Format specific information: + compression type: COMPRESSION_TYPE + encrypt: + ivgen alg: plain64 +- detached header: false + hash alg: sha256 + cipher alg: aes-256 + uuid: 00000000-0000-0000-0000-000000000000 +diff --git a/tests/qemu-iotests/206.out b/tests/qemu-iotests/206.out +index 979f00f9bf..7e95694777 100644 +--- a/tests/qemu-iotests/206.out ++++ 
b/tests/qemu-iotests/206.out +@@ -114,7 +114,6 @@ Format specific information: + refcount bits: 16 + encrypt: + ivgen alg: plain64 +- detached header: false + hash alg: sha1 + cipher alg: aes-128 + uuid: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX +-- +2.27.0 + diff --git a/migration-report-migration-related-thread-pid-to-lib.patch b/migration-report-migration-related-thread-pid-to-lib.patch new file mode 100644 index 0000000000000000000000000000000000000000..9fd6fbb9a543dac44c6f9921620bb1112df267f4 --- /dev/null +++ b/migration-report-migration-related-thread-pid-to-lib.patch @@ -0,0 +1,54 @@ +From 7caa5d818e0fa0e1cee2513f2fde4e81f8b5cc13 Mon Sep 17 00:00:00 2001 +From: zhengchuan +Date: Mon, 5 Dec 2022 20:52:25 +0800 +Subject: [PATCH] migration: report migration related thread pid to libvirt + +in order to control migration thread cgroup, +we need to report migration related thread pid to libvirt + +Signed-off-by:zhengchuan +--- + migration/migration.c | 3 +++ + qapi/migration.json | 12 ++++++++++++ + 2 files changed, 15 insertions(+) + +diff --git a/migration/migration.c b/migration/migration.c +index 3ce04b2aaf..7c2fdde26b 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3299,6 +3299,9 @@ static void *migration_thread(void *opaque) + MigThrError thr_error; + bool urgent = false; + ++ /* report migration thread pid to libvirt */ ++ qapi_event_send_migration_pid(qemu_get_thread_id()); ++ + thread = migration_threads_add("live_migration", qemu_get_thread_id()); + + rcu_register_thread(); +diff --git a/qapi/migration.json b/qapi/migration.json +index 29af841f4e..b442d0d878 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -1447,6 +1447,18 @@ + { 'event': 'MIGRATION_PASS', + 'data': { 'pass': 'int' } } + ++## ++# @MIGRATION_PID: ++# ++# Emitted when migration thread appear ++# ++# @pid: pid of migration thread ++# ++# Since: EulerOS Virtual ++## ++{ 'event': 'MIGRATION_PID', ++ 'data': { 'pid': 'int' } } ++ + ## + # @COLOMessage: + # +-- 
+2.27.0 + diff --git a/migration-report-multiFd-related-thread-pid-to-libvi.patch b/migration-report-multiFd-related-thread-pid-to-libvi.patch new file mode 100644 index 0000000000000000000000000000000000000000..77730286d76ec64da9e5f1dd78a62ca88b2e852b --- /dev/null +++ b/migration-report-multiFd-related-thread-pid-to-libvi.patch @@ -0,0 +1,62 @@ +From e387eaeef8845993a437ad19eaf988fb101d3fdd Mon Sep 17 00:00:00 2001 +From: zhengchuan +Date: Mon, 5 Dec 2022 20:56:35 +0800 +Subject: [PATCH] migration: report multiFd related thread pid to libvirt + +report multiFd related thread pid to libvirt in order to +pin multiFd thread to different cpu. + +Signed-off-by:zhengchuan +--- + migration/multifd.c | 4 ++++ + qapi/migration.json | 12 ++++++++++++ + 2 files changed, 16 insertions(+) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 409460684f..7d373a245e 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -17,6 +17,7 @@ + #include "exec/ramblock.h" + #include "qemu/error-report.h" + #include "qapi/error.h" ++#include "qapi/qapi-events-migration.h" + #include "ram.h" + #include "migration.h" + #include "migration-stats.h" +@@ -657,6 +658,9 @@ static void *multifd_send_thread(void *opaque) + + thread = migration_threads_add(p->name, qemu_get_thread_id()); + ++ /* report multifd thread pid to libvirt */ ++ qapi_event_send_migration_multifd_pid(qemu_get_thread_id()); ++ + trace_multifd_send_thread_start(p->id); + rcu_register_thread(); + +diff --git a/qapi/migration.json b/qapi/migration.json +index b442d0d878..5d0855a1d8 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -1447,6 +1447,18 @@ + { 'event': 'MIGRATION_PASS', + 'data': { 'pass': 'int' } } + ++## ++# @MIGRATION_MULTIFD_PID: ++# ++# Emitted when multifd thread appear ++# ++# @pid: pid of multifd thread ++# ++# Since: EulerOS Virtual ++## ++{ 'event': 'MIGRATION_MULTIFD_PID', ++ 'data': { 'pid': 'int' } } ++ + ## + # @MIGRATION_PID: + # +-- +2.27.0 + diff --git 
a/migration-skip-cache_drop-for-bios-bootloader-and-nv.patch b/migration-skip-cache_drop-for-bios-bootloader-and-nv.patch new file mode 100644 index 0000000000000000000000000000000000000000..7ee3f088a239688d68c7aaa807781fbd2664dc72 --- /dev/null +++ b/migration-skip-cache_drop-for-bios-bootloader-and-nv.patch @@ -0,0 +1,47 @@ +From dfb9372702b2fb994392b8a6e8a39964c2656ae6 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 9 Feb 2022 08:49:41 +0800 +Subject: [PATCH] migration: skip cache_drop for bios bootloader and nvram + template + +Qemu enabled page cache dropping for raw device on the destination host +during shared storage migration. +However, fsync may take 300ms to multiple seconds to return in multiple-migration +scene, because all domains in a host share bios bootloader file, skip cache_drop +for bios bootloader and nvram template to avoid downtime increase. +--- + block.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/block.c b/block.c +index b7cb963929..3bfd4be6b4 100644 +--- a/block.c ++++ b/block.c +@@ -68,6 +68,9 @@ + + #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ + ++#define DEFAULT_BIOS_BOOT_LOADER_DIR "/usr/share/edk2" ++#define DEFAULT_NVRAM_TEMPLATE_DIR "/var/lib/libvirt/qemu/nvram" ++ + /* Protected by BQL */ + static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = + QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); +@@ -7017,7 +7020,13 @@ int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp) + assert(!(bs->open_flags & BDRV_O_INACTIVE)); + assert_bdrv_graph_readable(); + +- if (bs->drv->bdrv_co_invalidate_cache) { ++ /* ++ * It's not necessary for bios bootloader and nvram template to drop cache ++ * when migration, skip this step for them to avoid downtime increase. 
++ */ ++ if (bs->drv->bdrv_co_invalidate_cache && ++ !strstr(bs->filename, DEFAULT_BIOS_BOOT_LOADER_DIR) && ++ !strstr(bs->filename, DEFAULT_NVRAM_TEMPLATE_DIR)) { + bs->drv->bdrv_co_invalidate_cache(bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); +-- +2.27.0 + diff --git a/monitor-Discard-BLOCK_IO_ERROR-event-when-VM-reboote.patch b/monitor-Discard-BLOCK_IO_ERROR-event-when-VM-reboote.patch new file mode 100644 index 0000000000000000000000000000000000000000..6bda51123bb31527b02f67c633a8adcb3e210118 --- /dev/null +++ b/monitor-Discard-BLOCK_IO_ERROR-event-when-VM-reboote.patch @@ -0,0 +1,96 @@ +From a344d8636168ba5f034a908d3394ef88d36133dd Mon Sep 17 00:00:00 2001 +From: Yan Wang +Date: Thu, 10 Feb 2022 11:18:13 +0800 +Subject: [PATCH] monitor: Discard BLOCK_IO_ERROR event when VM rebooted + +Throttled event like QAPI_EVENT_BLOCK_IO_ERROR may be queued +to limit event rate. Event may be delivered when VM is rebooted +if the event was queued in the *monitor_qapi_event_state* hash table. +Which may cause VM pause and other related problems. +Such as seabios blocked during virtio-scsi initialization: + vring_add_buf(vq, sg, out_num, in_num, 0, 0); + vring_kick(vp, vq, 1); + ------------> VM paused here <----------- + /* Wait for reply */ + while (!vring_more_used(vq)) usleep(5); + +Signed-off-by: Yan Wang +--- + include/monitor/monitor.h | 2 ++ + monitor/monitor.c | 29 +++++++++++++++++++++++++++++ + system/runstate.c | 1 + + 3 files changed, 32 insertions(+) + +diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h +index 965f5d5450..60079086a8 100644 +--- a/include/monitor/monitor.h ++++ b/include/monitor/monitor.h +@@ -63,4 +63,6 @@ void monitor_register_hmp_info_hrt(const char *name, + int error_vprintf_unless_qmp(const char *fmt, va_list ap) G_GNUC_PRINTF(1, 0); + int error_printf_unless_qmp(const char *fmt, ...) 
G_GNUC_PRINTF(1, 2); + ++void monitor_qapi_event_discard_io_error(void); ++ + #endif /* MONITOR_H */ +diff --git a/monitor/monitor.c b/monitor/monitor.c +index e540c1334a..8d59a76612 100644 +--- a/monitor/monitor.c ++++ b/monitor/monitor.c +@@ -34,6 +34,8 @@ + #include "qemu/option.h" + #include "sysemu/qtest.h" + #include "trace.h" ++#include "qemu/log.h" ++#include "qapi/qmp/qobject.h" + + /* + * To prevent flooding clients, events can be throttled. The +@@ -787,6 +789,33 @@ int monitor_init_opts(QemuOpts *opts, Error **errp) + return ret; + } + ++void monitor_qapi_event_discard_io_error(void) ++{ ++ GHashTableIter event_iter; ++ MonitorQAPIEventState *evstate; ++ gpointer key, value; ++ GString *json; ++ ++ qemu_mutex_lock(&monitor_lock); ++ g_hash_table_iter_init(&event_iter, monitor_qapi_event_state); ++ while (g_hash_table_iter_next(&event_iter, &key, &value)) { ++ evstate = key; ++ /* Only QAPI_EVENT_BLOCK_IO_ERROR is discarded */ ++ if (evstate->event == QAPI_EVENT_BLOCK_IO_ERROR) { ++ g_hash_table_iter_remove(&event_iter); ++ json = qobject_to_json(QOBJECT(evstate->qdict)); ++ qemu_log(" %s event discarded\n", json->str); ++ timer_del(evstate->timer); ++ timer_free(evstate->timer); ++ qobject_unref(evstate->data); ++ qobject_unref(evstate->qdict); ++ g_string_free(json, true); ++ g_free(evstate); ++ } ++ } ++ qemu_mutex_unlock(&monitor_lock); ++} ++ + QemuOptsList qemu_mon_opts = { + .name = "mon", + .implied_opt_name = "chardev", +diff --git a/system/runstate.c b/system/runstate.c +index 9d3f627fee..62e6db8d42 100644 +--- a/system/runstate.c ++++ b/system/runstate.c +@@ -503,6 +503,7 @@ void qemu_system_reset(ShutdownCause reason) + qapi_event_send_reset(shutdown_caused_by_guest(reason), reason); + } + cpu_synchronize_all_post_reset(); ++ monitor_qapi_event_discard_io_error(); + } + + /* +-- +2.27.0 + diff --git a/monitor-qmp-drop-inflight-rsp-if-qmp-client-broken.patch b/monitor-qmp-drop-inflight-rsp-if-qmp-client-broken.patch new file mode 100644 index 
0000000000000000000000000000000000000000..8dc8fc1219659e8010693e153e50fbbb4d13301c --- /dev/null +++ b/monitor-qmp-drop-inflight-rsp-if-qmp-client-broken.patch @@ -0,0 +1,111 @@ +From c6b183a4c3c63454dea39be26b0fb773ec04887e Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 9 Feb 2022 14:13:05 +0800 +Subject: [PATCH] monitor/qmp: drop inflight rsp if qmp client broken + +If libvirt restart while qemu is handle qmp message, libvirt will +reconnect qemu monitor socket, and query status of qemu by qmp. +But qemu may return last qmp respond to new connect socket, and libvirt +recv unexpected respond, So libvirt think qemu is abnormal, and will +kill qemu. + +This patch add qmp connect id, while reconnect id will change. While +respond to libvirt, judge if id is same, if not, drop this respond. +--- + monitor/monitor-internal.h | 1 + + monitor/qmp.c | 19 +++++++++++-------- + 2 files changed, 12 insertions(+), 8 deletions(-) + +diff --git a/monitor/monitor-internal.h b/monitor/monitor-internal.h +index 252de85681..d7842fa464 100644 +--- a/monitor/monitor-internal.h ++++ b/monitor/monitor-internal.h +@@ -144,6 +144,7 @@ typedef struct { + const QmpCommandList *commands; + bool capab_offered[QMP_CAPABILITY__MAX]; /* capabilities offered */ + bool capab[QMP_CAPABILITY__MAX]; /* offered and accepted */ ++ uint64_t qmp_client_id; /*qmp client id, update if peer disconnect */ + /* + * Protects qmp request/response queue. + * Take monitor_lock first when you need both. +diff --git a/monitor/qmp.c b/monitor/qmp.c +index 6eee450fe4..8f7671c5f1 100644 +--- a/monitor/qmp.c ++++ b/monitor/qmp.c +@@ -149,18 +149,19 @@ void qmp_send_response(MonitorQMP *mon, const QDict *rsp) + * Null @rsp can only happen for commands with QCO_NO_SUCCESS_RESP. + * Nothing is emitted then. 
+ */ +-static void monitor_qmp_respond(MonitorQMP *mon, QDict *rsp) ++static void monitor_qmp_respond(MonitorQMP *mon, QDict *rsp, uint64_t req_client_id) + { +- if (rsp) { +- qmp_send_response(mon, rsp); ++ if (!rsp || (mon->qmp_client_id != req_client_id)) { ++ return; + } ++ qmp_send_response(mon, rsp); + } + + /* + * Runs outside of coroutine context for OOB commands, but in + * coroutine context for everything else. + */ +-static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req) ++static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req, uint64_t req_client_id) + { + QDict *rsp; + QDict *error; +@@ -180,7 +181,7 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req) + } + } + +- monitor_qmp_respond(mon, rsp); ++ monitor_qmp_respond(mon, rsp, req_client_id); + qobject_unref(rsp); + } + +@@ -340,13 +341,13 @@ void coroutine_fn monitor_qmp_dispatcher_co(void *data) + trace_monitor_qmp_cmd_in_band(id_json->str); + g_string_free(id_json, true); + } +- monitor_qmp_dispatch(mon, req_obj->req); ++ monitor_qmp_dispatch(mon, req_obj->req, mon->qmp_client_id); + } else { + assert(req_obj->err); + trace_monitor_qmp_err_in_band(error_get_pretty(req_obj->err)); + rsp = qmp_error_response(req_obj->err); + req_obj->err = NULL; +- monitor_qmp_respond(mon, rsp); ++ monitor_qmp_respond(mon, rsp, mon->qmp_client_id); + qobject_unref(rsp); + } + +@@ -402,7 +403,7 @@ static void handle_qmp_command(void *opaque, QObject *req, Error *err) + trace_monitor_qmp_cmd_out_of_band(id_json->str); + g_string_free(id_json, true); + } +- monitor_qmp_dispatch(mon, req); ++ monitor_qmp_dispatch(mon, req, mon->qmp_client_id); + qobject_unref(req); + return; + } +@@ -486,6 +487,7 @@ static void monitor_qmp_event(void *opaque, QEMUChrEvent event) + mon_refcount++; + break; + case CHR_EVENT_CLOSED: ++ mon->qmp_client_id++; + /* + * Note: this is only useful when the output of the chardev + * backend is still open. 
For example, when the backend is +@@ -539,6 +541,7 @@ void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp) + } + qemu_chr_fe_set_echo(&mon->common.chr, true); + ++ mon->qmp_client_id = 1; + /* Note: we run QMP monitor in I/O thread when @chr supports that */ + monitor_data_init(&mon->common, true, false, + qemu_chr_has_feature(chr, QEMU_CHAR_FEATURE_GCONTEXT)); +-- +2.27.0 + diff --git a/nbd-server.c-fix-invalid-read-after-client-was-alrea.patch b/nbd-server.c-fix-invalid-read-after-client-was-alrea.patch new file mode 100644 index 0000000000000000000000000000000000000000..350339ecb2ab482936e5a700347cf01556c04051 --- /dev/null +++ b/nbd-server.c-fix-invalid-read-after-client-was-alrea.patch @@ -0,0 +1,45 @@ +From 81b4091eee81fe3871d836b1a684e27828cdc2be Mon Sep 17 00:00:00 2001 +From: WangJian +Date: Wed, 9 Feb 2022 10:42:33 +0800 +Subject: [PATCH] nbd/server.c: fix invalid read after client was already free + +In the process of NBD equipment pressurization, executing QEMU NBD will +lead to the failure of IO distribution and go to NBD_ Out process of trip(). +If two or more IO go to the out process, client NBD will release in nbd_request_put(). +The user after free problem that is read again in close(). +Through the NBD_ Save the value of client > closing before the out process in trip +to solve the use after free problem. 
+ +Signed-off-by: wangjian161 +--- + nbd/server.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/nbd/server.c b/nbd/server.c +index 895cf0a752..e8baed9705 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -2939,6 +2939,7 @@ static coroutine_fn void nbd_trip(void *opaque) + NBDRequestData *req; + NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */ + int ret; ++ bool client_closing; + Error *local_err = NULL; + + trace_nbd_trip(); +@@ -3023,8 +3024,11 @@ disconnect: + if (local_err) { + error_reportf_err(local_err, "Disconnect client, due to: "); + } ++ client_closing = client->closing; + nbd_request_put(req); +- client_close(client, true); ++ if (!client_closing) { ++ client_close(client, true); ++ } + nbd_client_put(client); + } + +-- +2.27.0 + diff --git a/net-dump.c-Suppress-spurious-compiler-warning.patch b/net-dump.c-Suppress-spurious-compiler-warning.patch new file mode 100644 index 0000000000000000000000000000000000000000..c35ee6f95844981753fcfd9b57510d7131817685 --- /dev/null +++ b/net-dump.c-Suppress-spurious-compiler-warning.patch @@ -0,0 +1,51 @@ +From 6999f07558308ee6b7d63e46ca554a0b702948d6 Mon Sep 17 00:00:00 2001 +From: liuxiangdong +Date: Tue, 8 Feb 2022 15:10:25 +0800 +Subject: [PATCH] net/dump.c: Suppress spurious compiler warning +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Compiling with gcc version 11.2.0 (Ubuntu 11.2.0-13ubuntu1) results in +a (spurious) warning: + + In function ‘dump_receive_iov’, + inlined from ‘filter_dump_receive_iov’ at ../net/dump.c:157:5: + ../net/dump.c:89:9: error: ‘writev’ specified size 18446744073709551600 +exceeds maximum object size 9223372036854775807 [-Werror=stringop-overflow=] + 89 | if (writev(s->fd, dumpiov, cnt + 1) != sizeof(hdr) + caplen) { + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + In file included from /home/ptomsich/qemu/include/qemu/osdep.h:108, + from ../net/dump.c:25: + ../net/dump.c: In function 
‘filter_dump_receive_iov’: + /usr/include/x86_64-linux-gnu/sys/uio.h:52:16: note: in a call to function +‘writev’ declared with attribute ‘read_only (2, 3)’ + 52 | extern ssize_t writev (int __fd, const struct iovec *__iovec, int +__count) + | ^~~~~~ + cc1: all warnings being treated as errors + +This change helps that version of GCC to understand what is going on +and suppresses this warning. + +Signed-off-by: Philipp Tomsich +--- + net/dump.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/dump.c b/net/dump.c +index 16073f2458..d880a7e299 100644 +--- a/net/dump.c ++++ b/net/dump.c +@@ -87,7 +87,7 @@ static ssize_t dump_receive_iov(DumpState *s, const struct iovec *iov, int cnt, + dumpiov[0].iov_len = sizeof(hdr); + cnt = iov_copy(&dumpiov[1], cnt, iov, cnt, offset, caplen); + +- if (writev(s->fd, dumpiov, cnt + 1) != sizeof(hdr) + caplen) { ++ if (writev(s->fd, &dumpiov[0], cnt + 1) != sizeof(hdr) + caplen) { + error_report("network dump write error - stopping dump"); + close(s->fd); + s->fd = -1; +-- +2.27.0 + diff --git a/net-eepro100-validate-various-address-valuesi-CVE-20.patch b/net-eepro100-validate-various-address-valuesi-CVE-20.patch new file mode 100644 index 0000000000000000000000000000000000000000..dc259d62847299da04ed6a498ce88bdbc9896931 --- /dev/null +++ b/net-eepro100-validate-various-address-valuesi-CVE-20.patch @@ -0,0 +1,58 @@ +From 6e6215b3ad0c8eac918bca9e2b5bb661e27f2fed Mon Sep 17 00:00:00 2001 +From: zhouli57 +Date: Sat, 18 Dec 2021 09:39:57 +0800 +Subject: [PATCH] net: eepro100: validate various address + valuesi(CVE-2021-20255) + +fix CVE-2021-20255 + +patch link: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg06098.html + +fix CVE-2021-20255, sync patch from ostms platform. 
+ +Signed-off-by: zhouli57 +Signed-off-by: Yan Wang +--- + hw/net/eepro100.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c +index 69e1c4bb89..f6204ec059 100644 +--- a/hw/net/eepro100.c ++++ b/hw/net/eepro100.c +@@ -279,6 +279,9 @@ typedef struct { + /* Quasi static device properties (no need to save them). */ + uint16_t stats_size; + bool has_extended_tcb_support; ++ ++ /* Flag to avoid recursions. */ ++ bool busy; + } EEPRO100State; + + /* Word indices in EEPROM. */ +@@ -844,6 +847,14 @@ static void action_command(EEPRO100State *s) + Therefore we limit the number of iterations. */ + unsigned max_loop_count = 16; + ++ if (s->busy) { ++ /* Prevent recursions. */ ++ logout("recursion in %s:%u\n", __FILE__, __LINE__); ++ return; ++ } ++ ++ s->busy = true; ++ + for (;;) { + bool bit_el; + bool bit_s; +@@ -940,6 +951,7 @@ static void action_command(EEPRO100State *s) + } + TRACE(OTHER, logout("CU list empty\n")); + /* List is empty. Now CU is idle or suspended. */ ++ s->busy = false; + } + + static void eepro100_cu_command(EEPRO100State * s, uint8_t val) +-- +2.27.0 + diff --git a/oslib-posix-optimise-vm-startup-time-for-1G-hugepage.patch b/oslib-posix-optimise-vm-startup-time-for-1G-hugepage.patch new file mode 100644 index 0000000000000000000000000000000000000000..d25778669c3421fb77c4a787d5852d1f708738e8 --- /dev/null +++ b/oslib-posix-optimise-vm-startup-time-for-1G-hugepage.patch @@ -0,0 +1,57 @@ +From b6c45f5ea5d1a379ac0a507cf59345c573b27cc8 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 9 Feb 2022 14:21:39 +0800 +Subject: [PATCH] oslib-posix: optimise vm startup time for 1G hugepage + +It takes quite a long time to clear 1G-hugepage, which makes glibc +pthread_create quite slow. +Create touch_pages threads in advance, and then handle the touch_pages +callback. Only read lock is held here. 
+--- + util/oslib-posix.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/util/oslib-posix.c b/util/oslib-posix.c +index e86fd64e09..9ca3fee2b8 100644 +--- a/util/oslib-posix.c ++++ b/util/oslib-posix.c +@@ -88,6 +88,8 @@ static QemuMutex sigbus_mutex; + static QemuMutex page_mutex; + static QemuCond page_cond; + ++static int started_num_threads; ++ + int qemu_get_thread_id(void) + { + #if defined(__linux__) +@@ -344,6 +346,10 @@ static void *do_touch_pages(void *arg) + } + qemu_mutex_unlock(&page_mutex); + ++ while (started_num_threads != memset_args->context.num_threads) { ++ smp_mb(); ++ } ++ + /* unblock SIGBUS */ + sigemptyset(&set); + sigaddset(&set, SIGBUS); +@@ -448,7 +454,7 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages, + context.threads = g_new0(MemsetThread, context.num_threads); + numpages_per_thread = numpages / context.num_threads; + leftover = numpages % context.num_threads; +- for (i = 0; i < context.num_threads; i++) { ++ for (i = 0, started_num_threads = 0; i < context.num_threads; i++) { + context.threads[i].addr = addr; + context.threads[i].numpages = numpages_per_thread + (i < leftover); + context.threads[i].hpagesize = hpagesize; +@@ -464,6 +470,7 @@ static int touch_all_pages(char *area, size_t hpagesize, size_t numpages, + QEMU_THREAD_JOINABLE); + } + addr += context.threads[i].numpages * hpagesize; ++ started_num_threads++; + } + + if (!use_madv_populate_write) { +-- +2.27.0 + diff --git a/pcie-Add-pcie-root-port-fast-plug-unplug-feature.patch b/pcie-Add-pcie-root-port-fast-plug-unplug-feature.patch new file mode 100644 index 0000000000000000000000000000000000000000..d86d8ab72d3851993421c2d3e778a5508d351177 --- /dev/null +++ b/pcie-Add-pcie-root-port-fast-plug-unplug-feature.patch @@ -0,0 +1,99 @@ +From 3c4b4c4fc3c71b375490233bb9209763d7094ee9 Mon Sep 17 00:00:00 2001 +From: Yan Wang +Date: Tue, 8 Feb 2022 16:10:31 +0800 +Subject: [PATCH] pcie: Add pcie-root-port fast plug/unplug 
feature + +If a device is plugged in the pcie-root-port when VM kernel is +booting, the kernel may wrongly disable the device. +This bug was brought in by two patches of the linux kernel: + +https://patchwork.kernel.org/patch/10575355/ +https://patchwork.kernel.org/patch/10766219/ + +VM runtime like kata uses this feature to boot microVM, +so we must fix it up. We hack into the pcie native hotplug +patch so that hotplug/unplug will work under this circumstance. + +Signed-off-by: Ying Fang +Signed-off-by: Yan Wang +--- + hw/core/machine.c | 2 ++ + hw/pci-bridge/gen_pcie_root_port.c | 2 ++ + hw/pci/pcie.c | 13 ++++++++++++- + include/hw/pci/pcie_port.h | 3 +++ + 4 files changed, 19 insertions(+), 1 deletion(-) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 0c17398141..965682619b 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -160,6 +160,8 @@ const size_t hw_compat_4_0_len = G_N_ELEMENTS(hw_compat_4_0); + GlobalProperty hw_compat_3_1[] = { + { "pcie-root-port", "x-speed", "2_5" }, + { "pcie-root-port", "x-width", "1" }, ++ { "pcie-root-port", "fast-plug", "0" }, ++ { "pcie-root-port", "fast-unplug", "0" }, + { "memory-backend-file", "x-use-canonical-path-for-ramblock-id", "true" }, + { "memory-backend-memfd", "x-use-canonical-path-for-ramblock-id", "true" }, + { "tpm-crb", "ppi", "false" }, +diff --git a/hw/pci-bridge/gen_pcie_root_port.c b/hw/pci-bridge/gen_pcie_root_port.c +index 1ce4e7beba..1e1ab5bb19 100644 +--- a/hw/pci-bridge/gen_pcie_root_port.c ++++ b/hw/pci-bridge/gen_pcie_root_port.c +@@ -145,6 +145,8 @@ static Property gen_rp_props[] = { + speed, PCIE_LINK_SPEED_16), + DEFINE_PROP_PCIE_LINK_WIDTH("x-width", PCIESlot, + width, PCIE_LINK_WIDTH_32), ++ DEFINE_PROP_UINT8("fast-plug", PCIESlot, fast_plug, 0), ++ DEFINE_PROP_UINT8("fast-unplug", PCIESlot, fast_unplug, 0), + DEFINE_PROP_END_OF_LIST() + }; + +diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c +index dccf204451..04fbd794a8 100644 +--- a/hw/pci/pcie.c ++++ b/hw/pci/pcie.c +@@ 
-555,6 +555,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, + uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; + uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); + uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL); ++ PCIESlot *s = PCIE_SLOT(hotplug_pdev); + + /* Check if hot-unplug is disabled on the slot */ + if ((sltcap & PCI_EXP_SLTCAP_HPC) == 0) { +@@ -600,7 +601,17 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, + return; + } + +- pcie_cap_slot_push_attention_button(hotplug_pdev); ++ if ((pci_dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) && s->fast_plug) { ++ pci_word_test_and_clear_mask(pci_dev->config + pci_dev->exp.exp_cap + PCI_EXP_LNKSTA, ++ PCI_EXP_LNKSTA_DLLLA); ++ } ++ ++ if (s->fast_unplug) { ++ pcie_cap_slot_event(hotplug_pdev, ++ PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP); ++ } else { ++ pcie_cap_slot_push_attention_button(hotplug_pdev); ++ } + } + + /* pci express slot for pci express root/downstream port +diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h +index 90e6cf45b8..7148a0959b 100644 +--- a/include/hw/pci/pcie_port.h ++++ b/include/hw/pci/pcie_port.h +@@ -56,6 +56,9 @@ struct PCIESlot { + uint8_t chassis; + uint16_t slot; + ++ uint8_t fast_plug; ++ uint8_t fast_unplug; ++ + PCIExpLinkSpeed speed; + PCIExpLinkWidth width; + +-- +2.27.0 + diff --git a/pcie-Compat-with-devices-which-do-not-support-Link-W.patch b/pcie-Compat-with-devices-which-do-not-support-Link-W.patch new file mode 100644 index 0000000000000000000000000000000000000000..9897182af472d0d12369ed8b945d7b380730df9b --- /dev/null +++ b/pcie-Compat-with-devices-which-do-not-support-Link-W.patch @@ -0,0 +1,50 @@ +From 6c72e65d57dc2a7d811f76a126a9a006abd0ab75 Mon Sep 17 00:00:00 2001 +From: fangying +Date: Wed, 18 Mar 2020 12:51:33 +0800 +Subject: [PATCH] pcie: Compat with devices which do not support Link Width, + such as ioh3420 + +We hack into PCI_EXP_LNKCAP to support device fast 
plug/unplug +for pcie-root-port. However some devices like ioh3420 do not +support it, so PCI_EXP_LNKCAP is not set for such devices. + +Signed-off-by: Ying Fang +Signed-off-by: Yan Wang +--- + hw/pci/pcie.c | 13 ++++++------- + 1 file changed, 6 insertions(+), 7 deletions(-) + +diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c +index 6db0cf69cd..dccf204451 100644 +--- a/hw/pci/pcie.c ++++ b/hw/pci/pcie.c +@@ -97,13 +97,6 @@ static void pcie_cap_fill_slot_lnk(PCIDevice *dev) + return; + } + +- /* Clear and fill LNKCAP from what was configured above */ +- pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP, +- PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS); +- pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, +- QEMU_PCI_EXP_LNKCAP_MLW(s->width) | +- QEMU_PCI_EXP_LNKCAP_MLS(s->speed)); +- + /* + * Link bandwidth notification is required for all root ports and + * downstream ports supporting links wider than x1 or multiple link +@@ -111,6 +104,12 @@ static void pcie_cap_fill_slot_lnk(PCIDevice *dev) + */ + if (s->width > QEMU_PCI_EXP_LNK_X1 || + s->speed > QEMU_PCI_EXP_LNK_2_5GT) { ++ /* Clear and fill LNKCAP from what was configured above */ ++ pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP, ++ PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS); ++ pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, ++ QEMU_PCI_EXP_LNKCAP_MLW(s->width) | ++ QEMU_PCI_EXP_LNKCAP_MLS(s->speed)); + pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP, + PCI_EXP_LNKCAP_LBNC); + } +-- +2.27.0 + diff --git a/pcie_sriov-Validate-NumVFs-CVE-2024-26327.patch b/pcie_sriov-Validate-NumVFs-CVE-2024-26327.patch new file mode 100644 index 0000000000000000000000000000000000000000..015ba30eb31cfadfcbd25abccb7a00172f29dabe --- /dev/null +++ b/pcie_sriov-Validate-NumVFs-CVE-2024-26327.patch @@ -0,0 +1,37 @@ +From 632ec38ed57b76baf3e499d1789aeea0f74df0a5 Mon Sep 17 00:00:00 2001 +From: Akihiko Odaki +Date: Wed, 28 Feb 2024 20:33:13 +0900 +Subject: [PATCH] pcie_sriov: Validate NumVFs (CVE-2024-26327) + +The guest 
may write NumVFs greater than TotalVFs and that can lead +to buffer overflow in VF implementations. + +Cc: qemu-stable@nongnu.org +Fixes: CVE-2024-26327 +Fixes: 7c0fa8dff811 ("pcie: Add support for Single Root I/O Virtualization (SR/IOV)") +Signed-off-by: Akihiko Odaki +Message-Id: <20240228-reuse-v8-2-282660281e60@daynix.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Sriram Yagnaraman +--- + hw/pci/pcie_sriov.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c +index a1fe65f5d8..da209b7f47 100644 +--- a/hw/pci/pcie_sriov.c ++++ b/hw/pci/pcie_sriov.c +@@ -176,6 +176,9 @@ static void register_vfs(PCIDevice *dev) + + assert(sriov_cap > 0); + num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); ++ if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) { ++ return; ++ } + + dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs); + +-- +2.27.0 + diff --git a/physmem-gdbstub-Common-helping-funcs-changes-to-unre.patch b/physmem-gdbstub-Common-helping-funcs-changes-to-unre.patch new file mode 100644 index 0000000000000000000000000000000000000000..696a9a49357b61cd9d645c657c5aa38830276e88 --- /dev/null +++ b/physmem-gdbstub-Common-helping-funcs-changes-to-unre.patch @@ -0,0 +1,127 @@ +From 8fa5af7de07d9bc2535ea8fab087d509795e3579 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sun, 6 Aug 2023 22:12:52 +0000 +Subject: [PATCH] physmem,gdbstub: Common helping funcs/changes to *unrealize* + vCPU + +Supporting vCPU Hotplug for ARM arch also means introducing new functionality of +unrealizing the ARMCPU. This requires some new common functions. + +Defining them as part of architecture independent change so that this code could +be reused by other interested parties. 
+ +Signed-off-by: Salil Mehta +--- + gdbstub/gdbstub.c | 6 ++++++ + include/exec/cpu-common.h | 8 ++++++++ + include/exec/gdbstub.h | 1 + + include/hw/core/cpu.h | 1 + + system/physmem.c | 25 +++++++++++++++++++++++++ + 5 files changed, 41 insertions(+) + +diff --git a/gdbstub/gdbstub.c b/gdbstub/gdbstub.c +index 46d752bbc2..f16006d2a8 100644 +--- a/gdbstub/gdbstub.c ++++ b/gdbstub/gdbstub.c +@@ -582,6 +582,12 @@ void gdb_register_coprocessor(CPUState *cpu, + } + } + ++void gdb_unregister_coprocessor_all(CPUState *cpu) ++{ ++ g_array_free(cpu->gdb_regs, true); ++ cpu->gdb_regs = NULL; ++} ++ + static void gdb_process_breakpoint_remove_all(GDBProcess *p) + { + CPUState *cpu = gdb_get_first_cpu_in_process(p); +diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h +index 41115d8919..2a3d4aa1c8 100644 +--- a/include/exec/cpu-common.h ++++ b/include/exec/cpu-common.h +@@ -139,6 +139,14 @@ size_t qemu_ram_pagesize_largest(void); + */ + void cpu_address_space_init(CPUState *cpu, int asidx, + const char *prefix, MemoryRegion *mr); ++/** ++ * cpu_address_space_destroy: ++ * @cpu: CPU for which address space needs to be destroyed ++ * @asidx: integer index of this address space ++ * ++ * Note that with KVM only one address space is supported. 
++ */ ++void cpu_address_space_destroy(CPUState *cpu, int asidx); + + void cpu_physical_memory_rw(hwaddr addr, void *buf, + hwaddr len, bool is_write); +diff --git a/include/exec/gdbstub.h b/include/exec/gdbstub.h +index d8a3c56fa2..d123b838c2 100644 +--- a/include/exec/gdbstub.h ++++ b/include/exec/gdbstub.h +@@ -39,6 +39,7 @@ typedef int (*gdb_set_reg_cb)(CPUArchState *env, uint8_t *buf, int reg); + void gdb_register_coprocessor(CPUState *cpu, + gdb_get_reg_cb get_reg, gdb_set_reg_cb set_reg, + int num_regs, const char *xml, int g_pos); ++void gdb_unregister_coprocessor_all(CPUState *cpu); + + /** + * gdbserver_start: start the gdb server +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index 0ca778eb75..6dbe163548 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -496,6 +496,7 @@ struct CPUState { + QSIMPLEQ_HEAD(, qemu_work_item) work_list; + + CPUAddressSpace *cpu_ases; ++ int cpu_ases_ref_count; + int num_ases; + AddressSpace *as; + MemoryRegion *memory; +diff --git a/system/physmem.c b/system/physmem.c +index 247c252e53..299174ad91 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -761,6 +761,7 @@ void cpu_address_space_init(CPUState *cpu, int asidx, + + if (!cpu->cpu_ases) { + cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases); ++ cpu->cpu_ases_ref_count = cpu->num_ases; + } + + newas = &cpu->cpu_ases[asidx]; +@@ -774,6 +775,30 @@ void cpu_address_space_init(CPUState *cpu, int asidx, + } + } + ++void cpu_address_space_destroy(CPUState *cpu, int asidx) ++{ ++ CPUAddressSpace *cpuas; ++ ++ assert(asidx < cpu->num_ases); ++ assert(asidx == 0 || !kvm_enabled()); ++ assert(cpu->cpu_ases); ++ ++ cpuas = &cpu->cpu_ases[asidx]; ++ if (tcg_enabled()) { ++ memory_listener_unregister(&cpuas->tcg_as_listener); ++ } ++ ++ address_space_destroy(cpuas->as); ++ g_free_rcu(cpuas->as, rcu); ++ ++ if (cpu->cpu_ases_ref_count == 1) { ++ g_free(cpu->cpu_ases); ++ cpu->cpu_ases = NULL; ++ } ++ ++ cpu->cpu_ases_ref_count--; ++} ++ + 
AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx) + { + /* Return the AddressSpace corresponding to the specified index */ +-- +2.27.0 + diff --git a/pl011-reset-read-FIFO-when-UARTTIMSC-0-UARTICR-0xfff.patch b/pl011-reset-read-FIFO-when-UARTTIMSC-0-UARTICR-0xfff.patch new file mode 100644 index 0000000000000000000000000000000000000000..d879b781bc47781c33814d8be006901a62fa5c80 --- /dev/null +++ b/pl011-reset-read-FIFO-when-UARTTIMSC-0-UARTICR-0xfff.patch @@ -0,0 +1,42 @@ +From e730214f4485ad444d8a1db9a284da53f407e8da Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Mon, 29 Jul 2019 16:16:35 +0800 +Subject: [PATCH] pl011: reset read FIFO when UARTTIMSC=0 & UARTICR=0xffff + +We can enable ACPI when AArch64 Linux is booted with QEMU and UEFI (AAVMF). +When VM is booting and the SBSA driver has not initialized, writing data +that exceeds 32 bytes will cause the read FIFO full and subsequent data will +be lost. The serial port appears to be stuck in this abnormal situation. + +A hack to reset read FIFO when UARTTIMSC=0 & UARTICR=0xffff appears to +resolve the issue. 
+ +The question is fully discussed at +https://www.spinics.net/lists/linux-serial/msg23163.html + +Signed-off-by: Haibin Wang +Reviewed-by: Shannon Zhao +Reviewed-by: Ying Fang +Signed-off-by: Yan Wang +--- + hw/char/pl011.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/hw/char/pl011.c b/hw/char/pl011.c +index 58edeb9ddb..bc65d778d2 100644 +--- a/hw/char/pl011.c ++++ b/hw/char/pl011.c +@@ -314,6 +314,10 @@ static void pl011_write(void *opaque, hwaddr offset, + case 17: /* UARTICR */ + s->int_level &= ~value; + pl011_update(s); ++ if (!s->int_enabled && !s->int_level) { ++ s->read_count = 0; ++ s->read_pos = 0; ++ } + break; + case 18: /* UARTDMACR */ + s->dmacr = value; +-- +2.27.0 + diff --git a/pl031-support-rtc-timer-property-for-pl031.patch b/pl031-support-rtc-timer-property-for-pl031.patch new file mode 100644 index 0000000000000000000000000000000000000000..48f4d1bad790b6041e6696cd3912e6c77fdeab8e --- /dev/null +++ b/pl031-support-rtc-timer-property-for-pl031.patch @@ -0,0 +1,71 @@ +From 8e30e81c4268103d502587de565842b9632a7965 Mon Sep 17 00:00:00 2001 +From: Jinhao Gao +Date: Tue, 15 Feb 2022 17:02:08 +0800 +Subject: [PATCH] pl031: support rtc-timer property for pl031 + +This patch adds the rtc-timer property for pl031, we can get the +rtc time (UTC) through qmp command "qom-get date" with this property. 
+ +Signed-off-by: Haibin Wang +Reviewed-by: Shannon Zhao +Reviewed-by: Ying Fang +Signed-off-by: Keqian Zhu +Signed-off-by: Jinhao Gao +Signed-off-by: Yuan Zhang +--- + hw/rtc/pl031.c | 25 +++++++++++++++++++++++++ + 1 file changed, 25 insertions(+) + +diff --git a/hw/rtc/pl031.c b/hw/rtc/pl031.c +index f2e6baebba..57e9a35616 100644 +--- a/hw/rtc/pl031.c ++++ b/hw/rtc/pl031.c +@@ -63,6 +63,15 @@ static uint32_t pl031_get_count(PL031State *s) + return s->tick_offset + now / NANOSECONDS_PER_SECOND; + } + ++static void pl031_get_date(Object *obj, struct tm *current_tm, Error **errp) ++{ ++ PL031State *s = PL031(obj); ++ time_t ti = pl031_get_count(s); ++ ++ /* Changed to UTC time */ ++ gmtime_r(&ti, current_tm); ++} ++ + static void pl031_set_alarm(PL031State *s) + { + uint32_t ticks; +@@ -202,6 +211,20 @@ static void pl031_init(Object *obj) + qemu_clock_get_ns(rtc_clock) / NANOSECONDS_PER_SECOND; + + s->timer = timer_new_ns(rtc_clock, pl031_interrupt, s); ++ object_property_add_tm(OBJECT(s), "date", pl031_get_date); ++} ++ ++static void pl031_realize(DeviceState *d, Error **errp) ++{ ++ object_property_add_alias(qdev_get_machine(), "rtc-time", ++ OBJECT(d), "date"); ++} ++ ++static void pl031_unrealize(DeviceState *d) ++{ ++ if (object_property_find(qdev_get_machine(), "rtc-time")) { ++ object_property_del(qdev_get_machine(), "rtc-time"); ++ } + } + + static void pl031_finalize(Object *obj) +@@ -338,6 +361,8 @@ static void pl031_class_init(ObjectClass *klass, void *data) + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_pl031; ++ dc->realize = pl031_realize; ++ dc->unrealize = pl031_unrealize; + device_class_set_props(dc, pl031_properties); + } + +-- +2.27.0 + diff --git a/ps2-fix-oob-in-ps2-kbd.patch b/ps2-fix-oob-in-ps2-kbd.patch new file mode 100644 index 0000000000000000000000000000000000000000..0d0f4dea78e884ab9f376659daca9b6717a01661 --- /dev/null +++ b/ps2-fix-oob-in-ps2-kbd.patch @@ -0,0 +1,35 @@ +From 0a54d68547df3f276dc242b52d54e8549d0a84a0 
Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Wed, 9 Feb 2022 11:21:28 +0800 +Subject: [PATCH] ps2: fix oob in ps2 kbd + +fix oob in ps2 kbd +--- + hw/input/ps2.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/input/ps2.c b/hw/input/ps2.c +index c8fd23cf36..b647561069 100644 +--- a/hw/input/ps2.c ++++ b/hw/input/ps2.c +@@ -167,7 +167,7 @@ void ps2_queue_noirq(PS2State *s, int b) + } + + q->data[q->wptr] = b; +- if (++q->wptr == PS2_BUFFER_SIZE) { ++ if (++q->wptr >= PS2_BUFFER_SIZE) { + q->wptr = 0; + } + q->count++; +@@ -557,7 +557,7 @@ uint32_t ps2_read_data(PS2State *s) + val = q->data[index]; + } else { + val = q->data[q->rptr]; +- if (++q->rptr == PS2_BUFFER_SIZE) { ++ if (++q->rptr >= PS2_BUFFER_SIZE) { + q->rptr = 0; + } + q->count--; +-- +2.27.0 + diff --git a/qapi-block-core-Add-retry-option-for-error-action.patch b/qapi-block-core-Add-retry-option-for-error-action.patch new file mode 100644 index 0000000000000000000000000000000000000000..43154aecf644f1c2e98b9cbe795ab335488d3724 --- /dev/null +++ b/qapi-block-core-Add-retry-option-for-error-action.patch @@ -0,0 +1,63 @@ +From cfc15dc456126a6fb811f0c51af8d8ce5c4a4a1b Mon Sep 17 00:00:00 2001 +From: yexiao +Date: Thu, 21 Jan 2021 15:46:45 +0800 +Subject: [PATCH] qapi/block-core: Add retry option for error action + +Add a new error action 'retry' to support retry on errors. 
+ +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +Signed-off-by: Alex Chen +--- + blockdev.c | 2 ++ + qapi/block-core.json | 8 ++++++-- + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/blockdev.c b/blockdev.c +index c91f49e7b6..2817f73fad 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -326,6 +326,8 @@ static int parse_block_error_action(const char *buf, bool is_read, Error **errp) + return BLOCKDEV_ON_ERROR_STOP; + } else if (!strcmp(buf, "report")) { + return BLOCKDEV_ON_ERROR_REPORT; ++ } else if (!strcmp(buf, "retry")) { ++ return BLOCKDEV_ON_ERROR_RETRY; + } else { + error_setg(errp, "'%s' invalid %s error action", + buf, is_read ? "read" : "write"); +diff --git a/qapi/block-core.json b/qapi/block-core.json +index 1444624590..ded6f0f6d2 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -1286,10 +1286,12 @@ + # + # @auto: inherit the error handling policy of the backend (since: 2.7) + # ++# @retry: retrying IO with errors ++# + # Since: 1.3 + ## + { 'enum': 'BlockdevOnError', +- 'data': ['report', 'ignore', 'enospc', 'stop', 'auto'] } ++ 'data': ['report', 'ignore', 'enospc', 'stop', 'auto', 'retry'] } + + ## + # @MirrorSyncMode: +@@ -5480,10 +5482,12 @@ + # + # @stop: error caused VM to be stopped + # ++# @retry: retry IO with errors ++# + # Since: 2.1 + ## + { 'enum': 'BlockErrorAction', +- 'data': [ 'ignore', 'report', 'stop' ] } ++ 'data': [ 'ignore', 'report', 'stop', 'retry' ] } + + ## + # @BLOCK_IMAGE_CORRUPTED: +-- +2.27.0 + diff --git a/qdev-monitors-Fix-reundant-error_setg-of-qdev_add_de.patch b/qdev-monitors-Fix-reundant-error_setg-of-qdev_add_de.patch new file mode 100644 index 0000000000000000000000000000000000000000..4aa28bb0c5b9bed2bb8cc181af9fe5c046e5068e --- /dev/null +++ b/qdev-monitors-Fix-reundant-error_setg-of-qdev_add_de.patch @@ -0,0 +1,31 @@ +From 172d79d8ebb343fa144987d2c50d90655d5aa5f9 Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Thu, 29 Jul 2021 15:24:48 +0800 +Subject: [PATCH] 
qdev/monitors: Fix redundant error_setg of qdev_add_device + +There is an extra log "error_setg" in qdev_add_device(). When +hot-plugging a device, if the corresponding bus doesn't exist, it +will trigger an assertion "assert(*errp == NULL)". + +Fixes: 515a7970490 (log: Add some logs on VM runtime path) +Signed-off-by: Kunkun Jiang +Signed-off-by: Yan Wang +--- + system/qdev-monitor.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c +index c885175b66..b10e483a9a 100644 +--- a/system/qdev-monitor.c ++++ b/system/qdev-monitor.c +@@ -644,7 +644,6 @@ DeviceState *qdev_device_add_from_qdict(const QDict *opts, + if (path != NULL) { + bus = qbus_find(path, errp); + if (!bus) { +- error_setg(errp, "can not find bus for %s", driver); + return NULL; + } + if (!object_dynamic_cast(OBJECT(bus), dc->bus_type)) { +-- +2.27.0 + diff --git a/qemu-img-add-qemu-img-direct-create.patch b/qemu-img-add-qemu-img-direct-create.patch new file mode 100644 index 0000000000000000000000000000000000000000..74fcf3bf7c9b41789da4d4d6c06fcd059e4272ec --- /dev/null +++ b/qemu-img-add-qemu-img-direct-create.patch @@ -0,0 +1,534 @@ +From 422ac7d67a7ced985b1beef4b33cc43b48d1f240 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Mon, 18 Mar 2024 10:18:07 +0800 +Subject: [PATCH] qemu-img: add qemu-img direct create + +Introduce buffer_size while creating raw file, then we +can control the speed of direct write by: + qemu-img create -t 'cache' -o buffer_size='num' + +Signed-off-by: Jinhua Cao +--- + block/file-posix.c | 65 ++++++++++++++++++-- + include/block/block_int-common.h | 2 + + qapi/block-core.json | 6 +- + qemu-img-cmds.hx | 4 +- + qemu-img.c | 14 ++++- + tests/qemu-iotests/049.out | 102 +++++++++++++++---------------- + tests/qemu-iotests/099.out | 2 +- + 7 files changed, 134 insertions(+), 61 deletions(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 4782aba59f..4ac8f684f1 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c 
+@@ -128,6 +128,10 @@ + #define FTYPE_CD 1 + + #define MAX_BLOCKSIZE 4096 ++#define DEFAULT_BUFFER_SIZE 65536 ++#define BUFFER_ALIGN_SIZE 65536 ++#define MIN_BUFFER_SIZE 65536 ++#define MAX_BUFFER_SIZE 16777216 + + /* Posix file locking bytes. Libvirt takes byte 0, we start from higher bytes, + * leaving a few more bytes for its future use. */ +@@ -203,6 +207,8 @@ typedef struct RawPosixAIOData { + off_t aio_offset; + uint64_t aio_nbytes; + ++ size_t buffer_size; ++ + union { + struct { + struct iovec *iov; +@@ -2630,7 +2636,8 @@ static void raw_close(BlockDriverState *bs) + */ + static int coroutine_fn + raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset, +- PreallocMode prealloc, Error **errp) ++ PreallocMode prealloc, size_t buffer_size, ++ Error **errp) + { + RawPosixAIOData acb; + +@@ -2639,6 +2646,7 @@ raw_regular_truncate(BlockDriverState *bs, int fd, int64_t offset, + .aio_fildes = fd, + .aio_type = QEMU_AIO_TRUNCATE, + .aio_offset = offset, ++ .buffer_size = buffer_size, + .truncate = { + .prealloc = prealloc, + .errp = errp, +@@ -2664,7 +2672,8 @@ static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset, + + if (S_ISREG(st.st_mode)) { + /* Always resizes to the exact @offset */ +- return raw_regular_truncate(bs, s->fd, offset, prealloc, errp); ++ return raw_regular_truncate(bs, s->fd, offset, prealloc, ++ DEFAULT_BUFFER_SIZE, errp); + } + + if (prealloc != PREALLOC_MODE_OFF) { +@@ -2882,6 +2891,8 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) + int fd; + uint64_t perm, shared; + int result = 0; ++ int flags = O_RDWR | O_BINARY; ++ size_t buffer_size = DEFAULT_BUFFER_SIZE; + + /* Validate options and set default values */ + assert(options->driver == BLOCKDEV_DRIVER_FILE); +@@ -2901,9 +2912,19 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) + error_setg(errp, "Extent size hint is too large"); + goto out; + } ++ if (!file_opts->cache) { ++ file_opts->cache = g_strdup("writeback"); ++ } ++ if 
(file_opts->preallocation == PREALLOC_MODE_FULL && ++ !strcmp(file_opts->cache, "none")) { ++ flags |= O_DIRECT; ++ } ++ if (file_opts->has_buffersize) { ++ buffer_size = file_opts->buffersize; ++ } + + /* Create file */ +- fd = qemu_create(file_opts->filename, O_RDWR | O_BINARY, 0644, errp); ++ fd = qemu_create(file_opts->filename, flags, 0644, errp); + if (fd < 0) { + result = -errno; + goto out; +@@ -2938,7 +2959,8 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) + } + + /* Clear the file by truncating it to 0 */ +- result = raw_regular_truncate(NULL, fd, 0, PREALLOC_MODE_OFF, errp); ++ result = raw_regular_truncate(NULL, fd, 0, PREALLOC_MODE_OFF, ++ buffer_size, errp); + if (result < 0) { + goto out_unlock; + } +@@ -2982,7 +3004,8 @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) + /* Resize and potentially preallocate the file to the desired + * final size */ + result = raw_regular_truncate(NULL, fd, file_opts->size, +- file_opts->preallocation, errp); ++ file_opts->preallocation, ++ buffer_size, errp); + if (result < 0) { + goto out_unlock; + } +@@ -3003,6 +3026,8 @@ out_close: + error_setg_errno(errp, -result, "Could not close the new file"); + } + out: ++ g_free(file_opts->cache); ++ file_opts->cache = NULL; + return result; + } + +@@ -3018,6 +3043,8 @@ raw_co_create_opts(BlockDriver *drv, const char *filename, + PreallocMode prealloc; + char *buf = NULL; + Error *local_err = NULL; ++ size_t buffersize = DEFAULT_BUFFER_SIZE; ++ char *cache = NULL; + + /* Skip file: protocol prefix */ + strstart(filename, "file:", &filename); +@@ -3040,6 +3067,21 @@ raw_co_create_opts(BlockDriver *drv, const char *filename, + return -EINVAL; + } + ++ buffersize = qemu_opt_get_size_del(opts, BLOCK_OPT_BUFFER_SIZE, ++ DEFAULT_BUFFER_SIZE); ++ if (buffersize < MIN_BUFFER_SIZE || buffersize > MAX_BUFFER_SIZE) { ++ error_setg_errno(errp, EINVAL, "Buffer size must be between %d " ++ "and %d", MIN_BUFFER_SIZE, MAX_BUFFER_SIZE); ++ return -EINVAL; ++ } ++ 
++ cache = qemu_opt_get_del(opts, BLOCK_OPT_CACHE); ++ if (!cache) { ++ cache = g_strdup("writeback"); ++ } ++ ++ buffersize = ROUND_UP(buffersize, BUFFER_ALIGN_SIZE); ++ + options = (BlockdevCreateOptions) { + .driver = BLOCKDEV_DRIVER_FILE, + .u.file = { +@@ -3051,6 +3093,9 @@ raw_co_create_opts(BlockDriver *drv, const char *filename, + .nocow = nocow, + .has_extent_size_hint = has_extent_size_hint, + .extent_size_hint = extent_size_hint, ++ .has_buffersize = true, ++ .buffersize = buffersize, ++ .cache = cache, + }, + }; + return raw_co_create(&options, errp); +@@ -3741,6 +3786,16 @@ static QemuOptsList raw_create_opts = { + .type = QEMU_OPT_SIZE, + .help = "Extent size hint for the image file, 0 to disable" + }, ++ { ++ .name = BLOCK_OPT_CACHE, ++ .type = QEMU_OPT_STRING, ++ .help = "Cache mode (allowed values: writeback, none)" ++ }, ++ { ++ .name = BLOCK_OPT_BUFFER_SIZE, ++ .type = QEMU_OPT_SIZE, ++ .help = "write buffer size" ++ }, + { /* end of list */ } + } + }; +diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h +index 4e31d161c5..a6e2436524 100644 +--- a/include/block/block_int-common.h ++++ b/include/block/block_int-common.h +@@ -57,6 +57,8 @@ + #define BLOCK_OPT_DATA_FILE_RAW "data_file_raw" + #define BLOCK_OPT_COMPRESSION_TYPE "compression_type" + #define BLOCK_OPT_EXTL2 "extended_l2" ++#define BLOCK_OPT_CACHE "cache" ++#define BLOCK_OPT_BUFFER_SIZE "buffer_size" + + #define BLOCK_PROBE_BUF_SIZE 512 + +diff --git a/qapi/block-core.json b/qapi/block-core.json +index ca390c5700..1444624590 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -4906,6 +4906,8 @@ + # + # @extent-size-hint: Extent size hint to add to the image file; 0 for + # not adding an extent size hint (default: 1 MB, since 5.1) ++# @cache: Cache mode used to write the output disk image ++# @buffersize: Buffer size for creating image + # + # Since: 2.12 + ## +@@ -4914,7 +4916,9 @@ + 'size': 'size', + '*preallocation': 'PreallocMode', + 
'*nocow': 'bool', +- '*extent-size-hint': 'size'} } ++ '*extent-size-hint': 'size', ++ '*cache': 'str', ++ '*buffersize': 'size'} } + + ## + # @BlockdevCreateOptionsGluster: +diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx +index 068692d13e..20bdcd7b82 100644 +--- a/qemu-img-cmds.hx ++++ b/qemu-img-cmds.hx +@@ -52,9 +52,9 @@ SRST + ERST + + DEF("create", img_create, +- "create [--object objectdef] [-q] [-f fmt] [-b backing_file [-F backing_fmt]] [-u] [-o options] filename [size]") ++ "create [--object objectdef] [-q] [-f fmt] [-b backing_file [-F backing_fmt]] [-u] [-t cache] [-o options] filename [size]") + SRST +-.. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE [-F BACKING_FMT]] [-u] [-o OPTIONS] FILENAME [SIZE] ++.. option:: create [--object OBJECTDEF] [-q] [-f FMT] [-b BACKING_FILE [-F BACKING_FMT]] [-u] [-t CACHE] [-o OPTIONS] FILENAME [SIZE] + ERST + + DEF("dd", img_dd, +diff --git a/qemu-img.c b/qemu-img.c +index 5a77f67719..80adee2620 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -516,6 +516,7 @@ static int img_create(int argc, char **argv) + const char *base_fmt = NULL; + const char *filename; + const char *base_filename = NULL; ++ const char *cache = BDRV_DEFAULT_CACHE; + char *options = NULL; + Error *local_err = NULL; + bool quiet = false; +@@ -527,7 +528,7 @@ static int img_create(int argc, char **argv) + {"object", required_argument, 0, OPTION_OBJECT}, + {0, 0, 0, 0} + }; +- c = getopt_long(argc, argv, ":F:b:f:ho:qu", ++ c = getopt_long(argc, argv, ":F:b:f:t:ho:qu", + long_options, NULL); + if (c == -1) { + break; +@@ -551,6 +552,9 @@ static int img_create(int argc, char **argv) + case 'f': + fmt = optarg; + break; ++ case 't': ++ cache = optarg; ++ break; + case 'o': + if (accumulate_options(&options, optarg) < 0) { + goto fail; +@@ -594,6 +598,14 @@ static int img_create(int argc, char **argv) + error_exit("Unexpected argument: %s", argv[optind]); + } + ++ if (!options) { ++ options = g_strdup_printf(BLOCK_OPT_CACHE"=%s", 
cache); ++ } else { ++ char *old_options = options; ++ options = g_strdup_printf("%s,"BLOCK_OPT_CACHE"=%s", options, cache); ++ g_free(old_options); ++ } ++ + bdrv_img_create(filename, fmt, base_filename, base_fmt, + options, img_size, flags, quiet, &local_err); + if (local_err) { +diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out +index 34e1b452e6..b4a9705ec2 100644 +--- a/tests/qemu-iotests/049.out ++++ b/tests/qemu-iotests/049.out +@@ -4,90 +4,90 @@ QA output created by 049 + == 1. Traditional size parameter == + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1024 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1024b +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1k +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1K +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f 
qcow2 TEST_DIR/t.qcow2 1M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1048576 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1048576 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1G +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1073741824 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1073741824 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1T +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1099511627776 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1099511627776 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1024.0 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1024.0b +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1.5k +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1536 lazy_refcounts=off 
refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1536 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1.5K +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1536 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1536 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1.5M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1572864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1572864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1.5G +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1610612736 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1610612736 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 TEST_DIR/t.qcow2 1.5T +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1649267441664 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1649267441664 lazy_refcounts=off refcount_bits=16 cache=writeback + + == 2. 
Specifying size via -o == + + qemu-img create -f qcow2 -o size=1024 TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1024b TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1k TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1K TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1M TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1048576 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1048576 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1G TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off 
compression_type=zlib size=1073741824 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1073741824 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1T TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1099511627776 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1099511627776 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1024.0 TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1024.0b TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1024 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1.5k TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1536 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1536 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1.5K TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1536 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 
extended_l2=off compression_type=zlib size=1536 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1.5M TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1572864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1572864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1.5G TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1610612736 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1610612736 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o size=1.5T TEST_DIR/t.qcow2 +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1649267441664 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1649267441664 lazy_refcounts=off refcount_bits=16 cache=writeback + + == 3. 
Invalid sizes == + +@@ -132,84 +132,84 @@ qemu-img: TEST_DIR/t.qcow2: The image size must be specified only once + == Check correct interpretation of suffixes for cluster size == + + qemu-img create -f qcow2 -o cluster_size=1024 TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o cluster_size=1024b TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o cluster_size=1k TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o cluster_size=1K TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o cluster_size=1M TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1048576 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 
cluster_size=1048576 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o cluster_size=1024.0 TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o cluster_size=1024.0b TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=1024 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o cluster_size=0.5k TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=512 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=512 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o cluster_size=0.5K TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=512 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=512 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o cluster_size=0.5M TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=524288 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=524288 extended_l2=off compression_type=zlib 
size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + == Check compat level option == + + qemu-img create -f qcow2 -o compat=0.10 TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=0.10 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=0.10 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o compat=1.1 TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=1.1 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=1.1 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o compat=0.42 TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=0.42 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=0.42 lazy_refcounts=off refcount_bits=16 cache=writeback + qemu-img: TEST_DIR/t.qcow2: Parameter 'version' does not accept value '0.42' + + qemu-img create -f qcow2 -o compat=foobar TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=foobar lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=foobar lazy_refcounts=off refcount_bits=16 cache=writeback + qemu-img: TEST_DIR/t.qcow2: Parameter 'version' does not accept value 'foobar' + + == Check preallocation option == + + qemu-img create -f qcow2 -o preallocation=off TEST_DIR/t.qcow2 64M 
+-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off preallocation=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off preallocation=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o preallocation=metadata TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off preallocation=metadata compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off preallocation=metadata compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o preallocation=1234 TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off preallocation=1234 compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off preallocation=1234 compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + qemu-img: TEST_DIR/t.qcow2: Parameter 'preallocation' does not accept value '1234' + + == Check encryption option == + + qemu-img create -f qcow2 -o encryption=off TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 encryption=off cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 encryption=off cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 --object secret,id=sec0,data=123456 -o encryption=on,encrypt.key-secret=sec0 TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 encryption=on encrypt.key-secret=sec0 cluster_size=65536 extended_l2=off 
compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 encryption=on encrypt.key-secret=sec0 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 cache=writeback + + == Check lazy_refcounts option (only with v3) == + + qemu-img create -f qcow2 -o compat=1.1,lazy_refcounts=off TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=1.1 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=1.1 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o compat=1.1,lazy_refcounts=on TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=1.1 lazy_refcounts=on refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=1.1 lazy_refcounts=on refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o compat=0.10,lazy_refcounts=off TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=0.10 lazy_refcounts=off refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=0.10 lazy_refcounts=off refcount_bits=16 cache=writeback + + qemu-img create -f qcow2 -o compat=0.10,lazy_refcounts=on TEST_DIR/t.qcow2 64M +-Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=0.10 lazy_refcounts=on refcount_bits=16 ++Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=0.10 lazy_refcounts=on refcount_bits=16 
cache=writeback + qemu-img: TEST_DIR/t.qcow2: Lazy refcounts only supported with compatibility level 1.1 and above (use version=v3 or greater) + + == Expect error when backing file name is empty string == +diff --git a/tests/qemu-iotests/099.out b/tests/qemu-iotests/099.out +index 8cce627529..f6f8f25957 100644 +--- a/tests/qemu-iotests/099.out ++++ b/tests/qemu-iotests/099.out +@@ -1,6 +1,6 @@ + QA output created by 099 + Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=131072 +-Formatting 'TEST_DIR/t.IMGFMT.compare', fmt=raw size=131072 ++Formatting 'TEST_DIR/t.IMGFMT.compare', fmt=raw size=131072 cache=writeback + + === Testing simple filename for blkverify === + +-- +2.27.0 + diff --git a/qemu-img-block-set-zero-flags-only-when-discard_zero.patch b/qemu-img-block-set-zero-flags-only-when-discard_zero.patch new file mode 100644 index 0000000000000000000000000000000000000000..ba0731826712ba76c9e5310686060f4c03334b12 --- /dev/null +++ b/qemu-img-block-set-zero-flags-only-when-discard_zero.patch @@ -0,0 +1,33 @@ +From 48c792a802c8cb0ab670ddf92920e2e5e96747a4 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Mon, 18 Mar 2024 10:04:42 +0800 +Subject: [PATCH] qemu-img block: set zero flags only when discard_zeros of the + block supported + +zero flags set for block discard_zeros, only when the block support +discard_zeros need set these flags. + +old commit info: + qemu-img: block: dont blk_make_zero if discard_zeroes false + +Signed-off-by: Jinhua Cao +--- + block/file-posix.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 01ae5fd88c..4782aba59f 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -822,7 +822,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, + #endif + s->needs_alignment = raw_needs_alignment(bs); + +- bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK; ++ bs->supported_zero_flags = s->discard_zeroes ? 
(BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) : 0; + if (S_ISREG(st.st_mode)) { + /* When extending regular files, we get zeros from the OS */ + bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE; +-- +2.27.0 + diff --git a/qemu-img-create-cache-paramter-only-use-for-reg-file.patch b/qemu-img-create-cache-paramter-only-use-for-reg-file.patch new file mode 100644 index 0000000000000000000000000000000000000000..6ff2ad0369356c14587fdcebd0fd89379f14a03e --- /dev/null +++ b/qemu-img-create-cache-paramter-only-use-for-reg-file.patch @@ -0,0 +1,66 @@ +From 9ca9391acb780f15a6d8769339e7cd0edf457529 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 24 Mar 2022 17:12:49 +0800 +Subject: [PATCH] qemu-img create: 'cache' parameter only used for reg file image + +The parameter 'cache' is invalid for host devices (/dev/xxx). If +'qemu-img create' is performed on a host device, passing 'cache' +to a device that does not support it makes 'qemu-img create' +fail. + +Signed-off-by: Jinhua Cao +--- + qemu-img.c | 30 ++++++++++++++++++++++++------ + 1 file changed, 24 insertions(+), 6 deletions(-) + +diff --git a/qemu-img.c b/qemu-img.c +index 80adee2620..49d914c9c4 100644 +--- a/qemu-img.c ++++ b/qemu-img.c +@@ -508,6 +508,22 @@ static int64_t cvtnum(const char *name, const char *value) + return cvtnum_full(name, value, 0, INT64_MAX); + } + ++static bool is_reg_file(const char *filename) ++{ ++ struct stat st; ++ ++ /* file not exist, file will be create later, so it's a reg file */ ++ if (access(filename, F_OK) == -1) { ++ return true; ++ } ++ ++ /* file exist, check file type */ ++ if (stat(filename, &st) >= 0 && S_ISREG(st.st_mode)) { ++ return true; ++ } ++ return false; ++} ++ + static int img_create(int argc, char **argv) + { + int c; +@@ -598,12 +614,14 @@ static int img_create(int argc, char **argv) + error_exit("Unexpected argument: %s", argv[optind]); + } + +- if (!options) { +- options = g_strdup_printf(BLOCK_OPT_CACHE"=%s", cache); +- } else { +- char *old_options = 
options; +- options = g_strdup_printf("%s,"BLOCK_OPT_CACHE"=%s", options, cache); +- g_free(old_options); ++ if (is_reg_file(filename)) { ++ if (!options) { ++ options = g_strdup_printf(BLOCK_OPT_CACHE"=%s", cache); ++ } else { ++ char *old_options = options; ++ options = g_strdup_printf("%s,"BLOCK_OPT_CACHE"=%s", options, cache); ++ g_free(old_options); ++ } + } + + bdrv_img_create(filename, fmt, base_filename, base_fmt, +-- +2.27.0 + diff --git a/qemu-nbd-make-native-as-the-default-aio-mode.patch b/qemu-nbd-make-native-as-the-default-aio-mode.patch new file mode 100644 index 0000000000000000000000000000000000000000..23dae5662eeb47f75517140f9c70a8e4dee707e0 --- /dev/null +++ b/qemu-nbd-make-native-as-the-default-aio-mode.patch @@ -0,0 +1,35 @@ +From 0e610831d584d9485eb0655168d08d8234bbb555 Mon Sep 17 00:00:00 2001 +From: WangJian +Date: Wed, 9 Feb 2022 10:48:58 +0800 +Subject: [PATCH] qemu-nbd: make native as the default aio mode + +When the file system is dealing with multithreading concurrent writing to a file, +the performance will be degraded because of the lock. +At present, the default AIO mode of QEMU NBD is threads. In the case of large blocks, +because IO is divided into small pieces and multiple queues, it will become multithreading +concurrent writing the same file. Due to the file system, the performance will be greatly reduced. +If you change to native mode, this problem will not exist. 
+ +Signed-off-by: wangjian161 +--- + qemu-nbd.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/qemu-nbd.c b/qemu-nbd.c +index 186e6468b1..acccf2977f 100644 +--- a/qemu-nbd.c ++++ b/qemu-nbd.c +@@ -843,6 +843,10 @@ int main(int argc, char **argv) + trace_init_file(); + qemu_set_log(LOG_TRACE, &error_fatal); + ++ if (!seen_aio && (flags & BDRV_O_NOCACHE)) { ++ flags |= BDRV_O_NATIVE_AIO; ++ } ++ + socket_activation = check_socket_activation(); + if (socket_activation == 0) { + if (!sockpath) { +-- +2.27.0 + diff --git a/qemu-nbd-set-timeout-to-qemu-nbd-socket.patch b/qemu-nbd-set-timeout-to-qemu-nbd-socket.patch new file mode 100644 index 0000000000000000000000000000000000000000..bc41eac5641004223996c683b2af1792fd826737 --- /dev/null +++ b/qemu-nbd-set-timeout-to-qemu-nbd-socket.patch @@ -0,0 +1,42 @@ +From d6aa08ac3693be3e08f2c8d3ad5a356ea6e9dead Mon Sep 17 00:00:00 2001 +From: WangJian +Date: Wed, 9 Feb 2022 10:55:08 +0800 +Subject: [PATCH] qemu-nbd: set timeout to qemu-nbd socket + +In case of insufficient memory and kill-9, +the NBD socket cannot be processed and stuck all the time. 
+ +Signed-off-by: wangjian161 +--- + nbd/client.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/nbd/client.c b/nbd/client.c +index 29ffc609a4..987dde43c7 100644 +--- a/nbd/client.c ++++ b/nbd/client.c +@@ -24,6 +24,8 @@ + #include "nbd-internal.h" + #include "qemu/cutils.h" + ++#define NBD_TIMEOUT_SECONDS 30 ++ + /* Definitions for opaque data types */ + + static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); +@@ -1310,6 +1312,12 @@ int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info, + } + } + ++ if (ioctl(fd, NBD_SET_TIMEOUT, NBD_TIMEOUT_SECONDS) < 0) { ++ int serrno = errno; ++ error_setg(errp, "Failed setting timeout"); ++ return -serrno; ++ } ++ + trace_nbd_init_finish(); + + return 0; +-- +2.27.0 + diff --git a/qemu-pr-fixed-ioctl-failed-for-multipath-disk.patch b/qemu-pr-fixed-ioctl-failed-for-multipath-disk.patch new file mode 100644 index 0000000000000000000000000000000000000000..0d5329a2c14576a2a4c1b42e169641606e07c01d --- /dev/null +++ b/qemu-pr-fixed-ioctl-failed-for-multipath-disk.patch @@ -0,0 +1,37 @@ +From 48f32788794e061ab0b359fe194c964849bb3040 Mon Sep 17 00:00:00 2001 +From: WangJian +Date: Wed, 9 Feb 2022 11:10:42 +0800 +Subject: [PATCH] qemu-pr: fixed ioctl failed for multipath disk + +We use ioctl to detect multipath devices. However, we only set flags in +struct dm_ioctl (the argument to ioctl) and left the other fields with +indeterminate values, which may cause the ioctl call to fail. Hence, we +set the other fields to 0 to avoid the failure. 
+ +Signed-off-by: wangjian161 +Signed-off-by: shaodenghui +--- + scsi/qemu-pr-helper.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c +index c6c6347e9b..655404fd07 100644 +--- a/scsi/qemu-pr-helper.c ++++ b/scsi/qemu-pr-helper.c +@@ -285,9 +285,12 @@ static void multipath_pr_init(void) + + static int is_mpath(int fd) + { +- struct dm_ioctl dm = { .flags = DM_NOFLUSH_FLAG }; ++ struct dm_ioctl dm; + struct dm_target_spec *tgt; + ++ memset(&dm, 0, sizeof(struct dm_ioctl)); ++ dm.flags = DM_NOFLUSH_FLAG; ++ + tgt = dm_dev_ioctl(fd, DM_TABLE_STATUS, &dm); + if (!tgt) { + if (errno == ENXIO) { +-- +2.27.0 + diff --git a/qemu.spec b/qemu.spec index 69a317305277abba51d0e6cb4c2bcf3326069a72..e81fe1f2a99c4d6bcd67a9b95cf43730644decdc 100644 --- a/qemu.spec +++ b/qemu.spec @@ -1,4 +1,4 @@ -%define anolis_release 18 +%define anolis_release 19 %bcond_with check @@ -64,7 +64,7 @@ %global have_jack 0 %global have_fdt 1 -%global have_opengl 1 +%global have_opengl 0 %global have_usbredir 1 %global enable_werror 0 @@ -178,7 +178,7 @@ %{requires_ui_curses} \ %{requires_ui_gtk} \ %{requires_ui_egl_headless} \ -%{requires_ui_opengl} \ +# %{requires_ui_opengl} \ %{requires_ui_spice_app} \ %{requires_ui_spice_core} \ %{requires_char_baum} \ @@ -262,112 +262,370 @@ Source27: kvm.conf Source31: kvm-x86.conf Source36: README.tests -Patch0001: 0001-sgx-stub-fix.patch -Patch0002: 0002-Fix-crash-when-loading-snapshot-on-inactive-node.patch - -Patch0003: 0003-hw-loongarch-virt-Align-high-memory-base-address-wit.patch -Patch0004: 0004-target-loongarch-Add-timer-information-dump-support.patch -Patch0005: 0005-target-loongarch-meson-move-gdbstub.c-to-loongarch.s.patch -Patch0006: 0006-target-loongarch-move-translate-modules-to-tcg.patch -Patch0007: 0007-linux-headers-Update-to-Linux-v6.7-rc5.patch -Patch0008: 0008-linux-headers-Synchronize-linux-headers-from-linux-v.patch -Patch0009: 
0009-target-loongarch-Define-some-kvm_arch-interfaces.patch -Patch0010: 0010-target-loongarch-Supplement-vcpu-env-initial-when-vc.patch -Patch0011: 0011-target-loongarch-Implement-kvm-get-set-registers.patch -Patch0012: 0012-target-loongarch-Implement-kvm_arch_init-function.patch -Patch0013: 0013-target-loongarch-Implement-kvm_arch_init_vcpu.patch -Patch0014: 0014-target-loongarch-Implement-kvm_arch_handle_exit.patch -Patch0015: 0015-target-loongarch-Restrict-TCG-specific-code.patch -Patch0016: 0016-target-loongarch-Implement-set-vcpu-intr-for-kvm.patch -Patch0017: 0017-target-loongarch-Add-loongarch-kvm-into-meson-build.patch -Patch0018: 0018-hw-intc-loongarch_ipi-Use-MemTxAttrs-interface-for-i.patch -Patch0019: 0019-hw-loongarch-virt-Set-iocsr-address-space-per-board-.patch -Patch0020: 0020-hw-intc-loongarch_extioi-Add-dynamic-cpu-number-supp.patch -Patch0021: 0021-hw-intc-loongarch_extioi-Add-vmstate-post_load-suppo.patch -Patch0022: 0022-configure-Add-linux-header-compile-support-for-Loong.patch -Patch0023: 0023-target-loongarch-Set-cpuid-CSR-register-only-once-wi.patch -Patch0024: 0024-target-loongarch-kvm-Enable-LSX-LASX-extension.patch -Patch0025: 0025-target-loongarch-Fix-qtest-test-hmp-error-when-KVM-o.patch -Patch0026: 0026-loongarch-Change-the-UEFI-loading-mode-to-loongarch.patch -Patch0027: 0027-target-loongarch-Fix-tlb-huge-page-loading-issue.patch -Patch0028: 0028-target-loongarch-Fix-qemu-loongarch64-hang-when-exec.patch -Patch0029: 0029-target-loongarch-kvm-Add-software-breakpoint-support.patch -Patch0030: 0030-hw-intc-loongarch_extioi-Add-virt-extension-support.patch -Patch0031: 0031-target-loongarch-kvm-sync-kernel-header-files.patch -Patch0032: 0032-hw-intc-loongarch_extioi-Add-virt-extension-support-.patch -Patch0033: 0033-target-loongarch-kvm-Add-pmu-support.patch -Patch0034: 0034-target-loongarch-Fix-qemu-system-loongarch64-assert-.patch - -Patch0035: 0035-newfeature-support-vpsp.patch - -Patch0036: 
0036-target-loongarch-kvm-Fix-VM-recovery-from-disk-failu.patch -Patch0037: 0037-target-loongarch-kvm-fpu-save-the-vreg-registers-hig.patch - -Patch1001: 1001-i386-cpu-Clear-FEAT_XSAVE_XSS_LO-HI-leafs-when-CPUID.patch -Patch1002: 1002-i386-cpu-Mask-with-XCR0-XSS-mask-for-FEAT_XSAVE_XCR0.patch -Patch1003: 1003-i386-cpuid-Decrease-cpuid_i-when-skipping-CPUID-leaf.patch -Patch1004: 1004-i386-cpuid-Move-leaf-7-to-correct-group.patch - -Patch1005: 1005-doc-update-AMD-SEV-to-include-Live-migration-flow.patch -Patch1006: 1006-migration.json-add-AMD-SEV-specific-migration-parame.patch -Patch1007: 1007-confidential-guest-support-introduce-ConfidentialGue.patch -Patch1008: 1008-target-i386-sev-provide-callback-to-setup-outgoing-c.patch -Patch1009: 1009-target-i386-sev-do-not-create-launch-context-for-an-.patch -Patch1010: 1010-target-i386-sev-add-support-to-encrypt-the-outgoing-.patch -Patch1011: 1011-target-i386-sev-add-support-to-load-incoming-encrypt.patch -Patch1012: 1012-kvm-Add-support-for-SEV-shared-regions-list-and-KVM_.patch -Patch1013: 1013-migration-add-support-to-migrate-shared-regions-list.patch -Patch1014: 1014-migration-ram-add-support-to-send-encrypted-pages.patch -Patch1015: 1015-migration-ram-Force-encrypted-status-for-flash0-flas.patch -Patch1016: 1016-kvm-Add-support-for-userspace-MSR-filtering-and-hand.patch -Patch1017: 1017-target-i386-sev-Return-0-if-sev_send_get_packet_len-.patch -Patch1018: 1018-migration-ram-Force-encrypted-status-for-VGA-vram.patch -Patch1019: 1019-target-i386-sev-Clear-shared_regions_list-when-reboo.patch -Patch1020: 1020-migration-ram-Fix-calculation-of-gfn-correpond-to-a-.patch -Patch1021: 1021-target-i386-Introduce-header-file-csv.h.patch -Patch1022: 1022-target-i386-csv-Read-cert-chain-from-file-when-prepa.patch -Patch1023: 1023-target-i386-csv-add-support-to-queue-the-outgoing-pa.patch -Patch1024: 1024-target-i386-csv-add-support-to-encrypt-the-outgoing-.patch -Patch1025: 
1025-target-i386-csv-add-support-to-queue-the-incoming-pa.patch -Patch1026: 1026-target-i386-csv-add-support-to-load-incoming-encrypt.patch -Patch1027: 1027-migration-ram-Accelerate-the-transmission-of-CSV-gue.patch -Patch1028: 1028-migration-ram-Accelerate-the-loading-of-CSV-guest-s-.patch -Patch1029: 1029-target-i386-csv-Add-support-for-migrate-VMSA-for-CSV.patch -Patch1030: 1030-target-i386-get-set-migrate-GHCB-state.patch -Patch1031: 1031-target-i386-kvm-Fix-the-resettable-info-when-emulate.patch -Patch1032: 1032-kvm-Add-support-for-CSV2-reboot.patch -Patch1033: 1033-target-i386-csv-Add-CSV3-context.patch -Patch1034: 1034-target-i386-csv-Add-command-to-initialize-CSV3-conte.patch -Patch1035: 1035-target-i386-csv-Add-command-to-load-data-to-CSV3-gue.patch -Patch1036: 1036-target-i386-csv-Add-command-to-load-vmcb-to-CSV3-gue.patch -Patch1037: 1037-target-i386-cpu-Populate-CPUID-0x8000_001F-when-CSV3.patch -Patch1038: 1038-target-i386-csv-Do-not-register-unregister-guest-sec.patch -Patch1039: 1039-target-i386-csv-Load-initial-image-to-private-memory.patch -Patch1040: 1040-vga-Force-full-update-for-CSV3-guest.patch -Patch1041: 1041-vfio-Only-map-shared-region-for-CSV3-virtual-machine.patch -Patch1042: 1042-linux-headers-update-kernel-headers-to-include-CSV3-.patch -Patch1043: 1043-target-i386-csv-Add-support-to-migrate-the-outgoing-.patch -Patch1044: 1044-target-i386-csv-Add-support-to-migrate-the-incoming-.patch -Patch1045: 1045-target-i386-csv-Add-support-to-migrate-the-outgoing-.patch -Patch1046: 1046-target-i386-csv-Add-support-to-migrate-the-incoming-.patch -Patch1047: 1047-target-i386-sev-Fix-incompatibility-between-SEV-and-.patch -Patch1048: 1048-target-i386-sev-Add-support-for-reuse-ASID-for-diffe.patch -Patch1049: 1049-target-i386-Add-Hygon-Dhyana-v3-CPU-model.patch -Patch1050: 1050-target-i386-Add-new-Hygon-Dharma-CPU-model.patch +#Patch0001: 0001-sgx-stub-fix.patch +#Patch0002: 0002-Fix-crash-when-loading-snapshot-on-inactive-node.patch + +#Patch0003: 
0003-hw-loongarch-virt-Align-high-memory-base-address-wit.patch +#Patch0004: 0004-target-loongarch-Add-timer-information-dump-support.patch +#Patch0005: 0005-target-loongarch-meson-move-gdbstub.c-to-loongarch.s.patch +#Patch0006: 0006-target-loongarch-move-translate-modules-to-tcg.patch +#Patch0007: 0007-linux-headers-Update-to-Linux-v6.7-rc5.patch +#Patch0008: 0008-linux-headers-Synchronize-linux-headers-from-linux-v.patch +#Patch0009: 0009-target-loongarch-Define-some-kvm_arch-interfaces.patch +#Patch0010: 0010-target-loongarch-Supplement-vcpu-env-initial-when-vc.patch +#Patch0011: 0011-target-loongarch-Implement-kvm-get-set-registers.patch +#Patch0012: 0012-target-loongarch-Implement-kvm_arch_init-function.patch +#Patch0013: 0013-target-loongarch-Implement-kvm_arch_init_vcpu.patch +#Patch0014: 0014-target-loongarch-Implement-kvm_arch_handle_exit.patch +#Patch0015: 0015-target-loongarch-Restrict-TCG-specific-code.patch +#Patch0016: 0016-target-loongarch-Implement-set-vcpu-intr-for-kvm.patch +#Patch0017: 0017-target-loongarch-Add-loongarch-kvm-into-meson-build.patch +#Patch0018: 0018-hw-intc-loongarch_ipi-Use-MemTxAttrs-interface-for-i.patch +#Patch0019: 0019-hw-loongarch-virt-Set-iocsr-address-space-per-board-.patch +#Patch0020: 0020-hw-intc-loongarch_extioi-Add-dynamic-cpu-number-supp.patch +#Patch0021: 0021-hw-intc-loongarch_extioi-Add-vmstate-post_load-suppo.patch +#Patch0022: 0022-configure-Add-linux-header-compile-support-for-Loong.patch +#Patch0023: 0023-target-loongarch-Set-cpuid-CSR-register-only-once-wi.patch +#Patch0024: 0024-target-loongarch-kvm-Enable-LSX-LASX-extension.patch +#Patch0025: 0025-target-loongarch-Fix-qtest-test-hmp-error-when-KVM-o.patch +#Patch0026: 0026-loongarch-Change-the-UEFI-loading-mode-to-loongarch.patch +#Patch0027: 0027-target-loongarch-Fix-tlb-huge-page-loading-issue.patch +#Patch0028: 0028-target-loongarch-Fix-qemu-loongarch64-hang-when-exec.patch +#Patch0029: 0029-target-loongarch-kvm-Add-software-breakpoint-support.patch 
+#Patch0030: 0030-hw-intc-loongarch_extioi-Add-virt-extension-support.patch +#Patch0031: 0031-target-loongarch-kvm-sync-kernel-header-files.patch +#Patch0032: 0032-hw-intc-loongarch_extioi-Add-virt-extension-support-.patch +#Patch0033: 0033-target-loongarch-kvm-Add-pmu-support.patch +#Patch0034: 0034-target-loongarch-Fix-qemu-system-loongarch64-assert-.patch + +#Patch0035: 0035-newfeature-support-vpsp.patch + +#Patch0036: 0036-target-loongarch-kvm-Fix-VM-recovery-from-disk-failu.patch +#Patch0037: 0037-target-loongarch-kvm-fpu-save-the-vreg-registers-hig.patch + +#Patch1001: 1001-i386-cpu-Clear-FEAT_XSAVE_XSS_LO-HI-leafs-when-CPUID.patch +#Patch1002: 1002-i386-cpu-Mask-with-XCR0-XSS-mask-for-FEAT_XSAVE_XCR0.patch +#Patch1003: 1003-i386-cpuid-Decrease-cpuid_i-when-skipping-CPUID-leaf.patch +#Patch1004: 1004-i386-cpuid-Move-leaf-7-to-correct-group.patch + +#Patch1005: 1005-doc-update-AMD-SEV-to-include-Live-migration-flow.patch +#Patch1006: 1006-migration.json-add-AMD-SEV-specific-migration-parame.patch +#Patch1007: 1007-confidential-guest-support-introduce-ConfidentialGue.patch +#Patch1008: 1008-target-i386-sev-provide-callback-to-setup-outgoing-c.patch +#Patch1009: 1009-target-i386-sev-do-not-create-launch-context-for-an-.patch +#Patch1010: 1010-target-i386-sev-add-support-to-encrypt-the-outgoing-.patch +#Patch1011: 1011-target-i386-sev-add-support-to-load-incoming-encrypt.patch +#Patch1012: 1012-kvm-Add-support-for-SEV-shared-regions-list-and-KVM_.patch +#Patch1013: 1013-migration-add-support-to-migrate-shared-regions-list.patch +#Patch1014: 1014-migration-ram-add-support-to-send-encrypted-pages.patch +#Patch1015: 1015-migration-ram-Force-encrypted-status-for-flash0-flas.patch +#Patch1016: 1016-kvm-Add-support-for-userspace-MSR-filtering-and-hand.patch +#Patch1017: 1017-target-i386-sev-Return-0-if-sev_send_get_packet_len-.patch +#Patch1018: 1018-migration-ram-Force-encrypted-status-for-VGA-vram.patch +#Patch1019: 
1019-target-i386-sev-Clear-shared_regions_list-when-reboo.patch +#Patch1020: 1020-migration-ram-Fix-calculation-of-gfn-correpond-to-a-.patch +#Patch1021: 1021-target-i386-Introduce-header-file-csv.h.patch +#Patch1022: 1022-target-i386-csv-Read-cert-chain-from-file-when-prepa.patch +#Patch1023: 1023-target-i386-csv-add-support-to-queue-the-outgoing-pa.patch +#Patch1024: 1024-target-i386-csv-add-support-to-encrypt-the-outgoing-.patch +#Patch1025: 1025-target-i386-csv-add-support-to-queue-the-incoming-pa.patch +#Patch1026: 1026-target-i386-csv-add-support-to-load-incoming-encrypt.patch +#Patch1027: 1027-migration-ram-Accelerate-the-transmission-of-CSV-gue.patch +#Patch1028: 1028-migration-ram-Accelerate-the-loading-of-CSV-guest-s-.patch +#Patch1029: 1029-target-i386-csv-Add-support-for-migrate-VMSA-for-CSV.patch +#Patch1030: 1030-target-i386-get-set-migrate-GHCB-state.patch +#Patch1031: 1031-target-i386-kvm-Fix-the-resettable-info-when-emulate.patch +#Patch1032: 1032-kvm-Add-support-for-CSV2-reboot.patch +#Patch1033: 1033-target-i386-csv-Add-CSV3-context.patch +#Patch1034: 1034-target-i386-csv-Add-command-to-initialize-CSV3-conte.patch +#Patch1035: 1035-target-i386-csv-Add-command-to-load-data-to-CSV3-gue.patch +#Patch1036: 1036-target-i386-csv-Add-command-to-load-vmcb-to-CSV3-gue.patch +#Patch1037: 1037-target-i386-cpu-Populate-CPUID-0x8000_001F-when-CSV3.patch +#Patch1038: 1038-target-i386-csv-Do-not-register-unregister-guest-sec.patch +#Patch1039: 1039-target-i386-csv-Load-initial-image-to-private-memory.patch +#Patch1040: 1040-vga-Force-full-update-for-CSV3-guest.patch +#Patch1041: 1041-vfio-Only-map-shared-region-for-CSV3-virtual-machine.patch +#Patch1042: 1042-linux-headers-update-kernel-headers-to-include-CSV3-.patch +#Patch1043: 1043-target-i386-csv-Add-support-to-migrate-the-outgoing-.patch +#Patch1044: 1044-target-i386-csv-Add-support-to-migrate-the-incoming-.patch +#Patch1045: 1045-target-i386-csv-Add-support-to-migrate-the-outgoing-.patch +#Patch1046: 
1046-target-i386-csv-Add-support-to-migrate-the-incoming-.patch +#Patch1047: 1047-target-i386-sev-Fix-incompatibility-between-SEV-and-.patch +#Patch1048: 1048-target-i386-sev-Add-support-for-reuse-ASID-for-diffe.patch +#Patch1049: 1049-target-i386-Add-Hygon-Dhyana-v3-CPU-model.patch +#Patch1050: 1050-target-i386-Add-new-Hygon-Dharma-CPU-model.patch # Fix CVE-2024-3446 # https://github.com/qemu/qemu/commit/ba28e0ff4d95b56dc334aac2730ab3651ffc3132 -Patch0038: 0038-hw-display-virtio-gpu-Protect-from-DMA-re-entrancy-b.patch +# Patch0038: 0038-hw-display-virtio-gpu-Protect-from-DMA-re-entrancy-b.patch #https://gitlab.com/qemu-project/qemu/-/commit/bd385a5298d7062668e804d73944d52aec9549f1 #https://gitlab.com/qemu-project/qemu/-/commit/2eb42a728d27a43fdcad5f37d3f65706ce6deba5 #https://gitlab.com/qemu-project/qemu/-/commit/7e1110664ecbc4826f3c978ccb06b6c1bce823e6 #https://gitlab.com/qemu-project/qemu/-/commit/7ead946998610657d38d1a505d5f25300d4ca613 -Patch0039:fix-cve-2024-4467.patch -Patch1055: 0001-CVE-2023-6683.patch -Patch1056: 0002-CVE-2023-6693.patch -Patch1057: 0003-CVE-2024-26327.patch -Patch1058: 1058-fix-CVE-2024-3446.patch +#Patch0039:fix-cve-2024-4467.patch +#Patch1055: 0001-CVE-2023-6683.patch +#Patch1056: 0002-CVE-2023-6693.patch +#Patch1057: 0003-CVE-2024-26327.patch +#Patch1058: 1058-fix-CVE-2024-3446.patch + + +Patch0001: tests-qemu-iotests-resolved-the-problem-that-the-108.patch +Patch0002: hw-usb-Style-cleanup.patch +Patch0003: virtio-gpu-Correct-virgl_renderer_resource_get_info-.patch +Patch0004: blkio-Respect-memory-alignment-for-bounce-buffer-all.patch +Patch0005: i386-cpu-Clear-FEAT_XSAVE_XSS_LO-HI-leafs-when-CPUID.patch +Patch0006: i386-cpu-Mask-with-XCR0-XSS-mask-for-FEAT_XSAVE_XCR0.patch +Patch0007: i386-cpuid-Decrease-cpuid_i-when-skipping-CPUID-leaf.patch +Patch0008: i386-cpuid-Move-leaf-7-to-correct-group.patch +Patch0009: chardev-char-socket-Fix-TLS-io-channels-sending-too-.patch +Patch0010: vfio-pci-Ascend310-need-4Bytes-quirk-in-bar4.patch 
+Patch0011: vfio-pci-Ascend710-need-4Bytes-quirk-in-bar0.patch +Patch0012: vfio-pci-Ascend910-need-4Bytes-quirk-in-bar0.patch +Patch0013: vfio-pci-Ascend710-change-to-bar2-quirk.patch +Patch0014: hw-i2c-smbus_slave-Add-object-path-on-error-prints.patch +Patch0015: virtio-gpu-remove-needless-condition.patch +Patch0016: target-i386-sev-Fix-missing-ERRP_GUARD-for-error_pre.patch +Patch0017: hw-acpi-cpu-Use-CPUState-typedef.patch +Patch0018: hw-nvme-Use-pcie_sriov_num_vfs-CVE-2024-26328.patch +Patch0019: pcie_sriov-Validate-NumVFs-CVE-2024-26327.patch +Patch0020: Revert-file-posix-Remove-unused-s-discard_zeroes.patch +Patch0021: qemu-img-block-set-zero-flags-only-when-discard_zero.patch +Patch0022: qemu-img-add-qemu-img-direct-create.patch +Patch0023: qemu-img-create-cache-paramter-only-use-for-reg-file.patch +Patch0024: hw-cxl-cxl-host-Fix-missing-ERRP_GUARD-in-cxl_fixed_.patch +Patch0025: hw-display-macfb-Fix-missing-ERRP_GUARD-in-macfb_nub.patch +Patch0026: bugfix-fix-eventfds-may-double-free-when-vm_id-reuse.patch +Patch0027: log-Add-some-logs-on-VM-runtime-path.patch +Patch0028: util-log-add-CONFIG_DISABLE_QEMU_LOG-macro.patch +Patch0029: bugfix-fix-some-illegal-memory-access-and-memory-lea.patch +Patch0030: bugfix-fix-possible-memory-leak.patch +Patch0031: scsi-disk-define-props-in-scsi_block_disk-to-avoid-m.patch +Patch0032: qemu-pr-fixed-ioctl-failed-for-multipath-disk.patch +Patch0033: scsi-cdrom-Fix-crash-after-remote-cdrom-detached.patch +Patch0034: scsi-bugfix-fix-division-by-zero.patch +Patch0035: qapi-block-core-Add-retry-option-for-error-action.patch +Patch0036: block-backend-Introduce-retry-timer.patch +Patch0037: block-backend-Add-device-specific-retry-callback.patch +Patch0038: block-backend-Enable-retry-action-on-errors.patch +Patch0039: block-backend-Add-timeout-support-for-retry.patch +Patch0040: block-Add-error-retry-param-setting.patch +Patch0041: virtio_blk-Add-support-for-retry-on-errors.patch +Patch0042: 
scsi-bus-Refactor-the-code-that-retries-requests.patch +Patch0043: scsi-disk-Add-support-for-retry-on-errors.patch +Patch0044: block-backend-Stop-retrying-when-draining.patch +Patch0045: block-Add-sanity-check-when-setting-retry-parameters.patch +Patch0046: scsi-bus-fix-unmatched-object_unref.patch +Patch0047: scsi-bus-fix-incorrect-call-for-blk_error_retry_rese.patch +Patch0048: block-mirror-fix-file-system-went-to-read-only-after.patch +Patch0049: block-enable-cache-mode-of-empty-cdrom.patch +Patch0050: block-bugfix-Don-t-pause-vm-when-NOSPACE-EIO-happene.patch +Patch0051: hw-loongarch-virt-Align-high-memory-base-address-wit.patch +Patch0052: target-loongarch-Add-timer-information-dump-support.patch +Patch0053: target-loongarch-meson-move-gdbstub.c-to-loongarch.s.patch +Patch0054: target-loongarch-move-translate-modules-to-tcg.patch +Patch0055: linux-headers-Update-to-Linux-v6.7-rc5.patch +Patch0056: linux-headers-Synchronize-linux-headers-from-linux-v.patch +Patch0057: target-loongarch-Define-some-kvm_arch-interfaces.patch +Patch0058: target-loongarch-Supplement-vcpu-env-initial-when-vc.patch +Patch0059: target-loongarch-Implement-kvm-get-set-registers.patch +Patch0060: target-loongarch-Implement-kvm_arch_init-function.patch +Patch0061: target-loongarch-Implement-kvm_arch_init_vcpu.patch +Patch0062: target-loongarch-Implement-kvm_arch_handle_exit.patch +Patch0063: target-loongarch-Restrict-TCG-specific-code.patch +Patch0064: target-loongarch-Implement-set-vcpu-intr-for-kvm.patch +Patch0065: target-loongarch-Add-loongarch-kvm-into-meson-build.patch +Patch0066: hw-intc-loongarch_ipi-Use-MemTxAttrs-interface-for-i.patch +Patch0067: hw-loongarch-virt-Set-iocsr-address-space-per-board-.patch +Patch0068: hw-intc-loongarch_extioi-Add-dynamic-cpu-number-supp.patch +Patch0069: hw-intc-loongarch_extioi-Add-vmstate-post_load-suppo.patch +Patch0070: configure-Add-linux-header-compile-support-for-Loong.patch +Patch0071: 
target-loongarch-Set-cpuid-CSR-register-only-once-wi.patch +Patch0072: target-loongarch-kvm-Enable-LSX-LASX-extension.patch +Patch0073: target-loongarch-Fix-qtest-test-hmp-error-when-KVM-o.patch +Patch0074: loongarch-Change-the-UEFI-loading-mode-to-loongarch.patch +Patch0075: disable-keyring-option.patch +Patch0076: virtio-net-correctly-copy-vnet-header-when-flushing-.patch +Patch0077: ui-clipboard-mark-type-as-not-available-when-there-i.patch +# Patch0078: memory-backup-Modify-the-VM-s-physical-bits-value-se.patch +Patch0079: backup-memory-bakcup-hugepages-hugepages-files-maybe.patch +Patch0080: block-disallow-block-jobs-when-there-is-a-BDRV_O_INA.patch +Patch0081: travis-ci-Rename-SOFTMMU-SYSTEM.patch +Patch0082: iotests-adapt-to-output-change-for-recently-introduc.patch +Patch0083: migration-Skip-only-empty-block-devicesi.patch +Patch0084: vhost-cancel-migration-when-vhost-user-restarted-dur.patch +Patch0085: Currently-while-kvm-and-qemu-can-not-handle-some-kvm.patch +Patch0086: ps2-fix-oob-in-ps2-kbd.patch +Patch0087: monitor-qmp-drop-inflight-rsp-if-qmp-client-broken.patch +Patch0088: oslib-posix-optimise-vm-startup-time-for-1G-hugepage.patch +Patch0089: migration-skip-cache_drop-for-bios-bootloader-and-nv.patch +Patch0090: migration-Add-multi-thread-compress-method.patch +Patch0091: migration-Refactoring-multi-thread-compress-migratio.patch +Patch0092: migration-Add-multi-thread-compress-ops.patch +Patch0093: migration-Add-zstd-support-in-multi-thread-compressi.patch +Patch0094: migration-Add-compress_level-sanity-check.patch +Patch0095: doc-Update-multi-thread-compression-doc.patch +Patch0096: cpu-features-fix-bug-for-memory-leakage.patch +Patch0097: migration-report-migration-related-thread-pid-to-lib.patch +Patch0098: migration-report-multiFd-related-thread-pid-to-libvi.patch +Patch0099: virtio-check-descriptor-numbers.patch +Patch0100: virtio-bugfix-add-rcu_read_lock-when-vring_avail_idx.patch +Patch0101: 
virtio-print-the-guest-virtio_net-features-that-host.patch +Patch0102: virtio-bugfix-check-the-value-of-caches-before-acces.patch +Patch0103: virtio-scsi-bugfix-fix-qemu-crash-for-hotplug-scsi-d.patch +Patch0104: nbd-server.c-fix-invalid-read-after-client-was-alrea.patch +Patch0105: qemu-nbd-make-native-as-the-default-aio-mode.patch +Patch0106: qemu-nbd-set-timeout-to-qemu-nbd-socket.patch +Patch0107: qdev-monitors-Fix-reundant-error_setg-of-qdev_add_de.patch +Patch0108: pcie-Compat-with-devices-which-do-not-support-Link-W.patch +Patch0109: pcie-Add-pcie-root-port-fast-plug-unplug-feature.patch +Patch0110: net-dump.c-Suppress-spurious-compiler-warning.patch +Patch0111: hw-net-rocker_of_dpa-fix-double-free-bug-of-rocker-d.patch +Patch0112: i6300esb-watchdog-bugfix-Add-a-runstate-transition.patch +Patch0113: vhost-user-Set-the-acked_features-to-vm-s-featrue.patch +Patch0114: vhost-user-Add-support-reconnect-vhost-user-socket.patch +Patch0115: fix-qemu-core-when-vhost-user-net-config-with-server.patch +Patch0116: vhost-user-quit-infinite-loop-while-used-memslots-is.patch +Patch0117: vhost-user-add-vhost_set_mem_table-when-vm-load_setu.patch +Patch0118: vhost-user-add-unregister_savevm-when-vhost-user-cle.patch +Patch0119: monitor-Discard-BLOCK_IO_ERROR-event-when-VM-reboote.patch +Patch0120: virtio-net-bugfix-do-not-delete-netdev-before-virtio.patch +Patch0121: virtio-net-fix-max-vring-buf-size-when-set-ring-num.patch +Patch0122: virtio-net-set-the-max-of-queue-size-to-4096.patch +Patch0123: virtio-net-update-the-default-and-max-of-rx-tx_queue.patch +Patch0124: hw-usb-reduce-the-vpcu-cost-of-UHCI-when-VNC-disconn.patch +Patch0125: vhost-vdpa-add-VHOST_BACKEND_F_BYTEMAPLOG.patch +Patch0126: vhost-vdpa-add-migration-log-ops-for-VhostOps.patch +Patch0127: vhost-introduce-bytemap-for-vhost-backend-logging.patch +Patch0128: vhost-add-vhost_dev_suspend-resume_op.patch +Patch0129: vhost-implement-vhost-vdpa-suspend-resume.patch +Patch0130: 
vhost-implement-vhost_vdpa_device_suspend-resume.patch +Patch0131: vhost-implement-savevm_handler-for-vdpa-device.patch +Patch0132: vhost-implement-post-resume-bh.patch +Patch0133: vhost-implement-migration-state-notifier-for-vdpa-de.patch +Patch0134: vdpa-implement-vdpa-device-migration.patch +Patch0135: vdpa-move-memory-listener-to-the-realize-stage.patch +Patch0136: vdpa-support-vdpa-device-suspend-resume.patch +Patch0137: vdpa-suspend-function-return-0-when-the-vdpa-device-.patch +Patch0138: vdpa-correct-param-passed-in-when-unregister-save.patch +Patch0139: vdpa-don-t-suspend-resume-device-when-vdpa-device-no.patch +Patch0140: docs-Add-generic-vhost-vdpa-device-documentation.patch +Patch0141: vdpa-set-vring-enable-only-if-the-vring-address-has-.patch +Patch0142: ide-ahci-add-check-to-avoid-null-dereference-CVE-201.patch +Patch0143: net-eepro100-validate-various-address-valuesi-CVE-20.patch +Patch0144: cpu-add-Kunpeng-920-cpu-support.patch +Patch0145: cpu-add-Cortex-A72-processor-kvm-target-support.patch +Patch0146: tests-virt-Allow-changes-to-PPTT-test-table.patch +Patch0147: hw-arm64-add-vcpu-cache-info-support.patch +# Patch0148: arm64-Add-the-cpufreq-device-to-show-cpufreq-info-to.patch +# Patch0149: tests-virt-Update-expected-ACPI-tables-for-virt-test.patch +# Patch0150: pl011-reset-read-FIFO-when-UARTTIMSC-0-UARTICR-0xfff.patch +# Patch0151: shadow_dev-introduce-shadow-dev-for-virtio-net-devic.patch +# Patch0152: tests-Disable-filemonitor-testcase.patch +# Patch0153: freeclock-add-qmp-command-to-get-time-offset-of-vm-i.patch +# Patch0154: freeclock-set-rtc_date_diff-for-arm.patch +# Patch0155: freeclock-set-rtc_date_diff-for-X86.patch +# Patch0156: i386-cache-passthrough-Update-AMD-8000_001D.EAX-25-1.patch +# Patch0157: bugfix-irq-Avoid-covering-object-refcount-of-qemu_ir.patch +# Patch0158: log-Add-log-at-boot-cpu-init-for-aarch64.patch +# Patch0159: feature-Add-log-for-each-modules.patch +# Patch0160: feature-Add-logs-for-vm-start-and-destroy.patch +# 
Patch0161: pl031-support-rtc-timer-property-for-pl031.patch +# Patch0162: arm-acpi-Fix-when-make-qemu-system-aarch64-at-x86_64.patch +# Patch0163: linux-headers-update-against-5.10-and-manual-clear-v.patch +# Patch0164: vfio-Maintain-DMA-mapping-range-for-the-container.patch +# Patch0165: vfio-migration-Add-support-for-manual-clear-vfio-dir.patch +# Patch0166: arm-virt-target-arm-Add-new-ARMCPU-socket-cluster-co.patch +# Patch0167: cpus-common-Add-common-CPU-utility-for-possible-vCPU.patch +# Patch0168: hw-arm-virt-Move-setting-of-common-CPU-properties-in.patch +# Patch0169: arm-virt-target-arm-Machine-init-time-change-common-.patch +# Patch0170: accel-kvm-Extract-common-KVM-vCPU-creation-parking-c.patch +# Patch0171: arm-virt-kvm-Pre-create-disabled-possible-vCPUs-mach.patch +# Patch0172: arm-virt-gicv3-Changes-to-pre-size-GIC-with-possible.patch +# Patch0173: arm-virt-Init-PMU-at-host-for-all-possible-vcpus.patch +# Patch0174: hw-acpi-Move-CPU-ctrl-dev-MMIO-region-len-macro-to-c.patch +# Patch0175: arm-acpi-Enable-ACPI-support-for-vcpu-hotplug.patch +# Patch0176: hw-acpi-Add-ACPI-CPU-hotplug-init-stub.patch +# Patch0177: hw-acpi-Use-qemu_present_cpu-API-in-ACPI-CPU-hotplug.patch +# Patch0178: hw-acpi-Init-GED-framework-with-cpu-hotplug-events.patch +# Patch0179: arm-virt-Add-cpu-hotplug-events-to-GED-during-creati.patch +# Patch0180: arm-virt-Create-GED-dev-before-disabled-CPU-Objs-are.patch +# Patch0181: hw-acpi-Update-CPUs-AML-with-cpu-ctrl-dev-change.patch +# Patch0182: arm-virt-acpi-Factor-out-CPPC-building-from-DSDT-CPU.patch +# Patch0183: acpi-cpu-Add-cpu_cppc-building-support.patch +# Patch0184: tests-acpi-bios-tables-test-Allow-changes-to-virt-DS.patch +# Patch0185: arm-virt-acpi-Build-CPUs-AML-with-CPU-Hotplug-suppor.patch +# Patch0186: arm-virt-Make-ARM-vCPU-present-status-ACPI-persisten.patch +# Patch0187: hw-acpi-ACPI-AML-Changes-to-reflect-the-correct-_STA.patch +# Patch0188: hw-acpi-Update-GED-_EVT-method-AML-with-cpu-scan.patch +# Patch0189: 
hw-arm-MADT-Tbl-change-to-size-the-guest-with-possib.patch +# Patch0190: hw-acpi-Make-_MAT-method-optional.patch +# Patch0191: arm-virt-Release-objects-for-disabled-possible-vCPUs.patch +# Patch0192: hw-acpi-Update-ACPI-GED-framework-to-support-vCPU-Ho.patch +# Patch0193: arm-virt-Add-update-basic-hot-un-plug-framework.patch +# Patch0194: arm-virt-Changes-to-un-wire-GICC-vCPU-IRQs-during-ho.patch +# Patch0195: hw-arm-gicv3-Changes-to-update-GIC-with-vCPU-hot-plu.patch +# Patch0196: hw-intc-arm-gicv3-Changes-required-to-re-init-the-vC.patch +# Patch0197: arm-virt-Update-the-guest-via-GED-about-CPU-hot-un-p.patch +# Patch0198: hw-arm-Changes-required-for-reset-and-to-support-nex.patch +# Patch0199: physmem-gdbstub-Common-helping-funcs-changes-to-unre.patch +# Patch0200: target-arm-Add-support-of-unrealize-ARMCPU-during-vC.patch +# Patch0201: target-arm-kvm-Write-CPU-state-back-to-KVM-on-reset.patch +# Patch0202: target-arm-kvm-tcg-Register-Handle-SMCCC-hypercall-e.patch +# Patch0203: hw-arm-Support-hotplug-capability-check-using-_OSC-m.patch +# Patch0204: tcg-mttcg-enable-threads-to-unregister-in-tcg_ctxs.patch +# Patch0205: hw-arm-virt-Expose-cold-booted-CPUs-as-MADT-GICC-Ena.patch +# Patch0206: system-physmem-Fix-possible-double-free-when-destroy.patch +# Patch0207: arm-cpu-Some-fixes-for-arm_cpu_unrealizefn.patch +# Patch0208: acpi-cpu-Fix-cpu_hotplug_hw_init.patch +# Patch0209: system-cpus-Fix-pause_all_vcpus-under-concurrent-env.patch +# Patch0210: system-cpus-Fix-resume_all_vcpus-under-vCPU-hotplug-.patch +# Patch0211: arm-virt.c-Convey-local_err-when-set-psci-conduit.patch +# Patch0212: arm-virt-Fix-adjudgement-of-core_id-for-vcpu-hotplug.patch +# Patch0213: accel-kvm-Use-correct-id-for-parked-vcpu.patch +# Patch0214: arm-kvm-Set-psci-smccc-filter-only-with-vcpu-hotplug.patch +# Patch0215: intc-gicv3-Fixes-for-vcpu-hotplug.patch +# Patch0216: acpi-ged-Init-cpu-hotplug-only-when-machine-support-.patch +# Patch0217: acpi-ged-Remove-cpuhp-field-of-ged.patch +# 
Patch0218: arm-virt-acpi-Require-possible_cpu_arch_ids-for-buil.patch +# Patch0219: arm-virt-Consider-has_ged-when-set-mc-has_hotpluggab.patch +# Patch0220: arm-virt-Require-mc-has_hotpluggable_cpus-for-cold-p.patch +# Patch0221: tests-acpi-Update-expected-ACPI-tables-for-vcpu-hotp.patch +# Patch0222: coro-support-live-patch-for-libcare.patch +# Patch0223: arm-virt-Use-separate-filed-to-identify-cpu-hotplug-.patch +# Patch0224: arm-virt-Use-max_cpus-to-calculate-redist1_count.patch +# Patch0225: include-ui-rect.h-fix-qemu_rect_init-mis-assignment.patch +# Patch0226: tests-bios-tables-test-Rename-smbios-type-4-related-.patch +# Patch0227: hw-scsi-scsi-generic-Fix-io_timeout-property-not-app.patch +# Patch0228: hw-net-virtio-net-fix-qemu-set-used-ring-flag-even-v.patch +# Patch0229: block-virtio-blk-Fix-memory-leak-from-virtio_blk_zon.patch +# Patch0230: hw-nvme-fix-Werror-maybe-uninitialized.patch +# Patch0231: hw-net-net_tx_pkt-Fix-overrun-in-update_sctp_checksu.patch +# Patch0232: hw-virtio-Introduce-virtio_bh_new_guarded-helper.patch +# Patch0233: hw-display-virtio-gpu-Protect-from-DMA-re-entrancy-b.patch +# Patch0234: hw-char-virtio-serial-bus-Protect-from-DMA-re-entran.patch +# Patch0235: hw-virtio-virtio-crypto-Protect-from-DMA-re-entrancy.patch +# Patch0236: hw-sd-sdhci-Do-not-update-TRNMOD-when-Command-Inhibi.patch +# Patch0237: acpi-cpu-Fix-detection-of-present-cpu.patch +# Patch0238: arm-virt-Don-t-modify-smp.max_cpus-when-vcpu-hotplug.patch +# Patch0239: kvm-arm-Fix-SVE-related-logic-for-vcpu-hotplug-featu.patch +# Patch0240: arm-virt-acpi-Extend-cpufreq-to-support-max_cpus.patch +# Patch0241: kvm-arm-Fix-compatibility-of-cold-plug-CPU-with-SVE.patch +# Patch0242: hw-isa-vt82c686-Keep-track-of-PIRQ-PINT-pins-separat.patch +# Patch0243: target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch +# Patch0244: target-i386-Add-new-CPU-model-SierraForest.patch +# Patch0245: target-i386-Export-RFDS-bit-to-guests.patch +# Patch0246: 
target-loongarch-Fix-qemu-system-loongarch64-assert-.patch +# Patch0247: target-loongarch-Fix-qemu-loongarch64-hang-when-exec.patch +# Patch0248: target-loongarch-Fix-tlb-huge-page-loading-issue.patch +# Patch0249: target-loongarch-kvm-Add-software-breakpoint-support.patch +# Patch0250: target-loongarch-kvm-sync-kernel-header-files.patch +# Patch0251: hw-intc-loongarch_extioi-Add-virt-extension-support.patch +# Patch0252: target-loongarch-kvm-Add-pmu-support.patch +# Patch0253: target-loongarch-kvm-Fix-vm-restore-failed.patch +# Patch0254: target-loongarch-kvm-Add-pv-steal-time-support.patch +# Patch0255: target-loongarch-kvm-fpu-save-the-vreg-registers-hig.patch + ExclusiveArch: x86_64 aarch64 loongarch64 @@ -755,7 +1013,6 @@ This package provides the additional D-Bus UI for QEMU. %package ui-gtk Summary: QEMU GTK UI driver Requires: %{name}-common = %{EVR} -Requires: %{name}-ui-opengl = %{EVR} Obsoletes: qemu-kvm-ui-gtk < %{EVR} %description ui-gtk This package provides the additional GTK UI for QEMU. @@ -763,7 +1020,6 @@ This package provides the additional GTK UI for QEMU. %package ui-egl-headless Summary: QEMU EGL headless driver Requires: %{name}-common = %{EVR} -Requires: %{name}-ui-opengl = %{EVR} Obsoletes: qemu-kvm-ui-egl-headless < %{EVR} %description ui-egl-headless This package provides the additional egl-headless UI for QEMU. @@ -864,7 +1120,6 @@ This package provides the vhost-user-gpu display device for QEMU. %package ui-spice-core Summary: QEMU spice-core UI driver Requires: %{name}-common = %{EVR} -Requires: %{name}-ui-opengl = %{EVR} Obsoletes: qemu-kvm-ui-spice-core < %{EVR} %description ui-spice-core This package provides the additional spice-core UI for QEMU. 
@@ -1217,9 +1472,9 @@ run_configure \ --enable-capstone \ --enable-coroutine-pool \ --enable-curl \ - --enable-dbus-display \ - --enable-debug-info \ - --enable-docs \ + --disable-dbus-display \ + --disable-debug-info \ + --disable-docs \ %if %{have_fdt} --enable-fdt=system \ %endif @@ -1240,11 +1495,13 @@ run_configure \ %endif --enable-lzo \ --enable-malloc-trim \ + --enable-modules \ --enable-mpath \ %if %{have_numactl} --enable-numa \ %endif + --disable-opengl \ %if %{have_opengl} --enable-opengl \ %endif @@ -1748,7 +2005,7 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %files ui-gtk %{_libdir}/%{name}/ui-gtk.so %files ui-egl-headless -%{_libdir}/%{name}/ui-egl-headless.so +%{_libdir}/%{name}/ui-gtk.so %files char-baum %{_libdir}/%{name}/chardev-baum.so diff --git a/scsi-bugfix-fix-division-by-zero.patch b/scsi-bugfix-fix-division-by-zero.patch new file mode 100644 index 0000000000000000000000000000000000000000..4d6d18bb9d106aed5ccd724667f19a187bf41f1a --- /dev/null +++ b/scsi-bugfix-fix-division-by-zero.patch @@ -0,0 +1,58 @@ +From f2837d186532fb82ed01dbe32bdcf9dda6b06258 Mon Sep 17 00:00:00 2001 +From: WangJian +Date: Wed, 9 Feb 2022 16:34:05 +0800 +Subject: [PATCH] scsi: bugfix: fix division by zero + +Error of PRDM disk may cause divide by zero in +scsi_read_complete(), so add LOG and assert(). 
+ +Signed-off-by: wangjian161 +Signed-off-by: shaodenghui +--- + hw/scsi/scsi-generic.c | 20 ++++++++++++++++++-- + 1 file changed, 18 insertions(+), 2 deletions(-) + +diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c +index 2417f0ad84..22efcd09a6 100644 +--- a/hw/scsi/scsi-generic.c ++++ b/hw/scsi/scsi-generic.c +@@ -192,6 +192,10 @@ static int scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s, int len) + (r->req.cmd.buf[1] & 0x01)) { + page = r->req.cmd.buf[2]; + if (page == 0xb0 && r->buflen >= 8) { ++ if (s->blocksize == 0) { ++ qemu_log("device blocksize is 0!\n"); ++ abort(); ++ } + uint8_t buf[16] = {}; + uint8_t buf_used = MIN(r->buflen, 16); + uint64_t max_transfer = calculate_max_transfer(s); +@@ -326,11 +330,23 @@ static void scsi_read_complete(void * opaque, int ret) + /* Snoop READ CAPACITY output to set the blocksize. */ + if (r->req.cmd.buf[0] == READ_CAPACITY_10 && + (ldl_be_p(&r->buf[0]) != 0xffffffffU || s->max_lba == 0)) { +- s->blocksize = ldl_be_p(&r->buf[4]); ++ int new_blocksize = ldl_be_p(&r->buf[4]); ++ if (s->blocksize != new_blocksize) { ++ qemu_log("device id=%s type=%d: blocksize %d change to %d\n", ++ s->qdev.id ? s->qdev.id : "null", s->type, ++ s->blocksize, new_blocksize); ++ } ++ s->blocksize = new_blocksize; + s->max_lba = ldl_be_p(&r->buf[0]) & 0xffffffffULL; + } else if (r->req.cmd.buf[0] == SERVICE_ACTION_IN_16 && + (r->req.cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) { +- s->blocksize = ldl_be_p(&r->buf[8]); ++ int new_blocksize = ldl_be_p(&r->buf[8]); ++ if (s->blocksize != new_blocksize) { ++ qemu_log("device id=%s type=%d: blocksize %d change to %d\n", ++ s->qdev.id ? 
s->qdev.id : "null", s->type, ++ s->blocksize, new_blocksize); ++ } ++ s->blocksize = new_blocksize; + s->max_lba = ldq_be_p(&r->buf[0]); + } + +-- +2.27.0 + diff --git a/scsi-bus-Refactor-the-code-that-retries-requests.patch b/scsi-bus-Refactor-the-code-that-retries-requests.patch new file mode 100644 index 0000000000000000000000000000000000000000..0226238a63c083bbab32c94ccc1033fec91bb3f2 --- /dev/null +++ b/scsi-bus-Refactor-the-code-that-retries-requests.patch @@ -0,0 +1,69 @@ +From d69428c793ca7311c55d0efdaa82100247e35dcc Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 21 Jan 2021 15:46:54 +0800 +Subject: [PATCH] scsi-bus: Refactor the code that retries requests + +Move the code that retries requests from scsi_dma_restart_bh() to its own, +non-static, function. This will allow us to call it from the +retry_request_cb() of scsi-disk in a future patch. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +Signed-off-by: Alex Chen +--- + hw/scsi/scsi-bus.c | 16 +++++++++++----- + include/hw/scsi/scsi.h | 1 + + 2 files changed, 12 insertions(+), 5 deletions(-) + +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index fc4b77fdb0..cecb02ae7e 100644 +--- a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -144,14 +144,10 @@ void scsi_bus_init_named(SCSIBus *bus, size_t bus_size, DeviceState *host, + qbus_set_bus_hotplug_handler(BUS(bus)); + } + +-static void scsi_dma_restart_bh(void *opaque) ++void scsi_retry_requests(SCSIDevice *s) + { +- SCSIDevice *s = opaque; + SCSIRequest *req, *next; + +- qemu_bh_delete(s->bh); +- s->bh = NULL; +- + aio_context_acquire(blk_get_aio_context(s->conf.blk)); + QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { + scsi_req_ref(req); +@@ -175,6 +171,16 @@ static void scsi_dma_restart_bh(void *opaque) + object_unref(OBJECT(s)); + } + ++static void scsi_dma_restart_bh(void *opaque) ++{ ++ SCSIDevice *s = opaque; ++ ++ qemu_bh_delete(s->bh); ++ s->bh = NULL; ++ ++ scsi_retry_requests(s); ++} ++ + void 
scsi_req_retry(SCSIRequest *req) + { + /* No need to save a reference, because scsi_dma_restart_bh just +diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h +index 3692ca82f3..6ec18bf12b 100644 +--- a/include/hw/scsi/scsi.h ++++ b/include/hw/scsi/scsi.h +@@ -226,6 +226,7 @@ void scsi_req_cancel_complete(SCSIRequest *req); + void scsi_req_cancel(SCSIRequest *req); + void scsi_req_cancel_async(SCSIRequest *req, Notifier *notifier); + void scsi_req_retry(SCSIRequest *req); ++void scsi_retry_requests(SCSIDevice *s); + void scsi_device_drained_begin(SCSIDevice *sdev); + void scsi_device_drained_end(SCSIDevice *sdev); + void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense); +-- +2.27.0 + diff --git a/scsi-bus-fix-incorrect-call-for-blk_error_retry_rese.patch b/scsi-bus-fix-incorrect-call-for-blk_error_retry_rese.patch new file mode 100644 index 0000000000000000000000000000000000000000..e6a960a3e62698688b1227110eccb7bef0b582f8 --- /dev/null +++ b/scsi-bus-fix-incorrect-call-for-blk_error_retry_rese.patch @@ -0,0 +1,81 @@ +From 60181b02c77f533105f904ab9e023bc22f65ad48 Mon Sep 17 00:00:00 2001 +From: Yan Wang +Date: Tue, 29 Mar 2022 12:05:56 +0800 +Subject: [PATCH] scsi-bus: fix incorrect call for + blk_error_retry_reset_timeout() + +Fix commit 52115ca0("scsi-disk: Add support for retry on errors"). +Call Stack: + ... + scsi_read_data() + scsi_do_read(r, 0) + scsi_disk_req_check_error() + blk_error_retry_reset_timeout() + blk->retry_start_time = 0; + +It will cause IO hang when storage network disconnected. Before the +storage network recovered, the upper call stack will reset the +retry_start_time, and cause the next IO operation not returned immediately. 
+ +Signed-off-by: Yan Wang +Signed-off-by: shaodenghui +--- + hw/scsi/scsi-disk.c | 20 ++++++++++++++++---- + 1 file changed, 16 insertions(+), 4 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 97d8c5bb30..845a2a7d5d 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -258,10 +258,8 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) + } + } + +-static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) ++static bool scsi_disk_req_handle_error(SCSIDiskReq *r, int ret, bool acct_failed) + { +- SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); +- + if (r->req.io_canceled) { + scsi_req_cancel_complete(&r->req); + return true; +@@ -271,6 +269,17 @@ static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) + return scsi_handle_rw_error(r, ret, acct_failed); + } + ++ return false; ++} ++ ++static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) ++{ ++ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); ++ ++ if (r->req.io_canceled || ret < 0) { ++ return scsi_disk_req_handle_error(r, ret, acct_failed); ++ } ++ + blk_error_retry_reset_timeout(s->qdev.conf.blk); + return false; + } +@@ -423,7 +432,7 @@ static void scsi_do_read(SCSIDiskReq *r, int ret) + SCSIDiskClass *sdc = (SCSIDiskClass *) object_get_class(OBJECT(s)); + + assert (r->req.aiocb == NULL); +- if (scsi_disk_req_check_error(r, ret, false)) { ++ if (scsi_disk_req_handle_error(r, ret, false)) { + goto done; + } + +@@ -464,6 +473,9 @@ static void scsi_do_read_cb(void *opaque, int ret) + block_acct_failed(blk_get_stats(s->qdev.conf.blk), &r->acct); + } else { + block_acct_done(blk_get_stats(s->qdev.conf.blk), &r->acct); ++ if (!r->req.io_canceled) { ++ blk_error_retry_reset_timeout(s->qdev.conf.blk); ++ } + } + scsi_do_read(opaque, ret); + aio_context_release(blk_get_aio_context(s->qdev.conf.blk)); +-- +2.27.0 + diff --git 
a/scsi-bus-fix-unmatched-object_unref.patch b/scsi-bus-fix-unmatched-object_unref.patch new file mode 100644 index 0000000000000000000000000000000000000000..006400c59b478b8ba5290cda83355f557053bce2 --- /dev/null +++ b/scsi-bus-fix-unmatched-object_unref.patch @@ -0,0 +1,43 @@ +From c2f55f210d4e021121865ea31037d2751188befd Mon Sep 17 00:00:00 2001 +From: Yan Wang +Date: Tue, 1 Mar 2022 20:12:12 +0800 +Subject: [PATCH] scsi-bus: fix unmatched object_unref() + +Fix commit 391dd8f1("scsi-bus: Refactor the code that retries requests"), +which split scsi_dma_restart_bh(), but the object_unref() belongs to +scsi_dma_restart_bh(). +So, we should mv object_unref() from scsi_retry_requests() to +scsi_dma_restart_bh(). + +Signed-off-by: Yan Wang +Signed-off-by: shaodenghui +--- + hw/scsi/scsi-bus.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c +index cecb02ae7e..7b60ac11f5 100644 +--- a/hw/scsi/scsi-bus.c ++++ b/hw/scsi/scsi-bus.c +@@ -167,8 +167,6 @@ void scsi_retry_requests(SCSIDevice *s) + scsi_req_unref(req); + } + aio_context_release(blk_get_aio_context(s->conf.blk)); +- /* Drop the reference that was acquired in scsi_dma_restart_cb */ +- object_unref(OBJECT(s)); + } + + static void scsi_dma_restart_bh(void *opaque) +@@ -179,6 +177,9 @@ static void scsi_dma_restart_bh(void *opaque) + s->bh = NULL; + + scsi_retry_requests(s); ++ ++ /* Drop the reference that was acquired in scsi_dma_restart_cb */ ++ object_unref(OBJECT(s)); + } + + void scsi_req_retry(SCSIRequest *req) +-- +2.27.0 + diff --git a/scsi-cdrom-Fix-crash-after-remote-cdrom-detached.patch b/scsi-cdrom-Fix-crash-after-remote-cdrom-detached.patch new file mode 100644 index 0000000000000000000000000000000000000000..70ec66267d03822265d6cabde1a71b1c845e34f1 --- /dev/null +++ b/scsi-cdrom-Fix-crash-after-remote-cdrom-detached.patch @@ -0,0 +1,36 @@ +From aac11bd40369aa31c9b3efb701242cc307ce5645 Mon Sep 17 00:00:00 2001 +From: WangJian +Date: Wed, 9 
Feb 2022 11:42:47 +0800 +Subject: [PATCH] scsi: cdrom: Fix crash after remote cdrom detached + +There is a small window between the twice blk_is_available in +scsi_disk_emulate_command which would cause crash due to the later +assertion if the remote cdrom is detached in this window. + +So this patch replaces assertions with return to avoid qemu crash. + +Signed-off-by: wangjian161 +Signed-off-by: shaodenghui +--- + hw/scsi/scsi-disk.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index f638854ebf..7f581efce8 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -2021,7 +2021,10 @@ static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf) + memset(outbuf, 0, r->buflen); + switch (req->cmd.buf[0]) { + case TEST_UNIT_READY: +- assert(blk_is_available(s->qdev.conf.blk)); ++ if (!blk_is_available(s->qdev.conf.blk)) { ++ scsi_check_condition(r, SENSE_CODE(NO_MEDIUM)); ++ return 0; ++ } + break; + case INQUIRY: + buflen = scsi_disk_emulate_inquiry(req, outbuf); +-- +2.27.0 + diff --git a/scsi-disk-Add-support-for-retry-on-errors.patch b/scsi-disk-Add-support-for-retry-on-errors.patch new file mode 100644 index 0000000000000000000000000000000000000000..29fbc7e5e56ecf1f121f4488f63990afa1f8aba9 --- /dev/null +++ b/scsi-disk-Add-support-for-retry-on-errors.patch @@ -0,0 +1,79 @@ +From 6100f909506025563ecec29b25f64cce75fc2353 Mon Sep 17 00:00:00 2001 +From: Jiahui Cen +Date: Thu, 21 Jan 2021 15:46:55 +0800 +Subject: [PATCH] scsi-disk: Add support for retry on errors + +Mark failed requests as to be retried and implement retry_request_cb to +handle these requests. 
+ +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +Signed-off-by: Alex Chen +--- + hw/scsi/scsi-disk.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 6691f5edb8..97d8c5bb30 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -249,6 +249,10 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) + scsi_req_retry(&r->req); + return true; + ++ case BLOCK_ERROR_ACTION_RETRY: ++ scsi_req_retry(&r->req); ++ return true; ++ + default: + g_assert_not_reached(); + } +@@ -256,6 +260,8 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int ret, bool acct_failed) + + static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) + { ++ SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); ++ + if (r->req.io_canceled) { + scsi_req_cancel_complete(&r->req); + return true; +@@ -265,6 +271,7 @@ static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) + return scsi_handle_rw_error(r, ret, acct_failed); + } + ++ blk_error_retry_reset_timeout(s->qdev.conf.blk); + return false; + } + +@@ -2391,6 +2398,13 @@ static void scsi_disk_resize_cb(void *opaque) + } + } + ++static void scsi_disk_retry_request(void *opaque) ++{ ++ SCSIDiskState *s = opaque; ++ ++ scsi_retry_requests(&s->qdev); ++} ++ + static void scsi_cd_change_media_cb(void *opaque, bool load, Error **errp) + { + SCSIDiskState *s = opaque; +@@ -2440,12 +2454,14 @@ static const BlockDevOps scsi_disk_removable_block_ops = { + .is_medium_locked = scsi_cd_is_medium_locked, + .is_tray_open = scsi_cd_is_tray_open, + .resize_cb = scsi_disk_resize_cb, ++ .retry_request_cb = scsi_disk_retry_request, + }; + + static const BlockDevOps scsi_disk_block_ops = { + .drained_begin = scsi_disk_drained_begin, + .drained_end = scsi_disk_drained_end, + .resize_cb = scsi_disk_resize_cb, ++ .retry_request_cb = scsi_disk_retry_request, + }; + + static void 
scsi_disk_unit_attention_reported(SCSIDevice *dev) +-- +2.27.0 + diff --git a/scsi-disk-define-props-in-scsi_block_disk-to-avoid-m.patch b/scsi-disk-define-props-in-scsi_block_disk-to-avoid-m.patch new file mode 100644 index 0000000000000000000000000000000000000000..36e16d60cd4f5c37103032ccc77144d7b6d90d38 --- /dev/null +++ b/scsi-disk-define-props-in-scsi_block_disk-to-avoid-m.patch @@ -0,0 +1,36 @@ +From 85307e997e4ee7a50a87ac2ac218911c0058d8e3 Mon Sep 17 00:00:00 2001 +From: Pan Nengyuan +Date: Mon, 13 Jan 2020 15:53:32 +0800 +Subject: [PATCH] scsi-disk: define props in scsi_block_disk to avoid memleaks + +scsi_block_realize() use scsi_realize() to init some props, but +these props is not defined in scsi_block_properties, so they will +not be freed. + +This patch defines these prop in scsi_block_disk_properties to avoid memleaks. + +Signed-off-by: Pan Nengyuan +Signed-off-by: Yan Wang +Signed-off-by: shaodenghui +--- + hw/scsi/scsi-disk.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c +index 6691f5edb8..f638854ebf 100644 +--- a/hw/scsi/scsi-disk.c ++++ b/hw/scsi/scsi-disk.c +@@ -3241,9 +3241,7 @@ static const TypeInfo scsi_cd_info = { + + #ifdef __linux__ + static Property scsi_block_properties[] = { +- DEFINE_BLOCK_ERROR_PROPERTIES(SCSIDiskState, qdev.conf), +- DEFINE_PROP_DRIVE("drive", SCSIDiskState, qdev.conf.blk), +- DEFINE_PROP_BOOL("share-rw", SCSIDiskState, qdev.conf.share_rw, false), ++ DEFINE_SCSI_DISK_PROPERTIES(), + DEFINE_PROP_UINT16("rotation_rate", SCSIDiskState, rotation_rate, 0), + DEFINE_PROP_UINT64("max_unmap_size", SCSIDiskState, max_unmap_size, + DEFAULT_MAX_UNMAP_SIZE), +-- +2.27.0 + diff --git a/shadow_dev-introduce-shadow-dev-for-virtio-net-devic.patch b/shadow_dev-introduce-shadow-dev-for-virtio-net-devic.patch new file mode 100644 index 0000000000000000000000000000000000000000..3ba1e54d8bafed805bdc0212dcb6add9bf99fa87 --- /dev/null +++ 
b/shadow_dev-introduce-shadow-dev-for-virtio-net-devic.patch @@ -0,0 +1,196 @@ +From c4829aa6fce007c995b21cfbd86de0473263c19a Mon Sep 17 00:00:00 2001 +From: Dongxu Sun +Date: Sat, 30 Mar 2024 12:49:05 +0800 +Subject: [PATCH] shadow_dev: introduce shadow dev for virtio-net device + +for virtio net devices, create the shadow device for vlpi +bypass inject supported. + +Signed-off-by: Wang Haibin +Signed-off-by: Yu Zenghui +Signed-off-by: Chen Qun +Signed-off-by: KunKun Jiang +Signed-off-by: Dongxu Sun +Signed-off-by: Yuan Zhang +--- + hw/virtio/virtio-pci.c | 32 ++++++++++++++++++++++++++ + include/sysemu/kvm.h | 5 +++++ + linux-headers/linux/kvm.h | 13 +++++++++++ + target/arm/kvm.c | 47 +++++++++++++++++++++++++++++++++++++++ + 4 files changed, 97 insertions(+) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index 134a8eaef6..f8adb0520a 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -922,18 +922,44 @@ undo: + } + return ret; + } ++ ++#ifdef __aarch64__ ++int __attribute__((weak)) kvm_create_shadow_device(PCIDevice *dev) ++{ ++ return 0; ++} ++ ++int __attribute__((weak)) kvm_delete_shadow_device(PCIDevice *dev) ++{ ++ return 0; ++} ++#endif ++ + static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs) + { + int queue_no; + int ret = 0; + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + ++#ifdef __aarch64__ ++ if (!strcmp(vdev->name, "virtio-net")) { ++ kvm_create_shadow_device(&proxy->pci_dev); ++ } ++#endif ++ + for (queue_no = 0; queue_no < nvqs; queue_no++) { + if (!virtio_queue_get_num(vdev, queue_no)) { + return -1; + } + ret = kvm_virtio_pci_vector_use_one(proxy, queue_no); + } ++ ++#ifdef __aarch64__ ++ if (!strcmp(vdev->name, "virtio-net") && ret != 0) { ++ kvm_delete_shadow_device(&proxy->pci_dev); ++ } ++#endif ++ + return ret; + } + +@@ -976,6 +1002,12 @@ static void kvm_virtio_pci_vector_vq_release(VirtIOPCIProxy *proxy, int nvqs) + } + kvm_virtio_pci_vector_release_one(proxy, 
queue_no); + } ++ ++#ifdef __aarch64__ ++ if (!strcmp(vdev->name, "virtio-net")) { ++ kvm_delete_shadow_device(&proxy->pci_dev); ++ } ++#endif + } + + static void kvm_virtio_pci_vector_config_release(VirtIOPCIProxy *proxy) +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index d614878164..b46d6203b4 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -538,4 +538,9 @@ bool kvm_arch_cpu_check_are_resettable(void); + bool kvm_dirty_ring_enabled(void); + + uint32_t kvm_dirty_ring_size(void); ++ ++#ifdef __aarch64__ ++int kvm_create_shadow_device(PCIDevice *dev); ++int kvm_delete_shadow_device(PCIDevice *dev); ++#endif + #endif +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 549fea3a97..56f6b2583f 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1198,6 +1198,8 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229 + #define KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES 230 + ++#define KVM_CAP_ARM_VIRT_MSI_BYPASS 799 ++ + #ifdef KVM_CAP_IRQ_ROUTING + + struct kvm_irq_routing_irqchip { +@@ -1524,6 +1526,17 @@ struct kvm_s390_ucas_mapping { + #define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config) + #define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data) + #define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data) ++ ++#ifdef __aarch64__ ++struct kvm_master_dev_info ++{ ++ __u32 nvectors; /* number of msi vectors */ ++ struct kvm_msi msi[0]; ++}; ++#define KVM_CREATE_SHADOW_DEV _IOW(KVMIO, 0xf0, struct kvm_master_dev_info) ++#define KVM_DEL_SHADOW_DEV _IOW(KVMIO, 0xf1, __u32) ++#endif ++ + /* Available with KVM_CAP_PIT_STATE2 */ + #define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2) + #define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 7903e2ddde..f59f4f81b2 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -26,6 +26,8 @@ + #include "trace.h" + #include "internals.h" 
+ #include "hw/pci/pci.h" ++#include "hw/pci/msi.h" ++#include "hw/pci/msix.h" + #include "exec/memattrs.h" + #include "exec/address-spaces.h" + #include "hw/boards.h" +@@ -1053,6 +1055,51 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, + return 0; + } + ++int kvm_create_shadow_device(PCIDevice *dev) ++{ ++ KVMState *s = kvm_state; ++ struct kvm_master_dev_info *mdi; ++ MSIMessage msg; ++ uint32_t vector, nvectors = msix_nr_vectors_allocated(dev); ++ uint32_t request_id; ++ int ret; ++ ++ if (!kvm_vm_check_extension(s, KVM_CAP_ARM_VIRT_MSI_BYPASS) || !nvectors) { ++ return 0; ++ } ++ ++ mdi = g_malloc0(sizeof(uint32_t) + sizeof(struct kvm_msi) * nvectors); ++ mdi->nvectors = nvectors; ++ request_id = pci_requester_id(dev); ++ ++ for (vector = 0; vector < nvectors; vector++) { ++ msg = msix_get_message(dev, vector); ++ mdi->msi[vector].address_lo = extract64(msg.address, 0, 32); ++ mdi->msi[vector].address_hi = extract64(msg.address, 32, 32); ++ mdi->msi[vector].data = le32_to_cpu(msg.data); ++ mdi->msi[vector].flags = KVM_MSI_VALID_DEVID; ++ mdi->msi[vector].devid = request_id; ++ memset(mdi->msi[vector].pad, 0, sizeof(mdi->msi[vector].pad)); ++ } ++ ++ ret = kvm_vm_ioctl(s, KVM_CREATE_SHADOW_DEV, mdi); ++ g_free(mdi); ++ return ret; ++} ++ ++int kvm_delete_shadow_device(PCIDevice *dev) ++{ ++ KVMState *s = kvm_state; ++ uint32_t request_id, nvectors = msix_nr_vectors_allocated(dev); ++ ++ if (!kvm_vm_check_extension(s, KVM_CAP_ARM_VIRT_MSI_BYPASS) || !nvectors) { ++ return 0; ++ } ++ ++ request_id = pci_requester_id(dev); ++ return kvm_vm_ioctl(s, KVM_DEL_SHADOW_DEV, &request_id); ++} ++ + int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, + int vector, PCIDevice *dev) + { +-- +2.27.0 + diff --git a/system-cpus-Fix-pause_all_vcpus-under-concurrent-env.patch b/system-cpus-Fix-pause_all_vcpus-under-concurrent-env.patch new file mode 100644 index 0000000000000000000000000000000000000000..4c1b707d2a181fcc357d04385781add6b274a8f2 
--- /dev/null +++ b/system-cpus-Fix-pause_all_vcpus-under-concurrent-env.patch @@ -0,0 +1,91 @@ +From 401e145800134d0310d613f48c4962a108b8ddda Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Sun, 17 Mar 2024 16:37:03 +0800 +Subject: [PATCH] system/cpus: Fix pause_all_vcpus() under concurrent + environment + +Both main loop thread and vCPU thread are allowed to call +pause_all_vcpus(), and in general resume_all_vcpus() is called +after it. Two issues live in pause_all_vcpus(): + +1. There is a possibility that while thread T1 waits on +qemu_pause_cond with bql unlocked, another thread has called +pause_all_vcpus() and resume_all_vcpus(), then thread T1 will +get stuck, because the condition all_vcpus_paused() is always false. + +2. After all_vcpus_paused() has been checked as true, we will +unlock bql to relock replay_mutex. While the bql was unlocked, +the vcpu's state may have been changed by another thread, so we +must retry. + +Signed-off-by: Keqian Zhu +--- + system/cpus.c | 29 ++++++++++++++++++++++++----- + 1 file changed, 24 insertions(+), 5 deletions(-) + +diff --git a/system/cpus.c b/system/cpus.c +index a444a747f0..7c5369fa9c 100644 +--- a/system/cpus.c ++++ b/system/cpus.c +@@ -551,12 +551,14 @@ static bool all_vcpus_paused(void) + return true; + } + +-void pause_all_vcpus(void) ++static void request_pause_all_vcpus(void) + { + CPUState *cpu; + +- qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false); + CPU_FOREACH(cpu) { ++ if (cpu->stopped) { ++ continue; ++ } + if (qemu_cpu_is_self(cpu)) { + qemu_cpu_stop(cpu, true); + } else { +@@ -564,6 +566,14 @@ void pause_all_vcpus(void) + qemu_cpu_kick(cpu); + } + } ++} ++ ++void pause_all_vcpus(void) ++{ ++ qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false); ++ ++retry: ++ request_pause_all_vcpus(); + + /* We need to drop the replay_lock so any vCPU threads woken up + * can finish their replay tasks +@@ -572,14 +582,23 @@ void pause_all_vcpus(void) + + while (!all_vcpus_paused()) { + qemu_cond_wait(&qemu_pause_cond,
&qemu_global_mutex); +- CPU_FOREACH(cpu) { +- qemu_cpu_kick(cpu); +- } ++ /* During we waited on qemu_pause_cond the bql was unlocked, ++ * the vcpu's state may has been changed by other thread, so ++ * we must request the pause state on all vcpus again. ++ */ ++ request_pause_all_vcpus(); + } + + qemu_mutex_unlock_iothread(); + replay_mutex_lock(); + qemu_mutex_lock_iothread(); ++ ++ /* During the bql was unlocked, the vcpu's state may has been ++ * changed by other thread, so we must retry. ++ */ ++ if (!all_vcpus_paused()) { ++ goto retry; ++ } + } + + void cpu_resume(CPUState *cpu) +-- +2.27.0 + diff --git a/system-cpus-Fix-resume_all_vcpus-under-vCPU-hotplug-.patch b/system-cpus-Fix-resume_all_vcpus-under-vCPU-hotplug-.patch new file mode 100644 index 0000000000000000000000000000000000000000..f4008abba6fee9de7707a9cdbf1ad33373a58270 --- /dev/null +++ b/system-cpus-Fix-resume_all_vcpus-under-vCPU-hotplug-.patch @@ -0,0 +1,43 @@ +From a29922f76c9b5064ddd2e686fa725b96c435e889 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Sun, 17 Mar 2024 16:37:04 +0800 +Subject: [PATCH] system/cpus: Fix resume_all_vcpus() under vCPU hotplug + condition + +For vCPU being hotplugged, qemu_init_vcpu() is called. In this +function, we set vcpu state as stopped, and then wait for the vcpu thread +to be created. + +As the vcpu state is stopped, it will inform us it has been created +and then wait on halt_cond. After we have realized the vcpu object, we +will resume the vcpu thread. + +However, while we wait for the vcpu thread to be created, the bql is +unlocked, and another thread is allowed to call resume_all_vcpus(), +which will resume the un-realized vcpu. + +This fixes the issue by filtering out un-realized vcpus during +resume_all_vcpus().
+ +Signed-off-by: Keqian Zhu +--- + system/cpus.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/system/cpus.c b/system/cpus.c +index 7c5369fa9c..f2289e9545 100644 +--- a/system/cpus.c ++++ b/system/cpus.c +@@ -618,6 +618,9 @@ void resume_all_vcpus(void) + + qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true); + CPU_FOREACH(cpu) { ++ if (!object_property_get_bool(OBJECT(cpu), "realized", &error_abort)) { ++ continue; ++ } + cpu_resume(cpu); + } + } +-- +2.27.0 + diff --git a/system-physmem-Fix-possible-double-free-when-destroy.patch b/system-physmem-Fix-possible-double-free-when-destroy.patch new file mode 100644 index 0000000000000000000000000000000000000000..a2f3853bc110515d6d30dcb404fee28aef1339eb --- /dev/null +++ b/system-physmem-Fix-possible-double-free-when-destroy.patch @@ -0,0 +1,64 @@ +From 5f7464524d0fb2c25c9bacfb550df92bef9bb3bf Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 26 Mar 2024 14:11:05 +0800 +Subject: [PATCH] system/physmem: Fix possible double free when destroy cpu as + +address_space_destroy() and g_free_rcu() both operate on cpuas->as +at rcu thread context asynchronously, each one is an rcu task +that has a different callback (the first callback is do_address_ +space_destroy() and the second callback is g_free()). + +It's possible that, while the first task is pending, the second +task overwrites the rcu callback (as the second task operates on +the same object). Then the g_free will be called twice on cpuas->as. + +Signed-off-by: Keqian Zhu +--- + include/exec/memory.h | 1 + + system/memory.c | 3 +++ + system/physmem.c | 2 +- + 3 files changed, 5 insertions(+), 1 deletion(-) + +diff --git a/include/exec/memory.h b/include/exec/memory.h +index e131c2682c..91c42c9a6a 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -1114,6 +1114,7 @@ struct AddressSpace { + struct rcu_head rcu; + char *name; + MemoryRegion *root; ++ bool free_in_rcu; + + /* Accessed via RCU.
*/ + struct FlatView *current_map; +diff --git a/system/memory.c b/system/memory.c +index 798b6c0a17..fb817e54bc 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -3130,6 +3130,9 @@ static void do_address_space_destroy(AddressSpace *as) + g_free(as->name); + g_free(as->ioeventfds); + memory_region_unref(as->root); ++ if (as->free_in_rcu) { ++ g_free(as); ++ } + } + + void address_space_destroy(AddressSpace *as) +diff --git a/system/physmem.c b/system/physmem.c +index 299174ad91..cbe838f203 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -788,8 +788,8 @@ void cpu_address_space_destroy(CPUState *cpu, int asidx) + memory_listener_unregister(&cpuas->tcg_as_listener); + } + ++ cpuas->as->free_in_rcu = true; + address_space_destroy(cpuas->as); +- g_free_rcu(cpuas->as, rcu); + + if (cpu->cpu_ases_ref_count == 1) { + g_free(cpu->cpu_ases); +-- +2.27.0 + diff --git a/target-arm-Add-support-of-unrealize-ARMCPU-during-vC.patch b/target-arm-Add-support-of-unrealize-ARMCPU-during-vC.patch new file mode 100644 index 0000000000000000000000000000000000000000..023fe7f49df7f1fa140ac9c9a7ea7dda1511f453 --- /dev/null +++ b/target-arm-Add-support-of-unrealize-ARMCPU-during-vC.patch @@ -0,0 +1,294 @@ +From b311feda2078e7ee8f060531d4d061beccbc2f77 Mon Sep 17 00:00:00 2001 +From: Salil Mehta +Date: Sat, 9 May 2020 20:13:10 +0100 +Subject: [PATCH] target/arm: Add support of *unrealize* ARMCPU during vCPU + Hot-unplug + +vCPU Hot-unplug will result in QOM CPU object unrealization which will do away +with all the vCPU thread creations, allocations, registrations that happened +as part of the realization process. This change introduces the ARM CPU unrealize +function taking care of exactly that. + +Note, initialized KVM vCPUs are not destroyed in host KVM but their Qemu context +is parked at the QEMU KVM layer. 
+ +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Keqian Zhu +Signed-off-by: Keqian Zhu +Reported-by: Vishnu Pajjuri +[VP: Identified CPU stall issue & suggested probable fix] +Signed-off-by: Salil Mehta +--- + target/arm/cpu.c | 101 +++++++++++++++++++++++++++++++++++++++++ + target/arm/cpu.h | 14 ++++++ + target/arm/gdbstub.c | 6 +++ + target/arm/helper.c | 25 ++++++++++ + target/arm/internals.h | 3 ++ + target/arm/kvm64.c | 4 ++ + 6 files changed, 153 insertions(+) + +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 18b8a79c8f..501f88eb2f 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -142,6 +142,16 @@ void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, + QLIST_INSERT_HEAD(&cpu->pre_el_change_hooks, entry, node); + } + ++void arm_unregister_pre_el_change_hooks(ARMCPU *cpu) ++{ ++ ARMELChangeHook *entry, *next; ++ ++ QLIST_FOREACH_SAFE(entry, &cpu->pre_el_change_hooks, node, next) { ++ QLIST_REMOVE(entry, node); ++ g_free(entry); ++ } ++} ++ + void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, + void *opaque) + { +@@ -153,6 +163,16 @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, + QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node); + } + ++void arm_unregister_el_change_hooks(ARMCPU *cpu) ++{ ++ ARMELChangeHook *entry, *next; ++ ++ QLIST_FOREACH_SAFE(entry, &cpu->el_change_hooks, node, next) { ++ QLIST_REMOVE(entry, node); ++ g_free(entry); ++ } ++} ++ + static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque) + { + /* Reset a single ARMCPRegInfo register */ +@@ -2390,6 +2410,85 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) + acc->parent_realize(dev, errp); + } + ++static void arm_cpu_unrealizefn(DeviceState *dev) ++{ ++ ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev); ++ ARMCPU *cpu = ARM_CPU(dev); ++ CPUARMState *env = &cpu->env; ++ CPUState *cs = CPU(dev); ++ bool has_secure; ++ ++ has_secure = cpu->has_el3 || 
arm_feature(env, ARM_FEATURE_M_SECURITY); ++ ++ /* rock 'n' un-roll, whatever happened in the arm_cpu_realizefn cleanly */ ++ cpu_address_space_destroy(cs, ARMASIdx_NS); ++ ++ if (cpu->tag_memory != NULL) { ++ cpu_address_space_destroy(cs, ARMASIdx_TagNS); ++ if (has_secure) { ++ cpu_address_space_destroy(cs, ARMASIdx_TagS); ++ } ++ } ++ ++ if (has_secure) { ++ cpu_address_space_destroy(cs, ARMASIdx_S); ++ } ++ ++ destroy_cpreg_list(cpu); ++ arm_cpu_unregister_gdb_regs(cpu); ++ unregister_cp_regs_for_features(cpu); ++ ++ if (cpu->sau_sregion && arm_feature(env, ARM_FEATURE_M_SECURITY)) { ++ g_free(env->sau.rbar); ++ g_free(env->sau.rlar); ++ } ++ ++ if (arm_feature(env, ARM_FEATURE_PMSA) && ++ arm_feature(env, ARM_FEATURE_V7) && ++ cpu->pmsav7_dregion) { ++ if (arm_feature(env, ARM_FEATURE_V8)) { ++ g_free(env->pmsav8.rbar[M_REG_NS]); ++ g_free(env->pmsav8.rlar[M_REG_NS]); ++ if (arm_feature(env, ARM_FEATURE_M_SECURITY)) { ++ g_free(env->pmsav8.rbar[M_REG_S]); ++ g_free(env->pmsav8.rlar[M_REG_S]); ++ } ++ } else { ++ g_free(env->pmsav7.drbar); ++ g_free(env->pmsav7.drsr); ++ g_free(env->pmsav7.dracr); ++ } ++ if (cpu->pmsav8r_hdregion) { ++ g_free(env->pmsav8.hprbar); ++ g_free(env->pmsav8.hprlar); ++ } ++ } ++ ++ if (arm_feature(env, ARM_FEATURE_PMU)) { ++ if (!kvm_enabled()) { ++ arm_unregister_pre_el_change_hooks(cpu); ++ arm_unregister_el_change_hooks(cpu); ++ } ++ ++#ifndef CONFIG_USER_ONLY ++ if (cpu->pmu_timer) { ++ timer_del(cpu->pmu_timer); ++ } ++#endif ++ } ++ ++ cpu_remove_sync(CPU(dev)); ++ acc->parent_unrealize(dev); ++ ++#ifndef CONFIG_USER_ONLY ++ timer_del(cpu->gt_timer[GTIMER_PHYS]); ++ timer_del(cpu->gt_timer[GTIMER_VIRT]); ++ timer_del(cpu->gt_timer[GTIMER_HYP]); ++ timer_del(cpu->gt_timer[GTIMER_SEC]); ++ timer_del(cpu->gt_timer[GTIMER_HYPVIRT]); ++#endif ++} ++ + static ObjectClass *arm_cpu_class_by_name(const char *cpu_model) + { + ObjectClass *oc; +@@ -2492,6 +2591,8 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) + + 
device_class_set_parent_realize(dc, arm_cpu_realizefn, + &acc->parent_realize); ++ device_class_set_parent_unrealize(dc, arm_cpu_unrealizefn, ++ &acc->parent_unrealize); + + device_class_set_props(dc, arm_cpu_properties); + +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index 145d3dbf13..c51a0e3467 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -1138,6 +1138,7 @@ struct ARMCPUClass { + + const ARMCPUInfo *info; + DeviceRealize parent_realize; ++ DeviceUnrealize parent_unrealize; + ResettablePhases parent_phases; + }; + +@@ -3359,6 +3360,13 @@ static inline AddressSpace *arm_addressspace(CPUState *cs, MemTxAttrs attrs) + */ + void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, + void *opaque); ++/** ++ * arm_unregister_pre_el_change_hook: ++ * unregister all pre EL change hook functions. Generally called during ++ * unrealize'ing leg ++ */ ++void arm_unregister_pre_el_change_hooks(ARMCPU *cpu); ++ + /** + * arm_register_el_change_hook: + * Register a hook function which will be called immediately after this +@@ -3371,6 +3379,12 @@ void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, + */ + void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, void + *opaque); ++/** ++ * arm_unregister_el_change_hook: ++ * unregister all EL change hook functions. 
Generally called during ++ * unrealize'ing leg ++ */ ++void arm_unregister_el_change_hooks(ARMCPU *cpu); + + /** + * arm_rebuild_hflags: +diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c +index 28f546a5ff..5ba1e28e34 100644 +--- a/target/arm/gdbstub.c ++++ b/target/arm/gdbstub.c +@@ -553,3 +553,9 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu) + } + #endif /* CONFIG_TCG */ + } ++ ++void arm_cpu_unregister_gdb_regs(ARMCPU *cpu) ++{ ++ CPUState *cs = CPU(cpu); ++ gdb_unregister_coprocessor_all(cs); ++} +diff --git a/target/arm/helper.c b/target/arm/helper.c +index 2746d3fdac..e47498828c 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -263,6 +263,19 @@ void init_cpreg_list(ARMCPU *cpu) + g_list_free(keys); + } + ++void destroy_cpreg_list(ARMCPU *cpu) ++{ ++ assert(cpu->cpreg_indexes); ++ assert(cpu->cpreg_values); ++ assert(cpu->cpreg_vmstate_indexes); ++ assert(cpu->cpreg_vmstate_values); ++ ++ g_free(cpu->cpreg_indexes); ++ g_free(cpu->cpreg_values); ++ g_free(cpu->cpreg_vmstate_indexes); ++ g_free(cpu->cpreg_vmstate_values); ++} ++ + /* + * Some registers are not accessible from AArch32 EL3 if SCR.NS == 0. + */ +@@ -9438,6 +9451,18 @@ void register_cp_regs_for_features(ARMCPU *cpu) + #endif + } + ++void unregister_cp_regs_for_features(ARMCPU *cpu) ++{ ++ CPUARMState *env = &cpu->env; ++ if (arm_feature(env, ARM_FEATURE_M)) { ++ /* M profile has no coprocessor registers */ ++ return; ++ } ++ ++ /* empty it all. unregister all the coprocessor registers */ ++ g_hash_table_remove_all(cpu->cp_regs); ++} ++ + /* Sort alphabetically by type name, except for "any". 
*/ + static gint arm_cpu_list_compare(gconstpointer a, gconstpointer b) + { +diff --git a/target/arm/internals.h b/target/arm/internals.h +index 143d57c0fe..c3a7682f05 100644 +--- a/target/arm/internals.h ++++ b/target/arm/internals.h +@@ -187,9 +187,12 @@ void arm_cpu_register(const ARMCPUInfo *info); + void aarch64_cpu_register(const ARMCPUInfo *info); + + void register_cp_regs_for_features(ARMCPU *cpu); ++void unregister_cp_regs_for_features(ARMCPU *cpu); + void init_cpreg_list(ARMCPU *cpu); ++void destroy_cpreg_list(ARMCPU *cpu); + + void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu); ++void arm_cpu_unregister_gdb_regs(ARMCPU *cpu); + void arm_translate_init(void); + + void arm_restore_state_to_opc(CPUState *cs, +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 03ce1e7525..9c3a35d63a 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -647,6 +647,10 @@ int kvm_arch_init_vcpu(CPUState *cs) + + int kvm_arch_destroy_vcpu(CPUState *cs) + { ++ if (cs->thread_id) { ++ qemu_del_vm_change_state_handler(cs->vmcse); ++ } ++ + return 0; + } + +-- +2.27.0 + diff --git a/target-arm-kvm-Write-CPU-state-back-to-KVM-on-reset.patch b/target-arm-kvm-Write-CPU-state-back-to-KVM-on-reset.patch new file mode 100644 index 0000000000000000000000000000000000000000..0b4922cc702a47bb45f2fa001c86719a8bb9c0c9 --- /dev/null +++ b/target-arm-kvm-Write-CPU-state-back-to-KVM-on-reset.patch @@ -0,0 +1,50 @@ +From a079801cd3ae6484cad6826f20bcf4ecc7e97ead Mon Sep 17 00:00:00 2001 +From: Jean-Philippe Brucker +Date: Wed, 5 May 2021 15:43:27 +0200 +Subject: [PATCH] target/arm/kvm: Write CPU state back to KVM on reset + +When a KVM vCPU is reset following a PSCI CPU_ON call, its power state +is not synchronized with KVM at the moment. Because the vCPU is not +marked dirty, we miss the call to kvm_arch_put_registers() that writes +to KVM's MP_STATE. Force mp_state synchronization. 
+ +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: Salil Mehta +--- + target/arm/kvm.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 70cf15b550..aca652621f 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -636,11 +636,12 @@ void kvm_arm_cpu_post_load(ARMCPU *cpu) + void kvm_arm_reset_vcpu(ARMCPU *cpu) + { + int ret; ++ CPUState *cs = CPU(cpu); + + /* Re-init VCPU so that all registers are set to + * their respective reset values. + */ +- ret = kvm_arm_vcpu_init(CPU(cpu)); ++ ret = kvm_arm_vcpu_init(cs); + if (ret < 0) { + fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret)); + abort(); +@@ -657,6 +658,12 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu) + * for the same reason we do so in kvm_arch_get_registers(). + */ + write_list_to_cpustate(cpu); ++ ++ /* ++ * Ensure we call kvm_arch_put_registers(). The vCPU isn't marked dirty if ++ * it was parked in KVM and is now booting from a PSCI CPU_ON call. 
++ */ ++ cs->vcpu_dirty = true; + } + + void kvm_arm_create_host_vcpu(ARMCPU *cpu) +-- +2.27.0 + diff --git a/target-arm-kvm-tcg-Register-Handle-SMCCC-hypercall-e.patch b/target-arm-kvm-tcg-Register-Handle-SMCCC-hypercall-e.patch new file mode 100644 index 0000000000000000000000000000000000000000..1b4bfbb2d6aa800e60bee7e96d429be813c6271c --- /dev/null +++ b/target-arm-kvm-tcg-Register-Handle-SMCCC-hypercall-e.patch @@ -0,0 +1,407 @@ +From 9c4a7c44c3c9e89c6aeab85b00c72a09a0c13940 Mon Sep 17 00:00:00 2001 +From: Author Salil Mehta +Date: Sat, 27 May 2023 22:13:13 +0200 +Subject: [PATCH] target/arm/kvm,tcg: Register/Handle SMCCC hypercall exits to + VMM/Qemu + +Add registration and Handling of HVC/SMC hypercall exits to VMM + +Co-developed-by: Salil Mehta +Signed-off-by: Salil Mehta +Co-developed-by: Jean-Philippe Brucker +Signed-off-by: Jean-Philippe Brucker +Signed-off-by: Salil Mehta +--- + target/arm/arm-powerctl.c | 51 +++++++++++++++++++++++++++++------- + target/arm/helper.c | 2 +- + target/arm/internals.h | 11 -------- + target/arm/kvm.c | 52 +++++++++++++++++++++++++++++++++++++ + target/arm/kvm64.c | 46 +++++++++++++++++++++++++++++--- + target/arm/kvm_arm.h | 13 ++++++++++ + target/arm/meson.build | 1 + + target/arm/{tcg => }/psci.c | 8 ++++++ + target/arm/tcg/meson.build | 4 --- + 9 files changed, 160 insertions(+), 28 deletions(-) + rename target/arm/{tcg => }/psci.c (97%) + +diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c +index c078849403..fb19b04189 100644 +--- a/target/arm/arm-powerctl.c ++++ b/target/arm/arm-powerctl.c +@@ -16,6 +16,7 @@ + #include "qemu/log.h" + #include "qemu/main-loop.h" + #include "sysemu/tcg.h" ++#include "hw/boards.h" + + #ifndef DEBUG_ARM_POWERCTL + #define DEBUG_ARM_POWERCTL 0 +@@ -28,18 +29,37 @@ + } \ + } while (0) + ++static CPUArchId *arm_get_archid_by_id(uint64_t id) ++{ ++ int n; ++ CPUArchId *arch_id; ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ ++ /* ++ * At this point disabled CPUs don't 
have a CPUState, but their CPUArchId ++ * exists. ++ * ++ * TODO: Is arch_id == mp_affinity? This needs work. ++ */ ++ for (n = 0; n < ms->possible_cpus->len; n++) { ++ arch_id = &ms->possible_cpus->cpus[n]; ++ ++ if (arch_id->arch_id == id) { ++ return arch_id; ++ } ++ } ++ return NULL; ++} ++ + CPUState *arm_get_cpu_by_id(uint64_t id) + { +- CPUState *cpu; ++ CPUArchId *arch_id; + + DPRINTF("cpu %" PRId64 "\n", id); + +- CPU_FOREACH(cpu) { +- ARMCPU *armcpu = ARM_CPU(cpu); +- +- if (armcpu->mp_affinity == id) { +- return cpu; +- } ++ arch_id = arm_get_archid_by_id(id); ++ if (arch_id && arch_id->cpu) { ++ return CPU(arch_id->cpu); + } + + qemu_log_mask(LOG_GUEST_ERROR, +@@ -97,6 +117,7 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id, + { + CPUState *target_cpu_state; + ARMCPU *target_cpu; ++ CPUArchId *arch_id; + struct CpuOnInfo *info; + + assert(qemu_mutex_iothread_locked()); +@@ -117,12 +138,24 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id, + } + + /* Retrieve the cpu we are powering up */ +- target_cpu_state = arm_get_cpu_by_id(cpuid); +- if (!target_cpu_state) { ++ arch_id = arm_get_archid_by_id(cpuid); ++ if (!arch_id) { + /* The cpu was not found */ + return QEMU_ARM_POWERCTL_INVALID_PARAM; + } + ++ target_cpu_state = CPU(arch_id->cpu); ++ if (!qemu_enabled_cpu(target_cpu_state)) { ++ /* ++ * The cpu is not plugged in or disabled. 
We should return appropriate ++ * value as introduced in DEN0022E PSCI 1.2 issue E ++ */ ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "[ARM]%s: Denying attempt to online removed/disabled " ++ "CPU%" PRId64"\n", __func__, cpuid); ++ return QEMU_ARM_POWERCTL_IS_OFF; ++ } ++ + target_cpu = ARM_CPU(target_cpu_state); + if (target_cpu->power_state == PSCI_ON) { + qemu_log_mask(LOG_GUEST_ERROR, +diff --git a/target/arm/helper.c b/target/arm/helper.c +index e47498828c..793aa89cc6 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -11346,7 +11346,7 @@ void arm_cpu_do_interrupt(CPUState *cs) + env->exception.syndrome); + } + +- if (tcg_enabled() && arm_is_psci_call(cpu, cs->exception_index)) { ++ if (arm_is_psci_call(cpu, cs->exception_index)) { + arm_handle_psci_call(cpu); + qemu_log_mask(CPU_LOG_INT, "...handled as PSCI call\n"); + return; +diff --git a/target/arm/internals.h b/target/arm/internals.h +index c3a7682f05..20b9c1da38 100644 +--- a/target/arm/internals.h ++++ b/target/arm/internals.h +@@ -314,21 +314,10 @@ vaddr arm_adjust_watchpoint_address(CPUState *cs, vaddr addr, int len); + /* Callback function for when a watchpoint or breakpoint triggers. */ + void arm_debug_excp_handler(CPUState *cs); + +-#if defined(CONFIG_USER_ONLY) || !defined(CONFIG_TCG) +-static inline bool arm_is_psci_call(ARMCPU *cpu, int excp_type) +-{ +- return false; +-} +-static inline void arm_handle_psci_call(ARMCPU *cpu) +-{ +- g_assert_not_reached(); +-} +-#else + /* Return true if the r0/x0 value indicates that this SMC/HVC is a PSCI call. 
*/ + bool arm_is_psci_call(ARMCPU *cpu, int excp_type); + /* Actually handle a PSCI call */ + void arm_handle_psci_call(ARMCPU *cpu); +-#endif + + /** + * arm_clear_exclusive: clear the exclusive monitor +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index aca652621f..66caf9e5e7 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -260,6 +260,7 @@ int kvm_arch_get_default_type(MachineState *ms) + int kvm_arch_init(MachineState *ms, KVMState *s) + { + int ret = 0; ++ + /* For ARM interrupt delivery is always asynchronous, + * whether we are using an in-kernel VGIC or not. + */ +@@ -310,6 +311,22 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + } + } + ++ /* ++ * To be able to handle PSCI CPU ON calls in QEMU, we need to install SMCCC ++ * filter in the Host KVM. This is required to support features like ++ * virtual CPU Hotplug on ARM platforms. ++ */ ++ if (kvm_arm_set_smccc_filter(PSCI_0_2_FN64_CPU_ON, ++ KVM_SMCCC_FILTER_FWD_TO_USER)) { ++ error_report("CPU On PSCI-to-user-space fwd filter install failed"); ++ abort(); ++ } ++ if (kvm_arm_set_smccc_filter(PSCI_0_2_FN_CPU_OFF, ++ KVM_SMCCC_FILTER_FWD_TO_USER)) { ++ error_report("CPU Off PSCI-to-user-space fwd filter install failed"); ++ abort(); ++ } ++ + kvm_arm_init_debug(s); + + return ret; +@@ -966,6 +983,38 @@ static int kvm_arm_handle_dabt_nisv(CPUState *cs, uint64_t esr_iss, + return -1; + } + ++static int kvm_arm_handle_hypercall(CPUState *cs, struct kvm_run *run) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ CPUARMState *env = &cpu->env; ++ ++ kvm_cpu_synchronize_state(cs); ++ ++ /* ++ * hard coding immediate to 0 as we dont expect non-zero value as of now ++ * This might change in future versions. Hence, KVM_GET_ONE_REG could be ++ * used in such cases but it must be enhanced then only synchronize will ++ * also fetch ESR_EL2 value. 
++ */ ++ if (run->hypercall.flags == KVM_HYPERCALL_EXIT_SMC) { ++ cs->exception_index = EXCP_SMC; ++ env->exception.syndrome = syn_aa64_smc(0); ++ } else { ++ cs->exception_index = EXCP_HVC; ++ env->exception.syndrome = syn_aa64_hvc(0); ++ } ++ env->exception.target_el = 1; ++ qemu_mutex_lock_iothread(); ++ arm_cpu_do_interrupt(cs); ++ qemu_mutex_unlock_iothread(); ++ ++ /* ++ * For PSCI, exit the kvm_run loop and process the work. Especially ++ * important if this was a CPU_OFF command and we can't return to the guest. ++ */ ++ return EXCP_INTERRUPT; ++} ++ + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + { + int ret = 0; +@@ -981,6 +1030,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + ret = kvm_arm_handle_dabt_nisv(cs, run->arm_nisv.esr_iss, + run->arm_nisv.fault_ipa); + break; ++ case KVM_EXIT_HYPERCALL: ++ ret = kvm_arm_handle_hypercall(cs, run); ++ break; + default: + qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n", + __func__, run->exit_reason); +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 9c3a35d63a..00b257bb4b 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -111,6 +111,25 @@ bool kvm_arm_hw_debug_active(CPUState *cs) + return ((cur_hw_wps > 0) || (cur_hw_bps > 0)); + } + ++static bool kvm_arm_set_vm_attr(struct kvm_device_attr *attr, const char *name) ++{ ++ int err; ++ ++ err = kvm_vm_ioctl(kvm_state, KVM_HAS_DEVICE_ATTR, attr); ++ if (err != 0) { ++ error_report("%s: KVM_HAS_DEVICE_ATTR: %s", name, strerror(-err)); ++ return false; ++ } ++ ++ err = kvm_vm_ioctl(kvm_state, KVM_SET_DEVICE_ATTR, attr); ++ if (err != 0) { ++ error_report("%s: KVM_SET_DEVICE_ATTR: %s", name, strerror(-err)); ++ return false; ++ } ++ ++ return true; ++} ++ + static bool kvm_arm_set_device_attr(CPUState *cs, struct kvm_device_attr *attr, + const char *name) + { +@@ -181,6 +200,28 @@ void kvm_arm_pvtime_init(CPUState *cs, uint64_t ipa) + } + } + ++int kvm_arm_set_smccc_filter(uint64_t func, uint8_t 
faction) ++{ ++ struct kvm_smccc_filter filter = { ++ .base = func, ++ .nr_functions = 1, ++ .action = faction, ++ }; ++ struct kvm_device_attr attr = { ++ .group = KVM_ARM_VM_SMCCC_CTRL, ++ .attr = KVM_ARM_VM_SMCCC_FILTER, ++ .flags = 0, ++ .addr = (uintptr_t)&filter, ++ }; ++ ++ if (!kvm_arm_set_vm_attr(&attr, "SMCCC Filter")) { ++ error_report("failed to set SMCCC filter in KVM Host"); ++ return -1; ++ } ++ ++ return 0; ++} ++ + static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id) + { + uint64_t ret; +@@ -629,9 +670,8 @@ int kvm_arch_init_vcpu(CPUState *cs) + } + + /* +- * When KVM is in use, PSCI is emulated in-kernel and not by qemu. +- * Currently KVM has its own idea about MPIDR assignment, so we +- * override our defaults with what we get from KVM. ++ * KVM may emulate PSCI in-kernel. Currently KVM has its own idea about ++ * MPIDR assignment, so we override our defaults with what we get from KVM. + */ + ret = kvm_get_one_reg(cs, ARM64_SYS_REG(ARM_CPU_ID_MPIDR), &mpidr); + if (ret) { +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 31408499b3..bf4df54c96 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -388,6 +388,15 @@ void kvm_arm_pvtime_init(CPUState *cs, uint64_t ipa); + + int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level); + ++/** ++ * kvm_arm_set_smccc_filter ++ * @func: funcion ++ * @faction: SMCCC filter action(handle, deny, fwd-to-user) to be deployed ++ * ++ * Sets the ARMs SMC-CC filter in KVM Host for selective hypercall exits ++ */ ++int kvm_arm_set_smccc_filter(uint64_t func, uint8_t faction); ++ + #else + + /* +@@ -462,6 +471,10 @@ static inline uint32_t kvm_arm_sve_get_vls(CPUState *cs) + g_assert_not_reached(); + } + ++static inline int kvm_arm_set_smccc_filter(uint64_t func, uint8_t faction) ++{ ++ g_assert_not_reached(); ++} + #endif + + /** +diff --git a/target/arm/meson.build b/target/arm/meson.build +index 5d04a8e94f..d1dd4932ed 100644 +--- a/target/arm/meson.build ++++ 
b/target/arm/meson.build +@@ -23,6 +23,7 @@ arm_system_ss.add(files( + 'arm-qmp-cmds.c', + 'cortex-regs.c', + 'machine.c', ++ 'psci.c', + 'ptw.c', + )) + +diff --git a/target/arm/tcg/psci.c b/target/arm/psci.c +similarity index 97% +rename from target/arm/tcg/psci.c +rename to target/arm/psci.c +index 6c1239bb96..a8690a16af 100644 +--- a/target/arm/tcg/psci.c ++++ b/target/arm/psci.c +@@ -21,7 +21,9 @@ + #include "exec/helper-proto.h" + #include "kvm-consts.h" + #include "qemu/main-loop.h" ++#include "qemu/error-report.h" + #include "sysemu/runstate.h" ++#include "sysemu/tcg.h" + #include "internals.h" + #include "arm-powerctl.h" + +@@ -157,6 +159,11 @@ void arm_handle_psci_call(ARMCPU *cpu) + case QEMU_PSCI_0_1_FN_CPU_SUSPEND: + case QEMU_PSCI_0_2_FN_CPU_SUSPEND: + case QEMU_PSCI_0_2_FN64_CPU_SUSPEND: ++ if (!tcg_enabled()) { ++ warn_report("CPU suspend not supported in non-tcg mode"); ++ break; ++ } ++#ifdef CONFIG_TCG + /* Affinity levels are not supported in QEMU */ + if (param[1] & 0xfffe0000) { + ret = QEMU_PSCI_RET_INVALID_PARAMS; +@@ -169,6 +176,7 @@ void arm_handle_psci_call(ARMCPU *cpu) + env->regs[0] = 0; + } + helper_wfi(env, 4); ++#endif + break; + case QEMU_PSCI_1_0_FN_PSCI_FEATURES: + switch (param[1]) { +diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build +index 6fca38f2cc..ad3cfcb3bd 100644 +--- a/target/arm/tcg/meson.build ++++ b/target/arm/tcg/meson.build +@@ -51,7 +51,3 @@ arm_ss.add(when: 'TARGET_AARCH64', if_true: files( + 'sme_helper.c', + 'sve_helper.c', + )) +- +-arm_system_ss.add(files( +- 'psci.c', +-)) +-- +2.27.0 + diff --git a/target-i386-Add-new-CPU-model-SierraForest.patch b/target-i386-Add-new-CPU-model-SierraForest.patch new file mode 100644 index 0000000000000000000000000000000000000000..156e1d6db0c10ae1d9de2f7d0aea57a887a3d4e1 --- /dev/null +++ b/target-i386-Add-new-CPU-model-SierraForest.patch @@ -0,0 +1,212 @@ +From c61eabb8aa86fed57c2cd5394e0e89e350c99c5e Mon Sep 17 00:00:00 2001 +From: Tao Su +Date: Wed, 20 
Mar 2024 10:10:44 +0800 +Subject: [PATCH] target/i386: Add new CPU model SierraForest +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 6e82d3b6220777667968a04c87e1667f164ebe88 upstream. + +According to table 1-2 in Intel Architecture Instruction Set Extensions and +Future Features (rev 051) [1], SierraForest has the following new features +which have already been virtualized: + +- CMPCCXADD CPUID.(EAX=7,ECX=1):EAX[bit 7] +- AVX-IFMA CPUID.(EAX=7,ECX=1):EAX[bit 23] +- AVX-VNNI-INT8 CPUID.(EAX=7,ECX=1):EDX[bit 4] +- AVX-NE-CONVERT CPUID.(EAX=7,ECX=1):EDX[bit 5] + +Add above features to new CPU model SierraForest. Comparing with GraniteRapids +CPU model, SierraForest bare-metal removes the following features: + +- HLE CPUID.(EAX=7,ECX=0):EBX[bit 4] +- RTM CPUID.(EAX=7,ECX=0):EBX[bit 11] +- AVX512F CPUID.(EAX=7,ECX=0):EBX[bit 16] +- AVX512DQ CPUID.(EAX=7,ECX=0):EBX[bit 17] +- AVX512_IFMA CPUID.(EAX=7,ECX=0):EBX[bit 21] +- AVX512CD CPUID.(EAX=7,ECX=0):EBX[bit 28] +- AVX512BW CPUID.(EAX=7,ECX=0):EBX[bit 30] +- AVX512VL CPUID.(EAX=7,ECX=0):EBX[bit 31] +- AVX512_VBMI CPUID.(EAX=7,ECX=0):ECX[bit 1] +- AVX512_VBMI2 CPUID.(EAX=7,ECX=0):ECX[bit 6] +- AVX512_VNNI CPUID.(EAX=7,ECX=0):ECX[bit 11] +- AVX512_BITALG CPUID.(EAX=7,ECX=0):ECX[bit 12] +- AVX512_VPOPCNTDQ CPUID.(EAX=7,ECX=0):ECX[bit 14] +- LA57 CPUID.(EAX=7,ECX=0):ECX[bit 16] +- TSXLDTRK CPUID.(EAX=7,ECX=0):EDX[bit 16] +- AMX-BF16 CPUID.(EAX=7,ECX=0):EDX[bit 22] +- AVX512_FP16 CPUID.(EAX=7,ECX=0):EDX[bit 23] +- AMX-TILE CPUID.(EAX=7,ECX=0):EDX[bit 24] +- AMX-INT8 CPUID.(EAX=7,ECX=0):EDX[bit 25] +- AVX512_BF16 CPUID.(EAX=7,ECX=1):EAX[bit 5] +- fast zero-length MOVSB CPUID.(EAX=7,ECX=1):EAX[bit 10] +- fast short CMPSB, SCASB CPUID.(EAX=7,ECX=1):EAX[bit 12] +- AMX-FP16 CPUID.(EAX=7,ECX=1):EAX[bit 21] +- PREFETCHI CPUID.(EAX=7,ECX=1):EDX[bit 14] +- XFD CPUID.(EAX=0xD,ECX=1):EAX[bit 4] +- EPT_PAGE_WALK_LENGTH_5 VMX_EPT_VPID_CAP(0x48c)[bit 7] + +Add all features of 
GraniteRapids CPU model except above features to +SierraForest CPU model. + +SierraForest doesn’t support TSX and RTM but supports TAA_NO. When RTM is +not enabled in host, KVM will not report TAA_NO. So, just don't include +TAA_NO in SierraForest CPU model. + +[1] https://cdrdv2.intel.com/v1/dl/getContent/671368 + +Intel-SIG: commit 6e82d3b62207 target/i386: Add new CPU model SierraForest. +8.2.0-Add SRF CPU module support + +Reviewed-by: Zhao Liu +Reviewed-by: Xiaoyao Li +Signed-off-by: Tao Su +Message-ID: <20240320021044.508263-1-tao1.su@linux.intel.com> +Signed-off-by: Paolo Bonzini +[ Quanxian Wang: amend commit log ] +Signed-off-by: Quanxian Wang +--- + target/i386/cpu.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 126 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 6abe33946c..57a832cea2 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -4109,6 +4109,132 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ }, + }, + }, ++ { ++ .name = "SierraForest", ++ .level = 0x23, ++ .vendor = CPUID_VENDOR_INTEL, ++ .family = 6, ++ .model = 175, ++ .stepping = 0, ++ /* ++ * please keep the ascending order so that we can have a clear view of ++ * bit position of each feature.
++ */ ++ .features[FEAT_1_EDX] = ++ CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC | ++ CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | ++ CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | ++ CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR | ++ CPUID_SSE | CPUID_SSE2, ++ .features[FEAT_1_ECX] = ++ CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 | ++ CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 | ++ CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | ++ CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES | ++ CPUID_EXT_XSAVE | CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND, ++ .features[FEAT_8000_0001_EDX] = ++ CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | ++ CPUID_EXT2_RDTSCP | CPUID_EXT2_LM, ++ .features[FEAT_8000_0001_ECX] = ++ CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH, ++ .features[FEAT_8000_0008_EBX] = ++ CPUID_8000_0008_EBX_WBNOINVD, ++ .features[FEAT_7_0_EBX] = ++ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | ++ CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | ++ CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | ++ CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | ++ CPUID_7_0_EBX_SHA_NI, ++ .features[FEAT_7_0_ECX] = ++ CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_GFNI | ++ CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | ++ CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_BUS_LOCK_DETECT, ++ .features[FEAT_7_0_EDX] = ++ CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_SERIALIZE | ++ CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_ARCH_CAPABILITIES | ++ CPUID_7_0_EDX_SPEC_CTRL_SSBD, ++ .features[FEAT_ARCH_CAPABILITIES] = ++ MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL | ++ MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO | ++ MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_SBDR_SSDP_NO | ++ MSR_ARCH_CAP_FBSDP_NO | MSR_ARCH_CAP_PSDP_NO | ++ MSR_ARCH_CAP_PBRSB_NO, ++ 
.features[FEAT_XSAVE] = ++ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | ++ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, ++ .features[FEAT_6_EAX] = ++ CPUID_6_EAX_ARAT, ++ .features[FEAT_7_1_EAX] = ++ CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_CMPCCXADD | ++ CPUID_7_1_EAX_FSRS | CPUID_7_1_EAX_AVX_IFMA, ++ .features[FEAT_7_1_EDX] = ++ CPUID_7_1_EDX_AVX_VNNI_INT8 | CPUID_7_1_EDX_AVX_NE_CONVERT, ++ .features[FEAT_7_2_EDX] = ++ CPUID_7_2_EDX_MCDT_NO, ++ .features[FEAT_VMX_BASIC] = ++ MSR_VMX_BASIC_INS_OUTS | MSR_VMX_BASIC_TRUE_CTLS, ++ .features[FEAT_VMX_ENTRY_CTLS] = ++ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_IA32E_MODE | ++ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_ENTRY_LOAD_IA32_PAT | VMX_VM_ENTRY_LOAD_IA32_EFER, ++ .features[FEAT_VMX_EPT_VPID_CAPS] = ++ MSR_VMX_EPT_EXECONLY | MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | ++ MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB | ++ MSR_VMX_EPT_INVEPT | MSR_VMX_EPT_AD_BITS | ++ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | ++ MSR_VMX_EPT_INVVPID_ALL_CONTEXT | ++ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, ++ .features[FEAT_VMX_EXIT_CTLS] = ++ VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | ++ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | ++ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_IA32_PAT | ++ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | ++ VMX_VM_EXIT_LOAD_IA32_EFER | VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, ++ .features[FEAT_VMX_MISC] = ++ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_ACTIVITY_HLT | ++ MSR_VMX_MISC_VMWRITE_VMEXIT, ++ .features[FEAT_VMX_PINBASED_CTLS] = ++ VMX_PIN_BASED_EXT_INTR_MASK | VMX_PIN_BASED_NMI_EXITING | ++ VMX_PIN_BASED_VIRTUAL_NMIS | VMX_PIN_BASED_VMX_PREEMPTION_TIMER | ++ VMX_PIN_BASED_POSTED_INTR, ++ .features[FEAT_VMX_PROCBASED_CTLS] = ++ VMX_CPU_BASED_VIRTUAL_INTR_PENDING | ++ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | ++ VMX_CPU_BASED_INVLPG_EXITING | 
VMX_CPU_BASED_MWAIT_EXITING | ++ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | ++ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | ++ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | ++ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_VIRTUAL_NMI_PENDING | ++ VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING | ++ VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_TRAP_FLAG | ++ VMX_CPU_BASED_USE_MSR_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING | ++ VMX_CPU_BASED_PAUSE_EXITING | ++ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, ++ .features[FEAT_VMX_SECONDARY_CTLS] = ++ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ++ VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC | ++ VMX_SECONDARY_EXEC_RDTSCP | ++ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | ++ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_WBINVD_EXITING | ++ VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | ++ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | ++ VMX_SECONDARY_EXEC_RDRAND_EXITING | ++ VMX_SECONDARY_EXEC_ENABLE_INVPCID | ++ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | ++ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML | ++ VMX_SECONDARY_EXEC_XSAVES, ++ .features[FEAT_VMX_VMFUNC] = ++ MSR_VMX_VMFUNC_EPT_SWITCHING, ++ .xlevel = 0x80000008, ++ .model_id = "Intel Xeon Processor (SierraForest)", ++ .versions = (X86CPUVersionDefinition[]) { ++ { .version = 1 }, ++ { /* end of list */ }, ++ }, ++ }, + { + .name = "Denverton", + .level = 21, +-- +2.27.0 + diff --git a/target-i386-Export-RFDS-bit-to-guests.patch b/target-i386-Export-RFDS-bit-to-guests.patch new file mode 100644 index 0000000000000000000000000000000000000000..00561d6ddb4bde68c8702ee1524a2aedfc403cc4 --- /dev/null +++ b/target-i386-Export-RFDS-bit-to-guests.patch @@ -0,0 +1,47 @@ +From b167617657fa078c4ea14cf54138ff5a4ce180f3 Mon Sep 17 00:00:00 2001 +From: Pawan Gupta +Date: Wed, 13 Mar 2024 07:53:23 -0700 
+Subject: [PATCH] target/i386: Export RFDS bit to guests + +commit 41bdd9812863c150284a9339a048ed88c40f4df7 upstream. + +Register File Data Sampling (RFDS) is a CPU side-channel vulnerability +that may expose stale register value. CPUs that set RFDS_NO bit in MSR +IA32_ARCH_CAPABILITIES indicate that they are not vulnerable to RFDS. +Similarly, RFDS_CLEAR indicates that CPU is affected by RFDS, and has +the microcode to help mitigate RFDS. + +Make RFDS_CLEAR and RFDS_NO bits available to guests. + +Intel-SIG: commit 41bdd9812863 target/i386: Export RFDS bit to guests. +8.2.0-Add SRF CPU module support + +Signed-off-by: Pawan Gupta +Reviewed-by: Xiaoyao Li +Reviewed-by: Zhao Liu +Message-ID: <9a38877857392b5c2deae7e7db1b170d15510314.1710341348.git.pawan.kumar.gupta@linux.intel.com> +Signed-off-by: Paolo Bonzini +[ Quanxian Wang: amend commit log ] +Signed-off-by: Quanxian Wang +--- + target/i386/cpu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 57a832cea2..fd32c64f99 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1157,8 +1157,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + NULL, "sbdr-ssdp-no", "fbsdp-no", "psdp-no", + NULL, "fb-clear", NULL, NULL, + NULL, NULL, NULL, NULL, +- "pbrsb-no", NULL, "gds-no", NULL, +- NULL, NULL, NULL, NULL, ++ "pbrsb-no", NULL, "gds-no", "rfds-no", ++ "rfds-clear", NULL, NULL, NULL, + }, + .msr = { + .index = MSR_IA32_ARCH_CAPABILITIES, +-- +2.27.0 + diff --git a/target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch b/target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch new file mode 100644 index 0000000000000000000000000000000000000000..6ecd0ba1d3a70a4c14fe6da4bf868f4ca4e73767 --- /dev/null +++ b/target-i386-Introduce-Icelake-Server-v7-to-enable-TS.patch @@ -0,0 +1,64 @@ +From 8f2e7e0ebc4351d61091669137a4e26b78f3cb27 Mon Sep 17 00:00:00 2001 +From: Zhenzhong Duan +Date: Wed, 20 Mar 2024 17:31:38 +0800 +Subject: [PATCH] 
target/i386: Introduce Icelake-Server-v7 to enable TSX + +commit c895fa54e3060c5ac6f3888dce96c9b78626072b upstream. + +When start L2 guest with both L1/L2 using Icelake-Server-v3 or above, +QEMU reports below warning: + +"warning: host doesn't support requested feature: MSR(10AH).taa-no [bit 8]" + +Reason is QEMU Icelake-Server-v3 has TSX feature disabled but enables taa-no +bit. It's meaningless that TSX isn't supported but still claim TSX is secure. +So L1 KVM doesn't expose taa-no to L2 if TSX is unsupported, then starting L2 +triggers the warning. + +Fix it by introducing a new version Icelake-Server-v7 which has both TSX +and taa-no features. Then guest can use TSX securely when it see taa-no. + +This matches the production Icelake which supports TSX and isn't susceptible +to TSX Async Abort (TAA) vulnerabilities, a.k.a, taa-no. + +Ideally, TSX should have being enabled together with taa-no since v3, but for +compatibility, we'd better to add v7 to enable it. + +Fixes: d965dc35592d ("target/i386: Add ARCH_CAPABILITIES related bits into Icelake-Server CPU model") +Intel-SIG: commit c895fa54e306 target/i386: Introduce Icelake-Server-v7 to enable TSX. 
+8.2.0-Add SRF CPU module support + +Tested-by: Xiangfei Ma +Signed-off-by: Zhenzhong Duan +Message-ID: <20240320093138.80267-2-zhenzhong.duan@intel.com> +Signed-off-by: Paolo Bonzini +[ Quanxian Wang: amend commit log ] +Signed-off-by: Quanxian Wang +--- + target/i386/cpu.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 491cf40cc7..6abe33946c 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -3822,6 +3822,16 @@ static const X86CPUDefinition builtin_x86_defs[] = { + { /* end of list */ } + }, + }, ++ { ++ .version = 7, ++ .note = "TSX, taa-no", ++ .props = (PropValue[]) { ++ /* Restore TSX features removed by -v2 above */ ++ { "hle", "on" }, ++ { "rtm", "on" }, ++ { /* end of list */ } ++ }, ++ }, + { /* end of list */ } + } + }, +-- +2.27.0 + diff --git a/target-i386-sev-Fix-missing-ERRP_GUARD-for-error_pre.patch b/target-i386-sev-Fix-missing-ERRP_GUARD-for-error_pre.patch new file mode 100644 index 0000000000000000000000000000000000000000..c3cffaa94a06016242e9cd7e2147676da7622321 --- /dev/null +++ b/target-i386-sev-Fix-missing-ERRP_GUARD-for-error_pre.patch @@ -0,0 +1,63 @@ +From 5a4e9ad98edc1ba5c1e93f0e24753c1a8355ffce Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Wed, 13 Mar 2024 13:49:37 +0800 +Subject: [PATCH] target/i386/sev: Fix missing ERRP_GUARD() for error_prepend() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from f55cceac8c03e639711490f08996c32861591435 +As the comment in qapi/error, passing @errp to error_prepend() requires ERRP_GUARD(): + +* = Why, when and how to use ERRP_GUARD() = +* +* Without ERRP_GUARD(), use of the @errp parameter is restricted: +... +* - It should not be passed to error_prepend(), error_vprepend() or +* error_append_hint(), because that doesn't work with &error_fatal. +* ERRP_GUARD() lifts these restrictions. +* +* To use ERRP_GUARD(), add it right at the beginning of the function.
+* @errp can then be used without worrying about the argument being +* NULL or &error_fatal. + +ERRP_GUARD() could avoid the case when @errp is the pointer of +error_fatal, the user can't see this additional information, because +exit() happens in error_setg earlier than information is added [1]. + +The sev_inject_launch_secret() passes @errp to error_prepend(), and as +an APIs defined in target/i386/sev.h, it is necessary to protect its +@errp with ERRP_GUARD(). + +To avoid the issue like [1] said, add missing ERRP_GUARD() at the +beginning of this function. + +[1]: Issue description in the commit message of commit ae7c80a7bd73 + ("error: New macro ERRP_GUARD()"). + +Cc: Paolo Bonzini +Cc: Marcelo Tosatti +Signed-off-by: Zhao Liu +Reviewed-by: Thomas Huth +Message-ID: <20240229143914.1977550-17-zhao1.liu@linux.intel.com> +Signed-off-by: Philippe Mathieu-Daudé +Signed-off-by: dinglimin +--- + target/i386/sev.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/i386/sev.c b/target/i386/sev.c +index 9a71246682..1a9d1db7a8 100644 +--- a/target/i386/sev.c ++++ b/target/i386/sev.c +@@ -1044,6 +1044,7 @@ sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) + int sev_inject_launch_secret(const char *packet_hdr, const char *secret, + uint64_t gpa, Error **errp) + { ++ ERRP_GUARD(); + struct kvm_sev_launch_secret input; + g_autofree guchar *data = NULL, *hdr = NULL; + int error, ret = 1; +-- +2.27.0 + diff --git a/target-loongarch-Add-loongarch-kvm-into-meson-build.patch b/target-loongarch-Add-loongarch-kvm-into-meson-build.patch new file mode 100644 index 0000000000000000000000000000000000000000..7dd1112397927da908b455c142ab4857918fece2 --- /dev/null +++ b/target-loongarch-Add-loongarch-kvm-into-meson-build.patch @@ -0,0 +1,56 @@ +From 49a7ae85d6ac42f8ef556a0d42802508c28adfcc Mon Sep 17 00:00:00 2001 +From: Tianrui Zhao +Date: Fri, 5 Jan 2024 15:58:04 +0800 +Subject: [PATCH] target/loongarch: Add loongarch kvm into meson build + +Add kvm.c into
meson.build to compile it when kvm +is configed. Meanwhile in meson.build, we set the +kvm_targets to loongarch64-softmmu when the cpu is +loongarch. And fix the compiling error when config +is enable-kvm,disable-tcg. + +Signed-off-by: Tianrui Zhao +Signed-off-by: xianglai li +Reviewed-by: Richard Henderson +Reviewed-by: Song Gao +Message-Id: <20240105075804.1228596-10-zhaotianrui@loongson.cn> +Signed-off-by: Song Gao +--- + meson.build | 2 ++ + target/loongarch/kvm/meson.build | 1 + + target/loongarch/meson.build | 1 + + 3 files changed, 4 insertions(+) + create mode 100644 target/loongarch/kvm/meson.build + +diff --git a/meson.build b/meson.build +index 445f2b7c2b..0c62b4156d 100644 +--- a/meson.build ++++ b/meson.build +@@ -114,6 +114,8 @@ elif cpu in ['riscv32'] + kvm_targets = ['riscv32-softmmu'] + elif cpu in ['riscv64'] + kvm_targets = ['riscv64-softmmu'] ++elif cpu in ['loongarch64'] ++ kvm_targets = ['loongarch64-softmmu'] + else + kvm_targets = [] + endif +diff --git a/target/loongarch/kvm/meson.build b/target/loongarch/kvm/meson.build +new file mode 100644 +index 0000000000..2266de6ca9 +--- /dev/null ++++ b/target/loongarch/kvm/meson.build +@@ -0,0 +1 @@ ++loongarch_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c')) +diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build +index e84e4c51f4..db310f6022 100644 +--- a/target/loongarch/meson.build ++++ b/target/loongarch/meson.build +@@ -18,3 +18,4 @@ subdir('tcg') + + target_arch += {'loongarch': loongarch_ss} + target_system_arch += {'loongarch': loongarch_system_ss} ++subdir('kvm') +-- +2.27.0 + diff --git a/target-loongarch-Add-timer-information-dump-support.patch b/target-loongarch-Add-timer-information-dump-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..e489939d2ce6703d7d79d86fe9527c298a294800 --- /dev/null +++ b/target-loongarch-Add-timer-information-dump-support.patch @@ -0,0 +1,33 @@ +From 8a43c9379651fbf9d015240d6dc7c4b90ce98683 Mon Sep 17 
00:00:00 2001 +From: Bibo Mao +Date: Wed, 6 Dec 2023 16:18:39 +0800 +Subject: [PATCH] target/loongarch: Add timer information dump support + +Timer emulation sometimes is problematic especially when vm is running in +kvm mode. This patch adds registers dump support relative with timer +hardware, so that it is easier to find the problems. + +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20231206081839.2290178-1-maobibo@loongson.cn> +Signed-off-by: Song Gao +--- + target/loongarch/cpu.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index fc075952e6..db9a421cc4 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -762,6 +762,8 @@ void loongarch_cpu_dump_state(CPUState *cs, FILE *f, int flags) + qemu_fprintf(f, "TLBRENTRY=%016" PRIx64 "\n", env->CSR_TLBRENTRY); + qemu_fprintf(f, "TLBRBADV=%016" PRIx64 "\n", env->CSR_TLBRBADV); + qemu_fprintf(f, "TLBRERA=%016" PRIx64 "\n", env->CSR_TLBRERA); ++ qemu_fprintf(f, "TCFG=%016" PRIx64 "\n", env->CSR_TCFG); ++ qemu_fprintf(f, "TVAL=%016" PRIx64 "\n", env->CSR_TVAL); + + /* fpr */ + if (flags & CPU_DUMP_FPU) { +-- +2.27.0 + diff --git a/target-loongarch-Define-some-kvm_arch-interfaces.patch b/target-loongarch-Define-some-kvm_arch-interfaces.patch new file mode 100644 index 0000000000000000000000000000000000000000..8667c98948257e691a468b425ec9982592d7f64d --- /dev/null +++ b/target-loongarch-Define-some-kvm_arch-interfaces.patch @@ -0,0 +1,162 @@ +From 623a99084843f47723cb799d4bcef8e1359d59ad Mon Sep 17 00:00:00 2001 +From: Tianrui Zhao +Date: Fri, 5 Jan 2024 15:57:57 +0800 +Subject: [PATCH] target/loongarch: Define some kvm_arch interfaces + +Define some functions in target/loongarch/kvm/kvm.c, +such as kvm_arch_put_registers, kvm_arch_get_registers +and kvm_arch_handle_exit, etc. which are needed by +kvm/kvm-all.c. Now the most functions has no content +and they will be implemented in the next patches. 
+ +Signed-off-by: Tianrui Zhao +Signed-off-by: xianglai li +Reviewed-by: Richard Henderson +Reviewed-by: Song Gao +Message-Id: <20240105075804.1228596-3-zhaotianrui@loongson.cn> +Signed-off-by: Song Gao +--- + target/loongarch/kvm/kvm.c | 131 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 131 insertions(+) + create mode 100644 target/loongarch/kvm/kvm.c + +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +new file mode 100644 +index 0000000000..0d67322fd9 +--- /dev/null ++++ b/target/loongarch/kvm/kvm.c +@@ -0,0 +1,131 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++/* ++ * QEMU LoongArch KVM ++ * ++ * Copyright (c) 2023 Loongson Technology Corporation Limited ++ */ ++ ++#include "qemu/osdep.h" ++#include ++#include ++ ++#include "qemu/timer.h" ++#include "qemu/error-report.h" ++#include "qemu/main-loop.h" ++#include "sysemu/sysemu.h" ++#include "sysemu/kvm.h" ++#include "sysemu/kvm_int.h" ++#include "hw/pci/pci.h" ++#include "exec/memattrs.h" ++#include "exec/address-spaces.h" ++#include "hw/boards.h" ++#include "hw/irq.h" ++#include "qemu/log.h" ++#include "hw/loader.h" ++#include "migration/migration.h" ++#include "sysemu/runstate.h" ++#include "cpu-csr.h" ++#include "kvm_loongarch.h" ++ ++static bool cap_has_mp_state; ++const KVMCapabilityInfo kvm_arch_required_capabilities[] = { ++ KVM_CAP_LAST_INFO ++}; ++ ++int kvm_arch_get_registers(CPUState *cs) ++{ ++ return 0; ++} ++int kvm_arch_put_registers(CPUState *cs, int level) ++{ ++ return 0; ++} ++ ++int kvm_arch_init_vcpu(CPUState *cs) ++{ ++ return 0; ++} ++ ++int kvm_arch_destroy_vcpu(CPUState *cs) ++{ ++ return 0; ++} ++ ++unsigned long kvm_arch_vcpu_id(CPUState *cs) ++{ ++ return cs->cpu_index; ++} ++ ++int kvm_arch_release_virq_post(int virq) ++{ ++ return 0; ++} ++ ++int kvm_arch_msi_data_to_gsi(uint32_t data) ++{ ++ abort(); ++} ++ ++int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, ++ uint64_t address, uint32_t data, PCIDevice *dev) ++{ ++ return 0; 
++} ++ ++int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, ++ int vector, PCIDevice *dev) ++{ ++ return 0; ++} ++ ++void kvm_arch_init_irq_routing(KVMState *s) ++{ ++} ++ ++int kvm_arch_get_default_type(MachineState *ms) ++{ ++ return 0; ++} ++ ++int kvm_arch_init(MachineState *ms, KVMState *s) ++{ ++ return 0; ++} ++ ++int kvm_arch_irqchip_create(KVMState *s) ++{ ++ return 0; ++} ++ ++void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) ++{ ++} ++ ++MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run) ++{ ++ return MEMTXATTRS_UNSPECIFIED; ++} ++ ++int kvm_arch_process_async_events(CPUState *cs) ++{ ++ return cs->halted; ++} ++ ++bool kvm_arch_stop_on_emulation_error(CPUState *cs) ++{ ++ return true; ++} ++ ++bool kvm_arch_cpu_check_are_resettable(void) ++{ ++ return true; ++} ++ ++int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) ++{ ++ return 0; ++} ++ ++void kvm_arch_accel_class_init(ObjectClass *oc) ++{ ++} +-- +2.27.0 + diff --git a/target-loongarch-Fix-qemu-loongarch64-hang-when-exec.patch b/target-loongarch-Fix-qemu-loongarch64-hang-when-exec.patch new file mode 100644 index 0000000000000000000000000000000000000000..f94c68bba1a2e09e031cb55c27a219eb9a392f49 --- /dev/null +++ b/target-loongarch-Fix-qemu-loongarch64-hang-when-exec.patch @@ -0,0 +1,45 @@ +From 6d175f9d5d5b9f46ee2f1a6fe00249bb817b5dc6 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Wed, 20 Mar 2024 09:39:55 +0800 +Subject: [PATCH] target/loongarch: Fix qemu-loongarch64 hang when + executing 'll.d $t0, $t0, 0' + +On gen_ll, if a->imm is zero, make_address_x return src1, +but the load to destination may clobber src1. We use a new +destination to fix this problem. 
+ +Fixes: c5af6628f4be (target/loongarch: Extract make_address_i() helper) +Reviewed-by: Richard Henderson +Suggested-by: Richard Henderson +Signed-off-by: Song Gao +Message-Id: <20240320013955.1561311-1-gaosong@loongson.cn> +--- + target/loongarch/tcg/insn_trans/trans_atomic.c.inc | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc +index 80c2e286fd..974bc2a70f 100644 +--- a/target/loongarch/tcg/insn_trans/trans_atomic.c.inc ++++ b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc +@@ -5,14 +5,14 @@ + + static bool gen_ll(DisasContext *ctx, arg_rr_i *a, MemOp mop) + { +- TCGv dest = gpr_dst(ctx, a->rd, EXT_NONE); ++ TCGv t1 = tcg_temp_new(); + TCGv src1 = gpr_src(ctx, a->rj, EXT_NONE); + TCGv t0 = make_address_i(ctx, src1, a->imm); + +- tcg_gen_qemu_ld_i64(dest, t0, ctx->mem_idx, mop); ++ tcg_gen_qemu_ld_i64(t1, t0, ctx->mem_idx, mop); + tcg_gen_st_tl(t0, tcg_env, offsetof(CPULoongArchState, lladdr)); +- tcg_gen_st_tl(dest, tcg_env, offsetof(CPULoongArchState, llval)); +- gen_set_gpr(a->rd, dest, EXT_NONE); ++ tcg_gen_st_tl(t1, tcg_env, offsetof(CPULoongArchState, llval)); ++ gen_set_gpr(a->rd, t1, EXT_NONE); + + return true; + } +-- +2.33.0 + diff --git a/target-loongarch-Fix-qemu-system-loongarch64-assert-.patch b/target-loongarch-Fix-qemu-system-loongarch64-assert-.patch new file mode 100644 index 0000000000000000000000000000000000000000..d7c39f0f16831c3351599a41a08e1e6f95c67272 --- /dev/null +++ b/target-loongarch-Fix-qemu-system-loongarch64-assert-.patch @@ -0,0 +1,136 @@ +From 3db0118d3663c5d56841dac30e4bf95ccfff21bd Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Tue, 2 Apr 2024 09:39:36 +0800 +Subject: [PATCH] target/loongarch: Fix qemu-system-loongarch64 assert + failed with the option '-d int' +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +qemu-system-loongarch64 assert failed 
with the option '-d int', +the helper_idle() raise an exception EXCP_HLT, but the exception name is undefined. + +----- +merge patch: + +0cbb322f70e8a87e4acbffecef5ea8f9448f3513(target/loongarch/cpu.c: typo fix: expection) + +Signed-off-by: Song Gao +Reviewed-by: Philippe Mathieu-Daudé +Message-Id: <20240321123606.1704900-1-gaosong@loongson.cn> +--- + target/loongarch/cpu.c | 74 +++++++++++++++++++++++------------------- + 1 file changed, 40 insertions(+), 34 deletions(-) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index b098b1c6f3..0b3f954b64 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -43,33 +43,45 @@ const char * const fregnames[32] = { + "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", + }; + +-static const char * const excp_names[] = { +- [EXCCODE_INT] = "Interrupt", +- [EXCCODE_PIL] = "Page invalid exception for load", +- [EXCCODE_PIS] = "Page invalid exception for store", +- [EXCCODE_PIF] = "Page invalid exception for fetch", +- [EXCCODE_PME] = "Page modified exception", +- [EXCCODE_PNR] = "Page Not Readable exception", +- [EXCCODE_PNX] = "Page Not Executable exception", +- [EXCCODE_PPI] = "Page Privilege error", +- [EXCCODE_ADEF] = "Address error for instruction fetch", +- [EXCCODE_ADEM] = "Address error for Memory access", +- [EXCCODE_SYS] = "Syscall", +- [EXCCODE_BRK] = "Break", +- [EXCCODE_INE] = "Instruction Non-Existent", +- [EXCCODE_IPE] = "Instruction privilege error", +- [EXCCODE_FPD] = "Floating Point Disabled", +- [EXCCODE_FPE] = "Floating Point Exception", +- [EXCCODE_DBP] = "Debug breakpoint", +- [EXCCODE_BCE] = "Bound Check Exception", +- [EXCCODE_SXD] = "128 bit vector instructions Disable exception", +- [EXCCODE_ASXD] = "256 bit vector instructions Disable exception", ++struct TypeExcp { ++ int32_t exccode; ++ const char * const name; ++}; ++ ++static const struct TypeExcp excp_names[] = { ++ {EXCCODE_INT, "Interrupt"}, ++ {EXCCODE_PIL, "Page invalid exception for load"}, ++
{EXCCODE_PIS, "Page invalid exception for store"}, ++ {EXCCODE_PIF, "Page invalid exception for fetch"}, ++ {EXCCODE_PME, "Page modified exception"}, ++ {EXCCODE_PNR, "Page Not Readable exception"}, ++ {EXCCODE_PNX, "Page Not Executable exception"}, ++ {EXCCODE_PPI, "Page Privilege error"}, ++ {EXCCODE_ADEF, "Address error for instruction fetch"}, ++ {EXCCODE_ADEM, "Address error for Memory access"}, ++ {EXCCODE_SYS, "Syscall"}, ++ {EXCCODE_BRK, "Break"}, ++ {EXCCODE_INE, "Instruction Non-Existent"}, ++ {EXCCODE_IPE, "Instruction privilege error"}, ++ {EXCCODE_FPD, "Floating Point Disabled"}, ++ {EXCCODE_FPE, "Floating Point Exception"}, ++ {EXCCODE_DBP, "Debug breakpoint"}, ++ {EXCCODE_BCE, "Bound Check Exception"}, ++ {EXCCODE_SXD, "128 bit vector instructions Disable exception"}, ++ {EXCCODE_ASXD, "256 bit vector instructions Disable exception"}, ++ {EXCP_HLT, "EXCP_HLT"}, + }; + + const char *loongarch_exception_name(int32_t exception) + { +- assert(excp_names[exception]); +- return excp_names[exception]; ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(excp_names); i++) { ++ if (excp_names[i].exccode == exception) { ++ return excp_names[i].name; ++ } ++ } ++ return "Unknown"; + } + + void G_NORETURN do_raise_exception(CPULoongArchState *env, +@@ -78,7 +90,7 @@ void G_NORETURN do_raise_exception(CPULoongArchState *env, + { + CPUState *cs = env_cpu(env); + +- qemu_log_mask(CPU_LOG_INT, "%s: %d (%s)\n", ++ qemu_log_mask(CPU_LOG_INT, "%s: exception: %d (%s)\n", + __func__, + exception, + loongarch_exception_name(exception)); +@@ -159,22 +171,16 @@ static void loongarch_cpu_do_interrupt(CPUState *cs) + CPULoongArchState *env = &cpu->env; + bool update_badinstr = 1; + int cause = -1; +- const char *name; + bool tlbfill = FIELD_EX64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR); + uint32_t vec_size = FIELD_EX64(env->CSR_ECFG, CSR_ECFG, VS); + + if (cs->exception_index != EXCCODE_INT) { +- if (cs->exception_index < 0 || +- cs->exception_index >= ARRAY_SIZE(excp_names)) { +- name = 
"unknown"; +- } else { +- name = excp_names[cs->exception_index]; +- } +- + qemu_log_mask(CPU_LOG_INT, + "%s enter: pc " TARGET_FMT_lx " ERA " TARGET_FMT_lx +- " TLBRERA " TARGET_FMT_lx " %s exception\n", __func__, +- env->pc, env->CSR_ERA, env->CSR_TLBRERA, name); ++ " TLBRERA " TARGET_FMT_lx " exception: %d (%s)\n", ++ __func__, env->pc, env->CSR_ERA, env->CSR_TLBRERA, ++ cs->exception_index, ++ loongarch_exception_name(cs->exception_index)); + } + + switch (cs->exception_index) { +-- +2.33.0 + diff --git a/target-loongarch-Fix-qtest-test-hmp-error-when-KVM-o.patch b/target-loongarch-Fix-qtest-test-hmp-error-when-KVM-o.patch new file mode 100644 index 0000000000000000000000000000000000000000..33cf3381868dff0b58bf3ba2fb4ef5bb98fb5687 --- /dev/null +++ b/target-loongarch-Fix-qtest-test-hmp-error-when-KVM-o.patch @@ -0,0 +1,570 @@ +From d2381abc2c78de68e765a29a55282707541e315d Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Thu, 25 Jan 2024 14:14:01 +0800 +Subject: [PATCH] target/loongarch: Fix qtest test-hmp error when KVM-only + build + +The cc->sysemu_ops->get_phys_page_debug() is NULL when +KVM-only build. this patch fixes it. 
+ +Signed-off-by: Song Gao +Tested-by: Bibo Mao +Message-Id: <20240125061401.52526-1-gaosong@loongson.cn> +--- + target/loongarch/cpu.c | 2 - + target/loongarch/cpu_helper.c | 231 ++++++++++++++++++++++++++++++ + target/loongarch/internals.h | 20 ++- + target/loongarch/meson.build | 1 + + target/loongarch/tcg/tlb_helper.c | 230 ----------------------------- + 5 files changed, 250 insertions(+), 234 deletions(-) + create mode 100644 target/loongarch/cpu_helper.c + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index 6611d137a1..b098b1c6f3 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -771,9 +771,7 @@ static struct TCGCPUOps loongarch_tcg_ops = { + #include "hw/core/sysemu-cpu-ops.h" + + static const struct SysemuCPUOps loongarch_sysemu_ops = { +-#ifdef CONFIG_TCG + .get_phys_page_debug = loongarch_cpu_get_phys_page_debug, +-#endif + }; + + static int64_t loongarch_cpu_get_arch_id(CPUState *cs) +diff --git a/target/loongarch/cpu_helper.c b/target/loongarch/cpu_helper.c +new file mode 100644 +index 0000000000..f68d63f466 +--- /dev/null ++++ b/target/loongarch/cpu_helper.c +@@ -0,0 +1,231 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++/* ++ * LoongArch CPU helpers for qemu ++ * ++ * Copyright (c) 2024 Loongson Technology Corporation Limited ++ * ++ */ ++ ++#include "qemu/osdep.h" ++#include "cpu.h" ++#include "internals.h" ++#include "cpu-csr.h" ++ ++static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical, ++ int *prot, target_ulong address, ++ int access_type, int index, int mmu_idx) ++{ ++ LoongArchTLB *tlb = &env->tlb[index]; ++ uint64_t plv = mmu_idx; ++ uint64_t tlb_entry, tlb_ppn; ++ uint8_t tlb_ps, n, tlb_v, tlb_d, tlb_plv, tlb_nx, tlb_nr, tlb_rplv; ++ ++ if (index >= LOONGARCH_STLB) { ++ tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); ++ } else { ++ tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); ++ } ++ n = (address >> tlb_ps) & 0x1;/* Odd or even */ ++ ++ tlb_entry = n ? 
tlb->tlb_entry1 : tlb->tlb_entry0; ++ tlb_v = FIELD_EX64(tlb_entry, TLBENTRY, V); ++ tlb_d = FIELD_EX64(tlb_entry, TLBENTRY, D); ++ tlb_plv = FIELD_EX64(tlb_entry, TLBENTRY, PLV); ++ if (is_la64(env)) { ++ tlb_ppn = FIELD_EX64(tlb_entry, TLBENTRY_64, PPN); ++ tlb_nx = FIELD_EX64(tlb_entry, TLBENTRY_64, NX); ++ tlb_nr = FIELD_EX64(tlb_entry, TLBENTRY_64, NR); ++ tlb_rplv = FIELD_EX64(tlb_entry, TLBENTRY_64, RPLV); ++ } else { ++ tlb_ppn = FIELD_EX64(tlb_entry, TLBENTRY_32, PPN); ++ tlb_nx = 0; ++ tlb_nr = 0; ++ tlb_rplv = 0; ++ } ++ ++ /* Remove sw bit between bit12 -- bit PS*/ ++ tlb_ppn = tlb_ppn & ~(((0x1UL << (tlb_ps - 12)) -1)); ++ ++ /* Check access rights */ ++ if (!tlb_v) { ++ return TLBRET_INVALID; ++ } ++ ++ if (access_type == MMU_INST_FETCH && tlb_nx) { ++ return TLBRET_XI; ++ } ++ ++ if (access_type == MMU_DATA_LOAD && tlb_nr) { ++ return TLBRET_RI; ++ } ++ ++ if (((tlb_rplv == 0) && (plv > tlb_plv)) || ++ ((tlb_rplv == 1) && (plv != tlb_plv))) { ++ return TLBRET_PE; ++ } ++ ++ if ((access_type == MMU_DATA_STORE) && !tlb_d) { ++ return TLBRET_DIRTY; ++ } ++ ++ *physical = (tlb_ppn << R_TLBENTRY_64_PPN_SHIFT) | ++ (address & MAKE_64BIT_MASK(0, tlb_ps)); ++ *prot = PAGE_READ; ++ if (tlb_d) { ++ *prot |= PAGE_WRITE; ++ } ++ if (!tlb_nx) { ++ *prot |= PAGE_EXEC; ++ } ++ return TLBRET_MATCH; ++} ++ ++/* ++ * One tlb entry holds an adjacent odd/even pair, the vpn is the ++ * content of the virtual page number divided by 2. So the ++ * compare vpn is bit[47:15] for 16KiB page. while the vppn ++ * field in tlb entry contains bit[47:13], so need adjust. 
++ * virt_vpn = vaddr[47:13] ++ */ ++bool loongarch_tlb_search(CPULoongArchState *env, target_ulong vaddr, ++ int *index) ++{ ++ LoongArchTLB *tlb; ++ uint16_t csr_asid, tlb_asid, stlb_idx; ++ uint8_t tlb_e, tlb_ps, tlb_g, stlb_ps; ++ int i, compare_shift; ++ uint64_t vpn, tlb_vppn; ++ ++ csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID); ++ stlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); ++ vpn = (vaddr & TARGET_VIRT_MASK) >> (stlb_ps + 1); ++ stlb_idx = vpn & 0xff; /* VA[25:15] <==> TLBIDX.index for 16KiB Page */ ++ compare_shift = stlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; ++ ++ /* Search STLB */ ++ for (i = 0; i < 8; ++i) { ++ tlb = &env->tlb[i * 256 + stlb_idx]; ++ tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); ++ if (tlb_e) { ++ tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); ++ tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); ++ tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); ++ ++ if ((tlb_g == 1 || tlb_asid == csr_asid) && ++ (vpn == (tlb_vppn >> compare_shift))) { ++ *index = i * 256 + stlb_idx; ++ return true; ++ } ++ } ++ } ++ ++ /* Search MTLB */ ++ for (i = LOONGARCH_STLB; i < LOONGARCH_TLB_MAX; ++i) { ++ tlb = &env->tlb[i]; ++ tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); ++ if (tlb_e) { ++ tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); ++ tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); ++ tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); ++ tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); ++ compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; ++ vpn = (vaddr & TARGET_VIRT_MASK) >> (tlb_ps + 1); ++ if ((tlb_g == 1 || tlb_asid == csr_asid) && ++ (vpn == (tlb_vppn >> compare_shift))) { ++ *index = i; ++ return true; ++ } ++ } ++ } ++ return false; ++} ++ ++static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical, ++ int *prot, target_ulong address, ++ MMUAccessType access_type, int mmu_idx) ++{ ++ int index, match; ++ ++ match = loongarch_tlb_search(env, address, &index); ++ if (match) { ++ return 
loongarch_map_tlb_entry(env, physical, prot, ++ address, access_type, index, mmu_idx); ++ } ++ ++ return TLBRET_NOMATCH; ++} ++ ++static hwaddr dmw_va2pa(CPULoongArchState *env, target_ulong va, ++ target_ulong dmw) ++{ ++ if (is_la64(env)) { ++ return va & TARGET_VIRT_MASK; ++ } else { ++ uint32_t pseg = FIELD_EX32(dmw, CSR_DMW_32, PSEG); ++ return (va & MAKE_64BIT_MASK(0, R_CSR_DMW_32_VSEG_SHIFT)) | \ ++ (pseg << R_CSR_DMW_32_VSEG_SHIFT); ++ } ++} ++ ++int get_physical_address(CPULoongArchState *env, hwaddr *physical, ++ int *prot, target_ulong address, ++ MMUAccessType access_type, int mmu_idx) ++{ ++ int user_mode = mmu_idx == MMU_IDX_USER; ++ int kernel_mode = mmu_idx == MMU_IDX_KERNEL; ++ uint32_t plv, base_c, base_v; ++ int64_t addr_high; ++ uint8_t da = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, DA); ++ uint8_t pg = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PG); ++ ++ /* Check PG and DA */ ++ if (da & !pg) { ++ *physical = address & TARGET_PHYS_MASK; ++ *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; ++ return TLBRET_MATCH; ++ } ++ ++ plv = kernel_mode | (user_mode << R_CSR_DMW_PLV3_SHIFT); ++ if (is_la64(env)) { ++ base_v = address >> R_CSR_DMW_64_VSEG_SHIFT; ++ } else { ++ base_v = address >> R_CSR_DMW_32_VSEG_SHIFT; ++ } ++ /* Check direct map window */ ++ for (int i = 0; i < 4; i++) { ++ if (is_la64(env)) { ++ base_c = FIELD_EX64(env->CSR_DMW[i], CSR_DMW_64, VSEG); ++ } else { ++ base_c = FIELD_EX64(env->CSR_DMW[i], CSR_DMW_32, VSEG); ++ } ++ if ((plv & env->CSR_DMW[i]) && (base_c == base_v)) { ++ *physical = dmw_va2pa(env, address, env->CSR_DMW[i]); ++ *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; ++ return TLBRET_MATCH; ++ } ++ } ++ ++ /* Check valid extension */ ++ addr_high = sextract64(address, TARGET_VIRT_ADDR_SPACE_BITS, 16); ++ if (!(addr_high == 0 || addr_high == -1)) { ++ return TLBRET_BADADDR; ++ } ++ ++ /* Mapped address */ ++ return loongarch_map_address(env, physical, prot, address, ++ access_type, mmu_idx); ++} ++ ++hwaddr 
loongarch_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) ++{ ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ hwaddr phys_addr; ++ int prot; ++ ++ if (get_physical_address(env, &phys_addr, &prot, addr, MMU_DATA_LOAD, ++ cpu_mmu_index(env, false)) != 0) { ++ return -1; ++ } ++ return phys_addr; ++} +diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h +index 0beb034748..a2fc54c8a7 100644 +--- a/target/loongarch/internals.h ++++ b/target/loongarch/internals.h +@@ -37,6 +37,17 @@ void restore_fp_status(CPULoongArchState *env); + #endif + + #ifndef CONFIG_USER_ONLY ++enum { ++ TLBRET_MATCH = 0, ++ TLBRET_BADADDR = 1, ++ TLBRET_NOMATCH = 2, ++ TLBRET_INVALID = 3, ++ TLBRET_DIRTY = 4, ++ TLBRET_RI = 5, ++ TLBRET_XI = 6, ++ TLBRET_PE = 7, ++}; ++ + extern const VMStateDescription vmstate_loongarch_cpu; + + void loongarch_cpu_set_irq(void *opaque, int irq, int level); +@@ -46,12 +57,17 @@ uint64_t cpu_loongarch_get_constant_timer_counter(LoongArchCPU *cpu); + uint64_t cpu_loongarch_get_constant_timer_ticks(LoongArchCPU *cpu); + void cpu_loongarch_store_constant_timer_config(LoongArchCPU *cpu, + uint64_t value); ++bool loongarch_tlb_search(CPULoongArchState *env, target_ulong vaddr, ++ int *index); ++int get_physical_address(CPULoongArchState *env, hwaddr *physical, ++ int *prot, target_ulong address, ++ MMUAccessType access_type, int mmu_idx); ++hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); ++ + #ifdef CONFIG_TCG + bool loongarch_cpu_tlb_fill(CPUState *cs, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr); +- +-hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); + #endif + #endif /* !CONFIG_USER_ONLY */ + +diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build +index db310f6022..e002e9aaf6 100644 +--- a/target/loongarch/meson.build ++++ b/target/loongarch/meson.build +@@ -8,6 +8,7 @@ loongarch_ss.add(files( + 
+ loongarch_system_ss = ss.source_set() + loongarch_system_ss.add(files( ++ 'cpu_helper.c', + 'loongarch-qmp-cmds.c', + 'machine.c', + )) +diff --git a/target/loongarch/tcg/tlb_helper.c b/target/loongarch/tcg/tlb_helper.c +index 449043c68b..804ab7a263 100644 +--- a/target/loongarch/tcg/tlb_helper.c ++++ b/target/loongarch/tcg/tlb_helper.c +@@ -17,236 +17,6 @@ + #include "exec/log.h" + #include "cpu-csr.h" + +-enum { +- TLBRET_MATCH = 0, +- TLBRET_BADADDR = 1, +- TLBRET_NOMATCH = 2, +- TLBRET_INVALID = 3, +- TLBRET_DIRTY = 4, +- TLBRET_RI = 5, +- TLBRET_XI = 6, +- TLBRET_PE = 7, +-}; +- +-static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical, +- int *prot, target_ulong address, +- int access_type, int index, int mmu_idx) +-{ +- LoongArchTLB *tlb = &env->tlb[index]; +- uint64_t plv = mmu_idx; +- uint64_t tlb_entry, tlb_ppn; +- uint8_t tlb_ps, n, tlb_v, tlb_d, tlb_plv, tlb_nx, tlb_nr, tlb_rplv; +- +- if (index >= LOONGARCH_STLB) { +- tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); +- } else { +- tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); +- } +- n = (address >> tlb_ps) & 0x1;/* Odd or even */ +- +- tlb_entry = n ? 
tlb->tlb_entry1 : tlb->tlb_entry0; +- tlb_v = FIELD_EX64(tlb_entry, TLBENTRY, V); +- tlb_d = FIELD_EX64(tlb_entry, TLBENTRY, D); +- tlb_plv = FIELD_EX64(tlb_entry, TLBENTRY, PLV); +- if (is_la64(env)) { +- tlb_ppn = FIELD_EX64(tlb_entry, TLBENTRY_64, PPN); +- tlb_nx = FIELD_EX64(tlb_entry, TLBENTRY_64, NX); +- tlb_nr = FIELD_EX64(tlb_entry, TLBENTRY_64, NR); +- tlb_rplv = FIELD_EX64(tlb_entry, TLBENTRY_64, RPLV); +- } else { +- tlb_ppn = FIELD_EX64(tlb_entry, TLBENTRY_32, PPN); +- tlb_nx = 0; +- tlb_nr = 0; +- tlb_rplv = 0; +- } +- +- /* Remove sw bit between bit12 -- bit PS*/ +- tlb_ppn = tlb_ppn & ~(((0x1UL << (tlb_ps - 12)) -1)); +- +- /* Check access rights */ +- if (!tlb_v) { +- return TLBRET_INVALID; +- } +- +- if (access_type == MMU_INST_FETCH && tlb_nx) { +- return TLBRET_XI; +- } +- +- if (access_type == MMU_DATA_LOAD && tlb_nr) { +- return TLBRET_RI; +- } +- +- if (((tlb_rplv == 0) && (plv > tlb_plv)) || +- ((tlb_rplv == 1) && (plv != tlb_plv))) { +- return TLBRET_PE; +- } +- +- if ((access_type == MMU_DATA_STORE) && !tlb_d) { +- return TLBRET_DIRTY; +- } +- +- *physical = (tlb_ppn << R_TLBENTRY_64_PPN_SHIFT) | +- (address & MAKE_64BIT_MASK(0, tlb_ps)); +- *prot = PAGE_READ; +- if (tlb_d) { +- *prot |= PAGE_WRITE; +- } +- if (!tlb_nx) { +- *prot |= PAGE_EXEC; +- } +- return TLBRET_MATCH; +-} +- +-/* +- * One tlb entry holds an adjacent odd/even pair, the vpn is the +- * content of the virtual page number divided by 2. So the +- * compare vpn is bit[47:15] for 16KiB page. while the vppn +- * field in tlb entry contains bit[47:13], so need adjust. 
+- * virt_vpn = vaddr[47:13] +- */ +-static bool loongarch_tlb_search(CPULoongArchState *env, target_ulong vaddr, +- int *index) +-{ +- LoongArchTLB *tlb; +- uint16_t csr_asid, tlb_asid, stlb_idx; +- uint8_t tlb_e, tlb_ps, tlb_g, stlb_ps; +- int i, compare_shift; +- uint64_t vpn, tlb_vppn; +- +- csr_asid = FIELD_EX64(env->CSR_ASID, CSR_ASID, ASID); +- stlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS); +- vpn = (vaddr & TARGET_VIRT_MASK) >> (stlb_ps + 1); +- stlb_idx = vpn & 0xff; /* VA[25:15] <==> TLBIDX.index for 16KiB Page */ +- compare_shift = stlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; +- +- /* Search STLB */ +- for (i = 0; i < 8; ++i) { +- tlb = &env->tlb[i * 256 + stlb_idx]; +- tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); +- if (tlb_e) { +- tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); +- tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); +- tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); +- +- if ((tlb_g == 1 || tlb_asid == csr_asid) && +- (vpn == (tlb_vppn >> compare_shift))) { +- *index = i * 256 + stlb_idx; +- return true; +- } +- } +- } +- +- /* Search MTLB */ +- for (i = LOONGARCH_STLB; i < LOONGARCH_TLB_MAX; ++i) { +- tlb = &env->tlb[i]; +- tlb_e = FIELD_EX64(tlb->tlb_misc, TLB_MISC, E); +- if (tlb_e) { +- tlb_vppn = FIELD_EX64(tlb->tlb_misc, TLB_MISC, VPPN); +- tlb_ps = FIELD_EX64(tlb->tlb_misc, TLB_MISC, PS); +- tlb_asid = FIELD_EX64(tlb->tlb_misc, TLB_MISC, ASID); +- tlb_g = FIELD_EX64(tlb->tlb_entry0, TLBENTRY, G); +- compare_shift = tlb_ps + 1 - R_TLB_MISC_VPPN_SHIFT; +- vpn = (vaddr & TARGET_VIRT_MASK) >> (tlb_ps + 1); +- if ((tlb_g == 1 || tlb_asid == csr_asid) && +- (vpn == (tlb_vppn >> compare_shift))) { +- *index = i; +- return true; +- } +- } +- } +- return false; +-} +- +-static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical, +- int *prot, target_ulong address, +- MMUAccessType access_type, int mmu_idx) +-{ +- int index, match; +- +- match = loongarch_tlb_search(env, address, &index); +- if (match) { +- 
return loongarch_map_tlb_entry(env, physical, prot, +- address, access_type, index, mmu_idx); +- } +- +- return TLBRET_NOMATCH; +-} +- +-static hwaddr dmw_va2pa(CPULoongArchState *env, target_ulong va, +- target_ulong dmw) +-{ +- if (is_la64(env)) { +- return va & TARGET_VIRT_MASK; +- } else { +- uint32_t pseg = FIELD_EX32(dmw, CSR_DMW_32, PSEG); +- return (va & MAKE_64BIT_MASK(0, R_CSR_DMW_32_VSEG_SHIFT)) | \ +- (pseg << R_CSR_DMW_32_VSEG_SHIFT); +- } +-} +- +-static int get_physical_address(CPULoongArchState *env, hwaddr *physical, +- int *prot, target_ulong address, +- MMUAccessType access_type, int mmu_idx) +-{ +- int user_mode = mmu_idx == MMU_IDX_USER; +- int kernel_mode = mmu_idx == MMU_IDX_KERNEL; +- uint32_t plv, base_c, base_v; +- int64_t addr_high; +- uint8_t da = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, DA); +- uint8_t pg = FIELD_EX64(env->CSR_CRMD, CSR_CRMD, PG); +- +- /* Check PG and DA */ +- if (da & !pg) { +- *physical = address & TARGET_PHYS_MASK; +- *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; +- return TLBRET_MATCH; +- } +- +- plv = kernel_mode | (user_mode << R_CSR_DMW_PLV3_SHIFT); +- if (is_la64(env)) { +- base_v = address >> R_CSR_DMW_64_VSEG_SHIFT; +- } else { +- base_v = address >> R_CSR_DMW_32_VSEG_SHIFT; +- } +- /* Check direct map window */ +- for (int i = 0; i < 4; i++) { +- if (is_la64(env)) { +- base_c = FIELD_EX64(env->CSR_DMW[i], CSR_DMW_64, VSEG); +- } else { +- base_c = FIELD_EX64(env->CSR_DMW[i], CSR_DMW_32, VSEG); +- } +- if ((plv & env->CSR_DMW[i]) && (base_c == base_v)) { +- *physical = dmw_va2pa(env, address, env->CSR_DMW[i]); +- *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; +- return TLBRET_MATCH; +- } +- } +- +- /* Check valid extension */ +- addr_high = sextract64(address, TARGET_VIRT_ADDR_SPACE_BITS, 16); +- if (!(addr_high == 0 || addr_high == -1)) { +- return TLBRET_BADADDR; +- } +- +- /* Mapped address */ +- return loongarch_map_address(env, physical, prot, address, +- access_type, mmu_idx); +-} +- +-hwaddr 
loongarch_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) +-{ +- LoongArchCPU *cpu = LOONGARCH_CPU(cs); +- CPULoongArchState *env = &cpu->env; +- hwaddr phys_addr; +- int prot; +- +- if (get_physical_address(env, &phys_addr, &prot, addr, MMU_DATA_LOAD, +- cpu_mmu_index(env, false)) != 0) { +- return -1; +- } +- return phys_addr; +-} +- + static void raise_mmu_exception(CPULoongArchState *env, target_ulong address, + MMUAccessType access_type, int tlb_error) + { +-- +2.27.0 + diff --git a/target-loongarch-Fix-tlb-huge-page-loading-issue.patch b/target-loongarch-Fix-tlb-huge-page-loading-issue.patch new file mode 100644 index 0000000000000000000000000000000000000000..467e6d745444102b0eefc14e2d06c01bba6115b2 --- /dev/null +++ b/target-loongarch-Fix-tlb-huge-page-loading-issue.patch @@ -0,0 +1,208 @@ +From c5938b5f858ee8904893e08999df1af1ae13b063 Mon Sep 17 00:00:00 2001 +From: Xianglai Li +Date: Mon, 18 Mar 2024 15:03:32 +0800 +Subject: [PATCH] target/loongarch: Fix tlb huge page loading issue + +When we use qemu tcg simulation, the page size of bios is 4KB. +When using the level 2 super huge page (page size is 1G) to create the page table, +it is found that the content of the corresponding address space is abnormal, +resulting in the bios can not start the operating system and graphical interface normally. + +The lddir and ldpte instruction emulation has +a problem with the use of super huge page processing above level 2. +The page size is not correctly calculated, +resulting in the wrong page size of the table entry found by tlb. 
+ +Signed-off-by: Xianglai Li +Reviewed-by: Richard Henderson +Signed-off-by: Song Gao +Message-Id: <20240318070332.1273939-1-lixianglai@loongson.cn> +--- + target/loongarch/cpu-csr.h | 3 + + target/loongarch/internals.h | 5 -- + target/loongarch/tcg/tlb_helper.c | 113 +++++++++++++++++++++--------- + 3 files changed, 82 insertions(+), 39 deletions(-) + +diff --git a/target/loongarch/cpu-csr.h b/target/loongarch/cpu-csr.h +index c59d7a9fcb..0834e91f30 100644 +--- a/target/loongarch/cpu-csr.h ++++ b/target/loongarch/cpu-csr.h +@@ -67,6 +67,9 @@ FIELD(TLBENTRY, D, 1, 1) + FIELD(TLBENTRY, PLV, 2, 2) + FIELD(TLBENTRY, MAT, 4, 2) + FIELD(TLBENTRY, G, 6, 1) ++FIELD(TLBENTRY, HUGE, 6, 1) ++FIELD(TLBENTRY, HGLOBAL, 12, 1) ++FIELD(TLBENTRY, LEVEL, 13, 2) + FIELD(TLBENTRY_32, PPN, 8, 24) + FIELD(TLBENTRY_64, PPN, 12, 36) + FIELD(TLBENTRY_64, NR, 61, 1) +diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h +index a2fc54c8a7..944153b180 100644 +--- a/target/loongarch/internals.h ++++ b/target/loongarch/internals.h +@@ -16,11 +16,6 @@ + #define TARGET_PHYS_MASK MAKE_64BIT_MASK(0, TARGET_PHYS_ADDR_SPACE_BITS) + #define TARGET_VIRT_MASK MAKE_64BIT_MASK(0, TARGET_VIRT_ADDR_SPACE_BITS) + +-/* Global bit used for lddir/ldpte */ +-#define LOONGARCH_PAGE_HUGE_SHIFT 6 +-/* Global bit for huge page */ +-#define LOONGARCH_HGLOBAL_SHIFT 12 +- + void loongarch_translate_init(void); + + void loongarch_cpu_dump_state(CPUState *cpu, FILE *f, int flags); +diff --git a/target/loongarch/tcg/tlb_helper.c b/target/loongarch/tcg/tlb_helper.c +index 804ab7a263..eedd1ac376 100644 +--- a/target/loongarch/tcg/tlb_helper.c ++++ b/target/loongarch/tcg/tlb_helper.c +@@ -17,6 +17,34 @@ + #include "exec/log.h" + #include "cpu-csr.h" + ++static void get_dir_base_width(CPULoongArchState *env, uint64_t *dir_base, ++ uint64_t *dir_width, target_ulong level) ++{ ++ switch (level) { ++ case 1: ++ *dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR1_BASE); ++ *dir_width = 
FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR1_WIDTH); ++ break; ++ case 2: ++ *dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR2_BASE); ++ *dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR2_WIDTH); ++ break; ++ case 3: ++ *dir_base = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR3_BASE); ++ *dir_width = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR3_WIDTH); ++ break; ++ case 4: ++ *dir_base = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR4_BASE); ++ *dir_width = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR4_WIDTH); ++ break; ++ default: ++ /* level may be zero for ldpte */ ++ *dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTBASE); ++ *dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTWIDTH); ++ break; ++ } ++} ++ + static void raise_mmu_exception(CPULoongArchState *env, target_ulong address, + MMUAccessType access_type, int tlb_error) + { +@@ -486,7 +514,25 @@ target_ulong helper_lddir(CPULoongArchState *env, target_ulong base, + target_ulong badvaddr, index, phys, ret; + int shift; + uint64_t dir_base, dir_width; +- bool huge = (base >> LOONGARCH_PAGE_HUGE_SHIFT) & 0x1; ++ ++ if (unlikely((level == 0) || (level > 4))) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "Attepted LDDIR with level %"PRId64"\n", level); ++ return base; ++ } ++ ++ if (FIELD_EX64(base, TLBENTRY, HUGE)) { ++ if (unlikely(level == 4)) { ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "Attempted use of level 4 huge page\n"); ++ } ++ ++ if (FIELD_EX64(base, TLBENTRY, LEVEL)) { ++ return base; ++ } else { ++ return FIELD_DP64(base, TLBENTRY, LEVEL, level); ++ } ++ } + + badvaddr = env->CSR_TLBRBADV; + base = base & TARGET_PHYS_MASK; +@@ -495,30 +541,7 @@ target_ulong helper_lddir(CPULoongArchState *env, target_ulong base, + shift = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTEWIDTH); + shift = (shift + 1) * 3; + +- if (huge) { +- return base; +- } +- switch (level) { +- case 1: +- dir_base = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR1_BASE); +- dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR1_WIDTH); +- break; +- case 2: +- dir_base = 
FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR2_BASE); +- dir_width = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, DIR2_WIDTH); +- break; +- case 3: +- dir_base = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR3_BASE); +- dir_width = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR3_WIDTH); +- break; +- case 4: +- dir_base = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR4_BASE); +- dir_width = FIELD_EX64(env->CSR_PWCH, CSR_PWCH, DIR4_WIDTH); +- break; +- default: +- do_raise_exception(env, EXCCODE_INE, GETPC()); +- return 0; +- } ++ get_dir_base_width(env, &dir_base, &dir_width, level); + index = (badvaddr >> dir_base) & ((1 << dir_width) - 1); + phys = base | index << shift; + ret = ldq_phys(cs->as, phys) & TARGET_PHYS_MASK; +@@ -531,20 +554,42 @@ void helper_ldpte(CPULoongArchState *env, target_ulong base, target_ulong odd, + CPUState *cs = env_cpu(env); + target_ulong phys, tmp0, ptindex, ptoffset0, ptoffset1, ps, badv; + int shift; +- bool huge = (base >> LOONGARCH_PAGE_HUGE_SHIFT) & 0x1; + uint64_t ptbase = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTBASE); + uint64_t ptwidth = FIELD_EX64(env->CSR_PWCL, CSR_PWCL, PTWIDTH); ++ uint64_t dir_base, dir_width; + ++ /* ++ * The parameter "base" has only two types, ++ * one is the page table base address, ++ * whose bit 6 should be 0, ++ * and the other is the huge page entry, ++ * whose bit 6 should be 1. ++ */ + base = base & TARGET_PHYS_MASK; ++ if (FIELD_EX64(base, TLBENTRY, HUGE)) { ++ /* ++ * Gets the huge page level and Gets huge page size. ++ * Clears the huge page level information in the entry. ++ * Clears huge page bit. ++ * Move HGLOBAL bit to GLOBAL bit. ++ */ ++ get_dir_base_width(env, &dir_base, &dir_width, ++ FIELD_EX64(base, TLBENTRY, LEVEL)); ++ ++ base = FIELD_DP64(base, TLBENTRY, LEVEL, 0); ++ base = FIELD_DP64(base, TLBENTRY, HUGE, 0); ++ if (FIELD_EX64(base, TLBENTRY, HGLOBAL)) { ++ base = FIELD_DP64(base, TLBENTRY, HGLOBAL, 0); ++ base = FIELD_DP64(base, TLBENTRY, G, 1); ++ } + +- if (huge) { +- /* Huge Page. 
base is paddr */ +- tmp0 = base ^ (1 << LOONGARCH_PAGE_HUGE_SHIFT); +- /* Move Global bit */ +- tmp0 = ((tmp0 & (1 << LOONGARCH_HGLOBAL_SHIFT)) >> +- LOONGARCH_HGLOBAL_SHIFT) << R_TLBENTRY_G_SHIFT | +- (tmp0 & (~(1 << LOONGARCH_HGLOBAL_SHIFT))); +- ps = ptbase + ptwidth - 1; ++ ps = dir_base + dir_width - 1; ++ /* ++ * Huge pages are evenly split into parity pages ++ * when loaded into the tlb, ++ * so the tlb page size needs to be divided by 2. ++ */ ++ tmp0 = base; + if (odd) { + tmp0 += MAKE_64BIT_MASK(ps, 1); + } +-- +2.33.0 + diff --git a/target-loongarch-Implement-kvm-get-set-registers.patch b/target-loongarch-Implement-kvm-get-set-registers.patch new file mode 100644 index 0000000000000000000000000000000000000000..8ccab152351b42a20afac08a00679adacc6b788f --- /dev/null +++ b/target-loongarch-Implement-kvm-get-set-registers.patch @@ -0,0 +1,724 @@ +From 0884653d8583aaaa5585caf38246518439bcfdfd Mon Sep 17 00:00:00 2001 +From: Tianrui Zhao +Date: Fri, 5 Jan 2024 15:57:59 +0800 +Subject: [PATCH] target/loongarch: Implement kvm get/set registers + +Implement kvm_arch_get/set_registers interfaces, many regs +can be get/set in the function, such as core regs, csr regs, +fpu regs, mp state, etc. 
+ +Signed-off-by: Tianrui Zhao +Signed-off-by: xianglai li +Reviewed-by: Song Gao +Change-Id: Ia8fc48fe08b1768853f7729e77d37cdf270031e4 +Message-Id: <20240105075804.1228596-5-zhaotianrui@loongson.cn> +Signed-off-by: Song Gao +--- + meson.build | 1 + + target/loongarch/cpu.c | 3 + + target/loongarch/cpu.h | 1 + + target/loongarch/internals.h | 5 +- + target/loongarch/kvm/kvm.c | 580 +++++++++++++++++++++++++++++++++- + target/loongarch/trace-events | 11 + + target/loongarch/trace.h | 1 + + 7 files changed, 599 insertions(+), 3 deletions(-) + create mode 100644 target/loongarch/trace-events + create mode 100644 target/loongarch/trace.h + +diff --git a/meson.build b/meson.build +index 6c77d9687d..445f2b7c2b 100644 +--- a/meson.build ++++ b/meson.build +@@ -3358,6 +3358,7 @@ if have_system or have_user + 'target/hppa', + 'target/i386', + 'target/i386/kvm', ++ 'target/loongarch', + 'target/mips/tcg', + 'target/nios2', + 'target/ppc', +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index 021592798a..275833eec8 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -553,6 +553,9 @@ static void loongarch_cpu_reset_hold(Object *obj) + #ifndef CONFIG_USER_ONLY + env->pc = 0x1c000000; + memset(env->tlb, 0, sizeof(env->tlb)); ++ if (kvm_enabled()) { ++ kvm_arch_reset_vcpu(env); ++ } + #endif + + restore_fp_status(env); +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index f6d5ef0852..f4a89bd626 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -360,6 +360,7 @@ typedef struct CPUArchState { + MemoryRegion iocsr_mem; + bool load_elf; + uint64_t elf_address; ++ uint32_t mp_state; + /* Store ipistate to access from this struct */ + DeviceState *ipistate; + #endif +diff --git a/target/loongarch/internals.h b/target/loongarch/internals.h +index c492863cc5..0beb034748 100644 +--- a/target/loongarch/internals.h ++++ b/target/loongarch/internals.h +@@ -31,8 +31,10 @@ void G_NORETURN do_raise_exception(CPULoongArchState 
*env, + + const char *loongarch_exception_name(int32_t exception); + ++#ifdef CONFIG_TCG + int ieee_ex_to_loongarch(int xcpt); + void restore_fp_status(CPULoongArchState *env); ++#endif + + #ifndef CONFIG_USER_ONLY + extern const VMStateDescription vmstate_loongarch_cpu; +@@ -44,12 +46,13 @@ uint64_t cpu_loongarch_get_constant_timer_counter(LoongArchCPU *cpu); + uint64_t cpu_loongarch_get_constant_timer_ticks(LoongArchCPU *cpu); + void cpu_loongarch_store_constant_timer_config(LoongArchCPU *cpu, + uint64_t value); +- ++#ifdef CONFIG_TCG + bool loongarch_cpu_tlb_fill(CPUState *cs, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr); + + hwaddr loongarch_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); ++#endif + #endif /* !CONFIG_USER_ONLY */ + + uint64_t read_fcc(CPULoongArchState *env); +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 0d67322fd9..e7c9ef830c 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -26,19 +26,595 @@ + #include "sysemu/runstate.h" + #include "cpu-csr.h" + #include "kvm_loongarch.h" ++#include "trace.h" + + static bool cap_has_mp_state; + const KVMCapabilityInfo kvm_arch_required_capabilities[] = { + KVM_CAP_LAST_INFO + }; + ++static int kvm_loongarch_get_regs_core(CPUState *cs) ++{ ++ int ret = 0; ++ int i; ++ struct kvm_regs regs; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ /* Get the current register set as KVM seems it */ ++ ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs); ++ if (ret < 0) { ++ trace_kvm_failed_get_regs_core(strerror(errno)); ++ return ret; ++ } ++ /* gpr[0] value is always 0 */ ++ env->gpr[0] = 0; ++ for (i = 1; i < 32; i++) { ++ env->gpr[i] = regs.gpr[i]; ++ } ++ ++ env->pc = regs.pc; ++ return ret; ++} ++ ++static int kvm_loongarch_put_regs_core(CPUState *cs) ++{ ++ int ret = 0; ++ int i; ++ struct kvm_regs regs; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState
*env = &cpu->env; ++ ++ /* Set the registers based on QEMU's view of things */ ++ for (i = 0; i < 32; i++) { ++ regs.gpr[i] = env->gpr[i]; ++ } ++ ++ regs.pc = env->pc; ++ ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs); ++ if (ret < 0) { ++ trace_kvm_failed_put_regs_core(strerror(errno)); ++ } ++ ++ return ret; ++} ++ ++static int kvm_loongarch_get_csr(CPUState *cs) ++{ ++ int ret = 0; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_CRMD), ++ &env->CSR_CRMD); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PRMD), ++ &env->CSR_PRMD); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_EUEN), ++ &env->CSR_EUEN); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_MISC), ++ &env->CSR_MISC); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_ECFG), ++ &env->CSR_ECFG); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_ESTAT), ++ &env->CSR_ESTAT); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_ERA), ++ &env->CSR_ERA); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_BADV), ++ &env->CSR_BADV); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_BADI), ++ &env->CSR_BADI); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_EENTRY), ++ &env->CSR_EENTRY); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBIDX), ++ &env->CSR_TLBIDX); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBEHI), ++ &env->CSR_TLBEHI); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBELO0), ++ &env->CSR_TLBELO0); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBELO1), ++ &env->CSR_TLBELO1); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_ASID), ++ &env->CSR_ASID); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PGDL), ++ &env->CSR_PGDL); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PGDH), ++ &env->CSR_PGDH); ++ ++ ret |=
kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PGD), ++ &env->CSR_PGD); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PWCL), ++ &env->CSR_PWCL); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PWCH), ++ &env->CSR_PWCH); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_STLBPS), ++ &env->CSR_STLBPS); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_RVACFG), ++ &env->CSR_RVACFG); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_CPUID), ++ &env->CSR_CPUID); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PRCFG1), ++ &env->CSR_PRCFG1); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PRCFG2), ++ &env->CSR_PRCFG2); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PRCFG3), ++ &env->CSR_PRCFG3); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(0)), ++ &env->CSR_SAVE[0]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(1)), ++ &env->CSR_SAVE[1]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(2)), ++ &env->CSR_SAVE[2]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(3)), ++ &env->CSR_SAVE[3]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(4)), ++ &env->CSR_SAVE[4]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(5)), ++ &env->CSR_SAVE[5]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(6)), ++ &env->CSR_SAVE[6]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(7)), ++ &env->CSR_SAVE[7]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TID), ++ &env->CSR_TID); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_CNTC), ++ &env->CSR_CNTC); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TICLR), ++ &env->CSR_TICLR); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_LLBCTL), ++ &env->CSR_LLBCTL); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_IMPCTL1), ++ 
&env->CSR_IMPCTL1); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_IMPCTL2), ++ &env->CSR_IMPCTL2); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRENTRY), ++ &env->CSR_TLBRENTRY); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRBADV), ++ &env->CSR_TLBRBADV); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRERA), ++ &env->CSR_TLBRERA); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRSAVE), ++ &env->CSR_TLBRSAVE); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRELO0), ++ &env->CSR_TLBRELO0); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRELO1), ++ &env->CSR_TLBRELO1); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBREHI), ++ &env->CSR_TLBREHI); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRPRMD), ++ &env->CSR_TLBRPRMD); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_DMW(0)), ++ &env->CSR_DMW[0]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_DMW(1)), ++ &env->CSR_DMW[1]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_DMW(2)), ++ &env->CSR_DMW[2]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_DMW(3)), ++ &env->CSR_DMW[3]); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TVAL), ++ &env->CSR_TVAL); ++ ++ ret |= kvm_get_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TCFG), ++ &env->CSR_TCFG); ++ ++ return ret; ++} ++ ++static int kvm_loongarch_put_csr(CPUState *cs) ++{ ++ int ret = 0; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_CRMD), ++ &env->CSR_CRMD); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PRMD), ++ &env->CSR_PRMD); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_EUEN), ++ &env->CSR_EUEN); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_MISC), ++ &env->CSR_MISC); ++ ++ ret |= kvm_set_one_reg(cs, 
KVM_IOC_CSRID(LOONGARCH_CSR_ECFG), ++ &env->CSR_ECFG); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_ESTAT), ++ &env->CSR_ESTAT); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_ERA), ++ &env->CSR_ERA); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_BADV), ++ &env->CSR_BADV); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_BADI), ++ &env->CSR_BADI); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_EENTRY), ++ &env->CSR_EENTRY); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBIDX), ++ &env->CSR_TLBIDX); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBEHI), ++ &env->CSR_TLBEHI); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBELO0), ++ &env->CSR_TLBELO0); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBELO1), ++ &env->CSR_TLBELO1); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_ASID), ++ &env->CSR_ASID); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PGDL), ++ &env->CSR_PGDL); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PGDH), ++ &env->CSR_PGDH); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PGD), ++ &env->CSR_PGD); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PWCL), ++ &env->CSR_PWCL); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PWCH), ++ &env->CSR_PWCH); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_STLBPS), ++ &env->CSR_STLBPS); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_RVACFG), ++ &env->CSR_RVACFG); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_CPUID), ++ &env->CSR_CPUID); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PRCFG1), ++ &env->CSR_PRCFG1); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PRCFG2), ++ &env->CSR_PRCFG2); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PRCFG3), ++ &env->CSR_PRCFG3); ++ ++ ret |= kvm_set_one_reg(cs, 
KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(0)), ++ &env->CSR_SAVE[0]); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(1)), ++ &env->CSR_SAVE[1]); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(2)), ++ &env->CSR_SAVE[2]); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(3)), ++ &env->CSR_SAVE[3]); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(4)), ++ &env->CSR_SAVE[4]); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(5)), ++ &env->CSR_SAVE[5]); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(6)), ++ &env->CSR_SAVE[6]); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_SAVE(7)), ++ &env->CSR_SAVE[7]); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TID), ++ &env->CSR_TID); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_CNTC), ++ &env->CSR_CNTC); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TICLR), ++ &env->CSR_TICLR); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_LLBCTL), ++ &env->CSR_LLBCTL); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_IMPCTL1), ++ &env->CSR_IMPCTL1); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_IMPCTL2), ++ &env->CSR_IMPCTL2); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRENTRY), ++ &env->CSR_TLBRENTRY); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRBADV), ++ &env->CSR_TLBRBADV); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRERA), ++ &env->CSR_TLBRERA); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRSAVE), ++ &env->CSR_TLBRSAVE); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRELO0), ++ &env->CSR_TLBRELO0); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBRELO1), ++ &env->CSR_TLBRELO1); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TLBREHI), ++ &env->CSR_TLBREHI); ++ ++ ret |= kvm_set_one_reg(cs, 
KVM_IOC_CSRID(LOONGARCH_CSR_TLBRPRMD), ++ &env->CSR_TLBRPRMD); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_DMW(0)), ++ &env->CSR_DMW[0]); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_DMW(1)), ++ &env->CSR_DMW[1]); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_DMW(2)), ++ &env->CSR_DMW[2]); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_DMW(3)), ++ &env->CSR_DMW[3]); ++ /* ++ * timer cfg must be put at last since it is used to enable ++ * guest timer ++ */ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TVAL), ++ &env->CSR_TVAL); ++ ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_TCFG), ++ &env->CSR_TCFG); ++ return ret; ++} ++ ++static int kvm_loongarch_get_regs_fp(CPUState *cs) ++{ ++ int ret, i; ++ struct kvm_fpu fpu; ++ ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ ret = kvm_vcpu_ioctl(cs, KVM_GET_FPU, &fpu); ++ if (ret < 0) { ++ trace_kvm_failed_get_fpu(strerror(errno)); ++ return ret; ++ } ++ ++ env->fcsr0 = fpu.fcsr; ++ for (i = 0; i < 32; i++) { ++ env->fpr[i].vreg.UD[0] = fpu.fpr[i].val64[0]; ++ } ++ for (i = 0; i < 8; i++) { ++ env->cf[i] = fpu.fcc & 0xFF; ++ fpu.fcc = fpu.fcc >> 8; ++ } ++ ++ return ret; ++} ++ ++static int kvm_loongarch_put_regs_fp(CPUState *cs) ++{ ++ int ret, i; ++ struct kvm_fpu fpu; ++ ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ fpu.fcsr = env->fcsr0; ++ fpu.fcc = 0; ++ for (i = 0; i < 32; i++) { ++ fpu.fpr[i].val64[0] = env->fpr[i].vreg.UD[0]; ++ } ++ ++ for (i = 0; i < 8; i++) { ++ fpu.fcc |= env->cf[i] << (8 * i); ++ } ++ ++ ret = kvm_vcpu_ioctl(cs, KVM_SET_FPU, &fpu); ++ if (ret < 0) { ++ trace_kvm_failed_put_fpu(strerror(errno)); ++ } ++ ++ return ret; ++} ++ ++void kvm_arch_reset_vcpu(CPULoongArchState *env) ++{ ++ env->mp_state = KVM_MP_STATE_RUNNABLE; ++} ++ ++static int kvm_loongarch_get_mpstate(CPUState *cs) ++{ ++ int ret = 0; ++ struct kvm_mp_state mp_state; 
++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ if (cap_has_mp_state) { ++ ret = kvm_vcpu_ioctl(cs, KVM_GET_MP_STATE, &mp_state); ++ if (ret) { ++ trace_kvm_failed_get_mpstate(strerror(errno)); ++ return ret; ++ } ++ env->mp_state = mp_state.mp_state; ++ } ++ ++ return ret; ++} ++ ++static int kvm_loongarch_put_mpstate(CPUState *cs) ++{ ++ int ret = 0; ++ ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ struct kvm_mp_state mp_state = { ++ .mp_state = env->mp_state ++ }; ++ ++ if (cap_has_mp_state) { ++ ret = kvm_vcpu_ioctl(cs, KVM_SET_MP_STATE, &mp_state); ++ if (ret) { ++ trace_kvm_failed_put_mpstate(strerror(errno)); ++ } ++ } ++ ++ return ret; ++} ++ ++static int kvm_loongarch_get_cpucfg(CPUState *cs) ++{ ++ int i, ret = 0; ++ uint64_t val; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ for (i = 0; i < 21; i++) { ++ ret = kvm_get_one_reg(cs, KVM_IOC_CPUCFG(i), &val); ++ if (ret < 0) { ++ trace_kvm_failed_get_cpucfg(strerror(errno)); ++ } ++ env->cpucfg[i] = (uint32_t)val; ++ } ++ return ret; ++} ++ ++static int kvm_loongarch_put_cpucfg(CPUState *cs) ++{ ++ int i, ret = 0; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ uint64_t val; ++ ++ for (i = 0; i < 21; i++) { ++ val = env->cpucfg[i]; ++ /* LSX and LASX and LBT are not supported in kvm now */ ++ if (i == 2) { ++ val &= ~(BIT(R_CPUCFG2_LSX_SHIFT) | BIT(R_CPUCFG2_LASX_SHIFT)); ++ val &= ~(BIT(R_CPUCFG2_LBT_X86_SHIFT) | ++ BIT(R_CPUCFG2_LBT_ARM_SHIFT) | ++ BIT(R_CPUCFG2_LBT_MIPS_SHIFT)); ++ } ++ ret = kvm_set_one_reg(cs, KVM_IOC_CPUCFG(i), &val); ++ if (ret < 0) { ++ trace_kvm_failed_put_cpucfg(strerror(errno)); ++ } ++ } ++ return ret; ++} ++ + int kvm_arch_get_registers(CPUState *cs) + { +- return 0; ++ int ret; ++ ++ ret = kvm_loongarch_get_regs_core(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_get_csr(cs); ++ if (ret) { ++ return ret; 
++ } ++ ++ ret = kvm_loongarch_get_regs_fp(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_get_mpstate(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_get_cpucfg(cs); ++ return ret; + } ++ + int kvm_arch_put_registers(CPUState *cs, int level) + { +- return 0; ++ int ret; ++ ++ ret = kvm_loongarch_put_regs_core(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_put_csr(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_put_regs_fp(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_put_mpstate(cs); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = kvm_loongarch_put_cpucfg(cs); ++ return ret; + } + + int kvm_arch_init_vcpu(CPUState *cs) +diff --git a/target/loongarch/trace-events b/target/loongarch/trace-events +new file mode 100644 +index 0000000000..6827ab566a +--- /dev/null ++++ b/target/loongarch/trace-events +@@ -0,0 +1,11 @@ ++# See docs/devel/tracing.rst for syntax documentation. ++ ++#kvm.c ++kvm_failed_get_regs_core(const char *msg) "Failed to get core regs from KVM: %s" ++kvm_failed_put_regs_core(const char *msg) "Failed to put core regs into KVM: %s" ++kvm_failed_get_fpu(const char *msg) "Failed to get fpu from KVM: %s" ++kvm_failed_put_fpu(const char *msg) "Failed to put fpu into KVM: %s" ++kvm_failed_get_mpstate(const char *msg) "Failed to get mp_state from KVM: %s" ++kvm_failed_put_mpstate(const char *msg) "Failed to put mp_state into KVM: %s" ++kvm_failed_get_cpucfg(const char *msg) "Failed to get cpucfg from KVM: %s" ++kvm_failed_put_cpucfg(const char *msg) "Failed to put cpucfg into KVM: %s" +diff --git a/target/loongarch/trace.h b/target/loongarch/trace.h +new file mode 100644 +index 0000000000..c2ecb78f08 +--- /dev/null ++++ b/target/loongarch/trace.h +@@ -0,0 +1 @@ ++#include "trace/trace-target_loongarch.h" +-- +2.27.0 + diff --git a/target-loongarch-Implement-kvm_arch_handle_exit.patch b/target-loongarch-Implement-kvm_arch_handle_exit.patch new file mode 100644 index 
0000000000000000000000000000000000000000..235e2292258903ce03f0a1c97275f237f6a64285 --- /dev/null +++ b/target-loongarch-Implement-kvm_arch_handle_exit.patch @@ -0,0 +1,68 @@ +From 3feeca228b010716aacdf7159df10ea63f7e34cd Mon Sep 17 00:00:00 2001 +From: Tianrui Zhao +Date: Fri, 5 Jan 2024 15:58:02 +0800 +Subject: [PATCH] target/loongarch: Implement kvm_arch_handle_exit + +Implement kvm_arch_handle_exit for loongarch. In this +function, the KVM_EXIT_LOONGARCH_IOCSR is handled, +we read or write the iocsr address space by the addr, +length and is_write argument in kvm_run. + +Signed-off-by: Tianrui Zhao +Signed-off-by: xianglai li +Reviewed-by: Richard Henderson +Reviewed-by: Song Gao +Message-Id: <20240105075804.1228596-8-zhaotianrui@loongson.cn> +Signed-off-by: Song Gao +--- + target/loongarch/kvm/kvm.c | 24 +++++++++++++++++++++++- + target/loongarch/trace-events | 1 + + 2 files changed, 24 insertions(+), 1 deletion(-) + +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 85e7aeb083..d2dab3fef4 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -723,7 +723,29 @@ bool kvm_arch_cpu_check_are_resettable(void) + + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + { +- return 0; ++ int ret = 0; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ MemTxAttrs attrs = {}; ++ ++ attrs.requester_id = env_cpu(env)->cpu_index; ++ ++ trace_kvm_arch_handle_exit(run->exit_reason); ++ switch (run->exit_reason) { ++ case KVM_EXIT_LOONGARCH_IOCSR: ++ address_space_rw(&env->address_space_iocsr, ++ run->iocsr_io.phys_addr, ++ attrs, ++ run->iocsr_io.data, ++ run->iocsr_io.len, ++ run->iocsr_io.is_write); ++ break; ++ default: ++ ret = -1; ++ warn_report("KVM: unknown exit reason %d", run->exit_reason); ++ break; ++ } ++ return ret; + } + + void kvm_arch_accel_class_init(ObjectClass *oc) +diff --git a/target/loongarch/trace-events b/target/loongarch/trace-events +index 937c3c7c0c..021839880e 
100644 +--- a/target/loongarch/trace-events ++++ b/target/loongarch/trace-events +@@ -11,3 +11,4 @@ kvm_failed_get_counter(const char *msg) "Failed to get counter from KVM: %s" + kvm_failed_put_counter(const char *msg) "Failed to put counter into KVM: %s" + kvm_failed_get_cpucfg(const char *msg) "Failed to get cpucfg from KVM: %s" + kvm_failed_put_cpucfg(const char *msg) "Failed to put cpucfg into KVM: %s" ++kvm_arch_handle_exit(int num) "kvm arch handle exit, the reason number: %d" +-- +2.27.0 + diff --git a/target-loongarch-Implement-kvm_arch_init-function.patch b/target-loongarch-Implement-kvm_arch_init-function.patch new file mode 100644 index 0000000000000000000000000000000000000000..eb972b1072a9e0d137795b3862ffad620c11cafd --- /dev/null +++ b/target-loongarch-Implement-kvm_arch_init-function.patch @@ -0,0 +1,33 @@ +From 3a87dbd5e0343ee777bac0f18888a5a2d51254db Mon Sep 17 00:00:00 2001 +From: Tianrui Zhao +Date: Fri, 5 Jan 2024 15:58:00 +0800 +Subject: [PATCH] target/loongarch: Implement kvm_arch_init function + +Implement the kvm_arch_init of loongarch, in the function, the +KVM_CAP_MP_STATE cap is checked by kvm ioctl. 
+ +Signed-off-by: Tianrui Zhao +Signed-off-by: xianglai li +Reviewed-by: Richard Henderson +Reviewed-by: Song Gao +Message-Id: <20240105075804.1228596-6-zhaotianrui@loongson.cn> +Signed-off-by: Song Gao +--- + target/loongarch/kvm/kvm.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index e7c9ef830c..29944b9ef8 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -665,6 +665,7 @@ int kvm_arch_get_default_type(MachineState *ms) + + int kvm_arch_init(MachineState *ms, KVMState *s) + { ++ cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE); + return 0; + } + +-- +2.27.0 + diff --git a/target-loongarch-Implement-kvm_arch_init_vcpu.patch b/target-loongarch-Implement-kvm_arch_init_vcpu.patch new file mode 100644 index 0000000000000000000000000000000000000000..7c80b63a38b382412df85d4e70c557d6f26b8d72 --- /dev/null +++ b/target-loongarch-Implement-kvm_arch_init_vcpu.patch @@ -0,0 +1,87 @@ +From d7d47c044c9854675b75b91ade678d03316d9271 Mon Sep 17 00:00:00 2001 +From: Tianrui Zhao +Date: Fri, 5 Jan 2024 15:58:01 +0800 +Subject: [PATCH] target/loongarch: Implement kvm_arch_init_vcpu + +Implement kvm_arch_init_vcpu interface for loongarch, +in this function, we register VM change state handler. +And when VM state changes to running, the counter value +should be put into kvm to keep consistent with kvm, +and when state change to stop, counter value should be +refreshed from kvm. 
+ +Signed-off-by: Tianrui Zhao +Signed-off-by: xianglai li +Reviewed-by: Song Gao +Message-Id: <20240105075804.1228596-7-zhaotianrui@loongson.cn> +Signed-off-by: Song Gao +--- + target/loongarch/cpu.h | 2 ++ + target/loongarch/kvm/kvm.c | 23 +++++++++++++++++++++++ + target/loongarch/trace-events | 2 ++ + 3 files changed, 27 insertions(+) + +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index f4a89bd626..8ebd6fa1a7 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -381,6 +381,8 @@ struct ArchCPU { + + /* 'compatible' string for this CPU for Linux device trees */ + const char *dtb_compatible; ++ /* used by KVM_REG_LOONGARCH_COUNTER ioctl to access guest time counters */ ++ uint64_t kvm_state_counter; + }; + + /** +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 29944b9ef8..85e7aeb083 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -617,8 +617,31 @@ int kvm_arch_put_registers(CPUState *cs, int level) + return ret; + } + ++static void kvm_loongarch_vm_stage_change(void *opaque, bool running, ++ RunState state) ++{ ++ int ret; ++ CPUState *cs = opaque; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ ++ if (running) { ++ ret = kvm_set_one_reg(cs, KVM_REG_LOONGARCH_COUNTER, ++ &cpu->kvm_state_counter); ++ if (ret < 0) { ++ trace_kvm_failed_put_counter(strerror(errno)); ++ } ++ } else { ++ ret = kvm_get_one_reg(cs, KVM_REG_LOONGARCH_COUNTER, ++ &cpu->kvm_state_counter); ++ if (ret < 0) { ++ trace_kvm_failed_get_counter(strerror(errno)); ++ } ++ } ++} ++ + int kvm_arch_init_vcpu(CPUState *cs) + { ++ qemu_add_vm_change_state_handler(kvm_loongarch_vm_stage_change, cs); + return 0; + } + +diff --git a/target/loongarch/trace-events b/target/loongarch/trace-events +index 6827ab566a..937c3c7c0c 100644 +--- a/target/loongarch/trace-events ++++ b/target/loongarch/trace-events +@@ -7,5 +7,7 @@ kvm_failed_get_fpu(const char *msg) "Failed to get fpu from KVM: %s" + kvm_failed_put_fpu(const 
char *msg) "Failed to put fpu into KVM: %s" + kvm_failed_get_mpstate(const char *msg) "Failed to get mp_state from KVM: %s" + kvm_failed_put_mpstate(const char *msg) "Failed to put mp_state into KVM: %s" ++kvm_failed_get_counter(const char *msg) "Failed to get counter from KVM: %s" ++kvm_failed_put_counter(const char *msg) "Failed to put counter into KVM: %s" + kvm_failed_get_cpucfg(const char *msg) "Failed to get cpucfg from KVM: %s" + kvm_failed_put_cpucfg(const char *msg) "Failed to put cpucfg into KVM: %s" +-- +2.27.0 + diff --git a/target-loongarch-Implement-set-vcpu-intr-for-kvm.patch b/target-loongarch-Implement-set-vcpu-intr-for-kvm.patch new file mode 100644 index 0000000000000000000000000000000000000000..0fbad2aab0c381829cd5589add8e73cdbb364387 --- /dev/null +++ b/target-loongarch-Implement-set-vcpu-intr-for-kvm.patch @@ -0,0 +1,122 @@ +From 5f4c8b31db442e6ac39fbfe4b29d5479ab3567aa Mon Sep 17 00:00:00 2001 +From: Tianrui Zhao +Date: Wed, 10 Jan 2024 10:41:52 +0100 +Subject: [PATCH] target/loongarch: Implement set vcpu intr for kvm +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Implement loongarch kvm set vcpu interrupt interface, +when a irq is set in vcpu, we use the KVM_INTERRUPT +ioctl to set intr into kvm. 
+ +Signed-off-by: Tianrui Zhao +Signed-off-by: xianglai li +Reviewed-by: Song Gao +Message-ID: <20240105075804.1228596-9-zhaotianrui@loongson.cn> +[PMD: Split from bigger patch, part 2] +Signed-off-by: Philippe Mathieu-Daudé +Message-Id: <20240110094152.52138-2-philmd@linaro.org> +Signed-off-by: Song Gao +--- + target/loongarch/cpu.c | 9 ++++++++- + target/loongarch/kvm/kvm.c | 15 +++++++++++++++ + target/loongarch/kvm/kvm_loongarch.h | 16 ++++++++++++++++ + target/loongarch/trace-events | 1 + + 4 files changed, 40 insertions(+), 1 deletion(-) + create mode 100644 target/loongarch/kvm/kvm_loongarch.h + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index 60f2636b43..413414392b 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -12,6 +12,8 @@ + #include "qemu/module.h" + #include "sysemu/qtest.h" + #include "sysemu/tcg.h" ++#include "sysemu/kvm.h" ++#include "kvm/kvm_loongarch.h" + #include "exec/exec-all.h" + #include "cpu.h" + #include "internals.h" +@@ -19,6 +21,9 @@ + #include "cpu-csr.h" + #include "sysemu/reset.h" + #include "vec.h" ++#ifdef CONFIG_KVM ++#include ++#endif + #ifdef CONFIG_TCG + #include "exec/cpu_ldst.h" + #include "tcg/tcg.h" +@@ -111,7 +116,9 @@ void loongarch_cpu_set_irq(void *opaque, int irq, int level) + return; + } + +- if (tcg_enabled()) { ++ if (kvm_enabled()) { ++ kvm_loongarch_set_interrupt(cpu, irq, level); ++ } else if (tcg_enabled()) { + env->CSR_ESTAT = deposit64(env->CSR_ESTAT, irq, 1, level != 0); + if (FIELD_EX64(env->CSR_ESTAT, CSR_ESTAT, IS)) { + cpu_interrupt(cs, CPU_INTERRUPT_HARD); +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index d2dab3fef4..bd33ec2114 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -748,6 +748,21 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + return ret; + } + ++int kvm_loongarch_set_interrupt(LoongArchCPU *cpu, int irq, int level) ++{ ++ struct kvm_interrupt intr; ++ CPUState *cs = CPU(cpu); 
++ ++ if (level) { ++ intr.irq = irq; ++ } else { ++ intr.irq = -irq; ++ } ++ ++ trace_kvm_set_intr(irq, level); ++ return kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &intr); ++} ++ + void kvm_arch_accel_class_init(ObjectClass *oc) + { + } +diff --git a/target/loongarch/kvm/kvm_loongarch.h b/target/loongarch/kvm/kvm_loongarch.h +new file mode 100644 +index 0000000000..d945b6bb82 +--- /dev/null ++++ b/target/loongarch/kvm/kvm_loongarch.h +@@ -0,0 +1,16 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later */ ++/* ++ * QEMU LoongArch kvm interface ++ * ++ * Copyright (c) 2023 Loongson Technology Corporation Limited ++ */ ++ ++#include "cpu.h" ++ ++#ifndef QEMU_KVM_LOONGARCH_H ++#define QEMU_KVM_LOONGARCH_H ++ ++int kvm_loongarch_set_interrupt(LoongArchCPU *cpu, int irq, int level); ++void kvm_arch_reset_vcpu(CPULoongArchState *env); ++ ++#endif +diff --git a/target/loongarch/trace-events b/target/loongarch/trace-events +index 021839880e..dea11edc0f 100644 +--- a/target/loongarch/trace-events ++++ b/target/loongarch/trace-events +@@ -12,3 +12,4 @@ kvm_failed_put_counter(const char *msg) "Failed to put counter into KVM: %s" + kvm_failed_get_cpucfg(const char *msg) "Failed to get cpucfg from KVM: %s" + kvm_failed_put_cpucfg(const char *msg) "Failed to put cpucfg into KVM: %s" + kvm_arch_handle_exit(int num) "kvm arch handle exit, the reason number: %d" ++kvm_set_intr(int irq, int level) "kvm set interrupt, irq num: %d, level: %d" +-- +2.27.0 + diff --git a/target-loongarch-Restrict-TCG-specific-code.patch b/target-loongarch-Restrict-TCG-specific-code.patch new file mode 100644 index 0000000000000000000000000000000000000000..a430cf8acfc20f73f5c2eabe0fb5e81f5fa7221f --- /dev/null +++ b/target-loongarch-Restrict-TCG-specific-code.patch @@ -0,0 +1,152 @@ +From 773ea71519da1413ca2e0e60857272164e156a47 Mon Sep 17 00:00:00 2001 +From: Tianrui Zhao +Date: Wed, 10 Jan 2024 10:41:51 +0100 +Subject: [PATCH] target/loongarch: Restrict TCG-specific code +MIME-Version: 1.0 +Content-Type: 
text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +In preparation of supporting KVM in the next commit. + +Conflict: + +diff --cc target/loongarch/cpu.c +index 275833eec8,70dd4622aa..0000000000 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@@ -17,9 -17,14 +17,17 @@@ + #include "internals.h" + #include "fpu/softfloat-helpers.h" + #include "cpu-csr.h" + -#ifndef CONFIG_USER_ONLY + #include "sysemu/reset.h" +++<<<<<<< HEAD + +#include "tcg/tcg.h" +++======= ++ #endif +++>>>>>>> target/loongarch: Restrict TCG-specific code + #include "vec.h" ++ #ifdef CONFIG_TCG ++ #include "exec/cpu_ldst.h" ++ #include "tcg/tcg.h" ++ #endif + +Solve: + +drop: +++<<<<<<< HEAD + +#include "tcg/tcg.h" +++======= ++ #endif +++>>>>>>> target/loongarch: Restrict TCG-specific code + +Signed-off-by: Tianrui Zhao +Signed-off-by: xianglai li +Reviewed-by: Song Gao +Message-ID: <20240105075804.1228596-9-zhaotianrui@loongson.cn> +[PMD: Split from bigger patch, part 1] +Signed-off-by: Philippe Mathieu-Daudé +Message-Id: <20240110094152.52138-1-philmd@linaro.org> +Signed-off-by: Song Gao +--- + target/loongarch/cpu.c | 30 +++++++++++++++++++++--------- + 1 file changed, 21 insertions(+), 9 deletions(-) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index 275833eec8..60f2636b43 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -11,15 +11,18 @@ + #include "qapi/error.h" + #include "qemu/module.h" + #include "sysemu/qtest.h" +-#include "exec/cpu_ldst.h" ++#include "sysemu/tcg.h" + #include "exec/exec-all.h" + #include "cpu.h" + #include "internals.h" + #include "fpu/softfloat-helpers.h" + #include "cpu-csr.h" + #include "sysemu/reset.h" +-#include "tcg/tcg.h" + #include "vec.h" ++#ifdef CONFIG_TCG ++#include "exec/cpu_ldst.h" ++#include "tcg/tcg.h" ++#endif + + const char * const regnames[32] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", +@@ -108,12 +111,13 @@ void loongarch_cpu_set_irq(void *opaque, int irq, int level) + 
return; + } + +- env->CSR_ESTAT = deposit64(env->CSR_ESTAT, irq, 1, level != 0); +- +- if (FIELD_EX64(env->CSR_ESTAT, CSR_ESTAT, IS)) { +- cpu_interrupt(cs, CPU_INTERRUPT_HARD); +- } else { +- cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); ++ if (tcg_enabled()) { ++ env->CSR_ESTAT = deposit64(env->CSR_ESTAT, irq, 1, level != 0); ++ if (FIELD_EX64(env->CSR_ESTAT, CSR_ESTAT, IS)) { ++ cpu_interrupt(cs, CPU_INTERRUPT_HARD); ++ } else { ++ cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); ++ } + } + } + +@@ -138,7 +142,10 @@ static inline bool cpu_loongarch_hw_interrupts_pending(CPULoongArchState *env) + + return (pending & status) != 0; + } ++#endif + ++#ifdef CONFIG_TCG ++#ifndef CONFIG_USER_ONLY + static void loongarch_cpu_do_interrupt(CPUState *cs) + { + LoongArchCPU *cpu = LOONGARCH_CPU(cs); +@@ -320,7 +327,6 @@ static bool loongarch_cpu_exec_interrupt(CPUState *cs, int interrupt_request) + } + #endif + +-#ifdef CONFIG_TCG + static void loongarch_cpu_synchronize_from_tb(CPUState *cs, + const TranslationBlock *tb) + { +@@ -558,7 +564,9 @@ static void loongarch_cpu_reset_hold(Object *obj) + } + #endif + ++#ifdef CONFIG_TCG + restore_fp_status(env); ++#endif + cs->exception_index = -1; + } + +@@ -701,8 +709,10 @@ static void loongarch_cpu_init(Object *obj) + CPULoongArchState *env = &cpu->env; + + qdev_init_gpio_in(DEVICE(cpu), loongarch_cpu_set_irq, N_IRQS); ++#ifdef CONFIG_TCG + timer_init_ns(&cpu->timer, QEMU_CLOCK_VIRTUAL, + &loongarch_constant_timer_cb, cpu); ++#endif + memory_region_init_io(&env->system_iocsr, OBJECT(cpu), NULL, + env, "iocsr", UINT64_MAX); + address_space_init(&env->address_space_iocsr, &env->system_iocsr, "IOCSR"); +@@ -802,7 +812,9 @@ static struct TCGCPUOps loongarch_tcg_ops = { + #include "hw/core/sysemu-cpu-ops.h" + + static const struct SysemuCPUOps loongarch_sysemu_ops = { ++#ifdef CONFIG_TCG + .get_phys_page_debug = loongarch_cpu_get_phys_page_debug, ++#endif + }; + + static int64_t loongarch_cpu_get_arch_id(CPUState *cs) +-- +2.27.0 + diff 
--git a/target-loongarch-Set-cpuid-CSR-register-only-once-wi.patch b/target-loongarch-Set-cpuid-CSR-register-only-once-wi.patch new file mode 100644 index 0000000000000000000000000000000000000000..9556d3117254e61fffbefcfdc494ae6298632b81 --- /dev/null +++ b/target-loongarch-Set-cpuid-CSR-register-only-once-wi.patch @@ -0,0 +1,57 @@ +From d271f623205c2984a30cfb12e160e219b2bbe974 Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Mon, 15 Jan 2024 16:51:21 +0800 +Subject: [PATCH] target/loongarch: Set cpuid CSR register only once with kvm + mode + +CSR cpuid register is used for routing irq to different vcpus, its +value is kept unchanged since poweron. So it is not necessary to +set CSR cpuid register after system resets, and it is only set at +vm creation stage. + +Signed-off-by: Bibo Mao +Reviewed-by: Song Gao +Message-Id: <20240115085121.180524-1-maobibo@loongson.cn> +Signed-off-by: Song Gao +--- + target/loongarch/kvm/kvm.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 84bcdf5f86..2230f029d0 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -250,7 +250,7 @@ static int kvm_loongarch_get_csr(CPUState *cs) + return ret; + } + +-static int kvm_loongarch_put_csr(CPUState *cs) ++static int kvm_loongarch_put_csr(CPUState *cs, int level) + { + int ret = 0; + LoongArchCPU *cpu = LOONGARCH_CPU(cs); +@@ -322,8 +322,11 @@ static int kvm_loongarch_put_csr(CPUState *cs) + ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_RVACFG), + &env->CSR_RVACFG); + +- ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_CPUID), ++ /* CPUID is constant after poweron, it should be set only once */ ++ if (level >= KVM_PUT_FULL_STATE) { ++ ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_CPUID), + &env->CSR_CPUID); ++ } + + ret |= kvm_set_one_reg(cs, KVM_IOC_CSRID(LOONGARCH_CSR_PRCFG1), + &env->CSR_PRCFG1); +@@ -598,7 +601,7 @@ int 
kvm_arch_put_registers(CPUState *cs, int level) + return ret; + } + +- ret = kvm_loongarch_put_csr(cs); ++ ret = kvm_loongarch_put_csr(cs, level); + if (ret) { + return ret; + } +-- +2.27.0 + diff --git a/target-loongarch-Supplement-vcpu-env-initial-when-vc.patch b/target-loongarch-Supplement-vcpu-env-initial-when-vc.patch new file mode 100644 index 0000000000000000000000000000000000000000..e066713c788e3f93715b341486904e7db372f6ca --- /dev/null +++ b/target-loongarch-Supplement-vcpu-env-initial-when-vc.patch @@ -0,0 +1,59 @@ +From 48dae5f461bf2cde206e879d52df6cf1bad3ac6e Mon Sep 17 00:00:00 2001 +From: Tianrui Zhao +Date: Fri, 5 Jan 2024 15:57:58 +0800 +Subject: [PATCH] target/loongarch: Supplement vcpu env initial when vcpu reset + +Supplement vcpu env initial when vcpu reset, including +init vcpu CSR_CPUID,CSR_TID to cpu->cpu_index. The two +regs will be used in kvm_get/set_csr_ioctl. + +Signed-off-by: Tianrui Zhao +Signed-off-by: xianglai li +Reviewed-by: Song Gao +Message-Id: <20240105075804.1228596-4-zhaotianrui@loongson.cn> +Signed-off-by: Song Gao +--- + target/loongarch/cpu.c | 2 ++ + target/loongarch/cpu.h | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index db9a421cc4..021592798a 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -531,10 +531,12 @@ static void loongarch_cpu_reset_hold(Object *obj) + + env->CSR_ESTAT = env->CSR_ESTAT & (~MAKE_64BIT_MASK(0, 2)); + env->CSR_RVACFG = FIELD_DP64(env->CSR_RVACFG, CSR_RVACFG, RBITS, 0); ++ env->CSR_CPUID = cs->cpu_index; + env->CSR_TCFG = FIELD_DP64(env->CSR_TCFG, CSR_TCFG, EN, 0); + env->CSR_LLBCTL = FIELD_DP64(env->CSR_LLBCTL, CSR_LLBCTL, KLO, 0); + env->CSR_TLBRERA = FIELD_DP64(env->CSR_TLBRERA, CSR_TLBRERA, ISTLBR, 0); + env->CSR_MERRCTL = FIELD_DP64(env->CSR_MERRCTL, CSR_MERRCTL, ISMERR, 0); ++ env->CSR_TID = cs->cpu_index; + + env->CSR_PRCFG3 = FIELD_DP64(env->CSR_PRCFG3, CSR_PRCFG3, TLB_TYPE, 2); + 
env->CSR_PRCFG3 = FIELD_DP64(env->CSR_PRCFG3, CSR_PRCFG3, MTLB_ENTRY, 63); +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index 00d1fba597..f6d5ef0852 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -319,6 +319,7 @@ typedef struct CPUArchState { + uint64_t CSR_PWCH; + uint64_t CSR_STLBPS; + uint64_t CSR_RVACFG; ++ uint64_t CSR_CPUID; + uint64_t CSR_PRCFG1; + uint64_t CSR_PRCFG2; + uint64_t CSR_PRCFG3; +@@ -350,7 +351,6 @@ typedef struct CPUArchState { + uint64_t CSR_DBG; + uint64_t CSR_DERA; + uint64_t CSR_DSAVE; +- uint64_t CSR_CPUID; + + #ifndef CONFIG_USER_ONLY + LoongArchTLB tlb[LOONGARCH_TLB_MAX]; +-- +2.27.0 + diff --git a/target-loongarch-kvm-Add-pmu-support.patch b/target-loongarch-kvm-Add-pmu-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..5a5650a903c670138079db6e2d1bb6902e81a4ff --- /dev/null +++ b/target-loongarch-kvm-Add-pmu-support.patch @@ -0,0 +1,224 @@ +From 57db061a63243c64c07624740fc039ddcc4777a2 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Fri, 22 Mar 2024 19:26:35 +0800 +Subject: [PATCH] target/loongarch/kvm: Add pmu support + +This patch adds PMU support + e.g + '... -cpu max,pmu=on,pmnum=[1-16]'; + '... -cpu max,pmu=on' (default pmnum = 4); + '... 
-cpu max,pmu=off' (disable PMU) + +Signed-off-by: Song Gao +--- + target/loongarch/cpu.c | 64 +++++++++++++++++++++++++++ + target/loongarch/cpu.h | 2 + + target/loongarch/kvm/kvm.c | 55 ++++++++++++++++++++++- + target/loongarch/loongarch-qmp-cmds.c | 2 +- + 4 files changed, 121 insertions(+), 2 deletions(-) + +diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c +index fdb819c2cf..bc557f207b 100644 +--- a/target/loongarch/cpu.c ++++ b/target/loongarch/cpu.c +@@ -8,6 +8,7 @@ + #include "qemu/osdep.h" + #include "qemu/log.h" + #include "qemu/qemu-print.h" ++#include "qemu/error-report.h" + #include "qapi/error.h" + #include "qemu/module.h" + #include "sysemu/qtest.h" +@@ -19,6 +20,7 @@ + #include "internals.h" + #include "fpu/softfloat-helpers.h" + #include "cpu-csr.h" ++#include "qapi/visitor.h" + #include "sysemu/reset.h" + #include "vec.h" + #ifdef CONFIG_KVM +@@ -426,6 +428,14 @@ static void loongarch_la464_initfn(Object *obj) + data = FIELD_DP32(data, CPUCFG5, CC_DIV, 1); + env->cpucfg[5] = data; + ++ if (kvm_enabled()) { ++ data = 0; ++ data = FIELD_DP32(data, CPUCFG6, PMP, 1); ++ data = FIELD_DP32(data, CPUCFG6, PMNUM, 3); ++ data = FIELD_DP32(data, CPUCFG6, PMBITS, 63); ++ env->cpucfg[6] = data; ++ } ++ + data = 0; + data = FIELD_DP32(data, CPUCFG16, L1_IUPRE, 1); + data = FIELD_DP32(data, CPUCFG16, L1_DPRE, 1); +@@ -660,6 +670,48 @@ static void loongarch_set_lasx(Object *obj, bool value, Error **errp) + } + } + ++static bool loongarch_get_pmu(Object *obj, Error **errp) ++{ ++ LoongArchCPU *cpu = LOONGARCH_CPU(obj); ++ ++ return !!(FIELD_EX32(cpu->env.cpucfg[6], CPUCFG6, PMP)); ++} ++ ++static void loongarch_set_pmu(Object *obj, bool value, Error **errp) ++{ ++ LoongArchCPU *cpu = LOONGARCH_CPU(obj); ++ ++ cpu->env.cpucfg[6] = FIELD_DP32(cpu->env.cpucfg[6], CPUCFG6, PMP, value); ++} ++ ++static void loongarch_get_pmnum(Object *obj, Visitor *v, ++ const char *name, void *opaque, ++ Error **errp) ++{ ++ LoongArchCPU *cpu = LOONGARCH_CPU(obj); ++ 
uint32_t value = FIELD_EX32(cpu->env.cpucfg[6], CPUCFG6, PMNUM) + 1; ++ ++ visit_type_uint32(v, name, &value, errp); ++} ++ ++static void loongarch_set_pmnum(Object *obj, Visitor *v, ++ const char *name, void *opaque, ++ Error **errp) ++{ ++ LoongArchCPU *cpu = LOONGARCH_CPU(obj); ++ uint32_t value = 0; /* parsed into a local; 'opaque' is not used */ ++ ++ if (!visit_type_uint32(v, name, &value, errp)) { ++ return; ++ } ++ if ((value <= PMNUM_MAX) && (value > 0)) { ++ cpu->env.cpucfg[6] = FIELD_DP32(cpu->env.cpucfg[6], CPUCFG6, PMNUM, value - 1); ++ } else { ++ error_report("Performance counter number need be in [1- %d]\n", PMNUM_MAX); ++ exit(EXIT_FAILURE); ++ } ++} ++ + void loongarch_cpu_post_init(Object *obj) + { + LoongArchCPU *cpu = LOONGARCH_CPU(obj); +@@ -672,6 +724,18 @@ void loongarch_cpu_post_init(Object *obj) + object_property_add_bool(obj, "lasx", loongarch_get_lasx, + loongarch_set_lasx); + } ++ ++ if (kvm_enabled()) { ++ object_property_add_bool(obj, "pmu", loongarch_get_pmu, ++ loongarch_set_pmu); ++ if (FIELD_EX32(cpu->env.cpucfg[6], CPUCFG6, PMP)) { ++ /* No opaque needed: the setter parses into its own local */ ++ object_property_add(obj, "pmnum", "uint32", ++ loongarch_get_pmnum, ++ loongarch_set_pmnum, NULL, ++ NULL); ++ } ++ } + } + + static void loongarch_cpu_init(Object *obj) +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index 4749d41c8c..80cad24fa1 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -186,6 +186,8 @@ FIELD(CPUCFG6, PMNUM, 4, 4) + FIELD(CPUCFG6, PMBITS, 8, 6) + FIELD(CPUCFG6, UPM, 14, 1) + ++#define PMNUM_MAX 16 ++ + /* cpucfg[16] bits */ + FIELD(CPUCFG16, L1_IUPRE, 0, 1) + FIELD(CPUCFG16, L1_IUUNIFY, 1, 1) +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 49d02076ad..5dda631b2b 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -573,6 +573,53 @@ static int kvm_check_cpucfg2(CPUState *cs) + return ret; + } + ++static int kvm_check_cpucfg6(CPUState *cs) ++{ ++ int ret; ++ uint64_t val; ++ struct kvm_device_attr
attr = { ++ .group = KVM_LOONGARCH_VCPU_CPUCFG, ++ .attr = 6, ++ .addr = (uint64_t)&val, ++ }; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ ret = kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, &attr); ++ if (!ret) { ++ kvm_vcpu_ioctl(cs, KVM_GET_DEVICE_ATTR, &attr); ++ ++ if (FIELD_EX32(env->cpucfg[6], CPUCFG6, PMP)) { ++ /* Check PMP */ ++ if (!FIELD_EX32(val, CPUCFG6, PMP)) { ++ error_report("'pmu' feature not supported by KVM on this host" ++ " Please disable 'pmu' with " ++ "'... -cpu XXX,pmu=off ...'\n"); ++ exit(EXIT_FAILURE); ++ } ++ /* Check PMNUM */ ++ int guest_pmnum = FIELD_EX32(env->cpucfg[6], CPUCFG6, PMNUM); ++ int host_pmnum = FIELD_EX32(val, CPUCFG6, PMNUM); ++ if (guest_pmnum > host_pmnum){ ++ warn_report("The guest pmnum %d larger than KVM support %d\n", ++ guest_pmnum, host_pmnum); ++ env->cpucfg[6] = FIELD_DP32(env->cpucfg[6], CPUCFG6, ++ PMNUM, host_pmnum); ++ } ++ /* Check PMBITS */ ++ int guest_pmbits = FIELD_EX32(env->cpucfg[6], CPUCFG6, PMBITS); ++ int host_pmbits = FIELD_EX32(val, CPUCFG6, PMBITS); ++ if (guest_pmbits != host_pmbits) { ++ warn_report("The host not support PMBITS %d\n", guest_pmbits); ++ env->cpucfg[6] = FIELD_DP32(env->cpucfg[6], CPUCFG6, ++ PMBITS, host_pmbits); ++ } ++ } ++ } ++ ++ return ret; ++} ++ + static int kvm_loongarch_put_cpucfg(CPUState *cs) + { + int i, ret = 0; +@@ -586,7 +633,13 @@ static int kvm_loongarch_put_cpucfg(CPUState *cs) + if (ret) { + return ret; + } +- } ++ } ++ if (i == 6) { ++ ret = kvm_check_cpucfg6(cs); ++ if (ret) { ++ return ret; ++ } ++ } + val = env->cpucfg[i]; + ret = kvm_set_one_reg(cs, KVM_IOC_CPUCFG(i), &val); + if (ret < 0) { +diff --git a/target/loongarch/loongarch-qmp-cmds.c b/target/loongarch/loongarch-qmp-cmds.c +index 645672ff59..2612f43de9 100644 +--- a/target/loongarch/loongarch-qmp-cmds.c ++++ b/target/loongarch/loongarch-qmp-cmds.c +@@ -42,7 +42,7 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) + } + + static const char 
*cpu_model_advertised_features[] = { +- "lsx", "lasx", NULL ++ "lsx", "lasx", "pmu", "pmnum", NULL + }; + + CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, +-- +2.33.0 + diff --git a/target-loongarch-kvm-Add-pv-steal-time-support.patch b/target-loongarch-kvm-Add-pv-steal-time-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..19fbcad65ec05a5ea70e7df913889faccff344fa --- /dev/null +++ b/target-loongarch-kvm-Add-pv-steal-time-support.patch @@ -0,0 +1,169 @@ +From 8b69a1b340da95cacdff252927ca8aef9d43c33a Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Wed, 24 Apr 2024 16:06:33 +0800 +Subject: [PATCH] target/loongarch/kvm: Add pv steal time support + +Signed-off-by: Song Gao +--- + linux-headers/asm-loongarch/kvm.h | 2 ++ + target/loongarch/cpu.h | 3 ++ + target/loongarch/kvm/kvm.c | 50 ++++++++++++++++++++++++++++ + target/loongarch/kvm/kvm_loongarch.h | 2 ++ + target/loongarch/machine.c | 25 ++++++++++++++ + 5 files changed, 82 insertions(+) + +diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h +index 4cec8c1601..81fec85f0a 100644 +--- a/linux-headers/asm-loongarch/kvm.h ++++ b/linux-headers/asm-loongarch/kvm.h +@@ -84,6 +84,8 @@ struct kvm_fpu { + #define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG) + #define KVM_IOC_CPUCFG(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG) + #define KVM_LOONGARCH_VCPU_CPUCFG 0 ++#define KVM_LOONGARCH_VCPU_PVTIME_CTRL 1 ++#define KVM_LOONGARCH_VCPU_PVTIME_GPA 0 + + struct kvm_debug_exit_arch { + }; +diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h +index 80cad24fa1..0ed24051af 100644 +--- a/target/loongarch/cpu.h ++++ b/target/loongarch/cpu.h +@@ -365,6 +365,9 @@ typedef struct CPUArchState { + /* Store ipistate to access from this struct */ + DeviceState *ipistate; + #endif ++ struct { ++ uint64_t guest_addr; ++ } st; + } CPULoongArchState; + + /** +diff --git a/target/loongarch/kvm/kvm.c 
b/target/loongarch/kvm/kvm.c +index 5dda631b2b..e1d521a1de 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -649,6 +649,56 @@ static int kvm_loongarch_put_cpucfg(CPUState *cs) + return ret; + } + ++int kvm_loongarch_put_pvtime(LoongArchCPU *cpu) ++{ ++ CPULoongArchState *env = &cpu->env; ++ int err; ++ struct kvm_device_attr attr = { ++ .group = KVM_LOONGARCH_VCPU_PVTIME_CTRL, ++ .attr = KVM_LOONGARCH_VCPU_PVTIME_GPA, ++ .addr = (uint64_t)&env->st.guest_addr, ++ }; ++ ++ err = kvm_vcpu_ioctl(CPU(cpu), KVM_HAS_DEVICE_ATTR, &attr); ++ if (err != 0) { ++ /* Not an error: this KVM does not expose the pvtime attribute */ ++ return 0; ++ } ++ ++ err = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_DEVICE_ATTR, &attr); ++ if (err != 0) { ++ error_report("PVTIME IPA: KVM_SET_DEVICE_ATTR: %s", strerror(-err)); ++ return err; ++ } ++ ++ return 0; ++} ++ ++int kvm_loongarch_get_pvtime(LoongArchCPU *cpu) ++{ ++ CPULoongArchState *env = &cpu->env; ++ int err; ++ struct kvm_device_attr attr = { ++ .group = KVM_LOONGARCH_VCPU_PVTIME_CTRL, ++ .attr = KVM_LOONGARCH_VCPU_PVTIME_GPA, ++ .addr = (uint64_t)&env->st.guest_addr, ++ }; ++ ++ err = kvm_vcpu_ioctl(CPU(cpu), KVM_HAS_DEVICE_ATTR, &attr); ++ if (err != 0) { ++ /* Not an error: this KVM does not expose the pvtime attribute */ ++ return 0; ++ } ++ ++ err = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_DEVICE_ATTR, &attr); ++ if (err != 0) { ++ error_report("PVTIME IPA: KVM_GET_DEVICE_ATTR: %s", strerror(-err)); ++ return err; ++ } ++ ++ return 0; ++} ++ + int kvm_arch_get_registers(CPUState *cs) + { + int ret; +diff --git a/target/loongarch/kvm/kvm_loongarch.h b/target/loongarch/kvm/kvm_loongarch.h +index d945b6bb82..551878a725 100644 +--- a/target/loongarch/kvm/kvm_loongarch.h ++++ b/target/loongarch/kvm/kvm_loongarch.h +@@ -12,5 +12,7 @@ + + int kvm_loongarch_set_interrupt(LoongArchCPU *cpu, int irq, int level); + void kvm_arch_reset_vcpu(CPULoongArchState *env); ++int kvm_loongarch_put_pvtime(LoongArchCPU *cpu); ++int kvm_loongarch_get_pvtime(LoongArchCPU *cpu);
+ + #endif +diff --git a/target/loongarch/machine.c b/target/loongarch/machine.c +index 4443caed2d..ec5abe56db 100644 +--- a/target/loongarch/machine.c ++++ b/target/loongarch/machine.c +@@ -9,6 +9,8 @@ + #include "cpu.h" + #include "migration/cpu.h" + #include "vec.h" ++#include "kvm/kvm_loongarch.h" ++#include "sysemu/kvm.h" + + static const VMStateDescription vmstate_fpu_reg = { + .name = "fpu_reg", +@@ -122,15 +124,38 @@ const VMStateDescription vmstate_tlb = { + } + }; + ++static int cpu_post_load(void *opaque, int version_id) ++{ ++#ifdef CONFIG_KVM ++ LoongArchCPU *cpu = opaque; ++ kvm_loongarch_put_pvtime(cpu); ++#endif ++ return 0; ++} ++ ++static int cpu_pre_save(void *opaque) ++{ ++#ifdef CONFIG_KVM ++ LoongArchCPU *cpu = opaque; ++ kvm_loongarch_get_pvtime(cpu); ++#endif ++ return 0; ++} ++ + /* LoongArch CPU state */ + const VMStateDescription vmstate_loongarch_cpu = { + .name = "cpu", + .version_id = 1, + .minimum_version_id = 1, ++ .post_load = cpu_post_load, ++ .pre_save = cpu_pre_save, + .fields = (VMStateField[]) { + VMSTATE_UINTTL_ARRAY(env.gpr, LoongArchCPU, 32), + VMSTATE_UINTTL(env.pc, LoongArchCPU), + ++ /* PV time */ ++ VMSTATE_UINT64(env.st.guest_addr, LoongArchCPU), ++ + /* Remaining CSRs */ + VMSTATE_UINT64(env.CSR_CRMD, LoongArchCPU), + VMSTATE_UINT64(env.CSR_PRMD, LoongArchCPU), +-- +2.33.0 + diff --git a/target-loongarch-kvm-Add-software-breakpoint-support.patch b/target-loongarch-kvm-Add-software-breakpoint-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..d3981d62e76171605008311379cfa6929f1b221e --- /dev/null +++ b/target-loongarch-kvm-Add-software-breakpoint-support.patch @@ -0,0 +1,132 @@ +From 6a301af275fd684c197cf7a2e73fc265993478da Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Sun, 18 Feb 2024 15:00:25 +0800 +Subject: [PATCH] target/loongarch/kvm: Add software breakpoint support + +With KVM virtualization, debug exception is passed through +to guest kernel rather than host mode.
Here hypercall +instruction with special hypercall code is used for sw +breakpoint usage. + +Now only software breakpoint is supported, and it is allowed +to insert/remove software breakpoint. Later hardware breakpoint +will be added. + +Signed-off-by: Bibo Mao +--- + target/loongarch/kvm/kvm.c | 77 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 77 insertions(+) + +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index c19978a970..49d02076ad 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -29,6 +29,7 @@ + #include "trace.h" + + static bool cap_has_mp_state; ++static unsigned int brk_insn; + const KVMCapabilityInfo kvm_arch_required_capabilities[] = { + KVM_CAP_LAST_INFO + }; +@@ -675,7 +676,14 @@ static void kvm_loongarch_vm_stage_change(void *opaque, bool running, + + int kvm_arch_init_vcpu(CPUState *cs) + { ++ uint64_t val; ++ + qemu_add_vm_change_state_handler(kvm_loongarch_vm_stage_change, cs); ++ ++ if (!kvm_get_one_reg(cs, KVM_REG_LOONGARCH_DEBUG_INST, &val)) { ++ brk_insn = val; ++ } ++ + return 0; + } + +@@ -755,6 +763,68 @@ bool kvm_arch_cpu_check_are_resettable(void) + return true; + } + ++ ++void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg) ++{ ++ if (kvm_sw_breakpoints_active(cpu)) { ++ dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; ++ } ++} ++ ++int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) ++{ ++ if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0) || ++ cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk_insn, 4, 1)) { ++ error_report("%s failed", __func__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) ++{ ++ static uint32_t brk; ++ ++ if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk, 4, 0) || ++ brk != brk_insn || ++ cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) { ++ error_report("%s failed",
__func__); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int kvm_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type) ++{ ++ return -ENOSYS; ++} ++ ++int kvm_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type) ++{ ++ return -ENOSYS; ++} ++ ++void kvm_arch_remove_all_hw_breakpoints(void) ++{ ++} ++ ++static bool kvm_loongarch_handle_debug(CPUState *cs, struct kvm_run *run) ++{ ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ kvm_cpu_synchronize_state(cs); ++ if (cs->singlestep_enabled) { ++ return true; ++ } ++ ++ if (kvm_find_sw_breakpoint(cs, env->pc)) { ++ return true; ++ } ++ ++ return false; ++} ++ + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + { + int ret = 0; +@@ -774,6 +844,13 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + run->iocsr_io.len, + run->iocsr_io.is_write); + break; ++ ++ case KVM_EXIT_DEBUG: ++ if (kvm_loongarch_handle_debug(cs, run)) { ++ ret = EXCP_DEBUG; ++ } ++ break; ++ + default: + ret = -1; + warn_report("KVM: unknown exit reason %d", run->exit_reason); +-- +2.33.0 + diff --git a/target-loongarch-kvm-Enable-LSX-LASX-extension.patch b/target-loongarch-kvm-Enable-LSX-LASX-extension.patch new file mode 100644 index 0000000000000000000000000000000000000000..fc5655fd1259e3f0405bd996299ec25373f9d6f9 --- /dev/null +++ b/target-loongarch-kvm-Enable-LSX-LASX-extension.patch @@ -0,0 +1,98 @@ +From 6e503b590e42ad7c522cf937b83e1f8f715dbd1a Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Mon, 22 Jan 2024 17:02:06 +0800 +Subject: [PATCH] target/loongarch/kvm: Enable LSX/LASX extension + +The kernel had already support LSX and LASX [1], +but QEMU is disable LSX/LASX for kvm. This patch adds +kvm_check_cpucfg2() to check CPUCFG2. 
+ +[1]: https://lore.kernel.org/all/CABgObfZHRf7E_7Jk4uPRmSyxTy3EiuuYwHC35jQncNL9s-zTDA@mail.gmail.com/ + +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Message-Id: <20240122090206.1083584-1-gaosong@loongson.cn> +--- + linux-headers/asm-loongarch/kvm.h | 1 + + target/loongarch/kvm/kvm.c | 45 ++++++++++++++++++++++++++----- + 2 files changed, 39 insertions(+), 7 deletions(-) + +diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h +index c6ad2ee610..923d0bd382 100644 +--- a/linux-headers/asm-loongarch/kvm.h ++++ b/linux-headers/asm-loongarch/kvm.h +@@ -79,6 +79,7 @@ struct kvm_fpu { + #define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT)) + #define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG) + #define KVM_IOC_CPUCFG(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG) ++#define KVM_LOONGARCH_VCPU_CPUCFG 0 + + struct kvm_debug_exit_arch { + }; +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index 2230f029d0..c19978a970 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -540,6 +540,38 @@ static int kvm_loongarch_get_cpucfg(CPUState *cs) + return ret; + } + ++static int kvm_check_cpucfg2(CPUState *cs) ++{ ++ int ret; ++ uint64_t val; ++ struct kvm_device_attr attr = { ++ .group = KVM_LOONGARCH_VCPU_CPUCFG, ++ .attr = 2, ++ .addr = (uint64_t)&val, ++ }; ++ LoongArchCPU *cpu = LOONGARCH_CPU(cs); ++ CPULoongArchState *env = &cpu->env; ++ ++ ret = kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, &attr); ++ ++ if (!ret) { ++ kvm_vcpu_ioctl(cs, KVM_GET_DEVICE_ATTR, &attr); ++ env->cpucfg[2] &= val; ++ ++ if (FIELD_EX32(env->cpucfg[2], CPUCFG2, FP)) { ++ /* The FP minimal version is 1. */ ++ env->cpucfg[2] = FIELD_DP32(env->cpucfg[2], CPUCFG2, FP_VER, 1); ++ } ++ ++ if (FIELD_EX32(env->cpucfg[2], CPUCFG2, LLFTP)) { ++ /* The LLFTP minimal version is 1. 
*/ ++ env->cpucfg[2] = FIELD_DP32(env->cpucfg[2], CPUCFG2, LLFTP_VER, 1); ++ } ++ } ++ ++ return ret; ++} ++ + static int kvm_loongarch_put_cpucfg(CPUState *cs) + { + int i, ret = 0; +@@ -548,14 +580,13 @@ static int kvm_loongarch_put_cpucfg(CPUState *cs) + uint64_t val; + + for (i = 0; i < 21; i++) { ++ if (i == 2) { ++ ret = kvm_check_cpucfg2(cs); ++ if (ret) { ++ return ret; ++ } ++ } + val = env->cpucfg[i]; +- /* LSX and LASX and LBT are not supported in kvm now */ +- if (i == 2) { +- val &= ~(BIT(R_CPUCFG2_LSX_SHIFT) | BIT(R_CPUCFG2_LASX_SHIFT)); +- val &= ~(BIT(R_CPUCFG2_LBT_X86_SHIFT) | +- BIT(R_CPUCFG2_LBT_ARM_SHIFT) | +- BIT(R_CPUCFG2_LBT_MIPS_SHIFT)); +- } + ret = kvm_set_one_reg(cs, KVM_IOC_CPUCFG(i), &val); + if (ret < 0) { + trace_kvm_failed_put_cpucfg(strerror(errno)); +-- +2.27.0 + diff --git a/target-loongarch-kvm-Fix-vm-restore-failed.patch b/target-loongarch-kvm-Fix-vm-restore-failed.patch new file mode 100644 index 0000000000000000000000000000000000000000..36cd4cb417fdbc3a62e618574e79800ba5fda743 --- /dev/null +++ b/target-loongarch-kvm-Fix-vm-restore-failed.patch @@ -0,0 +1,28 @@ +From 7cf9ed3844ed3340165121e5fd7dcb959ee80d15 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Wed, 24 Apr 2024 14:18:46 +0800 +Subject: [PATCH] target/loongarch/kvm: Fix vm restore failed + +The vmstate_loongarch_cpu need kvm_state_counter. 
+ +Signed-off-by: Song Gao +--- + target/loongarch/machine.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/target/loongarch/machine.c b/target/loongarch/machine.c +index 1c4e01d076..4443caed2d 100644 +--- a/target/loongarch/machine.c ++++ b/target/loongarch/machine.c +@@ -191,6 +191,8 @@ const VMStateDescription vmstate_loongarch_cpu = { + VMSTATE_STRUCT_ARRAY(env.tlb, LoongArchCPU, LOONGARCH_TLB_MAX, + 0, vmstate_tlb, LoongArchTLB), + ++ VMSTATE_UINT64(kvm_state_counter, LoongArchCPU), ++ + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription*[]) { +-- +2.33.0 + diff --git a/target-loongarch-kvm-fpu-save-the-vreg-registers-hig.patch b/target-loongarch-kvm-fpu-save-the-vreg-registers-hig.patch new file mode 100644 index 0000000000000000000000000000000000000000..cdd2e4957e54ddfa954cae7818eae2e85f11a7e7 --- /dev/null +++ b/target-loongarch-kvm-fpu-save-the-vreg-registers-hig.patch @@ -0,0 +1,43 @@ +From 7a3573ce009afa271168829da86e2c70c63fa58a Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Tue, 14 May 2024 19:07:52 +0800 +Subject: [PATCH] target/loongarch/kvm: fpu save the vreg registers high + 192bit + +On kvm side, get_fpu/set_fpu save the vreg registers high 192bits, +but QEMU missing. 
+ +Signed-off-by: Song Gao +Reviewed-by: Bibo Mao +Message-Id: <20240514110752.989572-1-gaosong@loongson.cn> +--- + target/loongarch/kvm/kvm.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c +index e1d521a1de..5c88270132 100644 +--- a/target/loongarch/kvm/kvm.c ++++ b/target/loongarch/kvm/kvm.c +@@ -444,6 +444,9 @@ static int kvm_loongarch_get_regs_fp(CPUState *cs) + env->fcsr0 = fpu.fcsr; + for (i = 0; i < 32; i++) { + env->fpr[i].vreg.UD[0] = fpu.fpr[i].val64[0]; ++ env->fpr[i].vreg.UD[1] = fpu.fpr[i].val64[1]; ++ env->fpr[i].vreg.UD[2] = fpu.fpr[i].val64[2]; ++ env->fpr[i].vreg.UD[3] = fpu.fpr[i].val64[3]; + } + for (i = 0; i < 8; i++) { + env->cf[i] = fpu.fcc & 0xFF; +@@ -465,6 +468,9 @@ static int kvm_loongarch_put_regs_fp(CPUState *cs) + fpu.fcc = 0; + for (i = 0; i < 32; i++) { + fpu.fpr[i].val64[0] = env->fpr[i].vreg.UD[0]; ++ fpu.fpr[i].val64[1] = env->fpr[i].vreg.UD[1]; ++ fpu.fpr[i].val64[2] = env->fpr[i].vreg.UD[2]; ++ fpu.fpr[i].val64[3] = env->fpr[i].vreg.UD[3]; + } + + for (i = 0; i < 8; i++) { +-- +2.33.0 + diff --git a/target-loongarch-kvm-sync-kernel-header-files.patch b/target-loongarch-kvm-sync-kernel-header-files.patch new file mode 100644 index 0000000000000000000000000000000000000000..4d621d7f30f2a0d6296992b697b390f8b67fb82c --- /dev/null +++ b/target-loongarch-kvm-sync-kernel-header-files.patch @@ -0,0 +1,41 @@ +From b7e49ac3b4e7dbfc9ba4645a85962294883c251a Mon Sep 17 00:00:00 2001 +From: Bibo Mao +Date: Wed, 13 Mar 2024 10:04:33 +0800 +Subject: [PATCH] target/loongarch/kvm: sync kernel header files + +sync kernel header files. 
+ +Signed-off-by: Bibo Mao +--- + linux-headers/asm-loongarch/kvm.h | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h +index 923d0bd382..4cec8c1601 100644 +--- a/linux-headers/asm-loongarch/kvm.h ++++ b/linux-headers/asm-loongarch/kvm.h +@@ -15,10 +15,12 @@ + */ + + #define __KVM_HAVE_READONLY_MEM ++#define __KVM_HAVE_GUEST_DEBUG + + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_DIRTY_LOG_PAGE_OFFSET 64 + ++#define KVM_GUESTDBG_USE_SW_BP 0x00010000 + /* + * for KVM_GET_REGS and KVM_SET_REGS + */ +@@ -74,6 +76,8 @@ struct kvm_fpu { + + #define KVM_REG_LOONGARCH_COUNTER (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 1) + #define KVM_REG_LOONGARCH_VCPU_RESET (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 2) ++/* Debugging: Special instruction for software breakpoint */ ++#define KVM_REG_LOONGARCH_DEBUG_INST (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 3) + + #define LOONGARCH_REG_SHIFT 3 + #define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT)) +-- +2.33.0 + diff --git a/target-loongarch-meson-move-gdbstub.c-to-loongarch.s.patch b/target-loongarch-meson-move-gdbstub.c-to-loongarch.s.patch new file mode 100644 index 0000000000000000000000000000000000000000..7a41c08ace63d59551990616806a9c784d4384c5 --- /dev/null +++ b/target-loongarch-meson-move-gdbstub.c-to-loongarch.s.patch @@ -0,0 +1,41 @@ +From ae65e1281aa67713bde6bce323a3a8d06f27c636 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Tue, 2 Jan 2024 10:01:59 +0800 +Subject: [PATCH] target/loongarch/meson: move gdbstub.c to loongarch.ss +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +gdbstub.c is not specific to TCG and can be used by +other accelerators, such as KVM accelerator + +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Song Gao +Message-Id: <20240102020200.3462097-1-gaosong@loongson.cn> +--- + target/loongarch/meson.build | 2 +- + 1 file
changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build +index 18e8191e2b..b3a0fb12fb 100644 +--- a/target/loongarch/meson.build ++++ b/target/loongarch/meson.build +@@ -3,6 +3,7 @@ gen = decodetree.process('insns.decode') + loongarch_ss = ss.source_set() + loongarch_ss.add(files( + 'cpu.c', ++ 'gdbstub.c', + )) + loongarch_tcg_ss = ss.source_set() + loongarch_tcg_ss.add(gen) +@@ -10,7 +11,6 @@ loongarch_tcg_ss.add(files( + 'fpu_helper.c', + 'op_helper.c', + 'translate.c', +- 'gdbstub.c', + 'vec_helper.c', + )) + loongarch_tcg_ss.add(zlib) +-- +2.27.0 + diff --git a/target-loongarch-move-translate-modules-to-tcg.patch b/target-loongarch-move-translate-modules-to-tcg.patch new file mode 100644 index 0000000000000000000000000000000000000000..b7e249c8e5ff13f7e04e3013613cf1a21ad032c0 --- /dev/null +++ b/target-loongarch-move-translate-modules-to-tcg.patch @@ -0,0 +1,215 @@ +From eef77dd5b0d292d8a0276c820fc8fee24de0d898 Mon Sep 17 00:00:00 2001 +From: Song Gao +Date: Tue, 2 Jan 2024 10:02:00 +0800 +Subject: [PATCH] target/loongarch: move translate modules to tcg/ +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Introduce the target/loongarch/tcg directory. 
Its purpose is to hold the TCG +code that is selected by CONFIG_TCG + +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Song Gao +Message-Id: <20240102020200.3462097-2-gaosong@loongson.cn> +--- + target/loongarch/meson.build | 15 +-------------- + target/loongarch/{ => tcg}/constant_timer.c | 0 + target/loongarch/{ => tcg}/csr_helper.c | 0 + target/loongarch/{ => tcg}/fpu_helper.c | 0 + .../{ => tcg}/insn_trans/trans_arith.c.inc | 0 + .../{ => tcg}/insn_trans/trans_atomic.c.inc | 0 + .../{ => tcg}/insn_trans/trans_bit.c.inc | 0 + .../{ => tcg}/insn_trans/trans_branch.c.inc | 0 + .../{ => tcg}/insn_trans/trans_extra.c.inc | 0 + .../{ => tcg}/insn_trans/trans_farith.c.inc | 0 + .../{ => tcg}/insn_trans/trans_fcmp.c.inc | 0 + .../{ => tcg}/insn_trans/trans_fcnv.c.inc | 0 + .../{ => tcg}/insn_trans/trans_fmemory.c.inc | 0 + .../{ => tcg}/insn_trans/trans_fmov.c.inc | 0 + .../{ => tcg}/insn_trans/trans_memory.c.inc | 0 + .../insn_trans/trans_privileged.c.inc | 0 + .../{ => tcg}/insn_trans/trans_shift.c.inc | 0 + .../{ => tcg}/insn_trans/trans_vec.c.inc | 0 + target/loongarch/{ => tcg}/iocsr_helper.c | 0 + target/loongarch/tcg/meson.build | 19 +++++++++++++++++++ + target/loongarch/{ => tcg}/op_helper.c | 0 + target/loongarch/{ => tcg}/tlb_helper.c | 0 + target/loongarch/{ => tcg}/translate.c | 0 + target/loongarch/{ => tcg}/vec_helper.c | 0 + 24 files changed, 20 insertions(+), 14 deletions(-) + rename target/loongarch/{ => tcg}/constant_timer.c (100%) + rename target/loongarch/{ => tcg}/csr_helper.c (100%) + rename target/loongarch/{ => tcg}/fpu_helper.c (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_arith.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_atomic.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_bit.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_branch.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_extra.c.inc (100%) + rename target/loongarch/{ =>
tcg}/insn_trans/trans_farith.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_fcmp.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_fcnv.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_fmemory.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_fmov.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_memory.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_privileged.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_shift.c.inc (100%) + rename target/loongarch/{ => tcg}/insn_trans/trans_vec.c.inc (100%) + rename target/loongarch/{ => tcg}/iocsr_helper.c (100%) + create mode 100644 target/loongarch/tcg/meson.build + rename target/loongarch/{ => tcg}/op_helper.c (100%) + rename target/loongarch/{ => tcg}/tlb_helper.c (100%) + rename target/loongarch/{ => tcg}/translate.c (100%) + rename target/loongarch/{ => tcg}/vec_helper.c (100%) + +diff --git a/target/loongarch/meson.build b/target/loongarch/meson.build +index b3a0fb12fb..e84e4c51f4 100644 +--- a/target/loongarch/meson.build ++++ b/target/loongarch/meson.build +@@ -5,29 +5,16 @@ loongarch_ss.add(files( + 'cpu.c', + 'gdbstub.c', + )) +-loongarch_tcg_ss = ss.source_set() +-loongarch_tcg_ss.add(gen) +-loongarch_tcg_ss.add(files( +- 'fpu_helper.c', +- 'op_helper.c', +- 'translate.c', +- 'vec_helper.c', +-)) +-loongarch_tcg_ss.add(zlib) + + loongarch_system_ss = ss.source_set() + loongarch_system_ss.add(files( + 'loongarch-qmp-cmds.c', + 'machine.c', +- 'tlb_helper.c', +- 'constant_timer.c', +- 'csr_helper.c', +- 'iocsr_helper.c', + )) + + common_ss.add(when: 'CONFIG_LOONGARCH_DIS', if_true: [files('disas.c'), gen]) + +-loongarch_ss.add_all(when: 'CONFIG_TCG', if_true: [loongarch_tcg_ss]) ++subdir('tcg') + + target_arch += {'loongarch': loongarch_ss} + target_system_arch += {'loongarch': loongarch_system_ss} +diff --git a/target/loongarch/constant_timer.c b/target/loongarch/tcg/constant_timer.c +similarity 
index 100% +rename from target/loongarch/constant_timer.c +rename to target/loongarch/tcg/constant_timer.c +diff --git a/target/loongarch/csr_helper.c b/target/loongarch/tcg/csr_helper.c +similarity index 100% +rename from target/loongarch/csr_helper.c +rename to target/loongarch/tcg/csr_helper.c +diff --git a/target/loongarch/fpu_helper.c b/target/loongarch/tcg/fpu_helper.c +similarity index 100% +rename from target/loongarch/fpu_helper.c +rename to target/loongarch/tcg/fpu_helper.c +diff --git a/target/loongarch/insn_trans/trans_arith.c.inc b/target/loongarch/tcg/insn_trans/trans_arith.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_arith.c.inc +rename to target/loongarch/tcg/insn_trans/trans_arith.c.inc +diff --git a/target/loongarch/insn_trans/trans_atomic.c.inc b/target/loongarch/tcg/insn_trans/trans_atomic.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_atomic.c.inc +rename to target/loongarch/tcg/insn_trans/trans_atomic.c.inc +diff --git a/target/loongarch/insn_trans/trans_bit.c.inc b/target/loongarch/tcg/insn_trans/trans_bit.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_bit.c.inc +rename to target/loongarch/tcg/insn_trans/trans_bit.c.inc +diff --git a/target/loongarch/insn_trans/trans_branch.c.inc b/target/loongarch/tcg/insn_trans/trans_branch.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_branch.c.inc +rename to target/loongarch/tcg/insn_trans/trans_branch.c.inc +diff --git a/target/loongarch/insn_trans/trans_extra.c.inc b/target/loongarch/tcg/insn_trans/trans_extra.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_extra.c.inc +rename to target/loongarch/tcg/insn_trans/trans_extra.c.inc +diff --git a/target/loongarch/insn_trans/trans_farith.c.inc b/target/loongarch/tcg/insn_trans/trans_farith.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_farith.c.inc +rename to 
target/loongarch/tcg/insn_trans/trans_farith.c.inc +diff --git a/target/loongarch/insn_trans/trans_fcmp.c.inc b/target/loongarch/tcg/insn_trans/trans_fcmp.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_fcmp.c.inc +rename to target/loongarch/tcg/insn_trans/trans_fcmp.c.inc +diff --git a/target/loongarch/insn_trans/trans_fcnv.c.inc b/target/loongarch/tcg/insn_trans/trans_fcnv.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_fcnv.c.inc +rename to target/loongarch/tcg/insn_trans/trans_fcnv.c.inc +diff --git a/target/loongarch/insn_trans/trans_fmemory.c.inc b/target/loongarch/tcg/insn_trans/trans_fmemory.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_fmemory.c.inc +rename to target/loongarch/tcg/insn_trans/trans_fmemory.c.inc +diff --git a/target/loongarch/insn_trans/trans_fmov.c.inc b/target/loongarch/tcg/insn_trans/trans_fmov.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_fmov.c.inc +rename to target/loongarch/tcg/insn_trans/trans_fmov.c.inc +diff --git a/target/loongarch/insn_trans/trans_memory.c.inc b/target/loongarch/tcg/insn_trans/trans_memory.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_memory.c.inc +rename to target/loongarch/tcg/insn_trans/trans_memory.c.inc +diff --git a/target/loongarch/insn_trans/trans_privileged.c.inc b/target/loongarch/tcg/insn_trans/trans_privileged.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_privileged.c.inc +rename to target/loongarch/tcg/insn_trans/trans_privileged.c.inc +diff --git a/target/loongarch/insn_trans/trans_shift.c.inc b/target/loongarch/tcg/insn_trans/trans_shift.c.inc +similarity index 100% +rename from target/loongarch/insn_trans/trans_shift.c.inc +rename to target/loongarch/tcg/insn_trans/trans_shift.c.inc +diff --git a/target/loongarch/insn_trans/trans_vec.c.inc b/target/loongarch/tcg/insn_trans/trans_vec.c.inc +similarity index 100% +rename from 
target/loongarch/insn_trans/trans_vec.c.inc +rename to target/loongarch/tcg/insn_trans/trans_vec.c.inc +diff --git a/target/loongarch/iocsr_helper.c b/target/loongarch/tcg/iocsr_helper.c +similarity index 100% +rename from target/loongarch/iocsr_helper.c +rename to target/loongarch/tcg/iocsr_helper.c +diff --git a/target/loongarch/tcg/meson.build b/target/loongarch/tcg/meson.build +new file mode 100644 +index 0000000000..1a3cd589fb +--- /dev/null ++++ b/target/loongarch/tcg/meson.build +@@ -0,0 +1,19 @@ ++if 'CONFIG_TCG' not in config_all ++ subdir_done() ++endif ++ ++loongarch_ss.add([zlib, gen]) ++ ++loongarch_ss.add(files( ++ 'fpu_helper.c', ++ 'op_helper.c', ++ 'translate.c', ++ 'vec_helper.c', ++)) ++ ++loongarch_system_ss.add(files( ++ 'constant_timer.c', ++ 'csr_helper.c', ++ 'iocsr_helper.c', ++ 'tlb_helper.c', ++)) +diff --git a/target/loongarch/op_helper.c b/target/loongarch/tcg/op_helper.c +similarity index 100% +rename from target/loongarch/op_helper.c +rename to target/loongarch/tcg/op_helper.c +diff --git a/target/loongarch/tlb_helper.c b/target/loongarch/tcg/tlb_helper.c +similarity index 100% +rename from target/loongarch/tlb_helper.c +rename to target/loongarch/tcg/tlb_helper.c +diff --git a/target/loongarch/translate.c b/target/loongarch/tcg/translate.c +similarity index 100% +rename from target/loongarch/translate.c +rename to target/loongarch/tcg/translate.c +diff --git a/target/loongarch/vec_helper.c b/target/loongarch/tcg/vec_helper.c +similarity index 100% +rename from target/loongarch/vec_helper.c +rename to target/loongarch/tcg/vec_helper.c +-- +2.27.0 + diff --git a/tcg-mttcg-enable-threads-to-unregister-in-tcg_ctxs.patch b/tcg-mttcg-enable-threads-to-unregister-in-tcg_ctxs.patch new file mode 100644 index 0000000000000000000000000000000000000000..28e2cf523f3eabc362a585cc6ac48cef43396ed8 --- /dev/null +++ b/tcg-mttcg-enable-threads-to-unregister-in-tcg_ctxs.patch @@ -0,0 +1,98 @@ +From f797e2713a94b48de59324d00c851d89f4438fc0 Mon Sep 17 
00:00:00 2001 +From: Miguel Luis +Date: Fri, 3 Feb 2023 12:33:41 -0100 +Subject: [PATCH] tcg/mttcg: enable threads to unregister in tcg_ctxs[] + +[This patch is just for reference. It has problems as it does not take care of +the TranslationBlocks and their assigned regions during CPU unrealize] + +When using TCG acceleration in a multi-threaded context each vCPU has its own +thread registered in tcg_ctxs[] upon creation and tcg_cur_ctxs stores the current +number of threads that got created. Although, the lack of a mechanism to +unregister these threads is a problem when exercising vCPU hotplug/unplug +due to the fact that tcg_cur_ctxs gets incremented everytime a vCPU gets +hotplugged but never gets decremented everytime a vCPU gets unplugged, therefore +breaking the assert stating tcg_cur_ctxs < tcg_max_ctxs after a certain amount +of vCPU hotplugs. + +Suggested-by: Salil Mehta +[SM: Check Things To Do Section, https://lore.kernel.org/all/20200613213629.21984-1-salil.mehta@huawei.com/] +Signed-off-by: Miguel Luis +--- + accel/tcg/tcg-accel-ops-mttcg.c | 1 + + include/tcg/startup.h | 5 +++++ + tcg/tcg.c | 23 +++++++++++++++++++++++ + 3 files changed, 29 insertions(+) + +diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c +index fac80095bb..73866990ce 100644 +--- a/accel/tcg/tcg-accel-ops-mttcg.c ++++ b/accel/tcg/tcg-accel-ops-mttcg.c +@@ -122,6 +122,7 @@ static void *mttcg_cpu_thread_fn(void *arg) + qemu_mutex_unlock_iothread(); + rcu_remove_force_rcu_notifier(&force_rcu.notifier); + rcu_unregister_thread(); ++ tcg_unregister_thread(); + return NULL; + } + +diff --git a/include/tcg/startup.h b/include/tcg/startup.h +index f71305765c..c6cb1d92a7 100644 +--- a/include/tcg/startup.h ++++ b/include/tcg/startup.h +@@ -45,6 +45,11 @@ void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus); + */ + void tcg_register_thread(void); + ++/** ++ * tcg_unregister_thread: Unregister this thread with the TCG runtime ++ */ ++void 
tcg_unregister_thread(void); ++ + /** + * tcg_prologue_init(): Generate the code for the TCG prologue + * +diff --git a/tcg/tcg.c b/tcg/tcg.c +index 896a36caeb..61fcf8597d 100644 +--- a/tcg/tcg.c ++++ b/tcg/tcg.c +@@ -764,6 +764,14 @@ static void alloc_tcg_plugin_context(TCGContext *s) + #endif + } + ++static void free_tcg_plugin_context(TCGContext *s) ++{ ++#ifdef CONFIG_PLUGIN ++ g_ptr_array_unref(s->plugin_tb->insns); ++ g_free(s->plugin_tb); ++#endif ++} ++ + /* + * All TCG threads except the parent (i.e. the one that called tcg_context_init + * and registered the target's TCG globals) must register with this function +@@ -814,6 +822,21 @@ void tcg_register_thread(void) + + tcg_ctx = s; + } ++ ++void tcg_unregister_thread(void) ++{ ++ TCGContext *s = tcg_ctx; ++ unsigned int n; ++ ++ /* Unclaim an entry in tcg_ctxs */ ++ n = qatomic_fetch_dec(&tcg_cur_ctxs); ++ g_assert(n > 1); ++ qatomic_store_release(&tcg_ctxs[n - 1], 0); ++ ++ free_tcg_plugin_context(s); ++ ++ g_free(s); ++} + #endif /* !CONFIG_USER_ONLY */ + + /* pool based memory allocation */ +-- +2.27.0 + diff --git a/tests-Disable-filemonitor-testcase.patch b/tests-Disable-filemonitor-testcase.patch new file mode 100644 index 0000000000000000000000000000000000000000..a12778e836e6f24397d8258a73dc509120cb9d7b --- /dev/null +++ b/tests-Disable-filemonitor-testcase.patch @@ -0,0 +1,32 @@ +From bad33579c56b73d56e0b220c98faad7893609b85 Mon Sep 17 00:00:00 2001 +From: Ying Fang +Date: Mon, 18 Mar 2024 10:21:04 +0800 +Subject: [PATCH] tests: Disable filemonitor testcase + +Since filemonitor testcase requires that host kernel being a LTS version, +we cannot guarantee that on OBS system. Lets disable it by default. 
+ +Signed-off-by: Ying Fang +Signed-off-by: Jinhao Gao +Signed-off-by: Yuan Zhang +--- + tests/unit/meson.build | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/tests/unit/meson.build b/tests/unit/meson.build +index a05d471090..598ba41bb9 100644 +--- a/tests/unit/meson.build ++++ b/tests/unit/meson.build +@@ -142,9 +142,6 @@ if have_system + 'test-vmstate': [migration, io], + 'test-yank': ['socket-helpers.c', qom, io, chardev] + } +- if config_host_data.get('CONFIG_INOTIFY1') +- tests += {'test-util-filemonitor': []} +- endif + + # Some tests: test-char, test-qdev-global-props, and test-qga, + # are not runnable under TSan due to a known issue. +-- +2.27.0 + diff --git a/tests-acpi-Update-expected-ACPI-tables-for-vcpu-hotp.patch b/tests-acpi-Update-expected-ACPI-tables-for-vcpu-hotp.patch new file mode 100644 index 0000000000000000000000000000000000000000..a6cd45895335b8bbfdcba149b7c30692b9afb0e7 --- /dev/null +++ b/tests-acpi-Update-expected-ACPI-tables-for-vcpu-hotp.patch @@ -0,0 +1,62 @@ +From cecec52ca38fa98a821c2a833e71a5fae1cc735d Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 2 Apr 2024 20:10:51 +0800 +Subject: [PATCH] tests/acpi: Update expected ACPI tables for vcpu hotplug + +Update the ACPI tables for vcpu hotplug. 
+ +Signed-off-by: Keqian Zhu +--- + tests/qtest/bios-tables-test-allowed-diff.h | 40 ------------------ + 1 files changed, 40 deletions(-) + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index c7406e395a..dfb8523c8b 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1,41 +1 @@ + /* List of comma-separated changed AML files to ignore */ +-"tests/data/acpi/pc/DSDT", +-"tests/data/acpi/pc/DSDT.acpierst", +-"tests/data/acpi/pc/DSDT.acpihmat", +-"tests/data/acpi/pc/DSDT.bridge", +-"tests/data/acpi/pc/DSDT.cphp", +-"tests/data/acpi/pc/DSDT.dimmpxm", +-"tests/data/acpi/pc/DSDT.hpbridge", +-"tests/data/acpi/pc/DSDT.hpbrroot", +-"tests/data/acpi/pc/DSDT.ipmikcs", +-"tests/data/acpi/pc/DSDT.memhp", +-"tests/data/acpi/pc/DSDT.nohpet", +-"tests/data/acpi/pc/DSDT.numamem", +-"tests/data/acpi/pc/DSDT.roothp", +-"tests/data/acpi/q35/DSDT", +-"tests/data/acpi/q35/DSDT.acpierst", +-"tests/data/acpi/q35/DSDT.acpihmat", +-"tests/data/acpi/q35/DSDT.acpihmat-noinitiator", +-"tests/data/acpi/q35/DSDT.applesmc", +-"tests/data/acpi/q35/DSDT.bridge", +-"tests/data/acpi/q35/DSDT.cphp", +-"tests/data/acpi/q35/DSDT.cxl", +-"tests/data/acpi/q35/DSDT.dimmpxm", +-"tests/data/acpi/q35/DSDT.ipmibt", +-"tests/data/acpi/q35/DSDT.ipmismbus", +-"tests/data/acpi/q35/DSDT.ivrs", +-"tests/data/acpi/q35/DSDT.memhp", +-"tests/data/acpi/q35/DSDT.mmio64", +-"tests/data/acpi/q35/DSDT.multi-bridge", +-"tests/data/acpi/q35/DSDT.noacpihp", +-"tests/data/acpi/q35/DSDT.nohpet", +-"tests/data/acpi/q35/DSDT.numamem", +-"tests/data/acpi/q35/DSDT.pvpanic-isa", +-"tests/data/acpi/q35/DSDT.tis.tpm12", +-"tests/data/acpi/q35/DSDT.tis.tpm2", +-"tests/data/acpi/q35/DSDT.viot", +-"tests/data/acpi/virt/DSDT", +-"tests/data/acpi/virt/DSDT.acpihmatvirt", +-"tests/data/acpi/virt/DSDT.memhp", +-"tests/data/acpi/virt/DSDT.pxb", +-"tests/data/acpi/virt/DSDT.topology", +\ No newline at end of file +-- +2.27.0 + 
diff --git a/tests-acpi-bios-tables-test-Allow-changes-to-virt-DS.patch b/tests-acpi-bios-tables-test-Allow-changes-to-virt-DS.patch new file mode 100644 index 0000000000000000000000000000000000000000..67d9c444d4f5069d6eeb2a4ee6f4e4cce374ea88 --- /dev/null +++ b/tests-acpi-bios-tables-test-Allow-changes-to-virt-DS.patch @@ -0,0 +1,62 @@ +From 6cfe9afcaceb7d9fb7d54f08b2362fc654b54d12 Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Tue, 2 Apr 2024 17:23:18 +0800 +Subject: [PATCH] tests/acpi/bios-tables-test: Allow changes to virt/DSDT file + +Prepare to change of cpu aml. + +Signed-off-by: Keqian Zhu +--- + tests/qtest/bios-tables-test-allowed-diff.h | 40 +++++++++++++++++++++ + 1 file changed, 40 insertions(+) + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index dfb8523c8b..c7406e395a 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1 +1,41 @@ + /* List of comma-separated changed AML files to ignore */ ++"tests/data/acpi/pc/DSDT", ++"tests/data/acpi/pc/DSDT.acpierst", ++"tests/data/acpi/pc/DSDT.acpihmat", ++"tests/data/acpi/pc/DSDT.bridge", ++"tests/data/acpi/pc/DSDT.cphp", ++"tests/data/acpi/pc/DSDT.dimmpxm", ++"tests/data/acpi/pc/DSDT.hpbridge", ++"tests/data/acpi/pc/DSDT.hpbrroot", ++"tests/data/acpi/pc/DSDT.ipmikcs", ++"tests/data/acpi/pc/DSDT.memhp", ++"tests/data/acpi/pc/DSDT.nohpet", ++"tests/data/acpi/pc/DSDT.numamem", ++"tests/data/acpi/pc/DSDT.roothp", ++"tests/data/acpi/q35/DSDT", ++"tests/data/acpi/q35/DSDT.acpierst", ++"tests/data/acpi/q35/DSDT.acpihmat", ++"tests/data/acpi/q35/DSDT.acpihmat-noinitiator", ++"tests/data/acpi/q35/DSDT.applesmc", ++"tests/data/acpi/q35/DSDT.bridge", ++"tests/data/acpi/q35/DSDT.cphp", ++"tests/data/acpi/q35/DSDT.cxl", ++"tests/data/acpi/q35/DSDT.dimmpxm", ++"tests/data/acpi/q35/DSDT.ipmibt", ++"tests/data/acpi/q35/DSDT.ipmismbus", ++"tests/data/acpi/q35/DSDT.ivrs", 
++"tests/data/acpi/q35/DSDT.memhp", ++"tests/data/acpi/q35/DSDT.mmio64", ++"tests/data/acpi/q35/DSDT.multi-bridge", ++"tests/data/acpi/q35/DSDT.noacpihp", ++"tests/data/acpi/q35/DSDT.nohpet", ++"tests/data/acpi/q35/DSDT.numamem", ++"tests/data/acpi/q35/DSDT.pvpanic-isa", ++"tests/data/acpi/q35/DSDT.tis.tpm12", ++"tests/data/acpi/q35/DSDT.tis.tpm2", ++"tests/data/acpi/q35/DSDT.viot", ++"tests/data/acpi/virt/DSDT", ++"tests/data/acpi/virt/DSDT.acpihmatvirt", ++"tests/data/acpi/virt/DSDT.memhp", ++"tests/data/acpi/virt/DSDT.pxb", ++"tests/data/acpi/virt/DSDT.topology", +\ No newline at end of file +-- +2.27.0 + diff --git a/tests-bios-tables-test-Rename-smbios-type-4-related-.patch b/tests-bios-tables-test-Rename-smbios-type-4-related-.patch new file mode 100644 index 0000000000000000000000000000000000000000..6622fc9d05f520b8d78f8730b0a692acd1b1ccf2 --- /dev/null +++ b/tests-bios-tables-test-Rename-smbios-type-4-related-.patch @@ -0,0 +1,97 @@ +From b59b75fc9f7ed73323179305363f0c2e00613863 Mon Sep 17 00:00:00 2001 +From: Zhao Liu +Date: Tue, 28 Nov 2023 00:02:02 +0800 +Subject: [PATCH] tests: bios-tables-test: Rename smbios type 4 related test + functions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +In fact, type4-count, core-count, core-count2, thread-count and +thread-count2 are tested with KVM not TCG. + +Rename these test functions to reflect KVM base instead of TCG. + +Signed-off-by: Zhao Liu +Message-Id: <20231127160202.1037290-1-zhao1.liu@linux.intel.com> +Reviewed-by: Philippe Mathieu-DaudĂ© +Reviewed-by: Igor Mammedov +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +--- + tests/qtest/bios-tables-test.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c +index fe6a9a8563..21811a1ab5 100644 +--- a/tests/qtest/bios-tables-test.c ++++ b/tests/qtest/bios-tables-test.c +@@ -1015,7 +1015,7 @@ static void test_acpi_q35_tcg(void) + free_test_data(&data); + } + +-static void test_acpi_q35_tcg_type4_count(void) ++static void test_acpi_q35_kvm_type4_count(void) + { + test_data data = { + .machine = MACHINE_Q35, +@@ -1031,7 +1031,7 @@ static void test_acpi_q35_tcg_type4_count(void) + free_test_data(&data); + } + +-static void test_acpi_q35_tcg_core_count(void) ++static void test_acpi_q35_kvm_core_count(void) + { + test_data data = { + .machine = MACHINE_Q35, +@@ -1048,7 +1048,7 @@ static void test_acpi_q35_tcg_core_count(void) + free_test_data(&data); + } + +-static void test_acpi_q35_tcg_core_count2(void) ++static void test_acpi_q35_kvm_core_count2(void) + { + test_data data = { + .machine = MACHINE_Q35, +@@ -1065,7 +1065,7 @@ static void test_acpi_q35_tcg_core_count2(void) + free_test_data(&data); + } + +-static void test_acpi_q35_tcg_thread_count(void) ++static void test_acpi_q35_kvm_thread_count(void) + { + test_data data = { + .machine = MACHINE_Q35, +@@ -1082,7 +1082,7 @@ static void test_acpi_q35_tcg_thread_count(void) + free_test_data(&data); + } + +-static void test_acpi_q35_tcg_thread_count2(void) ++static void test_acpi_q35_kvm_thread_count2(void) + { + test_data data = { + .machine = MACHINE_Q35, +@@ -2262,15 +2262,15 @@ int main(int argc, char *argv[]) + qtest_add_func("acpi/q35/kvm/xapic", test_acpi_q35_kvm_xapic); + qtest_add_func("acpi/q35/kvm/dmar", test_acpi_q35_kvm_dmar); + qtest_add_func("acpi/q35/type4-count", +- test_acpi_q35_tcg_type4_count); ++ test_acpi_q35_kvm_type4_count); + qtest_add_func("acpi/q35/core-count", +- test_acpi_q35_tcg_core_count); ++ test_acpi_q35_kvm_core_count); + 
qtest_add_func("acpi/q35/core-count2", +- test_acpi_q35_tcg_core_count2); ++ test_acpi_q35_kvm_core_count2); + qtest_add_func("acpi/q35/thread-count", +- test_acpi_q35_tcg_thread_count); ++ test_acpi_q35_kvm_thread_count); + qtest_add_func("acpi/q35/thread-count2", +- test_acpi_q35_tcg_thread_count2); ++ test_acpi_q35_kvm_thread_count2); + } + if (qtest_has_device("virtio-iommu-pci")) { + qtest_add_func("acpi/q35/viot", test_acpi_q35_viot); +-- +2.27.0 + diff --git a/tests-qemu-iotests-resolved-the-problem-that-the-108.patch b/tests-qemu-iotests-resolved-the-problem-that-the-108.patch new file mode 100644 index 0000000000000000000000000000000000000000..f4a3c54f52e349b6d5cb2fd66b8af8dd0387e8ce --- /dev/null +++ b/tests-qemu-iotests-resolved-the-problem-that-the-108.patch @@ -0,0 +1,31 @@ +From d95cbdd8738d61b8bc7c9a1541dade42c1f48314 Mon Sep 17 00:00:00 2001 +From: adttil <2429917001@qq.com> +Date: Thu, 1 Feb 2024 21:53:58 +0800 +Subject: [PATCH] tests/qemu-iotests: resolved the problem that the 108 test + cases in the container fail + +The loop device cannot be created in the compilation environment of the +container. Therefore, a judgment condition is added to the +initialization variable loopdev to check whether loop-control exists. 
+ +Signed-off-by: Adttil <2429917001@qq.com> +--- + tests/qemu-iotests/108 | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108 +index 54e935acf2..a6fe261265 100755 +--- a/tests/qemu-iotests/108 ++++ b/tests/qemu-iotests/108 +@@ -55,7 +55,7 @@ _supported_os Linux + _unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' data_file + + # This test either needs sudo -n losetup or FUSE exports to work +-if sudo -n losetup &>/dev/null; then ++if test -c "/dev/loop-control" && sudo -n losetup &>/dev/null; then + loopdev=true + else + loopdev=false +-- +2.27.0 + diff --git a/tests-virt-Allow-changes-to-PPTT-test-table.patch b/tests-virt-Allow-changes-to-PPTT-test-table.patch new file mode 100644 index 0000000000000000000000000000000000000000..b9cb5f6893a4ad9a7e80437b9a778abdb259bfae --- /dev/null +++ b/tests-virt-Allow-changes-to-PPTT-test-table.patch @@ -0,0 +1,25 @@ +From 3402740cb4f6d6b9baabfde0a7667b4990b010a5 Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Sat, 30 Mar 2024 19:21:59 +0800 +Subject: [PATCH] tests: virt: Allow changes to PPTT test table + +Allow changes to test/data/acpi/virt/PPTT*, prepare to change the +building policy of the cluster topology. 
+ +Signed-off-by: Kunkun Jiang +--- + tests/qtest/bios-tables-test-allowed-diff.h | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index dfb8523c8b..18d02a710d 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1 +1,4 @@ + /* List of comma-separated changed AML files to ignore */ ++"tests/data/acpi/virt/PPTT", ++"tests/data/acpi/virt/PPTT.acpihmatvirt", ++"tests/data/acpi/virt/PPTT.topology", +-- +2.27.0 + diff --git a/tests-virt-Update-expected-ACPI-tables-for-virt-test.patch b/tests-virt-Update-expected-ACPI-tables-for-virt-test.patch new file mode 100644 index 0000000000000000000000000000000000000000..6cb5a114d1d72fba550886fe5f58ea0ba1954ffd --- /dev/null +++ b/tests-virt-Update-expected-ACPI-tables-for-virt-test.patch @@ -0,0 +1,25 @@ +From b062e2f182af4c44fbd3a03eda9c934686037032 Mon Sep 17 00:00:00 2001 +From: Kunkun Jiang +Date: Sat, 30 Mar 2024 20:16:32 +0800 +Subject: [PATCH] tests: virt: Update expected ACPI tables for virt test + +Update the ACPI tables according to the acpi aml_build change, also +empty bios-tables-test-allowed-diff.h. 
+ +Signed-off-by: Kunkun Jiang +--- + tests/qtest/bios-tables-test-allowed-diff.h | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index 18d02a710d..dfb8523c8b 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1,4 +1 @@ + /* List of comma-separated changed AML files to ignore */ +-"tests/data/acpi/virt/PPTT", +-"tests/data/acpi/virt/PPTT.acpihmatvirt", +-"tests/data/acpi/virt/PPTT.topology", +-- +2.27.0 + diff --git a/travis-ci-Rename-SOFTMMU-SYSTEM.patch b/travis-ci-Rename-SOFTMMU-SYSTEM.patch new file mode 100644 index 0000000000000000000000000000000000000000..fa788135e89c9c9c09946848f47064de448e0545 --- /dev/null +++ b/travis-ci-Rename-SOFTMMU-SYSTEM.patch @@ -0,0 +1,67 @@ +From c03415f3b75e6a37c7eb392ef62bf92b94267b4d Mon Sep 17 00:00:00 2001 +From: gaojiazhen +Date: Mon, 25 Mar 2024 17:26:52 +0800 +Subject: [PATCH] travis-ci: Rename SOFTMMU -> SYSTEM +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 47833f817cc597db124c690bd14600bb5d00e824 + +Since we *might* have user emulation with softmmu, +rename MAIN_SOFTMMU_TARGETS as MAIN_SYSTEM_TARGETS +to express 'system emulation targets'. 
+ +Signed-off-by: Philippe Mathieu-Daudé +Message-ID: <20240313213339.82071-3-philmd@linaro.org> +Reviewed-by: Thomas Huth +Reviewed-by: Richard Henderson +Signed-off-by: Thomas Huth +Signed-off-by: Gao Jiazhen +--- + .travis.yml | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/.travis.yml b/.travis.yml +index 76859d48da..597d151b80 100644 +--- a/.travis.yml ++++ b/.travis.yml +@@ -35,7 +35,7 @@ env: + - TEST_BUILD_CMD="" + - TEST_CMD="make check V=1" + # This is broadly a list of "mainline" system targets which have support across the major distros +- - MAIN_SOFTMMU_TARGETS="aarch64-softmmu,mips64-softmmu,ppc64-softmmu,riscv64-softmmu,s390x-softmmu,x86_64-softmmu" ++ - MAIN_SYSTEM_TARGETS="aarch64-softmmu,mips64-softmmu,ppc64-softmmu,riscv64-softmmu,s390x-softmmu,x86_64-softmmu" + - CCACHE_SLOPPINESS="include_file_ctime,include_file_mtime" + - CCACHE_MAXSIZE=1G + - G_MESSAGES_DEBUG=error +@@ -114,7 +114,7 @@ jobs: + env: + - TEST_CMD="make check check-tcg V=1" + - CONFIG="--disable-containers --enable-fdt=system +- --target-list=${MAIN_SOFTMMU_TARGETS} --cxx=/bin/false" ++ --target-list=${MAIN_SYSTEM_TARGETS} --cxx=/bin/false" + - UNRELIABLE=true + + - name: "[ppc64] GCC check-tcg" +@@ -185,7 +185,7 @@ jobs: + env: + - TEST_CMD="make check check-tcg V=1" + - CONFIG="--disable-containers --enable-fdt=system +- --target-list=${MAIN_SOFTMMU_TARGETS},s390x-linux-user" ++ --target-list=${MAIN_SYSTEM_TARGETS},s390x-linux-user" + - UNRELIABLE=true + script: + - BUILD_RC=0 && make -j${JOBS} || BUILD_RC=$? 
+@@ -226,7 +226,7 @@ jobs: + - genisoimage + env: + - CONFIG="--disable-containers --enable-fdt=system --audio-drv-list=sdl +- --disable-user --target-list-exclude=${MAIN_SOFTMMU_TARGETS}" ++ --disable-user --target-list-exclude=${MAIN_SYSTEM_TARGETS}" + + - name: "[s390x] GCC (user)" + arch: s390x +-- +2.27.0 + diff --git a/ui-clipboard-mark-type-as-not-available-when-there-i.patch b/ui-clipboard-mark-type-as-not-available-when-there-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..843479a9c61b25ad1857352b4dafae1c5c2f4feb --- /dev/null +++ b/ui-clipboard-mark-type-as-not-available-when-there-i.patch @@ -0,0 +1,89 @@ +From 855f7f30de962f79393f0b9f8b0355b996d72de7 Mon Sep 17 00:00:00 2001 +From: Fiona Ebner +Date: Wed, 24 Jan 2024 11:57:48 +0100 +Subject: [PATCH] ui/clipboard: mark type as not available when there is no + data (CVE-2023-6683) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +With VNC, a client can send a non-extended VNC_MSG_CLIENT_CUT_TEXT +message with len=0. In qemu_clipboard_set_data(), the clipboard info +will be updated setting data to NULL (because g_memdup(data, size) +returns NULL when size is 0). If the client does not set the +VNC_ENCODING_CLIPBOARD_EXT feature when setting up the encodings, then +the 'request' callback for the clipboard peer is not initialized. +Later, because data is NULL, qemu_clipboard_request() can be reached +via vdagent_chr_write() and vdagent_clipboard_recv_request() and +there, the clipboard owner's 'request' callback will be attempted to +be called, but that is a NULL pointer. + +In particular, this can happen when using the KRDC (22.12.3) VNC +client. + +Another scenario leading to the same issue is with two clients (say +noVNC and KRDC): + +The noVNC client sets the extension VNC_FEATURE_CLIPBOARD_EXT and +initializes its cbpeer. + +The KRDC client does not, but triggers a vnc_client_cut_text() (note +it's not the _ext variant)). 
There, a new clipboard info with it as +the 'owner' is created and via qemu_clipboard_set_data() is called, +which in turn calls qemu_clipboard_update() with that info. + +In qemu_clipboard_update(), the notifier for the noVNC client will be +called, i.e. vnc_clipboard_notify() and also set vs->cbinfo for the +noVNC client. The 'owner' in that clipboard info is the clipboard peer +for the KRDC client, which did not initialize the 'request' function. +That sounds correct to me, it is the owner of that clipboard info. + +Then when noVNC sends a VNC_MSG_CLIENT_CUT_TEXT message (it did set +the VNC_FEATURE_CLIPBOARD_EXT feature correctly, so a check for it +passes), that clipboard info is passed to qemu_clipboard_request() and +the original segfault still happens. + +Fix the issue by handling updates with size 0 differently. In +particular, mark in the clipboard info that the type is not available. + +While at it, switch to g_memdup2(), because g_memdup() is deprecated. + +Cc: qemu-stable@nongnu.org +Fixes: CVE-2023-6683 +Reported-by: Markus Frank +Suggested-by: Marc-André Lureau +Signed-off-by: Fiona Ebner +Reviewed-by: Marc-André Lureau +Tested-by: Markus Frank +Message-ID: <20240124105749.204610-1-f.ebner@proxmox.com> +Signed-off-by: liuxiangdong +--- + ui/clipboard.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/ui/clipboard.c b/ui/clipboard.c +index 3d14bffaf8..b3f6fa3c9e 100644 +--- a/ui/clipboard.c ++++ b/ui/clipboard.c +@@ -163,9 +163,15 @@ void qemu_clipboard_set_data(QemuClipboardPeer *peer, + } + + g_free(info->types[type].data); +- info->types[type].data = g_memdup(data, size); +- info->types[type].size = size; +- info->types[type].available = true; ++ if (size) { ++ info->types[type].data = g_memdup2(data, size); ++ info->types[type].size = size; ++ info->types[type].available = true; ++ } else { ++ info->types[type].data = NULL; ++ info->types[type].size = 0; ++ info->types[type].available = false; ++ } + + if 
(update) { + qemu_clipboard_update(info); +-- +2.27.0 + diff --git a/util-log-add-CONFIG_DISABLE_QEMU_LOG-macro.patch b/util-log-add-CONFIG_DISABLE_QEMU_LOG-macro.patch new file mode 100644 index 0000000000000000000000000000000000000000..82e2884cba6131390119bf688ee0452887236b86 --- /dev/null +++ b/util-log-add-CONFIG_DISABLE_QEMU_LOG-macro.patch @@ -0,0 +1,42 @@ +From 28763d8df34c20cab60baec8f4f5615cbea8c0df Mon Sep 17 00:00:00 2001 +From: Yan Wang +Date: Fri, 11 Feb 2022 18:20:59 +0800 +Subject: [PATCH] util/log: add CONFIG_DISABLE_QEMU_LOG macro + +Using CONFIG_DISABLE_QEMU_LOG macro to control +qemu_log function. + +Signed-off-by: Yan Wang +Signed-off-by: Adttil +--- + util/log.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/util/log.c b/util/log.c +index d36c98da0b..78b6cf225f 100644 +--- a/util/log.c ++++ b/util/log.c +@@ -143,6 +143,12 @@ void qemu_log_unlock(FILE *logfile) + } + } + ++#ifdef CONFIG_DISABLE_QEMU_LOG ++void qemu_log(const char *fmt, ...) ++{ ++ return; ++} ++#else + void qemu_log(const char *fmt, ...) + { + FILE *f = qemu_log_trylock(); +@@ -155,6 +161,7 @@ void qemu_log(const char *fmt, ...) + qemu_log_unlock(f); + } + } ++#endif + + static void __attribute__((__constructor__)) startup(void) + { +-- +2.27.0 + diff --git a/vdpa-correct-param-passed-in-when-unregister-save.patch b/vdpa-correct-param-passed-in-when-unregister-save.patch new file mode 100644 index 0000000000000000000000000000000000000000..9f3aa089a764098f3f976cb99fc5e47a56c775a8 --- /dev/null +++ b/vdpa-correct-param-passed-in-when-unregister-save.patch @@ -0,0 +1,30 @@ +From 5714aaddcbc313e63da435a253d9d472984d7b49 Mon Sep 17 00:00:00 2001 +From: libai +Date: Thu, 14 Dec 2023 11:22:54 +0800 +Subject: [PATCH] vdpa: correct param passed in when unregister save + +The idstr passed in the unregister_savevm function is inconsistent +with the idstr passed in during register_savevm_live registration. 
+Needs to be modified, otherwise migration will fail after hotunplug +all vdpa devices. + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-mig.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +index b889dd4715..1d299019da 100644 +--- a/hw/virtio/vdpa-dev-mig.c ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -404,6 +404,6 @@ void vdpa_migration_register(VhostVdpaDevice *vdev) + void vdpa_migration_unregister(VhostVdpaDevice *vdev) + { + migration_remove_notifier(&vdev->migration_state); +- unregister_savevm(VMSTATE_IF(&vdev->parent_obj.parent_obj), "vdpa", DEVICE(vdev)); ++ unregister_savevm(NULL, "vdpa", DEVICE(vdev)); + qemu_del_vm_change_state_handler(vdev->vmstate); + } +-- +2.27.0 + diff --git a/vdpa-don-t-suspend-resume-device-when-vdpa-device-no.patch b/vdpa-don-t-suspend-resume-device-when-vdpa-device-no.patch new file mode 100644 index 0000000000000000000000000000000000000000..4a9ed108a970b2c447c9aaa0a8de42f7009300f4 --- /dev/null +++ b/vdpa-don-t-suspend-resume-device-when-vdpa-device-no.patch @@ -0,0 +1,67 @@ +From b82f02e93d5efa2ea62dd135c508cb707fdd35a7 Mon Sep 17 00:00:00 2001 +From: libai +Date: Tue, 19 Dec 2023 20:32:00 +0800 +Subject: [PATCH] vdpa: don't suspend/resume device when vdpa device not + started + +When vdpa device not started, we don't need to suspend vdpa device +and send vdpa device state information. Therefore, add the suspended +flag of vdpa device to distinguish whether the device is suspended and +use it to determine whether the device needs to resume in dest qemu. 
+ +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-mig.c | 23 +++++++++++++++-------- + 1 file changed, 15 insertions(+), 8 deletions(-) + +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +index 1d299019da..887c96a201 100644 +--- a/hw/virtio/vdpa-dev-mig.c ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -294,10 +294,13 @@ static int vdpa_save_complete_precopy(QEMUFile *f, void *opaque) + int ret; + + qemu_put_be64(f, VDPA_MIG_FLAG_DEV_CONFIG_STATE); +- ret = vhost_vdpa_dev_buffer_save(hdev, f); +- if (ret) { +- error_report("Save vdpa device buffer failed: %d\n", ret); +- return ret; ++ qemu_put_be16(f, (uint16_t)vdev->suspended); ++ if (vdev->suspended) { ++ ret = vhost_vdpa_dev_buffer_save(hdev, f); ++ if (ret) { ++ error_report("Save vdpa device buffer failed: %d\n", ret); ++ return ret; ++ } + } + qemu_put_be64(f, VDPA_MIG_FLAG_END_OF_STATE); + +@@ -311,6 +314,7 @@ static int vdpa_load_state(QEMUFile *f, void *opaque, int version_id) + + int ret; + uint64_t data; ++ uint16_t suspended; + + data = qemu_get_be64(f); + while (data != VDPA_MIG_FLAG_END_OF_STATE) { +@@ -323,10 +327,13 @@ static int vdpa_load_state(QEMUFile *f, void *opaque, int version_id) + return -EINVAL; + } + } else if (data == VDPA_MIG_FLAG_DEV_CONFIG_STATE) { +- ret = vhost_vdpa_dev_buffer_load(hdev, f); +- if (ret) { +- error_report("fail to restore device buffer.\n"); +- return ret; ++ suspended = qemu_get_be16(f); ++ if (suspended) { ++ ret = vhost_vdpa_dev_buffer_load(hdev, f); ++ if (ret) { ++ error_report("fail to restore device buffer.\n"); ++ return ret; ++ } + } + } + +-- +2.27.0 + diff --git a/vdpa-implement-vdpa-device-migration.patch b/vdpa-implement-vdpa-device-migration.patch new file mode 100644 index 0000000000000000000000000000000000000000..295cdcdc24869ad704f467190303b456a14136e7 --- /dev/null +++ b/vdpa-implement-vdpa-device-migration.patch @@ -0,0 +1,75 @@ +From 4688e12c57a34801010abf2a4cf528fcef3b9ec0 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 
15:59:56 +0800 +Subject: [PATCH] vdpa: implement vdpa device migration + +Integrate the live migration code, call the registered live +migration function, and open the vdpa live migration prototype + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c +index f22d5d5bc0..6af78a4229 100644 +--- a/hw/virtio/vdpa-dev.c ++++ b/hw/virtio/vdpa-dev.c +@@ -28,6 +28,8 @@ + #include "hw/virtio/vdpa-dev.h" + #include "sysemu/sysemu.h" + #include "sysemu/runstate.h" ++#include "hw/virtio/vdpa-dev-mig.h" ++#include "migration/migration.h" + + static void + vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +@@ -154,6 +156,8 @@ static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) + vhost_vdpa_device_dummy_handle_output); + } + ++ vdpa_migration_register(v); ++ + return; + + free_config: +@@ -173,6 +177,7 @@ static void vhost_vdpa_device_unrealize(DeviceState *dev) + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); + int i; + ++ vdpa_migration_unregister(s); + virtio_set_status(vdev, 0); + + for (i = 0; i < s->num_queues; i++) { +@@ -308,6 +313,7 @@ static void vhost_vdpa_device_stop(VirtIODevice *vdev) + static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) + { + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); ++ MigrationState *ms = migrate_get_current(); + bool should_start = virtio_device_started(vdev, status); + Error *local_err = NULL; + int ret; +@@ -320,6 +326,11 @@ static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) + return; + } + ++ if (ms->state == RUN_STATE_PAUSED || ++ ms->state == RUN_STATE_RESTORE_VM) { ++ return; ++ } ++ + if (should_start) { + ret = vhost_vdpa_device_start(vdev, &local_err); + if (ret < 0) { +@@ -338,7 +349,7 @@ static Property vhost_vdpa_device_properties[] = { + + static const VMStateDescription vmstate_vhost_vdpa_device = { + .name = 
"vhost-vdpa-device", +- .unmigratable = 1, ++ .unmigratable = 0, + .minimum_version_id = 1, + .version_id = 1, + .fields = (VMStateField[]) { +-- +2.27.0 + diff --git a/vdpa-move-memory-listener-to-the-realize-stage.patch b/vdpa-move-memory-listener-to-the-realize-stage.patch new file mode 100644 index 0000000000000000000000000000000000000000..56137c61c50fca095cf11049bf57e53340f0bb5e --- /dev/null +++ b/vdpa-move-memory-listener-to-the-realize-stage.patch @@ -0,0 +1,91 @@ +From 587f42300488af4478d7aa1b62e2b351155621db Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 16:01:16 +0800 +Subject: [PATCH] vdpa: move memory listener to the realize stage + +Move the memory listener registration of vdpa from the start stage +to the realize stage. Avoid that in the start phase, the memory +listener callback function has not yet been processed. + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev.c | 4 ++++ + hw/virtio/vhost-vdpa.c | 5 ----- + 2 files changed, 4 insertions(+), 5 deletions(-) + +diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c +index 6af78a4229..877bf7464f 100644 +--- a/hw/virtio/vdpa-dev.c ++++ b/hw/virtio/vdpa-dev.c +@@ -30,6 +30,7 @@ + #include "sysemu/runstate.h" + #include "hw/virtio/vdpa-dev-mig.h" + #include "migration/migration.h" ++#include "exec/address-spaces.h" + + static void + vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) +@@ -125,6 +126,7 @@ static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) + goto free_vqs; + } + ++ memory_listener_register(&v->vdpa.listener, &address_space_memory); + v->config_size = vhost_vdpa_device_get_u32(v->vhostfd, + VHOST_VDPA_GET_CONFIG_SIZE, + errp); +@@ -163,6 +165,7 @@ static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) + free_config: + g_free(v->config); + vhost_cleanup: ++ memory_listener_unregister(&v->vdpa.listener); + vhost_dev_cleanup(&v->dev); + free_vqs: + g_free(vqs); +@@ -188,6 +191,7 @@ static void 
vhost_vdpa_device_unrealize(DeviceState *dev) + + g_free(s->config); + g_free(s->dev.vqs); ++ memory_listener_unregister(&s->vdpa.listener); + vhost_dev_cleanup(&s->dev); + qemu_close(s->vhostfd); + s->vhostfd = -1; +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 063e941544..30408f2069 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -1320,8 +1320,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) + "IOMMU and try again"); + return -1; + } +- memory_listener_register(&v->listener, dev->vdev->dma_as); +- + return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); + } + +@@ -1515,7 +1513,6 @@ static bool vhost_vdpa_force_iommu(struct vhost_dev *dev) + + static int vhost_vdpa_suspend_device(struct vhost_dev *dev) + { +- struct vhost_vdpa *v = dev->opaque; + int ret; + + vhost_vdpa_svqs_stop(dev); +@@ -1526,7 +1523,6 @@ static int vhost_vdpa_suspend_device(struct vhost_dev *dev) + } + + ret = vhost_vdpa_call(dev, VHOST_VDPA_SUSPEND, NULL); +- memory_listener_unregister(&v->listener); + return ret; + } + +@@ -1548,7 +1544,6 @@ static int vhost_vdpa_resume_device(struct vhost_dev *dev) + return 0; + } + +- memory_listener_register(&v->listener, &address_space_memory); + return vhost_vdpa_call(dev, VHOST_VDPA_RESUME, NULL); + } + +-- +2.27.0 + diff --git a/vdpa-set-vring-enable-only-if-the-vring-address-has-.patch b/vdpa-set-vring-enable-only-if-the-vring-address-has-.patch new file mode 100644 index 0000000000000000000000000000000000000000..8f18cb554fd2d0cd90ef726a20b52ffb6219f98c --- /dev/null +++ b/vdpa-set-vring-enable-only-if-the-vring-address-has-.patch @@ -0,0 +1,38 @@ +From 0f515ff831f46ef34cd83aa145e547e48d8b3b56 Mon Sep 17 00:00:00 2001 +From: libai +Date: Thu, 14 Dec 2023 11:05:52 +0800 +Subject: [PATCH] vdpa: set vring enable only if the vring address has already + been set + +Currently, vhost-vdpa does not determine the status of each vring when +performing the enable operation on vring. 
When the vBIOS(EDK2) is running, +the driver will not enable all vrings. In this case, setting all vrings +to enable is isconsistent with the actual situation. + +Add logic when enabling vring, make a judement on the vring status. If the +vring address is not set, the vring will not enabled. + +Signed-off-by: libai +--- + hw/virtio/vhost-vdpa.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 30408f2069..d49826845f 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -890,6 +890,11 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx) + .index = idx, + .num = 1, + }; ++ hwaddr addr = virtio_queue_get_desc_addr(dev->vdev, idx); ++ if (addr == 0) { ++ return 0; ++ } ++ + int r = vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); + + trace_vhost_vdpa_set_vring_ready(dev, idx, r); +-- +2.27.0 + diff --git a/vdpa-support-vdpa-device-suspend-resume.patch b/vdpa-support-vdpa-device-suspend-resume.patch new file mode 100644 index 0000000000000000000000000000000000000000..7e8ff653152a49d9410c2479427368f9a50c07d2 --- /dev/null +++ b/vdpa-support-vdpa-device-suspend-resume.patch @@ -0,0 +1,120 @@ +From e58b48ab2bb679f4c661301019d6f94bd39f93e5 Mon Sep 17 00:00:00 2001 +From: libai +Date: Tue, 19 Dec 2023 20:18:03 +0800 +Subject: [PATCH] vdpa: support vdpa device suspend/resume + +only implement suspend and resume interface used for migration. The +current implementation still has bugs when suspend/resume a virtual +machine. Fix it. 
+ +Fixes: 4c5a9a0703 (""vhost: implement vhost_vdpa_device_suspend/resume) + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-mig.c | 16 +++++++++++----- + hw/virtio/vdpa-dev.c | 8 +------- + include/hw/virtio/vdpa-dev.h | 1 + + 3 files changed, 13 insertions(+), 12 deletions(-) + +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +index 9b47e3ed45..8b13f89c85 100644 +--- a/hw/virtio/vdpa-dev-mig.c ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -143,6 +143,7 @@ static int vhost_vdpa_device_suspend(VhostVdpaDevice *vdpa) + } + + vdpa->started = false; ++ vdpa->suspended = true; + + ret = vhost_dev_suspend(&vdpa->dev, vdev, false); + if (ret) { +@@ -165,6 +166,7 @@ set_guest_notifiers_fail: + } + + suspend_fail: ++ vdpa->suspended = false; + vdpa->started = true; + return ret; + } +@@ -201,6 +203,7 @@ static int vhost_vdpa_device_resume(VhostVdpaDevice *vdpa) + goto err_guest_notifiers; + } + vdpa->started = true; ++ vdpa->suspended = false; + + /* + * guest_notifier_mask/pending not used yet, so just unmask +@@ -241,7 +244,7 @@ static void vdpa_dev_vmstate_change(void *opaque, bool running, RunState state) + MigrationIncomingState *mis = migration_incoming_get_current(); + + if (!running) { +- if (ms->state == RUN_STATE_PAUSED) { ++ if (ms->state == MIGRATION_STATUS_ACTIVE || state == RUN_STATE_PAUSED) { + ret = vhost_vdpa_device_suspend(vdpa); + if (ret) { + error_report("suspend vdpa device failed: %d\n", ret); +@@ -251,16 +254,19 @@ static void vdpa_dev_vmstate_change(void *opaque, bool running, RunState state) + } + } + } else { +- if (ms->state == RUN_STATE_RESTORE_VM) { ++ if (vdpa->suspended) { + ret = vhost_vdpa_device_resume(vdpa); + if (ret) { +- error_report("migration dest resume device failed, abort!\n"); +- exit(EXIT_FAILURE); ++ error_report("vhost vdpa device resume failed: %d\n", ret); + } + } + + if (mis->state == RUN_STATE_RESTORE_VM) { +- vhost_vdpa_call(hdev, VHOST_VDPA_RESUME, NULL); ++ ret = vhost_vdpa_call(hdev, VHOST_VDPA_RESUME, NULL); 
++ if (ret) { ++ error_report("migration dest resume device failed: %d\n", ret); ++ exit(EXIT_FAILURE); ++ } + /* post resume */ + mis->bh = qemu_bh_new(vdpa_dev_migration_handle_incoming_bh, + hdev); +diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c +index 877bf7464f..91e71847b0 100644 +--- a/hw/virtio/vdpa-dev.c ++++ b/hw/virtio/vdpa-dev.c +@@ -317,7 +317,6 @@ static void vhost_vdpa_device_stop(VirtIODevice *vdev) + static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) + { + VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); +- MigrationState *ms = migrate_get_current(); + bool should_start = virtio_device_started(vdev, status); + Error *local_err = NULL; + int ret; +@@ -326,12 +325,7 @@ static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) + should_start = false; + } + +- if (s->started == should_start) { +- return; +- } +- +- if (ms->state == RUN_STATE_PAUSED || +- ms->state == RUN_STATE_RESTORE_VM) { ++ if (s->started == should_start || s->suspended) { + return; + } + +diff --git a/include/hw/virtio/vdpa-dev.h b/include/hw/virtio/vdpa-dev.h +index 20f50c76c6..60e9c3f3fe 100644 +--- a/include/hw/virtio/vdpa-dev.h ++++ b/include/hw/virtio/vdpa-dev.h +@@ -37,6 +37,7 @@ struct VhostVdpaDevice { + int config_size; + uint16_t queue_size; + bool started; ++ bool suspended; + int (*post_init)(VhostVdpaDevice *v, Error **errp); + VMChangeStateEntry *vmstate; + Notifier migration_state; +-- +2.27.0 + diff --git a/vdpa-suspend-function-return-0-when-the-vdpa-device-.patch b/vdpa-suspend-function-return-0-when-the-vdpa-device-.patch new file mode 100644 index 0000000000000000000000000000000000000000..69f13f0993b2229d4fac841eb52be1d6f5552d55 --- /dev/null +++ b/vdpa-suspend-function-return-0-when-the-vdpa-device-.patch @@ -0,0 +1,45 @@ +From a78602118043eb9923996504d5b2e1b14a1ec38d Mon Sep 17 00:00:00 2001 +From: libai +Date: Thu, 21 Dec 2023 11:03:37 +0800 +Subject: [PATCH] vdpa: suspend function return 0 when the vdpa 
device is + stopped + +When vhost vdpa device is stopped(vdpa->started is false), suspend +operation do nothing and return success, instead of return failure. + +The same goes for resume function. + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-mig.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +index 8b13f89c85..b889dd4715 100644 +--- a/hw/virtio/vdpa-dev-mig.c ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -134,8 +134,8 @@ static int vhost_vdpa_device_suspend(VhostVdpaDevice *vdpa) + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + +- if (!vdpa->started) { +- return -EFAULT; ++ if (!vdpa->started || vdpa->suspended) { ++ return 0; + } + + if (!k->set_guest_notifiers) { +@@ -178,6 +178,10 @@ static int vhost_vdpa_device_resume(VhostVdpaDevice *vdpa) + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int i, ret; + ++ if (vdpa->started || !vdpa->suspended) { ++ return 0; ++ } ++ + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers\n"); + return -ENOSYS; +-- +2.27.0 + diff --git a/vfio-Maintain-DMA-mapping-range-for-the-container.patch b/vfio-Maintain-DMA-mapping-range-for-the-container.patch new file mode 100644 index 0000000000000000000000000000000000000000..5898ae86434982c5e54d5d3eaf152cb96ba1ff98 --- /dev/null +++ b/vfio-Maintain-DMA-mapping-range-for-the-container.patch @@ -0,0 +1,204 @@ +From bd2d81775edf285149346bf793d9b71236d7cf34 Mon Sep 17 00:00:00 2001 +From: Zenghui Yu +Date: Sat, 8 May 2021 17:31:04 +0800 +Subject: [PATCH] vfio: Maintain DMA mapping range for the container + +When synchronizing dirty bitmap from kernel VFIO we do it in a +per-iova-range fashion and we allocate the userspace bitmap for each of the +ioctl. 
This patch introduces `struct VFIODMARange` to describe a range of +the given DMA mapping with respect to a VFIO_IOMMU_MAP_DMA operation, and +make the bitmap cache of this range be persistent so that we don't need to +g_try_malloc0() every time. Note that the new structure is almost a copy of +`struct vfio_iommu_type1_dma_map` but only internally used by QEMU. + +More importantly, the cached per-iova-range dirty bitmap will be further +used when we want to add support for the CLEAR_BITMAP and this cached +bitmap will be used to guarantee we don't clear any unknown dirty bits +otherwise that can be a severe data loss issue for migration code. + +It's pretty intuitive to maintain a bitmap per container since we perform +log_sync at this granule. But I don't know how to deal with things like +memory hot-{un}plug, sparse DMA mappings, etc. Suggestions welcome. + +* yet something to-do: + - can't work with guest viommu + - no locks + - etc + +[ The idea and even the commit message are largely inherited from kvm side. + See commit 9f4bf4baa8b820c7930e23c9566c9493db7e1d25. 
] + +Signed-off-by: Zenghui Yu +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 9 +++++-- + hw/vfio/container.c | 49 +++++++++++++++++++++++++++++++++++ + include/hw/vfio/vfio-common.h | 12 +++++++++ + 3 files changed, 68 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index e70fdf5e0c..564e933135 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1156,6 +1156,7 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + vfio_devices_all_device_dirty_tracking(container); + uint64_t dirty_pages; + VFIOBitmap vbmap; ++ VFIODMARange *qrange; + int ret; + + if (!container->dirty_pages_supported && !all_device_dirty_tracking) { +@@ -1165,10 +1166,16 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + return 0; + } + ++ qrange = vfio_lookup_match_range(container, iova, size); ++ /* the same as vfio_dma_unmap() */ ++ assert(qrange); ++ + ret = vfio_bitmap_alloc(&vbmap, size); + if (ret) { + return ret; + } ++ g_free(vbmap.bitmap); ++ vbmap.bitmap = qrange->bitmap; + + if (all_device_dirty_tracking) { + ret = vfio_devices_query_dirty_bitmap(container, &vbmap, iova, size); +@@ -1186,8 +1193,6 @@ int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, + trace_vfio_get_dirty_bitmap(container->fd, iova, size, vbmap.size, + ram_addr, dirty_pages); + out: +- g_free(vbmap.bitmap); +- + return ret; + } + +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 242010036a..9a176a0d33 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -112,6 +112,29 @@ unmap_exit: + return ret; + } + ++VFIODMARange *vfio_lookup_match_range(VFIOContainer *container, ++ hwaddr start_addr, hwaddr size) ++{ ++ VFIODMARange *qrange; ++ ++ QLIST_FOREACH(qrange, &container->dma_list, next) { ++ if (qrange->iova == start_addr && qrange->size == size) { ++ return qrange; ++ } ++ } ++ return NULL; ++} ++ ++void vfio_dma_range_init_dirty_bitmap(VFIODMARange *qrange) ++{ ++ uint64_t pages, size; ++ 
++ pages = REAL_HOST_PAGE_ALIGN(qrange->size) / qemu_real_host_page_size(); ++ size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / BITS_PER_BYTE; ++ ++ qrange->bitmap = g_malloc0(size); ++} ++ + /* + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 + */ +@@ -124,6 +147,7 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, + .iova = iova, + .size = size, + }; ++ VFIODMARange *qrange; + bool need_dirty_sync = false; + int ret; + +@@ -136,6 +160,22 @@ int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, + need_dirty_sync = true; + } + ++ /* ++ * unregister the DMA range ++ * ++ * It seems that the memory layer will give us the same section as the one ++ * used in region_add(). Otherwise it'll be complicated to manipulate the ++ * bitmap across region_{add,del}. Is there any guarantee? ++ * ++ * But there is really not such a restriction on the kernel interface ++ * (VFIO_IOMMU_DIRTY_PAGES_FLAG_{UN}MAP_DMA, etc). ++ */ ++ qrange = vfio_lookup_match_range(container, iova, size); ++ assert(qrange); ++ g_free(qrange->bitmap); ++ QLIST_REMOVE(qrange, next); ++ g_free(qrange); ++ + while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) { + /* + * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c +@@ -180,6 +220,14 @@ int vfio_dma_map(VFIOContainer *container, hwaddr iova, + .iova = iova, + .size = size, + }; ++ VFIODMARange *qrange; ++ ++ qrange = g_malloc0(sizeof(*qrange)); ++ qrange->iova = iova; ++ qrange->size = size; ++ QLIST_INSERT_HEAD(&container->dma_list, qrange, next); ++ /* XXX allocate the dirty bitmap on demand */ ++ vfio_dma_range_init_dirty_bitmap(qrange); + + if (!readonly) { + map.flags |= VFIO_DMA_MAP_FLAG_WRITE; +@@ -552,6 +600,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container->iova_ranges = NULL; + QLIST_INIT(&container->giommu_list); + QLIST_INIT(&container->vrdl_list); ++ QLIST_INIT(&container->dma_list); + + ret = vfio_init_container(container, 
group->fd, errp); + if (ret) { +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index a4a22accb9..b131d04c9c 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -80,6 +80,14 @@ typedef struct VFIOAddressSpace { + + struct VFIOGroup; + ++typedef struct VFIODMARange { ++ QLIST_ENTRY(VFIODMARange) next; ++ hwaddr iova; ++ size_t size; ++ void *vaddr; /* unused */ ++ unsigned long *bitmap; /* dirty bitmap cache for this range */ ++} VFIODMARange; ++ + typedef struct VFIOContainer { + VFIOAddressSpace *space; + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ +@@ -97,6 +105,7 @@ typedef struct VFIOContainer { + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; + QLIST_HEAD(, VFIOGroup) group_list; + QLIST_HEAD(, VFIORamDiscardListener) vrdl_list; ++ QLIST_HEAD(, VFIODMARange) dma_list; + QLIST_ENTRY(VFIOContainer) next; + QLIST_HEAD(, VFIODevice) device_list; + GList *iova_ranges; +@@ -212,6 +221,9 @@ void vfio_put_address_space(VFIOAddressSpace *space); + bool vfio_devices_all_running_and_saving(VFIOContainer *container); + + /* container->fd */ ++VFIODMARange *vfio_lookup_match_range(VFIOContainer *container, ++ hwaddr start_addr, hwaddr size); ++void vfio_dma_range_init_dirty_bitmap(VFIODMARange *qrange); + int vfio_dma_unmap(VFIOContainer *container, hwaddr iova, + ram_addr_t size, IOMMUTLBEntry *iotlb); + int vfio_dma_map(VFIOContainer *container, hwaddr iova, +-- +2.27.0 + diff --git a/vfio-migration-Add-support-for-manual-clear-vfio-dir.patch b/vfio-migration-Add-support-for-manual-clear-vfio-dir.patch new file mode 100644 index 0000000000000000000000000000000000000000..06e5781624f27a44b87089188713bc3a9b345261 --- /dev/null +++ b/vfio-migration-Add-support-for-manual-clear-vfio-dir.patch @@ -0,0 +1,229 @@ +From 24c3ff779f35b40967d195e4764d4cb605c1a304 Mon Sep 17 00:00:00 2001 +From: Zenghui Yu +Date: Sat, 8 May 2021 17:31:05 +0800 +Subject: [PATCH] vfio/migration: Add support for manual clear 
vfio dirty log + +The new capability VFIO_DIRTY_LOG_MANUAL_CLEAR and the new ioctl +VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR and +VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP have been introduced in +the kernel, tweak the userspace side to use them. + +Check if the kernel supports VFIO_DIRTY_LOG_MANUAL_CLEAR and +provide the log_clear() hook for vfio_memory_listener. If the +kernel supports it, deliever the clear message to kernel. + +Signed-off-by: Zenghui Yu +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 136 ++++++++++++++++++++++++++++++++++ + hw/vfio/container.c | 13 +++- + include/hw/vfio/vfio-common.h | 1 + + 3 files changed, 148 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 564e933135..e08b147b3d 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1344,6 +1344,141 @@ static void vfio_listener_log_sync(MemoryListener *listener, + } + } + ++/* ++ * I'm not sure if there's any alignment requirement for the CLEAR_BITMAP ++ * ioctl. But copy from kvm side and align {start, size} with 64 pages. ++ * ++ * I think the code can be simplified a lot if no alignment requirement. ++ */ ++#define VFIO_CLEAR_LOG_SHIFT 6 ++#define VFIO_CLEAR_LOG_ALIGN (qemu_real_host_page_size() << VFIO_CLEAR_LOG_SHIFT) ++#define VFIO_CLEAR_LOG_MASK (-VFIO_CLEAR_LOG_ALIGN) ++ ++static int vfio_log_clear_one_range(VFIOContainer *container,VFIODMARange *qrange, ++ uint64_t start, uint64_t size) ++{ ++ struct vfio_iommu_type1_dirty_bitmap *dbitmap; ++ struct vfio_iommu_type1_dirty_bitmap_get *range; ++ ++ dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range)); ++ ++ dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range); ++ dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP; ++ range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data; ++ ++ /* ++ * Now let's deal with the actual bitmap, which is almost the same ++ * as the kvm side. 
++ */ ++ uint64_t end, bmap_start, start_delta, bmap_npages; ++ unsigned long *bmap_clear = NULL, psize = qemu_real_host_page_size(); ++ int ret; ++ ++ bmap_start = start & VFIO_CLEAR_LOG_MASK; ++ start_delta = start - bmap_start; ++ bmap_start /= psize; ++ ++ bmap_npages = DIV_ROUND_UP(size + start_delta, VFIO_CLEAR_LOG_ALIGN) ++ << VFIO_CLEAR_LOG_SHIFT; ++ end = qrange->size / psize; ++ if (bmap_npages > end - bmap_start) { ++ bmap_npages = end - bmap_start; ++ } ++ start_delta /= psize; ++ ++ if (start_delta) { ++ bmap_clear = bitmap_new(bmap_npages); ++ bitmap_copy_with_src_offset(bmap_clear, qrange->bitmap, ++ bmap_start, start_delta + size / psize); ++ bitmap_clear(bmap_clear, 0, start_delta); ++ range->bitmap.data = (__u64 *)bmap_clear; ++ } else { ++ range->bitmap.data = (__u64 *)(qrange->bitmap + BIT_WORD(bmap_start)); ++ } ++ ++ range->iova = qrange->iova + bmap_start * psize; ++ range->size = bmap_npages * psize; ++ range->bitmap.size = ROUND_UP(bmap_npages, sizeof(__u64) * BITS_PER_BYTE) / ++ BITS_PER_BYTE; ++ range->bitmap.pgsize = qemu_real_host_page_size(); ++ ++ ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap); ++ if (ret) { ++ error_report("Failed to clear dirty log for iova: 0x%"PRIx64 ++ " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova, ++ (uint64_t)range->size, errno); ++ goto err_out; ++ } ++ ++ bitmap_clear(qrange->bitmap, bmap_start + start_delta, size / psize); ++err_out: ++ g_free(bmap_clear); ++ g_free(dbitmap); ++ return 0; ++} ++ ++static int vfio_physical_log_clear(VFIOContainer *container, ++ MemoryRegionSection *section) ++{ ++ uint64_t start, size, offset, count; ++ VFIODMARange *qrange; ++ int ret = 0; ++ ++ if (!container->dirty_log_manual_clear) { ++ /* No need to do explicit clear */ ++ return ret; ++ } ++ ++ start = section->offset_within_address_space; ++ size = int128_get64(section->size); ++ ++ if (!size) { ++ return ret; ++ } ++ ++ QLIST_FOREACH(qrange, &container->dma_list, next) { ++ /* ++ * Discard ranges 
that do not overlap the section (e.g., the ++ * Memory BAR regions of the device) ++ */ ++ if (qrange->iova > start + size - 1 || ++ start > qrange->iova + qrange->size - 1) { ++ continue; ++ } ++ ++ if (start >= qrange->iova) { ++ /* The range starts before section or is aligned to it. */ ++ offset = start - qrange->iova; ++ count = MIN(qrange->size - offset, size); ++ } else { ++ /* The range starts after section. */ ++ offset = 0; ++ count = MIN(qrange->size, size - (qrange->iova - start)); ++ } ++ ret = vfio_log_clear_one_range(container, qrange, offset, count); ++ if (ret < 0) { ++ break; ++ } ++ } ++ ++ return ret; ++} ++ ++static void vfio_listener_log_clear(MemoryListener *listener, ++ MemoryRegionSection *section) ++{ ++ VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ ++ if (vfio_listener_skipped_section(section) || ++ !container->dirty_pages_supported) { ++ return; ++ } ++ ++ if (vfio_devices_all_dirty_tracking(container)) { ++ vfio_physical_log_clear(container, section); ++ } ++} ++ + const MemoryListener vfio_memory_listener = { + .name = "vfio", + .region_add = vfio_listener_region_add, +@@ -1351,6 +1486,7 @@ const MemoryListener vfio_memory_listener = { + .log_global_start = vfio_listener_log_global_start, + .log_global_stop = vfio_listener_log_global_stop, + .log_sync = vfio_listener_log_sync, ++ .log_clear = vfio_listener_log_clear, + }; + + void vfio_reset_handler(void *opaque) +diff --git a/hw/vfio/container.c b/hw/vfio/container.c +index 9a176a0d33..d8b9117f4f 100644 +--- a/hw/vfio/container.c ++++ b/hw/vfio/container.c +@@ -285,7 +285,9 @@ int vfio_query_dirty_bitmap(VFIOContainer *container, VFIOBitmap *vbmap, + dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range)); + + dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range); +- dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; ++ dbitmap->flags = container->dirty_log_manual_clear ? 
++ VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR : ++ VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; + range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data; + range->iova = iova; + range->size = size; +@@ -409,7 +411,7 @@ static int vfio_get_iommu_type(VFIOContainer *container, + static int vfio_init_container(VFIOContainer *container, int group_fd, + Error **errp) + { +- int iommu_type, ret; ++ int iommu_type, dirty_log_manual_clear, ret; + + iommu_type = vfio_get_iommu_type(container, errp); + if (iommu_type < 0) { +@@ -438,6 +440,13 @@ static int vfio_init_container(VFIOContainer *container, int group_fd, + } + + container->iommu_type = iommu_type; ++ ++ dirty_log_manual_clear = ioctl(container->fd, VFIO_CHECK_EXTENSION, ++ VFIO_DIRTY_LOG_MANUAL_CLEAR); ++ if (dirty_log_manual_clear) { ++ container->dirty_log_manual_clear = dirty_log_manual_clear; ++ } ++ + return 0; + } + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index b131d04c9c..fd9828d50b 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -97,6 +97,7 @@ typedef struct VFIOContainer { + Error *error; + bool initialized; + bool dirty_pages_supported; ++ bool dirty_log_manual_clear; + uint64_t dirty_pgsizes; + uint64_t max_dirty_bitmap_size; + unsigned long pgsizes; +-- +2.27.0 + diff --git a/vfio-pci-Ascend310-need-4Bytes-quirk-in-bar4.patch b/vfio-pci-Ascend310-need-4Bytes-quirk-in-bar4.patch new file mode 100644 index 0000000000000000000000000000000000000000..6fb6caafbd47feb499b800928e889985a942f545 --- /dev/null +++ b/vfio-pci-Ascend310-need-4Bytes-quirk-in-bar4.patch @@ -0,0 +1,105 @@ +From 9558ea5d0bded6c9189adf2ce317cca205604c15 Mon Sep 17 00:00:00 2001 +From: Binfeng Wu +Date: Tue, 8 Feb 2022 17:00:39 +0800 +Subject: [PATCH] vfio/pci: Ascend310 need 4Bytes quirk in bar4 + +--- + hw/vfio/pci-quirks.c | 75 ++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 75 insertions(+) + +diff --git a/hw/vfio/pci-quirks.c 
b/hw/vfio/pci-quirks.c +index 84b1a7b948..8fb190ce3c 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -1209,6 +1209,80 @@ int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, + return 0; + } + ++#define PCI_VENDOR_ID_HUAWEI 0x19e5 ++#define PCI_DEVICE_ID_ASCEND310 0xd100 ++#define ASCEND310_XLOADER_SIZE 4 ++#define ASCEND310_XLOADER_OFFSET 0x400 ++ ++typedef struct VFIOAscendBarQuirk { ++ struct VFIOPCIDevice *vdev; ++ pcibus_t offset; ++ uint8_t bar; ++ MemoryRegion *mem; ++} VFIOAscendBarQuirk; ++ ++static uint64_t vfio_ascend_quirk_read(void *opaque, ++ hwaddr addr, unsigned size) ++{ ++ VFIOAscendBarQuirk *quirk = opaque; ++ VFIOPCIDevice *vdev = quirk->vdev; ++ ++ qemu_log("read RO region! addr=0x%" HWADDR_PRIx ", size=%d\n", ++ addr + quirk->offset, size); ++ ++ return vfio_region_read(&vdev->bars[quirk->bar].region, ++ addr + quirk->offset, size); ++} ++ ++static void vfio_ascend_quirk_write(void *opaque, hwaddr addr, ++ uint64_t data, unsigned size) ++{ ++ VFIOAscendBarQuirk *quirk = opaque; ++ ++ qemu_log("modifying RO region is not allowed! 
addr=0x%" ++ HWADDR_PRIx ", data=0x%" PRIx64 ", size=%d\n", ++ addr + quirk->offset, data, size); ++} ++ ++static const MemoryRegionOps vfio_ascend_intercept_regs_quirk = { ++ .read = vfio_ascend_quirk_read, ++ .write = vfio_ascend_quirk_write, ++ .endianness = DEVICE_LITTLE_ENDIAN, ++}; ++ ++static void vfio_probe_ascend310_bar4_quirk(VFIOPCIDevice *vdev, int nr) ++{ ++ VFIOQuirk *quirk; ++ VFIOAscendBarQuirk *bar4_quirk; ++ ++ if (vdev->vendor_id != PCI_VENDOR_ID_HUAWEI || nr != 4 || ++ vdev->device_id != PCI_DEVICE_ID_ASCEND310) { ++ return; ++ } ++ ++ quirk = g_malloc0(sizeof(*quirk)); ++ quirk->nr_mem = 1; ++ quirk->mem = g_new0(MemoryRegion, quirk->nr_mem); ++ bar4_quirk = quirk->data = g_new0(typeof(*bar4_quirk), quirk->nr_mem); ++ bar4_quirk[0].vdev = vdev; ++ bar4_quirk[0].offset = ASCEND310_XLOADER_OFFSET; ++ bar4_quirk[0].bar = nr; ++ ++ /* ++ * intercept w/r to the xloader-updating register, ++ * so the vm can't enable xloader-updating ++ */ ++ memory_region_init_io(&quirk->mem[0], OBJECT(vdev), ++ &vfio_ascend_intercept_regs_quirk, ++ &bar4_quirk[0], ++ "vfio-ascend310-bar4-intercept-regs-quirk", ++ ASCEND310_XLOADER_SIZE); ++ memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, ++ bar4_quirk[0].offset, ++ &quirk->mem[0], 1); ++ QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); ++} ++ + /* + * Common quirk probe entry points. 
+ */ +@@ -1261,6 +1335,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) + #ifdef CONFIG_VFIO_IGD + vfio_probe_igd_bar4_quirk(vdev, nr); + #endif ++ vfio_probe_ascend310_bar4_quirk(vdev, nr); + } + + void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr) +-- +2.27.0 + diff --git a/vfio-pci-Ascend710-change-to-bar2-quirk.patch b/vfio-pci-Ascend710-change-to-bar2-quirk.patch new file mode 100644 index 0000000000000000000000000000000000000000..954ced1b7f10a60b09f5b811cfefbc4d2af76485 --- /dev/null +++ b/vfio-pci-Ascend710-change-to-bar2-quirk.patch @@ -0,0 +1,125 @@ +From 782040a627d0c3a44a9259a9055610e25c1f44fe Mon Sep 17 00:00:00 2001 +From: Wu Binfeng +Date: Mon, 25 Apr 2022 15:17:48 +0800 +Subject: [PATCH] vfio/pci: Ascend710 change to bar2 quirk + +Change Ascend710's quirk regions to bar2 for internal causes. +And support Ascend710 2P format now. +--- + hw/vfio/pci-quirks.c | 64 +++++++++++++++++++++++++++++++++++--------- + 1 file changed, 51 insertions(+), 13 deletions(-) + +diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c +index ba4d8f020c..a71ebe26b4 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -1213,10 +1213,17 @@ int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, + #define PCI_DEVICE_ID_ASCEND910 0xd801 + #define PCI_DEVICE_ID_ASCEND710 0xd500 + #define PCI_DEVICE_ID_ASCEND310 0xd100 ++#define PCI_SUB_DEVICE_ID_ASCEND710_1P_MIN 0x100 ++#define PCI_SUB_DEVICE_ID_ASCEND710_1P_MAX 0x10f ++#define PCI_SUB_DEVICE_ID_ASCEND710_2P_MIN 0x110 ++#define PCI_SUB_DEVICE_ID_ASCEND710_2P_MAX 0x11f + #define ASCEND910_XLOADER_SIZE 4 + #define ASCEND910_XLOADER_OFFSET 0x80400 ++#define ASCEND710_2P_BASE (128 * 1024 * 1024) ++#define ASCEND710_1P_DEVNUM 1 ++#define ASCEND710_2P_DEVNUM 2 + #define ASCEND710_XLOADER_SIZE 4 +-#define ASCEND710_XLOADER_OFFSET 0x20430 ++#define ASCEND710_XLOADER_OFFSET 0x100430 + #define ASCEND310_XLOADER_SIZE 4 + #define ASCEND310_XLOADER_OFFSET 0x400 + +@@ -1289,23 +1296,38 @@ static void 
vfio_probe_ascend910_bar0_quirk(VFIOPCIDevice *vdev, int nr) + QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); + } + +-static void vfio_probe_ascend710_bar0_quirk(VFIOPCIDevice *vdev, int nr) ++static void vfio_probe_ascend710_bar2_quirk(VFIOPCIDevice *vdev, int nr) + { + VFIOQuirk *quirk; +- VFIOAscendBarQuirk *bar0_quirk; ++ VFIOAscendBarQuirk *bar2_quirk; ++ int sub_device_id; ++ int devnum = 0; + +- if (vdev->vendor_id != PCI_VENDOR_ID_HUAWEI || nr != 0 || ++ if (vdev->vendor_id != PCI_VENDOR_ID_HUAWEI || nr != 2 || + vdev->device_id != PCI_DEVICE_ID_ASCEND710) { + return; + } + ++ sub_device_id = pci_get_word(vdev->pdev.config + PCI_SUBSYSTEM_ID); ++ if (sub_device_id >= PCI_SUB_DEVICE_ID_ASCEND710_1P_MIN && ++ sub_device_id <= PCI_SUB_DEVICE_ID_ASCEND710_1P_MAX) { ++ devnum = ASCEND710_1P_DEVNUM; ++ } else if (sub_device_id >= PCI_SUB_DEVICE_ID_ASCEND710_2P_MIN && ++ sub_device_id <= PCI_SUB_DEVICE_ID_ASCEND710_2P_MAX) { ++ devnum = ASCEND710_2P_DEVNUM; ++ } ++ ++ if (devnum != ASCEND710_1P_DEVNUM && devnum != ASCEND710_2P_DEVNUM) { ++ return; ++ } ++ + quirk = g_malloc0(sizeof(*quirk)); +- quirk->nr_mem = 1; ++ quirk->nr_mem = devnum; + quirk->mem = g_new0(MemoryRegion, quirk->nr_mem); +- bar0_quirk = quirk->data = g_new0(typeof(*bar0_quirk), quirk->nr_mem); +- bar0_quirk[0].vdev = vdev; +- bar0_quirk[0].offset = ASCEND710_XLOADER_OFFSET; +- bar0_quirk[0].bar = nr; ++ bar2_quirk = quirk->data = g_new0(typeof(*bar2_quirk), quirk->nr_mem); ++ bar2_quirk[0].vdev = vdev; ++ bar2_quirk[0].offset = ASCEND710_XLOADER_OFFSET; ++ bar2_quirk[0].bar = nr; + + /* + * intercept w/r to the xloader-updating register, +@@ -1313,12 +1335,28 @@ static void vfio_probe_ascend710_bar0_quirk(VFIOPCIDevice *vdev, int nr) + */ + memory_region_init_io(&quirk->mem[0], OBJECT(vdev), + &vfio_ascend_intercept_regs_quirk, +- &bar0_quirk[0], +- "vfio-ascend710-bar0-intercept-regs-quirk", ++ &bar2_quirk[0], ++ "vfio-ascend710-bar2-1p-intercept-regs-quirk", + 
ASCEND710_XLOADER_SIZE); + memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, +- bar0_quirk[0].offset, ++ bar2_quirk[0].offset, + &quirk->mem[0], 1); ++ ++ if (devnum == ASCEND710_2P_DEVNUM) { ++ bar2_quirk[1].vdev = vdev; ++ bar2_quirk[1].offset = (ASCEND710_2P_BASE + ASCEND710_XLOADER_OFFSET); ++ bar2_quirk[1].bar = nr; ++ ++ memory_region_init_io(&quirk->mem[1], OBJECT(vdev), ++ &vfio_ascend_intercept_regs_quirk, ++ &bar2_quirk[1], ++ "vfio-ascend710-bar2-2p-intercept-regs-quirk", ++ ASCEND710_XLOADER_SIZE); ++ memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, ++ bar2_quirk[1].offset, ++ &quirk->mem[1], 1); ++ } ++ + QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); + } + +@@ -1408,7 +1446,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) + vfio_probe_igd_bar4_quirk(vdev, nr); + #endif + vfio_probe_ascend910_bar0_quirk(vdev, nr); +- vfio_probe_ascend710_bar0_quirk(vdev, nr); ++ vfio_probe_ascend710_bar2_quirk(vdev, nr); + vfio_probe_ascend310_bar4_quirk(vdev, nr); + } + +-- +2.27.0 + diff --git a/vfio-pci-Ascend710-need-4Bytes-quirk-in-bar0.patch b/vfio-pci-Ascend710-need-4Bytes-quirk-in-bar0.patch new file mode 100644 index 0000000000000000000000000000000000000000..771650754bfd6754a0e0d780d760507dcbb170a5 --- /dev/null +++ b/vfio-pci-Ascend710-need-4Bytes-quirk-in-bar0.patch @@ -0,0 +1,75 @@ +From f999392631e7f9fb15493f17b535a8a42ac88be2 Mon Sep 17 00:00:00 2001 +From: Binfeng Wu +Date: Tue, 8 Feb 2022 17:16:04 +0800 +Subject: [PATCH] vfio/pci: Ascend710 need 4Bytes quirk in bar0 + +--- + hw/vfio/pci-quirks.c | 37 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 37 insertions(+) + +diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c +index 8fb190ce3c..9ef4b63e82 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -1210,7 +1210,10 @@ int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, + } + + #define PCI_VENDOR_ID_HUAWEI 0x19e5 ++#define PCI_DEVICE_ID_ASCEND710 0xd500 + #define 
PCI_DEVICE_ID_ASCEND310 0xd100 ++#define ASCEND710_XLOADER_SIZE 4 ++#define ASCEND710_XLOADER_OFFSET 0x20430 + #define ASCEND310_XLOADER_SIZE 4 + #define ASCEND310_XLOADER_OFFSET 0x400 + +@@ -1250,6 +1253,39 @@ static const MemoryRegionOps vfio_ascend_intercept_regs_quirk = { + .endianness = DEVICE_LITTLE_ENDIAN, + }; + ++static void vfio_probe_ascend710_bar0_quirk(VFIOPCIDevice *vdev, int nr) ++{ ++ VFIOQuirk *quirk; ++ VFIOAscendBarQuirk *bar0_quirk; ++ ++ if (vdev->vendor_id != PCI_VENDOR_ID_HUAWEI || nr != 0 || ++ vdev->device_id != PCI_DEVICE_ID_ASCEND710) { ++ return; ++ } ++ ++ quirk = g_malloc0(sizeof(*quirk)); ++ quirk->nr_mem = 1; ++ quirk->mem = g_new0(MemoryRegion, quirk->nr_mem); ++ bar0_quirk = quirk->data = g_new0(typeof(*bar0_quirk), quirk->nr_mem); ++ bar0_quirk[0].vdev = vdev; ++ bar0_quirk[0].offset = ASCEND710_XLOADER_OFFSET; ++ bar0_quirk[0].bar = nr; ++ ++ /* ++ * intercept w/r to the xloader-updating register, ++ * so the vm can't enable xloader-updating ++ */ ++ memory_region_init_io(&quirk->mem[0], OBJECT(vdev), ++ &vfio_ascend_intercept_regs_quirk, ++ &bar0_quirk[0], ++ "vfio-ascend710-bar0-intercept-regs-quirk", ++ ASCEND710_XLOADER_SIZE); ++ memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, ++ bar0_quirk[0].offset, ++ &quirk->mem[0], 1); ++ QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); ++} ++ + static void vfio_probe_ascend310_bar4_quirk(VFIOPCIDevice *vdev, int nr) + { + VFIOQuirk *quirk; +@@ -1335,6 +1371,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) + #ifdef CONFIG_VFIO_IGD + vfio_probe_igd_bar4_quirk(vdev, nr); + #endif ++ vfio_probe_ascend710_bar0_quirk(vdev, nr); + vfio_probe_ascend310_bar4_quirk(vdev, nr); + } + +-- +2.27.0 + diff --git a/vfio-pci-Ascend910-need-4Bytes-quirk-in-bar0.patch b/vfio-pci-Ascend910-need-4Bytes-quirk-in-bar0.patch new file mode 100644 index 0000000000000000000000000000000000000000..e273e233d58c24617643a0564c4a27d466bd1297 --- /dev/null +++ 
b/vfio-pci-Ascend910-need-4Bytes-quirk-in-bar0.patch @@ -0,0 +1,76 @@ +From 5b068100780cf91cc1696589d2115ba3078f9d38 Mon Sep 17 00:00:00 2001 +From: Binfeng Wu +Date: Tue, 8 Feb 2022 19:20:36 +0800 +Subject: [PATCH] vfio/pci: Ascend910 need 4Bytes quirk in bar0 + +--- + hw/vfio/pci-quirks.c | 37 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 37 insertions(+) + +diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c +index 9ef4b63e82..ba4d8f020c 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -1210,8 +1210,11 @@ int vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, + } + + #define PCI_VENDOR_ID_HUAWEI 0x19e5 ++#define PCI_DEVICE_ID_ASCEND910 0xd801 + #define PCI_DEVICE_ID_ASCEND710 0xd500 + #define PCI_DEVICE_ID_ASCEND310 0xd100 ++#define ASCEND910_XLOADER_SIZE 4 ++#define ASCEND910_XLOADER_OFFSET 0x80400 + #define ASCEND710_XLOADER_SIZE 4 + #define ASCEND710_XLOADER_OFFSET 0x20430 + #define ASCEND310_XLOADER_SIZE 4 +@@ -1253,6 +1256,39 @@ static const MemoryRegionOps vfio_ascend_intercept_regs_quirk = { + .endianness = DEVICE_LITTLE_ENDIAN, + }; + ++static void vfio_probe_ascend910_bar0_quirk(VFIOPCIDevice *vdev, int nr) ++{ ++ VFIOQuirk *quirk; ++ VFIOAscendBarQuirk *bar0_quirk; ++ ++ if (vdev->vendor_id != PCI_VENDOR_ID_HUAWEI || nr != 0 || ++ vdev->device_id != PCI_DEVICE_ID_ASCEND910) { ++ return; ++ } ++ ++ quirk = g_malloc0(sizeof(*quirk)); ++ quirk->nr_mem = 1; ++ quirk->mem = g_new0(MemoryRegion, quirk->nr_mem); ++ bar0_quirk = quirk->data = g_new0(typeof(*bar0_quirk), quirk->nr_mem); ++ bar0_quirk[0].vdev = vdev; ++ bar0_quirk[0].offset = ASCEND910_XLOADER_OFFSET; ++ bar0_quirk[0].bar = nr; ++ ++ /* ++ * intercept w/r to the xloader-updating register, ++ * so the vm can't enable xloader-updating ++ */ ++ memory_region_init_io(&quirk->mem[0], OBJECT(vdev), ++ &vfio_ascend_intercept_regs_quirk, ++ &bar0_quirk[0], ++ "vfio-ascend910-bar0-intercept-regs-quirk", ++ ASCEND910_XLOADER_SIZE); ++ 
memory_region_add_subregion_overlap(vdev->bars[nr].region.mem, ++ bar0_quirk[0].offset, ++ &quirk->mem[0], 1); ++ QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next); ++} ++ + static void vfio_probe_ascend710_bar0_quirk(VFIOPCIDevice *vdev, int nr) + { + VFIOQuirk *quirk; +@@ -1371,6 +1407,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr) + #ifdef CONFIG_VFIO_IGD + vfio_probe_igd_bar4_quirk(vdev, nr); + #endif ++ vfio_probe_ascend910_bar0_quirk(vdev, nr); + vfio_probe_ascend710_bar0_quirk(vdev, nr); + vfio_probe_ascend310_bar4_quirk(vdev, nr); + } +-- +2.27.0 + diff --git a/vhost-add-vhost_dev_suspend-resume_op.patch b/vhost-add-vhost_dev_suspend-resume_op.patch new file mode 100644 index 0000000000000000000000000000000000000000..c400fb2e0705786be85cdc2dffd9246b2af30b0c --- /dev/null +++ b/vhost-add-vhost_dev_suspend-resume_op.patch @@ -0,0 +1,38 @@ +From b0a62a84bd1c6ad5d4c11463371fcf267b56d902 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:13:41 +0800 +Subject: [PATCH] vhost: add vhost_dev_suspend/resume_op + +Introduce new vhost interface to support vhost device suspend & resume + +Signed-off-by: libai +--- + include/hw/virtio/vhost-backend.h | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h +index 71b02e4a12..84b8fa1075 100644 +--- a/include/hw/virtio/vhost-backend.h ++++ b/include/hw/virtio/vhost-backend.h +@@ -155,6 +155,9 @@ typedef int (*vhost_set_device_state_fd_op)(struct vhost_dev *dev, + Error **errp); + typedef int (*vhost_check_device_state_op)(struct vhost_dev *dev, Error **errp); + ++typedef int (*vhost_dev_suspend_op)(struct vhost_dev *dev); ++typedef int (*vhost_dev_resume_op)(struct vhost_dev *dev); ++ + typedef struct VhostOps { + VhostBackendType backend_type; + vhost_backend_init vhost_backend_init; +@@ -208,6 +211,8 @@ typedef struct VhostOps { + vhost_supports_device_state_op vhost_supports_device_state; + 
vhost_set_device_state_fd_op vhost_set_device_state_fd; + vhost_check_device_state_op vhost_check_device_state; ++ vhost_dev_suspend_op vhost_dev_suspend; ++ vhost_dev_resume_op vhost_dev_resume; + } VhostOps; + + int vhost_backend_update_device_iotlb(struct vhost_dev *dev, +-- +2.27.0 + diff --git a/vhost-cancel-migration-when-vhost-user-restarted-dur.patch b/vhost-cancel-migration-when-vhost-user-restarted-dur.patch new file mode 100644 index 0000000000000000000000000000000000000000..b551e4b014b4707df959b2acb42517ca6c5e3d31 --- /dev/null +++ b/vhost-cancel-migration-when-vhost-user-restarted-dur.patch @@ -0,0 +1,86 @@ +From 302401ee7eb437712b69caff44ce684c88573dc6 Mon Sep 17 00:00:00 2001 +From: Chuan Zheng +Date: Mon, 29 Jul 2019 16:22:12 +0800 +Subject: [PATCH] vhost: cancel migration when vhost-user restarted during + migraiton + +Qemu will abort when vhost-user process is restarted during migration +when vhost_log_global_start/stop is called. The reason is clear that +vhost_dev_set_log returns -1 because network connection is temporarily +lost. Let's cancel migraiton and report it to user in this abnormal +situation. 
+ +Signed-off-by: Ying Fang +--- + hw/virtio/vhost.c | 9 +++++++-- + migration/migration.c | 2 +- + migration/migration.h | 1 + + 3 files changed, 9 insertions(+), 3 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 2c9ac79468..a8adc149ad 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -26,6 +26,7 @@ + #include "hw/mem/memory-device.h" + #include "migration/blocker.h" + #include "migration/qemu-file-types.h" ++#include "migration/migration.h" + #include "sysemu/dma.h" + #include "trace.h" + +@@ -1047,20 +1048,24 @@ check_dev_state: + static void vhost_log_global_start(MemoryListener *listener) + { + int r; ++ Error *errp = NULL; + + r = vhost_migration_log(listener, true); + if (r < 0) { +- abort(); ++ error_setg(&errp, "Failed to start vhost migration log"); ++ migrate_fd_error(migrate_get_current(), errp); + } + } + + static void vhost_log_global_stop(MemoryListener *listener) + { + int r; ++ Error *errp = NULL; + + r = vhost_migration_log(listener, false); + if (r < 0) { +- abort(); ++ error_setg(&errp, "Failed to stop vhost migration log"); ++ migrate_fd_error(migrate_get_current(), errp); + } + } + +diff --git a/migration/migration.c b/migration/migration.c +index 3ce04b2aaf..71a03b3248 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1377,7 +1377,7 @@ static void migrate_error_free(MigrationState *s) + } + } + +-static void migrate_fd_error(MigrationState *s, const Error *error) ++void migrate_fd_error(MigrationState *s, const Error *error) + { + trace_migrate_fd_error(error_get_pretty(error)); + assert(s->to_dst_file == NULL); +diff --git a/migration/migration.h b/migration/migration.h +index cf2c9c88e0..6aafa04314 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -482,6 +482,7 @@ bool migration_has_all_channels(void); + + uint64_t migrate_max_downtime(void); + ++void migrate_fd_error(MigrationState *s, const Error *error); + void migrate_set_error(MigrationState *s, const Error 
*error); + bool migrate_has_error(MigrationState *s); + +-- +2.27.0 + diff --git a/vhost-implement-migration-state-notifier-for-vdpa-de.patch b/vhost-implement-migration-state-notifier-for-vdpa-de.patch new file mode 100644 index 0000000000000000000000000000000000000000..16d939032c5859f2e774abe3242356feb6dd057d --- /dev/null +++ b/vhost-implement-migration-state-notifier-for-vdpa-de.patch @@ -0,0 +1,87 @@ +From 3ef6dc341d6921a95564e9089f41ddbd79cd2a94 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:55:53 +0800 +Subject: [PATCH] vhost: implement migration state notifier for vdpa device + +Register migration state notifier to support triggered by +migration exceptions + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-mig.c | 29 +++++++++++++++++++++++++++++ + include/hw/virtio/vdpa-dev.h | 1 + + 2 files changed, 30 insertions(+) + +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +index 1872f11f3f..9b47e3ed45 100644 +--- a/hw/virtio/vdpa-dev-mig.c ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -23,6 +23,7 @@ + #include "hw/virtio/virtio-bus.h" + #include "migration/register.h" + #include "migration/migration.h" ++#include "migration/misc.h" + #include "qemu/error-report.h" + #include "hw/virtio/vdpa-dev-mig.h" + #include "migration/qemu-file-types.h" +@@ -354,6 +355,31 @@ static SaveVMHandlers savevm_vdpa_handlers = { + .load_setup = vdpa_load_setup, + }; + ++static void vdpa_migration_state_notifier(Notifier *notifier, void *data) ++{ ++ MigrationState *s = data; ++ VhostVdpaDevice *vdev = container_of(notifier, ++ VhostVdpaDevice, ++ migration_state); ++ struct vhost_dev *hdev = &vdev->dev; ++ int ret; ++ ++ switch (s->state) { ++ case MIGRATION_STATUS_CANCELLING: ++ case MIGRATION_STATUS_CANCELLED: ++ case MIGRATION_STATUS_FAILED: ++ ret = vhost_vdpa_set_mig_state(hdev, VDPA_DEVICE_CANCEL); ++ if (ret) { ++ error_report("Failed to set state CANCEL\n"); ++ } ++ ++ break; ++ case MIGRATION_STATUS_COMPLETED: ++ default: ++ break; ++ } ++} ++ + 
void vdpa_migration_register(VhostVdpaDevice *vdev) + { + vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev), +@@ -361,10 +387,13 @@ void vdpa_migration_register(VhostVdpaDevice *vdev) + DEVICE(vdev)); + register_savevm_live("vdpa", -1, 1, + &savevm_vdpa_handlers, DEVICE(vdev)); ++ vdev->migration_state.notify = vdpa_migration_state_notifier; ++ migration_add_notifier(&vdev->migration_state, vdpa_migration_state_notifier); + } + + void vdpa_migration_unregister(VhostVdpaDevice *vdev) + { ++ migration_remove_notifier(&vdev->migration_state); + unregister_savevm(VMSTATE_IF(&vdev->parent_obj.parent_obj), "vdpa", DEVICE(vdev)); + qemu_del_vm_change_state_handler(vdev->vmstate); + } +diff --git a/include/hw/virtio/vdpa-dev.h b/include/hw/virtio/vdpa-dev.h +index 43cbcef81b..20f50c76c6 100644 +--- a/include/hw/virtio/vdpa-dev.h ++++ b/include/hw/virtio/vdpa-dev.h +@@ -39,6 +39,7 @@ struct VhostVdpaDevice { + bool started; + int (*post_init)(VhostVdpaDevice *v, Error **errp); + VMChangeStateEntry *vmstate; ++ Notifier migration_state; + }; + + #endif +-- +2.27.0 + diff --git a/vhost-implement-post-resume-bh.patch b/vhost-implement-post-resume-bh.patch new file mode 100644 index 0000000000000000000000000000000000000000..1da1164301cc664adbbcd67dc05d6ba9d66ff6dd --- /dev/null +++ b/vhost-implement-post-resume-bh.patch @@ -0,0 +1,57 @@ +From 229737ca91d4e81b4a14143da9981bd59b80a539 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:57:35 +0800 +Subject: [PATCH] vhost: implement post resume bh + +Set vdpa device mig state to post start when vm post start + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-mig.c | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +index 662d4a29dc..1872f11f3f 100644 +--- a/hw/virtio/vdpa-dev-mig.c ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -26,6 +26,7 @@ + #include "qemu/error-report.h" + #include "hw/virtio/vdpa-dev-mig.h" + #include 
"migration/qemu-file-types.h" ++#include "qemu/main-loop.h" + + /* + * Flags used as delimiter: +@@ -218,6 +219,18 @@ err_host_notifiers: + return ret; + } + ++static void vdpa_dev_migration_handle_incoming_bh(void *opaque) ++{ ++ struct vhost_dev *hdev = opaque; ++ int ret; ++ ++ /* Post start device, unsupport rollback if failed! */ ++ ret = vhost_vdpa_set_mig_state(hdev, VDPA_DEVICE_POST_START); ++ if (ret) { ++ error_report("Failed to set state: POST_START\n"); ++ } ++} ++ + static void vdpa_dev_vmstate_change(void *opaque, bool running, RunState state) + { + VhostVdpaDevice *vdpa = VHOST_VDPA_DEVICE(opaque); +@@ -247,6 +260,10 @@ static void vdpa_dev_vmstate_change(void *opaque, bool running, RunState state) + + if (mis->state == RUN_STATE_RESTORE_VM) { + vhost_vdpa_call(hdev, VHOST_VDPA_RESUME, NULL); ++ /* post resume */ ++ mis->bh = qemu_bh_new(vdpa_dev_migration_handle_incoming_bh, ++ hdev); ++ qemu_bh_schedule(mis->bh); + } + } + } +-- +2.27.0 + diff --git a/vhost-implement-savevm_handler-for-vdpa-device.patch b/vhost-implement-savevm_handler-for-vdpa-device.patch new file mode 100644 index 0000000000000000000000000000000000000000..721636ac0092081615e33da62edb6717e39b63e6 --- /dev/null +++ b/vhost-implement-savevm_handler-for-vdpa-device.patch @@ -0,0 +1,270 @@ +From 556aaa9632862505548d5083d369e92590fb2087 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:53:28 +0800 +Subject: [PATCH] vhost: implement savevm_handler for vdpa device + +Register savevm_handler ops for vdpa devices to support migration:x + +Signed-off-by: libai +--- + hw/virtio/vdpa-dev-mig.c | 175 +++++++++++++++++++++++++++++++ + include/hw/virtio/vdpa-dev-mig.h | 13 +++ + linux-headers/linux/vhost.h | 9 ++ + 3 files changed, 197 insertions(+) + +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +index 1d2bed2571..662d4a29dc 100644 +--- a/hw/virtio/vdpa-dev-mig.c ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -21,9 +21,21 @@ + #include "hw/virtio/vhost.h" + #include 
"hw/virtio/vdpa-dev.h" + #include "hw/virtio/virtio-bus.h" ++#include "migration/register.h" + #include "migration/migration.h" + #include "qemu/error-report.h" + #include "hw/virtio/vdpa-dev-mig.h" ++#include "migration/qemu-file-types.h" ++ ++/* ++ * Flags used as delimiter: ++ * 0xffffffff => MSB 32-bit all 1s ++ * 0xef10 => emulated (virtual) function IO ++ * 0x0000 => 16-bits reserved for flags ++ */ ++#define VDPA_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) ++#define VDPA_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) ++#define VDPA_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) + + static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request, + void *arg) +@@ -39,6 +51,80 @@ static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request, + return ioctl(fd, request, arg); + } + ++static int vhost_vdpa_set_mig_state(struct vhost_dev *dev, uint8_t state) ++{ ++ return vhost_vdpa_call(dev, VHOST_VDPA_SET_MIG_STATE, &state); ++} ++ ++static int vhost_vdpa_dev_buffer_size(struct vhost_dev *dev, uint32_t *size) ++{ ++ return vhost_vdpa_call(dev, VHOST_GET_DEV_BUFFER_SIZE, size); ++} ++ ++static int vhost_vdpa_dev_buffer_save(struct vhost_dev *dev, QEMUFile *f) ++{ ++ struct vhost_vdpa_config *config; ++ unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); ++ uint32_t buffer_size = 0; ++ int ret; ++ ++ ret = vhost_vdpa_dev_buffer_size(dev, &buffer_size); ++ if (ret) { ++ error_report("get dev buffer size failed: %d\n", ret); ++ return ret; ++ } ++ ++ qemu_put_be32(f, buffer_size); ++ ++ config = g_malloc(buffer_size + config_size); ++ config->off = 0; ++ config->len = buffer_size; ++ ++ ret = vhost_vdpa_call(dev, VHOST_GET_DEV_BUFFER, config); ++ if (ret) { ++ error_report("get dev buffer failed: %d\n", ret); ++ goto free; ++ } ++ ++ qemu_put_buffer(f, config->buf, buffer_size); ++free: ++ g_free(config); ++ ++ return ret; ++} ++ ++static int vhost_vdpa_dev_buffer_load(struct vhost_dev *dev, QEMUFile *f) ++{ ++ 
struct vhost_vdpa_config *config; ++ unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); ++ uint32_t buffer_size, recv_size; ++ int ret; ++ ++ buffer_size = qemu_get_be32(f); ++ ++ config = g_malloc(buffer_size + config_size); ++ config->off = 0; ++ config->len = buffer_size; ++ ++ recv_size = qemu_get_buffer(f, config->buf, buffer_size); ++ if (recv_size != buffer_size) { ++ error_report("read dev mig buffer failed, buffer_size: %u, " ++ "recv_size: %u\n", buffer_size, recv_size); ++ ret = -EINVAL; ++ goto free; ++ } ++ ++ ret = vhost_vdpa_call(dev, VHOST_SET_DEV_BUFFER, config); ++ if (ret) { ++ error_report("set dev buffer failed: %d\n", ret); ++ } ++ ++free: ++ g_free(config); ++ ++ return ret; ++} ++ + static int vhost_vdpa_device_suspend(VhostVdpaDevice *vdpa) + { + VirtIODevice *vdev = VIRTIO_DEVICE(vdpa); +@@ -165,14 +251,103 @@ static void vdpa_dev_vmstate_change(void *opaque, bool running, RunState state) + } + } + ++static int vdpa_save_setup(QEMUFile *f, void *opaque) ++{ ++ qemu_put_be64(f, VDPA_MIG_FLAG_DEV_SETUP_STATE); ++ qemu_put_be64(f, VDPA_MIG_FLAG_END_OF_STATE); ++ ++ return qemu_file_get_error(f); ++} ++ ++static int vdpa_save_complete_precopy(QEMUFile *f, void *opaque) ++{ ++ VhostVdpaDevice *vdev = VHOST_VDPA_DEVICE(opaque); ++ struct vhost_dev *hdev = &vdev->dev; ++ int ret; ++ ++ qemu_put_be64(f, VDPA_MIG_FLAG_DEV_CONFIG_STATE); ++ ret = vhost_vdpa_dev_buffer_save(hdev, f); ++ if (ret) { ++ error_report("Save vdpa device buffer failed: %d\n", ret); ++ return ret; ++ } ++ qemu_put_be64(f, VDPA_MIG_FLAG_END_OF_STATE); ++ ++ return qemu_file_get_error(f); ++} ++ ++static int vdpa_load_state(QEMUFile *f, void *opaque, int version_id) ++{ ++ VhostVdpaDevice *vdev = VHOST_VDPA_DEVICE(opaque); ++ struct vhost_dev *hdev = &vdev->dev; ++ ++ int ret; ++ uint64_t data; ++ ++ data = qemu_get_be64(f); ++ while (data != VDPA_MIG_FLAG_END_OF_STATE) { ++ if (data == VDPA_MIG_FLAG_DEV_SETUP_STATE) { ++ data = qemu_get_be64(f); ++ if (data 
== VDPA_MIG_FLAG_END_OF_STATE) { ++ return 0; ++ } else { ++ error_report("SETUP STATE: EOS not found 0x%lx\n", data); ++ return -EINVAL; ++ } ++ } else if (data == VDPA_MIG_FLAG_DEV_CONFIG_STATE) { ++ ret = vhost_vdpa_dev_buffer_load(hdev, f); ++ if (ret) { ++ error_report("fail to restore device buffer.\n"); ++ return ret; ++ } ++ } ++ ++ ret = qemu_file_get_error(f); ++ if (ret) { ++ error_report("qemu file error: %d\n", ret); ++ return ret; ++ } ++ data = qemu_get_be64(f); ++ } ++ ++ return 0; ++} ++ ++static int vdpa_load_setup(QEMUFile *f, void *opaque) ++{ ++ VhostVdpaDevice *v = VHOST_VDPA_DEVICE(opaque); ++ struct vhost_dev *hdev = &v->dev; ++ int ret = 0; ++ ++ ret = vhost_vdpa_set_mig_state(hdev, VDPA_DEVICE_PRE_START); ++ if (ret) { ++ error_report("pre start device failed: %d\n", ret); ++ goto out; ++ } ++ ++ return qemu_file_get_error(f); ++out: ++ return ret; ++} ++ ++static SaveVMHandlers savevm_vdpa_handlers = { ++ .save_setup = vdpa_save_setup, ++ .save_live_complete_precopy = vdpa_save_complete_precopy, ++ .load_state = vdpa_load_state, ++ .load_setup = vdpa_load_setup, ++}; ++ + void vdpa_migration_register(VhostVdpaDevice *vdev) + { + vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev), + vdpa_dev_vmstate_change, + DEVICE(vdev)); ++ register_savevm_live("vdpa", -1, 1, ++ &savevm_vdpa_handlers, DEVICE(vdev)); + } + + void vdpa_migration_unregister(VhostVdpaDevice *vdev) + { ++ unregister_savevm(VMSTATE_IF(&vdev->parent_obj.parent_obj), "vdpa", DEVICE(vdev)); + qemu_del_vm_change_state_handler(vdev->vmstate); + } +diff --git a/include/hw/virtio/vdpa-dev-mig.h b/include/hw/virtio/vdpa-dev-mig.h +index 89665ca747..adc1d657f7 100644 +--- a/include/hw/virtio/vdpa-dev-mig.h ++++ b/include/hw/virtio/vdpa-dev-mig.h +@@ -9,6 +9,19 @@ + + #include "hw/virtio/vdpa-dev.h" + ++enum { ++ VDPA_DEVICE_START, ++ VDPA_DEVICE_STOP, ++ VDPA_DEVICE_PRE_START, ++ VDPA_DEVICE_PRE_STOP, ++ VDPA_DEVICE_CANCEL, ++ VDPA_DEVICE_POST_START, ++ 
VDPA_DEVICE_START_ASYNC, ++ VDPA_DEVICE_STOP_ASYNC, ++ VDPA_DEVICE_PRE_START_ASYNC, ++ VDPA_DEVICE_QUERY_OP_STATE, ++}; ++ + void vdpa_migration_register(VhostVdpaDevice *vdev); + + void vdpa_migration_unregister(VhostVdpaDevice *vdev); +diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h +index 19dc7fd36c..a08e980a1e 100644 +--- a/linux-headers/linux/vhost.h ++++ b/linux-headers/linux/vhost.h +@@ -231,4 +231,13 @@ + */ + #define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ + struct vhost_vring_state) ++ ++/* set and get device buffer */ ++#define VHOST_GET_DEV_BUFFER _IOR(VHOST_VIRTIO, 0xb0, struct vhost_vdpa_config) ++#define VHOST_SET_DEV_BUFFER _IOW(VHOST_VIRTIO, 0xb1, struct vhost_vdpa_config) ++#define VHOST_GET_DEV_BUFFER_SIZE _IOR(VHOST_VIRTIO, 0xb3, __u32) ++ ++/* set device migtration state */ ++#define VHOST_VDPA_SET_MIG_STATE _IOW(VHOST_VIRTIO, 0xb2, __u8) ++ + #endif +-- +2.27.0 + diff --git a/vhost-implement-vhost-vdpa-suspend-resume.patch b/vhost-implement-vhost-vdpa-suspend-resume.patch new file mode 100644 index 0000000000000000000000000000000000000000..620b963673bb22390b6f2d4bb36bbdfd3c060c53 --- /dev/null +++ b/vhost-implement-vhost-vdpa-suspend-resume.patch @@ -0,0 +1,80 @@ +From a7f9a67ee98a5261f7639619055034f40bccfef0 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:22:20 +0800 +Subject: [PATCH] vhost: implement vhost-vdpa suspend/resume + +vhost-vdpa implements the vhost_dev_suspend interface, +which will be called during the shutdown phase of the +live migration source virtual machine to suspend the +device but not reset the device information. + +vhost-vdpa implements the vhost_dev_resume interface. +If the live migration fails, it will be called during +the startup phase of the source virtual machine. +Enable the device but set the status, etc. 
+ +Signed-off-by: libai +--- + hw/virtio/vhost-vdpa.c | 41 +++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 41 insertions(+) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 037a9c6e4c..063e941544 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -1513,6 +1513,45 @@ static bool vhost_vdpa_force_iommu(struct vhost_dev *dev) + return true; + } + ++static int vhost_vdpa_suspend_device(struct vhost_dev *dev) ++{ ++ struct vhost_vdpa *v = dev->opaque; ++ int ret; ++ ++ vhost_vdpa_svqs_stop(dev); ++ vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); ++ ++ if (dev->vq_index + dev->nvqs != dev->vq_index_end) { ++ return 0; ++ } ++ ++ ret = vhost_vdpa_call(dev, VHOST_VDPA_SUSPEND, NULL); ++ memory_listener_unregister(&v->listener); ++ return ret; ++} ++ ++static int vhost_vdpa_resume_device(struct vhost_dev *dev) ++{ ++ struct vhost_vdpa *v = dev->opaque; ++ bool ok; ++ ++ vhost_vdpa_host_notifiers_init(dev); ++ ok = vhost_vdpa_svqs_start(dev); ++ if (unlikely(!ok)) { ++ return -1; ++ } ++ for (int i = 0; i < v->dev->nvqs; ++i) { ++ vhost_vdpa_set_vring_ready(v, v->dev->vq_index + i); ++ } ++ ++ if (dev->vq_index + dev->nvqs != dev->vq_index_end) { ++ return 0; ++ } ++ ++ memory_listener_register(&v->listener, &address_space_memory); ++ return vhost_vdpa_call(dev, VHOST_VDPA_RESUME, NULL); ++} ++ + static int vhost_vdpa_log_sync(struct vhost_dev *dev) + { + struct vhost_vdpa *v = dev->opaque; +@@ -1559,4 +1598,6 @@ const VhostOps vdpa_ops = { + .vhost_log_sync = vhost_vdpa_log_sync, + .vhost_set_config_call = vhost_vdpa_set_config_call, + .vhost_reset_status = vhost_vdpa_reset_status, ++ .vhost_dev_suspend = vhost_vdpa_suspend_device, ++ .vhost_dev_resume = vhost_vdpa_resume_device, + }; +-- +2.27.0 + diff --git a/vhost-implement-vhost_vdpa_device_suspend-resume.patch b/vhost-implement-vhost_vdpa_device_suspend-resume.patch new file mode 100644 index 
0000000000000000000000000000000000000000..f9ef199773c3fe3c136ad0fc6d5d29b67b7ef2c9 --- /dev/null +++ b/vhost-implement-vhost_vdpa_device_suspend-resume.patch @@ -0,0 +1,447 @@ +From 4c5a9a0703e227186639124f09cdf7214e40ea7d Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:27:34 +0800 +Subject: [PATCH] vhost: implement vhost_vdpa_device_suspend/resume + +Implement vhost device suspend & resume interface + +Signed-off-by: jiangdongxu +Signed-off-by: fangyi +Signed-off-by: libai +--- + hw/virtio/meson.build | 2 +- + hw/virtio/vdpa-dev-mig.c | 178 +++++++++++++++++++++++++++++++ + hw/virtio/vhost.c | 138 ++++++++++++++++++++++++ + include/hw/virtio/vdpa-dev-mig.h | 16 +++ + include/hw/virtio/vdpa-dev.h | 1 + + include/hw/virtio/vhost.h | 3 + + migration/migration.c | 3 +- + migration/migration.h | 2 + + 8 files changed, 340 insertions(+), 3 deletions(-) + create mode 100644 hw/virtio/vdpa-dev-mig.c + create mode 100644 include/hw/virtio/vdpa-dev-mig.h + +diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build +index c0055a7832..596651d113 100644 +--- a/hw/virtio/meson.build ++++ b/hw/virtio/meson.build +@@ -5,7 +5,7 @@ system_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c') + system_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) + system_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK_COMMON', if_true: files('vhost-vsock-common.c')) + system_virtio_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: files('virtio-iommu.c')) +-system_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c')) ++system_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: files('vdpa-dev.c', 'vdpa-dev-mig.c')) + + specific_virtio_ss = ss.source_set() + specific_virtio_ss.add(files('virtio.c')) +diff --git a/hw/virtio/vdpa-dev-mig.c b/hw/virtio/vdpa-dev-mig.c +new file mode 100644 +index 0000000000..1d2bed2571 +--- /dev/null ++++ b/hw/virtio/vdpa-dev-mig.c +@@ -0,0 +1,178 @@ ++/* ++ * Copyright (c) Huawei 
Technologies Co., Ltd. 2023. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ ++ * You should have received a copy of the GNU General Public License along ++ * with this program; if not, see . ++ */ ++ ++#include ++#include ++#include "qemu/osdep.h" ++#include "hw/virtio/vhost.h" ++#include "hw/virtio/vdpa-dev.h" ++#include "hw/virtio/virtio-bus.h" ++#include "migration/migration.h" ++#include "qemu/error-report.h" ++#include "hw/virtio/vdpa-dev-mig.h" ++ ++static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request, ++ void *arg) ++{ ++ struct vhost_vdpa *v = dev->opaque; ++ int fd = v->device_fd; ++ ++ if (dev->vhost_ops->backend_type != VHOST_BACKEND_TYPE_VDPA) { ++ error_report("backend type isn't VDPA. 
Operation not permitted!\n"); ++ return -EPERM; ++ } ++ ++ return ioctl(fd, request, arg); ++} ++ ++static int vhost_vdpa_device_suspend(VhostVdpaDevice *vdpa) ++{ ++ VirtIODevice *vdev = VIRTIO_DEVICE(vdpa); ++ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); ++ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); ++ int ret; ++ ++ if (!vdpa->started) { ++ return -EFAULT; ++ } ++ ++ if (!k->set_guest_notifiers) { ++ return -EFAULT; ++ } ++ ++ vdpa->started = false; ++ ++ ret = vhost_dev_suspend(&vdpa->dev, vdev, false); ++ if (ret) { ++ goto suspend_fail; ++ } ++ ++ ret = k->set_guest_notifiers(qbus->parent, vdpa->dev.nvqs, false); ++ if (ret < 0) { ++ error_report("vhost guest notifier cleanup failed: %d\n", ret); ++ goto set_guest_notifiers_fail; ++ } ++ ++ vhost_dev_disable_notifiers(&vdpa->dev, vdev); ++ return ret; ++ ++set_guest_notifiers_fail: ++ ret = k->set_guest_notifiers(qbus->parent, vdpa->dev.nvqs, true); ++ if (ret) { ++ error_report("vhost guest notifier restore failed: %d\n", ret); ++ } ++ ++suspend_fail: ++ vdpa->started = true; ++ return ret; ++} ++ ++static int vhost_vdpa_device_resume(VhostVdpaDevice *vdpa) ++{ ++ VirtIODevice *vdev = VIRTIO_DEVICE(vdpa); ++ BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); ++ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); ++ int i, ret; ++ ++ if (!k->set_guest_notifiers) { ++ error_report("binding does not support guest notifiers\n"); ++ return -ENOSYS; ++ } ++ ++ ret = vhost_dev_enable_notifiers(&vdpa->dev, vdev); ++ if (ret < 0) { ++ error_report("Error enabling host notifiers: %d\n", ret); ++ return ret; ++ } ++ ++ ret = k->set_guest_notifiers(qbus->parent, vdpa->dev.nvqs, true); ++ if (ret < 0) { ++ error_report("Error binding guest notifier: %d\n", ret); ++ goto err_host_notifiers; ++ } ++ ++ vdpa->dev.acked_features = vdev->guest_features; ++ ++ ret = vhost_dev_resume(&vdpa->dev, vdev, false); ++ if (ret < 0) { ++ error_report("Error starting vhost: %d\n", ret); ++ goto err_guest_notifiers; ++ 
} ++ vdpa->started = true; ++ ++ /* ++ * guest_notifier_mask/pending not used yet, so just unmask ++ * everything here. virtio-pci will do the right thing by ++ * enabling/disabling irqfd. ++ */ ++ for (i = 0; i < vdpa->dev.nvqs; i++) { ++ vhost_virtqueue_mask(&vdpa->dev, vdev, i, false); ++ } ++ ++ return ret; ++ ++err_guest_notifiers: ++ k->set_guest_notifiers(qbus->parent, vdpa->dev.nvqs, false); ++err_host_notifiers: ++ vhost_dev_disable_notifiers(&vdpa->dev, vdev); ++ return ret; ++} ++ ++static void vdpa_dev_vmstate_change(void *opaque, bool running, RunState state) ++{ ++ VhostVdpaDevice *vdpa = VHOST_VDPA_DEVICE(opaque); ++ struct vhost_dev *hdev = &vdpa->dev; ++ int ret; ++ MigrationState *ms = migrate_get_current(); ++ MigrationIncomingState *mis = migration_incoming_get_current(); ++ ++ if (!running) { ++ if (ms->state == RUN_STATE_PAUSED) { ++ ret = vhost_vdpa_device_suspend(vdpa); ++ if (ret) { ++ error_report("suspend vdpa device failed: %d\n", ret); ++ if (ms->migration_thread_running) { ++ migrate_fd_cancel(ms); ++ } ++ } ++ } ++ } else { ++ if (ms->state == RUN_STATE_RESTORE_VM) { ++ ret = vhost_vdpa_device_resume(vdpa); ++ if (ret) { ++ error_report("migration dest resume device failed, abort!\n"); ++ exit(EXIT_FAILURE); ++ } ++ } ++ ++ if (mis->state == RUN_STATE_RESTORE_VM) { ++ vhost_vdpa_call(hdev, VHOST_VDPA_RESUME, NULL); ++ } ++ } ++} ++ ++void vdpa_migration_register(VhostVdpaDevice *vdev) ++{ ++ vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev), ++ vdpa_dev_vmstate_change, ++ DEVICE(vdev)); ++} ++ ++void vdpa_migration_unregister(VhostVdpaDevice *vdev) ++{ ++ qemu_del_vm_change_state_handler(vdev->vmstate); ++} +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 438182d850..d073a6d5a5 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -2492,3 +2492,141 @@ bool used_memslots_is_exceeded(void) + { + return used_memslots_exceeded; + } ++ ++int vhost_dev_resume(struct vhost_dev *hdev, VirtIODevice *vdev, bool 
vrings) ++{ ++ int i, r; ++ EventNotifier *e = &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier; ++ ++ /* should only be called after backend is connected */ ++ if (!hdev->vhost_ops) { ++ error_report("Missing vhost_ops! Operation not permitted!\n"); ++ return -EPERM; ++ } ++ ++ vdev->vhost_started = true; ++ hdev->started = true; ++ hdev->vdev = vdev; ++ ++ if (vhost_dev_has_iommu(hdev)) { ++ memory_listener_register(&hdev->iommu_listener, vdev->dma_as); ++ } ++ ++ r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem); ++ if (r < 0) { ++ VHOST_OPS_DEBUG(r, "vhost_set_mem_table failed"); ++ goto fail_mem; ++ } ++ for (i = 0; i < hdev->nvqs; ++i) { ++ r = vhost_virtqueue_start(hdev, ++ vdev, ++ hdev->vqs + i, ++ hdev->vq_index + i); ++ if (r < 0) { ++ goto fail_vq; ++ } ++ } ++ ++ r = event_notifier_init(e, 0); ++ if (r < 0) { ++ return r; ++ } ++ event_notifier_test_and_clear(e); ++ if (!vdev->use_guest_notifier_mask) { ++ vhost_config_mask(hdev, vdev, true); ++ } ++ if (vrings) { ++ r = vhost_dev_set_vring_enable(hdev, true); ++ if (r) { ++ goto fail_vq; ++ } ++ } ++ if (hdev->vhost_ops->vhost_dev_resume) { ++ r = hdev->vhost_ops->vhost_dev_resume(hdev); ++ if (r) { ++ goto fail_start; ++ } ++ } ++ if (vhost_dev_has_iommu(hdev)) { ++ hdev->vhost_ops->vhost_set_iotlb_callback(hdev, true); ++ ++ /* ++ * Update used ring information for IOTLB to work correctly, ++ * vhost-kernel code requires for this. 
++ */ ++ for (i = 0; i < hdev->nvqs; ++i) { ++ struct vhost_virtqueue *vq = hdev->vqs + i; ++ vhost_device_iotlb_miss(hdev, vq->used_phys, true); ++ } ++ } ++ vhost_start_config_intr(hdev); ++ return 0; ++fail_start: ++ if (vrings) { ++ vhost_dev_set_vring_enable(hdev, false); ++ } ++fail_vq: ++ while (--i >= 0) { ++ vhost_virtqueue_stop(hdev, ++ vdev, ++ hdev->vqs + i, ++ hdev->vq_index + i); ++ } ++ ++fail_mem: ++ vdev->vhost_started = false; ++ hdev->started = false; ++ return r; ++} ++ ++int vhost_dev_suspend(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) ++{ ++ int i; ++ int ret = 0; ++ EventNotifier *e = &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier; ++ ++ /* should only be called after backend is connected */ ++ if (!hdev->vhost_ops) { ++ error_report("Missing vhost_ops! Operation not permitted!\n"); ++ return -EPERM; ++ } ++ ++ event_notifier_test_and_clear(e); ++ event_notifier_test_and_clear(&vdev->config_notifier); ++ ++ if (hdev->vhost_ops->vhost_dev_suspend) { ++ ret = hdev->vhost_ops->vhost_dev_suspend(hdev); ++ if (ret) { ++ goto fail_suspend; ++ } ++ } ++ if (vrings) { ++ ret = vhost_dev_set_vring_enable(hdev, false); ++ if (ret) { ++ goto fail_suspend; ++ } ++ } ++ for (i = 0; i < hdev->nvqs; ++i) { ++ vhost_virtqueue_stop(hdev, ++ vdev, ++ hdev->vqs + i, ++ hdev->vq_index + i); ++ } ++ ++ if (vhost_dev_has_iommu(hdev)) { ++ hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false); ++ memory_listener_unregister(&hdev->iommu_listener); ++ } ++ vhost_stop_config_intr(hdev); ++ vhost_log_put(hdev, true); ++ hdev->started = false; ++ vdev->vhost_started = false; ++ hdev->vdev = NULL; ++ ++ return ret; ++ ++fail_suspend: ++ event_notifier_test_and_clear(e); ++ ++ return ret; ++} +diff --git a/include/hw/virtio/vdpa-dev-mig.h b/include/hw/virtio/vdpa-dev-mig.h +new file mode 100644 +index 0000000000..89665ca747 +--- /dev/null ++++ b/include/hw/virtio/vdpa-dev-mig.h +@@ -0,0 +1,16 @@ ++/* ++ * Vhost Vdpa Device Migration Header ++ 
* ++ * Copyright (c) Huawei Technologies Co., Ltd. 2023. All Rights Reserved. ++ */ ++ ++#ifndef _VHOST_VDPA_MIGRATION_H ++#define _VHOST_VDPA_MIGRATION_H ++ ++#include "hw/virtio/vdpa-dev.h" ++ ++void vdpa_migration_register(VhostVdpaDevice *vdev); ++ ++void vdpa_migration_unregister(VhostVdpaDevice *vdev); ++ ++#endif /* _VHOST_VDPA_MIGRATION_H */ +diff --git a/include/hw/virtio/vdpa-dev.h b/include/hw/virtio/vdpa-dev.h +index 4dbf98195c..43cbcef81b 100644 +--- a/include/hw/virtio/vdpa-dev.h ++++ b/include/hw/virtio/vdpa-dev.h +@@ -38,6 +38,7 @@ struct VhostVdpaDevice { + uint16_t queue_size; + bool started; + int (*post_init)(VhostVdpaDevice *v, Error **errp); ++ VMChangeStateEntry *vmstate; + }; + + #endif +diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h +index 6ae86833e3..9ca5819deb 100644 +--- a/include/hw/virtio/vhost.h ++++ b/include/hw/virtio/vhost.h +@@ -466,4 +466,7 @@ int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); + */ + int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); + ++int vhost_dev_resume(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); ++int vhost_dev_suspend(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings); ++ + #endif +diff --git a/migration/migration.c b/migration/migration.c +index 23d9233bbe..dce22c2da5 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -99,7 +99,6 @@ static bool migration_object_check(MigrationState *ms, Error **errp); + static int migration_maybe_pause(MigrationState *s, + int *current_active_state, + int new_state); +-static void migrate_fd_cancel(MigrationState *s); + static bool close_return_path_on_source(MigrationState *s); + + static void migration_downtime_start(MigrationState *s) +@@ -1386,7 +1385,7 @@ void migrate_fd_error(MigrationState *s, const Error *error) + migrate_set_error(s, error); + } + +-static void migrate_fd_cancel(MigrationState *s) ++void migrate_fd_cancel(MigrationState *s) + { + 
int old_state ; + +diff --git a/migration/migration.h b/migration/migration.h +index 6aafa04314..2f26c9509b 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -551,4 +551,6 @@ void migration_rp_kick(MigrationState *s); + + int migration_stop_vm(RunState state); + ++void migrate_fd_cancel(MigrationState *s); ++ + #endif +-- +2.27.0 + diff --git a/vhost-introduce-bytemap-for-vhost-backend-logging.patch b/vhost-introduce-bytemap-for-vhost-backend-logging.patch new file mode 100644 index 0000000000000000000000000000000000000000..7293b3b13a637d96422a85a17e2fe52cea5cf825 --- /dev/null +++ b/vhost-introduce-bytemap-for-vhost-backend-logging.patch @@ -0,0 +1,304 @@ +From 962acd498b11ae5ccc040d76ec89990add119dec Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:09:26 +0800 +Subject: [PATCH] vhost: introduce bytemap for vhost backend logging + +As vhost backend may use bytemap for logging, when get log_size +of vhost device, check whether vhost device support VHOST_BACKEND_F_BYTEMAPLOG. +If vhost device support, use bytemap for logging. + +By the way, add log_resize func pointer check and vhost_log_sync return +value check. 
+ +Signed-off-by: libai +--- + hw/virtio/vhost.c | 89 ++++++++++++++++++++++++++++++++++++--- + include/exec/memory.h | 9 ++++ + include/exec/ram_addr.h | 44 +++++++++++++++++++ + include/hw/virtio/vhost.h | 1 + + system/physmem.c | 11 +++++ + 5 files changed, 148 insertions(+), 6 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 038ac37dd0..438182d850 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -29,6 +29,7 @@ + #include "migration/migration.h" + #include "sysemu/dma.h" + #include "trace.h" ++#include "qapi/qapi-commands-migration.h" + + /* enabled until disconnected backend stabilizes */ + #define _VHOST_DEBUG 1 +@@ -44,6 +45,11 @@ + do { } while (0) + #endif + ++static inline bool vhost_bytemap_log_support(struct vhost_dev *dev) ++{ ++ return (dev->backend_cap & BIT_ULL(VHOST_BACKEND_F_BYTEMAPLOG)); ++} ++ + static struct vhost_log *vhost_log; + static struct vhost_log *vhost_log_shm; + +@@ -232,12 +238,40 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, + return 0; + } + ++static int vhost_sync_dirty_bytemap(struct vhost_dev *dev, ++ MemoryRegionSection *section) ++{ ++ unsigned long *bytemap = dev->log->log; ++ return memory_section_set_dirty_bytemap(section, bytemap); ++} ++ + static void vhost_log_sync(MemoryListener *listener, + MemoryRegionSection *section) + { + struct vhost_dev *dev = container_of(listener, struct vhost_dev, + memory_listener); +- vhost_sync_dirty_bitmap(dev, section, 0x0, ~0x0ULL); ++ MigrationState *ms = migrate_get_current(); ++ ++ if (!dev->log_enabled || !dev->started) { ++ return; ++ } ++ ++ if (dev->vhost_ops->vhost_log_sync) { ++ int r = dev->vhost_ops->vhost_log_sync(dev); ++ if (r < 0) { ++ error_report("Failed to sync dirty log: 0x%x\n", r); ++ if (migration_is_running(ms->state)) { ++ qmp_migrate_cancel(NULL); ++ } ++ return; ++ } ++ } ++ ++ if (vhost_bytemap_log_support(dev)) { ++ vhost_sync_dirty_bytemap(dev, section); ++ } else { ++ vhost_sync_dirty_bitmap(dev, section, 
0x0, ~0x0ULL); ++ } + } + + static void vhost_log_sync_range(struct vhost_dev *dev, +@@ -247,7 +281,11 @@ static void vhost_log_sync_range(struct vhost_dev *dev, + /* FIXME: this is N^2 in number of sections */ + for (i = 0; i < dev->n_mem_sections; ++i) { + MemoryRegionSection *section = &dev->mem_sections[i]; +- vhost_sync_dirty_bitmap(dev, section, first, last); ++ if (vhost_bytemap_log_support(dev)) { ++ vhost_sync_dirty_bytemap(dev, section); ++ } else { ++ vhost_sync_dirty_bitmap(dev, section, first, last); ++ } + } + } + +@@ -255,11 +293,19 @@ static uint64_t vhost_get_log_size(struct vhost_dev *dev) + { + uint64_t log_size = 0; + int i; ++ uint64_t vhost_log_chunk_size; ++ ++ if (vhost_bytemap_log_support(dev)) { ++ vhost_log_chunk_size = VHOST_LOG_CHUNK_BYTES; ++ } else { ++ vhost_log_chunk_size = VHOST_LOG_CHUNK; ++ } ++ + for (i = 0; i < dev->mem->nregions; ++i) { + struct vhost_memory_region *reg = dev->mem->regions + i; + uint64_t last = range_get_last(reg->guest_phys_addr, + reg->memory_size); +- log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1); ++ log_size = MAX(log_size, last / vhost_log_chunk_size + 1); + } + return log_size; + } +@@ -377,12 +423,21 @@ static bool vhost_dev_log_is_shared(struct vhost_dev *dev) + dev->vhost_ops->vhost_requires_shm_log(dev); + } + +-static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) ++static inline int vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) + { + struct vhost_log *log = vhost_log_get(size, vhost_dev_log_is_shared(dev)); +- uint64_t log_base = (uintptr_t)log->log; ++ uint64_t log_base; ++ int log_fd; + int r; + ++ if (!log) { ++ r = -ENOMEM; ++ goto out; ++ } ++ ++ log_base = (uint64_t)log->log; ++ log_fd = log_fd; ++ + /* inform backend of log switching, this must be done before + releasing the current log, to ensure no logging is lost */ + r = dev->vhost_ops->vhost_set_log_base(dev, log_base, log); +@@ -390,9 +445,19 @@ static inline void vhost_dev_log_resize(struct 
vhost_dev *dev, uint64_t size) + VHOST_OPS_DEBUG(r, "vhost_set_log_base failed"); + } + ++ if (dev->vhost_ops->vhost_set_log_size) { ++ r = dev->vhost_ops->vhost_set_log_size(dev, size, dev->log); ++ if (r < 0) { ++ VHOST_OPS_DEBUG(r, "vhost_set_log_size failed"); ++ } ++ } ++ + vhost_log_put(dev, true); + dev->log = log; + dev->log_size = size; ++ ++out: ++ return r; + } + + static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr, +@@ -1018,7 +1083,11 @@ static int vhost_migration_log(MemoryListener *listener, bool enable) + } + vhost_log_put(dev, false); + } else { +- vhost_dev_log_resize(dev, vhost_get_log_size(dev)); ++ r = vhost_dev_log_resize(dev, vhost_get_log_size(dev)); ++ if ( r < 0 ) { ++ return r; ++ } ++ + r = vhost_dev_set_log(dev, true); + if (r < 0) { + goto check_dev_state; +@@ -2057,6 +2126,14 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) + VHOST_OPS_DEBUG(r, "vhost_set_log_base failed"); + goto fail_log; + } ++ ++ if (hdev->vhost_ops->vhost_set_log_size) { ++ r = hdev->vhost_ops->vhost_set_log_size(hdev, hdev->log_size, hdev->log); ++ if (r < 0) { ++ VHOST_OPS_DEBUG(r, "vhost_set_log_size failed"); ++ goto fail_log; ++ } ++ } + } + if (vrings) { + r = vhost_dev_set_vring_enable(hdev, true); +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 831f7c996d..e131c2682c 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -2594,6 +2594,15 @@ MemTxResult memory_region_dispatch_write(MemoryRegion *mr, + MemOp op, + MemTxAttrs attrs); + ++/** ++ * memory_section_set_dirty_bytemap: Mark a range of bytes as dirty for a memory section ++ * using a bytemap ++ * ++ * @section: the memory section being dirtied. ++ * @bytemap: bytemap that stores dirty page range information. 
++ */ ++int64_t memory_section_set_dirty_bytemap(MemoryRegionSection *section, unsigned long *bytemap); ++ + /** + * address_space_init: initializes an address space + * +diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h +index 90676093f5..ef6988b445 100644 +--- a/include/exec/ram_addr.h ++++ b/include/exec/ram_addr.h +@@ -535,5 +535,49 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb, + + return num_dirty; + } ++ ++#define BYTES_PER_LONG (sizeof(unsigned long)) ++#define BYTE_WORD(nr) ((nr) / BYTES_PER_LONG) ++#define BYTES_TO_LONGS(nr) DIV_ROUND_UP(nr, BYTES_PER_LONG) ++ ++static inline int64_t _set_dirty_bytemap_atomic(unsigned long *bytemap, unsigned long cur_pfn) ++{ ++ char *byte_of_long = (char *)bytemap; ++ int i; ++ int64_t dirty_num = 0; ++ ++ for (i = 0; i < BYTES_PER_LONG; i++) { ++ if (byte_of_long[i]) { ++ cpu_physical_memory_set_dirty_range((cur_pfn + i) << TARGET_PAGE_BITS, ++ TARGET_PAGE_SIZE, ++ 1 << DIRTY_MEMORY_MIGRATION); ++ /* Per byte ops, no need to atomic_xchg */ ++ byte_of_long[i] = 0; ++ dirty_num++; ++ } ++ } ++ ++ return dirty_num; ++} ++ ++static inline int64_t cpu_physical_memory_set_dirty_bytemap(unsigned long *bytemap, ++ ram_addr_t start, ++ ram_addr_t pages) ++{ ++ unsigned long i; ++ unsigned long len = BYTES_TO_LONGS(pages); ++ unsigned long pfn = (start >> TARGET_PAGE_BITS) / ++ BYTES_PER_LONG * BYTES_PER_LONG; ++ int64_t dirty_mig_bits = 0; ++ ++ for (i = 0; i < len; i++) { ++ if (bytemap[i]) { ++ dirty_mig_bits += _set_dirty_bytemap_atomic(&bytemap[i], ++ pfn + BYTES_PER_LONG * i); ++ } ++ } ++ ++ return dirty_mig_bits; ++} + #endif + #endif +diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h +index 444ca0ad42..6ae86833e3 100644 +--- a/include/hw/virtio/vhost.h ++++ b/include/hw/virtio/vhost.h +@@ -43,6 +43,7 @@ typedef unsigned long vhost_log_chunk_t; + #define VHOST_LOG_PAGE 0x1000 + #define VHOST_LOG_BITS (8 * sizeof(vhost_log_chunk_t)) + #define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * 
VHOST_LOG_BITS) ++#define VHOST_LOG_CHUNK_BYTES (VHOST_LOG_PAGE * sizeof(vhost_log_chunk_t)) + #define VHOST_INVALID_FEATURE_BIT (0xff) + #define VHOST_QUEUE_NUM_CONFIG_INR 0 + +diff --git a/system/physmem.c b/system/physmem.c +index f14d64819b..247c252e53 100644 +--- a/system/physmem.c ++++ b/system/physmem.c +@@ -2602,6 +2602,17 @@ static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr, + cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask); + } + ++int64_t memory_section_set_dirty_bytemap(MemoryRegionSection *section, unsigned long *bytemap) ++{ ++ ram_addr_t start = section->offset_within_region + ++ memory_region_get_ram_addr(section->mr); ++ ram_addr_t pages = int128_get64(section->size) >> TARGET_PAGE_BITS; ++ ++ hwaddr idx = BYTE_WORD( ++ section->offset_within_address_space >> TARGET_PAGE_BITS); ++ return cpu_physical_memory_set_dirty_bytemap(bytemap + idx, start, pages); ++} ++ + void memory_region_flush_rom_device(MemoryRegion *mr, hwaddr addr, hwaddr size) + { + /* +-- +2.27.0 + diff --git a/vhost-user-Add-support-reconnect-vhost-user-socket.patch b/vhost-user-Add-support-reconnect-vhost-user-socket.patch new file mode 100644 index 0000000000000000000000000000000000000000..eec650e70d8b06bb45a20ad0504e803153bfa805 --- /dev/null +++ b/vhost-user-Add-support-reconnect-vhost-user-socket.patch @@ -0,0 +1,168 @@ +From 0bc608ab4117818b32d2a1aaf2d4f5c2aeb54af7 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Fri, 11 Feb 2022 18:05:47 +0800 +Subject: [PATCH] vhost-user: Add support reconnect vhost-user socket + +Add support reconnect vhost-user socket, the reconnect time +is set to be 3 seconds. 
+ +Signed-off-by: Jinhua Cao +--- + chardev/char-socket.c | 19 ++++++++++++++++++- + hw/net/vhost_net.c | 4 +++- + hw/virtio/vhost-user.c | 6 ++++++ + include/chardev/char.h | 16 ++++++++++++++++ + net/vhost-user.c | 3 +++ + 5 files changed, 46 insertions(+), 2 deletions(-) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index 034840593d..9c60e15c8e 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -337,6 +337,22 @@ static GSource *tcp_chr_add_watch(Chardev *chr, GIOCondition cond) + return qio_channel_create_watch(s->ioc, cond); + } + ++static void tcp_chr_set_reconnect_time(Chardev *chr, ++ int64_t reconnect_time) ++{ ++ SocketChardev *s = SOCKET_CHARDEV(chr); ++ s->reconnect_time = reconnect_time; ++} ++ ++void qemu_chr_set_reconnect_time(Chardev *chr, int64_t reconnect_time) ++{ ++ ChardevClass *cc = CHARDEV_GET_CLASS(chr); ++ ++ if (cc->chr_set_reconnect_time) { ++ cc->chr_set_reconnect_time(chr, reconnect_time); ++ } ++} ++ + static void remove_hup_source(SocketChardev *s) + { + if (s->hup_source != NULL) { +@@ -537,7 +553,7 @@ static int tcp_chr_sync_read(Chardev *chr, const uint8_t *buf, int len) + if (s->state != TCP_CHARDEV_STATE_DISCONNECTED) { + qio_channel_set_blocking(s->ioc, false, NULL); + } +- if (size == 0) { ++ if (size == 0 && chr->chr_for_flag != CHR_FOR_VHOST_USER) { + /* connection closed */ + tcp_chr_disconnect(chr); + } +@@ -1543,6 +1559,7 @@ static void char_socket_class_init(ObjectClass *oc, void *data) + cc->set_msgfds = tcp_set_msgfds; + cc->chr_add_client = tcp_chr_add_client; + cc->chr_add_watch = tcp_chr_add_watch; ++ cc->chr_set_reconnect_time = tcp_chr_set_reconnect_time; + cc->chr_update_read_handler = tcp_chr_update_read_handler; + + object_class_property_add(oc, "addr", "SocketAddress", +diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c +index 1b08b02477..e48c373b14 100644 +--- a/hw/net/vhost_net.c ++++ b/hw/net/vhost_net.c +@@ -459,7 +459,9 @@ int vhost_net_start(VirtIODevice *dev, 
NetClientState *ncs, + peer = qemu_get_peer(ncs, n->max_queue_pairs); + } + +- if (peer->vring_enable) { ++ /* ovs needs to restore all states of vring */ ++ if (peer->vring_enable || ++ ncs[i].peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { + /* restore vring enable state */ + r = vhost_set_vring_enable(peer, peer->vring_enable); + +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index f214df804b..05e14e1eff 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -2126,9 +2126,15 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, + struct vhost_user *u; + VhostUserState *vus = (VhostUserState *) opaque; + int err; ++ Chardev *chr; + + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); + ++ chr = qemu_chr_fe_get_driver(((VhostUserState *)opaque)->chr); ++ if (chr) { ++ chr->chr_for_flag = CHR_FOR_VHOST_USER; ++ } ++ + u = g_new0(struct vhost_user, 1); + u->user = vus; + u->dev = dev; +diff --git a/include/chardev/char.h b/include/chardev/char.h +index 01df55f9e8..f8bd469466 100644 +--- a/include/chardev/char.h ++++ b/include/chardev/char.h +@@ -14,6 +14,8 @@ + #define IAC_SB 250 + #define IAC 255 + ++#define CHR_FOR_VHOST_USER 0x32a1 ++ + /* character device */ + typedef struct CharBackend CharBackend; + +@@ -70,6 +72,7 @@ struct Chardev { + GSource *gsource; + GMainContext *gcontext; + DECLARE_BITMAP(features, QEMU_CHAR_FEATURE_LAST); ++ int chr_for_flag; + }; + + /** +@@ -227,6 +230,16 @@ int qemu_chr_write(Chardev *s, const uint8_t *buf, int len, bool write_all); + #define qemu_chr_write_all(s, buf, len) qemu_chr_write(s, buf, len, true) + int qemu_chr_wait_connected(Chardev *chr, Error **errp); + ++/** ++ * @qemu_chr_set_reconnect_time: ++ * ++ * Set reconnect time for char disconnect. ++ * Currently, only vhost user will call it. 
++ * ++ * @reconnect_time the reconnect_time to be set ++ */ ++void qemu_chr_set_reconnect_time(Chardev *chr, int64_t reconnect_time); ++ + #define TYPE_CHARDEV "chardev" + OBJECT_DECLARE_TYPE(Chardev, ChardevClass, CHARDEV) + +@@ -306,6 +319,9 @@ struct ChardevClass { + + /* handle various events */ + void (*chr_be_event)(Chardev *s, QEMUChrEvent event); ++ ++ /* set reconnect time */ ++ void (*chr_set_reconnect_time)(Chardev *chr, int64_t reconnect_time); + }; + + Chardev *qemu_chardev_new(const char *id, const char *typename, +diff --git a/net/vhost-user.c b/net/vhost-user.c +index 12555518e8..51fa8c678f 100644 +--- a/net/vhost-user.c ++++ b/net/vhost-user.c +@@ -21,6 +21,8 @@ + #include "qemu/option.h" + #include "trace.h" + ++#define VHOST_USER_RECONNECT_TIME (3) ++ + typedef struct NetVhostUserState { + NetClientState nc; + CharBackend chr; /* only queue index 0 */ +@@ -292,6 +294,7 @@ static void net_vhost_user_event(void *opaque, QEMUChrEvent event) + trace_vhost_user_event(chr->label, event); + switch (event) { + case CHR_EVENT_OPENED: ++ qemu_chr_set_reconnect_time(chr, VHOST_USER_RECONNECT_TIME); + if (vhost_user_start(queues, ncs, s->vhost_user) < 0) { + qemu_chr_fe_disconnect(&s->chr); + return; +-- +2.27.0 + diff --git a/vhost-user-Set-the-acked_features-to-vm-s-featrue.patch b/vhost-user-Set-the-acked_features-to-vm-s-featrue.patch new file mode 100644 index 0000000000000000000000000000000000000000..24c2670561fc19f9c6c4e856c74c7bf9799c3f75 --- /dev/null +++ b/vhost-user-Set-the-acked_features-to-vm-s-featrue.patch @@ -0,0 +1,96 @@ +From 0154183e118169be5945cb5ebec2b79379071591 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Fri, 11 Feb 2022 18:49:21 +0800 +Subject: [PATCH] vhost-user: Set the acked_features to vm's featrue + +Fix the problem when vm restart, the ovs restart and lead to the net +unreachable. The soluation is set the acked_features to vm's featrue +just the same as guest virtio-net mod load. 
+ +Signed-off-by: Jinhua Cao +--- + hw/net/vhost_net.c | 58 +++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 57 insertions(+), 1 deletion(-) + +diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c +index e8e1661646..1b08b02477 100644 +--- a/hw/net/vhost_net.c ++++ b/hw/net/vhost_net.c +@@ -167,9 +167,26 @@ static int vhost_net_get_fd(NetClientState *backend) + } + } + ++static uint64_t vhost_get_mask_features(const int *feature_bits, uint64_t features) ++{ ++ const int *bit = feature_bits; ++ uint64_t out_features = 0; ++ ++ while (*bit != VHOST_INVALID_FEATURE_BIT) { ++ uint64_t bit_mask = (1ULL << *bit); ++ if (features & bit_mask) { ++ out_features |= bit_mask; ++ } ++ bit++; ++ } ++ return out_features; ++} ++ + struct vhost_net *vhost_net_init(VhostNetOptions *options) + { + int r; ++ VirtIONet *n; ++ VirtIODevice *vdev; + bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL; + struct vhost_net *net = g_new0(struct vhost_net, 1); + uint64_t features = 0; +@@ -195,7 +212,46 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) + net->backend = r; + net->dev.protocol_features = 0; + } else { +- net->dev.backend_features = 0; ++ /* for ovs restart when vm start. ++ * Normal situation: ++ * 1.vm start. ++ * 2.vhost_net_init init ok, then dev.acked_features is 0x40000000. ++ * 3.guest virtio-net mod load. qemu will call virtio_net_set_features set ++ * dev.acked_features to 0x40408000. ++ * 4.feature set to ovs's vhostuser(0x40408000). ++ * 5.ovs restart. ++ * 6.vhost_user_stop will save net->dev.acked_features(0x40408000) to ++ * VhostUserState's acked_features(0x40408000). ++ * 7.restart ok. ++ * 8.vhost_net_init fun call vhost_user_get_acked_features get the save ++ * features, and set to net->dev.acked_features. ++ * Abnormal situation: ++ * 1.vm start. ++ * 2.vhost_net_init init ok, then dev.acked_features is 0x40000000. ++ * 3.ovs restart. 
++ * 4.vhost_user_stop will save net->dev.acked_features(0x40000000) to ++ * VhostUserState's acked_features(0x40000000). ++ * 5.guest virtio-net mod load. qemu will call virtio_net_set_features set ++ * dev.acked_features to 0x40408000. ++ * 6.restart ok. ++ * 7.vhost_net_init fun call vhost_user_get_acked_features get the save ++ * features(0x40000000), and set to net->dev.acked_features(0x40000000). ++ * 8.feature set to ovs's vhostuser(0x40000000). ++ * ++ * in abnormal situation, qemu set the wrong features to ovs's vhostuser, ++ * then the vm's network will be down. ++ * in abnormal situation, we found it just lost the guest feartures in ++ * acked_features, so hear we set the acked_features to vm's featrue ++ * just the same as guest virtio-net mod load. ++ */ ++ if (options->net_backend->peer) { ++ n = qemu_get_nic_opaque(options->net_backend->peer); ++ vdev = VIRTIO_DEVICE(n); ++ net->dev.backend_features = vhost_get_mask_features(vhost_net_get_feature_bits(net), ++ vdev->guest_features); ++ } else { ++ net->dev.backend_features = 0; ++ } + net->dev.protocol_features = 0; + net->backend = -1; + +-- +2.27.0 + diff --git a/vhost-user-add-unregister_savevm-when-vhost-user-cle.patch b/vhost-user-add-unregister_savevm-when-vhost-user-cle.patch new file mode 100644 index 0000000000000000000000000000000000000000..558464ce14870d63b86b4685410cff74235a2865 --- /dev/null +++ b/vhost-user-add-unregister_savevm-when-vhost-user-cle.patch @@ -0,0 +1,32 @@ +From c65ff10063a6c599b88cba27fd70a72e2e0cc0ff Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 20:21:33 +0800 +Subject: [PATCH] vhost-user: add unregister_savevm when vhost-user cleanup + +commit 12cf5e9ece ("vhost-user: add vhost_set_mem_table +when vm load_setup at destination") only register savevm +handler but not unregister it, which will cause the +number of handers increase when vhost-user devices hotplug, +so this commit add unregister_savevm when vhost-user cleanup. 
+ +Fixes: 12cf5e9ece ("vhost-user: add vhost_set_mem_table when vm load_setup at destination") +Signed-off-by: Jinhua Cao +--- + hw/virtio/vhost-user.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index 6739dfc98e..e589ee3572 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -2310,6 +2310,7 @@ static int vhost_user_backend_cleanup(struct vhost_dev *dev) + u->region_rb_len = 0; + g_free(u); + dev->opaque = 0; ++ unregister_savevm(NULL, "vhost-user", dev); + + return 0; + } +-- +2.27.0 + diff --git a/vhost-user-add-vhost_set_mem_table-when-vm-load_setu.patch b/vhost-user-add-vhost_set_mem_table-when-vm-load_setu.patch new file mode 100644 index 0000000000000000000000000000000000000000..795185b2a1d522411e6b1bb5b127e56048afa7c1 --- /dev/null +++ b/vhost-user-add-vhost_set_mem_table-when-vm-load_setu.patch @@ -0,0 +1,130 @@ +From 12cf5e9ece9cb0825f14ca80f6b1c5d1eb95c3e5 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Fri, 11 Feb 2022 18:59:34 +0800 +Subject: [PATCH] vhost-user: add vhost_set_mem_table when vm load_setup at + destination + +When migrate huge vm, packages lost are 90+. + +During the load_setup of the destination vm, pass the +vm mem structure to ovs, the netcard could be enabled +when the migration finish state shifting. 
+ +Signed-off-by: Jinhua Cao +--- + hw/virtio/vhost-user.c | 24 ++++++++++++++++++++++++ + tests/qtest/vhost-user-test.c | 35 ++++++++++++++++++----------------- + 2 files changed, 42 insertions(+), 17 deletions(-) + +diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c +index f214df804b..6739dfc98e 100644 +--- a/hw/virtio/vhost-user.c ++++ b/hw/virtio/vhost-user.c +@@ -28,6 +28,7 @@ + #include "sysemu/cryptodev.h" + #include "migration/migration.h" + #include "migration/postcopy-ram.h" ++#include "migration/register.h" + #include "trace.h" + #include "exec/ramblock.h" + +@@ -2119,6 +2120,28 @@ static int vhost_user_postcopy_notifier(NotifierWithReturn *notifier, + return 0; + } + ++static int vhost_user_load_setup(QEMUFile *f, void *opaque) ++{ ++ struct vhost_dev *hdev = opaque; ++ int r; ++ ++ if (hdev->vhost_ops && hdev->vhost_ops->vhost_set_mem_table) { ++ r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem); ++ if (r < 0) { ++ qemu_log("error: vhost_set_mem_table failed: %s(%d)\n", ++ strerror(errno), errno); ++ return r; ++ } else { ++ qemu_log("info: vhost_set_mem_table OK\n"); ++ } ++ } ++ return 0; ++} ++ ++SaveVMHandlers savevm_vhost_user_handlers = { ++ .load_setup = vhost_user_load_setup, ++}; ++ + static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, + Error **errp) + { +@@ -2255,6 +2278,7 @@ static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque, + + u->postcopy_notifier.notify = vhost_user_postcopy_notifier; + postcopy_add_notifier(&u->postcopy_notifier); ++ register_savevm_live("vhost-user", -1, 1, &savevm_vhost_user_handlers, dev); + + return 0; + } +diff --git a/tests/qtest/vhost-user-test.c b/tests/qtest/vhost-user-test.c +index d4e437265f..fadf3f0f2e 100644 +--- a/tests/qtest/vhost-user-test.c ++++ b/tests/qtest/vhost-user-test.c +@@ -799,6 +799,23 @@ static void test_read_guest_mem(void *obj, void *arg, QGuestAllocator *alloc) + read_guest_mem_server(global_qtest, server); + } + ++static void 
wait_for_rings_started(TestServer *s, size_t count) ++{ ++ gint64 end_time; ++ ++ g_mutex_lock(&s->data_mutex); ++ end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND; ++ while (ctpop64(s->rings) != count) { ++ if (!g_cond_wait_until(&s->data_cond, &s->data_mutex, end_time)) { ++ /* timeout has passed */ ++ g_assert_cmpint(ctpop64(s->rings), ==, count); ++ break; ++ } ++ } ++ ++ g_mutex_unlock(&s->data_mutex); ++} ++ + static void test_migrate(void *obj, void *arg, QGuestAllocator *alloc) + { + TestServer *s = arg; +@@ -869,6 +886,7 @@ static void test_migrate(void *obj, void *arg, QGuestAllocator *alloc) + qtest_qmp_eventwait(to, "RESUME"); + + g_assert(wait_for_fds(dest)); ++ wait_for_rings_started(dest, 2); + read_guest_mem_server(to, dest); + + g_source_destroy(source); +@@ -880,23 +898,6 @@ static void test_migrate(void *obj, void *arg, QGuestAllocator *alloc) + g_string_free(dest_cmdline, true); + } + +-static void wait_for_rings_started(TestServer *s, size_t count) +-{ +- gint64 end_time; +- +- g_mutex_lock(&s->data_mutex); +- end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND; +- while (ctpop64(s->rings) != count) { +- if (!g_cond_wait_until(&s->data_cond, &s->data_mutex, end_time)) { +- /* timeout has passed */ +- g_assert_cmpint(ctpop64(s->rings), ==, count); +- break; +- } +- } +- +- g_mutex_unlock(&s->data_mutex); +-} +- + static inline void test_server_connect(TestServer *server) + { + test_server_create_chr(server, ",reconnect=1"); +-- +2.27.0 + diff --git a/vhost-user-quit-infinite-loop-while-used-memslots-is.patch b/vhost-user-quit-infinite-loop-while-used-memslots-is.patch new file mode 100644 index 0000000000000000000000000000000000000000..c62111f584f741e6d64f9d08f77939bf3c525814 --- /dev/null +++ b/vhost-user-quit-infinite-loop-while-used-memslots-is.patch @@ -0,0 +1,89 @@ +From 90d4333d4bbde45a10892bf9004979d239d39e28 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Fri, 11 Feb 2022 19:24:30 +0800 +Subject: [PATCH] vhost-user: 
quit infinite loop while used memslots is more + than the backend limit + +When used memslots is more than the backend limit, +the vhost-user netcard would attach fail and quit +infinite loop. + +Signed-off-by: Jinhua Cao +--- + hw/virtio/vhost.c | 10 ++++++++++ + include/hw/virtio/vhost.h | 1 + + net/vhost-user.c | 5 +++++ + 3 files changed, 16 insertions(+) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index a8adc149ad..038ac37dd0 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -56,6 +56,8 @@ static unsigned int used_shared_memslots; + static QLIST_HEAD(, vhost_dev) vhost_devices = + QLIST_HEAD_INITIALIZER(vhost_devices); + ++bool used_memslots_exceeded; ++ + unsigned int vhost_get_max_memslots(void) + { + unsigned int max = UINT_MAX; +@@ -1569,8 +1571,11 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, + error_setg(errp, "vhost backend memory slots limit (%d) is less" + " than current number of used (%d) and reserved (%d)" + " memory slots for memory devices.", limit, used, reserved); ++ used_memslots_exceeded = true; + r = -EINVAL; + goto fail_busyloop; ++ } else { ++ used_memslots_exceeded = false; + } + + return 0; +@@ -2405,3 +2410,8 @@ fail: + + return ret; + } ++ ++bool used_memslots_is_exceeded(void) ++{ ++ return used_memslots_exceeded; ++} +diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h +index 02477788df..444ca0ad42 100644 +--- a/include/hw/virtio/vhost.h ++++ b/include/hw/virtio/vhost.h +@@ -340,6 +340,7 @@ int vhost_dev_set_inflight(struct vhost_dev *dev, + struct vhost_inflight *inflight); + int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size, + struct vhost_inflight *inflight); ++bool used_memslots_is_exceeded(void); + bool vhost_dev_has_iommu(struct vhost_dev *dev); + + #ifdef CONFIG_VHOST +diff --git a/net/vhost-user.c b/net/vhost-user.c +index 51fa8c678f..86fd5056ab 100644 +--- a/net/vhost-user.c ++++ b/net/vhost-user.c +@@ -20,6 +20,7 @@ + #include "qemu/error-report.h" + 
#include "qemu/option.h" + #include "trace.h" ++#include "include/hw/virtio/vhost.h" + + #define VHOST_USER_RECONNECT_TIME (3) + +@@ -373,6 +374,10 @@ static int net_vhost_user_init(NetClientState *peer, const char *device, + qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, + net_vhost_user_event, NULL, nc0->name, NULL, + true); ++ if (used_memslots_is_exceeded()) { ++ error_report("used memslots exceeded the backend limit, quit loop"); ++ goto err; ++ } + } while (!s->started); + + assert(s->vhost_net); +-- +2.27.0 + diff --git a/vhost-vdpa-add-VHOST_BACKEND_F_BYTEMAPLOG.patch b/vhost-vdpa-add-VHOST_BACKEND_F_BYTEMAPLOG.patch new file mode 100644 index 0000000000000000000000000000000000000000..583fc50067bd42d94ccb891dbc358b4c9470613d --- /dev/null +++ b/vhost-vdpa-add-VHOST_BACKEND_F_BYTEMAPLOG.patch @@ -0,0 +1,49 @@ +From 3fe9a15feba924675ffcc5b797185091cfb8a007 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 14:49:53 +0800 +Subject: [PATCH] vhost-vdpa: add VHOST_BACKEND_F_BYTEMAPLOG + +support VHOST_BACKEND_F_BYTEMAPLOG to support vhost +device bytemap logging. 
+ +Signed-off-by: libai +--- + hw/virtio/vhost-vdpa.c | 9 +++++---- + include/standard-headers/linux/vhost_types.h | 2 ++ + 2 files changed, 7 insertions(+), 4 deletions(-) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index 819b2d811a..ce8ff7f417 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -829,10 +829,11 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, + static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) + { + uint64_t features; +- uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | +- 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | +- 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID | +- 0x1ULL << VHOST_BACKEND_F_SUSPEND; ++ uint64_t f = BIT_ULL(VHOST_BACKEND_F_IOTLB_MSG_V2) | ++ BIT_ULL(VHOST_BACKEND_F_IOTLB_BATCH) | ++ BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID) | ++ BIT_ULL(VHOST_BACKEND_F_SUSPEND) | ++ BIT_ULL(VHOST_BACKEND_F_BYTEMAPLOG); + int r; + + if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { +diff --git a/include/standard-headers/linux/vhost_types.h b/include/standard-headers/linux/vhost_types.h +index fd54044936..46fc53cd83 100644 +--- a/include/standard-headers/linux/vhost_types.h ++++ b/include/standard-headers/linux/vhost_types.h +@@ -192,5 +192,7 @@ struct vhost_vdpa_iova_range { + #define VHOST_BACKEND_F_DESC_ASID 0x7 + /* IOTLB don't flush memory mapping across device reset */ + #define VHOST_BACKEND_F_IOTLB_PERSIST 0x8 ++/* device can use bytemap log */ ++#define VHOST_BACKEND_F_BYTEMAPLOG 0x3f + + #endif +-- +2.27.0 + diff --git a/vhost-vdpa-add-migration-log-ops-for-VhostOps.patch b/vhost-vdpa-add-migration-log-ops-for-VhostOps.patch new file mode 100644 index 0000000000000000000000000000000000000000..83b2612fbff63dda4649c055176a43423092c1d6 --- /dev/null +++ b/vhost-vdpa-add-migration-log-ops-for-VhostOps.patch @@ -0,0 +1,127 @@ +From 3bc7a4e430e01fd90b427bf74a904664eda9ece6 Mon Sep 17 00:00:00 2001 +From: libai +Date: Mon, 4 Dec 2023 15:04:25 +0800 +Subject: [PATCH] vhost-vdpa: 
add migration log ops for VhostOps + +Implement vhost_set_log_size for setting buffer size for logging. +Implement vhost_set_log_fd to specify an eventfd to signal on log write. +Implement vhost_log_sync for getting dirtymap logged by vhost backend. + +Signed-off-by: libai +--- + hw/virtio/vhost-vdpa.c | 37 +++++++++++++++++++++++++++++++ + include/hw/virtio/vhost-backend.h | 8 +++++++ + linux-headers/linux/vhost.h | 4 ++++ + 3 files changed, 49 insertions(+) + +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index ce8ff7f417..037a9c6e4c 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -1355,6 +1355,30 @@ static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, + return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base); + } + ++static int vhost_vdpa_set_log_fd(struct vhost_dev *dev, int fd, ++ struct vhost_log *log) ++{ ++ struct vhost_vdpa *v = dev->opaque; ++ if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) { ++ return 0; ++ } ++ ++ return vhost_vdpa_call(dev, VHOST_SET_LOG_FD, &fd); ++} ++ ++static int vhost_vdpa_set_log_size(struct vhost_dev *dev, uint64_t size, ++ struct vhost_log *log) ++{ ++ struct vhost_vdpa *v = dev->opaque; ++ uint64_t logsize = size * sizeof(*(log->log)); ++ ++ if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) { ++ return 0; ++ } ++ ++ return vhost_vdpa_call(dev, VHOST_SET_LOG_SIZE, &logsize); ++} ++ + static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev, + struct vhost_vring_addr *addr) + { +@@ -1489,11 +1513,23 @@ static bool vhost_vdpa_force_iommu(struct vhost_dev *dev) + return true; + } + ++static int vhost_vdpa_log_sync(struct vhost_dev *dev) ++{ ++ struct vhost_vdpa *v = dev->opaque; ++ if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) { ++ return 0; ++ } ++ ++ return vhost_vdpa_call(dev, VHOST_LOG_SYNC, NULL); ++} ++ + const VhostOps vdpa_ops = { + .backend_type = VHOST_BACKEND_TYPE_VDPA, + .vhost_backend_init = vhost_vdpa_init, + .vhost_backend_cleanup 
= vhost_vdpa_cleanup, + .vhost_set_log_base = vhost_vdpa_set_log_base, ++ .vhost_set_log_size = vhost_vdpa_set_log_size, ++ .vhost_set_log_fd = vhost_vdpa_set_log_fd, + .vhost_set_vring_addr = vhost_vdpa_set_vring_addr, + .vhost_set_vring_num = vhost_vdpa_set_vring_num, + .vhost_set_vring_base = vhost_vdpa_set_vring_base, +@@ -1520,6 +1556,7 @@ const VhostOps vdpa_ops = { + .vhost_get_device_id = vhost_vdpa_get_device_id, + .vhost_vq_get_addr = vhost_vdpa_vq_get_addr, + .vhost_force_iommu = vhost_vdpa_force_iommu, ++ .vhost_log_sync = vhost_vdpa_log_sync, + .vhost_set_config_call = vhost_vdpa_set_config_call, + .vhost_reset_status = vhost_vdpa_reset_status, + }; +diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h +index a86d103f82..71b02e4a12 100644 +--- a/include/hw/virtio/vhost-backend.h ++++ b/include/hw/virtio/vhost-backend.h +@@ -65,6 +65,11 @@ typedef int (*vhost_scsi_get_abi_version_op)(struct vhost_dev *dev, + int *version); + typedef int (*vhost_set_log_base_op)(struct vhost_dev *dev, uint64_t base, + struct vhost_log *log); ++typedef int (*vhost_set_log_size_op)(struct vhost_dev *dev, uint64_t size, ++ struct vhost_log *log); ++typedef int (*vhost_set_log_fd_op)(struct vhost_dev *dev, int fd, ++ struct vhost_log *log); ++typedef int (*vhost_log_sync_op)(struct vhost_dev *dev); + typedef int (*vhost_set_mem_table_op)(struct vhost_dev *dev, + struct vhost_memory *mem); + typedef int (*vhost_set_vring_addr_op)(struct vhost_dev *dev, +@@ -162,6 +167,9 @@ typedef struct VhostOps { + vhost_scsi_clear_endpoint_op vhost_scsi_clear_endpoint; + vhost_scsi_get_abi_version_op vhost_scsi_get_abi_version; + vhost_set_log_base_op vhost_set_log_base; ++ vhost_set_log_size_op vhost_set_log_size; ++ vhost_set_log_fd_op vhost_set_log_fd; ++ vhost_log_sync_op vhost_log_sync; + vhost_set_mem_table_op vhost_set_mem_table; + vhost_set_vring_addr_op vhost_set_vring_addr; + vhost_set_vring_endian_op vhost_set_vring_endian; +diff --git 
a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h +index 649560c685..19dc7fd36c 100644 +--- a/linux-headers/linux/vhost.h ++++ b/linux-headers/linux/vhost.h +@@ -43,6 +43,10 @@ + * The bit is set using an atomic 32 bit operation. */ + /* Set base address for logging. */ + #define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) ++/* Set buffer size for logging */ ++#define VHOST_SET_LOG_SIZE _IOW(VHOST_VIRTIO, 0x05, __u64) ++/* Logging sync */ ++#define VHOST_LOG_SYNC _IO(VHOST_VIRTIO, 0x06) + /* Specify an eventfd file descriptor to signal on log write. */ + #define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) + /* By default, a device gets one vhost_worker that its virtqueues share. This +-- +2.27.0 + diff --git a/virtio-bugfix-add-rcu_read_lock-when-vring_avail_idx.patch b/virtio-bugfix-add-rcu_read_lock-when-vring_avail_idx.patch new file mode 100644 index 0000000000000000000000000000000000000000..1d476ac2edd6745c975e35694679cbe38c8e3cd2 --- /dev/null +++ b/virtio-bugfix-add-rcu_read_lock-when-vring_avail_idx.patch @@ -0,0 +1,38 @@ +From 7b4a9547e68147291e68258db9415ef5a20fe06b Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 11:16:26 +0800 +Subject: [PATCH] virtio: bugfix: add rcu_read_lock when vring_avail_idx is + called + +vring_avail_idx() should be called within rcu_read_lock(), +or it may get NULL caches in vring_get_region_caches() and +trigger an assert(). + +Signed-off-by: Jinhua Cao +--- + hw/virtio/virtio.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 27ceab92be..ec09d515c2 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2801,6 +2801,7 @@ static void check_vring_avail_num(VirtIODevice *vdev, int index) + { + uint16_t nheads; + ++ rcu_read_lock(); + /* Check it isn't doing strange things with descriptor numbers. 
*/ + nheads = vring_avail_idx(&vdev->vq[index]) - vdev->vq[index].last_avail_idx; + if (nheads > vdev->vq[index].vring.num) { +@@ -2811,6 +2812,7 @@ static void check_vring_avail_num(VirtIODevice *vdev, int index) + vring_avail_idx(&vdev->vq[index]), + vdev->vq[index].last_avail_idx, nheads); + } ++ rcu_read_unlock(); + } + + int virtio_save(VirtIODevice *vdev, QEMUFile *f) +-- +2.27.0 + diff --git a/virtio-bugfix-check-the-value-of-caches-before-acces.patch b/virtio-bugfix-check-the-value-of-caches-before-acces.patch new file mode 100644 index 0000000000000000000000000000000000000000..56d0513fecb8f477e785bed17a1d59ab51ca42f4 --- /dev/null +++ b/virtio-bugfix-check-the-value-of-caches-before-acces.patch @@ -0,0 +1,42 @@ +From f6b3e8ea39d00d25ab979f7b24842dc24e263ed8 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 14:37:52 +0800 +Subject: [PATCH] virtio: bugfix: check the value of caches before accessing it + +Vring caches may be NULL in check_vring_avail_num() if +virtio_reset() is called at the same time, such as when +the virtual machine starts. +So check it before accessing it in vring_avail_idx(). + +Signed-off-by: Jinhua Cao +--- + hw/virtio/virtio.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 1f78b74c00..d93ea62723 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2800,8 +2800,19 @@ static const VMStateDescription vmstate_virtio = { + static void check_vring_avail_num(VirtIODevice *vdev, int index) + { + uint16_t nheads; ++ VRingMemoryRegionCaches *caches; + + rcu_read_lock(); ++ caches = qatomic_rcu_read(&vdev->vq[index].vring.caches); ++ if (caches == NULL) { ++ /* ++ * caches may be NULL if virtio_reset is called at the same time, ++ * such as when the virtual machine starts. ++ */ ++ rcu_read_unlock(); ++ return; ++ } ++ + /* Check it isn't doing strange things with descriptor numbers. 
*/ + nheads = vring_avail_idx(&vdev->vq[index]) - vdev->vq[index].last_avail_idx; + if (nheads > vdev->vq[index].vring.num) { +-- +2.27.0 + diff --git a/virtio-check-descriptor-numbers.patch b/virtio-check-descriptor-numbers.patch new file mode 100644 index 0000000000000000000000000000000000000000..f20748d899e42c610ff9653c39b99a331e919ba8 --- /dev/null +++ b/virtio-check-descriptor-numbers.patch @@ -0,0 +1,52 @@ +From b57e956ea522b487081d1c94aa2e4af6a3314d20 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 11:09:36 +0800 +Subject: [PATCH] virtio: check descriptor numbers + +Check if the vring num is normal in virtio_save(), and log when +the VM pushes a wrong vring num down by writing to the IO port. + +Signed-off-by: Jinhua Cao +--- + hw/virtio/virtio.c | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index a9aa0c4f66..27ceab92be 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2797,6 +2797,22 @@ static const VMStateDescription vmstate_virtio = { + } + }; + ++static void check_vring_avail_num(VirtIODevice *vdev, int index) ++{ ++ uint16_t nheads; ++ ++ /* Check it isn't doing strange things with descriptor numbers. 
*/ ++ nheads = vring_avail_idx(&vdev->vq[index]) - vdev->vq[index].last_avail_idx; ++ if (nheads > vdev->vq[index].vring.num) { ++ qemu_log("VQ %d size 0x%x Guest index 0x%x " ++ "inconsistent with Host index 0x%x: " ++ "delta 0x%x\n", ++ index, vdev->vq[index].vring.num, ++ vring_avail_idx(&vdev->vq[index]), ++ vdev->vq[index].last_avail_idx, nheads); ++ } ++} ++ + int virtio_save(VirtIODevice *vdev, QEMUFile *f) + { + BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); +@@ -2827,6 +2843,8 @@ int virtio_save(VirtIODevice *vdev, QEMUFile *f) + if (vdev->vq[i].vring.num == 0) + break; + ++ check_vring_avail_num(vdev, i); ++ + qemu_put_be32(f, vdev->vq[i].vring.num); + if (k->has_variable_vring_alignment) { + qemu_put_be32(f, vdev->vq[i].vring.align); +-- +2.27.0 + diff --git a/virtio-gpu-Correct-virgl_renderer_resource_get_info-.patch b/virtio-gpu-Correct-virgl_renderer_resource_get_info-.patch new file mode 100644 index 0000000000000000000000000000000000000000..fe1ade710d449101dbca7ca0d7e09e03c697500f --- /dev/null +++ b/virtio-gpu-Correct-virgl_renderer_resource_get_info-.patch @@ -0,0 +1,73 @@ +From ffb0dcccbf5f6e662e7c0b6afa4fe7308d96cc06 Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Tue, 27 Feb 2024 17:06:01 +0800 +Subject: [PATCH] virtio-gpu: Correct virgl_renderer_resource_get_info() error + check +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from 574b64aa6754ba491f51024c5a823a674d48a658 + +virgl_renderer_resource_get_info() returns errno and not -1 on error. +Correct the return-value check. + +Reviewed-by: Marc-André Lureau +Signed-off-by: Dmitry Osipenko +Message-Id: <20240129073921.446869-1-dmitry.osipenko@collabora.com> +Cc: qemu-stable@nongnu.org +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Signed-off-by: dinglimin +--- + contrib/vhost-user-gpu/virgl.c | 6 +++--- + hw/display/virtio-gpu-virgl.c | 2 +- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/contrib/vhost-user-gpu/virgl.c b/contrib/vhost-user-gpu/virgl.c +index d1ccdf7d06..51da0e3667 100644 +--- a/contrib/vhost-user-gpu/virgl.c ++++ b/contrib/vhost-user-gpu/virgl.c +@@ -327,7 +327,7 @@ virgl_get_resource_info_modifiers(uint32_t resource_id, + #ifdef VIRGL_RENDERER_RESOURCE_INFO_EXT_VERSION + struct virgl_renderer_resource_info_ext info_ext; + ret = virgl_renderer_resource_get_info_ext(resource_id, &info_ext); +- if (ret < 0) { ++ if (ret) { + return ret; + } + +@@ -335,7 +335,7 @@ virgl_get_resource_info_modifiers(uint32_t resource_id, + *modifiers = info_ext.modifiers; + #else + ret = virgl_renderer_resource_get_info(resource_id, info); +- if (ret < 0) { ++ if (ret) { + return ret; + } + +@@ -372,7 +372,7 @@ virgl_cmd_set_scanout(VuGpu *g, + uint64_t modifiers = 0; + ret = virgl_get_resource_info_modifiers(ss.resource_id, &info, + &modifiers); +- if (ret == -1) { ++ if (ret) { + g_critical("%s: illegal resource specified %d\n", + __func__, ss.resource_id); + cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID; +diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c +index 8bb7a2c21f..9f34d0e661 100644 +--- a/hw/display/virtio-gpu-virgl.c ++++ b/hw/display/virtio-gpu-virgl.c +@@ -181,7 +181,7 @@ static void virgl_cmd_set_scanout(VirtIOGPU *g, + memset(&info, 0, sizeof(info)); + ret = virgl_renderer_resource_get_info(ss.resource_id, &info); + #endif +- if (ret == -1) { ++ if (ret) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: illegal resource specified %d\n", + __func__, ss.resource_id); +-- +2.27.0 + diff --git a/virtio-gpu-remove-needless-condition.patch b/virtio-gpu-remove-needless-condition.patch new file mode 100644 index 0000000000000000000000000000000000000000..4930a2c5129cef2ccf57167eaba31cf43aad920d --- /dev/null +++ 
b/virtio-gpu-remove-needless-condition.patch @@ -0,0 +1,44 @@ +From 77b2f29dce6ddedcc13488eb80add2f9023b4b89 Mon Sep 17 00:00:00 2001 +From: dinglimin +Date: Wed, 13 Mar 2024 11:23:35 +0800 +Subject: [PATCH] virtio-gpu: remove needless condition +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +cherry-pick from cab47b210598c11b76053a01316df9835b94dc09 +qemu_create_displaysurface_pixman() never returns NULL. +Signed-off-by: Marc-André Lureau +Signed-off-by: dinglimin +--- + hw/display/virtio-gpu.c | 7 ------- + 1 file changed, 7 deletions(-) + +diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c +index b016d3bac8..b02d1e3a4c 100644 +--- a/hw/display/virtio-gpu.c ++++ b/hw/display/virtio-gpu.c +@@ -679,10 +679,6 @@ static void virtio_gpu_do_set_scanout(VirtIOGPU *g, + + /* realloc the surface ptr */ + scanout->ds = qemu_create_displaysurface_pixman(rect); +- if (!scanout->ds) { +- *error = VIRTIO_GPU_RESP_ERR_UNSPEC; +- return; +- } + #ifdef WIN32 + qemu_displaysurface_win32_set_handle(scanout->ds, res->handle, fb->offset); + #endif +@@ -1418,9 +1414,6 @@ static int virtio_gpu_post_load(void *opaque, int version_id) + return -EINVAL; + } + scanout->ds = qemu_create_displaysurface_pixman(res->image); +- if (!scanout->ds) { +- return -EINVAL; +- } + #ifdef WIN32 + qemu_displaysurface_win32_set_handle(scanout->ds, res->handle, 0); + #endif +-- +2.27.0 + diff --git a/virtio-net-bugfix-do-not-delete-netdev-before-virtio.patch b/virtio-net-bugfix-do-not-delete-netdev-before-virtio.patch new file mode 100644 index 0000000000000000000000000000000000000000..e33cf68d855ec417f92778a3c5cea4b5da32e4d7 --- /dev/null +++ b/virtio-net-bugfix-do-not-delete-netdev-before-virtio.patch @@ -0,0 +1,38 @@ +From 3cd74fd83d58aa88f9a006980c73844d6b79d1fb Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 10:31:38 +0800 +Subject: [PATCH] virtio-net: bugfix: do not delete netdev before virtio net + +For the vhost-user 
net-card, it is allowed to delete its +network backend while the virtio-net device still exists. +However, when the status of the device changes in guest, +QEMU will check whether the network backend exists, otherwise +it will crash. +So do not allow deleting the network backend directly +without deleting the virtio-net device. + +Signed-off-by: Jinhua Cao +--- + net/net.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/net/net.c b/net/net.c +index 0520bc1681..bcd3d7e04c 100644 +--- a/net/net.c ++++ b/net/net.c +@@ -1322,6 +1322,12 @@ void qmp_netdev_del(const char *id, Error **errp) + return; + } + ++ if (nc->info->type == NET_CLIENT_DRIVER_VHOST_USER && nc->peer) { ++ error_setg(errp, "Device '%s' is a netdev for vhostuser," ++ "please delete the peer front-end device (virtio-net) first.", id); ++ return; ++ } ++ + qemu_del_net_client(nc); + + /* +-- +2.27.0 + diff --git a/virtio-net-correctly-copy-vnet-header-when-flushing-.patch b/virtio-net-correctly-copy-vnet-header-when-flushing-.patch new file mode 100644 index 0000000000000000000000000000000000000000..b381e1024885a043e43e688f319cd4b44cc34a4c --- /dev/null +++ b/virtio-net-correctly-copy-vnet-header-when-flushing-.patch @@ -0,0 +1,72 @@ +From 912641a75955a75f37ab8695a0753b1571762717 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Tue, 2 Jan 2024 11:29:01 +0800 +Subject: [PATCH] virtio-net: correctly copy vnet header when flushing TX + (CVE-2023-6693) + +When HASH_REPORT is negotiated, the guest_hdr_len might be larger than +the size of the mergeable rx buffer header. Using +virtio_net_hdr_mrg_rxbuf during the header swap might lead a stack +overflow in this case. Fixing this by using virtio_net_hdr_v1_hash +instead. 
+ +Reported-by: Xiao Lei +Cc: Yuri Benditovich +Cc: qemu-stable@nongnu.org +Cc: Mauro Matteo Cascella +Fixes: CVE-2023-6693 +Fixes: e22f0603fb2f ("virtio-net: reference implementation of hash report") +Reviewed-by: Michael Tokarev +Signed-off-by: Jason Wang +--- + hw/net/virtio-net.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 80c56f0cfc..73024babd4 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -674,6 +674,11 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, + + n->mergeable_rx_bufs = mergeable_rx_bufs; + ++ /* ++ * Note: when extending the vnet header, please make sure to ++ * change the vnet header copying logic in virtio_net_flush_tx() ++ * as well. ++ */ + if (version_1) { + n->guest_hdr_len = hash_report ? + sizeof(struct virtio_net_hdr_v1_hash) : +@@ -2693,7 +2698,7 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q) + ssize_t ret; + unsigned int out_num; + struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg; +- struct virtio_net_hdr_mrg_rxbuf mhdr; ++ struct virtio_net_hdr_v1_hash vhdr; + + elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement)); + if (!elem) { +@@ -2710,7 +2715,7 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q) + } + + if (n->has_vnet_hdr) { +- if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) < ++ if (iov_to_buf(out_sg, out_num, 0, &vhdr, n->guest_hdr_len) < + n->guest_hdr_len) { + virtio_error(vdev, "virtio-net header incorrect"); + virtqueue_detach_element(q->tx_vq, elem, 0); +@@ -2718,8 +2723,8 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q) + return -EINVAL; + } + if (n->needs_vnet_hdr_swap) { +- virtio_net_hdr_swap(vdev, (void *) &mhdr); +- sg2[0].iov_base = &mhdr; ++ virtio_net_hdr_swap(vdev, (void *) &vhdr); ++ sg2[0].iov_base = &vhdr; + sg2[0].iov_len = n->guest_hdr_len; + out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1, + out_sg, out_num, 
+-- +2.27.0 + diff --git a/virtio-net-fix-max-vring-buf-size-when-set-ring-num.patch b/virtio-net-fix-max-vring-buf-size-when-set-ring-num.patch new file mode 100644 index 0000000000000000000000000000000000000000..fb492879329a7e56b5fee12ccd67e36f15798987 --- /dev/null +++ b/virtio-net-fix-max-vring-buf-size-when-set-ring-num.patch @@ -0,0 +1,52 @@ +From 4321c9f8b85c6a4c1549399aa11e351b66bd1879 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 10:48:27 +0800 +Subject: [PATCH] virtio-net: fix max vring buf size when set ring num + +Set the max vring buf size of virtio-net devices to 4096 + +Signed-off-by: Jinhua Cao +--- + hw/virtio/virtio.c | 9 +++++++-- + include/hw/virtio/virtio.h | 1 + + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index d93ea62723..267c1e6fd0 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2196,12 +2196,17 @@ void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc, + + void virtio_queue_set_num(VirtIODevice *vdev, int n, int num) + { ++ int vq_max_size = VIRTQUEUE_MAX_SIZE; ++ ++ if (!strcmp(vdev->name, "virtio-net")) { ++ vq_max_size = VIRTIO_NET_VQ_MAX_SIZE; ++ } ++ + /* Don't allow guest to flip queue between existent and + * nonexistent states, or to set it to an invalid size. 
+ */ + if (!!num != !!vdev->vq[n].vring.num || +- num > VIRTQUEUE_MAX_SIZE || +- num < 0) { ++ num > vq_max_size || num < 0) { + return; + } + vdev->vq[n].vring.num = num; +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index 7c35bb841b..e612441357 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -60,6 +60,7 @@ size_t virtio_get_config_size(const VirtIOConfigSizeParams *params, + typedef struct VirtQueue VirtQueue; + + #define VIRTQUEUE_MAX_SIZE 1024 ++#define VIRTIO_NET_VQ_MAX_SIZE (4096) + + typedef struct VirtQueueElement + { +-- +2.27.0 + diff --git a/virtio-net-set-the-max-of-queue-size-to-4096.patch b/virtio-net-set-the-max-of-queue-size-to-4096.patch new file mode 100644 index 0000000000000000000000000000000000000000..6e3e067ffaaf7ebad67639c9db209050a29f943c --- /dev/null +++ b/virtio-net-set-the-max-of-queue-size-to-4096.patch @@ -0,0 +1,58 @@ +From 58fe483bf5824db177843675629ed955051078fd Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Sat, 12 Feb 2022 17:22:38 +0800 +Subject: [PATCH] virtio-net: set the max of queue size to 4096 + +set the max of virtio-net queue size to 4096. Now the +queue_size of virtio-net is set by rx_queue_size and +tx_queue_size + +Signed-off-by: Jinhua Cao +--- + hw/net/virtio-net.c | 5 +++-- + hw/virtio/virtio.c | 2 +- + 2 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 7f69a4b842..0ae2ddc002 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -710,6 +710,7 @@ static int virtio_net_max_tx_queue_size(VirtIONet *n) + + switch(peer->info->type) { + case NET_CLIENT_DRIVER_VHOST_USER: ++ return VIRTIO_NET_VQ_MAX_SIZE; + case NET_CLIENT_DRIVER_VHOST_VDPA: + return VIRTQUEUE_MAX_SIZE; + default: +@@ -3638,12 +3639,12 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) + * help from us (using virtio 1 and up). 
+ */ + if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE || +- n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE || ++ n->net_conf.rx_queue_size > VIRTIO_NET_VQ_MAX_SIZE || + !is_power_of_2(n->net_conf.rx_queue_size)) { + error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), " + "must be a power of 2 between %d and %d.", + n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE, +- VIRTQUEUE_MAX_SIZE); ++ VIRTIO_NET_VQ_MAX_SIZE); + virtio_cleanup(vdev); + return; + } +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 267c1e6fd0..d00effe4d5 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2338,7 +2338,7 @@ VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, + break; + } + +- if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) { ++ if (i == VIRTIO_QUEUE_MAX) { + qemu_log("unacceptable queue_size (%d) or num (%d)\n", + queue_size, i); + abort(); +-- +2.27.0 + diff --git a/virtio-net-update-the-default-and-max-of-rx-tx_queue.patch b/virtio-net-update-the-default-and-max-of-rx-tx_queue.patch new file mode 100644 index 0000000000000000000000000000000000000000..9817a2280cf9b883781794f0a4cc442c01bca77d --- /dev/null +++ b/virtio-net-update-the-default-and-max-of-rx-tx_queue.patch @@ -0,0 +1,110 @@ +From c2221815b79be9847c4729709809779b4b0550a7 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 17:28:49 +0800 +Subject: [PATCH] virtio-net: update the default and max of rx/tx_queue_size + +Set the max of tx_queue_size to 4096 even if the backends +are not vhost-user. + +Set the default of rx/tx_queue_size to 2048 if the backends +are vhost-user, otherwise to 4096. 
+ +Signed-off-by: Jinhua Cao +--- + hw/net/virtio-net.c | 43 ++++++++++++++++++++++++++++++++----------- + 1 file changed, 32 insertions(+), 11 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 0ae2ddc002..523d01746d 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -50,12 +50,11 @@ + #define VIRTIO_NET_VM_VERSION 11 + + /* previously fixed value */ +-#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256 +-#define VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE 256 ++#define VIRTIO_NET_VHOST_USER_DEFAULT_SIZE 2048 + + /* for now, only allow larger queue_pairs; with virtio-1, guest can downsize */ +-#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE +-#define VIRTIO_NET_TX_QUEUE_MIN_SIZE VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE ++#define VIRTIO_NET_RX_QUEUE_MIN_SIZE 256 ++#define VIRTIO_NET_TX_QUEUE_MIN_SIZE 256 + + #define VIRTIO_NET_IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */ + +@@ -696,6 +695,28 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, + } + } + ++static void virtio_net_set_default_queue_size(VirtIONet *n) ++{ ++ NetClientState *peer = n->nic_conf.peers.ncs[0]; ++ ++ /* Default value is 0 if not set */ ++ if (n->net_conf.rx_queue_size == 0) { ++ if (peer && peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { ++ n->net_conf.rx_queue_size = VIRTIO_NET_VHOST_USER_DEFAULT_SIZE; ++ } else { ++ n->net_conf.rx_queue_size = VIRTIO_NET_VQ_MAX_SIZE; ++ } ++ } ++ ++ if (n->net_conf.tx_queue_size == 0) { ++ if (peer && peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { ++ n->net_conf.tx_queue_size = VIRTIO_NET_VHOST_USER_DEFAULT_SIZE; ++ } else { ++ n->net_conf.tx_queue_size = VIRTIO_NET_VQ_MAX_SIZE; ++ } ++ } ++} ++ + static int virtio_net_max_tx_queue_size(VirtIONet *n) + { + NetClientState *peer = n->nic_conf.peers.ncs[0]; +@@ -705,16 +726,16 @@ static int virtio_net_max_tx_queue_size(VirtIONet *n) + * size. 
+ */ + if (!peer) { +- return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; ++ return VIRTIO_NET_VQ_MAX_SIZE; + } + + switch(peer->info->type) { + case NET_CLIENT_DRIVER_VHOST_USER: + return VIRTIO_NET_VQ_MAX_SIZE; + case NET_CLIENT_DRIVER_VHOST_VDPA: +- return VIRTQUEUE_MAX_SIZE; ++ return VIRTIO_NET_VQ_MAX_SIZE; + default: +- return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; ++ return VIRTIO_NET_VQ_MAX_SIZE; + }; + } + +@@ -3633,6 +3654,8 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) + virtio_net_set_config_size(n, n->host_features); + virtio_init(vdev, VIRTIO_ID_NET, n->config_size); + ++ virtio_net_set_default_queue_size(n); ++ + /* + * We set a lower limit on RX queue size to what it always was. + * Guests that want a smaller ring can always resize it without +@@ -3934,10 +3957,8 @@ static Property virtio_net_properties[] = { + TX_TIMER_INTERVAL), + DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST), + DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx), +- DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, +- VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE), +- DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, +- VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE), ++ DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size, 0), ++ DEFINE_PROP_UINT16("tx_queue_size", VirtIONet, net_conf.tx_queue_size, 0), + DEFINE_PROP_UINT16("host_mtu", VirtIONet, net_conf.mtu, 0), + DEFINE_PROP_BOOL("x-mtu-bypass-backend", VirtIONet, mtu_bypass_backend, + true), +-- +2.27.0 + diff --git a/virtio-print-the-guest-virtio_net-features-that-host.patch b/virtio-print-the-guest-virtio_net-features-that-host.patch new file mode 100644 index 0000000000000000000000000000000000000000..15157bb695f275763d5a9cd3eeb8538d86c4a109 --- /dev/null +++ b/virtio-print-the-guest-virtio_net-features-that-host.patch @@ -0,0 +1,112 @@ +From b24730e9abe34898483fa62b24c26abb9d98570c Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Thu, 10 Feb 2022 14:16:17 
+0800 +Subject: [PATCH] virtio: print the guest virtio_net features that host does + not support + +print the guest virtio_net features that host does not support + +For example: +Please check host config, because host does not support required feature bits 0x1983 +virtio_net_feature: csum, guest_csum, guest_tso4, guest_tso6, host_tso4, host_tso6 +Features 0xef99a3 unsupported. Allowed features: 0x40ff8024 + +Signed-off-by: Jinhua Cao +--- + hw/net/virtio-net.c | 41 ++++++++++++++++++++++++++++++++++++++ + hw/virtio/virtio.c | 7 +++++++ + include/hw/virtio/virtio.h | 1 + + 3 files changed, 49 insertions(+) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 80c56f0cfc..7f69a4b842 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3952,6 +3952,46 @@ static Property virtio_net_properties[] = { + DEFINE_PROP_END_OF_LIST(), + }; + ++static void virtio_net_print_features(uint64_t features) ++{ ++ Property *props = virtio_net_properties; ++ int feature_cnt = 0; ++ ++ if (!features) { ++ return; ++ } ++ printf("virtio_net_feature: "); ++ ++ for (; features && props->name; props++) { ++ /* The bitnr of property may be default(0) besides 'csum' property. */ ++ if (props->bitnr == 0 && strcmp(props->name, "csum")) { ++ continue; ++ } ++ ++ /* Features only support 64bit. */ ++ if (props->bitnr > 63) { ++ continue; ++ } ++ ++ if (virtio_has_feature(features, props->bitnr)) { ++ virtio_clear_feature(&features, props->bitnr); ++ if (feature_cnt != 0) { ++ printf(", "); ++ } ++ printf("%s", props->name); ++ feature_cnt++; ++ } ++ } ++ ++ if (features) { ++ if (feature_cnt != 0) { ++ printf(", "); ++ } ++ printf("unkown bits 0x%." 
PRIx64, features); ++ } ++ printf("\n"); ++} ++ + static void virtio_net_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); +@@ -3966,6 +4006,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data) + vdc->set_config = virtio_net_set_config; + vdc->get_features = virtio_net_get_features; + vdc->set_features = virtio_net_set_features; ++ vdc->print_features = virtio_net_print_features; + vdc->bad_features = virtio_net_bad_features; + vdc->reset = virtio_net_reset; + vdc->queue_reset = virtio_net_queue_reset; +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index ec09d515c2..1f78b74c00 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2905,6 +2905,13 @@ static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val) + { + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); + bool bad = (val & ~(vdev->host_features)) != 0; ++ uint64_t feat = val & ~(vdev->host_features); ++ ++ if (bad && k->print_features) { ++ qemu_log("error: Please check host config, "\ ++ "because host does not support required feature bits 0x%" PRIx64 "\n", feat); ++ k->print_features(feat); ++ } + + val &= vdev->host_features; + if (k->set_features) { +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index c8f72850bc..7c35bb841b 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -182,6 +182,7 @@ struct VirtioDeviceClass { + int (*validate_features)(VirtIODevice *vdev); + void (*get_config)(VirtIODevice *vdev, uint8_t *config); + void (*set_config)(VirtIODevice *vdev, const uint8_t *config); ++ void (*print_features)(uint64_t features); + void (*reset)(VirtIODevice *vdev); + void (*set_status)(VirtIODevice *vdev, uint8_t val); + /* Device must validate queue_index. 
*/ +-- +2.27.0 + diff --git a/virtio-scsi-bugfix-fix-qemu-crash-for-hotplug-scsi-d.patch b/virtio-scsi-bugfix-fix-qemu-crash-for-hotplug-scsi-d.patch new file mode 100644 index 0000000000000000000000000000000000000000..f78f3d7eac329e114005a901c5a1ce37ceb77dba --- /dev/null +++ b/virtio-scsi-bugfix-fix-qemu-crash-for-hotplug-scsi-d.patch @@ -0,0 +1,37 @@ +From 4e5de00fb124d82f9c4ce2ac433ed3d691783c01 Mon Sep 17 00:00:00 2001 +From: Jinhua Cao +Date: Wed, 9 Feb 2022 19:58:21 +0800 +Subject: [PATCH] virtio-scsi: bugfix: fix qemu crash for hotplug scsi disk + with dataplane + +The vm will trigger a disk sweep operation after plugging +a controller whose io type is iothread. If a scsi disk is +attached immediately, the sg_inquiry request in vm will trigger +the assert in virtio_scsi_ctx_check(), which is called by +virtio_scsi_handle_cmd_req_prepare(). + +Add judgment in virtio_scsi_handle_cmd_req_prepare() and +return IO Error directly if the device has not been +initialized. + +Signed-off-by: Jinhua Cao +--- + hw/scsi/virtio-scsi.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c +index 9c751bf296..bc7feb404a 100644 +--- a/hw/scsi/virtio-scsi.c ++++ b/hw/scsi/virtio-scsi.c +@@ -781,7 +781,7 @@ static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req) + req->req.cmd.tag, req->req.cmd.cdb[0]); + + d = virtio_scsi_device_get(s, req->req.cmd.lun); +- if (!d) { ++ if (!d || !d->qdev.realized) { + req->resp.cmd.response = VIRTIO_SCSI_S_BAD_TARGET; + virtio_scsi_complete_cmd_req(req); + return -ENOENT; +-- +2.27.0 + diff --git a/virtio_blk-Add-support-for-retry-on-errors.patch b/virtio_blk-Add-support-for-retry-on-errors.patch new file mode 100644 index 0000000000000000000000000000000000000000..696bde5b607d4ca3ffe912930afcf5e854c0491f --- /dev/null +++ b/virtio_blk-Add-support-for-retry-on-errors.patch @@ -0,0 +1,118 @@ +From 0da112402efe63e09fdd6ed43aa026d5b625988f Mon Sep 17 00:00:00 2001
+From: yexiao +Date: Thu, 21 Jan 2021 15:46:53 +0800 +Subject: [PATCH] virtio_blk: Add support for retry on errors + +Insert failed requests into device's list for later retry and handle +queued requests to implement retry_request_cb. + +Signed-off-by: Jiahui Cen +Signed-off-by: Ying Fang +Signed-off-by: Alex Chen +--- + hw/block/virtio-blk.c | 47 ++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 44 insertions(+), 3 deletions(-) + +diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c +index a1f8e15522..1ebc9188c0 100644 +--- a/hw/block/virtio-blk.c ++++ b/hw/block/virtio-blk.c +@@ -90,6 +90,10 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, + block_acct_failed(blk_get_stats(s->blk), &req->acct); + } + virtio_blk_free_request(req); ++ } else if (action == BLOCK_ERROR_ACTION_RETRY) { ++ req->mr_next = NULL; ++ req->next = s->rq; ++ s->rq = req; + } + + blk_error_action(s->blk, action, is_read, error); +@@ -131,6 +135,7 @@ static void virtio_blk_rw_complete(void *opaque, int ret) + } + } + ++ blk_error_retry_reset_timeout(s->blk); + virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); + block_acct_done(blk_get_stats(s->blk), &req->acct); + virtio_blk_free_request(req); +@@ -150,6 +155,7 @@ static void virtio_blk_flush_complete(void *opaque, int ret) + } + } + ++ blk_error_retry_reset_timeout(s->blk); + virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); + block_acct_done(blk_get_stats(s->blk), &req->acct); + virtio_blk_free_request(req); +@@ -172,6 +178,7 @@ static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) + } + } + ++ blk_error_retry_reset_timeout(s->blk); + virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); + if (is_write_zeroes) { + block_acct_done(blk_get_stats(s->blk), &req->acct); +@@ -1183,12 +1190,12 @@ static void virtio_blk_dma_restart_bh(void *opaque) + { + VirtIOBlock *s = opaque; + +- VirtIOBlockReq *req = s->rq; ++ VirtIOBlockReq *req; + MultiReqBuffer mrb = {}; + +- s->rq = NULL; +- + 
aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); ++ req = s->rq; ++ s->rq = NULL; + while (req) { + VirtIOBlockReq *next = req->next; + if (virtio_blk_handle_request(req, &mrb)) { +@@ -1541,10 +1548,44 @@ static void virtio_blk_drained_end(void *opaque) + } + } + ++static void virtio_blk_retry_request(void *opaque) ++{ ++ VirtIOBlock *s = VIRTIO_BLK(opaque); ++ ++ VirtIOBlockReq *req; ++ MultiReqBuffer mrb = {}; ++ ++ aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); ++ req = s->rq; ++ s->rq = NULL; ++ while (req) { ++ VirtIOBlockReq *next = req->next; ++ if (virtio_blk_handle_request(req, &mrb)) { ++ /* Device is now broken and won't do any processing until it gets ++ * reset. Already queued requests will be lost: let's purge them. ++ */ ++ while (req) { ++ next = req->next; ++ virtqueue_detach_element(req->vq, &req->elem, 0); ++ virtio_blk_free_request(req); ++ req = next; ++ } ++ break; ++ } ++ req = next; ++ } ++ ++ if (mrb.num_reqs) { ++ virtio_blk_submit_multireq(s, &mrb); ++ } ++ aio_context_release(blk_get_aio_context(s->conf.conf.blk)); ++} ++ + static const BlockDevOps virtio_block_ops = { + .resize_cb = virtio_blk_resize, + .drained_begin = virtio_blk_drained_begin, + .drained_end = virtio_blk_drained_end, ++ .retry_request_cb = virtio_blk_retry_request, + }; + + static void virtio_blk_device_realize(DeviceState *dev, Error **errp) +-- +2.27.0 +