diff --git a/1135-arm-virt-target-arm-support-arm-vcpu-hotplug.patch b/1135-arm-virt-target-arm-support-arm-vcpu-hotplug.patch new file mode 100644 index 0000000000000000000000000000000000000000..9e96f2deb156e920d562e406a77668f9200e9fb7 --- /dev/null +++ b/1135-arm-virt-target-arm-support-arm-vcpu-hotplug.patch @@ -0,0 +1,3617 @@ +From: blingxue +Date: Mon, 8 Sep 2025 20:54:19 +0800 +Subject: [PATCH] arm64: Add arm vCPU Hot-plug/unplug support + +patch series: +https://gitee.com/openeuler/qemu/pulls/804 +https://gitee.com/openeuler/qemu/pulls/850 +https://gitee.com/openeuler/qemu/pulls/860 +https://gitee.com/openeuler/qemu/pulls/863 + +Signed-off-by: Keqian Zhu +Signed-off-by: Salil Mehta +Signed-off-by: Xiaowei Xue +--- + accel/kvm/kvm-all.c | 6 + + accel/tcg/tcg-accel-ops-mttcg.c | 1 + + cpu-common.c | 41 ++ + hw/acpi/cpu.c | 67 +- + hw/acpi/generic_event_device.c | 25 +- + hw/arm/Kconfig | 1 + + hw/arm/boot.c | 2 +- + hw/arm/virt-acpi-build.c | 63 +- + hw/arm/virt.c | 891 +++++++++++++++++++++---- + hw/core/gpio.c | 2 +- + hw/intc/arm_gicv3.c | 1 + + hw/intc/arm_gicv3_common.c | 71 +- + hw/intc/arm_gicv3_cpuif.c | 265 ++++---- + hw/intc/arm_gicv3_cpuif_common.c | 5 + + hw/intc/arm_gicv3_kvm.c | 39 +- + hw/intc/gicv3_internal.h | 2 + + include/exec/memory.h | 1 + + include/hw/acpi/cpu.h | 2 + + include/hw/acpi/cpu_hotplug.h | 4 + + include/hw/acpi/generic_event_device.h | 1 + + include/hw/arm/boot.h | 2 + + include/hw/arm/virt.h | 17 +- + include/hw/core/cpu.h | 76 +++ + include/hw/intc/arm_gicv3_common.h | 23 + + include/hw/qdev-core.h | 2 + + include/sysemu/kvm.h | 2 + + include/sysemu/kvm_int.h | 1 + + include/tcg/startup.h | 5 + + system/cpus.c | 32 +- + system/memory.c | 3 + + target/arm/arm-powerctl.c | 51 +- + target/arm/cpu.c | 121 ++++ + target/arm/cpu.h | 21 + + target/arm/cpu64.c | 15 + + target/arm/gdbstub.c | 6 + + target/arm/helper.c | 27 +- + target/arm/internals.h | 3 + + target/arm/kvm.c | 96 ++- + target/arm/kvm64.c | 61 +- + 
target/arm/kvm_arm.h | 24 + + target/arm/meson.build | 1 + + target/arm/{tcg => }/psci.c | 8 + + target/arm/tcg/meson.build | 4 - + tcg/tcg.c | 23 + + 44 files changed, 1797 insertions(+), 317 deletions(-) + rename target/arm/{tcg => }/psci.c (97%) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 6db6085..4d83fdf 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -3611,6 +3611,11 @@ bool kvm_kernel_irqchip_split(void) + return kvm_state->kernel_irqchip_split == ON_OFF_AUTO_ON; + } + ++bool kvm_smccc_filter_enabled(void) ++{ ++ return kvm_state->kvm_smccc_filter_enabled; ++} ++ + static void kvm_get_dirty_ring_size(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +@@ -3656,6 +3661,7 @@ static void kvm_accel_instance_init(Object *obj) + /* KVM dirty ring is by default off */ + s->kvm_dirty_ring_size = 0; + s->kvm_dirty_ring_with_bitmap = false; ++ s->kvm_smccc_filter_enabled = false; + s->kvm_eager_split_size = 0; + s->notify_vmexit = NOTIFY_VMEXIT_OPTION_RUN; + s->notify_window = 0; +diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c +index fac8009..7386699 100644 +--- a/accel/tcg/tcg-accel-ops-mttcg.c ++++ b/accel/tcg/tcg-accel-ops-mttcg.c +@@ -122,6 +122,7 @@ static void *mttcg_cpu_thread_fn(void *arg) + qemu_mutex_unlock_iothread(); + rcu_remove_force_rcu_notifier(&force_rcu.notifier); + rcu_unregister_thread(); ++ tcg_unregister_thread(); + return NULL; + } + +diff --git a/cpu-common.c b/cpu-common.c +index c81fd72..54e63b3 100644 +--- a/cpu-common.c ++++ b/cpu-common.c +@@ -24,6 +24,7 @@ + #include "sysemu/cpus.h" + #include "qemu/lockable.h" + #include "trace/trace-root.h" ++#include "hw/boards.h" + + QemuMutex qemu_cpu_list_lock; + static QemuCond exclusive_cond; +@@ -107,6 +108,46 @@ void cpu_list_remove(CPUState *cpu) + cpu_list_generation_id++; + } + ++CPUState *qemu_get_possible_cpu(int index) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ const CPUArchIdList 
*possible_cpus = ms->possible_cpus; ++ ++ if (possible_cpus == NULL) { ++ return qemu_get_cpu(index); ++ } ++ ++ assert((index >= 0) && (index < possible_cpus->len)); ++ ++ return CPU(possible_cpus->cpus[index].cpu); ++} ++ ++bool qemu_present_cpu(CPUState *cpu) ++{ ++ return cpu; ++} ++ ++bool qemu_enabled_cpu(CPUState *cpu) ++{ ++ return cpu && !cpu->disabled; ++} ++ ++bool qemu_persistent_cpu(CPUState *cpu) ++{ ++ /* cpu state can be faked to the guest via acpi */ ++ return cpu->acpi_persistent; ++} ++ ++uint64_t qemu_get_cpu_archid(int cpu_index) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ const CPUArchIdList *possible_cpus = ms->possible_cpus; ++ ++ assert((cpu_index >= 0) && (cpu_index < possible_cpus->len)); ++ ++ return possible_cpus->cpus[cpu_index].arch_id; ++} ++ + CPUState *qemu_get_cpu(int index) + { + CPUState *cpu; +diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c +index b48a0e7..f0fb356 100644 +--- a/hw/acpi/cpu.c ++++ b/hw/acpi/cpu.c +@@ -1,6 +1,7 @@ + #include "qemu/osdep.h" + #include "migration/vmstate.h" + #include "hw/acpi/cpu.h" ++#include "hw/acpi/cpu_hotplug.h" + #include "hw/core/cpu.h" + #include "qapi/error.h" + #include "qapi/qapi-events-acpi.h" +@@ -63,10 +64,11 @@ static uint64_t cpu_hotplug_rd(void *opaque, hwaddr addr, unsigned size) + cdev = &cpu_st->devs[cpu_st->selector]; + switch (addr) { + case ACPI_CPU_FLAGS_OFFSET_RW: /* pack and return is_* fields */ +- val |= cdev->cpu ? 1 : 0; ++ val |= cdev->is_enabled ? 1 : 0; + val |= cdev->is_inserting ? 2 : 0; + val |= cdev->is_removing ? 4 : 0; + val |= cdev->fw_remove ? 16 : 0; ++ val |= cdev->is_present ? 
32 : 0; + trace_cpuhp_acpi_read_flags(cpu_st->selector, val); + break; + case ACPI_CPU_CMD_DATA_OFFSET_RW: +@@ -225,7 +227,20 @@ void cpu_hotplug_hw_init(MemoryRegion *as, Object *owner, + state->dev_count = id_list->len; + state->devs = g_new0(typeof(*state->devs), state->dev_count); + for (i = 0; i < id_list->len; i++) { +- state->devs[i].cpu = CPU(id_list->cpus[i].cpu); ++ struct CPUState *cpu = CPU(id_list->cpus[i].cpu); ++ if (qemu_present_cpu(cpu)) { ++ state->devs[i].is_present = true; ++ } else { ++ state->devs[i].is_present = false; ++ } ++ ++ if (qemu_enabled_cpu(cpu)) { ++ state->devs[i].cpu = cpu; ++ state->devs[i].is_enabled = true; ++ } else { ++ state->devs[i].is_enabled = false; ++ } ++ + state->devs[i].arch_id = id_list->cpus[i].arch_id; + } + memory_region_init_io(&state->ctrl_reg, owner, &cpu_hotplug_ops, state, +@@ -258,6 +273,8 @@ void acpi_cpu_plug_cb(HotplugHandler *hotplug_dev, + } + + cdev->cpu = CPU(dev); ++ cdev->is_present = true; ++ cdev->is_enabled = true; + if (dev->hotplugged) { + cdev->is_inserting = true; + acpi_send_event(DEVICE(hotplug_dev), ACPI_CPU_HOTPLUG_STATUS); +@@ -289,6 +306,11 @@ void acpi_cpu_unplug_cb(CPUHotplugState *cpu_st, + return; + } + ++ cdev->is_enabled = false; ++ if (!qemu_persistent_cpu(CPU(dev))) { ++ cdev->is_present = false; ++ } ++ + cdev->cpu = NULL; + } + +@@ -299,6 +321,8 @@ static const VMStateDescription vmstate_cpuhp_sts = { + .fields = (VMStateField[]) { + VMSTATE_BOOL(is_inserting, AcpiCpuStatus), + VMSTATE_BOOL(is_removing, AcpiCpuStatus), ++ VMSTATE_BOOL(is_present, AcpiCpuStatus), ++ VMSTATE_BOOL(is_enabled, AcpiCpuStatus), + VMSTATE_UINT32(ost_event, AcpiCpuStatus), + VMSTATE_UINT32(ost_status, AcpiCpuStatus), + VMSTATE_END_OF_LIST() +@@ -336,6 +360,7 @@ const VMStateDescription vmstate_cpu_hotplug = { + #define CPU_REMOVE_EVENT "CRMV" + #define CPU_EJECT_EVENT "CEJ0" + #define CPU_FW_EJECT_EVENT "CEJF" ++#define CPU_PRESENT "CPRS" + + void build_cpus_aml(Aml *table, MachineState *machine, 
CPUHotplugFeatures opts, + build_madt_cpu_fn build_madt_cpu, hwaddr base_addr, +@@ -396,7 +421,9 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + aml_append(field, aml_named_field(CPU_EJECT_EVENT, 1)); + /* tell firmware to do device eject, write only */ + aml_append(field, aml_named_field(CPU_FW_EJECT_EVENT, 1)); +- aml_append(field, aml_reserved_field(3)); ++ /* 1 if present, read only */ ++ aml_append(field, aml_named_field(CPU_PRESENT, 1)); ++ aml_append(field, aml_reserved_field(2)); + aml_append(field, aml_named_field(CPU_COMMAND, 8)); + aml_append(cpu_ctrl_dev, field); + +@@ -426,6 +453,7 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + Aml *ctrl_lock = aml_name("%s.%s", cphp_res_path, CPU_LOCK); + Aml *cpu_selector = aml_name("%s.%s", cphp_res_path, CPU_SELECTOR); + Aml *is_enabled = aml_name("%s.%s", cphp_res_path, CPU_ENABLED); ++ Aml *is_present = aml_name("%s.%s", cphp_res_path, CPU_PRESENT); + Aml *cpu_cmd = aml_name("%s.%s", cphp_res_path, CPU_COMMAND); + Aml *cpu_data = aml_name("%s.%s", cphp_res_path, CPU_DATA); + Aml *ins_evt = aml_name("%s.%s", cphp_res_path, CPU_INSERT_EVENT); +@@ -454,13 +482,26 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + { + Aml *idx = aml_arg(0); + Aml *sta = aml_local(0); ++ Aml *ifctx2; ++ Aml *else_ctx; + + aml_append(method, aml_acquire(ctrl_lock, 0xFFFF)); + aml_append(method, aml_store(idx, cpu_selector)); + aml_append(method, aml_store(zero, sta)); +- ifctx = aml_if(aml_equal(is_enabled, one)); ++ ifctx = aml_if(aml_equal(is_present, one)); + { +- aml_append(ifctx, aml_store(aml_int(0xF), sta)); ++ ifctx2 = aml_if(aml_equal(is_enabled, one)); ++ { ++ /* cpu is present and enabled */ ++ aml_append(ifctx2, aml_store(aml_int(0xF), sta)); ++ } ++ aml_append(ifctx, ifctx2); ++ else_ctx = aml_else(); ++ { ++ /* cpu is present but disabled */ ++ aml_append(else_ctx, aml_store(aml_int(0xD), sta)); ++ } ++ 
aml_append(ifctx, else_ctx); + } + aml_append(method, ifctx); + aml_append(method, aml_release(ctrl_lock)); +@@ -671,9 +712,11 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + aml_append(dev, method); + + /* build _MAT object */ +- build_madt_cpu(i, arch_ids, madt_buf, true); /* set enabled flag */ +- aml_append(dev, aml_name_decl("_MAT", +- aml_buffer(madt_buf->len, (uint8_t *)madt_buf->data))); ++ if (build_madt_cpu) { ++ build_madt_cpu(i, arch_ids, madt_buf, true); /* set enabled flag */ ++ aml_append(dev, aml_name_decl("_MAT", ++ aml_buffer(madt_buf->len, (uint8_t *)madt_buf->data))); ++ } + g_array_free(madt_buf, true); + + if (CPU(arch_ids->cpus[i].cpu) != first_cpu) { +@@ -704,9 +747,11 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, + aml_append(sb_scope, cpus_dev); + aml_append(table, sb_scope); + +- method = aml_method(event_handler_method, 0, AML_NOTSERIALIZED); +- aml_append(method, aml_call0("\\_SB.CPUS." CPU_SCAN_METHOD)); +- aml_append(table, method); ++ if (event_handler_method) { ++ method = aml_method(event_handler_method, 0, AML_NOTSERIALIZED); ++ aml_append(method, aml_call0("\\_SB.CPUS." CPU_SCAN_METHOD)); ++ aml_append(table, method); ++ } + + g_free(cphp_res_path); + } +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index 48f03cd..7e6d627 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -12,6 +12,7 @@ + #include "qemu/osdep.h" + #include "qapi/error.h" + #include "hw/acpi/acpi.h" ++#include "hw/acpi/cpu.h" + #include "hw/acpi/generic_event_device.h" + #include "hw/irq.h" + #include "hw/mem/pc-dimm.h" +@@ -109,7 +110,8 @@ void build_ged_aml(Aml *table, const char *name, HotplugHandler *hotplug_dev, + MEMORY_SLOT_SCAN_METHOD)); + break; + case ACPI_GED_CPU_HOTPLUG_EVT: +- aml_append(if_ctx, aml_call0(AML_GED_EVT_CPU_SCAN_METHOD)); ++ aml_append(if_ctx, aml_call0(ACPI_CPU_CONTAINER "." 
++ ACPI_CPU_SCAN_METHOD)); + break; + case ACPI_GED_PWR_DOWN_EVT: + aml_append(if_ctx, +@@ -331,6 +333,16 @@ static const VMStateDescription vmstate_memhp_state = { + } + }; + ++static const VMStateDescription vmstate_cpuhp_state = { ++ .name = "acpi-ged/cpuhp", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .fields = (VMStateField[]) { ++ VMSTATE_CPU_HOTPLUG(cpuhp_state, AcpiGedState), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ + static const VMStateDescription vmstate_ged_state = { + .name = "acpi-ged-state", + .version_id = 1, +@@ -379,6 +391,7 @@ static const VMStateDescription vmstate_acpi_ged = { + }, + .subsections = (const VMStateDescription * []) { + &vmstate_memhp_state, ++ &vmstate_cpuhp_state, + &vmstate_ghes_state, + NULL + } +@@ -426,6 +439,7 @@ static void acpi_ged_initfn(Object *obj) + AcpiGedState *s = ACPI_GED(dev); + SysBusDevice *sbd = SYS_BUS_DEVICE(obj); + GEDState *ged_st = &s->ged_state; ++ MachineClass *mc; + + memory_region_init_io(&ged_st->evt, obj, &ged_evt_ops, ged_st, + TYPE_ACPI_GED, ACPI_GED_EVT_SEL_LEN); +@@ -449,6 +463,15 @@ static void acpi_ged_initfn(Object *obj) + memory_region_init_io(&ged_st->regs, obj, &ged_regs_ops, ged_st, + TYPE_ACPI_GED "-regs", ACPI_GED_REG_COUNT); + sysbus_init_mmio(sbd, &ged_st->regs); ++ ++ mc = MACHINE_GET_CLASS(qdev_get_machine()); ++ if (mc->possible_cpu_arch_ids) { ++ memory_region_init(&s->container_cpuhp, OBJECT(dev), "cpuhp container", ++ ACPI_CPU_HOTPLUG_REG_LEN); ++ sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->container_cpuhp); ++ cpu_hotplug_hw_init(&s->container_cpuhp, OBJECT(dev), ++ &s->cpuhp_state, 0); ++ } + } + + static void acpi_ged_class_init(ObjectClass *class, void *data) +diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig +index 3ada335..c0a7d0b 100644 +--- a/hw/arm/Kconfig ++++ b/hw/arm/Kconfig +@@ -29,6 +29,7 @@ config ARM_VIRT + select ACPI_HW_REDUCED + select ACPI_APEI + select ACPI_VIOT ++ select ACPI_CPU_HOTPLUG + select VIRTIO_MEM_SUPPORTED + select ACPI_CXL + select ACPI_HMAT +diff 
--git a/hw/arm/boot.c b/hw/arm/boot.c +index 84ea6a8..42e0e44 100644 +--- a/hw/arm/boot.c ++++ b/hw/arm/boot.c +@@ -682,7 +682,7 @@ fail: + return -1; + } + +-static void do_cpu_reset(void *opaque) ++void do_cpu_reset(void *opaque) + { + ARMCPU *cpu = opaque; + CPUState *cs = CPU(cpu); +diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c +index 78d0bfb..36dbd13 100644 +--- a/hw/arm/virt-acpi-build.c ++++ b/hw/arm/virt-acpi-build.c +@@ -686,14 +686,50 @@ static void build_append_gicr(GArray *table_data, uint64_t base, uint32_t size) + build_append_int_noprefix(table_data, size, 4); /* Discovery Range Length */ + } + ++static uint32_t virt_acpi_get_gicc_flags(CPUState *cpu, VirtMachineState *vms) ++{ ++ /* can only exist in 'enabled' state */ ++ if (!vms->cpu_hotplug_enabled) { ++ return 1; ++ } ++ ++ /* ++ * ARM GIC CPU Interface can be 'online-capable' or 'enabled' at boot. We ++ * MUST set 'online-capable' bit for all hotpluggable CPUs. ++ * Change Link: https://bugzilla.tianocore.org/show_bug.cgi?id=3706 ++ * ++ * UEFI ACPI Specification 6.5 ++ * Section: 5.2.12.14. GIC CPU Interface (GICC) Structure ++ * Table: 5.37 GICC CPU Interface Flags ++ * Link: https://uefi.org/specs/ACPI/6.5 ++ * ++ * Cold-booted CPUs, except for the first/boot CPU, SHOULD be allowed to be ++ * hot(un)plug as well but for this to happen these MUST have ++ * 'online-capable' bit set. Later creates compatibility problem with legacy ++ * OS as it might ignore online-capable' bits during boot time and hence ++ * some CPUs might not get detected. To fix this MADT GIC CPU interface flag ++ * should be allowed to have both bits set i.e. 'online-capable' and ++ * 'Enabled' bits together. This change will require UEFI ACPI standard ++ * change. Till this happens exposing all cold-booted CPUs as 'enabled' only ++ * ++ */ ++ return cpu && cpu->cold_booted ? 
1 : (1 << 3); ++} ++ + static void + build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + { + int i; + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(vms); ++ MachineState *ms = MACHINE(vms); + const MemMapEntry *memmap = vms->memmap; + AcpiTable table = { .sig = "APIC", .rev = 4, .oem_id = vms->oem_id, + .oem_table_id = vms->oem_table_id }; ++ unsigned int max_cpus = ms->smp.max_cpus; ++ ++ if (!vms->cpu_hotplug_enabled) { ++ max_cpus = ms->smp.cpus; ++ } + + acpi_table_begin(&table, table_data); + /* Local Interrupt Controller Address */ +@@ -712,12 +748,13 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + build_append_int_noprefix(table_data, vms->gic_version, 1); + build_append_int_noprefix(table_data, 0, 3); /* Reserved */ + +- for (i = 0; i < MACHINE(vms)->smp.cpus; i++) { +- ARMCPU *armcpu = ARM_CPU(qemu_get_cpu(i)); ++ for (i = 0; i < max_cpus; i++) { ++ CPUState *cpu = qemu_get_possible_cpu(i); + uint64_t physical_base_address = 0, gich = 0, gicv = 0; + uint32_t vgic_interrupt = vms->virt ? ARCH_GIC_MAINT_IRQ : 0; +- uint32_t pmu_interrupt = arm_feature(&armcpu->env, ARM_FEATURE_PMU) ? +- VIRTUAL_PMU_IRQ : 0; ++ uint32_t pmu_interrupt = vms->pmu ? 
VIRTUAL_PMU_IRQ : 0; ++ uint32_t flags = virt_acpi_get_gicc_flags(cpu, vms); ++ uint64_t mpidr = qemu_get_cpu_archid(i); + + if (vms->gic_version == VIRT_GIC_VERSION_2) { + physical_base_address = memmap[VIRT_GIC_CPU].base; +@@ -732,7 +769,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + build_append_int_noprefix(table_data, i, 4); /* GIC ID */ + build_append_int_noprefix(table_data, i, 4); /* ACPI Processor UID */ + /* Flags */ +- build_append_int_noprefix(table_data, 1, 4); /* Enabled */ ++ build_append_int_noprefix(table_data, flags, 4); + /* Parking Protocol Version */ + build_append_int_noprefix(table_data, 0, 4); + /* Performance Interrupt GSIV */ +@@ -746,7 +783,7 @@ build_madt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + build_append_int_noprefix(table_data, vgic_interrupt, 4); + build_append_int_noprefix(table_data, 0, 8); /* GICR Base Address*/ + /* MPIDR */ +- build_append_int_noprefix(table_data, armcpu->mp_affinity, 8); ++ build_append_int_noprefix(table_data, mpidr, 8); + /* Processor Power Efficiency Class */ + build_append_int_noprefix(table_data, 0, 1); + /* Reserved */ +@@ -844,7 +881,19 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) + * the RTC ACPI device at all when using UEFI. 
+ */ + scope = aml_scope("\\_SB"); +- acpi_dsdt_add_cpus(scope, vms); ++ ++ if (vms->cpu_hotplug_enabled) { ++ CPUHotplugFeatures opts = { ++ .acpi_1_compatible = false, ++ .has_legacy_cphp = false ++ }; ++ ++ build_cpus_aml(scope, ms, opts, NULL, memmap[VIRT_CPUHP_ACPI].base, ++ "\\_SB", NULL, AML_SYSTEM_MEMORY); ++ } else { ++ acpi_dsdt_add_cpus(scope, vms); ++ } ++ + acpi_dsdt_add_uart(scope, &memmap[VIRT_UART], + (irqmap[VIRT_UART] + ARM_SPI_BASE)); + if (vmc->acpi_expose_flash) { +diff --git a/hw/arm/virt.c b/hw/arm/virt.c +index be2856c..16609eb 100644 +--- a/hw/arm/virt.c ++++ b/hw/arm/virt.c +@@ -45,6 +45,8 @@ + #include "sysemu/device_tree.h" + #include "sysemu/numa.h" + #include "sysemu/runstate.h" ++#include "sysemu/reset.h" ++#include "sysemu/sysemu.h" + #include "sysemu/tpm.h" + #include "sysemu/tcg.h" + #include "sysemu/kvm.h" +@@ -77,10 +79,12 @@ + #include "hw/mem/pc-dimm.h" + #include "hw/mem/nvdimm.h" + #include "hw/acpi/generic_event_device.h" ++#include "hw/acpi/cpu_hotplug.h" + #include "hw/virtio/virtio-md-pci.h" + #include "hw/virtio/virtio-iommu.h" + #include "hw/char/pl011.h" + #include "qemu/guest-random.h" ++#include "qapi/qmp/qdict.h" + + #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \ + static void virt_##major##_##minor##_class_init(ObjectClass *oc, \ +@@ -156,6 +160,7 @@ static const MemMapEntry base_memmap[] = { + [VIRT_NVDIMM_ACPI] = { 0x09090000, NVDIMM_ACPI_IO_LEN}, + [VIRT_PVTIME] = { 0x090a0000, 0x00010000 }, + [VIRT_SECURE_GPIO] = { 0x090b0000, 0x00001000 }, ++ [VIRT_CPUHP_ACPI] = { 0x090c0000, ACPI_CPU_HOTPLUG_REG_LEN}, + [VIRT_MMIO] = { 0x0a000000, 0x00000200 }, + /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */ + [VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 }, +@@ -224,6 +229,12 @@ static const char *valid_cpus[] = { + ARM_CPU_TYPE_NAME("max"), + }; + ++static CPUArchId *virt_find_cpu_slot(MachineState *ms, int vcpuid); ++static int virt_get_socket_id(const MachineState *ms, int 
cpu_index); ++static int virt_get_cluster_id(const MachineState *ms, int cpu_index); ++static int virt_get_core_id(const MachineState *ms, int cpu_index); ++static int virt_get_thread_id(const MachineState *ms, int cpu_index); ++ + static bool cpu_type_valid(const char *cpu) + { + int i; +@@ -644,7 +655,7 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms) + DeviceState *dev; + MachineState *ms = MACHINE(vms); + int irq = vms->irqmap[VIRT_ACPI_GED]; +- uint32_t event = ACPI_GED_PWR_DOWN_EVT; ++ uint32_t event = ACPI_GED_PWR_DOWN_EVT | ACPI_GED_CPU_HOTPLUG_EVT; + + if (ms->ram_slots) { + event |= ACPI_GED_MEM_HOTPLUG_EVT; +@@ -660,11 +671,22 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms) + + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vms->memmap[VIRT_ACPI_GED].base); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 1, vms->memmap[VIRT_PCDIMM_ACPI].base); ++ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 3, vms->memmap[VIRT_CPUHP_ACPI].base); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, qdev_get_gpio_in(vms->gic, irq)); + + return dev; + } + ++static void virt_add_gic_cpuhp_notifier(VirtMachineState *vms) ++{ ++ MachineClass *mc = MACHINE_GET_CLASS(vms); ++ ++ if (mc->has_hotpluggable_cpus && vms->gic_version >= VIRT_GIC_VERSION_3) { ++ Notifier *cpuhp_notifier = gicv3_cpuhp_notifier(vms->gic); ++ notifier_list_add(&vms->cpuhp_notifiers, cpuhp_notifier); ++ } ++} ++ + static void create_its(VirtMachineState *vms) + { + const char *itsclass = its_class_name(); +@@ -713,6 +735,107 @@ static void create_v2m(VirtMachineState *vms) + vms->msi_controller = VIRT_MSI_CTRL_GICV2M; + } + ++/* ++ * Mapping from the output timer irq lines from the CPU to the GIC PPI inputs ++ * we use for the virt board. 
++ */ ++const int timer_irq[] = { ++ [GTIMER_PHYS] = ARCH_TIMER_NS_EL1_IRQ, ++ [GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ, ++ [GTIMER_HYP] = ARCH_TIMER_NS_EL2_IRQ, ++ [GTIMER_SEC] = ARCH_TIMER_S_EL1_IRQ, ++}; ++ ++static void unwire_gic_cpu_irqs(VirtMachineState *vms, CPUState *cs) ++{ ++ MachineState *ms = MACHINE(vms); ++ unsigned int max_cpus = ms->smp.max_cpus; ++ DeviceState *cpudev = DEVICE(cs); ++ DeviceState *gicdev = vms->gic; ++ int cpu = CPU(cs)->cpu_index; ++ int type = vms->gic_version; ++ int irq; ++ ++ if (!vms->cpu_hotplug_enabled) { ++ max_cpus = ms->smp.cpus; ++ } ++ ++ for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { ++ qdev_disconnect_gpio_out_named(cpudev, NULL, irq); ++ } ++ ++ if (type != VIRT_GIC_VERSION_2) { ++ qdev_disconnect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", ++ 0); ++ } else if (vms->virt) { ++ qdev_disconnect_gpio_out_named(gicdev, SYSBUS_DEVICE_GPIO_IRQ, ++ cpu + 4 * max_cpus); ++ } ++ ++ /* ++ * RFC: Question: This currently does not takes care of intimating the ++ * devices which might be sitting on system bus. Do we need a ++ * sysbus_disconnect_irq() which also does the job of notification beside ++ * disconnection? 
++ */ ++ qdev_disconnect_gpio_out_named(cpudev, "pmu-interrupt", 0); ++ qdev_disconnect_gpio_out_named(gicdev, SYSBUS_DEVICE_GPIO_IRQ, cpu); ++ qdev_disconnect_gpio_out_named(gicdev, ++ SYSBUS_DEVICE_GPIO_IRQ, cpu + max_cpus); ++ qdev_disconnect_gpio_out_named(gicdev, SYSBUS_DEVICE_GPIO_IRQ, ++ cpu + 2 * max_cpus); ++ qdev_disconnect_gpio_out_named(gicdev, SYSBUS_DEVICE_GPIO_IRQ, ++ cpu + 3 * max_cpus); ++} ++ ++static void wire_gic_cpu_irqs(VirtMachineState *vms, CPUState *cs) ++{ ++ MachineState *ms = MACHINE(vms); ++ unsigned int max_cpus = ms->smp.max_cpus; ++ DeviceState *cpudev = DEVICE(cs); ++ DeviceState *gicdev = vms->gic; ++ int cpu = CPU(cs)->cpu_index; ++ int type = vms->gic_version; ++ SysBusDevice *gicbusdev; ++ int intidbase; ++ int irq; ++ ++ if (!vms->cpu_hotplug_enabled) { ++ max_cpus = ms->smp.cpus; ++ } ++ ++ intidbase = NUM_IRQS + cpu * GIC_INTERNAL; ++ ++ for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { ++ qdev_connect_gpio_out(cpudev, irq, ++ qdev_get_gpio_in(gicdev, ++ intidbase + timer_irq[irq])); ++ } ++ ++ gicbusdev = SYS_BUS_DEVICE(gicdev); ++ if (type != VIRT_GIC_VERSION_2) { ++ qemu_irq qirq = qdev_get_gpio_in(gicdev, ++ intidbase + ARCH_GIC_MAINT_IRQ); ++ qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", ++ 0, qirq); ++ } else if (vms->virt) { ++ qemu_irq qirq = qdev_get_gpio_in(gicdev, ++ intidbase + ARCH_GIC_MAINT_IRQ); ++ sysbus_connect_irq(gicbusdev, cpu + 4 * max_cpus, qirq); ++ } ++ ++ qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, ++ qdev_get_gpio_in(gicdev, ++ intidbase + VIRTUAL_PMU_IRQ)); ++ sysbus_connect_irq(gicbusdev, cpu, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); ++ sysbus_connect_irq(gicbusdev, cpu + max_cpus, ++ qdev_get_gpio_in(cpudev, ARM_CPU_FIQ)); ++ sysbus_connect_irq(gicbusdev, cpu + 2 * max_cpus, ++ qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ)); ++ sysbus_connect_irq(gicbusdev, cpu + 3 * max_cpus, ++ qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ)); ++} ++ + static void create_gic(VirtMachineState 
*vms, MemoryRegion *mem) + { + MachineState *ms = MACHINE(vms); +@@ -721,9 +844,14 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + const char *gictype; + int i; + unsigned int smp_cpus = ms->smp.cpus; ++ unsigned int max_cpus = ms->smp.max_cpus; + uint32_t nb_redist_regions = 0; + int revision; + ++ if (!vms->cpu_hotplug_enabled) { ++ max_cpus = ms->smp.cpus; ++ } ++ + if (vms->gic_version == VIRT_GIC_VERSION_2) { + gictype = gic_class_name(); + } else { +@@ -745,7 +873,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + } + vms->gic = qdev_new(gictype); + qdev_prop_set_uint32(vms->gic, "revision", revision); +- qdev_prop_set_uint32(vms->gic, "num-cpu", smp_cpus); ++ qdev_prop_set_uint32(vms->gic, "num-cpu", max_cpus); + /* Note that the num-irq property counts both internal and external + * interrupts; there are always 32 of the former (mandated by GIC spec). + */ +@@ -757,7 +885,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + if (vms->gic_version != VIRT_GIC_VERSION_2) { + QList *redist_region_count; + uint32_t redist0_capacity = virt_redist_capacity(vms, VIRT_GIC_REDIST); +- uint32_t redist0_count = MIN(smp_cpus, redist0_capacity); ++ uint32_t redist0_count = MIN(max_cpus, redist0_capacity); + + nb_redist_regions = virt_gicv3_redist_region_count(vms); + +@@ -768,7 +896,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + virt_redist_capacity(vms, VIRT_HIGH_GIC_REDIST2); + + qlist_append_int(redist_region_count, +- MIN(smp_cpus - redist0_count, redist1_capacity)); ++ MIN(max_cpus - redist0_count, redist1_capacity)); + } + qdev_prop_set_array(vms->gic, "redist-region-count", + redist_region_count); +@@ -808,46 +936,7 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + * and the GIC's IRQ/FIQ/VIRQ/VFIQ interrupt outputs to the CPU's inputs. 
+ */ + for (i = 0; i < smp_cpus; i++) { +- DeviceState *cpudev = DEVICE(qemu_get_cpu(i)); +- int intidbase = NUM_IRQS + i * GIC_INTERNAL; +- /* Mapping from the output timer irq lines from the CPU to the +- * GIC PPI inputs we use for the virt board. +- */ +- const int timer_irq[] = { +- [GTIMER_PHYS] = ARCH_TIMER_NS_EL1_IRQ, +- [GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ, +- [GTIMER_HYP] = ARCH_TIMER_NS_EL2_IRQ, +- [GTIMER_SEC] = ARCH_TIMER_S_EL1_IRQ, +- }; +- +- for (unsigned irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { +- qdev_connect_gpio_out(cpudev, irq, +- qdev_get_gpio_in(vms->gic, +- intidbase + timer_irq[irq])); +- } +- +- if (vms->gic_version != VIRT_GIC_VERSION_2) { +- qemu_irq irq = qdev_get_gpio_in(vms->gic, +- intidbase + ARCH_GIC_MAINT_IRQ); +- qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt", +- 0, irq); +- } else if (vms->virt) { +- qemu_irq irq = qdev_get_gpio_in(vms->gic, +- intidbase + ARCH_GIC_MAINT_IRQ); +- sysbus_connect_irq(gicbusdev, i + 4 * smp_cpus, irq); +- } +- +- qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0, +- qdev_get_gpio_in(vms->gic, intidbase +- + VIRTUAL_PMU_IRQ)); +- +- sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ)); +- sysbus_connect_irq(gicbusdev, i + smp_cpus, +- qdev_get_gpio_in(cpudev, ARM_CPU_FIQ)); +- sysbus_connect_irq(gicbusdev, i + 2 * smp_cpus, +- qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ)); +- sysbus_connect_irq(gicbusdev, i + 3 * smp_cpus, +- qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ)); ++ wire_gic_cpu_irqs(vms, qemu_get_cpu(i)); + } + + fdt_add_gic_node(vms); +@@ -857,6 +946,9 @@ static void create_gic(VirtMachineState *vms, MemoryRegion *mem) + } else if (vms->gic_version == VIRT_GIC_VERSION_2) { + create_v2m(vms); + } ++ ++ /* add GIC CPU hot(un)plug update notifier */ ++ virt_add_gic_cpuhp_notifier(vms); + } + + static void create_uart(const VirtMachineState *vms, int uart, +@@ -1289,7 +1381,7 @@ static FWCfgState *create_fw_cfg(const VirtMachineState *vms, AddressSpace *as) 
+ char *nodename; + + fw_cfg = fw_cfg_init_mem_wide(base + 8, base, 8, base + 16, as); +- fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)ms->smp.cpus); ++ fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, vms->boot_cpus); + + nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base); + qemu_fdt_add_subnode(ms->fdt, nodename); +@@ -1962,12 +2054,15 @@ static void finalize_gic_version(VirtMachineState *vms) + */ + static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + { ++ CPUArchIdList *possible_cpus = vms->parent.possible_cpus; + int max_cpus = MACHINE(vms)->smp.max_cpus; +- bool aarch64, pmu, steal_time; ++ MachineState *ms = MACHINE(vms); ++ bool aarch64, steal_time; + CPUState *cpu; ++ int n; + + aarch64 = object_property_get_bool(OBJECT(first_cpu), "aarch64", NULL); +- pmu = object_property_get_bool(OBJECT(first_cpu), "pmu", NULL); ++ vms->pmu = object_property_get_bool(OBJECT(first_cpu), "pmu", NULL); + steal_time = object_property_get_bool(OBJECT(first_cpu), + "kvm-steal-time", NULL); + +@@ -1994,8 +2089,13 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + memory_region_add_subregion(sysmem, pvtime_reg_base, pvtime); + } + +- CPU_FOREACH(cpu) { +- if (pmu) { ++ for (n = 0; n < possible_cpus->len; n++) { ++ cpu = qemu_get_possible_cpu(n); ++ if (!qemu_present_cpu(cpu)) { ++ continue; ++ } ++ ++ if (vms->pmu) { + assert(arm_feature(&ARM_CPU(cpu)->env, ARM_FEATURE_PMU)); + if (kvm_irqchip_in_kernel()) { + kvm_arm_pmu_set_irq(cpu, VIRTUAL_PMU_IRQ); +@@ -2020,6 +2120,157 @@ static void virt_cpu_post_init(VirtMachineState *vms, MemoryRegion *sysmem) + } + } + } ++ ++ if (kvm_enabled() || tcg_enabled()) { ++ for (n = 0; n < possible_cpus->len; n++) { ++ cpu = qemu_get_possible_cpu(n); ++ if (!qemu_present_cpu(cpu)) { ++ continue; ++ } ++ ++ /* ++ * Now, GIC has been sized with possible CPUs and we dont require ++ * disabled vCPU objects to be represented in the QOM. 
Release the ++ * disabled ARMCPU objects earlier used during init for pre-sizing. ++ * ++ * We fake to the guest through ACPI about the presence(_STA.PRES=1) ++ * of these non-existent vCPUs at VMM/qemu and present these as ++ * disabled vCPUs(_STA.ENA=0) so that they cant be used. These vCPUs ++ * can be later added to the guest through hotplug exchanges when ++ * ARMCPU objects are created back again using 'device_add' QMP ++ * command. ++ */ ++ /* ++ * RFC: Question: Other approach could've been to keep them forever ++ * and release it only once when qemu exits as part of finalize or ++ * when new vCPU is hotplugged. In the later old could be released ++ * for the newly created object for the same vCPU? ++ */ ++ if (!qemu_enabled_cpu(cpu)) { ++ CPUArchId *cpu_slot; ++ cpu_slot = virt_find_cpu_slot(ms, cpu->cpu_index); ++ cpu_slot->cpu = NULL; ++ object_unref(OBJECT(cpu)); ++ } ++ } ++ } ++} ++ ++static void virt_cpu_set_properties(Object *cpuobj, const CPUArchId *cpu_slot, ++ Error **errp) ++{ ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ VirtMachineState *vms = VIRT_MACHINE(ms); ++ Error *local_err = NULL; ++ VirtMachineClass *vmc; ++ ++ vmc = VIRT_MACHINE_GET_CLASS(ms); ++ ++ /* now, set the cpu object property values */ ++ numa_cpu_pre_plug(cpu_slot, DEVICE(cpuobj), &local_err); ++ if (local_err) { ++ goto out; ++ } ++ ++ object_property_set_int(cpuobj, "mp-affinity", cpu_slot->arch_id, NULL); ++ ++ if (!vms->secure) { ++ object_property_set_bool(cpuobj, "has_el3", false, NULL); ++ } ++ ++ if (!vms->virt && object_property_find(cpuobj, "has_el2")) { ++ object_property_set_bool(cpuobj, "has_el2", false, NULL); ++ } ++ ++ if (vmc->kvm_no_adjvtime && ++ object_property_find(cpuobj, "kvm-no-adjvtime")) { ++ object_property_set_bool(cpuobj, "kvm-no-adjvtime", true, NULL); ++ } ++ ++ if (vmc->no_kvm_steal_time && ++ object_property_find(cpuobj, "kvm-steal-time")) { ++ object_property_set_bool(cpuobj, "kvm-steal-time", false, NULL); ++ } ++ ++ if 
(vmc->no_pmu && object_property_find(cpuobj, "pmu")) { ++ object_property_set_bool(cpuobj, "pmu", false, NULL); ++ } ++ ++ if (vmc->no_tcg_lpa2 && object_property_find(cpuobj, "lpa2")) { ++ object_property_set_bool(cpuobj, "lpa2", false, NULL); ++ } ++ ++ if (object_property_find(cpuobj, "reset-cbar")) { ++ object_property_set_int(cpuobj, "reset-cbar", ++ vms->memmap[VIRT_CPUPERIPHS].base, ++ &local_err); ++ if (local_err) { ++ goto out; ++ } ++ } ++ ++ /* link already initialized {secure,tag}-memory regions to this cpu */ ++ object_property_set_link(cpuobj, "memory", OBJECT(vms->sysmem), &local_err); ++ if (local_err) { ++ goto out; ++ } ++ ++ if (vms->secure) { ++ object_property_set_link(cpuobj, "secure-memory", ++ OBJECT(vms->secure_sysmem), &local_err); ++ if (local_err) { ++ goto out; ++ } ++ } ++ ++ if (vms->mte) { ++ if (!object_property_find(cpuobj, "tag-memory")) { ++ error_setg(&local_err, "MTE requested, but not supported " ++ "by the guest CPU"); ++ if (local_err) { ++ goto out; ++ } ++ } ++ ++ object_property_set_link(cpuobj, "tag-memory", OBJECT(vms->tag_sysmem), ++ &local_err); ++ if (local_err) { ++ goto out; ++ } ++ ++ if (vms->secure) { ++ object_property_set_link(cpuobj, "secure-tag-memory", ++ OBJECT(vms->secure_tag_sysmem), ++ &local_err); ++ if (local_err) { ++ goto out; ++ } ++ } ++ } ++ ++ /* ++ * RFC: Question: this must only be called for the hotplugged cpus. For the ++ * cold booted secondary cpus this is being taken care in arm_load_kernel() ++ * in boot.c. Perhaps we should remove that code now? 
++ */ ++ if (vms->psci_conduit != QEMU_PSCI_CONDUIT_DISABLED) { ++ object_property_set_int(cpuobj, "psci-conduit", vms->psci_conduit, ++ &local_err); ++ if (local_err) { ++ goto out; ++ } ++ ++ /* Secondary CPUs start in PSCI powered-down state */ ++ if (CPU(cpuobj)->cpu_index > 0) { ++ object_property_set_bool(cpuobj, "start-powered-off", true, NULL); ++ } ++ } ++ ++out: ++ if (local_err) { ++ error_propagate(errp, local_err); ++ } ++ return; + } + + static void machvirt_init(MachineState *machine) +@@ -2028,22 +2279,25 @@ static void machvirt_init(MachineState *machine) + VirtMachineClass *vmc = VIRT_MACHINE_GET_CLASS(machine); + MachineClass *mc = MACHINE_GET_CLASS(machine); + const CPUArchIdList *possible_cpus; +- MemoryRegion *sysmem = get_system_memory(); ++ MemoryRegion *secure_tag_sysmem = NULL; + MemoryRegion *secure_sysmem = NULL; + MemoryRegion *tag_sysmem = NULL; +- MemoryRegion *secure_tag_sysmem = NULL; ++ MemoryRegion *sysmem; + int n, virt_max_cpus; + bool firmware_loaded; + bool aarch64 = true; + bool has_ged = !vmc->no_ged; + unsigned int smp_cpus = machine->smp.cpus; + unsigned int max_cpus = machine->smp.max_cpus; ++ ObjectClass *cpu_class; + + if (!cpu_type_valid(machine->cpu_type)) { + error_report("mach-virt: CPU type %s not supported", machine->cpu_type); + exit(1); + } + ++ finalize_gic_version(vms); ++ + possible_cpus = mc->possible_cpu_arch_ids(machine); + + /* +@@ -2070,10 +2324,7 @@ static void machvirt_init(MachineState *machine) + virt_set_memmap(vms, pa_bits); + } + +- /* We can probe only here because during property set +- * KVM is not available yet +- */ +- finalize_gic_version(vms); ++ sysmem = vms->sysmem = get_system_memory(); + + if (vms->secure) { + /* +@@ -2082,7 +2333,7 @@ static void machvirt_init(MachineState *machine) + * containing the system memory at low priority; any secure-only + * devices go in at higher priority and take precedence. 
+ */ +- secure_sysmem = g_new(MemoryRegion, 1); ++ secure_sysmem = vms->secure_sysmem = g_new(MemoryRegion, 1); + memory_region_init(secure_sysmem, OBJECT(machine), "secure-memory", + UINT64_MAX); + memory_region_add_subregion_overlap(secure_sysmem, 0, sysmem, -1); +@@ -2155,107 +2406,116 @@ static void machvirt_init(MachineState *machine) + exit(1); + } + ++ if (vms->mte) { ++ /* Create the memory region only once, but link to all cpus later */ ++ tag_sysmem = vms->tag_sysmem = g_new(MemoryRegion, 1); ++ memory_region_init(tag_sysmem, OBJECT(machine), ++ "tag-memory", UINT64_MAX / 32); ++ ++ if (vms->secure) { ++ secure_tag_sysmem = vms->secure_tag_sysmem = g_new(MemoryRegion, 1); ++ memory_region_init(secure_tag_sysmem, OBJECT(machine), ++ "secure-tag-memory", UINT64_MAX / 32); ++ ++ /* As with ram, secure-tag takes precedence over tag. */ ++ memory_region_add_subregion_overlap(secure_tag_sysmem, 0, ++ tag_sysmem, -1); ++ } ++ } ++ + create_fdt(vms); + ++ cpu_class = object_class_by_name(machine->cpu_type); ++ has_ged = has_ged && firmware_loaded && ++ virt_is_acpi_enabled(vms) && ++ !!object_class_dynamic_cast(cpu_class, TYPE_AARCH64_CPU); ++ ++ if (tcg_enabled() || hvf_enabled() || qtest_enabled() || ++ (kvm_enabled() && !kvm_smccc_filter_enabled()) || ++ (vms->gic_version < VIRT_GIC_VERSION_3) || !has_ged) { ++ vms->cpu_hotplug_enabled = false; ++ } else { ++ vms->cpu_hotplug_enabled = true; ++ } ++ ++ if (!vms->cpu_hotplug_enabled) { ++ if (machine->smp.max_cpus > smp_cpus) { ++ warn_report("cpu hotplug feature has been disabled"); ++ } ++ } ++ ++ notifier_list_init(&vms->cpuhp_notifiers); + assert(possible_cpus->len == max_cpus); + for (n = 0; n < possible_cpus->len; n++) { + Object *cpuobj; + CPUState *cs; + +- if (n >= smp_cpus) { ++ if (!vms->cpu_hotplug_enabled && n >= smp_cpus) { + break; + } + + cpuobj = object_new(possible_cpus->cpus[n].type); +- object_property_set_int(cpuobj, "mp-affinity", +- possible_cpus->cpus[n].arch_id, NULL); +- + cs = 
CPU(cpuobj); +- cs->cpu_index = n; +- +- numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], DEVICE(cpuobj), +- &error_fatal); + + aarch64 &= object_property_get_bool(cpuobj, "aarch64", NULL); ++ object_property_set_int(cpuobj, "socket-id", ++ virt_get_socket_id(machine, n), NULL); ++ object_property_set_int(cpuobj, "cluster-id", ++ virt_get_cluster_id(machine, n), NULL); ++ object_property_set_int(cpuobj, "core-id", ++ virt_get_core_id(machine, n), NULL); ++ object_property_set_int(cpuobj, "thread-id", ++ virt_get_thread_id(machine, n), NULL); ++ ++ if (n < smp_cpus) { ++ qdev_realize(DEVICE(cpuobj), NULL, &error_fatal); ++ object_unref(cpuobj); ++ } else { ++ CPUArchId *cpu_slot; + +- if (!vms->secure) { +- object_property_set_bool(cpuobj, "has_el3", false, NULL); +- } +- +- if (!vms->virt && object_property_find(cpuobj, "has_el2")) { +- object_property_set_bool(cpuobj, "has_el2", false, NULL); +- } +- +- if (vmc->kvm_no_adjvtime && +- object_property_find(cpuobj, "kvm-no-adjvtime")) { +- object_property_set_bool(cpuobj, "kvm-no-adjvtime", true, NULL); +- } +- +- if (vmc->no_kvm_steal_time && +- object_property_find(cpuobj, "kvm-steal-time")) { +- object_property_set_bool(cpuobj, "kvm-steal-time", false, NULL); +- } +- +- if (vmc->no_pmu && object_property_find(cpuobj, "pmu")) { +- object_property_set_bool(cpuobj, "pmu", false, NULL); +- } +- +- if (vmc->no_tcg_lpa2 && object_property_find(cpuobj, "lpa2")) { +- object_property_set_bool(cpuobj, "lpa2", false, NULL); +- } +- +- if (object_property_find(cpuobj, "reset-cbar")) { +- object_property_set_int(cpuobj, "reset-cbar", +- vms->memmap[VIRT_CPUPERIPHS].base, +- &error_abort); +- } +- +- object_property_set_link(cpuobj, "memory", OBJECT(sysmem), +- &error_abort); +- if (vms->secure) { +- object_property_set_link(cpuobj, "secure-memory", +- OBJECT(secure_sysmem), &error_abort); +- } +- +- if (vms->mte) { +- /* Create the memory region only once, but link to all cpus. 
*/ +- if (!tag_sysmem) { +- /* +- * The property exists only if MemTag is supported. +- * If it is, we must allocate the ram to back that up. +- */ +- if (!object_property_find(cpuobj, "tag-memory")) { +- error_report("MTE requested, but not supported " +- "by the guest CPU"); +- exit(1); +- } +- +- tag_sysmem = g_new(MemoryRegion, 1); +- memory_region_init(tag_sysmem, OBJECT(machine), +- "tag-memory", UINT64_MAX / 32); +- +- if (vms->secure) { +- secure_tag_sysmem = g_new(MemoryRegion, 1); +- memory_region_init(secure_tag_sysmem, OBJECT(machine), +- "secure-tag-memory", UINT64_MAX / 32); ++ /* handling for vcpus which are yet to be hot-plugged */ ++ cs->cpu_index = n; ++ cpu_slot = virt_find_cpu_slot(machine, cs->cpu_index); + +- /* As with ram, secure-tag takes precedence over tag. */ +- memory_region_add_subregion_overlap(secure_tag_sysmem, 0, +- tag_sysmem, -1); +- } +- } ++ /* ++ * ARM host vCPU features need to be fixed at the boot time. But as ++ * per current approach this CPU object will be destroyed during ++ * cpu_post_init(). During hotplug of vCPUs these properties are ++ * initialized again. ++ */ ++ virt_cpu_set_properties(cpuobj, cpu_slot, &error_fatal); + +- object_property_set_link(cpuobj, "tag-memory", OBJECT(tag_sysmem), +- &error_abort); +- if (vms->secure) { +- object_property_set_link(cpuobj, "secure-tag-memory", +- OBJECT(secure_tag_sysmem), +- &error_abort); ++ /* ++ * For KVM, we shall be pre-creating the now disabled/un-plugged ++ * possible host vcpus and park them till the time they are ++ * actually hot plugged. This is required to pre-size the host ++ * GICC and GICR with the all possible vcpus for this VM. ++ */ ++ if (kvm_enabled()) { ++ kvm_arm_create_host_vcpu(ARM_CPU(cs)); + } ++ /* ++ * Add disabled vCPU to CPU slot during the init phase of the virt ++ * machine ++ * 1. We need this ARMCPU object during the GIC init. This object ++ * will facilitate in pre-realizing the GIC.
Any info like ++ * mp-affinity(required to derive gicr_type) etc. could still be ++ * fetched while preserving QOM abstraction akin to realized ++ * vCPUs. ++ * 2. Now, after initialization of the virt machine is complete we ++ * could use two approaches to deal with this ARMCPU object: ++ * (i) re-use this ARMCPU object during hotplug of this vCPU. ++ * OR ++ * (ii) defer release this ARMCPU object after gic has been ++ * initialized or during pre-plug phase when a vCPU is ++ * hotplugged. ++ * ++ * We will use the (ii) approach and release the ARMCPU objects ++ * after GIC and machine has been fully initialized during ++ * machine_init_done() phase. ++ */ ++ cpu_slot->cpu = OBJECT(cs); + } +- +- qdev_realize(DEVICE(cpuobj), NULL, &error_fatal); +- object_unref(cpuobj); + } + fdt_add_timer_nodes(vms); + fdt_add_cpu_nodes(vms); +@@ -2267,6 +2527,10 @@ static void machvirt_init(MachineState *machine) + + create_gic(vms, sysmem); + ++ if (has_ged) { ++ vms->acpi_dev = create_acpi_ged(vms); ++ } ++ + virt_cpu_post_init(vms, sysmem); + + fdt_add_pmu_nodes(vms); +@@ -2289,9 +2553,7 @@ static void machvirt_init(MachineState *machine) + + create_pcie(vms); + +- if (has_ged && aarch64 && firmware_loaded && virt_is_acpi_enabled(vms)) { +- vms->acpi_dev = create_acpi_ged(vms); +- } else { ++ if (!has_ged) { + create_gpio_devices(vms, VIRT_GPIO, sysmem); + } + +@@ -2661,10 +2923,59 @@ static int64_t virt_get_default_cpu_node_id(const MachineState *ms, int idx) + return socket_id % ms->numa_state->num_nodes; + } + ++static int virt_get_socket_id(const MachineState *ms, int cpu_index) ++{ ++ assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len); ++ ++ return ms->possible_cpus->cpus[cpu_index].props.socket_id; ++} ++ ++static int virt_get_cluster_id(const MachineState *ms, int cpu_index) ++{ ++ assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len); ++ ++ return ms->possible_cpus->cpus[cpu_index].props.cluster_id; ++} ++ ++static int virt_get_core_id(const 
MachineState *ms, int cpu_index) ++{ ++ assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len); ++ ++ return ms->possible_cpus->cpus[cpu_index].props.core_id; ++} ++ ++static int virt_get_thread_id(const MachineState *ms, int cpu_index) ++{ ++ assert(cpu_index >= 0 && cpu_index < ms->possible_cpus->len); ++ ++ return ms->possible_cpus->cpus[cpu_index].props.thread_id; ++} ++ ++static int ++virt_get_cpu_id_from_cpu_topo(const MachineState *ms, DeviceState *dev) ++{ ++ int cpu_id, sock_vcpu_num, clus_vcpu_num, core_vcpu_num; ++ ARMCPU *cpu = ARM_CPU(dev); ++ ++ /* calculate total logical cpus across socket/cluster/core */ ++ sock_vcpu_num = cpu->socket_id * (ms->smp.threads * ms->smp.cores * ++ ms->smp.clusters); ++ clus_vcpu_num = cpu->cluster_id * (ms->smp.threads * ms->smp.cores); ++ core_vcpu_num = cpu->core_id * ms->smp.threads; ++ ++ /* get vcpu-id(logical cpu index) for this vcpu from this topology */ ++ cpu_id = (sock_vcpu_num + clus_vcpu_num + core_vcpu_num) + cpu->thread_id; ++ ++ assert(cpu_id >= 0 && cpu_id < ms->possible_cpus->len); ++ ++ return cpu_id; ++} ++ + static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + { + int n; + unsigned int max_cpus = ms->smp.max_cpus; ++ unsigned int smp_threads = ms->smp.threads; + VirtMachineState *vms = VIRT_MACHINE(ms); + MachineClass *mc = MACHINE_GET_CLASS(vms); + +@@ -2678,6 +2989,7 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + ms->possible_cpus->len = max_cpus; + for (n = 0; n < ms->possible_cpus->len; n++) { + ms->possible_cpus->cpus[n].type = ms->cpu_type; ++ ms->possible_cpus->cpus[n].vcpus_count = smp_threads; + ms->possible_cpus->cpus[n].arch_id = + virt_cpu_mp_affinity(vms, n); + +@@ -2698,6 +3010,50 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms) + return ms->possible_cpus; + } + ++static CPUArchId *virt_find_cpu_slot(MachineState *ms, int vcpuid) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(ms); ++ CPUArchId *found_cpu; 
++ uint64_t mp_affinity; ++ ++ assert(vcpuid >= 0 && vcpuid < ms->possible_cpus->len); ++ ++ /* ++ * RFC: Question: ++ * TBD: Should mp-affinity be treated as MPIDR? ++ */ ++ mp_affinity = virt_cpu_mp_affinity(vms, vcpuid); ++ found_cpu = &ms->possible_cpus->cpus[vcpuid]; ++ ++ assert(found_cpu->arch_id == mp_affinity); ++ ++ /* ++ * RFC: Question: ++ * Slot-id is the index where vCPU with certain arch-id(=mpidr/ap-affinity) ++ * is plugged. For Host KVM, MPIDR for vCPU is derived using vcpu-id. ++ * As I understand, MPIDR and vcpu-id are property of vCPU but slot-id is ++ * more related to machine? Current code assumes slot-id and vcpu-id are ++ * same i.e. meaning of slot is bit vague. ++ * ++ * Q1: Is there any requirement to clearly represent slot and dissociate it ++ * from vcpu-id? ++ * Q2: Should we make MPIDR within host KVM user configurable? ++ * ++ * +----+----+----+----+----+----+----+----+ ++ * MPIDR ||| Res | Aff2 | Aff1 | Aff0 | ++ * +----+----+----+----+----+----+----+----+ ++ * \ \ \ | | ++ * \ 8bit \ 8bit \ |4bit| ++ * \<------->\<------->\ |<-->| ++ * \ \ \| | ++ * +----+----+----+----+----+----+----+----+ ++ * VCPU-ID | Byte4 | Byte2 | Byte1 | Byte0 | ++ * +----+----+----+----+----+----+----+----+ ++ */ ++ ++ return found_cpu; ++} ++ + static void virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) + { +@@ -2741,6 +3097,244 @@ static void virt_memory_plug(HotplugHandler *hotplug_dev, + dev, &error_abort); + } + ++static void virt_update_gic(VirtMachineState *vms, CPUState *cs) ++{ ++ GICv3CPUHotplugInfo gic_info = { .gic = vms->gic, .cpu = cs }; ++ ++ /* notify gic to stitch GICC to this new cpu */ ++ notifier_list_notify(&vms->cpuhp_notifiers, &gic_info); ++} ++ ++static void virt_cpu_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, ++ Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); ++ MachineState *ms = MACHINE(hotplug_dev); ++ ARMCPU *cpu = ARM_CPU(dev); ++ CPUState *cs = 
CPU(dev); ++ CPUArchId *cpu_slot; ++ ++ /* sanity check the cpu */ ++ if (!object_dynamic_cast(OBJECT(cpu), ms->cpu_type)) { ++ error_setg(errp, "Invalid CPU type, expected cpu type: '%s'", ++ ms->cpu_type); ++ return; ++ } ++ ++ if ((cpu->thread_id < 0) || (cpu->thread_id >= ms->smp.threads)) { ++ error_setg(errp, "Invalid thread-id %u specified, correct range 0:%u", ++ cpu->thread_id, ms->smp.threads - 1); ++ return; ++ } ++ ++ if ((cpu->core_id < 0) || (cpu->core_id >= ms->smp.cores)) { ++ error_setg(errp, "Invalid core-id %d specified, correct range 0:%u", ++ cpu->core_id, ms->smp.cores - 1); ++ return; ++ } ++ ++ if ((cpu->cluster_id < 0) || (cpu->cluster_id >= ms->smp.clusters)) { ++ error_setg(errp, "Invalid cluster-id %u specified, correct range 0:%u", ++ cpu->cluster_id, ms->smp.clusters - 1); ++ return; ++ } ++ ++ if ((cpu->socket_id < 0) || (cpu->socket_id >= ms->smp.sockets)) { ++ error_setg(errp, "Invalid socket-id %u specified, correct range 0:%u", ++ cpu->socket_id, ms->smp.sockets - 1); ++ return; ++ } ++ ++ cs->cpu_index = virt_get_cpu_id_from_cpu_topo(ms, dev); ++ ++ /* Except for cold-booted vCPUs, this should check presence of ACPI GED */ ++ if (cs->cpu_index >= ms->smp.cpus && !vms->acpi_dev) { ++ error_setg(errp, "GED acpi device does not exists"); ++ return; ++ } ++ ++ if (cs->cpu_index >= ms->smp.cpus && !vms->cpu_hotplug_enabled) { ++ error_setg(errp, "CPU [cold|hot]plug not supported on this machine"); ++ return; ++ } ++ ++ cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index); ++ if (qemu_present_cpu(CPU(cpu_slot->cpu))) { ++ error_setg(errp, "cpu(id%d=%d:%d:%d:%d) with arch-id %" PRIu64 " exist", ++ cs->cpu_index, cpu->socket_id, cpu->cluster_id, cpu->core_id, ++ cpu->thread_id, cpu_slot->arch_id); ++ return; ++ } ++ virt_cpu_set_properties(OBJECT(cs), cpu_slot, errp); ++ ++ /* ++ * Fix the GIC for this new vCPU being plugged. 
The QOM CPU object for the ++ * new vCPU needs to be updated in the corresponding QOM GICv3CPUState object ++ * We also need to re-wire the IRQs for this new CPU object. This update ++ * is limited to the QOM only and does not affect KVM. The latter has ++ * already been pre-sized with possible CPU at VM init time. This is a ++ * workaround to the constraints posed by ARM architecture w.r.t supporting ++ * CPU Hotplug. Specification does not exist for the latter. ++ * This patch-up is required both for {cold,hot}-plugged vCPUs. Cold-inited ++ * vCPUs have their GIC state initialized during machvirt_init(). ++ */ ++ if (vms->acpi_dev) { ++ virt_update_gic(vms, cs); ++ wire_gic_cpu_irqs(vms, cs); ++ } ++ ++ /* ++ * To give persistent presence view of vCPUs to the guest, ACPI might need ++ * to fake the presence of the vCPUs to the guest but keep them disabled. ++ * This shall be used during the init of ACPI Hotplug state and hot-unplug ++ */ ++ cs->acpi_persistent = true; ++ ++ if (!dev->hotplugged) { ++ cs->cold_booted = true; ++ } ++#ifdef CONFIG_KVM ++ if (cs->cpu_index >= ms->smp.cpus) { ++ cpu->kvm_sve_finalized = true; ++ } ++#endif ++} ++ ++static void virt_cpu_plug(HotplugHandler *hotplug_dev, DeviceState *dev, ++ Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); ++ MachineState *ms = MACHINE(hotplug_dev); ++ CPUState *cs = CPU(dev); ++ Error *local_err = NULL; ++ CPUArchId *cpu_slot; ++ ++ /* insert the cold/hot-plugged vcpu in the slot */ ++ cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index); ++ cpu_slot->cpu = OBJECT(dev); ++ ++ /* ++ * Update the ACPI Hotplug state both for vCPUs being {hot,cold}-plugged. ++ * vCPUs can be cold-plugged using '-device' option. For vCPUs being hot ++ * plugged, guest is also notified.
++ */ ++ if (vms->acpi_dev) { ++ HotplugHandlerClass *hhc; ++ /* update acpi hotplug state and send cpu hotplug event to guest */ ++ hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev); ++ hhc->plug(HOTPLUG_HANDLER(vms->acpi_dev), dev, &local_err); ++ if (local_err) { ++ goto fail; ++ } ++ /* register this cpu for reset & update F/W info for the next boot */ ++ qemu_register_reset(do_cpu_reset, ARM_CPU(cs)); ++ } ++ ++ vms->boot_cpus++; ++ if (vms->fw_cfg) { ++ fw_cfg_modify_i16(vms->fw_cfg, FW_CFG_NB_CPUS, vms->boot_cpus); ++ } ++ ++ cs->disabled = false; ++ return; ++fail: ++ error_propagate(errp, local_err); ++} ++ ++static void virt_cpu_unplug_request(HotplugHandler *hotplug_dev, ++ DeviceState *dev, Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); ++ HotplugHandlerClass *hhc; ++ ARMCPU *cpu = ARM_CPU(dev); ++ CPUState *cs = CPU(dev); ++ Error *local_err = NULL; ++ ++ if (!vms->acpi_dev || !dev->realized) { ++ error_setg(errp, "GED does not exists or device is not realized!"); ++ return; ++ } ++ ++ if (!vms->cpu_hotplug_enabled) { ++ error_setg(errp, "CPU hot(un)plug not supported on this machine"); ++ return; ++ } ++ ++ /* ++ * UEFI ACPI standard change is required to make both 'enabled' and the ++ * 'online-capable' bit co-exist instead of being mutually exclusive. ++ * check virt_acpi_get_gicc_flags() for more details. ++ * ++ * Disable the unplugging of cold-booted vCPUs as a temporary mitigation. 
++ */ ++ if (cs->cold_booted) { ++ error_setg(errp, "Hot-unplug of cold-booted CPU not supported!"); ++ return; ++ } ++ ++ if (cs->cpu_index == first_cpu->cpu_index) { ++ error_setg(errp, "Boot CPU(id%d=%d:%d:%d:%d) hot-unplug not supported", ++ first_cpu->cpu_index, cpu->socket_id, cpu->cluster_id, ++ cpu->core_id, cpu->thread_id); ++ return; ++ } ++ ++ /* request cpu hotplug from guest */ ++ hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev); ++ hhc->unplug_request(HOTPLUG_HANDLER(vms->acpi_dev), dev, &local_err); ++ if (local_err) { ++ goto fail; ++ } ++ ++ return; ++fail: ++ error_propagate(errp, local_err); ++} ++ ++static void virt_cpu_unplug(HotplugHandler *hotplug_dev, DeviceState *dev, ++ Error **errp) ++{ ++ VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); ++ MachineState *ms = MACHINE(hotplug_dev); ++ HotplugHandlerClass *hhc; ++ CPUState *cs = CPU(dev); ++ Error *local_err = NULL; ++ CPUArchId *cpu_slot; ++ ++ if (!vms->acpi_dev || !dev->realized) { ++ error_setg(errp, "GED does not exists or device is not realized!"); ++ return; ++ } ++ ++ cpu_slot = virt_find_cpu_slot(ms, cs->cpu_index); ++ ++ /* update the acpi cpu hotplug state for cpu hot-unplug */ ++ hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev); ++ hhc->unplug(HOTPLUG_HANDLER(vms->acpi_dev), dev, &local_err); ++ if (local_err) { ++ goto fail; ++ } ++ ++ unwire_gic_cpu_irqs(vms, cs); ++ virt_update_gic(vms, cs); ++ ++ qemu_unregister_reset(do_cpu_reset, ARM_CPU(cs)); ++ vms->boot_cpus--; ++ if (vms->fw_cfg) { ++ fw_cfg_modify_i16(vms->fw_cfg, FW_CFG_NB_CPUS, vms->boot_cpus); ++ } ++ ++ qobject_unref(dev->opts); ++ dev->opts = NULL; ++ ++ cpu_slot->cpu = NULL; ++ cs->disabled = true; ++ ++ return; ++fail: ++ error_propagate(errp, local_err); ++} ++ + static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) + { +@@ -2783,6 +3377,8 @@ static void virt_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev, + qlist_append_str(reserved_regions, 
resv_prop_str); + qdev_prop_set_array(dev, "reserved-regions", reserved_regions); + g_free(resv_prop_str); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ virt_cpu_pre_plug(hotplug_dev, dev, errp); + } + } + +@@ -2804,6 +3400,8 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, + virt_memory_plug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ virt_cpu_plug(hotplug_dev, dev, errp); + } + + if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { +@@ -2861,6 +3459,8 @@ static void virt_machine_device_unplug_request_cb(HotplugHandler *hotplug_dev, + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_unplug_request(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), + errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ virt_cpu_unplug_request(hotplug_dev, dev, errp); + } else { + error_setg(errp, "device unplug request for unsupported device" + " type: %s", object_get_typename(OBJECT(dev))); +@@ -2874,6 +3474,8 @@ static void virt_machine_device_unplug_cb(HotplugHandler *hotplug_dev, + virt_dimm_unplug(hotplug_dev, dev, errp); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) { + virtio_md_pci_unplug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp); ++ } else if (object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { ++ virt_cpu_unplug(hotplug_dev, dev, errp); + } else { + error_setg(errp, "virt: device unplug for unsupported device" + " type: %s", object_get_typename(OBJECT(dev))); +@@ -2888,7 +3490,8 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine, + if (device_is_dynamic_sysbus(mc, dev) || + object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) || + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI) || +- object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { ++ 
object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) || ++ object_dynamic_cast(OBJECT(dev), TYPE_CPU)) { + return HOTPLUG_HANDLER(machine); + } + return NULL; +@@ -2965,6 +3568,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) + #endif + mc->get_default_cpu_node_id = virt_get_default_cpu_node_id; + mc->kvm_type = virt_kvm_type; ++ mc->has_hotpluggable_cpus = true; + assert(!mc->get_hotplug_handler); + mc->get_hotplug_handler = virt_machine_get_hotplug_handler; + hc->pre_plug = virt_machine_device_pre_plug_cb; +@@ -3115,6 +3719,9 @@ static void virt_instance_init(Object *obj) + /* EL2 is also disabled by default, for similar reasons */ + vms->virt = false; + ++ /* CPU hotplug is enabled by default */ ++ vms->cpu_hotplug_enabled = true; ++ + /* High memory is enabled by default */ + vms->highmem = true; + vms->highmem_compact = !vmc->no_highmem_compact; +diff --git a/hw/core/gpio.c b/hw/core/gpio.c +index 80d07a6..abb164d 100644 +--- a/hw/core/gpio.c ++++ b/hw/core/gpio.c +@@ -143,7 +143,7 @@ qemu_irq qdev_get_gpio_out_connector(DeviceState *dev, const char *name, int n) + + /* disconnect a GPIO output, returning the disconnected input (if any) */ + +-static qemu_irq qdev_disconnect_gpio_out_named(DeviceState *dev, ++qemu_irq qdev_disconnect_gpio_out_named(DeviceState *dev, + const char *name, int n) + { + char *propname = g_strdup_printf("%s[%d]", +diff --git a/hw/intc/arm_gicv3.c b/hw/intc/arm_gicv3.c +index 0b8f79a..e1c7c8c 100644 +--- a/hw/intc/arm_gicv3.c ++++ b/hw/intc/arm_gicv3.c +@@ -410,6 +410,7 @@ static void arm_gicv3_class_init(ObjectClass *klass, void *data) + ARMGICv3Class *agc = ARM_GICV3_CLASS(klass); + + agcc->post_load = arm_gicv3_post_load; ++ agcc->init_cpu_reginfo = gicv3_init_cpu_reginfo; + device_class_set_parent_realize(dc, arm_gic_realize, &agc->parent_realize); + } + +diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c +index 2ebf880..5667d9f 100644 +--- a/hw/intc/arm_gicv3_common.c ++++ 
b/hw/intc/arm_gicv3_common.c +@@ -25,6 +25,7 @@ + #include "qapi/error.h" + #include "qemu/module.h" + #include "qemu/error-report.h" ++#include "hw/boards.h" + #include "hw/core/cpu.h" + #include "hw/intc/arm_gicv3_common.h" + #include "hw/qdev-properties.h" +@@ -33,7 +34,6 @@ + #include "hw/arm/linux-boot-if.h" + #include "sysemu/kvm.h" + +- + static void gicv3_gicd_no_migration_shift_bug_post_load(GICv3State *cs) + { + if (cs->gicd_no_migration_shift_bug) { +@@ -322,6 +322,61 @@ void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, + } + } + ++static int arm_gicv3_get_proc_num(GICv3State *s, CPUState *cpu) ++{ ++ uint64_t mp_affinity; ++ uint64_t gicr_typer; ++ uint64_t cpu_affid; ++ int i; ++ ++ mp_affinity = object_property_get_uint(OBJECT(cpu), "mp-affinity", NULL); ++ /* match the cpu mp-affinity to get the gic cpuif number */ ++ for (i = 0; i < s->num_cpu; i++) { ++ gicr_typer = s->cpu[i].gicr_typer; ++ cpu_affid = (gicr_typer >> 32) & 0xFFFFFF; ++ if (cpu_affid == mp_affinity) { ++ return i; ++ } ++ } ++ ++ return -1; ++} ++ ++static void arm_gicv3_cpu_update_notifier(Notifier *notifier, void * data) ++{ ++ GICv3CPUHotplugInfo *gic_info = (GICv3CPUHotplugInfo *)data; ++ CPUState *cpu = gic_info->cpu; ++ ARMGICv3CommonClass *c; ++ int gic_cpuif_num; ++ GICv3State *s; ++ ++ s = ARM_GICV3_COMMON(gic_info->gic); ++ c = ARM_GICV3_COMMON_GET_CLASS(s); ++ ++ /* this shall get us mapped gicv3 cpuif corresponding to mpidr */ ++ gic_cpuif_num = arm_gicv3_get_proc_num(s, cpu); ++ if (gic_cpuif_num < 0) { ++ error_report("Failed to associate cpu %d with any GIC cpuif", ++ cpu->cpu_index); ++ abort(); ++ } ++ ++ /* check if update is for vcpu hot-unplug */ ++ if (qemu_enabled_cpu(cpu)) { ++ s->cpu[gic_cpuif_num].cpu = NULL; ++ return; ++ } ++ ++ /* re-stitch the gic cpuif to this new cpu */ ++ gicv3_set_gicv3state(cpu, &s->cpu[gic_cpuif_num]); ++ gicv3_set_cpustate(&s->cpu[gic_cpuif_num], cpu); ++ ++ /* initialize the registers info for this newly 
added cpu */ ++ if (c->init_cpu_reginfo) { ++ c->init_cpu_reginfo(cpu); ++ } ++} ++ + static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + { + GICv3State *s = ARM_GICV3_COMMON(dev); +@@ -392,10 +447,13 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + s->cpu = g_new0(GICv3CPUState, s->num_cpu); + + for (i = 0; i < s->num_cpu; i++) { +- CPUState *cpu = qemu_get_cpu(i); ++ CPUState *cpu = qemu_get_possible_cpu(i) ? : qemu_get_cpu(i); + uint64_t cpu_affid; + +- s->cpu[i].cpu = cpu; ++ if (qemu_enabled_cpu(cpu)) { ++ s->cpu[i].cpu = cpu; ++ } ++ + s->cpu[i].gic = s; + /* Store GICv3CPUState in CPUARMState gicv3state pointer */ + gicv3_set_gicv3state(cpu, &s->cpu[i]); +@@ -441,13 +499,20 @@ static void arm_gicv3_common_realize(DeviceState *dev, Error **errp) + s->cpu[cpuidx - 1].gicr_typer |= GICR_TYPER_LAST; + } + ++ s->cpu_update_notifier.notify = arm_gicv3_cpu_update_notifier; ++ + s->itslist = g_ptr_array_new(); + } + + static void arm_gicv3_finalize(Object *obj) + { + GICv3State *s = ARM_GICV3_COMMON(obj); ++ Object *ms = qdev_get_machine(); ++ MachineClass *mc = MACHINE_GET_CLASS(ms); + ++ if (mc->has_hotpluggable_cpus) { ++ notifier_remove(&s->cpu_update_notifier); ++ } + g_free(s->redist_region_count); + } + +diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c +index 258dee1..3234a90 100644 +--- a/hw/intc/arm_gicv3_cpuif.c ++++ b/hw/intc/arm_gicv3_cpuif.c +@@ -934,6 +934,10 @@ void gicv3_cpuif_update(GICv3CPUState *cs) + ARMCPU *cpu = ARM_CPU(cs->cpu); + CPUARMState *env = &cpu->env; + ++ if (!qemu_enabled_cpu(cs->cpu)) { ++ return; ++ } ++ + g_assert(qemu_mutex_iothread_locked()); + + trace_gicv3_cpuif_update(gicv3_redist_affid(cs), cs->hppi.irq, +@@ -1835,6 +1839,10 @@ static void icc_generate_sgi(CPUARMState *env, GICv3CPUState *cs, + for (i = 0; i < s->num_cpu; i++) { + GICv3CPUState *ocs = &s->cpu[i]; + ++ if (!qemu_enabled_cpu(ocs->cpu)) { ++ continue; ++ } ++ + if (irm) { + /* IRM == 1 : route to 
all CPUs except self */ + if (cs == ocs) { +@@ -2783,6 +2791,127 @@ static const ARMCPRegInfo gicv3_cpuif_ich_apxr23_reginfo[] = { + }, + }; + ++void gicv3_init_cpu_reginfo(CPUState *cs) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ GICv3CPUState *gcs = icc_cs_from_env(&cpu->env); ++ ++ /* ++ * If the CPU doesn't define a GICv3 configuration, probably because ++ * in real hardware it doesn't have one, then we use default values ++ * matching the one used by most Arm CPUs. This applies to: ++ * cpu->gic_num_lrs ++ * cpu->gic_vpribits ++ * cpu->gic_vprebits ++ * cpu->gic_pribits ++ */ ++ ++ /* ++ * Note that we can't just use the GICv3CPUState as an opaque pointer ++ * in define_arm_cp_regs_with_opaque(), because when we're called back ++ * it might be with code translated by CPU 0 but run by CPU 1, in ++ * which case we'd get the wrong value. ++ * So instead we define the regs with no ri->opaque info, and ++ * get back to the GICv3CPUState from the CPUARMState. ++ */ ++ define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); ++ ++ /* ++ * The CPU implementation specifies the number of supported ++ * bits of physical priority. For backwards compatibility ++ * of migration, we have a compat property that forces use ++ * of 8 priority bits regardless of what the CPU really has. ++ */ ++ if (gcs->gic->force_8bit_prio) { ++ gcs->pribits = 8; ++ } else { ++ gcs->pribits = cpu->gic_pribits ?: 5; ++ } ++ ++ /* ++ * The GICv3 has separate ID register fields for virtual priority ++ * and preemption bit values, but only a single ID register field ++ * for the physical priority bits. The preemption bit count is ++ * always the same as the priority bit count, except that 8 bits ++ * of priority means 7 preemption bits. We precalculate the ++ * preemption bits because it simplifies the code and makes the ++ * parallels between the virtual and physical bits of the GIC ++ * a bit clearer. 
++ */ ++ gcs->prebits = gcs->pribits; ++ if (gcs->prebits == 8) { ++ gcs->prebits--; ++ } ++ /* ++ * Check that CPU code defining pribits didn't violate ++ * architectural constraints our implementation relies on. ++ */ ++ g_assert(gcs->pribits >= 4 && gcs->pribits <= 8); ++ ++ /* ++ * gicv3_cpuif_reginfo[] defines ICC_AP*R0_EL1; add definitions ++ * for ICC_AP*R{1,2,3}_EL1 if the prebits value requires them. ++ */ ++ if (gcs->prebits >= 6) { ++ define_arm_cp_regs(cpu, gicv3_cpuif_icc_apxr1_reginfo); ++ } ++ if (gcs->prebits == 7) { ++ define_arm_cp_regs(cpu, gicv3_cpuif_icc_apxr23_reginfo); ++ } ++ ++ if (arm_feature(&cpu->env, ARM_FEATURE_EL2)) { ++ int j; ++ ++ gcs->num_list_regs = cpu->gic_num_lrs ?: 4; ++ gcs->vpribits = cpu->gic_vpribits ?: 5; ++ gcs->vprebits = cpu->gic_vprebits ?: 5; ++ ++ /* ++ * Check against architectural constraints: getting these ++ * wrong would be a bug in the CPU code defining these, ++ * and the implementation relies on them holding. ++ */ ++ g_assert(gcs->vprebits <= gcs->vpribits); ++ g_assert(gcs->vprebits >= 5 && gcs->vprebits <= 7); ++ g_assert(gcs->vpribits >= 5 && gcs->vpribits <= 8); ++ ++ define_arm_cp_regs(cpu, gicv3_cpuif_hcr_reginfo); ++ ++ for (j = 0; j < gcs->num_list_regs; j++) { ++ /* ++ * Note that the AArch64 LRs are 64-bit; the AArch32 LRs ++ * are split into two cp15 regs, LR (the low part, with the ++ * same encoding as the AArch64 LR) and LRC (the high part). 
++ */ ++ ARMCPRegInfo lr_regset[] = { ++ { .name = "ICH_LRn_EL2", .state = ARM_CP_STATE_BOTH, ++ .opc0 = 3, .opc1 = 4, .crn = 12, ++ .crm = 12 + (j >> 3), .opc2 = j & 7, ++ .type = ARM_CP_IO | ARM_CP_NO_RAW, ++ .access = PL2_RW, ++ .readfn = ich_lr_read, ++ .writefn = ich_lr_write, ++ }, ++ { .name = "ICH_LRCn_EL2", .state = ARM_CP_STATE_AA32, ++ .cp = 15, .opc1 = 4, .crn = 12, ++ .crm = 14 + (j >> 3), .opc2 = j & 7, ++ .type = ARM_CP_IO | ARM_CP_NO_RAW, ++ .access = PL2_RW, ++ .readfn = ich_lr_read, ++ .writefn = ich_lr_write, ++ }, ++ }; ++ define_arm_cp_regs(cpu, lr_regset); ++ } ++ if (gcs->vprebits >= 6) { ++ define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr1_reginfo); ++ } ++ if (gcs->vprebits == 7) { ++ define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr23_reginfo); ++ } ++ } ++} ++ + static void gicv3_cpuif_el_change_hook(ARMCPU *cpu, void *opaque) + { + GICv3CPUState *cs = opaque; +@@ -2805,131 +2934,23 @@ void gicv3_init_cpuif(GICv3State *s) + + for (i = 0; i < s->num_cpu; i++) { + ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i)); +- GICv3CPUState *cs = &s->cpu[i]; +- +- /* +- * If the CPU doesn't define a GICv3 configuration, probably because +- * in real hardware it doesn't have one, then we use default values +- * matching the one used by most Arm CPUs. This applies to: +- * cpu->gic_num_lrs +- * cpu->gic_vpribits +- * cpu->gic_vprebits +- * cpu->gic_pribits +- */ +- +- /* Note that we can't just use the GICv3CPUState as an opaque pointer +- * in define_arm_cp_regs_with_opaque(), because when we're called back +- * it might be with code translated by CPU 0 but run by CPU 1, in +- * which case we'd get the wrong value. +- * So instead we define the regs with no ri->opaque info, and +- * get back to the GICv3CPUState from the CPUARMState. +- * +- * These CP regs callbacks can be called from either TCG or HVF code. +- */ +- define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); + +- /* +- * The CPU implementation specifies the number of supported +- * bits of physical priority. 
For backwards compatibility +- * of migration, we have a compat property that forces use +- * of 8 priority bits regardless of what the CPU really has. +- */ +- if (s->force_8bit_prio) { +- cs->pribits = 8; +- } else { +- cs->pribits = cpu->gic_pribits ?: 5; +- } +- +- /* +- * The GICv3 has separate ID register fields for virtual priority +- * and preemption bit values, but only a single ID register field +- * for the physical priority bits. The preemption bit count is +- * always the same as the priority bit count, except that 8 bits +- * of priority means 7 preemption bits. We precalculate the +- * preemption bits because it simplifies the code and makes the +- * parallels between the virtual and physical bits of the GIC +- * a bit clearer. +- */ +- cs->prebits = cs->pribits; +- if (cs->prebits == 8) { +- cs->prebits--; +- } +- /* +- * Check that CPU code defining pribits didn't violate +- * architectural constraints our implementation relies on. +- */ +- g_assert(cs->pribits >= 4 && cs->pribits <= 8); +- +- /* +- * gicv3_cpuif_reginfo[] defines ICC_AP*R0_EL1; add definitions +- * for ICC_AP*R{1,2,3}_EL1 if the prebits value requires them. +- */ +- if (cs->prebits >= 6) { +- define_arm_cp_regs(cpu, gicv3_cpuif_icc_apxr1_reginfo); +- } +- if (cs->prebits == 7) { +- define_arm_cp_regs(cpu, gicv3_cpuif_icc_apxr23_reginfo); +- } +- +- if (arm_feature(&cpu->env, ARM_FEATURE_EL2)) { +- int j; +- +- cs->num_list_regs = cpu->gic_num_lrs ?: 4; +- cs->vpribits = cpu->gic_vpribits ?: 5; +- cs->vprebits = cpu->gic_vprebits ?: 5; +- +- /* Check against architectural constraints: getting these +- * wrong would be a bug in the CPU code defining these, +- * and the implementation relies on them holding. 
+- */ +- g_assert(cs->vprebits <= cs->vpribits); +- g_assert(cs->vprebits >= 5 && cs->vprebits <= 7); +- g_assert(cs->vpribits >= 5 && cs->vpribits <= 8); +- +- define_arm_cp_regs(cpu, gicv3_cpuif_hcr_reginfo); +- +- for (j = 0; j < cs->num_list_regs; j++) { +- /* Note that the AArch64 LRs are 64-bit; the AArch32 LRs +- * are split into two cp15 regs, LR (the low part, with the +- * same encoding as the AArch64 LR) and LRC (the high part). ++ if (qemu_enabled_cpu(CPU(cpu))) { ++ GICv3CPUState *cs = icc_cs_from_env(&cpu->env); ++ gicv3_init_cpu_reginfo(CPU(cpu)); ++ if (tcg_enabled() || qtest_enabled()) { ++ /* ++ * We can only trap EL changes with TCG. However the GIC ++ * interrupt state only changes on EL changes involving EL2 or ++ * EL3, so for the non-TCG case this is OK, as EL2 and EL3 can't ++ * exist. + */ +- ARMCPRegInfo lr_regset[] = { +- { .name = "ICH_LRn_EL2", .state = ARM_CP_STATE_BOTH, +- .opc0 = 3, .opc1 = 4, .crn = 12, +- .crm = 12 + (j >> 3), .opc2 = j & 7, +- .type = ARM_CP_IO | ARM_CP_NO_RAW, +- .access = PL2_RW, +- .readfn = ich_lr_read, +- .writefn = ich_lr_write, +- }, +- { .name = "ICH_LRCn_EL2", .state = ARM_CP_STATE_AA32, +- .cp = 15, .opc1 = 4, .crn = 12, +- .crm = 14 + (j >> 3), .opc2 = j & 7, +- .type = ARM_CP_IO | ARM_CP_NO_RAW, +- .access = PL2_RW, +- .readfn = ich_lr_read, +- .writefn = ich_lr_write, +- }, +- }; +- define_arm_cp_regs(cpu, lr_regset); +- } +- if (cs->vprebits >= 6) { +- define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr1_reginfo); +- } +- if (cs->vprebits == 7) { +- define_arm_cp_regs(cpu, gicv3_cpuif_ich_apxr23_reginfo); ++ arm_register_el_change_hook(cpu, gicv3_cpuif_el_change_hook, ++ cs); ++ } else { ++ assert(!arm_feature(&cpu->env, ARM_FEATURE_EL2)); ++ assert(!arm_feature(&cpu->env, ARM_FEATURE_EL3)); + } + } +- if (tcg_enabled() || qtest_enabled()) { +- /* +- * We can only trap EL changes with TCG. 
However the GIC interrupt +- * state only changes on EL changes involving EL2 or EL3, so for +- * the non-TCG case this is OK, as EL2 and EL3 can't exist. +- */ +- arm_register_el_change_hook(cpu, gicv3_cpuif_el_change_hook, cs); +- } else { +- assert(!arm_feature(&cpu->env, ARM_FEATURE_EL2)); +- assert(!arm_feature(&cpu->env, ARM_FEATURE_EL3)); +- } + } + } +diff --git a/hw/intc/arm_gicv3_cpuif_common.c b/hw/intc/arm_gicv3_cpuif_common.c +index ff1239f..381cf27 100644 +--- a/hw/intc/arm_gicv3_cpuif_common.c ++++ b/hw/intc/arm_gicv3_cpuif_common.c +@@ -20,3 +20,8 @@ void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s) + + env->gicv3state = (void *)s; + }; ++ ++void gicv3_set_cpustate(GICv3CPUState *s, CPUState *cpu) ++{ ++ s->cpu = cpu; ++} +diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c +index 77eb37e..dd2a60f 100644 +--- a/hw/intc/arm_gicv3_kvm.c ++++ b/hw/intc/arm_gicv3_kvm.c +@@ -24,6 +24,7 @@ + #include "hw/intc/arm_gicv3_common.h" + #include "qemu/error-report.h" + #include "qemu/module.h" ++#include "sysemu/cpus.h" + #include "sysemu/kvm.h" + #include "sysemu/runstate.h" + #include "kvm_arm.h" +@@ -458,6 +459,18 @@ static void kvm_arm_gicv3_put(GICv3State *s) + GICv3CPUState *c = &s->cpu[ncpu]; + int num_pri_bits; + ++ /* ++ * To support hotplug of vcpus we need to make sure all gic cpuif/GICC ++ * are initialized at machvirt init time. Once the init is done we ++ * release the ARMCPU object for disabled vcpus but this leg could hit ++ * during reset of GICC later as well i.e. after init has happened and in ++ * all of the cases we want to make sure we don't access the GICC for ++ * the disabled VCPUs. 
++ */ ++ if (!qemu_enabled_cpu(c->cpu)) { ++ continue; ++ } ++ + kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, true); + kvm_gicc_access(s, ICC_CTLR_EL1, ncpu, + &c->icc_ctlr_el1[GICV3_NS], true); +@@ -616,6 +629,11 @@ static void kvm_arm_gicv3_get(GICv3State *s) + GICv3CPUState *c = &s->cpu[ncpu]; + int num_pri_bits; + ++ /* don't access GICC for the disabled vCPUs. */ ++ if (!qemu_enabled_cpu(c->cpu)) { ++ continue; ++ } ++ + kvm_gicc_access(s, ICC_SRE_EL1, ncpu, &c->icc_sre_el1, false); + kvm_gicc_access(s, ICC_CTLR_EL1, ncpu, + &c->icc_ctlr_el1[GICV3_NS], false); +@@ -695,10 +713,19 @@ static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri) + return; + } + ++ /* ++ * This shall be called even when vcpu is being hotplugged or onlined and ++ * other vcpus might be running. Host kernel KVM code to handle device ++ * access of IOCTLs KVM_{GET|SET}_DEVICE_ATTR might fail due to inability to ++ * grab vcpu locks for all the vcpus. Hence, we need to pause all vcpus to ++ * facilitate locking within host. 
++ */ ++ pause_all_vcpus(); + /* Initialize to actual HW supported configuration */ + kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + KVM_VGIC_ATTR(ICC_CTLR_EL1, c->gicr_typer), + &c->icc_ctlr_el1[GICV3_NS], false, &error_abort); ++ resume_all_vcpus(); + + c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS]; + } +@@ -777,6 +804,10 @@ static void vm_change_state_handler(void *opaque, bool running, + } + } + ++static void kvm_gicv3_init_cpu_reginfo(CPUState *cs) ++{ ++ define_arm_cp_regs(ARM_CPU(cs), gicv3_cpuif_reginfo); ++} + + static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) + { +@@ -808,9 +839,10 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp) + gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL); + + for (i = 0; i < s->num_cpu; i++) { +- ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i)); +- +- define_arm_cp_regs(cpu, gicv3_cpuif_reginfo); ++ CPUState *cs = qemu_get_cpu(i); ++ if (qemu_enabled_cpu(cs)) { ++ kvm_gicv3_init_cpu_reginfo(cs); ++ } + } + + /* Try to create the device via the device control API */ +@@ -897,6 +929,7 @@ static void kvm_arm_gicv3_class_init(ObjectClass *klass, void *data) + + agcc->pre_save = kvm_arm_gicv3_get; + agcc->post_load = kvm_arm_gicv3_put; ++ agcc->init_cpu_reginfo = kvm_gicv3_init_cpu_reginfo; + device_class_set_parent_realize(dc, kvm_arm_gicv3_realize, + &kgc->parent_realize); + resettable_class_set_parent_phases(rc, NULL, kvm_arm_gicv3_reset_hold, NULL, +diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h +index 29d5cdc..0bed0f6 100644 +--- a/hw/intc/gicv3_internal.h ++++ b/hw/intc/gicv3_internal.h +@@ -709,6 +709,7 @@ void gicv3_redist_vinvall(GICv3CPUState *cs, uint64_t vptaddr); + + void gicv3_redist_send_sgi(GICv3CPUState *cs, int grp, int irq, bool ns); + void gicv3_init_cpuif(GICv3State *s); ++void gicv3_init_cpu_reginfo(CPUState *cs); + + /** + * gicv3_cpuif_update: +@@ -848,5 +849,6 @@ static inline void gicv3_cache_all_target_cpustates(GICv3State *s) + } 
+ + void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s); ++void gicv3_set_cpustate(GICv3CPUState *s, CPUState *cpu); + + #endif /* QEMU_ARM_GICV3_INTERNAL_H */ +diff --git a/include/exec/memory.h b/include/exec/memory.h +index 5357c22..70bfa9f 100644 +--- a/include/exec/memory.h ++++ b/include/exec/memory.h +@@ -1132,6 +1132,7 @@ struct AddressSpace { + struct rcu_head rcu; + char *name; + MemoryRegion *root; ++ bool free_in_rcu; + + /* Accessed via RCU. */ + struct FlatView *current_map; +diff --git a/include/hw/acpi/cpu.h b/include/hw/acpi/cpu.h +index d2ca2ee..dc6ff9d 100644 +--- a/include/hw/acpi/cpu.h ++++ b/include/hw/acpi/cpu.h +@@ -25,6 +25,8 @@ typedef struct AcpiCpuStatus { + uint64_t arch_id; + bool is_inserting; + bool is_removing; ++ bool is_present; ++ bool is_enabled; + bool fw_remove; + uint32_t ost_event; + uint32_t ost_status; +diff --git a/include/hw/acpi/cpu_hotplug.h b/include/hw/acpi/cpu_hotplug.h +index 3b932ab..ef63175 100644 +--- a/include/hw/acpi/cpu_hotplug.h ++++ b/include/hw/acpi/cpu_hotplug.h +@@ -19,6 +19,10 @@ + #include "hw/hotplug.h" + #include "hw/acpi/cpu.h" + ++#define ACPI_CPU_HOTPLUG_REG_LEN 12 ++#define ACPI_CPU_SCAN_METHOD "CSCN" ++#define ACPI_CPU_CONTAINER "\\_SB.CPUS" ++ + typedef struct AcpiCpuHotplug { + Object *device; + MemoryRegion io; +diff --git a/include/hw/acpi/generic_event_device.h b/include/hw/acpi/generic_event_device.h +index d2dac87..22b2529 100644 +--- a/include/hw/acpi/generic_event_device.h ++++ b/include/hw/acpi/generic_event_device.h +@@ -60,6 +60,7 @@ + #define HW_ACPI_GENERIC_EVENT_DEVICE_H + + #include "hw/sysbus.h" ++#include "hw/acpi/cpu_hotplug.h" + #include "hw/acpi/memory_hotplug.h" + #include "hw/acpi/ghes.h" + #include "hw/acpi/cpu.h" +diff --git a/include/hw/arm/boot.h b/include/hw/arm/boot.h +index 80c492d..f81326a 100644 +--- a/include/hw/arm/boot.h ++++ b/include/hw/arm/boot.h +@@ -178,6 +178,8 @@ AddressSpace *arm_boot_address_space(ARMCPU *cpu, + int arm_load_dtb(hwaddr addr, 
const struct arm_boot_info *binfo, + hwaddr addr_limit, AddressSpace *as, MachineState *ms); + ++void do_cpu_reset(void *opaque); ++ + /* Write a secure board setup routine with a dummy handler for SMCs */ + void arm_write_secure_board_setup_dummy_smc(ARMCPU *cpu, + const struct arm_boot_info *info, +diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h +index f692398..786a33c 100644 +--- a/include/hw/arm/virt.h ++++ b/include/hw/arm/virt.h +@@ -75,6 +75,7 @@ enum { + VIRT_PCDIMM_ACPI, + VIRT_ACPI_GED, + VIRT_NVDIMM_ACPI, ++ VIRT_CPUHP_ACPI, + VIRT_PVTIME, + VIRT_LOWMEMMAP_LAST, + }; +@@ -138,6 +139,10 @@ struct VirtMachineState { + DeviceState *platform_bus_dev; + FWCfgState *fw_cfg; + PFlashCFI01 *flash[2]; ++ MemoryRegion *sysmem; ++ MemoryRegion *secure_sysmem; ++ MemoryRegion *tag_sysmem; ++ MemoryRegion *secure_tag_sysmem; + bool secure; + bool highmem; + bool highmem_compact; +@@ -147,9 +152,11 @@ struct VirtMachineState { + bool its; + bool tcg_its; + bool virt; ++ bool cpu_hotplug_enabled; + bool ras; + bool mte; + bool dtb_randomness; ++ bool pmu; + OnOffAuto acpi; + VirtGICType gic_version; + VirtIOMMUType iommu; +@@ -160,6 +167,7 @@ struct VirtMachineState { + MemMapEntry *memmap; + char *pciehb_nodename; + const int *irqmap; ++ uint16_t boot_cpus; + int fdt_size; + uint32_t clock_phandle; + uint32_t gic_phandle; +@@ -173,6 +181,7 @@ struct VirtMachineState { + PCIBus *bus; + char *oem_id; + char *oem_table_id; ++ NotifierList cpuhp_notifiers; + }; + + #define VIRT_ECAM_ID(high) (high ? 
VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM) +@@ -200,10 +209,16 @@ static uint32_t virt_redist_capacity(VirtMachineState *vms, int region) + static inline int virt_gicv3_redist_region_count(VirtMachineState *vms) + { + uint32_t redist0_capacity = virt_redist_capacity(vms, VIRT_GIC_REDIST); ++ MachineState *ms = MACHINE(vms); ++ unsigned int max_cpus = ms->smp.max_cpus; ++ ++ if (!vms->cpu_hotplug_enabled) { ++ max_cpus = ms->smp.cpus; ++ } + + assert(vms->gic_version != VIRT_GIC_VERSION_2); + +- return (MACHINE(vms)->smp.cpus > redist0_capacity && ++ return (max_cpus > redist0_capacity && + vms->highmem_redists) ? 2 : 1; + } + +diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h +index be87df1..37f3a46 100644 +--- a/include/hw/core/cpu.h ++++ b/include/hw/core/cpu.h +@@ -529,6 +529,7 @@ struct CPUState { + uint32_t kvm_fetch_index; + uint64_t dirty_pages; + int kvm_vcpu_stats_fd; ++ VMChangeStateEntry *vmcse; + + /* Use by accel-block: CPU is executing an ioctl() */ + QemuLockCnt in_ioctl_lock; +@@ -539,6 +540,24 @@ struct CPUState { + GArray *plugin_mem_cbs; + #endif + ++ /* ++ * Some architectures do not allow *presence* of vCPUs to be changed ++ * after guest has booted using information specified by VMM/firmware ++ * via ACPI MADT at the boot time. Thus to enable vCPU hotplug on these ++ * architectures possible vCPU can have CPUState object in 'disabled' ++ * state or can also not have CPUState object at all. This is possible ++ * when vCPU Hotplug is supported and vCPUs are 'yet-to-be-plugged' in ++ * the QOM or have been hot-unplugged. ++ * By default every CPUState is enabled as of now across all archs. ++ */ ++ bool disabled; ++ /* ++ * On certain architectures, to give persistent view of the 'presence' of ++ * vCPUs to the guest, ACPI might need to fake the 'presence' of the vCPUs ++ * but keep them ACPI disabled to the guest. This is done by returning ++ * _STA.PRES=True and _STA.Ena=False for the unplugged vCPUs in QEMU QoM. 
++ */ ++ bool acpi_persistent; + /* TODO Move common fields from CPUArchState here. */ + int cpu_index; + int cluster_index; +@@ -546,6 +565,8 @@ struct CPUState { + uint32_t halted; + int32_t exception_index; + ++ bool cold_booted; ++ + AccelCPUState *accel; + /* shared by kvm and hvf */ + bool vcpu_dirty; +@@ -914,6 +935,61 @@ static inline bool cpu_in_exclusive_context(const CPUState *cpu) + */ + CPUState *qemu_get_cpu(int index); + ++/** ++ * qemu_get_possible_cpu: ++ * @index: The CPUState@cpu_index value of the CPU to obtain. ++ * Input index MUST be in range [0, Max Possible CPUs) ++ * ++ * If CPUState object exists, then it gets a CPU matching ++ * @index in the possible CPU array. ++ * ++ * Returns: The possible CPU or %NULL if CPU does not exist. ++ */ ++CPUState *qemu_get_possible_cpu(int index); ++ ++/** ++ * qemu_present_cpu: ++ * @cpu: The vCPU to check ++ * ++ * Checks if the vCPU is amongst the present possible vcpus. ++ * ++ * Returns: True if it is present possible vCPU else false ++ */ ++bool qemu_present_cpu(CPUState *cpu); ++ ++/** ++ * qemu_enabled_cpu: ++ * @cpu: The vCPU to check ++ * ++ * Checks if the vCPU is enabled. ++ * ++ * Returns: True if it is 'enabled' else false ++ */ ++bool qemu_enabled_cpu(CPUState *cpu); ++ ++/** ++ * qemu_persistent_cpu: ++ * @cpu: The vCPU to check ++ * ++ * Checks if the vCPU state should always be reflected as *present* via ACPI ++ * to the Guest. By default, this is False on all architectures and has to be ++ * explicitly set during initialization. ++ * ++ * Returns: True if it is ACPI 'persistent' CPU ++ * ++ */ ++bool qemu_persistent_cpu(CPUState *cpu); ++ ++/** ++ * qemu_get_cpu_archid: ++ * @cpu_index: possible vCPU for which arch-id needs to be retrieved ++ * ++ * Fetches the vCPU arch-id from the present possible vCPUs. ++ * ++ * Returns: arch-id of the possible vCPU ++ */ ++uint64_t qemu_get_cpu_archid(int cpu_index); ++ + /** + * cpu_exists: + * @id: Guest-exposed CPU ID to lookup. 
+diff --git a/include/hw/intc/arm_gicv3_common.h b/include/hw/intc/arm_gicv3_common.h +index 4e2fb51..b5f8ba1 100644 +--- a/include/hw/intc/arm_gicv3_common.h ++++ b/include/hw/intc/arm_gicv3_common.h +@@ -280,6 +280,7 @@ struct GICv3State { + GICv3CPUState *gicd_irouter_target[GICV3_MAXIRQ]; + uint32_t gicd_nsacr[DIV_ROUND_UP(GICV3_MAXIRQ, 16)]; + ++ Notifier cpu_update_notifier; + GICv3CPUState *cpu; + /* List of all ITSes connected to this GIC */ + GPtrArray *itslist; +@@ -324,10 +325,32 @@ struct ARMGICv3CommonClass { + + void (*pre_save)(GICv3State *s); + void (*post_load)(GICv3State *s); ++ void (*init_cpu_reginfo)(CPUState *cs); + }; + + void gicv3_init_irqs_and_mmio(GICv3State *s, qemu_irq_handler handler, + const MemoryRegionOps *ops); ++/** ++ * Structure used by GICv3 CPU hotplug notifier ++ */ ++typedef struct GICv3CPUHotplugInfo { ++ DeviceState *gic; /* GICv3State */ ++ CPUState *cpu; ++} GICv3CPUHotplugInfo; ++ ++/** ++ * gicv3_cpuhp_notifier ++ * ++ * Returns CPU hotplug notifier which could be used to update GIC about any ++ * CPU hot(un)plug events. 
++ * ++ * Returns: Notifier initialized with CPU Hot(un)plug update function ++ */ ++static inline Notifier *gicv3_cpuhp_notifier(DeviceState *dev) ++{ ++ GICv3State *s = ARM_GICV3_COMMON(dev); ++ return &s->cpu_update_notifier; ++} + + /** + * gicv3_class_name +diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h +index 151d968..2d3661d 100644 +--- a/include/hw/qdev-core.h ++++ b/include/hw/qdev-core.h +@@ -739,6 +739,8 @@ qemu_irq qdev_get_gpio_out_connector(DeviceState *dev, const char *name, int n); + */ + qemu_irq qdev_intercept_gpio_out(DeviceState *dev, qemu_irq icpt, + const char *name, int n); ++qemu_irq qdev_disconnect_gpio_out_named(DeviceState *dev, ++ const char *name, int n); + + BusState *qdev_get_child_bus(DeviceState *dev, const char *name); + +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 2275f77..ee286d1 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -532,6 +532,8 @@ bool kvm_kernel_irqchip_allowed(void); + bool kvm_kernel_irqchip_required(void); + bool kvm_kernel_irqchip_split(void); + ++bool kvm_smccc_filter_enabled(void); ++ + /** + * kvm_arch_irqchip_create: + * @KVMState: The KVMState pointer +diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h +index fd84639..b2d2c59 100644 +--- a/include/sysemu/kvm_int.h ++++ b/include/sysemu/kvm_int.h +@@ -112,6 +112,7 @@ struct KVMState + uint64_t kvm_dirty_ring_bytes; /* Size of the per-vcpu dirty ring */ + uint32_t kvm_dirty_ring_size; /* Number of dirty GFNs per ring */ + bool kvm_dirty_ring_with_bitmap; ++ bool kvm_smccc_filter_enabled; + uint64_t kvm_eager_split_size; /* Eager Page Splitting chunk size */ + struct KVMDirtyRingReaper reaper; + NotifyVmexitOption notify_vmexit; +diff --git a/include/tcg/startup.h b/include/tcg/startup.h +index f713057..c6cb1d9 100644 +--- a/include/tcg/startup.h ++++ b/include/tcg/startup.h +@@ -45,6 +45,11 @@ void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus); + */ + void tcg_register_thread(void); 
+ ++/** ++ * tcg_unregister_thread: Unregister this thread with the TCG runtime ++ */ ++void tcg_unregister_thread(void); ++ + /** + * tcg_prologue_init(): Generate the code for the TCG prologue + * +diff --git a/system/cpus.c b/system/cpus.c +index cbeec13..d9de09b 100644 +--- a/system/cpus.c ++++ b/system/cpus.c +@@ -565,12 +565,14 @@ static bool all_vcpus_paused(void) + return true; + } + +-void pause_all_vcpus(void) ++static void request_pause_all_vcpus(void) + { + CPUState *cpu; + +- qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false); + CPU_FOREACH(cpu) { ++ if (cpu->stopped) { ++ continue; ++ } + if (qemu_cpu_is_self(cpu)) { + qemu_cpu_stop(cpu, true); + } else { +@@ -578,6 +580,14 @@ void pause_all_vcpus(void) + qemu_cpu_kick(cpu); + } + } ++} ++ ++void pause_all_vcpus(void) ++{ ++ qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false); ++ ++retry: ++ request_pause_all_vcpus(); + + /* We need to drop the replay_lock so any vCPU threads woken up + * can finish their replay tasks +@@ -586,14 +596,23 @@ void pause_all_vcpus(void) + + while (!all_vcpus_paused()) { + qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex); +- CPU_FOREACH(cpu) { +- qemu_cpu_kick(cpu); +- } ++ /* While we waited on qemu_pause_cond the BQL was unlocked, so ++ * a vcpu's state may have been changed by another thread; ++ * we must request the pause state on all vcpus again. ++ */ ++ request_pause_all_vcpus(); + } + + qemu_mutex_unlock_iothread(); + replay_mutex_lock(); + qemu_mutex_lock_iothread(); ++ ++ /* While the BQL was unlocked, a vcpu's state may have been ++ * changed by another thread, so we must retry. 
++ */ ++ if (!all_vcpus_paused()) { ++ goto retry; ++ } + } + + void cpu_resume(CPUState *cpu) +@@ -613,6 +632,9 @@ void resume_all_vcpus(void) + + qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true); + CPU_FOREACH(cpu) { ++ if (!object_property_get_bool(OBJECT(cpu), "realized", &error_abort)) { ++ continue; ++ } + cpu_resume(cpu); + } + } +diff --git a/system/memory.c b/system/memory.c +index 2b5369e..9db07fd 100644 +--- a/system/memory.c ++++ b/system/memory.c +@@ -3156,6 +3156,9 @@ static void do_address_space_destroy(AddressSpace *as) + g_free(as->name); + g_free(as->ioeventfds); + memory_region_unref(as->root); ++ if (as->free_in_rcu) { ++ g_free(as); ++ } + } + + void address_space_destroy(AddressSpace *as) +diff --git a/target/arm/arm-powerctl.c b/target/arm/arm-powerctl.c +index c078849..fb19b04 100644 +--- a/target/arm/arm-powerctl.c ++++ b/target/arm/arm-powerctl.c +@@ -16,6 +16,7 @@ + #include "qemu/log.h" + #include "qemu/main-loop.h" + #include "sysemu/tcg.h" ++#include "hw/boards.h" + + #ifndef DEBUG_ARM_POWERCTL + #define DEBUG_ARM_POWERCTL 0 +@@ -28,18 +29,37 @@ + } \ + } while (0) + ++static CPUArchId *arm_get_archid_by_id(uint64_t id) ++{ ++ int n; ++ CPUArchId *arch_id; ++ MachineState *ms = MACHINE(qdev_get_machine()); ++ ++ /* ++ * At this point disabled CPUs don't have a CPUState, but their CPUArchId ++ * exists. ++ * ++ * TODO: Is arch_id == mp_affinity? This needs work. 
++ */ ++ for (n = 0; n < ms->possible_cpus->len; n++) { ++ arch_id = &ms->possible_cpus->cpus[n]; ++ ++ if (arch_id->arch_id == id) { ++ return arch_id; ++ } ++ } ++ return NULL; ++} ++ + CPUState *arm_get_cpu_by_id(uint64_t id) + { +- CPUState *cpu; ++ CPUArchId *arch_id; + + DPRINTF("cpu %" PRId64 "\n", id); + +- CPU_FOREACH(cpu) { +- ARMCPU *armcpu = ARM_CPU(cpu); +- +- if (armcpu->mp_affinity == id) { +- return cpu; +- } ++ arch_id = arm_get_archid_by_id(id); ++ if (arch_id && arch_id->cpu) { ++ return CPU(arch_id->cpu); + } + + qemu_log_mask(LOG_GUEST_ERROR, +@@ -97,6 +117,7 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id, + { + CPUState *target_cpu_state; + ARMCPU *target_cpu; ++ CPUArchId *arch_id; + struct CpuOnInfo *info; + + assert(qemu_mutex_iothread_locked()); +@@ -117,12 +138,24 @@ int arm_set_cpu_on(uint64_t cpuid, uint64_t entry, uint64_t context_id, + } + + /* Retrieve the cpu we are powering up */ +- target_cpu_state = arm_get_cpu_by_id(cpuid); +- if (!target_cpu_state) { ++ arch_id = arm_get_archid_by_id(cpuid); ++ if (!arch_id) { + /* The cpu was not found */ + return QEMU_ARM_POWERCTL_INVALID_PARAM; + } + ++ target_cpu_state = CPU(arch_id->cpu); ++ if (!qemu_enabled_cpu(target_cpu_state)) { ++ /* ++ * The cpu is not plugged in or disabled. 
We should return appropriate ++ * value as introduced in DEN0022E PSCI 1.2 issue E ++ */ ++ qemu_log_mask(LOG_GUEST_ERROR, ++ "[ARM]%s: Denying attempt to online removed/disabled " ++ "CPU%" PRId64"\n", __func__, cpuid); ++ return QEMU_ARM_POWERCTL_IS_OFF; ++ } ++ + target_cpu = ARM_CPU(target_cpu_state); + if (target_cpu->power_state == PSCI_ON) { + qemu_log_mask(LOG_GUEST_ERROR, +diff --git a/target/arm/cpu.c b/target/arm/cpu.c +index 5d9bca5..09d391b 100644 +--- a/target/arm/cpu.c ++++ b/target/arm/cpu.c +@@ -142,6 +142,16 @@ void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, + QLIST_INSERT_HEAD(&cpu->pre_el_change_hooks, entry, node); + } + ++void arm_unregister_pre_el_change_hooks(ARMCPU *cpu) ++{ ++ ARMELChangeHook *entry, *next; ++ ++ QLIST_FOREACH_SAFE(entry, &cpu->pre_el_change_hooks, node, next) { ++ QLIST_REMOVE(entry, node); ++ g_free(entry); ++ } ++} ++ + void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, + void *opaque) + { +@@ -153,6 +163,16 @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, + QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node); + } + ++void arm_unregister_el_change_hooks(ARMCPU *cpu) ++{ ++ ARMELChangeHook *entry, *next; ++ ++ QLIST_FOREACH_SAFE(entry, &cpu->el_change_hooks, node, next) { ++ QLIST_REMOVE(entry, node); ++ g_free(entry); ++ } ++} ++ + static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque) + { + /* Reset a single ARMCPRegInfo register */ +@@ -2394,6 +2414,94 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) + acc->parent_realize(dev, errp); + } + ++static void arm_cpu_unrealizefn(DeviceState *dev) ++{ ++ ARMCPUClass *acc = ARM_CPU_GET_CLASS(dev); ++ ARMCPU *cpu = ARM_CPU(dev); ++ CPUARMState *env = &cpu->env; ++ CPUState *cs = CPU(dev); ++ bool has_secure; ++ ++#ifndef CONFIG_USER_ONLY ++ has_secure = cpu->has_el3 || arm_feature(env, ARM_FEATURE_M_SECURITY); ++ ++ /* rock 'n' un-roll, whatever happened in the 
arm_cpu_realizefn cleanly */ ++ cpu_address_space_destroy(cs, ARMASIdx_NS); ++ ++ if (cpu->tag_memory != NULL) { ++ cpu_address_space_destroy(cs, ARMASIdx_TagNS); ++ if (has_secure) { ++ cpu_address_space_destroy(cs, ARMASIdx_TagS); ++ } ++ } ++ ++ if (has_secure) { ++ cpu_address_space_destroy(cs, ARMASIdx_S); ++ } ++#endif ++ ++ destroy_cpreg_list(cpu); ++ arm_cpu_unregister_gdb_regs(cpu); ++ unregister_cp_regs_for_features(cpu); ++ ++#ifndef CONFIG_USER_ONLY ++ if (tcg_enabled() && cpu_isar_feature(aa64_rme, cpu)) { ++ arm_unregister_el_change_hooks(cpu); ++ } ++#endif ++ ++ if (cpu->sau_sregion && arm_feature(env, ARM_FEATURE_M_SECURITY)) { ++ g_free(env->sau.rbar); ++ g_free(env->sau.rlar); ++ } ++ ++ if (arm_feature(env, ARM_FEATURE_PMSA) && ++ arm_feature(env, ARM_FEATURE_V7)) { ++ if (cpu->pmsav7_dregion) { ++ if (arm_feature(env, ARM_FEATURE_V8)) { ++ g_free(env->pmsav8.rbar[M_REG_NS]); ++ g_free(env->pmsav8.rlar[M_REG_NS]); ++ if (arm_feature(env, ARM_FEATURE_M_SECURITY)) { ++ g_free(env->pmsav8.rbar[M_REG_S]); ++ g_free(env->pmsav8.rlar[M_REG_S]); ++ } ++ } else { ++ g_free(env->pmsav7.drbar); ++ g_free(env->pmsav7.drsr); ++ g_free(env->pmsav7.dracr); ++ } ++ } ++ if (cpu->pmsav8r_hdregion) { ++ g_free(env->pmsav8.hprbar); ++ g_free(env->pmsav8.hprlar); ++ } ++ } ++ ++ if (arm_feature(env, ARM_FEATURE_PMU)) { ++ if (!kvm_enabled()) { ++ arm_unregister_pre_el_change_hooks(cpu); ++ arm_unregister_el_change_hooks(cpu); ++ } ++ ++#ifndef CONFIG_USER_ONLY ++ if (cpu->pmu_timer) { ++ timer_del(cpu->pmu_timer); ++ } ++#endif ++ } ++ ++ cpu_remove_sync(CPU(dev)); ++ acc->parent_unrealize(dev); ++ ++#ifndef CONFIG_USER_ONLY ++ timer_del(cpu->gt_timer[GTIMER_PHYS]); ++ timer_del(cpu->gt_timer[GTIMER_VIRT]); ++ timer_del(cpu->gt_timer[GTIMER_HYP]); ++ timer_del(cpu->gt_timer[GTIMER_SEC]); ++ timer_del(cpu->gt_timer[GTIMER_HYPVIRT]); ++#endif ++} ++ + static ObjectClass *arm_cpu_class_by_name(const char *cpu_model) + { + ObjectClass *oc; +@@ -2426,6 +2534,10 @@ 
static Property arm_cpu_properties[] = { + DEFINE_PROP_UINT64("mp-affinity", ARMCPU, + mp_affinity, ARM64_AFFINITY_INVALID), + DEFINE_PROP_INT32("node-id", ARMCPU, node_id, CPU_UNSET_NUMA_NODE_ID), ++ DEFINE_PROP_INT32("socket-id", ARMCPU, socket_id, 0), ++ DEFINE_PROP_INT32("cluster-id", ARMCPU, cluster_id, 0), ++ DEFINE_PROP_INT32("core-id", ARMCPU, core_id, 0), ++ DEFINE_PROP_INT32("thread-id", ARMCPU, thread_id, 0), + DEFINE_PROP_INT32("core-count", ARMCPU, core_count, -1), + DEFINE_PROP_END_OF_LIST() + }; +@@ -2477,6 +2589,12 @@ static const struct TCGCPUOps arm_tcg_ops = { + }; + #endif /* CONFIG_TCG */ + ++static int64_t arm_cpu_get_arch_id(CPUState *cs) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ return cpu->mp_affinity; ++} ++ + static void arm_cpu_class_init(ObjectClass *oc, void *data) + { + ARMCPUClass *acc = ARM_CPU_CLASS(oc); +@@ -2486,6 +2604,8 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) + + device_class_set_parent_realize(dc, arm_cpu_realizefn, + &acc->parent_realize); ++ device_class_set_parent_unrealize(dc, arm_cpu_unrealizefn, ++ &acc->parent_unrealize); + + device_class_set_props(dc, arm_cpu_properties); + +@@ -2495,6 +2615,7 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data) + cc->class_by_name = arm_cpu_class_by_name; + cc->has_work = arm_cpu_has_work; + cc->dump_state = arm_cpu_dump_state; ++ cc->get_arch_id = arm_cpu_get_arch_id; + cc->set_pc = arm_cpu_set_pc; + cc->get_pc = arm_cpu_get_pc; + cc->gdb_read_register = arm_cpu_gdb_read_register; +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index a0282e0..a5ba7f2 100644 +--- a/target/arm/cpu.h ++++ b/target/arm/cpu.h +@@ -971,6 +971,9 @@ struct ArchCPU { + + /* KVM steal time */ + OnOffAuto kvm_steal_time; ++ ++ /* KVM SVE has been finalized for this CPU */ ++ bool kvm_sve_finalized; + #endif /* CONFIG_KVM */ + + /* Uniprocessor system with MP extensions */ +@@ -1096,6 +1099,10 @@ struct ArchCPU { + QLIST_HEAD(, ARMELChangeHook) el_change_hooks; + + int32_t node_id; 
/* NUMA node this CPU belongs to */ ++ int32_t socket_id; ++ int32_t cluster_id; ++ int32_t core_id; ++ int32_t thread_id; + + /* Used to synchronize KVM and QEMU in-kernel device levels */ + uint8_t device_irq_level; +@@ -1134,6 +1141,7 @@ struct ARMCPUClass { + + const ARMCPUInfo *info; + DeviceRealize parent_realize; ++ DeviceUnrealize parent_unrealize; + ResettablePhases parent_phases; + }; + +@@ -3355,6 +3363,13 @@ static inline AddressSpace *arm_addressspace(CPUState *cs, MemTxAttrs attrs) + */ + void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, + void *opaque); ++/** ++ * arm_unregister_pre_el_change_hooks: ++ * Unregister all pre-EL-change hook functions. Generally called during ++ * the unrealize path. ++ */ ++void arm_unregister_pre_el_change_hooks(ARMCPU *cpu); ++ + /** + * arm_register_el_change_hook: + * Register a hook function which will be called immediately after this +@@ -3367,6 +3382,12 @@ void arm_register_pre_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, + */ + void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, void + *opaque); ++/** ++ * arm_unregister_el_change_hooks: ++ * Unregister all EL-change hook functions. Generally called during ++ * the unrealize path. ++ */ ++void arm_unregister_el_change_hooks(ARMCPU *cpu); + + /** + * arm_rebuild_hflags: +diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c +index 1e9c6c8..4f26e88 100644 +--- a/target/arm/cpu64.c ++++ b/target/arm/cpu64.c +@@ -778,6 +778,18 @@ static void aarch64_cpu_set_aarch64(Object *obj, bool value, Error **errp) + } + } + ++static void aarch64_cpu_initfn(Object *obj) ++{ ++ CPUState *cs = CPU(obj); ++ ++ /* ++ * we start every ARM64 vcpu as disabled possible vCPU.
It needs to be ++ * enabled explicitly ++ */ ++ cs->disabled = true; ++ cs->thread_id = 0; ++} ++ + static void aarch64_cpu_finalizefn(Object *obj) + { + } +@@ -790,7 +802,9 @@ static const gchar *aarch64_gdb_arch_name(CPUState *cs) + static void aarch64_cpu_class_init(ObjectClass *oc, void *data) + { + CPUClass *cc = CPU_CLASS(oc); ++ DeviceClass *dc = DEVICE_CLASS(oc); + ++ dc->user_creatable = true; + cc->gdb_read_register = aarch64_cpu_gdb_read_register; + cc->gdb_write_register = aarch64_cpu_gdb_write_register; + cc->gdb_num_core_regs = 34; +@@ -836,6 +850,7 @@ void aarch64_cpu_register(const ARMCPUInfo *info) + static const TypeInfo aarch64_cpu_type_info = { + .name = TYPE_AARCH64_CPU, + .parent = TYPE_ARM_CPU, ++ .instance_init = aarch64_cpu_initfn, + .instance_finalize = aarch64_cpu_finalizefn, + .abstract = true, + .class_init = aarch64_cpu_class_init, +diff --git a/target/arm/gdbstub.c b/target/arm/gdbstub.c +index 28f546a..5ba1e28 100644 +--- a/target/arm/gdbstub.c ++++ b/target/arm/gdbstub.c +@@ -553,3 +553,9 @@ void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu) + } + #endif /* CONFIG_TCG */ + } ++ ++void arm_cpu_unregister_gdb_regs(ARMCPU *cpu) ++{ ++ CPUState *cs = CPU(cpu); ++ gdb_unregister_coprocessor_all(cs); ++} +diff --git a/target/arm/helper.c b/target/arm/helper.c +index df1646d..0370a73 100644 +--- a/target/arm/helper.c ++++ b/target/arm/helper.c +@@ -263,6 +263,19 @@ void init_cpreg_list(ARMCPU *cpu) + g_list_free(keys); + } + ++void destroy_cpreg_list(ARMCPU *cpu) ++{ ++ assert(cpu->cpreg_indexes); ++ assert(cpu->cpreg_values); ++ assert(cpu->cpreg_vmstate_indexes); ++ assert(cpu->cpreg_vmstate_values); ++ ++ g_free(cpu->cpreg_indexes); ++ g_free(cpu->cpreg_values); ++ g_free(cpu->cpreg_vmstate_indexes); ++ g_free(cpu->cpreg_vmstate_values); ++} ++ + /* + * Some registers are not accessible from AArch32 EL3 if SCR.NS == 0. 
+ */ +@@ -9446,6 +9459,18 @@ void register_cp_regs_for_features(ARMCPU *cpu) + #endif + } + ++void unregister_cp_regs_for_features(ARMCPU *cpu) ++{ ++ CPUARMState *env = &cpu->env; ++ if (arm_feature(env, ARM_FEATURE_M)) { ++ /* M profile has no coprocessor registers */ ++ return; ++ } ++ ++ /* empty it all. unregister all the coprocessor registers */ ++ g_hash_table_remove_all(cpu->cp_regs); ++} ++ + /* Sort alphabetically by type name, except for "any". */ + static gint arm_cpu_list_compare(gconstpointer a, gconstpointer b) + { +@@ -11347,7 +11372,7 @@ void arm_cpu_do_interrupt(CPUState *cs) + env->exception.syndrome); + } + +- if (tcg_enabled() && arm_is_psci_call(cpu, cs->exception_index)) { ++ if (arm_is_psci_call(cpu, cs->exception_index)) { + arm_handle_psci_call(cpu); + qemu_log_mask(CPU_LOG_INT, "...handled as PSCI call\n"); + return; +diff --git a/target/arm/internals.h b/target/arm/internals.h +index 8342f46..2b4378c 100644 +--- a/target/arm/internals.h ++++ b/target/arm/internals.h +@@ -187,9 +187,12 @@ void arm_cpu_register(const ARMCPUInfo *info); + void aarch64_cpu_register(const ARMCPUInfo *info); + + void register_cp_regs_for_features(ARMCPU *cpu); ++void unregister_cp_regs_for_features(ARMCPU *cpu); + void init_cpreg_list(ARMCPU *cpu); ++void destroy_cpreg_list(ARMCPU *cpu); + + void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu); ++void arm_cpu_unregister_gdb_regs(ARMCPU *cpu); + void arm_translate_init(void); + + void arm_restore_state_to_opc(CPUState *cs, +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index 7903e2d..e6cd9b6 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -257,7 +257,9 @@ int kvm_arch_get_default_type(MachineState *ms) + + int kvm_arch_init(MachineState *ms, KVMState *s) + { ++ MachineClass *mc = MACHINE_GET_CLASS(ms); + int ret = 0; ++ + /* For ARM interrupt delivery is always asynchronous, + * whether we are using an in-kernel VGIC or not. 
+ */ +@@ -308,6 +310,23 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + } + } + ++ /* ++ * To be able to handle PSCI CPU ON calls in QEMU, we need to install SMCCC ++ * filter in the Host KVM. This is required to support features like ++ * virtual CPU Hotplug on ARM platforms. ++ */ ++ if (mc->has_hotpluggable_cpus && ms->smp.max_cpus > ms->smp.cpus) { ++ if (kvm_arm_set_smccc_filter(PSCI_0_2_FN64_CPU_ON, ++ KVM_SMCCC_FILTER_FWD_TO_USER)) { ++ error_report("CPU On PSCI-to-user-space fwd filter install failed"); ++ } else if (kvm_arm_set_smccc_filter(PSCI_0_2_FN_CPU_OFF, ++ KVM_SMCCC_FILTER_FWD_TO_USER)) { ++ error_report("CPU Off PSCI-to-user-space fwd filter install failed"); ++ } else { ++ s->kvm_smccc_filter_enabled = true; ++ } ++ } ++ + kvm_arm_init_debug(s); + + return ret; +@@ -634,11 +653,12 @@ void kvm_arm_cpu_post_load(ARMCPU *cpu) + void kvm_arm_reset_vcpu(ARMCPU *cpu) + { + int ret; ++ CPUState *cs = CPU(cpu); + + /* Re-init VCPU so that all registers are set to + * their respective reset values. + */ +- ret = kvm_arm_vcpu_init(CPU(cpu)); ++ ret = kvm_arm_vcpu_init(cs); + if (ret < 0) { + fprintf(stderr, "kvm_arm_vcpu_init failed: %s\n", strerror(-ret)); + abort(); +@@ -655,6 +675,45 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu) + * for the same reason we do so in kvm_arch_get_registers(). + */ + write_list_to_cpustate(cpu); ++ ++ /* ++ * Ensure we call kvm_arch_put_registers(). The vCPU isn't marked dirty if ++ * it was parked in KVM and is now booting from a PSCI CPU_ON call. ++ */ ++ cs->vcpu_dirty = true; ++} ++ ++void kvm_arm_create_host_vcpu(ARMCPU *cpu) ++{ ++ CPUState *cs = CPU(cpu); ++ unsigned long vcpu_id = cs->cpu_index; ++ int ret; ++ ++ ret = kvm_create_vcpu(cs); ++ if (ret < 0) { ++ error_report("Failed to create host vcpu %ld", vcpu_id); ++ abort(); ++ } ++ ++ /* ++ * Initialize the vCPU in the host. This will reset the sys regs ++ * for this vCPU and related registers like MPIDR_EL1 etc. 
also ++ * gets programmed during this call to host. These are referred ++ * later while setting device attributes of the GICR during GICv3 ++ * reset ++ */ ++ arm_cpu_finalize_features(cpu, &error_abort); ++ ret = kvm_arch_init_vcpu(cs); ++ if (ret < 0) { ++ error_report("Failed to initialize host vcpu %ld", vcpu_id); ++ abort(); ++ } ++ ++ /* ++ * park the created vCPU. shall be used during kvm_get_vcpu() when ++ * threads are created during realization of ARM vCPUs. ++ */ ++ kvm_park_vcpu(cs); + } + + /* +@@ -925,6 +984,38 @@ static int kvm_arm_handle_dabt_nisv(CPUState *cs, uint64_t esr_iss, + return -1; + } + ++static int kvm_arm_handle_hypercall(CPUState *cs, struct kvm_run *run) ++{ ++ ARMCPU *cpu = ARM_CPU(cs); ++ CPUARMState *env = &cpu->env; ++ ++ kvm_cpu_synchronize_state(cs); ++ ++ /* ++ * hard coding immediate to 0 as we dont expect non-zero value as of now ++ * This might change in future versions. Hence, KVM_GET_ONE_REG could be ++ * used in such cases but it must be enhanced then only synchronize will ++ * also fetch ESR_EL2 value. ++ */ ++ if (run->hypercall.flags == KVM_HYPERCALL_EXIT_SMC) { ++ cs->exception_index = EXCP_SMC; ++ env->exception.syndrome = syn_aa64_smc(0); ++ } else { ++ cs->exception_index = EXCP_HVC; ++ env->exception.syndrome = syn_aa64_hvc(0); ++ } ++ env->exception.target_el = 1; ++ qemu_mutex_lock_iothread(); ++ arm_cpu_do_interrupt(cs); ++ qemu_mutex_unlock_iothread(); ++ ++ /* ++ * For PSCI, exit the kvm_run loop and process the work. Especially ++ * important if this was a CPU_OFF command and we can't return to the guest. 
++ */ ++ return EXCP_INTERRUPT; ++} ++ + int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + { + int ret = 0; +@@ -940,6 +1031,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) + ret = kvm_arm_handle_dabt_nisv(cs, run->arm_nisv.esr_iss, + run->arm_nisv.fault_ipa); + break; ++ case KVM_EXIT_HYPERCALL: ++ ret = kvm_arm_handle_hypercall(cs, run); ++ break; + default: + qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n", + __func__, run->exit_reason); +diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c +index 3c175c9..8f01d48 100644 +--- a/target/arm/kvm64.c ++++ b/target/arm/kvm64.c +@@ -111,6 +111,25 @@ bool kvm_arm_hw_debug_active(CPUState *cs) + return ((cur_hw_wps > 0) || (cur_hw_bps > 0)); + } + ++static bool kvm_arm_set_vm_attr(struct kvm_device_attr *attr, const char *name) ++{ ++ int err; ++ ++ err = kvm_vm_ioctl(kvm_state, KVM_HAS_DEVICE_ATTR, attr); ++ if (err != 0) { ++ error_report("%s: KVM_HAS_DEVICE_ATTR: %s", name, strerror(-err)); ++ return false; ++ } ++ ++ err = kvm_vm_ioctl(kvm_state, KVM_SET_DEVICE_ATTR, attr); ++ if (err != 0) { ++ error_report("%s: KVM_SET_DEVICE_ATTR: %s", name, strerror(-err)); ++ return false; ++ } ++ ++ return true; ++} ++ + static bool kvm_arm_set_device_attr(CPUState *cs, struct kvm_device_attr *attr, + const char *name) + { +@@ -181,6 +200,28 @@ void kvm_arm_pvtime_init(CPUState *cs, uint64_t ipa) + } + } + ++int kvm_arm_set_smccc_filter(uint64_t func, uint8_t faction) ++{ ++ struct kvm_smccc_filter filter = { ++ .base = func, ++ .nr_functions = 1, ++ .action = faction, ++ }; ++ struct kvm_device_attr attr = { ++ .group = KVM_ARM_VM_SMCCC_CTRL, ++ .attr = KVM_ARM_VM_SMCCC_FILTER, ++ .flags = 0, ++ .addr = (uintptr_t)&filter, ++ }; ++ ++ if (!kvm_arm_set_vm_attr(&attr, "SMCCC Filter")) { ++ error_report("failed to set SMCCC filter in KVM Host"); ++ return -1; ++ } ++ ++ return 0; ++} ++ + static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id) + { + uint64_t ret; +@@ -562,7 +603,14 
@@ int kvm_arch_init_vcpu(CPUState *cs) + return -EINVAL; + } + +- qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs); ++ /* ++ * Install VM change handler only when vCPU thread has been spawned ++ * i.e. vCPU is being realized ++ */ ++ if (cs->thread_id) { ++ cs->vmcse = qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, ++ cs); ++ } + + /* Determine init features for this CPU */ + memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features)); +@@ -599,7 +647,7 @@ int kvm_arch_init_vcpu(CPUState *cs) + return ret; + } + +- if (cpu_isar_feature(aa64_sve, cpu)) { ++ if (cpu_isar_feature(aa64_sve, cpu) && !cpu->kvm_sve_finalized) { + ret = kvm_arm_sve_set_vls(cs); + if (ret) { + return ret; +@@ -622,9 +670,8 @@ int kvm_arch_init_vcpu(CPUState *cs) + } + + /* +- * When KVM is in use, PSCI is emulated in-kernel and not by qemu. +- * Currently KVM has its own idea about MPIDR assignment, so we +- * override our defaults with what we get from KVM. ++ * KVM may emulate PSCI in-kernel. Currently KVM has its own idea about ++ * MPIDR assignment, so we override our defaults with what we get from KVM. + */ + ret = kvm_get_one_reg(cs, ARM64_SYS_REG(ARM_CPU_ID_MPIDR), &mpidr); + if (ret) { +@@ -640,6 +687,10 @@ int kvm_arch_init_vcpu(CPUState *cs) + + int kvm_arch_destroy_vcpu(CPUState *cs) + { ++ if (cs->thread_id) { ++ qemu_del_vm_change_state_handler(cs->vmcse); ++ } ++ + return 0; + } + +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 051a0da..bf4df54 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -163,6 +163,17 @@ void kvm_arm_cpu_post_load(ARMCPU *cpu); + */ + void kvm_arm_reset_vcpu(ARMCPU *cpu); + ++/** ++ * kvm_arm_create_host_vcpu: ++ * @cpu: ARMCPU ++ * ++ * Called to pre-create all possible KVM vCPUs within the host at ++ * virt machine init time. This will also init this pre-created vCPU and ++ * hence result in vCPU reset at host.
These pre-created and initialized vCPUs ++ * shall be parked for use when ARM vCPUs are actually realized. ++ */ ++void kvm_arm_create_host_vcpu(ARMCPU *cpu); ++ + /** + * kvm_arm_init_serror_injection: + * @cs: CPUState +@@ -377,6 +388,15 @@ void kvm_arm_pvtime_init(CPUState *cs, uint64_t ipa); + + int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level); + ++/** ++ * kvm_arm_set_smccc_filter: ++ * @func: SMCCC function ID the filter applies to ++ * @faction: SMCCC filter action (handle, deny, fwd-to-user) to be deployed ++ * ++ * Sets the Arm SMCCC filter in the KVM host for selective hypercall exits ++ */ ++int kvm_arm_set_smccc_filter(uint64_t func, uint8_t faction); ++ + #else + + /* +@@ -451,6 +471,10 @@ static inline uint32_t kvm_arm_sve_get_vls(CPUState *cs) + g_assert_not_reached(); + } + ++static inline int kvm_arm_set_smccc_filter(uint64_t func, uint8_t faction) ++{ ++ g_assert_not_reached(); ++} + #endif + + /** +diff --git a/target/arm/meson.build b/target/arm/meson.build +index 5d04a8e..d1dd493 100644 +--- a/target/arm/meson.build ++++ b/target/arm/meson.build +@@ -23,6 +23,7 @@ arm_system_ss.add(files( + 'arm-qmp-cmds.c', + 'cortex-regs.c', + 'machine.c', ++ 'psci.c', + 'ptw.c', + )) + +diff --git a/target/arm/tcg/psci.c b/target/arm/psci.c +similarity index 97% +rename from target/arm/tcg/psci.c +rename to target/arm/psci.c +index 6c1239b..a8690a1 100644 +--- a/target/arm/tcg/psci.c ++++ b/target/arm/psci.c +@@ -21,7 +21,9 @@ + #include "exec/helper-proto.h" + #include "kvm-consts.h" + #include "qemu/main-loop.h" ++#include "qemu/error-report.h" + #include "sysemu/runstate.h" ++#include "sysemu/tcg.h" + #include "internals.h" + #include "arm-powerctl.h" + +@@ -157,6 +159,11 @@ void arm_handle_psci_call(ARMCPU *cpu) + case QEMU_PSCI_0_1_FN_CPU_SUSPEND: + case QEMU_PSCI_0_2_FN_CPU_SUSPEND: + case QEMU_PSCI_0_2_FN64_CPU_SUSPEND: ++ if (!tcg_enabled()) { ++ warn_report("CPU suspend not supported in non-tcg mode"); ++ break; ++ } ++#ifdef CONFIG_TCG + /* Affinity levels are not supported
in QEMU */ + if (param[1] & 0xfffe0000) { + ret = QEMU_PSCI_RET_INVALID_PARAMS; +@@ -169,6 +176,7 @@ void arm_handle_psci_call(ARMCPU *cpu) + env->regs[0] = 0; + } + helper_wfi(env, 4); ++#endif + break; + case QEMU_PSCI_1_0_FN_PSCI_FEATURES: + switch (param[1]) { +diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build +index 6fca38f..ad3cfcb 100644 +--- a/target/arm/tcg/meson.build ++++ b/target/arm/tcg/meson.build +@@ -51,7 +51,3 @@ arm_ss.add(when: 'TARGET_AARCH64', if_true: files( + 'sme_helper.c', + 'sve_helper.c', + )) +- +-arm_system_ss.add(files( +- 'psci.c', +-)) +diff --git a/tcg/tcg.c b/tcg/tcg.c +index e2c38f6..06b9f7f 100644 +--- a/tcg/tcg.c ++++ b/tcg/tcg.c +@@ -769,6 +769,14 @@ static void alloc_tcg_plugin_context(TCGContext *s) + #endif + } + ++static void free_tcg_plugin_context(TCGContext *s) ++{ ++#ifdef CONFIG_PLUGIN ++ g_ptr_array_unref(s->plugin_tb->insns); ++ g_free(s->plugin_tb); ++#endif ++} ++ + /* + * All TCG threads except the parent (i.e. the one that called tcg_context_init + * and registered the target's TCG globals) must register with this function +@@ -819,6 +827,21 @@ void tcg_register_thread(void) + + tcg_ctx = s; + } ++ ++void tcg_unregister_thread(void) ++{ ++ TCGContext *s = tcg_ctx; ++ unsigned int n; ++ ++ /* Unclaim an entry in tcg_ctxs */ ++ n = qatomic_fetch_dec(&tcg_cur_ctxs); ++ g_assert(n > 1); ++ qatomic_store_release(&tcg_ctxs[n - 1], 0); ++ ++ free_tcg_plugin_context(s); ++ ++ g_free(s); ++} + #endif /* !CONFIG_USER_ONLY */ + + /* pool based memory allocation */ +diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h +index dfb8523..5e57e62 100644 +--- a/tests/qtest/bios-tables-test-allowed-diff.h ++++ b/tests/qtest/bios-tables-test-allowed-diff.h +@@ -1 +1,41 @@ + /* List of comma-separated changed AML files to ignore */ ++"tests/data/acpi/pc/DSDT", ++"tests/data/acpi/pc/DSDT.acpierst", ++"tests/data/acpi/pc/DSDT.acpihmat", 
++"tests/data/acpi/pc/DSDT.bridge", ++"tests/data/acpi/pc/DSDT.cphp", ++"tests/data/acpi/pc/DSDT.dimmpxm", ++"tests/data/acpi/pc/DSDT.hpbridge", ++"tests/data/acpi/pc/DSDT.hpbrroot", ++"tests/data/acpi/pc/DSDT.ipmikcs", ++"tests/data/acpi/pc/DSDT.memhp", ++"tests/data/acpi/pc/DSDT.nohpet", ++"tests/data/acpi/pc/DSDT.numamem", ++"tests/data/acpi/pc/DSDT.roothp", ++"tests/data/acpi/q35/DSDT", ++"tests/data/acpi/q35/DSDT.acpierst", ++"tests/data/acpi/q35/DSDT.acpihmat", ++"tests/data/acpi/q35/DSDT.acpihmat-noinitiator", ++"tests/data/acpi/q35/DSDT.applesmc", ++"tests/data/acpi/q35/DSDT.bridge", ++"tests/data/acpi/q35/DSDT.cphp", ++"tests/data/acpi/q35/DSDT.cxl", ++"tests/data/acpi/q35/DSDT.dimmpxm", ++"tests/data/acpi/q35/DSDT.ipmibt", ++"tests/data/acpi/q35/DSDT.ipmismbus", ++"tests/data/acpi/q35/DSDT.ivrs", ++"tests/data/acpi/q35/DSDT.memhp", ++"tests/data/acpi/q35/DSDT.mmio64", ++"tests/data/acpi/q35/DSDT.multi-bridge", ++"tests/data/acpi/q35/DSDT.noacpihp", ++"tests/data/acpi/q35/DSDT.nohpet", ++"tests/data/acpi/q35/DSDT.numamem", ++"tests/data/acpi/q35/DSDT.pvpanic-isa", ++"tests/data/acpi/q35/DSDT.tis.tpm12", ++"tests/data/acpi/q35/DSDT.tis.tpm2", ++"tests/data/acpi/q35/DSDT.viot", ++"tests/data/acpi/virt/DSDT", ++"tests/data/acpi/virt/DSDT.acpihmatvirt", ++"tests/data/acpi/virt/DSDT.memhp", ++"tests/data/acpi/virt/DSDT.pxb", ++"tests/data/acpi/virt/DSDT.topology", +-- +2.43.7 + diff --git a/qemu.spec b/qemu.spec index 8a86d1d908c9ea2a91530fea90dc34f7084d8e0e..219aa145b54cb710085208ddd0ac132bb6861809 100644 --- a/qemu.spec +++ b/qemu.spec @@ -136,7 +136,7 @@ Summary: QEMU is a FAST! 
processor emulator Name: qemu Version: 8.2.2 -Release: 35%{?dist} +Release: 37%{?dist} License: GPLv2 and BSD and MIT and CC-BY URL: http://www.qemu.org/ Source0: https://download.qemu.org/%{name}-%{version}.tar.xz @@ -442,6 +442,9 @@ Patch1132: 1132-i386-cpu-Mark-avx10_version-filtered-when-prefix-is-.patch #CWF ISA AVX_VNNI_INT16 support Patch1133: 1133-target-i386-add-avx-vnni-int16-feature.patch +# arm vcpu hotplug support +Patch1135: 1135-arm-virt-target-arm-support-arm-vcpu-hotplug.patch + BuildRequires: meson >= %{meson_version} BuildRequires: zlib-devel BuildRequires: glib2-devel @@ -2147,6 +2150,10 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %changelog +* Mon Sep 8 2025 blingxue - 8.2.2-37 +- [Type] other +- [DESC] Add arm vCPU Hot-plug/unplug support. + * Thu Aug 21 2025 Quanxian Wang - 8.2.2-35 - [Type] other - [DESC] plus one AVX10 bug fix and CWF ISA AVX_VNNI_INT16 support