diff --git a/hw-net-fix-vmxnet3-live-migration.patch b/hw-net-fix-vmxnet3-live-migration.patch new file mode 100644 index 0000000000000000000000000000000000000000..be97b3ac0dc76d839f646078151cddc0861ab094 --- /dev/null +++ b/hw-net-fix-vmxnet3-live-migration.patch @@ -0,0 +1,136 @@ +From b8b9f58ee5d3cff0a1e7cca770fe632043efb728 Mon Sep 17 00:00:00 2001 +From: Marcel Apfelbaum +Date: Fri, 5 Jul 2019 04:07:11 +0300 +Subject: [PATCH] hw/net: fix vmxnet3 live migration + +At some point vmxnet3 live migration stopped working and git-bisect +didn't help finding a working version. +The issue is the PCI configuration space is not being migrated +successfully and MSIX remains masked at destination. + +Remove the migration differentiation between PCI and PCIe since +the logic resides now inside VMSTATE_PCI_DEVICE. +Remove also the VMXNET3_COMPAT_FLAG_DISABLE_PCIE based differentiation +since at 'realize' time is decided if the device is PCI or PCIe, +then the above macro is enough. + +Use the opportunity to move to the standard VMSTATE_MSIX +instead of the deprecated SaveVMHandlers. + +Signed-off-by: Marcel Apfelbaum +Message-Id: <20190705010711.23277-1-marcel.apfelbaum@gmail.com> +Tested-by: Sukrit Bhatnagar +Reviewed-by: Dmitry Fleytman +Signed-off-by: Dr. 
David Alan Gilbert +--- + hw/net/vmxnet3.c | 52 ++---------------------------------------------- + 1 file changed, 2 insertions(+), 50 deletions(-) + +diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c +index ecc4f5bcf0..bf8e6ca4c9 100644 +--- a/hw/net/vmxnet3.c ++++ b/hw/net/vmxnet3.c +@@ -2153,21 +2153,6 @@ vmxnet3_cleanup_msi(VMXNET3State *s) + msi_uninit(d); + } + +-static void +-vmxnet3_msix_save(QEMUFile *f, void *opaque) +-{ +- PCIDevice *d = PCI_DEVICE(opaque); +- msix_save(d, f); +-} +- +-static int +-vmxnet3_msix_load(QEMUFile *f, void *opaque, int version_id) +-{ +- PCIDevice *d = PCI_DEVICE(opaque); +- msix_load(d, f); +- return 0; +-} +- + static const MemoryRegionOps b0_ops = { + .read = vmxnet3_io_bar0_read, + .write = vmxnet3_io_bar0_write, +@@ -2188,11 +2173,6 @@ static const MemoryRegionOps b1_ops = { + }, + }; + +-static SaveVMHandlers savevm_vmxnet3_msix = { +- .save_state = vmxnet3_msix_save, +- .load_state = vmxnet3_msix_load, +-}; +- + static uint64_t vmxnet3_device_serial_num(VMXNET3State *s) + { + uint64_t dsn_payload; +@@ -2215,7 +2195,6 @@ static uint64_t vmxnet3_device_serial_num(VMXNET3State *s) + + static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp) + { +- DeviceState *dev = DEVICE(pci_dev); + VMXNET3State *s = VMXNET3(pci_dev); + int ret; + +@@ -2261,8 +2240,6 @@ static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp) + pcie_dev_ser_num_init(pci_dev, VMXNET3_DSN_OFFSET, + vmxnet3_device_serial_num(s)); + } +- +- register_savevm_live(dev, "vmxnet3-msix", -1, 1, &savevm_vmxnet3_msix, s); + } + + static void vmxnet3_instance_init(Object *obj) +@@ -2452,29 +2429,6 @@ static const VMStateDescription vmstate_vmxnet3_int_state = { + } + }; + +-static bool vmxnet3_vmstate_need_pcie_device(void *opaque) +-{ +- VMXNET3State *s = VMXNET3(opaque); +- +- return !(s->compat_flags & VMXNET3_COMPAT_FLAG_DISABLE_PCIE); +-} +- +-static bool vmxnet3_vmstate_test_pci_device(void *opaque, int version_id) +-{ +- return 
!vmxnet3_vmstate_need_pcie_device(opaque); +-} +- +-static const VMStateDescription vmstate_vmxnet3_pcie_device = { +- .name = "vmxnet3/pcie", +- .version_id = 1, +- .minimum_version_id = 1, +- .needed = vmxnet3_vmstate_need_pcie_device, +- .fields = (VMStateField[]) { +- VMSTATE_PCI_DEVICE(parent_obj, VMXNET3State), +- VMSTATE_END_OF_LIST() +- } +-}; +- + static const VMStateDescription vmstate_vmxnet3 = { + .name = "vmxnet3", + .version_id = 1, +@@ -2482,9 +2436,8 @@ static const VMStateDescription vmstate_vmxnet3 = { + .pre_save = vmxnet3_pre_save, + .post_load = vmxnet3_post_load, + .fields = (VMStateField[]) { +- VMSTATE_STRUCT_TEST(parent_obj, VMXNET3State, +- vmxnet3_vmstate_test_pci_device, 0, +- vmstate_pci_device, PCIDevice), ++ VMSTATE_PCI_DEVICE(parent_obj, VMXNET3State), ++ VMSTATE_MSIX(parent_obj, VMXNET3State), + VMSTATE_BOOL(rx_packets_compound, VMXNET3State), + VMSTATE_BOOL(rx_vlan_stripping, VMXNET3State), + VMSTATE_BOOL(lro_supported, VMXNET3State), +@@ -2520,7 +2473,6 @@ static const VMStateDescription vmstate_vmxnet3 = { + }, + .subsections = (const VMStateDescription*[]) { + &vmxstate_vmxnet3_mcast_list, +- &vmstate_vmxnet3_pcie_device, + NULL + } + }; +-- +2.27.0 + diff --git a/include-Make-headers-more-self-contained.patch b/include-Make-headers-more-self-contained.patch new file mode 100644 index 0000000000000000000000000000000000000000..565471c8ce67ec70b0bb5691f66cba384b8a1202 --- /dev/null +++ b/include-Make-headers-more-self-contained.patch @@ -0,0 +1,1551 @@ +From 1b6a1ef572411efee7cbf1b65aeb15c704b997cc Mon Sep 17 00:00:00 2001 +From: Markus Armbruster +Date: Mon, 12 Aug 2019 07:23:31 +0200 +Subject: [PATCH] include: Make headers more self-contained +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Back in 2016, we discussed[1] rules for headers, and these were +generally liked: + +1. Have a carefully curated header that's included everywhere first. 
We + got that already thanks to Peter: osdep.h. + +2. Headers should normally include everything they need beyond osdep.h. + If exceptions are needed for some reason, they must be documented in + the header. If all that's needed from a header is typedefs, put + those into qemu/typedefs.h instead of including the header. + +3. Cyclic inclusion is forbidden. + +This patch gets include/ closer to obeying 2. + +It's actually extracted from my "[RFC] Baby steps towards saner +headers" series[2], which demonstrates a possible path towards +checking 2 automatically. It passes the RFC test there. + +[1] Message-ID: <87h9g8j57d.fsf@blackfin.pond.sub.org> + https://lists.nongnu.org/archive/html/qemu-devel/2016-03/msg03345.html +[2] Message-Id: <20190711122827.18970-1-armbru@redhat.com> + https://lists.nongnu.org/archive/html/qemu-devel/2019-07/msg02715.html + +Signed-off-by: Markus Armbruster +Reviewed-by: Alistair Francis +Message-Id: <20190812052359.30071-2-armbru@redhat.com> +Tested-by: Philippe Mathieu-Daudé +--- + include/block/raw-aio.h | 2 ++ + include/block/write-threshold.h | 2 ++ + include/disas/disas.h | 1 + + include/exec/cputlb.h | 3 +++ + include/exec/exec-all.h | 1 + + include/exec/ioport.h | 2 ++ + include/exec/memory-internal.h | 2 ++ + include/exec/ram_addr.h | 1 + + include/exec/softmmu-semi.h | 2 ++ + include/exec/tb-hash.h | 2 ++ + include/exec/user/thunk.h | 2 ++ + include/fpu/softfloat-macros.h | 2 ++ + include/hw/acpi/pci.h | 3 +++ + include/hw/acpi/tco.h | 3 +++ + include/hw/adc/stm32f2xx_adc.h | 2 ++ + include/hw/arm/allwinner-a10.h | 1 + + include/hw/arm/aspeed_soc.h | 1 + + include/hw/arm/bcm2836.h | 1 + + include/hw/arm/exynos4210.h | 3 +-- + include/hw/arm/fsl-imx25.h | 1 + + include/hw/arm/fsl-imx31.h | 1 + + include/hw/arm/sharpsl.h | 3 +++ + include/hw/arm/xlnx-zynqmp.h | 1 + + include/hw/block/fdc.h | 2 ++ + include/hw/block/flash.h | 1 + + include/hw/char/escc.h | 1 + + include/hw/char/xilinx_uartlite.h | 2 ++ + 
include/hw/core/generic-loader.h | 1 + + include/hw/cris/etraxfs.h | 1 + + include/hw/cris/etraxfs_dma.h | 3 +++ + include/hw/display/i2c-ddc.h | 1 + + include/hw/empty_slot.h | 2 ++ + include/hw/gpio/bcm2835_gpio.h | 1 + + include/hw/i2c/aspeed_i2c.h | 2 ++ + include/hw/i386/apic_internal.h | 1 + + include/hw/i386/ioapic_internal.h | 1 + + include/hw/intc/allwinner-a10-pic.h | 2 ++ + include/hw/intc/heathrow_pic.h | 2 ++ + include/hw/intc/mips_gic.h | 1 + + include/hw/isa/vt82c686.h | 2 ++ + include/hw/mips/cps.h | 1 + + include/hw/misc/macio/cuda.h | 2 ++ + include/hw/misc/macio/gpio.h | 3 +++ + include/hw/misc/macio/macio.h | 2 ++ + include/hw/misc/macio/pmu.h | 3 +++ + include/hw/misc/mips_cmgcr.h | 2 ++ + include/hw/misc/mips_cpc.h | 2 ++ + include/hw/misc/pvpanic.h | 3 +++ + include/hw/net/allwinner_emac.h | 1 + + include/hw/net/lance.h | 1 + + include/hw/nvram/chrp_nvram.h | 2 ++ + include/hw/pci-host/sabre.h | 2 ++ + include/hw/pci-host/uninorth.h | 2 +- + include/hw/pci/pcie_aer.h | 1 + + include/hw/ppc/pnv_core.h | 1 + + include/hw/ppc/ppc4xx.h | 4 ++++ + include/hw/ppc/spapr_irq.h | 3 +++ + include/hw/ppc/spapr_vio.h | 1 + + include/hw/ppc/spapr_xive.h | 2 ++ + include/hw/ppc/xive_regs.h | 3 +++ + include/hw/riscv/boot.h | 2 ++ + include/hw/riscv/riscv_hart.h | 3 +++ + include/hw/riscv/sifive_clint.h | 2 ++ + include/hw/riscv/sifive_e.h | 1 + + include/hw/riscv/sifive_plic.h | 2 +- + include/hw/riscv/sifive_prci.h | 2 ++ + include/hw/riscv/sifive_test.h | 2 ++ + include/hw/riscv/sifive_u.h | 1 + + include/hw/riscv/sifive_uart.h | 3 +++ + include/hw/riscv/spike.h | 3 +++ + include/hw/riscv/virt.h | 3 +++ + include/hw/s390x/ap-device.h | 3 +++ + include/hw/s390x/css-bridge.h | 3 ++- + include/hw/s390x/css.h | 1 + + include/hw/s390x/tod.h | 2 +- + include/hw/semihosting/console.h | 2 ++ + include/hw/sh4/sh_intc.h | 1 + + include/hw/sparc/sparc64.h | 2 ++ + include/hw/ssi/aspeed_smc.h | 1 + + include/hw/ssi/xilinx_spips.h | 1 + + 
include/hw/timer/allwinner-a10-pit.h | 1 + + include/hw/timer/i8254_internal.h | 1 + + include/hw/timer/m48t59.h | 2 ++ + include/hw/timer/mc146818rtc_regs.h | 2 ++ + include/hw/timer/xlnx-zynqmp-rtc.h | 1 + + include/hw/virtio/virtio-access.h | 1 + + include/hw/virtio/virtio-gpu-bswap.h | 1 + + include/hw/virtio/virtio-rng.h | 1 + + include/hw/watchdog/wdt_aspeed.h | 1 + + include/libdecnumber/decNumberLocal.h | 1 + + include/migration/cpu.h | 3 +++ + include/monitor/hmp-target.h | 2 ++ + include/qemu/atomic128.h | 2 ++ + include/qemu/ratelimit.h | 2 ++ + include/qemu/thread-win32.h | 2 +- + include/sysemu/balloon.h | 1 + + include/sysemu/cryptodev-vhost-user.h | 3 +++ + include/sysemu/hvf.h | 1 + + include/sysemu/iothread.h | 1 + + include/sysemu/kvm_int.h | 2 ++ + include/sysemu/memory_mapping.h | 2 ++ + include/sysemu/xen-mapcache.h | 2 ++ + include/ui/egl-helpers.h | 3 +++ + include/ui/input.h | 1 + + include/ui/spice-display.h | 1 + + target/hppa/cpu.h | 2 +- + 106 files changed, 183 insertions(+), 8 deletions(-) + +diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h +index 0cb7cc74a2..4629f24d08 100644 +--- a/include/block/raw-aio.h ++++ b/include/block/raw-aio.h +@@ -12,9 +12,11 @@ + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ ++ + #ifndef QEMU_RAW_AIO_H + #define QEMU_RAW_AIO_H + ++#include "block/aio.h" + #include "qemu/coroutine.h" + #include "qemu/iov.h" + +diff --git a/include/block/write-threshold.h b/include/block/write-threshold.h +index 80d8aab5d0..c646f267a4 100644 +--- a/include/block/write-threshold.h ++++ b/include/block/write-threshold.h +@@ -9,9 +9,11 @@ + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. 
+ */ ++ + #ifndef BLOCK_WRITE_THRESHOLD_H + #define BLOCK_WRITE_THRESHOLD_H + ++#include "block/block_int.h" + + /* + * bdrv_write_threshold_set: +diff --git a/include/disas/disas.h b/include/disas/disas.h +index 15da511f49..ba47e9197c 100644 +--- a/include/disas/disas.h ++++ b/include/disas/disas.h +@@ -1,6 +1,7 @@ + #ifndef QEMU_DISAS_H + #define QEMU_DISAS_H + ++#include "exec/hwaddr.h" + + #ifdef NEED_CPU_H + #include "cpu.h" +diff --git a/include/exec/cputlb.h b/include/exec/cputlb.h +index 5373188be3..a62cfb28d5 100644 +--- a/include/exec/cputlb.h ++++ b/include/exec/cputlb.h +@@ -16,9 +16,12 @@ + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ ++ + #ifndef CPUTLB_H + #define CPUTLB_H + ++#include "exec/cpu-common.h" ++ + #if !defined(CONFIG_USER_ONLY) + /* cputlb.c */ + void tlb_protect_code(ram_addr_t ram_addr); +diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h +index 16034ee651..135aeaab0d 100644 +--- a/include/exec/exec-all.h ++++ b/include/exec/exec-all.h +@@ -20,6 +20,7 @@ + #ifndef EXEC_ALL_H + #define EXEC_ALL_H + ++#include "cpu.h" + #include "exec/tb-context.h" + #include "sysemu/cpus.h" + +diff --git a/include/exec/ioport.h b/include/exec/ioport.h +index a298b89ce1..97feb296d2 100644 +--- a/include/exec/ioport.h ++++ b/include/exec/ioport.h +@@ -24,6 +24,8 @@ + #ifndef IOPORT_H + #define IOPORT_H + ++#include "exec/memory.h" ++ + #define MAX_IOPORTS (64 * 1024) + #define IOPORTS_MASK (MAX_IOPORTS - 1) + +diff --git a/include/exec/memory-internal.h b/include/exec/memory-internal.h +index d1a9dd1ec8..ef4fb92371 100644 +--- a/include/exec/memory-internal.h ++++ b/include/exec/memory-internal.h +@@ -20,6 +20,8 @@ + #ifndef MEMORY_INTERNAL_H + #define MEMORY_INTERNAL_H + ++#include "cpu.h" ++ + #ifndef CONFIG_USER_ONLY + static inline AddressSpaceDispatch *flatview_to_dispatch(FlatView *fv) + { +diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h +index 
523440662b..27a164b669 100644 +--- a/include/exec/ram_addr.h ++++ b/include/exec/ram_addr.h +@@ -20,6 +20,7 @@ + #define RAM_ADDR_H + + #ifndef CONFIG_USER_ONLY ++#include "cpu.h" + #include "hw/xen/xen.h" + #include "sysemu/tcg.h" + #include "exec/ramlist.h" +diff --git a/include/exec/softmmu-semi.h b/include/exec/softmmu-semi.h +index 970837992e..fbcae88f4b 100644 +--- a/include/exec/softmmu-semi.h ++++ b/include/exec/softmmu-semi.h +@@ -10,6 +10,8 @@ + #ifndef SOFTMMU_SEMI_H + #define SOFTMMU_SEMI_H + ++#include "cpu.h" ++ + static inline uint64_t softmmu_tget64(CPUArchState *env, target_ulong addr) + { + uint64_t val; +diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h +index 4f3a37d927..805235d321 100644 +--- a/include/exec/tb-hash.h ++++ b/include/exec/tb-hash.h +@@ -20,6 +20,8 @@ + #ifndef EXEC_TB_HASH_H + #define EXEC_TB_HASH_H + ++#include "exec/cpu-defs.h" ++#include "exec/exec-all.h" + #include "qemu/xxhash.h" + + #ifdef CONFIG_SOFTMMU +diff --git a/include/exec/user/thunk.h b/include/exec/user/thunk.h +index 8d3af5a3be..eae2c27f99 100644 +--- a/include/exec/user/thunk.h ++++ b/include/exec/user/thunk.h +@@ -16,10 +16,12 @@ + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ ++ + #ifndef THUNK_H + #define THUNK_H + + #include "cpu.h" ++#include "exec/user/abitypes.h" + + /* types enums definitions */ + +diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h +index c55aa6d174..be83a833ec 100644 +--- a/include/fpu/softfloat-macros.h ++++ b/include/fpu/softfloat-macros.h +@@ -82,6 +82,8 @@ this code that are retained. + #ifndef FPU_SOFTFLOAT_MACROS_H + #define FPU_SOFTFLOAT_MACROS_H + ++#include "fpu/softfloat.h" ++ + /*---------------------------------------------------------------------------- + | Shifts `a' right by the number of bits given in `count'. 
If any nonzero + | bits are shifted off, they are ``jammed'' into the least significant bit of +diff --git a/include/hw/acpi/pci.h b/include/hw/acpi/pci.h +index 8bbd32cf45..bf2a3ed0ba 100644 +--- a/include/hw/acpi/pci.h ++++ b/include/hw/acpi/pci.h +@@ -22,9 +22,12 @@ + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ ++ + #ifndef HW_ACPI_PCI_H + #define HW_ACPI_PCI_H + ++#include "hw/acpi/bios-linker-loader.h" ++ + typedef struct AcpiMcfgInfo { + uint64_t base; + uint32_t size; +diff --git a/include/hw/acpi/tco.h b/include/hw/acpi/tco.h +index d19dd59353..726f840cce 100644 +--- a/include/hw/acpi/tco.h ++++ b/include/hw/acpi/tco.h +@@ -6,9 +6,12 @@ + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ ++ + #ifndef HW_ACPI_TCO_H + #define HW_ACPI_TCO_H + ++#include "exec/memory.h" ++#include "migration/vmstate.h" + + /* As per ICH9 spec, the internal timer has an error of ~0.6s on every tick */ + #define TCO_TICK_NSEC 600000000LL +diff --git a/include/hw/adc/stm32f2xx_adc.h b/include/hw/adc/stm32f2xx_adc.h +index a72f734eb1..663b79f4f3 100644 +--- a/include/hw/adc/stm32f2xx_adc.h ++++ b/include/hw/adc/stm32f2xx_adc.h +@@ -25,6 +25,8 @@ + #ifndef HW_STM32F2XX_ADC_H + #define HW_STM32F2XX_ADC_H + ++#include "hw/sysbus.h" ++ + #define ADC_SR 0x00 + #define ADC_CR1 0x04 + #define ADC_CR2 0x08 +diff --git a/include/hw/arm/allwinner-a10.h b/include/hw/arm/allwinner-a10.h +index e99fe2ea2e..7182ce5c4b 100644 +--- a/include/hw/arm/allwinner-a10.h ++++ b/include/hw/arm/allwinner-a10.h +@@ -11,6 +11,7 @@ + #include "hw/ide/ahci.h" + + #include "sysemu/sysemu.h" ++#include "target/arm/cpu.h" + + + #define AW_A10_PIC_REG_BASE 0x01c20400 +diff --git a/include/hw/arm/aspeed_soc.h b/include/hw/arm/aspeed_soc.h +index cef605ad6b..976fd6be93 100644 +--- a/include/hw/arm/aspeed_soc.h ++++ b/include/hw/arm/aspeed_soc.h +@@ -22,6 +22,7 @@ 
+ #include "hw/ssi/aspeed_smc.h" + #include "hw/watchdog/wdt_aspeed.h" + #include "hw/net/ftgmac100.h" ++#include "target/arm/cpu.h" + + #define ASPEED_SPIS_NUM 2 + #define ASPEED_WDTS_NUM 3 +diff --git a/include/hw/arm/bcm2836.h b/include/hw/arm/bcm2836.h +index a2cb8454de..97187f72be 100644 +--- a/include/hw/arm/bcm2836.h ++++ b/include/hw/arm/bcm2836.h +@@ -13,6 +13,7 @@ + + #include "hw/arm/bcm2835_peripherals.h" + #include "hw/intc/bcm2836_control.h" ++#include "target/arm/cpu.h" + + #define TYPE_BCM283X "bcm283x" + #define BCM283X(obj) OBJECT_CHECK(BCM283XState, (obj), TYPE_BCM283X) +diff --git a/include/hw/arm/exynos4210.h b/include/hw/arm/exynos4210.h +index aa137271c0..f0f23b0e9b 100644 +--- a/include/hw/arm/exynos4210.h ++++ b/include/hw/arm/exynos4210.h +@@ -19,13 +19,12 @@ + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . +- * + */ + + #ifndef EXYNOS4210_H + #define EXYNOS4210_H + +-#include "exec/memory.h" ++#include "hw/sysbus.h" + #include "target/arm/cpu-qom.h" + + #define EXYNOS4210_NCPUS 2 +diff --git a/include/hw/arm/fsl-imx25.h b/include/hw/arm/fsl-imx25.h +index 3280ab1fb0..241efb52ae 100644 +--- a/include/hw/arm/fsl-imx25.h ++++ b/include/hw/arm/fsl-imx25.h +@@ -27,6 +27,7 @@ + #include "hw/i2c/imx_i2c.h" + #include "hw/gpio/imx_gpio.h" + #include "exec/memory.h" ++#include "target/arm/cpu.h" + + #define TYPE_FSL_IMX25 "fsl,imx25" + #define FSL_IMX25(obj) OBJECT_CHECK(FslIMX25State, (obj), TYPE_FSL_IMX25) +diff --git a/include/hw/arm/fsl-imx31.h b/include/hw/arm/fsl-imx31.h +index e68a81efd7..ac5ca9826a 100644 +--- a/include/hw/arm/fsl-imx31.h ++++ b/include/hw/arm/fsl-imx31.h +@@ -26,6 +26,7 @@ + #include "hw/i2c/imx_i2c.h" + #include "hw/gpio/imx_gpio.h" + #include "exec/memory.h" ++#include "target/arm/cpu.h" + + #define TYPE_FSL_IMX31 "fsl,imx31" + #define FSL_IMX31(obj) OBJECT_CHECK(FslIMX31State, (obj), TYPE_FSL_IMX31) +diff --git a/include/hw/arm/sharpsl.h 
b/include/hw/arm/sharpsl.h +index 5bf6db1fa2..89e168fbff 100644 +--- a/include/hw/arm/sharpsl.h ++++ b/include/hw/arm/sharpsl.h +@@ -3,9 +3,12 @@ + * + * This file is licensed under the GNU GPL. + */ ++ + #ifndef QEMU_SHARPSL_H + #define QEMU_SHARPSL_H + ++#include "exec/hwaddr.h" ++ + #define zaurus_printf(format, ...) \ + fprintf(stderr, "%s: " format, __func__, ##__VA_ARGS__) + +diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h +index 35804ea80a..6cb65e7537 100644 +--- a/include/hw/arm/xlnx-zynqmp.h ++++ b/include/hw/arm/xlnx-zynqmp.h +@@ -32,6 +32,7 @@ + #include "hw/intc/xlnx-zynqmp-ipi.h" + #include "hw/timer/xlnx-zynqmp-rtc.h" + #include "hw/cpu/cluster.h" ++#include "target/arm/cpu.h" + + #define TYPE_XLNX_ZYNQMP "xlnx,zynqmp" + #define XLNX_ZYNQMP(obj) OBJECT_CHECK(XlnxZynqMPState, (obj), \ +diff --git a/include/hw/block/fdc.h b/include/hw/block/fdc.h +index 8cece84326..f4fe2f471b 100644 +--- a/include/hw/block/fdc.h ++++ b/include/hw/block/fdc.h +@@ -1,6 +1,8 @@ + #ifndef HW_FDC_H + #define HW_FDC_H + ++#include "exec/hwaddr.h" ++#include "hw/irq.h" + #include "qapi/qapi-types-block.h" + + /* fdc.c */ +diff --git a/include/hw/block/flash.h b/include/hw/block/flash.h +index 1acaf7de80..83a75f3170 100644 +--- a/include/hw/block/flash.h ++++ b/include/hw/block/flash.h +@@ -4,6 +4,7 @@ + /* NOR flash devices */ + + #include "exec/memory.h" ++#include "migration/vmstate.h" + + /* pflash_cfi01.c */ + +diff --git a/include/hw/char/escc.h b/include/hw/char/escc.h +index 42aca83611..d5196c53e6 100644 +--- a/include/hw/char/escc.h ++++ b/include/hw/char/escc.h +@@ -3,6 +3,7 @@ + + #include "chardev/char-fe.h" + #include "chardev/char-serial.h" ++#include "hw/sysbus.h" + #include "ui/input.h" + + /* escc.c */ +diff --git a/include/hw/char/xilinx_uartlite.h b/include/hw/char/xilinx_uartlite.h +index 634086b657..99d8bbf405 100644 +--- a/include/hw/char/xilinx_uartlite.h ++++ b/include/hw/char/xilinx_uartlite.h +@@ -15,6 +15,8 @@ + #ifndef 
XILINX_UARTLITE_H + #define XILINX_UARTLITE_H + ++#include "hw/sysbus.h" ++ + static inline DeviceState *xilinx_uartlite_create(hwaddr addr, + qemu_irq irq, + Chardev *chr) +diff --git a/include/hw/core/generic-loader.h b/include/hw/core/generic-loader.h +index dd27c42ab0..9ffce1c5a3 100644 +--- a/include/hw/core/generic-loader.h ++++ b/include/hw/core/generic-loader.h +@@ -19,6 +19,7 @@ + #define GENERIC_LOADER_H + + #include "elf.h" ++#include "hw/qdev-core.h" + + typedef struct GenericLoaderState { + /* */ +diff --git a/include/hw/cris/etraxfs.h b/include/hw/cris/etraxfs.h +index 8da965addb..494222d315 100644 +--- a/include/hw/cris/etraxfs.h ++++ b/include/hw/cris/etraxfs.h +@@ -27,6 +27,7 @@ + + #include "net/net.h" + #include "hw/cris/etraxfs_dma.h" ++#include "hw/sysbus.h" + + /* Instantiate an ETRAXFS Ethernet MAC. */ + static inline DeviceState * +diff --git a/include/hw/cris/etraxfs_dma.h b/include/hw/cris/etraxfs_dma.h +index f6f33e0980..31ae360611 100644 +--- a/include/hw/cris/etraxfs_dma.h ++++ b/include/hw/cris/etraxfs_dma.h +@@ -1,6 +1,9 @@ + #ifndef HW_ETRAXFS_DMA_H + #define HW_ETRAXFS_DMA_H + ++#include "exec/hwaddr.h" ++#include "hw/irq.h" ++ + struct dma_context_metadata { + /* data descriptor md */ + uint16_t metadata; +diff --git a/include/hw/display/i2c-ddc.h b/include/hw/display/i2c-ddc.h +index c29443c5af..1cf53a0c8d 100644 +--- a/include/hw/display/i2c-ddc.h ++++ b/include/hw/display/i2c-ddc.h +@@ -20,6 +20,7 @@ + #define I2C_DDC_H + + #include "hw/display/edid.h" ++#include "hw/i2c/i2c.h" + + /* A simple I2C slave which just returns the contents of its EDID blob. 
*/ + struct I2CDDCState { +diff --git a/include/hw/empty_slot.h b/include/hw/empty_slot.h +index 123a9f8989..cb9a221aa6 100644 +--- a/include/hw/empty_slot.h ++++ b/include/hw/empty_slot.h +@@ -1,6 +1,8 @@ + #ifndef HW_EMPTY_SLOT_H + #define HW_EMPTY_SLOT_H + ++#include "exec/hwaddr.h" ++ + /* empty_slot.c */ + void empty_slot_init(hwaddr addr, uint64_t slot_size); + +diff --git a/include/hw/gpio/bcm2835_gpio.h b/include/hw/gpio/bcm2835_gpio.h +index 9f8e0c720c..b0de0a3c74 100644 +--- a/include/hw/gpio/bcm2835_gpio.h ++++ b/include/hw/gpio/bcm2835_gpio.h +@@ -15,6 +15,7 @@ + #define BCM2835_GPIO_H + + #include "hw/sd/sd.h" ++#include "hw/sysbus.h" + + typedef struct BCM2835GpioState { + SysBusDevice parent_obj; +diff --git a/include/hw/i2c/aspeed_i2c.h b/include/hw/i2c/aspeed_i2c.h +index f9020acdef..a2753f0bbb 100644 +--- a/include/hw/i2c/aspeed_i2c.h ++++ b/include/hw/i2c/aspeed_i2c.h +@@ -17,10 +17,12 @@ + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ ++ + #ifndef ASPEED_I2C_H + #define ASPEED_I2C_H + + #include "hw/i2c/i2c.h" ++#include "hw/sysbus.h" + + #define TYPE_ASPEED_I2C "aspeed.i2c" + #define ASPEED_I2C(obj) \ +diff --git a/include/hw/i386/apic_internal.h b/include/hw/i386/apic_internal.h +index 1209eb483a..b04bdd947f 100644 +--- a/include/hw/i386/apic_internal.h ++++ b/include/hw/i386/apic_internal.h +@@ -24,6 +24,7 @@ + #include "cpu.h" + #include "exec/memory.h" + #include "qemu/timer.h" ++#include "target/i386/cpu-qom.h" + + /* APIC Local Vector Table */ + #define APIC_LVT_TIMER 0 +diff --git a/include/hw/i386/ioapic_internal.h b/include/hw/i386/ioapic_internal.h +index 07002f9662..3d2eec2aa7 100644 +--- a/include/hw/i386/ioapic_internal.h ++++ b/include/hw/i386/ioapic_internal.h +@@ -24,6 +24,7 @@ + + #include "hw/hw.h" + #include "exec/memory.h" ++#include "hw/i386/ioapic.h" + #include "hw/sysbus.h" + #include "qemu/notify.h" + +diff --git a/include/hw/intc/allwinner-a10-pic.h b/include/hw/intc/allwinner-a10-pic.h +index 1d314a70d9..a5895401d1 100644 +--- a/include/hw/intc/allwinner-a10-pic.h ++++ b/include/hw/intc/allwinner-a10-pic.h +@@ -1,6 +1,8 @@ + #ifndef ALLWINNER_A10_PIC_H + #define ALLWINNER_A10_PIC_H + ++#include "hw/sysbus.h" ++ + #define TYPE_AW_A10_PIC "allwinner-a10-pic" + #define AW_A10_PIC(obj) OBJECT_CHECK(AwA10PICState, (obj), TYPE_AW_A10_PIC) + +diff --git a/include/hw/intc/heathrow_pic.h b/include/hw/intc/heathrow_pic.h +index 6c91ec91bb..b163e27ab9 100644 +--- a/include/hw/intc/heathrow_pic.h ++++ b/include/hw/intc/heathrow_pic.h +@@ -26,6 +26,8 @@ + #ifndef HW_INTC_HEATHROW_PIC_H + #define HW_INTC_HEATHROW_PIC_H + ++#include "hw/sysbus.h" ++ + #define TYPE_HEATHROW "heathrow" + #define HEATHROW(obj) OBJECT_CHECK(HeathrowState, (obj), TYPE_HEATHROW) + +diff --git a/include/hw/intc/mips_gic.h b/include/hw/intc/mips_gic.h +index 902a12b178..8428287bf9 100644 +--- a/include/hw/intc/mips_gic.h ++++ b/include/hw/intc/mips_gic.h +@@ -13,6 +13,7 @@ + + #include "qemu/units.h" + 
#include "hw/timer/mips_gictimer.h" ++#include "hw/sysbus.h" + #include "cpu.h" + /* + * GIC Specific definitions +diff --git a/include/hw/isa/vt82c686.h b/include/hw/isa/vt82c686.h +index c3c2b6e786..a54c3fe60a 100644 +--- a/include/hw/isa/vt82c686.h ++++ b/include/hw/isa/vt82c686.h +@@ -1,6 +1,8 @@ + #ifndef HW_VT82C686_H + #define HW_VT82C686_H + ++#include "hw/irq.h" ++ + #define TYPE_VT82C686B_SUPERIO "vt82c686b-superio" + + /* vt82c686.c */ +diff --git a/include/hw/mips/cps.h b/include/hw/mips/cps.h +index aab1af926d..a941c55f27 100644 +--- a/include/hw/mips/cps.h ++++ b/include/hw/mips/cps.h +@@ -25,6 +25,7 @@ + #include "hw/intc/mips_gic.h" + #include "hw/misc/mips_cpc.h" + #include "hw/misc/mips_itu.h" ++#include "target/mips/cpu.h" + + #define TYPE_MIPS_CPS "mips-cps" + #define MIPS_CPS(obj) OBJECT_CHECK(MIPSCPSState, (obj), TYPE_MIPS_CPS) +diff --git a/include/hw/misc/macio/cuda.h b/include/hw/misc/macio/cuda.h +index 7dad469142..5768075ac5 100644 +--- a/include/hw/misc/macio/cuda.h ++++ b/include/hw/misc/macio/cuda.h +@@ -26,6 +26,8 @@ + #ifndef CUDA_H + #define CUDA_H + ++#include "hw/misc/mos6522.h" ++ + /* CUDA commands (2nd byte) */ + #define CUDA_WARM_START 0x0 + #define CUDA_AUTOPOLL 0x1 +diff --git a/include/hw/misc/macio/gpio.h b/include/hw/misc/macio/gpio.h +index 2838ae5fde..24a4364b39 100644 +--- a/include/hw/misc/macio/gpio.h ++++ b/include/hw/misc/macio/gpio.h +@@ -26,6 +26,9 @@ + #ifndef MACIO_GPIO_H + #define MACIO_GPIO_H + ++#include "hw/ppc/openpic.h" ++#include "hw/sysbus.h" ++ + #define TYPE_MACIO_GPIO "macio-gpio" + #define MACIO_GPIO(obj) OBJECT_CHECK(MacIOGPIOState, (obj), TYPE_MACIO_GPIO) + +diff --git a/include/hw/misc/macio/macio.h b/include/hw/misc/macio/macio.h +index 970058b6ed..070a694eb5 100644 +--- a/include/hw/misc/macio/macio.h ++++ b/include/hw/misc/macio/macio.h +@@ -27,10 +27,12 @@ + #define MACIO_H + + #include "hw/char/escc.h" ++#include "hw/ide/internal.h" + #include "hw/intc/heathrow_pic.h" + #include 
"hw/misc/macio/cuda.h" + #include "hw/misc/macio/gpio.h" + #include "hw/misc/macio/pmu.h" ++#include "hw/ppc/mac.h" + #include "hw/ppc/mac_dbdma.h" + #include "hw/ppc/openpic.h" + +diff --git a/include/hw/misc/macio/pmu.h b/include/hw/misc/macio/pmu.h +index d10895ba5f..7ef83dee4c 100644 +--- a/include/hw/misc/macio/pmu.h ++++ b/include/hw/misc/macio/pmu.h +@@ -10,6 +10,9 @@ + #ifndef PMU_H + #define PMU_H + ++#include "hw/misc/mos6522.h" ++#include "hw/misc/macio/gpio.h" ++ + /* + * PMU commands + */ +diff --git a/include/hw/misc/mips_cmgcr.h b/include/hw/misc/mips_cmgcr.h +index c9dfcb4b84..3e6e223273 100644 +--- a/include/hw/misc/mips_cmgcr.h ++++ b/include/hw/misc/mips_cmgcr.h +@@ -10,6 +10,8 @@ + #ifndef MIPS_CMGCR_H + #define MIPS_CMGCR_H + ++#include "hw/sysbus.h" ++ + #define TYPE_MIPS_GCR "mips-gcr" + #define MIPS_GCR(obj) OBJECT_CHECK(MIPSGCRState, (obj), TYPE_MIPS_GCR) + +diff --git a/include/hw/misc/mips_cpc.h b/include/hw/misc/mips_cpc.h +index 72c834e039..3f670578b0 100644 +--- a/include/hw/misc/mips_cpc.h ++++ b/include/hw/misc/mips_cpc.h +@@ -20,6 +20,8 @@ + #ifndef MIPS_CPC_H + #define MIPS_CPC_H + ++#include "hw/sysbus.h" ++ + #define CPC_ADDRSPACE_SZ 0x6000 + + /* CPC blocks offsets relative to base address */ +diff --git a/include/hw/misc/pvpanic.h b/include/hw/misc/pvpanic.h +index 1ee071a703..ae0c8188ce 100644 +--- a/include/hw/misc/pvpanic.h ++++ b/include/hw/misc/pvpanic.h +@@ -11,9 +11,12 @@ + * See the COPYING file in the top-level directory. 
+ * + */ ++ + #ifndef HW_MISC_PVPANIC_H + #define HW_MISC_PVPANIC_H + ++#include "qom/object.h" ++ + #define TYPE_PVPANIC "pvpanic" + + #define PVPANIC_IOPORT_PROP "ioport" +diff --git a/include/hw/net/allwinner_emac.h b/include/hw/net/allwinner_emac.h +index 905a43deb4..5013207d15 100644 +--- a/include/hw/net/allwinner_emac.h ++++ b/include/hw/net/allwinner_emac.h +@@ -27,6 +27,7 @@ + #include "net/net.h" + #include "qemu/fifo8.h" + #include "hw/net/mii.h" ++#include "hw/sysbus.h" + + #define TYPE_AW_EMAC "allwinner-emac" + #define AW_EMAC(obj) OBJECT_CHECK(AwEmacState, (obj), TYPE_AW_EMAC) +diff --git a/include/hw/net/lance.h b/include/hw/net/lance.h +index ffdd35c4d7..0357f5f65c 100644 +--- a/include/hw/net/lance.h ++++ b/include/hw/net/lance.h +@@ -31,6 +31,7 @@ + + #include "net/net.h" + #include "hw/net/pcnet.h" ++#include "hw/sysbus.h" + + #define TYPE_LANCE "lance" + #define SYSBUS_PCNET(obj) \ +diff --git a/include/hw/nvram/chrp_nvram.h b/include/hw/nvram/chrp_nvram.h +index b4f5b2b104..09941a9be4 100644 +--- a/include/hw/nvram/chrp_nvram.h ++++ b/include/hw/nvram/chrp_nvram.h +@@ -18,6 +18,8 @@ + #ifndef CHRP_NVRAM_H + #define CHRP_NVRAM_H + ++#include "qemu/bswap.h" ++ + /* OpenBIOS NVRAM partition */ + typedef struct { + uint8_t signature; +diff --git a/include/hw/pci-host/sabre.h b/include/hw/pci-host/sabre.h +index 9afa4938fd..99b5aefbec 100644 +--- a/include/hw/pci-host/sabre.h ++++ b/include/hw/pci-host/sabre.h +@@ -1,6 +1,8 @@ + #ifndef HW_PCI_HOST_SABRE_H + #define HW_PCI_HOST_SABRE_H + ++#include "hw/pci/pci.h" ++#include "hw/pci/pci_host.h" + #include "hw/sparc/sun4u_iommu.h" + + #define MAX_IVEC 0x40 +diff --git a/include/hw/pci-host/uninorth.h b/include/hw/pci-host/uninorth.h +index 060324536a..9a5cabd4c5 100644 +--- a/include/hw/pci-host/uninorth.h ++++ b/include/hw/pci-host/uninorth.h +@@ -26,7 +26,7 @@ + #define UNINORTH_H + + #include "hw/hw.h" +- ++#include "hw/pci/pci_host.h" + #include "hw/ppc/openpic.h" + + /* UniNorth version */ +diff 
--git a/include/hw/pci/pcie_aer.h b/include/hw/pci/pcie_aer.h +index 729a9439c8..502dcd7eba 100644 +--- a/include/hw/pci/pcie_aer.h ++++ b/include/hw/pci/pcie_aer.h +@@ -22,6 +22,7 @@ + #define QEMU_PCIE_AER_H + + #include "hw/hw.h" ++#include "hw/pci/pci_regs.h" + + /* definitions which PCIExpressDevice uses */ + +diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h +index d0926454a9..bfbd2ec42a 100644 +--- a/include/hw/ppc/pnv_core.h ++++ b/include/hw/ppc/pnv_core.h +@@ -21,6 +21,7 @@ + #define PPC_PNV_CORE_H + + #include "hw/cpu/core.h" ++#include "target/ppc/cpu.h" + + #define TYPE_PNV_CORE "powernv-cpu-core" + #define PNV_CORE(obj) \ +diff --git a/include/hw/ppc/ppc4xx.h b/include/hw/ppc/ppc4xx.h +index 39a7ba1ce6..90f8866138 100644 +--- a/include/hw/ppc/ppc4xx.h ++++ b/include/hw/ppc/ppc4xx.h +@@ -25,6 +25,10 @@ + #ifndef PPC4XX_H + #define PPC4XX_H + ++#include "hw/ppc/ppc.h" ++#include "exec/cpu-common.h" ++#include "exec/memory.h" ++ + /* PowerPC 4xx core initialization */ + PowerPCCPU *ppc4xx_init(const char *cpu_model, + clk_setup_t *cpu_clk, clk_setup_t *tb_clk, +diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h +index f965a58f89..cd6e18b05e 100644 +--- a/include/hw/ppc/spapr_irq.h ++++ b/include/hw/ppc/spapr_irq.h +@@ -10,6 +10,9 @@ + #ifndef HW_SPAPR_IRQ_H + #define HW_SPAPR_IRQ_H + ++#include "hw/irq.h" ++#include "target/ppc/cpu-qom.h" ++ + /* + * IRQ range offsets per device type + */ +diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h +index 97951fc6b4..92bfa72caf 100644 +--- a/include/hw/ppc/spapr_vio.h ++++ b/include/hw/ppc/spapr_vio.h +@@ -22,6 +22,7 @@ + * License along with this library; if not, see . 
+ */ + ++#include "hw/ppc/spapr.h" + #include "sysemu/dma.h" + + #define TYPE_VIO_SPAPR_DEVICE "vio-spapr-device" +diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h +index 7197144265..a39e672f27 100644 +--- a/include/hw/ppc/spapr_xive.h ++++ b/include/hw/ppc/spapr_xive.h +@@ -10,7 +10,9 @@ + #ifndef PPC_SPAPR_XIVE_H + #define PPC_SPAPR_XIVE_H + ++#include "hw/ppc/spapr_irq.h" + #include "hw/ppc/xive.h" ++#include "sysemu/sysemu.h" + + #define TYPE_SPAPR_XIVE "spapr-xive" + #define SPAPR_XIVE(obj) OBJECT_CHECK(SpaprXive, (obj), TYPE_SPAPR_XIVE) +diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h +index 1a8c5b5e64..b0c68ab5f7 100644 +--- a/include/hw/ppc/xive_regs.h ++++ b/include/hw/ppc/xive_regs.h +@@ -16,6 +16,9 @@ + #ifndef PPC_XIVE_REGS_H + #define PPC_XIVE_REGS_H + ++#include "qemu/bswap.h" ++#include "qemu/host-utils.h" ++ + /* + * Interrupt source number encoding on PowerBUS + */ +diff --git a/include/hw/riscv/boot.h b/include/hw/riscv/boot.h +index d56f2ae3eb..1f21c2bef1 100644 +--- a/include/hw/riscv/boot.h ++++ b/include/hw/riscv/boot.h +@@ -20,6 +20,8 @@ + #ifndef RISCV_BOOT_H + #define RISCV_BOOT_H + ++#include "exec/cpu-defs.h" ++ + void riscv_find_and_load_firmware(MachineState *machine, + const char *default_machine_firmware, + hwaddr firmware_load_addr); +diff --git a/include/hw/riscv/riscv_hart.h b/include/hw/riscv/riscv_hart.h +index 0671d88a44..3b52b50571 100644 +--- a/include/hw/riscv/riscv_hart.h ++++ b/include/hw/riscv/riscv_hart.h +@@ -21,6 +21,9 @@ + #ifndef HW_RISCV_HART_H + #define HW_RISCV_HART_H + ++#include "hw/sysbus.h" ++#include "target/riscv/cpu.h" ++ + #define TYPE_RISCV_HART_ARRAY "riscv.hart_array" + + #define RISCV_HART_ARRAY(obj) \ +diff --git a/include/hw/riscv/sifive_clint.h b/include/hw/riscv/sifive_clint.h +index e2865be1d1..ae8286c884 100644 +--- a/include/hw/riscv/sifive_clint.h ++++ b/include/hw/riscv/sifive_clint.h +@@ -20,6 +20,8 @@ + #ifndef HW_SIFIVE_CLINT_H + #define 
HW_SIFIVE_CLINT_H + ++#include "hw/sysbus.h" ++ + #define TYPE_SIFIVE_CLINT "riscv.sifive.clint" + + #define SIFIVE_CLINT(obj) \ +diff --git a/include/hw/riscv/sifive_e.h b/include/hw/riscv/sifive_e.h +index d175b24cb2..9c868dd7f9 100644 +--- a/include/hw/riscv/sifive_e.h ++++ b/include/hw/riscv/sifive_e.h +@@ -19,6 +19,7 @@ + #ifndef HW_SIFIVE_E_H + #define HW_SIFIVE_E_H + ++#include "hw/riscv/riscv_hart.h" + #include "hw/riscv/sifive_gpio.h" + + #define TYPE_RISCV_E_SOC "riscv.sifive.e.soc" +diff --git a/include/hw/riscv/sifive_plic.h b/include/hw/riscv/sifive_plic.h +index ce8907f6aa..b0edba2884 100644 +--- a/include/hw/riscv/sifive_plic.h ++++ b/include/hw/riscv/sifive_plic.h +@@ -21,7 +21,7 @@ + #ifndef HW_SIFIVE_PLIC_H + #define HW_SIFIVE_PLIC_H + +-#include "hw/irq.h" ++#include "hw/sysbus.h" + + #define TYPE_SIFIVE_PLIC "riscv.sifive.plic" + +diff --git a/include/hw/riscv/sifive_prci.h b/include/hw/riscv/sifive_prci.h +index bd51c4af3c..8b7de134f8 100644 +--- a/include/hw/riscv/sifive_prci.h ++++ b/include/hw/riscv/sifive_prci.h +@@ -19,6 +19,8 @@ + #ifndef HW_SIFIVE_PRCI_H + #define HW_SIFIVE_PRCI_H + ++#include "hw/sysbus.h" ++ + enum { + SIFIVE_PRCI_HFROSCCFG = 0x0, + SIFIVE_PRCI_HFXOSCCFG = 0x4, +diff --git a/include/hw/riscv/sifive_test.h b/include/hw/riscv/sifive_test.h +index 71d4c9fad7..3a603a6ead 100644 +--- a/include/hw/riscv/sifive_test.h ++++ b/include/hw/riscv/sifive_test.h +@@ -19,6 +19,8 @@ + #ifndef HW_SIFIVE_TEST_H + #define HW_SIFIVE_TEST_H + ++#include "hw/sysbus.h" ++ + #define TYPE_SIFIVE_TEST "riscv.sifive.test" + + #define SIFIVE_TEST(obj) \ +diff --git a/include/hw/riscv/sifive_u.h b/include/hw/riscv/sifive_u.h +index 892f0eee21..be021ce256 100644 +--- a/include/hw/riscv/sifive_u.h ++++ b/include/hw/riscv/sifive_u.h +@@ -20,6 +20,7 @@ + #define HW_SIFIVE_U_H + + #include "hw/net/cadence_gem.h" ++#include "hw/riscv/riscv_hart.h" + + #define TYPE_RISCV_U_SOC "riscv.sifive.u.soc" + #define RISCV_U_SOC(obj) \ +diff --git 
a/include/hw/riscv/sifive_uart.h b/include/hw/riscv/sifive_uart.h +index c8dc1c57fd..65668825a3 100644 +--- a/include/hw/riscv/sifive_uart.h ++++ b/include/hw/riscv/sifive_uart.h +@@ -20,6 +20,9 @@ + #ifndef HW_SIFIVE_UART_H + #define HW_SIFIVE_UART_H + ++#include "chardev/char-fe.h" ++#include "hw/sysbus.h" ++ + enum { + SIFIVE_UART_TXFIFO = 0, + SIFIVE_UART_RXFIFO = 4, +diff --git a/include/hw/riscv/spike.h b/include/hw/riscv/spike.h +index 641b70da67..03d870363c 100644 +--- a/include/hw/riscv/spike.h ++++ b/include/hw/riscv/spike.h +@@ -19,6 +19,9 @@ + #ifndef HW_RISCV_SPIKE_H + #define HW_RISCV_SPIKE_H + ++#include "hw/riscv/riscv_hart.h" ++#include "hw/sysbus.h" ++ + typedef struct { + /*< private >*/ + SysBusDevice parent_obj; +diff --git a/include/hw/riscv/virt.h b/include/hw/riscv/virt.h +index d01a1a85c4..6e5fbe5d3b 100644 +--- a/include/hw/riscv/virt.h ++++ b/include/hw/riscv/virt.h +@@ -19,6 +19,9 @@ + #ifndef HW_RISCV_VIRT_H + #define HW_RISCV_VIRT_H + ++#include "hw/riscv/riscv_hart.h" ++#include "hw/sysbus.h" ++ + typedef struct { + /*< private >*/ + SysBusDevice parent_obj; +diff --git a/include/hw/s390x/ap-device.h b/include/hw/s390x/ap-device.h +index 765e9082a3..8df9cd2954 100644 +--- a/include/hw/s390x/ap-device.h ++++ b/include/hw/s390x/ap-device.h +@@ -7,9 +7,12 @@ + * your option) any later version. See the COPYING file in the top-level + * directory. 
+ */ ++ + #ifndef HW_S390X_AP_DEVICE_H + #define HW_S390X_AP_DEVICE_H + ++#include "hw/qdev-core.h" ++ + #define AP_DEVICE_TYPE "ap-device" + + typedef struct APDevice { +diff --git a/include/hw/s390x/css-bridge.h b/include/hw/s390x/css-bridge.h +index 5a0203be5f..f7ed2d9a03 100644 +--- a/include/hw/s390x/css-bridge.h ++++ b/include/hw/s390x/css-bridge.h +@@ -12,8 +12,9 @@ + + #ifndef HW_S390X_CSS_BRIDGE_H + #define HW_S390X_CSS_BRIDGE_H ++ + #include "qom/object.h" +-#include "hw/qdev-core.h" ++#include "hw/sysbus.h" + + /* virtual css bridge */ + typedef struct VirtualCssBridge { +diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h +index d033387fba..f46bcafb16 100644 +--- a/include/hw/s390x/css.h ++++ b/include/hw/s390x/css.h +@@ -17,6 +17,7 @@ + #include "hw/s390x/s390_flic.h" + #include "hw/s390x/ioinst.h" + #include "sysemu/kvm.h" ++#include "target/s390x/cpu-qom.h" + + /* Channel subsystem constants. */ + #define MAX_DEVNO 65535 +diff --git a/include/hw/s390x/tod.h b/include/hw/s390x/tod.h +index 9c4a6000c3..d71f4ea8a7 100644 +--- a/include/hw/s390x/tod.h ++++ b/include/hw/s390x/tod.h +@@ -12,7 +12,7 @@ + #define HW_S390_TOD_H + + #include "hw/qdev.h" +-#include "s390-tod.h" ++#include "target/s390x/s390-tod.h" + + typedef struct S390TOD { + uint8_t high; +diff --git a/include/hw/semihosting/console.h b/include/hw/semihosting/console.h +index cfab572c0c..9be9754bcd 100644 +--- a/include/hw/semihosting/console.h ++++ b/include/hw/semihosting/console.h +@@ -9,6 +9,8 @@ + #ifndef SEMIHOST_CONSOLE_H + #define SEMIHOST_CONSOLE_H + ++#include "cpu.h" ++ + /** + * qemu_semihosting_console_outs: + * @env: CPUArchState +diff --git a/include/hw/sh4/sh_intc.h b/include/hw/sh4/sh_intc.h +index b7c2404334..3d3efde059 100644 +--- a/include/hw/sh4/sh_intc.h ++++ b/include/hw/sh4/sh_intc.h +@@ -1,6 +1,7 @@ + #ifndef SH_INTC_H + #define SH_INTC_H + ++#include "exec/memory.h" + #include "hw/irq.h" + + typedef unsigned char intc_enum; +diff --git 
a/include/hw/sparc/sparc64.h b/include/hw/sparc/sparc64.h +index 21ab79e343..4ced36fb5a 100644 +--- a/include/hw/sparc/sparc64.h ++++ b/include/hw/sparc/sparc64.h +@@ -1,6 +1,8 @@ + #ifndef HW_SPARC_SPARC64_H + #define HW_SPARC_SPARC64_H + ++#include "target/sparc/cpu-qom.h" ++ + #define IVEC_MAX 0x40 + + SPARCCPU *sparc64_cpu_devinit(const char *cpu_type, uint64_t prom_addr); +diff --git a/include/hw/ssi/aspeed_smc.h b/include/hw/ssi/aspeed_smc.h +index 591279ba1f..aa07dac4fe 100644 +--- a/include/hw/ssi/aspeed_smc.h ++++ b/include/hw/ssi/aspeed_smc.h +@@ -26,6 +26,7 @@ + #define ASPEED_SMC_H + + #include "hw/ssi/ssi.h" ++#include "hw/sysbus.h" + + typedef struct AspeedSegments { + hwaddr addr; +diff --git a/include/hw/ssi/xilinx_spips.h b/include/hw/ssi/xilinx_spips.h +index a0a0ae7584..6a39b55a7b 100644 +--- a/include/hw/ssi/xilinx_spips.h ++++ b/include/hw/ssi/xilinx_spips.h +@@ -28,6 +28,7 @@ + #include "hw/ssi/ssi.h" + #include "qemu/fifo32.h" + #include "hw/stream.h" ++#include "hw/sysbus.h" + + typedef struct XilinxSPIPS XilinxSPIPS; + +diff --git a/include/hw/timer/allwinner-a10-pit.h b/include/hw/timer/allwinner-a10-pit.h +index c0cc3e2169..871c95b512 100644 +--- a/include/hw/timer/allwinner-a10-pit.h ++++ b/include/hw/timer/allwinner-a10-pit.h +@@ -2,6 +2,7 @@ + #define ALLWINNER_A10_PIT_H + + #include "hw/ptimer.h" ++#include "hw/sysbus.h" + + #define TYPE_AW_A10_PIT "allwinner-A10-timer" + #define AW_A10_PIT(obj) OBJECT_CHECK(AwA10PITState, (obj), TYPE_AW_A10_PIT) +diff --git a/include/hw/timer/i8254_internal.h b/include/hw/timer/i8254_internal.h +index c37a438f82..e611c6f227 100644 +--- a/include/hw/timer/i8254_internal.h ++++ b/include/hw/timer/i8254_internal.h +@@ -27,6 +27,7 @@ + + #include "hw/hw.h" + #include "hw/isa/isa.h" ++#include "hw/timer/i8254.h" + #include "qemu/timer.h" + + typedef struct PITChannelState { +diff --git a/include/hw/timer/m48t59.h b/include/hw/timer/m48t59.h +index 43efc91f56..d3fb50e08c 100644 +--- 
a/include/hw/timer/m48t59.h ++++ b/include/hw/timer/m48t59.h +@@ -1,6 +1,8 @@ + #ifndef HW_M48T59_H + #define HW_M48T59_H + ++#include "exec/hwaddr.h" ++#include "hw/irq.h" + #include "qom/object.h" + + #define TYPE_NVRAM "nvram" +diff --git a/include/hw/timer/mc146818rtc_regs.h b/include/hw/timer/mc146818rtc_regs.h +index c62f17bf2d..bfbb57e570 100644 +--- a/include/hw/timer/mc146818rtc_regs.h ++++ b/include/hw/timer/mc146818rtc_regs.h +@@ -25,6 +25,8 @@ + #ifndef MC146818RTC_REGS_H + #define MC146818RTC_REGS_H + ++#include "qemu/timer.h" ++ + #define RTC_ISA_IRQ 8 + + #define RTC_SECONDS 0 +diff --git a/include/hw/timer/xlnx-zynqmp-rtc.h b/include/hw/timer/xlnx-zynqmp-rtc.h +index 6e9134edf6..97e32322ed 100644 +--- a/include/hw/timer/xlnx-zynqmp-rtc.h ++++ b/include/hw/timer/xlnx-zynqmp-rtc.h +@@ -28,6 +28,7 @@ + #define HW_TIMER_XLNX_ZYNQMP_RTC_H + + #include "hw/register.h" ++#include "hw/sysbus.h" + + #define TYPE_XLNX_ZYNQMP_RTC "xlnx-zynmp.rtc" + +diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h +index bdf58f3119..6818a23a2d 100644 +--- a/include/hw/virtio/virtio-access.h ++++ b/include/hw/virtio/virtio-access.h +@@ -16,6 +16,7 @@ + #ifndef QEMU_VIRTIO_ACCESS_H + #define QEMU_VIRTIO_ACCESS_H + ++#include "exec/hwaddr.h" + #include "hw/virtio/virtio.h" + #include "hw/virtio/virtio-bus.h" + +diff --git a/include/hw/virtio/virtio-gpu-bswap.h b/include/hw/virtio/virtio-gpu-bswap.h +index 38d12160f6..203f9e1718 100644 +--- a/include/hw/virtio/virtio-gpu-bswap.h ++++ b/include/hw/virtio/virtio-gpu-bswap.h +@@ -15,6 +15,7 @@ + #define HW_VIRTIO_GPU_BSWAP_H + + #include "qemu/bswap.h" ++#include "standard-headers/linux/virtio_gpu.h" + + static inline void + virtio_gpu_ctrl_hdr_bswap(struct virtio_gpu_ctrl_hdr *hdr) +diff --git a/include/hw/virtio/virtio-rng.h b/include/hw/virtio/virtio-rng.h +index 922dce7cac..ff699335e3 100644 +--- a/include/hw/virtio/virtio-rng.h ++++ b/include/hw/virtio/virtio-rng.h +@@ -12,6 +12,7 @@ + #ifndef 
QEMU_VIRTIO_RNG_H + #define QEMU_VIRTIO_RNG_H + ++#include "hw/virtio/virtio.h" + #include "sysemu/rng.h" + #include "sysemu/rng-random.h" + #include "standard-headers/linux/virtio_rng.h" +diff --git a/include/hw/watchdog/wdt_aspeed.h b/include/hw/watchdog/wdt_aspeed.h +index daef0c0e23..8c5691ce20 100644 +--- a/include/hw/watchdog/wdt_aspeed.h ++++ b/include/hw/watchdog/wdt_aspeed.h +@@ -10,6 +10,7 @@ + #ifndef WDT_ASPEED_H + #define WDT_ASPEED_H + ++#include "hw/misc/aspeed_scu.h" + #include "hw/sysbus.h" + + #define TYPE_ASPEED_WDT "aspeed.wdt" +diff --git a/include/libdecnumber/decNumberLocal.h b/include/libdecnumber/decNumberLocal.h +index 12cf1d8b6f..4d53c077f2 100644 +--- a/include/libdecnumber/decNumberLocal.h ++++ b/include/libdecnumber/decNumberLocal.h +@@ -44,6 +44,7 @@ + #define DECNLAUTHOR "Mike Cowlishaw" /* Who to blame */ + + #include "libdecnumber/dconfig.h" ++ #include "libdecnumber/decContext.h" + + /* Conditional code flag -- set this to match hardware platform */ + /* 1=little-endian, 0=big-endian */ +diff --git a/include/migration/cpu.h b/include/migration/cpu.h +index a40bd3549f..da1618d620 100644 +--- a/include/migration/cpu.h ++++ b/include/migration/cpu.h +@@ -1,7 +1,10 @@ + /* Declarations for use for CPU state serialization. 
*/ ++ + #ifndef MIGRATION_CPU_H + #define MIGRATION_CPU_H + ++#include "exec/cpu-defs.h" ++ + #if TARGET_LONG_BITS == 64 + #define qemu_put_betl qemu_put_be64 + #define qemu_get_betl qemu_get_be64 +diff --git a/include/monitor/hmp-target.h b/include/monitor/hmp-target.h +index 454e8ed155..8b7820a3ad 100644 +--- a/include/monitor/hmp-target.h ++++ b/include/monitor/hmp-target.h +@@ -25,6 +25,8 @@ + #ifndef MONITOR_HMP_TARGET_H + #define MONITOR_HMP_TARGET_H + ++#include "cpu.h" ++ + #define MD_TLONG 0 + #define MD_I32 1 + +diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h +index ddd0d55d31..6b34484e15 100644 +--- a/include/qemu/atomic128.h ++++ b/include/qemu/atomic128.h +@@ -13,6 +13,8 @@ + #ifndef QEMU_ATOMIC128_H + #define QEMU_ATOMIC128_H + ++#include "qemu/int128.h" ++ + /* + * GCC is a house divided about supporting large atomic operations. + * +diff --git a/include/qemu/ratelimit.h b/include/qemu/ratelimit.h +index 1b38291823..01da8d63f1 100644 +--- a/include/qemu/ratelimit.h ++++ b/include/qemu/ratelimit.h +@@ -14,6 +14,8 @@ + #ifndef QEMU_RATELIMIT_H + #define QEMU_RATELIMIT_H + ++#include "qemu/timer.h" ++ + typedef struct { + int64_t slice_start_time; + int64_t slice_end_time; +diff --git a/include/qemu/thread-win32.h b/include/qemu/thread-win32.h +index 50af5dd7ab..d0a1a9597e 100644 +--- a/include/qemu/thread-win32.h ++++ b/include/qemu/thread-win32.h +@@ -47,6 +47,6 @@ struct QemuThread { + }; + + /* Only valid for joinable threads. 
*/ +-HANDLE qemu_thread_get_handle(QemuThread *thread); ++HANDLE qemu_thread_get_handle(struct QemuThread *thread); + + #endif +diff --git a/include/sysemu/balloon.h b/include/sysemu/balloon.h +index c8f6145257..aea0c44985 100644 +--- a/include/sysemu/balloon.h ++++ b/include/sysemu/balloon.h +@@ -14,6 +14,7 @@ + #ifndef QEMU_BALLOON_H + #define QEMU_BALLOON_H + ++#include "exec/cpu-common.h" + #include "qapi/qapi-types-misc.h" + + typedef void (QEMUBalloonEvent)(void *opaque, ram_addr_t target); +diff --git a/include/sysemu/cryptodev-vhost-user.h b/include/sysemu/cryptodev-vhost-user.h +index 6debf53fc5..0d3421e7e8 100644 +--- a/include/sysemu/cryptodev-vhost-user.h ++++ b/include/sysemu/cryptodev-vhost-user.h +@@ -20,9 +20,12 @@ + * License along with this library; if not, see . + * + */ ++ + #ifndef CRYPTODEV_VHOST_USER_H + #define CRYPTODEV_VHOST_USER_H + ++#include "sysemu/cryptodev-vhost.h" ++ + #define VHOST_USER_MAX_AUTH_KEY_LEN 512 + #define VHOST_USER_MAX_CIPHER_KEY_LEN 64 + +diff --git a/include/sysemu/hvf.h b/include/sysemu/hvf.h +index d275b5a843..dd1722f2df 100644 +--- a/include/sysemu/hvf.h ++++ b/include/sysemu/hvf.h +@@ -13,6 +13,7 @@ + #ifndef HVF_H + #define HVF_H + ++#include "cpu.h" + #include "qemu/bitops.h" + #include "exec/memory.h" + #include "sysemu/accel.h" +diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h +index 5f6240d5cb..6181486401 100644 +--- a/include/sysemu/iothread.h ++++ b/include/sysemu/iothread.h +@@ -16,6 +16,7 @@ + + #include "block/aio.h" + #include "qemu/thread.h" ++#include "qom/object.h" + + #define TYPE_IOTHREAD "iothread" + +diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h +index 31df465fdc..787dbc7770 100644 +--- a/include/sysemu/kvm_int.h ++++ b/include/sysemu/kvm_int.h +@@ -9,6 +9,8 @@ + #ifndef QEMU_KVM_INT_H + #define QEMU_KVM_INT_H + ++#include "exec/cpu-common.h" ++#include "exec/memory.h" + #include "sysemu/sysemu.h" + #include "sysemu/accel.h" + #include "sysemu/kvm.h" +diff 
--git a/include/sysemu/memory_mapping.h b/include/sysemu/memory_mapping.h +index 58452457ce..1b440df486 100644 +--- a/include/sysemu/memory_mapping.h ++++ b/include/sysemu/memory_mapping.h +@@ -15,6 +15,8 @@ + #define MEMORY_MAPPING_H + + #include "qemu/queue.h" ++#include "exec/cpu-common.h" ++#include "exec/cpu-defs.h" + #include "exec/memory.h" + + typedef struct GuestPhysBlock { +diff --git a/include/sysemu/xen-mapcache.h b/include/sysemu/xen-mapcache.h +index a03e2f1878..c8e7c2f6cf 100644 +--- a/include/sysemu/xen-mapcache.h ++++ b/include/sysemu/xen-mapcache.h +@@ -9,6 +9,8 @@ + #ifndef XEN_MAPCACHE_H + #define XEN_MAPCACHE_H + ++#include "exec/cpu-common.h" ++ + typedef hwaddr (*phys_offset_to_gaddr_t)(hwaddr phys_offset, + ram_addr_t size); + #ifdef CONFIG_XEN +diff --git a/include/ui/egl-helpers.h b/include/ui/egl-helpers.h +index d714127799..58bd3a1ec4 100644 +--- a/include/ui/egl-helpers.h ++++ b/include/ui/egl-helpers.h +@@ -4,6 +4,9 @@ + #include + #include + #include ++#include "qapi/qapi-types-ui.h" ++#include "ui/console.h" ++#include "ui/shader.h" + + extern EGLDisplay *qemu_egl_display; + extern EGLConfig qemu_egl_config; +diff --git a/include/ui/input.h b/include/ui/input.h +index 8c8ccb999f..c86219a1c1 100644 +--- a/include/ui/input.h ++++ b/include/ui/input.h +@@ -2,6 +2,7 @@ + #define INPUT_H + + #include "qapi/qapi-types-ui.h" ++#include "qemu/notify.h" + + #define INPUT_EVENT_MASK_KEY (1< + #include + #include + #include +diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h +index aab251bc4b..e9fba96be9 100644 +--- a/target/hppa/cpu.h ++++ b/target/hppa/cpu.h +@@ -22,7 +22,7 @@ + + #include "cpu-qom.h" + #include "exec/cpu-defs.h" +- ++#include "exec/memory.h" + + /* PA-RISC 1.x processors have a strong memory model. */ + /* ??? 
While we do not yet implement PA-RISC 2.0, those processors have +-- +2.27.0 + diff --git a/linux-headers-Update-against-Add-migration-support-f.patch b/linux-headers-Update-against-Add-migration-support-f.patch new file mode 100644 index 0000000000000000000000000000000000000000..1bfef98c0c9b6771ccbe2fbd700a233e09cd9baf --- /dev/null +++ b/linux-headers-Update-against-Add-migration-support-f.patch @@ -0,0 +1,517 @@ +From 7ab9ce4016ec48e0af8010f742ee39fc84342d00 Mon Sep 17 00:00:00 2001 +From: Jinhao Gao +Date: Fri, 23 Jul 2021 14:55:12 +0800 +Subject: [PATCH] linux headers: Update against "Add migration support for VFIO + devices" + +Update linux-headers/linux/vfio.h against Linux 5.9-rc7 for the +VFIO migration support series. + +Signed-off-by: Jinhao Gao +Signed-off-by: Shenming Lu +--- + linux-headers/linux/vfio.h | 420 +++++++++++++++++++++++++++++++++++-- + 1 file changed, 405 insertions(+), 15 deletions(-) + +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index 24f505199f..a90672494d 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -295,15 +295,39 @@ struct vfio_region_info_cap_type { + __u32 subtype; /* type specific */ + }; + ++/* ++ * List of region types, global per bus driver. ++ * If you introduce a new type, please add it here. 
++ */ ++ ++/* PCI region type containing a PCI vendor part */ + #define VFIO_REGION_TYPE_PCI_VENDOR_TYPE (1 << 31) + #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff) ++#define VFIO_REGION_TYPE_GFX (1) ++#define VFIO_REGION_TYPE_CCW (2) ++#define VFIO_REGION_TYPE_MIGRATION (3) ++ ++/* sub-types for VFIO_REGION_TYPE_PCI_* */ + +-/* 8086 Vendor sub-types */ ++/* 8086 vendor PCI sub-types */ + #define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1) + #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2) + #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3) + +-#define VFIO_REGION_TYPE_GFX (1) ++/* 10de vendor PCI sub-types */ ++/* ++ * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space. ++ */ ++#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1) ++ ++/* 1014 vendor PCI sub-types */ ++/* ++ * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU ++ * to do TLB invalidation on a GPU. ++ */ ++#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1) ++ ++/* sub-types for VFIO_REGION_TYPE_GFX */ + #define VFIO_REGION_SUBTYPE_GFX_EDID (1) + + /** +@@ -353,24 +377,237 @@ struct vfio_region_gfx_edid { + #define VFIO_DEVICE_GFX_LINK_STATE_DOWN 2 + }; + +-#define VFIO_REGION_TYPE_CCW (2) +-/* ccw sub-types */ ++/* sub-types for VFIO_REGION_TYPE_CCW */ + #define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD (1) ++#define VFIO_REGION_SUBTYPE_CCW_SCHIB (2) ++#define VFIO_REGION_SUBTYPE_CCW_CRW (3) + +-/* +- * 10de vendor sub-type +- * +- * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space. +- */ +-#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1) ++/* sub-types for VFIO_REGION_TYPE_MIGRATION */ ++#define VFIO_REGION_SUBTYPE_MIGRATION (1) + + /* +- * 1014 vendor sub-type ++ * The structure vfio_device_migration_info is placed at the 0th offset of ++ * the VFIO_REGION_SUBTYPE_MIGRATION region to get and set VFIO device related ++ * migration information. Field accesses from this structure are only supported ++ * at their native width and alignment. 
Otherwise, the result is undefined and ++ * vendor drivers should return an error. + * +- * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU +- * to do TLB invalidation on a GPU. ++ * device_state: (read/write) ++ * - The user application writes to this field to inform the vendor driver ++ * about the device state to be transitioned to. ++ * - The vendor driver should take the necessary actions to change the ++ * device state. After successful transition to a given state, the ++ * vendor driver should return success on write(device_state, state) ++ * system call. If the device state transition fails, the vendor driver ++ * should return an appropriate -errno for the fault condition. ++ * - On the user application side, if the device state transition fails, ++ * that is, if write(device_state, state) returns an error, read ++ * device_state again to determine the current state of the device from ++ * the vendor driver. ++ * - The vendor driver should return previous state of the device unless ++ * the vendor driver has encountered an internal error, in which case ++ * the vendor driver may report the device_state VFIO_DEVICE_STATE_ERROR. ++ * - The user application must use the device reset ioctl to recover the ++ * device from VFIO_DEVICE_STATE_ERROR state. If the device is ++ * indicated to be in a valid device state by reading device_state, the ++ * user application may attempt to transition the device to any valid ++ * state reachable from the current state or terminate itself. ++ * ++ * device_state consists of 3 bits: ++ * - If bit 0 is set, it indicates the _RUNNING state. If bit 0 is clear, ++ * it indicates the _STOP state. When the device state is changed to ++ * _STOP, driver should stop the device before write() returns. 
++ * - If bit 1 is set, it indicates the _SAVING state, which means that the ++ * driver should start gathering device state information that will be ++ * provided to the VFIO user application to save the device's state. ++ * - If bit 2 is set, it indicates the _RESUMING state, which means that ++ * the driver should prepare to resume the device. Data provided through ++ * the migration region should be used to resume the device. ++ * Bits 3 - 31 are reserved for future use. To preserve them, the user ++ * application should perform a read-modify-write operation on this ++ * field when modifying the specified bits. ++ * ++ * +------- _RESUMING ++ * |+------ _SAVING ++ * ||+----- _RUNNING ++ * ||| ++ * 000b => Device Stopped, not saving or resuming ++ * 001b => Device running, which is the default state ++ * 010b => Stop the device & save the device state, stop-and-copy state ++ * 011b => Device running and save the device state, pre-copy state ++ * 100b => Device stopped and the device state is resuming ++ * 101b => Invalid state ++ * 110b => Error state ++ * 111b => Invalid state ++ * ++ * State transitions: ++ * ++ * _RESUMING _RUNNING Pre-copy Stop-and-copy _STOP ++ * (100b) (001b) (011b) (010b) (000b) ++ * 0. Running or default state ++ * | ++ * ++ * 1. Normal Shutdown (optional) ++ * |------------------------------------->| ++ * ++ * 2. Save the state or suspend ++ * |------------------------->|---------->| ++ * ++ * 3. Save the state during live migration ++ * |----------->|------------>|---------->| ++ * ++ * 4. Resuming ++ * |<---------| ++ * ++ * 5. Resumed ++ * |--------->| ++ * ++ * 0. Default state of VFIO device is _RUNNING when the user application starts. ++ * 1. During normal shutdown of the user application, the user application may ++ * optionally change the VFIO device state from _RUNNING to _STOP. This ++ * transition is optional. The vendor driver must support this transition but ++ * must not require it. ++ * 2.
When the user application saves state or suspends the application, the ++ * device state transitions from _RUNNING to stop-and-copy and then to _STOP. ++ * On state transition from _RUNNING to stop-and-copy, driver must stop the ++ * device, save the device state and send it to the application through the ++ * migration region. The sequence to be followed for such transition is given ++ * below. ++ * 3. In live migration of user application, the state transitions from _RUNNING ++ * to pre-copy, to stop-and-copy, and to _STOP. ++ * On state transition from _RUNNING to pre-copy, the driver should start ++ * gathering the device state while the application is still running and send ++ * the device state data to application through the migration region. ++ * On state transition from pre-copy to stop-and-copy, the driver must stop ++ * the device, save the device state and send it to the user application ++ * through the migration region. ++ * Vendor drivers must support the pre-copy state even for implementations ++ * where no data is provided to the user before the stop-and-copy state. The ++ * user must not be required to consume all migration data before the device ++ * transitions to a new state, including the stop-and-copy state. ++ * The sequence to be followed for above two transitions is given below. ++ * 4. To start the resuming phase, the device state should be transitioned from ++ * the _RUNNING to the _RESUMING state. ++ * In the _RESUMING state, the driver should use the device state data ++ * received through the migration region to resume the device. ++ * 5. After providing saved device data to the driver, the application should ++ * change the state from _RESUMING to _RUNNING. ++ * ++ * reserved: ++ * Reads on this field return zero and writes are ignored. ++ * ++ * pending_bytes: (read only) ++ * The number of pending bytes still to be migrated from the vendor driver. 
++ * ++ * data_offset: (read only) ++ * The user application should read data_offset field from the migration ++ * region. The user application should read the device data from this ++ * offset within the migration region during the _SAVING state or write ++ * the device data during the _RESUMING state. See below for details of ++ * sequence to be followed. ++ * ++ * data_size: (read/write) ++ * The user application should read data_size to get the size in bytes of ++ * the data copied in the migration region during the _SAVING state and ++ * write the size in bytes of the data copied in the migration region ++ * during the _RESUMING state. ++ * ++ * The format of the migration region is as follows: ++ * ------------------------------------------------------------------ ++ * |vfio_device_migration_info| data section | ++ * | | /////////////////////////////// | ++ * ------------------------------------------------------------------ ++ * ^ ^ ++ * offset 0-trapped part data_offset ++ * ++ * The structure vfio_device_migration_info is always followed by the data ++ * section in the region, so data_offset will always be nonzero. The offset ++ * from where the data is copied is decided by the kernel driver. The data ++ * section can be trapped, mmapped, or partitioned, depending on how the kernel ++ * driver defines the data section. The data section partition can be defined ++ * as mapped by the sparse mmap capability. If mmapped, data_offset must be ++ * page aligned, whereas initial section which contains the ++ * vfio_device_migration_info structure, might not end at the offset, which is ++ * page aligned. The user is not required to access through mmap regardless ++ * of the capabilities of the region mmap. ++ * The vendor driver should determine whether and how to partition the data ++ * section. The vendor driver should return data_offset accordingly. ++ * ++ * The sequence to be followed while in pre-copy state and stop-and-copy state ++ * is as follows: ++ * a. 
Read pending_bytes, indicating the start of a new iteration to get device ++ * data. Repeated read on pending_bytes at this stage should have no side ++ * effects. ++ * If pending_bytes == 0, the user application should not iterate to get data ++ * for that device. ++ * If pending_bytes > 0, perform the following steps. ++ * b. Read data_offset, indicating that the vendor driver should make data ++ * available through the data section. The vendor driver should return this ++ * read operation only after data is available from (region + data_offset) ++ * to (region + data_offset + data_size). ++ * c. Read data_size, which is the amount of data in bytes available through ++ * the migration region. ++ * Read on data_offset and data_size should return the offset and size of ++ * the current buffer if the user application reads data_offset and ++ * data_size more than once here. ++ * d. Read data_size bytes of data from (region + data_offset) from the ++ * migration region. ++ * e. Process the data. ++ * f. Read pending_bytes, which indicates that the data from the previous ++ * iteration has been read. If pending_bytes > 0, go to step b. ++ * ++ * The user application can transition from the _SAVING|_RUNNING ++ * (pre-copy state) to the _SAVING (stop-and-copy) state regardless of the ++ * number of pending bytes. The user application should iterate in _SAVING ++ * (stop-and-copy) until pending_bytes is 0. ++ * ++ * The sequence to be followed while _RESUMING device state is as follows: ++ * While data for this device is available, repeat the following steps: ++ * a. Read data_offset from where the user application should write data. ++ * b. Write migration data starting at the migration region + data_offset for ++ * the length determined by data_size from the migration source. ++ * c. Write data_size, which indicates to the vendor driver that data is ++ * written in the migration region. Vendor driver must return this write ++ * operations on consuming data. 
Vendor driver should apply the ++ * user-provided migration region data to the device resume state. ++ * ++ * If an error occurs during the above sequences, the vendor driver can return ++ * an error code for next read() or write() operation, which will terminate the ++ * loop. The user application should then take the next necessary action, for ++ * example, failing migration or terminating the user application. ++ * ++ * For the user application, data is opaque. The user application should write ++ * data in the same order as the data is received and the data should be of ++ * same transaction size at the source. + */ +-#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1) ++ ++struct vfio_device_migration_info { ++ __u32 device_state; /* VFIO device state */ ++#define VFIO_DEVICE_STATE_STOP (0) ++#define VFIO_DEVICE_STATE_RUNNING (1 << 0) ++#define VFIO_DEVICE_STATE_SAVING (1 << 1) ++#define VFIO_DEVICE_STATE_RESUMING (1 << 2) ++#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_RUNNING | \ ++ VFIO_DEVICE_STATE_SAVING | \ ++ VFIO_DEVICE_STATE_RESUMING) ++ ++#define VFIO_DEVICE_STATE_VALID(state) \ ++ (state & VFIO_DEVICE_STATE_RESUMING ?
\ ++ (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_RESUMING : 1) ++ ++#define VFIO_DEVICE_STATE_IS_ERROR(state) \ ++ ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_SAVING | \ ++ VFIO_DEVICE_STATE_RESUMING)) ++ ++#define VFIO_DEVICE_STATE_SET_ERROR(state) \ ++ ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_STATE_SAVING | \ ++ VFIO_DEVICE_STATE_RESUMING) ++ ++ __u32 reserved; ++ __u64 pending_bytes; ++ __u64 data_offset; ++ __u64 data_size; ++}; + + /* + * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped +@@ -570,6 +807,7 @@ enum { + + enum { + VFIO_CCW_IO_IRQ_INDEX, ++ VFIO_CCW_CRW_IRQ_INDEX, + VFIO_CCW_NUM_IRQS + }; + +@@ -700,6 +938,43 @@ struct vfio_device_ioeventfd { + + #define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16) + ++/** ++ * VFIO_DEVICE_FEATURE - _IORW(VFIO_TYPE, VFIO_BASE + 17, ++ * struct vfio_device_feature) ++ * ++ * Get, set, or probe feature data of the device. The feature is selected ++ * using the FEATURE_MASK portion of the flags field. Support for a feature ++ * can be probed by setting both the FEATURE_MASK and PROBE bits. A probe ++ * may optionally include the GET and/or SET bits to determine read vs write ++ * access of the feature respectively. Probing a feature will return success ++ * if the feature is supported and all of the optionally indicated GET/SET ++ * methods are supported. The format of the data portion of the structure is ++ * specific to the given feature. The data portion is not required for ++ * probing. GET and SET are mutually exclusive, except for use with PROBE. ++ * ++ * Return 0 on success, -errno on failure.
++ */ ++struct vfio_device_feature { ++ __u32 argsz; ++ __u32 flags; ++#define VFIO_DEVICE_FEATURE_MASK (0xffff) /* 16-bit feature index */ ++#define VFIO_DEVICE_FEATURE_GET (1 << 16) /* Get feature into data[] */ ++#define VFIO_DEVICE_FEATURE_SET (1 << 17) /* Set feature from data[] */ ++#define VFIO_DEVICE_FEATURE_PROBE (1 << 18) /* Probe feature support */ ++ __u8 data[]; ++}; ++ ++#define VFIO_DEVICE_FEATURE _IO(VFIO_TYPE, VFIO_BASE + 17) ++ ++/* ++ * Provide support for setting a PCI VF Token, which is used as a shared ++ * secret between PF and VF drivers. This feature may only be set on a ++ * PCI SR-IOV PF when SR-IOV is enabled on the PF and there are no existing ++ * open VFs. Data provided when setting this feature is a 16-byte array ++ * (__u8 b[16]), representing a UUID. ++ */ ++#define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0) ++ + /* -------- API for Type1 VFIO IOMMU -------- */ + + /** +@@ -714,7 +989,54 @@ struct vfio_iommu_type1_info { + __u32 argsz; + __u32 flags; + #define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */ +- __u64 iova_pgsizes; /* Bitmap of supported page sizes */ ++#define VFIO_IOMMU_INFO_CAPS (1 << 1) /* Info supports caps */ ++ __u64 iova_pgsizes; /* Bitmap of supported page sizes */ ++ __u32 cap_offset; /* Offset within info struct of first cap */ ++}; ++ ++/* ++ * The IOVA capability allows to report the valid IOVA range(s) ++ * excluding any non-relaxable reserved regions exposed by ++ * devices attached to the container. Any DMA map attempt ++ * outside the valid iova range will return error. ++ * ++ * The structures below define version 1 of this capability. 
++ */ ++#define VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE 1 ++ ++struct vfio_iova_range { ++ __u64 start; ++ __u64 end; ++}; ++ ++struct vfio_iommu_type1_info_cap_iova_range { ++ struct vfio_info_cap_header header; ++ __u32 nr_iovas; ++ __u32 reserved; ++ struct vfio_iova_range iova_ranges[]; ++}; ++ ++/* ++ * The migration capability allows to report supported features for migration. ++ * ++ * The structures below define version 1 of this capability. ++ * ++ * The existence of this capability indicates that IOMMU kernel driver supports ++ * dirty page logging. ++ * ++ * pgsize_bitmap: Kernel driver returns bitmap of supported page sizes for dirty ++ * page logging. ++ * max_dirty_bitmap_size: Kernel driver returns maximum supported dirty bitmap ++ * size in bytes that can be used by user applications when getting the dirty ++ * bitmap. ++ */ ++#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION 2 ++ ++struct vfio_iommu_type1_info_cap_migration { ++ struct vfio_info_cap_header header; ++ __u32 flags; ++ __u64 pgsize_bitmap; ++ __u64 max_dirty_bitmap_size; /* in bytes */ + }; + + #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) +@@ -737,6 +1059,12 @@ struct vfio_iommu_type1_dma_map { + + #define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13) + ++struct vfio_bitmap { ++ __u64 pgsize; /* page size for bitmap in bytes */ ++ __u64 size; /* in bytes */ ++ __u64 *data; /* one bit per page */ ++}; ++ + /** + * VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14, + * struct vfio_dma_unmap) +@@ -746,12 +1074,23 @@ struct vfio_iommu_type1_dma_map { + * field. No guarantee is made to the user that arbitrary unmaps of iova + * or size different from those used in the original mapping call will + * succeed. ++ * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap ++ * before unmapping IO virtual addresses. When this flag is set, the user must ++ * provide a struct vfio_bitmap in data[]. 
User must provide zero-allocated ++ * memory via vfio_bitmap.data and its size in the vfio_bitmap.size field. ++ * A bit in the bitmap represents one page, of user provided page size in ++ * vfio_bitmap.pgsize field, consecutively starting from iova offset. Bit set ++ * indicates that the page at that offset from iova is dirty. A Bitmap of the ++ * pages in the range of unmapped size is returned in the user-provided ++ * vfio_bitmap.data. + */ + struct vfio_iommu_type1_dma_unmap { + __u32 argsz; + __u32 flags; ++#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0) + __u64 iova; /* IO virtual address */ + __u64 size; /* Size of mapping (bytes) */ ++ __u8 data[]; + }; + + #define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14) +@@ -763,6 +1102,57 @@ struct vfio_iommu_type1_dma_unmap { + #define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15) + #define VFIO_IOMMU_DISABLE _IO(VFIO_TYPE, VFIO_BASE + 16) + ++/** ++ * VFIO_IOMMU_DIRTY_PAGES - _IOWR(VFIO_TYPE, VFIO_BASE + 17, ++ * struct vfio_iommu_type1_dirty_bitmap) ++ * IOCTL is used for dirty pages logging. ++ * Caller should set flag depending on which operation to perform, details as ++ * below: ++ * ++ * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_START flag set, instructs ++ * the IOMMU driver to log pages that are dirtied or potentially dirtied by ++ * the device; designed to be used when a migration is in progress. Dirty pages ++ * are logged until logging is disabled by user application by calling the IOCTL ++ * with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag. ++ * ++ * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag set, instructs ++ * the IOMMU driver to stop logging dirtied pages. ++ * ++ * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP flag set ++ * returns the dirty pages bitmap for IOMMU container for a given IOVA range. ++ * The user must specify the IOVA range and the pgsize through the structure ++ * vfio_iommu_type1_dirty_bitmap_get in the data[] portion. 
This interface ++ * supports getting a bitmap of the smallest supported pgsize only and can be ++ * modified in future to get a bitmap of any specified supported pgsize. The ++ * user must provide a zeroed memory area for the bitmap memory and specify its ++ * size in bitmap.size. One bit is used to represent one page consecutively ++ * starting from iova offset. The user should provide page size in bitmap.pgsize ++ * field. A bit set in the bitmap indicates that the page at that offset from ++ * iova is dirty. The caller must set argsz to a value including the size of ++ * structure vfio_iommu_type1_dirty_bitmap_get, but excluding the size of the ++ * actual bitmap. If dirty pages logging is not enabled, an error will be ++ * returned. ++ * ++ * Only one of the flags _START, _STOP and _GET may be specified at a time. ++ * ++ */ ++struct vfio_iommu_type1_dirty_bitmap { ++ __u32 argsz; ++ __u32 flags; ++#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0) ++#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1) ++#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2) ++ __u8 data[]; ++}; ++ ++struct vfio_iommu_type1_dirty_bitmap_get { ++ __u64 iova; /* IO virtual address */ ++ __u64 size; /* Size of iova range */ ++ struct vfio_bitmap bitmap; ++}; ++ ++#define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17) ++ + /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ + + /* +-- +2.27.0 + diff --git a/memory-Set-DIRTY_MEMORY_MIGRATION-when-IOMMU-is-enab.patch b/memory-Set-DIRTY_MEMORY_MIGRATION-when-IOMMU-is-enab.patch new file mode 100644 index 0000000000000000000000000000000000000000..2c4052639c2c104de23c1a6d99674ec036e675af --- /dev/null +++ b/memory-Set-DIRTY_MEMORY_MIGRATION-when-IOMMU-is-enab.patch @@ -0,0 +1,35 @@ +From 0ae8b3e05294fee99870efa9b58e22e16f31caf9 Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:20 +0530 +Subject: [PATCH] memory: Set DIRTY_MEMORY_MIGRATION when IOMMU is enabled + +mr->ram_block is 
NULL when mr->is_iommu is true, then fr.dirty_log_mask +wasn't set correctly due to which memory listener's log_sync doesn't +get called. +This patch returns log_mask with DIRTY_MEMORY_MIGRATION set when +IOMMU is enabled. + +Signed-off-by: Kirti Wankhede +Reviewed-by: Yan Zhao +Acked-by: Paolo Bonzini +Signed-off-by: Alex Williamson +--- + memory.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/memory.c b/memory.c +index 5d8c9a9234..44713efc66 100644 +--- a/memory.c ++++ b/memory.c +@@ -1825,7 +1825,7 @@ bool memory_region_is_ram_device(MemoryRegion *mr) + uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr) + { + uint8_t mask = mr->dirty_log_mask; +- if (global_dirty_log && mr->ram_block) { ++ if (global_dirty_log && (mr->ram_block || memory_region_is_iommu(mr))) { + mask |= (1 << DIRTY_MEMORY_MIGRATION); + } + return mask; +-- +2.27.0 + diff --git a/migration-register_savevm_live-doesn-t-need-dev.patch b/migration-register_savevm_live-doesn-t-need-dev.patch new file mode 100644 index 0000000000000000000000000000000000000000..a980deccbcac98d709a35e62f41c7d52e39b0d11 --- /dev/null +++ b/migration-register_savevm_live-doesn-t-need-dev.patch @@ -0,0 +1,201 @@ +From 0f7cde69416f85ec3d3f57769ae38db3d72fda8c Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 22 Aug 2019 12:54:33 +0100 +Subject: [PATCH] migration: register_savevm_live doesn't need dev + +Commit 78dd48df3 removed the last caller of register_savevm_live for an +instantiable device (rather than a single system wide device); +so trim out the parameter. + +Signed-off-by: Dr. David Alan Gilbert +Message-Id: <20190822115433.12070-1-dgilbert@redhat.com> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Cornelia Huck +Signed-off-by: Dr. 
David Alan Gilbert +--- + docs/devel/migration.rst | 3 +-- + hw/ppc/spapr.c | 2 +- + hw/s390x/s390-skeys.c | 2 +- + hw/s390x/s390-stattrib.c | 2 +- + hw/s390x/tod.c | 2 +- + include/migration/register.h | 3 +-- + migration/block-dirty-bitmap.c | 2 +- + migration/block.c | 2 +- + migration/ram.c | 2 +- + migration/savevm.c | 23 +---------------------- + net/slirp.c | 2 +- + 11 files changed, 11 insertions(+), 34 deletions(-) + +diff --git a/docs/devel/migration.rst b/docs/devel/migration.rst +index 220059679a..cc6f839fce 100644 +--- a/docs/devel/migration.rst ++++ b/docs/devel/migration.rst +@@ -183,8 +183,7 @@ another to load the state back. + + .. code:: c + +- int register_savevm_live(DeviceState *dev, +- const char *idstr, ++ int register_savevm_live(const char *idstr, + int instance_id, + int version_id, + SaveVMHandlers *ops, +diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c +index b0f37c34a4..289967c3de 100644 +--- a/hw/ppc/spapr.c ++++ b/hw/ppc/spapr.c +@@ -3069,7 +3069,7 @@ static void spapr_machine_init(MachineState *machine) + * interface, this is a legacy from the sPAPREnvironment structure + * which predated MachineState but had a similar function */ + vmstate_register(NULL, 0, &vmstate_spapr, spapr); +- register_savevm_live(NULL, "spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1, ++ register_savevm_live("spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1, + &savevm_htab_handlers, spapr); + + qbus_set_hotplug_handler(sysbus_get_default(), OBJECT(machine), +diff --git a/hw/s390x/s390-skeys.c b/hw/s390x/s390-skeys.c +index e5bd92c0c7..fb7d57865d 100644 +--- a/hw/s390x/s390-skeys.c ++++ b/hw/s390x/s390-skeys.c +@@ -388,7 +388,7 @@ static inline void s390_skeys_set_migration_enabled(Object *obj, bool value, + ss->migration_enabled = value; + + if (ss->migration_enabled) { +- register_savevm_live(NULL, TYPE_S390_SKEYS, 0, 1, ++ register_savevm_live(TYPE_S390_SKEYS, 0, 1, + &savevm_s390_storage_keys, ss); + } else { + unregister_savevm(DEVICE(ss), TYPE_S390_SKEYS, ss); +diff --git 
a/hw/s390x/s390-stattrib.c b/hw/s390x/s390-stattrib.c +index 766f2015a4..5ee15d5e82 100644 +--- a/hw/s390x/s390-stattrib.c ++++ b/hw/s390x/s390-stattrib.c +@@ -382,7 +382,7 @@ static void s390_stattrib_instance_init(Object *obj) + { + S390StAttribState *sas = S390_STATTRIB(obj); + +- register_savevm_live(NULL, TYPE_S390_STATTRIB, 0, 0, ++ register_savevm_live(TYPE_S390_STATTRIB, 0, 0, + &savevm_s390_stattrib_handlers, sas); + + object_property_add_bool(obj, "migration-enabled", +diff --git a/hw/s390x/tod.c b/hw/s390x/tod.c +index a9fca8eb0b..d6b22bb966 100644 +--- a/hw/s390x/tod.c ++++ b/hw/s390x/tod.c +@@ -100,7 +100,7 @@ static void s390_tod_realize(DeviceState *dev, Error **errp) + S390TODState *td = S390_TOD(dev); + + /* Legacy migration interface */ +- register_savevm_live(NULL, "todclock", 0, 1, &savevm_tod, td); ++ register_savevm_live("todclock", 0, 1, &savevm_tod, td); + } + + static void s390_tod_class_init(ObjectClass *oc, void *data) +diff --git a/include/migration/register.h b/include/migration/register.h +index 8b2bc5b129..f3ba10b6ef 100644 +--- a/include/migration/register.h ++++ b/include/migration/register.h +@@ -68,8 +68,7 @@ typedef struct SaveVMHandlers { + int (*resume_prepare)(MigrationState *s, void *opaque); + } SaveVMHandlers; + +-int register_savevm_live(DeviceState *dev, +- const char *idstr, ++int register_savevm_live(const char *idstr, + uint32_t instance_id, + int version_id, + const SaveVMHandlers *ops, +diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c +index 4a896a09eb..11e8feb595 100644 +--- a/migration/block-dirty-bitmap.c ++++ b/migration/block-dirty-bitmap.c +@@ -733,7 +733,7 @@ void dirty_bitmap_mig_init(void) + { + QSIMPLEQ_INIT(&dirty_bitmap_mig_state.dbms_list); + +- register_savevm_live(NULL, "dirty-bitmap", 0, 1, ++ register_savevm_live("dirty-bitmap", 0, 1, + &savevm_dirty_bitmap_handlers, + &dirty_bitmap_mig_state); + } +diff --git a/migration/block.c b/migration/block.c +index 
91f98ef44a..ec15d1d6b3 100644 +--- a/migration/block.c ++++ b/migration/block.c +@@ -1030,6 +1030,6 @@ void blk_mig_init(void) + QSIMPLEQ_INIT(&block_mig_state.blk_list); + qemu_mutex_init(&block_mig_state.lock); + +- register_savevm_live(NULL, "block", 0, 1, &savevm_block_handlers, ++ register_savevm_live("block", 0, 1, &savevm_block_handlers, + &block_mig_state); + } +diff --git a/migration/ram.c b/migration/ram.c +index d6657a8093..2077ba5be4 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -5125,5 +5125,5 @@ static SaveVMHandlers savevm_ram_handlers = { + void ram_mig_init(void) + { + qemu_mutex_init(&XBZRLE.lock); +- register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state); ++ register_savevm_live("ram", 0, 4, &savevm_ram_handlers, &ram_state); + } +diff --git a/migration/savevm.c b/migration/savevm.c +index f0974380e5..cdb79222a4 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -683,8 +683,7 @@ static void savevm_state_handler_insert(SaveStateEntry *nse) + of the system, so instance_id should be removed/replaced. + Meanwhile pass -1 as instance_id if you do not already have a clearly + distinguishing id for all instances of your device class. */ +-int register_savevm_live(DeviceState *dev, +- const char *idstr, ++int register_savevm_live(const char *idstr, + uint32_t instance_id, + int version_id, + const SaveVMHandlers *ops, +@@ -703,26 +702,6 @@ int register_savevm_live(DeviceState *dev, + se->is_ram = 1; + } + +- if (dev) { +- char *id = qdev_get_dev_path(dev); +- if (id) { +- if (snprintf(se->idstr, sizeof(se->idstr), "%s/", id) >= +- sizeof(se->idstr)) { +- error_report("Path too long for VMState (%s)", id); +- g_free(id); +- g_free(se); +- +- return -1; +- } +- g_free(id); +- +- se->compat = g_new0(CompatEntry, 1); +- pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), idstr); +- se->compat->instance_id = instance_id == -1 ? 
+- calculate_compat_instance_id(idstr) : instance_id; +- instance_id = -1; +- } +- } + pstrcat(se->idstr, sizeof(se->idstr), idstr); + + if (instance_id == VMSTATE_INSTANCE_ID_ANY) { +diff --git a/net/slirp.c b/net/slirp.c +index b34cb29276..f42f496641 100644 +--- a/net/slirp.c ++++ b/net/slirp.c +@@ -576,7 +576,7 @@ static int net_slirp_init(NetClientState *peer, const char *model, + * specific version? + */ + g_assert(slirp_state_version() == 4); +- register_savevm_live(NULL, "slirp", 0, slirp_state_version(), ++ register_savevm_live("slirp", 0, slirp_state_version(), + &savevm_slirp_state, s->slirp); + + s->poll_notifier.notify = net_slirp_poll_notify; +-- +2.27.0 + diff --git a/qapi-Add-VFIO-devices-migration-stats-in-Migration-s.patch b/qapi-Add-VFIO-devices-migration-stats-in-Migration-s.patch new file mode 100644 index 0000000000000000000000000000000000000000..3ee078c19f392be5b53214f0f03dcea3ecc216fa --- /dev/null +++ b/qapi-Add-VFIO-devices-migration-stats-in-Migration-s.patch @@ -0,0 +1,214 @@ +From f97eaa27e2fb6b985f090af9acaa780bb6a2ee5b Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:27 +0530 +Subject: [PATCH] qapi: Add VFIO devices migration stats in Migration stats + +Added amount of bytes transferred to the VM at destination by all VFIO +devices + +Signed-off-by: Kirti Wankhede +Reviewed-by: Dr. 
David Alan Gilbert +Signed-off-by: Alex Williamson +--- + hw/vfio/common.c | 19 +++++++++++++++++++ + hw/vfio/migration.c | 9 +++++++++ + include/hw/vfio/vfio-common.h | 3 +++ + migration/migration.c | 17 +++++++++++++++++ + monitor/hmp-cmds.c | 6 ++++++ + qapi/migration.json | 17 +++++++++++++++++ + 6 files changed, 71 insertions(+) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 4ce1c10734..a86a4c4506 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -291,6 +291,25 @@ const MemoryRegionOps vfio_region_ops = { + * Device state interfaces + */ + ++bool vfio_mig_active(void) ++{ ++ VFIOGroup *group; ++ VFIODevice *vbasedev; ++ ++ if (QLIST_EMPTY(&vfio_group_list)) { ++ return false; ++ } ++ ++ QLIST_FOREACH(group, &vfio_group_list, next) { ++ QLIST_FOREACH(vbasedev, &group->device_list, next) { ++ if (vbasedev->migration_blocker) { ++ return false; ++ } ++ } ++ } ++ return true; ++} ++ + static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container) + { + VFIOGroup *group; +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 0bdf6a1820..b77c66557e 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -45,6 +45,8 @@ + #define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) + #define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) + ++static int64_t bytes_transferred; ++ + static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, + off_t off, bool iswrite) + { +@@ -255,6 +257,7 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) + *size = data_size; + } + ++ bytes_transferred += data_size; + return ret; + } + +@@ -785,6 +788,7 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data) + case MIGRATION_STATUS_CANCELLING: + case MIGRATION_STATUS_CANCELLED: + case MIGRATION_STATUS_FAILED: ++ bytes_transferred = 0; + ret = vfio_migration_set_state(vbasedev, + ~(VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RESUMING), + 
VFIO_DEVICE_STATE_RUNNING); +@@ -866,6 +870,11 @@ err: + + /* ---------------------------------------------------------------------- */ + ++int64_t vfio_mig_bytes_transferred(void) ++{ ++ return bytes_transferred; ++} ++ + int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) + { + VFIOContainer *container = vbasedev->group->container; +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 8fd0212264..048731e81f 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -203,6 +203,9 @@ extern const MemoryRegionOps vfio_region_ops; + typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; + extern VFIOGroupList vfio_group_list; + ++bool vfio_mig_active(void); ++int64_t vfio_mig_bytes_transferred(void); ++ + #ifdef CONFIG_LINUX + int vfio_get_region_info(VFIODevice *vbasedev, int index, + struct vfio_region_info **info); +diff --git a/migration/migration.c b/migration/migration.c +index b0b9430822..9faf5f63a6 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -49,6 +49,10 @@ + #include "monitor/monitor.h" + #include "net/announce.h" + ++#ifdef CONFIG_VFIO ++#include "hw/vfio/vfio-common.h" ++#endif ++ + #define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */ + + /* Amount of time to allocate to each "chunk" of bandwidth-throttled +@@ -908,6 +912,17 @@ static void populate_disk_info(MigrationInfo *info) + } + } + ++static void populate_vfio_info(MigrationInfo *info) ++{ ++#ifdef CONFIG_VFIO ++ if (vfio_mig_active()) { ++ info->has_vfio = true; ++ info->vfio = g_malloc0(sizeof(*info->vfio)); ++ info->vfio->transferred = vfio_mig_bytes_transferred(); ++ } ++#endif ++} ++ + static void fill_source_migration_info(MigrationInfo *info) + { + MigrationState *s = migrate_get_current(); +@@ -941,6 +956,7 @@ static void fill_source_migration_info(MigrationInfo *info) + + populate_ram_info(info, s); + populate_disk_info(info); ++ populate_vfio_info(info); + break; + case 
MIGRATION_STATUS_COLO: + info->has_status = true; +@@ -956,6 +972,7 @@ static void fill_source_migration_info(MigrationInfo *info) + info->setup_time = s->setup_time; + + populate_ram_info(info, s); ++ populate_vfio_info(info); + break; + case MIGRATION_STATUS_FAILED: + info->has_status = true; +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index e5a7a88ba2..cecaae0a47 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -370,6 +370,12 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) + } + monitor_printf(mon, "]\n"); + } ++ ++ if (info->has_vfio) { ++ monitor_printf(mon, "vfio device transferred: %" PRIu64 " kbytes\n", ++ info->vfio->transferred >> 10); ++ } ++ + qapi_free_MigrationInfo(info); + qapi_free_MigrationCapabilityStatusList(caps); + } +diff --git a/qapi/migration.json b/qapi/migration.json +index 587ef65872..1f0eb19ac6 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -141,6 +141,18 @@ + 'active', 'postcopy-active', 'postcopy-paused', + 'postcopy-recover', 'completed', 'failed', 'colo', + 'pre-switchover', 'device' ] } ++## ++# @VfioStats: ++# ++# Detailed VFIO devices migration statistics ++# ++# @transferred: amount of bytes transferred to the target VM by VFIO devices ++# ++# Since: 5.2 ++# ++## ++{ 'struct': 'VfioStats', ++ 'data': {'transferred': 'int' } } + + ## + # @MigrationInfo: +@@ -202,11 +214,16 @@ + # + # @socket-address: Only used for tcp, to know what the real port is (Since 4.0) + # ++# @vfio: @VfioStats containing detailed VFIO devices migration statistics, ++# only returned if VFIO device is present, migration is supported by all ++# VFIO devices and status is 'active' or 'completed' (since 5.2) ++# + # Since: 0.14.0 + ## + { 'struct': 'MigrationInfo', + 'data': {'*status': 'MigrationStatus', '*ram': 'MigrationStats', + '*disk': 'MigrationStats', ++ '*vfio': 'VfioStats', + '*xbzrle-cache': 'XBZRLECacheStats', + '*total-time': 'int', + '*expected-downtime': 'int', +-- +2.27.0 + diff --git 
a/qemu.spec b/qemu.spec index c6089ff7438a09ad88ff6ed4099dd6335196aedf..54ab5f4df5551ef28baaf3b2436d3a506c692802 100644 --- a/qemu.spec +++ b/qemu.spec @@ -1,6 +1,6 @@ Name: qemu Version: 4.1.0 -Release: 72 +Release: 73 Epoch: 2 Summary: QEMU is a generic and open source machine emulator and virtualizer License: GPLv2 and BSD and MIT and CC-BY-SA-4.0 @@ -476,6 +476,28 @@ Patch0463: virtio-input-fix-memory-leak-on-unrealize.patch Patch0464: target-arm-only-set-ID_PFR1_EL1.GIC-for-AArch32-gues.patch Patch0465: target-arm-clear-EL2-and-EL3-only-when-kvm-is-not-en.patch Patch0466: target-arm-Update-the-ID-registers-of-Kunpeng-920.patch +Patch0467: hw-net-fix-vmxnet3-live-migration.patch +Patch0468: include-Make-headers-more-self-contained.patch +Patch0469: migration-register_savevm_live-doesn-t-need-dev.patch +Patch0470: vmstate-add-qom-interface-to-get-id.patch +Patch0471: linux-headers-Update-against-Add-migration-support-f.patch +Patch0472: vfio-Add-function-to-unmap-VFIO-region.patch +Patch0473: vfio-Add-vfio_get_object-callback-to-VFIODeviceOps.patch +Patch0474: vfio-Add-save-and-load-functions-for-VFIO-PCI-device.patch +Patch0475: vfio-Add-migration-region-initialization-and-finaliz.patch +Patch0476: vfio-Add-VM-state-change-handler-to-know-state-of-VM.patch +Patch0477: vfio-Add-migration-state-change-notifier.patch +Patch0478: vfio-Register-SaveVMHandlers-for-VFIO-device.patch +Patch0479: vfio-Add-save-state-functions-to-SaveVMHandlers.patch +Patch0480: vfio-Add-load-state-functions-to-SaveVMHandlers.patch +Patch0481: memory-Set-DIRTY_MEMORY_MIGRATION-when-IOMMU-is-enab.patch +Patch0482: vfio-Get-migration-capability-flags-for-container.patch +Patch0483: vfio-Add-function-to-start-and-stop-dirty-pages-trac.patch +Patch0484: vfio-Add-vfio_listener_log_sync-to-mark-dirty-pages.patch +Patch0485: vfio-Dirty-page-tracking-when-vIOMMU-is-enabled.patch +Patch0486: vfio-Add-ioctl-to-get-dirty-pages-bitmap-during-dma-.patch +Patch0487: 
vfio-Make-vfio-pci-device-migration-capable.patch +Patch0488: qapi-Add-VFIO-devices-migration-stats-in-Migration-s.patch BuildRequires: flex BuildRequires: gcc @@ -870,6 +892,30 @@ getent passwd qemu >/dev/null || \ %endif %changelog +* Thu Jul 29 2021 imxcc +- hw/net: fix vmxnet3 live migration +- include: Make headers more self-contained +- migration: register_savevm_live doesn't need dev +- vmstate: add qom interface to get id +- linux headers: Update against "Add migration support for VFIO devices" +- vfio: Add function to unmap VFIO region +- vfio: Add vfio_get_object callback to VFIODeviceOps +- vfio: Add save and load functions for VFIO PCI devices +- vfio: Add migration region initialization and finalize function +- vfio: Add VM state change handler to know state of VM +- vfio: Add migration state change notifier +- vfio: Register SaveVMHandlers for VFIO device +- vfio: Add save state functions to SaveVMHandlers +- vfio: Add load state functions to SaveVMHandlers +- memory: Set DIRTY_MEMORY_MIGRATION when IOMMU is enabled +- vfio: Get migration capability flags for container +- vfio: Add function to start and stop dirty pages tracking +- vfio: Add vfio_listener_log_sync to mark dirty pages +- vfio: Dirty page tracking when vIOMMU is enabled +- vfio: Add ioctl to get dirty pages bitmap during dma unmap +- vfio: Make vfio-pci device migration capable +- qapi: Add VFIO devices migration stats in Migration stats + * Wed Jul 28 2021 imxcc - object: return self in object_ref() - file-posix: Fix leaked fd in raw_open_common() error path diff --git a/vfio-Add-VM-state-change-handler-to-know-state-of-VM.patch b/vfio-Add-VM-state-change-handler-to-know-state-of-VM.patch new file mode 100644 index 0000000000000000000000000000000000000000..eb8fb5d8f84b88bed1e48516050af5546dfae1cb --- /dev/null +++ b/vfio-Add-VM-state-change-handler-to-know-state-of-VM.patch @@ -0,0 +1,258 @@ +From 3a875293ae00266e1c82a5c382066efc4acc64ce Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede 
+Date: Mon, 26 Oct 2020 15:06:15 +0530 +Subject: [PATCH] vfio: Add VM state change handler to know state of VM + +VM state change handler is called on change in VM's state. Based on +VM state, VFIO device state should be changed. +Added read/write helper functions for migration region. +Added function to set device_state. + +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Cornelia Huck +[aw: lx -> HWADDR_PRIx, remove redundant parens] +Signed-off-by: Alex Williamson +Signed-off-by: Shenming Lu +--- + hw/vfio/migration.c | 160 ++++++++++++++++++++++++++++++++++ + hw/vfio/trace-events | 2 + + include/hw/vfio/vfio-common.h | 4 + + 3 files changed, 166 insertions(+) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index fd7faf423c..ca82c78536 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -10,6 +10,7 @@ + #include "qemu/osdep.h" + #include + ++#include "sysemu/sysemu.h" + #include "hw/vfio/vfio-common.h" + #include "cpu.h" + #include "migration/migration.h" +@@ -22,6 +23,157 @@ + #include "exec/ram_addr.h" + #include "pci.h" + #include "trace.h" ++#include "hw/hw.h" ++ ++static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, ++ off_t off, bool iswrite) ++{ ++ int ret; ++ ++ ret = iswrite ? pwrite(vbasedev->fd, val, count, off) : ++ pread(vbasedev->fd, val, count, off); ++ if (ret < count) { ++ error_report("vfio_mig_%s %d byte %s: failed at offset 0x%" ++ HWADDR_PRIx", err: %s", iswrite ? "write" : "read", count, ++ vbasedev->name, off, strerror(errno)); ++ return (ret < 0) ? 
ret : -EINVAL; ++ } ++ return 0; ++} ++ ++static int vfio_mig_rw(VFIODevice *vbasedev, __u8 *buf, size_t count, ++ off_t off, bool iswrite) ++{ ++ int ret, done = 0; ++ __u8 *tbuf = buf; ++ ++ while (count) { ++ int bytes = 0; ++ ++ if (count >= 8 && !(off % 8)) { ++ bytes = 8; ++ } else if (count >= 4 && !(off % 4)) { ++ bytes = 4; ++ } else if (count >= 2 && !(off % 2)) { ++ bytes = 2; ++ } else { ++ bytes = 1; ++ } ++ ++ ret = vfio_mig_access(vbasedev, tbuf, bytes, off, iswrite); ++ if (ret) { ++ return ret; ++ } ++ ++ count -= bytes; ++ done += bytes; ++ off += bytes; ++ tbuf += bytes; ++ } ++ return done; ++} ++ ++#define vfio_mig_read(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, false) ++#define vfio_mig_write(f, v, c, o) vfio_mig_rw(f, (__u8 *)v, c, o, true) ++ ++#define VFIO_MIG_STRUCT_OFFSET(f) \ ++ offsetof(struct vfio_device_migration_info, f) ++/* ++ * Change the device_state register for device @vbasedev. Bits set in @mask ++ * are preserved, bits set in @value are set, and bits not set in either @mask ++ * or @value are cleared in device_state. If the register cannot be accessed, ++ * the resulting state would be invalid, or the device enters an error state, ++ * an error is returned. 
++ */ ++ ++static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, ++ uint32_t value) ++{ ++ VFIOMigration *migration = vbasedev->migration; ++ VFIORegion *region = &migration->region; ++ off_t dev_state_off = region->fd_offset + ++ VFIO_MIG_STRUCT_OFFSET(device_state); ++ uint32_t device_state; ++ int ret; ++ ++ ret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), ++ dev_state_off); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ device_state = (device_state & mask) | value; ++ ++ if (!VFIO_DEVICE_STATE_VALID(device_state)) { ++ return -EINVAL; ++ } ++ ++ ret = vfio_mig_write(vbasedev, &device_state, sizeof(device_state), ++ dev_state_off); ++ if (ret < 0) { ++ int rret; ++ ++ rret = vfio_mig_read(vbasedev, &device_state, sizeof(device_state), ++ dev_state_off); ++ ++ if ((rret < 0) || (VFIO_DEVICE_STATE_IS_ERROR(device_state))) { ++ hw_error("%s: Device in error state 0x%x", vbasedev->name, ++ device_state); ++ return rret ? rret : -EIO; ++ } ++ return ret; ++ } ++ ++ migration->device_state = device_state; ++ trace_vfio_migration_set_state(vbasedev->name, device_state); ++ return 0; ++} ++ ++static void vfio_vmstate_change(void *opaque, int running, RunState state) ++{ ++ VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; ++ uint32_t value, mask; ++ int ret; ++ ++ if (vbasedev->migration->vm_running == running) { ++ return; ++ } ++ ++ if (running) { ++ /* ++ * Here device state can have one of _SAVING, _RESUMING or _STOP bit. ++ * Transition from _SAVING to _RUNNING can happen if there is migration ++ * failure, in that case clear _SAVING bit. ++ * Transition from _RESUMING to _RUNNING occurs during resuming ++ * phase, in that case clear _RESUMING bit. ++ * In both the above cases, set _RUNNING bit. ++ */ ++ mask = ~VFIO_DEVICE_STATE_MASK; ++ value = VFIO_DEVICE_STATE_RUNNING; ++ } else { ++ /* ++ * Here device state could be either _RUNNING or _SAVING|_RUNNING. 
Reset ++ * _RUNNING bit ++ */ ++ mask = ~VFIO_DEVICE_STATE_RUNNING; ++ value = 0; ++ } ++ ++ ret = vfio_migration_set_state(vbasedev, mask, value); ++ if (ret) { ++ /* ++ * Migration should be aborted in this case, but vm_state_notify() ++ * currently does not support reporting failures. ++ */ ++ error_report("%s: Failed to set device state 0x%x", vbasedev->name, ++ (migration->device_state & mask) | value); ++ qemu_file_set_error(migrate_get_current()->to_dst_file, ret); ++ } ++ vbasedev->migration->vm_running = running; ++ trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state), ++ (migration->device_state & mask) | value); ++} + + static void vfio_migration_exit(VFIODevice *vbasedev) + { +@@ -38,6 +190,7 @@ static int vfio_migration_init(VFIODevice *vbasedev, + { + int ret; + Object *obj; ++ VFIOMigration *migration; + + if (!vbasedev->ops->vfio_get_object) { + return -EINVAL; +@@ -64,6 +217,10 @@ static int vfio_migration_init(VFIODevice *vbasedev, + ret = -EINVAL; + goto err; + } ++ ++ migration = vbasedev->migration; ++ migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, ++ vbasedev); + return 0; + + err: +@@ -111,6 +268,9 @@ add_blocker: + void vfio_migration_finalize(VFIODevice *vbasedev) + { + if (vbasedev->migration) { ++ VFIOMigration *migration = vbasedev->migration; ++ ++ qemu_del_vm_change_state_handler(migration->vm_state); + vfio_migration_exit(vbasedev); + } + +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index fd034ac536..1626862315 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -146,3 +146,5 @@ vfio_display_edid_write_error(void) "" + + # migration.c + vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" ++vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d" ++vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" +diff --git 
a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index e0482c2bac..533d6737ac 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -29,6 +29,7 @@ + #ifdef CONFIG_LINUX + #include + #endif ++#include "sysemu/sysemu.h" + + #define VFIO_MSG_PREFIX "vfio %s: " + +@@ -58,7 +59,10 @@ typedef struct VFIORegion { + } VFIORegion; + + typedef struct VFIOMigration { ++ VMChangeStateEntry *vm_state; + VFIORegion region; ++ uint32_t device_state; ++ int vm_running; + } VFIOMigration; + + typedef struct VFIOAddressSpace { +-- +2.27.0 + diff --git a/vfio-Add-function-to-start-and-stop-dirty-pages-trac.patch b/vfio-Add-function-to-start-and-stop-dirty-pages-trac.patch new file mode 100644 index 0000000000000000000000000000000000000000..b15a1c4bfbeb108bc5a098df8d68aef288b5e9c2 --- /dev/null +++ b/vfio-Add-function-to-start-and-stop-dirty-pages-trac.patch @@ -0,0 +1,83 @@ +From 4363ea5cded9c6d2838a9564b067f583a6ef077f Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:22 +0530 +Subject: [PATCH] vfio: Add function to start and stop dirty pages tracking + +Call VFIO_IOMMU_DIRTY_PAGES ioctl to start and stop dirty pages tracking +for VFIO devices. + +Signed-off-by: Kirti Wankhede +Reviewed-by: Dr. 
David Alan Gilbert +Signed-off-by: Alex Williamson +--- + hw/vfio/migration.c | 36 ++++++++++++++++++++++++++++++++++++ + 1 file changed, 36 insertions(+) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 0d2bd9e5cd..0bdf6a1820 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -11,6 +11,7 @@ + #include "qemu/main-loop.h" + #include "qemu/cutils.h" + #include ++#include + + #include "sysemu/sysemu.h" + #include "hw/vfio/vfio-common.h" +@@ -391,10 +392,40 @@ static int vfio_load_device_config_state(QEMUFile *f, void *opaque) + return qemu_file_get_error(f); + } + ++static int vfio_set_dirty_page_tracking(VFIODevice *vbasedev, bool start) ++{ ++ int ret; ++ VFIOMigration *migration = vbasedev->migration; ++ VFIOContainer *container = vbasedev->group->container; ++ struct vfio_iommu_type1_dirty_bitmap dirty = { ++ .argsz = sizeof(dirty), ++ }; ++ ++ if (start) { ++ if (migration->device_state & VFIO_DEVICE_STATE_SAVING) { ++ dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START; ++ } else { ++ return -EINVAL; ++ } ++ } else { ++ dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP; ++ } ++ ++ ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty); ++ if (ret) { ++ error_report("Failed to set dirty tracking flag 0x%x errno: %d", ++ dirty.flags, errno); ++ return -errno; ++ } ++ return ret; ++} ++ + static void vfio_migration_cleanup(VFIODevice *vbasedev) + { + VFIOMigration *migration = vbasedev->migration; + ++ vfio_set_dirty_page_tracking(vbasedev, false); ++ + if (migration->region.mmaps) { + vfio_region_unmap(&migration->region); + } +@@ -435,6 +466,11 @@ static int vfio_save_setup(QEMUFile *f, void *opaque) + return ret; + } + ++ ret = vfio_set_dirty_page_tracking(vbasedev, true); ++ if (ret) { ++ return ret; ++ } ++ + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); + + ret = qemu_file_get_error(f); +-- +2.27.0 + diff --git a/vfio-Add-function-to-unmap-VFIO-region.patch b/vfio-Add-function-to-unmap-VFIO-region.patch new file mode 100644 index 
0000000000000000000000000000000000000000..2cdd76a09bd44c73b42f4294055b935a31446b7e --- /dev/null +++ b/vfio-Add-function-to-unmap-VFIO-region.patch @@ -0,0 +1,103 @@ +From 68cc2be61588d14de2313342ee87eb0bb2b990e0 Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:11 +0530 +Subject: [PATCH] vfio: Add function to unmap VFIO region + +This function will be used for migration region. +Migration region is mmaped when migration starts and will be unmapped when +migration is complete. + +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +Reviewed-by: Cornelia Huck +Signed-off-by: Alex Williamson +--- + hw/vfio/common.c | 32 ++++++++++++++++++++++++++++---- + hw/vfio/trace-events | 1 + + include/hw/vfio/vfio-common.h | 1 + + 3 files changed, 30 insertions(+), 4 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index a859298fda..4c32b1bb99 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -906,6 +906,18 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, + return 0; + } + ++static void vfio_subregion_unmap(VFIORegion *region, int index) ++{ ++ trace_vfio_region_unmap(memory_region_name(&region->mmaps[index].mem), ++ region->mmaps[index].offset, ++ region->mmaps[index].offset + ++ region->mmaps[index].size - 1); ++ memory_region_del_subregion(region->mem, &region->mmaps[index].mem); ++ munmap(region->mmaps[index].mmap, region->mmaps[index].size); ++ object_unparent(OBJECT(&region->mmaps[index].mem)); ++ region->mmaps[index].mmap = NULL; ++} ++ + int vfio_region_mmap(VFIORegion *region) + { + int i, prot = 0; +@@ -936,10 +948,7 @@ int vfio_region_mmap(VFIORegion *region) + region->mmaps[i].mmap = NULL; + + for (i--; i >= 0; i--) { +- memory_region_del_subregion(region->mem, &region->mmaps[i].mem); +- munmap(region->mmaps[i].mmap, region->mmaps[i].size); +- object_unparent(OBJECT(&region->mmaps[i].mem)); +- region->mmaps[i].mmap = NULL; ++ vfio_subregion_unmap(region, i); + } + + return ret; +@@ -964,6 +973,21 
@@ int vfio_region_mmap(VFIORegion *region) + return 0; + } + ++void vfio_region_unmap(VFIORegion *region) ++{ ++ int i; ++ ++ if (!region->mem) { ++ return; ++ } ++ ++ for (i = 0; i < region->nr_mmaps; i++) { ++ if (region->mmaps[i].mmap) { ++ vfio_subregion_unmap(region, i); ++ } ++ } ++} ++ + void vfio_region_exit(VFIORegion *region) + { + int i; +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index b1ef55a33f..8cdc27946c 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -111,6 +111,7 @@ vfio_region_mmap(const char *name, unsigned long offset, unsigned long end) "Reg + vfio_region_exit(const char *name, int index) "Device %s, region %d" + vfio_region_finalize(const char *name, int index) "Device %s, region %d" + vfio_region_mmaps_set_enabled(const char *name, bool enabled) "Region %s mmaps enabled: %d" ++vfio_region_unmap(const char *name, unsigned long offset, unsigned long end) "Region %s unmap [0x%lx - 0x%lx]" + vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) "Device %s region %d: %d sparse mmap entries" + vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) "sparse entry %d [0x%lx - 0x%lx]" + vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype) "%s index %d, %08x/%0x8" +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 9107bd41c0..93493891ba 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -171,6 +171,7 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region, + int index, const char *name); + int vfio_region_mmap(VFIORegion *region); + void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled); ++void vfio_region_unmap(VFIORegion *region); + void vfio_region_exit(VFIORegion *region); + void vfio_region_finalize(VFIORegion *region); + void vfio_reset_handler(void *opaque); +-- +2.27.0 + diff --git 
a/vfio-Add-ioctl-to-get-dirty-pages-bitmap-during-dma-.patch b/vfio-Add-ioctl-to-get-dirty-pages-bitmap-during-dma-.patch new file mode 100644 index 0000000000000000000000000000000000000000..2831e94ca260e5753f2bcd0007ab036cba387b33 --- /dev/null +++ b/vfio-Add-ioctl-to-get-dirty-pages-bitmap-during-dma-.patch @@ -0,0 +1,162 @@ +From 1333031bd3b488ed4904a61fd292cd5aa93f8c5b Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:25 +0530 +Subject: [PATCH] vfio: Add ioctl to get dirty pages bitmap during dma unmap + +With vIOMMU, IO virtual address range can get unmapped while in pre-copy +phase of migration. In that case, unmap ioctl should return pages pinned +in that range and QEMU should find its correcponding guest physical +addresses and report those dirty. + +Suggested-by: Alex Williamson +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +[aw: fix error_report types, fix cpu_physical_memory_set_dirty_lebitmap() cast] +Signed-off-by: Alex Williamson +--- + hw/vfio/common.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 93 insertions(+), 4 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 8773b998ac..4ce1c10734 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -320,11 +320,95 @@ static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container) + return true; + } + ++static bool vfio_devices_all_running_and_saving(VFIOContainer *container) ++{ ++ VFIOGroup *group; ++ VFIODevice *vbasedev; ++ MigrationState *ms = migrate_get_current(); ++ ++ if (!migration_is_setup_or_active(ms->state)) { ++ return false; ++ } ++ ++ QLIST_FOREACH(group, &container->group_list, container_next) { ++ QLIST_FOREACH(vbasedev, &group->device_list, next) { ++ VFIOMigration *migration = vbasedev->migration; ++ ++ if (!migration) { ++ return false; ++ } ++ ++ if ((migration->device_state & VFIO_DEVICE_STATE_SAVING) && ++ (migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { ++ continue; ++ } else { 
++ return false; ++ } ++ } ++ } ++ return true; ++} ++ ++static int vfio_dma_unmap_bitmap(VFIOContainer *container, ++ hwaddr iova, ram_addr_t size, ++ IOMMUTLBEntry *iotlb) ++{ ++ struct vfio_iommu_type1_dma_unmap *unmap; ++ struct vfio_bitmap *bitmap; ++ uint64_t pages = TARGET_PAGE_ALIGN(size) >> TARGET_PAGE_BITS; ++ int ret; ++ ++ unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap)); ++ ++ unmap->argsz = sizeof(*unmap) + sizeof(*bitmap); ++ unmap->iova = iova; ++ unmap->size = size; ++ unmap->flags |= VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP; ++ bitmap = (struct vfio_bitmap *)&unmap->data; ++ ++ /* ++ * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of ++ * TARGET_PAGE_SIZE to mark those dirty. Hence set bitmap_pgsize to ++ * TARGET_PAGE_SIZE. ++ */ ++ ++ bitmap->pgsize = TARGET_PAGE_SIZE; ++ bitmap->size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / ++ BITS_PER_BYTE; ++ ++ if (bitmap->size > container->max_dirty_bitmap_size) { ++ error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, ++ (uint64_t)bitmap->size); ++ ret = -E2BIG; ++ goto unmap_exit; ++ } ++ ++ bitmap->data = g_try_malloc0(bitmap->size); ++ if (!bitmap->data) { ++ ret = -ENOMEM; ++ goto unmap_exit; ++ } ++ ++ ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap); ++ if (!ret) { ++ cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data, ++ iotlb->translated_addr, pages); ++ } else { ++ error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m"); ++ } ++ ++ g_free(bitmap->data); ++unmap_exit: ++ g_free(unmap); ++ return ret; ++} ++ + /* + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 + */ + static int vfio_dma_unmap(VFIOContainer *container, +- hwaddr iova, ram_addr_t size) ++ hwaddr iova, ram_addr_t size, ++ IOMMUTLBEntry *iotlb) + { + struct vfio_iommu_type1_dma_unmap unmap = { + .argsz = sizeof(unmap), +@@ -333,6 +417,11 @@ static int vfio_dma_unmap(VFIOContainer *container, + .size = size, + }; + ++ if (iotlb && 
container->dirty_pages_supported && ++ vfio_devices_all_running_and_saving(container)) { ++ return vfio_dma_unmap_bitmap(container, iova, size, iotlb); ++ } ++ + while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) { + /* + * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c +@@ -380,7 +469,7 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova, + * the VGA ROM space. + */ + if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 || +- (errno == EBUSY && vfio_dma_unmap(container, iova, size) == 0 && ++ (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 && + ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) { + return 0; + } +@@ -530,7 +619,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + iotlb->addr_mask + 1, vaddr, ret); + } + } else { +- ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1); ++ ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1, iotlb); + if (ret) { + error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%m)", +@@ -816,7 +905,7 @@ static void vfio_listener_region_del(MemoryListener *listener, + } + + if (try_unmap) { +- ret = vfio_dma_unmap(container, iova, int128_get64(llsize)); ++ ret = vfio_dma_unmap(container, iova, int128_get64(llsize), NULL); + if (ret) { + error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", " + "0x%"HWADDR_PRIx") = %d (%m)", +-- +2.27.0 + diff --git a/vfio-Add-load-state-functions-to-SaveVMHandlers.patch b/vfio-Add-load-state-functions-to-SaveVMHandlers.patch new file mode 100644 index 0000000000000000000000000000000000000000..d70caeeef043b6c6cb53f09c9adb67b40b344862 --- /dev/null +++ b/vfio-Add-load-state-functions-to-SaveVMHandlers.patch @@ -0,0 +1,266 @@ +From ddef5d5257987f2f415ce41fdc482feda61aa796 Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:19 +0530 +Subject: [PATCH] vfio: Add load state functions to SaveVMHandlers + +Sequence during _RESUMING device state: 
+While data for this device is available, repeat below steps: +a. read data_offset from where user application should write data. +b. write data of data_size to migration region from data_offset. +c. write data_size which indicates vendor driver that data is written in + staging buffer. + +For user, data is opaque. User should write data in the same order as +received. + +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Yan Zhao +Signed-off-by: Alex Williamson +--- + hw/vfio/migration.c | 195 +++++++++++++++++++++++++++++++++++++++++++ + hw/vfio/trace-events | 4 + + 2 files changed, 199 insertions(+) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index f78a77e1e3..954c064435 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -257,6 +257,77 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) + return ret; + } + ++static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, ++ uint64_t data_size) ++{ ++ VFIORegion *region = &vbasedev->migration->region; ++ uint64_t data_offset = 0, size, report_size; ++ int ret; ++ ++ do { ++ ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), ++ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ if (data_offset + data_size > region->size) { ++ /* ++ * If data_size is greater than the data section of migration region ++ * then iterate the write buffer operation. This case can occur if ++ * size of migration region at destination is smaller than size of ++ * migration region at source. 
++ */ ++ report_size = size = region->size - data_offset; ++ data_size -= size; ++ } else { ++ report_size = size = data_size; ++ data_size = 0; ++ } ++ ++ trace_vfio_load_state_device_data(vbasedev->name, data_offset, size); ++ ++ while (size) { ++ void *buf; ++ uint64_t sec_size; ++ bool buf_alloc = false; ++ ++ buf = get_data_section_size(region, data_offset, size, &sec_size); ++ ++ if (!buf) { ++ buf = g_try_malloc(sec_size); ++ if (!buf) { ++ error_report("%s: Error allocating buffer ", __func__); ++ return -ENOMEM; ++ } ++ buf_alloc = true; ++ } ++ ++ qemu_get_buffer(f, buf, sec_size); ++ ++ if (buf_alloc) { ++ ret = vfio_mig_write(vbasedev, buf, sec_size, ++ region->fd_offset + data_offset); ++ g_free(buf); ++ ++ if (ret < 0) { ++ return ret; ++ } ++ } ++ size -= sec_size; ++ data_offset += sec_size; ++ } ++ ++ ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size), ++ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size)); ++ if (ret < 0) { ++ return ret; ++ } ++ } while (data_size); ++ ++ return 0; ++} ++ + static int vfio_update_pending(VFIODevice *vbasedev) + { + VFIOMigration *migration = vbasedev->migration; +@@ -293,6 +364,33 @@ static int vfio_save_device_config_state(QEMUFile *f, void *opaque) + return qemu_file_get_error(f); + } + ++static int vfio_load_device_config_state(QEMUFile *f, void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ uint64_t data; ++ ++ if (vbasedev->ops && vbasedev->ops->vfio_load_config) { ++ int ret; ++ ++ ret = vbasedev->ops->vfio_load_config(vbasedev, f); ++ if (ret) { ++ error_report("%s: Failed to load device config space", ++ vbasedev->name); ++ return ret; ++ } ++ } ++ ++ data = qemu_get_be64(f); ++ if (data != VFIO_MIG_FLAG_END_OF_STATE) { ++ error_report("%s: Failed loading device config space, " ++ "end flag incorrect 0x%"PRIx64, vbasedev->name, data); ++ return -EINVAL; ++ } ++ ++ trace_vfio_load_device_config_state(vbasedev->name); ++ return qemu_file_get_error(f); ++} ++ + static void 
vfio_migration_cleanup(VFIODevice *vbasedev) + { + VFIOMigration *migration = vbasedev->migration; +@@ -483,12 +581,109 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) + return ret; + } + ++static int vfio_load_setup(QEMUFile *f, void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; ++ int ret = 0; ++ ++ if (migration->region.mmaps) { ++ ret = vfio_region_mmap(&migration->region); ++ if (ret) { ++ error_report("%s: Failed to mmap VFIO migration region %d: %s", ++ vbasedev->name, migration->region.nr, ++ strerror(-ret)); ++ error_report("%s: Falling back to slow path", vbasedev->name); ++ } ++ } ++ ++ ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_MASK, ++ VFIO_DEVICE_STATE_RESUMING); ++ if (ret) { ++ error_report("%s: Failed to set state RESUMING", vbasedev->name); ++ if (migration->region.mmaps) { ++ vfio_region_unmap(&migration->region); ++ } ++ } ++ return ret; ++} ++ ++static int vfio_load_cleanup(void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ ++ vfio_migration_cleanup(vbasedev); ++ trace_vfio_load_cleanup(vbasedev->name); ++ return 0; ++} ++ ++static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) ++{ ++ VFIODevice *vbasedev = opaque; ++ int ret = 0; ++ uint64_t data; ++ ++ data = qemu_get_be64(f); ++ while (data != VFIO_MIG_FLAG_END_OF_STATE) { ++ ++ trace_vfio_load_state(vbasedev->name, data); ++ ++ switch (data) { ++ case VFIO_MIG_FLAG_DEV_CONFIG_STATE: ++ { ++ ret = vfio_load_device_config_state(f, opaque); ++ if (ret) { ++ return ret; ++ } ++ break; ++ } ++ case VFIO_MIG_FLAG_DEV_SETUP_STATE: ++ { ++ data = qemu_get_be64(f); ++ if (data == VFIO_MIG_FLAG_END_OF_STATE) { ++ return ret; ++ } else { ++ error_report("%s: SETUP STATE: EOS not found 0x%"PRIx64, ++ vbasedev->name, data); ++ return -EINVAL; ++ } ++ break; ++ } ++ case VFIO_MIG_FLAG_DEV_DATA_STATE: ++ { ++ uint64_t data_size = qemu_get_be64(f); ++ ++ if (data_size) { ++ ret = vfio_load_buffer(f, 
vbasedev, data_size); ++ if (ret < 0) { ++ return ret; ++ } ++ } ++ break; ++ } ++ default: ++ error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data); ++ return -EINVAL; ++ } ++ ++ data = qemu_get_be64(f); ++ ret = qemu_file_get_error(f); ++ if (ret) { ++ return ret; ++ } ++ } ++ return ret; ++} ++ + static SaveVMHandlers savevm_vfio_handlers = { + .save_setup = vfio_save_setup, + .save_cleanup = vfio_save_cleanup, + .save_live_pending = vfio_save_pending, + .save_live_iterate = vfio_save_iterate, + .save_live_complete_precopy = vfio_save_complete_precopy, ++ .load_setup = vfio_load_setup, ++ .load_cleanup = vfio_load_cleanup, ++ .load_state = vfio_load_state, + }; + + /* ---------------------------------------------------------------------- */ +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 9a1c5e17d9..4f08f5a633 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -157,3 +157,7 @@ vfio_save_device_config_state(const char *name) " (%s)" + vfio_save_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64 + vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d" + vfio_save_complete_precopy(const char *name) " (%s)" ++vfio_load_device_config_state(const char *name) " (%s)" ++vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 ++vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64 ++vfio_load_cleanup(const char *name) " (%s)" +-- +2.27.0 + diff --git a/vfio-Add-migration-region-initialization-and-finaliz.patch b/vfio-Add-migration-region-initialization-and-finaliz.patch new file mode 100644 index 0000000000000000000000000000000000000000..c804f1f6c353143ca74fe05889d5e163ab9dc8c3 --- /dev/null +++ b/vfio-Add-migration-region-initialization-and-finaliz.patch @@ -0,0 +1,209 @@ +From b7128f8aa03482634c07691cef69e7ed2d35200e 
Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:14 +0530 +Subject: [PATCH] vfio: Add migration region initialization and finalize + function + +Whether the VFIO device supports migration or not is decided based of +migration region query. If migration region query is successful and migration +region initialization is successful then migration is supported else +migration is blocked. + +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +Acked-by: Dr. David Alan Gilbert +Reviewed-by: Cornelia Huck +Signed-off-by: Alex Williamson +Signed-off-by: Shenming Lu +--- + hw/vfio/Makefile.objs | 2 +- + hw/vfio/migration.c | 122 ++++++++++++++++++++++++++++++++++ + hw/vfio/trace-events | 3 + + include/hw/vfio/vfio-common.h | 9 +++ + 4 files changed, 135 insertions(+), 1 deletion(-) + create mode 100644 hw/vfio/migration.c + +diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs +index abad8b818c..36033d1437 100644 +--- a/hw/vfio/Makefile.objs ++++ b/hw/vfio/Makefile.objs +@@ -1,4 +1,4 @@ +-obj-y += common.o spapr.o ++obj-y += common.o spapr.o migration.o + obj-$(CONFIG_VFIO_PCI) += pci.o pci-quirks.o display.o + obj-$(CONFIG_VFIO_CCW) += ccw.o + obj-$(CONFIG_VFIO_PLATFORM) += platform.o +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +new file mode 100644 +index 0000000000..fd7faf423c +--- /dev/null ++++ b/hw/vfio/migration.c +@@ -0,0 +1,122 @@ ++/* ++ * Migration support for VFIO devices ++ * ++ * Copyright NVIDIA, Inc. 2020 ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2. See ++ * the COPYING file in the top-level directory. 
++ */ ++ ++#include "qemu/osdep.h" ++#include ++ ++#include "hw/vfio/vfio-common.h" ++#include "cpu.h" ++#include "migration/migration.h" ++#include "migration/qemu-file.h" ++#include "migration/register.h" ++#include "migration/blocker.h" ++#include "migration/misc.h" ++#include "qapi/error.h" ++#include "exec/ramlist.h" ++#include "exec/ram_addr.h" ++#include "pci.h" ++#include "trace.h" ++ ++static void vfio_migration_exit(VFIODevice *vbasedev) ++{ ++ VFIOMigration *migration = vbasedev->migration; ++ ++ vfio_region_exit(&migration->region); ++ vfio_region_finalize(&migration->region); ++ g_free(vbasedev->migration); ++ vbasedev->migration = NULL; ++} ++ ++static int vfio_migration_init(VFIODevice *vbasedev, ++ struct vfio_region_info *info) ++{ ++ int ret; ++ Object *obj; ++ ++ if (!vbasedev->ops->vfio_get_object) { ++ return -EINVAL; ++ } ++ ++ obj = vbasedev->ops->vfio_get_object(vbasedev); ++ if (!obj) { ++ return -EINVAL; ++ } ++ ++ vbasedev->migration = g_new0(VFIOMigration, 1); ++ ++ ret = vfio_region_setup(obj, vbasedev, &vbasedev->migration->region, ++ info->index, "migration"); ++ if (ret) { ++ error_report("%s: Failed to setup VFIO migration region %d: %s", ++ vbasedev->name, info->index, strerror(-ret)); ++ goto err; ++ } ++ ++ if (!vbasedev->migration->region.size) { ++ error_report("%s: Invalid zero-sized VFIO migration region %d", ++ vbasedev->name, info->index); ++ ret = -EINVAL; ++ goto err; ++ } ++ return 0; ++ ++err: ++ vfio_migration_exit(vbasedev); ++ return ret; ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) ++{ ++ struct vfio_region_info *info = NULL; ++ Error *local_err = NULL; ++ int ret; ++ ++ ret = vfio_get_dev_region_info(vbasedev, VFIO_REGION_TYPE_MIGRATION, ++ VFIO_REGION_SUBTYPE_MIGRATION, &info); ++ if (ret) { ++ goto add_blocker; ++ } ++ ++ ret = vfio_migration_init(vbasedev, info); ++ if (ret) { ++ goto add_blocker; ++ } ++ 
++ trace_vfio_migration_probe(vbasedev->name, info->index); ++ g_free(info); ++ return 0; ++ ++add_blocker: ++ error_setg(&vbasedev->migration_blocker, ++ "VFIO device doesn't support migration"); ++ g_free(info); ++ ++ ret = migrate_add_blocker(vbasedev->migration_blocker, &local_err); ++ if (local_err) { ++ error_propagate(errp, local_err); ++ error_free(vbasedev->migration_blocker); ++ vbasedev->migration_blocker = NULL; ++ } ++ return ret; ++} ++ ++void vfio_migration_finalize(VFIODevice *vbasedev) ++{ ++ if (vbasedev->migration) { ++ vfio_migration_exit(vbasedev); ++ } ++ ++ if (vbasedev->migration_blocker) { ++ migrate_del_blocker(vbasedev->migration_blocker); ++ error_free(vbasedev->migration_blocker); ++ vbasedev->migration_blocker = NULL; ++ } ++} +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 8cdc27946c..fd034ac536 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -143,3 +143,6 @@ vfio_display_edid_link_up(void) "" + vfio_display_edid_link_down(void) "" + vfio_display_edid_update(uint32_t prefx, uint32_t prefy) "%ux%u" + vfio_display_edid_write_error(void) "" ++ ++# migration.c ++vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 6ea4898c4d..e0482c2bac 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -57,6 +57,10 @@ typedef struct VFIORegion { + uint8_t nr; /* cache the region number for debug */ + } VFIORegion; + ++typedef struct VFIOMigration { ++ VFIORegion region; ++} VFIOMigration; ++ + typedef struct VFIOAddressSpace { + AddressSpace *as; + QLIST_HEAD(, VFIOContainer) containers; +@@ -113,6 +117,8 @@ typedef struct VFIODevice { + unsigned int num_irqs; + unsigned int num_regions; + unsigned int flags; ++ VFIOMigration *migration; ++ Error *migration_blocker; + } VFIODevice; + + struct VFIODeviceOps { +@@ -204,4 +210,7 @@ int vfio_spapr_create_window(VFIOContainer *container, + 
int vfio_spapr_remove_window(VFIOContainer *container, + hwaddr offset_within_address_space); + ++int vfio_migration_probe(VFIODevice *vbasedev, Error **errp); ++void vfio_migration_finalize(VFIODevice *vbasedev); ++ + #endif /* HW_VFIO_VFIO_COMMON_H */ +-- +2.27.0 + diff --git a/vfio-Add-migration-state-change-notifier.patch b/vfio-Add-migration-state-change-notifier.patch new file mode 100644 index 0000000000000000000000000000000000000000..5fe73a4cb18cd401d8d63ec8440cc361bbae60d9 --- /dev/null +++ b/vfio-Add-migration-state-change-notifier.patch @@ -0,0 +1,104 @@ +From b61729a5e0ab89d29f041202b50d042405076e62 Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:16 +0530 +Subject: [PATCH] vfio: Add migration state change notifier + +Added migration state change notifier to get notification on migration state +change. These states are translated to VFIO device state and conveyed to +vendor driver. + +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +Reviewed-by: Dr. 
David Alan Gilbert +Reviewed-by: Cornelia Huck +Signed-off-by: Alex Williamson +--- + hw/vfio/migration.c | 28 ++++++++++++++++++++++++++++ + hw/vfio/trace-events | 1 + + include/hw/vfio/vfio-common.h | 2 ++ + 3 files changed, 31 insertions(+) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index ca82c78536..0c6c9b655f 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -175,6 +175,30 @@ static void vfio_vmstate_change(void *opaque, int running, RunState state) + (migration->device_state & mask) | value); + } + ++static void vfio_migration_state_notifier(Notifier *notifier, void *data) ++{ ++ MigrationState *s = data; ++ VFIOMigration *migration = container_of(notifier, VFIOMigration, ++ migration_state); ++ VFIODevice *vbasedev = migration->vbasedev; ++ int ret; ++ ++ trace_vfio_migration_state_notifier(vbasedev->name, ++ MigrationStatus_str(s->state)); ++ ++ switch (s->state) { ++ case MIGRATION_STATUS_CANCELLING: ++ case MIGRATION_STATUS_CANCELLED: ++ case MIGRATION_STATUS_FAILED: ++ ret = vfio_migration_set_state(vbasedev, ++ ~(VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RESUMING), ++ VFIO_DEVICE_STATE_RUNNING); ++ if (ret) { ++ error_report("%s: Failed to set state RUNNING", vbasedev->name); ++ } ++ } ++} ++ + static void vfio_migration_exit(VFIODevice *vbasedev) + { + VFIOMigration *migration = vbasedev->migration; +@@ -219,8 +243,11 @@ static int vfio_migration_init(VFIODevice *vbasedev, + } + + migration = vbasedev->migration; ++ migration->vbasedev = vbasedev; + migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, + vbasedev); ++ migration->migration_state.notify = vfio_migration_state_notifier; ++ add_migration_state_change_notifier(&migration->migration_state); + return 0; + + err: +@@ -270,6 +297,7 @@ void vfio_migration_finalize(VFIODevice *vbasedev) + if (vbasedev->migration) { + VFIOMigration *migration = vbasedev->migration; + ++ remove_migration_state_change_notifier(&migration->migration_state); + 
qemu_del_vm_change_state_handler(migration->vm_state); + vfio_migration_exit(vbasedev); + } +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 1626862315..bd3d47b005 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -148,3 +148,4 @@ vfio_display_edid_write_error(void) "" + vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" + vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d" + vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" ++vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 533d6737ac..efff0590ae 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -59,10 +59,12 @@ typedef struct VFIORegion { + } VFIORegion; + + typedef struct VFIOMigration { ++ struct VFIODevice *vbasedev; + VMChangeStateEntry *vm_state; + VFIORegion region; + uint32_t device_state; + int vm_running; ++ Notifier migration_state; + } VFIOMigration; + + typedef struct VFIOAddressSpace { +-- +2.27.0 + diff --git a/vfio-Add-save-and-load-functions-for-VFIO-PCI-device.patch b/vfio-Add-save-and-load-functions-for-VFIO-PCI-device.patch new file mode 100644 index 0000000000000000000000000000000000000000..232efcf5dbcc072f358e3fec5a5f1186a1a97b55 --- /dev/null +++ b/vfio-Add-save-and-load-functions-for-VFIO-PCI-device.patch @@ -0,0 +1,106 @@ +From 92f104ca6e35acae079ca3bb432f24452058d483 Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:13 +0530 +Subject: [PATCH] vfio: Add save and load functions for VFIO PCI devices + +Added functions to save and restore PCI device specific data, +specifically config space of PCI device. 
+ +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +Signed-off-by: Alex Williamson +--- + hw/vfio/pci.c | 51 +++++++++++++++++++++++++++++++++++ + include/hw/vfio/vfio-common.h | 2 ++ + 2 files changed, 53 insertions(+) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index de0d286fc9..b9fae3ad28 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -35,6 +35,7 @@ + #include "pci.h" + #include "trace.h" + #include "qapi/error.h" ++#include "migration/qemu-file.h" + + #define TYPE_VFIO_PCI "vfio-pci" + #define PCI_VFIO(obj) OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI) +@@ -2395,11 +2396,61 @@ static Object *vfio_pci_get_object(VFIODevice *vbasedev) + return OBJECT(vdev); + } + ++static bool vfio_msix_present(void *opaque, int version_id) ++{ ++ PCIDevice *pdev = opaque; ++ ++ return msix_present(pdev); ++} ++ ++const VMStateDescription vmstate_vfio_pci_config = { ++ .name = "VFIOPCIDevice", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .fields = (VMStateField[]) { ++ VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice), ++ VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, vfio_msix_present), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ ++static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f) ++{ ++ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); ++ ++ vmstate_save_state(f, &vmstate_vfio_pci_config, vdev, NULL); ++} ++ ++static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) ++{ ++ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); ++ PCIDevice *pdev = &vdev->pdev; ++ int ret; ++ ++ ret = vmstate_load_state(f, &vmstate_vfio_pci_config, vdev, 1); ++ if (ret) { ++ return ret; ++ } ++ ++ vfio_pci_write_config(pdev, PCI_COMMAND, ++ pci_get_word(pdev->config + PCI_COMMAND), 2); ++ ++ if (msi_enabled(pdev)) { ++ vfio_msi_enable(vdev); ++ } else if (msix_enabled(pdev)) { ++ vfio_msix_enable(vdev); ++ } ++ ++ return ret; ++} ++ + static VFIODeviceOps vfio_pci_ops = { + .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, + 
.vfio_hot_reset_multi = vfio_pci_hot_reset_multi, + .vfio_eoi = vfio_intx_eoi, + .vfio_get_object = vfio_pci_get_object, ++ .vfio_save_config = vfio_pci_save_config, ++ .vfio_load_config = vfio_pci_load_config, + }; + + int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 771b6d59a3..6ea4898c4d 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -120,6 +120,8 @@ struct VFIODeviceOps { + int (*vfio_hot_reset_multi)(VFIODevice *vdev); + void (*vfio_eoi)(VFIODevice *vdev); + Object *(*vfio_get_object)(VFIODevice *vdev); ++ void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f); ++ int (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f); + }; + + typedef struct VFIOGroup { +-- +2.27.0 + diff --git a/vfio-Add-save-state-functions-to-SaveVMHandlers.patch b/vfio-Add-save-state-functions-to-SaveVMHandlers.patch new file mode 100644 index 0000000000000000000000000000000000000000..14047fd8a474c07c71fa4ba622e1fb33d043b02d --- /dev/null +++ b/vfio-Add-save-state-functions-to-SaveVMHandlers.patch @@ -0,0 +1,380 @@ +From 94f106f95e887d1d706e8f771fd6ad287ddac2dc Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:18 +0530 +Subject: [PATCH] vfio: Add save state functions to SaveVMHandlers + +Added .save_live_pending, .save_live_iterate and .save_live_complete_precopy +functions. These functions handle the pre-copy and stop-and-copy phases. + +In _SAVING|_RUNNING device state or pre-copy phase: +- read pending_bytes. If pending_bytes > 0, go through below steps. +- read data_offset - indicates kernel driver to write data to staging + buffer. +- read data_size - amount of data in bytes written by vendor driver in + migration region. +- read data_size bytes of data from data_offset in the migration region.
+- Write data packet to file stream as below: +{VFIO_MIG_FLAG_DEV_DATA_STATE, data_size, actual data, +VFIO_MIG_FLAG_END_OF_STATE } + +In _SAVING device state or stop-and-copy phase +a. read config space of device and save to migration file stream. This + doesn't need to be from vendor driver. Any other special config state + from driver can be saved as data in following iteration. +b. read pending_bytes. If pending_bytes > 0, go through below steps. +c. read data_offset - indicates kernel driver to write data to staging + buffer. +d. read data_size - amount of data in bytes written by vendor driver in + migration region. +e. read data_size bytes of data from data_offset in the migration region. +f. Write data packet as below: + {VFIO_MIG_FLAG_DEV_DATA_STATE, data_size, actual data} +g. iterate through steps b to f while (pending_bytes > 0) +h. Write {VFIO_MIG_FLAG_END_OF_STATE} + +When data region is mapped, it is the user's responsibility to read data from +data_offset of data_size before moving to next steps. + +Added fix suggested by Artem Polyakov to reset pending_bytes in +vfio_save_iterate(). +Added fix suggested by Zhi Wang to add 0 as data size in migration stream and +add END_OF_STATE delimiter to indicate phase complete.
+ +Suggested-by: Artem Polyakov +Suggested-by: Zhi Wang +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +Reviewed-by: Yan Zhao +Signed-off-by: Alex Williamson +--- + hw/vfio/migration.c | 276 ++++++++++++++++++++++++++++++++++ + hw/vfio/trace-events | 6 + + include/hw/vfio/vfio-common.h | 1 + + 3 files changed, 283 insertions(+) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 405228fc5a..f78a77e1e3 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -148,6 +148,151 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, + return 0; + } + ++static void *get_data_section_size(VFIORegion *region, uint64_t data_offset, ++ uint64_t data_size, uint64_t *size) ++{ ++ void *ptr = NULL; ++ uint64_t limit = 0; ++ int i; ++ ++ if (!region->mmaps) { ++ if (size) { ++ *size = MIN(data_size, region->size - data_offset); ++ } ++ return ptr; ++ } ++ ++ for (i = 0; i < region->nr_mmaps; i++) { ++ VFIOMmap *map = region->mmaps + i; ++ ++ if ((data_offset >= map->offset) && ++ (data_offset < map->offset + map->size)) { ++ ++ /* check if data_offset is within sparse mmap areas */ ++ ptr = map->mmap + data_offset - map->offset; ++ if (size) { ++ *size = MIN(data_size, map->offset + map->size - data_offset); ++ } ++ break; ++ } else if ((data_offset < map->offset) && ++ (!limit || limit > map->offset)) { ++ /* ++ * data_offset is not within sparse mmap areas, find size of ++ * non-mapped area. Check through all list since region->mmaps list ++ * is not sorted. ++ */ ++ limit = map->offset; ++ } ++ } ++ ++ if (!ptr && size) { ++ *size = limit ? 
MIN(data_size, limit - data_offset) : data_size; ++ } ++ return ptr; ++} ++ ++static int vfio_save_buffer(QEMUFile *f, VFIODevice *vbasedev, uint64_t *size) ++{ ++ VFIOMigration *migration = vbasedev->migration; ++ VFIORegion *region = &migration->region; ++ uint64_t data_offset = 0, data_size = 0, sz; ++ int ret; ++ ++ ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset), ++ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_offset)); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size), ++ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(data_size)); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ trace_vfio_save_buffer(vbasedev->name, data_offset, data_size, ++ migration->pending_bytes); ++ ++ qemu_put_be64(f, data_size); ++ sz = data_size; ++ ++ while (sz) { ++ void *buf; ++ uint64_t sec_size; ++ bool buf_allocated = false; ++ ++ buf = get_data_section_size(region, data_offset, sz, &sec_size); ++ ++ if (!buf) { ++ buf = g_try_malloc(sec_size); ++ if (!buf) { ++ error_report("%s: Error allocating buffer ", __func__); ++ return -ENOMEM; ++ } ++ buf_allocated = true; ++ ++ ret = vfio_mig_read(vbasedev, buf, sec_size, ++ region->fd_offset + data_offset); ++ if (ret < 0) { ++ g_free(buf); ++ return ret; ++ } ++ } ++ ++ qemu_put_buffer(f, buf, sec_size); ++ ++ if (buf_allocated) { ++ g_free(buf); ++ } ++ sz -= sec_size; ++ data_offset += sec_size; ++ } ++ ++ ret = qemu_file_get_error(f); ++ ++ if (!ret && size) { ++ *size = data_size; ++ } ++ ++ return ret; ++} ++ ++static int vfio_update_pending(VFIODevice *vbasedev) ++{ ++ VFIOMigration *migration = vbasedev->migration; ++ VFIORegion *region = &migration->region; ++ uint64_t pending_bytes = 0; ++ int ret; ++ ++ ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes), ++ region->fd_offset + VFIO_MIG_STRUCT_OFFSET(pending_bytes)); ++ if (ret < 0) { ++ migration->pending_bytes = 0; ++ return ret; ++ } ++ ++ migration->pending_bytes = pending_bytes; ++ 
trace_vfio_update_pending(vbasedev->name, pending_bytes); ++ return 0; ++} ++ ++static int vfio_save_device_config_state(QEMUFile *f, void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ ++ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_CONFIG_STATE); ++ ++ if (vbasedev->ops && vbasedev->ops->vfio_save_config) { ++ vbasedev->ops->vfio_save_config(vbasedev, f); ++ } ++ ++ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); ++ ++ trace_vfio_save_device_config_state(vbasedev->name); ++ ++ return qemu_file_get_error(f); ++} ++ + static void vfio_migration_cleanup(VFIODevice *vbasedev) + { + VFIOMigration *migration = vbasedev->migration; +@@ -210,9 +355,140 @@ static void vfio_save_cleanup(void *opaque) + trace_vfio_save_cleanup(vbasedev->name); + } + ++static void vfio_save_pending(QEMUFile *f, void *opaque, ++ uint64_t threshold_size, ++ uint64_t *res_precopy_only, ++ uint64_t *res_compatible, ++ uint64_t *res_postcopy_only) ++{ ++ VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; ++ int ret; ++ ++ ret = vfio_update_pending(vbasedev); ++ if (ret) { ++ return; ++ } ++ ++ *res_precopy_only += migration->pending_bytes; ++ ++ trace_vfio_save_pending(vbasedev->name, *res_precopy_only, ++ *res_postcopy_only, *res_compatible); ++} ++ ++static int vfio_save_iterate(QEMUFile *f, void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; ++ uint64_t data_size; ++ int ret; ++ ++ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); ++ ++ if (migration->pending_bytes == 0) { ++ ret = vfio_update_pending(vbasedev); ++ if (ret) { ++ return ret; ++ } ++ ++ if (migration->pending_bytes == 0) { ++ qemu_put_be64(f, 0); ++ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); ++ /* indicates data finished, goto complete phase */ ++ return 1; ++ } ++ } ++ ++ ret = vfio_save_buffer(f, vbasedev, &data_size); ++ if (ret) { ++ error_report("%s: vfio_save_buffer failed %s", vbasedev->name, ++ strerror(errno)); ++ return ret; ++ } ++ ++ 
qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); ++ ++ ret = qemu_file_get_error(f); ++ if (ret) { ++ return ret; ++ } ++ ++ /* ++ * Reset pending_bytes as .save_live_pending is not called during savevm or ++ * snapshot case, in such case vfio_update_pending() at the start of this ++ * function updates pending_bytes. ++ */ ++ migration->pending_bytes = 0; ++ trace_vfio_save_iterate(vbasedev->name, data_size); ++ return 0; ++} ++ ++static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; ++ uint64_t data_size; ++ int ret; ++ ++ ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_RUNNING, ++ VFIO_DEVICE_STATE_SAVING); ++ if (ret) { ++ error_report("%s: Failed to set state STOP and SAVING", ++ vbasedev->name); ++ return ret; ++ } ++ ++ ret = vfio_save_device_config_state(f, opaque); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = vfio_update_pending(vbasedev); ++ if (ret) { ++ return ret; ++ } ++ ++ while (migration->pending_bytes > 0) { ++ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); ++ ret = vfio_save_buffer(f, vbasedev, &data_size); ++ if (ret < 0) { ++ error_report("%s: Failed to save buffer", vbasedev->name); ++ return ret; ++ } ++ ++ if (data_size == 0) { ++ break; ++ } ++ ++ ret = vfio_update_pending(vbasedev); ++ if (ret) { ++ return ret; ++ } ++ } ++ ++ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); ++ ++ ret = qemu_file_get_error(f); ++ if (ret) { ++ return ret; ++ } ++ ++ ret = vfio_migration_set_state(vbasedev, ~VFIO_DEVICE_STATE_SAVING, 0); ++ if (ret) { ++ error_report("%s: Failed to set state STOPPED", vbasedev->name); ++ return ret; ++ } ++ ++ trace_vfio_save_complete_precopy(vbasedev->name); ++ return ret; ++} ++ + static SaveVMHandlers savevm_vfio_handlers = { + .save_setup = vfio_save_setup, + .save_cleanup = vfio_save_cleanup, ++ .save_live_pending = vfio_save_pending, ++ .save_live_iterate = vfio_save_iterate, ++ .save_live_complete_precopy = 
vfio_save_complete_precopy, + }; + + /* ---------------------------------------------------------------------- */ +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 86c18def01..9a1c5e17d9 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -151,3 +151,9 @@ vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t + vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" + vfio_save_setup(const char *name) " (%s)" + vfio_save_cleanup(const char *name) " (%s)" ++vfio_save_buffer(const char *name, uint64_t data_offset, uint64_t data_size, uint64_t pending) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64" pending 0x%"PRIx64 ++vfio_update_pending(const char *name, uint64_t pending) " (%s) pending 0x%"PRIx64 ++vfio_save_device_config_state(const char *name) " (%s)" ++vfio_save_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64 ++vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d" ++vfio_save_complete_precopy(const char *name) " (%s)" +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index efff0590ae..c825524606 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -65,6 +65,7 @@ typedef struct VFIOMigration { + uint32_t device_state; + int vm_running; + Notifier migration_state; ++ uint64_t pending_bytes; + } VFIOMigration; + + typedef struct VFIOAddressSpace { +-- +2.27.0 + diff --git a/vfio-Add-vfio_get_object-callback-to-VFIODeviceOps.patch b/vfio-Add-vfio_get_object-callback-to-VFIODeviceOps.patch new file mode 100644 index 0000000000000000000000000000000000000000..e13a1daf40d518a550e8af5618b2d2b537cb43cc --- /dev/null +++ b/vfio-Add-vfio_get_object-callback-to-VFIODeviceOps.patch @@ -0,0 +1,55 @@ +From c1de789d89132b66243fbfe253f10764ce514a08 Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 
15:06:12 +0530 +Subject: [PATCH] vfio: Add vfio_get_object callback to VFIODeviceOps + +Hook vfio_get_object callback for PCI devices. + +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +Suggested-by: Cornelia Huck +Reviewed-by: Cornelia Huck +Signed-off-by: Alex Williamson +--- + hw/vfio/pci.c | 8 ++++++++ + include/hw/vfio/vfio-common.h | 1 + + 2 files changed, 9 insertions(+) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index d7a4e1875c..de0d286fc9 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2388,10 +2388,18 @@ static void vfio_pci_compute_needs_reset(VFIODevice *vbasedev) + } + } + ++static Object *vfio_pci_get_object(VFIODevice *vbasedev) ++{ ++ VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); ++ ++ return OBJECT(vdev); ++} ++ + static VFIODeviceOps vfio_pci_ops = { + .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, + .vfio_hot_reset_multi = vfio_pci_hot_reset_multi, + .vfio_eoi = vfio_intx_eoi, ++ .vfio_get_object = vfio_pci_get_object, + }; + + int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 93493891ba..771b6d59a3 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -119,6 +119,7 @@ struct VFIODeviceOps { + void (*vfio_compute_needs_reset)(VFIODevice *vdev); + int (*vfio_hot_reset_multi)(VFIODevice *vdev); + void (*vfio_eoi)(VFIODevice *vdev); ++ Object *(*vfio_get_object)(VFIODevice *vdev); + }; + + typedef struct VFIOGroup { +-- +2.27.0 + diff --git a/vfio-Add-vfio_listener_log_sync-to-mark-dirty-pages.patch b/vfio-Add-vfio_listener_log_sync-to-mark-dirty-pages.patch new file mode 100644 index 0000000000000000000000000000000000000000..6479a2550592f6eff10e6c244ed3648d887ed1a6 --- /dev/null +++ b/vfio-Add-vfio_listener_log_sync-to-mark-dirty-pages.patch @@ -0,0 +1,182 @@ +From 3ac0647003d192579bcb6c1081b75d9c8ada78e0 Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 
15:06:23 +0530 +Subject: [PATCH] vfio: Add vfio_listener_log_sync to mark dirty pages + +vfio_listener_log_sync gets list of dirty pages from container using +VFIO_IOMMU_GET_DIRTY_BITMAP ioctl and mark those pages dirty when all +devices are stopped and saving state. +Return early for the RAM block section of mapped MMIO region. + +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +[aw: fix error_report types, fix cpu_physical_memory_set_dirty_lebitmap() cast] +Signed-off-by: Alex Williamson +--- + hw/vfio/common.c | 116 +++++++++++++++++++++++++++++++++++++++++++ + hw/vfio/trace-events | 1 + + 2 files changed, 117 insertions(+) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 35168b8f3e..4d2828fc97 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -29,6 +29,7 @@ + #include "hw/vfio/vfio.h" + #include "exec/address-spaces.h" + #include "exec/memory.h" ++#include "exec/ram_addr.h" + #include "hw/hw.h" + #include "qemu/error-report.h" + #include "qemu/range.h" +@@ -36,6 +37,7 @@ + #include "sysemu/kvm.h" + #include "trace.h" + #include "qapi/error.h" ++#include "migration/migration.h" + + VFIOGroupList vfio_group_list = + QLIST_HEAD_INITIALIZER(vfio_group_list); +@@ -285,6 +287,39 @@ const MemoryRegionOps vfio_region_ops = { + }, + }; + ++/* ++ * Device state interfaces ++ */ ++ ++static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container) ++{ ++ VFIOGroup *group; ++ VFIODevice *vbasedev; ++ MigrationState *ms = migrate_get_current(); ++ ++ if (!migration_is_setup_or_active(ms->state)) { ++ return false; ++ } ++ ++ QLIST_FOREACH(group, &container->group_list, container_next) { ++ QLIST_FOREACH(vbasedev, &group->device_list, next) { ++ VFIOMigration *migration = vbasedev->migration; ++ ++ if (!migration) { ++ return false; ++ } ++ ++ if ((migration->device_state & VFIO_DEVICE_STATE_SAVING) && ++ !(migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { ++ continue; ++ } else { ++ return false; ++ } ++ } ++ } ++ return true; ++} ++ + 
/* + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86 + */ +@@ -794,9 +829,90 @@ static void vfio_listener_region_del(MemoryListener *listener, + } + } + ++static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova, ++ uint64_t size, ram_addr_t ram_addr) ++{ ++ struct vfio_iommu_type1_dirty_bitmap *dbitmap; ++ struct vfio_iommu_type1_dirty_bitmap_get *range; ++ uint64_t pages; ++ int ret; ++ ++ dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range)); ++ ++ dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range); ++ dbitmap->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; ++ range = (struct vfio_iommu_type1_dirty_bitmap_get *)&dbitmap->data; ++ range->iova = iova; ++ range->size = size; ++ ++ /* ++ * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of ++ * TARGET_PAGE_SIZE to mark those dirty. Hence set bitmap's pgsize to ++ * TARGET_PAGE_SIZE. ++ */ ++ range->bitmap.pgsize = TARGET_PAGE_SIZE; ++ ++ pages = TARGET_PAGE_ALIGN(range->size) >> TARGET_PAGE_BITS; ++ range->bitmap.size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / ++ BITS_PER_BYTE; ++ range->bitmap.data = g_try_malloc0(range->bitmap.size); ++ if (!range->bitmap.data) { ++ ret = -ENOMEM; ++ goto err_out; ++ } ++ ++ ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap); ++ if (ret) { ++ error_report("Failed to get dirty bitmap for iova: 0x%"PRIx64 ++ " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova, ++ (uint64_t)range->size, errno); ++ goto err_out; ++ } ++ ++ cpu_physical_memory_set_dirty_lebitmap((unsigned long *)range->bitmap.data, ++ ram_addr, pages); ++ ++ trace_vfio_get_dirty_bitmap(container->fd, range->iova, range->size, ++ range->bitmap.size, ram_addr); ++err_out: ++ g_free(range->bitmap.data); ++ g_free(dbitmap); ++ ++ return ret; ++} ++ ++static int vfio_sync_dirty_bitmap(VFIOContainer *container, ++ MemoryRegionSection *section) ++{ ++ ram_addr_t ram_addr; ++ ++ ram_addr = memory_region_get_ram_addr(section->mr) + ++ 
section->offset_within_region; ++ ++ return vfio_get_dirty_bitmap(container, ++ TARGET_PAGE_ALIGN(section->offset_within_address_space), ++ int128_get64(section->size), ram_addr); ++} ++ ++static void vfio_listerner_log_sync(MemoryListener *listener, ++ MemoryRegionSection *section) ++{ ++ VFIOContainer *container = container_of(listener, VFIOContainer, listener); ++ ++ if (vfio_listener_skipped_section(section) || ++ !container->dirty_pages_supported) { ++ return; ++ } ++ ++ if (vfio_devices_all_stopped_and_saving(container)) { ++ vfio_sync_dirty_bitmap(container, section); ++ } ++} ++ + static const MemoryListener vfio_memory_listener = { + .region_add = vfio_listener_region_add, + .region_del = vfio_listener_region_del, ++ .log_sync = vfio_listerner_log_sync, + }; + + static void vfio_listener_release(VFIOContainer *container) +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 4f08f5a633..4167f35d64 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -161,3 +161,4 @@ vfio_load_device_config_state(const char *name) " (%s)" + vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 + vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64 + vfio_load_cleanup(const char *name) " (%s)" ++vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64 +-- +2.27.0 + diff --git a/vfio-Dirty-page-tracking-when-vIOMMU-is-enabled.patch b/vfio-Dirty-page-tracking-when-vIOMMU-is-enabled.patch new file mode 100644 index 0000000000000000000000000000000000000000..65949f079ed1eb2baea2e626fbd4d6140d23350c --- /dev/null +++ b/vfio-Dirty-page-tracking-when-vIOMMU-is-enabled.patch @@ -0,0 +1,162 @@ +From a400753d0f1a008367165aadf375abfe86a66ed7 Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:24 +0530 +Subject: 
[PATCH] vfio: Dirty page tracking when vIOMMU is enabled + +When vIOMMU is enabled, register MAP notifier from log_sync when all +devices in container are in stop and copy phase of migration. Call replay +and get dirty pages from notifier callback. + +Suggested-by: Alex Williamson +Signed-off-by: Kirti Wankhede +Reviewed-by: Yan Zhao +Signed-off-by: Alex Williamson +--- + hw/vfio/common.c | 88 +++++++++++++++++++++++++++++++++++++++++--- + hw/vfio/trace-events | 1 + + 2 files changed, 83 insertions(+), 6 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 4d2828fc97..8773b998ac 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -441,8 +441,8 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section) + } + + /* Called with rcu_read_lock held. */ +-static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr, +- bool *read_only) ++static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr, ++ ram_addr_t *ram_addr, bool *read_only) + { + MemoryRegion *mr; + hwaddr xlat; +@@ -473,8 +473,17 @@ static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr, + return false; + } + +- *vaddr = memory_region_get_ram_ptr(mr) + xlat; +- *read_only = !writable || mr->readonly; ++ if (vaddr) { ++ *vaddr = memory_region_get_ram_ptr(mr) + xlat; ++ } ++ ++ if (ram_addr) { ++ *ram_addr = memory_region_get_ram_addr(mr) + xlat; ++ } ++ ++ if (read_only) { ++ *read_only = !writable || mr->readonly; ++ } + + return true; + } +@@ -484,7 +493,6 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n); + VFIOContainer *container = giommu->container; + hwaddr iova = iotlb->iova + giommu->iommu_offset; +- bool read_only; + void *vaddr; + int ret; + +@@ -500,7 +508,9 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) + rcu_read_lock(); + + if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) { +- if (!vfio_get_vaddr(iotlb, &vaddr, &read_only)) { 
++ bool read_only; ++ ++ if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only)) { + goto out; + } + /* +@@ -881,11 +891,77 @@ err_out: + return ret; + } + ++typedef struct { ++ IOMMUNotifier n; ++ VFIOGuestIOMMU *giommu; ++} vfio_giommu_dirty_notifier; ++ ++static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) ++{ ++ vfio_giommu_dirty_notifier *gdn = container_of(n, ++ vfio_giommu_dirty_notifier, n); ++ VFIOGuestIOMMU *giommu = gdn->giommu; ++ VFIOContainer *container = giommu->container; ++ hwaddr iova = iotlb->iova + giommu->iommu_offset; ++ ram_addr_t translated_addr; ++ ++ trace_vfio_iommu_map_dirty_notify(iova, iova + iotlb->addr_mask); ++ ++ if (iotlb->target_as != &address_space_memory) { ++ error_report("Wrong target AS \"%s\", only system memory is allowed", ++ iotlb->target_as->name ? iotlb->target_as->name : "none"); ++ return; ++ } ++ ++ rcu_read_lock(); ++ if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) { ++ int ret; ++ ++ ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1, ++ translated_addr); ++ if (ret) { ++ error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", " ++ "0x%"HWADDR_PRIx") = %d (%m)", ++ container, iova, ++ iotlb->addr_mask + 1, ret); ++ } ++ } ++ rcu_read_unlock(); ++} ++ + static int vfio_sync_dirty_bitmap(VFIOContainer *container, + MemoryRegionSection *section) + { + ram_addr_t ram_addr; + ++ if (memory_region_is_iommu(section->mr)) { ++ VFIOGuestIOMMU *giommu; ++ ++ QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) { ++ if (MEMORY_REGION(giommu->iommu) == section->mr && ++ giommu->n.start == section->offset_within_region) { ++ Int128 llend; ++ vfio_giommu_dirty_notifier gdn = { .giommu = giommu }; ++ int idx = memory_region_iommu_attrs_to_index(giommu->iommu, ++ MEMTXATTRS_UNSPECIFIED); ++ ++ llend = int128_add(int128_make64(section->offset_within_region), ++ section->size); ++ llend = int128_sub(llend, int128_one()); ++ ++ iommu_notifier_init(&gdn.n, ++ 
vfio_iommu_map_dirty_notify, ++ IOMMU_NOTIFIER_MAP, ++ section->offset_within_region, ++ int128_get64(llend), ++ idx); ++ memory_region_iommu_replay(giommu->iommu, &gdn.n); ++ break; ++ } ++ } ++ return 0; ++ } ++ + ram_addr = memory_region_get_ram_addr(section->mr) + + section->offset_within_region; + +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 4167f35d64..575ebde6e0 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -162,3 +162,4 @@ vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 + vfio_load_state_device_data(const char *name, uint64_t data_offset, uint64_t data_size) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64 + vfio_load_cleanup(const char *name) " (%s)" + vfio_get_dirty_bitmap(int fd, uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start) "container fd=%d, iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64 ++vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 +-- +2.27.0 + diff --git a/vfio-Get-migration-capability-flags-for-container.patch b/vfio-Get-migration-capability-flags-for-container.patch new file mode 100644 index 0000000000000000000000000000000000000000..88b9bb7e6fdfe4f4a75808bbdcb5ec45d354ae15 --- /dev/null +++ b/vfio-Get-migration-capability-flags-for-container.patch @@ -0,0 +1,186 @@ +From fc49c9cbf2deba53370f48ad9db2adc5f6ceb3ba Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:21 +0530 +Subject: [PATCH] vfio: Get migration capability flags for container + +Added helper functions to get IOMMU info capability chain. +Added function to get migration capability information from that +capability chain for IOMMU container. + +Similar change was proposed earlier: +https://lists.gnu.org/archive/html/qemu-devel/2018-05/msg03759.html + +Disable migration for devices if IOMMU module doesn't support migration +capability. 
+ +Signed-off-by: Kirti Wankhede +Cc: Shameer Kolothum +Cc: Eric Auger +Signed-off-by: Alex Williamson +--- + hw/vfio/common.c | 90 +++++++++++++++++++++++++++++++---- + hw/vfio/migration.c | 7 ++- + include/hw/vfio/vfio-common.h | 3 ++ + 3 files changed, 91 insertions(+), 9 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 4c32b1bb99..35168b8f3e 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1210,6 +1210,75 @@ static int vfio_init_container(VFIOContainer *container, int group_fd, + return 0; + } + ++static int vfio_get_iommu_info(VFIOContainer *container, ++ struct vfio_iommu_type1_info **info) ++{ ++ ++ size_t argsz = sizeof(struct vfio_iommu_type1_info); ++ ++ *info = g_new0(struct vfio_iommu_type1_info, 1); ++again: ++ (*info)->argsz = argsz; ++ ++ if (ioctl(container->fd, VFIO_IOMMU_GET_INFO, *info)) { ++ g_free(*info); ++ *info = NULL; ++ return -errno; ++ } ++ ++ if (((*info)->argsz > argsz)) { ++ argsz = (*info)->argsz; ++ *info = g_realloc(*info, argsz); ++ goto again; ++ } ++ ++ return 0; ++} ++ ++static struct vfio_info_cap_header * ++vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id) ++{ ++ struct vfio_info_cap_header *hdr; ++ void *ptr = info; ++ ++ if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) { ++ return NULL; ++ } ++ ++ for (hdr = ptr + info->cap_offset; hdr != ptr; hdr = ptr + hdr->next) { ++ if (hdr->id == id) { ++ return hdr; ++ } ++ } ++ ++ return NULL; ++} ++ ++static void vfio_get_iommu_info_migration(VFIOContainer *container, ++ struct vfio_iommu_type1_info *info) ++{ ++ struct vfio_info_cap_header *hdr; ++ struct vfio_iommu_type1_info_cap_migration *cap_mig; ++ ++ hdr = vfio_get_iommu_info_cap(info, VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION); ++ if (!hdr) { ++ return; ++ } ++ ++ cap_mig = container_of(hdr, struct vfio_iommu_type1_info_cap_migration, ++ header); ++ ++ /* ++ * cpu_physical_memory_set_dirty_lebitmap() expects pages in bitmap of ++ * TARGET_PAGE_SIZE to mark those dirty. 
++ */ ++ if (cap_mig->pgsize_bitmap & TARGET_PAGE_SIZE) { ++ container->dirty_pages_supported = true; ++ container->max_dirty_bitmap_size = cap_mig->max_dirty_bitmap_size; ++ container->dirty_pgsizes = cap_mig->pgsize_bitmap; ++ } ++} ++ + static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + Error **errp) + { +@@ -1273,6 +1342,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + container = g_malloc0(sizeof(*container)); + container->space = space; + container->fd = fd; ++ container->dirty_pages_supported = false; + QLIST_INIT(&container->giommu_list); + QLIST_INIT(&container->hostwin_list); + +@@ -1285,7 +1355,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + case VFIO_TYPE1v2_IOMMU: + case VFIO_TYPE1_IOMMU: + { +- struct vfio_iommu_type1_info info; ++ struct vfio_iommu_type1_info *info; + + /* + * FIXME: This assumes that a Type1 IOMMU can map any 64-bit +@@ -1294,15 +1364,19 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as, + * existing Type1 IOMMUs generally support any IOVA we're + * going to actually try in practice. 
+ */ +- info.argsz = sizeof(info); +- ret = ioctl(fd, VFIO_IOMMU_GET_INFO, &info); +- /* Ignore errors */ +- if (ret || !(info.flags & VFIO_IOMMU_INFO_PGSIZES)) { ++ ret = vfio_get_iommu_info(container, &info); ++ ++ if (ret || !(info->flags & VFIO_IOMMU_INFO_PGSIZES)) { + /* Assume 4k IOVA page size */ +- info.iova_pgsizes = 4096; ++ info->iova_pgsizes = 4096; + } +- vfio_host_win_add(container, 0, (hwaddr)-1, info.iova_pgsizes); +- container->pgsizes = info.iova_pgsizes; ++ vfio_host_win_add(container, 0, (hwaddr)-1, info->iova_pgsizes); ++ container->pgsizes = info->iova_pgsizes; ++ ++ if (!ret) { ++ vfio_get_iommu_info_migration(container, info); ++ } ++ g_free(info); + break; + } + case VFIO_SPAPR_TCE_v2_IOMMU: +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 954c064435..0d2bd9e5cd 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -832,9 +832,14 @@ err: + + int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) + { ++ VFIOContainer *container = vbasedev->group->container; + struct vfio_region_info *info = NULL; + Error *local_err = NULL; +- int ret; ++ int ret = -ENOTSUP; ++ ++ if (!container->dirty_pages_supported) { ++ goto add_blocker; ++ } + + ret = vfio_get_dev_region_info(vbasedev, VFIO_REGION_TYPE_MIGRATION, + VFIO_REGION_SUBTYPE_MIGRATION, &info); +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index c825524606..8fd0212264 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -84,6 +84,9 @@ typedef struct VFIOContainer { + unsigned iommu_type; + int error; + bool initialized; ++ bool dirty_pages_supported; ++ uint64_t dirty_pgsizes; ++ uint64_t max_dirty_bitmap_size; + unsigned long pgsizes; + QLIST_HEAD(, VFIOGuestIOMMU) giommu_list; + QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list; +-- +2.27.0 + diff --git a/vfio-Make-vfio-pci-device-migration-capable.patch b/vfio-Make-vfio-pci-device-migration-capable.patch new file mode 100644 index 
0000000000000000000000000000000000000000..7e87ec7d7820b8ee4be0de5d55039336c47988fd --- /dev/null +++ b/vfio-Make-vfio-pci-device-migration-capable.patch @@ -0,0 +1,73 @@ +From b20bf027d44809dd6c6376cf0b77e5c5b2057cba Mon Sep 17 00:00:00 2001 +From: Jens Freimann +Date: Tue, 29 Oct 2019 12:49:05 +0100 +Subject: [PATCH] vfio: Make vfio-pci device migration capable + +If the device is not a failover primary device, call +vfio_migration_probe() and vfio_migration_finalize() to enable +migration support for those devices that support it respectively to +tear it down again. +Removed migration blocker from VFIO PCI device specific structure and use +migration blocker from generic structure of VFIO device. + +Note: Since the current version doesn't add the failover feature for assigned +PCI devices, just remove the failover related code in the original patch for +simplicity. + +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +Reviewed-by: Dr. David Alan Gilbert +Reviewed-by: Cornelia Huck +Signed-off-by: Alex Williamson +Signed-off-by: Shenming Lu +--- + hw/vfio/pci.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index b9fae3ad28..a637c35e7a 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3049,6 +3049,11 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + } + } + ++ ret = vfio_migration_probe(&vdev->vbasedev, errp); ++ if (ret) { ++ error_report("%s: Migration disabled", vdev->vbasedev.name); ++ } ++ + vfio_register_err_notifier(vdev); + vfio_register_req_notifier(vdev); + vfio_setup_resetfn_quirk(vdev); +@@ -3096,6 +3101,7 @@ static void vfio_exitfn(PCIDevice *pdev) + } + vfio_teardown_msi(vdev); + vfio_bars_exit(vdev); ++ vfio_migration_finalize(&vdev->vbasedev); + } + + static void vfio_pci_reset(DeviceState *dev) +@@ -3204,11 +3210,6 @@ static Property vfio_pci_dev_properties[] = { + DEFINE_PROP_END_OF_LIST(), + }; + +-static const VMStateDescription vfio_pci_vmstate = { +- .name =
"vfio-pci", +- .unmigratable = 1, +-}; +- + static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); +@@ -3216,7 +3217,6 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) + + dc->reset = vfio_pci_reset; + dc->props = vfio_pci_dev_properties; +- dc->vmsd = &vfio_pci_vmstate; + dc->desc = "VFIO-based PCI device assignment"; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + pdc->realize = vfio_realize; +-- +2.27.0 + diff --git a/vfio-Register-SaveVMHandlers-for-VFIO-device.patch b/vfio-Register-SaveVMHandlers-for-VFIO-device.patch new file mode 100644 index 0000000000000000000000000000000000000000..8e12cd2bb2bbfede3be871fb31a3a96562fedc15 --- /dev/null +++ b/vfio-Register-SaveVMHandlers-for-VFIO-device.patch @@ -0,0 +1,183 @@ +From cd5b58f2ba20e59f2c29d955b8bbd7f5016030b7 Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 26 Oct 2020 15:06:17 +0530 +Subject: [PATCH] vfio: Register SaveVMHandlers for VFIO device + +Define flags to be used as delimiter in migration stream for VFIO devices. +Added .save_setup and .save_cleanup functions. Map & unmap migration +region from these functions at source during saving or pre-copy phase. + +Set VFIO device state depending on VM's state. During live migration, VM is +running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO +device. During save-restore, VM is paused, _SAVING state is set for VFIO device. 
+ +Signed-off-by: Kirti Wankhede +Reviewed-by: Neo Jia +Reviewed-by: Cornelia Huck +Reviewed-by: Yan Zhao +Signed-off-by: Alex Williamson +--- + hw/vfio/migration.c | 102 +++++++++++++++++++++++++++++++++++++++++++ + hw/vfio/trace-events | 2 + + 2 files changed, 104 insertions(+) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 0c6c9b655f..405228fc5a 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -8,12 +8,15 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/main-loop.h" ++#include "qemu/cutils.h" + #include + + #include "sysemu/sysemu.h" + #include "hw/vfio/vfio-common.h" + #include "cpu.h" + #include "migration/migration.h" ++#include "migration/vmstate.h" + #include "migration/qemu-file.h" + #include "migration/register.h" + #include "migration/blocker.h" +@@ -25,6 +28,22 @@ + #include "trace.h" + #include "hw/hw.h" + ++/* ++ * Flags to be used as unique delimiters for VFIO devices in the migration ++ * stream. These flags are composed as: ++ * 0xffffffff => MSB 32-bit all 1s ++ * 0xef10 => Magic ID, represents emulated (virtual) function IO ++ * 0x0000 => 16-bits reserved for flags ++ * ++ * The beginning of state information is marked by _DEV_CONFIG_STATE, ++ * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a ++ * certain state information is marked by _END_OF_STATE. 
++ */ ++#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL) ++#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) ++#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) ++#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) ++ + static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count, + off_t off, bool iswrite) + { +@@ -129,6 +148,75 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask, + return 0; + } + ++static void vfio_migration_cleanup(VFIODevice *vbasedev) ++{ ++ VFIOMigration *migration = vbasedev->migration; ++ ++ if (migration->region.mmaps) { ++ vfio_region_unmap(&migration->region); ++ } ++} ++ ++/* ---------------------------------------------------------------------- */ ++ ++static int vfio_save_setup(QEMUFile *f, void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; ++ int ret; ++ ++ trace_vfio_save_setup(vbasedev->name); ++ ++ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); ++ ++ if (migration->region.mmaps) { ++ /* ++ * Calling vfio_region_mmap() from migration thread. Memory API called ++ * from this function require locking the iothread when called from ++ * outside the main loop thread. 
++ */ ++ qemu_mutex_lock_iothread(); ++ ret = vfio_region_mmap(&migration->region); ++ qemu_mutex_unlock_iothread(); ++ if (ret) { ++ error_report("%s: Failed to mmap VFIO migration region: %s", ++ vbasedev->name, strerror(-ret)); ++ error_report("%s: Falling back to slow path", vbasedev->name); ++ } ++ } ++ ++ ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK, ++ VFIO_DEVICE_STATE_SAVING); ++ if (ret) { ++ error_report("%s: Failed to set state SAVING", vbasedev->name); ++ return ret; ++ } ++ ++ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); ++ ++ ret = qemu_file_get_error(f); ++ if (ret) { ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static void vfio_save_cleanup(void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ ++ vfio_migration_cleanup(vbasedev); ++ trace_vfio_save_cleanup(vbasedev->name); ++} ++ ++static SaveVMHandlers savevm_vfio_handlers = { ++ .save_setup = vfio_save_setup, ++ .save_cleanup = vfio_save_cleanup, ++}; ++ ++/* ---------------------------------------------------------------------- */ ++ + static void vfio_vmstate_change(void *opaque, int running, RunState state) + { + VFIODevice *vbasedev = opaque; +@@ -215,6 +303,8 @@ static int vfio_migration_init(VFIODevice *vbasedev, + int ret; + Object *obj; + VFIOMigration *migration; ++ char id[256] = ""; ++ g_autofree char *path = NULL, *oid = NULL; + + if (!vbasedev->ops->vfio_get_object) { + return -EINVAL; +@@ -244,6 +334,18 @@ static int vfio_migration_init(VFIODevice *vbasedev, + + migration = vbasedev->migration; + migration->vbasedev = vbasedev; ++ ++ oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj))); ++ if (oid) { ++ path = g_strdup_printf("%s/vfio", oid); ++ } else { ++ path = g_strdup("vfio"); ++ } ++ strpadcpy(id, sizeof(id), path, '\0'); ++ ++ register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers, ++ vbasedev); ++ + migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change, + vbasedev); + migration->migration_state.notify = 
vfio_migration_state_notifier; +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index bd3d47b005..86c18def01 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -149,3 +149,5 @@ vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d" + vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d" + vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d" + vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" ++vfio_save_setup(const char *name) " (%s)" ++vfio_save_cleanup(const char *name) " (%s)" +-- +2.27.0 + diff --git a/vmstate-add-qom-interface-to-get-id.patch b/vmstate-add-qom-interface-to-get-id.patch new file mode 100644 index 0000000000000000000000000000000000000000..53a004405a907109dfb0bbc9354a3b0ef979846f --- /dev/null +++ b/vmstate-add-qom-interface-to-get-id.patch @@ -0,0 +1,210 @@ +From d771fca664e40c7d7ec5dfa2c656a282bff705b7 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Wed, 28 Aug 2019 16:00:19 +0400 +Subject: [PATCH] vmstate: add qom interface to get id +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Add an interface to get the instance id, instead of depending on +Device and qdev_get_dev_path(). + +Signed-off-by: Marc-André Lureau +Reviewed-by: Daniel P. Berrangé +Acked-by: Dr. 
David Alan Gilbert +--- + MAINTAINERS | 2 ++ + hw/core/Makefile.objs | 1 + + hw/core/qdev.c | 14 +++++++++++++ + hw/core/vmstate-if.c | 23 +++++++++++++++++++++ + include/hw/vmstate-if.h | 40 ++++++++++++++++++++++++++++++++++++ + include/migration/register.h | 2 ++ + include/migration/vmstate.h | 2 ++ + tests/Makefile.include | 1 + + 8 files changed, 85 insertions(+) + create mode 100644 hw/core/vmstate-if.c + create mode 100644 include/hw/vmstate-if.h + +diff --git a/MAINTAINERS b/MAINTAINERS +index d6de200453..e2d74d7ec3 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -2135,6 +2135,8 @@ Migration + M: Juan Quintela + M: Dr. David Alan Gilbert + S: Maintained ++F: hw/core/vmstate-if.c ++F: include/hw/vmstate-if.h + F: include/migration/ + F: migration/ + F: scripts/vmstate-static-checker.py +diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs +index f8481d959f..54c51583d8 100644 +--- a/hw/core/Makefile.objs ++++ b/hw/core/Makefile.objs +@@ -8,6 +8,7 @@ common-obj-y += irq.o + common-obj-y += hotplug.o + common-obj-$(CONFIG_SOFTMMU) += nmi.o + common-obj-$(CONFIG_SOFTMMU) += vm-change-state-handler.o ++common-obj-y += vmstate-if.o + + common-obj-$(CONFIG_EMPTY_SLOT) += empty_slot.o + common-obj-$(CONFIG_XILINX_AXI) += stream.o +diff --git a/hw/core/qdev.c b/hw/core/qdev.c +index 4b32f2f46d..13931b1117 100644 +--- a/hw/core/qdev.c ++++ b/hw/core/qdev.c +@@ -1048,9 +1048,18 @@ static void device_unparent(Object *obj) + } + } + ++static char * ++device_vmstate_if_get_id(VMStateIf *obj) ++{ ++ DeviceState *dev = DEVICE(obj); ++ ++ return qdev_get_dev_path(dev); ++} ++ + static void device_class_init(ObjectClass *class, void *data) + { + DeviceClass *dc = DEVICE_CLASS(class); ++ VMStateIfClass *vc = VMSTATE_IF_CLASS(class); + + class->unparent = device_unparent; + +@@ -1062,6 +1071,7 @@ static void device_class_init(ObjectClass *class, void *data) + */ + dc->hotpluggable = true; + dc->user_creatable = true; ++ vc->get_id = device_vmstate_if_get_id; + } + + void 
device_class_set_parent_reset(DeviceClass *dc, +@@ -1119,6 +1129,10 @@ static const TypeInfo device_type_info = { + .class_init = device_class_init, + .abstract = true, + .class_size = sizeof(DeviceClass), ++ .interfaces = (InterfaceInfo[]) { ++ { TYPE_VMSTATE_IF }, ++ { } ++ } + }; + + static void qdev_register_types(void) +diff --git a/hw/core/vmstate-if.c b/hw/core/vmstate-if.c +new file mode 100644 +index 0000000000..bf453620fe +--- /dev/null ++++ b/hw/core/vmstate-if.c +@@ -0,0 +1,23 @@ ++/* ++ * VMState interface ++ * ++ * Copyright (c) 2009-2019 Red Hat Inc ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++ ++#include "qemu/osdep.h" ++#include "hw/vmstate-if.h" ++ ++static const TypeInfo vmstate_if_info = { ++ .name = TYPE_VMSTATE_IF, ++ .parent = TYPE_INTERFACE, ++ .class_size = sizeof(VMStateIfClass), ++}; ++ ++static void vmstate_register_types(void) ++{ ++ type_register_static(&vmstate_if_info); ++} ++ ++type_init(vmstate_register_types); +diff --git a/include/hw/vmstate-if.h b/include/hw/vmstate-if.h +new file mode 100644 +index 0000000000..8ff7f0f292 +--- /dev/null ++++ b/include/hw/vmstate-if.h +@@ -0,0 +1,40 @@ ++/* ++ * VMState interface ++ * ++ * Copyright (c) 2009-2019 Red Hat Inc ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. 
++ */ ++ ++#ifndef VMSTATE_IF_H ++#define VMSTATE_IF_H ++ ++#include "qom/object.h" ++ ++#define TYPE_VMSTATE_IF "vmstate-if" ++ ++#define VMSTATE_IF_CLASS(klass) \ ++ OBJECT_CLASS_CHECK(VMStateIfClass, (klass), TYPE_VMSTATE_IF) ++#define VMSTATE_IF_GET_CLASS(obj) \ ++ OBJECT_GET_CLASS(VMStateIfClass, (obj), TYPE_VMSTATE_IF) ++#define VMSTATE_IF(obj) \ ++ INTERFACE_CHECK(VMStateIf, (obj), TYPE_VMSTATE_IF) ++ ++typedef struct VMStateIf VMStateIf; ++ ++typedef struct VMStateIfClass { ++ InterfaceClass parent_class; ++ ++ char * (*get_id)(VMStateIf *obj); ++} VMStateIfClass; ++ ++static inline char *vmstate_if_get_id(VMStateIf *vmif) ++{ ++ if (!vmif) { ++ return NULL; ++ } ++ ++ return VMSTATE_IF_GET_CLASS(vmif)->get_id(vmif); ++} ++ ++#endif /* VMSTATE_IF_H */ +diff --git a/include/migration/register.h b/include/migration/register.h +index f3ba10b6ef..158130c8c4 100644 +--- a/include/migration/register.h ++++ b/include/migration/register.h +@@ -14,6 +14,8 @@ + #ifndef MIGRATION_REGISTER_H + #define MIGRATION_REGISTER_H + ++#include "hw/vmstate-if.h" ++ + typedef struct SaveVMHandlers { + /* This runs inside the iothread lock. 
*/ + SaveStateHandler *save_state; +diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h +index 8abd2e3b80..8cc1e19fd9 100644 +--- a/include/migration/vmstate.h ++++ b/include/migration/vmstate.h +@@ -27,6 +27,8 @@ + #ifndef QEMU_VMSTATE_H + #define QEMU_VMSTATE_H + ++#include "hw/vmstate-if.h" ++ + typedef struct VMStateInfo VMStateInfo; + typedef struct VMStateDescription VMStateDescription; + typedef struct VMStateField VMStateField; +diff --git a/tests/Makefile.include b/tests/Makefile.include +index 3be60ab999..1c7772a230 100644 +--- a/tests/Makefile.include ++++ b/tests/Makefile.include +@@ -566,6 +566,7 @@ tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \ + hw/core/irq.o \ + hw/core/fw-path-provider.o \ + hw/core/reset.o \ ++ hw/core/vmstate-if.o \ + $(test-qapi-obj-y) + tests/test-vmstate$(EXESUF): tests/test-vmstate.o \ + migration/vmstate.o migration/vmstate-types.o migration/qemu-file.o \ +-- +2.27.0 +