diff --git a/Fix-use-after-free-in-vfio_migration_probe.patch b/Fix-use-after-free-in-vfio_migration_probe.patch new file mode 100644 index 0000000000000000000000000000000000000000..f0a94e60054da414102dbda43f9d111c4bc2e6d9 --- /dev/null +++ b/Fix-use-after-free-in-vfio_migration_probe.patch @@ -0,0 +1,39 @@ +From 126fc13ebe9c5e58a5b1daeb4e102e6fa5845779 Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Fri, 6 Nov 2020 23:32:24 +0530 +Subject: [PATCH] Fix use after free in vfio_migration_probe +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Fixes Coverity issue: +CID 1436126: Memory - illegal accesses (USE_AFTER_FREE) + +Fixes: a9e271ec9b36 ("vfio: Add migration region initialization and finalize function") +Signed-off-by: Kirti Wankhede +Reviewed-by: David Edmondson +Reviewed-by: Alex Bennée +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Alex Williamson +Signed-off-by: Kunkun Jiang +--- + hw/vfio/migration.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 1a97784486..8546075706 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -903,8 +903,8 @@ int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) + goto add_blocker; + } + +- g_free(info); + trace_vfio_migration_probe(vbasedev->name, info->index); ++ g_free(info); + return 0; + + add_blocker: +-- +2.27.0 + diff --git a/accel-kvm-Fix-memory-waste-under-mismatch-page-size.patch b/accel-kvm-Fix-memory-waste-under-mismatch-page-size.patch new file mode 100644 index 0000000000000000000000000000000000000000..daff52e901686e17c1c492e899165b773db96258 --- /dev/null +++ b/accel-kvm-Fix-memory-waste-under-mismatch-page-size.patch @@ -0,0 +1,45 @@ +From b50b9a0e2e5e8262c830df5994f3abbe0a37655a Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Thu, 17 Dec 2020 09:49:40 +0800 +Subject: [PATCH] accel: kvm: Fix memory waste under mismatch page size + +When handle dirty log, we face qemu_real_host_page_size and +TARGET_PAGE_SIZE. The first one is the granule of KVM dirty +bitmap, and the second one is the granule of QEMU dirty bitmap. + +As qemu_real_host_page_size >= TARGET_PAGE_SIZE (kvm_init() +enforced it), misuse TARGET_PAGE_SIZE to init kvmslot dirty_bmap +may waste memory. For example, when qemu_real_host_page_size is +64K and TARGET_PAGE_SIZE is 4K, it wastes 93.75% (15/16) memory. + +Signed-off-by: Keqian Zhu +Reviewed-by: Andrew Jones +Reviewed-by: Peter Xu +Message-Id: <20201217014941.22872-2-zhukeqian1@huawei.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Kunkun Jiang +--- + accel/kvm/kvm-all.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 5a6b89cc2a..4daff563a0 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -551,8 +551,12 @@ static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem) + * too, in most cases). + * So for now, let's align to 64 instead of HOST_LONG_BITS here, in + * a hope that sizeof(long) won't become >8 any time soon. ++ * ++ * Note: the granule of kvm dirty log is qemu_real_host_page_size. ++ * And mem->memory_size is aligned to it (otherwise this mem can't ++ * be registered to KVM). + */ +- hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), ++ hwaddr bitmap_size = ALIGN(mem->memory_size / qemu_real_host_page_size, + /*HOST_LONG_BITS*/ 64) / 8; + mem->dirty_bmap = g_malloc0(bitmap_size); + } +-- +2.27.0 + diff --git a/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch b/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch new file mode 100644 index 0000000000000000000000000000000000000000..dfa8bf6a01201096881ec49e34ddf0ed18eec84f --- /dev/null +++ b/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch @@ -0,0 +1,99 @@ +From ccfc5c99103e2f633084c906197075392f625a80 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 21 Nov 2019 16:56:45 +0000 +Subject: [PATCH] kvm: Reallocate dirty_bmap when we change a slot + +kvm_set_phys_mem can be called to reallocate a slot by something the +guest does (e.g. writing to PAM and other chipset registers). +This can happen in the middle of a migration, and if we're unlucky +it can now happen between the split 'sync' and 'clear'; the clear +asserts if there's no bmap to clear. Recreate the bmap whenever +we change the slot, keeping the clear path happy. + +Typically this is triggered by the guest rebooting during a migrate. + +Corresponds to: +https://bugzilla.redhat.com/show_bug.cgi?id=1772774 +https://bugzilla.redhat.com/show_bug.cgi?id=1771032 + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Peter Xu +Signed-off-by: Kunkun Jiang +--- + accel/kvm/kvm-all.c | 44 +++++++++++++++++++++++++++++--------------- + 1 file changed, 29 insertions(+), 15 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 6828f6a1f9..5a6b89cc2a 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -536,6 +536,27 @@ static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section, + + #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1)) + ++/* Allocate the dirty bitmap for a slot */ ++static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem) ++{ ++ /* ++ * XXX bad kernel interface alert ++ * For dirty bitmap, kernel allocates array of size aligned to ++ * bits-per-long. But for case when the kernel is 64bits and ++ * the userspace is 32bits, userspace can't align to the same ++ * bits-per-long, since sizeof(long) is different between kernel ++ * and user space. This way, userspace will provide buffer which ++ * may be 4 bytes less than the kernel will use, resulting in ++ * userspace memory corruption (which is not detectable by valgrind ++ * too, in most cases). ++ * So for now, let's align to 64 instead of HOST_LONG_BITS here, in ++ * a hope that sizeof(long) won't become >8 any time soon. ++ */ ++ hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), ++ /*HOST_LONG_BITS*/ 64) / 8; ++ mem->dirty_bmap = g_malloc0(bitmap_size); ++} ++ + /** + * kvm_physical_sync_dirty_bitmap - Sync dirty bitmap from kernel space + * +@@ -568,23 +589,9 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, + goto out; + } + +- /* XXX bad kernel interface alert +- * For dirty bitmap, kernel allocates array of size aligned to +- * bits-per-long. But for case when the kernel is 64bits and +- * the userspace is 32bits, userspace can't align to the same +- * bits-per-long, since sizeof(long) is different between kernel +- * and user space. This way, userspace will provide buffer which +- * may be 4 bytes less than the kernel will use, resulting in +- * userspace memory corruption (which is not detectable by valgrind +- * too, in most cases). +- * So for now, let's align to 64 instead of HOST_LONG_BITS here, in +- * a hope that sizeof(long) won't become >8 any time soon. +- */ + if (!mem->dirty_bmap) { +- hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), +- /*HOST_LONG_BITS*/ 64) / 8; + /* Allocate on the first log_sync, once and for all */ +- mem->dirty_bmap = g_malloc0(bitmap_size); ++ kvm_memslot_init_dirty_bitmap(mem); + } + + d.dirty_bitmap = mem->dirty_bmap; +@@ -1066,6 +1073,13 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + mem->ram = ram; + mem->flags = kvm_mem_flags(mr); + ++ if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { ++ /* ++ * Reallocate the bmap; it means it doesn't disappear in ++ * middle of a migrate. ++ */ ++ kvm_memslot_init_dirty_bitmap(mem); ++ } + err = kvm_set_user_memory_region(kml, mem, true); + if (err) { + fprintf(stderr, "%s: error registering slot: %s\n", __func__, +-- +2.27.0 + diff --git a/kvm-split-too-big-memory-section-on-several-memslots.patch b/kvm-split-too-big-memory-section-on-several-memslots.patch new file mode 100644 index 0000000000000000000000000000000000000000..9a94e21a773498e07764996501664313b9c98522 --- /dev/null +++ b/kvm-split-too-big-memory-section-on-several-memslots.patch @@ -0,0 +1,246 @@ +From 33f5a810b0edc1ac67163f396bd345e04b5c11e8 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Tue, 24 Sep 2019 10:47:50 -0400 +Subject: [PATCH] kvm: split too big memory section on several memslots + +Max memslot size supported by kvm on s390 is 8Tb, +move logic of splitting RAM in chunks upto 8T to KVM code. + +This way it will hide KVM specific restrictions in KVM code +and won't affect board level design decisions. Which would allow +us to avoid misusing memory_region_allocate_system_memory() API +and eventually use a single hostmem backend for guest RAM. + +Signed-off-by: Igor Mammedov +Message-Id: <20190924144751.24149-4-imammedo@redhat.com> +Reviewed-by: Peter Xu +Acked-by: Paolo Bonzini +Signed-off-by: Christian Borntraeger +Signed-off-by: Kunkun Jiang +--- + accel/kvm/kvm-all.c | 124 +++++++++++++++++++++++++-------------- + include/sysemu/kvm_int.h | 1 + + 2 files changed, 81 insertions(+), 44 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 84edbe8bb1..6828f6a1f9 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -138,6 +138,7 @@ bool kvm_direct_msi_allowed; + bool kvm_ioeventfd_any_length_allowed; + bool kvm_msi_use_devid; + static bool kvm_immediate_exit; ++static hwaddr kvm_max_slot_size = ~0; + + static const KVMCapabilityInfo kvm_required_capabilites[] = { + KVM_CAP_INFO(USER_MEMORY), +@@ -458,7 +459,7 @@ static int kvm_slot_update_flags(KVMMemoryListener *kml, KVMSlot *mem, + static int kvm_section_update_flags(KVMMemoryListener *kml, + MemoryRegionSection *section) + { +- hwaddr start_addr, size; ++ hwaddr start_addr, size, slot_size; + KVMSlot *mem; + int ret = 0; + +@@ -469,13 +470,18 @@ static int kvm_section_update_flags(KVMMemoryListener *kml, + + kvm_slots_lock(kml); + +- mem = kvm_lookup_matching_slot(kml, start_addr, size); +- if (!mem) { +- /* We don't have a slot if we want to trap every access. */ +- goto out; +- } ++ while (size && !ret) { ++ slot_size = MIN(kvm_max_slot_size, size); ++ mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); ++ if (!mem) { ++ /* We don't have a slot if we want to trap every access. */ ++ goto out; ++ } + +- ret = kvm_slot_update_flags(kml, mem, section->mr); ++ ret = kvm_slot_update_flags(kml, mem, section->mr); ++ start_addr += slot_size; ++ size -= slot_size; ++ } + + out: + kvm_slots_unlock(kml); +@@ -548,11 +554,15 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, + struct kvm_dirty_log d = {}; + KVMSlot *mem; + hwaddr start_addr, size; ++ hwaddr slot_size, slot_offset = 0; + int ret = 0; + + size = kvm_align_section(section, &start_addr); +- if (size) { +- mem = kvm_lookup_matching_slot(kml, start_addr, size); ++ while (size) { ++ MemoryRegionSection subsection = *section; ++ ++ slot_size = MIN(kvm_max_slot_size, size); ++ mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); + if (!mem) { + /* We don't have a slot if we want to trap every access. */ + goto out; +@@ -570,11 +580,11 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, + * So for now, let's align to 64 instead of HOST_LONG_BITS here, in + * a hope that sizeof(long) won't become >8 any time soon. + */ +- size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), +- /*HOST_LONG_BITS*/ 64) / 8; + if (!mem->dirty_bmap) { ++ hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), ++ /*HOST_LONG_BITS*/ 64) / 8; + /* Allocate on the first log_sync, once and for all */ +- mem->dirty_bmap = g_malloc0(size); ++ mem->dirty_bmap = g_malloc0(bitmap_size); + } + + d.dirty_bitmap = mem->dirty_bmap; +@@ -585,7 +595,13 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, + goto out; + } + +- kvm_get_dirty_pages_log_range(section, d.dirty_bitmap); ++ subsection.offset_within_region += slot_offset; ++ subsection.size = int128_make64(slot_size); ++ kvm_get_dirty_pages_log_range(&subsection, d.dirty_bitmap); ++ ++ slot_offset += slot_size; ++ start_addr += slot_size; ++ size -= slot_size; + } + out: + return ret; +@@ -974,6 +990,14 @@ kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list) + return NULL; + } + ++void kvm_set_max_memslot_size(hwaddr max_slot_size) ++{ ++ g_assert( ++ ROUND_UP(max_slot_size, qemu_real_host_page_size) == max_slot_size ++ ); ++ kvm_max_slot_size = max_slot_size; ++} ++ + static void kvm_set_phys_mem(KVMMemoryListener *kml, + MemoryRegionSection *section, bool add) + { +@@ -981,7 +1005,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + int err; + MemoryRegion *mr = section->mr; + bool writeable = !mr->readonly && !mr->rom_device; +- hwaddr start_addr, size; ++ hwaddr start_addr, size, slot_size; + void *ram; + + if (!memory_region_is_ram(mr)) { +@@ -1006,41 +1030,52 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + kvm_slots_lock(kml); + + if (!add) { +- mem = kvm_lookup_matching_slot(kml, start_addr, size); +- if (!mem) { +- goto out; +- } +- if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { +- kvm_physical_sync_dirty_bitmap(kml, section); +- } ++ do { ++ slot_size = MIN(kvm_max_slot_size, size); ++ mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); ++ if (!mem) { ++ goto out; ++ } ++ if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { ++ kvm_physical_sync_dirty_bitmap(kml, section); ++ } + +- /* unregister the slot */ +- g_free(mem->dirty_bmap); +- mem->dirty_bmap = NULL; +- mem->memory_size = 0; +- mem->flags = 0; +- err = kvm_set_user_memory_region(kml, mem, false); +- if (err) { +- fprintf(stderr, "%s: error unregistering slot: %s\n", +- __func__, strerror(-err)); +- abort(); +- } ++ /* unregister the slot */ ++ g_free(mem->dirty_bmap); ++ mem->dirty_bmap = NULL; ++ mem->memory_size = 0; ++ mem->flags = 0; ++ err = kvm_set_user_memory_region(kml, mem, false); ++ if (err) { ++ fprintf(stderr, "%s: error unregistering slot: %s\n", ++ __func__, strerror(-err)); ++ abort(); ++ } ++ start_addr += slot_size; ++ size -= slot_size; ++ } while (size); + goto out; + } + + /* register the new slot */ +- mem = kvm_alloc_slot(kml); +- mem->memory_size = size; +- mem->start_addr = start_addr; +- mem->ram = ram; +- mem->flags = kvm_mem_flags(mr); +- +- err = kvm_set_user_memory_region(kml, mem, true); +- if (err) { +- fprintf(stderr, "%s: error registering slot: %s\n", __func__, +- strerror(-err)); +- abort(); +- } ++ do { ++ slot_size = MIN(kvm_max_slot_size, size); ++ mem = kvm_alloc_slot(kml); ++ mem->memory_size = slot_size; ++ mem->start_addr = start_addr; ++ mem->ram = ram; ++ mem->flags = kvm_mem_flags(mr); ++ ++ err = kvm_set_user_memory_region(kml, mem, true); ++ if (err) { ++ fprintf(stderr, "%s: error registering slot: %s\n", __func__, ++ strerror(-err)); ++ abort(); ++ } ++ start_addr += slot_size; ++ ram += slot_size; ++ size -= slot_size; ++ } while (size); + + out: + kvm_slots_unlock(kml); +@@ -2880,6 +2915,7 @@ static bool kvm_accel_has_memory(MachineState *ms, AddressSpace *as, + + for (i = 0; i < kvm->nr_as; ++i) { + if (kvm->as[i].as == as && kvm->as[i].ml) { ++ size = MIN(kvm_max_slot_size, size); + return NULL != kvm_lookup_matching_slot(kvm->as[i].ml, + start_addr, size); + } +diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h +index 787dbc7770..f8e884f146 100644 +--- a/include/sysemu/kvm_int.h ++++ b/include/sysemu/kvm_int.h +@@ -43,4 +43,5 @@ typedef struct KVMMemoryListener { + void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, + AddressSpace *as, int as_id); + ++void kvm_set_max_memslot_size(hwaddr max_slot_size); + #endif +-- +2.27.0 + diff --git a/memory-Skip-dirty-tracking-for-un-migratable-memory-.patch b/memory-Skip-dirty-tracking-for-un-migratable-memory-.patch new file mode 100644 index 0000000000000000000000000000000000000000..8a25d177e1349ef0faca9b9280e70bb43dfd2837 --- /dev/null +++ b/memory-Skip-dirty-tracking-for-un-migratable-memory-.patch @@ -0,0 +1,42 @@ +From d0d816682b790b7d8a9caf17c32eadde7756ac9c Mon Sep 17 00:00:00 2001 +From: Zenghui Yu +Date: Mon, 16 Nov 2020 21:22:10 +0800 +Subject: [PATCH] memory: Skip dirty tracking for un-migratable memory regions + +It makes no sense to track dirty pages for those un-migratable memory +regions (e.g., Memory BAR region of the VFIO PCI device) and doing so +will potentially lead to some unpleasant issues during migration [1]. + +Skip dirty tracking for those regions by evaluating if the region is +migratable before setting dirty_log_mask (DIRTY_MEMORY_MIGRATION). + +[1] https://lists.gnu.org/archive/html/qemu-devel/2020-11/msg03757.html + +Signed-off-by: Zenghui Yu +Message-Id: <20201116132210.1730-1-yuzenghui@huawei.com> +Reviewed-by: Cornelia Huck +Signed-off-by: Paolo Bonzini +Signed-off-by: Kunkun Jiang +--- + memory.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/memory.c b/memory.c +index 44713efc66..708b3dff3d 100644 +--- a/memory.c ++++ b/memory.c +@@ -1825,7 +1825,10 @@ bool memory_region_is_ram_device(MemoryRegion *mr) + uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr) + { + uint8_t mask = mr->dirty_log_mask; +- if (global_dirty_log && (mr->ram_block || memory_region_is_iommu(mr))) { ++ RAMBlock *rb = mr->ram_block; ++ ++ if (global_dirty_log && ((rb && qemu_ram_is_migratable(rb)) || ++ memory_region_is_iommu(mr))) { + mask |= (1 << DIRTY_MEMORY_MIGRATION); + } + return mask; +-- +2.27.0 + diff --git a/qemu.spec b/qemu.spec index 752829b2f256d079774dde225b296a3129b0c439..62c6f6cc22e5def7e674f1fa31baa900f0827eca 100644 --- a/qemu.spec +++ b/qemu.spec @@ -1,6 +1,6 @@ Name: qemu Version: 4.1.0 -Release: 74 +Release: 75 Epoch: 2 Summary: QEMU is a generic and open source machine emulator and virtualizer License: GPLv2 and BSD and MIT and CC-BY-SA-4.0 @@ -501,6 +501,14 @@ Patch0488: qapi-Add-VFIO-devices-migration-stats-in-Migration-s.patch Patch0489: vfio-Move-the-saving-of-the-config-space-to-the-righ.patch Patch0490: vfio-Set-the-priority-of-the-VFIO-VM-state-change-ha.patch Patch0491: vfio-Avoid-disabling-and-enabling-vectors-repeatedly.patch +Patch0492: kvm-split-too-big-memory-section-on-several-memslots.patch +Patch0493: kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch +Patch0494: accel-kvm-Fix-memory-waste-under-mismatch-page-size.patch +Patch0495: memory-Skip-dirty-tracking-for-un-migratable-memory-.patch +Patch0496: Fix-use-after-free-in-vfio_migration_probe.patch +Patch0497: vfio-Make-migration-support-experimental.patch +Patch0498: vfio-Change-default-dirty-pages-tracking-behavior-du.patch +Patch0499: vfio-Fix-vfio_listener_log_sync-function-name-typo.patch BuildRequires: flex BuildRequires: gcc @@ -895,6 +903,16 @@ getent passwd qemu >/dev/null || \ %endif %changelog +* Tue Aug 03 2021 Chen Qun +- kvm: split too big memory section on several memslots +- kvm: Reallocate dirty_bmap when we change a slot +- accel: kvm: Fix memory waste under mismatch page size +- memory: Skip dirty tracking for un-migratable memory regions +- Fix use after free in vfio_migration_probe +- vfio: Make migration support experimental +- vfio: Change default dirty pages tracking behavior during migration +- vfio: Fix vfio_listener_log_sync function name typo + * Thu Jul 29 2021 Chen Qun - vfio: Move the saving of the config space to the right place in VFIO migration - vfio: Set the priority of the VFIO VM state change handler explicitly diff --git a/vfio-Change-default-dirty-pages-tracking-behavior-du.patch b/vfio-Change-default-dirty-pages-tracking-behavior-du.patch new file mode 100644 index 0000000000000000000000000000000000000000..d34f0541c8589124e35a10bb220be59e64f21e53 --- /dev/null +++ b/vfio-Change-default-dirty-pages-tracking-behavior-du.patch @@ -0,0 +1,87 @@ +From 69d1cc17c0a77dbd0d8e811cfaa899b01bf2e5bc Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 23 Nov 2020 19:53:19 +0530 +Subject: [PATCH] vfio: Change default dirty pages tracking behavior during + migration + +By default dirty pages tracking is enabled during iterative phase +(pre-copy phase). +Added per device opt-out option 'x-pre-copy-dirty-page-tracking' to +disable dirty pages tracking during iterative phase. If the option +'x-pre-copy-dirty-page-tracking=off' is set for any VFIO device, dirty +pages tracking during iterative phase will be disabled. + +Signed-off-by: Kirti Wankhede +Signed-off-by: Alex Williamson +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 11 +++++++---- + hw/vfio/pci.c | 3 +++ + include/hw/vfio/vfio-common.h | 1 + + 3 files changed, 11 insertions(+), 4 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index a86a4c4506..d9cc3509ef 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -310,7 +310,7 @@ bool vfio_mig_active(void) + return true; + } + +-static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container) ++static bool vfio_devices_all_saving(VFIOContainer *container) + { + VFIOGroup *group; + VFIODevice *vbasedev; +@@ -328,8 +328,11 @@ static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container) + return false; + } + +- if ((migration->device_state & VFIO_DEVICE_STATE_SAVING) && +- !(migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { ++ if (migration->device_state & VFIO_DEVICE_STATE_SAVING) { ++ if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) ++ && (migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { ++ return false; ++ } + continue; + } else { + return false; +@@ -1088,7 +1091,7 @@ static void vfio_listerner_log_sync(MemoryListener *listener, + return; + } + +- if (vfio_devices_all_stopped_and_saving(container)) { ++ if (vfio_devices_all_saving(container)) { + vfio_sync_dirty_bitmap(container, section); + } + } +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 2795b8bd12..3641ad0c5c 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3180,6 +3180,9 @@ static void vfio_instance_init(Object *obj) + static Property vfio_pci_dev_properties[] = { + DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host), + DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev), ++ DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice, ++ vbasedev.pre_copy_dirty_page_tracking, ++ ON_OFF_AUTO_ON), + DEFINE_PROP_ON_OFF_AUTO("display", VFIOPCIDevice, + display, ON_OFF_AUTO_OFF), + DEFINE_PROP_UINT32("xres", VFIOPCIDevice, display_xres, 0), +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 7398631d4c..475aa9fb40 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -130,6 +130,7 @@ typedef struct VFIODevice { + unsigned int flags; + VFIOMigration *migration; + Error *migration_blocker; ++ OnOffAuto pre_copy_dirty_page_tracking; + } VFIODevice; + + struct VFIODeviceOps { +-- +2.27.0 + diff --git a/vfio-Fix-vfio_listener_log_sync-function-name-typo.patch b/vfio-Fix-vfio_listener_log_sync-function-name-typo.patch new file mode 100644 index 0000000000000000000000000000000000000000..6a2324b57811f6d375bbc7f795dc07f78baa42e2 --- /dev/null +++ b/vfio-Fix-vfio_listener_log_sync-function-name-typo.patch @@ -0,0 +1,41 @@ +From 094aca3a87e63a0e6ae01b22f382c21dd91bb03e Mon Sep 17 00:00:00 2001 +From: Zenghui Yu +Date: Fri, 4 Dec 2020 09:42:40 +0800 +Subject: [PATCH] vfio: Fix vfio_listener_log_sync function name typo + +There is an obvious typo in the function name of the .log_sync() callback. +Spell it correctly. + +Signed-off-by: Zenghui Yu +Message-Id: <20201204014240.772-1-yuzenghui@huawei.com> +Signed-off-by: Alex Williamson +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index d9cc3509ef..ebd701faa0 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1081,7 +1081,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, + int128_get64(section->size), ram_addr); + } + +-static void vfio_listerner_log_sync(MemoryListener *listener, ++static void vfio_listener_log_sync(MemoryListener *listener, + MemoryRegionSection *section) + { + VFIOContainer *container = container_of(listener, VFIOContainer, listener); +@@ -1099,7 +1099,7 @@ static void vfio_listerner_log_sync(MemoryListener *listener, + static const MemoryListener vfio_memory_listener = { + .region_add = vfio_listener_region_add, + .region_del = vfio_listener_region_del, +- .log_sync = vfio_listerner_log_sync, ++ .log_sync = vfio_listener_log_sync, + }; + + static void vfio_listener_release(VFIOContainer *container) +-- +2.27.0 + diff --git a/vfio-Make-migration-support-experimental.patch b/vfio-Make-migration-support-experimental.patch new file mode 100644 index 0000000000000000000000000000000000000000..3bf32ecaf443b40929932743cd3d9f3b951011b2 --- /dev/null +++ b/vfio-Make-migration-support-experimental.patch @@ -0,0 +1,72 @@ +From d0a8ba1957743c55547ec2ccd8cb09b84a3354d2 Mon Sep 17 00:00:00 2001 +From: Alex Williamson +Date: Mon, 9 Nov 2020 11:56:02 -0700 +Subject: [PATCH] vfio: Make migration support experimental + +Support for migration of vfio devices is still in flux. Developers +are attempting to add support for new devices and new architectures, +but none are yet readily available for validation. We have concerns +whether we're transferring device resources at the right point in the +migration, whether we're guaranteeing that updates during pre-copy are +migrated, and whether we can provide bit-stream compatibility should +any of this change. Even the question of whether devices should +participate in dirty page tracking during pre-copy seems contentious. +In short, migration support has not had enough soak time and it feels +premature to mark it as supported. + +Create an experimental option such that we can continue to develop. + +[Retaining previous acks/reviews for a previously identical code + change with different specifics in the commit log.] + +Reviewed-by: Dr. David Alan Gilbert +Acked-by: Cornelia Huck +Signed-off-by: Alex Williamson +Signed-off-by: Kunkun Jiang +--- + hw/vfio/migration.c | 2 +- + hw/vfio/pci.c | 2 ++ + include/hw/vfio/vfio-common.h | 1 + + 3 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 8546075706..033cb2b0c9 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -888,7 +888,7 @@ int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) + Error *local_err = NULL; + int ret = -ENOTSUP; + +- if (!container->dirty_pages_supported) { ++ if (!vbasedev->enable_migration || !container->dirty_pages_supported) { + goto add_blocker; + } + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index da7c740bce..2795b8bd12 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3192,6 +3192,8 @@ static Property vfio_pci_dev_properties[] = { + VFIO_FEATURE_ENABLE_REQ_BIT, true), + DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features, + VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), ++ DEFINE_PROP_BOOL("x-enable-migration", VFIOPCIDevice, ++ vbasedev.enable_migration, false), + DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), + DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice, + vbasedev.balloon_allowed, false), +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 048731e81f..7398631d4c 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -123,6 +123,7 @@ typedef struct VFIODevice { + bool needs_reset; + bool no_mmap; + bool balloon_allowed; ++ bool enable_migration; + VFIODeviceOps *ops; + unsigned int num_irqs; + unsigned int num_regions; +-- +2.27.0 +