From 6af46536db08f3192a4be2e05e08b05719a68899 Mon Sep 17 00:00:00 2001 From: Chen Qun Date: Tue, 24 Sep 2019 10:47:50 -0400 Subject: [PATCH 01/10] kvm: split too big memory section on several memslots Max memslot size supported by kvm on s390 is 8Tb, move logic of splitting RAM in chunks upto 8T to KVM code. This way it will hide KVM specific restrictions in KVM code and won't affect board level design decisions. Which would allow us to avoid misusing memory_region_allocate_system_memory() API and eventually use a single hostmem backend for guest RAM. Signed-off-by: Igor Mammedov Message-Id: <20190924144751.24149-4-imammedo@redhat.com> Reviewed-by: Peter Xu Acked-by: Paolo Bonzini Signed-off-by: Christian Borntraeger Signed-off-by: Kunkun Jiang --- ...g-memory-section-on-several-memslots.patch | 246 ++++++++++++++++++ 1 file changed, 246 insertions(+) create mode 100644 kvm-split-too-big-memory-section-on-several-memslots.patch diff --git a/kvm-split-too-big-memory-section-on-several-memslots.patch b/kvm-split-too-big-memory-section-on-several-memslots.patch new file mode 100644 index 0000000..9a94e21 --- /dev/null +++ b/kvm-split-too-big-memory-section-on-several-memslots.patch @@ -0,0 +1,246 @@ +From 33f5a810b0edc1ac67163f396bd345e04b5c11e8 Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Tue, 24 Sep 2019 10:47:50 -0400 +Subject: [PATCH] kvm: split too big memory section on several memslots + +Max memslot size supported by kvm on s390 is 8Tb, +move logic of splitting RAM in chunks upto 8T to KVM code. + +This way it will hide KVM specific restrictions in KVM code +and won't affect board level design decisions. Which would allow +us to avoid misusing memory_region_allocate_system_memory() API +and eventually use a single hostmem backend for guest RAM. 
+ +Signed-off-by: Igor Mammedov +Message-Id: <20190924144751.24149-4-imammedo@redhat.com> +Reviewed-by: Peter Xu +Acked-by: Paolo Bonzini +Signed-off-by: Christian Borntraeger +Signed-off-by: Kunkun Jiang +--- + accel/kvm/kvm-all.c | 124 +++++++++++++++++++++++++-------------- + include/sysemu/kvm_int.h | 1 + + 2 files changed, 81 insertions(+), 44 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 84edbe8bb1..6828f6a1f9 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -138,6 +138,7 @@ bool kvm_direct_msi_allowed; + bool kvm_ioeventfd_any_length_allowed; + bool kvm_msi_use_devid; + static bool kvm_immediate_exit; ++static hwaddr kvm_max_slot_size = ~0; + + static const KVMCapabilityInfo kvm_required_capabilites[] = { + KVM_CAP_INFO(USER_MEMORY), +@@ -458,7 +459,7 @@ static int kvm_slot_update_flags(KVMMemoryListener *kml, KVMSlot *mem, + static int kvm_section_update_flags(KVMMemoryListener *kml, + MemoryRegionSection *section) + { +- hwaddr start_addr, size; ++ hwaddr start_addr, size, slot_size; + KVMSlot *mem; + int ret = 0; + +@@ -469,13 +470,18 @@ static int kvm_section_update_flags(KVMMemoryListener *kml, + + kvm_slots_lock(kml); + +- mem = kvm_lookup_matching_slot(kml, start_addr, size); +- if (!mem) { +- /* We don't have a slot if we want to trap every access. */ +- goto out; +- } ++ while (size && !ret) { ++ slot_size = MIN(kvm_max_slot_size, size); ++ mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); ++ if (!mem) { ++ /* We don't have a slot if we want to trap every access. 
*/ ++ goto out; ++ } + +- ret = kvm_slot_update_flags(kml, mem, section->mr); ++ ret = kvm_slot_update_flags(kml, mem, section->mr); ++ start_addr += slot_size; ++ size -= slot_size; ++ } + + out: + kvm_slots_unlock(kml); +@@ -548,11 +554,15 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, + struct kvm_dirty_log d = {}; + KVMSlot *mem; + hwaddr start_addr, size; ++ hwaddr slot_size, slot_offset = 0; + int ret = 0; + + size = kvm_align_section(section, &start_addr); +- if (size) { +- mem = kvm_lookup_matching_slot(kml, start_addr, size); ++ while (size) { ++ MemoryRegionSection subsection = *section; ++ ++ slot_size = MIN(kvm_max_slot_size, size); ++ mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); + if (!mem) { + /* We don't have a slot if we want to trap every access. */ + goto out; +@@ -570,11 +580,11 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, + * So for now, let's align to 64 instead of HOST_LONG_BITS here, in + * a hope that sizeof(long) won't become >8 any time soon. 
+ */ +- size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), +- /*HOST_LONG_BITS*/ 64) / 8; + if (!mem->dirty_bmap) { ++ hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), ++ /*HOST_LONG_BITS*/ 64) / 8; + /* Allocate on the first log_sync, once and for all */ +- mem->dirty_bmap = g_malloc0(size); ++ mem->dirty_bmap = g_malloc0(bitmap_size); + } + + d.dirty_bitmap = mem->dirty_bmap; +@@ -585,7 +595,13 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, + goto out; + } + +- kvm_get_dirty_pages_log_range(section, d.dirty_bitmap); ++ subsection.offset_within_region += slot_offset; ++ subsection.size = int128_make64(slot_size); ++ kvm_get_dirty_pages_log_range(&subsection, d.dirty_bitmap); ++ ++ slot_offset += slot_size; ++ start_addr += slot_size; ++ size -= slot_size; + } + out: + return ret; +@@ -974,6 +990,14 @@ kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list) + return NULL; + } + ++void kvm_set_max_memslot_size(hwaddr max_slot_size) ++{ ++ g_assert( ++ ROUND_UP(max_slot_size, qemu_real_host_page_size) == max_slot_size ++ ); ++ kvm_max_slot_size = max_slot_size; ++} ++ + static void kvm_set_phys_mem(KVMMemoryListener *kml, + MemoryRegionSection *section, bool add) + { +@@ -981,7 +1005,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + int err; + MemoryRegion *mr = section->mr; + bool writeable = !mr->readonly && !mr->rom_device; +- hwaddr start_addr, size; ++ hwaddr start_addr, size, slot_size; + void *ram; + + if (!memory_region_is_ram(mr)) { +@@ -1006,41 +1030,52 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + kvm_slots_lock(kml); + + if (!add) { +- mem = kvm_lookup_matching_slot(kml, start_addr, size); +- if (!mem) { +- goto out; +- } +- if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { +- kvm_physical_sync_dirty_bitmap(kml, section); +- } ++ do { ++ slot_size = MIN(kvm_max_slot_size, size); ++ mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); ++ if (!mem) { ++ goto out; ++ } ++ if 
(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { ++ kvm_physical_sync_dirty_bitmap(kml, section); ++ } + +- /* unregister the slot */ +- g_free(mem->dirty_bmap); +- mem->dirty_bmap = NULL; +- mem->memory_size = 0; +- mem->flags = 0; +- err = kvm_set_user_memory_region(kml, mem, false); +- if (err) { +- fprintf(stderr, "%s: error unregistering slot: %s\n", +- __func__, strerror(-err)); +- abort(); +- } ++ /* unregister the slot */ ++ g_free(mem->dirty_bmap); ++ mem->dirty_bmap = NULL; ++ mem->memory_size = 0; ++ mem->flags = 0; ++ err = kvm_set_user_memory_region(kml, mem, false); ++ if (err) { ++ fprintf(stderr, "%s: error unregistering slot: %s\n", ++ __func__, strerror(-err)); ++ abort(); ++ } ++ start_addr += slot_size; ++ size -= slot_size; ++ } while (size); + goto out; + } + + /* register the new slot */ +- mem = kvm_alloc_slot(kml); +- mem->memory_size = size; +- mem->start_addr = start_addr; +- mem->ram = ram; +- mem->flags = kvm_mem_flags(mr); +- +- err = kvm_set_user_memory_region(kml, mem, true); +- if (err) { +- fprintf(stderr, "%s: error registering slot: %s\n", __func__, +- strerror(-err)); +- abort(); +- } ++ do { ++ slot_size = MIN(kvm_max_slot_size, size); ++ mem = kvm_alloc_slot(kml); ++ mem->memory_size = slot_size; ++ mem->start_addr = start_addr; ++ mem->ram = ram; ++ mem->flags = kvm_mem_flags(mr); ++ ++ err = kvm_set_user_memory_region(kml, mem, true); ++ if (err) { ++ fprintf(stderr, "%s: error registering slot: %s\n", __func__, ++ strerror(-err)); ++ abort(); ++ } ++ start_addr += slot_size; ++ ram += slot_size; ++ size -= slot_size; ++ } while (size); + + out: + kvm_slots_unlock(kml); +@@ -2880,6 +2915,7 @@ static bool kvm_accel_has_memory(MachineState *ms, AddressSpace *as, + + for (i = 0; i < kvm->nr_as; ++i) { + if (kvm->as[i].as == as && kvm->as[i].ml) { ++ size = MIN(kvm_max_slot_size, size); + return NULL != kvm_lookup_matching_slot(kvm->as[i].ml, + start_addr, size); + } +diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h 
+index 787dbc7770..f8e884f146 100644 +--- a/include/sysemu/kvm_int.h ++++ b/include/sysemu/kvm_int.h +@@ -43,4 +43,5 @@ typedef struct KVMMemoryListener { + void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, + AddressSpace *as, int as_id); + ++void kvm_set_max_memslot_size(hwaddr max_slot_size); + #endif +-- +2.27.0 + -- Gitee From 0412463a7605c87ea14044f95bf313f4a74c7415 Mon Sep 17 00:00:00 2001 From: Chen Qun Date: Thu, 21 Nov 2019 16:56:45 +0000 Subject: [PATCH 02/10] kvm: Reallocate dirty_bmap when we change a slot kvm_set_phys_mem can be called to reallocate a slot by something the guest does (e.g. writing to PAM and other chipset registers). This can happen in the middle of a migration, and if we're unlucky it can now happen between the split 'sync' and 'clear'; the clear asserts if there's no bmap to clear. Recreate the bmap whenever we change the slot, keeping the clear path happy. Typically this is triggered by the guest rebooting during a migrate. Corresponds to: https://bugzilla.redhat.com/show_bug.cgi?id=1772774 https://bugzilla.redhat.com/show_bug.cgi?id=1771032 Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Signed-off-by: Kunkun Jiang --- ...ate-dirty_bmap-when-we-change-a-slot.patch | 99 +++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch diff --git a/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch b/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch new file mode 100644 index 0000000..dfa8bf6 --- /dev/null +++ b/kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch @@ -0,0 +1,99 @@ +From ccfc5c99103e2f633084c906197075392f625a80 Mon Sep 17 00:00:00 2001 +From: "Dr. David Alan Gilbert" +Date: Thu, 21 Nov 2019 16:56:45 +0000 +Subject: [PATCH] kvm: Reallocate dirty_bmap when we change a slot + +kvm_set_phys_mem can be called to reallocate a slot by something the +guest does (e.g. writing to PAM and other chipset registers). 
+This can happen in the middle of a migration, and if we're unlucky +it can now happen between the split 'sync' and 'clear'; the clear +asserts if there's no bmap to clear. Recreate the bmap whenever +we change the slot, keeping the clear path happy. + +Typically this is triggered by the guest rebooting during a migrate. + +Corresponds to: +https://bugzilla.redhat.com/show_bug.cgi?id=1772774 +https://bugzilla.redhat.com/show_bug.cgi?id=1771032 + +Signed-off-by: Dr. David Alan Gilbert +Reviewed-by: Peter Xu +Signed-off-by: Kunkun Jiang +--- + accel/kvm/kvm-all.c | 44 +++++++++++++++++++++++++++++--------------- + 1 file changed, 29 insertions(+), 15 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 6828f6a1f9..5a6b89cc2a 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -536,6 +536,27 @@ static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section, + + #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1)) + ++/* Allocate the dirty bitmap for a slot */ ++static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem) ++{ ++ /* ++ * XXX bad kernel interface alert ++ * For dirty bitmap, kernel allocates array of size aligned to ++ * bits-per-long. But for case when the kernel is 64bits and ++ * the userspace is 32bits, userspace can't align to the same ++ * bits-per-long, since sizeof(long) is different between kernel ++ * and user space. This way, userspace will provide buffer which ++ * may be 4 bytes less than the kernel will use, resulting in ++ * userspace memory corruption (which is not detectable by valgrind ++ * too, in most cases). ++ * So for now, let's align to 64 instead of HOST_LONG_BITS here, in ++ * a hope that sizeof(long) won't become >8 any time soon. 
++ */ ++ hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), ++ /*HOST_LONG_BITS*/ 64) / 8; ++ mem->dirty_bmap = g_malloc0(bitmap_size); ++} ++ + /** + * kvm_physical_sync_dirty_bitmap - Sync dirty bitmap from kernel space + * +@@ -568,23 +589,9 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, + goto out; + } + +- /* XXX bad kernel interface alert +- * For dirty bitmap, kernel allocates array of size aligned to +- * bits-per-long. But for case when the kernel is 64bits and +- * the userspace is 32bits, userspace can't align to the same +- * bits-per-long, since sizeof(long) is different between kernel +- * and user space. This way, userspace will provide buffer which +- * may be 4 bytes less than the kernel will use, resulting in +- * userspace memory corruption (which is not detectable by valgrind +- * too, in most cases). +- * So for now, let's align to 64 instead of HOST_LONG_BITS here, in +- * a hope that sizeof(long) won't become >8 any time soon. +- */ + if (!mem->dirty_bmap) { +- hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), +- /*HOST_LONG_BITS*/ 64) / 8; + /* Allocate on the first log_sync, once and for all */ +- mem->dirty_bmap = g_malloc0(bitmap_size); ++ kvm_memslot_init_dirty_bitmap(mem); + } + + d.dirty_bitmap = mem->dirty_bmap; +@@ -1066,6 +1073,13 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + mem->ram = ram; + mem->flags = kvm_mem_flags(mr); + ++ if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { ++ /* ++ * Reallocate the bmap; it means it doesn't disappear in ++ * middle of a migrate. 
++ */ ++ kvm_memslot_init_dirty_bitmap(mem); ++ } + err = kvm_set_user_memory_region(kml, mem, true); + if (err) { + fprintf(stderr, "%s: error registering slot: %s\n", __func__, +-- +2.27.0 + -- Gitee From 77ac49a90028d1be8c7b4e58b0c368873a11ca1d Mon Sep 17 00:00:00 2001 From: Chen Qun Date: Thu, 17 Dec 2020 09:49:40 +0800 Subject: [PATCH 03/10] accel: kvm: Fix memory waste under mismatch page size When handle dirty log, we face qemu_real_host_page_size and TARGET_PAGE_SIZE. The first one is the granule of KVM dirty bitmap, and the second one is the granule of QEMU dirty bitmap. As qemu_real_host_page_size >= TARGET_PAGE_SIZE (kvm_init() enforced it), misuse TARGET_PAGE_SIZE to init kvmslot dirty_bmap may waste memory. For example, when qemu_real_host_page_size is 64K and TARGET_PAGE_SIZE is 4K, it wastes 93.75% (15/16) memory. Signed-off-by: Keqian Zhu Reviewed-by: Andrew Jones Reviewed-by: Peter Xu Message-Id: <20201217014941.22872-2-zhukeqian1@huawei.com> Signed-off-by: Paolo Bonzini Signed-off-by: Kunkun Jiang --- ...emory-waste-under-mismatch-page-size.patch | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 accel-kvm-Fix-memory-waste-under-mismatch-page-size.patch diff --git a/accel-kvm-Fix-memory-waste-under-mismatch-page-size.patch b/accel-kvm-Fix-memory-waste-under-mismatch-page-size.patch new file mode 100644 index 0000000..daff52e --- /dev/null +++ b/accel-kvm-Fix-memory-waste-under-mismatch-page-size.patch @@ -0,0 +1,45 @@ +From b50b9a0e2e5e8262c830df5994f3abbe0a37655a Mon Sep 17 00:00:00 2001 +From: Keqian Zhu +Date: Thu, 17 Dec 2020 09:49:40 +0800 +Subject: [PATCH] accel: kvm: Fix memory waste under mismatch page size + +When handle dirty log, we face qemu_real_host_page_size and +TARGET_PAGE_SIZE. The first one is the granule of KVM dirty +bitmap, and the second one is the granule of QEMU dirty bitmap. 
+ +As qemu_real_host_page_size >= TARGET_PAGE_SIZE (kvm_init() +enforced it), misuse TARGET_PAGE_SIZE to init kvmslot dirty_bmap +may waste memory. For example, when qemu_real_host_page_size is +64K and TARGET_PAGE_SIZE is 4K, it wastes 93.75% (15/16) memory. + +Signed-off-by: Keqian Zhu +Reviewed-by: Andrew Jones +Reviewed-by: Peter Xu +Message-Id: <20201217014941.22872-2-zhukeqian1@huawei.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Kunkun Jiang +--- + accel/kvm/kvm-all.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index 5a6b89cc2a..4daff563a0 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -551,8 +551,12 @@ static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem) + * too, in most cases). + * So for now, let's align to 64 instead of HOST_LONG_BITS here, in + * a hope that sizeof(long) won't become >8 any time soon. ++ * ++ * Note: the granule of kvm dirty log is qemu_real_host_page_size. ++ * And mem->memory_size is aligned to it (otherwise this mem can't ++ * be registered to KVM). + */ +- hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), ++ hwaddr bitmap_size = ALIGN(mem->memory_size / qemu_real_host_page_size, + /*HOST_LONG_BITS*/ 64) / 8; + mem->dirty_bmap = g_malloc0(bitmap_size); + } +-- +2.27.0 + -- Gitee From 9b987b51e044b257244dbfba2b0f14d9a1e50aa3 Mon Sep 17 00:00:00 2001 From: Chen Qun Date: Mon, 16 Nov 2020 21:22:10 +0800 Subject: [PATCH 04/10] memory: Skip dirty tracking for un-migratable memory regions It makes no sense to track dirty pages for those un-migratable memory regions (e.g., Memory BAR region of the VFIO PCI device) and doing so will potentially lead to some unpleasant issues during migration [1]. Skip dirty tracking for those regions by evaluating if the region is migratable before setting dirty_log_mask (DIRTY_MEMORY_MIGRATION). 
[1] https://lists.gnu.org/archive/html/qemu-devel/2020-11/msg03757.html Signed-off-by: Zenghui Yu Message-Id: <20201116132210.1730-1-yuzenghui@huawei.com> Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini Signed-off-by: Kunkun Jiang --- ...y-tracking-for-un-migratable-memory-.patch | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 memory-Skip-dirty-tracking-for-un-migratable-memory-.patch diff --git a/memory-Skip-dirty-tracking-for-un-migratable-memory-.patch b/memory-Skip-dirty-tracking-for-un-migratable-memory-.patch new file mode 100644 index 0000000..8a25d17 --- /dev/null +++ b/memory-Skip-dirty-tracking-for-un-migratable-memory-.patch @@ -0,0 +1,42 @@ +From d0d816682b790b7d8a9caf17c32eadde7756ac9c Mon Sep 17 00:00:00 2001 +From: Zenghui Yu +Date: Mon, 16 Nov 2020 21:22:10 +0800 +Subject: [PATCH] memory: Skip dirty tracking for un-migratable memory regions + +It makes no sense to track dirty pages for those un-migratable memory +regions (e.g., Memory BAR region of the VFIO PCI device) and doing so +will potentially lead to some unpleasant issues during migration [1]. + +Skip dirty tracking for those regions by evaluating if the region is +migratable before setting dirty_log_mask (DIRTY_MEMORY_MIGRATION). 
+ +[1] https://lists.gnu.org/archive/html/qemu-devel/2020-11/msg03757.html + +Signed-off-by: Zenghui Yu +Message-Id: <20201116132210.1730-1-yuzenghui@huawei.com> +Reviewed-by: Cornelia Huck +Signed-off-by: Paolo Bonzini +Signed-off-by: Kunkun Jiang +--- + memory.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/memory.c b/memory.c +index 44713efc66..708b3dff3d 100644 +--- a/memory.c ++++ b/memory.c +@@ -1825,7 +1825,10 @@ bool memory_region_is_ram_device(MemoryRegion *mr) + uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr) + { + uint8_t mask = mr->dirty_log_mask; +- if (global_dirty_log && (mr->ram_block || memory_region_is_iommu(mr))) { ++ RAMBlock *rb = mr->ram_block; ++ ++ if (global_dirty_log && ((rb && qemu_ram_is_migratable(rb)) || ++ memory_region_is_iommu(mr))) { + mask |= (1 << DIRTY_MEMORY_MIGRATION); + } + return mask; +-- +2.27.0 + -- Gitee From d65f9e660af3df23635c236b635dd438ded502ad Mon Sep 17 00:00:00 2001 From: Chen Qun Date: Fri, 6 Nov 2020 23:32:24 +0530 Subject: [PATCH 05/10] Fix use after free in vfio_migration_probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes Coverity issue: CID 1436126: Memory - illegal accesses (USE_AFTER_FREE) Fixes: a9e271ec9b36 ("vfio: Add migration region initialization and finalize function") Signed-off-by: Kirti Wankhede Reviewed-by: David Edmondson Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Alex Williamson Signed-off-by: Kunkun Jiang --- ...e-after-free-in-vfio_migration_probe.patch | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 Fix-use-after-free-in-vfio_migration_probe.patch diff --git a/Fix-use-after-free-in-vfio_migration_probe.patch b/Fix-use-after-free-in-vfio_migration_probe.patch new file mode 100644 index 0000000..f0a94e6 --- /dev/null +++ b/Fix-use-after-free-in-vfio_migration_probe.patch @@ -0,0 +1,39 @@ +From 126fc13ebe9c5e58a5b1daeb4e102e6fa5845779 Mon Sep 
17 00:00:00 2001 +From: Kirti Wankhede +Date: Fri, 6 Nov 2020 23:32:24 +0530 +Subject: [PATCH] Fix use after free in vfio_migration_probe +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Fixes Coverity issue: +CID 1436126: Memory - illegal accesses (USE_AFTER_FREE) + +Fixes: a9e271ec9b36 ("vfio: Add migration region initialization and finalize function") +Signed-off-by: Kirti Wankhede +Reviewed-by: David Edmondson +Reviewed-by: Alex Bennée +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Alex Williamson +Signed-off-by: Kunkun Jiang +--- + hw/vfio/migration.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 1a97784486..8546075706 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -903,8 +903,8 @@ int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) + goto add_blocker; + } + +- g_free(info); + trace_vfio_migration_probe(vbasedev->name, info->index); ++ g_free(info); + return 0; + + add_blocker: +-- +2.27.0 + -- Gitee From 2681fc71af132f6a674aa54a36d1b4e5b1ab9917 Mon Sep 17 00:00:00 2001 From: Chen Qun Date: Mon, 9 Nov 2020 11:56:02 -0700 Subject: [PATCH 06/10] vfio: Make migration support experimental Support for migration of vfio devices is still in flux. Developers are attempting to add support for new devices and new architectures, but none are yet readily available for validation. We have concerns whether we're transferring device resources at the right point in the migration, whether we're guaranteeing that updates during pre-copy are migrated, and whether we can provide bit-stream compatibility should any of this change. Even the question of whether devices should participate in dirty page tracking during pre-copy seems contentious. In short, migration support has not had enough soak time and it feels premature to mark it as supported. Create an experimental option such that we can continue to develop. 
[Retaining previous acks/reviews for a previously identical code change with different specifics in the commit log.] Reviewed-by: Dr. David Alan Gilbert Acked-by: Cornelia Huck Signed-off-by: Alex Williamson Signed-off-by: Kunkun Jiang --- ...-Make-migration-support-experimental.patch | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 vfio-Make-migration-support-experimental.patch diff --git a/vfio-Make-migration-support-experimental.patch b/vfio-Make-migration-support-experimental.patch new file mode 100644 index 0000000..3bf32ec --- /dev/null +++ b/vfio-Make-migration-support-experimental.patch @@ -0,0 +1,72 @@ +From d0a8ba1957743c55547ec2ccd8cb09b84a3354d2 Mon Sep 17 00:00:00 2001 +From: Alex Williamson +Date: Mon, 9 Nov 2020 11:56:02 -0700 +Subject: [PATCH] vfio: Make migration support experimental + +Support for migration of vfio devices is still in flux. Developers +are attempting to add support for new devices and new architectures, +but none are yet readily available for validation. We have concerns +whether we're transferring device resources at the right point in the +migration, whether we're guaranteeing that updates during pre-copy are +migrated, and whether we can provide bit-stream compatibility should +any of this change. Even the question of whether devices should +participate in dirty page tracking during pre-copy seems contentious. +In short, migration support has not had enough soak time and it feels +premature to mark it as supported. + +Create an experimental option such that we can continue to develop. + +[Retaining previous acks/reviews for a previously identical code + change with different specifics in the commit log.] + +Reviewed-by: Dr. 
David Alan Gilbert +Acked-by: Cornelia Huck +Signed-off-by: Alex Williamson +Signed-off-by: Kunkun Jiang +--- + hw/vfio/migration.c | 2 +- + hw/vfio/pci.c | 2 ++ + include/hw/vfio/vfio-common.h | 1 + + 3 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 8546075706..033cb2b0c9 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -888,7 +888,7 @@ int vfio_migration_probe(VFIODevice *vbasedev, Error **errp) + Error *local_err = NULL; + int ret = -ENOTSUP; + +- if (!container->dirty_pages_supported) { ++ if (!vbasedev->enable_migration || !container->dirty_pages_supported) { + goto add_blocker; + } + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index da7c740bce..2795b8bd12 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3192,6 +3192,8 @@ static Property vfio_pci_dev_properties[] = { + VFIO_FEATURE_ENABLE_REQ_BIT, true), + DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features, + VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), ++ DEFINE_PROP_BOOL("x-enable-migration", VFIOPCIDevice, ++ vbasedev.enable_migration, false), + DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), + DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice, + vbasedev.balloon_allowed, false), +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 048731e81f..7398631d4c 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -123,6 +123,7 @@ typedef struct VFIODevice { + bool needs_reset; + bool no_mmap; + bool balloon_allowed; ++ bool enable_migration; + VFIODeviceOps *ops; + unsigned int num_irqs; + unsigned int num_regions; +-- +2.27.0 + -- Gitee From a00928107ec9f28cf652975b2912323692eb804d Mon Sep 17 00:00:00 2001 From: Chen Qun Date: Mon, 23 Nov 2020 19:53:19 +0530 Subject: [PATCH 07/10] vfio: Change default dirty pages tracking behavior during migration By default dirty pages tracking is enabled during iterative phase (pre-copy phase). 
Added per device opt-out option 'x-pre-copy-dirty-page-tracking' to disable dirty pages tracking during iterative phase. If the option 'x-pre-copy-dirty-page-tracking=off' is set for any VFIO device, dirty pages tracking during iterative phase will be disabled. Signed-off-by: Kirti Wankhede Signed-off-by: Alex Williamson Signed-off-by: Kunkun Jiang --- ...ult-dirty-pages-tracking-behavior-du.patch | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 vfio-Change-default-dirty-pages-tracking-behavior-du.patch diff --git a/vfio-Change-default-dirty-pages-tracking-behavior-du.patch b/vfio-Change-default-dirty-pages-tracking-behavior-du.patch new file mode 100644 index 0000000..d34f054 --- /dev/null +++ b/vfio-Change-default-dirty-pages-tracking-behavior-du.patch @@ -0,0 +1,87 @@ +From 69d1cc17c0a77dbd0d8e811cfaa899b01bf2e5bc Mon Sep 17 00:00:00 2001 +From: Kirti Wankhede +Date: Mon, 23 Nov 2020 19:53:19 +0530 +Subject: [PATCH] vfio: Change default dirty pages tracking behavior during + migration + +By default dirty pages tracking is enabled during iterative phase +(pre-copy phase). +Added per device opt-out option 'x-pre-copy-dirty-page-tracking' to +disable dirty pages tracking during iterative phase. If the option +'x-pre-copy-dirty-page-tracking=off' is set for any VFIO device, dirty +pages tracking during iterative phase will be disabled. 
+ +Signed-off-by: Kirti Wankhede +Signed-off-by: Alex Williamson +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 11 +++++++---- + hw/vfio/pci.c | 3 +++ + include/hw/vfio/vfio-common.h | 1 + + 3 files changed, 11 insertions(+), 4 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index a86a4c4506..d9cc3509ef 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -310,7 +310,7 @@ bool vfio_mig_active(void) + return true; + } + +-static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container) ++static bool vfio_devices_all_saving(VFIOContainer *container) + { + VFIOGroup *group; + VFIODevice *vbasedev; +@@ -328,8 +328,11 @@ static bool vfio_devices_all_stopped_and_saving(VFIOContainer *container) + return false; + } + +- if ((migration->device_state & VFIO_DEVICE_STATE_SAVING) && +- !(migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { ++ if (migration->device_state & VFIO_DEVICE_STATE_SAVING) { ++ if ((vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF) ++ && (migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { ++ return false; ++ } + continue; + } else { + return false; +@@ -1088,7 +1091,7 @@ static void vfio_listerner_log_sync(MemoryListener *listener, + return; + } + +- if (vfio_devices_all_stopped_and_saving(container)) { ++ if (vfio_devices_all_saving(container)) { + vfio_sync_dirty_bitmap(container, section); + } + } +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 2795b8bd12..3641ad0c5c 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3180,6 +3180,9 @@ static void vfio_instance_init(Object *obj) + static Property vfio_pci_dev_properties[] = { + DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host), + DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev), ++ DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice, ++ vbasedev.pre_copy_dirty_page_tracking, ++ ON_OFF_AUTO_ON), + DEFINE_PROP_ON_OFF_AUTO("display", VFIOPCIDevice, + display, ON_OFF_AUTO_OFF), + 
DEFINE_PROP_UINT32("xres", VFIOPCIDevice, display_xres, 0), +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 7398631d4c..475aa9fb40 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -130,6 +130,7 @@ typedef struct VFIODevice { + unsigned int flags; + VFIOMigration *migration; + Error *migration_blocker; ++ OnOffAuto pre_copy_dirty_page_tracking; + } VFIODevice; + + struct VFIODeviceOps { +-- +2.27.0 + -- Gitee From e1f8e41b47064819d311f580121363dd8cd19797 Mon Sep 17 00:00:00 2001 From: Chen Qun Date: Fri, 4 Dec 2020 09:42:40 +0800 Subject: [PATCH 08/10] vfio: Fix vfio_listener_log_sync function name typo There is an obvious typo in the function name of the .log_sync() callback. Spell it correctly. Signed-off-by: Zenghui Yu Message-Id: <20201204014240.772-1-yuzenghui@huawei.com> Signed-off-by: Alex Williamson Signed-off-by: Kunkun Jiang --- ...listener_log_sync-function-name-typo.patch | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 vfio-Fix-vfio_listener_log_sync-function-name-typo.patch diff --git a/vfio-Fix-vfio_listener_log_sync-function-name-typo.patch b/vfio-Fix-vfio_listener_log_sync-function-name-typo.patch new file mode 100644 index 0000000..6a2324b --- /dev/null +++ b/vfio-Fix-vfio_listener_log_sync-function-name-typo.patch @@ -0,0 +1,41 @@ +From 094aca3a87e63a0e6ae01b22f382c21dd91bb03e Mon Sep 17 00:00:00 2001 +From: Zenghui Yu +Date: Fri, 4 Dec 2020 09:42:40 +0800 +Subject: [PATCH] vfio: Fix vfio_listener_log_sync function name typo + +There is an obvious typo in the function name of the .log_sync() callback. +Spell it correctly. 
+ +Signed-off-by: Zenghui Yu +Message-Id: <20201204014240.772-1-yuzenghui@huawei.com> +Signed-off-by: Alex Williamson +Signed-off-by: Kunkun Jiang +--- + hw/vfio/common.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index d9cc3509ef..ebd701faa0 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1081,7 +1081,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer *container, + int128_get64(section->size), ram_addr); + } + +-static void vfio_listerner_log_sync(MemoryListener *listener, ++static void vfio_listener_log_sync(MemoryListener *listener, + MemoryRegionSection *section) + { + VFIOContainer *container = container_of(listener, VFIOContainer, listener); +@@ -1099,7 +1099,7 @@ static void vfio_listerner_log_sync(MemoryListener *listener, + static const MemoryListener vfio_memory_listener = { + .region_add = vfio_listener_region_add, + .region_del = vfio_listener_region_del, +- .log_sync = vfio_listerner_log_sync, ++ .log_sync = vfio_listener_log_sync, + }; + + static void vfio_listener_release(VFIOContainer *container) +-- +2.27.0 + -- Gitee From c161b2c6fc113cb303d9f72c6689ab1862c124cd Mon Sep 17 00:00:00 2001 From: Chen Qun Date: Tue, 3 Aug 2021 21:27:12 +0800 Subject: [PATCH 09/10] spec: Update patch and changelog with !182 Some fixes and optimizations of migration !182 kvm: split too big memory section on several memslots kvm: Reallocate dirty_bmap when we change a slot accel: kvm: Fix memory waste under mismatch page size memory: Skip dirty tracking for un-migratable memory regions Fix use after free in vfio_migration_probe vfio: Make migration support experimental vfio: Change default dirty pages tracking behavior during migration vfio: Fix vfio_listener_log_sync function name typo Signed-off-by: Chen Qun --- qemu.spec | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/qemu.spec b/qemu.spec index 752829b..7059b7c 100644 --- a/qemu.spec +++ b/qemu.spec @@ -501,6 
+501,14 @@ Patch0488: qapi-Add-VFIO-devices-migration-stats-in-Migration-s.patch Patch0489: vfio-Move-the-saving-of-the-config-space-to-the-righ.patch Patch0490: vfio-Set-the-priority-of-the-VFIO-VM-state-change-ha.patch Patch0491: vfio-Avoid-disabling-and-enabling-vectors-repeatedly.patch +Patch0492: kvm-split-too-big-memory-section-on-several-memslots.patch +Patch0493: kvm-Reallocate-dirty_bmap-when-we-change-a-slot.patch +Patch0494: accel-kvm-Fix-memory-waste-under-mismatch-page-size.patch +Patch0495: memory-Skip-dirty-tracking-for-un-migratable-memory-.patch +Patch0496: Fix-use-after-free-in-vfio_migration_probe.patch +Patch0497: vfio-Make-migration-support-experimental.patch +Patch0498: vfio-Change-default-dirty-pages-tracking-behavior-du.patch +Patch0499: vfio-Fix-vfio_listener_log_sync-function-name-typo.patch BuildRequires: flex BuildRequires: gcc @@ -895,6 +903,16 @@ getent passwd qemu >/dev/null || \ %endif %changelog +* Tue Aug 03 2021 Chen Qun +- kvm: split too big memory section on several memslots +- kvm: Reallocate dirty_bmap when we change a slot +- accel: kvm: Fix memory waste under mismatch page size +- memory: Skip dirty tracking for un-migratable memory regions +- Fix use after free in vfio_migration_probe +- vfio: Make migration support experimental +- vfio: Change default dirty pages tracking behavior during migration +- vfio: Fix vfio_listener_log_sync function name typo + * Thu Jul 29 2021 Chen Qun - vfio: Move the saving of the config space to the right place in VFIO migration - vfio: Set the priority of the VFIO VM state change handler explicitly -- Gitee From 5305f9170e933a2d1088ea2126c7cefc92f7b22f Mon Sep 17 00:00:00 2001 From: Chen Qun Date: Tue, 3 Aug 2021 21:27:12 +0800 Subject: [PATCH 10/10] spec: Update release version with !182 increase release version by one Signed-off-by: Chen Qun --- qemu.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qemu.spec b/qemu.spec index 7059b7c..62c6f6c 100644 --- a/qemu.spec
+++ b/qemu.spec @@ -1,6 +1,6 @@ Name: qemu Version: 4.1.0 -Release: 74 +Release: 75 Epoch: 2 Summary: QEMU is a generic and open source machine emulator and virtualizer License: GPLv2 and BSD and MIT and CC-BY-SA-4.0 -- Gitee