From d55da13380bba168e230bd01a52275d517958f05 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 24 Sep 2019 10:47:50 -0400 Subject: [PATCH 1/2] kvm: split too big memory section on several memslots Max memslot size supported by kvm on s390 is 8Tb, move logic of splitting RAM in chunks upto 8T to KVM code. This way it will hide KVM specific restrictions in KVM code and won't affect board level design decisions. Which would allow us to avoid misusing memory_region_allocate_system_memory() API and eventually use a single hostmem backend for guest RAM. Signed-off-by: Igor Mammedov Message-Id: <20190924144751.24149-4-imammedo@redhat.com> Reviewed-by: Peter Xu Acked-by: Paolo Bonzini Signed-off-by: Christian Borntraeger Signed-off-by: Kunkun Jiang --- accel/kvm/kvm-all.c | 124 +++++++++++++++++++++++++-------------- include/sysemu/kvm_int.h | 1 + 2 files changed, 81 insertions(+), 44 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 84edbe8bb1..6828f6a1f9 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -138,6 +138,7 @@ bool kvm_direct_msi_allowed; bool kvm_ioeventfd_any_length_allowed; bool kvm_msi_use_devid; static bool kvm_immediate_exit; +static hwaddr kvm_max_slot_size = ~0; static const KVMCapabilityInfo kvm_required_capabilites[] = { KVM_CAP_INFO(USER_MEMORY), @@ -458,7 +459,7 @@ static int kvm_slot_update_flags(KVMMemoryListener *kml, KVMSlot *mem, static int kvm_section_update_flags(KVMMemoryListener *kml, MemoryRegionSection *section) { - hwaddr start_addr, size; + hwaddr start_addr, size, slot_size; KVMSlot *mem; int ret = 0; @@ -469,13 +470,18 @@ static int kvm_section_update_flags(KVMMemoryListener *kml, kvm_slots_lock(kml); - mem = kvm_lookup_matching_slot(kml, start_addr, size); - if (!mem) { - /* We don't have a slot if we want to trap every access. */ - goto out; - } + while (size && !ret) { + slot_size = MIN(kvm_max_slot_size, size); + mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); + if (!mem) { + /* We don't have a slot if we want to trap every access. */ + goto out; + } - ret = kvm_slot_update_flags(kml, mem, section->mr); + ret = kvm_slot_update_flags(kml, mem, section->mr); + start_addr += slot_size; + size -= slot_size; + } out: kvm_slots_unlock(kml); @@ -548,11 +554,15 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, struct kvm_dirty_log d = {}; KVMSlot *mem; hwaddr start_addr, size; + hwaddr slot_size, slot_offset = 0; int ret = 0; size = kvm_align_section(section, &start_addr); - if (size) { - mem = kvm_lookup_matching_slot(kml, start_addr, size); + while (size) { + MemoryRegionSection subsection = *section; + + slot_size = MIN(kvm_max_slot_size, size); + mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); if (!mem) { /* We don't have a slot if we want to trap every access. */ goto out; @@ -570,11 +580,11 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, * So for now, let's align to 64 instead of HOST_LONG_BITS here, in * a hope that sizeof(long) won't become >8 any time soon. */ - size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), - /*HOST_LONG_BITS*/ 64) / 8; if (!mem->dirty_bmap) { + hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), + /*HOST_LONG_BITS*/ 64) / 8; /* Allocate on the first log_sync, once and for all */ - mem->dirty_bmap = g_malloc0(size); + mem->dirty_bmap = g_malloc0(bitmap_size); } d.dirty_bitmap = mem->dirty_bmap; @@ -585,7 +595,13 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, goto out; } - kvm_get_dirty_pages_log_range(section, d.dirty_bitmap); + subsection.offset_within_region += slot_offset; + subsection.size = int128_make64(slot_size); + kvm_get_dirty_pages_log_range(&subsection, d.dirty_bitmap); + + slot_offset += slot_size; + start_addr += slot_size; + size -= slot_size; } out: return ret; @@ -974,6 +990,14 @@ kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list) return NULL; } +void kvm_set_max_memslot_size(hwaddr max_slot_size) +{ + g_assert( + ROUND_UP(max_slot_size, qemu_real_host_page_size) == max_slot_size + ); + kvm_max_slot_size = max_slot_size; +} + static void kvm_set_phys_mem(KVMMemoryListener *kml, MemoryRegionSection *section, bool add) { @@ -981,7 +1005,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, int err; MemoryRegion *mr = section->mr; bool writeable = !mr->readonly && !mr->rom_device; - hwaddr start_addr, size; + hwaddr start_addr, size, slot_size; void *ram; if (!memory_region_is_ram(mr)) { @@ -1006,41 +1030,52 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, kvm_slots_lock(kml); if (!add) { - mem = kvm_lookup_matching_slot(kml, start_addr, size); - if (!mem) { - goto out; - } - if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { - kvm_physical_sync_dirty_bitmap(kml, section); - } + do { + slot_size = MIN(kvm_max_slot_size, size); + mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); + if (!mem) { + goto out; + } + if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { + kvm_physical_sync_dirty_bitmap(kml, section); + } - /* unregister the slot */ - g_free(mem->dirty_bmap); - mem->dirty_bmap = NULL; - mem->memory_size = 0; - mem->flags = 0; - err = kvm_set_user_memory_region(kml, mem, false); - if (err) { - fprintf(stderr, "%s: error unregistering slot: %s\n", - __func__, strerror(-err)); - abort(); - } + /* unregister the slot */ + g_free(mem->dirty_bmap); + mem->dirty_bmap = NULL; + mem->memory_size = 0; + mem->flags = 0; + err = kvm_set_user_memory_region(kml, mem, false); + if (err) { + fprintf(stderr, "%s: error unregistering slot: %s\n", + __func__, strerror(-err)); + abort(); + } + start_addr += slot_size; + size -= slot_size; + } while (size); goto out; } /* register the new slot */ - mem = kvm_alloc_slot(kml); - mem->memory_size = size; - mem->start_addr = start_addr; - mem->ram = ram; - mem->flags = kvm_mem_flags(mr); - - err = kvm_set_user_memory_region(kml, mem, true); - if (err) { - fprintf(stderr, "%s: error registering slot: %s\n", __func__, - strerror(-err)); - abort(); - } + do { + slot_size = MIN(kvm_max_slot_size, size); + mem = kvm_alloc_slot(kml); + mem->memory_size = slot_size; + mem->start_addr = start_addr; + mem->ram = ram; + mem->flags = kvm_mem_flags(mr); + + err = kvm_set_user_memory_region(kml, mem, true); + if (err) { + fprintf(stderr, "%s: error registering slot: %s\n", __func__, + strerror(-err)); + abort(); + } + start_addr += slot_size; + ram += slot_size; + size -= slot_size; + } while (size); out: kvm_slots_unlock(kml); @@ -2880,6 +2915,7 @@ static bool kvm_accel_has_memory(MachineState *ms, AddressSpace *as, for (i = 0; i < kvm->nr_as; ++i) { if (kvm->as[i].as == as && kvm->as[i].ml) { + size = MIN(kvm_max_slot_size, size); return NULL != kvm_lookup_matching_slot(kvm->as[i].ml, start_addr, size); } diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h index 787dbc7770..f8e884f146 100644 --- a/include/sysemu/kvm_int.h +++ b/include/sysemu/kvm_int.h @@ -43,4 +43,5 @@ typedef struct KVMMemoryListener { void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, AddressSpace *as, int as_id); +void kvm_set_max_memslot_size(hwaddr max_slot_size); #endif -- Gitee From b07c7801bb8b4010eab2800160c6d3e5ddfaffa8 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 21 Nov 2019 16:56:45 +0000 Subject: [PATCH 2/2] kvm: Reallocate dirty_bmap when we change a slot kvm_set_phys_mem can be called to reallocate a slot by something the guest does (e.g. writing to PAM and other chipset registers). This can happen in the middle of a migration, and if we're unlucky it can now happen between the split 'sync' and 'clear'; the clear asserts if there's no bmap to clear. Recreate the bmap whenever we change the slot, keeping the clear path happy. Typically this is triggered by the guest rebooting during a migrate. Corresponds to: https://bugzilla.redhat.com/show_bug.cgi?id=1772774 https://bugzilla.redhat.com/show_bug.cgi?id=1771032 Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Peter Xu Signed-off-by: Kunkun Jiang --- accel/kvm/kvm-all.c | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 6828f6a1f9..5a6b89cc2a 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -536,6 +536,27 @@ static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section, #define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1)) +/* Allocate the dirty bitmap for a slot */ +static void kvm_memslot_init_dirty_bitmap(KVMSlot *mem) +{ + /* + * XXX bad kernel interface alert + * For dirty bitmap, kernel allocates array of size aligned to + * bits-per-long. But for case when the kernel is 64bits and + * the userspace is 32bits, userspace can't align to the same + * bits-per-long, since sizeof(long) is different between kernel + * and user space. This way, userspace will provide buffer which + * may be 4 bytes less than the kernel will use, resulting in + * userspace memory corruption (which is not detectable by valgrind + * too, in most cases). + * So for now, let's align to 64 instead of HOST_LONG_BITS here, in + * a hope that sizeof(long) won't become >8 any time soon. + */ + hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), + /*HOST_LONG_BITS*/ 64) / 8; + mem->dirty_bmap = g_malloc0(bitmap_size); +} + /** * kvm_physical_sync_dirty_bitmap - Sync dirty bitmap from kernel space * @@ -568,23 +589,9 @@ static int kvm_physical_sync_dirty_bitmap(KVMMemoryListener *kml, goto out; } - /* XXX bad kernel interface alert - * For dirty bitmap, kernel allocates array of size aligned to - * bits-per-long. But for case when the kernel is 64bits and - * the userspace is 32bits, userspace can't align to the same - * bits-per-long, since sizeof(long) is different between kernel - * and user space. This way, userspace will provide buffer which - * may be 4 bytes less than the kernel will use, resulting in - * userspace memory corruption (which is not detectable by valgrind - * too, in most cases). - * So for now, let's align to 64 instead of HOST_LONG_BITS here, in - * a hope that sizeof(long) won't become >8 any time soon. - */ if (!mem->dirty_bmap) { - hwaddr bitmap_size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), - /*HOST_LONG_BITS*/ 64) / 8; /* Allocate on the first log_sync, once and for all */ - mem->dirty_bmap = g_malloc0(bitmap_size); + kvm_memslot_init_dirty_bitmap(mem); } d.dirty_bitmap = mem->dirty_bmap; @@ -1066,6 +1073,13 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, mem->ram = ram; mem->flags = kvm_mem_flags(mr); + if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { + /* + * Reallocate the bmap; it means it doesn't disappear in + * middle of a migrate. + */ + kvm_memslot_init_dirty_bitmap(mem); + } err = kvm_set_user_memory_region(kml, mem, true); if (err) { fprintf(stderr, "%s: error registering slot: %s\n", __func__, -- Gitee