From c709e84c483af5466b9bf1d289a70813942da7e0 Mon Sep 17 00:00:00 2001 From: libai Date: Tue, 1 Apr 2025 15:33:54 +0800 Subject: [PATCH 1/7] virtio-pci:Batch processing of IRQFD mapping for multi queue Virtio devices The virtio device with multiple queues currently calls ioctl every time it establishes an irqfd route for vq. Since the devices will not actually run until all queue irqfds are completed, these irqfd routes can be temporarily stored and submitted to kvm through ioctl at once to reduce the number of ioctl attempts and optimize the startup speed of virtio devices. Signed-off-by: libai --- hw/virtio/virtio-pci.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 06b125ec62..7cd15f70e3 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -49,6 +49,18 @@ * configuration space */ #define VIRTIO_PCI_CONFIG_SIZE(dev) VIRTIO_PCI_CONFIG_OFF(msix_enabled(dev)) +static KVMRouteChange virtio_pci_route_change; + +static inline void virtio_pci_begin_route_changes(void) +{ + virtio_pci_route_change = kvm_irqchip_begin_route_changes(kvm_state); +} + +static inline void virtio_pci_commit_route_changes(void) +{ + kvm_irqchip_commit_route_changes(&virtio_pci_route_change); +} + static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, VirtIOPCIProxy *dev); static void virtio_pci_reset(DeviceState *qdev); @@ -815,12 +827,10 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, int ret; if (irqfd->users == 0) { - KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state); - ret = kvm_irqchip_add_msi_route(&c, vector, &proxy->pci_dev); + ret = kvm_irqchip_add_msi_route(&virtio_pci_route_change, vector, &proxy->pci_dev); if (ret < 0) { return ret; } - kvm_irqchip_commit_route_changes(&c); irqfd->virq = ret; } irqfd->users++; @@ -950,12 +960,14 @@ static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs) } #endif + 
virtio_pci_begin_route_changes(); for (queue_no = 0; queue_no < nvqs; queue_no++) { if (!virtio_queue_get_num(vdev, queue_no)) { return -1; } ret = kvm_virtio_pci_vector_use_one(proxy, queue_no); } + virtio_pci_commit_route_changes(); #ifdef __aarch64__ if (!strcmp(vdev->name, "virtio-net") && ret != 0) { -- Gitee From 66749037256732f369c387c136e14f727a51951f Mon Sep 17 00:00:00 2001 From: libai Date: Tue, 1 Apr 2025 17:09:38 +0800 Subject: [PATCH 2/7] kvm/msi: Mark whether there is an IRQ route table update through changes This patch prevents unnecessary updates to the IRQ route without modification Signed-off-by: libai --- accel/kvm/kvm-all.c | 11 ++++++----- accel/stubs/kvm-stub.c | 2 +- hw/intc/ioapic.c | 5 +++-- hw/misc/ivshmem.c | 6 ++++-- hw/vfio/pci.c | 5 +++-- hw/virtio/virtio-pci.c | 5 +++-- include/sysemu/kvm.h | 2 +- target/i386/kvm/kvm.c | 6 ++++-- 8 files changed, 25 insertions(+), 17 deletions(-) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index 825ecb99a8..aa41b42efc 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -1902,10 +1902,11 @@ static void kvm_add_routing_entry(KVMState *s, set_gsi(s, entry->gsi); } -static int kvm_update_routing_entry(KVMState *s, +static int kvm_update_routing_entry(KVMRouteChange *c, struct kvm_irq_routing_entry *new_entry) { struct kvm_irq_routing_entry *entry; + KVMState *s = c->s; int n; for (n = 0; n < s->irq_routes->nr; n++) { @@ -1919,7 +1920,7 @@ static int kvm_update_routing_entry(KVMState *s, } *entry = *new_entry; - + c->changes++; return 0; } @@ -2051,7 +2052,7 @@ int kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev) return virq; } -int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, +int kvm_irqchip_update_msi_route(KVMRouteChange *c, int virq, MSIMessage msg, PCIDevice *dev) { struct kvm_irq_routing_entry kroute = {}; @@ -2081,7 +2082,7 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, 
trace_kvm_irqchip_update_msi_route(virq); - return kvm_update_routing_entry(s, &kroute); + return kvm_update_routing_entry(c, &kroute); } static int kvm_irqchip_assign_irqfd(KVMState *s, EventNotifier *event, @@ -2223,7 +2224,7 @@ static int kvm_irqchip_assign_irqfd(KVMState *s, EventNotifier *event, abort(); } -int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg) +int kvm_irqchip_update_msi_route(KVMRouteChange *c, int virq, MSIMessage msg) { return -ENOSYS; } diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c index b071afee45..1fffdc0ea2 100644 --- a/accel/stubs/kvm-stub.c +++ b/accel/stubs/kvm-stub.c @@ -65,7 +65,7 @@ void kvm_irqchip_release_virq(KVMState *s, int virq) { } -int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, +int kvm_irqchip_update_msi_route(KVMRouteChange *c, int virq, MSIMessage msg, PCIDevice *dev) { return -ENOSYS; diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c index 716ffc8bbb..0b43aec8fa 100644 --- a/hw/intc/ioapic.c +++ b/hw/intc/ioapic.c @@ -195,6 +195,7 @@ static void ioapic_update_kvm_routes(IOAPICCommonState *s) int i; if (kvm_irqchip_is_split()) { + KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state); for (i = 0; i < IOAPIC_NUM_PINS; i++) { MSIMessage msg; struct ioapic_entry_info info; @@ -202,10 +203,10 @@ static void ioapic_update_kvm_routes(IOAPICCommonState *s) if (!info.masked) { msg.address = info.addr; msg.data = info.data; - kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL); + kvm_irqchip_update_msi_route(&c, i, msg, NULL); } } - kvm_irqchip_commit_routes(kvm_state); + kvm_irqchip_commit_route_changes(&c); } #endif } diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c index ad9a3c546e..f66491a7a7 100644 --- a/hw/misc/ivshmem.c +++ b/hw/misc/ivshmem.c @@ -278,6 +278,7 @@ static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, IVShmemState *s = IVSHMEM_COMMON(dev); EventNotifier *n = &s->peers[s->vm_id].eventfds[vector]; MSIVector *v = 
&s->msi_vectors[vector]; + KVMRouteChange c; int ret; IVSHMEM_DPRINTF("vector unmask %p %d\n", dev, vector); @@ -287,11 +288,12 @@ static int ivshmem_vector_unmask(PCIDevice *dev, unsigned vector, } assert(!v->unmasked); - ret = kvm_irqchip_update_msi_route(kvm_state, v->virq, msg, dev); + c = kvm_irqchip_begin_route_changes(kvm_state); + ret = kvm_irqchip_update_msi_route(&c, v->virq, msg, dev); if (ret < 0) { return ret; } - kvm_irqchip_commit_routes(kvm_state); + kvm_irqchip_commit_route_changes(&c); ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, v->virq); if (ret < 0) { diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 293deb8737..ce958848b6 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -507,8 +507,9 @@ static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector) static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg, PCIDevice *pdev) { - kvm_irqchip_update_msi_route(kvm_state, vector->virq, msg, pdev); - kvm_irqchip_commit_routes(kvm_state); + KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state); + kvm_irqchip_update_msi_route(&c, vector->virq, msg, pdev); + kvm_irqchip_commit_route_changes(&c); } static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 7cd15f70e3..a677fa0736 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1044,12 +1044,13 @@ static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy, if (proxy->vector_irqfd) { irqfd = &proxy->vector_irqfd[vector]; if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) { - ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg, + KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state); + ret = kvm_irqchip_update_msi_route(&c, irqfd->virq, msg, &proxy->pci_dev); if (ret < 0) { return ret; } - kvm_irqchip_commit_routes(kvm_state); + kvm_irqchip_commit_route_changes(&c); } } diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 
176aa53cbe..16cccc881e 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -501,7 +501,7 @@ void kvm_init_cpu_signals(CPUState *cpu); * @return: virq (>=0) when success, errno (<0) when failed. */ int kvm_irqchip_add_msi_route(KVMRouteChange *c, int vector, PCIDevice *dev); -int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, +int kvm_irqchip_update_msi_route(KVMRouteChange *c, int virq, MSIMessage msg, PCIDevice *dev); void kvm_irqchip_commit_routes(KVMState *s); diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 2df3ff99c3..3a88e65635 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -5700,9 +5700,11 @@ void kvm_update_msi_routes_all(void *private, bool global, { int cnt = 0, vector; MSIRouteEntry *entry; + KVMRouteChange c; MSIMessage msg; PCIDevice *dev; + c = kvm_irqchip_begin_route_changes(kvm_state); /* TODO: explicit route update */ QLIST_FOREACH(entry, &msi_route_list, list) { cnt++; @@ -5719,9 +5721,9 @@ void kvm_update_msi_routes_all(void *private, bool global, */ continue; } - kvm_irqchip_update_msi_route(kvm_state, entry->virq, msg, dev); + kvm_irqchip_update_msi_route(&c, entry->virq, msg, dev); } - kvm_irqchip_commit_routes(kvm_state); + kvm_irqchip_commit_route_changes(&c); trace_kvm_x86_update_msi_routes(cnt); } -- Gitee From d6e6ef58847bf34db9535649bf33e2a72b59495e Mon Sep 17 00:00:00 2001 From: libai Date: Tue, 1 Apr 2025 17:28:02 +0800 Subject: [PATCH 3/7] virtio/irqfd: Batch processing of irqfd related operations during virtio device startup This patch adds batch processing for unmask operations Signed-off-by: libai --- hw/virtio/virtio-pci.c | 97 ++++++++++++++++++++++++++++++++++---- include/hw/virtio/virtio.h | 1 + 2 files changed, 89 insertions(+), 9 deletions(-) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index a677fa0736..558471307a 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -51,14 +51,86 @@ static KVMRouteChange 
virtio_pci_route_change; -static inline void virtio_pci_begin_route_changes(void) +static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy, + EventNotifier *n, + unsigned int vector); + +static inline void virtio_pci_begin_route_changes(VirtIODevice *vdev) +{ + if (!vdev->defer_kvm_irq_routing) { + virtio_pci_route_change = kvm_irqchip_begin_route_changes(kvm_state); + } +} + +static inline void virtio_pci_commit_route_changes(VirtIODevice *vdev) { + if (!vdev->defer_kvm_irq_routing) { + kvm_irqchip_commit_route_changes(&virtio_pci_route_change); + } +} + +static void virtio_pci_prepare_kvm_msi_virq_batch(VirtIOPCIProxy *proxy) +{ + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + + if (vdev->defer_kvm_irq_routing) { + qemu_log("invalid defer kvm irq routing state: %d\n", vdev->defer_kvm_irq_routing); + return; + } virtio_pci_route_change = kvm_irqchip_begin_route_changes(kvm_state); + vdev->defer_kvm_irq_routing = true; } -static inline void virtio_pci_commit_route_changes(void) +static void virtio_pci_commit_kvm_msi_virq_batch(VirtIOPCIProxy *proxy) { + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); + EventNotifier *n; + VirtQueue *vq; + int vector, index, ret; + + if (!vdev->defer_kvm_irq_routing) { + qemu_log("invalid defer kvm irq routing state: %d\n", vdev->defer_kvm_irq_routing); + return; + } + vdev->defer_kvm_irq_routing = false; kvm_irqchip_commit_route_changes(&virtio_pci_route_change); + + if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { + return; + } + + for (vector = 0; vector < proxy->pci_dev.msix_entries_nr; vector++) { + if (msix_is_masked(&proxy->pci_dev, vector)) { + continue; + } + + if (vector == vdev->config_vector) { + n = virtio_config_get_guest_notifier(vdev); + ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); + if (ret) { + qemu_log("config irqfd use failed: %d\n", ret); + } + continue; + } + + vq = virtio_vector_first_queue(vdev, vector); + + 
while (vq) { + index = virtio_get_queue_index(vq); + if (!virtio_queue_get_num(vdev, index)) { + break; + } + if (index < proxy->nvqs_with_notifiers) { + n = virtio_queue_get_guest_notifier(vq); + ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); + if (ret < 0) { + qemu_log("Error: irqfd use failed: %d\n", ret); + } + } + vq = virtio_vector_next_queue(vq); + } + } } static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, @@ -959,15 +1031,17 @@ static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs) kvm_create_shadow_device(&proxy->pci_dev); } #endif - - virtio_pci_begin_route_changes(); for (queue_no = 0; queue_no < nvqs; queue_no++) { if (!virtio_queue_get_num(vdev, queue_no)) { return -1; } + } + + virtio_pci_begin_route_changes(vdev); + for (queue_no = 0; queue_no < nvqs; queue_no++) { ret = kvm_virtio_pci_vector_use_one(proxy, queue_no); } - virtio_pci_commit_route_changes(); + virtio_pci_commit_route_changes(vdev); #ifdef __aarch64__ if (!strcmp(vdev->name, "virtio-net") && ret != 0) { @@ -1044,13 +1118,13 @@ static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy, if (proxy->vector_irqfd) { irqfd = &proxy->vector_irqfd[vector]; if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) { - KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state); - ret = kvm_irqchip_update_msi_route(&c, irqfd->virq, msg, + virtio_pci_begin_route_changes(vdev); + ret = kvm_irqchip_update_msi_route(&virtio_pci_route_change, irqfd->virq, msg, &proxy->pci_dev); if (ret < 0) { return ret; } - kvm_irqchip_commit_route_changes(&c); + virtio_pci_commit_route_changes(vdev); } } @@ -1065,7 +1139,9 @@ static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy, event_notifier_set(n); } } else { - ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); + if (!vdev->defer_kvm_irq_routing) { + ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); + } } return ret; } @@ -1322,6 +1398,8 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, 
int nvqs, bool assign) if ((with_irqfd || (vdev->use_guest_notifier_mask && k->guest_notifier_mask)) && assign) { + + virtio_pci_prepare_kvm_msi_virq_batch(proxy); if (with_irqfd) { proxy->vector_irqfd = g_malloc0(sizeof(*proxy->vector_irqfd) * @@ -1339,6 +1417,7 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) r = msix_set_vector_notifiers(&proxy->pci_dev, virtio_pci_vector_unmask, virtio_pci_vector_mask, virtio_pci_vector_poll); + virtio_pci_commit_kvm_msi_virq_batch(proxy); if (r < 0) { goto notifiers_error; } diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 78db2bde98..672f7445dd 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -147,6 +147,7 @@ struct VirtIODevice bool use_started; bool started; bool start_on_kick; /* when virtio 1.0 feature has not been negotiated */ + bool defer_kvm_irq_routing; bool disable_legacy_check; bool vhost_started; VMChangeStateEntry *vmstate; -- Gitee From 0e3d3b9a3cd54340b2d9991918a172ed38670bcd Mon Sep 17 00:00:00 2001 From: libai Date: Wed, 2 Apr 2025 20:14:10 +0800 Subject: [PATCH 4/7] migration: Extend the fdtable in the incoming phase of migration Perform the fdtable extension in advance to avoid time consumption caused by triggering the fdtable extension during the migration downtime. 
Signed-off-by: libai --- migration/migration.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/migration/migration.c b/migration/migration.c index dce22c2da5..9a433e615b 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -68,6 +68,8 @@ #include "sysemu/dirtylimit.h" #include "qemu/sockets.h" +#define DEFAULT_FD_MAX 4096 + static NotifierList migration_state_notifiers = NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); @@ -1712,6 +1714,31 @@ void migrate_del_blocker(Error **reasonp) } } +/* + * Kernel will expand the fdtable allocated to the qemu process when + * the number of fds held by qemu process exceeds a power of 2 (starting from 64). + * Each expansion introduces tens of ms of latency due to RCU synchronization. + * The expansion is completed during qemu process initialization to avoid + * triggering this action during the migration downtime phase. + */ +static void qemu_pre_extend_fdtable(void) +{ + int buffer[DEFAULT_FD_MAX] = {0}; + int i; + + /* expand fdtable */ + for (i = 0; i < DEFAULT_FD_MAX; i++) { + buffer[i] = qemu_dup(STDIN_FILENO); + } + + /* close tmp fd */ + for (i = 0; i < DEFAULT_FD_MAX; i++) { + if (buffer[i] > 0) { + (void)qemu_close(buffer[i]); + } + } +} + void qmp_migrate_incoming(const char *uri, bool has_channels, MigrationChannelList *channels, Error **errp) { @@ -1731,6 +1758,8 @@ void qmp_migrate_incoming(const char *uri, bool has_channels, return; } + qemu_pre_extend_fdtable(); + qemu_start_incoming_migration(uri, has_channels, channels, &local_err); if (local_err) { -- Gitee From 0cc093ba0d25536162685a0bd45b80f97d91cf15 Mon Sep 17 00:00:00 2001 From: libai Date: Wed, 9 Apr 2025 11:06:52 +0800 Subject: [PATCH 5/7] migration/memory:Optimize unnecessary memory region updates during live migration During the startup phase of the destination VM for live migration, there is no need to update the memory region in real time. Instead, just force commit once before each device load state. 
Signed-off-by: libai --- include/exec/memory.h | 5 +++++ migration/savevm.c | 7 ++++++ migration/vmstate.c | 8 +++++++ system/memory.c | 45 ++++++++++++++++++++++----------------- tests/unit/test-vmstate.c | 6 ++++++ 5 files changed, 51 insertions(+), 20 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index c14dc69d27..924bdbd481 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -2567,6 +2567,11 @@ void memory_region_transaction_begin(void); */ void memory_region_transaction_commit(void); +/** + * memory_region_commit: Force commit memory region immediately. + */ +void memory_region_commit(void); + /** * memory_listener_register: register callbacks to be called when memory * sections are mapped or unmapped into an address diff --git a/migration/savevm.c b/migration/savevm.c index cc65da605e..030a4bf7d2 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2857,6 +2857,10 @@ int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis) uint8_t section_type; int ret = 0; + if (qemu_mutex_iothread_locked()) { + memory_region_transaction_begin(); + } + retry: while (true) { section_type = qemu_get_byte(f); @@ -2900,6 +2904,9 @@ retry: } out: + if (qemu_mutex_iothread_locked()) { + memory_region_transaction_commit(); + } if (ret < 0) { qemu_file_set_error(f, ret); diff --git a/migration/vmstate.c b/migration/vmstate.c index bd08e390c5..e621d8ddb7 100644 --- a/migration/vmstate.c +++ b/migration/vmstate.c @@ -20,6 +20,7 @@ #include "qemu/bitops.h" #include "qemu/error-report.h" #include "trace.h" +#include "exec/memory.h" static int vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd, void *opaque, JSONWriter *vmdesc, @@ -184,6 +185,13 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd, return ret; } if (vmsd->post_load) { + /** + * We call memory_transaction_begin in qemu_loadvm_state_main, + * so address space will not be updated during vm state loading. 
+ * But some dev need to use address space here, force commit + * memory region transaction before call post_load. + */ + memory_region_commit(); ret = vmsd->post_load(opaque, version_id); } trace_vmstate_load_state_end(vmsd->name, "end", ret); diff --git a/system/memory.c b/system/memory.c index 9db07fd832..fd76eb7048 100644 --- a/system/memory.c +++ b/system/memory.c @@ -1117,34 +1117,39 @@ void memory_region_transaction_begin(void) ++memory_region_transaction_depth; } -void memory_region_transaction_commit(void) +void memory_region_commit(void) { AddressSpace *as; + if (memory_region_update_pending) { + flatviews_reset(); + + MEMORY_LISTENER_CALL_GLOBAL(begin, Forward); + + QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { + address_space_set_flatview(as); + address_space_update_ioeventfds(as); + } + memory_region_update_pending = false; + ioeventfd_update_pending = false; + MEMORY_LISTENER_CALL_GLOBAL(commit, Forward); + } else if (ioeventfd_update_pending) { + QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { + address_space_update_ioeventfds(as); + } + ioeventfd_update_pending = false; + } +} + +void memory_region_transaction_commit(void) +{ assert(memory_region_transaction_depth); assert(qemu_mutex_iothread_locked()); --memory_region_transaction_depth; if (!memory_region_transaction_depth) { - if (memory_region_update_pending) { - flatviews_reset(); - - MEMORY_LISTENER_CALL_GLOBAL(begin, Forward); - - QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { - address_space_set_flatview(as); - address_space_update_ioeventfds(as); - } - memory_region_update_pending = false; - ioeventfd_update_pending = false; - MEMORY_LISTENER_CALL_GLOBAL(commit, Forward); - } else if (ioeventfd_update_pending) { - QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { - address_space_update_ioeventfds(as); - } - ioeventfd_update_pending = false; - } - } + memory_region_commit(); + } } static void memory_region_destructor_none(MemoryRegion *mr) diff 
--git a/tests/unit/test-vmstate.c b/tests/unit/test-vmstate.c index 0b7d5ecd68..22c586eee0 100644 --- a/tests/unit/test-vmstate.c +++ b/tests/unit/test-vmstate.c @@ -31,6 +31,7 @@ #include "../migration/savevm.h" #include "qemu/module.h" #include "io/channel-file.h" +#include "exec/memory.h" static int temp_fd; @@ -1479,6 +1480,11 @@ static void test_tmp_struct(void) g_assert_cmpint(obj.f, ==, 8); /* From the child->parent */ } +/* stub for ut */ +void memory_region_commit(void) +{ +} + int main(int argc, char **argv) { g_autofree char *temp_file = g_strdup_printf("%s/vmst.test.XXXXXX", -- Gitee From d43019e644fb93c64e9016c5d618d8e20a60270d Mon Sep 17 00:00:00 2001 From: libai Date: Wed, 9 Apr 2025 14:22:19 +0800 Subject: [PATCH 6/7] memory/eventfd:Introduce ioeventfd batch processing to reduce the time required to update ioeventfd Setting ioeventfd triggers kernel RCU synchronization, which is time-consuming. Change it to temporarily store the modification of ioeventfds, and submit it for effect after setting is complete. Signed-off-by: libai --- accel/kvm/kvm-all.c | 32 ++++++++++++++++++++++++++++++++ include/exec/memory.h | 21 +++++++++++++++++++++ linux-headers/linux/kvm.h | 6 ++++++ system/memory.c | 2 ++ 4 files changed, 61 insertions(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index aa41b42efc..f96afb1230 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -1737,6 +1737,36 @@ static void kvm_io_ioeventfd_add(MemoryListener *listener, } } +static int kvm_ioeventfd_batch(bool start) +{ + int ret; + struct kvm_ioeventfd iofd = { + .flags = start ? 
+ KVM_IOEVENTFD_FLAG_BATCH_BEGIN : KVM_IOEVENTFD_FLAG_BATCH_END, + }; + + if (!kvm_enabled()) { + return -ENOSYS; + } + + ret = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &iofd); + if (ret < 0) { + return -errno; + } + + return 0; +} + +static void kvm_ioeventfd_begin(MemoryListener *listener) +{ + kvm_ioeventfd_batch(true); +} + +static void kvm_ioeventfd_end(MemoryListener *listener) +{ + kvm_ioeventfd_batch(false); +} + static void kvm_io_ioeventfd_del(MemoryListener *listener, MemoryRegionSection *section, bool match_data, uint64_t data, @@ -2631,6 +2661,8 @@ static int kvm_init(MachineState *ms) s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del; s->memory_listener.listener.coalesced_io_add = kvm_coalesce_mmio_region; s->memory_listener.listener.coalesced_io_del = kvm_uncoalesce_mmio_region; + s->memory_listener.listener.eventfd_begin = kvm_ioeventfd_begin; + s->memory_listener.listener.eventfd_end = kvm_ioeventfd_end; kvm_memory_listener_register(s, &s->memory_listener, &address_space_memory, 0, "kvm-memory"); diff --git a/include/exec/memory.h b/include/exec/memory.h index 924bdbd481..69021ba491 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -1079,6 +1079,27 @@ struct MemoryListener { void (*eventfd_del)(MemoryListener *listener, MemoryRegionSection *section, bool match_data, uint64_t data, EventNotifier *e); + /** + * @eventfd_begin: + * + * Called during an address space begin to update ioeventfd, + * notify kvm that ioeventfd will be update in batches. + * + * @listener: The #MemoryListener. + */ + void (*eventfd_begin)(MemoryListener *listener); + + /** + * @eventfd_end: + * + * Called during an address space update ioeventfd end, + * notify kvm that all ioeventfd modifications have been submitted + * and batch processing can be started. + * + * @listener: The #MemoryListener. 
+ */ + void (*eventfd_end)(MemoryListener *listener); + /** * @coalesced_io_add: * diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index a19683f1e9..0714651440 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -819,6 +819,8 @@ enum { kvm_ioeventfd_flag_nr_deassign, kvm_ioeventfd_flag_nr_virtio_ccw_notify, kvm_ioeventfd_flag_nr_fast_mmio, + kvm_ioeventfd_flag_nr_batch_begin, + kvm_ioeventfd_flag_nr_batch_end, kvm_ioeventfd_flag_nr_max, }; @@ -827,6 +829,10 @@ enum { #define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign) #define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \ (1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify) +#define KVM_IOEVENTFD_FLAG_BATCH_BEGIN \ + (1 << kvm_ioeventfd_flag_nr_batch_begin) +#define KVM_IOEVENTFD_FLAG_BATCH_END \ + (1 << kvm_ioeventfd_flag_nr_batch_end) #define KVM_IOEVENTFD_VALID_FLAG_MASK ((1 << kvm_ioeventfd_flag_nr_max) - 1) diff --git a/system/memory.c b/system/memory.c index fd76eb7048..08d34262c3 100644 --- a/system/memory.c +++ b/system/memory.c @@ -1134,10 +1134,12 @@ void memory_region_commit(void) ioeventfd_update_pending = false; MEMORY_LISTENER_CALL_GLOBAL(commit, Forward); } else if (ioeventfd_update_pending) { + MEMORY_LISTENER_CALL_GLOBAL(eventfd_begin, Forward); QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { address_space_update_ioeventfds(as); } ioeventfd_update_pending = false; + MEMORY_LISTENER_CALL_GLOBAL(eventfd_end, Forward); } } -- Gitee From 3b09c85198f4970be18ba8597d545d5dc73a0ba1 Mon Sep 17 00:00:00 2001 From: libai Date: Thu, 10 Apr 2025 16:13:49 +0800 Subject: [PATCH 7/7] memory:Optimize flatview ioeventfd processing When updating memory regions, do not repeat updates for the same memory region to optimize the memory region update process Signed-off-by: libai --- include/exec/memory.h | 2 ++ system/memory.c | 26 +++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/include/exec/memory.h b/include/exec/memory.h 
index 69021ba491..fe27f323b2 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -1201,6 +1201,8 @@ struct FlatView { unsigned nr_allocated; struct AddressSpaceDispatch *dispatch; MemoryRegion *root; + #define FLATVIEW_FLAG_LAST_PROCESSED (1 << 0) + unsigned flags; }; static inline FlatView *address_space_to_flatview(AddressSpace *as) diff --git a/system/memory.c b/system/memory.c index 08d34262c3..7858aa1878 100644 --- a/system/memory.c +++ b/system/memory.c @@ -856,6 +856,13 @@ static void address_space_update_ioeventfds(AddressSpace *as) return; } + view = address_space_get_flatview(as); + if (view->flags & FLATVIEW_FLAG_LAST_PROCESSED) { + flatview_unref(view); + return; + } + view->flags |= FLATVIEW_FLAG_LAST_PROCESSED; + /* * It is likely that the number of ioeventfds hasn't changed much, so use * the previous size as the starting value, with some headroom to avoid @@ -864,7 +871,6 @@ static void address_space_update_ioeventfds(AddressSpace *as) ioeventfd_max = QEMU_ALIGN_UP(as->ioeventfd_nb, 4); ioeventfds = g_new(MemoryRegionIoeventfd, ioeventfd_max); - view = address_space_get_flatview(as); FOR_EACH_FLAT_RANGE(fr, view) { for (i = 0; i < fr->mr->ioeventfd_nb; ++i) { tmp = addrrange_shift(fr->mr->ioeventfds[i].addr, @@ -1111,6 +1117,17 @@ static void address_space_update_topology(AddressSpace *as) address_space_set_flatview(as); } +static void address_space_update_view(AddressSpace *as) +{ + FlatView *view; + + view = address_space_get_flatview(as); + if (view->flags & FLATVIEW_FLAG_LAST_PROCESSED) { + view->flags &= ~FLATVIEW_FLAG_LAST_PROCESSED; + } + flatview_unref(view); +} + void memory_region_transaction_begin(void) { qemu_flush_coalesced_mmio_buffer(); @@ -1132,6 +1149,9 @@ void memory_region_commit(void) } memory_region_update_pending = false; ioeventfd_update_pending = false; + QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { + address_space_update_view(as); + } MEMORY_LISTENER_CALL_GLOBAL(commit, Forward); } else if 
(ioeventfd_update_pending) { MEMORY_LISTENER_CALL_GLOBAL(eventfd_begin, Forward); @@ -1139,6 +1159,9 @@ void memory_region_commit(void) address_space_update_ioeventfds(as); } ioeventfd_update_pending = false; + QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) { + address_space_update_view(as); + } MEMORY_LISTENER_CALL_GLOBAL(eventfd_end, Forward); } } @@ -3149,6 +3172,7 @@ void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name) as->name = g_strdup(name ? name : "anonymous"); address_space_update_topology(as); address_space_update_ioeventfds(as); + address_space_update_view(as); } static void do_address_space_destroy(AddressSpace *as) -- Gitee