From 343f6aad8d15853b9b9b566cd297d3a4264f9dbf Mon Sep 17 00:00:00 2001 From: zhangpengrui Date: Tue, 27 May 2025 18:51:19 +0800 Subject: [PATCH] QEMU update to version 8.2.0-33: - arm: cvm: fix arm-softmmu build on x86 platform - arm: VirtCCA: fix arm-softmmu build on x86 platform - hw/arm/virt: HDBSS: fix arm-softmmu build on x86 platform - hw/arm/virt: decouple migrate_hdbss_buffer_size() with kvm_update_hdbss_cap() - hw/arm/virt: only support the HDBSS feature in aarch64 - multifd: bugfix for incorrect migration data with qatzip compression - multifd: bugfix for incorrect migration data with QPL compression - multifd: bugfix for migration using compression methods - migration/multifd: Zero p->flags before starting filling a packet - migration/multifd: Ensure packet->ramblock is null-terminated - migration/multifd: Fix rb->receivedmap cleanup race - migration/multifd: Fix loop conditions in multifd_zstd_send_prepare and multifd_zstd_recv - tests/migration: Add integration test for 'qatzip' compression method - meson: Introduce 'qatzip' feature to the build system - docs/migration: add qatzip compression feature - migration/multifd: Fix p->iov leak in multifd-uadk.c - tests/migration-test: add uadk compression test - migration/multifd: Switch to no compression when no hardware support - migration/multifd: Add UADK based compression and decompression - configure: Add uadk option - docs/migration: add uadk compression feature - configure: add --enable-qpl build option - migration/multifd: implement qpl compression and decompression - migration/multifd: implement initialization of qpl compression - migration/multifd: include ram.h in multifd.h - migration/multifd: add qpl compression method - migration/multifd: put IOV initialization into compression method - docs/migration: add qpl compression feature - migration/multifd: Allow multifd without packets - migration/multifd: Rename MultiFDSend|RecvParams::data to compress_data - migration/multifd: Cleanup 
multifd_recv_sync_main - tests/migration: Set compression level in migration tests - migration/multifd: Cleanup outgoing_args in state destroy - migration/multifd: Make multifd_channel_connect() return void - migration/multifd: Release recv sem_sync earlier - migration/multifd: Remove p->quit from recv side - migration/multifd: Move multifd_send_setup into migration thread - migration/multifd: Move multifd_send_setup error handling in to the function - migration/multifd: Remove p->running - migration/multifd: Join the TLS thread - migration: Fix logic of channels and transport compatibility check - migration/multifd: Optimize sender side to be lockless - migration/multifd: Stick with send/recv on function names - migration/multifd: Cleanup multifd_load_cleanup() - migration/multifd: Cleanup multifd_save_cleanup() - migration/multifd: Rewrite multifd_queue_page() - migration/multifd: Split multifd_send_terminate_threads() - migration/multifd: Forbid spurious wakeups - migration/multifd: Move header prepare/fill into send_prepare() - migration/multifd: Move trace_multifd_send|recv() - migration/multifd: Move total_normal_pages accounting - migration/multifd: Rename p->num_packets and clean it up - migration/multifd: Drop pages->num check in sender thread - migration/multifd: Simplify locking in sender thread - migration/multifd: Separate SYNC request with normal jobs - migration/multifd: Drop MultiFDSendParams.normal[] array - migration/multifd: Postpone reset of MultiFDPages_t - migration/multifd: Drop MultiFDSendParams.quit, cleanup error paths - migration/multifd: multifd_send_kick_main() - migration/multifd: Drop stale comment for multifd zero copy - docs/migration: Further move virtio to be feature of migration - docs/migration: Further move vfio to be feature of migration - docs/migration: Organize "Postcopy" page - docs/migration: Split "dirty limit" - docs/migration: Split "Postcopy" - docs/migration: Split "Debugging" and "Firmware" - docs/migration: Split 
"Backwards compatibility" separately - docs/migration: Convert virtio.txt into rST - docs/migration: Create index page - docs/migration: Create migration/ directory - tests/qtest: Re-enable multifd cancel test - tests/qtest/migration: Use the new migration_test_add - tests/qtest/migration: Add a wrapper to print test names - tests/qtest/migration: Print migration incoming errors - migration: Report error in incoming migration - migration/multifd: Change multifd_pages_init argument - migration/multifd: Remove QEMUFile from where it is not needed - migration/multifd: Remove MultiFDPages_t::packet_num - migration/multifd: Remove unnecessary usage of local Error - migration: Fix migration_channel_read_peek() error path - migration/multifd: Remove error_setg() in migration_ioc_process_incoming() - migration/multifd: Fix leaking of Error in TLS error flow - migration/multifd: Simplify multifd_channel_connect() if else statement - migration/multifd: Fix error message in multifd_recv_initial_packet() - hw/arm/virt: support the HDBSS feature - Signed-off-by:Pengrui Zhang (cherry picked from commit 0fe82503a32ca851bf58addfd0285d6187f42e8c) --- ...ix-arm-softmmu-build-on-x86-platform.patch | 30 + ...ix-arm-softmmu-build-on-x86-platform.patch | 37 + configure-Add-uadk-option.patch | 98 ++ configure-add-enable-qpl-build-option.patch | 100 ++ ...igration-Convert-virtio.txt-into-rST.patch | 271 ++++ docs-migration-Create-index-page.patch | 94 ++ ...migration-Create-migration-directory.patch | 65 + ...urther-move-vfio-to-be-feature-of-mi.patch | 47 + ...urther-move-virtio-to-be-feature-of-.patch | 47 + docs-migration-Organize-Postcopy-page.patch | 229 ++++ ...plit-Backwards-compatibility-separat.patch | 1088 +++++++++++++++++ ...gration-Split-Debugging-and-Firmware.patch | 149 +++ docs-migration-Split-Postcopy.patch | 679 ++++++++++ docs-migration-Split-dirty-limit.patch | 192 +++ ...ation-add-qatzip-compression-feature.patch | 206 ++++ 
...igration-add-qpl-compression-feature.patch | 304 +++++ ...gration-add-uadk-compression-feature.patch | 183 +++ ...S-fix-arm-softmmu-build-on-x86-platf.patch | 170 +++ ...uple-migrate_hdbss_buffer_size-with-.patch | 56 + ...-support-the-HDBSS-feature-in-aarch6.patch | 54 + hw-arm-virt-support-the-HDBSS-feature.patch | 285 +++++ ...e-qatzip-feature-to-the-build-system.patch | 99 ++ ...-Add-migration-parameters-for-QATzip.patch | 214 ++++ ...gic-of-channels-and-transport-compat.patch | 72 ++ ...gration_channel_read_peek-error-path.patch | 52 + ...-Introduce-qatzip-compression-method.patch | 500 ++++++++ ...ly-apply-migration-compression-level.patch | 53 + ...n-Report-error-in-incoming-migration.patch | 40 + ...d-Add-UADK-based-compression-and-dec.patch | 187 +++ ...tion-multifd-Add-UADK-initialization.patch | 244 ++++ ...d-Add-a-synchronization-point-for-ch.patch | 127 ++ ...d-Add-new-migration-option-zero-page.patch | 289 +++++ ...ultifd-Allow-multifd-without-packets.patch | 363 ++++++ ...d-Change-multifd_pages_init-argument.patch | 42 + ...d-Change-retval-of-multifd_queue_pag.patch | 88 ++ ...d-Change-retval-of-multifd_send_page.patch | 83 ++ ...fd-Cleanup-TLS-iochannel-referencing.patch | 117 ++ ...multifd-Cleanup-multifd_load_cleanup.patch | 94 ++ ...ltifd-Cleanup-multifd_recv_sync_main.patch | 75 ++ ...multifd-Cleanup-multifd_save_cleanup.patch | 159 +++ ...d-Cleanup-outgoing_args-in-state-des.patch | 78 ++ ...tifd-Decouple-recv-method-from-pages.patch | 157 +++ ...d-Drop-MultiFDSendParams.normal-arra.patch | 212 ++++ ...d-Drop-MultiFDSendParams.quit-cleanu.patch | 251 ++++ ...d-Drop-pages-num-check-in-sender-thr.patch | 46 + migration-multifd-Drop-registered_yank.patch | 65 + ...d-Drop-stale-comment-for-multifd-zer.patch | 43 + ...d-Drop-unnecessary-helper-to-destroy.patch | 77 ++ ...d-Ensure-packet-ramblock-is-null-ter.patch | 69 ++ ...d-Fix-MultiFDSendParams.packet_num-r.patch | 167 +++ ...d-Fix-error-message-in-multifd_recv_.patch | 41 + 
...d-Fix-leaking-of-Error-in-TLS-error-.patch | 49 + ...d-Fix-loop-conditions-in-multifd_zst.patch | 57 + ...ifd-Fix-p-iov-leak-in-multifd-uadk.c.patch | 36 + ...tifd-Fix-rb-receivedmap-cleanup-race.patch | 95 ++ ...tion-multifd-Forbid-spurious-wakeups.patch | 51 + ...d-Implement-ram_save_target_page_mul.patch | 94 ++ ...d-Implement-zero-page-transmission-o.patch | 622 ++++++++++ migration-multifd-Join-the-TLS-thread.patch | 64 + ...d-Make-multifd_channel_connect-retur.patch | 54 + ...d-Move-header-prepare-fill-into-send.patch | 227 ++++ ...d-Move-multifd_send_setup-error-hand.patch | 106 ++ ...d-Move-multifd_send_setup-into-migra.patch | 90 ++ ...d-Move-total_normal_pages-accounting.patch | 57 + ...multifd-Move-trace_multifd_send-recv.patch | 71 ++ ...d-Optimize-sender-side-to-be-lockles.patch | 204 ++++ ...ifd-Postpone-reset-of-MultiFDPages_t.patch | 84 ++ ...ultifd-Release-recv-sem_sync-earlier.patch | 52 + ...ifd-Remove-MultiFDPages_t-packet_num.patch | 48 + ...d-Remove-QEMUFile-from-where-it-is-n.patch | 159 +++ ...d-Remove-error_setg-in-migration_ioc.patch | 39 + ...multifd-Remove-p-quit-from-recv-side.patch | 129 ++ migration-multifd-Remove-p-running.patch | 175 +++ ...d-Remove-unnecessary-usage-of-local-.patch | 61 + ...d-Rename-MultiFDSend-RecvParams-data.patch | 199 +++ ...d-Rename-p-num_packets-and-clean-it-.patch | 140 +++ ...n-multifd-Rewrite-multifd_queue_page.patch | 112 ++ ...d-Separate-SYNC-request-with-normal-.patch | 190 +++ ...fd-Simplify-locking-in-sender-thread.patch | 99 ++ ...d-Simplify-multifd_channel_connect-i.patch | 53 + ...d-Split-multifd_send_terminate_threa.patch | 131 ++ ...d-Stick-with-send-recv-on-function-n.patch | 156 +++ ...d-Switch-to-no-compression-when-no-h.patch | 169 +++ ...d-Unify-multifd-and-TLS-connection-p.patch | 175 +++ ...d-Zero-p-flags-before-starting-filli.patch | 50 + ...n-multifd-add-qpl-compression-method.patch | 125 ++ ...ltifd-add-uadk-compression-framework.patch | 121 ++ 
...d-implement-initialization-of-qpl-co.patch | 369 ++++++ ...d-implement-qpl-compression-and-deco.patch | 510 ++++++++ ...n-multifd-include-ram.h-in-multifd.h.patch | 31 + ...ation-multifd-multifd_send_kick_main.patch | 76 ++ ...-multifd-multifd_send_prepare_header.patch | 82 ++ ...d-put-IOV-initialization-into-compre.patch | 168 +++ ...d-solve-zero-page-causing-multiple-p.patch | 132 ++ ...or-incorrect-migration-data-with-QPL.patch | 47 + ...or-incorrect-migration-data-with-qat.patch | 51 + ...or-migration-using-compression-metho.patch | 63 + qemu.spec | 210 +++- ...Add-integration-test-for-qatzip-comp.patch | 76 ++ ...Set-compression-level-in-migration-t.patch | 49 + ...ration-test-add-qpl-compression-test.patch | 80 ++ ...ation-test-add-uadk-compression-test.patch | 66 + ...-qtest-Re-enable-multifd-cancel-test.patch | 43 + ...ation-Add-a-wrapper-to-print-test-na.patch | 88 ++ ...ation-Print-migration-incoming-error.patch | 39 + ...ation-Use-the-new-migration_test_add.patch | 308 +++++ 106 files changed, 15319 insertions(+), 1 deletion(-) create mode 100644 arm-VirtCCA-fix-arm-softmmu-build-on-x86-platform.patch create mode 100644 arm-cvm-fix-arm-softmmu-build-on-x86-platform.patch create mode 100644 configure-Add-uadk-option.patch create mode 100644 configure-add-enable-qpl-build-option.patch create mode 100644 docs-migration-Convert-virtio.txt-into-rST.patch create mode 100644 docs-migration-Create-index-page.patch create mode 100644 docs-migration-Create-migration-directory.patch create mode 100644 docs-migration-Further-move-vfio-to-be-feature-of-mi.patch create mode 100644 docs-migration-Further-move-virtio-to-be-feature-of-.patch create mode 100644 docs-migration-Organize-Postcopy-page.patch create mode 100644 docs-migration-Split-Backwards-compatibility-separat.patch create mode 100644 docs-migration-Split-Debugging-and-Firmware.patch create mode 100644 docs-migration-Split-Postcopy.patch create mode 100644 docs-migration-Split-dirty-limit.patch create 
mode 100644 docs-migration-add-qatzip-compression-feature.patch create mode 100644 docs-migration-add-qpl-compression-feature.patch create mode 100644 docs-migration-add-uadk-compression-feature.patch create mode 100644 hw-arm-virt-HDBSS-fix-arm-softmmu-build-on-x86-platf.patch create mode 100644 hw-arm-virt-decouple-migrate_hdbss_buffer_size-with-.patch create mode 100644 hw-arm-virt-only-support-the-HDBSS-feature-in-aarch6.patch create mode 100644 hw-arm-virt-support-the-HDBSS-feature.patch create mode 100644 meson-Introduce-qatzip-feature-to-the-build-system.patch create mode 100644 migration-Add-migration-parameters-for-QATzip.patch create mode 100644 migration-Fix-logic-of-channels-and-transport-compat.patch create mode 100644 migration-Fix-migration_channel_read_peek-error-path.patch create mode 100644 migration-Introduce-qatzip-compression-method.patch create mode 100644 migration-Properly-apply-migration-compression-level.patch create mode 100644 migration-Report-error-in-incoming-migration.patch create mode 100644 migration-multifd-Add-UADK-based-compression-and-dec.patch create mode 100644 migration-multifd-Add-UADK-initialization.patch create mode 100644 migration-multifd-Add-a-synchronization-point-for-ch.patch create mode 100644 migration-multifd-Add-new-migration-option-zero-page.patch create mode 100644 migration-multifd-Allow-multifd-without-packets.patch create mode 100644 migration-multifd-Change-multifd_pages_init-argument.patch create mode 100644 migration-multifd-Change-retval-of-multifd_queue_pag.patch create mode 100644 migration-multifd-Change-retval-of-multifd_send_page.patch create mode 100644 migration-multifd-Cleanup-TLS-iochannel-referencing.patch create mode 100644 migration-multifd-Cleanup-multifd_load_cleanup.patch create mode 100644 migration-multifd-Cleanup-multifd_recv_sync_main.patch create mode 100644 migration-multifd-Cleanup-multifd_save_cleanup.patch create mode 100644 
migration-multifd-Cleanup-outgoing_args-in-state-des.patch create mode 100644 migration-multifd-Decouple-recv-method-from-pages.patch create mode 100644 migration-multifd-Drop-MultiFDSendParams.normal-arra.patch create mode 100644 migration-multifd-Drop-MultiFDSendParams.quit-cleanu.patch create mode 100644 migration-multifd-Drop-pages-num-check-in-sender-thr.patch create mode 100644 migration-multifd-Drop-registered_yank.patch create mode 100644 migration-multifd-Drop-stale-comment-for-multifd-zer.patch create mode 100644 migration-multifd-Drop-unnecessary-helper-to-destroy.patch create mode 100644 migration-multifd-Ensure-packet-ramblock-is-null-ter.patch create mode 100644 migration-multifd-Fix-MultiFDSendParams.packet_num-r.patch create mode 100644 migration-multifd-Fix-error-message-in-multifd_recv_.patch create mode 100644 migration-multifd-Fix-leaking-of-Error-in-TLS-error-.patch create mode 100644 migration-multifd-Fix-loop-conditions-in-multifd_zst.patch create mode 100644 migration-multifd-Fix-p-iov-leak-in-multifd-uadk.c.patch create mode 100644 migration-multifd-Fix-rb-receivedmap-cleanup-race.patch create mode 100644 migration-multifd-Forbid-spurious-wakeups.patch create mode 100644 migration-multifd-Implement-ram_save_target_page_mul.patch create mode 100644 migration-multifd-Implement-zero-page-transmission-o.patch create mode 100644 migration-multifd-Join-the-TLS-thread.patch create mode 100644 migration-multifd-Make-multifd_channel_connect-retur.patch create mode 100644 migration-multifd-Move-header-prepare-fill-into-send.patch create mode 100644 migration-multifd-Move-multifd_send_setup-error-hand.patch create mode 100644 migration-multifd-Move-multifd_send_setup-into-migra.patch create mode 100644 migration-multifd-Move-total_normal_pages-accounting.patch create mode 100644 migration-multifd-Move-trace_multifd_send-recv.patch create mode 100644 migration-multifd-Optimize-sender-side-to-be-lockles.patch create mode 100644 
migration-multifd-Postpone-reset-of-MultiFDPages_t.patch create mode 100644 migration-multifd-Release-recv-sem_sync-earlier.patch create mode 100644 migration-multifd-Remove-MultiFDPages_t-packet_num.patch create mode 100644 migration-multifd-Remove-QEMUFile-from-where-it-is-n.patch create mode 100644 migration-multifd-Remove-error_setg-in-migration_ioc.patch create mode 100644 migration-multifd-Remove-p-quit-from-recv-side.patch create mode 100644 migration-multifd-Remove-p-running.patch create mode 100644 migration-multifd-Remove-unnecessary-usage-of-local-.patch create mode 100644 migration-multifd-Rename-MultiFDSend-RecvParams-data.patch create mode 100644 migration-multifd-Rename-p-num_packets-and-clean-it-.patch create mode 100644 migration-multifd-Rewrite-multifd_queue_page.patch create mode 100644 migration-multifd-Separate-SYNC-request-with-normal-.patch create mode 100644 migration-multifd-Simplify-locking-in-sender-thread.patch create mode 100644 migration-multifd-Simplify-multifd_channel_connect-i.patch create mode 100644 migration-multifd-Split-multifd_send_terminate_threa.patch create mode 100644 migration-multifd-Stick-with-send-recv-on-function-n.patch create mode 100644 migration-multifd-Switch-to-no-compression-when-no-h.patch create mode 100644 migration-multifd-Unify-multifd-and-TLS-connection-p.patch create mode 100644 migration-multifd-Zero-p-flags-before-starting-filli.patch create mode 100644 migration-multifd-add-qpl-compression-method.patch create mode 100644 migration-multifd-add-uadk-compression-framework.patch create mode 100644 migration-multifd-implement-initialization-of-qpl-co.patch create mode 100644 migration-multifd-implement-qpl-compression-and-deco.patch create mode 100644 migration-multifd-include-ram.h-in-multifd.h.patch create mode 100644 migration-multifd-multifd_send_kick_main.patch create mode 100644 migration-multifd-multifd_send_prepare_header.patch create mode 100644 
migration-multifd-put-IOV-initialization-into-compre.patch create mode 100644 migration-multifd-solve-zero-page-causing-multiple-p.patch create mode 100644 multifd-bugfix-for-incorrect-migration-data-with-QPL.patch create mode 100644 multifd-bugfix-for-incorrect-migration-data-with-qat.patch create mode 100644 multifd-bugfix-for-migration-using-compression-metho.patch create mode 100644 tests-migration-Add-integration-test-for-qatzip-comp.patch create mode 100644 tests-migration-Set-compression-level-in-migration-t.patch create mode 100644 tests-migration-test-add-qpl-compression-test.patch create mode 100644 tests-migration-test-add-uadk-compression-test.patch create mode 100644 tests-qtest-Re-enable-multifd-cancel-test.patch create mode 100644 tests-qtest-migration-Add-a-wrapper-to-print-test-na.patch create mode 100644 tests-qtest-migration-Print-migration-incoming-error.patch create mode 100644 tests-qtest-migration-Use-the-new-migration_test_add.patch diff --git a/arm-VirtCCA-fix-arm-softmmu-build-on-x86-platform.patch b/arm-VirtCCA-fix-arm-softmmu-build-on-x86-platform.patch new file mode 100644 index 0000000..b28cd3e --- /dev/null +++ b/arm-VirtCCA-fix-arm-softmmu-build-on-x86-platform.patch @@ -0,0 +1,30 @@ +From 3f2e953c7faf3043396a649d4891d3d95441e70f Mon Sep 17 00:00:00 2001 +From: Jason Zeng +Date: Mon, 26 May 2025 17:06:57 +0800 +Subject: [PATCH 3/4] arm: VirtCCA: fix arm-softmmu build on x86 platform + +Add stub function for kvm_load_user_data(). 
+ +Fixes: 9eacd1a6df68 ("arm: VirtCCA: CVM support UEFI boot") +Signed-off-by: Jason Zeng +--- + accel/stubs/kvm-stub.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c +index 2625175b99..e68f3433ad 100644 +--- a/accel/stubs/kvm-stub.c ++++ b/accel/stubs/kvm-stub.c +@@ -133,3 +133,9 @@ uint32_t kvm_dirty_ring_size(void) + { + return 0; + } ++ ++int kvm_load_user_data(hwaddr loader_start, hwaddr image_end, hwaddr initrd_start, hwaddr dtb_end, hwaddr ram_size, ++ struct kvm_numa_info *numa_info) ++{ ++ return -ENOSYS; ++} +-- +2.33.0 + diff --git a/arm-cvm-fix-arm-softmmu-build-on-x86-platform.patch b/arm-cvm-fix-arm-softmmu-build-on-x86-platform.patch new file mode 100644 index 0000000..a8d4850 --- /dev/null +++ b/arm-cvm-fix-arm-softmmu-build-on-x86-platform.patch @@ -0,0 +1,37 @@ +From e97171b8b362b0122754a936053c9793a6ad2f57 Mon Sep 17 00:00:00 2001 +From: Jason Zeng +Date: Mon, 26 May 2025 17:08:49 +0800 +Subject: [PATCH 4/4] arm: cvm: fix arm-softmmu build on x86 platform + +Add stub function for tmm_set_sec_addr() and tmm_set_hpre_addr() + +Fixes: dffc0f55d93e ("cvm : Add support for TEE-based national encryption acceleration.") +Signed-off-by: Jason Zeng +--- + target/arm/kvm_arm.h | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h +index 62fbb713f4..76137289df 100644 +--- a/target/arm/kvm_arm.h ++++ b/target/arm/kvm_arm.h +@@ -497,6 +497,16 @@ static inline void tmm_add_ram_region(hwaddr base1, hwaddr len1, hwaddr base2, + { + g_assert_not_reached(); + } ++ ++static inline void tmm_set_sec_addr(hwaddr base, int num) ++{ ++ g_assert_not_reached(); ++} ++ ++static inline void tmm_set_hpre_addr(hwaddr base, int num) ++{ ++ g_assert_not_reached(); ++} + #endif + + /** +-- +2.33.0 + diff --git a/configure-Add-uadk-option.patch b/configure-Add-uadk-option.patch new file mode 100644 index 0000000..2470452 --- /dev/null +++ 
b/configure-Add-uadk-option.patch @@ -0,0 +1,98 @@ +From 49db5292ea971c00a7e29eb6d20be24012c553bf Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Fri, 7 Jun 2024 14:53:05 +0100 +Subject: [81/99] configure: Add uadk option + +commit cfc589a89b31930d9d658f4b0b6c4e6f33280e10 upstream. + +Add --enable-uadk and --disable-uadk options to enable and disable +UADK compression accelerator. This is for using UADK based hardware +accelerators for live migration. + +Reviewed-by: Fabiano Rosas +Signed-off-by: Shameer Kolothum +Reviewed-by: Zhangfei Gao +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + meson.build | 14 ++++++++++++++ + meson_options.txt | 2 ++ + scripts/meson-buildoptions.sh | 3 +++ + 3 files changed, 19 insertions(+) + +diff --git a/meson.build b/meson.build +index 888af7e099..e3599b9a09 100644 +--- a/meson.build ++++ b/meson.build +@@ -1049,6 +1049,18 @@ if not get_option('qpl').auto() or have_system + required: get_option('qpl'), + method: 'pkg-config') + endif ++uadk = not_found ++if not get_option('uadk').auto() or have_system ++ libwd = dependency('libwd', version: '>=2.6', ++ required: get_option('uadk'), ++ method: 'pkg-config') ++ libwd_comp = dependency('libwd_comp', version: '>=2.6', ++ required: get_option('uadk'), ++ method: 'pkg-config') ++ if libwd.found() and libwd_comp.found() ++ uadk = declare_dependency(dependencies: [libwd, libwd_comp]) ++ endif ++endif + virgl = not_found + + have_vhost_user_gpu = have_tools and targetos == 'linux' and pixman.found() +@@ -2288,6 +2300,7 @@ config_host_data.set('CONFIG_STATX', has_statx) + config_host_data.set('CONFIG_STATX_MNT_ID', has_statx_mnt_id) + config_host_data.set('CONFIG_ZSTD', zstd.found()) + config_host_data.set('CONFIG_QPL', qpl.found()) ++config_host_data.set('CONFIG_UADK', uadk.found()) + config_host_data.set('CONFIG_FUSE', fuse.found()) + config_host_data.set('CONFIG_FUSE_LSEEK', fuse_lseek.found()) + config_host_data.set('CONFIG_SPICE_PROTOCOL', spice_protocol.found()) 
+@@ -4463,6 +4476,7 @@ summary_info += {'bzip2 support': libbzip2} + summary_info += {'lzfse support': liblzfse} + summary_info += {'zstd support': zstd} + summary_info += {'Query Processing Library support': qpl} ++summary_info += {'UADK Library support': uadk} + summary_info += {'NUMA host support': numa} + summary_info += {'capstone': capstone} + summary_info += {'libpmem support': libpmem} +diff --git a/meson_options.txt b/meson_options.txt +index 82f73d51ce..709678fa18 100644 +--- a/meson_options.txt ++++ b/meson_options.txt +@@ -261,6 +261,8 @@ option('zstd', type : 'feature', value : 'auto', + description: 'zstd compression support') + option('qpl', type : 'feature', value : 'auto', + description: 'Query Processing Library support') ++option('uadk', type : 'feature', value : 'auto', ++ description: 'UADK Library support') + option('fuse', type: 'feature', value: 'auto', + description: 'FUSE block device export') + option('fuse_lseek', type : 'feature', value : 'auto', +diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh +index 784f74fde9..833b996818 100644 +--- a/scripts/meson-buildoptions.sh ++++ b/scripts/meson-buildoptions.sh +@@ -223,6 +223,7 @@ meson_options_help() { + printf "%s\n" ' xkbcommon xkbcommon support' + printf "%s\n" ' zstd zstd compression support' + printf "%s\n" ' qpl Query Processing Library support' ++ printf "%s\n" ' uadk UADK Library support' + } + _meson_option_parse() { + case $1 in +@@ -565,6 +566,8 @@ _meson_option_parse() { + --disable-zstd) printf "%s" -Dzstd=disabled ;; + --enable-qpl) printf "%s" -Dqpl=enabled ;; + --disable-qpl) printf "%s" -Dqpl=disabled ;; ++ --enable-uadk) printf "%s" -Duadk=enabled ;; ++ --disable-uadk) printf "%s" -Duadk=disabled ;; + *) return 1 ;; + esac + } +-- +2.33.0 + diff --git a/configure-add-enable-qpl-build-option.patch b/configure-add-enable-qpl-build-option.patch new file mode 100644 index 0000000..6eb1fb5 --- /dev/null +++ b/configure-add-enable-qpl-build-option.patch 
@@ -0,0 +1,100 @@ +From e75b4a4c735e07431d02dd85002f8175cfbd5db3 Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Mon, 10 Jun 2024 18:21:06 +0800 +Subject: [74/99] configure: add --enable-qpl build option + +commit b844a2c7cc7f7c7756a27d372e64f6688d67c4eb upstream. + +add --enable-qpl and --disable-qpl options to enable and disable +the QPL compression method for multifd migration. + +The Query Processing Library (QPL) is an open-source library +that supports data compression and decompression features. It +is based on the deflate compression algorithm and use Intel +In-Memory Analytics Accelerator(IAA) hardware for compression +and decompression acceleration. + +For more live migration with IAA, please refer to the document +docs/devel/migration/qpl-compression.rst + +Signed-off-by: Yuan Liu +Reviewed-by: Nanhai Zou +Reviewed-by: Fabiano Rosas +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + meson.build | 8 ++++++++ + meson_options.txt | 2 ++ + scripts/meson-buildoptions.sh | 3 +++ + 3 files changed, 13 insertions(+) + +diff --git a/meson.build b/meson.build +index aea6a33ca3..888af7e099 100644 +--- a/meson.build ++++ b/meson.build +@@ -1043,6 +1043,12 @@ if not get_option('zstd').auto() or have_block + required: get_option('zstd'), + method: 'pkg-config') + endif ++qpl = not_found ++if not get_option('qpl').auto() or have_system ++ qpl = dependency('qpl', version: '>=1.5.0', ++ required: get_option('qpl'), ++ method: 'pkg-config') ++endif + virgl = not_found + + have_vhost_user_gpu = have_tools and targetos == 'linux' and pixman.found() +@@ -2281,6 +2287,7 @@ config_host_data.set('CONFIG_MALLOC_TRIM', has_malloc_trim) + config_host_data.set('CONFIG_STATX', has_statx) + config_host_data.set('CONFIG_STATX_MNT_ID', has_statx_mnt_id) + config_host_data.set('CONFIG_ZSTD', zstd.found()) ++config_host_data.set('CONFIG_QPL', qpl.found()) + config_host_data.set('CONFIG_FUSE', fuse.found()) + config_host_data.set('CONFIG_FUSE_LSEEK', fuse_lseek.found()) + 
config_host_data.set('CONFIG_SPICE_PROTOCOL', spice_protocol.found()) +@@ -4455,6 +4462,7 @@ summary_info += {'snappy support': snappy} + summary_info += {'bzip2 support': libbzip2} + summary_info += {'lzfse support': liblzfse} + summary_info += {'zstd support': zstd} ++summary_info += {'Query Processing Library support': qpl} + summary_info += {'NUMA host support': numa} + summary_info += {'capstone': capstone} + summary_info += {'libpmem support': libpmem} +diff --git a/meson_options.txt b/meson_options.txt +index cf9706c411..82f73d51ce 100644 +--- a/meson_options.txt ++++ b/meson_options.txt +@@ -259,6 +259,8 @@ option('xkbcommon', type : 'feature', value : 'auto', + description: 'xkbcommon support') + option('zstd', type : 'feature', value : 'auto', + description: 'zstd compression support') ++option('qpl', type : 'feature', value : 'auto', ++ description: 'Query Processing Library support') + option('fuse', type: 'feature', value: 'auto', + description: 'FUSE block device export') + option('fuse_lseek', type : 'feature', value : 'auto', +diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh +index 680fa3f581..784f74fde9 100644 +--- a/scripts/meson-buildoptions.sh ++++ b/scripts/meson-buildoptions.sh +@@ -222,6 +222,7 @@ meson_options_help() { + printf "%s\n" ' Xen PCI passthrough support' + printf "%s\n" ' xkbcommon xkbcommon support' + printf "%s\n" ' zstd zstd compression support' ++ printf "%s\n" ' qpl Query Processing Library support' + } + _meson_option_parse() { + case $1 in +@@ -562,6 +563,8 @@ _meson_option_parse() { + --disable-xkbcommon) printf "%s" -Dxkbcommon=disabled ;; + --enable-zstd) printf "%s" -Dzstd=enabled ;; + --disable-zstd) printf "%s" -Dzstd=disabled ;; ++ --enable-qpl) printf "%s" -Dqpl=enabled ;; ++ --disable-qpl) printf "%s" -Dqpl=disabled ;; + *) return 1 ;; + esac + } +-- +2.33.0 + diff --git a/docs-migration-Convert-virtio.txt-into-rST.patch b/docs-migration-Convert-virtio.txt-into-rST.patch new file mode 
100644 index 0000000..a635eec --- /dev/null +++ b/docs-migration-Convert-virtio.txt-into-rST.patch @@ -0,0 +1,271 @@ +From 689a0e1d7e3fea78bc90ded9b17ccbf66b5e91ad Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 9 Jan 2024 14:46:21 +0800 +Subject: [17/99] docs/migration: Convert virtio.txt into rST +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 4d7a691bcfeb5580e3f7457e1f1c2fbd64572161 upstream. + +Convert the plain old .txt into .rst, add it into migration/index.rst. + +Reviewed-by: Cédric Le Goater +Link: https://lore.kernel.org/r/20240109064628.595453-4-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + docs/devel/migration/index.rst | 1 + + docs/devel/migration/virtio.rst | 115 ++++++++++++++++++++++++++++++++ + docs/devel/migration/virtio.txt | 108 ------------------------------ + 3 files changed, 116 insertions(+), 108 deletions(-) + create mode 100644 docs/devel/migration/virtio.rst + delete mode 100644 docs/devel/migration/virtio.txt + +diff --git a/docs/devel/migration/index.rst b/docs/devel/migration/index.rst +index 02cfdcc969..2cb701c77c 100644 +--- a/docs/devel/migration/index.rst ++++ b/docs/devel/migration/index.rst +@@ -9,3 +9,4 @@ QEMU live migration works. + + main + vfio ++ virtio +diff --git a/docs/devel/migration/virtio.rst b/docs/devel/migration/virtio.rst +new file mode 100644 +index 0000000000..611a18b821 +--- /dev/null ++++ b/docs/devel/migration/virtio.rst +@@ -0,0 +1,115 @@ ++======================= ++Virtio device migration ++======================= ++ ++Copyright 2015 IBM Corp. ++ ++This work is licensed under the terms of the GNU GPL, version 2 or later. See ++the COPYING file in the top-level directory. ++ ++Saving and restoring the state of virtio devices is a bit of a twisty maze, ++for several reasons: ++ ++- state is distributed between several parts: ++ ++ - virtio core, for common fields like features, number of queues, ... 
++ ++ - virtio transport (pci, ccw, ...), for the different proxy devices and ++ transport specific state (msix vectors, indicators, ...) ++ ++ - virtio device (net, blk, ...), for the different device types and their ++ state (mac address, request queue, ...) ++ ++- most fields are saved via the stream interface; subsequently, subsections ++ have been added to make cross-version migration possible ++ ++This file attempts to document the current procedure and point out some ++caveats. ++ ++Save state procedure ++==================== ++ ++:: ++ ++ virtio core virtio transport virtio device ++ ----------- ---------------- ------------- ++ ++ save() function registered ++ via VMState wrapper on ++ device class ++ virtio_save() <---------- ++ ------> save_config() ++ - save proxy device ++ - save transport-specific ++ device fields ++ - save common device ++ fields ++ - save common virtqueue ++ fields ++ ------> save_queue() ++ - save transport-specific ++ virtqueue fields ++ ------> save_device() ++ - save device-specific ++ fields ++ - save subsections ++ - device endianness, ++ if changed from ++ default endianness ++ - 64 bit features, if ++ any high feature bit ++ is set ++ - virtio-1 virtqueue ++ fields, if VERSION_1 ++ is set ++ ++Load state procedure ++==================== ++ ++:: ++ ++ virtio core virtio transport virtio device ++ ----------- ---------------- ------------- ++ ++ load() function registered ++ via VMState wrapper on ++ device class ++ virtio_load() <---------- ++ ------> load_config() ++ - load proxy device ++ - load transport-specific ++ device fields ++ - load common device ++ fields ++ - load common virtqueue ++ fields ++ ------> load_queue() ++ - load transport-specific ++ virtqueue fields ++ - notify guest ++ ------> load_device() ++ - load device-specific ++ fields ++ - load subsections ++ - device endianness ++ - 64 bit features ++ - virtio-1 virtqueue ++ fields ++ - sanitize endianness ++ - sanitize features ++ - virtqueue index sanity 
++ check ++ - feature-dependent setup ++ ++Implications of this setup ++========================== ++ ++Devices need to be careful in their state processing during load: The ++load_device() procedure is invoked by the core before subsections have ++been loaded. Any code that depends on information transmitted in subsections ++therefore has to be invoked in the device's load() function _after_ ++virtio_load() returned (like e.g. code depending on features). ++ ++Any extension of the state being migrated should be done in subsections ++added to the core for compatibility reasons. If transport or device specific ++state is added, core needs to invoke a callback from the new subsection. +diff --git a/docs/devel/migration/virtio.txt b/docs/devel/migration/virtio.txt +deleted file mode 100644 +index 98a6b0ffb5..0000000000 +--- a/docs/devel/migration/virtio.txt ++++ /dev/null +@@ -1,108 +0,0 @@ +-Virtio devices and migration +-============================ +- +-Copyright 2015 IBM Corp. +- +-This work is licensed under the terms of the GNU GPL, version 2 or later. See +-the COPYING file in the top-level directory. +- +-Saving and restoring the state of virtio devices is a bit of a twisty maze, +-for several reasons: +-- state is distributed between several parts: +- - virtio core, for common fields like features, number of queues, ... +- - virtio transport (pci, ccw, ...), for the different proxy devices and +- transport specific state (msix vectors, indicators, ...) +- - virtio device (net, blk, ...), for the different device types and their +- state (mac address, request queue, ...) +-- most fields are saved via the stream interface; subsequently, subsections +- have been added to make cross-version migration possible +- +-This file attempts to document the current procedure and point out some +-caveats. 
+- +- +-Save state procedure +-==================== +- +-virtio core virtio transport virtio device +------------ ---------------- ------------- +- +- save() function registered +- via VMState wrapper on +- device class +-virtio_save() <---------- +- ------> save_config() +- - save proxy device +- - save transport-specific +- device fields +-- save common device +- fields +-- save common virtqueue +- fields +- ------> save_queue() +- - save transport-specific +- virtqueue fields +- ------> save_device() +- - save device-specific +- fields +-- save subsections +- - device endianness, +- if changed from +- default endianness +- - 64 bit features, if +- any high feature bit +- is set +- - virtio-1 virtqueue +- fields, if VERSION_1 +- is set +- +- +-Load state procedure +-==================== +- +-virtio core virtio transport virtio device +------------ ---------------- ------------- +- +- load() function registered +- via VMState wrapper on +- device class +-virtio_load() <---------- +- ------> load_config() +- - load proxy device +- - load transport-specific +- device fields +-- load common device +- fields +-- load common virtqueue +- fields +- ------> load_queue() +- - load transport-specific +- virtqueue fields +-- notify guest +- ------> load_device() +- - load device-specific +- fields +-- load subsections +- - device endianness +- - 64 bit features +- - virtio-1 virtqueue +- fields +-- sanitize endianness +-- sanitize features +-- virtqueue index sanity +- check +- - feature-dependent setup +- +- +-Implications of this setup +-========================== +- +-Devices need to be careful in their state processing during load: The +-load_device() procedure is invoked by the core before subsections have +-been loaded. Any code that depends on information transmitted in subsections +-therefore has to be invoked in the device's load() function _after_ +-virtio_load() returned (like e.g. code depending on features). 
+- +-Any extension of the state being migrated should be done in subsections +-added to the core for compatibility reasons. If transport or device specific +-state is added, core needs to invoke a callback from the new subsection. +-- +2.33.0 + diff --git a/docs-migration-Create-index-page.patch b/docs-migration-Create-index-page.patch new file mode 100644 index 0000000..402e4f5 --- /dev/null +++ b/docs-migration-Create-index-page.patch @@ -0,0 +1,94 @@ +From d91782d895b71e416f66bc7e42797d50699839bb Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 9 Jan 2024 14:46:20 +0800 +Subject: [16/99] docs/migration: Create index page +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit f6bbac985e6df492f2c6be94fb893ada75ffdefa upstream. + +Create an index page for migration module. Move VFIO migration there too. +A trivial touch-up on the title to use lower case there. + +Since then we'll have "migration" as the top title, make the main doc file +renamed to "migration framework". + +Cc: Alex Williamson +Cc: Cédric Le Goater +Reviewed-by: Cédric Le Goater +Link: https://lore.kernel.org/r/20240109064628.595453-3-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + docs/devel/index-internals.rst | 3 +-- + docs/devel/migration/index.rst | 11 +++++++++++ + docs/devel/migration/main.rst | 6 +++--- + docs/devel/migration/vfio.rst | 2 +- + 4 files changed, 16 insertions(+), 6 deletions(-) + create mode 100644 docs/devel/migration/index.rst + +diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst +index a41d62c1eb..5636e9cf1d 100644 +--- a/docs/devel/index-internals.rst ++++ b/docs/devel/index-internals.rst +@@ -11,13 +11,12 @@ Details about QEMU's various subsystems including how to add features to them. 
+ block-coroutine-wrapper + clocks + ebpf_rss +- migration/main ++ migration/index + multi-process + reset + s390-cpu-topology + s390-dasd-ipl + tracing +- vfio-migration + vfio-iommufd + writing-monitor-commands + virtio-backends +diff --git a/docs/devel/migration/index.rst b/docs/devel/migration/index.rst +new file mode 100644 +index 0000000000..02cfdcc969 +--- /dev/null ++++ b/docs/devel/migration/index.rst +@@ -0,0 +1,11 @@ ++Migration ++========= ++ ++This is the main entry for QEMU migration documentations. It explains how ++QEMU live migration works. ++ ++.. toctree:: ++ :maxdepth: 2 ++ ++ main ++ vfio +diff --git a/docs/devel/migration/main.rst b/docs/devel/migration/main.rst +index ec55089b25..82cdb420bf 100644 +--- a/docs/devel/migration/main.rst ++++ b/docs/devel/migration/main.rst +@@ -1,6 +1,6 @@ +-========= +-Migration +-========= ++=================== ++Migration framework ++=================== + + QEMU has code to load/save the state of the guest that it is running. + These are two complementary operations. 
Saving the state just does +diff --git a/docs/devel/migration/vfio.rst b/docs/devel/migration/vfio.rst +index 605fe60e96..c49482eab6 100644 +--- a/docs/devel/migration/vfio.rst ++++ b/docs/devel/migration/vfio.rst +@@ -1,5 +1,5 @@ + ===================== +-VFIO device Migration ++VFIO device migration + ===================== + + Migration of virtual machine involves saving the state for each device that +-- +2.33.0 + diff --git a/docs-migration-Create-migration-directory.patch b/docs-migration-Create-migration-directory.patch new file mode 100644 index 0000000..9734695 --- /dev/null +++ b/docs-migration-Create-migration-directory.patch @@ -0,0 +1,65 @@ +From 830cfda7df1e63448c916492ce6be497511d6fb7 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 9 Jan 2024 14:46:19 +0800 +Subject: [15/99] docs/migration: Create migration/ directory +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 8cb2f8b172e74a7279fabb5d5c20aee32b5b98cd upstream. + +Migration documentation is growing into a single file too large. Create a +sub-directory for it for a split. + +We also already have separate vfio/virtio documentations, move it all over +into the directory. + +Note that the virtio one is still not yet converted to rST. That is a job +for later. + +Cc: "Michael S. 
Tsirkin" +Cc: Jason Wang +Cc: Alex Williamson +Cc: Cédric Le Goater +Reviewed-by: Cédric Le Goater +Link: https://lore.kernel.org/r/20240109064628.595453-2-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + docs/devel/index-internals.rst | 2 +- + docs/devel/{migration.rst => migration/main.rst} | 0 + docs/devel/{vfio-migration.rst => migration/vfio.rst} | 0 + docs/devel/{virtio-migration.txt => migration/virtio.txt} | 0 + 4 files changed, 1 insertion(+), 1 deletion(-) + rename docs/devel/{migration.rst => migration/main.rst} (100%) + rename docs/devel/{vfio-migration.rst => migration/vfio.rst} (100%) + rename docs/devel/{virtio-migration.txt => migration/virtio.txt} (100%) + +diff --git a/docs/devel/index-internals.rst b/docs/devel/index-internals.rst +index 3def4a138b..a41d62c1eb 100644 +--- a/docs/devel/index-internals.rst ++++ b/docs/devel/index-internals.rst +@@ -11,7 +11,7 @@ Details about QEMU's various subsystems including how to add features to them. + block-coroutine-wrapper + clocks + ebpf_rss +- migration ++ migration/main + multi-process + reset + s390-cpu-topology +diff --git a/docs/devel/migration.rst b/docs/devel/migration/main.rst +similarity index 100% +rename from docs/devel/migration.rst +rename to docs/devel/migration/main.rst +diff --git a/docs/devel/vfio-migration.rst b/docs/devel/migration/vfio.rst +similarity index 100% +rename from docs/devel/vfio-migration.rst +rename to docs/devel/migration/vfio.rst +diff --git a/docs/devel/virtio-migration.txt b/docs/devel/migration/virtio.txt +similarity index 100% +rename from docs/devel/virtio-migration.txt +rename to docs/devel/migration/virtio.txt +-- +2.33.0 + diff --git a/docs-migration-Further-move-vfio-to-be-feature-of-mi.patch b/docs-migration-Further-move-vfio-to-be-feature-of-mi.patch new file mode 100644 index 0000000..ffc17bf --- /dev/null +++ b/docs-migration-Further-move-vfio-to-be-feature-of-mi.patch @@ -0,0 +1,47 @@ +From 
e9614f86ff43d0417ddaa3eab8be67c565e561b9 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 9 Jan 2024 14:46:27 +0800 +Subject: [23/99] docs/migration: Further move vfio to be feature of migration +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 66fd3b1a7ab02f7d8c84f92eba23e3ddc955204d upstream. + +Move it one layer down, so taking VFIO-migration as a feature for +migration. + +Cc: Alex Williamson +Cc: Cédric Le Goater +Reviewed-by: Cédric Le Goater +Link: https://lore.kernel.org/r/20240109064628.595453-10-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + docs/devel/migration/features.rst | 1 + + docs/devel/migration/index.rst | 1 - + 2 files changed, 1 insertion(+), 1 deletion(-) + +diff --git a/docs/devel/migration/features.rst b/docs/devel/migration/features.rst +index e257d0d100..dea016f707 100644 +--- a/docs/devel/migration/features.rst ++++ b/docs/devel/migration/features.rst +@@ -8,3 +8,4 @@ Migration has plenty of features to support different use cases. + + postcopy + dirty-limit ++ vfio +diff --git a/docs/devel/migration/index.rst b/docs/devel/migration/index.rst +index 21ad58b189..b1357309e1 100644 +--- a/docs/devel/migration/index.rst ++++ b/docs/devel/migration/index.rst +@@ -10,6 +10,5 @@ QEMU live migration works. 
+ main + features + compatibility +- vfio + virtio + best-practices +-- +2.33.0 + diff --git a/docs-migration-Further-move-virtio-to-be-feature-of-.patch b/docs-migration-Further-move-virtio-to-be-feature-of-.patch new file mode 100644 index 0000000..231fcf1 --- /dev/null +++ b/docs-migration-Further-move-virtio-to-be-feature-of-.patch @@ -0,0 +1,47 @@ +From a8d5d9425ddec134a9e9c164a80b0bf1ba29381b Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 9 Jan 2024 14:46:28 +0800 +Subject: [24/99] docs/migration: Further move virtio to be feature of + migration +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit eb9f6daae49c06bb91e9660908587cc55265e43a upstream. + +Move it one layer down, so taking Virtio-migration as a feature for +migration. + +Cc: "Michael S. Tsirkin" +Cc: Jason Wang +Reviewed-by: Cédric Le Goater +Link: https://lore.kernel.org/r/20240109064628.595453-11-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + docs/devel/migration/features.rst | 1 + + docs/devel/migration/index.rst | 1 - + 2 files changed, 1 insertion(+), 1 deletion(-) + +diff --git a/docs/devel/migration/features.rst b/docs/devel/migration/features.rst +index dea016f707..a9acaf618e 100644 +--- a/docs/devel/migration/features.rst ++++ b/docs/devel/migration/features.rst +@@ -9,3 +9,4 @@ Migration has plenty of features to support different use cases. + postcopy + dirty-limit + vfio ++ virtio +diff --git a/docs/devel/migration/index.rst b/docs/devel/migration/index.rst +index b1357309e1..2aa294d631 100644 +--- a/docs/devel/migration/index.rst ++++ b/docs/devel/migration/index.rst +@@ -10,5 +10,4 @@ QEMU live migration works. 
+ main + features + compatibility +- virtio + best-practices +-- +2.33.0 + diff --git a/docs-migration-Organize-Postcopy-page.patch b/docs-migration-Organize-Postcopy-page.patch new file mode 100644 index 0000000..eac0d90 --- /dev/null +++ b/docs-migration-Organize-Postcopy-page.patch @@ -0,0 +1,229 @@ +From b15ee6a2f82aa810cfed0401d0843f33f5761d48 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 9 Jan 2024 14:46:26 +0800 +Subject: [22/99] docs/migration: Organize "Postcopy" page +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 21b17cd011c959c3fd3fdad994389410a02df901 upstream. + +Reorganize the page, moving things around, and add a few +headlines ("Postcopy internals", "Postcopy features") to cover sub-areas. + +Reviewed-by: Cédric Le Goater +Link: https://lore.kernel.org/r/20240109064628.595453-9-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + docs/devel/migration/postcopy.rst | 159 ++++++++++++++++-------------- + 1 file changed, 84 insertions(+), 75 deletions(-) + +diff --git a/docs/devel/migration/postcopy.rst b/docs/devel/migration/postcopy.rst +index d60eec06ab..6c51e96d79 100644 +--- a/docs/devel/migration/postcopy.rst ++++ b/docs/devel/migration/postcopy.rst +@@ -1,6 +1,9 @@ ++======== + Postcopy + ======== + ++.. contents:: ++ + 'Postcopy' migration is a way to deal with migrations that refuse to converge + (or take too long to converge) its plus side is that there is an upper bound on + the amount of migration traffic and time it takes, the down side is that during +@@ -14,7 +17,7 @@ Postcopy can be combined with precopy (i.e. normal migration) so that if precopy + doesn't finish in a given time the switch is made to postcopy. + + Enabling postcopy +------------------ ++================= + + To enable postcopy, issue this command on the monitor (both source and + destination) prior to the start of migration: +@@ -49,8 +52,71 @@ time per vCPU. 
+ ``migrate_set_parameter`` is ignored (to avoid delaying requested pages that + the destination is waiting for). + +-Postcopy device transfer +------------------------- ++Postcopy internals ++================== ++ ++State machine ++------------- ++ ++Postcopy moves through a series of states (see postcopy_state) from ++ADVISE->DISCARD->LISTEN->RUNNING->END ++ ++ - Advise ++ ++ Set at the start of migration if postcopy is enabled, even ++ if it hasn't had the start command; here the destination ++ checks that its OS has the support needed for postcopy, and performs ++ setup to ensure the RAM mappings are suitable for later postcopy. ++ The destination will fail early in migration at this point if the ++ required OS support is not present. ++ (Triggered by reception of POSTCOPY_ADVISE command) ++ ++ - Discard ++ ++ Entered on receipt of the first 'discard' command; prior to ++ the first Discard being performed, hugepages are switched off ++ (using madvise) to ensure that no new huge pages are created ++ during the postcopy phase, and to cause any huge pages that ++ have discards on them to be broken. ++ ++ - Listen ++ ++ The first command in the package, POSTCOPY_LISTEN, switches ++ the destination state to Listen, and starts a new thread ++ (the 'listen thread') which takes over the job of receiving ++ pages off the migration stream, while the main thread carries ++ on processing the blob. With this thread able to process page ++ reception, the destination now 'sensitises' the RAM to detect ++ any access to missing pages (on Linux using the 'userfault' ++ system). ++ ++ - Running ++ ++ POSTCOPY_RUN causes the destination to synchronise all ++ state and start the CPUs and IO devices running. The main ++ thread now finishes processing the migration package and ++ now carries on as it would for normal precopy migration ++ (although it can't do the cleanup it would do as it ++ finishes a normal migration). 
++ ++ - Paused ++ ++ Postcopy can run into a paused state (normally on both sides when ++ happens), where all threads will be temporarily halted mostly due to ++ network errors. When reaching paused state, migration will make sure ++ the qemu binary on both sides maintain the data without corrupting ++ the VM. To continue the migration, the admin needs to fix the ++ migration channel using the QMP command 'migrate-recover' on the ++ destination node, then resume the migration using QMP command 'migrate' ++ again on source node, with resume=true flag set. ++ ++ - End ++ ++ The listen thread can now quit, and perform the cleanup of migration ++ state, the migration is now complete. ++ ++Device transfer ++--------------- + + Loading of device data may cause the device emulation to access guest RAM + that may trigger faults that have to be resolved by the source, as such +@@ -130,7 +196,20 @@ processing. + is no longer used by migration, while the listen thread carries on servicing + page data until the end of migration. + +-Postcopy Recovery ++Source side page bitmap ++----------------------- ++ ++The 'migration bitmap' in postcopy is basically the same as in the precopy, ++where each of the bit to indicate that page is 'dirty' - i.e. needs ++sending. During the precopy phase this is updated as the CPU dirties ++pages, however during postcopy the CPUs are stopped and nothing should ++dirty anything any more. Instead, dirty bits are cleared when the relevant ++pages are sent during postcopy. ++ ++Postcopy features ++================= ++ ++Postcopy recovery + ----------------- + + Comparing to precopy, postcopy is special on error handlings. When any +@@ -166,76 +245,6 @@ configurations of the guest. For example, when with async page fault + enabled, logically the guest can proactively schedule out the threads + accessing missing pages. 
+ +-Postcopy states +---------------- +- +-Postcopy moves through a series of states (see postcopy_state) from +-ADVISE->DISCARD->LISTEN->RUNNING->END +- +- - Advise +- +- Set at the start of migration if postcopy is enabled, even +- if it hasn't had the start command; here the destination +- checks that its OS has the support needed for postcopy, and performs +- setup to ensure the RAM mappings are suitable for later postcopy. +- The destination will fail early in migration at this point if the +- required OS support is not present. +- (Triggered by reception of POSTCOPY_ADVISE command) +- +- - Discard +- +- Entered on receipt of the first 'discard' command; prior to +- the first Discard being performed, hugepages are switched off +- (using madvise) to ensure that no new huge pages are created +- during the postcopy phase, and to cause any huge pages that +- have discards on them to be broken. +- +- - Listen +- +- The first command in the package, POSTCOPY_LISTEN, switches +- the destination state to Listen, and starts a new thread +- (the 'listen thread') which takes over the job of receiving +- pages off the migration stream, while the main thread carries +- on processing the blob. With this thread able to process page +- reception, the destination now 'sensitises' the RAM to detect +- any access to missing pages (on Linux using the 'userfault' +- system). +- +- - Running +- +- POSTCOPY_RUN causes the destination to synchronise all +- state and start the CPUs and IO devices running. The main +- thread now finishes processing the migration package and +- now carries on as it would for normal precopy migration +- (although it can't do the cleanup it would do as it +- finishes a normal migration). +- +- - Paused +- +- Postcopy can run into a paused state (normally on both sides when +- happens), where all threads will be temporarily halted mostly due to +- network errors. 
When reaching paused state, migration will make sure +- the qemu binary on both sides maintain the data without corrupting +- the VM. To continue the migration, the admin needs to fix the +- migration channel using the QMP command 'migrate-recover' on the +- destination node, then resume the migration using QMP command 'migrate' +- again on source node, with resume=true flag set. +- +- - End +- +- The listen thread can now quit, and perform the cleanup of migration +- state, the migration is now complete. +- +-Source side page map +--------------------- +- +-The 'migration bitmap' in postcopy is basically the same as in the precopy, +-where each of the bit to indicate that page is 'dirty' - i.e. needs +-sending. During the precopy phase this is updated as the CPU dirties +-pages, however during postcopy the CPUs are stopped and nothing should +-dirty anything any more. Instead, dirty bits are cleared when the relevant +-pages are sent during postcopy. +- + Postcopy with hugepages + ----------------------- + +@@ -293,7 +302,7 @@ Retro-fitting postcopy to existing clients is possible: + guest memory access is made while holding a lock then all other + threads waiting for that lock will also be blocked. 
+ +-Postcopy Preemption Mode ++Postcopy preemption mode + ------------------------ + + Postcopy preempt is a new capability introduced in 8.0 QEMU release, it +-- +2.33.0 + diff --git a/docs-migration-Split-Backwards-compatibility-separat.patch b/docs-migration-Split-Backwards-compatibility-separat.patch new file mode 100644 index 0000000..e02fdad --- /dev/null +++ b/docs-migration-Split-Backwards-compatibility-separat.patch @@ -0,0 +1,1088 @@ +From ed43780ea13b581be42a154890bdcc8e58919dd9 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 9 Jan 2024 14:46:22 +0800 +Subject: [18/99] docs/migration: Split "Backwards compatibility" separately +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 6cc6a7b98b88f1a7d1d5ed99db0d373a46606aac upstream. + +Split the section from main.rst into a separate file. Reference it in the +index.rst. + +Reviewed-by: Cédric Le Goater +Link: https://lore.kernel.org/r/20240109064628.595453-5-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + docs/devel/migration/compatibility.rst | 517 ++++++++++++++++++++++++ + docs/devel/migration/index.rst | 1 + + docs/devel/migration/main.rst | 519 ------------------------- + 3 files changed, 518 insertions(+), 519 deletions(-) + create mode 100644 docs/devel/migration/compatibility.rst + +diff --git a/docs/devel/migration/compatibility.rst b/docs/devel/migration/compatibility.rst +new file mode 100644 +index 0000000000..5a5417ef06 +--- /dev/null ++++ b/docs/devel/migration/compatibility.rst +@@ -0,0 +1,517 @@ ++Backwards compatibility ++======================= ++ ++How backwards compatibility works ++--------------------------------- ++ ++When we do migration, we have two QEMU processes: the source and the ++target. There are two cases, they are the same version or they are ++different versions. The easy case is when they are the same version. ++The difficult one is when they are different versions. 
++ ++There are two things that are different, but they have very similar ++names and sometimes get confused: ++ ++- QEMU version ++- machine type version ++ ++Let's start with a practical example, we start with: ++ ++- qemu-system-x86_64 (v5.2), from now on qemu-5.2. ++- qemu-system-x86_64 (v5.1), from now on qemu-5.1. ++ ++Related to this are the "latest" machine types defined on each of ++them: ++ ++- pc-q35-5.2 (newer one in qemu-5.2) from now on pc-5.2 ++- pc-q35-5.1 (newer one in qemu-5.1) from now on pc-5.1 ++ ++First of all, migration is only supposed to work if you use the same ++machine type in both source and destination. The QEMU hardware ++configuration needs to be the same also on source and destination. ++Most aspects of the backend configuration can be changed at will, ++except for a few cases where the backend features influence frontend ++device feature exposure. But that is not relevant for this section. ++ ++I am going to list the number of combinations that we can have. Let's ++start with the trivial ones, QEMU is the same on source and ++destination: ++ ++1 - qemu-5.2 -M pc-5.2 -> migrates to -> qemu-5.2 -M pc-5.2 ++ ++ This is the latest QEMU with the latest machine type. ++ This has to work, and if it doesn't work it is a bug. ++ ++2 - qemu-5.1 -M pc-5.1 -> migrates to -> qemu-5.1 -M pc-5.1 ++ ++ Exactly the same case as the previous one, but for 5.1. ++ Nothing to see here either. ++ ++These are the easiest ones, we will not talk more about them in this ++section. ++ ++Now we start with the more interesting cases. Consider the case where ++we have the same QEMU version in both sides (qemu-5.2) but we are not using ++the latest machine type for that version (pc-5.2) but one of an older ++QEMU version, in this case pc-5.1.
++ ++3 - qemu-5.2 -M pc-5.1 -> migrates to -> qemu-5.2 -M pc-5.1 ++ ++ It needs to use the definition of pc-5.1 and the devices as they ++ were configured on 5.1, but this should be easy in the sense that ++ both sides are the same QEMU and both sides have exactly the same ++ idea of what the pc-5.1 machine is. ++ ++4 - qemu-5.1 -M pc-5.2 -> migrates to -> qemu-5.1 -M pc-5.2 ++ ++ This combination is not possible as the qemu-5.1 doesn't understand ++ pc-5.2 machine type. So nothing to worry about here. ++ ++Now come the interesting ones, when both QEMU processes are ++different. Notice also that the machine type needs to be pc-5.1, ++because we have the limitation that qemu-5.1 doesn't know pc-5.2. So ++the possible cases are: ++ ++5 - qemu-5.2 -M pc-5.1 -> migrates to -> qemu-5.1 -M pc-5.1 ++ ++ This migration is known as newer to older. We need to make sure, ++ when we are developing 5.2, that we take care not to break ++ migration to qemu-5.1. Notice that we can't make updates to ++ qemu-5.1 to understand whatever qemu-5.2 decides to change, so it is ++ in qemu-5.2 side to make the relevant changes. ++ ++6 - qemu-5.1 -M pc-5.1 -> migrates to -> qemu-5.2 -M pc-5.1 ++ ++ This migration is known as older to newer. We need to make sure ++ that we are able to receive migrations from qemu-5.1. The problem is ++ similar to the previous one. ++ ++If qemu-5.1 and qemu-5.2 were the same, there would not be any ++compatibility problems. But the reason that we create qemu-5.2 is to ++get new features, devices, defaults, etc. ++ ++If we get a device that has a new feature, or change a default value, ++we have a problem when we try to migrate between different QEMU ++versions. ++ ++So we need a way to tell qemu-5.2 that when we are using machine type ++pc-5.1, it needs to **not** use the feature, to be able to migrate to ++real qemu-5.1. ++ ++And the equivalent part when migrating from qemu-5.1 to qemu-5.2.
++qemu-5.2 has to expect that it is not going to get data for the new ++feature, because qemu-5.1 doesn't know about it. ++ ++How do we tell QEMU about these device feature changes? In ++hw/core/machine.c:hw_compat_X_Y arrays. ++ ++If we change a default value, we need to put back the old value on ++that array. And the device, during initialization needs to look at ++that array to see what value it needs to get for that feature. And ++what are we going to put in that array? The value of a property. ++ ++To create a property for a device, we need to use one of the ++DEFINE_PROP_*() macros. See include/hw/qdev-properties.h to find the ++macros that exist. With it, we set the default value for that ++property, and that is what it is going to get in the latest released ++version. But if we want a different value for a previous version, we ++can change that in the hw_compat_X_Y arrays. ++ ++hw_compat_X_Y is an array of entries that have the format: ++ ++- name_device ++- name_property ++- value ++ ++Let's see a practical example. ++ ++In qemu-5.2 virtio-blk-device got multi queue support. This is a ++change that is not backward compatible. In qemu-5.1 it has one ++queue. In qemu-5.2 it has the same number of queues as the number of ++cpus in the system. ++ ++When we are doing migration, if we migrate from a device that has 4 ++queues to a device that has only one queue, we don't know where to ++put the extra information for the other 3 queues, and we fail ++migration. ++ ++Similar problem when we migrate from qemu-5.1 that has only one queue ++to qemu-5.2, we only send information for one queue, but destination ++has 4, and we have 3 queues that are not properly initialized and ++anything can happen. ++ ++So, how can we address this problem? Easy, just convince qemu-5.2 ++that when it is running pc-5.1, it needs to set the number of queues ++for virtio-blk-devices to 1. ++ ++That way we fix the cases 5 and 6.
++ ++5 - qemu-5.2 -M pc-5.1 -> migrates to -> qemu-5.1 -M pc-5.1 ++ ++ qemu-5.2 -M pc-5.1 sets number of queues to be 1. ++ qemu-5.1 -M pc-5.1 expects number of queues to be 1. ++ ++ correct. migration works. ++ ++6 - qemu-5.1 -M pc-5.1 -> migrates to -> qemu-5.2 -M pc-5.1 ++ ++ qemu-5.1 -M pc-5.1 sets number of queues to be 1. ++ qemu-5.2 -M pc-5.1 expects number of queues to be 1. ++ ++ correct. migration works. ++ ++And now the other interesting case, case 3. In this case we have: ++ ++3 - qemu-5.2 -M pc-5.1 -> migrates to -> qemu-5.2 -M pc-5.1 ++ ++ Here we have the same QEMU in both sides. So it doesn't matter a ++ lot if we have set the number of queues to 1 or not, because ++ they are the same. ++ ++ WRONG! ++ ++ Think what happens if we do one of these double migrations: ++ ++ A -> migrates -> B -> migrates -> C ++ ++ where: ++ ++ A: qemu-5.1 -M pc-5.1 ++ B: qemu-5.2 -M pc-5.1 ++ C: qemu-5.2 -M pc-5.1 ++ ++ migration A -> B is case 6, so number of queues needs to be 1. ++ ++ migration B -> C is case 3, so we don't care. But actually we ++ care because we haven't started the guest in qemu-5.2, it came ++ migrated from qemu-5.1. So to be on the safe side, we need to ++ always use number of queues 1 when we are using pc-5.1. ++ ++Now, how was this done in reality?
The following commit shows how it ++was done:: ++ ++ commit 9445e1e15e66c19e42bea942ba810db28052cd05 ++ Author: Stefan Hajnoczi ++ Date: Tue Aug 18 15:33:47 2020 +0100 ++ ++ virtio-blk-pci: default num_queues to -smp N ++ ++The relevant parts for migration are:: ++ ++ @@ -1281,7 +1284,8 @@ static Property virtio_blk_properties[] = { ++ #endif ++ DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0, ++ true), ++ - DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, 1), ++ + DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, ++ + VIRTIO_BLK_AUTO_NUM_QUEUES), ++ DEFINE_PROP_UINT16("queue-size", VirtIOBlock, conf.queue_size, 256), ++ ++It changes the default value of num_queues. But it fixes it for old ++machine types to have the right value:: ++ ++ @@ -31,6 +31,7 @@ ++ GlobalProperty hw_compat_5_1[] = { ++ ... ++ + { "virtio-blk-device", "num-queues", "1"}, ++ ... ++ }; ++ ++A device with different features on both sides ++---------------------------------------------- ++ ++Let's assume that we are using the same QEMU binary on both sides, ++just to make the things easier. But we have a device that has ++different features on both sides of the migration. That can be ++because the devices are different, because the kernel drivers of both ++devices have different features, whatever. ++ ++How can we get this to work with migration? The way to do that is ++"theoretically" easy. You have to get the features that the device ++has in the source of the migration. The features that the device has ++on the target of the migration, you get the intersection of the ++features of both sides, and that is the way that you should launch ++QEMU. ++ ++Notice that this is not completely related to QEMU. The most ++important thing here is that this should be handled by the managing ++application that launches QEMU. If QEMU is configured correctly, the ++migration will succeed. ++ ++That said, actually doing it is complicated.
Almost all devices are ++bad at being able to be launched with only some features enabled. ++With one big exception: cpus. ++ ++You can read the documentation for QEMU x86 cpu models here: ++ ++https://qemu-project.gitlab.io/qemu/system/qemu-cpu-models.html ++ ++See when they talk about migration they recommend that one chooses the ++newest cpu model that is supported for all cpus. ++ ++Let's say that we have: ++ ++Host A: ++ ++Device X has the feature Y ++ ++Host B: ++ ++Device X has not the feature Y ++ ++If we try to migrate without any care from host A to host B, it will ++fail because when migration tries to load the feature Y on ++destination, it will find that the hardware is not there. ++ ++Doing this would be the equivalent of doing with cpus: ++ ++Host A: ++ ++$ qemu-system-x86_64 -cpu host ++ ++Host B: ++ ++$ qemu-system-x86_64 -cpu host ++ ++When both hosts have different cpu features this is guaranteed to ++fail. Especially if Host B has less features than host A. If host A ++has less features than host B, sometimes it works. Important word of ++last sentence is "sometimes". ++ ++So, forgetting about cpu models and continuing with the -cpu host ++example, let's see that the differences of the cpus is that Host A and ++B have the following features: ++ ++Features: 'pcid' 'stibp' 'taa-no' ++Host A: X X ++Host B: X ++ ++And we want to migrate between them, the way configure both QEMU cpu ++will be: ++ ++Host A: ++ ++$ qemu-system-x86_64 -cpu host,pcid=off,stibp=off ++ ++Host B: ++ ++$ qemu-system-x86_64 -cpu host,taa-no=off ++ ++And you would be able to migrate between them. It is responsibility ++of the management application or of the user to make sure that the ++configuration is correct. QEMU doesn't know how to look at this kind ++of features in general. ++ ++Notice that we don't recommend to use -cpu host for migration. It is ++used in this example because it makes the example simpler. ++ ++Other devices have worse control about individual features. 
If they ++want to be able to migrate between hosts that show different features, ++the device needs a way to configure which ones it is going to use. ++ ++In this section we have considered that we are using the same QEMU ++binary in both sides of the migration. If we use different QEMU ++process versions, then we need to take into account all other ++differences and the examples become even more complicated. ++ ++How to mitigate when we have a backward compatibility error ++----------------------------------------------------------- ++ ++We broke migration for old machine types continuously during ++development. But as soon as we find that there is a problem, we fix ++it. The problem is what happens when we detect after we have done a ++release that something has gone wrong. ++ ++Let's see how it worked with one example. ++ ++After the release of qemu-8.0 we found a problem when doing migration ++of the machine type pc-7.2. ++ ++- $ qemu-7.2 -M pc-7.2 -> qemu-7.2 -M pc-7.2 ++ ++ This migration works ++ ++- $ qemu-8.0 -M pc-7.2 -> qemu-8.0 -M pc-7.2 ++ ++ This migration works ++ ++- $ qemu-8.0 -M pc-7.2 -> qemu-7.2 -M pc-7.2 ++ ++ This migration fails ++ ++- $ qemu-7.2 -M pc-7.2 -> qemu-8.0 -M pc-7.2 ++ ++ This migration fails ++ ++So clearly something fails when migrating between qemu-7.2 and ++qemu-8.0 with machine type pc-7.2. The error messages, and git bisect ++pointed to this commit. 
++ ++In qemu-8.0 we got this commit:: ++ ++ commit 010746ae1db7f52700cb2e2c46eb94f299cfa0d2 ++ Author: Jonathan Cameron ++ Date: Thu Mar 2 13:37:02 2023 +0000 ++ ++ hw/pci/aer: Implement PCI_ERR_UNCOR_MASK register ++ ++ ++The relevant bits of the commit for our example are this ones:: ++ ++ --- a/hw/pci/pcie_aer.c ++ +++ b/hw/pci/pcie_aer.c ++ @@ -112,6 +112,10 @@ int pcie_aer_init(PCIDevice *dev, ++ ++ pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS, ++ PCI_ERR_UNC_SUPPORTED); ++ + pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, ++ + PCI_ERR_UNC_MASK_DEFAULT); ++ + pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, ++ + PCI_ERR_UNC_SUPPORTED); ++ ++ pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER, ++ PCI_ERR_UNC_SEVERITY_DEFAULT); ++ ++The patch changes how we configure PCI space for AER. But QEMU fails ++when the PCI space configuration is different between source and ++destination. ++ ++The following commit shows how this got fixed:: ++ ++ commit 5ed3dabe57dd9f4c007404345e5f5bf0e347317f ++ Author: Leonardo Bras ++ Date: Tue May 2 21:27:02 2023 -0300 ++ ++ hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine type < 8.0 ++ ++ [...] ++ ++The relevant parts of the fix in QEMU are as follow: ++ ++First, we create a new property for the device to be able to configure ++the old behaviour or the new behaviour:: ++ ++ diff --git a/hw/pci/pci.c b/hw/pci/pci.c ++ index 8a87ccc8b0..5153ad63d6 100644 ++ --- a/hw/pci/pci.c ++ +++ b/hw/pci/pci.c ++ @@ -79,6 +79,8 @@ static Property pci_props[] = { ++ DEFINE_PROP_STRING("failover_pair_id", PCIDevice, ++ failover_pair_id), ++ DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0), ++ + DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present, ++ + QEMU_PCIE_ERR_UNC_MASK_BITNR, true), ++ DEFINE_PROP_END_OF_LIST() ++ }; ++ ++Notice that we enable the feature for new machine types. ++ ++Now we see how the fix is done. 
This is going to depend on what kind ++of breakage happens, but in this case it is quite simple:: ++ ++ diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c ++ index 103667c368..374d593ead 100644 ++ --- a/hw/pci/pcie_aer.c ++ +++ b/hw/pci/pcie_aer.c ++ @@ -112,10 +112,13 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, ++ uint16_t offset, ++ ++ pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS, ++ PCI_ERR_UNC_SUPPORTED); ++ - pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, ++ - PCI_ERR_UNC_MASK_DEFAULT); ++ - pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, ++ - PCI_ERR_UNC_SUPPORTED); ++ + ++ + if (dev->cap_present & QEMU_PCIE_ERR_UNC_MASK) { ++ + pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, ++ + PCI_ERR_UNC_MASK_DEFAULT); ++ + pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, ++ + PCI_ERR_UNC_SUPPORTED); ++ + } ++ ++ pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER, ++ PCI_ERR_UNC_SEVERITY_DEFAULT); ++ ++I.e. If the property bit is enabled, we configure it as we did for ++qemu-8.0. If the property bit is not set, we configure it as it was in 7.2. ++ ++And now, everything that is missing is disabling the feature for old ++machine types:: ++ ++ diff --git a/hw/core/machine.c b/hw/core/machine.c ++ index 47a34841a5..07f763eb2e 100644 ++ --- a/hw/core/machine.c ++ +++ b/hw/core/machine.c ++ @@ -48,6 +48,7 @@ GlobalProperty hw_compat_7_2[] = { ++ { "e1000e", "migrate-timadj", "off" }, ++ { "virtio-mem", "x-early-migration", "false" }, ++ { "migration", "x-preempt-pre-7-2", "true" }, ++ + { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" }, ++ }; ++ const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); ++ ++And now, when qemu-8.0.1 is released with this fix, all combinations ++are going to work as supposed. 
++ ++- $ qemu-7.2 -M pc-7.2 -> qemu-7.2 -M pc-7.2 (works) ++- $ qemu-8.0.1 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 (works) ++- $ qemu-8.0.1 -M pc-7.2 -> qemu-7.2 -M pc-7.2 (works) ++- $ qemu-7.2 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 (works) ++ ++So the normality has been restored and everything is ok, no? ++ ++Not really, now our matrix is much bigger. We started with the easy ++cases, migration from the same version to the same version always ++works: ++ ++- $ qemu-7.2 -M pc-7.2 -> qemu-7.2 -M pc-7.2 ++- $ qemu-8.0 -M pc-7.2 -> qemu-8.0 -M pc-7.2 ++- $ qemu-8.0.1 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 ++ ++Now the interesting ones, when the QEMU process versions are ++different. For the 1st set, they fail and we can do nothing, both ++versions are released and we can't change anything. ++ ++- $ qemu-7.2 -M pc-7.2 -> qemu-8.0 -M pc-7.2 ++- $ qemu-8.0 -M pc-7.2 -> qemu-7.2 -M pc-7.2 ++ ++These two are the ones that work. The whole point of making the ++change in qemu-8.0.1 release was to fix this issue: ++ ++- $ qemu-7.2 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 ++- $ qemu-8.0.1 -M pc-7.2 -> qemu-7.2 -M pc-7.2 ++ ++But now we found that qemu-8.0 can migrate neither to qemu-7.2 nor to ++qemu-8.0.1. ++ ++- $ qemu-8.0 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 ++- $ qemu-8.0.1 -M pc-7.2 -> qemu-8.0 -M pc-7.2 ++ ++So, if we start a pc-7.2 machine in qemu-8.0 we can't migrate it to ++anything except to qemu-8.0. ++ ++Can we do better? ++ ++Yeap. If we know that we are going to do this migration: ++ ++- $ qemu-8.0 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 ++ ++We can launch the appropriate devices with:: ++ ++ --device...,x-pcie-err-unc-mask=on ++ ++And now we can receive a migration from 8.0. And from now on, we can ++do that migration to new machine types if we remember to enable that ++property for pc-7.2. Notice that we need to remember, it is not ++enough to know that the source of the migration is qemu-8.0. 
Think of ++this example: ++ ++$ qemu-8.0 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 -> qemu-8.2 -M pc-7.2 ++ ++In the second migration, the source is not qemu-8.0, but we still have ++that "problem" and have that property enabled. Notice that we need to ++continue having this mark/property until we have this machine ++rebooted. But it is not a normal reboot (that don't reload QEMU) we ++need the machine to poweroff/poweron on a fixed QEMU. And from now ++on we can use the proper real machine. +diff --git a/docs/devel/migration/index.rst b/docs/devel/migration/index.rst +index 2cb701c77c..7fc02b9520 100644 +--- a/docs/devel/migration/index.rst ++++ b/docs/devel/migration/index.rst +@@ -8,5 +8,6 @@ QEMU live migration works. + :maxdepth: 2 + + main ++ compatibility + vfio + virtio +diff --git a/docs/devel/migration/main.rst b/docs/devel/migration/main.rst +index 82cdb420bf..04194414af 100644 +--- a/docs/devel/migration/main.rst ++++ b/docs/devel/migration/main.rst +@@ -993,522 +993,3 @@ In some cases it may be best to tie specific firmware versions to specific + versioned machine types to cut down on the combinations that will need + support. This is also useful when newer versions of firmware outgrow + the padding. +- +- +-Backwards compatibility +-======================= +- +-How backwards compatibility works +---------------------------------- +- +-When we do migration, we have two QEMU processes: the source and the +-target. There are two cases, they are the same version or they are +-different versions. The easy case is when they are the same version. +-The difficult one is when they are different versions. +- +-There are two things that are different, but they have very similar +-names and sometimes get confused: +- +-- QEMU version +-- machine type version +- +-Let's start with a practical example, we start with: +- +-- qemu-system-x86_64 (v5.2), from now on qemu-5.2. +-- qemu-system-x86_64 (v5.1), from now on qemu-5.1. 
+- +-Related to this are the "latest" machine types defined on each of +-them: +- +-- pc-q35-5.2 (newer one in qemu-5.2) from now on pc-5.2 +-- pc-q35-5.1 (newer one in qemu-5.1) from now on pc-5.1 +- +-First of all, migration is only supposed to work if you use the same +-machine type in both source and destination. The QEMU hardware +-configuration needs to be the same also on source and destination. +-Most aspects of the backend configuration can be changed at will, +-except for a few cases where the backend features influence frontend +-device feature exposure. But that is not relevant for this section. +- +-I am going to list the number of combinations that we can have. Let's +-start with the trivial ones, QEMU is the same on source and +-destination: +- +-1 - qemu-5.2 -M pc-5.2 -> migrates to -> qemu-5.2 -M pc-5.2 +- +- This is the latest QEMU with the latest machine type. +- This have to work, and if it doesn't work it is a bug. +- +-2 - qemu-5.1 -M pc-5.1 -> migrates to -> qemu-5.1 -M pc-5.1 +- +- Exactly the same case than the previous one, but for 5.1. +- Nothing to see here either. +- +-This are the easiest ones, we will not talk more about them in this +-section. +- +-Now we start with the more interesting cases. Consider the case where +-we have the same QEMU version in both sides (qemu-5.2) but we are using +-the latest machine type for that version (pc-5.2) but one of an older +-QEMU version, in this case pc-5.1. +- +-3 - qemu-5.2 -M pc-5.1 -> migrates to -> qemu-5.2 -M pc-5.1 +- +- It needs to use the definition of pc-5.1 and the devices as they +- were configured on 5.1, but this should be easy in the sense that +- both sides are the same QEMU and both sides have exactly the same +- idea of what the pc-5.1 machine is. +- +-4 - qemu-5.1 -M pc-5.2 -> migrates to -> qemu-5.1 -M pc-5.2 +- +- This combination is not possible as the qemu-5.1 doesn't understand +- pc-5.2 machine type. So nothing to worry here. 
+- +-Now it comes the interesting ones, when both QEMU processes are +-different. Notice also that the machine type needs to be pc-5.1, +-because we have the limitation than qemu-5.1 doesn't know pc-5.2. So +-the possible cases are: +- +-5 - qemu-5.2 -M pc-5.1 -> migrates to -> qemu-5.1 -M pc-5.1 +- +- This migration is known as newer to older. We need to make sure +- when we are developing 5.2 we need to take care about not to break +- migration to qemu-5.1. Notice that we can't make updates to +- qemu-5.1 to understand whatever qemu-5.2 decides to change, so it is +- in qemu-5.2 side to make the relevant changes. +- +-6 - qemu-5.1 -M pc-5.1 -> migrates to -> qemu-5.2 -M pc-5.1 +- +- This migration is known as older to newer. We need to make sure +- than we are able to receive migrations from qemu-5.1. The problem is +- similar to the previous one. +- +-If qemu-5.1 and qemu-5.2 were the same, there will not be any +-compatibility problems. But the reason that we create qemu-5.2 is to +-get new features, devices, defaults, etc. +- +-If we get a device that has a new feature, or change a default value, +-we have a problem when we try to migrate between different QEMU +-versions. +- +-So we need a way to tell qemu-5.2 that when we are using machine type +-pc-5.1, it needs to **not** use the feature, to be able to migrate to +-real qemu-5.1. +- +-And the equivalent part when migrating from qemu-5.1 to qemu-5.2. +-qemu-5.2 has to expect that it is not going to get data for the new +-feature, because qemu-5.1 doesn't know about it. +- +-How do we tell QEMU about these device feature changes? In +-hw/core/machine.c:hw_compat_X_Y arrays. +- +-If we change a default value, we need to put back the old value on +-that array. And the device, during initialization needs to look at +-that array to see what value it needs to get for that feature. And +-what are we going to put in that array, the value of a property. 
+- +-To create a property for a device, we need to use one of the +-DEFINE_PROP_*() macros. See include/hw/qdev-properties.h to find the +-macros that exist. With it, we set the default value for that +-property, and that is what it is going to get in the latest released +-version. But if we want a different value for a previous version, we +-can change that in the hw_compat_X_Y arrays. +- +-hw_compat_X_Y is an array of registers that have the format: +- +-- name_device +-- name_property +-- value +- +-Let's see a practical example. +- +-In qemu-5.2 virtio-blk-device got multi queue support. This is a +-change that is not backward compatible. In qemu-5.1 it has one +-queue. In qemu-5.2 it has the same number of queues as the number of +-cpus in the system. +- +-When we are doing migration, if we migrate from a device that has 4 +-queues to a device that have only one queue, we don't know where to +-put the extra information for the other 3 queues, and we fail +-migration. +- +-Similar problem when we migrate from qemu-5.1 that has only one queue +-to qemu-5.2, we only sent information for one queue, but destination +-has 4, and we have 3 queues that are not properly initialized and +-anything can happen. +- +-So, how can we address this problem. Easy, just convince qemu-5.2 +-that when it is running pc-5.1, it needs to set the number of queues +-for virtio-blk-devices to 1. +- +-That way we fix the cases 5 and 6. +- +-5 - qemu-5.2 -M pc-5.1 -> migrates to -> qemu-5.1 -M pc-5.1 +- +- qemu-5.2 -M pc-5.1 sets number of queues to be 1. +- qemu-5.1 -M pc-5.1 expects number of queues to be 1. +- +- correct. migration works. +- +-6 - qemu-5.1 -M pc-5.1 -> migrates to -> qemu-5.2 -M pc-5.1 +- +- qemu-5.1 -M pc-5.1 sets number of queues to be 1. +- qemu-5.2 -M pc-5.1 expects number of queues to be 1. +- +- correct. migration works. +- +-And now the other interesting case, case 3. 
In this case we have: +- +-3 - qemu-5.2 -M pc-5.1 -> migrates to -> qemu-5.2 -M pc-5.1 +- +- Here we have the same QEMU in both sides. So it doesn't matter a +- lot if we have set the number of queues to 1 or not, because +- they are the same. +- +- WRONG! +- +- Think what happens if we do one of this double migrations: +- +- A -> migrates -> B -> migrates -> C +- +- where: +- +- A: qemu-5.1 -M pc-5.1 +- B: qemu-5.2 -M pc-5.1 +- C: qemu-5.2 -M pc-5.1 +- +- migration A -> B is case 6, so number of queues needs to be 1. +- +- migration B -> C is case 3, so we don't care. But actually we +- care because we haven't started the guest in qemu-5.2, it came +- migrated from qemu-5.1. So to be in the safe place, we need to +- always use number of queues 1 when we are using pc-5.1. +- +-Now, how was this done in reality? The following commit shows how it +-was done:: +- +- commit 9445e1e15e66c19e42bea942ba810db28052cd05 +- Author: Stefan Hajnoczi +- Date: Tue Aug 18 15:33:47 2020 +0100 +- +- virtio-blk-pci: default num_queues to -smp N +- +-The relevant parts for migration are:: +- +- @@ -1281,7 +1284,8 @@ static Property virtio_blk_properties[] = { +- #endif +- DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0, +- true), +- - DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, 1), +- + DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, +- + VIRTIO_BLK_AUTO_NUM_QUEUES), +- DEFINE_PROP_UINT16("queue-size", VirtIOBlock, conf.queue_size, 256), +- +-It changes the default value of num_queues. But it fishes it for old +-machine types to have the right value:: +- +- @@ -31,6 +31,7 @@ +- GlobalProperty hw_compat_5_1[] = { +- ... +- + { "virtio-blk-device", "num-queues", "1"}, +- ... +- }; +- +-A device with different features on both sides +----------------------------------------------- +- +-Let's assume that we are using the same QEMU binary on both sides, +-just to make the things easier. 
But we have a device that has +-different features on both sides of the migration. That can be +-because the devices are different, because the kernel driver of both +-devices have different features, whatever. +- +-How can we get this to work with migration. The way to do that is +-"theoretically" easy. You have to get the features that the device +-has in the source of the migration. The features that the device has +-on the target of the migration, you get the intersection of the +-features of both sides, and that is the way that you should launch +-QEMU. +- +-Notice that this is not completely related to QEMU. The most +-important thing here is that this should be handled by the managing +-application that launches QEMU. If QEMU is configured correctly, the +-migration will succeed. +- +-That said, actually doing it is complicated. Almost all devices are +-bad at being able to be launched with only some features enabled. +-With one big exception: cpus. +- +-You can read the documentation for QEMU x86 cpu models here: +- +-https://qemu-project.gitlab.io/qemu/system/qemu-cpu-models.html +- +-See when they talk about migration they recommend that one chooses the +-newest cpu model that is supported for all cpus. +- +-Let's say that we have: +- +-Host A: +- +-Device X has the feature Y +- +-Host B: +- +-Device X has not the feature Y +- +-If we try to migrate without any care from host A to host B, it will +-fail because when migration tries to load the feature Y on +-destination, it will find that the hardware is not there. +- +-Doing this would be the equivalent of doing with cpus: +- +-Host A: +- +-$ qemu-system-x86_64 -cpu host +- +-Host B: +- +-$ qemu-system-x86_64 -cpu host +- +-When both hosts have different cpu features this is guaranteed to +-fail. Especially if Host B has less features than host A. If host A +-has less features than host B, sometimes it works. Important word of +-last sentence is "sometimes". 
+- +-So, forgetting about cpu models and continuing with the -cpu host +-example, let's see that the differences of the cpus is that Host A and +-B have the following features: +- +-Features: 'pcid' 'stibp' 'taa-no' +-Host A: X X +-Host B: X +- +-And we want to migrate between them, the way configure both QEMU cpu +-will be: +- +-Host A: +- +-$ qemu-system-x86_64 -cpu host,pcid=off,stibp=off +- +-Host B: +- +-$ qemu-system-x86_64 -cpu host,taa-no=off +- +-And you would be able to migrate between them. It is responsibility +-of the management application or of the user to make sure that the +-configuration is correct. QEMU doesn't know how to look at this kind +-of features in general. +- +-Notice that we don't recommend to use -cpu host for migration. It is +-used in this example because it makes the example simpler. +- +-Other devices have worse control about individual features. If they +-want to be able to migrate between hosts that show different features, +-the device needs a way to configure which ones it is going to use. +- +-In this section we have considered that we are using the same QEMU +-binary in both sides of the migration. If we use different QEMU +-versions process, then we need to have into account all other +-differences and the examples become even more complicated. +- +-How to mitigate when we have a backward compatibility error +------------------------------------------------------------ +- +-We broke migration for old machine types continuously during +-development. But as soon as we find that there is a problem, we fix +-it. The problem is what happens when we detect after we have done a +-release that something has gone wrong. +- +-Let see how it worked with one example. +- +-After the release of qemu-8.0 we found a problem when doing migration +-of the machine type pc-7.2. 
+- +-- $ qemu-7.2 -M pc-7.2 -> qemu-7.2 -M pc-7.2 +- +- This migration works +- +-- $ qemu-8.0 -M pc-7.2 -> qemu-8.0 -M pc-7.2 +- +- This migration works +- +-- $ qemu-8.0 -M pc-7.2 -> qemu-7.2 -M pc-7.2 +- +- This migration fails +- +-- $ qemu-7.2 -M pc-7.2 -> qemu-8.0 -M pc-7.2 +- +- This migration fails +- +-So clearly something fails when migration between qemu-7.2 and +-qemu-8.0 with machine type pc-7.2. The error messages, and git bisect +-pointed to this commit. +- +-In qemu-8.0 we got this commit:: +- +- commit 010746ae1db7f52700cb2e2c46eb94f299cfa0d2 +- Author: Jonathan Cameron +- Date: Thu Mar 2 13:37:02 2023 +0000 +- +- hw/pci/aer: Implement PCI_ERR_UNCOR_MASK register +- +- +-The relevant bits of the commit for our example are this ones:: +- +- --- a/hw/pci/pcie_aer.c +- +++ b/hw/pci/pcie_aer.c +- @@ -112,6 +112,10 @@ int pcie_aer_init(PCIDevice *dev, +- +- pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS, +- PCI_ERR_UNC_SUPPORTED); +- + pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, +- + PCI_ERR_UNC_MASK_DEFAULT); +- + pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, +- + PCI_ERR_UNC_SUPPORTED); +- +- pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER, +- PCI_ERR_UNC_SEVERITY_DEFAULT); +- +-The patch changes how we configure PCI space for AER. But QEMU fails +-when the PCI space configuration is different between source and +-destination. +- +-The following commit shows how this got fixed:: +- +- commit 5ed3dabe57dd9f4c007404345e5f5bf0e347317f +- Author: Leonardo Bras +- Date: Tue May 2 21:27:02 2023 -0300 +- +- hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine type < 8.0 +- +- [...] 
+- +-The relevant parts of the fix in QEMU are as follow: +- +-First, we create a new property for the device to be able to configure +-the old behaviour or the new behaviour:: +- +- diff --git a/hw/pci/pci.c b/hw/pci/pci.c +- index 8a87ccc8b0..5153ad63d6 100644 +- --- a/hw/pci/pci.c +- +++ b/hw/pci/pci.c +- @@ -79,6 +79,8 @@ static Property pci_props[] = { +- DEFINE_PROP_STRING("failover_pair_id", PCIDevice, +- failover_pair_id), +- DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0), +- + DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present, +- + QEMU_PCIE_ERR_UNC_MASK_BITNR, true), +- DEFINE_PROP_END_OF_LIST() +- }; +- +-Notice that we enable the feature for new machine types. +- +-Now we see how the fix is done. This is going to depend on what kind +-of breakage happens, but in this case it is quite simple:: +- +- diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c +- index 103667c368..374d593ead 100644 +- --- a/hw/pci/pcie_aer.c +- +++ b/hw/pci/pcie_aer.c +- @@ -112,10 +112,13 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, +- uint16_t offset, +- +- pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS, +- PCI_ERR_UNC_SUPPORTED); +- - pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, +- - PCI_ERR_UNC_MASK_DEFAULT); +- - pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, +- - PCI_ERR_UNC_SUPPORTED); +- + +- + if (dev->cap_present & QEMU_PCIE_ERR_UNC_MASK) { +- + pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, +- + PCI_ERR_UNC_MASK_DEFAULT); +- + pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, +- + PCI_ERR_UNC_SUPPORTED); +- + } +- +- pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER, +- PCI_ERR_UNC_SEVERITY_DEFAULT); +- +-I.e. If the property bit is enabled, we configure it as we did for +-qemu-8.0. If the property bit is not set, we configure it as it was in 7.2. 
+- +-And now, everything that is missing is disabling the feature for old +-machine types:: +- +- diff --git a/hw/core/machine.c b/hw/core/machine.c +- index 47a34841a5..07f763eb2e 100644 +- --- a/hw/core/machine.c +- +++ b/hw/core/machine.c +- @@ -48,6 +48,7 @@ GlobalProperty hw_compat_7_2[] = { +- { "e1000e", "migrate-timadj", "off" }, +- { "virtio-mem", "x-early-migration", "false" }, +- { "migration", "x-preempt-pre-7-2", "true" }, +- + { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" }, +- }; +- const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); +- +-And now, when qemu-8.0.1 is released with this fix, all combinations +-are going to work as supposed. +- +-- $ qemu-7.2 -M pc-7.2 -> qemu-7.2 -M pc-7.2 (works) +-- $ qemu-8.0.1 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 (works) +-- $ qemu-8.0.1 -M pc-7.2 -> qemu-7.2 -M pc-7.2 (works) +-- $ qemu-7.2 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 (works) +- +-So the normality has been restored and everything is ok, no? +- +-Not really, now our matrix is much bigger. We started with the easy +-cases, migration from the same version to the same version always +-works: +- +-- $ qemu-7.2 -M pc-7.2 -> qemu-7.2 -M pc-7.2 +-- $ qemu-8.0 -M pc-7.2 -> qemu-8.0 -M pc-7.2 +-- $ qemu-8.0.1 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 +- +-Now the interesting ones. When the QEMU processes versions are +-different. For the 1st set, their fail and we can do nothing, both +-versions are released and we can't change anything. +- +-- $ qemu-7.2 -M pc-7.2 -> qemu-8.0 -M pc-7.2 +-- $ qemu-8.0 -M pc-7.2 -> qemu-7.2 -M pc-7.2 +- +-This two are the ones that work. The whole point of making the +-change in qemu-8.0.1 release was to fix this issue: +- +-- $ qemu-7.2 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 +-- $ qemu-8.0.1 -M pc-7.2 -> qemu-7.2 -M pc-7.2 +- +-But now we found that qemu-8.0 neither can migrate to qemu-7.2 not +-qemu-8.0.1. 
+- +-- $ qemu-8.0 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 +-- $ qemu-8.0.1 -M pc-7.2 -> qemu-8.0 -M pc-7.2 +- +-So, if we start a pc-7.2 machine in qemu-8.0 we can't migrate it to +-anything except to qemu-8.0. +- +-Can we do better? +- +-Yeap. If we know that we are going to do this migration: +- +-- $ qemu-8.0 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 +- +-We can launch the appropriate devices with:: +- +- --device...,x-pci-e-err-unc-mask=on +- +-And now we can receive a migration from 8.0. And from now on, we can +-do that migration to new machine types if we remember to enable that +-property for pc-7.2. Notice that we need to remember, it is not +-enough to know that the source of the migration is qemu-8.0. Think of +-this example: +- +-$ qemu-8.0 -M pc-7.2 -> qemu-8.0.1 -M pc-7.2 -> qemu-8.2 -M pc-7.2 +- +-In the second migration, the source is not qemu-8.0, but we still have +-that "problem" and have that property enabled. Notice that we need to +-continue having this mark/property until we have this machine +-rebooted. But it is not a normal reboot (that don't reload QEMU) we +-need the machine to poweroff/poweron on a fixed QEMU. And from now +-on we can use the proper real machine. +-- +2.33.0 + diff --git a/docs-migration-Split-Debugging-and-Firmware.patch b/docs-migration-Split-Debugging-and-Firmware.patch new file mode 100644 index 0000000..e2c6db4 --- /dev/null +++ b/docs-migration-Split-Debugging-and-Firmware.patch @@ -0,0 +1,149 @@ +From 4d6c041c7c43372921b96446d9731a4797468555 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 9 Jan 2024 14:46:23 +0800 +Subject: [19/99] docs/migration: Split "Debugging" and "Firmware" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 774ad6b53b9449223115ffa8851eb93de92b0ce7 upstream. + +Move the two sections into a separate file called "best-practices.rst". +Add the entry into index. 
+ +Reviewed-by: Cédric Le Goater +Link: https://lore.kernel.org/r/20240109064628.595453-6-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + docs/devel/migration/best-practices.rst | 48 +++++++++++++++++++++++++ + docs/devel/migration/index.rst | 1 + + docs/devel/migration/main.rst | 44 ----------------------- + 3 files changed, 49 insertions(+), 44 deletions(-) + create mode 100644 docs/devel/migration/best-practices.rst + +diff --git a/docs/devel/migration/best-practices.rst b/docs/devel/migration/best-practices.rst +new file mode 100644 +index 0000000000..d7c34a3014 +--- /dev/null ++++ b/docs/devel/migration/best-practices.rst +@@ -0,0 +1,48 @@ ++============== ++Best practices ++============== ++ ++Debugging ++========= ++ ++The migration stream can be analyzed thanks to ``scripts/analyze-migration.py``. ++ ++Example usage: ++ ++.. code-block:: shell ++ ++ $ qemu-system-x86_64 -display none -monitor stdio ++ (qemu) migrate "exec:cat > mig" ++ (qemu) q ++ $ ./scripts/analyze-migration.py -f mig ++ { ++ "ram (3)": { ++ "section sizes": { ++ "pc.ram": "0x0000000008000000", ++ ... ++ ++See also ``analyze-migration.py -h`` help for more options. ++ ++Firmware ++======== ++ ++Migration migrates the copies of RAM and ROM, and thus when running ++on the destination it includes the firmware from the source. Even after ++resetting a VM, the old firmware is used. Only once QEMU has been restarted ++is the new firmware in use. ++ ++- Changes in firmware size can cause changes in the required RAMBlock size ++ to hold the firmware and thus migration can fail. In practice it's best ++ to pad firmware images to convenient powers of 2 with plenty of space ++ for growth. ++ ++- Care should be taken with device emulation code so that newer ++ emulation code can work with older firmware to allow forward migration. ++ ++- Care should be taken with newer firmware so that backward migration ++ to older systems with older device emulation code will work. 
++ ++In some cases it may be best to tie specific firmware versions to specific ++versioned machine types to cut down on the combinations that will need ++support. This is also useful when newer versions of firmware outgrow ++the padding. +diff --git a/docs/devel/migration/index.rst b/docs/devel/migration/index.rst +index 7fc02b9520..9a8fd1ead7 100644 +--- a/docs/devel/migration/index.rst ++++ b/docs/devel/migration/index.rst +@@ -11,3 +11,4 @@ QEMU live migration works. + compatibility + vfio + virtio ++ best-practices +diff --git a/docs/devel/migration/main.rst b/docs/devel/migration/main.rst +index 04194414af..7ca3b4dd3f 100644 +--- a/docs/devel/migration/main.rst ++++ b/docs/devel/migration/main.rst +@@ -52,27 +52,6 @@ All these migration protocols use the same infrastructure to + save/restore state devices. This infrastructure is shared with the + savevm/loadvm functionality. + +-Debugging +-========= +- +-The migration stream can be analyzed thanks to ``scripts/analyze-migration.py``. +- +-Example usage: +- +-.. code-block:: shell +- +- $ qemu-system-x86_64 -display none -monitor stdio +- (qemu) migrate "exec:cat > mig" +- (qemu) q +- $ ./scripts/analyze-migration.py -f mig +- { +- "ram (3)": { +- "section sizes": { +- "pc.ram": "0x0000000008000000", +- ... +- +-See also ``analyze-migration.py -h`` help for more options. +- + Common infrastructure + ===================== + +@@ -970,26 +949,3 @@ the background migration channel. Anyone who cares about latencies of page + faults during a postcopy migration should enable this feature. By default, + it's not enabled. + +-Firmware +-======== +- +-Migration migrates the copies of RAM and ROM, and thus when running +-on the destination it includes the firmware from the source. Even after +-resetting a VM, the old firmware is used. Only once QEMU has been restarted +-is the new firmware in use. 
+- +-- Changes in firmware size can cause changes in the required RAMBlock size +- to hold the firmware and thus migration can fail. In practice it's best +- to pad firmware images to convenient powers of 2 with plenty of space +- for growth. +- +-- Care should be taken with device emulation code so that newer +- emulation code can work with older firmware to allow forward migration. +- +-- Care should be taken with newer firmware so that backward migration +- to older systems with older device emulation code will work. +- +-In some cases it may be best to tie specific firmware versions to specific +-versioned machine types to cut down on the combinations that will need +-support. This is also useful when newer versions of firmware outgrow +-the padding. +-- +2.33.0 + diff --git a/docs-migration-Split-Postcopy.patch b/docs-migration-Split-Postcopy.patch new file mode 100644 index 0000000..a5f99ba --- /dev/null +++ b/docs-migration-Split-Postcopy.patch @@ -0,0 +1,679 @@ +From f335519e759500adc05157fc0399335a3646461d Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 9 Jan 2024 14:46:24 +0800 +Subject: [20/99] docs/migration: Split "Postcopy" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit bfb4c7cd99f1c39dedf33381954d03b9f8f244ec upstream. + +Split postcopy into a separate file. Introduce a head page "features.rst" +to keep all the features on top of migration framework. 
+ +Reviewed-by: Cédric Le Goater +Link: https://lore.kernel.org/r/20240109064628.595453-7-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + docs/devel/migration/features.rst | 9 + + docs/devel/migration/index.rst | 1 + + docs/devel/migration/main.rst | 305 ------------------------------ + docs/devel/migration/postcopy.rst | 304 +++++++++++++++++++++++++++++ + 4 files changed, 314 insertions(+), 305 deletions(-) + create mode 100644 docs/devel/migration/features.rst + create mode 100644 docs/devel/migration/postcopy.rst + +diff --git a/docs/devel/migration/features.rst b/docs/devel/migration/features.rst +new file mode 100644 +index 0000000000..0054e0c900 +--- /dev/null ++++ b/docs/devel/migration/features.rst +@@ -0,0 +1,9 @@ ++Migration features ++================== ++ ++Migration has plenty of features to support different use cases. ++ ++.. toctree:: ++ :maxdepth: 2 ++ ++ postcopy +diff --git a/docs/devel/migration/index.rst b/docs/devel/migration/index.rst +index 9a8fd1ead7..21ad58b189 100644 +--- a/docs/devel/migration/index.rst ++++ b/docs/devel/migration/index.rst +@@ -8,6 +8,7 @@ QEMU live migration works. + :maxdepth: 2 + + main ++ features + compatibility + vfio + virtio +diff --git a/docs/devel/migration/main.rst b/docs/devel/migration/main.rst +index 7ca3b4dd3f..1e98e9e40c 100644 +--- a/docs/devel/migration/main.rst ++++ b/docs/devel/migration/main.rst +@@ -644,308 +644,3 @@ algorithm will restrict virtual CPUs as needed to keep their dirty page + rate inside the limit. This leads to more steady reading performance during + live migration and can aid in improving large guest responsiveness. 
+ +-Postcopy +-======== +- +-'Postcopy' migration is a way to deal with migrations that refuse to converge +-(or take too long to converge) its plus side is that there is an upper bound on +-the amount of migration traffic and time it takes, the down side is that during +-the postcopy phase, a failure of *either* side causes the guest to be lost. +- +-In postcopy the destination CPUs are started before all the memory has been +-transferred, and accesses to pages that are yet to be transferred cause +-a fault that's translated by QEMU into a request to the source QEMU. +- +-Postcopy can be combined with precopy (i.e. normal migration) so that if precopy +-doesn't finish in a given time the switch is made to postcopy. +- +-Enabling postcopy +------------------ +- +-To enable postcopy, issue this command on the monitor (both source and +-destination) prior to the start of migration: +- +-``migrate_set_capability postcopy-ram on`` +- +-The normal commands are then used to start a migration, which is still +-started in precopy mode. Issuing: +- +-``migrate_start_postcopy`` +- +-will now cause the transition from precopy to postcopy. +-It can be issued immediately after migration is started or any +-time later on. Issuing it after the end of a migration is harmless. +- +-Blocktime is a postcopy live migration metric, intended to show how +-long the vCPU was in state of interruptible sleep due to pagefault. +-That metric is calculated both for all vCPUs as overlapped value, and +-separately for each vCPU. These values are calculated on destination +-side. To enable postcopy blocktime calculation, enter following +-command on destination monitor: +- +-``migrate_set_capability postcopy-blocktime on`` +- +-Postcopy blocktime can be retrieved by query-migrate qmp command. +-postcopy-blocktime value of qmp command will show overlapped blocking +-time for all vCPU, postcopy-vcpu-blocktime will show list of blocking +-time per vCPU. +- +-.. 
note:: +- During the postcopy phase, the bandwidth limits set using +- ``migrate_set_parameter`` is ignored (to avoid delaying requested pages that +- the destination is waiting for). +- +-Postcopy device transfer +------------------------- +- +-Loading of device data may cause the device emulation to access guest RAM +-that may trigger faults that have to be resolved by the source, as such +-the migration stream has to be able to respond with page data *during* the +-device load, and hence the device data has to be read from the stream completely +-before the device load begins to free the stream up. This is achieved by +-'packaging' the device data into a blob that's read in one go. +- +-Source behaviour +----------------- +- +-Until postcopy is entered the migration stream is identical to normal +-precopy, except for the addition of a 'postcopy advise' command at +-the beginning, to tell the destination that postcopy might happen. +-When postcopy starts the source sends the page discard data and then +-forms the 'package' containing: +- +- - Command: 'postcopy listen' +- - The device state +- +- A series of sections, identical to the precopy streams device state stream +- containing everything except postcopiable devices (i.e. RAM) +- - Command: 'postcopy run' +- +-The 'package' is sent as the data part of a Command: ``CMD_PACKAGED``, and the +-contents are formatted in the same way as the main migration stream. +- +-During postcopy the source scans the list of dirty pages and sends them +-to the destination without being requested (in much the same way as precopy), +-however when a page request is received from the destination, the dirty page +-scanning restarts from the requested location. This causes requested pages +-to be sent quickly, and also causes pages directly after the requested page +-to be sent quickly in the hope that those pages are likely to be used +-by the destination soon. 
+- +-Destination behaviour +---------------------- +- +-Initially the destination looks the same as precopy, with a single thread +-reading the migration stream; the 'postcopy advise' and 'discard' commands +-are processed to change the way RAM is managed, but don't affect the stream +-processing. +- +-:: +- +- ------------------------------------------------------------------------------ +- 1 2 3 4 5 6 7 +- main -----DISCARD-CMD_PACKAGED ( LISTEN DEVICE DEVICE DEVICE RUN ) +- thread | | +- | (page request) +- | \___ +- v \ +- listen thread: --- page -- page -- page -- page -- page -- +- +- a b c +- ------------------------------------------------------------------------------ +- +-- On receipt of ``CMD_PACKAGED`` (1) +- +- All the data associated with the package - the ( ... ) section in the diagram - +- is read into memory, and the main thread recurses into qemu_loadvm_state_main +- to process the contents of the package (2) which contains commands (3,6) and +- devices (4...) +- +-- On receipt of 'postcopy listen' - 3 -(i.e. the 1st command in the package) +- +- a new thread (a) is started that takes over servicing the migration stream, +- while the main thread carries on loading the package. It loads normal +- background page data (b) but if during a device load a fault happens (5) +- the returned page (c) is loaded by the listen thread allowing the main +- threads device load to carry on. +- +-- The last thing in the ``CMD_PACKAGED`` is a 'RUN' command (6) +- +- letting the destination CPUs start running. At the end of the +- ``CMD_PACKAGED`` (7) the main thread returns to normal running behaviour and +- is no longer used by migration, while the listen thread carries on servicing +- page data until the end of migration. +- +-Postcopy Recovery +------------------ +- +-Comparing to precopy, postcopy is special on error handlings. 
When any +-error happens (in this case, mostly network errors), QEMU cannot easily +-fail a migration because VM data resides in both source and destination +-QEMU instances. On the other hand, when issue happens QEMU on both sides +-will go into a paused state. It'll need a recovery phase to continue a +-paused postcopy migration. +- +-The recovery phase normally contains a few steps: +- +- - When network issue occurs, both QEMU will go into PAUSED state +- +- - When the network is recovered (or a new network is provided), the admin +- can setup the new channel for migration using QMP command +- 'migrate-recover' on destination node, preparing for a resume. +- +- - On source host, the admin can continue the interrupted postcopy +- migration using QMP command 'migrate' with resume=true flag set. +- +- - After the connection is re-established, QEMU will continue the postcopy +- migration on both sides. +- +-During a paused postcopy migration, the VM can logically still continue +-running, and it will not be impacted from any page access to pages that +-were already migrated to destination VM before the interruption happens. +-However, if any of the missing pages got accessed on destination VM, the VM +-thread will be halted waiting for the page to be migrated, it means it can +-be halted until the recovery is complete. +- +-The impact of accessing missing pages can be relevant to different +-configurations of the guest. For example, when with async page fault +-enabled, logically the guest can proactively schedule out the threads +-accessing missing pages. 
+- +-Postcopy states +---------------- +- +-Postcopy moves through a series of states (see postcopy_state) from +-ADVISE->DISCARD->LISTEN->RUNNING->END +- +- - Advise +- +- Set at the start of migration if postcopy is enabled, even +- if it hasn't had the start command; here the destination +- checks that its OS has the support needed for postcopy, and performs +- setup to ensure the RAM mappings are suitable for later postcopy. +- The destination will fail early in migration at this point if the +- required OS support is not present. +- (Triggered by reception of POSTCOPY_ADVISE command) +- +- - Discard +- +- Entered on receipt of the first 'discard' command; prior to +- the first Discard being performed, hugepages are switched off +- (using madvise) to ensure that no new huge pages are created +- during the postcopy phase, and to cause any huge pages that +- have discards on them to be broken. +- +- - Listen +- +- The first command in the package, POSTCOPY_LISTEN, switches +- the destination state to Listen, and starts a new thread +- (the 'listen thread') which takes over the job of receiving +- pages off the migration stream, while the main thread carries +- on processing the blob. With this thread able to process page +- reception, the destination now 'sensitises' the RAM to detect +- any access to missing pages (on Linux using the 'userfault' +- system). +- +- - Running +- +- POSTCOPY_RUN causes the destination to synchronise all +- state and start the CPUs and IO devices running. The main +- thread now finishes processing the migration package and +- now carries on as it would for normal precopy migration +- (although it can't do the cleanup it would do as it +- finishes a normal migration). +- +- - Paused +- +- Postcopy can run into a paused state (normally on both sides when +- happens), where all threads will be temporarily halted mostly due to +- network errors. 
When reaching paused state, migration will make sure +- the qemu binary on both sides maintain the data without corrupting +- the VM. To continue the migration, the admin needs to fix the +- migration channel using the QMP command 'migrate-recover' on the +- destination node, then resume the migration using QMP command 'migrate' +- again on source node, with resume=true flag set. +- +- - End +- +- The listen thread can now quit, and perform the cleanup of migration +- state, the migration is now complete. +- +-Source side page map +--------------------- +- +-The 'migration bitmap' in postcopy is basically the same as in the precopy, +-where each of the bit to indicate that page is 'dirty' - i.e. needs +-sending. During the precopy phase this is updated as the CPU dirties +-pages, however during postcopy the CPUs are stopped and nothing should +-dirty anything any more. Instead, dirty bits are cleared when the relevant +-pages are sent during postcopy. +- +-Postcopy with hugepages +------------------------ +- +-Postcopy now works with hugetlbfs backed memory: +- +- a) The linux kernel on the destination must support userfault on hugepages. +- b) The huge-page configuration on the source and destination VMs must be +- identical; i.e. RAMBlocks on both sides must use the same page size. +- c) Note that ``-mem-path /dev/hugepages`` will fall back to allocating normal +- RAM if it doesn't have enough hugepages, triggering (b) to fail. +- Using ``-mem-prealloc`` enforces the allocation using hugepages. +- d) Care should be taken with the size of hugepage used; postcopy with 2MB +- hugepages works well, however 1GB hugepages are likely to be problematic +- since it takes ~1 second to transfer a 1GB hugepage across a 10Gbps link, +- and until the full page is transferred the destination thread is blocked. 
+- +-Postcopy with shared memory +---------------------------- +- +-Postcopy migration with shared memory needs explicit support from the other +-processes that share memory and from QEMU. There are restrictions on the type of +-memory that userfault can support shared. +- +-The Linux kernel userfault support works on ``/dev/shm`` memory and on ``hugetlbfs`` +-(although the kernel doesn't provide an equivalent to ``madvise(MADV_DONTNEED)`` +-for hugetlbfs which may be a problem in some configurations). +- +-The vhost-user code in QEMU supports clients that have Postcopy support, +-and the ``vhost-user-bridge`` (in ``tests/``) and the DPDK package have changes +-to support postcopy. +- +-The client needs to open a userfaultfd and register the areas +-of memory that it maps with userfault. The client must then pass the +-userfaultfd back to QEMU together with a mapping table that allows +-fault addresses in the clients address space to be converted back to +-RAMBlock/offsets. The client's userfaultfd is added to the postcopy +-fault-thread and page requests are made on behalf of the client by QEMU. +-QEMU performs 'wake' operations on the client's userfaultfd to allow it +-to continue after a page has arrived. +- +-.. note:: +- There are two future improvements that would be nice: +- a) Some way to make QEMU ignorant of the addresses in the clients +- address space +- b) Avoiding the need for QEMU to perform ufd-wake calls after the +- pages have arrived +- +-Retro-fitting postcopy to existing clients is possible: +- a) A mechanism is needed for the registration with userfault as above, +- and the registration needs to be coordinated with the phases of +- postcopy. In vhost-user extra messages are added to the existing +- control channel. 
+- b) Any thread that can block due to guest memory accesses must be +- identified and the implication understood; for example if the +- guest memory access is made while holding a lock then all other +- threads waiting for that lock will also be blocked. +- +-Postcopy Preemption Mode +------------------------- +- +-Postcopy preempt is a new capability introduced in 8.0 QEMU release, it +-allows urgent pages (those got page fault requested from destination QEMU +-explicitly) to be sent in a separate preempt channel, rather than queued in +-the background migration channel. Anyone who cares about latencies of page +-faults during a postcopy migration should enable this feature. By default, +-it's not enabled. +- +diff --git a/docs/devel/migration/postcopy.rst b/docs/devel/migration/postcopy.rst +new file mode 100644 +index 0000000000..d60eec06ab +--- /dev/null ++++ b/docs/devel/migration/postcopy.rst +@@ -0,0 +1,304 @@ ++Postcopy ++======== ++ ++'Postcopy' migration is a way to deal with migrations that refuse to converge ++(or take too long to converge) its plus side is that there is an upper bound on ++the amount of migration traffic and time it takes, the down side is that during ++the postcopy phase, a failure of *either* side causes the guest to be lost. ++ ++In postcopy the destination CPUs are started before all the memory has been ++transferred, and accesses to pages that are yet to be transferred cause ++a fault that's translated by QEMU into a request to the source QEMU. ++ ++Postcopy can be combined with precopy (i.e. normal migration) so that if precopy ++doesn't finish in a given time the switch is made to postcopy. ++ ++Enabling postcopy ++----------------- ++ ++To enable postcopy, issue this command on the monitor (both source and ++destination) prior to the start of migration: ++ ++``migrate_set_capability postcopy-ram on`` ++ ++The normal commands are then used to start a migration, which is still ++started in precopy mode. 
Issuing: ++ ++``migrate_start_postcopy`` ++ ++will now cause the transition from precopy to postcopy. ++It can be issued immediately after migration is started or any ++time later on. Issuing it after the end of a migration is harmless. ++ ++Blocktime is a postcopy live migration metric, intended to show how ++long the vCPU was in state of interruptible sleep due to pagefault. ++That metric is calculated both for all vCPUs as overlapped value, and ++separately for each vCPU. These values are calculated on destination ++side. To enable postcopy blocktime calculation, enter following ++command on destination monitor: ++ ++``migrate_set_capability postcopy-blocktime on`` ++ ++Postcopy blocktime can be retrieved by query-migrate qmp command. ++postcopy-blocktime value of qmp command will show overlapped blocking ++time for all vCPU, postcopy-vcpu-blocktime will show list of blocking ++time per vCPU. ++ ++.. note:: ++ During the postcopy phase, the bandwidth limits set using ++ ``migrate_set_parameter`` is ignored (to avoid delaying requested pages that ++ the destination is waiting for). ++ ++Postcopy device transfer ++------------------------ ++ ++Loading of device data may cause the device emulation to access guest RAM ++that may trigger faults that have to be resolved by the source, as such ++the migration stream has to be able to respond with page data *during* the ++device load, and hence the device data has to be read from the stream completely ++before the device load begins to free the stream up. This is achieved by ++'packaging' the device data into a blob that's read in one go. ++ ++Source behaviour ++---------------- ++ ++Until postcopy is entered the migration stream is identical to normal ++precopy, except for the addition of a 'postcopy advise' command at ++the beginning, to tell the destination that postcopy might happen. 
++When postcopy starts the source sends the page discard data and then ++forms the 'package' containing: ++ ++ - Command: 'postcopy listen' ++ - The device state ++ ++ A series of sections, identical to the precopy streams device state stream ++ containing everything except postcopiable devices (i.e. RAM) ++ - Command: 'postcopy run' ++ ++The 'package' is sent as the data part of a Command: ``CMD_PACKAGED``, and the ++contents are formatted in the same way as the main migration stream. ++ ++During postcopy the source scans the list of dirty pages and sends them ++to the destination without being requested (in much the same way as precopy), ++however when a page request is received from the destination, the dirty page ++scanning restarts from the requested location. This causes requested pages ++to be sent quickly, and also causes pages directly after the requested page ++to be sent quickly in the hope that those pages are likely to be used ++by the destination soon. ++ ++Destination behaviour ++--------------------- ++ ++Initially the destination looks the same as precopy, with a single thread ++reading the migration stream; the 'postcopy advise' and 'discard' commands ++are processed to change the way RAM is managed, but don't affect the stream ++processing. ++ ++:: ++ ++ ------------------------------------------------------------------------------ ++ 1 2 3 4 5 6 7 ++ main -----DISCARD-CMD_PACKAGED ( LISTEN DEVICE DEVICE DEVICE RUN ) ++ thread | | ++ | (page request) ++ | \___ ++ v \ ++ listen thread: --- page -- page -- page -- page -- page -- ++ ++ a b c ++ ------------------------------------------------------------------------------ ++ ++- On receipt of ``CMD_PACKAGED`` (1) ++ ++ All the data associated with the package - the ( ... ) section in the diagram - ++ is read into memory, and the main thread recurses into qemu_loadvm_state_main ++ to process the contents of the package (2) which contains commands (3,6) and ++ devices (4...) 
++ ++- On receipt of 'postcopy listen' - 3 -(i.e. the 1st command in the package) ++ ++ a new thread (a) is started that takes over servicing the migration stream, ++ while the main thread carries on loading the package. It loads normal ++ background page data (b) but if during a device load a fault happens (5) ++ the returned page (c) is loaded by the listen thread allowing the main ++ threads device load to carry on. ++ ++- The last thing in the ``CMD_PACKAGED`` is a 'RUN' command (6) ++ ++ letting the destination CPUs start running. At the end of the ++ ``CMD_PACKAGED`` (7) the main thread returns to normal running behaviour and ++ is no longer used by migration, while the listen thread carries on servicing ++ page data until the end of migration. ++ ++Postcopy Recovery ++----------------- ++ ++Comparing to precopy, postcopy is special on error handlings. When any ++error happens (in this case, mostly network errors), QEMU cannot easily ++fail a migration because VM data resides in both source and destination ++QEMU instances. On the other hand, when issue happens QEMU on both sides ++will go into a paused state. It'll need a recovery phase to continue a ++paused postcopy migration. ++ ++The recovery phase normally contains a few steps: ++ ++ - When network issue occurs, both QEMU will go into PAUSED state ++ ++ - When the network is recovered (or a new network is provided), the admin ++ can setup the new channel for migration using QMP command ++ 'migrate-recover' on destination node, preparing for a resume. ++ ++ - On source host, the admin can continue the interrupted postcopy ++ migration using QMP command 'migrate' with resume=true flag set. ++ ++ - After the connection is re-established, QEMU will continue the postcopy ++ migration on both sides. 
++ ++During a paused postcopy migration, the VM can logically still continue ++running, and it will not be impacted from any page access to pages that ++were already migrated to destination VM before the interruption happens. ++However, if any of the missing pages got accessed on destination VM, the VM ++thread will be halted waiting for the page to be migrated, it means it can ++be halted until the recovery is complete. ++ ++The impact of accessing missing pages can be relevant to different ++configurations of the guest. For example, when with async page fault ++enabled, logically the guest can proactively schedule out the threads ++accessing missing pages. ++ ++Postcopy states ++--------------- ++ ++Postcopy moves through a series of states (see postcopy_state) from ++ADVISE->DISCARD->LISTEN->RUNNING->END ++ ++ - Advise ++ ++ Set at the start of migration if postcopy is enabled, even ++ if it hasn't had the start command; here the destination ++ checks that its OS has the support needed for postcopy, and performs ++ setup to ensure the RAM mappings are suitable for later postcopy. ++ The destination will fail early in migration at this point if the ++ required OS support is not present. ++ (Triggered by reception of POSTCOPY_ADVISE command) ++ ++ - Discard ++ ++ Entered on receipt of the first 'discard' command; prior to ++ the first Discard being performed, hugepages are switched off ++ (using madvise) to ensure that no new huge pages are created ++ during the postcopy phase, and to cause any huge pages that ++ have discards on them to be broken. ++ ++ - Listen ++ ++ The first command in the package, POSTCOPY_LISTEN, switches ++ the destination state to Listen, and starts a new thread ++ (the 'listen thread') which takes over the job of receiving ++ pages off the migration stream, while the main thread carries ++ on processing the blob. 
With this thread able to process page ++ reception, the destination now 'sensitises' the RAM to detect ++ any access to missing pages (on Linux using the 'userfault' ++ system). ++ ++ - Running ++ ++ POSTCOPY_RUN causes the destination to synchronise all ++ state and start the CPUs and IO devices running. The main ++ thread now finishes processing the migration package and ++ now carries on as it would for normal precopy migration ++ (although it can't do the cleanup it would do as it ++ finishes a normal migration). ++ ++ - Paused ++ ++ Postcopy can run into a paused state (normally on both sides when ++ happens), where all threads will be temporarily halted mostly due to ++ network errors. When reaching paused state, migration will make sure ++ the qemu binary on both sides maintain the data without corrupting ++ the VM. To continue the migration, the admin needs to fix the ++ migration channel using the QMP command 'migrate-recover' on the ++ destination node, then resume the migration using QMP command 'migrate' ++ again on source node, with resume=true flag set. ++ ++ - End ++ ++ The listen thread can now quit, and perform the cleanup of migration ++ state, the migration is now complete. ++ ++Source side page map ++-------------------- ++ ++The 'migration bitmap' in postcopy is basically the same as in the precopy, ++where each of the bit to indicate that page is 'dirty' - i.e. needs ++sending. During the precopy phase this is updated as the CPU dirties ++pages, however during postcopy the CPUs are stopped and nothing should ++dirty anything any more. Instead, dirty bits are cleared when the relevant ++pages are sent during postcopy. ++ ++Postcopy with hugepages ++----------------------- ++ ++Postcopy now works with hugetlbfs backed memory: ++ ++ a) The linux kernel on the destination must support userfault on hugepages. ++ b) The huge-page configuration on the source and destination VMs must be ++ identical; i.e. 
RAMBlocks on both sides must use the same page size. ++ c) Note that ``-mem-path /dev/hugepages`` will fall back to allocating normal ++ RAM if it doesn't have enough hugepages, triggering (b) to fail. ++ Using ``-mem-prealloc`` enforces the allocation using hugepages. ++ d) Care should be taken with the size of hugepage used; postcopy with 2MB ++ hugepages works well, however 1GB hugepages are likely to be problematic ++ since it takes ~1 second to transfer a 1GB hugepage across a 10Gbps link, ++ and until the full page is transferred the destination thread is blocked. ++ ++Postcopy with shared memory ++--------------------------- ++ ++Postcopy migration with shared memory needs explicit support from the other ++processes that share memory and from QEMU. There are restrictions on the type of ++memory that userfault can support shared. ++ ++The Linux kernel userfault support works on ``/dev/shm`` memory and on ``hugetlbfs`` ++(although the kernel doesn't provide an equivalent to ``madvise(MADV_DONTNEED)`` ++for hugetlbfs which may be a problem in some configurations). ++ ++The vhost-user code in QEMU supports clients that have Postcopy support, ++and the ``vhost-user-bridge`` (in ``tests/``) and the DPDK package have changes ++to support postcopy. ++ ++The client needs to open a userfaultfd and register the areas ++of memory that it maps with userfault. The client must then pass the ++userfaultfd back to QEMU together with a mapping table that allows ++fault addresses in the clients address space to be converted back to ++RAMBlock/offsets. The client's userfaultfd is added to the postcopy ++fault-thread and page requests are made on behalf of the client by QEMU. ++QEMU performs 'wake' operations on the client's userfaultfd to allow it ++to continue after a page has arrived. ++ ++.. 
note:: ++ There are two future improvements that would be nice: ++ a) Some way to make QEMU ignorant of the addresses in the clients ++ address space ++ b) Avoiding the need for QEMU to perform ufd-wake calls after the ++ pages have arrived ++ ++Retro-fitting postcopy to existing clients is possible: ++ a) A mechanism is needed for the registration with userfault as above, ++ and the registration needs to be coordinated with the phases of ++ postcopy. In vhost-user extra messages are added to the existing ++ control channel. ++ b) Any thread that can block due to guest memory accesses must be ++ identified and the implication understood; for example if the ++ guest memory access is made while holding a lock then all other ++ threads waiting for that lock will also be blocked. ++ ++Postcopy Preemption Mode ++------------------------ ++ ++Postcopy preempt is a new capability introduced in 8.0 QEMU release, it ++allows urgent pages (those got page fault requested from destination QEMU ++explicitly) to be sent in a separate preempt channel, rather than queued in ++the background migration channel. Anyone who cares about latencies of page ++faults during a postcopy migration should enable this feature. By default, ++it's not enabled. +-- +2.33.0 + diff --git a/docs-migration-Split-dirty-limit.patch b/docs-migration-Split-dirty-limit.patch new file mode 100644 index 0000000..0947d1e --- /dev/null +++ b/docs-migration-Split-dirty-limit.patch @@ -0,0 +1,192 @@ +From 10545ddb8797505ac298960171afaebc327c926c Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 9 Jan 2024 14:46:25 +0800 +Subject: [21/99] docs/migration: Split "dirty limit" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 4c6f8a79ae539eeb1f86af6522e4000edde3638b upstream. + +Split that into a separate file, put under "features". 
+ +Cc: Yong Huang +Reviewed-by: Cédric Le Goater +Link: https://lore.kernel.org/r/20240109064628.595453-8-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + docs/devel/migration/dirty-limit.rst | 71 ++++++++++++++++++++++++++++ + docs/devel/migration/features.rst | 1 + + docs/devel/migration/main.rst | 71 ---------------------------- + 3 files changed, 72 insertions(+), 71 deletions(-) + create mode 100644 docs/devel/migration/dirty-limit.rst + +diff --git a/docs/devel/migration/dirty-limit.rst b/docs/devel/migration/dirty-limit.rst +new file mode 100644 +index 0000000000..8f32329d5f +--- /dev/null ++++ b/docs/devel/migration/dirty-limit.rst +@@ -0,0 +1,71 @@ ++Dirty limit ++=========== ++ ++The dirty limit, short for dirty page rate upper limit, is a new capability ++introduced in the 8.1 QEMU release that uses a new algorithm based on the KVM ++dirty ring to throttle down the guest during live migration. ++ ++The algorithm framework is as follows: ++ ++:: ++ ++ ------------------------------------------------------------------------------ ++ main --------------> throttle thread ------------> PREPARE(1) <-------- ++ thread \ | | ++ \ | | ++ \ V | ++ -\ CALCULATE(2) | ++ \ | | ++ \ | | ++ \ V | ++ \ SET PENALTY(3) ----- ++ -\ | ++ \ | ++ \ V ++ -> virtual CPU thread -------> ACCEPT PENALTY(4) ++ ------------------------------------------------------------------------------ ++ ++When the qmp command qmp_set_vcpu_dirty_limit is called for the first time, ++the QEMU main thread starts the throttle thread. The throttle thread, once ++launched, executes the loop, which consists of three steps: ++ ++ - PREPARE (1) ++ ++ The entire work of PREPARE (1) is preparation for the second stage, ++ CALCULATE(2), as the name implies. 
It involves preparing the dirty ++ page rate value and the corresponding upper limit of the VM: ++ The dirty page rate is calculated via the KVM dirty ring mechanism, ++ which tells QEMU how many dirty pages a virtual CPU has had since the ++ last KVM_EXIT_DIRTY_RING_FULL exception; The dirty page rate upper ++ limit is specified by caller, therefore fetch it directly. ++ ++ - CALCULATE (2) ++ ++ Calculate a suitable sleep period for each virtual CPU, which will be ++ used to determine the penalty for the target virtual CPU. The ++ computation must be done carefully in order to reduce the dirty page ++ rate progressively down to the upper limit without oscillation. To ++ achieve this, two strategies are provided: the first is to add or ++ subtract sleep time based on the ratio of the current dirty page rate ++ to the limit, which is used when the current dirty page rate is far ++ from the limit; the second is to add or subtract a fixed time when ++ the current dirty page rate is close to the limit. ++ ++ - SET PENALTY (3) ++ ++ Set the sleep time for each virtual CPU that should be penalized based ++ on the results of the calculation supplied by step CALCULATE (2). ++ ++After completing the three above stages, the throttle thread loops back ++to step PREPARE (1) until the dirty limit is reached. ++ ++On the other hand, each virtual CPU thread reads the sleep duration and ++sleeps in the path of the KVM_EXIT_DIRTY_RING_FULL exception handler, that ++is ACCEPT PENALTY (4). Virtual CPUs tied with writing processes will ++obviously exit to the path and get penalized, whereas virtual CPUs involved ++with read processes will not. ++ ++In summary, thanks to the KVM dirty ring technology, the dirty limit ++algorithm will restrict virtual CPUs as needed to keep their dirty page ++rate inside the limit. This leads to more steady reading performance during ++live migration and can aid in improving large guest responsiveness. 
+diff --git a/docs/devel/migration/features.rst b/docs/devel/migration/features.rst +index 0054e0c900..e257d0d100 100644 +--- a/docs/devel/migration/features.rst ++++ b/docs/devel/migration/features.rst +@@ -7,3 +7,4 @@ Migration has plenty of features to support different use cases. + :maxdepth: 2 + + postcopy ++ dirty-limit +diff --git a/docs/devel/migration/main.rst b/docs/devel/migration/main.rst +index 1e98e9e40c..396c7c51ca 100644 +--- a/docs/devel/migration/main.rst ++++ b/docs/devel/migration/main.rst +@@ -573,74 +573,3 @@ path. + Return path - opened by main thread, written by main thread AND postcopy + thread (protected by rp_mutex) + +-Dirty limit +-===================== +-The dirty limit, short for dirty page rate upper limit, is a new capability +-introduced in the 8.1 QEMU release that uses a new algorithm based on the KVM +-dirty ring to throttle down the guest during live migration. +- +-The algorithm framework is as follows: +- +-:: +- +- ------------------------------------------------------------------------------ +- main --------------> throttle thread ------------> PREPARE(1) <-------- +- thread \ | | +- \ | | +- \ V | +- -\ CALCULATE(2) | +- \ | | +- \ | | +- \ V | +- \ SET PENALTY(3) ----- +- -\ | +- \ | +- \ V +- -> virtual CPU thread -------> ACCEPT PENALTY(4) +- ------------------------------------------------------------------------------ +- +-When the qmp command qmp_set_vcpu_dirty_limit is called for the first time, +-the QEMU main thread starts the throttle thread. The throttle thread, once +-launched, executes the loop, which consists of three steps: +- +- - PREPARE (1) +- +- The entire work of PREPARE (1) is preparation for the second stage, +- CALCULATE(2), as the name implies. 
It involves preparing the dirty +- page rate value and the corresponding upper limit of the VM: +- The dirty page rate is calculated via the KVM dirty ring mechanism, +- which tells QEMU how many dirty pages a virtual CPU has had since the +- last KVM_EXIT_DIRTY_RING_FULL exception; The dirty page rate upper +- limit is specified by caller, therefore fetch it directly. +- +- - CALCULATE (2) +- +- Calculate a suitable sleep period for each virtual CPU, which will be +- used to determine the penalty for the target virtual CPU. The +- computation must be done carefully in order to reduce the dirty page +- rate progressively down to the upper limit without oscillation. To +- achieve this, two strategies are provided: the first is to add or +- subtract sleep time based on the ratio of the current dirty page rate +- to the limit, which is used when the current dirty page rate is far +- from the limit; the second is to add or subtract a fixed time when +- the current dirty page rate is close to the limit. +- +- - SET PENALTY (3) +- +- Set the sleep time for each virtual CPU that should be penalized based +- on the results of the calculation supplied by step CALCULATE (2). +- +-After completing the three above stages, the throttle thread loops back +-to step PREPARE (1) until the dirty limit is reached. +- +-On the other hand, each virtual CPU thread reads the sleep duration and +-sleeps in the path of the KVM_EXIT_DIRTY_RING_FULL exception handler, that +-is ACCEPT PENALTY (4). Virtual CPUs tied with writing processes will +-obviously exit to the path and get penalized, whereas virtual CPUs involved +-with read processes will not. +- +-In summary, thanks to the KVM dirty ring technology, the dirty limit +-algorithm will restrict virtual CPUs as needed to keep their dirty page +-rate inside the limit. This leads to more steady reading performance during +-live migration and can aid in improving large guest responsiveness. 
+- +-- +2.33.0 + diff --git a/docs-migration-add-qatzip-compression-feature.patch b/docs-migration-add-qatzip-compression-feature.patch new file mode 100644 index 0000000..b300a31 --- /dev/null +++ b/docs-migration-add-qatzip-compression-feature.patch @@ -0,0 +1,206 @@ +From 5fa111eb3e3d73a0500d33d0b81638c579476845 Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Fri, 30 Aug 2024 16:27:18 -0700 +Subject: [88/99] docs/migration: add qatzip compression feature + +commit 85da4cbe6e5eb6ba6f31c8b30ee4582625546da7 upstream. + +add Intel QATzip compression method introduction + +Reviewed-by: Nanhai Zou +Reviewed-by: Peter Xu +Reviewed-by: Fabiano Rosas +Signed-off-by: Yuan Liu +Signed-off-by: Yichen Wang +Link: https://lore.kernel.org/r/20240830232722.58272-2-yichen.wang@bytedance.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + docs/devel/migration/features.rst | 1 + + docs/devel/migration/qatzip-compression.rst | 165 ++++++++++++++++++++ + 2 files changed, 166 insertions(+) + create mode 100644 docs/devel/migration/qatzip-compression.rst + +diff --git a/docs/devel/migration/features.rst b/docs/devel/migration/features.rst +index 0c9cb3dd6c..7c5ce9e79d 100644 +--- a/docs/devel/migration/features.rst ++++ b/docs/devel/migration/features.rst +@@ -12,3 +12,4 @@ Migration has plenty of features to support different use cases. + virtio + qpl-compression + uadk-compression ++ qatzip-compression +diff --git a/docs/devel/migration/qatzip-compression.rst b/docs/devel/migration/qatzip-compression.rst +new file mode 100644 +index 0000000000..862b383164 +--- /dev/null ++++ b/docs/devel/migration/qatzip-compression.rst +@@ -0,0 +1,165 @@ ++================== ++QATzip Compression ++================== ++In scenarios with limited network bandwidth, the ``QATzip`` solution can help ++users save a lot of host CPU resources by accelerating compression and ++decompression through the Intel QuickAssist Technology(``QAT``) hardware. 
++ ++ ++The following test was conducted using 8 multifd channels and 10Gbps network ++bandwidth. The results show that, compared to zstd, ``QATzip`` significantly ++saves CPU resources on the sender and reduces migration time. Compared to the ++uncompressed solution, ``QATzip`` greatly improves the dirty page processing ++capability, indicated by the Pages per Second metric, and also reduces the ++total migration time. ++ ++:: ++ ++ VM Configuration: 16 vCPU and 64G memory ++ VM Workload: all vCPUs are idle and 54G memory is filled with Silesia data. ++ QAT Devices: 4 ++ |-----------|--------|---------|----------|----------|------|------| ++ |8 Channels |Total |down |throughput|pages per | send | recv | ++ | |time(ms)|time(ms) |(mbps) |second | cpu %| cpu% | ++ |-----------|--------|---------|----------|----------|------|------| ++ |qatzip | 16630| 28| 10467| 2940235| 160| 360| ++ |-----------|--------|---------|----------|----------|------|------| ++ |zstd | 20165| 24| 8579| 2391465| 810| 340| ++ |-----------|--------|---------|----------|----------|------|------| ++ |none | 46063| 40| 10848| 330240| 45| 85| ++ |-----------|--------|---------|----------|----------|------|------| ++ ++ ++QATzip Compression Framework ++============================ ++ ++``QATzip`` is a user space library which builds on top of the Intel QuickAssist ++Technology to provide extended accelerated compression and decompression ++services. 
++ ++For more ``QATzip`` introduction, please refer to `QATzip Introduction ++`_ ++ ++:: ++ ++ +----------------+ ++ | MultiFd Thread | ++ +-------+--------+ ++ | ++ | compress/decompress ++ +-------+--------+ ++ | QATzip library | ++ +-------+--------+ ++ | ++ +-------+--------+ ++ | QAT library | ++ +-------+--------+ ++ | user space ++ --------+--------------------- ++ | kernel space ++ +------+-------+ ++ | QAT Driver | ++ +------+-------+ ++ | ++ +------+-------+ ++ | QAT Devices | ++ +--------------+ ++ ++ ++QATzip Installation ++------------------- ++ ++The ``QATzip`` installation package has been integrated into some Linux ++distributions and can be installed directly. For example, the Ubuntu Server ++24.04 LTS system can be installed using below command ++ ++.. code-block:: shell ++ ++ #apt search qatzip ++ libqatzip-dev/noble 1.2.0-0ubuntu3 amd64 ++ Intel QuickAssist user space library development files ++ ++ libqatzip3/noble 1.2.0-0ubuntu3 amd64 ++ Intel QuickAssist user space library ++ ++ qatzip/noble,now 1.2.0-0ubuntu3 amd64 [installed] ++ Compression user-space tool for Intel QuickAssist Technology ++ ++ #sudo apt install libqatzip-dev libqatzip3 qatzip ++ ++If your system does not support the ``QATzip`` installation package, you can ++use the source code to build and install, please refer to `QATzip source code installation ++`_ ++ ++QAT Hardware Deployment ++----------------------- ++ ++``QAT`` supports physical functions(PFs) and virtual functions(VFs) for ++deployment, and users can configure ``QAT`` resources for migration according ++to actual needs. For more details about ``QAT`` deployment, please refer to ++`Intel QuickAssist Technology Documentation ++`_ ++ ++For more ``QAT`` hardware introduction, please refer to `intel-quick-assist-technology-overview ++`_ ++ ++How To Use QATzip Compression ++============================= ++ ++1 - Install ``QATzip`` library ++ ++2 - Build ``QEMU`` with ``--enable-qatzip`` parameter ++ ++ E.g. 
configure --target-list=x86_64-softmmu --enable-kvm ``--enable-qatzip`` ++ ++3 - Set ``migrate_set_parameter multifd-compression qatzip`` ++ ++4 - Set ``migrate_set_parameter multifd-qatzip-level comp_level``, the default ++comp_level value is 1, and it supports levels from 1 to 9 ++ ++QAT Memory Requirements ++======================= ++ ++The user needs to reserve system memory for the QAT memory management to ++allocate DMA memory. The size of the reserved system memory depends on the ++number of devices used for migration and the number of multifd channels. ++ ++Because memory usage depends on QAT configuration, please refer to `QAT Memory ++Driver Queries ++`_ ++for memory usage calculation. ++ ++.. list-table:: An example of a PF used for migration ++ :header-rows: 1 ++ ++ * - Number of channels ++ - Sender memory usage ++ - Receiver memory usage ++ * - 2 ++ - 10M ++ - 10M ++ * - 4 ++ - 12M ++ - 14M ++ * - 8 ++ - 16M ++ - 20M ++ ++How To Choose Between QATzip and QPL ++==================================== ++Starting from 4th Gen Intel Xeon Scalable processors, codenamed Sapphire Rapids ++processor(``SPR``), multiple built-in accelerators are supported including ++``QAT`` and ``IAA``. The former can accelerate ``QATzip`` and the latter is ++used to accelerate ``QPL``. ++ ++Here are some suggestions: ++ ++1 - If the live migration scenario is limited by network bandwidth and ``QAT`` ++hardware resources exceed ``IAA``, use the ``QATzip`` method, which can save a ++lot of host CPU resources for compression. ++ ++2 - If the system cannot support shared virtual memory (SVM) technology, use ++the ``QATzip`` method because ``QPL`` performance is not good without SVM ++support. ++ ++3 - For other scenarios, use the ``QPL`` method first. 
+-- +2.33.0 + diff --git a/docs-migration-add-qpl-compression-feature.patch b/docs-migration-add-qpl-compression-feature.patch new file mode 100644 index 0000000..15587ec --- /dev/null +++ b/docs-migration-add-qpl-compression-feature.patch @@ -0,0 +1,304 @@ +From 4c4e9830f3bee7313f3ac49fe4887f040fd85f7a Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Mon, 10 Jun 2024 18:21:04 +0800 +Subject: [72/99] docs/migration: add qpl compression feature + +commit 0d40b3d76ced77c1c82c77a636af703fabdb407c upstream. + +add Intel Query Processing Library (QPL) compression method +introduction + +Signed-off-by: Yuan Liu +Reviewed-by: Nanhai Zou +Reviewed-by: Fabiano Rosas +Acked-by: Peter Xu +Signed-off-by: Fabiano Rosas + + Conflicts: + docs/devel/migration/features.rst +[jz: resolve simple context conflict] +Signed-off-by: Jason Zeng +--- + docs/devel/migration/features.rst | 1 + + docs/devel/migration/qpl-compression.rst | 260 +++++++++++++++++++++++ + 2 files changed, 261 insertions(+) + create mode 100644 docs/devel/migration/qpl-compression.rst + +diff --git a/docs/devel/migration/features.rst b/docs/devel/migration/features.rst +index a9acaf618e..9819393c12 100644 +--- a/docs/devel/migration/features.rst ++++ b/docs/devel/migration/features.rst +@@ -10,3 +10,4 @@ Migration has plenty of features to support different use cases. + dirty-limit + vfio + virtio ++ qpl-compression +diff --git a/docs/devel/migration/qpl-compression.rst b/docs/devel/migration/qpl-compression.rst +new file mode 100644 +index 0000000000..990992d786 +--- /dev/null ++++ b/docs/devel/migration/qpl-compression.rst +@@ -0,0 +1,260 @@ ++=============== ++QPL Compression ++=============== ++The Intel Query Processing Library (Intel ``QPL``) is an open-source library to ++provide compression and decompression features and it is based on deflate ++compression algorithm (RFC 1951). 
++ ++The ``QPL`` compression relies on Intel In-Memory Analytics Accelerator(``IAA``) ++and Shared Virtual Memory(``SVM``) technology, they are new features supported ++from Intel 4th Gen Intel Xeon Scalable processors, codenamed Sapphire Rapids ++processor(``SPR``). ++ ++For more ``QPL`` introduction, please refer to `QPL Introduction ++`_ ++ ++QPL Compression Framework ++========================= ++ ++:: ++ ++ +----------------+ +------------------+ ++ | MultiFD Thread | |accel-config tool | ++ +-------+--------+ +--------+---------+ ++ | | ++ | | ++ |compress/decompress | ++ +-------+--------+ | Setup IAA ++ | QPL library | | Resources ++ +-------+---+----+ | ++ | | | ++ | +-------------+-------+ ++ | Open IAA | ++ | Devices +-----+-----+ ++ | |idxd driver| ++ | +-----+-----+ ++ | | ++ | | ++ | +-----+-----+ ++ +-----------+IAA Devices| ++ Submit jobs +-----------+ ++ via enqcmd ++ ++ ++QPL Build And Installation ++-------------------------- ++ ++.. code-block:: shell ++ ++ $git clone --recursive https://github.com/intel/qpl.git qpl ++ $mkdir qpl/build ++ $cd qpl/build ++ $cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DQPL_LIBRARY_TYPE=SHARED .. ++ $sudo cmake --build . --target install ++ ++For more details about ``QPL`` installation, please refer to `QPL Installation ++`_ ++ ++IAA Device Management ++--------------------- ++ ++The number of ``IAA`` devices will vary depending on the Xeon product model. ++On a ``SPR`` server, there can be a maximum of 8 ``IAA`` devices, with up to ++4 devices per socket. ++ ++By default, all ``IAA`` devices are disabled and need to be configured and ++enabled by users manually. ++ ++Check the number of devices through the following command ++ ++.. 
code-block:: shell ++ ++ #lspci -d 8086:0cfe ++ 6a:02.0 System peripheral: Intel Corporation Device 0cfe ++ 6f:02.0 System peripheral: Intel Corporation Device 0cfe ++ 74:02.0 System peripheral: Intel Corporation Device 0cfe ++ 79:02.0 System peripheral: Intel Corporation Device 0cfe ++ e7:02.0 System peripheral: Intel Corporation Device 0cfe ++ ec:02.0 System peripheral: Intel Corporation Device 0cfe ++ f1:02.0 System peripheral: Intel Corporation Device 0cfe ++ f6:02.0 System peripheral: Intel Corporation Device 0cfe ++ ++IAA Device Configuration And Enabling ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++The ``accel-config`` tool is used to enable ``IAA`` devices and configure ++``IAA`` hardware resources(work queues and engines). One ``IAA`` device ++has 8 work queues and 8 processing engines, multiple engines can be assigned ++to a work queue via ``group`` attribute. ++ ++For ``accel-config`` installation, please refer to `accel-config installation ++`_ ++ ++One example of configuring and enabling an ``IAA`` device. ++ ++.. code-block:: shell ++ ++ #accel-config config-engine iax1/engine1.0 -g 0 ++ #accel-config config-engine iax1/engine1.1 -g 0 ++ #accel-config config-engine iax1/engine1.2 -g 0 ++ #accel-config config-engine iax1/engine1.3 -g 0 ++ #accel-config config-engine iax1/engine1.4 -g 0 ++ #accel-config config-engine iax1/engine1.5 -g 0 ++ #accel-config config-engine iax1/engine1.6 -g 0 ++ #accel-config config-engine iax1/engine1.7 -g 0 ++ #accel-config config-wq iax1/wq1.0 -g 0 -s 128 -p 10 -b 1 -t 128 -m shared -y user -n app1 -d user ++ #accel-config enable-device iax1 ++ #accel-config enable-wq iax1/wq1.0 ++ ++.. note:: ++ IAX is an early name for IAA ++ ++- The ``IAA`` device index is 1, use ``ls -lh /sys/bus/dsa/devices/iax*`` ++ command to query the ``IAA`` device index. ++ ++- 8 engines and 1 work queue are configured in group 0, so all compression jobs ++ submitted to this work queue can be processed by all engines at the same time. 
++ ++- Set work queue attributes including the work mode, work queue size and so on. ++ ++- Enable the ``IAA1`` device and work queue 1.0 ++ ++.. note:: ++ ++ Set work queue mode to shared mode, since ``QPL`` library only supports ++ shared mode ++ ++For more detailed configuration, please refer to `IAA Configuration Samples ++`_ ++ ++IAA Unit Test ++^^^^^^^^^^^^^ ++ ++- Enabling ``IAA`` devices for Xeon platform, please refer to `IAA User Guide ++ `_ ++ ++- ``IAA`` device driver is Intel Data Accelerator Driver (idxd), it is ++ recommended that the minimum version of Linux kernel is 5.18. ++ ++- Add ``"intel_iommu=on,sm_on"`` parameter to kernel command line ++ for ``SVM`` feature enabling. ++ ++Here is an easy way to verify ``IAA`` device driver and ``SVM`` with `iaa_test ++`_ ++ ++.. code-block:: shell ++ ++ #./test/iaa_test ++ [ info] alloc wq 0 shared size 128 addr 0x7f26cebe5000 batch sz 0xfffffffe xfer sz 0x80000000 ++ [ info] test noop: tflags 0x1 num_desc 1 ++ [ info] preparing descriptor for noop ++ [ info] Submitted all noop jobs ++ [ info] verifying task result for 0x16f7e20 ++ [ info] test with op 0 passed ++ ++ ++IAA Resources Allocation For Migration ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++There are no ``IAA`` resource configuration parameters for migration and ++``accel-config`` tool configuration cannot directly specify the ``IAA`` ++resources used for migration. ++ ++The multifd migration with ``QPL`` compression method will use all work ++queues that are enabled and in shared mode. ++ ++.. note:: ++ ++ Accessing IAA resources requires ``sudo`` command or ``root`` privileges ++ by default. Administrators can modify the IAA device node ownership ++ so that QEMU can use IAA with specified user permissions.
++ ++ For example ++ ++ #chown -R qemu /dev/iax ++ ++Shared Virtual Memory(SVM) Introduction ++======================================= ++ ++An ability for an accelerator I/O device to operate in the same virtual ++memory space of applications on host processors. It also implies the ++ability to operate from pageable memory, avoiding functional requirements ++to pin memory for DMA operations. ++ ++When using ``SVM`` technology, users do not need to reserve memory for the ++``IAA`` device and perform pin memory operation. The ``IAA`` device can ++directly access data using the virtual address of the process. ++ ++For more ``SVM`` technology, please refer to ++`Shared Virtual Addressing (SVA) with ENQCMD ++`_ ++ ++ ++How To Use QPL Compression In Migration ++======================================= ++ ++1 - Installation of ``QPL`` library and ``accel-config`` library if using IAA ++ ++2 - Configure and enable ``IAA`` devices and work queues via ``accel-config`` ++ ++3 - Build ``QEMU`` with ``--enable-qpl`` parameter ++ ++ E.g. configure --target-list=x86_64-softmmu --enable-kvm ``--enable-qpl`` ++ ++4 - Enable ``QPL`` compression during migration ++ ++ Set ``migrate_set_parameter multifd-compression qpl`` when migrating, the ++ ``QPL`` compression does not support configuring the compression level, it ++ only supports one compression level. ++ ++The Difference Between QPL And ZLIB ++=================================== ++ ++Although both ``QPL`` and ``ZLIB`` are based on the deflate compression ++algorithm, and ``QPL`` can support the header and tail of ``ZLIB``, ``QPL`` ++is still not fully compatible with the ``ZLIB`` compression in the migration. ++ ++``QPL`` only supports 4K history buffer, and ``ZLIB`` is 32K by default. ++``ZLIB`` compresses data that ``QPL`` may not decompress correctly and ++vice versa. 
++ ++``QPL`` does not support the ``Z_SYNC_FLUSH`` operation in ``ZLIB`` streaming ++compression, current ``ZLIB`` implementation uses ``Z_SYNC_FLUSH``, so each ++``multifd`` thread has a ``ZLIB`` streaming context, and all page compression ++and decompression are based on this stream. ``QPL`` cannot decompress such data ++and vice versa. ++ ++The introduction for ``Z_SYNC_FLUSH``, please refer to `Zlib Manual ++`_ ++ ++The Best Practices ++================== ++When user enables the IAA device for ``QPL`` compression, it is recommended ++to add ``-mem-prealloc`` parameter to the destination boot parameters. This ++parameter can avoid the occurrence of I/O page fault and reduce the overhead ++of IAA compression and decompression. ++ ++The example of booting with ``-mem-prealloc`` parameter ++ ++.. code-block:: shell ++ ++ $qemu-system-x86_64 --enable-kvm -cpu host --mem-prealloc ... ++ ++ ++An example about I/O page fault measurement of destination without ++``-mem-prealloc``, the ``svm_prq`` indicates the number of I/O page fault ++occurrences and processing time. ++ ++.. 
code-block:: shell ++ ++ #echo 1 > /sys/kernel/debug/iommu/intel/dmar_perf_latency ++ #echo 2 > /sys/kernel/debug/iommu/intel/dmar_perf_latency ++ #echo 3 > /sys/kernel/debug/iommu/intel/dmar_perf_latency ++ #echo 4 > /sys/kernel/debug/iommu/intel/dmar_perf_latency ++ #cat /sys/kernel/debug/iommu/intel/dmar_perf_latency ++ IOMMU: dmar18 Register Base Address: c87fc000 ++ <0.1us 0.1us-1us 1us-10us 10us-100us 100us-1ms 1ms-10ms >=10ms min(us) max(us) average(us) ++ inv_iotlb 0 286 123 0 0 0 0 0 1 0 ++ inv_devtlb 0 276 133 0 0 0 0 0 2 0 ++ inv_iec 0 0 0 0 0 0 0 0 0 0 ++ svm_prq 0 0 25206 364 395 0 0 1 556 9 +-- +2.33.0 + diff --git a/docs-migration-add-uadk-compression-feature.patch b/docs-migration-add-uadk-compression-feature.patch new file mode 100644 index 0000000..7972bfd --- /dev/null +++ b/docs-migration-add-uadk-compression-feature.patch @@ -0,0 +1,183 @@ +From 2d8e0ef9947bdb82ce70acd7d0605795bf775153 Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Fri, 7 Jun 2024 14:53:04 +0100 +Subject: [80/99] docs/migration: add uadk compression feature + +commit 3ae9bd97829213808298ae6d35ea26f8def15dc1 upstream. + +Document UADK(User Space Accelerator Development Kit) library details +and how to use that for migration. + +Signed-off-by: Shameer Kolothum +Reviewed-by: Zhangfei Gao +[s/Qemu/QEMU in docs] +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + docs/devel/migration/features.rst | 1 + + docs/devel/migration/uadk-compression.rst | 144 ++++++++++++++++++++++ + 2 files changed, 145 insertions(+) + create mode 100644 docs/devel/migration/uadk-compression.rst + +diff --git a/docs/devel/migration/features.rst b/docs/devel/migration/features.rst +index 9819393c12..0c9cb3dd6c 100644 +--- a/docs/devel/migration/features.rst ++++ b/docs/devel/migration/features.rst +@@ -11,3 +11,4 @@ Migration has plenty of features to support different use cases. 
+ vfio + virtio + qpl-compression ++ uadk-compression +diff --git a/docs/devel/migration/uadk-compression.rst b/docs/devel/migration/uadk-compression.rst +new file mode 100644 +index 0000000000..3f73345dd5 +--- /dev/null ++++ b/docs/devel/migration/uadk-compression.rst +@@ -0,0 +1,144 @@ ++========================================================= ++User Space Accelerator Development Kit (UADK) Compression ++========================================================= ++UADK is a general-purpose user space accelerator framework that uses shared ++virtual addressing (SVA) to provide a unified programming interface for ++hardware acceleration of cryptographic and compression algorithms. ++ ++UADK includes Unified/User-space-access-intended Accelerator Framework (UACCE), ++which enables hardware accelerators from different vendors that support SVA to ++adapt to UADK. ++ ++Currently, HiSilicon Kunpeng hardware accelerators have been registered with ++UACCE. Through the UADK framework, users can run cryptographic and compression ++algorithms using hardware accelerators instead of CPUs, freeing up CPU ++computing power and improving computing performance. ++ ++https://github.com/Linaro/uadk/tree/master/docs ++ ++UADK Framework ++============== ++UADK consists of UACCE, vendors' drivers, and an algorithm layer. UADK requires ++the hardware accelerator to support SVA, and the operating system to support ++IOMMU and SVA. Hardware accelerators from different vendors are registered as ++different character devices with UACCE by using kernel-mode drivers of the ++vendors. A user can access the hardware accelerators by performing user-mode ++operations on the character devices. 
++ ++:: ++ ++ +----------------------------------+ ++ | apps | ++ +----+------------------------+----+ ++ | | ++ | | ++ +-------+--------+ +-------+-------+ ++ | scheduler | | alg libraries | ++ +-------+--------+ +-------+-------+ ++ | | ++ | | ++ | | ++ | +--------+------+ ++ | | vendor drivers| ++ | +-+-------------+ ++ | | ++ | | ++ +--+------------------+--+ ++ | libwd | ++ User +----+-------------+-----+ ++ -------------------------------------------------- ++ Kernel +--+-----+ +------+ ++ | uacce | | smmu | ++ +---+----+ +------+ ++ | ++ +---+------------------+ ++ | vendor kernel driver | ++ +----------------------+ ++ -------------------------------------------------- ++ +----------------------+ ++ | HW Accelerators | ++ +----------------------+ ++ ++UADK Installation ++----------------- ++Build UADK ++^^^^^^^^^^ ++ ++.. code-block:: shell ++ ++ git clone https://github.com/Linaro/uadk.git ++ cd uadk ++ mkdir build ++ ./autogen.sh ++ ./configure --prefix=$PWD/build ++ make ++ make install ++ ++Without --prefix, UADK will be installed to /usr/local/lib by default. ++If get error:"cannot find -lnuma", please install the libnuma-dev ++ ++Run pkg-config libwd to ensure env is setup correctly ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++* export PKG_CONFIG_PATH=$PWD/build/lib/pkgconfig ++* pkg-config libwd --cflags --libs ++ -I/usr/local/include -L/usr/local/lib -lwd ++ ++* export PKG_CONFIG_PATH is required on demand. ++ Not required if UADK is installed to /usr/local/lib ++ ++UADK Host Kernel Requirements ++----------------------------- ++User needs to make sure that ``UACCE`` is already supported in Linux kernel. ++The kernel version should be at least v5.9 with SVA (Shared Virtual ++Addressing) enabled. ++ ++Kernel Configuration ++^^^^^^^^^^^^^^^^^^^^ ++ ++``UACCE`` could be built as module or built-in. ++ ++Here's an example to enable UACCE with hardware accelerator in HiSilicon ++Kunpeng platform. 
++ ++* CONFIG_IOMMU_SVA_LIB=y ++* CONFIG_ARM_SMMU=y ++* CONFIG_ARM_SMMU_V3=y ++* CONFIG_ARM_SMMU_V3_SVA=y ++* CONFIG_PCI_PASID=y ++* CONFIG_UACCE=y ++* CONFIG_CRYPTO_DEV_HISI_QM=y ++* CONFIG_CRYPTO_DEV_HISI_ZIP=y ++ ++Make sure all these above kernel configurations are selected. ++ ++Accelerator dev node permissions ++-------------------------------- ++Hardware accelerators(eg: HiSilicon Kunpeng Zip accelerator) get registered to ++UADK and char devices are created in dev directory. In order to access resources ++on hardware accelerator devices, write permission should be provided to user. ++ ++.. code-block:: shell ++ ++ $ sudo chmod 777 /dev/hisi_zip-* ++ ++How To Use UADK Compression In QEMU Migration ++--------------------------------------------- ++* Make sure UADK is installed as above ++* Build ``QEMU`` with ``--enable-uadk`` parameter ++ ++ E.g. configure --target-list=aarch64-softmmu --enable-kvm ``--enable-uadk`` ++ ++* Enable ``UADK`` compression during migration ++ ++ Set ``migrate_set_parameter multifd-compression uadk`` ++ ++Since UADK uses Shared Virtual Addressing(SVA) and device access virtual memory ++directly it is possible that SMMUv3 may encounter page faults while walking the ++IO page tables. This may impact the performance. In order to mitigate this, ++please make sure to specify ``-mem-prealloc`` parameter to the destination VM ++boot parameters. ++ ++Though both UADK and ZLIB are based on the deflate compression algorithm, UADK ++is not fully compatible with ZLIB. Hence, please make sure to use ``uadk`` on ++both source and destination during migration.
+-- +2.33.0 + diff --git a/hw-arm-virt-HDBSS-fix-arm-softmmu-build-on-x86-platf.patch b/hw-arm-virt-HDBSS-fix-arm-softmmu-build-on-x86-platf.patch new file mode 100644 index 0000000..c6702dd --- /dev/null +++ b/hw-arm-virt-HDBSS-fix-arm-softmmu-build-on-x86-platf.patch @@ -0,0 +1,170 @@ +From ff64aed3c87427dfa65fa85aef93b44372aefe7d Mon Sep 17 00:00:00 2001 +From: Jason Zeng +Date: Mon, 26 May 2025 16:59:20 +0800 +Subject: [PATCH 2/4] hw/arm/virt: HDBSS: fix arm-softmmu build on x86 platform + +Move kvm_update_hdbss_cap() to accel/kvm/kvm-stub.c, +check kvm_enabled() and add stub function + +Fixes: e549f32b1a88 ("hw/arm/virt: support the HDBSS feature") +Signed-off-by: Jason Zeng +--- + accel/kvm/kvm-all.c | 25 +++++++++++++++++++++++++ + accel/stubs/kvm-stub.c | 5 +++++ + include/sysemu/kvm.h | 8 ++++++++ + migration/migration.h | 7 ------- + migration/ram.c | 35 ++++++----------------------------- + 5 files changed, 44 insertions(+), 36 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index f96afb1230..7d175d3262 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -3251,6 +3251,31 @@ bool kvm_arm_supports_user_irq(void) + return kvm_check_extension(kvm_state, KVM_CAP_ARM_USER_IRQ); + } + ++void kvm_update_hdbss_cap(bool enable, int hdbss_buffer_size) ++{ ++ KVMState *s = kvm_state; ++ int size, ret; ++ ++ if (s == NULL || !kvm_check_extension(s, KVM_CAP_ARM_HW_DIRTY_STATE_TRACK)) { ++ return; ++ } ++ ++ size = hdbss_buffer_size; ++ if (size < 0 || size > MAX_HDBSS_BUFFER_SIZE) { ++ fprintf(stderr, "Invalid hdbss buffer size: %d\n", size); ++ return; ++ } ++ ++ ret = kvm_vm_enable_cap(s, KVM_CAP_ARM_HW_DIRTY_STATE_TRACK, 0, ++ enable ? size : 0); ++ if (ret) { ++ fprintf(stderr, "Could not %s KVM_CAP_ARM_HW_DIRTY_STATE_TRACK: %d\n", ++ enable ? 
"enable" : "disable", ret); ++ } ++ ++ return; ++} ++ + #ifdef KVM_CAP_SET_GUEST_DEBUG + struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu, vaddr pc) + { +diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c +index 1fffdc0ea2..2625175b99 100644 +--- a/accel/stubs/kvm-stub.c ++++ b/accel/stubs/kvm-stub.c +@@ -119,6 +119,11 @@ bool kvm_arm_supports_user_irq(void) + return false; + } + ++void kvm_update_hdbss_cap(bool enable, int hdbss_buffer_size) ++{ ++ g_assert_not_reached(); ++} ++ + bool kvm_dirty_ring_enabled(void) + { + return false; +diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h +index 16cccc881e..098257e72f 100644 +--- a/include/sysemu/kvm.h ++++ b/include/sysemu/kvm.h +@@ -229,6 +229,14 @@ int kvm_has_gsi_routing(void); + */ + bool kvm_arm_supports_user_irq(void); + ++/* ++ * The default HDBSS size. The value ranges [0, 9]. ++ * Set to 0 to disable the HDBSS feature. ++ */ ++#define DEFAULT_HDBSS_BUFFER_SIZE 0 ++#define MAX_HDBSS_BUFFER_SIZE 9 ++ ++void kvm_update_hdbss_cap(bool enable, int hdbss_buffer_size); + + int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); + int kvm_on_sigbus(int code, void *addr); +diff --git a/migration/migration.h b/migration/migration.h +index 4a95f00157..eeddb7c0bd 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -48,13 +48,6 @@ struct PostcopyBlocktimeContext; + */ + #define CLEAR_BITMAP_SHIFT_MAX 31 + +-/* +- * The default HDBSS size. The value ranges [0, 9]. +- * Set to 0 to disable the HDBSS feature. 
+- */ +-#define DEFAULT_HDBSS_BUFFER_SIZE 0 +-#define MAX_HDBSS_BUFFER_SIZE 9 +- + /* This is an abstraction of a "temp huge page" for postcopy's purpose */ + typedef struct { + /* +diff --git a/migration/ram.c b/migration/ram.c +index ee57da62f6..91bec89a6e 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2812,33 +2812,6 @@ static void xbzrle_cleanup(void) + XBZRLE_cache_unlock(); + } + +-#ifdef TARGET_AARCH64 +-static void kvm_update_hdbss_cap(bool enable, int hdbss_buffer_size) +-{ +- KVMState *s = kvm_state; +- int size, ret; +- +- if (s == NULL || !kvm_check_extension(s, KVM_CAP_ARM_HW_DIRTY_STATE_TRACK)) { +- return; +- } +- +- size = hdbss_buffer_size; +- if (size < 0 || size > MAX_HDBSS_BUFFER_SIZE) { +- fprintf(stderr, "Invalid hdbss buffer size: %d\n", size); +- return; +- } +- +- ret = kvm_vm_enable_cap(s, KVM_CAP_ARM_HW_DIRTY_STATE_TRACK, 0, +- enable ? size : 0); +- if (ret) { +- fprintf(stderr, "Could not %s KVM_CAP_ARM_HW_DIRTY_STATE_TRACK: %d\n", +- enable ? "enable" : "disable", ret); +- } +- +- return; +-} +-#endif +- + static void ram_save_cleanup(void *opaque) + { + RAMState **rsp = opaque; +@@ -2856,7 +2829,9 @@ static void ram_save_cleanup(void *opaque) + * memory_global_dirty_log_start/stop used in pairs + */ + #ifdef TARGET_AARCH64 +- kvm_update_hdbss_cap(false, 0); ++ if (kvm_enabled()) { ++ kvm_update_hdbss_cap(false, 0); ++ } + #endif + memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION); + } +@@ -3262,7 +3237,9 @@ static void ram_init_bitmaps(RAMState *rs) + /* We don't use dirty log with background snapshots */ + if (!migrate_background_snapshot()) { + #ifdef TARGET_AARCH64 +- kvm_update_hdbss_cap(true, migrate_hdbss_buffer_size()); ++ if (kvm_enabled()) { ++ kvm_update_hdbss_cap(true, migrate_hdbss_buffer_size()); ++ } + #endif + memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION); + migration_bitmap_sync_precopy(rs, false); +-- +2.33.0 + diff --git a/hw-arm-virt-decouple-migrate_hdbss_buffer_size-with-.patch 
b/hw-arm-virt-decouple-migrate_hdbss_buffer_size-with-.patch new file mode 100644 index 0000000..2f52575 --- /dev/null +++ b/hw-arm-virt-decouple-migrate_hdbss_buffer_size-with-.patch @@ -0,0 +1,56 @@ +From d13e44fe048159d48891887169f756ac974d07fb Mon Sep 17 00:00:00 2001 +From: Jason Zeng +Date: Mon, 26 May 2025 16:49:00 +0800 +Subject: [PATCH 1/4] hw/arm/virt: decouple migrate_hdbss_buffer_size() with + kvm_update_hdbss_cap() + +So that we can move kvm_update_hdbss_cap() to accel/kvm/kvm-all.c + +Signed-off-by: Jason Zeng +--- + migration/ram.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index a8308eb005..ee57da62f6 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2813,7 +2813,7 @@ static void xbzrle_cleanup(void) + } + + #ifdef TARGET_AARCH64 +-static void kvm_update_hdbss_cap(bool enable) ++static void kvm_update_hdbss_cap(bool enable, int hdbss_buffer_size) + { + KVMState *s = kvm_state; + int size, ret; +@@ -2822,7 +2822,7 @@ static void kvm_update_hdbss_cap(bool enable) + return; + } + +- size = migrate_hdbss_buffer_size(); ++ size = hdbss_buffer_size; + if (size < 0 || size > MAX_HDBSS_BUFFER_SIZE) { + fprintf(stderr, "Invalid hdbss buffer size: %d\n", size); + return; +@@ -2856,7 +2856,7 @@ static void ram_save_cleanup(void *opaque) + * memory_global_dirty_log_start/stop used in pairs + */ + #ifdef TARGET_AARCH64 +- kvm_update_hdbss_cap(false); ++ kvm_update_hdbss_cap(false, 0); + #endif + memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION); + } +@@ -3262,7 +3262,7 @@ static void ram_init_bitmaps(RAMState *rs) + /* We don't use dirty log with background snapshots */ + if (!migrate_background_snapshot()) { + #ifdef TARGET_AARCH64 +- kvm_update_hdbss_cap(true); ++ kvm_update_hdbss_cap(true, migrate_hdbss_buffer_size()); + #endif + memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION); + migration_bitmap_sync_precopy(rs, false); +-- +2.33.0 + diff --git
a/hw-arm-virt-only-support-the-HDBSS-feature-in-aarch6.patch b/hw-arm-virt-only-support-the-HDBSS-feature-in-aarch6.patch new file mode 100644 index 0000000..7a9572d --- /dev/null +++ b/hw-arm-virt-only-support-the-HDBSS-feature-in-aarch6.patch @@ -0,0 +1,54 @@ +From e8587f657fd33f223227a167e94ed69db729e2ac Mon Sep 17 00:00:00 2001 +From: eillon +Date: Sun, 25 May 2025 22:22:58 +0800 +Subject: [PATCH] hw/arm/virt: only support the HDBSS feature in aarch64 + +Only support the HDBSS feature in aarch64 architecture as it +depends on the kvm. +--- + migration/ram.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/migration/ram.c b/migration/ram.c +index 6acf518a34..a8308eb005 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2812,6 +2812,7 @@ static void xbzrle_cleanup(void) + XBZRLE_cache_unlock(); + } + ++#ifdef TARGET_AARCH64 + static void kvm_update_hdbss_cap(bool enable) + { + KVMState *s = kvm_state; +@@ -2836,6 +2837,7 @@ static void kvm_update_hdbss_cap(bool enable) + + return; + } ++#endif + + static void ram_save_cleanup(void *opaque) + { +@@ -2853,7 +2855,9 @@ static void ram_save_cleanup(void *opaque) + * memory_global_dirty_log_stop will assert that + * memory_global_dirty_log_start/stop used in pairs + */ ++#ifdef TARGET_AARCH64 + kvm_update_hdbss_cap(false); ++#endif + memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION); + } + } +@@ -3257,7 +3261,9 @@ static void ram_init_bitmaps(RAMState *rs) + ram_list_init_bitmaps(); + /* We don't use dirty log with background snapshots */ + if (!migrate_background_snapshot()) { ++#ifdef TARGET_AARCH64 + kvm_update_hdbss_cap(true); ++#endif + memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION); + migration_bitmap_sync_precopy(rs, false); + } +-- +2.33.0 + diff --git a/hw-arm-virt-support-the-HDBSS-feature.patch b/hw-arm-virt-support-the-HDBSS-feature.patch new file mode 100644 index 0000000..874f05e --- /dev/null +++ b/hw-arm-virt-support-the-HDBSS-feature.patch @@ -0,0 +1,285 @@ +From 
e549f32b1a88cb9ffdc4fc88fa818854a918498e Mon Sep 17 00:00:00 2001 +From: eillon +Date: Mon, 14 Apr 2025 22:33:21 +0800 +Subject: [PATCH] hw/arm/virt: support the HDBSS feature + +We use QEMU to enable or disable the HDBSS feature during live +migration. We can use the migration-parameter to control the size +of the HDBSS buffer, such as: + migrate_set_parameter hdbss-buffer-size 3 + info migrate_parameters + +Signed-off-by: eillon +--- + linux-headers/linux/kvm.h | 2 ++ + migration/migration-hmp-cmds.c | 9 +++++++++ + migration/migration.h | 7 +++++++ + migration/options.c | 21 +++++++++++++++++++++ + migration/options.h | 1 + + migration/ram.c | 28 ++++++++++++++++++++++++++++ + qapi/migration.json | 17 ++++++++++++++--- + 7 files changed, 82 insertions(+), 3 deletions(-) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index b94c5fd90f..57d6e12744 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1212,6 +1212,8 @@ struct kvm_ppc_resize_hpt { + /* support request to inject secret to CSV3 guest */ + #define KVM_CAP_HYGON_COCO_EXT_CSV3_INJ_SECRET (1 << 2) + ++#define KVM_CAP_ARM_HW_DIRTY_STATE_TRACK 502 ++ + #define KVM_CAP_ARM_VIRT_MSI_BYPASS 799 + + #define KVM_EXIT_HYPERCALL_VALID_MASK (1 << KVM_HC_MAP_GPA_RANGE) +diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c +index aac5e7a73a..9857e2c97f 100644 +--- a/migration/migration-hmp-cmds.c ++++ b/migration/migration-hmp-cmds.c +@@ -409,6 +409,11 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) + monitor_printf(mon, "%s: %s\n", + MigrationParameter_str(MIGRATION_PARAMETER_SEV_AMD_CERT), + params->sev_amd_cert); ++ ++ assert(params->has_hdbss_buffer_size); ++ monitor_printf(mon, "%s: %u\n", ++ MigrationParameter_str(MIGRATION_PARAMETER_HDBSS_BUFFER_SIZE), ++ params->hdbss_buffer_size); + } + + qapi_free_MigrationParameters(params); +@@ -725,6 +730,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + 
p->sev_amd_cert->type = QTYPE_QSTRING; + visit_type_str(v, param, &p->sev_amd_cert->u.s, &err); + break; ++ case MIGRATION_PARAMETER_HDBSS_BUFFER_SIZE: ++ p->has_hdbss_buffer_size = true; ++ visit_type_uint8(v, param, &p->hdbss_buffer_size, &err); ++ break; + default: + assert(0); + } +diff --git a/migration/migration.h b/migration/migration.h +index eeddb7c0bd..4a95f00157 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -48,6 +48,13 @@ struct PostcopyBlocktimeContext; + */ + #define CLEAR_BITMAP_SHIFT_MAX 31 + ++/* ++ * The default HDBSS size. The value ranges [0, 9]. ++ * Set to 0 to disable the HDBSS feature. ++ */ ++#define DEFAULT_HDBSS_BUFFER_SIZE 0 ++#define MAX_HDBSS_BUFFER_SIZE 9 ++ + /* This is an abstraction of a "temp huge page" for postcopy's purpose */ + typedef struct { + /* +diff --git a/migration/options.c b/migration/options.c +index 71e71ea801..71645c8721 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -186,6 +186,9 @@ Property migration_properties[] = { + DEFINE_PROP_STRING("sev-pdh", MigrationState, parameters.sev_pdh), + DEFINE_PROP_STRING("sev-plat-cert", MigrationState, parameters.sev_plat_cert), + DEFINE_PROP_STRING("sev-amd-cert", MigrationState, parameters.sev_amd_cert), ++ DEFINE_PROP_UINT8("hdbss-buffer-size", MigrationState, ++ parameters.hdbss_buffer_size, ++ DEFAULT_HDBSS_BUFFER_SIZE), + + /* Migration capabilities */ + DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), +@@ -853,6 +856,13 @@ MigMode migrate_mode(void) + return s->parameters.mode; + } + ++int migrate_hdbss_buffer_size(void) ++{ ++ MigrationState *s = migrate_get_current(); ++ ++ return s->parameters.hdbss_buffer_size; ++} ++ + int migrate_multifd_channels(void) + { + MigrationState *s = migrate_get_current(); +@@ -1032,6 +1042,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + params->vcpu_dirty_limit = s->parameters.vcpu_dirty_limit; + params->has_mode = true; + params->mode = s->parameters.mode; ++ 
params->has_hdbss_buffer_size = true; ++ params->hdbss_buffer_size = s->parameters.hdbss_buffer_size; + + return params; + } +@@ -1069,6 +1081,7 @@ void migrate_params_init(MigrationParameters *params) + params->has_x_vcpu_dirty_limit_period = true; + params->has_vcpu_dirty_limit = true; + params->has_mode = true; ++ params->has_hdbss_buffer_size = true; + + params->sev_pdh = g_strdup(""); + params->sev_plat_cert = g_strdup(""); +@@ -1415,6 +1428,10 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + assert(params->sev_amd_cert->type == QTYPE_QSTRING); + dest->sev_amd_cert = params->sev_amd_cert->u.s; + } ++ ++ if (params->has_hdbss_buffer_size) { ++ dest->hdbss_buffer_size = params->hdbss_buffer_size; ++ } + } + + static void migrate_params_apply(MigrateSetParameters *params, Error **errp) +@@ -1579,6 +1596,10 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + assert(params->sev_amd_cert->type == QTYPE_QSTRING); + s->parameters.sev_amd_cert = g_strdup(params->sev_amd_cert->u.s); + } ++ ++ if (params->has_hdbss_buffer_size) { ++ s->parameters.hdbss_buffer_size = params->hdbss_buffer_size; ++ } + } + + void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp) +diff --git a/migration/options.h b/migration/options.h +index 9aca5e41ad..987fc81a18 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -85,6 +85,7 @@ uint64_t migrate_max_bandwidth(void); + uint64_t migrate_avail_switchover_bandwidth(void); + uint64_t migrate_max_postcopy_bandwidth(void); + MigMode migrate_mode(void); ++int migrate_hdbss_buffer_size(void); + int migrate_multifd_channels(void); + MultiFDCompression migrate_multifd_compression(void); + int migrate_multifd_zlib_level(void); +diff --git a/migration/ram.c b/migration/ram.c +index 1f9348fd06..f1ff38cf39 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -39,6 +39,7 @@ + #include "migration-stats.h" + #include "migration/register.h" + #include 
"migration/misc.h" ++#include "migration/options.h" + #include "qemu-file.h" + #include "postcopy-ram.h" + #include "page_cache.h" +@@ -2790,6 +2791,31 @@ static void xbzrle_cleanup(void) + XBZRLE_cache_unlock(); + } + ++static void kvm_update_hdbss_cap(bool enable) ++{ ++ KVMState *s = kvm_state; ++ int size, ret; ++ ++ if (s == NULL || !kvm_check_extension(s, KVM_CAP_ARM_HW_DIRTY_STATE_TRACK)) { ++ return; ++ } ++ ++ size = migrate_hdbss_buffer_size(); ++ if (size < 0 || size > MAX_HDBSS_BUFFER_SIZE) { ++ fprintf(stderr, "Invalid hdbss buffer size: %d\n", size); ++ return; ++ } ++ ++ ret = kvm_vm_enable_cap(s, KVM_CAP_ARM_HW_DIRTY_STATE_TRACK, 0, ++ enable ? size : 0); ++ if (ret) { ++ fprintf(stderr, "Could not %s KVM_CAP_ARM_HW_DIRTY_STATE_TRACK: %d\n", ++ enable ? "enable" : "disable", ret); ++ } ++ ++ return; ++} ++ + static void ram_save_cleanup(void *opaque) + { + RAMState **rsp = opaque; +@@ -2806,6 +2832,7 @@ static void ram_save_cleanup(void *opaque) + * memory_global_dirty_log_stop will assert that + * memory_global_dirty_log_start/stop used in pairs + */ ++ kvm_update_hdbss_cap(false); + memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION); + } + } +@@ -3209,6 +3236,7 @@ static void ram_init_bitmaps(RAMState *rs) + ram_list_init_bitmaps(); + /* We don't use dirty log with background snapshots */ + if (!migrate_background_snapshot()) { ++ kvm_update_hdbss_cap(true); + memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION); + migration_bitmap_sync_precopy(rs, false); + } +diff --git a/qapi/migration.json b/qapi/migration.json +index 3aed216c3b..f672da5c0d 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -902,6 +902,9 @@ + # @sev-amd-cert: AMD certificate chain which include ASK and OCA encoded in + # base64, or vendor cert filename for hygon (Since 4.2) + # ++# @hdbss-buffer-size: Size of the HDBSS(Hardware Dirty state tracking Structure). ++# Defaults to 0. 
(Since 8.6) ++# + # Features: + # + # @deprecated: Member @block-incremental is deprecated. Use +@@ -937,7 +940,7 @@ + { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] }, + 'vcpu-dirty-limit', + 'mode', +- 'sev-pdh', 'sev-plat-cert', 'sev-amd-cert'] } ++ 'sev-pdh', 'sev-plat-cert', 'sev-amd-cert', 'hdbss-buffer-size'] } + + ## + # @MigrateSetParameters: +@@ -1106,6 +1109,9 @@ + # @sev-amd-cert: AMD certificate chain which include ASK and OCA encoded in + # base64, or vendor cert filename for hygon (Since 4.2) + # ++# @hdbss-buffer-size: Size of the HDBSS(Hardware Dirty state tracking Structure). ++# Defaults to 0. (Since 8.6) ++# + # Features: + # + # @deprecated: Member @block-incremental is deprecated. Use +@@ -1165,7 +1171,8 @@ + '*mode': 'MigMode', + '*sev-pdh': 'StrOrNull', + '*sev-plat-cert': 'StrOrNull', +- '*sev-amd-cert' : 'StrOrNull' } } ++ '*sev-amd-cert' : 'StrOrNull', ++ '*hdbss-buffer-size': 'uint8'} } + + + ## +@@ -1355,6 +1362,9 @@ + # @sev-amd-cert: AMD certificate chain which include ASK and OCA encoded in + # base64, or vendor cert filename for hygon (Since 4.2) + # ++# @hdbss-buffer-size: Size of the HDBSS(Hardware Dirty state tracking Structure). ++# Defaults to 0. (Since 8.6) ++# + # Features: + # + # @deprecated: Member @block-incremental is deprecated. 
Use +@@ -1410,7 +1420,8 @@ + '*mode': 'MigMode', + '*sev-pdh': 'str', + '*sev-plat-cert': 'str', +- '*sev-amd-cert' : 'str'} } ++ '*sev-amd-cert' : 'str', ++ '*hdbss-buffer-size': 'uint8'} } + + ## + # @query-migrate-parameters: +-- +2.33.0 + diff --git a/meson-Introduce-qatzip-feature-to-the-build-system.patch b/meson-Introduce-qatzip-feature-to-the-build-system.patch new file mode 100644 index 0000000..d3690cb --- /dev/null +++ b/meson-Introduce-qatzip-feature-to-the-build-system.patch @@ -0,0 +1,99 @@ +From ca73720f8e625f143a27acf7c1aedb1b426c1ee1 Mon Sep 17 00:00:00 2001 +From: Bryan Zhang +Date: Fri, 30 Aug 2024 16:27:19 -0700 +Subject: [89/99] meson: Introduce 'qatzip' feature to the build system + +commit e28ed313c268aeb4e0cefb66dcd215c30e4443fe upstream. + +Add a 'qatzip' feature, which is automatically disabled, and which +depends on the QATzip library if enabled. + +Reviewed-by: Fabiano Rosas +Signed-off-by: Bryan Zhang +Signed-off-by: Hao Xiang +Signed-off-by: Yichen Wang +Link: https://lore.kernel.org/r/20240830232722.58272-3-yichen.wang@bytedance.com +Signed-off-by: Peter Xu + + Conflicts: + scripts/meson-buildoptions.sh +[jz: resolve simple context conflicts] +Signed-off-by: Jason Zeng +--- + meson.build | 10 ++++++++++ + meson_options.txt | 2 ++ + scripts/meson-buildoptions.sh | 3 +++ + 3 files changed, 15 insertions(+) + +diff --git a/meson.build b/meson.build +index e3599b9a09..d221f5cad5 100644 +--- a/meson.build ++++ b/meson.build +@@ -1061,6 +1061,14 @@ if not get_option('uadk').auto() or have_system + uadk = declare_dependency(dependencies: [libwd, libwd_comp]) + endif + endif ++ ++qatzip = not_found ++if not get_option('qatzip').auto() or have_system ++ qatzip = dependency('qatzip', version: '>=1.1.2', ++ required: get_option('qatzip'), ++ method: 'pkg-config') ++endif ++ + virgl = not_found + + have_vhost_user_gpu = have_tools and targetos == 'linux' and pixman.found() +@@ -2301,6 +2309,7 @@ config_host_data.set('CONFIG_STATX_MNT_ID', 
has_statx_mnt_id) + config_host_data.set('CONFIG_ZSTD', zstd.found()) + config_host_data.set('CONFIG_QPL', qpl.found()) + config_host_data.set('CONFIG_UADK', uadk.found()) ++config_host_data.set('CONFIG_QATZIP', qatzip.found()) + config_host_data.set('CONFIG_FUSE', fuse.found()) + config_host_data.set('CONFIG_FUSE_LSEEK', fuse_lseek.found()) + config_host_data.set('CONFIG_SPICE_PROTOCOL', spice_protocol.found()) +@@ -4477,6 +4486,7 @@ summary_info += {'lzfse support': liblzfse} + summary_info += {'zstd support': zstd} + summary_info += {'Query Processing Library support': qpl} + summary_info += {'UADK Library support': uadk} ++summary_info += {'qatzip support': qatzip} + summary_info += {'NUMA host support': numa} + summary_info += {'capstone': capstone} + summary_info += {'libpmem support': libpmem} +diff --git a/meson_options.txt b/meson_options.txt +index 709678fa18..61996300d5 100644 +--- a/meson_options.txt ++++ b/meson_options.txt +@@ -263,6 +263,8 @@ option('qpl', type : 'feature', value : 'auto', + description: 'Query Processing Library support') + option('uadk', type : 'feature', value : 'auto', + description: 'UADK Library support') ++option('qatzip', type: 'feature', value: 'auto', ++ description: 'QATzip compression support') + option('fuse', type: 'feature', value: 'auto', + description: 'FUSE block device export') + option('fuse_lseek', type : 'feature', value : 'auto', +diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh +index 833b996818..8604fe8ffa 100644 +--- a/scripts/meson-buildoptions.sh ++++ b/scripts/meson-buildoptions.sh +@@ -163,6 +163,7 @@ meson_options_help() { + printf "%s\n" ' pixman pixman support' + printf "%s\n" ' plugins TCG plugins via shared library loading' + printf "%s\n" ' png PNG support with libpng' ++ printf "%s\n" ' qatzip QATzip compression support' + printf "%s\n" ' pvrdma Enable PVRDMA support' + printf "%s\n" ' qcow1 qcow1 image format support' + printf "%s\n" ' qed qed image format support' +@@ 
-430,6 +431,8 @@ _meson_option_parse() { + --enable-png) printf "%s" -Dpng=enabled ;; + --disable-png) printf "%s" -Dpng=disabled ;; + --prefix=*) quote_sh "-Dprefix=$2" ;; ++ --enable-qatzip) printf "%s" -Dqatzip=enabled ;; ++ --disable-qatzip) printf "%s" -Dqatzip=disabled ;; + --enable-pvrdma) printf "%s" -Dpvrdma=enabled ;; + --disable-pvrdma) printf "%s" -Dpvrdma=disabled ;; + --enable-qcow1) printf "%s" -Dqcow1=enabled ;; +-- +2.33.0 + diff --git a/migration-Add-migration-parameters-for-QATzip.patch b/migration-Add-migration-parameters-for-QATzip.patch new file mode 100644 index 0000000..19178bf --- /dev/null +++ b/migration-Add-migration-parameters-for-QATzip.patch @@ -0,0 +1,214 @@ +From cb3f1e1a84a3776d5382013cb9fcfe08c8ea9b3e Mon Sep 17 00:00:00 2001 +From: Bryan Zhang +Date: Fri, 30 Aug 2024 16:27:20 -0700 +Subject: [90/99] migration: Add migration parameters for QATzip + +commit 86c6eb1f39cbb7eb0467c114469e98ef699fb515 upstream. + +Adds support for migration parameters to control QATzip compression +level. 
+ +Acked-by: Markus Armbruster +Signed-off-by: Bryan Zhang +Signed-off-by: Hao Xiang +Signed-off-by: Yichen Wang +Reviewed-by: Fabiano Rosas +Reviewed-by: Prasad Pandit +Link: https://lore.kernel.org/r/20240830232722.58272-4-yichen.wang@bytedance.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/migration-hmp-cmds.c | 4 ++++ + migration/options.c | 34 ++++++++++++++++++++++++++++++++++ + migration/options.h | 1 + + qapi/migration.json | 18 ++++++++++++++++++ + 4 files changed, 57 insertions(+) + +diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c +index 91e51eb7af..d6d5f373a1 100644 +--- a/migration/migration-hmp-cmds.c ++++ b/migration/migration-hmp-cmds.c +@@ -669,6 +669,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + p->has_multifd_zlib_level = true; + visit_type_uint8(v, param, &p->multifd_zlib_level, &err); + break; ++ case MIGRATION_PARAMETER_MULTIFD_QATZIP_LEVEL: ++ p->has_multifd_qatzip_level = true; ++ visit_type_uint8(v, param, &p->multifd_qatzip_level, &err); ++ break; + case MIGRATION_PARAMETER_MULTIFD_ZSTD_LEVEL: + p->has_multifd_zstd_level = true; + visit_type_uint8(v, param, &p->multifd_zstd_level, &err); +diff --git a/migration/options.c b/migration/options.c +index e752163114..6ba7ff65a3 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -63,6 +63,13 @@ + #define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE + /* 0: means nocompress, 1: best speed, ... 9: best compress ratio */ + #define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1 ++/* ++ * 1: best speed, ... 9: best compress ratio ++ * There is some nuance here. Refer to QATzip documentation to understand ++ * the mapping of QATzip levels to standard deflate levels. ++ */ ++#define DEFAULT_MIGRATE_MULTIFD_QATZIP_LEVEL 1 ++ + /* 0: means nocompress, 1: best speed, ... 
20: best compress ratio */ + #define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1 + +@@ -147,6 +154,9 @@ Property migration_properties[] = { + DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, + parameters.multifd_zlib_level, + DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), ++ DEFINE_PROP_UINT8("multifd-qatzip-level", MigrationState, ++ parameters.multifd_qatzip_level, ++ DEFAULT_MIGRATE_MULTIFD_QATZIP_LEVEL), + DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, + parameters.multifd_zstd_level, + DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), +@@ -888,6 +898,13 @@ int migrate_multifd_zlib_level(void) + return s->parameters.multifd_zlib_level; + } + ++int migrate_multifd_qatzip_level(void) ++{ ++ MigrationState *s = migrate_get_current(); ++ ++ return s->parameters.multifd_qatzip_level; ++} ++ + int migrate_multifd_zstd_level(void) + { + MigrationState *s = migrate_get_current(); +@@ -1019,6 +1036,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + params->multifd_compression = s->parameters.multifd_compression; + params->has_multifd_zlib_level = true; + params->multifd_zlib_level = s->parameters.multifd_zlib_level; ++ params->has_multifd_qatzip_level = true; ++ params->multifd_qatzip_level = s->parameters.multifd_qatzip_level; + params->has_multifd_zstd_level = true; + params->multifd_zstd_level = s->parameters.multifd_zstd_level; + params->has_xbzrle_cache_size = true; +@@ -1082,6 +1101,7 @@ void migrate_params_init(MigrationParameters *params) + params->has_multifd_channels = true; + params->has_multifd_compression = true; + params->has_multifd_zlib_level = true; ++ params->has_multifd_qatzip_level = true; + params->has_multifd_zstd_level = true; + params->has_xbzrle_cache_size = true; + params->has_max_postcopy_bandwidth = true; +@@ -1221,6 +1241,14 @@ bool migrate_params_check(MigrationParameters *params, Error **errp) + return false; + } + ++ if (params->has_multifd_qatzip_level && ++ ((params->multifd_qatzip_level > 9) || ++ (params->multifd_qatzip_level < 
1))) { ++ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_qatzip_level", ++ "a value between 1 and 9"); ++ return false; ++ } ++ + if (params->has_multifd_zstd_level && + (params->multifd_zstd_level > 20)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level", +@@ -1390,6 +1418,9 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + if (params->has_multifd_compression) { + dest->multifd_compression = params->multifd_compression; + } ++ if (params->has_multifd_qatzip_level) { ++ dest->multifd_qatzip_level = params->multifd_qatzip_level; ++ } + if (params->has_multifd_zlib_level) { + dest->multifd_zlib_level = params->multifd_zlib_level; + } +@@ -1556,6 +1587,9 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + if (params->has_multifd_compression) { + s->parameters.multifd_compression = params->multifd_compression; + } ++ if (params->has_multifd_qatzip_level) { ++ s->parameters.multifd_qatzip_level = params->multifd_qatzip_level; ++ } + if (params->has_multifd_zlib_level) { + s->parameters.multifd_zlib_level = params->multifd_zlib_level; + } +diff --git a/migration/options.h b/migration/options.h +index dbd52d7acd..6b2a893217 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -89,6 +89,7 @@ int migrate_hdbss_buffer_size(void); + int migrate_multifd_channels(void); + MultiFDCompression migrate_multifd_compression(void); + int migrate_multifd_zlib_level(void); ++int migrate_multifd_qatzip_level(void); + int migrate_multifd_zstd_level(void); + uint8_t migrate_throttle_trigger_threshold(void); + const char *migrate_tls_authz(void); +diff --git a/qapi/migration.json b/qapi/migration.json +index f1a17c511b..255f5b50a6 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -885,6 +885,11 @@ + # speed, and 9 means best compression ratio which will consume + # more CPU. Defaults to 1. 
(Since 5.0) + # ++# @multifd-qatzip-level: Set the compression level to be used in live ++# migration. The level is an integer between 1 and 9, where 1 means ++# the best compression speed, and 9 means the best compression ++# ratio which will consume more CPU. Defaults to 1. (Since 9.2) ++# + # @multifd-zstd-level: Set the compression level to be used in live + # migration, the compression level is an integer between 0 and 20, + # where 0 means no compression, 1 means the best compression +@@ -966,6 +971,7 @@ + 'xbzrle-cache-size', 'max-postcopy-bandwidth', + 'max-cpu-throttle', 'multifd-compression', + 'multifd-zlib-level', 'multifd-zstd-level', ++ 'multifd-qatzip-level', + 'block-bitmap-mapping', + { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] }, + 'vcpu-dirty-limit', +@@ -1097,6 +1103,11 @@ + # speed, and 9 means best compression ratio which will consume + # more CPU. Defaults to 1. (Since 5.0) + # ++# @multifd-qatzip-level: Set the compression level to be used in live ++# migration. The level is an integer between 1 and 9, where 1 means ++# the best compression speed, and 9 means the best compression ++# ratio which will consume more CPU. Defaults to 1. (Since 9.2) ++# + # @multifd-zstd-level: Set the compression level to be used in live + # migration, the compression level is an integer between 0 and 20, + # where 0 means no compression, 1 means the best compression +@@ -1198,6 +1209,7 @@ + '*max-cpu-throttle': 'uint8', + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', ++ '*multifd-qatzip-level': 'uint8', + '*multifd-zstd-level': 'uint8', + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ], + '*x-vcpu-dirty-limit-period': { 'type': 'uint64', +@@ -1354,6 +1366,11 @@ + # speed, and 9 means best compression ratio which will consume + # more CPU. Defaults to 1. (Since 5.0) + # ++# @multifd-qatzip-level: Set the compression level to be used in live ++# migration. 
The level is an integer between 1 and 9, where 1 means ++# the best compression speed, and 9 means the best compression ++# ratio which will consume more CPU. Defaults to 1. (Since 9.2) ++# + # @multifd-zstd-level: Set the compression level to be used in live + # migration, the compression level is an integer between 0 and 20, + # where 0 means no compression, 1 means the best compression +@@ -1451,6 +1468,7 @@ + '*max-cpu-throttle': 'uint8', + '*multifd-compression': 'MultiFDCompression', + '*multifd-zlib-level': 'uint8', ++ '*multifd-qatzip-level': 'uint8', + '*multifd-zstd-level': 'uint8', + '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ], + '*x-vcpu-dirty-limit-period': { 'type': 'uint64', +-- +2.33.0 + diff --git a/migration-Fix-logic-of-channels-and-transport-compat.patch b/migration-Fix-logic-of-channels-and-transport-compat.patch new file mode 100644 index 0000000..d251b78 --- /dev/null +++ b/migration-Fix-logic-of-channels-and-transport-compat.patch @@ -0,0 +1,72 @@ +From d5a21de3aa2a13ab8bfb4d9d815ae60e04e08f94 Mon Sep 17 00:00:00 2001 +From: Avihai Horon +Date: Thu, 25 Jan 2024 18:25:12 +0200 +Subject: [48/99] migration: Fix logic of channels and transport compatibility + check + +commit 3205bebd4fc6dd501fb8b10c93ddce9da18e09db upstream. + +The commit in the fixes line mistakenly modified the channels and +transport compatibility check logic so it now checks multi-channel +support only for socket transport type. + +Thus, running multifd migration using a transport other than socket that +is incompatible with multi-channels (such as "exec") would lead to a +segmentation fault instead of an error message. +For example: + (qemu) migrate_set_capability multifd on + (qemu) migrate -d "exec:cat > /tmp/vm_state" + Segmentation fault (core dumped) + +Fix it by checking multi-channel compatibility for all transport types. 
+ +Cc: qemu-stable +Fixes: d95533e1cdcc ("migration: modify migration_channels_and_uri_compatible() for new QAPI syntax") +Signed-off-by: Avihai Horon +Reviewed-by: Peter Xu +Link: https://lore.kernel.org/r/20240125162528.7552-2-avihaih@nvidia.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/migration.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index f428839dd6..0e8255180d 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -127,11 +127,17 @@ static bool migration_needs_multiple_sockets(void) + return migrate_multifd() || migrate_postcopy_preempt(); + } + +-static bool transport_supports_multi_channels(SocketAddress *saddr) ++static bool transport_supports_multi_channels(MigrationAddress *addr) + { +- return saddr->type == SOCKET_ADDRESS_TYPE_INET || +- saddr->type == SOCKET_ADDRESS_TYPE_UNIX || +- saddr->type == SOCKET_ADDRESS_TYPE_VSOCK; ++ if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) { ++ SocketAddress *saddr = &addr->u.socket; ++ ++ return saddr->type == SOCKET_ADDRESS_TYPE_INET || ++ saddr->type == SOCKET_ADDRESS_TYPE_UNIX || ++ saddr->type == SOCKET_ADDRESS_TYPE_VSOCK; ++ } ++ ++ return false; + } + + static bool +@@ -139,8 +145,7 @@ migration_channels_and_transport_compatible(MigrationAddress *addr, + Error **errp) + { + if (migration_needs_multiple_sockets() && +- (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) && +- !transport_supports_multi_channels(&addr->u.socket)) { ++ !transport_supports_multi_channels(addr)) { + error_setg(errp, "Migration requires multi-channel URIs (e.g. 
tcp)"); + return false; + } +-- +2.33.0 + diff --git a/migration-Fix-migration_channel_read_peek-error-path.patch b/migration-Fix-migration_channel_read_peek-error-path.patch new file mode 100644 index 0000000..5cb8552 --- /dev/null +++ b/migration-Fix-migration_channel_read_peek-error-path.patch @@ -0,0 +1,52 @@ +From 3a81455a093f3b06fd76d4964d0073c78ddbcc49 Mon Sep 17 00:00:00 2001 +From: Avihai Horon +Date: Sun, 31 Dec 2023 11:30:14 +0200 +Subject: [05/99] migration: Fix migration_channel_read_peek() error path +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 4f8cf323e80c17f7d4b5604f1699591326df6262 upstream. + +migration_channel_read_peek() calls qio_channel_readv_full() and handles +both cases of return value == 0 and return value < 0 the same way, by +calling error_setg() with errp. However, if return value < 0, errp is +already set, so calling error_setg() with errp will lead to an assert. + +Fix it by handling these cases separately, calling error_setg() with +errp only in return value == 0 case. 
+ +Fixes: 6720c2b32725 ("migration: check magic value for deciding the mapping of channels") +Signed-off-by: Avihai Horon +Reviewed-by: Fabiano Rosas +Reviewed-by: Philippe Mathieu-Daudé +Link: https://lore.kernel.org/r/20231231093016.14204-10-avihaih@nvidia.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/channel.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/migration/channel.c b/migration/channel.c +index ca3319a309..f9de064f3b 100644 +--- a/migration/channel.c ++++ b/migration/channel.c +@@ -117,9 +117,12 @@ int migration_channel_read_peek(QIOChannel *ioc, + len = qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, + QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp); + +- if (len <= 0 && len != QIO_CHANNEL_ERR_BLOCK) { +- error_setg(errp, +- "Failed to peek at channel"); ++ if (len < 0 && len != QIO_CHANNEL_ERR_BLOCK) { ++ return -1; ++ } ++ ++ if (len == 0) { ++ error_setg(errp, "Failed to peek at channel"); + return -1; + } + +-- +2.33.0 + diff --git a/migration-Introduce-qatzip-compression-method.patch b/migration-Introduce-qatzip-compression-method.patch new file mode 100644 index 0000000..ffe4ae3 --- /dev/null +++ b/migration-Introduce-qatzip-compression-method.patch @@ -0,0 +1,500 @@ +From d5ad8ffdf67cb6a76d5b4bf7145488abaa53c2ae Mon Sep 17 00:00:00 2001 +From: Bryan Zhang +Date: Fri, 30 Aug 2024 16:27:21 -0700 +Subject: [91/99] migration: Introduce 'qatzip' compression method + +commit 80484f945989988091c5cd729c3e8bde6c14907a upstream. + +Adds support for 'qatzip' as an option for the multifd compression +method parameter, and implements using QAT for 'qatzip' compression and +decompression. 
+ +Acked-by: Markus Armbruster +Reviewed-by: Fabiano Rosas +Reviewed-by: Prasad Pandit +Signed-off-by: Bryan Zhang +Signed-off-by: Hao Xiang +Signed-off-by: Yichen Wang +Link: https://lore.kernel.org/r/20240830232722.58272-5-yichen.wang@bytedance.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + hw/core/qdev-properties-system.c | 2 +- + migration/meson.build | 1 + + migration/multifd-qatzip.c | 394 +++++++++++++++++++++++++++++++ + migration/multifd.h | 5 +- + qapi/migration.json | 3 + + 5 files changed, 402 insertions(+), 3 deletions(-) + create mode 100644 migration/multifd-qatzip.c + +diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c +index 650c42eaf8..9cc2e38aba 100644 +--- a/hw/core/qdev-properties-system.c ++++ b/hw/core/qdev-properties-system.c +@@ -711,7 +711,7 @@ const PropertyInfo qdev_prop_fdc_drive_type = { + const PropertyInfo qdev_prop_multifd_compression = { + .name = "MultiFDCompression", + .description = "multifd_compression values, " +- "none/zlib/zstd/qpl/uadk", ++ "none/zlib/zstd/qpl/uadk/qatzip", + .enum_table = &MultiFDCompression_lookup, + .get = qdev_propinfo_get_enum, + .set = qdev_propinfo_set_enum, +diff --git a/migration/meson.build b/migration/meson.build +index 264d04657f..aba2581705 100644 +--- a/migration/meson.build ++++ b/migration/meson.build +@@ -42,6 +42,7 @@ endif + system_ss.add(when: zstd, if_true: files('multifd-zstd.c')) + system_ss.add(when: qpl, if_true: files('multifd-qpl.c')) + system_ss.add(when: uadk, if_true: files('multifd-uadk.c')) ++system_ss.add(when: qatzip, if_true: files('multifd-qatzip.c')) + + specific_ss.add(when: 'CONFIG_SYSTEM_ONLY', + if_true: files('ram.c', +diff --git a/migration/multifd-qatzip.c b/migration/multifd-qatzip.c +new file mode 100644 +index 0000000000..3c787ed879 +--- /dev/null ++++ b/migration/multifd-qatzip.c +@@ -0,0 +1,394 @@ ++/* ++ * Multifd QATzip compression implementation ++ * ++ * Copyright (c) Bytedance ++ * ++ * Authors: ++ * Bryan 
Zhang ++ * Hao Xiang ++ * Yichen Wang ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++ ++#include "qemu/osdep.h" ++#include "exec/ramblock.h" ++#include "qapi/error.h" ++#include "qemu/error-report.h" ++#include "qapi/qapi-types-migration.h" ++#include "options.h" ++#include "multifd.h" ++#include <qatzip.h> ++ ++typedef struct { ++ /* ++ * Unique session for use with QATzip API ++ */ ++ QzSession_T sess; ++ ++ /* ++ * For compression: Buffer for pages to compress ++ * For decompression: Buffer for data to decompress ++ */ ++ uint8_t *in_buf; ++ uint32_t in_len; ++ ++ /* ++ * For compression: Output buffer of compressed data ++ * For decompression: Output buffer of decompressed data ++ */ ++ uint8_t *out_buf; ++ uint32_t out_len; ++} QatzipData; ++ ++/** ++ * qatzip_send_setup: Set up QATzip session and private buffers. ++ * ++ * @param p Multifd channel params ++ * @param errp Pointer to error, which will be set in case of error ++ * @return 0 on success, -1 on error (and *errp will be set) ++ */ ++static int qatzip_send_setup(MultiFDSendParams *p, Error **errp) ++{ ++ QatzipData *q; ++ QzSessionParamsDeflate_T params; ++ const char *err_msg; ++ int ret; ++ ++ q = g_new0(QatzipData, 1); ++ p->compress_data = q; ++ /* We need one extra place for the packet header */ ++ p->iov = g_new0(struct iovec, 2); ++ ++ /* ++ * Initialize QAT device with software fallback by default. This allows ++ * QATzip to use CPU path when QAT hardware reaches maximum throughput. ++ */ ++ ret = qzInit(&q->sess, true); ++ if (ret != QZ_OK && ret != QZ_DUPLICATE) { ++ err_msg = "qzInit failed"; ++ goto err; ++ } ++ ++ ret = qzGetDefaultsDeflate(&params); ++ if (ret != QZ_OK) { ++ err_msg = "qzGetDefaultsDeflate failed"; ++ goto err; ++ } ++ ++ /* Make sure to use configured QATzip compression level.
*/ ++ params.common_params.comp_lvl = migrate_multifd_qatzip_level(); ++ ret = qzSetupSessionDeflate(&q->sess, &params); ++ if (ret != QZ_OK && ret != QZ_DUPLICATE) { ++ err_msg = "qzSetupSessionDeflate failed"; ++ goto err; ++ } ++ ++ if (MULTIFD_PACKET_SIZE > UINT32_MAX) { ++ err_msg = "packet size too large for QAT"; ++ goto err; ++ } ++ ++ q->in_len = MULTIFD_PACKET_SIZE; ++ /* ++ * PINNED_MEM is an enum from qatzip headers, which means to use ++ * kzalloc_node() to allocate memory for QAT DMA purposes. When QAT device ++ * is not available or software fallback is used, the malloc flag needs to ++ * be set as COMMON_MEM. ++ */ ++ q->in_buf = qzMalloc(q->in_len, 0, PINNED_MEM); ++ if (!q->in_buf) { ++ q->in_buf = qzMalloc(q->in_len, 0, COMMON_MEM); ++ if (!q->in_buf) { ++ err_msg = "qzMalloc failed"; ++ goto err; ++ } ++ } ++ ++ q->out_len = qzMaxCompressedLength(MULTIFD_PACKET_SIZE, &q->sess); ++ q->out_buf = qzMalloc(q->out_len, 0, PINNED_MEM); ++ if (!q->out_buf) { ++ q->out_buf = qzMalloc(q->out_len, 0, COMMON_MEM); ++ if (!q->out_buf) { ++ err_msg = "qzMalloc failed"; ++ goto err; ++ } ++ } ++ ++ return 0; ++ ++err: ++ error_setg(errp, "multifd %u: [sender] %s", p->id, err_msg); ++ return -1; ++} ++ ++/** ++ * qatzip_send_cleanup: Tear down QATzip session and release private buffers. ++ * ++ * @param p Multifd channel params ++ * @param errp Pointer to error, which will be set in case of error ++ * @return None ++ */ ++static void qatzip_send_cleanup(MultiFDSendParams *p, Error **errp) ++{ ++ QatzipData *q = p->compress_data; ++ ++ if (q) { ++ if (q->in_buf) { ++ qzFree(q->in_buf); ++ } ++ if (q->out_buf) { ++ qzFree(q->out_buf); ++ } ++ (void)qzTeardownSession(&q->sess); ++ (void)qzClose(&q->sess); ++ g_free(q); ++ } ++ ++ g_free(p->iov); ++ p->iov = NULL; ++ p->compress_data = NULL; ++} ++ ++/** ++ * qatzip_send_prepare: Compress pages and update IO channel info.
++ * ++ * @param p Multifd channel params ++ * @param errp Pointer to error, which will be set in case of error ++ * @return 0 on success, -1 on error (and *errp will be set) ++ */ ++static int qatzip_send_prepare(MultiFDSendParams *p, Error **errp) ++{ ++ MultiFDPages_t *pages = p->pages; ++ QatzipData *q = p->compress_data; ++ int ret; ++ unsigned int in_len, out_len; ++ ++ if (!multifd_send_prepare_common(p)) { ++ goto out; ++ } ++ ++ /* ++ * Unlike other multifd compression implementations, we use a non-streaming ++ * API and place all the data into one buffer, rather than sending each ++ * page to the compression API at a time. Based on initial benchmarks, the ++ * non-streaming API outperforms the streaming API. Plus, the logic in QEMU ++ * is friendly to using the non-streaming API anyway. If either of these ++ * statements becomes no longer true, we can revisit adding a streaming ++ * implementation. ++ */ ++ for (int i = 0; i < pages->normal_num; i++) { ++ memcpy(q->in_buf + (i * p->page_size), ++ pages->block->host + pages->offset[i], ++ p->page_size); ++ } ++ ++ in_len = pages->normal_num * p->page_size; ++ if (in_len > q->in_len) { ++ error_setg(errp, "multifd %u: unexpectedly large input", p->id); ++ return -1; ++ } ++ out_len = q->out_len; ++ ++ ret = qzCompress(&q->sess, q->in_buf, &in_len, q->out_buf, &out_len, 1); ++ if (ret != QZ_OK) { ++ error_setg(errp, "multifd %u: QATzip returned %d instead of QZ_OK", ++ p->id, ret); ++ return -1; ++ } ++ if (in_len != pages->normal_num * p->page_size) { ++ error_setg(errp, "multifd %u: QATzip failed to compress all input", ++ p->id); ++ return -1; ++ } ++ ++ p->iov[p->iovs_num].iov_base = q->out_buf; ++ p->iov[p->iovs_num].iov_len = out_len; ++ p->iovs_num++; ++ p->next_packet_size = out_len; ++ ++out: ++ p->flags |= MULTIFD_FLAG_QATZIP; ++ multifd_send_fill_packet(p); ++ return 0; ++} ++ ++/** ++ * qatzip_recv_setup: Set up QATzip session and allocate private buffers. 
++ * ++ * @param p Multifd channel params ++ * @param errp Pointer to error, which will be set in case of error ++ * @return 0 on success, -1 on error (and *errp will be set) ++ */ ++static int qatzip_recv_setup(MultiFDRecvParams *p, Error **errp) ++{ ++ QatzipData *q; ++ QzSessionParamsDeflate_T params; ++ const char *err_msg; ++ int ret; ++ ++ q = g_new0(QatzipData, 1); ++ p->compress_data = q; ++ ++ /* ++ * Initialize QAT device with software fallback by default. This allows ++ * QATzip to use CPU path when QAT hardware reaches maximum throughput. ++ */ ++ ret = qzInit(&q->sess, true); ++ if (ret != QZ_OK && ret != QZ_DUPLICATE) { ++ err_msg = "qzInit failed"; ++ goto err; ++ } ++ ++ ret = qzGetDefaultsDeflate(&params); ++ if (ret != QZ_OK) { ++ err_msg = "qzGetDefaultsDeflate failed"; ++ goto err; ++ } ++ ++ ret = qzSetupSessionDeflate(&q->sess, &params); ++ if (ret != QZ_OK && ret != QZ_DUPLICATE) { ++ err_msg = "qzSetupSessionDeflate failed"; ++ goto err; ++ } ++ ++ /* ++ * Reserve extra spaces for the incoming packets. Current implementation ++ * doesn't send uncompressed pages in case the compression gets too big. ++ */ ++ q->in_len = MULTIFD_PACKET_SIZE * 2; ++ /* ++ * PINNED_MEM is an enum from qatzip headers, which means to use ++ * kzalloc_node() to allocate memory for QAT DMA purposes. When QAT device ++ * is not available or software fallback is used, the malloc flag needs to ++ * be set as COMMON_MEM.
++ */ ++ q->in_buf = qzMalloc(q->in_len, 0, PINNED_MEM); ++ if (!q->in_buf) { ++ q->in_buf = qzMalloc(q->in_len, 0, COMMON_MEM); ++ if (!q->in_buf) { ++ err_msg = "qzMalloc failed"; ++ goto err; ++ } ++ } ++ ++ q->out_len = MULTIFD_PACKET_SIZE; ++ q->out_buf = qzMalloc(q->out_len, 0, PINNED_MEM); ++ if (!q->out_buf) { ++ q->out_buf = qzMalloc(q->out_len, 0, COMMON_MEM); ++ if (!q->out_buf) { ++ err_msg = "qzMalloc failed"; ++ goto err; ++ } ++ } ++ ++ return 0; ++ ++err: ++ error_setg(errp, "multifd %u: [receiver] %s", p->id, err_msg); ++ return -1; ++} ++ ++/** ++ * qatzip_recv_cleanup: Tear down QATzip session and release private buffers. ++ * ++ * @param p Multifd channel params ++ * @return None ++ */ ++static void qatzip_recv_cleanup(MultiFDRecvParams *p) ++{ ++ QatzipData *q = p->compress_data; ++ ++ if (q) { ++ if (q->in_buf) { ++ qzFree(q->in_buf); ++ } ++ if (q->out_buf) { ++ qzFree(q->out_buf); ++ } ++ (void)qzTeardownSession(&q->sess); ++ (void)qzClose(&q->sess); ++ g_free(q); ++ } ++ p->compress_data = NULL; ++} ++ ++ ++/** ++ * qatzip_recv: Decompress pages and copy them to the appropriate ++ * locations. 
++ * ++ * @param p Multifd channel params ++ * @param errp Pointer to error, which will be set in case of error ++ * @return 0 on success, -1 on error (and *errp will be set) ++ */ ++static int qatzip_recv(MultiFDRecvParams *p, Error **errp) ++{ ++ QatzipData *q = p->compress_data; ++ int ret; ++ unsigned int in_len, out_len; ++ uint32_t in_size = p->next_packet_size; ++ uint32_t expected_size = p->normal_num * p->page_size; ++ uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; ++ ++ if (in_size > q->in_len) { ++ error_setg(errp, "multifd %u: received unexpectedly large packet", ++ p->id); ++ return -1; ++ } ++ ++ if (flags != MULTIFD_FLAG_QATZIP) { ++ error_setg(errp, "multifd %u: flags received %x flags expected %x", ++ p->id, flags, MULTIFD_FLAG_QATZIP); ++ return -1; ++ } ++ ++ multifd_recv_zero_page_process(p); ++ if (!p->normal_num) { ++ assert(in_size == 0); ++ return 0; ++ } ++ ++ ret = qio_channel_read_all(p->c, (void *)q->in_buf, in_size, errp); ++ if (ret != 0) { ++ return ret; ++ } ++ ++ in_len = in_size; ++ out_len = q->out_len; ++ ret = qzDecompress(&q->sess, q->in_buf, &in_len, q->out_buf, &out_len); ++ if (ret != QZ_OK) { ++ error_setg(errp, "multifd %u: qzDecompress failed", p->id); ++ return -1; ++ } ++ if (out_len != expected_size) { ++ error_setg(errp, "multifd %u: packet size received %u size expected %u", ++ p->id, out_len, expected_size); ++ return -1; ++ } ++ ++ /* Copy each page to its appropriate location. 
*/ ++ for (int i = 0; i < p->normal_num; i++) { ++ memcpy(p->host + p->normal[i], ++ q->out_buf + p->page_size * i, ++ p->page_size); ++ } ++ return 0; ++} ++ ++static MultiFDMethods multifd_qatzip_ops = { ++ .send_setup = qatzip_send_setup, ++ .send_cleanup = qatzip_send_cleanup, ++ .send_prepare = qatzip_send_prepare, ++ .recv_setup = qatzip_recv_setup, ++ .recv_cleanup = qatzip_recv_cleanup, ++ .recv = qatzip_recv ++}; ++ ++static void multifd_qatzip_register(void) ++{ ++ multifd_register_ops(MULTIFD_COMPRESSION_QATZIP, &multifd_qatzip_ops); ++} ++ ++migration_init(multifd_qatzip_register); +diff --git a/migration/multifd.h b/migration/multifd.h +index ace4ba050d..57c1334788 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -29,14 +29,15 @@ bool multifd_queue_page(RAMBlock *block, ram_addr_t offset); + /* Multifd Compression flags */ + #define MULTIFD_FLAG_SYNC (1 << 0) + +-/* We reserve 4 bits for compression methods */ +-#define MULTIFD_FLAG_COMPRESSION_MASK (0xf << 1) ++/* We reserve 5 bits for compression methods */ ++#define MULTIFD_FLAG_COMPRESSION_MASK (0x1f << 1) + /* we need to be compatible. Before compression value was 0 */ + #define MULTIFD_FLAG_NOCOMP (0 << 1) + #define MULTIFD_FLAG_ZLIB (1 << 1) + #define MULTIFD_FLAG_ZSTD (2 << 1) + #define MULTIFD_FLAG_QPL (4 << 1) + #define MULTIFD_FLAG_UADK (8 << 1) ++#define MULTIFD_FLAG_QATZIP (16 << 1) + + /* This value needs to be a multiple of qemu_target_page_size() */ + #define MULTIFD_PACKET_SIZE (512 * 1024) +diff --git a/qapi/migration.json b/qapi/migration.json +index 255f5b50a6..37e1d4857e 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -625,6 +625,8 @@ + # + # @zstd: use zstd compression method. + # ++# @qatzip: use qatzip compression method. (Since 9.2) ++# + # @qpl: use qpl compression method. 
Query Processing Library(qpl) is + # based on the deflate compression algorithm and use the Intel + # In-Memory Analytics Accelerator(IAA) accelerated compression +@@ -637,6 +639,7 @@ + { 'enum': 'MultiFDCompression', + 'data': [ 'none', 'zlib', + { 'name': 'zstd', 'if': 'CONFIG_ZSTD' }, ++ { 'name': 'qatzip', 'if': 'CONFIG_QATZIP'}, + { 'name': 'qpl', 'if': 'CONFIG_QPL' }, + { 'name': 'uadk', 'if': 'CONFIG_UADK' } ] } + +-- +2.33.0 + diff --git a/migration-Properly-apply-migration-compression-level.patch b/migration-Properly-apply-migration-compression-level.patch new file mode 100644 index 0000000..21ed06e --- /dev/null +++ b/migration-Properly-apply-migration-compression-level.patch @@ -0,0 +1,53 @@ +From c17b6d51225501c92cfe6b086ea9217659d67bd1 Mon Sep 17 00:00:00 2001 +From: Bryan Zhang +Date: Fri, 1 Mar 2024 03:59:00 +0000 +Subject: [62/99] migration: Properly apply migration compression level + parameters + +commit b4014a2bf57ce08e2f6458cd82e9f968facf25c8 upstream. + +Some glue code was missing, so that using `qmp_migrate_set_parameters` +to set `multifd-zstd-level` or `multifd-zlib-level` did not work. This +commit adds the glue code to fix that. 
+ +Signed-off-by: Bryan Zhang +Link: https://lore.kernel.org/r/20240301035901.4006936-2-bryan.zhang@bytedance.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/options.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/migration/options.c b/migration/options.c +index 71645c8721..52ddbac35f 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -1377,6 +1377,12 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + if (params->has_multifd_compression) { + dest->multifd_compression = params->multifd_compression; + } ++ if (params->has_multifd_zlib_level) { ++ dest->multifd_zlib_level = params->multifd_zlib_level; ++ } ++ if (params->has_multifd_zstd_level) { ++ dest->multifd_zstd_level = params->multifd_zstd_level; ++ } + if (params->has_xbzrle_cache_size) { + dest->xbzrle_cache_size = params->xbzrle_cache_size; + } +@@ -1533,6 +1539,12 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + if (params->has_multifd_compression) { + s->parameters.multifd_compression = params->multifd_compression; + } ++ if (params->has_multifd_zlib_level) { ++ s->parameters.multifd_zlib_level = params->multifd_zlib_level; ++ } ++ if (params->has_multifd_zstd_level) { ++ s->parameters.multifd_zstd_level = params->multifd_zstd_level; ++ } + if (params->has_xbzrle_cache_size) { + s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; + xbzrle_cache_resize(params->xbzrle_cache_size, errp); +-- +2.33.0 + diff --git a/migration-Report-error-in-incoming-migration.patch b/migration-Report-error-in-incoming-migration.patch new file mode 100644 index 0000000..a12cced --- /dev/null +++ b/migration-Report-error-in-incoming-migration.patch @@ -0,0 +1,40 @@ +From 8235f51444f1147a36733474278476d7de83d545 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 4 Jan 2024 11:21:41 -0300 +Subject: [10/99] migration: Report error in incoming migration + +commit 
e3b8ad5c13714cca5e3fc1445472171fbcd469bc upstream. + +We're not currently reporting the errors set with migrate_set_error() +when incoming migration fails. + +Signed-off-by: Fabiano Rosas +Reviewed-by: Peter Xu +Link: https://lore.kernel.org/r/20240104142144.9680-5-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/migration.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/migration/migration.c b/migration/migration.c +index 5829565f9c..2c5258d0b0 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -698,6 +698,13 @@ process_incoming_migration_co(void *opaque) + } + + if (ret < 0) { ++ MigrationState *s = migrate_get_current(); ++ ++ if (migrate_has_error(s)) { ++ WITH_QEMU_LOCK_GUARD(&s->error_mutex) { ++ error_report_err(s->error); ++ } ++ } + error_report("load of migration failed: %s", strerror(-ret)); + goto fail; + } +-- +2.33.0 + diff --git a/migration-multifd-Add-UADK-based-compression-and-dec.patch b/migration-multifd-Add-UADK-based-compression-and-dec.patch new file mode 100644 index 0000000..1ee0de4 --- /dev/null +++ b/migration-multifd-Add-UADK-based-compression-and-dec.patch @@ -0,0 +1,187 @@ +From 7b83023e2ecc2debc243cd34032cbf143538f26c Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Fri, 7 Jun 2024 14:53:08 +0100 +Subject: [84/99] migration/multifd: Add UADK based compression and + decompression + +commit 3c49191a0d011d941b347fda8fdadd88c988e753 upstream. + +Uses UADK wd_do_comp_sync() API to (de)compress a normal page using +hardware accelerator. 
+ +Reviewed-by: Fabiano Rosas +Signed-off-by: Shameer Kolothum +Reviewed-by: Zhangfei Gao +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + migration/multifd-uadk.c | 132 ++++++++++++++++++++++++++++++++++++++- + 1 file changed, 130 insertions(+), 2 deletions(-) + +diff --git a/migration/multifd-uadk.c b/migration/multifd-uadk.c +index 535411a405..70bba92eaa 100644 +--- a/migration/multifd-uadk.c ++++ b/migration/multifd-uadk.c +@@ -13,6 +13,7 @@ + #include "qemu/osdep.h" + #include "qemu/module.h" + #include "qapi/error.h" ++#include "exec/ramblock.h" + #include "migration.h" + #include "multifd.h" + #include "options.h" +@@ -142,6 +143,15 @@ static void multifd_uadk_send_cleanup(MultiFDSendParams *p, Error **errp) + p->compress_data = NULL; + } + ++static inline void prepare_next_iov(MultiFDSendParams *p, void *base, ++ uint32_t len) ++{ ++ p->iov[p->iovs_num].iov_base = (uint8_t *)base; ++ p->iov[p->iovs_num].iov_len = len; ++ p->next_packet_size += len; ++ p->iovs_num++; ++} ++ + /** + * multifd_uadk_send_prepare: prepare data to be able to send + * +@@ -155,7 +165,56 @@ static void multifd_uadk_send_cleanup(MultiFDSendParams *p, Error **errp) + */ + static int multifd_uadk_send_prepare(MultiFDSendParams *p, Error **errp) + { +- return -1; ++ struct wd_data *uadk_data = p->compress_data; ++ uint32_t hdr_size; ++ uint8_t *buf = uadk_data->buf; ++ int ret = 0; ++ ++ if (!multifd_send_prepare_common(p)) { ++ goto out; ++ } ++ ++ hdr_size = p->pages->normal_num * sizeof(uint32_t); ++ /* prepare the header that stores the lengths of all compressed data */ ++ prepare_next_iov(p, uadk_data->buf_hdr, hdr_size); ++ ++ for (int i = 0; i < p->pages->normal_num; i++) { ++ struct wd_comp_req creq = { ++ .op_type = WD_DIR_COMPRESS, ++ .src = p->pages->block->host + p->pages->offset[i], ++ .src_len = p->page_size, ++ .dst = buf, ++ /* Set dst_len to double the src in case compressed out >= page_size */ ++ .dst_len = p->page_size * 2, ++ }; ++ ++ ret = 
wd_do_comp_sync(uadk_data->handle, &creq); ++ if (ret || creq.status) { ++ error_setg(errp, "multifd %u: failed compression, ret %d status %d", ++ p->id, ret, creq.status); ++ return -1; ++ } ++ if (creq.dst_len < p->page_size) { ++ uadk_data->buf_hdr[i] = cpu_to_be32(creq.dst_len); ++ prepare_next_iov(p, buf, creq.dst_len); ++ buf += creq.dst_len; ++ } else { ++ /* ++ * Send raw data if compressed out >= page_size. We might be better ++ * off sending raw data if output is slightly less than page_size ++ * as well because at the receive end we can skip the decompression. ++ * But it is tricky to find the right number here. ++ */ ++ uadk_data->buf_hdr[i] = cpu_to_be32(p->page_size); ++ prepare_next_iov(p, p->pages->block->host + p->pages->offset[i], ++ p->page_size); ++ buf += p->page_size; ++ } ++ } ++out: ++ p->flags |= MULTIFD_FLAG_UADK; ++ multifd_send_fill_packet(p); ++ return 0; + } + + /** +@@ -208,7 +267,76 @@ static void multifd_uadk_recv_cleanup(MultiFDRecvParams *p) + */ + static int multifd_uadk_recv(MultiFDRecvParams *p, Error **errp) + { +- return -1; ++ struct wd_data *uadk_data = p->compress_data; ++ uint32_t in_size = p->next_packet_size; ++ uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; ++ uint32_t hdr_len = p->normal_num * sizeof(uint32_t); ++ uint32_t data_len = 0; ++ uint8_t *buf = uadk_data->buf; ++ int ret = 0; ++ ++ if (flags != MULTIFD_FLAG_UADK) { ++ error_setg(errp, "multifd %u: flags received %x flags expected %x", ++ p->id, flags, MULTIFD_FLAG_ZLIB); ++ return -1; ++ } ++ ++ multifd_recv_zero_page_process(p); ++ if (!p->normal_num) { ++ assert(in_size == 0); ++ return 0; ++ } ++ ++ /* read compressed data lengths */ ++ assert(hdr_len < in_size); ++ ret = qio_channel_read_all(p->c, (void *) uadk_data->buf_hdr, ++ hdr_len, errp); ++ if (ret != 0) { ++ return ret; ++ } ++ ++ for (int i = 0; i < p->normal_num; i++) { ++ uadk_data->buf_hdr[i] = be32_to_cpu(uadk_data->buf_hdr[i]); ++ data_len += uadk_data->buf_hdr[i]; ++ 
assert(uadk_data->buf_hdr[i] <= p->page_size); ++ } ++ ++ /* read compressed data */ ++ assert(in_size == hdr_len + data_len); ++ ret = qio_channel_read_all(p->c, (void *)buf, data_len, errp); ++ if (ret != 0) { ++ return ret; ++ } ++ ++ for (int i = 0; i < p->normal_num; i++) { ++ struct wd_comp_req creq = { ++ .op_type = WD_DIR_DECOMPRESS, ++ .src = buf, ++ .src_len = uadk_data->buf_hdr[i], ++ .dst = p->host + p->normal[i], ++ .dst_len = p->page_size, ++ }; ++ ++ if (uadk_data->buf_hdr[i] == p->page_size) { ++ memcpy(p->host + p->normal[i], buf, p->page_size); ++ buf += p->page_size; ++ continue; ++ } ++ ++ ret = wd_do_comp_sync(uadk_data->handle, &creq); ++ if (ret || creq.status) { ++ error_setg(errp, "multifd %u: failed decompression, ret %d status %d", ++ p->id, ret, creq.status); ++ return -1; ++ } ++ if (creq.dst_len != p->page_size) { ++ error_setg(errp, "multifd %u: decompressed length error", p->id); ++ return -1; ++ } ++ buf += uadk_data->buf_hdr[i]; ++ } ++ ++ return 0; + } + + static MultiFDMethods multifd_uadk_ops = { +-- +2.33.0 + diff --git a/migration-multifd-Add-UADK-initialization.patch b/migration-multifd-Add-UADK-initialization.patch new file mode 100644 index 0000000..14bdca8 --- /dev/null +++ b/migration-multifd-Add-UADK-initialization.patch @@ -0,0 +1,244 @@ +From f6ef2126594a919c5f921dfedf79631167efbc40 Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Fri, 7 Jun 2024 14:53:07 +0100 +Subject: [83/99] migration/multifd: Add UADK initialization + +commit 819dd20636d51d5dc9d42aa28edb3dd9c1b8b863 upstream. + +Initialize UADK session and allocate buffers required. The actual +compression/decompression will only be done in a subsequent patch. 
+ +Signed-off-by: Shameer Kolothum +Reviewed-by: Fabiano Rosas +Reviewed-by: Zhangfei Gao +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + migration/multifd-uadk.c | 209 ++++++++++++++++++++++++++++++++++++++- + 1 file changed, 208 insertions(+), 1 deletion(-) + +diff --git a/migration/multifd-uadk.c b/migration/multifd-uadk.c +index c2bb07535b..535411a405 100644 +--- a/migration/multifd-uadk.c ++++ b/migration/multifd-uadk.c +@@ -12,9 +12,216 @@ + + #include "qemu/osdep.h" + #include "qemu/module.h" ++#include "qapi/error.h" ++#include "migration.h" ++#include "multifd.h" ++#include "options.h" ++#include "uadk/wd_comp.h" ++#include "uadk/wd_sched.h" ++ ++struct wd_data { ++ handle_t handle; ++ uint8_t *buf; ++ uint32_t *buf_hdr; ++}; ++ ++static bool uadk_hw_init(void) ++{ ++ char alg[] = "zlib"; ++ int ret; ++ ++ ret = wd_comp_init2(alg, SCHED_POLICY_RR, TASK_HW); ++ if (ret && ret != -WD_EEXIST) { ++ return false; ++ } else { ++ return true; ++ } ++} ++ ++static struct wd_data *multifd_uadk_init_sess(uint32_t count, ++ uint32_t page_size, ++ bool compress, Error **errp) ++{ ++ struct wd_comp_sess_setup ss = {0}; ++ struct sched_params param = {0}; ++ uint32_t size = count * page_size; ++ struct wd_data *wd; ++ ++ if (!uadk_hw_init()) { ++ error_setg(errp, "multifd: UADK hardware not available"); ++ return NULL; ++ } ++ ++ wd = g_new0(struct wd_data, 1); ++ ss.alg_type = WD_ZLIB; ++ if (compress) { ++ ss.op_type = WD_DIR_COMPRESS; ++ /* Add an additional page for handling output > input */ ++ size += page_size; ++ } else { ++ ss.op_type = WD_DIR_DECOMPRESS; ++ } ++ ++ /* We use default level 1 compression and 4K window size */ ++ param.type = ss.op_type; ++ ss.sched_param = &param; ++ ++ wd->handle = wd_comp_alloc_sess(&ss); ++ if (!wd->handle) { ++ error_setg(errp, "multifd: failed wd_comp_alloc_sess"); ++ goto out; ++ } ++ ++ wd->buf = g_try_malloc(size); ++ if (!wd->buf) { ++ error_setg(errp, "multifd: out of mem for uadk buf"); ++ goto
out_free_sess; ++ } ++ wd->buf_hdr = g_new0(uint32_t, count); ++ return wd; ++ ++out_free_sess: ++ wd_comp_free_sess(wd->handle); ++out: ++ wd_comp_uninit2(); ++ g_free(wd); ++ return NULL; ++} ++ ++static void multifd_uadk_uninit_sess(struct wd_data *wd) ++{ ++ wd_comp_free_sess(wd->handle); ++ wd_comp_uninit2(); ++ g_free(wd->buf); ++ g_free(wd->buf_hdr); ++ g_free(wd); ++} ++ ++/** ++ * multifd_uadk_send_setup: setup send side ++ * ++ * Returns 0 for success or -1 for error ++ * ++ * @p: Params for the channel that we are using ++ * @errp: pointer to an error ++ */ ++static int multifd_uadk_send_setup(MultiFDSendParams *p, Error **errp) ++{ ++ struct wd_data *wd; ++ ++ wd = multifd_uadk_init_sess(p->page_count, p->page_size, true, errp); ++ if (!wd) { ++ return -1; ++ } ++ ++ p->compress_data = wd; ++ assert(p->iov == NULL); ++ /* ++ * Each page will be compressed independently and sent using an IOV. The ++ * additional two IOVs are used to store packet header and compressed data ++ * length ++ */ ++ ++ p->iov = g_new0(struct iovec, p->page_count + 2); ++ return 0; ++} ++ ++/** ++ * multifd_uadk_send_cleanup: cleanup send side ++ * ++ * Close the channel and return memory. ++ * ++ * @p: Params for the channel that we are using ++ * @errp: pointer to an error ++ */ ++static void multifd_uadk_send_cleanup(MultiFDSendParams *p, Error **errp) ++{ ++ struct wd_data *wd = p->compress_data; ++ ++ multifd_uadk_uninit_sess(wd); ++ p->compress_data = NULL; ++} ++ ++/** ++ * multifd_uadk_send_prepare: prepare data to be able to send ++ * ++ * Create a compressed buffer with all the pages that we are going to ++ * send. ++ * ++ * Returns 0 for success or -1 for error ++ * ++ * @p: Params for the channel that we are using ++ * @errp: pointer to an error ++ */ ++static int multifd_uadk_send_prepare(MultiFDSendParams *p, Error **errp) ++{ ++ return -1; ++} ++ ++/** ++ * multifd_uadk_recv_setup: setup receive side ++ * ++ * Create the compressed channel and buffer. 
++ * ++ * Returns 0 for success or -1 for error ++ * ++ * @p: Params for the channel that we are using ++ * @errp: pointer to an error ++ */ ++static int multifd_uadk_recv_setup(MultiFDRecvParams *p, Error **errp) ++{ ++ struct wd_data *wd; ++ ++ wd = multifd_uadk_init_sess(p->page_count, p->page_size, false, errp); ++ if (!wd) { ++ return -1; ++ } ++ p->compress_data = wd; ++ return 0; ++} ++ ++/** ++ * multifd_uadk_recv_cleanup: cleanup receive side ++ * ++ * Close the channel and return memory. ++ * ++ * @p: Params for the channel that we are using ++ */ ++static void multifd_uadk_recv_cleanup(MultiFDRecvParams *p) ++{ ++ struct wd_data *wd = p->compress_data; ++ ++ multifd_uadk_uninit_sess(wd); ++ p->compress_data = NULL; ++} ++ ++/** ++ * multifd_uadk_recv: read the data from the channel into actual pages ++ * ++ * Read the compressed buffer, and uncompress it into the actual ++ * pages. ++ * ++ * Returns 0 for success or -1 for error ++ * ++ * @p: Params for the channel that we are using ++ * @errp: pointer to an error ++ */ ++static int multifd_uadk_recv(MultiFDRecvParams *p, Error **errp) ++{ ++ return -1; ++} ++ ++static MultiFDMethods multifd_uadk_ops = { ++ .send_setup = multifd_uadk_send_setup, ++ .send_cleanup = multifd_uadk_send_cleanup, ++ .send_prepare = multifd_uadk_send_prepare, ++ .recv_setup = multifd_uadk_recv_setup, ++ .recv_cleanup = multifd_uadk_recv_cleanup, ++ .recv = multifd_uadk_recv, ++}; + + static void multifd_uadk_register(void) + { +- /* noop for now */ ++ multifd_register_ops(MULTIFD_COMPRESSION_UADK, &multifd_uadk_ops); + } + migration_init(multifd_uadk_register); +-- +2.33.0 + diff --git a/migration-multifd-Add-a-synchronization-point-for-ch.patch b/migration-multifd-Add-a-synchronization-point-for-ch.patch new file mode 100644 index 0000000..738ae80 --- /dev/null +++ b/migration-multifd-Add-a-synchronization-point-for-ch.patch @@ -0,0 +1,127 @@ +From 5236178dc96f2e9b24aa95bc01d700428a95d023 Mon Sep 17 00:00:00 2001 +From: 
Fabiano Rosas +Date: Tue, 6 Feb 2024 18:51:18 -0300 +Subject: [54/99] migration/multifd: Add a synchronization point for channel + creation + +commit 93fa9dc2e0522c54b813dee0898a5feb98b624c9 upstream. + +It is possible that one of the multifd channels fails to be created at +multifd_new_send_channel_async() while the rest of the channel +creation tasks are still in flight. + +This could lead to multifd_save_cleanup() executing the +qemu_thread_join() loop too early and not waiting for the threads +which haven't been created yet, leading to the freeing of resources +that the newly created threads will try to access and crash. + +Add a synchronization point after which there will be no attempts at +thread creation and therefore calling multifd_save_cleanup() past that +point will ensure it properly waits for the threads. + +A note about performance: Prior to this patch, if a channel took too +long to be established, other channels could finish connecting first +and already start taking load. Now we're bounded by the +slowest-connecting channel. + +Reported-by: Avihai Horon +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240206215118.6171-7-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 32 ++++++++++++++++++++++++++------ + 1 file changed, 26 insertions(+), 6 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 85d1e7c347..bd240649f7 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -63,6 +63,11 @@ struct { + * Make it easy for now. + */ + uintptr_t packet_num; ++ /* ++ * Synchronization point past which no more channels will be ++ * created. ++ */ ++ QemuSemaphore channels_created; + /* send channels ready */ + QemuSemaphore channels_ready; + /* +@@ -623,10 +628,6 @@ static void multifd_send_terminate_threads(void) + + /* + * Finally recycle all the threads. +- * +- * TODO: p->running is still buggy, e.g. 
we can reach here without the +- * corresponding multifd_new_send_channel_async() get invoked yet, +- * then a new thread can even be created after this function returns. + */ + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; +@@ -671,6 +672,7 @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) + + static void multifd_send_cleanup_state(void) + { ++ qemu_sem_destroy(&multifd_send_state->channels_created); + qemu_sem_destroy(&multifd_send_state->channels_ready); + g_free(multifd_send_state->params); + multifd_send_state->params = NULL; +@@ -958,18 +960,26 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + + if (migrate_channel_requires_tls_upgrade(ioc)) { + ret = multifd_tls_channel_connect(p, ioc, &local_err); ++ if (ret) { ++ return; ++ } + } else { + ret = multifd_channel_connect(p, ioc, &local_err); + } + ++out: ++ /* ++ * Here we're not interested whether creation succeeded, only that ++ * it happened at all. 
++ */ ++ qemu_sem_post(&multifd_send_state->channels_created); ++ + if (ret) { + return; + } + +-out: + trace_multifd_new_send_channel_async_error(p->id, local_err); + multifd_send_set_error(local_err); +- multifd_send_kick_main(p); + if (!p->c) { + /* + * If no channel has been created, drop the initial +@@ -1002,6 +1012,7 @@ bool multifd_send_setup(void) + multifd_send_state = g_malloc0(sizeof(*multifd_send_state)); + multifd_send_state->params = g_new0(MultiFDSendParams, thread_count); + multifd_send_state->pages = multifd_pages_init(page_count); ++ qemu_sem_init(&multifd_send_state->channels_created, 0); + qemu_sem_init(&multifd_send_state->channels_ready, 0); + qatomic_set(&multifd_send_state->exiting, 0); + multifd_send_state->ops = multifd_ops[migrate_multifd_compression()]; +@@ -1027,6 +1038,15 @@ bool multifd_send_setup(void) + multifd_new_send_channel_create(p); + } + ++ /* ++ * Wait until channel creation has started for all channels. The ++ * creation can still fail, but no more channels will be created ++ * past this point. ++ */ ++ for (i = 0; i < thread_count; i++) { ++ qemu_sem_wait(&multifd_send_state->channels_created); ++ } ++ + for (i = 0; i < thread_count; i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +-- +2.33.0 + diff --git a/migration-multifd-Add-new-migration-option-zero-page.patch b/migration-multifd-Add-new-migration-option-zero-page.patch new file mode 100644 index 0000000..b732c7a --- /dev/null +++ b/migration-multifd-Add-new-migration-option-zero-page.patch @@ -0,0 +1,289 @@ +From 6bb380a1f7c37b5dda17f95519ec118990f332a8 Mon Sep 17 00:00:00 2001 +From: Hao Xiang +Date: Mon, 11 Mar 2024 18:00:11 +0000 +Subject: [68/99] migration/multifd: Add new migration option + zero-page-detection. + +commit 5fdbb1dfccfd59661c95cae760b8e276c5b8e65c upstream. + +This new parameter controls where the zero page checking is running. +1. If this parameter is set to 'legacy', zero page checking is +done in the migration main thread. +2. 
If this parameter is set to 'none', zero page checking is disabled. + +Signed-off-by: Hao Xiang +Reviewed-by: Peter Xu +Acked-by: Markus Armbruster +Link: https://lore.kernel.org/r/20240311180015.3359271-4-hao.xiang@linux.dev +Signed-off-by: Peter Xu + + Conflicts: + hw/core/qdev-properties-system.c + include/hw/qdev-properties-system.h + migration/options.c + qapi/migration.json +[jz: resolve simple context conflicts] +Signed-off-by: Jason Zeng +--- + hw/core/qdev-properties-system.c | 10 ++++++++++ + include/hw/qdev-properties-system.h | 4 ++++ + migration/migration-hmp-cmds.c | 9 +++++++++ + migration/options.c | 21 +++++++++++++++++++++ + migration/options.h | 1 + + migration/ram.c | 4 ++++ + qapi/migration.json | 28 +++++++++++++++++++++++++++- + 7 files changed, 76 insertions(+), 1 deletion(-) + +diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c +index c581d46f2e..cad1e04150 100644 +--- a/hw/core/qdev-properties-system.c ++++ b/hw/core/qdev-properties-system.c +@@ -732,6 +732,16 @@ const PropertyInfo qdev_prop_mig_mode = { + .set_default_value = qdev_propinfo_set_default_value_enum, + }; + ++const PropertyInfo qdev_prop_zero_page_detection = { ++ .name = "ZeroPageDetection", ++ .description = "zero_page_detection values, " ++ "none,legacy", ++ .enum_table = &ZeroPageDetection_lookup, ++ .get = qdev_propinfo_get_enum, ++ .set = qdev_propinfo_set_enum, ++ .set_default_value = qdev_propinfo_set_default_value_enum, ++}; ++ + /* --- Reserved Region --- */ + + /* +diff --git a/include/hw/qdev-properties-system.h b/include/hw/qdev-properties-system.h +index 7cf27e51b9..63dcf69978 100644 +--- a/include/hw/qdev-properties-system.h ++++ b/include/hw/qdev-properties-system.h +@@ -8,6 +8,7 @@ extern const PropertyInfo qdev_prop_macaddr; + extern const PropertyInfo qdev_prop_reserved_region; + extern const PropertyInfo qdev_prop_multifd_compression; + extern const PropertyInfo qdev_prop_mig_mode; ++extern const PropertyInfo 
qdev_prop_zero_page_detection; + extern const PropertyInfo qdev_prop_losttickpolicy; + extern const PropertyInfo qdev_prop_blockdev_on_error; + extern const PropertyInfo qdev_prop_blockdev_retry_interval; +@@ -48,6 +49,9 @@ extern const PropertyInfo qdev_prop_cpus390entitlement; + #define DEFINE_PROP_MIG_MODE(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_mig_mode, \ + MigMode) ++#define DEFINE_PROP_ZERO_PAGE_DETECTION(_n, _s, _f, _d) \ ++ DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_zero_page_detection, \ ++ ZeroPageDetection) + #define DEFINE_PROP_LOSTTICKPOLICY(_n, _s, _f, _d) \ + DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_losttickpolicy, \ + LostTickPolicy) +diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c +index 9857e2c97f..91e51eb7af 100644 +--- a/migration/migration-hmp-cmds.c ++++ b/migration/migration-hmp-cmds.c +@@ -348,6 +348,11 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) + monitor_printf(mon, "%s: %s\n", + MigrationParameter_str(MIGRATION_PARAMETER_MULTIFD_COMPRESSION), + MultiFDCompression_str(params->multifd_compression)); ++ assert(params->has_zero_page_detection); ++ monitor_printf(mon, "%s: %s\n", ++ MigrationParameter_str(MIGRATION_PARAMETER_ZERO_PAGE_DETECTION), ++ qapi_enum_lookup(&ZeroPageDetection_lookup, ++ params->zero_page_detection)); + monitor_printf(mon, "%s: %" PRIu64 " bytes\n", + MigrationParameter_str(MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE), + params->xbzrle_cache_size); +@@ -668,6 +673,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) + p->has_multifd_zstd_level = true; + visit_type_uint8(v, param, &p->multifd_zstd_level, &err); + break; ++ case MIGRATION_PARAMETER_ZERO_PAGE_DETECTION: ++ p->has_zero_page_detection = true; ++ visit_type_ZeroPageDetection(v, param, &p->zero_page_detection, &err); ++ break; + case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE: + p->has_xbzrle_cache_size = true; + if (!visit_type_size(v, param, &cache_size, &err)) { 
+diff --git a/migration/options.c b/migration/options.c +index 52ddbac35f..e752163114 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -183,6 +183,9 @@ Property migration_properties[] = { + DEFINE_PROP_MIG_MODE("mode", MigrationState, + parameters.mode, + MIG_MODE_NORMAL), ++ DEFINE_PROP_ZERO_PAGE_DETECTION("zero-page-detection", MigrationState, ++ parameters.zero_page_detection, ++ ZERO_PAGE_DETECTION_LEGACY), + DEFINE_PROP_STRING("sev-pdh", MigrationState, parameters.sev_pdh), + DEFINE_PROP_STRING("sev-plat-cert", MigrationState, parameters.sev_plat_cert), + DEFINE_PROP_STRING("sev-amd-cert", MigrationState, parameters.sev_amd_cert), +@@ -927,6 +930,13 @@ uint64_t migrate_xbzrle_cache_size(void) + return s->parameters.xbzrle_cache_size; + } + ++ZeroPageDetection migrate_zero_page_detection(void) ++{ ++ MigrationState *s = migrate_get_current(); ++ ++ return s->parameters.zero_page_detection; ++} ++ + /* parameter setters */ + + void migrate_set_block_incremental(bool value) +@@ -1042,6 +1052,8 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + params->vcpu_dirty_limit = s->parameters.vcpu_dirty_limit; + params->has_mode = true; + params->mode = s->parameters.mode; ++ params->has_zero_page_detection = true; ++ params->zero_page_detection = s->parameters.zero_page_detection; + params->has_hdbss_buffer_size = true; + params->hdbss_buffer_size = s->parameters.hdbss_buffer_size; + +@@ -1081,6 +1093,7 @@ void migrate_params_init(MigrationParameters *params) + params->has_x_vcpu_dirty_limit_period = true; + params->has_vcpu_dirty_limit = true; + params->has_mode = true; ++ params->has_zero_page_detection = true; + params->has_hdbss_buffer_size = true; + + params->sev_pdh = g_strdup(""); +@@ -1422,6 +1435,10 @@ static void migrate_params_test_apply(MigrateSetParameters *params, + dest->mode = params->mode; + } + ++ if (params->has_zero_page_detection) { ++ dest->zero_page_detection = params->zero_page_detection; ++ } ++ + if 
(params->sev_pdh) { + assert(params->sev_pdh->type == QTYPE_QSTRING); + dest->sev_pdh = params->sev_pdh->u.s; +@@ -1593,6 +1610,10 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) + s->parameters.mode = params->mode; + } + ++ if (params->has_zero_page_detection) { ++ s->parameters.zero_page_detection = params->zero_page_detection; ++ } ++ + if (params->sev_pdh) { + g_free(s->parameters.sev_pdh); + assert(params->sev_pdh->type == QTYPE_QSTRING); +diff --git a/migration/options.h b/migration/options.h +index 987fc81a18..dbd52d7acd 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -95,6 +95,7 @@ const char *migrate_tls_authz(void); + const char *migrate_tls_creds(void); + const char *migrate_tls_hostname(void); + uint64_t migrate_xbzrle_cache_size(void); ++ZeroPageDetection migrate_zero_page_detection(void); + + /* parameters setters */ + +diff --git a/migration/ram.c b/migration/ram.c +index 9630b654c2..7d0f1120df 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1141,6 +1141,10 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss, + QEMUFile *file = pss->pss_channel; + int len = 0; + ++ if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_NONE) { ++ return 0; ++ } ++ + if (!buffer_is_zero(p, TARGET_PAGE_SIZE)) { + return 0; + } +diff --git a/qapi/migration.json b/qapi/migration.json +index f672da5c0d..ff247a50ce 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -653,6 +653,18 @@ + { 'enum': 'MigMode', + 'data': [ 'normal', 'cpr-reboot' ] } + ++## ++# @ZeroPageDetection: ++# ++# @none: Do not perform zero page checking. ++# ++# @legacy: Perform zero page checking in main migration thread. ++# ++# Since: 9.0 ++## ++{ 'enum': 'ZeroPageDetection', ++ 'data': [ 'none', 'legacy' ] } ++ + ## + # @BitmapMigrationBitmapAliasTransform: + # +@@ -891,6 +903,10 @@ + # @mode: Migration mode. See description in @MigMode. Default is 'normal'. 
+ # (Since 8.2) + # ++# @zero-page-detection: Whether and how to detect zero pages. ++# See description in @ZeroPageDetection. Default is 'legacy'. ++# (since 9.0) ++# + # @sev-pdh: The target host platform diffie-hellman key encoded in base64, or + # pdh filename for hygon + # (Since 4.2) +@@ -940,6 +956,7 @@ + { 'name': 'x-vcpu-dirty-limit-period', 'features': ['unstable'] }, + 'vcpu-dirty-limit', + 'mode', ++ 'zero-page-detection', + 'sev-pdh', 'sev-plat-cert', 'sev-amd-cert', 'hdbss-buffer-size'] } + + ## +@@ -1098,6 +1115,10 @@ + # @mode: Migration mode. See description in @MigMode. Default is 'normal'. + # (Since 8.2) + # ++# @zero-page-detection: Whether and how to detect zero pages. ++# See description in @ZeroPageDetection. Default is 'legacy'. ++# (since 9.0) ++# + # @sev-pdh: The target host platform diffie-hellman key encoded in base64, or + # pdh filename for hygon + # (Since 4.2) +@@ -1169,12 +1190,12 @@ + 'features': [ 'unstable' ] }, + '*vcpu-dirty-limit': 'uint64', + '*mode': 'MigMode', ++ '*zero-page-detection': 'ZeroPageDetection', + '*sev-pdh': 'StrOrNull', + '*sev-plat-cert': 'StrOrNull', + '*sev-amd-cert' : 'StrOrNull', + '*hdbss-buffer-size': 'uint8'} } + +- + ## + # @migrate-set-parameters: + # +@@ -1351,6 +1372,10 @@ + # @mode: Migration mode. See description in @MigMode. Default is 'normal'. + # (Since 8.2) + # ++# @zero-page-detection: Whether and how to detect zero pages. ++# See description in @ZeroPageDetection. Default is 'legacy'. 
++# (since 9.0) ++# + # @sev-pdh: The target host platform diffie-hellman key encoded in base64, or + # pdh filename for hygon + # (Since 4.2) +@@ -1418,6 +1443,7 @@ + 'features': [ 'unstable' ] }, + '*vcpu-dirty-limit': 'uint64', + '*mode': 'MigMode', ++ '*zero-page-detection': 'ZeroPageDetection', + '*sev-pdh': 'str', + '*sev-plat-cert': 'str', + '*sev-amd-cert' : 'str', +-- +2.33.0 + diff --git a/migration-multifd-Allow-multifd-without-packets.patch b/migration-multifd-Allow-multifd-without-packets.patch new file mode 100644 index 0000000..ee9cf18 --- /dev/null +++ b/migration-multifd-Allow-multifd-without-packets.patch @@ -0,0 +1,363 @@ +From 48942069691dced68ba3ad74014ce0fb8850df46 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 29 Feb 2024 12:30:08 -0300 +Subject: [67/99] migration/multifd: Allow multifd without packets + +commit 06833d83f8978139395da0f1d6a9fad81b9dd024 upstream. + +For the upcoming support to the new 'mapped-ram' migration stream +format, we cannot use multifd packets because each write into the +ramblock section in the migration file is expected to contain only the +guest pages. They are written at their respective offsets relative to +the ramblock section header. + +There is no space for the packet information and the expected gains +from the new approach come partly from being able to write the pages +sequentially without extraneous data in between. + +The new format also simply doesn't need the packets and all necessary +information can be taken from the standard migration headers with some +(future) changes to multifd code. + +Use the presence of the mapped-ram capability to decide whether to +send packets. + +This only moves code under multifd_use_packets(), it has no effect for +now as mapped-ram cannot yet be enabled with multifd. 
+ +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240229153017.2221-15-farosas@suse.de +Signed-off-by: Peter Xu +[jz: make multifd_use_packet to always return true, since mapped-ram + is not backported] +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 175 +++++++++++++++++++++++++++++--------------- + 1 file changed, 114 insertions(+), 61 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index d5039af833..cac5f2743c 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -93,6 +93,11 @@ struct { + MultiFDMethods *ops; + } *multifd_recv_state; + ++static bool multifd_use_packets(void) ++{ ++ return true; ++} ++ + /* Multifd without compression */ + + /** +@@ -123,6 +128,19 @@ static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) + return; + } + ++static void multifd_send_prepare_iovs(MultiFDSendParams *p) ++{ ++ MultiFDPages_t *pages = p->pages; ++ ++ for (int i = 0; i < pages->num; i++) { ++ p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; ++ p->iov[p->iovs_num].iov_len = p->page_size; ++ p->iovs_num++; ++ } ++ ++ p->next_packet_size = pages->num * p->page_size; ++} ++ + /** + * nocomp_send_prepare: prepare date to be able to send + * +@@ -137,9 +155,13 @@ static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) + static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + { + bool use_zero_copy_send = migrate_zero_copy_send(); +- MultiFDPages_t *pages = p->pages; + int ret; + ++ if (!multifd_use_packets()) { ++ multifd_send_prepare_iovs(p); ++ return 0; ++ } ++ + if (!use_zero_copy_send) { + /* + * Only !zerocopy needs the header in IOV; zerocopy will +@@ -148,13 +170,7 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + multifd_send_prepare_header(p); + } + +- for (int i = 0; i < pages->num; i++) { +- p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; +- p->iov[p->iovs_num].iov_len = 
p->page_size; +- p->iovs_num++; +- } +- +- p->next_packet_size = pages->num * p->page_size; ++ multifd_send_prepare_iovs(p); + p->flags |= MULTIFD_FLAG_NOCOMP; + + multifd_send_fill_packet(p); +@@ -209,7 +225,13 @@ static void nocomp_recv_cleanup(MultiFDRecvParams *p) + */ + static int nocomp_recv(MultiFDRecvParams *p, Error **errp) + { +- uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; ++ uint32_t flags; ++ ++ if (!multifd_use_packets()) { ++ return 0; ++ } ++ ++ flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + + if (flags != MULTIFD_FLAG_NOCOMP) { + error_setg(errp, "multifd %u: flags received %x flags expected %x", +@@ -796,6 +818,7 @@ static void *multifd_send_thread(void *opaque) + MigrationThread *thread = NULL; + Error *local_err = NULL; + int ret = 0; ++ bool use_packets = multifd_use_packets(); + + thread = migration_threads_add(p->name, qemu_get_thread_id()); + +@@ -805,9 +828,11 @@ static void *multifd_send_thread(void *opaque) + trace_multifd_send_thread_start(p->id); + rcu_register_thread(); + +- if (multifd_send_initial_packet(p, &local_err) < 0) { +- ret = -1; +- goto out; ++ if (use_packets) { ++ if (multifd_send_initial_packet(p, &local_err) < 0) { ++ ret = -1; ++ goto out; ++ } + } + + while (true) { +@@ -858,16 +883,20 @@ static void *multifd_send_thread(void *opaque) + * it doesn't require explicit memory barriers. 
+ */ + assert(qatomic_read(&p->pending_sync)); +- p->flags = MULTIFD_FLAG_SYNC; +- multifd_send_fill_packet(p); +- ret = qio_channel_write_all(p->c, (void *)p->packet, +- p->packet_len, &local_err); +- if (ret != 0) { +- break; ++ ++ if (use_packets) { ++ p->flags = MULTIFD_FLAG_SYNC; ++ multifd_send_fill_packet(p); ++ ret = qio_channel_write_all(p->c, (void *)p->packet, ++ p->packet_len, &local_err); ++ if (ret != 0) { ++ break; ++ } ++ /* p->next_packet_size will always be zero for a SYNC packet */ ++ stat64_add(&mig_stats.multifd_bytes, p->packet_len); ++ p->flags = 0; + } +- /* p->next_packet_size will always be zero for a SYNC packet */ +- stat64_add(&mig_stats.multifd_bytes, p->packet_len); +- p->flags = 0; ++ + qatomic_set(&p->pending_sync, false); + qemu_sem_post(&p->sem_sync); + } +@@ -1022,6 +1051,7 @@ bool multifd_send_setup(void) + Error *local_err = NULL; + int thread_count, ret = 0; + uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); ++ bool use_packets = multifd_use_packets(); + uint8_t i; + + if (!migrate_multifd()) { +@@ -1044,14 +1074,20 @@ bool multifd_send_setup(void) + qemu_sem_init(&p->sem_sync, 0); + p->id = i; + p->pages = multifd_pages_init(page_count); +- p->packet_len = sizeof(MultiFDPacket_t) +- + sizeof(uint64_t) * page_count; +- p->packet = g_malloc0(p->packet_len); +- p->packet->magic = cpu_to_be32(MULTIFD_MAGIC); +- p->packet->version = cpu_to_be32(MULTIFD_VERSION); ++ ++ if (use_packets) { ++ p->packet_len = sizeof(MultiFDPacket_t) ++ + sizeof(uint64_t) * page_count; ++ p->packet = g_malloc0(p->packet_len); ++ p->packet->magic = cpu_to_be32(MULTIFD_MAGIC); ++ p->packet->version = cpu_to_be32(MULTIFD_VERSION); ++ ++ /* We need one extra place for the packet header */ ++ p->iov = g_new0(struct iovec, page_count + 1); ++ } else { ++ p->iov = g_new0(struct iovec, page_count); ++ } + p->name = g_strdup_printf("multifdsend_%d", i); +- /* We need one extra place for the packet header */ +- p->iov = g_new0(struct iovec, 
page_count + 1); + p->page_size = qemu_target_page_size(); + p->page_count = page_count; + p->write_flags = 0; +@@ -1114,7 +1150,9 @@ static void multifd_recv_terminate_threads(Error *err) + * multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code, + * however try to wakeup it without harm in cleanup phase. + */ +- qemu_sem_post(&p->sem_sync); ++ if (multifd_use_packets()) { ++ qemu_sem_post(&p->sem_sync); ++ } + + /* + * We could arrive here for two reasons: +@@ -1189,7 +1227,7 @@ void multifd_recv_sync_main(void) + int thread_count = migrate_multifd_channels(); + int i; + +- if (!migrate_multifd()) { ++ if (!migrate_multifd() || !multifd_use_packets()) { + return; + } + +@@ -1224,13 +1262,14 @@ static void *multifd_recv_thread(void *opaque) + { + MultiFDRecvParams *p = opaque; + Error *local_err = NULL; ++ bool use_packets = multifd_use_packets(); + int ret; + + trace_multifd_recv_thread_start(p->id); + rcu_register_thread(); + + while (true) { +- uint32_t flags; ++ uint32_t flags = 0; + bool has_data = false; + p->normal_num = 0; + +@@ -1238,25 +1277,27 @@ static void *multifd_recv_thread(void *opaque) + break; + } + +- ret = qio_channel_read_all_eof(p->c, (void *)p->packet, +- p->packet_len, &local_err); +- if (ret == 0 || ret == -1) { /* 0: EOF -1: Error */ +- break; +- } ++ if (use_packets) { ++ ret = qio_channel_read_all_eof(p->c, (void *)p->packet, ++ p->packet_len, &local_err); ++ if (ret == 0 || ret == -1) { /* 0: EOF -1: Error */ ++ break; ++ } + +- qemu_mutex_lock(&p->mutex); +- ret = multifd_recv_unfill_packet(p, &local_err); +- if (ret) { ++ qemu_mutex_lock(&p->mutex); ++ ret = multifd_recv_unfill_packet(p, &local_err); ++ if (ret) { ++ qemu_mutex_unlock(&p->mutex); ++ break; ++ } ++ ++ flags = p->flags; ++ /* recv methods don't know how to handle the SYNC flag */ ++ p->flags &= ~MULTIFD_FLAG_SYNC; ++ has_data = !!p->normal_num; + qemu_mutex_unlock(&p->mutex); +- break; + } + +- flags = p->flags; +- /* recv methods don't know how to handle the 
SYNC flag */ +- p->flags &= ~MULTIFD_FLAG_SYNC; +- has_data = !!p->normal_num; +- qemu_mutex_unlock(&p->mutex); +- + if (has_data) { + ret = multifd_recv_state->ops->recv(p, &local_err); + if (ret != 0) { +@@ -1264,9 +1305,11 @@ static void *multifd_recv_thread(void *opaque) + } + } + +- if (flags & MULTIFD_FLAG_SYNC) { +- qemu_sem_post(&multifd_recv_state->sem_sync); +- qemu_sem_wait(&p->sem_sync); ++ if (use_packets) { ++ if (flags & MULTIFD_FLAG_SYNC) { ++ qemu_sem_post(&multifd_recv_state->sem_sync); ++ qemu_sem_wait(&p->sem_sync); ++ } + } + } + +@@ -1285,6 +1328,7 @@ int multifd_recv_setup(Error **errp) + { + int thread_count; + uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); ++ bool use_packets = multifd_use_packets(); + uint8_t i; + + /* +@@ -1309,9 +1353,12 @@ int multifd_recv_setup(Error **errp) + qemu_mutex_init(&p->mutex); + qemu_sem_init(&p->sem_sync, 0); + p->id = i; +- p->packet_len = sizeof(MultiFDPacket_t) +- + sizeof(uint64_t) * page_count; +- p->packet = g_malloc0(p->packet_len); ++ ++ if (use_packets) { ++ p->packet_len = sizeof(MultiFDPacket_t) ++ + sizeof(uint64_t) * page_count; ++ p->packet = g_malloc0(p->packet_len); ++ } + p->name = g_strdup_printf("multifdrecv_%d", i); + p->iov = g_new0(struct iovec, page_count); + p->normal = g_new0(ram_addr_t, page_count); +@@ -1355,18 +1402,24 @@ void multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + { + MultiFDRecvParams *p; + Error *local_err = NULL; ++ bool use_packets = multifd_use_packets(); + int id; + +- id = multifd_recv_initial_packet(ioc, &local_err); +- if (id < 0) { +- multifd_recv_terminate_threads(local_err); +- error_propagate_prepend(errp, local_err, +- "failed to receive packet" +- " via multifd channel %d: ", +- qatomic_read(&multifd_recv_state->count)); +- return; ++ if (use_packets) { ++ id = multifd_recv_initial_packet(ioc, &local_err); ++ if (id < 0) { ++ multifd_recv_terminate_threads(local_err); ++ error_propagate_prepend(errp, local_err, ++ "failed 
to receive packet" ++ " via multifd channel %d: ", ++ qatomic_read(&multifd_recv_state->count)); ++ return; ++ } ++ trace_multifd_recv_new_channel(id); ++ } else { ++ /* next patch gives this a meaningful value */ ++ id = 0; + } +- trace_multifd_recv_new_channel(id); + + p = &multifd_recv_state->params[id]; + if (p->c != NULL) { +-- +2.33.0 + diff --git a/migration-multifd-Change-multifd_pages_init-argument.patch b/migration-multifd-Change-multifd_pages_init-argument.patch new file mode 100644 index 0000000..71fbb95 --- /dev/null +++ b/migration-multifd-Change-multifd_pages_init-argument.patch @@ -0,0 +1,42 @@ +From 61e0a1ad97ca72ea4396d142bdfd7481b9380d6c Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 4 Jan 2024 11:21:40 -0300 +Subject: [09/99] migration/multifd: Change multifd_pages_init argument + +commit 6074f81625800743e4c374aecf7dd30774aaf6e0 upstream. + +The 'size' argument is actually the number of pages that fit in a +multifd packet. Change it to uint32_t and rename. 
+ +Signed-off-by: Fabiano Rosas +Reviewed-by: Peter Xu +Link: https://lore.kernel.org/r/20240104142144.9680-4-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 3e5aaaa1d4..ef7d4520c4 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -237,12 +237,12 @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp) + return msg.id; + } + +-static MultiFDPages_t *multifd_pages_init(size_t size) ++static MultiFDPages_t *multifd_pages_init(uint32_t n) + { + MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1); + +- pages->allocated = size; +- pages->offset = g_new0(ram_addr_t, size); ++ pages->allocated = n; ++ pages->offset = g_new0(ram_addr_t, n); + + return pages; + } +-- +2.33.0 + diff --git a/migration-multifd-Change-retval-of-multifd_queue_pag.patch b/migration-multifd-Change-retval-of-multifd_queue_pag.patch new file mode 100644 index 0000000..7a8b37c --- /dev/null +++ b/migration-multifd-Change-retval-of-multifd_queue_pag.patch @@ -0,0 +1,88 @@ +From d95c440bb62e6eb30b3777e10d94fbc72b7f65a4 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:50 +0800 +Subject: [40/99] migration/multifd: Change retval of multifd_queue_page() + +commit d6556d174a6b9fc443f2320193f18e71eb67052a upstream. + +Using int is an overkill when there're only two options. Change it to a +boolean. 
+ +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-17-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 9 +++++---- + migration/multifd.h | 2 +- + migration/ram.c | 2 +- + 3 files changed, 7 insertions(+), 6 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 59ccc42c05..c48c031009 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -506,7 +506,8 @@ static int multifd_send_pages(void) + return 1; + } + +-int multifd_queue_page(RAMBlock *block, ram_addr_t offset) ++/* Returns true if enqueue successful, false otherwise */ ++bool multifd_queue_page(RAMBlock *block, ram_addr_t offset) + { + MultiFDPages_t *pages = multifd_send_state->pages; + bool changed = false; +@@ -520,21 +521,21 @@ int multifd_queue_page(RAMBlock *block, ram_addr_t offset) + pages->num++; + + if (pages->num < pages->allocated) { +- return 1; ++ return true; + } + } else { + changed = true; + } + + if (multifd_send_pages() < 0) { +- return -1; ++ return false; + } + + if (changed) { + return multifd_queue_page(block, offset); + } + +- return 1; ++ return true; + } + + /* Multifd send side hit an error; remember it and prepare to quit */ +diff --git a/migration/multifd.h b/migration/multifd.h +index 34a2ecb9f4..a320c53a6f 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -22,7 +22,7 @@ bool multifd_recv_all_channels_created(void); + void multifd_recv_new_channel(QIOChannel *ioc, Error **errp); + void multifd_recv_sync_main(void); + int multifd_send_sync_main(void); +-int multifd_queue_page(RAMBlock *block, ram_addr_t offset); ++bool multifd_queue_page(RAMBlock *block, ram_addr_t offset); + + /* Multifd Compression flags */ + #define MULTIFD_FLAG_SYNC (1 << 0) +diff --git a/migration/ram.c b/migration/ram.c +index 67fa9c83d6..9630b654c2 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1389,7 +1389,7 @@ static int ram_save_page(RAMState *rs, 
PageSearchStatus *pss) + + static int ram_save_multifd_page(RAMBlock *block, ram_addr_t offset) + { +- if (multifd_queue_page(block, offset) < 0) { ++ if (!multifd_queue_page(block, offset)) { + return -1; + } + stat64_add(&mig_stats.normal_pages, 1); +-- +2.33.0 + diff --git a/migration-multifd-Change-retval-of-multifd_send_page.patch b/migration-multifd-Change-retval-of-multifd_send_page.patch new file mode 100644 index 0000000..2f60a87 --- /dev/null +++ b/migration-multifd-Change-retval-of-multifd_send_page.patch @@ -0,0 +1,83 @@ +From c91e89ee776b145b265f56fc9539514b36988e84 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:51 +0800 +Subject: [41/99] migration/multifd: Change retval of multifd_send_pages() + +commit 3b40964a863d69121733c8b9794a02347ed0000b upstream. + +Using int is an overkill when there're only two options. Change it to a +boolean. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-18-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index c48c031009..dabfc3ec0d 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -450,9 +450,10 @@ static void multifd_send_kick_main(MultiFDSendParams *p) + * thread is using the channel mutex when changing it, and the channel + * have to had finish with its own, otherwise pending_job can't be + * false. ++ * ++ * Returns true if succeed, false otherwise. 
+ */ +- +-static int multifd_send_pages(void) ++static bool multifd_send_pages(void) + { + int i; + static int next_channel; +@@ -460,7 +461,7 @@ static int multifd_send_pages(void) + MultiFDPages_t *pages = multifd_send_state->pages; + + if (multifd_send_should_exit()) { +- return -1; ++ return false; + } + + /* We wait here, until at least one channel is ready */ +@@ -474,7 +475,7 @@ static int multifd_send_pages(void) + next_channel %= migrate_multifd_channels(); + for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) { + if (multifd_send_should_exit()) { +- return -1; ++ return false; + } + p = &multifd_send_state->params[i]; + /* +@@ -503,7 +504,7 @@ static int multifd_send_pages(void) + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); + +- return 1; ++ return true; + } + + /* Returns true if enqueue successful, false otherwise */ +@@ -527,7 +528,7 @@ bool multifd_queue_page(RAMBlock *block, ram_addr_t offset) + changed = true; + } + +- if (multifd_send_pages() < 0) { ++ if (!multifd_send_pages()) { + return false; + } + +@@ -667,7 +668,7 @@ int multifd_send_sync_main(void) + return 0; + } + if (multifd_send_state->pages->num) { +- if (multifd_send_pages() < 0) { ++ if (!multifd_send_pages()) { + error_report("%s: multifd_send_pages fail", __func__); + return -1; + } +-- +2.33.0 + diff --git a/migration-multifd-Cleanup-TLS-iochannel-referencing.patch b/migration-multifd-Cleanup-TLS-iochannel-referencing.patch new file mode 100644 index 0000000..8e47c27 --- /dev/null +++ b/migration-multifd-Cleanup-TLS-iochannel-referencing.patch @@ -0,0 +1,117 @@ +From f1ee974ab81330ae1048f0cf5ee2ccaeb16e26d1 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Thu, 22 Feb 2024 17:52:57 +0800 +Subject: [57/99] migration/multifd: Cleanup TLS iochannel referencing + +commit 9221e3c6a237da90ac296adfeb6e99ea9babfc20 upstream. 
+ +Commit a1af605bd5 ("migration/multifd: fix hangup with TLS-Multifd due to +blocking handshake") introduced a thread for TLS channels, which will +resolve the issue on blocking the main thread. However in the same commit +p->c is slightly abused just to be able to pass over the pointer "p" into +the thread. + +That's the major reason we'll need to conditionally free the io channel in +the fault paths. + +To clean it up, using a separate structure to pass over both "p" and "tioc" +in the tls handshake thread. Then we can make it a rule that p->c will +never be set until the channel is completely setup. With that, we can drop +the tricky conditional unref of the io channel in the error path. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240222095301.171137-2-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 37 +++++++++++++++++++++++-------------- + 1 file changed, 23 insertions(+), 14 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index bbd421004f..ad8fa6a317 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -895,16 +895,22 @@ out: + + static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque); + ++typedef struct { ++ MultiFDSendParams *p; ++ QIOChannelTLS *tioc; ++} MultiFDTLSThreadArgs; ++ + static void *multifd_tls_handshake_thread(void *opaque) + { +- MultiFDSendParams *p = opaque; +- QIOChannelTLS *tioc = QIO_CHANNEL_TLS(p->c); ++ MultiFDTLSThreadArgs *args = opaque; + +- qio_channel_tls_handshake(tioc, ++ qio_channel_tls_handshake(args->tioc, + multifd_new_send_channel_async, +- p, ++ args->p, + NULL, + NULL); ++ g_free(args); ++ + return NULL; + } + +@@ -914,6 +920,7 @@ static bool multifd_tls_channel_connect(MultiFDSendParams *p, + { + MigrationState *s = migrate_get_current(); + const char *hostname = s->hostname; ++ MultiFDTLSThreadArgs *args; + QIOChannelTLS *tioc; + + tioc = migration_tls_client_create(ioc, hostname, errp); +@@ 
-928,11 +935,14 @@ static bool multifd_tls_channel_connect(MultiFDSendParams *p, + object_unref(OBJECT(ioc)); + trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname); + qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing"); +- p->c = QIO_CHANNEL(tioc); ++ ++ args = g_new0(MultiFDTLSThreadArgs, 1); ++ args->tioc = tioc; ++ args->p = p; + + p->tls_thread_created = true; + qemu_thread_create(&p->tls_thread, "multifd-tls-handshake-worker", +- multifd_tls_handshake_thread, p, ++ multifd_tls_handshake_thread, args, + QEMU_THREAD_JOINABLE); + return true; + } +@@ -945,6 +955,7 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + + migration_ioc_register_yank(ioc); + p->registered_yank = true; ++ /* Setup p->c only if the channel is completely setup */ + p->c = ioc; + + p->thread_created = true; +@@ -998,14 +1009,12 @@ out: + + trace_multifd_new_send_channel_async_error(p->id, local_err); + multifd_send_set_error(local_err); +- if (!p->c) { +- /* +- * If no channel has been created, drop the initial +- * reference. Otherwise cleanup happens at +- * multifd_send_channel_destroy() +- */ +- object_unref(OBJECT(ioc)); +- } ++ /* ++ * For error cases (TLS or non-TLS), IO channel is always freed here ++ * rather than when cleanup multifd: since p->c is not set, multifd ++ * cleanup code doesn't even know its existence. ++ */ ++ object_unref(OBJECT(ioc)); + error_free(local_err); + } + +-- +2.33.0 + diff --git a/migration-multifd-Cleanup-multifd_load_cleanup.patch b/migration-multifd-Cleanup-multifd_load_cleanup.patch new file mode 100644 index 0000000..0250588 --- /dev/null +++ b/migration-multifd-Cleanup-multifd_load_cleanup.patch @@ -0,0 +1,94 @@ +From d7240e133b0eebb08d42de278fbefbc89061143b Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:54 +0800 +Subject: [44/99] migration/multifd: Cleanup multifd_load_cleanup() + +commit 5e6ea8a1d64e72e648b5a5277f08ec7fb09c3b8e upstream. + +Use similar logic to cleanup the recv side. 
+ +Note that multifd_recv_terminate_threads() may need some similar rework +like the sender side, but let's leave that for later. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-21-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 52 ++++++++++++++++++++++++++------------------- + 1 file changed, 30 insertions(+), 22 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 83c6ccd0f2..048ff66760 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -1074,6 +1074,34 @@ void multifd_load_shutdown(void) + } + } + ++static void multifd_recv_cleanup_channel(MultiFDRecvParams *p) ++{ ++ migration_ioc_unregister_yank(p->c); ++ object_unref(OBJECT(p->c)); ++ p->c = NULL; ++ qemu_mutex_destroy(&p->mutex); ++ qemu_sem_destroy(&p->sem_sync); ++ g_free(p->name); ++ p->name = NULL; ++ p->packet_len = 0; ++ g_free(p->packet); ++ p->packet = NULL; ++ g_free(p->iov); ++ p->iov = NULL; ++ g_free(p->normal); ++ p->normal = NULL; ++ multifd_recv_state->ops->recv_cleanup(p); ++} ++ ++static void multifd_recv_cleanup_state(void) ++{ ++ qemu_sem_destroy(&multifd_recv_state->sem_sync); ++ g_free(multifd_recv_state->params); ++ multifd_recv_state->params = NULL; ++ g_free(multifd_recv_state); ++ multifd_recv_state = NULL; ++} ++ + void multifd_load_cleanup(void) + { + int i; +@@ -1096,29 +1124,9 @@ void multifd_load_cleanup(void) + qemu_thread_join(&p->thread); + } + for (i = 0; i < migrate_multifd_channels(); i++) { +- MultiFDRecvParams *p = &multifd_recv_state->params[i]; +- +- migration_ioc_unregister_yank(p->c); +- object_unref(OBJECT(p->c)); +- p->c = NULL; +- qemu_mutex_destroy(&p->mutex); +- qemu_sem_destroy(&p->sem_sync); +- g_free(p->name); +- p->name = NULL; +- p->packet_len = 0; +- g_free(p->packet); +- p->packet = NULL; +- g_free(p->iov); +- p->iov = NULL; +- g_free(p->normal); +- p->normal = NULL; +- multifd_recv_state->ops->recv_cleanup(p); ++ 
multifd_recv_cleanup_channel(&multifd_recv_state->params[i]); + } +- qemu_sem_destroy(&multifd_recv_state->sem_sync); +- g_free(multifd_recv_state->params); +- multifd_recv_state->params = NULL; +- g_free(multifd_recv_state); +- multifd_recv_state = NULL; ++ multifd_recv_cleanup_state(); + } + + void multifd_recv_sync_main(void) +-- +2.33.0 + diff --git a/migration-multifd-Cleanup-multifd_recv_sync_main.patch b/migration-multifd-Cleanup-multifd_recv_sync_main.patch new file mode 100644 index 0000000..d26afc6 --- /dev/null +++ b/migration-multifd-Cleanup-multifd_recv_sync_main.patch @@ -0,0 +1,75 @@ +From dc7717ee9311c374ad199c5baf4ecde8ac082248 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 29 Feb 2024 12:29:55 -0300 +Subject: [64/99] migration/multifd: Cleanup multifd_recv_sync_main + +commit 4aac6b1e9bd48677c4f24518fe86ffd34c677d5a upstream. + +Some minor cleanups and documentation for multifd_recv_sync_main. + +Use thread_count as done in other parts of the code. Remove p->id from +the multifd_recv_state sync, since that is global and not tied to a +channel. Add documentation for the sync steps. 
+ +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240229153017.2221-2-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 17 +++++++++++++---- + migration/trace-events | 2 +- + 2 files changed, 14 insertions(+), 5 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 9e3955cb8c..429aad232b 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -1186,18 +1186,27 @@ void multifd_recv_cleanup(void) + + void multifd_recv_sync_main(void) + { ++ int thread_count = migrate_multifd_channels(); + int i; + + if (!migrate_multifd()) { + return; + } +- for (i = 0; i < migrate_multifd_channels(); i++) { +- MultiFDRecvParams *p = &multifd_recv_state->params[i]; + +- trace_multifd_recv_sync_main_wait(p->id); ++ /* ++ * Initiate the synchronization by waiting for all channels. ++ * For socket-based migration this means each channel has received ++ * the SYNC packet on the stream. ++ */ ++ for (i = 0; i < thread_count; i++) { ++ trace_multifd_recv_sync_main_wait(i); + qemu_sem_wait(&multifd_recv_state->sem_sync); + } +- for (i = 0; i < migrate_multifd_channels(); i++) { ++ ++ /* ++ * Sync done. Release the channels for the next iteration. 
++ */ ++ for (i = 0; i < thread_count; i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + + WITH_QEMU_LOCK_GUARD(&p->mutex) { +diff --git a/migration/trace-events b/migration/trace-events +index 298ad2b0dd..bf1a069632 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -132,7 +132,7 @@ multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uin + multifd_recv_new_channel(uint8_t id) "channel %u" + multifd_recv_sync_main(long packet_num) "packet num %ld" + multifd_recv_sync_main_signal(uint8_t id) "channel %u" +-multifd_recv_sync_main_wait(uint8_t id) "channel %u" ++multifd_recv_sync_main_wait(uint8_t id) "iter %u" + multifd_recv_terminate_threads(bool error) "error %d" + multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64 + multifd_recv_thread_start(uint8_t id) "%u" +-- +2.33.0 + diff --git a/migration-multifd-Cleanup-multifd_save_cleanup.patch b/migration-multifd-Cleanup-multifd_save_cleanup.patch new file mode 100644 index 0000000..9f5a9a3 --- /dev/null +++ b/migration-multifd-Cleanup-multifd_save_cleanup.patch @@ -0,0 +1,159 @@ +From bdcbbe9df0dcc74f21948ba459cc350da77446af Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:53 +0800 +Subject: [43/99] migration/multifd: Cleanup multifd_save_cleanup() + +commit 12808db3b8c22d26c9bc3da6f41756890ce882e4 upstream. + +Shrink the function by moving relevant works into helpers: move the thread +join()s into multifd_send_terminate_threads(), then create two more helpers +to cover channel/state cleanups. + +Add a TODO entry for the thread terminate process because p->running is +still buggy. We need to fix it at some point but not yet covered. 
+ +Suggested-by: Fabiano Rosas +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-20-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 91 +++++++++++++++++++++++++++++---------------- + 1 file changed, 59 insertions(+), 32 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index f92e6776f0..83c6ccd0f2 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -594,6 +594,11 @@ static void multifd_send_terminate_threads(void) + * always set it. + */ + qatomic_set(&multifd_send_state->exiting, 1); ++ ++ /* ++ * Firstly, kick all threads out; no matter whether they are just idle, ++ * or blocked in an IO system call. ++ */ + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +@@ -602,6 +607,21 @@ static void multifd_send_terminate_threads(void) + qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); + } + } ++ ++ /* ++ * Finally recycle all the threads. ++ * ++ * TODO: p->running is still buggy, e.g. we can reach here without the ++ * corresponding multifd_new_send_channel_async() get invoked yet, ++ * then a new thread can even be created after this function returns. 
++ */ ++ for (i = 0; i < migrate_multifd_channels(); i++) { ++ MultiFDSendParams *p = &multifd_send_state->params[i]; ++ ++ if (p->running) { ++ qemu_thread_join(&p->thread); ++ } ++ } + } + + static int multifd_send_channel_destroy(QIOChannel *send) +@@ -609,6 +629,41 @@ static int multifd_send_channel_destroy(QIOChannel *send) + return socket_send_channel_destroy(send); + } + ++static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) ++{ ++ if (p->registered_yank) { ++ migration_ioc_unregister_yank(p->c); ++ } ++ multifd_send_channel_destroy(p->c); ++ p->c = NULL; ++ qemu_mutex_destroy(&p->mutex); ++ qemu_sem_destroy(&p->sem); ++ qemu_sem_destroy(&p->sem_sync); ++ g_free(p->name); ++ p->name = NULL; ++ multifd_pages_clear(p->pages); ++ p->pages = NULL; ++ p->packet_len = 0; ++ g_free(p->packet); ++ p->packet = NULL; ++ g_free(p->iov); ++ p->iov = NULL; ++ multifd_send_state->ops->send_cleanup(p, errp); ++ ++ return *errp == NULL; ++} ++ ++static void multifd_send_cleanup_state(void) ++{ ++ qemu_sem_destroy(&multifd_send_state->channels_ready); ++ g_free(multifd_send_state->params); ++ multifd_send_state->params = NULL; ++ multifd_pages_clear(multifd_send_state->pages); ++ multifd_send_state->pages = NULL; ++ g_free(multifd_send_state); ++ multifd_send_state = NULL; ++} ++ + void multifd_save_cleanup(void) + { + int i; +@@ -616,48 +671,20 @@ void multifd_save_cleanup(void) + if (!migrate_multifd()) { + return; + } ++ + multifd_send_terminate_threads(); +- for (i = 0; i < migrate_multifd_channels(); i++) { +- MultiFDSendParams *p = &multifd_send_state->params[i]; + +- if (p->running) { +- qemu_thread_join(&p->thread); +- } +- } + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + Error *local_err = NULL; + +- if (p->registered_yank) { +- migration_ioc_unregister_yank(p->c); +- } +- multifd_send_channel_destroy(p->c); +- p->c = NULL; +- qemu_mutex_destroy(&p->mutex); +- 
qemu_sem_destroy(&p->sem); +- qemu_sem_destroy(&p->sem_sync); +- g_free(p->name); +- p->name = NULL; +- multifd_pages_clear(p->pages); +- p->pages = NULL; +- p->packet_len = 0; +- g_free(p->packet); +- p->packet = NULL; +- g_free(p->iov); +- p->iov = NULL; +- multifd_send_state->ops->send_cleanup(p, &local_err); +- if (local_err) { ++ if (!multifd_send_cleanup_channel(p, &local_err)) { + migrate_set_error(migrate_get_current(), local_err); + error_free(local_err); + } + } +- qemu_sem_destroy(&multifd_send_state->channels_ready); +- g_free(multifd_send_state->params); +- multifd_send_state->params = NULL; +- multifd_pages_clear(multifd_send_state->pages); +- multifd_send_state->pages = NULL; +- g_free(multifd_send_state); +- multifd_send_state = NULL; ++ ++ multifd_send_cleanup_state(); + } + + static int multifd_zero_copy_flush(QIOChannel *c) +-- +2.33.0 + diff --git a/migration-multifd-Cleanup-outgoing_args-in-state-des.patch b/migration-multifd-Cleanup-outgoing_args-in-state-des.patch new file mode 100644 index 0000000..7c6c197 --- /dev/null +++ b/migration-multifd-Cleanup-outgoing_args-in-state-des.patch @@ -0,0 +1,78 @@ +From 28700ce624e7972fc971d7524c5aa8de868d253d Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Thu, 22 Feb 2024 17:53:00 +0800 +Subject: [60/99] migration/multifd: Cleanup outgoing_args in state destroy + +commit 72b90b96872acc5d00f9c16dfc196543349361da upstream. + +outgoing_args is a global cache of socket address to be reused in multifd. +Freeing the cache in per-channel destructor is more or less a hack. Move +it to multifd_send_cleanup_state() so it only get checked once. Use a +small helper to do so because it's internal of socket.c. 
+ +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240222095301.171137-5-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 1 + + migration/socket.c | 12 ++++++++---- + migration/socket.h | 2 ++ + 3 files changed, 11 insertions(+), 4 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index a7289289a4..aa7b7e224e 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -690,6 +690,7 @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) + + static void multifd_send_cleanup_state(void) + { ++ socket_cleanup_outgoing_migration(); + qemu_sem_destroy(&multifd_send_state->channels_created); + qemu_sem_destroy(&multifd_send_state->channels_ready); + g_free(multifd_send_state->params); +diff --git a/migration/socket.c b/migration/socket.c +index 98e3ea1514..3184c7c3c1 100644 +--- a/migration/socket.c ++++ b/migration/socket.c +@@ -64,10 +64,6 @@ int socket_send_channel_destroy(QIOChannel *send) + { + /* Remove channel */ + object_unref(OBJECT(send)); +- if (outgoing_args.saddr) { +- qapi_free_SocketAddress(outgoing_args.saddr); +- outgoing_args.saddr = NULL; +- } + return 0; + } + +@@ -137,6 +133,14 @@ void socket_start_outgoing_migration(MigrationState *s, + NULL); + } + ++void socket_cleanup_outgoing_migration(void) ++{ ++ if (outgoing_args.saddr) { ++ qapi_free_SocketAddress(outgoing_args.saddr); ++ outgoing_args.saddr = NULL; ++ } ++} ++ + static void socket_accept_incoming_migration(QIONetListener *listener, + QIOChannelSocket *cioc, + gpointer opaque) +diff --git a/migration/socket.h b/migration/socket.h +index 5e4c33b8ea..5f52eddd4c 100644 +--- a/migration/socket.h ++++ b/migration/socket.h +@@ -29,4 +29,6 @@ void socket_start_incoming_migration(SocketAddress *saddr, Error **errp); + + void socket_start_outgoing_migration(MigrationState *s, + SocketAddress *saddr, Error **errp); ++void socket_cleanup_outgoing_migration(void); ++ + #endif +-- +2.33.0 + 
diff --git a/migration-multifd-Decouple-recv-method-from-pages.patch b/migration-multifd-Decouple-recv-method-from-pages.patch new file mode 100644 index 0000000..99915b1 --- /dev/null +++ b/migration-multifd-Decouple-recv-method-from-pages.patch @@ -0,0 +1,157 @@ +From deca5474782611e8bacf0c3110897ddd204084e9 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 29 Feb 2024 12:30:07 -0300 +Subject: [66/99] migration/multifd: Decouple recv method from pages + +commit 9db191251381c75e57201f7b07330ca982a55d1e upstream. + +Next patches will abstract the type of data being received by the +channels, so do some cleanup now to remove references to pages and +dependency on 'normal_num'. + +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240229153017.2221-14-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd-zlib.c | 6 +++--- + migration/multifd-zstd.c | 6 +++--- + migration/multifd.c | 13 ++++++++----- + migration/multifd.h | 4 ++-- + 4 files changed, 16 insertions(+), 13 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 2a8f5fc9a6..6120faad65 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -234,7 +234,7 @@ static void zlib_recv_cleanup(MultiFDRecvParams *p) + } + + /** +- * zlib_recv_pages: read the data from the channel into actual pages ++ * zlib_recv: read the data from the channel into actual pages + * + * Read the compressed buffer, and uncompress it into the actual + * pages. 
+@@ -244,7 +244,7 @@ static void zlib_recv_cleanup(MultiFDRecvParams *p) + * @p: Params for the channel that we are using + * @errp: pointer to an error + */ +-static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) ++static int zlib_recv(MultiFDRecvParams *p, Error **errp) + { + struct zlib_data *z = p->compress_data; + z_stream *zs = &z->zs; +@@ -319,7 +319,7 @@ static MultiFDMethods multifd_zlib_ops = { + .send_prepare = zlib_send_prepare, + .recv_setup = zlib_recv_setup, + .recv_cleanup = zlib_recv_cleanup, +- .recv_pages = zlib_recv_pages ++ .recv = zlib_recv + }; + + static void multifd_zlib_register(void) +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 593cf290ad..cac236833d 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -232,7 +232,7 @@ static void zstd_recv_cleanup(MultiFDRecvParams *p) + } + + /** +- * zstd_recv_pages: read the data from the channel into actual pages ++ * zstd_recv: read the data from the channel into actual pages + * + * Read the compressed buffer, and uncompress it into the actual + * pages. 
+@@ -242,7 +242,7 @@ static void zstd_recv_cleanup(MultiFDRecvParams *p) + * @p: Params for the channel that we are using + * @errp: pointer to an error + */ +-static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) ++static int zstd_recv(MultiFDRecvParams *p, Error **errp) + { + uint32_t in_size = p->next_packet_size; + uint32_t out_size = 0; +@@ -310,7 +310,7 @@ static MultiFDMethods multifd_zstd_ops = { + .send_prepare = zstd_send_prepare, + .recv_setup = zstd_recv_setup, + .recv_cleanup = zstd_recv_cleanup, +- .recv_pages = zstd_recv_pages ++ .recv = zstd_recv + }; + + static void multifd_zstd_register(void) +diff --git a/migration/multifd.c b/migration/multifd.c +index 429aad232b..d5039af833 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -198,7 +198,7 @@ static void nocomp_recv_cleanup(MultiFDRecvParams *p) + } + + /** +- * nocomp_recv_pages: read the data from the channel into actual pages ++ * nocomp_recv: read the data from the channel + * + * For no compression we just need to read things into the correct place. 
+ * +@@ -207,7 +207,7 @@ static void nocomp_recv_cleanup(MultiFDRecvParams *p) + * @p: Params for the channel that we are using + * @errp: pointer to an error + */ +-static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp) ++static int nocomp_recv(MultiFDRecvParams *p, Error **errp) + { + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + +@@ -229,7 +229,7 @@ static MultiFDMethods multifd_nocomp_ops = { + .send_prepare = nocomp_send_prepare, + .recv_setup = nocomp_recv_setup, + .recv_cleanup = nocomp_recv_cleanup, +- .recv_pages = nocomp_recv_pages ++ .recv = nocomp_recv + }; + + static MultiFDMethods *multifd_ops[MULTIFD_COMPRESSION__MAX] = { +@@ -1231,6 +1231,8 @@ static void *multifd_recv_thread(void *opaque) + + while (true) { + uint32_t flags; ++ bool has_data = false; ++ p->normal_num = 0; + + if (multifd_recv_should_exit()) { + break; +@@ -1252,10 +1254,11 @@ static void *multifd_recv_thread(void *opaque) + flags = p->flags; + /* recv methods don't know how to handle the SYNC flag */ + p->flags &= ~MULTIFD_FLAG_SYNC; ++ has_data = !!p->normal_num; + qemu_mutex_unlock(&p->mutex); + +- if (p->normal_num) { +- ret = multifd_recv_state->ops->recv_pages(p, &local_err); ++ if (has_data) { ++ ret = multifd_recv_state->ops->recv(p, &local_err); + if (ret != 0) { + break; + } +diff --git a/migration/multifd.h b/migration/multifd.h +index adccd3532f..6a54377cc1 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -197,8 +197,8 @@ typedef struct { + int (*recv_setup)(MultiFDRecvParams *p, Error **errp); + /* Cleanup for receiving side */ + void (*recv_cleanup)(MultiFDRecvParams *p); +- /* Read all pages */ +- int (*recv_pages)(MultiFDRecvParams *p, Error **errp); ++ /* Read all data */ ++ int (*recv)(MultiFDRecvParams *p, Error **errp); + } MultiFDMethods; + + void multifd_register_ops(int method, MultiFDMethods *ops); +-- +2.33.0 + diff --git a/migration-multifd-Drop-MultiFDSendParams.normal-arra.patch 
b/migration-multifd-Drop-MultiFDSendParams.normal-arra.patch new file mode 100644 index 0000000..9acd02b --- /dev/null +++ b/migration-multifd-Drop-MultiFDSendParams.normal-arra.patch @@ -0,0 +1,212 @@ +From 383f4cb78af723cf650841dc31862f9b0b612f4b Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:39 +0800 +Subject: [29/99] migration/multifd: Drop MultiFDSendParams.normal[] array + +commit efd8c5439db7eaf00f35adc0fcc4f01d916e8619 upstream. + +This array is redundant when p->pages exists. Now we extended the life of +p->pages to the whole period where pending_job is set, it should be safe to +always use p->pages->offset[] rather than p->normal[]. Drop the array. + +Alongside, the normal_num is also redundant, which is the same to +p->pages->num. + +This doesn't apply to recv side, because there's no extra buffering on recv +side, so p->normal[] array is still needed. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-6-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd-zlib.c | 7 ++++--- + migration/multifd-zstd.c | 7 ++++--- + migration/multifd.c | 33 +++++++++++++-------------------- + migration/multifd.h | 4 ---- + 4 files changed, 21 insertions(+), 30 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 37ce48621e..100809abc1 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -116,17 +116,18 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + */ + static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + { ++ MultiFDPages_t *pages = p->pages; + struct zlib_data *z = p->data; + z_stream *zs = &z->zs; + uint32_t out_size = 0; + int ret; + uint32_t i; + +- for (i = 0; i < p->normal_num; i++) { ++ for (i = 0; i < pages->num; i++) { + uint32_t available = z->zbuff_len - out_size; + int flush = Z_NO_FLUSH; + +- if (i == p->normal_num - 1) { ++ if (i == pages->num - 1) { + flush = 
Z_SYNC_FLUSH; + } + +@@ -135,7 +136,7 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + * with compression. zlib does not guarantee that this is safe, + * therefore copy the page before calling deflate(). + */ +- memcpy(z->buf, p->pages->block->host + p->normal[i], p->page_size); ++ memcpy(z->buf, p->pages->block->host + pages->offset[i], p->page_size); + zs->avail_in = p->page_size; + zs->next_in = z->buf; + +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index b471daadcd..2023edd8cc 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -113,6 +113,7 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) + */ + static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + { ++ MultiFDPages_t *pages = p->pages; + struct zstd_data *z = p->data; + int ret; + uint32_t i; +@@ -121,13 +122,13 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + z->out.size = z->zbuff_len; + z->out.pos = 0; + +- for (i = 0; i < p->normal_num; i++) { ++ for (i = 0; i < pages->num; i++) { + ZSTD_EndDirective flush = ZSTD_e_continue; + +- if (i == p->normal_num - 1) { ++ if (i == pages->num - 1) { + flush = ZSTD_e_flush; + } +- z->in.src = p->pages->block->host + p->normal[i]; ++ z->in.src = p->pages->block->host + pages->offset[i]; + z->in.size = p->page_size; + z->in.pos = 0; + +diff --git a/migration/multifd.c b/migration/multifd.c +index fff119237a..bfafe94e1e 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -91,13 +91,13 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + { + MultiFDPages_t *pages = p->pages; + +- for (int i = 0; i < p->normal_num; i++) { +- p->iov[p->iovs_num].iov_base = pages->block->host + p->normal[i]; ++ for (int i = 0; i < pages->num; i++) { ++ p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; + p->iov[p->iovs_num].iov_len = p->page_size; + p->iovs_num++; + } + +- p->next_packet_size = p->normal_num * 
p->page_size; ++ p->next_packet_size = pages->num * p->page_size; + p->flags |= MULTIFD_FLAG_NOCOMP; + return 0; + } +@@ -270,21 +270,22 @@ static void multifd_pages_clear(MultiFDPages_t *pages) + static void multifd_send_fill_packet(MultiFDSendParams *p) + { + MultiFDPacket_t *packet = p->packet; ++ MultiFDPages_t *pages = p->pages; + int i; + + packet->flags = cpu_to_be32(p->flags); + packet->pages_alloc = cpu_to_be32(p->pages->allocated); +- packet->normal_pages = cpu_to_be32(p->normal_num); ++ packet->normal_pages = cpu_to_be32(pages->num); + packet->next_packet_size = cpu_to_be32(p->next_packet_size); + packet->packet_num = cpu_to_be64(p->packet_num); + +- if (p->pages->block) { +- strncpy(packet->ramblock, p->pages->block->idstr, 256); ++ if (pages->block) { ++ strncpy(packet->ramblock, pages->block->idstr, 256); + } + +- for (i = 0; i < p->normal_num; i++) { ++ for (i = 0; i < pages->num; i++) { + /* there are architectures where ram_addr_t is 32 bit */ +- uint64_t temp = p->normal[i]; ++ uint64_t temp = pages->offset[i]; + + packet->offset[i] = cpu_to_be64(temp); + } +@@ -571,8 +572,6 @@ void multifd_save_cleanup(void) + p->packet = NULL; + g_free(p->iov); + p->iov = NULL; +- g_free(p->normal); +- p->normal = NULL; + multifd_send_state->ops->send_cleanup(p, &local_err); + if (local_err) { + migrate_set_error(migrate_get_current(), local_err); +@@ -692,8 +691,8 @@ static void *multifd_send_thread(void *opaque) + + if (p->pending_job) { + uint64_t packet_num = p->packet_num; ++ MultiFDPages_t *pages = p->pages; + uint32_t flags; +- p->normal_num = 0; + + if (use_zero_copy_send) { + p->iovs_num = 0; +@@ -701,12 +700,7 @@ static void *multifd_send_thread(void *opaque) + p->iovs_num = 1; + } + +- for (int i = 0; i < p->pages->num; i++) { +- p->normal[p->normal_num] = p->pages->offset[i]; +- p->normal_num++; +- } +- +- if (p->normal_num) { ++ if (pages->num) { + ret = multifd_send_state->ops->send_prepare(p, &local_err); + if (ret != 0) { + 
qemu_mutex_unlock(&p->mutex); +@@ -717,10 +711,10 @@ static void *multifd_send_thread(void *opaque) + flags = p->flags; + p->flags = 0; + p->num_packets++; +- p->total_normal_pages += p->normal_num; ++ p->total_normal_pages += pages->num; + qemu_mutex_unlock(&p->mutex); + +- trace_multifd_send(p->id, packet_num, p->normal_num, flags, ++ trace_multifd_send(p->id, packet_num, pages->num, flags, + p->next_packet_size); + + if (use_zero_copy_send) { +@@ -928,7 +922,6 @@ int multifd_save_setup(Error **errp) + p->name = g_strdup_printf("multifdsend_%d", i); + /* We need one extra place for the packet header */ + p->iov = g_new0(struct iovec, page_count + 1); +- p->normal = g_new0(ram_addr_t, page_count); + p->page_size = qemu_target_page_size(); + p->page_count = page_count; + +diff --git a/migration/multifd.h b/migration/multifd.h +index 7c040cb85a..3920bdbcf1 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -122,10 +122,6 @@ typedef struct { + struct iovec *iov; + /* number of iovs used */ + uint32_t iovs_num; +- /* Pages that are not zero */ +- ram_addr_t *normal; +- /* num of non zero pages */ +- uint32_t normal_num; + /* used for compression methods */ + void *data; + } MultiFDSendParams; +-- +2.33.0 + diff --git a/migration-multifd-Drop-MultiFDSendParams.quit-cleanu.patch b/migration-multifd-Drop-MultiFDSendParams.quit-cleanu.patch new file mode 100644 index 0000000..aa4fbee --- /dev/null +++ b/migration-multifd-Drop-MultiFDSendParams.quit-cleanu.patch @@ -0,0 +1,251 @@ +From 046f864bba4035328269599e7d0e9de1b7a93932 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:37 +0800 +Subject: [27/99] migration/multifd: Drop MultiFDSendParams.quit, cleanup error + paths + +commit 15f3f21d598148895c33b6fc41e29777cf6ad992 upstream. + +Multifd send side has two fields to indicate error quits: + + - MultiFDSendParams.quit + - &multifd_send_state->exiting + +Merge them into the global one. 
The replacement is done by changing all +p->quit checks into the global var check. The global check doesn't need +any lock. + +A few more things done on top of this altogether: + + - multifd_send_terminate_threads() + + Moving the xchg() of &multifd_send_state->exiting upper, so as to cover + the tracepoint, migrate_set_error() and migrate_set_state(). + + - multifd_send_sync_main() + + In the 2nd loop, add one more check over the global var to make sure we + don't keep the looping if QEMU already decided to quit. + + - multifd_tls_outgoing_handshake() + + Use multifd_send_terminate_threads() to set the error state. That has + a benefit of updating MigrationState.error to that error too, so we can + persist that 1st error we hit in that specific channel. + + - multifd_new_send_channel_async() + + Take similar approach like above, drop the migrate_set_error() because + multifd_send_terminate_threads() already covers that. Unwrap the helper + multifd_new_send_channel_cleanup() along the way; not really needed. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-4-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 85 ++++++++++++++++++--------------------------- + migration/multifd.h | 2 -- + 2 files changed, 33 insertions(+), 54 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index d2da6178b0..ea756b6eb8 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -373,6 +373,11 @@ struct { + MultiFDMethods *ops; + } *multifd_send_state; + ++static bool multifd_send_should_exit(void) ++{ ++ return qatomic_read(&multifd_send_state->exiting); ++} ++ + /* + * The migration thread can wait on either of the two semaphores. 
This + * function can be used to kick the main thread out of waiting on either of +@@ -410,7 +415,7 @@ static int multifd_send_pages(void) + MultiFDSendParams *p = NULL; /* make happy gcc */ + MultiFDPages_t *pages = multifd_send_state->pages; + +- if (qatomic_read(&multifd_send_state->exiting)) { ++ if (multifd_send_should_exit()) { + return -1; + } + +@@ -422,14 +427,11 @@ static int multifd_send_pages(void) + */ + next_channel %= migrate_multifd_channels(); + for (i = next_channel;; i = (i + 1) % migrate_multifd_channels()) { +- p = &multifd_send_state->params[i]; +- +- qemu_mutex_lock(&p->mutex); +- if (p->quit) { +- error_report("%s: channel %d has already quit!", __func__, i); +- qemu_mutex_unlock(&p->mutex); ++ if (multifd_send_should_exit()) { + return -1; + } ++ p = &multifd_send_state->params[i]; ++ qemu_mutex_lock(&p->mutex); + if (!p->pending_job) { + p->pending_job++; + next_channel = (i + 1) % migrate_multifd_channels(); +@@ -484,6 +486,16 @@ static void multifd_send_terminate_threads(Error *err) + { + int i; + ++ /* ++ * We don't want to exit each threads twice. Depending on where ++ * we get the error, or if there are two independent errors in two ++ * threads at the same time, we can end calling this function ++ * twice. ++ */ ++ if (qatomic_xchg(&multifd_send_state->exiting, 1)) { ++ return; ++ } ++ + trace_multifd_send_terminate_threads(err != NULL); + + if (err) { +@@ -498,26 +510,13 @@ static void multifd_send_terminate_threads(Error *err) + } + } + +- /* +- * We don't want to exit each threads twice. Depending on where +- * we get the error, or if there are two independent errors in two +- * threads at the same time, we can end calling this function +- * twice. 
+- */ +- if (qatomic_xchg(&multifd_send_state->exiting, 1)) { +- return; +- } +- + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +- qemu_mutex_lock(&p->mutex); +- p->quit = true; + qemu_sem_post(&p->sem); + if (p->c) { + qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); + } +- qemu_mutex_unlock(&p->mutex); + } + } + +@@ -616,16 +615,13 @@ int multifd_send_sync_main(void) + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +- trace_multifd_send_sync_main_signal(p->id); +- +- qemu_mutex_lock(&p->mutex); +- +- if (p->quit) { +- error_report("%s: channel %d has already quit", __func__, i); +- qemu_mutex_unlock(&p->mutex); ++ if (multifd_send_should_exit()) { + return -1; + } + ++ trace_multifd_send_sync_main_signal(p->id); ++ ++ qemu_mutex_lock(&p->mutex); + p->packet_num = multifd_send_state->packet_num++; + p->flags |= MULTIFD_FLAG_SYNC; + p->pending_job++; +@@ -635,6 +631,10 @@ int multifd_send_sync_main(void) + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + ++ if (multifd_send_should_exit()) { ++ return -1; ++ } ++ + qemu_sem_wait(&multifd_send_state->channels_ready); + trace_multifd_send_sync_main_wait(p->id); + qemu_sem_wait(&p->sem_sync); +@@ -675,7 +675,7 @@ static void *multifd_send_thread(void *opaque) + qemu_sem_post(&multifd_send_state->channels_ready); + qemu_sem_wait(&p->sem); + +- if (qatomic_read(&multifd_send_state->exiting)) { ++ if (multifd_send_should_exit()) { + break; + } + qemu_mutex_lock(&p->mutex); +@@ -790,12 +790,7 @@ static void multifd_tls_outgoing_handshake(QIOTask *task, + + trace_multifd_tls_outgoing_handshake_error(ioc, error_get_pretty(err)); + +- migrate_set_error(migrate_get_current(), err); +- /* +- * Error happen, mark multifd_send_thread status as 'quit' although it +- * is not created, and then tell who pay attention to me. 
+- */ +- p->quit = true; ++ multifd_send_terminate_threads(err); + multifd_send_kick_main(p); + error_free(err); + } +@@ -861,22 +856,6 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + return true; + } + +-static void multifd_new_send_channel_cleanup(MultiFDSendParams *p, +- QIOChannel *ioc, Error *err) +-{ +- migrate_set_error(migrate_get_current(), err); +- /* Error happen, we need to tell who pay attention to me */ +- multifd_send_kick_main(p); +- /* +- * Although multifd_send_thread is not created, but main migration +- * thread need to judge whether it is running, so we need to mark +- * its status. +- */ +- p->quit = true; +- object_unref(OBJECT(ioc)); +- error_free(err); +-} +- + static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + { + MultiFDSendParams *p = opaque; +@@ -893,7 +872,10 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + } + + trace_multifd_new_send_channel_async_error(p->id, local_err); +- multifd_new_send_channel_cleanup(p, ioc, local_err); ++ multifd_send_terminate_threads(local_err); ++ multifd_send_kick_main(p); ++ object_unref(OBJECT(ioc)); ++ error_free(local_err); + } + + static void multifd_new_send_channel_create(gpointer opaque) +@@ -925,7 +907,6 @@ int multifd_save_setup(Error **errp) + qemu_mutex_init(&p->mutex); + qemu_sem_init(&p->sem, 0); + qemu_sem_init(&p->sem_sync, 0); +- p->quit = false; + p->pending_job = 0; + p->id = i; + p->pages = multifd_pages_init(page_count); +diff --git a/migration/multifd.h b/migration/multifd.h +index 35d11f103c..7c040cb85a 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -95,8 +95,6 @@ typedef struct { + QemuMutex mutex; + /* is this channel thread running */ + bool running; +- /* should this thread finish */ +- bool quit; + /* multifd flags for each packet */ + uint32_t flags; + /* global number of generated multifd packets */ +-- +2.33.0 + diff --git a/migration-multifd-Drop-pages-num-check-in-sender-thr.patch 
b/migration-multifd-Drop-pages-num-check-in-sender-thr.patch new file mode 100644 index 0000000..fc97d2e --- /dev/null +++ b/migration-multifd-Drop-pages-num-check-in-sender-thr.patch @@ -0,0 +1,46 @@ +From b24853b2f5524d988406732fc22c3fe9253de104 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:42 +0800 +Subject: [32/99] migration/multifd: Drop pages->num check in sender thread + +commit 83c560fb4249ee5698652249e0c1730c3d611a9b upstream. + +Now with a split SYNC handler, we always have pages->num set for +pending_job==true. Assert it instead. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-9-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index cef4a88237..a67917b113 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -714,13 +714,14 @@ static void *multifd_send_thread(void *opaque) + p->iovs_num = 1; + } + +- if (pages->num) { +- ret = multifd_send_state->ops->send_prepare(p, &local_err); +- if (ret != 0) { +- qemu_mutex_unlock(&p->mutex); +- break; +- } ++ assert(pages->num); ++ ++ ret = multifd_send_state->ops->send_prepare(p, &local_err); ++ if (ret != 0) { ++ qemu_mutex_unlock(&p->mutex); ++ break; + } ++ + multifd_send_fill_packet(p); + p->num_packets++; + p->total_normal_pages += pages->num; +-- +2.33.0 + diff --git a/migration-multifd-Drop-registered_yank.patch b/migration-multifd-Drop-registered_yank.patch new file mode 100644 index 0000000..fba2ec0 --- /dev/null +++ b/migration-multifd-Drop-registered_yank.patch @@ -0,0 +1,65 @@ +From 103fe08122ba65282660932a5e342a282a4b3e1c Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Thu, 22 Feb 2024 17:52:58 +0800 +Subject: [58/99] migration/multifd: Drop registered_yank + +commit 0518b5d8d30d3a4d0ea4f45d61527bcdc43044d2 upstream. 
+ +With a clear definition of p->c protocol, where we only set it up if the +channel is fully established (TLS or non-TLS), registered_yank boolean will +have equal meaning of "p->c != NULL". + +Drop registered_yank by checking p->c instead. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240222095301.171137-3-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 7 +++---- + migration/multifd.h | 2 -- + 2 files changed, 3 insertions(+), 6 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index ad8fa6a317..3e85bc544a 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -667,11 +667,11 @@ static int multifd_send_channel_destroy(QIOChannel *send) + + static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) + { +- if (p->registered_yank) { ++ if (p->c) { + migration_ioc_unregister_yank(p->c); ++ multifd_send_channel_destroy(p->c); ++ p->c = NULL; + } +- multifd_send_channel_destroy(p->c); +- p->c = NULL; + qemu_sem_destroy(&p->sem); + qemu_sem_destroy(&p->sem_sync); + g_free(p->name); +@@ -954,7 +954,6 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + qio_channel_set_delay(ioc, false); + + migration_ioc_register_yank(ioc); +- p->registered_yank = true; + /* Setup p->c only if the channel is completely setup */ + p->c = ioc; + +diff --git a/migration/multifd.h b/migration/multifd.h +index 8a1cad0996..b3fe27ae93 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -78,8 +78,6 @@ typedef struct { + bool tls_thread_created; + /* communication channel */ + QIOChannel *c; +- /* is the yank function registered */ +- bool registered_yank; + /* packet allocated len */ + uint32_t packet_len; + /* guest page size */ +-- +2.33.0 + diff --git a/migration-multifd-Drop-stale-comment-for-multifd-zer.patch b/migration-multifd-Drop-stale-comment-for-multifd-zer.patch new file mode 100644 index 0000000..c8d2dc0 --- /dev/null +++ 
b/migration-multifd-Drop-stale-comment-for-multifd-zer.patch @@ -0,0 +1,43 @@ +From c454cdf2eef413af1c5ca04524e15dffdfc90a58 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:35 +0800 +Subject: [25/99] migration/multifd: Drop stale comment for multifd zero copy + +commit 8888a552bf7af200e36ff123772547dfb4f133c4 upstream. + +We've already done that with multifd_flush_after_each_section, for multifd +in general. Drop the stale "TODO-like" comment. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-2-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 11 ----------- + 1 file changed, 11 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index ef7d4520c4..07e7e78029 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -599,17 +599,6 @@ int multifd_send_sync_main(void) + } + } + +- /* +- * When using zero-copy, it's necessary to flush the pages before any of +- * the pages can be sent again, so we'll make sure the new version of the +- * pages will always arrive _later_ than the old pages. +- * +- * Currently we achieve this by flushing the zero-page requested writes +- * per ram iteration, but in the future we could potentially optimize it +- * to be less frequent, e.g. only after we finished one whole scanning of +- * all the dirty bitmaps. 
+- */ +- + flush_zero_copy = migrate_zero_copy_send(); + + for (i = 0; i < migrate_multifd_channels(); i++) { +-- +2.33.0 + diff --git a/migration-multifd-Drop-unnecessary-helper-to-destroy.patch b/migration-multifd-Drop-unnecessary-helper-to-destroy.patch new file mode 100644 index 0000000..2ac32ca --- /dev/null +++ b/migration-multifd-Drop-unnecessary-helper-to-destroy.patch @@ -0,0 +1,77 @@ +From 0700d5acc4e51e949cc6d34a9bbb504a2803a127 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Thu, 22 Feb 2024 17:53:01 +0800 +Subject: [61/99] migration/multifd: Drop unnecessary helper to destroy IOC + +commit c9a7e83c9d64fd5ebc759186789e1b753c919d32 upstream. + +Both socket_send_channel_destroy() and multifd_send_channel_destroy() are +unnecessary wrappers to destroy an IOC, as the only thing to do is to +release the final IOC reference. We have plenty of code that destroys an +IOC using direct unref() already; keep that style. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240222095301.171137-6-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 7 +------ + migration/socket.c | 7 ------- + migration/socket.h | 1 - + 3 files changed, 1 insertion(+), 14 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index aa7b7e224e..9e3955cb8c 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -660,16 +660,11 @@ static void multifd_send_terminate_threads(void) + } + } + +-static int multifd_send_channel_destroy(QIOChannel *send) +-{ +- return socket_send_channel_destroy(send); +-} +- + static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) + { + if (p->c) { + migration_ioc_unregister_yank(p->c); +- multifd_send_channel_destroy(p->c); ++ object_unref(OBJECT(p->c)); + p->c = NULL; + } + qemu_sem_destroy(&p->sem); +diff --git a/migration/socket.c b/migration/socket.c +index 3184c7c3c1..9ab89b1e08 100644 +--- a/migration/socket.c ++++ b/migration/socket.c +@@ -60,13 
+60,6 @@ QIOChannel *socket_send_channel_create_sync(Error **errp) + return QIO_CHANNEL(sioc); + } + +-int socket_send_channel_destroy(QIOChannel *send) +-{ +- /* Remove channel */ +- object_unref(OBJECT(send)); +- return 0; +-} +- + struct SocketConnectData { + MigrationState *s; + char *hostname; +diff --git a/migration/socket.h b/migration/socket.h +index 5f52eddd4c..46c233ecd2 100644 +--- a/migration/socket.h ++++ b/migration/socket.h +@@ -23,7 +23,6 @@ + + void socket_send_channel_create(QIOTaskFunc f, void *data); + QIOChannel *socket_send_channel_create_sync(Error **errp); +-int socket_send_channel_destroy(QIOChannel *send); + + void socket_start_incoming_migration(SocketAddress *saddr, Error **errp); + +-- +2.33.0 + diff --git a/migration-multifd-Ensure-packet-ramblock-is-null-ter.patch b/migration-multifd-Ensure-packet-ramblock-is-null-ter.patch new file mode 100644 index 0000000..89c1d65 --- /dev/null +++ b/migration-multifd-Ensure-packet-ramblock-is-null-ter.patch @@ -0,0 +1,69 @@ +From 82b23ca67d0a5d77cb0266b89f76b9c8c4bffb3d Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 19 Sep 2024 12:06:11 -0300 +Subject: [95/99] migration/multifd: Ensure packet->ramblock is null-terminated + +commit 68e0fca625912c7c63a8bfbc784f53d4fefa1a13 upstream. + +Coverity points out that the current usage of strncpy to write the +ramblock name allows the field to not have an ending '\0' in case +idstr is already not null-terminated (e.g. if it's larger than 256 +bytes). + +This is currently harmless because the packet->ramblock field is never +touched again on the source side. The destination side reads only up +to the field's size from the stream and forces the last byte to be 0. + +We're still open to a programming error in the future in case this +field is ever passed into a function that expects a null-terminated +string. + +Change from strncpy to QEMU's pstrcpy, which puts a '\0' at the end of +the string and doesn't fill the extra space with zeros. 
+ +(there's no spillage between iterations of fill_packet because after +commit 87bb9e953e ("migration/multifd: Isolate ram pages packet data") +the packet is always zeroed before filling) + +Resolves: Coverity CID 1560071 +Reported-by: Peter Maydell +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240919150611.17074-1-farosas@suse.de +Signed-off-by: Peter Xu + + Conflicts: + migration/multifd-nocomp.c +[jz: upstream has split nocomp code into multifd-nocomp.c, while + openEuler hasn't yet. The function that needs to be fixed is + still named multifd_send_fill_packet in multifd.c, so we fix + it in multifd.c] +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 0fcecc3759..3761a803ed 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -16,6 +16,7 @@ + #include "exec/target_page.h" + #include "sysemu/sysemu.h" + #include "exec/ramblock.h" ++#include "qemu/cutils.h" + #include "qemu/error-report.h" + #include "qapi/error.h" + #include "qapi/qapi-events-migration.h" +@@ -400,7 +401,8 @@ void multifd_send_fill_packet(MultiFDSendParams *p) + packet->packet_num = cpu_to_be64(packet_num); + + if (pages->block) { +- strncpy(packet->ramblock, pages->block->idstr, 256); ++ pstrcpy(packet->ramblock, sizeof(packet->ramblock), ++ pages->block->idstr); + } + + for (i = 0; i < pages->num; i++) { +-- +2.33.0 + diff --git a/migration-multifd-Fix-MultiFDSendParams.packet_num-r.patch b/migration-multifd-Fix-MultiFDSendParams.packet_num-r.patch new file mode 100644 index 0000000..dc7f5fb --- /dev/null +++ b/migration-multifd-Fix-MultiFDSendParams.packet_num-r.patch @@ -0,0 +1,167 @@ +From cafe218b15706cf78c3790eaa08497c09d78c7b4 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:56 +0800 +Subject: [46/99] migration/multifd: Fix MultiFDSendParams.packet_num race + +commit 98ea497d8b8a5076be7b6ceb0dcc4a475373eb76 
upstream. + +As reported correctly by Fabiano [1] (while per Fabiano, it sourced back to +Elena's initial report in Oct 2023), MultiFDSendParams.packet_num is buggy +to be assigned and stored. Consider two consecutive operations of: (1) +queue a job into multifd send thread X, then (2) queue another sync request +to the same send thread X. Then the MultiFDSendParams.packet_num will be +assigned twice, and the first assignment can get lost already. + +To avoid that, we move the packet_num assignment from p->packet_num into +where the thread will fill in the packet. Use atomic operations to protect +the field, making sure there's no race. + +Note that atomic fetch_add() may not be good for scaling purposes, however +multifd should be fine as number of threads should normally not go beyond +16 threads. Let's leave that concern for later but fix the issue first. + +There's also a trick on how to make it always work even on 32 bit hosts for +uint64_t packet number. Switching to uintptr_t as of now to simplify the +case. It will cause packet number to overflow easier on 32 bit, but that +shouldn't be a major concern for now as 32 bit systems are not the major +audience for any performance concerns like what multifd wants to address. + +We also need to move multifd_send_state definition upper, so that +multifd_send_fill_packet() can reference it.
+ +[1] https://lore.kernel.org/r/87o7d1jlu5.fsf@suse.de + +Reported-by: Elena Ufimtseva +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-23-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 56 +++++++++++++++++++++++++++------------------ + migration/multifd.h | 2 -- + 2 files changed, 34 insertions(+), 24 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 723b1d0b35..c52c18046a 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -46,6 +46,35 @@ typedef struct { + uint64_t unused2[4]; /* Reserved for future use */ + } __attribute__((packed)) MultiFDInit_t; + ++struct { ++ MultiFDSendParams *params; ++ /* array of pages to sent */ ++ MultiFDPages_t *pages; ++ /* ++ * Global number of generated multifd packets. ++ * ++ * Note that we used 'uintptr_t' because it'll naturally support atomic ++ * operations on both 32bit / 64 bits hosts. It means on 32bit systems ++ * multifd will overflow the packet_num easier, but that should be ++ * fine. ++ * ++ * Another option is to use QEMU's Stat64 then it'll be 64 bits on all ++ * hosts, however so far it does not support atomic fetch_add() yet. ++ * Make it easy for now. ++ */ ++ uintptr_t packet_num; ++ /* send channels ready */ ++ QemuSemaphore channels_ready; ++ /* ++ * Have we already run terminate threads. There is a race when it ++ * happens that we got one error while we are exiting. ++ * We will use atomic operations. Only valid values are 0 and 1. 
++ */ ++ int exiting; ++ /* multifd ops */ ++ MultiFDMethods *ops; ++} *multifd_send_state; ++ + /* Multifd without compression */ + + /** +@@ -293,13 +322,16 @@ void multifd_send_fill_packet(MultiFDSendParams *p) + { + MultiFDPacket_t *packet = p->packet; + MultiFDPages_t *pages = p->pages; ++ uint64_t packet_num; + int i; + + packet->flags = cpu_to_be32(p->flags); + packet->pages_alloc = cpu_to_be32(p->pages->allocated); + packet->normal_pages = cpu_to_be32(pages->num); + packet->next_packet_size = cpu_to_be32(p->next_packet_size); +- packet->packet_num = cpu_to_be64(p->packet_num); ++ ++ packet_num = qatomic_fetch_inc(&multifd_send_state->packet_num); ++ packet->packet_num = cpu_to_be64(packet_num); + + if (pages->block) { + strncpy(packet->ramblock, pages->block->idstr, 256); +@@ -315,7 +347,7 @@ void multifd_send_fill_packet(MultiFDSendParams *p) + p->packets_sent++; + p->total_normal_pages += pages->num; + +- trace_multifd_send(p->id, p->packet_num, pages->num, p->flags, ++ trace_multifd_send(p->id, packet_num, pages->num, p->flags, + p->next_packet_size); + } + +@@ -399,24 +431,6 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + return 0; + } + +-struct { +- MultiFDSendParams *params; +- /* array of pages to sent */ +- MultiFDPages_t *pages; +- /* global number of generated multifd packets */ +- uint64_t packet_num; +- /* send channels ready */ +- QemuSemaphore channels_ready; +- /* +- * Have we already run terminate threads. There is a race when it +- * happens that we got one error while we are exiting. +- * We will use atomic operations. Only valid values are 0 and 1. 
+- */ +- int exiting; +- /* multifd ops */ +- MultiFDMethods *ops; +-} *multifd_send_state; +- + static bool multifd_send_should_exit(void) + { + return qatomic_read(&multifd_send_state->exiting); +@@ -498,7 +512,6 @@ static bool multifd_send_pages(void) + */ + assert(qatomic_read(&p->pending_job) == false); + qatomic_set(&p->pending_job, true); +- p->packet_num = multifd_send_state->packet_num++; + multifd_send_state->pages = p->pages; + p->pages = pages; + qemu_mutex_unlock(&p->mutex); +@@ -731,7 +744,6 @@ int multifd_send_sync_main(void) + trace_multifd_send_sync_main_signal(p->id); + + qemu_mutex_lock(&p->mutex); +- p->packet_num = multifd_send_state->packet_num++; + /* + * We should be the only user so far, so not possible to be set by + * others concurrently. +diff --git a/migration/multifd.h b/migration/multifd.h +index 9b40a53cb6..98876ff94a 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -97,8 +97,6 @@ typedef struct { + bool running; + /* multifd flags for each packet */ + uint32_t flags; +- /* global number of generated multifd packets */ +- uint64_t packet_num; + /* + * The sender thread has work to do if either of below boolean is set. + * +-- +2.33.0 + diff --git a/migration-multifd-Fix-error-message-in-multifd_recv_.patch b/migration-multifd-Fix-error-message-in-multifd_recv_.patch new file mode 100644 index 0000000..c276487 --- /dev/null +++ b/migration-multifd-Fix-error-message-in-multifd_recv_.patch @@ -0,0 +1,41 @@ +From 122a0daf78f540bb3595432acc33a749cc6ca5a4 Mon Sep 17 00:00:00 2001 +From: Avihai Horon +Date: Sun, 31 Dec 2023 11:30:10 +0200 +Subject: [01/99] migration/multifd: Fix error message in + multifd_recv_initial_packet() + +commit c77b40859a5201f01b44dc475258405e289c431f upstream. + +In multifd_recv_initial_packet(), if MultiFDInit_t->id is greater than +the configured number of multifd channels, an irrelevant error message +about multifd version is printed. 
+ +Change the error message to a relevant one about the channel id. + +Signed-off-by: Avihai Horon +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20231231093016.14204-6-avihaih@nvidia.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index f3bf6888c0..055b2688ad 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -229,8 +229,8 @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp) + } + + if (msg.id > migrate_multifd_channels()) { +- error_setg(errp, "multifd: received channel version %u " +- "expected %u", msg.version, MULTIFD_VERSION); ++ error_setg(errp, "multifd: received channel id %u is greater than " ++ "number of channels %u", msg.id, migrate_multifd_channels()); + return -1; + } + +-- +2.33.0 + diff --git a/migration-multifd-Fix-leaking-of-Error-in-TLS-error-.patch b/migration-multifd-Fix-leaking-of-Error-in-TLS-error-.patch new file mode 100644 index 0000000..93d7d80 --- /dev/null +++ b/migration-multifd-Fix-leaking-of-Error-in-TLS-error-.patch @@ -0,0 +1,49 @@ +From 313207b5d51f530b45f106addcf489845f32b449 Mon Sep 17 00:00:00 2001 +From: Avihai Horon +Date: Sun, 31 Dec 2023 11:30:12 +0200 +Subject: [03/99] migration/multifd: Fix leaking of Error in TLS error flow + +commit 6ae208ce9656114e428b1a75ac62a6761ed3216c upstream. + +If there is an error in multifd TLS handshake task, +multifd_tls_outgoing_handshake() retrieves the error with +qio_task_propagate_error() but never frees it. + +Fix it by freeing the obtained Error. + +In addition, the error is not reported at all, so report it with +migrate_set_error(). 
+ +Fixes: 29647140157a ("migration/tls: add support for multifd tls-handshake") +Signed-off-by: Avihai Horon +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20231231093016.14204-8-avihaih@nvidia.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 06585f0141..8221ebe4b6 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -791,6 +791,7 @@ static void multifd_tls_outgoing_handshake(QIOTask *task, + + trace_multifd_tls_outgoing_handshake_error(ioc, error_get_pretty(err)); + ++ migrate_set_error(migrate_get_current(), err); + /* + * Error happen, mark multifd_send_thread status as 'quit' although it + * is not created, and then tell who pay attention to me. +@@ -798,6 +799,7 @@ static void multifd_tls_outgoing_handshake(QIOTask *task, + p->quit = true; + qemu_sem_post(&multifd_send_state->channels_ready); + qemu_sem_post(&p->sem_sync); ++ error_free(err); + } + + static void *multifd_tls_handshake_thread(void *opaque) +-- +2.33.0 + diff --git a/migration-multifd-Fix-loop-conditions-in-multifd_zst.patch b/migration-multifd-Fix-loop-conditions-in-multifd_zst.patch new file mode 100644 index 0000000..0e6a007 --- /dev/null +++ b/migration-multifd-Fix-loop-conditions-in-multifd_zst.patch @@ -0,0 +1,57 @@ +From 75ab1fea57e8925efd8a3bef827d0c0f0cdd1fa2 Mon Sep 17 00:00:00 2001 +From: Stefan Weil +Date: Tue, 10 Sep 2024 07:41:38 +0200 +Subject: [93/99] migration/multifd: Fix loop conditions in + multifd_zstd_send_prepare and multifd_zstd_recv + +commit cb0ed522a51a7d4b1fde535972d4aeeb82447928 upstream. + +GitHub's CodeQL reports four critical errors which are fixed by this commit: + + Unsigned difference expression compared to zero + +An expression (u - v > 0) with unsigned values u, v is only false if u == v, +so all changed expressions did not work as expected. 
+ +Signed-off-by: Stefan Weil +Link: https://lore.kernel.org/r/20240910054138.1458555-1-sw@weilnetz.de +[peterx: Fix mangled email for author] +Signed-off-by: Peter Xu + + Conflicts: + migration/multifd-zstd.c +[jz: resolve context conflict due to p->page which not renamed to page yet] +Signed-off-by: Jason Zeng +--- + migration/multifd-zstd.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index ca17b7e310..46ee68b6ce 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -152,9 +152,9 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + */ + do { + ret = ZSTD_compressStream2(z->zcs, &z->out, &z->in, flush); +- } while (ret > 0 && (z->in.size - z->in.pos > 0) +- && (z->out.size - z->out.pos > 0)); +- if (ret > 0 && (z->in.size - z->in.pos > 0)) { ++ } while (ret > 0 && (z->in.size > z->in.pos) ++ && (z->out.size > z->out.pos)); ++ if (ret > 0 && (z->in.size > z->in.pos)) { + error_setg(errp, "multifd %u: compressStream buffer too small", + p->id); + return -1; +@@ -299,7 +299,7 @@ static int zstd_recv(MultiFDRecvParams *p, Error **errp) + */ + do { + ret = ZSTD_decompressStream(z->zds, &z->out, &z->in); +- } while (ret > 0 && (z->in.size - z->in.pos > 0) ++ } while (ret > 0 && (z->in.size > z->in.pos) + && (z->out.pos < p->page_size)); + if (ret > 0 && (z->out.pos < p->page_size)) { + error_setg(errp, "multifd %u: decompressStream buffer too small", +-- +2.33.0 + diff --git a/migration-multifd-Fix-p-iov-leak-in-multifd-uadk.c.patch b/migration-multifd-Fix-p-iov-leak-in-multifd-uadk.c.patch new file mode 100644 index 0000000..8234b59 --- /dev/null +++ b/migration-multifd-Fix-p-iov-leak-in-multifd-uadk.c.patch @@ -0,0 +1,36 @@ +From 85507465a9de3d745204ad86c4cd4a6a7b5004b1 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Wed, 28 Aug 2024 11:56:48 -0300 +Subject: [87/99] migration/multifd: Fix p->iov leak in multifd-uadk.c + +commit 
405e352d28c24991cacfdebccf67d56c4795cf6e upstream. + +The send_cleanup() hook should free the p->iov that was allocated at +send_setup(). This was missed because the UADK code is conditional on +the presence of the accelerator, so it's not tested by default. + +Fixes: 819dd20636 ("migration/multifd: Add UADK initialization") +Reported-by: Peter Xu +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + migration/multifd-uadk.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/migration/multifd-uadk.c b/migration/multifd-uadk.c +index d12353fb21..9a582fc919 100644 +--- a/migration/multifd-uadk.c ++++ b/migration/multifd-uadk.c +@@ -146,6 +146,8 @@ static void multifd_uadk_send_cleanup(MultiFDSendParams *p, Error **errp) + + multifd_uadk_uninit_sess(wd); + p->compress_data = NULL; ++ g_free(p->iov); ++ p->iov = NULL; + } + + static inline void prepare_next_iov(MultiFDSendParams *p, void *base, +-- +2.33.0 + diff --git a/migration-multifd-Fix-rb-receivedmap-cleanup-race.patch b/migration-multifd-Fix-rb-receivedmap-cleanup-race.patch new file mode 100644 index 0000000..9b251a3 --- /dev/null +++ b/migration-multifd-Fix-rb-receivedmap-cleanup-race.patch @@ -0,0 +1,95 @@ +From a15e40dc17b96c431ad4c71377a3a66e57a00dab Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Tue, 17 Sep 2024 15:58:02 -0300 +Subject: [94/99] migration/multifd: Fix rb->receivedmap cleanup race + +commit 4ce56229087860805877075ddb29dd44578365a9 upstream. + +Fix a segmentation fault in multifd when rb->receivedmap is cleared +too early. + +After commit 5ef7e26bdb ("migration/multifd: solve zero page causing +multiple page faults"), multifd started using the rb->receivedmap +bitmap, which belongs to ram.c and is initialized and *freed* from the +ram SaveVMHandlers. + +Multifd threads are live until migration_incoming_state_destroy(), +which is called after qemu_loadvm_state_cleanup(), leading to a crash +when accessing rb->receivedmap. 
+ +process_incoming_migration_co() ... + qemu_loadvm_state() multifd_nocomp_recv() + qemu_loadvm_state_cleanup() ramblock_recv_bitmap_set_offset() + rb->receivedmap = NULL set_bit_atomic(..., rb->receivedmap) + ... + migration_incoming_state_destroy() + multifd_recv_cleanup() + multifd_recv_terminate_threads(NULL) + +Move the loadvm cleanup into migration_incoming_state_destroy(), after +multifd_recv_cleanup() to ensure multifd threads have already exited +when rb->receivedmap is cleared. + +Adjust the postcopy listen thread comment to indicate that we still +want to skip the cpu synchronization. + +CC: qemu-stable@nongnu.org +Fixes: 5ef7e26bdb ("migration/multifd: solve zero page causing multiple page faults") +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240917185802.15619-3-farosas@suse.de +[peterx: added comment in migration_incoming_state_destroy()] +Signed-off-by: Peter Xu + + Conflicts: + migration/migration.c +[jz: resolve context conflict due to non-multifd compression which is + already deleted in upstream while still in openEuler] +Signed-off-by: Jason Zeng +--- + migration/migration.c | 5 +++++ + migration/savevm.c | 6 ++++-- + 2 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 59c0bbee67..107e106b73 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -276,6 +276,11 @@ void migration_incoming_state_destroy(void) + + multifd_recv_cleanup(); + compress_threads_load_cleanup(); ++ /* ++ * RAM state cleanup needs to happen after multifd cleanup, because ++ * multifd threads can use some of its states (receivedmap). 
++ */ ++ qemu_loadvm_state_cleanup(); + + if (mis->to_src_file) { + /* Tell source that we are done */ +diff --git a/migration/savevm.c b/migration/savevm.c +index cc65da605e..29389068df 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -2959,7 +2959,10 @@ int qemu_loadvm_state(QEMUFile *f) + trace_qemu_loadvm_state_post_main(ret); + + if (mis->have_listen_thread) { +- /* Listen thread still going, can't clean up yet */ ++ /* ++ * Postcopy listen thread still going, don't synchronize the ++ * cpus yet. ++ */ + return ret; + } + +@@ -3002,7 +3005,6 @@ int qemu_loadvm_state(QEMUFile *f) + } + } + +- qemu_loadvm_state_cleanup(); + cpu_synchronize_all_post_init(); + + return ret; +-- +2.33.0 + diff --git a/migration-multifd-Forbid-spurious-wakeups.patch b/migration-multifd-Forbid-spurious-wakeups.patch new file mode 100644 index 0000000..82d7cb7 --- /dev/null +++ b/migration-multifd-Forbid-spurious-wakeups.patch @@ -0,0 +1,51 @@ +From d4f46c41e0dd921563614ad48e7099eeac06d285 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:48 +0800 +Subject: [38/99] migration/multifd: Forbid spurious wakeups + +commit 859ebaf346e8b5dece6cf255c604fe953d8ec9ab upstream. + +Now multifd's logic is designed to have no spurious wakeup. I still +remember a talk to Juan and he seems to agree we should drop it now, and if +my memory was right it was there because multifd used to hit that when +still debugging. + +Let's drop it and see what can explode; as long as it's not reaching +soft-freeze. 
+ +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-15-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index d4528cf9d1..3b7984cf99 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -760,7 +760,9 @@ static void *multifd_send_thread(void *opaque) + p->next_packet_size = 0; + qatomic_set(&p->pending_job, false); + qemu_mutex_unlock(&p->mutex); +- } else if (qatomic_read(&p->pending_sync)) { ++ } else { ++ /* If not a normal job, must be a sync request */ ++ assert(qatomic_read(&p->pending_sync)); + p->flags = MULTIFD_FLAG_SYNC; + multifd_send_fill_packet(p); + ret = qio_channel_write_all(p->c, (void *)p->packet, +@@ -775,9 +777,6 @@ static void *multifd_send_thread(void *opaque) + qatomic_set(&p->pending_sync, false); + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem_sync); +- } else { +- qemu_mutex_unlock(&p->mutex); +- /* sometimes there are spurious wakeups */ + } + } + +-- +2.33.0 + diff --git a/migration-multifd-Implement-ram_save_target_page_mul.patch b/migration-multifd-Implement-ram_save_target_page_mul.patch new file mode 100644 index 0000000..fb771c1 --- /dev/null +++ b/migration-multifd-Implement-ram_save_target_page_mul.patch @@ -0,0 +1,94 @@ +From 5107700317e5cba24822f71615a001a8a62fea07 Mon Sep 17 00:00:00 2001 +From: Hao Xiang +Date: Mon, 11 Mar 2024 18:00:13 +0000 +Subject: [70/99] migration/multifd: Implement ram_save_target_page_multifd to + handle multifd version of MigrationOps::ram_save_target_page. + +commit 9ae90f73e623c8b8c7ec1fccd8ca493805df8fbd upstream. + +1. Add a dedicated handler for MigrationOps::ram_save_target_page in +multifd live migration. +2. Refactor ram_save_target_page_legacy so that the legacy and multifd +handlers don't have internal functions calling into each other. 
+ +Signed-off-by: Hao Xiang +Reviewed-by: Fabiano Rosas +Message-Id: <20240226195654.934709-4-hao.xiang@bytedance.com> +Link: https://lore.kernel.org/r/20240311180015.3359271-6-hao.xiang@linux.dev +Signed-off-by: Peter Xu +[jz: resolve context conflict due to BQL name] +Signed-off-by: Jason Zeng +--- + migration/ram.c | 38 +++++++++++++++++++++++++++++--------- + 1 file changed, 29 insertions(+), 9 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index bae5853996..fe2e4c6164 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -2233,7 +2233,6 @@ static bool encrypted_test_list(RAMState *rs, RAMBlock *block, + */ + static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) + { +- RAMBlock *block = pss->block; + ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; + int res; + +@@ -2260,17 +2259,33 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) + return 1; + } + ++ return ram_save_page(rs, pss); ++} ++ ++/** ++ * ram_save_target_page_multifd: send one target page to multifd workers ++ * ++ * Returns 1 if the page was queued, -1 otherwise. ++ * ++ * @rs: current RAM state ++ * @pss: data about the page we want to send ++ */ ++static int ram_save_target_page_multifd(RAMState *rs, PageSearchStatus *pss) ++{ ++ RAMBlock *block = pss->block; ++ ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; ++ + /* +- * Do not use multifd in postcopy as one whole host page should be +- * placed. Meanwhile postcopy requires atomic update of pages, so even +- * if host page size == guest page size the dest guest during run may +- * still see partially copied pages which is data corruption. ++ * While using multifd live migration, we still need to handle zero ++ * page checking on the migration main thread. 
+ */ +- if (migrate_multifd() && !migration_in_postcopy()) { +- return ram_save_multifd_page(block, offset); ++ if (migrate_zero_page_detection() == ZERO_PAGE_DETECTION_LEGACY) { ++ if (save_zero_page(rs, pss, offset)) { ++ return 1; ++ } + } + +- return ram_save_page(rs, pss); ++ return ram_save_multifd_page(block, offset); + } + + /* Should be called before sending a host page */ +@@ -3433,7 +3448,12 @@ static int ram_save_setup(QEMUFile *f, void *opaque) + } + + migration_ops = g_malloc0(sizeof(MigrationOps)); +- migration_ops->ram_save_target_page = ram_save_target_page_legacy; ++ ++ if (migrate_multifd()) { ++ migration_ops->ram_save_target_page = ram_save_target_page_multifd; ++ } else { ++ migration_ops->ram_save_target_page = ram_save_target_page_legacy; ++ } + + qemu_mutex_unlock_iothread(); + ret = multifd_send_sync_main(); +-- +2.33.0 + diff --git a/migration-multifd-Implement-zero-page-transmission-o.patch b/migration-multifd-Implement-zero-page-transmission-o.patch new file mode 100644 index 0000000..85b976c --- /dev/null +++ b/migration-multifd-Implement-zero-page-transmission-o.patch @@ -0,0 +1,622 @@ +From 68f37655bf414e74c623164c9c20bc7884ee5bb8 Mon Sep 17 00:00:00 2001 +From: Hao Xiang +Date: Mon, 11 Mar 2024 18:00:12 +0000 +Subject: [69/99] migration/multifd: Implement zero page transmission on the + multifd thread. + +commit 303e6f54f9657be76ee060006ee2d4cacff263a0 upstream. + +1. Add zero_pages field in MultiFDPacket_t. +2. Implements the zero page detection and handling on the multifd +threads for non-compression, zlib and zstd compression backends. +3. Added a new value 'multifd' in ZeroPageDetection enumeration. +4. Adds zero page counters and updates multifd send/receive tracing +format to track the newly added counters. 
+ +Signed-off-by: Hao Xiang +Acked-by: Markus Armbruster +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240311180015.3359271-5-hao.xiang@linux.dev +Signed-off-by: Peter Xu + + Conflicts: + migration/meson.build + migration/multifd.c +[jz: there is no multifd_set_file_bitmap() because we didn't backport + mapped-ram, so abandon changes in multifd_set_file_bitmap()] +Signed-off-by: Jason Zeng +--- + hw/core/qdev-properties-system.c | 2 +- + migration/meson.build | 1 + + migration/multifd-zero-page.c | 87 ++++++++++++++++++++++++++++++++ + migration/multifd-zlib.c | 21 ++++++-- + migration/multifd-zstd.c | 20 ++++++-- + migration/multifd.c | 83 +++++++++++++++++++++++++----- + migration/multifd.h | 23 ++++++++- + migration/ram.c | 1 - + migration/trace-events | 8 +-- + qapi/migration.json | 7 ++- + 10 files changed, 222 insertions(+), 31 deletions(-) + create mode 100644 migration/multifd-zero-page.c + +diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c +index cad1e04150..b3b9238b65 100644 +--- a/hw/core/qdev-properties-system.c ++++ b/hw/core/qdev-properties-system.c +@@ -735,7 +735,7 @@ const PropertyInfo qdev_prop_mig_mode = { + const PropertyInfo qdev_prop_zero_page_detection = { + .name = "ZeroPageDetection", + .description = "zero_page_detection values, " +- "none,legacy", ++ "none,legacy,multifd", + .enum_table = &ZeroPageDetection_lookup, + .get = qdev_propinfo_get_enum, + .set = qdev_propinfo_set_enum, +diff --git a/migration/meson.build b/migration/meson.build +index d9b46ef0df..d619ebf238 100644 +--- a/migration/meson.build ++++ b/migration/meson.build +@@ -22,6 +22,7 @@ system_ss.add(files( + 'migration.c', + 'multifd.c', + 'multifd-zlib.c', ++ 'multifd-zero-page.c', + 'options.c', + 'postcopy-ram.c', + 'savevm.c', +diff --git a/migration/multifd-zero-page.c b/migration/multifd-zero-page.c +new file mode 100644 +index 0000000000..1ba38be636 +--- /dev/null ++++ b/migration/multifd-zero-page.c +@@ -0,0 +1,87 @@ 
++/* ++ * Multifd zero page detection implementation. ++ * ++ * Copyright (c) 2024 Bytedance Inc ++ * ++ * Authors: ++ * Hao Xiang ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++ ++#include "qemu/osdep.h" ++#include "qemu/cutils.h" ++#include "exec/ramblock.h" ++#include "migration.h" ++#include "multifd.h" ++#include "options.h" ++#include "ram.h" ++ ++static bool multifd_zero_page_enabled(void) ++{ ++ return migrate_zero_page_detection() == ZERO_PAGE_DETECTION_MULTIFD; ++} ++ ++static void swap_page_offset(ram_addr_t *pages_offset, int a, int b) ++{ ++ ram_addr_t temp; ++ ++ if (a == b) { ++ return; ++ } ++ ++ temp = pages_offset[a]; ++ pages_offset[a] = pages_offset[b]; ++ pages_offset[b] = temp; ++} ++ ++/** ++ * multifd_send_zero_page_detect: Perform zero page detection on all pages. ++ * ++ * Sorts normal pages before zero pages in p->pages->offset and updates ++ * p->pages->normal_num. ++ * ++ * @param p A pointer to the send params. ++ */ ++void multifd_send_zero_page_detect(MultiFDSendParams *p) ++{ ++ MultiFDPages_t *pages = p->pages; ++ RAMBlock *rb = pages->block; ++ int i = 0; ++ int j = pages->num - 1; ++ ++ if (!multifd_zero_page_enabled()) { ++ pages->normal_num = pages->num; ++ return; ++ } ++ ++ /* ++ * Sort the page offset array by moving all normal pages to ++ * the left and all zero pages to the right of the array. 
++ */ ++ while (i <= j) { ++ uint64_t offset = pages->offset[i]; ++ ++ if (!buffer_is_zero(rb->host + offset, p->page_size)) { ++ i++; ++ continue; ++ } ++ ++ swap_page_offset(pages->offset, i, j); ++ ram_release_page(rb->idstr, offset); ++ j--; ++ } ++ ++ pages->normal_num = i; ++} ++ ++void multifd_recv_zero_page_process(MultiFDRecvParams *p) ++{ ++ for (int i = 0; i < p->zero_num; i++) { ++ void *page = p->host + p->zero[i]; ++ if (!buffer_is_zero(page, p->page_size)) { ++ memset(page, 0, p->page_size); ++ } ++ } ++} +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 6120faad65..83c0374380 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -123,13 +123,15 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + int ret; + uint32_t i; + +- multifd_send_prepare_header(p); ++ if (!multifd_send_prepare_common(p)) { ++ goto out; ++ } + +- for (i = 0; i < pages->num; i++) { ++ for (i = 0; i < pages->normal_num; i++) { + uint32_t available = z->zbuff_len - out_size; + int flush = Z_NO_FLUSH; + +- if (i == pages->num - 1) { ++ if (i == pages->normal_num - 1) { + flush = Z_SYNC_FLUSH; + } + +@@ -172,10 +174,10 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + p->iov[p->iovs_num].iov_len = out_size; + p->iovs_num++; + p->next_packet_size = out_size; +- p->flags |= MULTIFD_FLAG_ZLIB; + ++out: ++ p->flags |= MULTIFD_FLAG_ZLIB; + multifd_send_fill_packet(p); +- + return 0; + } + +@@ -261,6 +263,14 @@ static int zlib_recv(MultiFDRecvParams *p, Error **errp) + p->id, flags, MULTIFD_FLAG_ZLIB); + return -1; + } ++ ++ multifd_recv_zero_page_process(p); ++ ++ if (!p->normal_num) { ++ assert(in_size == 0); ++ return 0; ++ } ++ + ret = qio_channel_read_all(p->c, (void *)z->zbuff, in_size, errp); + + if (ret != 0) { +@@ -310,6 +320,7 @@ static int zlib_recv(MultiFDRecvParams *p, Error **errp) + p->id, out_size, expected_size); + return -1; + } ++ + return 0; + } + +diff --git a/migration/multifd-zstd.c 
b/migration/multifd-zstd.c +index cac236833d..02112255ad 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -118,16 +118,18 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + int ret; + uint32_t i; + +- multifd_send_prepare_header(p); ++ if (!multifd_send_prepare_common(p)) { ++ goto out; ++ } + + z->out.dst = z->zbuff; + z->out.size = z->zbuff_len; + z->out.pos = 0; + +- for (i = 0; i < pages->num; i++) { ++ for (i = 0; i < pages->normal_num; i++) { + ZSTD_EndDirective flush = ZSTD_e_continue; + +- if (i == pages->num - 1) { ++ if (i == pages->normal_num - 1) { + flush = ZSTD_e_flush; + } + z->in.src = p->pages->block->host + pages->offset[i]; +@@ -161,10 +163,10 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + p->iov[p->iovs_num].iov_len = z->out.pos; + p->iovs_num++; + p->next_packet_size = z->out.pos; +- p->flags |= MULTIFD_FLAG_ZSTD; + ++out: ++ p->flags |= MULTIFD_FLAG_ZSTD; + multifd_send_fill_packet(p); +- + return 0; + } + +@@ -257,6 +259,14 @@ static int zstd_recv(MultiFDRecvParams *p, Error **errp) + p->id, flags, MULTIFD_FLAG_ZSTD); + return -1; + } ++ ++ multifd_recv_zero_page_process(p); ++ ++ if (!p->normal_num) { ++ assert(in_size == 0); ++ return 0; ++ } ++ + ret = qio_channel_read_all(p->c, (void *)z->zbuff, in_size, errp); + + if (ret != 0) { +diff --git a/migration/multifd.c b/migration/multifd.c +index cac5f2743c..6c01179858 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -11,6 +11,7 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/cutils.h" + #include "qemu/rcu.h" + #include "exec/target_page.h" + #include "sysemu/sysemu.h" +@@ -132,13 +133,13 @@ static void multifd_send_prepare_iovs(MultiFDSendParams *p) + { + MultiFDPages_t *pages = p->pages; + +- for (int i = 0; i < pages->num; i++) { ++ for (int i = 0; i < pages->normal_num; i++) { + p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; + p->iov[p->iovs_num].iov_len = p->page_size; + 
p->iovs_num++; + } + +- p->next_packet_size = pages->num * p->page_size; ++ p->next_packet_size = pages->normal_num * p->page_size; + } + + /** +@@ -157,6 +158,8 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + bool use_zero_copy_send = migrate_zero_copy_send(); + int ret; + ++ multifd_send_zero_page_detect(p); ++ + if (!multifd_use_packets()) { + multifd_send_prepare_iovs(p); + return 0; +@@ -238,6 +241,13 @@ static int nocomp_recv(MultiFDRecvParams *p, Error **errp) + p->id, flags, MULTIFD_FLAG_NOCOMP); + return -1; + } ++ ++ multifd_recv_zero_page_process(p); ++ ++ if (!p->normal_num) { ++ return 0; ++ } ++ + for (int i = 0; i < p->normal_num; i++) { + p->iov[i].iov_base = p->host + p->normal[i]; + p->iov[i].iov_len = p->page_size; +@@ -272,6 +282,7 @@ static void multifd_pages_reset(MultiFDPages_t *pages) + * overwritten later when reused. + */ + pages->num = 0; ++ pages->normal_num = 0; + pages->block = NULL; + } + +@@ -363,11 +374,13 @@ void multifd_send_fill_packet(MultiFDSendParams *p) + MultiFDPacket_t *packet = p->packet; + MultiFDPages_t *pages = p->pages; + uint64_t packet_num; ++ uint32_t zero_num = pages->num - pages->normal_num; + int i; + + packet->flags = cpu_to_be32(p->flags); + packet->pages_alloc = cpu_to_be32(p->pages->allocated); +- packet->normal_pages = cpu_to_be32(pages->num); ++ packet->normal_pages = cpu_to_be32(pages->normal_num); ++ packet->zero_pages = cpu_to_be32(zero_num); + packet->next_packet_size = cpu_to_be32(p->next_packet_size); + + packet_num = qatomic_fetch_inc(&multifd_send_state->packet_num); +@@ -385,10 +398,11 @@ void multifd_send_fill_packet(MultiFDSendParams *p) + } + + p->packets_sent++; +- p->total_normal_pages += pages->num; ++ p->total_normal_pages += pages->normal_num; ++ p->total_zero_pages += zero_num; + +- trace_multifd_send(p->id, packet_num, pages->num, p->flags, +- p->next_packet_size); ++ trace_multifd_send(p->id, packet_num, pages->normal_num, zero_num, ++ p->flags, 
p->next_packet_size); + } + + static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) +@@ -429,20 +443,29 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + p->normal_num = be32_to_cpu(packet->normal_pages); + if (p->normal_num > packet->pages_alloc) { + error_setg(errp, "multifd: received packet " +- "with %u pages and expected maximum pages are %u", ++ "with %u normal pages and expected maximum pages are %u", + p->normal_num, packet->pages_alloc) ; + return -1; + } + ++ p->zero_num = be32_to_cpu(packet->zero_pages); ++ if (p->zero_num > packet->pages_alloc - p->normal_num) { ++ error_setg(errp, "multifd: received packet " ++ "with %u zero pages and expected maximum zero pages are %u", ++ p->zero_num, packet->pages_alloc - p->normal_num) ; ++ return -1; ++ } ++ + p->next_packet_size = be32_to_cpu(packet->next_packet_size); + p->packet_num = be64_to_cpu(packet->packet_num); + p->packets_recved++; + p->total_normal_pages += p->normal_num; ++ p->total_zero_pages += p->zero_num; + +- trace_multifd_recv(p->id, p->packet_num, p->normal_num, p->flags, +- p->next_packet_size); ++ trace_multifd_recv(p->id, p->packet_num, p->normal_num, p->zero_num, ++ p->flags, p->next_packet_size); + +- if (p->normal_num == 0) { ++ if (p->normal_num == 0 && p->zero_num == 0) { + return 0; + } + +@@ -468,6 +491,18 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + p->normal[i] = offset; + } + ++ for (i = 0; i < p->zero_num; i++) { ++ uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]); ++ ++ if (offset > (p->block->used_length - p->page_size)) { ++ error_setg(errp, "multifd: offset too long %" PRIu64 ++ " (max " RAM_ADDR_FMT ")", ++ offset, p->block->used_length); ++ return -1; ++ } ++ p->zero[i] = offset; ++ } ++ + return 0; + } + +@@ -866,6 +901,8 @@ static void *multifd_send_thread(void *opaque) + + stat64_add(&mig_stats.multifd_bytes, + (uint64_t)p->next_packet_size + p->packet_len); ++ 
stat64_add(&mig_stats.normal_pages, pages->normal_num); ++ stat64_add(&mig_stats.zero_pages, pages->num - pages->normal_num); + + multifd_pages_reset(p->pages); + p->next_packet_size = 0; +@@ -913,7 +950,8 @@ out: + + rcu_unregister_thread(); + migration_threads_remove(thread); +- trace_multifd_send_thread_end(p->id, p->packets_sent, p->total_normal_pages); ++ trace_multifd_send_thread_end(p->id, p->packets_sent, p->total_normal_pages, ++ p->total_zero_pages); + + return NULL; + } +@@ -1189,6 +1227,8 @@ static void multifd_recv_cleanup_channel(MultiFDRecvParams *p) + p->iov = NULL; + g_free(p->normal); + p->normal = NULL; ++ g_free(p->zero); ++ p->zero = NULL; + multifd_recv_state->ops->recv_cleanup(p); + } + +@@ -1294,7 +1334,7 @@ static void *multifd_recv_thread(void *opaque) + flags = p->flags; + /* recv methods don't know how to handle the SYNC flag */ + p->flags &= ~MULTIFD_FLAG_SYNC; +- has_data = !!p->normal_num; ++ has_data = p->normal_num || p->zero_num; + qemu_mutex_unlock(&p->mutex); + } + +@@ -1319,7 +1359,9 @@ static void *multifd_recv_thread(void *opaque) + } + + rcu_unregister_thread(); +- trace_multifd_recv_thread_end(p->id, p->packets_recved, p->total_normal_pages); ++ trace_multifd_recv_thread_end(p->id, p->packets_recved, ++ p->total_normal_pages, ++ p->total_zero_pages); + + return NULL; + } +@@ -1362,6 +1404,7 @@ int multifd_recv_setup(Error **errp) + p->name = g_strdup_printf("multifdrecv_%d", i); + p->iov = g_new0(struct iovec, page_count); + p->normal = g_new0(ram_addr_t, page_count); ++ p->zero = g_new0(ram_addr_t, page_count); + p->page_count = page_count; + p->page_size = qemu_target_page_size(); + } +@@ -1437,3 +1480,17 @@ void multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + QEMU_THREAD_JOINABLE); + qatomic_inc(&multifd_recv_state->count); + } ++ ++bool multifd_send_prepare_common(MultiFDSendParams *p) ++{ ++ multifd_send_zero_page_detect(p); ++ ++ if (!p->pages->normal_num) { ++ p->next_packet_size = 0; ++ return false; ++ } 
++ ++ multifd_send_prepare_header(p); ++ ++ return true; ++} +diff --git a/migration/multifd.h b/migration/multifd.h +index 6a54377cc1..d99603c6a4 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -48,14 +48,24 @@ typedef struct { + /* size of the next packet that contains pages */ + uint32_t next_packet_size; + uint64_t packet_num; +- uint64_t unused[4]; /* Reserved for future use */ ++ /* zero pages */ ++ uint32_t zero_pages; ++ uint32_t unused32[1]; /* Reserved for future use */ ++ uint64_t unused64[3]; /* Reserved for future use */ + char ramblock[256]; ++ /* ++ * This array contains the pointers to: ++ * - normal pages (initial normal_pages entries) ++ * - zero pages (following zero_pages entries) ++ */ + uint64_t offset[]; + } __attribute__((packed)) MultiFDPacket_t; + + typedef struct { + /* number of used pages */ + uint32_t num; ++ /* number of normal pages */ ++ uint32_t normal_num; + /* number of allocated pages */ + uint32_t allocated; + /* offset of each page */ +@@ -122,6 +132,8 @@ typedef struct { + uint64_t packets_sent; + /* non zero pages sent through this channel */ + uint64_t total_normal_pages; ++ /* zero pages sent through this channel */ ++ uint64_t total_zero_pages; + /* buffers to send */ + struct iovec *iov; + /* number of iovs used */ +@@ -176,12 +188,18 @@ typedef struct { + uint8_t *host; + /* non zero pages recv through this channel */ + uint64_t total_normal_pages; ++ /* zero pages recv through this channel */ ++ uint64_t total_zero_pages; + /* buffers to recv */ + struct iovec *iov; + /* Pages that are not zero */ + ram_addr_t *normal; + /* num of non zero pages */ + uint32_t normal_num; ++ /* Pages that are zero */ ++ ram_addr_t *zero; ++ /* num of zero pages */ ++ uint32_t zero_num; + /* used for de-compression methods */ + void *compress_data; + } MultiFDRecvParams; +@@ -203,6 +221,9 @@ typedef struct { + + void multifd_register_ops(int method, MultiFDMethods *ops); + void multifd_send_fill_packet(MultiFDSendParams 
*p); ++bool multifd_send_prepare_common(MultiFDSendParams *p); ++void multifd_send_zero_page_detect(MultiFDSendParams *p); ++void multifd_recv_zero_page_process(MultiFDRecvParams *p); + + static inline void multifd_send_prepare_header(MultiFDSendParams *p) + { +diff --git a/migration/ram.c b/migration/ram.c +index 7d0f1120df..bae5853996 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1396,7 +1396,6 @@ static int ram_save_multifd_page(RAMBlock *block, ram_addr_t offset) + if (!multifd_queue_page(block, offset)) { + return -1; + } +- stat64_add(&mig_stats.normal_pages, 1); + + return 1; + } +diff --git a/migration/trace-events b/migration/trace-events +index bf1a069632..f0e1cb80c7 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -128,21 +128,21 @@ postcopy_preempt_reset_channel(void) "" + # multifd.c + multifd_new_send_channel_async(uint8_t id) "channel %u" + multifd_new_send_channel_async_error(uint8_t id, void *err) "channel=%u err=%p" +-multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " pages %u flags 0x%x next packet size %u" ++multifd_recv(uint8_t id, uint64_t packet_num, uint32_t normal, uint32_t zero, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " normal pages %u zero pages %u flags 0x%x next packet size %u" + multifd_recv_new_channel(uint8_t id) "channel %u" + multifd_recv_sync_main(long packet_num) "packet num %ld" + multifd_recv_sync_main_signal(uint8_t id) "channel %u" + multifd_recv_sync_main_wait(uint8_t id) "iter %u" + multifd_recv_terminate_threads(bool error) "error %d" +-multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64 ++multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages, uint64_t zero_pages) "channel %u packets %" PRIu64 " normal pages %" PRIu64 " zero pages %" PRIu64 + multifd_recv_thread_start(uint8_t id) 
"%u" +-multifd_send(uint8_t id, uint64_t packet_num, uint32_t normal, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " normal pages %u flags 0x%x next packet size %u" ++multifd_send(uint8_t id, uint64_t packet_num, uint32_t normal_pages, uint32_t zero_pages, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " normal pages %u zero pages %u flags 0x%x next packet size %u" + multifd_send_error(uint8_t id) "channel %u" + multifd_send_sync_main(long packet_num) "packet num %ld" + multifd_send_sync_main_signal(uint8_t id) "channel %u" + multifd_send_sync_main_wait(uint8_t id) "channel %u" + multifd_send_terminate_threads(void) "" +-multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages) "channel %u packets %" PRIu64 " normal pages %" PRIu64 ++multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages, uint64_t zero_pages) "channel %u packets %" PRIu64 " normal pages %" PRIu64 " zero pages %" PRIu64 + multifd_send_thread_start(uint8_t id) "%u" + multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s" + multifd_tls_outgoing_handshake_error(void *ioc, const char *err) "ioc=%p err=%s" +diff --git a/qapi/migration.json b/qapi/migration.json +index ff247a50ce..fc3178b1dc 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -660,10 +660,15 @@ + # + # @legacy: Perform zero page checking in main migration thread. + # ++# @multifd: Perform zero page checking in multifd sender thread if ++# multifd migration is enabled, else in the main migration ++# thread as for @legacy. 
++# + # Since: 9.0 ++# + ## + { 'enum': 'ZeroPageDetection', +- 'data': [ 'none', 'legacy' ] } ++ 'data': [ 'none', 'legacy', 'multifd' ] } + + ## + # @BitmapMigrationBitmapAliasTransform: +-- +2.33.0 + diff --git a/migration-multifd-Join-the-TLS-thread.patch b/migration-multifd-Join-the-TLS-thread.patch new file mode 100644 index 0000000..d7c64d8 --- /dev/null +++ b/migration-multifd-Join-the-TLS-thread.patch @@ -0,0 +1,64 @@ +From 234d32c5cef7114f2554f18c8ad73fb294fb4542 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Tue, 6 Feb 2024 18:51:13 -0300 +Subject: [49/99] migration/multifd: Join the TLS thread + +commit e1921f10d9afe651f4887284e85f6789b37e67d3 upstream. + +We're currently leaking the resources of the TLS thread by not joining +it and also overwriting the p->thread pointer altogether. + +Fixes: a1af605bd5 ("migration/multifd: fix hangup with TLS-Multifd due to blocking handshake") +Cc: qemu-stable +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240206215118.6171-2-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 8 +++++++- + migration/multifd.h | 2 ++ + 2 files changed, 9 insertions(+), 1 deletion(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index c0d8f438bc..459e7889e8 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -631,6 +631,10 @@ static void multifd_send_terminate_threads(void) + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + ++ if (p->tls_thread_created) { ++ qemu_thread_join(&p->tls_thread); ++ } ++ + if (p->running) { + qemu_thread_join(&p->thread); + } +@@ -925,7 +929,9 @@ static bool multifd_tls_channel_connect(MultiFDSendParams *p, + trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname); + qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing"); + p->c = QIO_CHANNEL(tioc); +- qemu_thread_create(&p->thread, "multifd-tls-handshake-worker", 
++ ++ p->tls_thread_created = true; ++ qemu_thread_create(&p->tls_thread, "multifd-tls-handshake-worker", + multifd_tls_handshake_thread, p, + QEMU_THREAD_JOINABLE); + return true; +diff --git a/migration/multifd.h b/migration/multifd.h +index 78a2317263..720c9d50db 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -73,6 +73,8 @@ typedef struct { + char *name; + /* channel thread id */ + QemuThread thread; ++ QemuThread tls_thread; ++ bool tls_thread_created; + /* communication channel */ + QIOChannel *c; + /* is the yank function registered */ +-- +2.33.0 + diff --git a/migration-multifd-Make-multifd_channel_connect-retur.patch b/migration-multifd-Make-multifd_channel_connect-retur.patch new file mode 100644 index 0000000..66daf90 --- /dev/null +++ b/migration-multifd-Make-multifd_channel_connect-retur.patch @@ -0,0 +1,54 @@ +From 797304d0151652a684f0df388036c2032dcc3979 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Thu, 22 Feb 2024 17:52:59 +0800 +Subject: [59/99] migration/multifd: Make multifd_channel_connect() return void + +commit 770de49c00fa9eb262473f282c92979b47b7fd22 upstream. + +It never fails, drop the retval and also the Error**. 
+ +Suggested-by: Avihai Horon +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240222095301.171137-4-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 3e85bc544a..a7289289a4 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -947,9 +947,7 @@ static bool multifd_tls_channel_connect(MultiFDSendParams *p, + return true; + } + +-static bool multifd_channel_connect(MultiFDSendParams *p, +- QIOChannel *ioc, +- Error **errp) ++static void multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc) + { + qio_channel_set_delay(ioc, false); + +@@ -960,7 +958,6 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + p->thread_created = true; + qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, + QEMU_THREAD_JOINABLE); +- return true; + } + + /* +@@ -992,7 +989,8 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + return; + } + } else { +- ret = multifd_channel_connect(p, ioc, &local_err); ++ multifd_channel_connect(p, ioc); ++ ret = true; + } + + out: +-- +2.33.0 + diff --git a/migration-multifd-Move-header-prepare-fill-into-send.patch b/migration-multifd-Move-header-prepare-fill-into-send.patch new file mode 100644 index 0000000..cbf8c46 --- /dev/null +++ b/migration-multifd-Move-header-prepare-fill-into-send.patch @@ -0,0 +1,227 @@ +From 1dfecda79660d2b68cd56a7e44ef76ac847f54d1 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:47 +0800 +Subject: [37/99] migration/multifd: Move header prepare/fill into + send_prepare() + +commit 25a1f8787597f6906b151b2f73ae6cc92a31de57 upstream. + +This patch redefines the interfacing of ->send_prepare(). It further +simplifies multifd_send_thread() especially on zero copy. 
+ +Now with the new interface, we require the hook to do all the work for +preparing the IOVs to send. After it's completed, the IOVs should be ready +to be dumped into the specific multifd QIOChannel later. + +So now the API looks like: + + p->pages -----------> send_prepare() -------------> IOVs + +This also prepares for the case where the input can be extended to even not +any p->pages. But that's for later. + +This patch will achieve similar goal of what Fabiano used to propose here: + +https://lore.kernel.org/r/20240126221943.26628-1-farosas@suse.de + +However the send() interface may not be necessary. I'm boldly attaching a +"Co-developed-by" for Fabiano. + +Co-developed-by: Fabiano Rosas +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-14-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd-zlib.c | 4 +++ + migration/multifd-zstd.c | 4 +++ + migration/multifd.c | 61 ++++++++++++++++++---------------------- + migration/multifd.h | 1 + + 4 files changed, 37 insertions(+), 33 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 100809abc1..012e3bdea1 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -123,6 +123,8 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + int ret; + uint32_t i; + ++ multifd_send_prepare_header(p); ++ + for (i = 0; i < pages->num; i++) { + uint32_t available = z->zbuff_len - out_size; + int flush = Z_NO_FLUSH; +@@ -172,6 +174,8 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + p->next_packet_size = out_size; + p->flags |= MULTIFD_FLAG_ZLIB; + ++ multifd_send_fill_packet(p); ++ + return 0; + } + +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 2023edd8cc..dc8fe43e94 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -118,6 +118,8 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + int ret; + uint32_t 
i; + ++ multifd_send_prepare_header(p); ++ + z->out.dst = z->zbuff; + z->out.size = z->zbuff_len; + z->out.pos = 0; +@@ -161,6 +163,8 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + p->next_packet_size = z->out.pos; + p->flags |= MULTIFD_FLAG_ZSTD; + ++ multifd_send_fill_packet(p); ++ + return 0; + } + +diff --git a/migration/multifd.c b/migration/multifd.c +index a42e152268..d4528cf9d1 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -51,15 +51,15 @@ typedef struct { + /** + * nocomp_send_setup: setup send side + * +- * For no compression this function does nothing. +- * +- * Returns 0 for success or -1 for error +- * + * @p: Params for the channel that we are using + * @errp: pointer to an error + */ + static int nocomp_send_setup(MultiFDSendParams *p, Error **errp) + { ++ if (migrate_zero_copy_send()) { ++ p->write_flags |= QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; ++ } ++ + return 0; + } + +@@ -89,7 +89,17 @@ static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) + */ + static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + { ++ bool use_zero_copy_send = migrate_zero_copy_send(); + MultiFDPages_t *pages = p->pages; ++ int ret; ++ ++ if (!use_zero_copy_send) { ++ /* ++ * Only !zerocopy needs the header in IOV; zerocopy will ++ * send it separately. 
++ */ ++ multifd_send_prepare_header(p); ++ } + + for (int i = 0; i < pages->num; i++) { + p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; +@@ -99,6 +109,18 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + + p->next_packet_size = pages->num * p->page_size; + p->flags |= MULTIFD_FLAG_NOCOMP; ++ ++ multifd_send_fill_packet(p); ++ ++ if (use_zero_copy_send) { ++ /* Send header first, without zerocopy */ ++ ret = qio_channel_write_all(p->c, (void *)p->packet, ++ p->packet_len, errp); ++ if (ret != 0) { ++ return -1; ++ } ++ } ++ + return 0; + } + +@@ -267,7 +289,7 @@ static void multifd_pages_clear(MultiFDPages_t *pages) + g_free(pages); + } + +-static void multifd_send_fill_packet(MultiFDSendParams *p) ++void multifd_send_fill_packet(MultiFDSendParams *p) + { + MultiFDPacket_t *packet = p->packet; + MultiFDPages_t *pages = p->pages; +@@ -689,7 +711,6 @@ static void *multifd_send_thread(void *opaque) + MigrationThread *thread = NULL; + Error *local_err = NULL; + int ret = 0; +- bool use_zero_copy_send = migrate_zero_copy_send(); + + thread = migration_threads_add(p->name, qemu_get_thread_id()); + +@@ -717,15 +738,6 @@ static void *multifd_send_thread(void *opaque) + MultiFDPages_t *pages = p->pages; + + p->iovs_num = 0; +- +- if (!use_zero_copy_send) { +- /* +- * Only !zerocopy needs the header in IOV; zerocopy will +- * send it separately. 
+- */ +- multifd_send_prepare_header(p); +- } +- + assert(pages->num); + + ret = multifd_send_state->ops->send_prepare(p, &local_err); +@@ -734,17 +746,6 @@ static void *multifd_send_thread(void *opaque) + break; + } + +- multifd_send_fill_packet(p); +- +- if (use_zero_copy_send) { +- /* Send header first, without zerocopy */ +- ret = qio_channel_write_all(p->c, (void *)p->packet, +- p->packet_len, &local_err); +- if (ret != 0) { +- break; +- } +- } +- + ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL, + 0, p->write_flags, &local_err); + if (ret != 0) { +@@ -949,13 +950,7 @@ int multifd_save_setup(Error **errp) + p->iov = g_new0(struct iovec, page_count + 1); + p->page_size = qemu_target_page_size(); + p->page_count = page_count; +- +- if (migrate_zero_copy_send()) { +- p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; +- } else { +- p->write_flags = 0; +- } +- ++ p->write_flags = 0; + multifd_new_send_channel_create(p); + } + +diff --git a/migration/multifd.h b/migration/multifd.h +index 4ec005f53f..34a2ecb9f4 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -208,6 +208,7 @@ typedef struct { + } MultiFDMethods; + + void multifd_register_ops(int method, MultiFDMethods *ops); ++void multifd_send_fill_packet(MultiFDSendParams *p); + + static inline void multifd_send_prepare_header(MultiFDSendParams *p) + { +-- +2.33.0 + diff --git a/migration-multifd-Move-multifd_send_setup-error-hand.patch b/migration-multifd-Move-multifd_send_setup-error-hand.patch new file mode 100644 index 0000000..b72cad1 --- /dev/null +++ b/migration-multifd-Move-multifd_send_setup-error-hand.patch @@ -0,0 +1,106 @@ +From d9e7bf53856956e6417a2dd0b5636fb61fb1c365 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Tue, 6 Feb 2024 18:51:15 -0300 +Subject: [51/99] migration/multifd: Move multifd_send_setup error handling in + to the function + +commit bd8b0a8f82d8fc17aa285ab963ba75675c2fbe7a upstream. 
+ +Hide the error handling inside multifd_send_setup to make it cleaner +for the next patch to move the function around. + +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240206215118.6171-4-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/migration.c | 6 +----- + migration/multifd.c | 24 +++++++++++++++++------- + migration/multifd.h | 2 +- + 3 files changed, 19 insertions(+), 13 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 0e8255180d..66417b40a2 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3643,11 +3643,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) + return; + } + +- if (multifd_send_setup(&local_err) != 0) { +- migrate_set_error(s, local_err); +- error_report_err(local_err); +- migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, +- MIGRATION_STATUS_FAILED); ++ if (!multifd_send_setup()) { + migrate_fd_cleanup(s); + return; + } +diff --git a/migration/multifd.c b/migration/multifd.c +index 59dcb6c9a2..1299248fea 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -989,14 +989,16 @@ static void multifd_new_send_channel_create(gpointer opaque) + socket_send_channel_create(multifd_new_send_channel_async, opaque); + } + +-int multifd_send_setup(Error **errp) ++bool multifd_send_setup(void) + { +- int thread_count; ++ MigrationState *s = migrate_get_current(); ++ Error *local_err = NULL; ++ int thread_count, ret = 0; + uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); + uint8_t i; + + if (!migrate_multifd()) { +- return 0; ++ return true; + } + + thread_count = migrate_multifd_channels(); +@@ -1030,14 +1032,22 @@ int multifd_send_setup(Error **errp) + + for (i = 0; i < thread_count; i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; +- int ret; + +- ret = multifd_send_state->ops->send_setup(p, errp); ++ ret = multifd_send_state->ops->send_setup(p, &local_err); + if 
(ret) { +- return ret; ++ break; + } + } +- return 0; ++ ++ if (ret) { ++ migrate_set_error(s, local_err); ++ error_report_err(local_err); ++ migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, ++ MIGRATION_STATUS_FAILED); ++ return false; ++ } ++ ++ return true; + } + + struct { +diff --git a/migration/multifd.h b/migration/multifd.h +index 7881980ee6..8a1cad0996 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -13,7 +13,7 @@ + #ifndef QEMU_MIGRATION_MULTIFD_H + #define QEMU_MIGRATION_MULTIFD_H + +-int multifd_send_setup(Error **errp); ++bool multifd_send_setup(void); + void multifd_send_shutdown(void); + int multifd_recv_setup(Error **errp); + void multifd_recv_cleanup(void); +-- +2.33.0 + diff --git a/migration-multifd-Move-multifd_send_setup-into-migra.patch b/migration-multifd-Move-multifd_send_setup-into-migra.patch new file mode 100644 index 0000000..7a2042e --- /dev/null +++ b/migration-multifd-Move-multifd_send_setup-into-migra.patch @@ -0,0 +1,90 @@ +From 4ab5ed68480ec55bff220496342000187b76c451 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Tue, 6 Feb 2024 18:51:16 -0300 +Subject: [52/99] migration/multifd: Move multifd_send_setup into migration + thread + +commit dd904bc13f2af0c605c3fe72f118ea4e27a6610c upstream. + +We currently have an unfavorable situation around multifd channels +creation and the migration thread execution. + +We create the multifd channels with qio_channel_socket_connect_async +-> qio_task_run_in_thread, but only connect them at the +multifd_new_send_channel_async callback, called from +qio_task_complete, which is registered as a glib event. + +So at multifd_send_setup() we create the channels, but they will only +be actually usable after the whole multifd_send_setup() calling stack +returns back to the main loop. Which means that the migration thread +is already up and running without any possibility for the multifd +channels to be ready on time. 
+ +We currently rely on the channels-ready semaphore blocking +multifd_send_sync_main() until channels start to come up and release +it. However there have been bugs recently found when a channel's +creation fails and multifd_send_cleanup() is allowed to run while +other channels are still being created. + +Let's start to organize this situation by moving the +multifd_send_setup() call into the migration thread. That way we +unblock the main-loop to dispatch the completion callbacks and +actually have a chance of getting the multifd channels ready for when +the migration thread needs them. + +The next patches will deal with the synchronization aspects. + +Note that this takes multifd_send_setup() out of the BQL. + +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240206215118.6171-5-farosas@suse.de +Signed-off-by: Peter Xu + + Conflicts: + migration/migration.c +[jz: upstream renamed qemu_mutex_lock_iothread() to bql_lock(), while + openEuler not yet. 
Resolve context conflict due to this] +Signed-off-by: Jason Zeng +--- + migration/migration.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 66417b40a2..59c0bbee67 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -3319,6 +3319,10 @@ static void *migration_thread(void *opaque) + object_ref(OBJECT(s)); + update_iteration_initial_status(s); + ++ if (!multifd_send_setup()) { ++ goto out; ++ } ++ + qemu_mutex_lock_iothread(); + qemu_savevm_state_header(s->to_dst_file); + qemu_mutex_unlock_iothread(); +@@ -3390,6 +3394,7 @@ static void *migration_thread(void *opaque) + urgent = migration_rate_limit(); + } + ++out: + trace_migration_thread_after_loop(); + migration_iteration_finish(s); + object_unref(OBJECT(s)); +@@ -3643,11 +3648,6 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) + return; + } + +- if (!multifd_send_setup()) { +- migrate_fd_cleanup(s); +- return; +- } +- + if (migrate_background_snapshot()) { + qemu_thread_create(&s->thread, "bg_snapshot", + bg_migration_thread, s, QEMU_THREAD_JOINABLE); +-- +2.33.0 + diff --git a/migration-multifd-Move-total_normal_pages-accounting.patch b/migration-multifd-Move-total_normal_pages-accounting.patch new file mode 100644 index 0000000..9a9f293 --- /dev/null +++ b/migration-multifd-Move-total_normal_pages-accounting.patch @@ -0,0 +1,57 @@ +From 2316c555d9893f3e637260367477edcf40592679 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:44 +0800 +Subject: [34/99] migration/multifd: Move total_normal_pages accounting + +commit db7e1cc5103137743394a939045a17fa2b30a0dc upstream. + +Just like the previous patch, move the accounting for total_normal_pages on +both src/dst sides into the packet fill/unfill procedures. 
+ +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-11-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index f79badb546..510bfdcac8 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -291,6 +291,7 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) + } + + p->packets_sent++; ++ p->total_normal_pages += pages->num; + } + + static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) +@@ -339,6 +340,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + p->next_packet_size = be32_to_cpu(packet->next_packet_size); + p->packet_num = be64_to_cpu(packet->packet_num); + p->packets_recved++; ++ p->total_normal_pages += p->normal_num; + + if (p->normal_num == 0) { + return 0; +@@ -724,7 +726,6 @@ static void *multifd_send_thread(void *opaque) + } + + multifd_send_fill_packet(p); +- p->total_normal_pages += pages->num; + trace_multifd_send(p->id, packet_num, pages->num, p->flags, + p->next_packet_size); + +@@ -1128,7 +1129,6 @@ static void *multifd_recv_thread(void *opaque) + p->flags &= ~MULTIFD_FLAG_SYNC; + trace_multifd_recv(p->id, p->packet_num, p->normal_num, flags, + p->next_packet_size); +- p->total_normal_pages += p->normal_num; + qemu_mutex_unlock(&p->mutex); + + if (p->normal_num) { +-- +2.33.0 + diff --git a/migration-multifd-Move-trace_multifd_send-recv.patch b/migration-multifd-Move-trace_multifd_send-recv.patch new file mode 100644 index 0000000..267acb3 --- /dev/null +++ b/migration-multifd-Move-trace_multifd_send-recv.patch @@ -0,0 +1,71 @@ +From 8a1deb6f19abbd8824a9b3e04abc77f5f72f37f6 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:45 +0800 +Subject: [35/99] migration/multifd: Move trace_multifd_send|recv() + +commit 8a9ef1738037e1d1132f9e1bd3e2f1102bde719f upstream. 
+ +Move them into fill/unfill of packets. With that, we can further cleanup +the send/recv thread procedure, and remove one more temp var. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-12-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 510bfdcac8..f545faaa52 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -292,6 +292,9 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) + + p->packets_sent++; + p->total_normal_pages += pages->num; ++ ++ trace_multifd_send(p->id, p->packet_num, pages->num, p->flags, ++ p->next_packet_size); + } + + static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) +@@ -342,6 +345,9 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + p->packets_recved++; + p->total_normal_pages += p->normal_num; + ++ trace_multifd_recv(p->id, p->packet_num, p->normal_num, p->flags, ++ p->next_packet_size); ++ + if (p->normal_num == 0) { + return 0; + } +@@ -708,7 +714,6 @@ static void *multifd_send_thread(void *opaque) + qemu_mutex_lock(&p->mutex); + + if (qatomic_read(&p->pending_job)) { +- uint64_t packet_num = p->packet_num; + MultiFDPages_t *pages = p->pages; + + if (use_zero_copy_send) { +@@ -726,8 +731,6 @@ static void *multifd_send_thread(void *opaque) + } + + multifd_send_fill_packet(p); +- trace_multifd_send(p->id, packet_num, pages->num, p->flags, +- p->next_packet_size); + + if (use_zero_copy_send) { + /* Send header first, without zerocopy */ +@@ -1127,8 +1130,6 @@ static void *multifd_recv_thread(void *opaque) + flags = p->flags; + /* recv methods don't know how to handle the SYNC flag */ + p->flags &= ~MULTIFD_FLAG_SYNC; +- trace_multifd_recv(p->id, p->packet_num, p->normal_num, flags, +- p->next_packet_size); + qemu_mutex_unlock(&p->mutex); + + if 
(p->normal_num) { +-- +2.33.0 + diff --git a/migration-multifd-Optimize-sender-side-to-be-lockles.patch b/migration-multifd-Optimize-sender-side-to-be-lockles.patch new file mode 100644 index 0000000..b74b8bd --- /dev/null +++ b/migration-multifd-Optimize-sender-side-to-be-lockles.patch @@ -0,0 +1,204 @@ +From 2beae052ba502782de62ca4ccf7a1cdb6e830150 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:57 +0800 +Subject: [47/99] migration/multifd: Optimize sender side to be lockless + +commit 488c84acb465c21b716c3fd14de27ab5ce388c85 upstream. + +When reviewing my attempt to refactor send_prepare(), Fabiano suggested we +try out with dropping the mutex in multifd code [1]. + +I thought about that before but I never tried to change the code. Now +maybe it's time to give it a stab. This only optimizes the sender side. + +The trick here is multifd has a clear provider/consumer model, that the +migration main thread publishes requests (either pending_job/pending_sync), +while the multifd sender threads are consumers. Here we don't have a lot +of complicated data sharing, and the jobs can logically be submitted +lockless. + +Arm the code with atomic weapons. Two things worth mentioning: + + - For multifd_send_pages(): we can use qatomic_load_acquire() when trying + to find a free channel, but that's expensive if we attach one ACQUIRE per + channel. Instead, keep the qatomic_read() on reading the pending_job + flag as we do already, meanwhile use one smp_mb_acquire() after the loop + to guarantee the memory ordering. + + - For pending_sync: it doesn't have any extra data required since now + p->flags are never touched, it should be safe to not use memory barrier. + That's different from pending_job. + +Provide rich comments for all the lockless operations to state how they are +paired. With that, we can remove the mutex. 
+ +[1] https://lore.kernel.org/r/87o7d1jlu5.fsf@suse.de + +Suggested-by: Fabiano Rosas +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-24-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 51 +++++++++++++++++++++++---------------------- + migration/multifd.h | 2 -- + 2 files changed, 26 insertions(+), 27 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index c52c18046a..c0d8f438bc 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -502,19 +502,19 @@ static bool multifd_send_pages(void) + } + } + +- qemu_mutex_lock(&p->mutex); +- assert(!p->pages->num); +- assert(!p->pages->block); + /* +- * Double check on pending_job==false with the lock. In the future if +- * we can have >1 requester thread, we can replace this with a "goto +- * retry", but that is for later. ++ * Make sure we read p->pending_job before all the rest. Pairs with ++ * qatomic_store_release() in multifd_send_thread(). + */ +- assert(qatomic_read(&p->pending_job) == false); +- qatomic_set(&p->pending_job, true); ++ smp_mb_acquire(); ++ assert(!p->pages->num); + multifd_send_state->pages = p->pages; + p->pages = pages; +- qemu_mutex_unlock(&p->mutex); ++ /* ++ * Making sure p->pages is setup before marking pending_job=true. Pairs ++ * with the qatomic_load_acquire() in multifd_send_thread(). 
++ */ ++ qatomic_store_release(&p->pending_job, true); + qemu_sem_post(&p->sem); + + return true; +@@ -649,7 +649,6 @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) + } + multifd_send_channel_destroy(p->c); + p->c = NULL; +- qemu_mutex_destroy(&p->mutex); + qemu_sem_destroy(&p->sem); + qemu_sem_destroy(&p->sem_sync); + g_free(p->name); +@@ -743,14 +742,12 @@ int multifd_send_sync_main(void) + + trace_multifd_send_sync_main_signal(p->id); + +- qemu_mutex_lock(&p->mutex); + /* + * We should be the only user so far, so not possible to be set by + * others concurrently. + */ + assert(qatomic_read(&p->pending_sync) == false); + qatomic_set(&p->pending_sync, true); +- qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); + } + for (i = 0; i < migrate_multifd_channels(); i++) { +@@ -800,9 +797,12 @@ static void *multifd_send_thread(void *opaque) + if (multifd_send_should_exit()) { + break; + } +- qemu_mutex_lock(&p->mutex); + +- if (qatomic_read(&p->pending_job)) { ++ /* ++ * Read pending_job flag before p->pages. Pairs with the ++ * qatomic_store_release() in multifd_send_pages(). ++ */ ++ if (qatomic_load_acquire(&p->pending_job)) { + MultiFDPages_t *pages = p->pages; + + p->iovs_num = 0; +@@ -810,14 +810,12 @@ static void *multifd_send_thread(void *opaque) + + ret = multifd_send_state->ops->send_prepare(p, &local_err); + if (ret != 0) { +- qemu_mutex_unlock(&p->mutex); + break; + } + + ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL, + 0, p->write_flags, &local_err); + if (ret != 0) { +- qemu_mutex_unlock(&p->mutex); + break; + } + +@@ -826,24 +824,31 @@ static void *multifd_send_thread(void *opaque) + + multifd_pages_reset(p->pages); + p->next_packet_size = 0; +- qatomic_set(&p->pending_job, false); +- qemu_mutex_unlock(&p->mutex); ++ ++ /* ++ * Making sure p->pages is published before saying "we're ++ * free". Pairs with the smp_mb_acquire() in ++ * multifd_send_pages(). 
++ */ ++ qatomic_store_release(&p->pending_job, false); + } else { +- /* If not a normal job, must be a sync request */ ++ /* ++ * If not a normal job, must be a sync request. Note that ++ * pending_sync is a standalone flag (unlike pending_job), so ++ * it doesn't require explicit memory barriers. ++ */ + assert(qatomic_read(&p->pending_sync)); + p->flags = MULTIFD_FLAG_SYNC; + multifd_send_fill_packet(p); + ret = qio_channel_write_all(p->c, (void *)p->packet, + p->packet_len, &local_err); + if (ret != 0) { +- qemu_mutex_unlock(&p->mutex); + break; + } + /* p->next_packet_size will always be zero for a SYNC packet */ + stat64_add(&mig_stats.multifd_bytes, p->packet_len); + p->flags = 0; + qatomic_set(&p->pending_sync, false); +- qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem_sync); + } + } +@@ -857,10 +862,7 @@ out: + error_free(local_err); + } + +- qemu_mutex_lock(&p->mutex); + p->running = false; +- qemu_mutex_unlock(&p->mutex); +- + rcu_unregister_thread(); + migration_threads_remove(thread); + trace_multifd_send_thread_end(p->id, p->packets_sent, p->total_normal_pages); +@@ -1002,7 +1004,6 @@ int multifd_send_setup(Error **errp) + for (i = 0; i < thread_count; i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +- qemu_mutex_init(&p->mutex); + qemu_sem_init(&p->sem, 0); + qemu_sem_init(&p->sem_sync, 0); + p->id = i; +diff --git a/migration/multifd.h b/migration/multifd.h +index 98876ff94a..78a2317263 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -91,8 +91,6 @@ typedef struct { + /* syncs main thread and channels */ + QemuSemaphore sem_sync; + +- /* this mutex protects the following parameters */ +- QemuMutex mutex; + /* is this channel thread running */ + bool running; + /* multifd flags for each packet */ +-- +2.33.0 + diff --git a/migration-multifd-Postpone-reset-of-MultiFDPages_t.patch b/migration-multifd-Postpone-reset-of-MultiFDPages_t.patch new file mode 100644 index 0000000..692b22e --- /dev/null +++ 
b/migration-multifd-Postpone-reset-of-MultiFDPages_t.patch @@ -0,0 +1,84 @@ +From 9ce63dcad32efdb9e31db0db495bf4a3e1a96595 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:38 +0800 +Subject: [28/99] migration/multifd: Postpone reset of MultiFDPages_t + +commit 836eca47f62f9f6d5b8e9b6fedfc3539775c4e2e upstream. + +Now we reset MultiFDPages_t object in the multifd sender thread in the +middle of the sending job. That's not necessary, because the "*pages" +struct will not be reused anyway until pending_job is cleared. + +Move that to the end after the job is completed, provide a helper to reset +a "*pages" object. Use that same helper when free the object too. + +This prepares us to keep using p->pages in the follow up patches, where we +may drop p->normal[]. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-5-peterx@redhat.com +Signed-off-by: Peter Xu + + Conflicts: + migration/multifd.c +[jz: openEuler backported 254c67a88ab5 ("migration: fix-possible-int-overflow") + which causes simple context conflict when cherry-pick this commit] +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 18 ++++++++++++++---- + 1 file changed, 14 insertions(+), 4 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index ea756b6eb8..fff119237a 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -173,6 +173,17 @@ void multifd_register_ops(int method, MultiFDMethods *ops) + multifd_ops[method] = ops; + } + ++/* Reset a MultiFDPages_t* object for the next use */ ++static void multifd_pages_reset(MultiFDPages_t *pages) ++{ ++ /* ++ * We don't need to touch offset[] array, because it will be ++ * overwritten later when reused. 
++ */ ++ pages->num = 0; ++ pages->block = NULL; ++} ++ + static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp) + { + MultiFDInit_t msg = {}; +@@ -249,9 +260,8 @@ static MultiFDPages_t *multifd_pages_init(uint32_t n) + + static void multifd_pages_clear(MultiFDPages_t *pages) + { +- pages->num = 0; ++ multifd_pages_reset(pages); + pages->allocated = 0; +- pages->block = NULL; + g_free(pages->offset); + pages->offset = NULL; + g_free(pages); +@@ -708,8 +718,6 @@ static void *multifd_send_thread(void *opaque) + p->flags = 0; + p->num_packets++; + p->total_normal_pages += p->normal_num; +- p->pages->num = 0; +- p->pages->block = NULL; + qemu_mutex_unlock(&p->mutex); + + trace_multifd_send(p->id, packet_num, p->normal_num, flags, +@@ -736,6 +744,8 @@ static void *multifd_send_thread(void *opaque) + + stat64_add(&mig_stats.multifd_bytes, + (uint64_t)p->next_packet_size + p->packet_len); ++ ++ multifd_pages_reset(p->pages); + p->next_packet_size = 0; + qemu_mutex_lock(&p->mutex); + p->pending_job--; +-- +2.33.0 + diff --git a/migration-multifd-Release-recv-sem_sync-earlier.patch b/migration-multifd-Release-recv-sem_sync-earlier.patch new file mode 100644 index 0000000..6744b22 --- /dev/null +++ b/migration-multifd-Release-recv-sem_sync-earlier.patch @@ -0,0 +1,52 @@ +From 7a9435d5db4a525b841078b125ba4843339c82fa Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Tue, 20 Feb 2024 19:41:09 -0300 +Subject: [56/99] migration/multifd: Release recv sem_sync earlier + +commit d13f0026c7a625a5a34a5dea4095a4d9cfa04652 upstream. + +Now that multifd_recv_terminate_threads() is called only once, release +the recv side sem_sync earlier like we do for the send side. 
+ +Signed-off-by: Fabiano Rosas +Reviewed-by: Peter Xu +Link: https://lore.kernel.org/r/20240220224138.24759-6-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 126c18406f..bbd421004f 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -1108,6 +1108,12 @@ static void multifd_recv_terminate_threads(Error *err) + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + ++ /* ++ * multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code, ++ * however try to wakeup it without harm in cleanup phase. ++ */ ++ qemu_sem_post(&p->sem_sync); ++ + /* + * We could arrive here for two reasons: + * - normal quit, i.e. everything went fine, just finished +@@ -1166,12 +1172,6 @@ void multifd_recv_cleanup(void) + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + +- /* +- * multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code, +- * however try to wakeup it without harm in cleanup phase. +- */ +- qemu_sem_post(&p->sem_sync); +- + if (p->thread_created) { + qemu_thread_join(&p->thread); + } +-- +2.33.0 + diff --git a/migration-multifd-Remove-MultiFDPages_t-packet_num.patch b/migration-multifd-Remove-MultiFDPages_t-packet_num.patch new file mode 100644 index 0000000..47f90a9 --- /dev/null +++ b/migration-multifd-Remove-MultiFDPages_t-packet_num.patch @@ -0,0 +1,48 @@ +From d6e061a269348d6d559be65a816cc0404501d86a Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 4 Jan 2024 11:21:38 -0300 +Subject: [07/99] migration/multifd: Remove MultiFDPages_t::packet_num + +commit dca1bc7f24d2fa227f0b787f85f3cc67006e67bf upstream. + +This was introduced by commit 34c55a94b1 ("migration: Create multipage +support") and never used. 
+ +Signed-off-by: Fabiano Rosas +Reviewed-by: Peter Xu +Link: https://lore.kernel.org/r/20240104142144.9680-2-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 1 - + migration/multifd.h | 2 -- + 2 files changed, 3 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index f5991bc746..3ea204cac8 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -251,7 +251,6 @@ static void multifd_pages_clear(MultiFDPages_t *pages) + { + pages->num = 0; + pages->allocated = 0; +- pages->packet_num = 0; + pages->block = NULL; + g_free(pages->offset); + pages->offset = NULL; +diff --git a/migration/multifd.h b/migration/multifd.h +index a835643b48..b0ff610c37 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -58,8 +58,6 @@ typedef struct { + uint32_t num; + /* number of allocated pages */ + uint32_t allocated; +- /* global number of generated multifd packets */ +- uint64_t packet_num; + /* offset of each page */ + ram_addr_t *offset; + RAMBlock *block; +-- +2.33.0 + diff --git a/migration-multifd-Remove-QEMUFile-from-where-it-is-n.patch b/migration-multifd-Remove-QEMUFile-from-where-it-is-n.patch new file mode 100644 index 0000000..02ea46a --- /dev/null +++ b/migration-multifd-Remove-QEMUFile-from-where-it-is-n.patch @@ -0,0 +1,159 @@ +From d7823b26d0d983402a16b3568543bac7bb5c7f34 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 4 Jan 2024 11:21:39 -0300 +Subject: [08/99] migration/multifd: Remove QEMUFile from where it is not + needed + +commit 9346fa1870784c70618bfd5a9e1f1da89de0c5ec upstream. 
+ +Signed-off-by: Fabiano Rosas +Reviewed-by: Peter Xu +Link: https://lore.kernel.org/r/20240104142144.9680-3-farosas@suse.de +Signed-off-by: Peter Xu + + Conflicts: + migration/ram.c +[jz: resolve context conflict due to BQL name, + qemu_mutex_lock_iothread() hasn't renamed to bql_lock() yet] +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 12 ++++++------ + migration/multifd.h | 4 ++-- + migration/ram.c | 15 +++++++-------- + 3 files changed, 15 insertions(+), 16 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 3ea204cac8..3e5aaaa1d4 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -391,7 +391,7 @@ struct { + * false. + */ + +-static int multifd_send_pages(QEMUFile *f) ++static int multifd_send_pages(void) + { + int i; + static int next_channel; +@@ -437,7 +437,7 @@ static int multifd_send_pages(QEMUFile *f) + return 1; + } + +-int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset) ++int multifd_queue_page(RAMBlock *block, ram_addr_t offset) + { + MultiFDPages_t *pages = multifd_send_state->pages; + bool changed = false; +@@ -457,12 +457,12 @@ int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset) + changed = true; + } + +- if (multifd_send_pages(f) < 0) { ++ if (multifd_send_pages() < 0) { + return -1; + } + + if (changed) { +- return multifd_queue_page(f, block, offset); ++ return multifd_queue_page(block, offset); + } + + return 1; +@@ -584,7 +584,7 @@ static int multifd_zero_copy_flush(QIOChannel *c) + return ret; + } + +-int multifd_send_sync_main(QEMUFile *f) ++int multifd_send_sync_main(void) + { + int i; + bool flush_zero_copy; +@@ -593,7 +593,7 @@ int multifd_send_sync_main(QEMUFile *f) + return 0; + } + if (multifd_send_state->pages->num) { +- if (multifd_send_pages(f) < 0) { ++ if (multifd_send_pages() < 0) { + error_report("%s: multifd_send_pages fail", __func__); + return -1; + } +diff --git a/migration/multifd.h b/migration/multifd.h +index b0ff610c37..35d11f103c 
100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -21,8 +21,8 @@ void multifd_load_shutdown(void); + bool multifd_recv_all_channels_created(void); + void multifd_recv_new_channel(QIOChannel *ioc, Error **errp); + void multifd_recv_sync_main(void); +-int multifd_send_sync_main(QEMUFile *f); +-int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); ++int multifd_send_sync_main(void); ++int multifd_queue_page(RAMBlock *block, ram_addr_t offset); + + /* Multifd Compression flags */ + #define MULTIFD_FLAG_SYNC (1 << 0) +diff --git a/migration/ram.c b/migration/ram.c +index f1ff38cf39..67fa9c83d6 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1387,10 +1387,9 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss) + return pages; + } + +-static int ram_save_multifd_page(QEMUFile *file, RAMBlock *block, +- ram_addr_t offset) ++static int ram_save_multifd_page(RAMBlock *block, ram_addr_t offset) + { +- if (multifd_queue_page(file, block, offset) < 0) { ++ if (multifd_queue_page(block, offset) < 0) { + return -1; + } + stat64_add(&mig_stats.normal_pages, 1); +@@ -1473,7 +1472,7 @@ static int find_dirty_block(RAMState *rs, PageSearchStatus *pss) + if (migrate_multifd() && + !migrate_multifd_flush_after_each_section()) { + QEMUFile *f = rs->pss[RAM_CHANNEL_PRECOPY].pss_channel; +- int ret = multifd_send_sync_main(f); ++ int ret = multifd_send_sync_main(); + if (ret < 0) { + return ret; + } +@@ -2265,7 +2264,7 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) + * still see partially copied pages which is data corruption. 
+ */ + if (migrate_multifd() && !migration_in_postcopy()) { +- return ram_save_multifd_page(pss->pss_channel, block, offset); ++ return ram_save_multifd_page(block, offset); + } + + return ram_save_page(rs, pss); +@@ -3434,7 +3433,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) + migration_ops->ram_save_target_page = ram_save_target_page_legacy; + + qemu_mutex_unlock_iothread(); +- ret = multifd_send_sync_main(f); ++ ret = multifd_send_sync_main(); + qemu_mutex_lock_iothread(); + if (ret < 0) { + return ret; +@@ -3558,7 +3557,7 @@ out: + if (ret >= 0 + && migration_is_setup_or_active(migrate_get_current()->state)) { + if (migrate_multifd() && migrate_multifd_flush_after_each_section()) { +- ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel); ++ ret = multifd_send_sync_main(); + if (ret < 0) { + return ret; + } +@@ -3654,7 +3653,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque) + } + } + +- ret = multifd_send_sync_main(rs->pss[RAM_CHANNEL_PRECOPY].pss_channel); ++ ret = multifd_send_sync_main(); + if (ret < 0) { + return ret; + } +-- +2.33.0 + diff --git a/migration-multifd-Remove-error_setg-in-migration_ioc.patch b/migration-multifd-Remove-error_setg-in-migration_ioc.patch new file mode 100644 index 0000000..5b54c88 --- /dev/null +++ b/migration-multifd-Remove-error_setg-in-migration_ioc.patch @@ -0,0 +1,39 @@ +From 1698ab2f40ef2bde3e7ee3175a5b5656589ce27d Mon Sep 17 00:00:00 2001 +From: Avihai Horon +Date: Sun, 31 Dec 2023 11:30:13 +0200 +Subject: [04/99] migration/multifd: Remove error_setg() in + migration_ioc_process_incoming() + +commit 1d3886f837d8e972366a8b58ba8afb0e5efbeed7 upstream. + +If multifd_load_setup() fails in migration_ioc_process_incoming(), +error_setg() is called with errp. This will lead to an assert because in +that case errp already contains an error. + +Fix it by removing the redundant error_setg(). 
+ +Fixes: 6720c2b32725 ("migration: check magic value for deciding the mapping of channels") +Signed-off-by: Avihai Horon +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20231231093016.14204-9-avihaih@nvidia.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/migration.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/migration/migration.c b/migration/migration.c +index dce22c2da5..5829565f9c 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -848,7 +848,6 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) + } + + if (multifd_load_setup(errp) != 0) { +- error_setg(errp, "Failed to setup multifd channels"); + return; + } + +-- +2.33.0 + diff --git a/migration-multifd-Remove-p-quit-from-recv-side.patch b/migration-multifd-Remove-p-quit-from-recv-side.patch new file mode 100644 index 0000000..6715864 --- /dev/null +++ b/migration-multifd-Remove-p-quit-from-recv-side.patch @@ -0,0 +1,129 @@ +From eacc8d435828d31478498fe266487906941be6cb Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Tue, 20 Feb 2024 19:41:08 -0300 +Subject: [55/99] migration/multifd: Remove p->quit from recv side + +commit 11dd7be57524d400652cecf8740a016b3d66b53d upstream. + +Like we did on the sending side, replace the p->quit per-channel flag +with a global atomic 'exiting' flag. 
+ +Signed-off-by: Fabiano Rosas +Reviewed-by: Peter Xu +Link: https://lore.kernel.org/r/20240220224138.24759-5-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 41 ++++++++++++++++++++++++----------------- + 1 file changed, 24 insertions(+), 17 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index bd240649f7..126c18406f 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -80,6 +80,19 @@ struct { + MultiFDMethods *ops; + } *multifd_send_state; + ++struct { ++ MultiFDRecvParams *params; ++ /* number of created threads */ ++ int count; ++ /* syncs main thread and channels */ ++ QemuSemaphore sem_sync; ++ /* global number of generated multifd packets */ ++ uint64_t packet_num; ++ int exiting; ++ /* multifd ops */ ++ MultiFDMethods *ops; ++} *multifd_recv_state; ++ + /* Multifd without compression */ + + /** +@@ -441,6 +454,11 @@ static bool multifd_send_should_exit(void) + return qatomic_read(&multifd_send_state->exiting); + } + ++static bool multifd_recv_should_exit(void) ++{ ++ return qatomic_read(&multifd_recv_state->exiting); ++} ++ + /* + * The migration thread can wait on either of the two semaphores. 
This + * function can be used to kick the main thread out of waiting on either of +@@ -1067,24 +1085,16 @@ bool multifd_send_setup(void) + return true; + } + +-struct { +- MultiFDRecvParams *params; +- /* number of created threads */ +- int count; +- /* syncs main thread and channels */ +- QemuSemaphore sem_sync; +- /* global number of generated multifd packets */ +- uint64_t packet_num; +- /* multifd ops */ +- MultiFDMethods *ops; +-} *multifd_recv_state; +- + static void multifd_recv_terminate_threads(Error *err) + { + int i; + + trace_multifd_recv_terminate_threads(err != NULL); + ++ if (qatomic_xchg(&multifd_recv_state->exiting, 1)) { ++ return; ++ } ++ + if (err) { + MigrationState *s = migrate_get_current(); + migrate_set_error(s, err); +@@ -1098,8 +1108,6 @@ static void multifd_recv_terminate_threads(Error *err) + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + +- qemu_mutex_lock(&p->mutex); +- p->quit = true; + /* + * We could arrive here for two reasons: + * - normal quit, i.e. 
everything went fine, just finished +@@ -1109,7 +1117,6 @@ static void multifd_recv_terminate_threads(Error *err) + if (p->c) { + qio_channel_shutdown(p->c, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); + } +- qemu_mutex_unlock(&p->mutex); + } + } + +@@ -1214,7 +1221,7 @@ static void *multifd_recv_thread(void *opaque) + while (true) { + uint32_t flags; + +- if (p->quit) { ++ if (multifd_recv_should_exit()) { + break; + } + +@@ -1278,6 +1285,7 @@ int multifd_recv_setup(Error **errp) + multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state)); + multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count); + qatomic_set(&multifd_recv_state->count, 0); ++ qatomic_set(&multifd_recv_state->exiting, 0); + qemu_sem_init(&multifd_recv_state->sem_sync, 0); + multifd_recv_state->ops = multifd_ops[migrate_multifd_compression()]; + +@@ -1286,7 +1294,6 @@ int multifd_recv_setup(Error **errp) + + qemu_mutex_init(&p->mutex); + qemu_sem_init(&p->sem_sync, 0); +- p->quit = false; + p->id = i; + p->packet_len = sizeof(MultiFDPacket_t) + + sizeof(uint64_t) * page_count; +-- +2.33.0 + diff --git a/migration-multifd-Remove-p-running.patch b/migration-multifd-Remove-p-running.patch new file mode 100644 index 0000000..c4b45f8 --- /dev/null +++ b/migration-multifd-Remove-p-running.patch @@ -0,0 +1,175 @@ +From 9fb44da2534bcf1802c5f7ce36944b0940821728 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Tue, 6 Feb 2024 18:51:14 -0300 +Subject: [50/99] migration/multifd: Remove p->running + +commit a2a63c4abd52f4e3ff4046dcb67fe44ebf0bb8de upstream. + +We currently only need p->running to avoid calling qemu_thread_join() +on a non existent thread if the thread has never been created. + +However, there are at least two bugs in this logic: + +1) On the sending side, p->running is set too early and +qemu_thread_create() can be skipped due to an error during TLS +handshake, leaving the flag set and leading to a crash when +multifd_send_cleanup() calls qemu_thread_join(). 
+ +2) During exit, the multifd thread clears the flag while holding the +channel lock. The counterpart at multifd_send_cleanup() reads the flag +outside of the lock and might free the mutex while the multifd thread +still has it locked. + +Fix the first issue by setting the flag right before creating the +thread. Rename it from p->running to p->thread_created to clarify its +usage. + +Fix the second issue by not clearing the flag at the multifd thread +exit. We don't have any use for that. + +Note that these bugs are straight-forward logic issues and not race +conditions. There is still a gap for races to affect this code due to +multifd_send_cleanup() being allowed to run concurrently with the +thread creation loop. This issue is solved in the next patches. + +Cc: qemu-stable +Fixes: 29647140157a ("migration/tls: add support for multifd tls-handshake") +Reported-by: Avihai Horon +Reported-by: chenyuhui5@huawei.com +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240206215118.6171-3-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 27 ++++++++++++--------------- + migration/multifd.h | 7 ++----- + 2 files changed, 14 insertions(+), 20 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 459e7889e8..59dcb6c9a2 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -635,7 +635,7 @@ static void multifd_send_terminate_threads(void) + qemu_thread_join(&p->tls_thread); + } + +- if (p->running) { ++ if (p->thread_created) { + qemu_thread_join(&p->thread); + } + } +@@ -866,7 +866,6 @@ out: + error_free(local_err); + } + +- p->running = false; + rcu_unregister_thread(); + migration_threads_remove(thread); + trace_multifd_send_thread_end(p->id, p->packets_sent, p->total_normal_pages); +@@ -957,6 +956,8 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + migration_ioc_register_yank(ioc); + p->registered_yank = true; + p->c = ioc; ++ ++ 
p->thread_created = true; + qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, + QEMU_THREAD_JOINABLE); + return true; +@@ -971,7 +972,6 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + trace_multifd_new_send_channel_async(p->id); + if (!qio_task_propagate_error(task, &local_err)) { + qio_channel_set_delay(ioc, false); +- p->running = true; + if (multifd_channel_connect(p, ioc, &local_err)) { + return; + } +@@ -1132,15 +1132,15 @@ void multifd_recv_cleanup(void) + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; + +- if (p->running) { +- /* +- * multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code, +- * however try to wakeup it without harm in cleanup phase. +- */ +- qemu_sem_post(&p->sem_sync); +- } ++ /* ++ * multifd_recv_thread may hung at MULTIFD_FLAG_SYNC handle code, ++ * however try to wakeup it without harm in cleanup phase. ++ */ ++ qemu_sem_post(&p->sem_sync); + +- qemu_thread_join(&p->thread); ++ if (p->thread_created) { ++ qemu_thread_join(&p->thread); ++ } + } + for (i = 0; i < migrate_multifd_channels(); i++) { + multifd_recv_cleanup_channel(&multifd_recv_state->params[i]); +@@ -1226,9 +1226,6 @@ static void *multifd_recv_thread(void *opaque) + multifd_recv_terminate_threads(local_err); + error_free(local_err); + } +- qemu_mutex_lock(&p->mutex); +- p->running = false; +- qemu_mutex_unlock(&p->mutex); + + rcu_unregister_thread(); + trace_multifd_recv_thread_end(p->id, p->packets_recved, p->total_normal_pages); +@@ -1334,7 +1331,7 @@ void multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + p->c = ioc; + object_ref(OBJECT(ioc)); + +- p->running = true; ++ p->thread_created = true; + qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p, + QEMU_THREAD_JOINABLE); + qatomic_inc(&multifd_recv_state->count); +diff --git a/migration/multifd.h b/migration/multifd.h +index 720c9d50db..7881980ee6 100644 +--- a/migration/multifd.h ++++ 
b/migration/multifd.h +@@ -73,6 +73,7 @@ typedef struct { + char *name; + /* channel thread id */ + QemuThread thread; ++ bool thread_created; + QemuThread tls_thread; + bool tls_thread_created; + /* communication channel */ +@@ -93,8 +94,6 @@ typedef struct { + /* syncs main thread and channels */ + QemuSemaphore sem_sync; + +- /* is this channel thread running */ +- bool running; + /* multifd flags for each packet */ + uint32_t flags; + /* +@@ -143,6 +142,7 @@ typedef struct { + char *name; + /* channel thread id */ + QemuThread thread; ++ bool thread_created; + /* communication channel */ + QIOChannel *c; + /* packet allocated len */ +@@ -157,8 +157,6 @@ typedef struct { + + /* this mutex protects the following parameters */ + QemuMutex mutex; +- /* is this channel thread running */ +- bool running; + /* should this thread finish */ + bool quit; + /* multifd flags for each packet */ +@@ -217,4 +215,3 @@ static inline void multifd_send_prepare_header(MultiFDSendParams *p) + + + #endif +- +-- +2.33.0 + diff --git a/migration-multifd-Remove-unnecessary-usage-of-local-.patch b/migration-multifd-Remove-unnecessary-usage-of-local-.patch new file mode 100644 index 0000000..3a9701c --- /dev/null +++ b/migration-multifd-Remove-unnecessary-usage-of-local-.patch @@ -0,0 +1,61 @@ +From c707a4d1339d572942b79a1b6440cbe487ab2b81 Mon Sep 17 00:00:00 2001 +From: Avihai Horon +Date: Sun, 31 Dec 2023 11:30:16 +0200 +Subject: [06/99] migration/multifd: Remove unnecessary usage of local Error +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit 3fc58efa938338a82e4d5c0c031e7e9c98e9544f upstream. + +According to Error API, usage of ERRP_GUARD() or a local Error instead +of errp is needed if errp is passed to void functions, where it is later +dereferenced to see if an error occurred. + +There are several places in multifd.c that use local Error although it +is not needed. Change these places to use errp directly. 
+ +Signed-off-by: Avihai Horon +Reviewed-by: Philippe Mathieu-Daudé +Link: https://lore.kernel.org/r/20231231093016.14204-12-avihaih@nvidia.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 8221ebe4b6..f5991bc746 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -955,12 +955,10 @@ int multifd_save_setup(Error **errp) + + for (i = 0; i < thread_count; i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; +- Error *local_err = NULL; + int ret; + +- ret = multifd_send_state->ops->send_setup(p, &local_err); ++ ret = multifd_send_state->ops->send_setup(p, errp); + if (ret) { +- error_propagate(errp, local_err); + return ret; + } + } +@@ -1199,12 +1197,10 @@ int multifd_load_setup(Error **errp) + + for (i = 0; i < thread_count; i++) { + MultiFDRecvParams *p = &multifd_recv_state->params[i]; +- Error *local_err = NULL; + int ret; + +- ret = multifd_recv_state->ops->recv_setup(p, &local_err); ++ ret = multifd_recv_state->ops->recv_setup(p, errp); + if (ret) { +- error_propagate(errp, local_err); + return ret; + } + } +-- +2.33.0 + diff --git a/migration-multifd-Rename-MultiFDSend-RecvParams-data.patch b/migration-multifd-Rename-MultiFDSend-RecvParams-data.patch new file mode 100644 index 0000000..22840a0 --- /dev/null +++ b/migration-multifd-Rename-MultiFDSend-RecvParams-data.patch @@ -0,0 +1,199 @@ +From 68a8a9da612d2d2dec5ad1b7b9ad5d7db603e05d Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 29 Feb 2024 12:30:06 -0300 +Subject: [65/99] migration/multifd: Rename MultiFDSend|RecvParams::data to + compress_data + +commit 402dd7ac1c3be44f306c903cdfd2583ffec5e2fd upstream. + +Use a more specific name for the compression data so we can use the +generic for the multifd core code. 
+ +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240229153017.2221-13-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd-zlib.c | 20 ++++++++++---------- + migration/multifd-zstd.c | 20 ++++++++++---------- + migration/multifd.h | 4 ++-- + 3 files changed, 22 insertions(+), 22 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 012e3bdea1..2a8f5fc9a6 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -69,7 +69,7 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) + err_msg = "out of memory for buf"; + goto err_free_zbuff; + } +- p->data = z; ++ p->compress_data = z; + return 0; + + err_free_zbuff: +@@ -92,15 +92,15 @@ err_free_z: + */ + static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + { +- struct zlib_data *z = p->data; ++ struct zlib_data *z = p->compress_data; + + deflateEnd(&z->zs); + g_free(z->zbuff); + z->zbuff = NULL; + g_free(z->buf); + z->buf = NULL; +- g_free(p->data); +- p->data = NULL; ++ g_free(p->compress_data); ++ p->compress_data = NULL; + } + + /** +@@ -117,7 +117,7 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + { + MultiFDPages_t *pages = p->pages; +- struct zlib_data *z = p->data; ++ struct zlib_data *z = p->compress_data; + z_stream *zs = &z->zs; + uint32_t out_size = 0; + int ret; +@@ -194,7 +194,7 @@ static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp) + struct zlib_data *z = g_new0(struct zlib_data, 1); + z_stream *zs = &z->zs; + +- p->data = z; ++ p->compress_data = z; + zs->zalloc = Z_NULL; + zs->zfree = Z_NULL; + zs->opaque = Z_NULL; +@@ -224,13 +224,13 @@ static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp) + */ + static void zlib_recv_cleanup(MultiFDRecvParams *p) + { +- struct zlib_data *z = p->data; ++ struct zlib_data *z = p->compress_data; + + 
inflateEnd(&z->zs); + g_free(z->zbuff); + z->zbuff = NULL; +- g_free(p->data); +- p->data = NULL; ++ g_free(p->compress_data); ++ p->compress_data = NULL; + } + + /** +@@ -246,7 +246,7 @@ static void zlib_recv_cleanup(MultiFDRecvParams *p) + */ + static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + { +- struct zlib_data *z = p->data; ++ struct zlib_data *z = p->compress_data; + z_stream *zs = &z->zs; + uint32_t in_size = p->next_packet_size; + /* we measure the change of total_out */ +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index dc8fe43e94..593cf290ad 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -52,7 +52,7 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) + struct zstd_data *z = g_new0(struct zstd_data, 1); + int res; + +- p->data = z; ++ p->compress_data = z; + z->zcs = ZSTD_createCStream(); + if (!z->zcs) { + g_free(z); +@@ -90,14 +90,14 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) + */ + static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) + { +- struct zstd_data *z = p->data; ++ struct zstd_data *z = p->compress_data; + + ZSTD_freeCStream(z->zcs); + z->zcs = NULL; + g_free(z->zbuff); + z->zbuff = NULL; +- g_free(p->data); +- p->data = NULL; ++ g_free(p->compress_data); ++ p->compress_data = NULL; + } + + /** +@@ -114,7 +114,7 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) + static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + { + MultiFDPages_t *pages = p->pages; +- struct zstd_data *z = p->data; ++ struct zstd_data *z = p->compress_data; + int ret; + uint32_t i; + +@@ -183,7 +183,7 @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp) + struct zstd_data *z = g_new0(struct zstd_data, 1); + int ret; + +- p->data = z; ++ p->compress_data = z; + z->zds = ZSTD_createDStream(); + if (!z->zds) { + g_free(z); +@@ -221,14 +221,14 @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp) + */ + 
static void zstd_recv_cleanup(MultiFDRecvParams *p) + { +- struct zstd_data *z = p->data; ++ struct zstd_data *z = p->compress_data; + + ZSTD_freeDStream(z->zds); + z->zds = NULL; + g_free(z->zbuff); + z->zbuff = NULL; +- g_free(p->data); +- p->data = NULL; ++ g_free(p->compress_data); ++ p->compress_data = NULL; + } + + /** +@@ -248,7 +248,7 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) + uint32_t out_size = 0; + uint32_t expected_size = p->normal_num * p->page_size; + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; +- struct zstd_data *z = p->data; ++ struct zstd_data *z = p->compress_data; + int ret; + int i; + +diff --git a/migration/multifd.h b/migration/multifd.h +index b3fe27ae93..adccd3532f 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -127,7 +127,7 @@ typedef struct { + /* number of iovs used */ + uint32_t iovs_num; + /* used for compression methods */ +- void *data; ++ void *compress_data; + } MultiFDSendParams; + + typedef struct { +@@ -183,7 +183,7 @@ typedef struct { + /* num of non zero pages */ + uint32_t normal_num; + /* used for de-compression methods */ +- void *data; ++ void *compress_data; + } MultiFDRecvParams; + + typedef struct { +-- +2.33.0 + diff --git a/migration-multifd-Rename-p-num_packets-and-clean-it-.patch b/migration-multifd-Rename-p-num_packets-and-clean-it-.patch new file mode 100644 index 0000000..26cd02c --- /dev/null +++ b/migration-multifd-Rename-p-num_packets-and-clean-it-.patch @@ -0,0 +1,140 @@ +From a10ddd65e951c65119135eb847c93ab8db980638 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:43 +0800 +Subject: [33/99] migration/multifd: Rename p->num_packets and clean it up + +commit 05b7ec1890158471afb8537a6817a7e0d0a6c938 upstream. + +This field, no matter whether on src or dest, is only used for debugging +purpose. 
+ +They can even be removed already, unless it still more or less provide some +accounting on "how many packets are sent/recved for this thread". The +other more important one is called packet_num, which is embeded in the +multifd packet headers (MultiFDPacket_t). + +So let's keep them for now, but make them much easier to understand, by +doing below: + + - Rename both of them to packets_sent / packets_recved, the old + name (num_packets) are waaay too confusing when we already have + MultiFDPacket_t.packets_num. + + - Avoid worrying on the "initial packet": we know we will send it, that's + good enough. The accounting won't matter a great deal to start with 0 or + with 1. + + - Move them to where we send/recv the packets. They're: + + - multifd_send_fill_packet() for senders. + - multifd_recv_unfill_packet() for receivers. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-10-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 13 +++++-------- + migration/multifd.h | 6 +++--- + 2 files changed, 8 insertions(+), 11 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index a67917b113..f79badb546 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -289,6 +289,8 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) + + packet->offset[i] = cpu_to_be64(temp); + } ++ ++ p->packets_sent++; + } + + static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) +@@ -336,6 +338,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + + p->next_packet_size = be32_to_cpu(packet->next_packet_size); + p->packet_num = be64_to_cpu(packet->packet_num); ++ p->packets_recved++; + + if (p->normal_num == 0) { + return 0; +@@ -692,8 +695,6 @@ static void *multifd_send_thread(void *opaque) + ret = -1; + goto out; + } +- /* initial packet */ +- p->num_packets = 1; + + while (true) { + 
qemu_sem_post(&multifd_send_state->channels_ready); +@@ -723,7 +724,6 @@ static void *multifd_send_thread(void *opaque) + } + + multifd_send_fill_packet(p); +- p->num_packets++; + p->total_normal_pages += pages->num; + trace_multifd_send(p->id, packet_num, pages->num, p->flags, + p->next_packet_size); +@@ -791,7 +791,7 @@ out: + + rcu_unregister_thread(); + migration_threads_remove(thread); +- trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages); ++ trace_multifd_send_thread_end(p->id, p->packets_sent, p->total_normal_pages); + + return NULL; + } +@@ -1128,7 +1128,6 @@ static void *multifd_recv_thread(void *opaque) + p->flags &= ~MULTIFD_FLAG_SYNC; + trace_multifd_recv(p->id, p->packet_num, p->normal_num, flags, + p->next_packet_size); +- p->num_packets++; + p->total_normal_pages += p->normal_num; + qemu_mutex_unlock(&p->mutex); + +@@ -1154,7 +1153,7 @@ static void *multifd_recv_thread(void *opaque) + qemu_mutex_unlock(&p->mutex); + + rcu_unregister_thread(); +- trace_multifd_recv_thread_end(p->id, p->num_packets, p->total_normal_pages); ++ trace_multifd_recv_thread_end(p->id, p->packets_recved, p->total_normal_pages); + + return NULL; + } +@@ -1256,8 +1255,6 @@ void multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + } + p->c = ioc; + object_ref(OBJECT(ioc)); +- /* initial packet */ +- p->num_packets = 1; + + p->running = true; + qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p, +diff --git a/migration/multifd.h b/migration/multifd.h +index 08f26ef3fe..2e4ad0dc56 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -124,7 +124,7 @@ typedef struct { + /* size of the next packet that contains pages */ + uint32_t next_packet_size; + /* packets sent through this channel */ +- uint64_t num_packets; ++ uint64_t packets_sent; + /* non zero pages sent through this channel */ + uint64_t total_normal_pages; + /* buffers to send */ +@@ -174,8 +174,8 @@ typedef struct { + MultiFDPacket_t *packet; + /* size of the next 
packet that contains pages */ + uint32_t next_packet_size; +- /* packets sent through this channel */ +- uint64_t num_packets; ++ /* packets received through this channel */ ++ uint64_t packets_recved; + /* ramblock */ + RAMBlock *block; + /* ramblock host address */ +-- +2.33.0 + diff --git a/migration-multifd-Rewrite-multifd_queue_page.patch b/migration-multifd-Rewrite-multifd_queue_page.patch new file mode 100644 index 0000000..5126981 --- /dev/null +++ b/migration-multifd-Rewrite-multifd_queue_page.patch @@ -0,0 +1,112 @@ +From 68733215eef6342b28386fd6711f3ab82a7dc66a Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:52 +0800 +Subject: [42/99] migration/multifd: Rewrite multifd_queue_page() + +commit f88f86c4ee3fe673b34873e27af2de0a16fe01fd upstream. + +The current multifd_queue_page() is not easy to read and follow. It is not +good with a few reasons: + + - No helper at all to show what exactly does a condition mean; in short, + readability is low. + + - Rely on pages->ramblock being cleared to detect an empty queue. It's + slightly an overload of the ramblock pointer, per Fabiano [1], which I + also agree. + + - Contains a self recursion, even if not necessary.. + +Rewrite this function. We add some comments to make it even clearer on +what it does. 
+ +[1] https://lore.kernel.org/r/87wmrpjzew.fsf@suse.de + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-19-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 56 ++++++++++++++++++++++++++++++--------------- + 1 file changed, 37 insertions(+), 19 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index dabfc3ec0d..f92e6776f0 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -507,35 +507,53 @@ static bool multifd_send_pages(void) + return true; + } + ++static inline bool multifd_queue_empty(MultiFDPages_t *pages) ++{ ++ return pages->num == 0; ++} ++ ++static inline bool multifd_queue_full(MultiFDPages_t *pages) ++{ ++ return pages->num == pages->allocated; ++} ++ ++static inline void multifd_enqueue(MultiFDPages_t *pages, ram_addr_t offset) ++{ ++ pages->offset[pages->num++] = offset; ++} ++ + /* Returns true if enqueue successful, false otherwise */ + bool multifd_queue_page(RAMBlock *block, ram_addr_t offset) + { +- MultiFDPages_t *pages = multifd_send_state->pages; +- bool changed = false; ++ MultiFDPages_t *pages; ++ ++retry: ++ pages = multifd_send_state->pages; + +- if (!pages->block) { ++ /* If the queue is empty, we can already enqueue now */ ++ if (multifd_queue_empty(pages)) { + pages->block = block; ++ multifd_enqueue(pages, offset); ++ return true; + } + +- if (pages->block == block) { +- pages->offset[pages->num] = offset; +- pages->num++; +- +- if (pages->num < pages->allocated) { +- return true; ++ /* ++ * Not empty, meanwhile we need a flush. It can because of either: ++ * ++ * (1) The page is not on the same ramblock of previous ones, or, ++ * (2) The queue is full. ++ * ++ * After flush, always retry. 
++ */ ++ if (pages->block != block || multifd_queue_full(pages)) { ++ if (!multifd_send_pages()) { ++ return false; + } +- } else { +- changed = true; +- } +- +- if (!multifd_send_pages()) { +- return false; +- } +- +- if (changed) { +- return multifd_queue_page(block, offset); ++ goto retry; + } + ++ /* Not empty, and we still have space, do it! */ ++ multifd_enqueue(pages, offset); + return true; + } + +-- +2.33.0 + diff --git a/migration-multifd-Separate-SYNC-request-with-normal-.patch b/migration-multifd-Separate-SYNC-request-with-normal-.patch new file mode 100644 index 0000000..4aa8c75 --- /dev/null +++ b/migration-multifd-Separate-SYNC-request-with-normal-.patch @@ -0,0 +1,190 @@ +From 40021e3b91b10672849477f4d76712ff3e78f738 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:40 +0800 +Subject: [30/99] migration/multifd: Separate SYNC request with normal jobs + +commit f5f48a7891cf6664a920ba52f6f4dea1646049a4 upstream. + +Multifd provide a threaded model for processing jobs. On sender side, +there can be two kinds of job: (1) a list of pages to send, or (2) a sync +request. + +The sync request is a very special kind of job. It never contains a page +array, but only a multifd packet telling the dest side to synchronize with +sent pages. + +Before this patch, both requests use the pending_job field, no matter what +the request is, it will boost pending_job, while multifd sender thread will +decrement it after it finishes one job. + +However this should be racy, because SYNC is special in that it needs to +set p->flags with MULTIFD_FLAG_SYNC, showing that this is a sync request. +Consider a sequence of operations where: + + - migration thread enqueue a job to send some pages, pending_job++ (0->1) + + - [...before the selected multifd sender thread wakes up...] 
+ + - migration thread enqueue another job to sync, pending_job++ (1->2), + setup p->flags=MULTIFD_FLAG_SYNC + + - multifd sender thread wakes up, found pending_job==2 + - send the 1st packet with MULTIFD_FLAG_SYNC and list of pages + - send the 2nd packet with flags==0 and no pages + +This is not expected, because MULTIFD_FLAG_SYNC should hopefully be done +after all the pages are received. Meanwhile, the 2nd packet will be +completely useless, which contains zero information. + +I didn't verify above, but I think this issue is still benign in that at +least on the recv side we always receive pages before handling +MULTIFD_FLAG_SYNC. However that's not always guaranteed and just tricky. + +One other reason I want to separate it is using p->flags to communicate +between the two threads is also not clearly defined, it's very hard to read +and understand why accessing p->flags is always safe; see the current impl +of multifd_send_thread() where we tried to cache only p->flags. It doesn't +need to be that complicated. + +This patch introduces pending_sync, a separate flag just to show that the +requester needs a sync. Alongside, we remove the tricky caching of +p->flags now because after this patch p->flags should only be used by +multifd sender thread now, which will be crystal clear. So it is always +thread safe to access p->flags. + +With that, we can also safely convert the pending_job into a boolean, +because we don't support >1 pending jobs anyway. + +Always use atomic ops to access both flags to make sure no cache effect. +When at it, drop the initial setting of "pending_job = 0" because it's +always allocated using g_new0(). 
+ +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-7-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 39 +++++++++++++++++++++++++-------------- + migration/multifd.h | 13 +++++++++++-- + 2 files changed, 36 insertions(+), 16 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index bfafe94e1e..dd90c09b26 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -443,8 +443,8 @@ static int multifd_send_pages(void) + } + p = &multifd_send_state->params[i]; + qemu_mutex_lock(&p->mutex); +- if (!p->pending_job) { +- p->pending_job++; ++ if (qatomic_read(&p->pending_job) == false) { ++ qatomic_set(&p->pending_job, true); + next_channel = (i + 1) % migrate_multifd_channels(); + break; + } +@@ -632,8 +632,12 @@ int multifd_send_sync_main(void) + + qemu_mutex_lock(&p->mutex); + p->packet_num = multifd_send_state->packet_num++; +- p->flags |= MULTIFD_FLAG_SYNC; +- p->pending_job++; ++ /* ++ * We should be the only user so far, so not possible to be set by ++ * others concurrently. 
++ */ ++ assert(qatomic_read(&p->pending_sync) == false); ++ qatomic_set(&p->pending_sync, true); + qemu_mutex_unlock(&p->mutex); + qemu_sem_post(&p->sem); + } +@@ -689,10 +693,9 @@ static void *multifd_send_thread(void *opaque) + } + qemu_mutex_lock(&p->mutex); + +- if (p->pending_job) { ++ if (qatomic_read(&p->pending_job)) { + uint64_t packet_num = p->packet_num; + MultiFDPages_t *pages = p->pages; +- uint32_t flags; + + if (use_zero_copy_send) { + p->iovs_num = 0; +@@ -708,13 +711,11 @@ static void *multifd_send_thread(void *opaque) + } + } + multifd_send_fill_packet(p); +- flags = p->flags; +- p->flags = 0; + p->num_packets++; + p->total_normal_pages += pages->num; + qemu_mutex_unlock(&p->mutex); + +- trace_multifd_send(p->id, packet_num, pages->num, flags, ++ trace_multifd_send(p->id, packet_num, pages->num, p->flags, + p->next_packet_size); + + if (use_zero_copy_send) { +@@ -742,12 +743,23 @@ static void *multifd_send_thread(void *opaque) + multifd_pages_reset(p->pages); + p->next_packet_size = 0; + qemu_mutex_lock(&p->mutex); +- p->pending_job--; ++ qatomic_set(&p->pending_job, false); + qemu_mutex_unlock(&p->mutex); +- +- if (flags & MULTIFD_FLAG_SYNC) { +- qemu_sem_post(&p->sem_sync); ++ } else if (qatomic_read(&p->pending_sync)) { ++ p->flags = MULTIFD_FLAG_SYNC; ++ multifd_send_fill_packet(p); ++ ret = qio_channel_write_all(p->c, (void *)p->packet, ++ p->packet_len, &local_err); ++ if (ret != 0) { ++ qemu_mutex_unlock(&p->mutex); ++ break; + } ++ /* p->next_packet_size will always be zero for a SYNC packet */ ++ stat64_add(&mig_stats.multifd_bytes, p->packet_len); ++ p->flags = 0; ++ qatomic_set(&p->pending_sync, false); ++ qemu_mutex_unlock(&p->mutex); ++ qemu_sem_post(&p->sem_sync); + } else { + qemu_mutex_unlock(&p->mutex); + /* sometimes there are spurious wakeups */ +@@ -911,7 +923,6 @@ int multifd_save_setup(Error **errp) + qemu_mutex_init(&p->mutex); + qemu_sem_init(&p->sem, 0); + qemu_sem_init(&p->sem_sync, 0); +- p->pending_job = 0; + p->id = 
i; + p->pages = multifd_pages_init(page_count); + p->packet_len = sizeof(MultiFDPacket_t) +diff --git a/migration/multifd.h b/migration/multifd.h +index 3920bdbcf1..08f26ef3fe 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -99,8 +99,17 @@ typedef struct { + uint32_t flags; + /* global number of generated multifd packets */ + uint64_t packet_num; +- /* thread has work to do */ +- int pending_job; ++ /* ++ * The sender thread has work to do if either of below boolean is set. ++ * ++ * @pending_job: a job is pending ++ * @pending_sync: a sync request is pending ++ * ++ * For both of these fields, they're only set by the requesters, and ++ * cleared by the multifd sender threads. ++ */ ++ bool pending_job; ++ bool pending_sync; + /* array of pages to sent. + * The owner of 'pages' depends of 'pending_job' value: + * pending_job == 0 -> migration_thread can use it. +-- +2.33.0 + diff --git a/migration-multifd-Simplify-locking-in-sender-thread.patch b/migration-multifd-Simplify-locking-in-sender-thread.patch new file mode 100644 index 0000000..06c4958 --- /dev/null +++ b/migration-multifd-Simplify-locking-in-sender-thread.patch @@ -0,0 +1,99 @@ +From 9e616674520aa0272393eda94a4ad7301969b73c Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:41 +0800 +Subject: [31/99] migration/multifd: Simplify locking in sender thread + +commit e3cce9af10b06c51434ced4e1a6686f1ce43e124 upstream. + +The sender thread will yield the p->mutex before IO starts, trying to not +block the requester thread. This may be unnecessary lock optimizations, +because the requester can already read pending_job safely even without the +lock, because the requester is currently the only one who can assign a +task. 
+ +Drop that lock complication on both sides: + + (1) in the sender thread, always take the mutex until job done + (2) in the requester thread, check pending_job clear lockless + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-8-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 23 ++++++++++++++++------- + 1 file changed, 16 insertions(+), 7 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index dd90c09b26..cef4a88237 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -430,7 +430,9 @@ static int multifd_send_pages(void) + return -1; + } + ++ /* We wait here, until at least one channel is ready */ + qemu_sem_wait(&multifd_send_state->channels_ready); ++ + /* + * next_channel can remain from a previous migration that was + * using more channels, so ensure it doesn't overflow if the +@@ -442,17 +444,26 @@ static int multifd_send_pages(void) + return -1; + } + p = &multifd_send_state->params[i]; +- qemu_mutex_lock(&p->mutex); ++ /* ++ * Lockless read to p->pending_job is safe, because only multifd ++ * sender thread can clear it. ++ */ + if (qatomic_read(&p->pending_job) == false) { +- qatomic_set(&p->pending_job, true); + next_channel = (i + 1) % migrate_multifd_channels(); + break; + } +- qemu_mutex_unlock(&p->mutex); + } ++ ++ qemu_mutex_lock(&p->mutex); + assert(!p->pages->num); + assert(!p->pages->block); +- ++ /* ++ * Double check on pending_job==false with the lock. In the future if ++ * we can have >1 requester thread, we can replace this with a "goto ++ * retry", but that is for later. 
++ */ ++ assert(qatomic_read(&p->pending_job) == false); ++ qatomic_set(&p->pending_job, true); + p->packet_num = multifd_send_state->packet_num++; + multifd_send_state->pages = p->pages; + p->pages = pages; +@@ -713,8 +724,6 @@ static void *multifd_send_thread(void *opaque) + multifd_send_fill_packet(p); + p->num_packets++; + p->total_normal_pages += pages->num; +- qemu_mutex_unlock(&p->mutex); +- + trace_multifd_send(p->id, packet_num, pages->num, p->flags, + p->next_packet_size); + +@@ -734,6 +743,7 @@ static void *multifd_send_thread(void *opaque) + ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL, + 0, p->write_flags, &local_err); + if (ret != 0) { ++ qemu_mutex_unlock(&p->mutex); + break; + } + +@@ -742,7 +752,6 @@ static void *multifd_send_thread(void *opaque) + + multifd_pages_reset(p->pages); + p->next_packet_size = 0; +- qemu_mutex_lock(&p->mutex); + qatomic_set(&p->pending_job, false); + qemu_mutex_unlock(&p->mutex); + } else if (qatomic_read(&p->pending_sync)) { +-- +2.33.0 + diff --git a/migration-multifd-Simplify-multifd_channel_connect-i.patch b/migration-multifd-Simplify-multifd_channel_connect-i.patch new file mode 100644 index 0000000..69d0a5b --- /dev/null +++ b/migration-multifd-Simplify-multifd_channel_connect-i.patch @@ -0,0 +1,53 @@ +From 9ec8c17e34afec47c8085a870e8dcfff36a9d3c7 Mon Sep 17 00:00:00 2001 +From: Avihai Horon +Date: Sun, 31 Dec 2023 11:30:11 +0200 +Subject: [02/99] migration/multifd: Simplify multifd_channel_connect() if else + statement +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit a4395f5d3c06472ed70d9ef9f79878f95575be9e upstream. + +The else branch in multifd_channel_connect() is redundant because when +the if branch is taken the function returns. + +Simplify the code by removing the else branch. 
+ +Signed-off-by: Avihai Horon +Reviewed-by: Philippe Mathieu-Daudé +Link: https://lore.kernel.org/r/20231231093016.14204-7-avihaih@nvidia.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 13 ++++++------- + 1 file changed, 6 insertions(+), 7 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 055b2688ad..06585f0141 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -851,14 +851,13 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + * so we mustn't call multifd_send_thread until then + */ + return multifd_tls_channel_connect(p, ioc, errp); +- +- } else { +- migration_ioc_register_yank(ioc); +- p->registered_yank = true; +- p->c = ioc; +- qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, +- QEMU_THREAD_JOINABLE); + } ++ ++ migration_ioc_register_yank(ioc); ++ p->registered_yank = true; ++ p->c = ioc; ++ qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, ++ QEMU_THREAD_JOINABLE); + return true; + } + +-- +2.33.0 + diff --git a/migration-multifd-Split-multifd_send_terminate_threa.patch b/migration-multifd-Split-multifd_send_terminate_threa.patch new file mode 100644 index 0000000..91e5f4b --- /dev/null +++ b/migration-multifd-Split-multifd_send_terminate_threa.patch @@ -0,0 +1,131 @@ +From e033a771a9d35a86b7864652abf61165bcdcaf55 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:49 +0800 +Subject: [39/99] migration/multifd: Split multifd_send_terminate_threads() + +commit 3ab4441d97af59ea09ee015d68c4770704b2b34f upstream. + +Split multifd_send_terminate_threads() into two functions: + + - multifd_send_set_error(): used when an error happened on the sender + side, set error and quit state only + + - multifd_send_terminate_threads(): used only by the main thread to kick + all multifd send threads out of sleep, for the last recycling. + +Use multifd_send_set_error() in the three old call sites where only the +error will be set. 
+ +Use multifd_send_terminate_threads() in the last one where the main thread +will kick the multifd threads at last in multifd_save_cleanup(). + +Both helpers will need to set quitting=1. + +Suggested-by: Fabiano Rosas +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-16-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 27 ++++++++++++++++++--------- + migration/trace-events | 2 +- + 2 files changed, 19 insertions(+), 10 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 3b7984cf99..59ccc42c05 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -537,10 +537,9 @@ int multifd_queue_page(RAMBlock *block, ram_addr_t offset) + return 1; + } + +-static void multifd_send_terminate_threads(Error *err) ++/* Multifd send side hit an error; remember it and prepare to quit */ ++static void multifd_send_set_error(Error *err) + { +- int i; +- + /* + * We don't want to exit each threads twice. Depending on where + * we get the error, or if there are two independent errors in two +@@ -551,8 +550,6 @@ static void multifd_send_terminate_threads(Error *err) + return; + } + +- trace_multifd_send_terminate_threads(err != NULL); +- + if (err) { + MigrationState *s = migrate_get_current(); + migrate_set_error(s, err); +@@ -564,7 +561,19 @@ static void multifd_send_terminate_threads(Error *err) + MIGRATION_STATUS_FAILED); + } + } ++} ++ ++static void multifd_send_terminate_threads(void) ++{ ++ int i; ++ ++ trace_multifd_send_terminate_threads(); + ++ /* ++ * Tell everyone we're quitting. No xchg() needed here; we simply ++ * always set it. 
++ */ ++ qatomic_set(&multifd_send_state->exiting, 1); + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +@@ -587,7 +596,7 @@ void multifd_save_cleanup(void) + if (!migrate_multifd()) { + return; + } +- multifd_send_terminate_threads(NULL); ++ multifd_send_terminate_threads(); + for (i = 0; i < migrate_multifd_channels(); i++) { + MultiFDSendParams *p = &multifd_send_state->params[i]; + +@@ -784,7 +793,7 @@ out: + if (ret) { + assert(local_err); + trace_multifd_send_error(p->id); +- multifd_send_terminate_threads(local_err); ++ multifd_send_set_error(local_err); + multifd_send_kick_main(p); + error_free(local_err); + } +@@ -820,7 +829,7 @@ static void multifd_tls_outgoing_handshake(QIOTask *task, + + trace_multifd_tls_outgoing_handshake_error(ioc, error_get_pretty(err)); + +- multifd_send_terminate_threads(err); ++ multifd_send_set_error(err); + multifd_send_kick_main(p); + error_free(err); + } +@@ -902,7 +911,7 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + } + + trace_multifd_new_send_channel_async_error(p->id, local_err); +- multifd_send_terminate_threads(local_err); ++ multifd_send_set_error(local_err); + multifd_send_kick_main(p); + object_unref(OBJECT(ioc)); + error_free(local_err); +diff --git a/migration/trace-events b/migration/trace-events +index de4a743c8a..298ad2b0dd 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -141,7 +141,7 @@ multifd_send_error(uint8_t id) "channel %u" + multifd_send_sync_main(long packet_num) "packet num %ld" + multifd_send_sync_main_signal(uint8_t id) "channel %u" + multifd_send_sync_main_wait(uint8_t id) "channel %u" +-multifd_send_terminate_threads(bool error) "error %d" ++multifd_send_terminate_threads(void) "" + multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages) "channel %u packets %" PRIu64 " normal pages %" PRIu64 + multifd_send_thread_start(uint8_t id) "%u" + 
multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s" +-- +2.33.0 + diff --git a/migration-multifd-Stick-with-send-recv-on-function-n.patch b/migration-multifd-Stick-with-send-recv-on-function-n.patch new file mode 100644 index 0000000..720636b --- /dev/null +++ b/migration-multifd-Stick-with-send-recv-on-function-n.patch @@ -0,0 +1,156 @@ +From f78f9157a90c7bef026f87fd38f6ce5b785f6cb7 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:55 +0800 +Subject: [45/99] migration/multifd: Stick with send/recv on function names + +commit cde85c37ca54e4a2dbee8653181938499887f6be upstream. + +Most of the multifd code uses send/recv to represent the two sides, but +some rare cases use save/load. + +Since send/recv is the majority, replacing the save/load use cases to use +send/recv globally. Now we reach a consensus on the naming. + +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-22-peterx@redhat.com +Signed-off-by: Peter Xu +[jz: upstream renamed qemu_mutex_lock_iothread() to bql_lock(), while + openEuler not yet, resolve context conflict due to this] +Signed-off-by: Jason Zeng +--- + migration/migration.c | 12 ++++++------ + migration/multifd.c | 10 +++++----- + migration/multifd.h | 10 +++++----- + 3 files changed, 16 insertions(+), 16 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 2c5258d0b0..f428839dd6 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -269,7 +269,7 @@ void migration_incoming_state_destroy(void) + { + struct MigrationIncomingState *mis = migration_incoming_get_current(); + +- multifd_load_cleanup(); ++ multifd_recv_cleanup(); + compress_threads_load_cleanup(); + + if (mis->to_src_file) { +@@ -622,7 +622,7 @@ static void process_incoming_migration_bh(void *opaque) + + trace_vmstate_downtime_checkpoint("dst-precopy-bh-announced"); + +- multifd_load_shutdown(); ++ multifd_recv_shutdown(); + + 
dirty_bitmap_mig_before_vm_start(); + +@@ -721,7 +721,7 @@ fail: + MIGRATION_STATUS_FAILED); + qemu_fclose(mis->from_src_file); + +- multifd_load_cleanup(); ++ multifd_recv_cleanup(); + compress_threads_load_cleanup(); + + exit(EXIT_FAILURE); +@@ -854,7 +854,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) + default_channel = !mis->from_src_file; + } + +- if (multifd_load_setup(errp) != 0) { ++ if (multifd_recv_setup(errp) != 0) { + return; + } + +@@ -1306,7 +1306,7 @@ static void migrate_fd_cleanup(MigrationState *s) + } + qemu_mutex_lock_iothread(); + +- multifd_save_cleanup(); ++ multifd_send_shutdown(); + qemu_mutex_lock(&s->qemu_file_lock); + tmp = s->to_dst_file; + s->to_dst_file = NULL; +@@ -3638,7 +3638,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) + return; + } + +- if (multifd_save_setup(&local_err) != 0) { ++ if (multifd_send_setup(&local_err) != 0) { + migrate_set_error(s, local_err); + error_report_err(local_err); + migrate_set_state(&s->state, MIGRATION_STATUS_SETUP, +diff --git a/migration/multifd.c b/migration/multifd.c +index 048ff66760..723b1d0b35 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -664,7 +664,7 @@ static void multifd_send_cleanup_state(void) + multifd_send_state = NULL; + } + +-void multifd_save_cleanup(void) ++void multifd_send_shutdown(void) + { + int i; + +@@ -969,7 +969,7 @@ static void multifd_new_send_channel_create(gpointer opaque) + socket_send_channel_create(multifd_new_send_channel_async, opaque); + } + +-int multifd_save_setup(Error **errp) ++int multifd_send_setup(Error **errp) + { + int thread_count; + uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); +@@ -1067,7 +1067,7 @@ static void multifd_recv_terminate_threads(Error *err) + } + } + +-void multifd_load_shutdown(void) ++void multifd_recv_shutdown(void) + { + if (migrate_multifd()) { + multifd_recv_terminate_threads(NULL); +@@ -1102,7 +1102,7 @@ static void multifd_recv_cleanup_state(void) + 
multifd_recv_state = NULL; + } + +-void multifd_load_cleanup(void) ++void multifd_recv_cleanup(void) + { + int i; + +@@ -1217,7 +1217,7 @@ static void *multifd_recv_thread(void *opaque) + return NULL; + } + +-int multifd_load_setup(Error **errp) ++int multifd_recv_setup(Error **errp) + { + int thread_count; + uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size(); +diff --git a/migration/multifd.h b/migration/multifd.h +index a320c53a6f..9b40a53cb6 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -13,11 +13,11 @@ + #ifndef QEMU_MIGRATION_MULTIFD_H + #define QEMU_MIGRATION_MULTIFD_H + +-int multifd_save_setup(Error **errp); +-void multifd_save_cleanup(void); +-int multifd_load_setup(Error **errp); +-void multifd_load_cleanup(void); +-void multifd_load_shutdown(void); ++int multifd_send_setup(Error **errp); ++void multifd_send_shutdown(void); ++int multifd_recv_setup(Error **errp); ++void multifd_recv_cleanup(void); ++void multifd_recv_shutdown(void); + bool multifd_recv_all_channels_created(void); + void multifd_recv_new_channel(QIOChannel *ioc, Error **errp); + void multifd_recv_sync_main(void); +-- +2.33.0 + diff --git a/migration-multifd-Switch-to-no-compression-when-no-h.patch b/migration-multifd-Switch-to-no-compression-when-no-h.patch new file mode 100644 index 0000000..7a701e9 --- /dev/null +++ b/migration-multifd-Switch-to-no-compression-when-no-h.patch @@ -0,0 +1,169 @@ +From 56d75b83e20501cbd35326823d3450ccede2823a Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Fri, 7 Jun 2024 14:53:09 +0100 +Subject: [85/99] migration/multifd: Switch to no compression when no hardware + support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit c1dfd12168e1be0a940e97f85044098e18d18178 upstream. + +Send raw packets over if UADK hardware support is not available. This is to +satisfy  Qemu qtest CI which may run on platforms that don't have UADK +hardware support. 
Subsequent patch will add support for uadk migration +qtest. + +Reviewed-by: Fabiano Rosas +Signed-off-by: Shameer Kolothum +Reviewed-by: Zhangfei Gao +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + migration/multifd-uadk.c | 92 +++++++++++++++++++++++----------------- + 1 file changed, 53 insertions(+), 39 deletions(-) + +diff --git a/migration/multifd-uadk.c b/migration/multifd-uadk.c +index 70bba92eaa..d12353fb21 100644 +--- a/migration/multifd-uadk.c ++++ b/migration/multifd-uadk.c +@@ -17,6 +17,7 @@ + #include "migration.h" + #include "multifd.h" + #include "options.h" ++#include "qemu/error-report.h" + #include "uadk/wd_comp.h" + #include "uadk/wd_sched.h" + +@@ -48,29 +49,29 @@ static struct wd_data *multifd_uadk_init_sess(uint32_t count, + uint32_t size = count * page_size; + struct wd_data *wd; + +- if (!uadk_hw_init()) { +- error_setg(errp, "multifd: UADK hardware not available"); +- return NULL; +- } +- + wd = g_new0(struct wd_data, 1); +- ss.alg_type = WD_ZLIB; +- if (compress) { +- ss.op_type = WD_DIR_COMPRESS; +- /* Add an additional page for handling output > input */ +- size += page_size; +- } else { +- ss.op_type = WD_DIR_DECOMPRESS; +- } +- +- /* We use default level 1 compression and 4K window size */ +- param.type = ss.op_type; +- ss.sched_param = &param; + +- wd->handle = wd_comp_alloc_sess(&ss); +- if (!wd->handle) { +- error_setg(errp, "multifd: failed wd_comp_alloc_sess"); +- goto out; ++ if (uadk_hw_init()) { ++ ss.alg_type = WD_ZLIB; ++ if (compress) { ++ ss.op_type = WD_DIR_COMPRESS; ++ /* Add an additional page for handling output > input */ ++ size += page_size; ++ } else { ++ ss.op_type = WD_DIR_DECOMPRESS; ++ } ++ /* We use default level 1 compression and 4K window size */ ++ param.type = ss.op_type; ++ ss.sched_param = &param; ++ ++ wd->handle = wd_comp_alloc_sess(&ss); ++ if (!wd->handle) { ++ error_setg(errp, "multifd: failed wd_comp_alloc_sess"); ++ goto out; ++ } ++ } else { ++ /* For CI test use */ ++ 
warn_report_once("UADK hardware not available. Switch to no compression mode"); + } + + wd->buf = g_try_malloc(size); +@@ -82,7 +83,9 @@ static struct wd_data *multifd_uadk_init_sess(uint32_t count, + return wd; + + out_free_sess: +- wd_comp_free_sess(wd->handle); ++ if (wd->handle) { ++ wd_comp_free_sess(wd->handle); ++ } + out: + wd_comp_uninit2(); + g_free(wd); +@@ -91,7 +94,9 @@ out: + + static void multifd_uadk_uninit_sess(struct wd_data *wd) + { +- wd_comp_free_sess(wd->handle); ++ if (wd->handle) { ++ wd_comp_free_sess(wd->handle); ++ } + wd_comp_uninit2(); + g_free(wd->buf); + g_free(wd->buf_hdr); +@@ -188,23 +193,26 @@ static int multifd_uadk_send_prepare(MultiFDSendParams *p, Error **errp) + .dst_len = p->page_size * 2, + }; + +- ret = wd_do_comp_sync(uadk_data->handle, &creq); +- if (ret || creq.status) { +- error_setg(errp, "multifd %u: failed compression, ret %d status %d", +- p->id, ret, creq.status); +- return -1; ++ if (uadk_data->handle) { ++ ret = wd_do_comp_sync(uadk_data->handle, &creq); ++ if (ret || creq.status) { ++ error_setg(errp, "multifd %u: failed compression, ret %d status %d", ++ p->id, ret, creq.status); ++ return -1; ++ } ++ if (creq.dst_len < p->page_size) { ++ uadk_data->buf_hdr[i] = cpu_to_be32(creq.dst_len); ++ prepare_next_iov(p, buf, creq.dst_len); ++ buf += creq.dst_len; ++ } + } +- if (creq.dst_len < p->page_size) { +- uadk_data->buf_hdr[i] = cpu_to_be32(creq.dst_len); +- prepare_next_iov(p, buf, creq.dst_len); +- buf += creq.dst_len; +- } else { +- /* +- * Send raw data if compressed out >= page_size. We might be better +- * off sending raw data if output is slightly less than page_size +- * as well because at the receive end we can skip the decompression. +- * But it is tricky to find the right number here. +- */ ++ /* ++ * Send raw data if no UADK hardware or if compressed out >= page_size. 
++ * We might be better off sending raw data if output is slightly less ++ * than page_size as well because at the receive end we can skip the ++ * decompression. But it is tricky to find the right number here. ++ */ ++ if (!uadk_data->handle || creq.dst_len >= p->page_size) { + uadk_data->buf_hdr[i] = cpu_to_be32(p->page_size); + prepare_next_iov(p, p->pages->block->host + p->pages->offset[i], + p->page_size); +@@ -323,6 +331,12 @@ static int multifd_uadk_recv(MultiFDRecvParams *p, Error **errp) + continue; + } + ++ if (unlikely(!uadk_data->handle)) { ++ error_setg(errp, "multifd %u: UADK HW not available for decompression", ++ p->id); ++ return -1; ++ } ++ + ret = wd_do_comp_sync(uadk_data->handle, &creq); + if (ret || creq.status) { + error_setg(errp, "multifd %u: failed decompression, ret %d status %d", +-- +2.33.0 + diff --git a/migration-multifd-Unify-multifd-and-TLS-connection-p.patch b/migration-multifd-Unify-multifd-and-TLS-connection-p.patch new file mode 100644 index 0000000..8058286 --- /dev/null +++ b/migration-multifd-Unify-multifd-and-TLS-connection-p.patch @@ -0,0 +1,175 @@ +From 7b385b0d528dfe3490bb3c8f58937bde1685f0f1 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Tue, 6 Feb 2024 18:51:17 -0300 +Subject: [53/99] migration/multifd: Unify multifd and TLS connection paths + +commit 2576ae488ef9aa692486157df7d8b410919cd219 upstream. + +During multifd channel creation (multifd_new_send_channel_async) when +TLS is enabled, the multifd_channel_connect function is called twice, +once to create the TLS handshake thread and another time after the +asynchronous TLS handshake has finished. + +This creates a slightly confusing call stack where +multifd_channel_connect() is called more times than the number of +channels. It also splits error handling between the two callers of +multifd_channel_connect() causing some code duplication. Lastly, it +gets in the way of having a single point to determine whether all +channel creation tasks have been initiated. 
+ +Refactor the code to move the reentrancy one level up at the +multifd_new_send_channel_async() level, de-duplicating the error +handling and allowing for the next patch to introduce a +synchronization point common to all the multifd channel creation, +regardless of TLS. + +Note that the previous code would never fail once p->c had been set. +This patch changes this assumption, which affects refcounting, so add +comments around object_unref to explain the situation. + +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240206215118.6171-6-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 83 ++++++++++++++++++++++----------------------- + 1 file changed, 40 insertions(+), 43 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 1299248fea..85d1e7c347 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -873,30 +873,7 @@ out: + return NULL; + } + +-static bool multifd_channel_connect(MultiFDSendParams *p, +- QIOChannel *ioc, +- Error **errp); +- +-static void multifd_tls_outgoing_handshake(QIOTask *task, +- gpointer opaque) +-{ +- MultiFDSendParams *p = opaque; +- QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task)); +- Error *err = NULL; +- +- if (!qio_task_propagate_error(task, &err)) { +- trace_multifd_tls_outgoing_handshake_complete(ioc); +- if (multifd_channel_connect(p, ioc, &err)) { +- return; +- } +- } +- +- trace_multifd_tls_outgoing_handshake_error(ioc, error_get_pretty(err)); +- +- multifd_send_set_error(err); +- multifd_send_kick_main(p); +- error_free(err); +-} ++static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque); + + static void *multifd_tls_handshake_thread(void *opaque) + { +@@ -904,7 +881,7 @@ static void *multifd_tls_handshake_thread(void *opaque) + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(p->c); + + qio_channel_tls_handshake(tioc, +- multifd_tls_outgoing_handshake, ++ multifd_new_send_channel_async, + p, 
+ NULL, + NULL); +@@ -924,6 +901,10 @@ static bool multifd_tls_channel_connect(MultiFDSendParams *p, + return false; + } + ++ /* ++ * Ownership of the socket channel now transfers to the newly ++ * created TLS channel, which has already taken a reference. ++ */ + object_unref(OBJECT(ioc)); + trace_multifd_tls_outgoing_handshake_start(ioc, tioc, hostname); + qio_channel_set_name(QIO_CHANNEL(tioc), "multifd-tls-outgoing"); +@@ -940,18 +921,7 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + QIOChannel *ioc, + Error **errp) + { +- trace_multifd_set_outgoing_channel( +- ioc, object_get_typename(OBJECT(ioc)), +- migrate_get_current()->hostname); +- +- if (migrate_channel_requires_tls_upgrade(ioc)) { +- /* +- * tls_channel_connect will call back to this +- * function after the TLS handshake, +- * so we mustn't call multifd_send_thread until then +- */ +- return multifd_tls_channel_connect(p, ioc, errp); +- } ++ qio_channel_set_delay(ioc, false); + + migration_ioc_register_yank(ioc); + p->registered_yank = true; +@@ -963,24 +933,51 @@ static bool multifd_channel_connect(MultiFDSendParams *p, + return true; + } + ++/* ++ * When TLS is enabled this function is called once to establish the ++ * TLS connection and a second time after the TLS handshake to create ++ * the multifd channel. Without TLS it goes straight into the channel ++ * creation. 
++ */ + static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque) + { + MultiFDSendParams *p = opaque; + QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task)); + Error *local_err = NULL; ++ bool ret; + + trace_multifd_new_send_channel_async(p->id); +- if (!qio_task_propagate_error(task, &local_err)) { +- qio_channel_set_delay(ioc, false); +- if (multifd_channel_connect(p, ioc, &local_err)) { +- return; +- } ++ ++ if (qio_task_propagate_error(task, &local_err)) { ++ ret = false; ++ goto out; ++ } ++ ++ trace_multifd_set_outgoing_channel(ioc, object_get_typename(OBJECT(ioc)), ++ migrate_get_current()->hostname); ++ ++ if (migrate_channel_requires_tls_upgrade(ioc)) { ++ ret = multifd_tls_channel_connect(p, ioc, &local_err); ++ } else { ++ ret = multifd_channel_connect(p, ioc, &local_err); + } + ++ if (ret) { ++ return; ++ } ++ ++out: + trace_multifd_new_send_channel_async_error(p->id, local_err); + multifd_send_set_error(local_err); + multifd_send_kick_main(p); +- object_unref(OBJECT(ioc)); ++ if (!p->c) { ++ /* ++ * If no channel has been created, drop the initial ++ * reference. Otherwise cleanup happens at ++ * multifd_send_channel_destroy() ++ */ ++ object_unref(OBJECT(ioc)); ++ } + error_free(local_err); + } + +-- +2.33.0 + diff --git a/migration-multifd-Zero-p-flags-before-starting-filli.patch b/migration-multifd-Zero-p-flags-before-starting-filli.patch new file mode 100644 index 0000000..eae6a62 --- /dev/null +++ b/migration-multifd-Zero-p-flags-before-starting-filli.patch @@ -0,0 +1,50 @@ +From c927bd2c10ee92131eba56ab8d2c26dd9dedfe50 Mon Sep 17 00:00:00 2001 +From: "Maciej S. Szmigiero" +Date: Tue, 29 Oct 2024 15:58:15 +0100 +Subject: [96/99] migration/multifd: Zero p->flags before starting filling a + packet + +commit 00b4b216534d84ace7b0583cec70a3aaf256cb25 upstream. + +This way there aren't stale flags there. + +p->flags can't contain SYNC to be sent at the next RAM packet since syncs +are now handled separately in multifd_send_thread. 
+ +Reviewed-by: Fabiano Rosas +Reviewed-by: Peter Xu +Signed-off-by: Maciej S. Szmigiero +Link: https://lore.kernel.org/r/1c96b6cdb797e6f035eb1a4ad9bfc24f4c7f5df8.1730203967.git.maciej.szmigiero@oracle.com +Signed-off-by: Peter Xu + + Conflicts: + migration/multifd.c +[jz: resolve simple context conflict] +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 3761a803ed..36581a5631 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -898,6 +898,7 @@ static void *multifd_send_thread(void *opaque) + if (qatomic_load_acquire(&p->pending_job)) { + MultiFDPages_t *pages = p->pages; + ++ p->flags = 0; + p->iovs_num = 0; + assert(pages->num); + +@@ -944,7 +945,6 @@ static void *multifd_send_thread(void *opaque) + } + /* p->next_packet_size will always be zero for a SYNC packet */ + stat64_add(&mig_stats.multifd_bytes, p->packet_len); +- p->flags = 0; + } + + qatomic_set(&p->pending_sync, false); +-- +2.33.0 + diff --git a/migration-multifd-add-qpl-compression-method.patch b/migration-multifd-add-qpl-compression-method.patch new file mode 100644 index 0000000..50a0565 --- /dev/null +++ b/migration-multifd-add-qpl-compression-method.patch @@ -0,0 +1,125 @@ +From 0f0f9c2c5a658a77c1d99e1d1ec166b8259ec307 Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Mon, 10 Jun 2024 18:21:07 +0800 +Subject: [75/99] migration/multifd: add qpl compression method + +commit 354cac2859e48ec5f7ee72a2a071da6c60a462d0 upstream. + +add the Query Processing Library (QPL) compression method + +Introduce the qpl as a new multifd migration compression method, it can +use In-Memory Analytics Accelerator(IAA) to accelerate compression and +decompression, which can not only reduce network bandwidth requirement +but also reduce host compression and decompression CPU overhead. 
+ +How to enable qpl compression during migration: +migrate_set_parameter multifd-compression qpl + +There is no qpl compression level parameter added since it only supports +level one, users do not need to specify the qpl compression level. + +Signed-off-by: Yuan Liu +Reviewed-by: Nanhai Zou +Reviewed-by: Peter Xu +Reviewed-by: Fabiano Rosas +[fixed docs spacing in migration.json] +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + hw/core/qdev-properties-system.c | 2 +- + migration/meson.build | 1 + + migration/multifd-qpl.c | 20 ++++++++++++++++++++ + migration/multifd.h | 1 + + qapi/migration.json | 8 +++++++- + 5 files changed, 30 insertions(+), 2 deletions(-) + create mode 100644 migration/multifd-qpl.c + +diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c +index b3b9238b65..6ee9744e00 100644 +--- a/hw/core/qdev-properties-system.c ++++ b/hw/core/qdev-properties-system.c +@@ -711,7 +711,7 @@ const PropertyInfo qdev_prop_fdc_drive_type = { + const PropertyInfo qdev_prop_multifd_compression = { + .name = "MultiFDCompression", + .description = "multifd_compression values, " +- "none/zlib/zstd", ++ "none/zlib/zstd/qpl", + .enum_table = &MultiFDCompression_lookup, + .get = qdev_propinfo_get_enum, + .set = qdev_propinfo_set_enum, +diff --git a/migration/meson.build b/migration/meson.build +index d619ebf238..6652f68d32 100644 +--- a/migration/meson.build ++++ b/migration/meson.build +@@ -40,6 +40,7 @@ if get_option('live_block_migration').allowed() + system_ss.add(files('block.c')) + endif + system_ss.add(when: zstd, if_true: files('multifd-zstd.c')) ++system_ss.add(when: qpl, if_true: files('multifd-qpl.c')) + + specific_ss.add(when: 'CONFIG_SYSTEM_ONLY', + if_true: files('ram.c', +diff --git a/migration/multifd-qpl.c b/migration/multifd-qpl.c +new file mode 100644 +index 0000000000..056a68a060 +--- /dev/null ++++ b/migration/multifd-qpl.c +@@ -0,0 +1,20 @@ ++/* ++ * Multifd qpl compression accelerator implementation ++ * 
++ * Copyright (c) 2023 Intel Corporation ++ * ++ * Authors: ++ * Yuan Liu ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++#include "qemu/osdep.h" ++#include "qemu/module.h" ++ ++static void multifd_qpl_register(void) ++{ ++ /* noop */ ++} ++ ++migration_init(multifd_qpl_register); +diff --git a/migration/multifd.h b/migration/multifd.h +index d99603c6a4..11f05dd6d5 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -33,6 +33,7 @@ bool multifd_queue_page(RAMBlock *block, ram_addr_t offset); + #define MULTIFD_FLAG_NOCOMP (0 << 1) + #define MULTIFD_FLAG_ZLIB (1 << 1) + #define MULTIFD_FLAG_ZSTD (2 << 1) ++#define MULTIFD_FLAG_QPL (4 << 1) + + /* This value needs to be a multiple of qemu_target_page_size() */ + #define MULTIFD_PACKET_SIZE (512 * 1024) +diff --git a/qapi/migration.json b/qapi/migration.json +index fc3178b1dc..f8f3f6f272 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -625,11 +625,17 @@ + # + # @zstd: use zstd compression method. + # ++# @qpl: use qpl compression method. Query Processing Library(qpl) is ++# based on the deflate compression algorithm and use the Intel ++# In-Memory Analytics Accelerator(IAA) accelerated compression ++# and decompression. 
(Since 9.1) ++# + # Since: 5.0 + ## + { 'enum': 'MultiFDCompression', + 'data': [ 'none', 'zlib', +- { 'name': 'zstd', 'if': 'CONFIG_ZSTD' } ] } ++ { 'name': 'zstd', 'if': 'CONFIG_ZSTD' }, ++ { 'name': 'qpl', 'if': 'CONFIG_QPL' } ] } + + ## + # @MigMode: +-- +2.33.0 + diff --git a/migration-multifd-add-uadk-compression-framework.patch b/migration-multifd-add-uadk-compression-framework.patch new file mode 100644 index 0000000..bb0f863 --- /dev/null +++ b/migration-multifd-add-uadk-compression-framework.patch @@ -0,0 +1,121 @@ +From cf49f952f849aecd772144cee5285b746bfae228 Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Fri, 7 Jun 2024 14:53:06 +0100 +Subject: [82/99] migration/multifd: add uadk compression framework + +commit f3d8bb759d13a2e33389f00fa338d0761309029a upstream. + +Adds the skeleton to support uadk compression method. +Complete functionality will be added in subsequent patches. + +Acked-by: Markus Armbruster +Reviewed-by: Fabiano Rosas +Signed-off-by: Shameer Kolothum +Reviewed-by: Zhangfei Gao +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + hw/core/qdev-properties-system.c | 2 +- + migration/meson.build | 1 + + migration/multifd-uadk.c | 20 ++++++++++++++++++++ + migration/multifd.h | 5 +++-- + qapi/migration.json | 5 ++++- + 5 files changed, 29 insertions(+), 4 deletions(-) + create mode 100644 migration/multifd-uadk.c + +diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c +index 6ee9744e00..650c42eaf8 100644 +--- a/hw/core/qdev-properties-system.c ++++ b/hw/core/qdev-properties-system.c +@@ -711,7 +711,7 @@ const PropertyInfo qdev_prop_fdc_drive_type = { + const PropertyInfo qdev_prop_multifd_compression = { + .name = "MultiFDCompression", + .description = "multifd_compression values, " +- "none/zlib/zstd/qpl", ++ "none/zlib/zstd/qpl/uadk", + .enum_table = &MultiFDCompression_lookup, + .get = qdev_propinfo_get_enum, + .set = qdev_propinfo_set_enum, +diff --git a/migration/meson.build 
b/migration/meson.build +index 6652f68d32..264d04657f 100644 +--- a/migration/meson.build ++++ b/migration/meson.build +@@ -41,6 +41,7 @@ if get_option('live_block_migration').allowed() + endif + system_ss.add(when: zstd, if_true: files('multifd-zstd.c')) + system_ss.add(when: qpl, if_true: files('multifd-qpl.c')) ++system_ss.add(when: uadk, if_true: files('multifd-uadk.c')) + + specific_ss.add(when: 'CONFIG_SYSTEM_ONLY', + if_true: files('ram.c', +diff --git a/migration/multifd-uadk.c b/migration/multifd-uadk.c +new file mode 100644 +index 0000000000..c2bb07535b +--- /dev/null ++++ b/migration/multifd-uadk.c +@@ -0,0 +1,20 @@ ++/* ++ * Multifd UADK compression accelerator implementation ++ * ++ * Copyright (c) 2024 Huawei Technologies R & D (UK) Ltd ++ * ++ * Authors: ++ * Shameer Kolothum ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or later. ++ * See the COPYING file in the top-level directory. ++ */ ++ ++#include "qemu/osdep.h" ++#include "qemu/module.h" ++ ++static void multifd_uadk_register(void) ++{ ++ /* noop for now */ ++} ++migration_init(multifd_uadk_register); +diff --git a/migration/multifd.h b/migration/multifd.h +index 41965df7a9..ace4ba050d 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -29,13 +29,14 @@ bool multifd_queue_page(RAMBlock *block, ram_addr_t offset); + /* Multifd Compression flags */ + #define MULTIFD_FLAG_SYNC (1 << 0) + +-/* We reserve 3 bits for compression methods */ +-#define MULTIFD_FLAG_COMPRESSION_MASK (7 << 1) ++/* We reserve 4 bits for compression methods */ ++#define MULTIFD_FLAG_COMPRESSION_MASK (0xf << 1) + /* we need to be compatible. 
Before compression value was 0 */ + #define MULTIFD_FLAG_NOCOMP (0 << 1) + #define MULTIFD_FLAG_ZLIB (1 << 1) + #define MULTIFD_FLAG_ZSTD (2 << 1) + #define MULTIFD_FLAG_QPL (4 << 1) ++#define MULTIFD_FLAG_UADK (8 << 1) + + /* This value needs to be a multiple of qemu_target_page_size() */ + #define MULTIFD_PACKET_SIZE (512 * 1024) +diff --git a/qapi/migration.json b/qapi/migration.json +index f8f3f6f272..f1a17c511b 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -630,12 +630,15 @@ + # In-Memory Analytics Accelerator(IAA) accelerated compression + # and decompression. (Since 9.1) + # ++# @uadk: use UADK library compression method. (Since 9.1) ++# + # Since: 5.0 + ## + { 'enum': 'MultiFDCompression', + 'data': [ 'none', 'zlib', + { 'name': 'zstd', 'if': 'CONFIG_ZSTD' }, +- { 'name': 'qpl', 'if': 'CONFIG_QPL' } ] } ++ { 'name': 'qpl', 'if': 'CONFIG_QPL' }, ++ { 'name': 'uadk', 'if': 'CONFIG_UADK' } ] } + + ## + # @MigMode: +-- +2.33.0 + diff --git a/migration-multifd-implement-initialization-of-qpl-co.patch b/migration-multifd-implement-initialization-of-qpl-co.patch new file mode 100644 index 0000000..36176ad --- /dev/null +++ b/migration-multifd-implement-initialization-of-qpl-co.patch @@ -0,0 +1,369 @@ +From 41fed938d3474ab517e689feeb8abf5e2876d2df Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Mon, 10 Jun 2024 18:21:08 +0800 +Subject: [77/99] migration/multifd: implement initialization of qpl + compression + +commit 34e104b897da6e144a5f34e7c5eebf8a4c4d9d59 upstream. + +during initialization, a software job is allocated to each channel +for software path fallback when the IAA hardware is unavailable or +the hardware job submission fails. If the IAA hardware is available, +multiple hardware jobs are allocated for batch processing.
+ +Signed-off-by: Yuan Liu +Reviewed-by: Nanhai Zou +Reviewed-by: Fabiano Rosas +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + migration/multifd-qpl.c | 328 +++++++++++++++++++++++++++++++++++++++- + 1 file changed, 327 insertions(+), 1 deletion(-) + +diff --git a/migration/multifd-qpl.c b/migration/multifd-qpl.c +index 056a68a060..6791a204d5 100644 +--- a/migration/multifd-qpl.c ++++ b/migration/multifd-qpl.c +@@ -9,12 +9,338 @@ + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ ++ + #include "qemu/osdep.h" + #include "qemu/module.h" ++#include "qapi/error.h" ++#include "multifd.h" ++#include "qpl/qpl.h" ++ ++typedef struct { ++ /* the QPL hardware path job */ ++ qpl_job *job; ++ /* indicates if fallback to software path is required */ ++ bool fallback_sw_path; ++ /* output data from the software path */ ++ uint8_t *sw_output; ++ /* output data length from the software path */ ++ uint32_t sw_output_len; ++} QplHwJob; ++ ++typedef struct { ++ /* array of hardware jobs, the number of jobs equals the number pages */ ++ QplHwJob *hw_jobs; ++ /* the QPL software job for the slow path and software fallback */ ++ qpl_job *sw_job; ++ /* the number of pages that the QPL needs to process at one time */ ++ uint32_t page_num; ++ /* array of compressed page buffers */ ++ uint8_t *zbuf; ++ /* array of compressed page lengths */ ++ uint32_t *zlen; ++ /* the status of the hardware device */ ++ bool hw_avail; ++} QplData; ++ ++/** ++ * check_hw_avail: check if IAA hardware is available ++ * ++ * If the IAA hardware does not exist or is unavailable, ++ * the QPL hardware job initialization will fail. ++ * ++ * Returns true if IAA hardware is available, otherwise false. 
++ * ++ * @job_size: indicates the hardware job size if hardware is available ++ */ ++static bool check_hw_avail(uint32_t *job_size) ++{ ++ qpl_path_t path = qpl_path_hardware; ++ uint32_t size = 0; ++ qpl_job *job; ++ ++ if (qpl_get_job_size(path, &size) != QPL_STS_OK) { ++ return false; ++ } ++ assert(size > 0); ++ job = g_malloc0(size); ++ if (qpl_init_job(path, job) != QPL_STS_OK) { ++ g_free(job); ++ return false; ++ } ++ g_free(job); ++ *job_size = size; ++ return true; ++} ++ ++/** ++ * multifd_qpl_free_sw_job: clean up software job ++ * ++ * Free the software job resources. ++ * ++ * @qpl: pointer to the QplData structure ++ */ ++static void multifd_qpl_free_sw_job(QplData *qpl) ++{ ++ assert(qpl); ++ if (qpl->sw_job) { ++ qpl_fini_job(qpl->sw_job); ++ g_free(qpl->sw_job); ++ qpl->sw_job = NULL; ++ } ++} ++ ++/** ++ * multifd_qpl_free_jobs: clean up hardware jobs ++ * ++ * Free all hardware job resources. ++ * ++ * @qpl: pointer to the QplData structure ++ */ ++static void multifd_qpl_free_hw_job(QplData *qpl) ++{ ++ assert(qpl); ++ if (qpl->hw_jobs) { ++ for (int i = 0; i < qpl->page_num; i++) { ++ qpl_fini_job(qpl->hw_jobs[i].job); ++ g_free(qpl->hw_jobs[i].job); ++ qpl->hw_jobs[i].job = NULL; ++ } ++ g_free(qpl->hw_jobs); ++ qpl->hw_jobs = NULL; ++ } ++} ++ ++/** ++ * multifd_qpl_init_sw_job: initialize a software job ++ * ++ * Use the QPL software path to initialize a job ++ * ++ * @qpl: pointer to the QplData structure ++ * @errp: pointer to an error ++ */ ++static int multifd_qpl_init_sw_job(QplData *qpl, Error **errp) ++{ ++ qpl_path_t path = qpl_path_software; ++ uint32_t size = 0; ++ qpl_job *job = NULL; ++ qpl_status status; ++ ++ status = qpl_get_job_size(path, &size); ++ if (status != QPL_STS_OK) { ++ error_setg(errp, "qpl_get_job_size failed with error %d", status); ++ return -1; ++ } ++ job = g_malloc0(size); ++ status = qpl_init_job(path, job); ++ if (status != QPL_STS_OK) { ++ error_setg(errp, "qpl_init_job failed with error %d", status); ++ 
g_free(job); ++ return -1; ++ } ++ qpl->sw_job = job; ++ return 0; ++} ++ ++/** ++ * multifd_qpl_init_jobs: initialize hardware jobs ++ * ++ * Use the QPL hardware path to initialize jobs ++ * ++ * @qpl: pointer to the QplData structure ++ * @size: the size of QPL hardware path job ++ * @errp: pointer to an error ++ */ ++static void multifd_qpl_init_hw_job(QplData *qpl, uint32_t size, Error **errp) ++{ ++ qpl_path_t path = qpl_path_hardware; ++ qpl_job *job = NULL; ++ qpl_status status; ++ ++ qpl->hw_jobs = g_new0(QplHwJob, qpl->page_num); ++ for (int i = 0; i < qpl->page_num; i++) { ++ job = g_malloc0(size); ++ status = qpl_init_job(path, job); ++ /* the job initialization should succeed after check_hw_avail */ ++ assert(status == QPL_STS_OK); ++ qpl->hw_jobs[i].job = job; ++ } ++} ++ ++/** ++ * multifd_qpl_init: initialize QplData structure ++ * ++ * Allocate and initialize a QplData structure ++ * ++ * Returns a QplData pointer on success or NULL on error ++ * ++ * @num: the number of pages ++ * @size: the page size ++ * @errp: pointer to an error ++ */ ++static QplData *multifd_qpl_init(uint32_t num, uint32_t size, Error **errp) ++{ ++ uint32_t job_size = 0; ++ QplData *qpl; ++ ++ qpl = g_new0(QplData, 1); ++ qpl->page_num = num; ++ if (multifd_qpl_init_sw_job(qpl, errp) != 0) { ++ g_free(qpl); ++ return NULL; ++ } ++ qpl->hw_avail = check_hw_avail(&job_size); ++ if (qpl->hw_avail) { ++ multifd_qpl_init_hw_job(qpl, job_size, errp); ++ } ++ qpl->zbuf = g_malloc0(size * num); ++ qpl->zlen = g_new0(uint32_t, num); ++ return qpl; ++} ++ ++/** ++ * multifd_qpl_deinit: clean up QplData structure ++ * ++ * Free jobs, buffers and the QplData structure ++ * ++ * @qpl: pointer to the QplData structure ++ */ ++static void multifd_qpl_deinit(QplData *qpl) ++{ ++ if (qpl) { ++ multifd_qpl_free_sw_job(qpl); ++ multifd_qpl_free_hw_job(qpl); ++ g_free(qpl->zbuf); ++ g_free(qpl->zlen); ++ g_free(qpl); ++ } ++} ++ ++/** ++ * multifd_qpl_send_setup: set up send side ++ * ++ * Set 
up the channel with QPL compression. ++ * ++ * Returns 0 on success or -1 on error ++ * ++ * @p: Params for the channel being used ++ * @errp: pointer to an error ++ */ ++static int multifd_qpl_send_setup(MultiFDSendParams *p, Error **errp) ++{ ++ QplData *qpl; ++ ++ qpl = multifd_qpl_init(p->page_count, p->page_size, errp); ++ if (!qpl) { ++ return -1; ++ } ++ p->compress_data = qpl; ++ ++ /* ++ * the page will be compressed independently and sent using an IOV. The ++ * additional two IOVs are used to store packet header and compressed data ++ * length ++ */ ++ p->iov = g_new0(struct iovec, p->page_count + 2); ++ return 0; ++} ++ ++/** ++ * multifd_qpl_send_cleanup: clean up send side ++ * ++ * Close the channel and free memory. ++ * ++ * @p: Params for the channel being used ++ * @errp: pointer to an error ++ */ ++static void multifd_qpl_send_cleanup(MultiFDSendParams *p, Error **errp) ++{ ++ multifd_qpl_deinit(p->compress_data); ++ p->compress_data = NULL; ++ g_free(p->iov); ++ p->iov = NULL; ++} ++ ++/** ++ * multifd_qpl_send_prepare: prepare data to be able to send ++ * ++ * Create a compressed buffer with all the pages that we are going to ++ * send. ++ * ++ * Returns 0 on success or -1 on error ++ * ++ * @p: Params for the channel being used ++ * @errp: pointer to an error ++ */ ++static int multifd_qpl_send_prepare(MultiFDSendParams *p, Error **errp) ++{ ++ /* Implement in next patch */ ++ return -1; ++} ++ ++/** ++ * multifd_qpl_recv_setup: set up receive side ++ * ++ * Create the compressed channel and buffer. 
++ * ++ * Returns 0 on success or -1 on error ++ * ++ * @p: Params for the channel being used ++ * @errp: pointer to an error ++ */ ++static int multifd_qpl_recv_setup(MultiFDRecvParams *p, Error **errp) ++{ ++ QplData *qpl; ++ ++ qpl = multifd_qpl_init(p->page_count, p->page_size, errp); ++ if (!qpl) { ++ return -1; ++ } ++ p->compress_data = qpl; ++ return 0; ++} ++ ++/** ++ * multifd_qpl_recv_cleanup: set up receive side ++ * ++ * Close the channel and free memory. ++ * ++ * @p: Params for the channel being used ++ */ ++static void multifd_qpl_recv_cleanup(MultiFDRecvParams *p) ++{ ++ multifd_qpl_deinit(p->compress_data); ++ p->compress_data = NULL; ++} ++ ++/** ++ * multifd_qpl_recv: read the data from the channel into actual pages ++ * ++ * Read the compressed buffer, and uncompress it into the actual ++ * pages. ++ * ++ * Returns 0 on success or -1 on error ++ * ++ * @p: Params for the channel being used ++ * @errp: pointer to an error ++ */ ++static int multifd_qpl_recv(MultiFDRecvParams *p, Error **errp) ++{ ++ /* Implement in next patch */ ++ return -1; ++} ++ ++static MultiFDMethods multifd_qpl_ops = { ++ .send_setup = multifd_qpl_send_setup, ++ .send_cleanup = multifd_qpl_send_cleanup, ++ .send_prepare = multifd_qpl_send_prepare, ++ .recv_setup = multifd_qpl_recv_setup, ++ .recv_cleanup = multifd_qpl_recv_cleanup, ++ .recv = multifd_qpl_recv, ++}; + + static void multifd_qpl_register(void) + { +- /* noop */ ++ multifd_register_ops(MULTIFD_COMPRESSION_QPL, &multifd_qpl_ops); + } + + migration_init(multifd_qpl_register); +-- +2.33.0 + diff --git a/migration-multifd-implement-qpl-compression-and-deco.patch b/migration-multifd-implement-qpl-compression-and-deco.patch new file mode 100644 index 0000000..43d807f --- /dev/null +++ b/migration-multifd-implement-qpl-compression-and-deco.patch @@ -0,0 +1,510 @@ +From 9c0666808448c393ffff4b44e3e5bb0f62e48a8f Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Mon, 10 Jun 2024 18:21:09 +0800 +Subject: [78/99] 
migration/multifd: implement qpl compression and + decompression + +commit f6fe9fea995249ecc2cd72975d803fbf4d512c02 upstream. + +QPL compression and decompression will use IAA hardware path if the IAA +hardware is available. Otherwise the QPL library software path is used. + +The hardware path will automatically fall back to QPL software path if +the IAA queues are busy. In some scenarios, this may happen frequently, +such as configuring 4 channels but only one IAA device is available. In +the case of insufficient IAA hardware resources, retry and fallback can +help optimize performance: + + 1. Retry + SW fallback: + total time: 14649 ms + downtime: 25 ms + throughput: 17666.57 mbps + pages-per-second: 1509647 + + 2. No fallback, always wait for work queues to become available + total time: 18381 ms + downtime: 25 ms + throughput: 13698.65 mbps + pages-per-second: 859607 + +If both the hardware and software paths fail, the uncompressed page is +sent directly. + +Signed-off-by: Yuan Liu +Reviewed-by: Nanhai Zou +Reviewed-by: Fabiano Rosas +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + migration/multifd-qpl.c | 424 +++++++++++++++++++++++++++++++++++++++- + 1 file changed, 420 insertions(+), 4 deletions(-) + +diff --git a/migration/multifd-qpl.c b/migration/multifd-qpl.c +index 6791a204d5..9265098ee7 100644 +--- a/migration/multifd-qpl.c ++++ b/migration/multifd-qpl.c +@@ -13,9 +13,14 @@ + #include "qemu/osdep.h" + #include "qemu/module.h" + #include "qapi/error.h" ++#include "qapi/qapi-types-migration.h" ++#include "exec/ramblock.h" + #include "multifd.h" + #include "qpl/qpl.h" + ++/* Maximum number of retries to resubmit a job if IAA work queues are full */ ++#define MAX_SUBMIT_RETRY_NUM (3) ++ + typedef struct { + /* the QPL hardware path job */ + qpl_job *job; +@@ -260,6 +265,225 @@ static void multifd_qpl_send_cleanup(MultiFDSendParams *p, Error **errp) + p->iov = NULL; + } + ++/** ++ * multifd_qpl_prepare_job: prepare the job ++ * ++ * Set the 
QPL job parameters and properties. ++ * ++ * @job: pointer to the qpl_job structure ++ * @is_compression: indicates compression and decompression ++ * @input: pointer to the input data buffer ++ * @input_len: the length of the input data ++ * @output: pointer to the output data buffer ++ * @output_len: the length of the output data ++ */ ++static void multifd_qpl_prepare_job(qpl_job *job, bool is_compression, ++ uint8_t *input, uint32_t input_len, ++ uint8_t *output, uint32_t output_len) ++{ ++ job->op = is_compression ? qpl_op_compress : qpl_op_decompress; ++ job->next_in_ptr = input; ++ job->next_out_ptr = output; ++ job->available_in = input_len; ++ job->available_out = output_len; ++ job->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST | QPL_FLAG_OMIT_VERIFY; ++ /* only supports compression level 1 */ ++ job->level = 1; ++} ++ ++/** ++ * multifd_qpl_prepare_comp_job: prepare the compression job ++ * ++ * Set the compression job parameters and properties. ++ * ++ * @job: pointer to the qpl_job structure ++ * @input: pointer to the input data buffer ++ * @output: pointer to the output data buffer ++ * @size: the page size ++ */ ++static void multifd_qpl_prepare_comp_job(qpl_job *job, uint8_t *input, ++ uint8_t *output, uint32_t size) ++{ ++ /* ++ * Set output length to less than the page size to force the job to ++ * fail in case it compresses to a larger size. We'll send that page ++ * without compression and skip the decompression operation on the ++ * destination. ++ */ ++ multifd_qpl_prepare_job(job, true, input, size, output, size - 1); ++} ++ ++/** ++ * multifd_qpl_prepare_decomp_job: prepare the decompression job ++ * ++ * Set the decompression job parameters and properties. 
++ * ++ * @job: pointer to the qpl_job structure ++ * @input: pointer to the input data buffer ++ * @len: the length of the input data ++ * @output: pointer to the output data buffer ++ * @size: the page size ++ */ ++static void multifd_qpl_prepare_decomp_job(qpl_job *job, uint8_t *input, ++ uint32_t len, uint8_t *output, ++ uint32_t size) ++{ ++ multifd_qpl_prepare_job(job, false, input, len, output, size); ++} ++ ++/** ++ * multifd_qpl_fill_iov: fill in the IOV ++ * ++ * Fill in the QPL packet IOV ++ * ++ * @p: Params for the channel being used ++ * @data: pointer to the IOV data ++ * @len: The length of the IOV data ++ */ ++static void multifd_qpl_fill_iov(MultiFDSendParams *p, uint8_t *data, ++ uint32_t len) ++{ ++ p->iov[p->iovs_num].iov_base = data; ++ p->iov[p->iovs_num].iov_len = len; ++ p->iovs_num++; ++ p->next_packet_size += len; ++} ++ ++/** ++ * multifd_qpl_fill_packet: fill the compressed page into the QPL packet ++ * ++ * Fill the compressed page length and IOV into the QPL packet ++ * ++ * @idx: The index of the compressed length array ++ * @p: Params for the channel being used ++ * @data: pointer to the compressed page buffer ++ * @len: The length of the compressed page ++ */ ++static void multifd_qpl_fill_packet(uint32_t idx, MultiFDSendParams *p, ++ uint8_t *data, uint32_t len) ++{ ++ QplData *qpl = p->compress_data; ++ ++ qpl->zlen[idx] = cpu_to_be32(len); ++ multifd_qpl_fill_iov(p, data, len); ++} ++ ++/** ++ * multifd_qpl_submit_job: submit a job to the hardware ++ * ++ * Submit a QPL hardware job to the IAA device ++ * ++ * Returns true if the job is submitted successfully, otherwise false. 
++ * ++ * @job: pointer to the qpl_job structure ++ */ ++static bool multifd_qpl_submit_job(qpl_job *job) ++{ ++ qpl_status status; ++ uint32_t num = 0; ++ ++retry: ++ status = qpl_submit_job(job); ++ if (status == QPL_STS_QUEUES_ARE_BUSY_ERR) { ++ if (num < MAX_SUBMIT_RETRY_NUM) { ++ num++; ++ goto retry; ++ } ++ } ++ return (status == QPL_STS_OK); ++} ++ ++/** ++ * multifd_qpl_compress_pages_slow_path: compress pages using slow path ++ * ++ * Compress the pages using software. If compression fails, the uncompressed ++ * page will be sent. ++ * ++ * @p: Params for the channel being used ++ */ ++static void multifd_qpl_compress_pages_slow_path(MultiFDSendParams *p) ++{ ++ QplData *qpl = p->compress_data; ++ uint32_t size = p->page_size; ++ qpl_job *job = qpl->sw_job; ++ uint8_t *zbuf = qpl->zbuf; ++ uint8_t *buf; ++ ++ for (int i = 0; i < p->pages->normal_num; i++) { ++ buf = p->pages->block->host + p->pages->offset[i]; ++ multifd_qpl_prepare_comp_job(job, buf, zbuf, size); ++ if (qpl_execute_job(job) == QPL_STS_OK) { ++ multifd_qpl_fill_packet(i, p, zbuf, job->total_out); ++ } else { ++ /* send the uncompressed page */ ++ multifd_qpl_fill_packet(i, p, buf, size); ++ } ++ zbuf += size; ++ } ++} ++ ++/** ++ * multifd_qpl_compress_pages: compress pages ++ * ++ * Submit the pages to the IAA hardware for compression. If hardware ++ * compression fails, it falls back to software compression. If software ++ * compression also fails, the uncompressed page is sent. 
++ * ++ * @p: Params for the channel being used ++ */ ++static void multifd_qpl_compress_pages(MultiFDSendParams *p) ++{ ++ QplData *qpl = p->compress_data; ++ MultiFDPages_t *pages = p->pages; ++ uint32_t size = p->page_size; ++ QplHwJob *hw_job; ++ uint8_t *buf; ++ uint8_t *zbuf; ++ ++ for (int i = 0; i < pages->normal_num; i++) { ++ buf = pages->block->host + pages->offset[i]; ++ zbuf = qpl->zbuf + (size * i); ++ hw_job = &qpl->hw_jobs[i]; ++ multifd_qpl_prepare_comp_job(hw_job->job, buf, zbuf, size); ++ if (multifd_qpl_submit_job(hw_job->job)) { ++ hw_job->fallback_sw_path = false; ++ } else { ++ /* ++ * The IAA work queue is full, any immediate subsequent job ++ * submission is likely to fail, sending the page via the QPL ++ * software path at this point gives us a better chance of ++ * finding the queue open for the next pages. ++ */ ++ hw_job->fallback_sw_path = true; ++ multifd_qpl_prepare_comp_job(qpl->sw_job, buf, zbuf, size); ++ if (qpl_execute_job(qpl->sw_job) == QPL_STS_OK) { ++ hw_job->sw_output = zbuf; ++ hw_job->sw_output_len = qpl->sw_job->total_out; ++ } else { ++ hw_job->sw_output = buf; ++ hw_job->sw_output_len = size; ++ } ++ } ++ } ++ ++ for (int i = 0; i < pages->normal_num; i++) { ++ buf = pages->block->host + pages->offset[i]; ++ zbuf = qpl->zbuf + (size * i); ++ hw_job = &qpl->hw_jobs[i]; ++ if (hw_job->fallback_sw_path) { ++ multifd_qpl_fill_packet(i, p, hw_job->sw_output, ++ hw_job->sw_output_len); ++ continue; ++ } ++ if (qpl_wait_job(hw_job->job) == QPL_STS_OK) { ++ multifd_qpl_fill_packet(i, p, zbuf, hw_job->job->total_out); ++ } else { ++ /* send the uncompressed page */ ++ multifd_qpl_fill_packet(i, p, buf, size); ++ } ++ } ++} ++ + /** + * multifd_qpl_send_prepare: prepare data to be able to send + * +@@ -273,8 +497,26 @@ static void multifd_qpl_send_cleanup(MultiFDSendParams *p, Error **errp) + */ + static int multifd_qpl_send_prepare(MultiFDSendParams *p, Error **errp) + { +- /* Implement in next patch */ +- return -1; ++ QplData 
*qpl = p->compress_data; ++ uint32_t len = 0; ++ ++ if (!multifd_send_prepare_common(p)) { ++ goto out; ++ } ++ ++ /* The first IOV is used to store the compressed page lengths */ ++ len = p->pages->normal_num * sizeof(uint32_t); ++ multifd_qpl_fill_iov(p, (uint8_t *) qpl->zlen, len); ++ if (qpl->hw_avail) { ++ multifd_qpl_compress_pages(p); ++ } else { ++ multifd_qpl_compress_pages_slow_path(p); ++ } ++ ++out: ++ p->flags |= MULTIFD_FLAG_QPL; ++ multifd_send_fill_packet(p); ++ return 0; + } + + /** +@@ -312,6 +554,140 @@ static void multifd_qpl_recv_cleanup(MultiFDRecvParams *p) + p->compress_data = NULL; + } + ++/** ++ * multifd_qpl_process_and_check_job: process and check a QPL job ++ * ++ * Process the job and check whether the job output length is the ++ * same as the specified length ++ * ++ * Returns true if the job execution succeeded and the output length ++ * is equal to the specified length, otherwise false. ++ * ++ * @job: pointer to the qpl_job structure ++ * @is_hardware: indicates whether the job is a hardware job ++ * @len: Specified output length ++ * @errp: pointer to an error ++ */ ++static bool multifd_qpl_process_and_check_job(qpl_job *job, bool is_hardware, ++ uint32_t len, Error **errp) ++{ ++ qpl_status status; ++ ++ status = (is_hardware ? 
qpl_wait_job(job) : qpl_execute_job(job)); ++ if (status != QPL_STS_OK) { ++ error_setg(errp, "qpl job failed with error %d", status); ++ return false; ++ } ++ if (job->total_out != len) { ++ error_setg(errp, "qpl decompressed len %u, expected len %u", ++ job->total_out, len); ++ return false; ++ } ++ return true; ++} ++ ++/** ++ * multifd_qpl_decompress_pages_slow_path: decompress pages using slow path ++ * ++ * Decompress the pages using software ++ * ++ * Returns 0 on success or -1 on error ++ * ++ * @p: Params for the channel being used ++ * @errp: pointer to an error ++ */ ++static int multifd_qpl_decompress_pages_slow_path(MultiFDRecvParams *p, ++ Error **errp) ++{ ++ QplData *qpl = p->compress_data; ++ uint32_t size = p->page_size; ++ qpl_job *job = qpl->sw_job; ++ uint8_t *zbuf = qpl->zbuf; ++ uint8_t *addr; ++ uint32_t len; ++ ++ for (int i = 0; i < p->normal_num; i++) { ++ len = qpl->zlen[i]; ++ addr = p->host + p->normal[i]; ++ /* the page is uncompressed, load it */ ++ if (len == size) { ++ memcpy(addr, zbuf, size); ++ zbuf += size; ++ continue; ++ } ++ multifd_qpl_prepare_decomp_job(job, zbuf, len, addr, size); ++ if (!multifd_qpl_process_and_check_job(job, false, size, errp)) { ++ return -1; ++ } ++ zbuf += len; ++ } ++ return 0; ++} ++ ++/** ++ * multifd_qpl_decompress_pages: decompress pages ++ * ++ * Decompress the pages using the IAA hardware. If hardware ++ * decompression fails, it falls back to software decompression. 
++ * ++ * Returns 0 on success or -1 on error ++ * ++ * @p: Params for the channel being used ++ * @errp: pointer to an error ++ */ ++static int multifd_qpl_decompress_pages(MultiFDRecvParams *p, Error **errp) ++{ ++ QplData *qpl = p->compress_data; ++ uint32_t size = p->page_size; ++ uint8_t *zbuf = qpl->zbuf; ++ uint8_t *addr; ++ uint32_t len; ++ qpl_job *job; ++ ++ for (int i = 0; i < p->normal_num; i++) { ++ addr = p->host + p->normal[i]; ++ len = qpl->zlen[i]; ++ /* the page is uncompressed if received length equals the page size */ ++ if (len == size) { ++ memcpy(addr, zbuf, size); ++ zbuf += size; ++ continue; ++ } ++ ++ job = qpl->hw_jobs[i].job; ++ multifd_qpl_prepare_decomp_job(job, zbuf, len, addr, size); ++ if (multifd_qpl_submit_job(job)) { ++ qpl->hw_jobs[i].fallback_sw_path = false; ++ } else { ++ /* ++ * The IAA work queue is full, any immediate subsequent job ++ * submission is likely to fail, sending the page via the QPL ++ * software path at this point gives us a better chance of ++ * finding the queue open for the next pages. 
++ */ ++ qpl->hw_jobs[i].fallback_sw_path = true; ++ job = qpl->sw_job; ++ multifd_qpl_prepare_decomp_job(job, zbuf, len, addr, size); ++ if (!multifd_qpl_process_and_check_job(job, false, size, errp)) { ++ return -1; ++ } ++ } ++ zbuf += len; ++ } ++ ++ for (int i = 0; i < p->normal_num; i++) { ++ /* ignore pages that have already been processed */ ++ if (qpl->zlen[i] == size || qpl->hw_jobs[i].fallback_sw_path) { ++ continue; ++ } ++ ++ job = qpl->hw_jobs[i].job; ++ if (!multifd_qpl_process_and_check_job(job, true, size, errp)) { ++ return -1; ++ } ++ } ++ return 0; ++} + /** + * multifd_qpl_recv: read the data from the channel into actual pages + * +@@ -325,8 +701,48 @@ static void multifd_qpl_recv_cleanup(MultiFDRecvParams *p) + */ + static int multifd_qpl_recv(MultiFDRecvParams *p, Error **errp) + { +- /* Implement in next patch */ +- return -1; ++ QplData *qpl = p->compress_data; ++ uint32_t in_size = p->next_packet_size; ++ uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; ++ uint32_t len = 0; ++ uint32_t zbuf_len = 0; ++ int ret; ++ ++ if (flags != MULTIFD_FLAG_QPL) { ++ error_setg(errp, "multifd %u: flags received %x flags expected %x", ++ p->id, flags, MULTIFD_FLAG_QPL); ++ return -1; ++ } ++ multifd_recv_zero_page_process(p); ++ if (!p->normal_num) { ++ assert(in_size == 0); ++ return 0; ++ } ++ ++ /* read compressed page lengths */ ++ len = p->normal_num * sizeof(uint32_t); ++ assert(len < in_size); ++ ret = qio_channel_read_all(p->c, (void *) qpl->zlen, len, errp); ++ if (ret != 0) { ++ return ret; ++ } ++ for (int i = 0; i < p->normal_num; i++) { ++ qpl->zlen[i] = be32_to_cpu(qpl->zlen[i]); ++ assert(qpl->zlen[i] <= p->page_size); ++ zbuf_len += qpl->zlen[i]; ++ } ++ ++ /* read compressed pages */ ++ assert(in_size == len + zbuf_len); ++ ret = qio_channel_read_all(p->c, (void *) qpl->zbuf, zbuf_len, errp); ++ if (ret != 0) { ++ return ret; ++ } ++ ++ if (qpl->hw_avail) { ++ return multifd_qpl_decompress_pages(p, errp); ++ } ++ return 
multifd_qpl_decompress_pages_slow_path(p, errp); + } + + static MultiFDMethods multifd_qpl_ops = { +-- +2.33.0 + diff --git a/migration-multifd-include-ram.h-in-multifd.h.patch b/migration-multifd-include-ram.h-in-multifd.h.patch new file mode 100644 index 0000000..55a131c --- /dev/null +++ b/migration-multifd-include-ram.h-in-multifd.h.patch @@ -0,0 +1,31 @@ +From 8b069af63b1dc70ffdcc2662289164b3fd6e29f3 Mon Sep 17 00:00:00 2001 +From: Jason Zeng +Date: Wed, 2 Apr 2025 18:09:21 +0800 +Subject: [76/99] migration/multifd: include ram.h in multifd.h + +Header file ram.h was included by multifd.h when mapped-ram was +introduced in upstream code. This inclusion is needed by qpl when +multifd-qpl.c includes multifd.h. Add this inclusion here since +we don't backport mapped-ram + +Signed-off-by: Jason Zeng +--- + migration/multifd.h | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/migration/multifd.h b/migration/multifd.h +index 11f05dd6d5..41965df7a9 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -13,6 +13,8 @@ + #ifndef QEMU_MIGRATION_MULTIFD_H + #define QEMU_MIGRATION_MULTIFD_H + ++#include "ram.h" ++ + bool multifd_send_setup(void); + void multifd_send_shutdown(void); + int multifd_recv_setup(Error **errp); +-- +2.33.0 + diff --git a/migration-multifd-multifd_send_kick_main.patch b/migration-multifd-multifd_send_kick_main.patch new file mode 100644 index 0000000..9f033c1 --- /dev/null +++ b/migration-multifd-multifd_send_kick_main.patch @@ -0,0 +1,76 @@ +From fa8d23b539d417e69cc0a02f13ca66ef2b506d8e Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:36 +0800 +Subject: [26/99] migration/multifd: multifd_send_kick_main() + +commit 48c0f5d56fd2ff0a0cda23301637b742c690f59a upstream. + +When a multifd sender thread hit errors, it always needs to kick the main +thread by kicking all the semaphores that it can be waiting upon. + +Provide a helper for it and deduplicate the code. 
+ +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-3-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 21 +++++++++++++++------ + 1 file changed, 15 insertions(+), 6 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 07e7e78029..d2da6178b0 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -373,6 +373,18 @@ struct { + MultiFDMethods *ops; + } *multifd_send_state; + ++/* ++ * The migration thread can wait on either of the two semaphores. This ++ * function can be used to kick the main thread out of waiting on either of ++ * them. Should mostly only be called when something wrong happened with ++ * the current multifd send thread. ++ */ ++static void multifd_send_kick_main(MultiFDSendParams *p) ++{ ++ qemu_sem_post(&p->sem_sync); ++ qemu_sem_post(&multifd_send_state->channels_ready); ++} ++ + /* + * How we use multifd_send_state->pages and channel->pages? + * +@@ -743,8 +755,7 @@ out: + assert(local_err); + trace_multifd_send_error(p->id); + multifd_send_terminate_threads(local_err); +- qemu_sem_post(&p->sem_sync); +- qemu_sem_post(&multifd_send_state->channels_ready); ++ multifd_send_kick_main(p); + error_free(local_err); + } + +@@ -785,8 +796,7 @@ static void multifd_tls_outgoing_handshake(QIOTask *task, + * is not created, and then tell who pay attention to me. 
+ */ + p->quit = true; +- qemu_sem_post(&multifd_send_state->channels_ready); +- qemu_sem_post(&p->sem_sync); ++ multifd_send_kick_main(p); + error_free(err); + } + +@@ -856,8 +866,7 @@ static void multifd_new_send_channel_cleanup(MultiFDSendParams *p, + { + migrate_set_error(migrate_get_current(), err); + /* Error happen, we need to tell who pay attention to me */ +- qemu_sem_post(&multifd_send_state->channels_ready); +- qemu_sem_post(&p->sem_sync); ++ multifd_send_kick_main(p); + /* + * Although multifd_send_thread is not created, but main migration + * thread need to judge whether it is running, so we need to mark +-- +2.33.0 + diff --git a/migration-multifd-multifd_send_prepare_header.patch b/migration-multifd-multifd_send_prepare_header.patch new file mode 100644 index 0000000..9e492d0 --- /dev/null +++ b/migration-multifd-multifd_send_prepare_header.patch @@ -0,0 +1,82 @@ +From fb749030a3151fff95a84f478ec5bcc1b5e0d07c Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 2 Feb 2024 18:28:46 +0800 +Subject: [36/99] migration/multifd: multifd_send_prepare_header() + +commit 452b205702335ddd45554aaf0eb37baf50bdfa00 upstream. + +Introduce a helper multifd_send_prepare_header() to setup the header packet +for multifd sender. + +It's fine to setup the IOV[0] _before_ send_prepare() because the packet +buffer is already ready, even if the content is to be filled in. + +With this helper, we can already slightly clean up the zero copy path. + +Note that I explicitly put it into multifd.h, because I want it inlined +directly into multifd*.c where necessary later. 
+ +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240202102857.110210-13-peterx@redhat.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 16 ++++++++-------- + migration/multifd.h | 8 ++++++++ + 2 files changed, 16 insertions(+), 8 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index f545faaa52..a42e152268 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -716,10 +716,14 @@ static void *multifd_send_thread(void *opaque) + if (qatomic_read(&p->pending_job)) { + MultiFDPages_t *pages = p->pages; + +- if (use_zero_copy_send) { +- p->iovs_num = 0; +- } else { +- p->iovs_num = 1; ++ p->iovs_num = 0; ++ ++ if (!use_zero_copy_send) { ++ /* ++ * Only !zerocopy needs the header in IOV; zerocopy will ++ * send it separately. ++ */ ++ multifd_send_prepare_header(p); + } + + assert(pages->num); +@@ -739,10 +743,6 @@ static void *multifd_send_thread(void *opaque) + if (ret != 0) { + break; + } +- } else { +- /* Send header using the same writev call */ +- p->iov[0].iov_len = p->packet_len; +- p->iov[0].iov_base = p->packet; + } + + ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL, +diff --git a/migration/multifd.h b/migration/multifd.h +index 2e4ad0dc56..4ec005f53f 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -209,5 +209,13 @@ typedef struct { + + void multifd_register_ops(int method, MultiFDMethods *ops); + ++static inline void multifd_send_prepare_header(MultiFDSendParams *p) ++{ ++ p->iov[0].iov_len = p->packet_len; ++ p->iov[0].iov_base = p->packet; ++ p->iovs_num++; ++} ++ ++ + #endif + +-- +2.33.0 + diff --git a/migration-multifd-put-IOV-initialization-into-compre.patch b/migration-multifd-put-IOV-initialization-into-compre.patch new file mode 100644 index 0000000..652dc53 --- /dev/null +++ b/migration-multifd-put-IOV-initialization-into-compre.patch @@ -0,0 +1,168 @@ +From 4e0ebb941ba15c31e7d19d44189bf47fee3181c9 Mon Sep 17 00:00:00 2001 +From: 
Yuan Liu +Date: Mon, 10 Jun 2024 18:21:05 +0800 +Subject: [73/99] migration/multifd: put IOV initialization into compression + method + +commit d9d3e4f243214f742425d9d8360f0794bb05c999 upstream. + +Different compression methods may require different numbers of IOVs. +Based on streaming compression of zlib and zstd, all pages will be +compressed to a data block, so two IOVs are needed for packet header +and compressed data block. + +Signed-off-by: Yuan Liu +Reviewed-by: Nanhai Zou +Reviewed-by: Fabiano Rosas +Reviewed-by: Peter Xu +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + migration/multifd-zlib.c | 7 +++++++ + migration/multifd-zstd.c | 8 +++++++- + migration/multifd.c | 22 ++++++++++++---------- + 3 files changed, 26 insertions(+), 11 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index b210725f6e..2df4983780 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -70,6 +70,10 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) + goto err_free_zbuff; + } + p->compress_data = z; ++ ++ /* Needs 2 IOVs, one for packet header and one for compressed data */ ++ p->iov = g_new0(struct iovec, 2); ++ + return 0; + + err_free_zbuff: +@@ -101,6 +105,9 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + z->buf = NULL; + g_free(p->compress_data); + p->compress_data = NULL; ++ ++ g_free(p->iov); ++ p->iov = NULL; + } + + /** +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 256858df0a..ca17b7e310 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -52,7 +52,6 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) + struct zstd_data *z = g_new0(struct zstd_data, 1); + int res; + +- p->compress_data = z; + z->zcs = ZSTD_createCStream(); + if (!z->zcs) { + g_free(z); +@@ -77,6 +76,10 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) + error_setg(errp, "multifd %u: out of memory for zbuff", p->id); + 
return -1; + } ++ p->compress_data = z; ++ ++ /* Needs 2 IOVs, one for packet header and one for compressed data */ ++ p->iov = g_new0(struct iovec, 2); + return 0; + } + +@@ -98,6 +101,9 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) + z->zbuff = NULL; + g_free(p->compress_data); + p->compress_data = NULL; ++ ++ g_free(p->iov); ++ p->iov = NULL; + } + + /** +diff --git a/migration/multifd.c b/migration/multifd.c +index 4394952fbb..0fcecc3759 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -113,6 +113,13 @@ static int nocomp_send_setup(MultiFDSendParams *p, Error **errp) + p->write_flags |= QIO_CHANNEL_WRITE_FLAG_ZERO_COPY; + } + ++ if (multifd_use_packets()) { ++ /* We need one extra place for the packet header */ ++ p->iov = g_new0(struct iovec, p->page_count + 1); ++ } else { ++ p->iov = g_new0(struct iovec, p->page_count); ++ } ++ + return 0; + } + +@@ -126,6 +133,8 @@ static int nocomp_send_setup(MultiFDSendParams *p, Error **errp) + */ + static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) + { ++ g_free(p->iov); ++ p->iov = NULL; + return; + } + +@@ -202,6 +211,7 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + */ + static int nocomp_recv_setup(MultiFDRecvParams *p, Error **errp) + { ++ p->iov = g_new0(struct iovec, p->page_count); + return 0; + } + +@@ -214,6 +224,8 @@ static int nocomp_recv_setup(MultiFDRecvParams *p, Error **errp) + */ + static void nocomp_recv_cleanup(MultiFDRecvParams *p) + { ++ g_free(p->iov); ++ p->iov = NULL; + } + + /** +@@ -734,8 +746,6 @@ static bool multifd_send_cleanup_channel(MultiFDSendParams *p, Error **errp) + p->packet_len = 0; + g_free(p->packet); + p->packet = NULL; +- g_free(p->iov); +- p->iov = NULL; + multifd_send_state->ops->send_cleanup(p, errp); + + return *errp == NULL; +@@ -1120,11 +1130,6 @@ bool multifd_send_setup(void) + p->packet = g_malloc0(p->packet_len); + p->packet->magic = cpu_to_be32(MULTIFD_MAGIC); + p->packet->version = 
cpu_to_be32(MULTIFD_VERSION); +- +- /* We need one extra place for the packet header */ +- p->iov = g_new0(struct iovec, page_count + 1); +- } else { +- p->iov = g_new0(struct iovec, page_count); + } + p->name = g_strdup_printf("multifdsend_%d", i); + p->page_size = qemu_target_page_size(); +@@ -1224,8 +1229,6 @@ static void multifd_recv_cleanup_channel(MultiFDRecvParams *p) + p->packet_len = 0; + g_free(p->packet); + p->packet = NULL; +- g_free(p->iov); +- p->iov = NULL; + g_free(p->normal); + p->normal = NULL; + g_free(p->zero); +@@ -1403,7 +1406,6 @@ int multifd_recv_setup(Error **errp) + p->packet = g_malloc0(p->packet_len); + } + p->name = g_strdup_printf("multifdrecv_%d", i); +- p->iov = g_new0(struct iovec, page_count); + p->normal = g_new0(ram_addr_t, page_count); + p->zero = g_new0(ram_addr_t, page_count); + p->page_count = page_count; +-- +2.33.0 + diff --git a/migration-multifd-solve-zero-page-causing-multiple-p.patch b/migration-multifd-solve-zero-page-causing-multiple-p.patch new file mode 100644 index 0000000..57dab98 --- /dev/null +++ b/migration-multifd-solve-zero-page-causing-multiple-p.patch @@ -0,0 +1,132 @@ +From 57c611db900ca4373f3a34d3d87d57bb4f0bba00 Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Mon, 1 Apr 2024 23:41:10 +0800 +Subject: [71/99] migration/multifd: solve zero page causing multiple page + faults + +commit 5ef7e26bdb7eda10d6d5e1b77121be9945e5e550 upstream. + +Implemented recvbitmap tracking of received pages in multifd. + +If the zero page appears for the first time in the recvbitmap, this +page is not checked and set. + +If the zero page has already appeared in the recvbitmap, there is no +need to check the data but directly set the data to 0, because it is +unlikely that the zero page will be migrated multiple times. 
+ +Signed-off-by: Yuan Liu +Reviewed-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20240401154110.2028453-2-yuan1.liu@intel.com +[peterx: touch up the comment, as the bitmap is used outside postcopy now] +Signed-off-by: Peter Xu + + Conflicts: + include/exec/ramblock.h +[jz: resolve context conflict due to mapped-ram which was not backported] +Signed-off-by: Jason Zeng +--- + include/exec/ramblock.h | 2 +- + migration/multifd-zero-page.c | 4 +++- + migration/multifd-zlib.c | 1 + + migration/multifd-zstd.c | 1 + + migration/multifd.c | 1 + + migration/ram.c | 4 ++++ + migration/ram.h | 1 + + 7 files changed, 12 insertions(+), 2 deletions(-) + +diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h +index 69c6a53902..8f9579ed70 100644 +--- a/include/exec/ramblock.h ++++ b/include/exec/ramblock.h +@@ -44,7 +44,7 @@ struct RAMBlock { + size_t page_size; + /* dirty bitmap used during migration */ + unsigned long *bmap; +- /* bitmap of already received pages in postcopy */ ++ /* Bitmap of already received pages. Only used on destination side. 
*/ + unsigned long *receivedmap; + + /* +diff --git a/migration/multifd-zero-page.c b/migration/multifd-zero-page.c +index 1ba38be636..e1b8370f88 100644 +--- a/migration/multifd-zero-page.c ++++ b/migration/multifd-zero-page.c +@@ -80,8 +80,10 @@ void multifd_recv_zero_page_process(MultiFDRecvParams *p) + { + for (int i = 0; i < p->zero_num; i++) { + void *page = p->host + p->zero[i]; +- if (!buffer_is_zero(page, p->page_size)) { ++ if (ramblock_recv_bitmap_test_byte_offset(p->block, p->zero[i])) { + memset(page, 0, p->page_size); ++ } else { ++ ramblock_recv_bitmap_set_offset(p->block, p->zero[i]); + } + } + } +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 83c0374380..b210725f6e 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -284,6 +284,7 @@ static int zlib_recv(MultiFDRecvParams *p, Error **errp) + int flush = Z_NO_FLUSH; + unsigned long start = zs->total_out; + ++ ramblock_recv_bitmap_set_offset(p->block, p->normal[i]); + if (i == p->normal_num - 1) { + flush = Z_SYNC_FLUSH; + } +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 02112255ad..256858df0a 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -278,6 +278,7 @@ static int zstd_recv(MultiFDRecvParams *p, Error **errp) + z->in.pos = 0; + + for (i = 0; i < p->normal_num; i++) { ++ ramblock_recv_bitmap_set_offset(p->block, p->normal[i]); + z->out.dst = p->host + p->normal[i]; + z->out.size = p->page_size; + z->out.pos = 0; +diff --git a/migration/multifd.c b/migration/multifd.c +index 6c01179858..4394952fbb 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -251,6 +251,7 @@ static int nocomp_recv(MultiFDRecvParams *p, Error **errp) + for (int i = 0; i < p->normal_num; i++) { + p->iov[i].iov_base = p->host + p->normal[i]; + p->iov[i].iov_len = p->page_size; ++ ramblock_recv_bitmap_set_offset(p->block, p->normal[i]); + } + return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp); + } +diff 
--git a/migration/ram.c b/migration/ram.c +index fe2e4c6164..6acf518a34 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -275,6 +275,10 @@ void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, + nr); + } + ++void ramblock_recv_bitmap_set_offset(RAMBlock *rb, uint64_t byte_offset) ++{ ++ set_bit_atomic(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap); ++} + #define RAMBLOCK_RECV_BITMAP_ENDING (0x0123456789abcdefULL) + + /* +diff --git a/migration/ram.h b/migration/ram.h +index 9b937a446b..cd263df026 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -69,6 +69,7 @@ int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr); + bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset); + void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr); + void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr, size_t nr); ++void ramblock_recv_bitmap_set_offset(RAMBlock *rb, uint64_t byte_offset); + int64_t ramblock_recv_bitmap_send(QEMUFile *file, + const char *block_name); + bool ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb, Error **errp); +-- +2.33.0 + diff --git a/multifd-bugfix-for-incorrect-migration-data-with-QPL.patch b/multifd-bugfix-for-incorrect-migration-data-with-QPL.patch new file mode 100644 index 0000000..850327a --- /dev/null +++ b/multifd-bugfix-for-incorrect-migration-data-with-QPL.patch @@ -0,0 +1,47 @@ +From 1b0fb2f08c76bc727e52ff763ed5bb7ee1bda820 Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Wed, 18 Dec 2024 17:14:12 +0800 +Subject: [98/99] multifd: bugfix for incorrect migration data with QPL + compression + +commit 2588a5f99b0c3493b4690e3ff01ed36f80e830cc upstream. + +When QPL compression is enabled on the migration channel and the same +dirty page changes from a normal page to a zero page in the iterative +memory copy, the dirty page will not be updated to a zero page again +on the target side, resulting in incorrect memory data on the source +and target sides. 
+ +The root cause is that the target side does not record the normal pages +to the receivedmap. + +The solution is to add ramblock_recv_bitmap_set_offset in target side +to record the normal pages. + +Intel-SIG: commit 2588a5f99b0c multifd: bugfix for incorrect migration data with QPL compression + +Signed-off-by: Yuan Liu +Reviewed-by: Jason Zeng +Reviewed-by: Peter Xu +Message-Id: <20241218091413.140396-3-yuan1.liu@intel.com> +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + migration/multifd-qpl.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/migration/multifd-qpl.c b/migration/multifd-qpl.c +index 9265098ee7..fea60e3937 100644 +--- a/migration/multifd-qpl.c ++++ b/migration/multifd-qpl.c +@@ -730,6 +730,7 @@ static int multifd_qpl_recv(MultiFDRecvParams *p, Error **errp) + qpl->zlen[i] = be32_to_cpu(qpl->zlen[i]); + assert(qpl->zlen[i] <= p->page_size); + zbuf_len += qpl->zlen[i]; ++ ramblock_recv_bitmap_set_offset(p->block, p->normal[i]); + } + + /* read compressed pages */ +-- +2.33.0 + diff --git a/multifd-bugfix-for-incorrect-migration-data-with-qat.patch b/multifd-bugfix-for-incorrect-migration-data-with-qat.patch new file mode 100644 index 0000000..52b71b6 --- /dev/null +++ b/multifd-bugfix-for-incorrect-migration-data-with-qat.patch @@ -0,0 +1,51 @@ +From 7541385c82f3c85fc8727080bb224dd8761fe719 Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Wed, 18 Dec 2024 17:14:13 +0800 +Subject: [99/99] multifd: bugfix for incorrect migration data with qatzip + compression + +commit a523bc52166c80d8a04d46584f9f3868bd53ef69 upstream. + +When qatzip compression is enabled on the migration channel and the same +dirty page changes from a normal page to a zero page in the iterative +memory copy, the dirty page will not be updated to a zero page again +on the target side, resulting in incorrect memory data on the source +and target sides.
+ +The solution is to add ramblock_recv_bitmap_set_offset in target side +to record the normal pages. + +Intel-SIG: commit a523bc52166c multifd: bugfix for incorrect migration data with qatzip compression + +Signed-off-by: Yuan Liu +Reviewed-by: Jason Zeng +Reviewed-by: Peter Xu +Message-Id: <20241218091413.140396-4-yuan1.liu@intel.com> +Signed-off-by: Fabiano Rosas + + Conflicts: + migration/multifd-qatzip.c +[jz: resolve context conflict] +Signed-off-by: Jason Zeng +--- + migration/multifd-qatzip.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/migration/multifd-qatzip.c b/migration/multifd-qatzip.c +index 3c787ed879..88b6fb44ad 100644 +--- a/migration/multifd-qatzip.c ++++ b/migration/multifd-qatzip.c +@@ -373,6 +373,7 @@ static int qatzip_recv(MultiFDRecvParams *p, Error **errp) + memcpy(p->host + p->normal[i], + q->out_buf + p->page_size * i, + p->page_size); ++ ramblock_recv_bitmap_set_offset(p->block, p->normal[i]); + } + return 0; + } +-- +2.33.0 + diff --git a/multifd-bugfix-for-migration-using-compression-metho.patch b/multifd-bugfix-for-migration-using-compression-metho.patch new file mode 100644 index 0000000..9760860 --- /dev/null +++ b/multifd-bugfix-for-migration-using-compression-metho.patch @@ -0,0 +1,63 @@ +From 123e52e1dc6629fec922dad4f7c97e23a82ec157 Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Wed, 18 Dec 2024 17:14:11 +0800 +Subject: [97/99] multifd: bugfix for migration using compression methods + +commit cdc3970f8597ebdc1a4c2090cfb4d11e297329ed upstream. + +When compression is enabled on the migration channel and +the pages processed are all zero pages, these pages will +not be sent and updated on the target side, resulting in +incorrect memory data on the source and target sides. + +The root cause is that all compression methods call +multifd_send_prepare_common to determine whether to compress +dirty pages, but multifd_send_prepare_common does not update +the IOV of MultiFDPacket_t when all dirty pages are zero pages. 
+ +The solution is to always update the IOV of MultiFDPacket_t +regardless of whether the dirty pages are all zero pages. + +Intel-SIG: commit cdc3970f8597 multifd: bugfix for migration using compression methods + +Fixes: 303e6f54f9 ("migration/multifd: Implement zero page transmission on the multifd thread.") +Cc: qemu-stable@nongnu.org #9.0+ +Signed-off-by: Yuan Liu +Reviewed-by: Jason Zeng +Reviewed-by: Peter Xu +Message-Id: <20241218091413.140396-2-yuan1.liu@intel.com> +Signed-off-by: Fabiano Rosas + + Conflicts: + migration/multifd-nocomp.c +[jz: upstream has split nocomp code into multifd-nocomp.c, while + openEuler hasn't yet. The function that needs to be fixed + is still in multifd.c, so we fix it in multifd.c] +Signed-off-by: Jason Zeng +--- + migration/multifd.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 36581a5631..4c310deb61 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -1488,6 +1488,7 @@ void multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + + bool multifd_send_prepare_common(MultiFDSendParams *p) + { ++ multifd_send_prepare_header(p); + multifd_send_zero_page_detect(p); + + if (!p->pages->normal_num) { +@@ -1495,7 +1496,5 @@ bool multifd_send_prepare_common(MultiFDSendParams *p) + return false; + } + +- multifd_send_prepare_header(p); +- + return true; + } +-- +2.33.0 + diff --git a/qemu.spec b/qemu.spec index 675f081..8f65d48 100644 --- a/qemu.spec +++ b/qemu.spec @@ -3,7 +3,7 @@ Name: qemu Version: 8.2.0 -Release: 32 +Release: 33 Epoch: 11 Summary: QEMU is a generic and open source machine emulator and virtualizer License: GPLv2 and BSD and MIT and CC-BY-SA-4.0 @@ -817,6 +817,111 @@ Patch0800: docs-Add-GNR-SRF-and-CWF-CPU-models.patch Patch0801: target-i386-add-sha512-sm3-sm4-feature-bits.patch Patch0802: target-i386-Add-new-CPU-model-ClearwaterForest.patch Patch0803: target-i386-csv-Release-CSV3-shared-pages-after-unma.patch +Patch0804: 
hw-arm-virt-support-the-HDBSS-feature.patch +Patch0805: migration-multifd-Fix-error-message-in-multifd_recv_.patch +Patch0806: migration-multifd-Simplify-multifd_channel_connect-i.patch +Patch0807: migration-multifd-Fix-leaking-of-Error-in-TLS-error-.patch +Patch0808: migration-multifd-Remove-error_setg-in-migration_ioc.patch +Patch0809: migration-Fix-migration_channel_read_peek-error-path.patch +Patch0810: migration-multifd-Remove-unnecessary-usage-of-local-.patch +Patch0811: migration-multifd-Remove-MultiFDPages_t-packet_num.patch +Patch0812: migration-multifd-Remove-QEMUFile-from-where-it-is-n.patch +Patch0813: migration-multifd-Change-multifd_pages_init-argument.patch +Patch0814: migration-Report-error-in-incoming-migration.patch +Patch0815: tests-qtest-migration-Print-migration-incoming-error.patch +Patch0816: tests-qtest-migration-Add-a-wrapper-to-print-test-na.patch +Patch0817: tests-qtest-migration-Use-the-new-migration_test_add.patch +Patch0818: tests-qtest-Re-enable-multifd-cancel-test.patch +Patch0819: docs-migration-Create-migration-directory.patch +Patch0820: docs-migration-Create-index-page.patch +Patch0821: docs-migration-Convert-virtio.txt-into-rST.patch +Patch0822: docs-migration-Split-Backwards-compatibility-separat.patch +Patch0823: docs-migration-Split-Debugging-and-Firmware.patch +Patch0824: docs-migration-Split-Postcopy.patch +Patch0825: docs-migration-Split-dirty-limit.patch +Patch0826: docs-migration-Organize-Postcopy-page.patch +Patch0827: docs-migration-Further-move-vfio-to-be-feature-of-mi.patch +Patch0828: docs-migration-Further-move-virtio-to-be-feature-of-.patch +Patch0829: migration-multifd-Drop-stale-comment-for-multifd-zer.patch +Patch0830: migration-multifd-multifd_send_kick_main.patch +Patch0831: migration-multifd-Drop-MultiFDSendParams.quit-cleanu.patch +Patch0832: migration-multifd-Postpone-reset-of-MultiFDPages_t.patch +Patch0833: migration-multifd-Drop-MultiFDSendParams.normal-arra.patch +Patch0834: 
migration-multifd-Separate-SYNC-request-with-normal-.patch +Patch0835: migration-multifd-Simplify-locking-in-sender-thread.patch +Patch0836: migration-multifd-Drop-pages-num-check-in-sender-thr.patch +Patch0837: migration-multifd-Rename-p-num_packets-and-clean-it-.patch +Patch0838: migration-multifd-Move-total_normal_pages-accounting.patch +Patch0839: migration-multifd-Move-trace_multifd_send-recv.patch +Patch0840: migration-multifd-multifd_send_prepare_header.patch +Patch0841: migration-multifd-Move-header-prepare-fill-into-send.patch +Patch0842: migration-multifd-Forbid-spurious-wakeups.patch +Patch0843: migration-multifd-Split-multifd_send_terminate_threa.patch +Patch0844: migration-multifd-Change-retval-of-multifd_queue_pag.patch +Patch0845: migration-multifd-Change-retval-of-multifd_send_page.patch +Patch0846: migration-multifd-Rewrite-multifd_queue_page.patch +Patch0847: migration-multifd-Cleanup-multifd_save_cleanup.patch +Patch0848: migration-multifd-Cleanup-multifd_load_cleanup.patch +Patch0849: migration-multifd-Stick-with-send-recv-on-function-n.patch +Patch0850: migration-multifd-Fix-MultiFDSendParams.packet_num-r.patch +Patch0851: migration-multifd-Optimize-sender-side-to-be-lockles.patch +Patch0852: migration-Fix-logic-of-channels-and-transport-compat.patch +Patch0853: migration-multifd-Join-the-TLS-thread.patch +Patch0854: migration-multifd-Remove-p-running.patch +Patch0855: migration-multifd-Move-multifd_send_setup-error-hand.patch +Patch0856: migration-multifd-Move-multifd_send_setup-into-migra.patch +Patch0857: migration-multifd-Unify-multifd-and-TLS-connection-p.patch +Patch0858: migration-multifd-Add-a-synchronization-point-for-ch.patch +Patch0859: migration-multifd-Remove-p-quit-from-recv-side.patch +Patch0860: migration-multifd-Release-recv-sem_sync-earlier.patch +Patch0861: migration-multifd-Cleanup-TLS-iochannel-referencing.patch +Patch0862: migration-multifd-Drop-registered_yank.patch +Patch0863: 
migration-multifd-Make-multifd_channel_connect-retur.patch +Patch0864: migration-multifd-Cleanup-outgoing_args-in-state-des.patch +Patch0865: migration-multifd-Drop-unnecessary-helper-to-destroy.patch +Patch0866: migration-Properly-apply-migration-compression-level.patch +Patch0867: tests-migration-Set-compression-level-in-migration-t.patch +Patch0868: migration-multifd-Cleanup-multifd_recv_sync_main.patch +Patch0869: migration-multifd-Rename-MultiFDSend-RecvParams-data.patch +Patch0870: migration-multifd-Decouple-recv-method-from-pages.patch +Patch0871: migration-multifd-Allow-multifd-without-packets.patch +Patch0872: migration-multifd-Add-new-migration-option-zero-page.patch +Patch0873: migration-multifd-Implement-zero-page-transmission-o.patch +Patch0874: migration-multifd-Implement-ram_save_target_page_mul.patch +Patch0875: migration-multifd-solve-zero-page-causing-multiple-p.patch +Patch0876: docs-migration-add-qpl-compression-feature.patch +Patch0877: migration-multifd-put-IOV-initialization-into-compre.patch +Patch0878: configure-add-enable-qpl-build-option.patch +Patch0879: migration-multifd-add-qpl-compression-method.patch +Patch0880: migration-multifd-include-ram.h-in-multifd.h.patch +Patch0881: migration-multifd-implement-initialization-of-qpl-co.patch +Patch0882: migration-multifd-implement-qpl-compression-and-deco.patch +Patch0883: tests-migration-test-add-qpl-compression-test.patch +Patch0884: docs-migration-add-uadk-compression-feature.patch +Patch0885: configure-Add-uadk-option.patch +Patch0886: migration-multifd-add-uadk-compression-framework.patch +Patch0887: migration-multifd-Add-UADK-initialization.patch +Patch0888: migration-multifd-Add-UADK-based-compression-and-dec.patch +Patch0889: migration-multifd-Switch-to-no-compression-when-no-h.patch +Patch0890: tests-migration-test-add-uadk-compression-test.patch +Patch0891: migration-multifd-Fix-p-iov-leak-in-multifd-uadk.c.patch +Patch0892: docs-migration-add-qatzip-compression-feature.patch 
+Patch0893: meson-Introduce-qatzip-feature-to-the-build-system.patch +Patch0894: migration-Add-migration-parameters-for-QATzip.patch +Patch0895: migration-Introduce-qatzip-compression-method.patch +Patch0896: tests-migration-Add-integration-test-for-qatzip-comp.patch +Patch0897: migration-multifd-Fix-loop-conditions-in-multifd_zst.patch +Patch0898: migration-multifd-Fix-rb-receivedmap-cleanup-race.patch +Patch0899: migration-multifd-Ensure-packet-ramblock-is-null-ter.patch +Patch0900: migration-multifd-Zero-p-flags-before-starting-filli.patch +Patch0901: multifd-bugfix-for-migration-using-compression-metho.patch +Patch0902: multifd-bugfix-for-incorrect-migration-data-with-QPL.patch +Patch0903: multifd-bugfix-for-incorrect-migration-data-with-qat.patch +Patch0904: hw-arm-virt-only-support-the-HDBSS-feature-in-aarch6.patch +Patch0905: hw-arm-virt-decouple-migrate_hdbss_buffer_size-with-.patch +Patch0906: hw-arm-virt-HDBSS-fix-arm-softmmu-build-on-x86-platf.patch +Patch0907: arm-VirtCCA-fix-arm-softmmu-build-on-x86-platform.patch +Patch0908: arm-cvm-fix-arm-softmmu-build-on-x86-platform.patch BuildRequires: flex BuildRequires: gcc @@ -1415,6 +1520,109 @@ getent passwd qemu >/dev/null || \ %endif %changelog +* Wed May 28 2025 Pengrui Zhang - 11:8.2.0-33 +- arm: cvm: fix arm-softmmu build on x86 platform +- arm: VirtCCA: fix arm-softmmu build on x86 platform +- hw/arm/virt: HDBSS: fix arm-softmmu build on x86 platform +- hw/arm/virt: decouple migrate_hdbss_buffer_size() with kvm_update_hdbss_cap() +- hw/arm/virt: only support the HDBSS feature in aarch64 +- multifd: bugfix for incorrect migration data with qatzip compression +- multifd: bugfix for incorrect migration data with QPL compression +- multifd: bugfix for migration using compression methods +- migration/multifd: Zero p->flags before starting filling a packet +- migration/multifd: Ensure packet->ramblock is null-terminated +- migration/multifd: Fix rb->receivedmap cleanup race +- migration/multifd: Fix loop 
conditions in multifd_zstd_send_prepare and multifd_zstd_recv +- tests/migration: Add integration test for 'qatzip' compression method +- migration: Introduce 'qatzip' compression method +- migration: Add migration parameters for QATzip +- meson: Introduce 'qatzip' feature to the build system +- docs/migration: add qatzip compression feature +- migration/multifd: Fix p->iov leak in multifd-uadk.c +- tests/migration-test: add uadk compression test +- migration/multifd: Switch to no compression when no hardware support +- migration/multifd: Add UADK based compression and decompression +- migration/multifd: Add UADK initialization +- migration/multifd: add uadk compression framework +- configure: Add uadk option +- docs/migration: add uadk compression feature +- configure: add --enable-qpl build option +- migration/multifd: implement qpl compression and decompression +- migration/multifd: implement initialization of qpl compression +- migration/multifd: include ram.h in multifd.h +- migration/multifd: add qpl compression method +- migration/multifd: put IOV initialization into compression method +- docs/migration: add qpl compression feature +- migration/multifd: solve zero page causing multiple page faults +- migration/multifd: Implement ram_save_target_page_multifd to handle multifd version of MigrationOps::ram_save_target_page. 
+- migration/multifd: Implement zero page transmission on the multifd thread +- migration/multifd: Add new migration option zero-page-detection +- migration/multifd: Allow multifd without packets +- migration/multifd: Rename MultiFDSend|RecvParams::data to compress_data +- migration/multifd: Cleanup multifd_recv_sync_main +- tests/migration: Set compression level in migration tests +- migration: Properly apply migration compression level parameters +- migration/multifd: Drop unnecessary helper to destroy IOC +- migration/multifd: Cleanup outgoing_args in state destroy +- migration/multifd: Make multifd_channel_connect() return void +- migration/multifd: Drop registered_yank +- migration/multifd: Cleanup TLS iochannel referencing +- migration/multifd: Release recv sem_sync earlier +- migration/multifd: Remove p->quit from recv side +- migration/multifd: Add a synchronization point for channel creation +- migration/multifd: Unify multifd and TLS connection paths +- migration/multifd: Move multifd_send_setup into migration thread +- migration/multifd: Move multifd_send_setup error handling in to the function +- migration/multifd: Remove p->running +- migration/multifd: Join the TLS thread +- migration: Fix logic of channels and transport compatibility check +- migration/multifd: Optimize sender side to be lockless +- migration/multifd: Stick with send/recv on function names +- migration/multifd: Cleanup multifd_load_cleanup() +- migration/multifd: Cleanup multifd_save_cleanup() +- migration/multifd: Rewrite multifd_queue_page() +- migration/multifd: Change retval of multifd_send_pages() +- migration/multifd: Change retval of multifd_queue_page() +- migration/multifd: Split multifd_send_terminate_threads() +- migration/multifd: Forbid spurious wakeups +- migration/multifd: Move header prepare/fill into send_prepare() +- migration/multifd: Move trace_multifd_send|recv() +- migration/multifd: Move total_normal_pages accounting +- migration/multifd: Rename p->num_packets 
and clean it up +- migration/multifd: Drop pages->num check in sender thread +- migration/multifd: Simplify locking in sender thread +- migration/multifd: Separate SYNC request with normal jobs +- migration/multifd: Drop MultiFDSendParams.normal[] array +- migration/multifd: Postpone reset of MultiFDPages_t +- migration/multifd: Drop MultiFDSendParams.quit, cleanup error paths +- migration/multifd: multifd_send_kick_main() +- migration/multifd: Drop stale comment for multifd zero copy +- docs/migration: Further move virtio to be feature of migration +- docs/migration: Further move vfio to be feature of migration +- docs/migration: Organize "Postcopy" page +- docs/migration: Split "dirty limit" +- docs/migration: Split "Postcopy" +- docs/migration: Split "Debugging" and "Firmware" +- docs/migration: Split "Backwards compatibility" separately +- docs/migration: Convert virtio.txt into rST +- docs/migration: Create index page +- docs/migration: Create migration/ directory +- tests/qtest: Re-enable multifd cancel test +- tests/qtest/migration: Use the new migration_test_add +- tests/qtest/migration: Add a wrapper to print test names +- tests/qtest/migration: Print migration incoming errors +- migration: Report error in incoming migration +- migration/multifd: Change multifd_pages_init argument +- migration/multifd: Remove QEMUFile from where it is not needed +- migration/multifd: Remove MultiFDPages_t::packet_num +- migration/multifd: Remove unnecessary usage of local Error +- migration: Fix migration_channel_read_peek() error path +- migration/multifd: Remove error_setg() in migration_ioc_process_incoming() +- migration/multifd: Fix leaking of Error in TLS error flow +- migration/multifd: Simplify multifd_channel_connect() if else statement +- migration/multifd: Fix error message in multifd_recv_initial_packet() +- hw/arm/virt: support the HDBSS feature + * Thu May 15 2025 Jiabo Feng - 11:8.2.0-32 - target/i386: csv: Release CSV3 shared pages after unmapping DMA - 
target/i386: Add new CPU model ClearwaterForest diff --git a/tests-migration-Add-integration-test-for-qatzip-comp.patch b/tests-migration-Add-integration-test-for-qatzip-comp.patch new file mode 100644 index 0000000..681124e --- /dev/null +++ b/tests-migration-Add-integration-test-for-qatzip-comp.patch @@ -0,0 +1,76 @@ +From 049442961f30f504475a7cb4b4c02043a7fb3c04 Mon Sep 17 00:00:00 2001 +From: Bryan Zhang +Date: Fri, 30 Aug 2024 16:27:22 -0700 +Subject: [92/99] tests/migration: Add integration test for 'qatzip' + compression method + +commit afe166d4e8bc33bc448cd573b55d0ac094187d48 upstream. + +Adds an integration test for 'qatzip'. + +Reviewed-by: Fabiano Rosas +Signed-off-by: Bryan Zhang +Signed-off-by: Hao Xiang +Signed-off-by: Yichen Wang +Link: https://lore.kernel.org/r/20240830232722.58272-6-yichen.wang@bytedance.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + tests/qtest/migration-test.c | 27 +++++++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c +index 7ecf4ce9a5..3385ca1f15 100644 +--- a/tests/qtest/migration-test.c ++++ b/tests/qtest/migration-test.c +@@ -2582,6 +2582,18 @@ test_migrate_precopy_tcp_multifd_zstd_start(QTestState *from, + } + #endif /* CONFIG_ZSTD */ + ++#ifdef CONFIG_QATZIP ++static void * ++test_migrate_precopy_tcp_multifd_qatzip_start(QTestState *from, ++ QTestState *to) ++{ ++ migrate_set_parameter_int(from, "multifd-qatzip-level", 2); ++ migrate_set_parameter_int(to, "multifd-qatzip-level", 2); ++ ++ return test_migrate_precopy_tcp_multifd_start_common(from, to, "qatzip"); ++} ++#endif ++ + #ifdef CONFIG_QPL + static void * + test_migrate_precopy_tcp_multifd_qpl_start(QTestState *from, +@@ -2634,6 +2646,17 @@ static void test_multifd_tcp_zstd(void) + } + #endif + ++#ifdef CONFIG_QATZIP ++static void test_multifd_tcp_qatzip(void) ++{ ++ MigrateCommon args = { ++ .listen_uri = "defer", ++ .start_hook = 
test_migrate_precopy_tcp_multifd_qatzip_start, ++ }; ++ test_precopy_common(&args); ++} ++#endif ++ + #ifdef CONFIG_QPL + static void test_multifd_tcp_qpl(void) + { +@@ -3531,6 +3554,10 @@ int main(int argc, char **argv) + migration_test_add("/migration/multifd/tcp/plain/zstd", + test_multifd_tcp_zstd); + #endif ++#ifdef CONFIG_QATZIP ++ migration_test_add("/migration/multifd/tcp/plain/qatzip", ++ test_multifd_tcp_qatzip); ++#endif + #ifdef CONFIG_QPL + migration_test_add("/migration/multifd/tcp/plain/qpl", + test_multifd_tcp_qpl); +-- +2.33.0 + diff --git a/tests-migration-Set-compression-level-in-migration-t.patch b/tests-migration-Set-compression-level-in-migration-t.patch new file mode 100644 index 0000000..566c10d --- /dev/null +++ b/tests-migration-Set-compression-level-in-migration-t.patch @@ -0,0 +1,49 @@ +From 51191c9239aee8a25428fef53fe99589e1aca711 Mon Sep 17 00:00:00 2001 +From: Bryan Zhang +Date: Fri, 1 Mar 2024 03:59:01 +0000 +Subject: [63/99] tests/migration: Set compression level in migration tests + +commit 2b571432314ab42da742fbb578f4174166ecd7f5 upstream. + +Adds calls to set compression level for `zstd` and `zlib` migration +tests, just to make sure that the calls work. + +Signed-off-by: Bryan Zhang +Link: https://lore.kernel.org/r/20240301035901.4006936-3-bryan.zhang@bytedance.com +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + tests/qtest/migration-test.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c +index 13888be898..0ac5e7ddc9 100644 +--- a/tests/qtest/migration-test.c ++++ b/tests/qtest/migration-test.c +@@ -2560,6 +2560,13 @@ static void * + test_migrate_precopy_tcp_multifd_zlib_start(QTestState *from, + QTestState *to) + { ++ /* ++ * Overloading this test to also check that set_parameter does not error. ++ * This is also done in the tests for the other compression methods. 
++ */ ++ migrate_set_parameter_int(from, "multifd-zlib-level", 2); ++ migrate_set_parameter_int(to, "multifd-zlib-level", 2); ++ + return test_migrate_precopy_tcp_multifd_start_common(from, to, "zlib"); + } + +@@ -2568,6 +2575,9 @@ static void * + test_migrate_precopy_tcp_multifd_zstd_start(QTestState *from, + QTestState *to) + { ++ migrate_set_parameter_int(from, "multifd-zstd-level", 2); ++ migrate_set_parameter_int(to, "multifd-zstd-level", 2); ++ + return test_migrate_precopy_tcp_multifd_start_common(from, to, "zstd"); + } + #endif /* CONFIG_ZSTD */ +-- +2.33.0 + diff --git a/tests-migration-test-add-qpl-compression-test.patch b/tests-migration-test-add-qpl-compression-test.patch new file mode 100644 index 0000000..4c5904e --- /dev/null +++ b/tests-migration-test-add-qpl-compression-test.patch @@ -0,0 +1,80 @@ +From 3b4704d5856f383244b0c2a1e6c180cdcc672eb0 Mon Sep 17 00:00:00 2001 +From: Yuan Liu +Date: Mon, 10 Jun 2024 18:21:10 +0800 +Subject: [79/99] tests/migration-test: add qpl compression test + +commit 08b82d207d138173ddd334c91b387213508a6e13 upstream. + +add qpl to compression method test for multifd migration + +the qpl compression supports software path and hardware +path(IAA device), and the hardware path is used first by +default. If the hardware path is unavailable, it will +automatically fallback to the software path for testing. 
+ +Signed-off-by: Yuan Liu +Reviewed-by: Nanhai Zou +Reviewed-by: Peter Xu +Reviewed-by: Fabiano Rosas +Signed-off-by: Fabiano Rosas + + Conflicts: + tests/qtest/migration-test.c +[jz: resolve simple context conflict] +Signed-off-by: Jason Zeng +--- + tests/qtest/migration-test.c | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c +index 0ac5e7ddc9..16cb7993b3 100644 +--- a/tests/qtest/migration-test.c ++++ b/tests/qtest/migration-test.c +@@ -2582,6 +2582,15 @@ test_migrate_precopy_tcp_multifd_zstd_start(QTestState *from, + } + #endif /* CONFIG_ZSTD */ + ++#ifdef CONFIG_QPL ++static void * ++test_migrate_precopy_tcp_multifd_qpl_start(QTestState *from, ++ QTestState *to) ++{ ++ return test_migrate_precopy_tcp_multifd_start_common(from, to, "qpl"); ++} ++#endif /* CONFIG_QPL */ ++ + static void test_multifd_tcp_none(void) + { + MigrateCommon args = { +@@ -2617,6 +2626,17 @@ static void test_multifd_tcp_zstd(void) + } + #endif + ++#ifdef CONFIG_QPL ++static void test_multifd_tcp_qpl(void) ++{ ++ MigrateCommon args = { ++ .listen_uri = "defer", ++ .start_hook = test_migrate_precopy_tcp_multifd_qpl_start, ++ }; ++ test_precopy_common(&args); ++} ++#endif ++ + #ifdef CONFIG_GNUTLS + static void * + test_migrate_multifd_tcp_tls_psk_start_match(QTestState *from, +@@ -3492,6 +3512,10 @@ int main(int argc, char **argv) + migration_test_add("/migration/multifd/tcp/plain/zstd", + test_multifd_tcp_zstd); + #endif ++#ifdef CONFIG_QPL ++ migration_test_add("/migration/multifd/tcp/plain/qpl", ++ test_multifd_tcp_qpl); ++#endif + #ifdef CONFIG_GNUTLS + migration_test_add("/migration/multifd/tcp/tls/psk/match", + test_multifd_tcp_tls_psk_match); +-- +2.33.0 + diff --git a/tests-migration-test-add-uadk-compression-test.patch b/tests-migration-test-add-uadk-compression-test.patch new file mode 100644 index 0000000..dc51662 --- /dev/null +++ b/tests-migration-test-add-uadk-compression-test.patch @@ 
-0,0 +1,66 @@ +From 76db600f67d72fdb24d794954c85a902968f71ea Mon Sep 17 00:00:00 2001 +From: Shameer Kolothum +Date: Fri, 7 Jun 2024 14:53:10 +0100 +Subject: [86/99] tests/migration-test: add uadk compression test + +commit c519caa825f5eba6e204bed5a464df167a5421d0 upstream. + +Reviewed-by: Fabiano Rosas +Signed-off-by: Shameer Kolothum +Signed-off-by: Fabiano Rosas +Signed-off-by: Jason Zeng +--- + tests/qtest/migration-test.c | 23 +++++++++++++++++++++++ + 1 file changed, 23 insertions(+) + +diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c +index 16cb7993b3..7ecf4ce9a5 100644 +--- a/tests/qtest/migration-test.c ++++ b/tests/qtest/migration-test.c +@@ -2590,6 +2590,14 @@ test_migrate_precopy_tcp_multifd_qpl_start(QTestState *from, + return test_migrate_precopy_tcp_multifd_start_common(from, to, "qpl"); + } + #endif /* CONFIG_QPL */ ++#ifdef CONFIG_UADK ++static void * ++test_migrate_precopy_tcp_multifd_uadk_start(QTestState *from, ++ QTestState *to) ++{ ++ return test_migrate_precopy_tcp_multifd_start_common(from, to, "uadk"); ++} ++#endif /* CONFIG_UADK */ + + static void test_multifd_tcp_none(void) + { +@@ -2637,6 +2645,17 @@ static void test_multifd_tcp_qpl(void) + } + #endif + ++#ifdef CONFIG_UADK ++static void test_multifd_tcp_uadk(void) ++{ ++ MigrateCommon args = { ++ .listen_uri = "defer", ++ .start_hook = test_migrate_precopy_tcp_multifd_uadk_start, ++ }; ++ test_precopy_common(&args); ++} ++#endif ++ + #ifdef CONFIG_GNUTLS + static void * + test_migrate_multifd_tcp_tls_psk_start_match(QTestState *from, +@@ -3516,6 +3535,10 @@ int main(int argc, char **argv) + migration_test_add("/migration/multifd/tcp/plain/qpl", + test_multifd_tcp_qpl); + #endif ++#ifdef CONFIG_UADK ++ migration_test_add("/migration/multifd/tcp/plain/uadk", ++ test_multifd_tcp_uadk); ++#endif + #ifdef CONFIG_GNUTLS + migration_test_add("/migration/multifd/tcp/tls/psk/match", + test_multifd_tcp_tls_psk_match); +-- +2.33.0 + diff --git 
a/tests-qtest-Re-enable-multifd-cancel-test.patch b/tests-qtest-Re-enable-multifd-cancel-test.patch new file mode 100644 index 0000000..a62956e --- /dev/null +++ b/tests-qtest-Re-enable-multifd-cancel-test.patch @@ -0,0 +1,43 @@ +From eea4f476c2c35e4153637d5efe25ce308c2aaa55 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Wed, 11 Oct 2023 15:46:04 -0300 +Subject: [14/99] tests/qtest: Re-enable multifd cancel test + +commit 75b1f88cd2dd5eeb1fd817a2f3a291c2670f9c50 upstream. + +We've found the source of flakiness in this test, so re-enable it. + +Reviewed-by: Juan Quintela +Signed-off-by: Fabiano Rosas +Link: https://lore.kernel.org/r/20230606144551.24367-4-farosas@suse.de +[peterx: rebase to 2a61a6964c, to use migration_test_add()] +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + tests/qtest/migration-test.c | 10 ++-------- + 1 file changed, 2 insertions(+), 8 deletions(-) + +diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c +index 470b06bbb4..13888be898 100644 +--- a/tests/qtest/migration-test.c ++++ b/tests/qtest/migration-test.c +@@ -3474,14 +3474,8 @@ int main(int argc, char **argv) + } + migration_test_add("/migration/multifd/tcp/plain/none", + test_multifd_tcp_none); +- /* +- * This test is flaky and sometimes fails in CI and otherwise: +- * don't run unless user opts in via environment variable. 
+- */ +- if (getenv("QEMU_TEST_FLAKY_TESTS")) { +- migration_test_add("/migration/multifd/tcp/plain/cancel", +- test_multifd_tcp_cancel); +- } ++ migration_test_add("/migration/multifd/tcp/plain/cancel", ++ test_multifd_tcp_cancel); + migration_test_add("/migration/multifd/tcp/plain/zlib", + test_multifd_tcp_zlib); + #ifdef CONFIG_ZSTD +-- +2.33.0 + diff --git a/tests-qtest-migration-Add-a-wrapper-to-print-test-na.patch b/tests-qtest-migration-Add-a-wrapper-to-print-test-na.patch new file mode 100644 index 0000000..a0de622 --- /dev/null +++ b/tests-qtest-migration-Add-a-wrapper-to-print-test-na.patch @@ -0,0 +1,88 @@ +From d78a7031877a343563200e875c4ef2d71522f1d0 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 4 Jan 2024 11:21:43 -0300 +Subject: [12/99] tests/qtest/migration: Add a wrapper to print test names + +commit e33b6712dba206547a313a6f2608b0fd967ee558 upstream. + +Our usage of gtest results in us losing the very basic functionality +of "knowing which test failed". The issue is that gtest only prints +test names ("paths" in gtest parlance) once the test has finished, but +we use asserts in the tests and crash gtest itself before it can print +anything. We also use a final abort when the result of g_test_run is +not 0. + +Depending on how the test failed/broke we can see the function that +trigged the abort, which may be representative of the test, but it +could also just be some generic function. + +We have been relying on the primitive method of looking at the name of +the previous successful test and then looking at the code to figure +out which test should have come next. + +Add a wrapper to the test registration that does the job of printing +the test name before running. 
+ +Signed-off-by: Fabiano Rosas +Reviewed-by: Peter Xu +Link: https://lore.kernel.org/r/20240104142144.9680-7-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + tests/qtest/migration-helpers.c | 32 ++++++++++++++++++++++++++++++++ + tests/qtest/migration-helpers.h | 1 + + 2 files changed, 33 insertions(+) + +diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c +index f1106128a9..164e09c299 100644 +--- a/tests/qtest/migration-helpers.c ++++ b/tests/qtest/migration-helpers.c +@@ -298,3 +298,35 @@ char *resolve_machine_version(const char *alias, const char *var1, + + return find_common_machine_version(machine_name, var1, var2); + } ++ ++typedef struct { ++ char *name; ++ void (*func)(void); ++} MigrationTest; ++ ++static void migration_test_destroy(gpointer data) ++{ ++ MigrationTest *test = (MigrationTest *)data; ++ ++ g_free(test->name); ++ g_free(test); ++} ++ ++static void migration_test_wrapper(const void *data) ++{ ++ MigrationTest *test = (MigrationTest *)data; ++ ++ g_test_message("Running /%s%s", qtest_get_arch(), test->name); ++ test->func(); ++} ++ ++void migration_test_add(const char *path, void (*fn)(void)) ++{ ++ MigrationTest *test = g_new0(MigrationTest, 1); ++ ++ test->func = fn; ++ test->name = g_strdup(path); ++ ++ qtest_add_data_func_full(path, test, migration_test_wrapper, ++ migration_test_destroy); ++} +diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h +index e31dc85cc7..0d9a02edc7 100644 +--- a/tests/qtest/migration-helpers.h ++++ b/tests/qtest/migration-helpers.h +@@ -47,4 +47,5 @@ char *find_common_machine_version(const char *mtype, const char *var1, + const char *var2); + char *resolve_machine_version(const char *alias, const char *var1, + const char *var2); ++void migration_test_add(const char *path, void (*fn)(void)); + #endif /* MIGRATION_HELPERS_H */ +-- +2.33.0 + diff --git a/tests-qtest-migration-Print-migration-incoming-error.patch 
b/tests-qtest-migration-Print-migration-incoming-error.patch new file mode 100644 index 0000000..e205c82 --- /dev/null +++ b/tests-qtest-migration-Print-migration-incoming-error.patch @@ -0,0 +1,39 @@ +From 20c8c77ba5e362b1bfada691b2242648d3626d5d Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 4 Jan 2024 11:21:42 -0300 +Subject: [11/99] tests/qtest/migration: Print migration incoming errors + +commit 679a7382a389875c0f7835a1a409ebf4859f8410 upstream. + +We're currently just asserting when incoming migration fails. Let's +print the error message from QMP as well. + +Signed-off-by: Fabiano Rosas +Reviewed-by: Peter Xu +Link: https://lore.kernel.org/r/20240104142144.9680-6-farosas@suse.de +Signed-off-by: Peter Xu +Signed-off-by: Jason Zeng +--- + tests/qtest/migration-helpers.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c +index 24fb7b3525..f1106128a9 100644 +--- a/tests/qtest/migration-helpers.c ++++ b/tests/qtest/migration-helpers.c +@@ -118,6 +118,12 @@ void migrate_incoming_qmp(QTestState *to, const char *uri, const char *fmt, ...) + + rsp = qtest_qmp(to, "{ 'execute': 'migrate-incoming', 'arguments': %p}", + args); ++ ++ if (!qdict_haskey(rsp, "return")) { ++ g_autoptr(GString) s = qobject_to_json_pretty(QOBJECT(rsp), true); ++ g_test_message("%s", s->str); ++ } ++ + g_assert(qdict_haskey(rsp, "return")); + qobject_unref(rsp); + +-- +2.33.0 + diff --git a/tests-qtest-migration-Use-the-new-migration_test_add.patch b/tests-qtest-migration-Use-the-new-migration_test_add.patch new file mode 100644 index 0000000..7fb81ec --- /dev/null +++ b/tests-qtest-migration-Use-the-new-migration_test_add.patch @@ -0,0 +1,308 @@ +From a26a1ea993f48dbccd0fee3812b7535531b1cc14 Mon Sep 17 00:00:00 2001 +From: Fabiano Rosas +Date: Thu, 4 Jan 2024 11:21:44 -0300 +Subject: [13/99] tests/qtest/migration: Use the new migration_test_add + +commit 6f0771de903bb7623dc85bbf9f94f641979daaaa upstream. 
+ +Replace the tests registration with the new function that prints tests +names. + +Signed-off-by: Fabiano Rosas +Reviewed-by: Peter Xu +Link: https://lore.kernel.org/r/20240104142144.9680-8-farosas@suse.de +Signed-off-by: Peter Xu + + Conflicts: + tests/qtest/migration-test.c +[jz: resolve context conflicts due to live-suspend which is not backported] +Signed-off-by: Jason Zeng +--- + tests/qtest/migration-test.c | 202 ++++++++++++++++++----------------- + 1 file changed, 104 insertions(+), 98 deletions(-) + +diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c +index 0fbaa6a90f..470b06bbb4 100644 +--- a/tests/qtest/migration-test.c ++++ b/tests/qtest/migration-test.c +@@ -3339,62 +3339,64 @@ int main(int argc, char **argv) + module_call_init(MODULE_INIT_QOM); + + if (has_uffd) { +- qtest_add_func("/migration/postcopy/plain", test_postcopy); +- qtest_add_func("/migration/postcopy/recovery/plain", +- test_postcopy_recovery); +- qtest_add_func("/migration/postcopy/preempt/plain", test_postcopy_preempt); +- qtest_add_func("/migration/postcopy/preempt/recovery/plain", +- test_postcopy_preempt_recovery); ++ migration_test_add("/migration/postcopy/plain", test_postcopy); ++ migration_test_add("/migration/postcopy/recovery/plain", ++ test_postcopy_recovery); ++ migration_test_add("/migration/postcopy/preempt/plain", ++ test_postcopy_preempt); ++ migration_test_add("/migration/postcopy/preempt/recovery/plain", ++ test_postcopy_preempt_recovery); + if (getenv("QEMU_TEST_FLAKY_TESTS")) { +- qtest_add_func("/migration/postcopy/compress/plain", +- test_postcopy_compress); +- qtest_add_func("/migration/postcopy/recovery/compress/plain", +- test_postcopy_recovery_compress); ++ migration_test_add("/migration/postcopy/compress/plain", ++ test_postcopy_compress); ++ migration_test_add("/migration/postcopy/recovery/compress/plain", ++ test_postcopy_recovery_compress); + } + #ifndef _WIN32 +- qtest_add_func("/migration/postcopy/recovery/double-failures", +- 
test_postcopy_recovery_double_fail); ++ migration_test_add("/migration/postcopy/recovery/double-failures", ++ test_postcopy_recovery_double_fail); + #endif /* _WIN32 */ +- + } + +- qtest_add_func("/migration/bad_dest", test_baddest); ++ migration_test_add("/migration/bad_dest", test_baddest); + #ifndef _WIN32 + if (!g_str_equal(arch, "s390x")) { +- qtest_add_func("/migration/analyze-script", test_analyze_script); ++ migration_test_add("/migration/analyze-script", test_analyze_script); + } + #endif +- qtest_add_func("/migration/precopy/unix/plain", test_precopy_unix_plain); +- qtest_add_func("/migration/precopy/unix/xbzrle", test_precopy_unix_xbzrle); ++ migration_test_add("/migration/precopy/unix/plain", ++ test_precopy_unix_plain); ++ migration_test_add("/migration/precopy/unix/xbzrle", ++ test_precopy_unix_xbzrle); + /* + * Compression fails from time to time. + * Put test here but don't enable it until everything is fixed. + */ + if (getenv("QEMU_TEST_FLAKY_TESTS")) { +- qtest_add_func("/migration/precopy/unix/compress/wait", +- test_precopy_unix_compress); +- qtest_add_func("/migration/precopy/unix/compress/nowait", +- test_precopy_unix_compress_nowait); ++ migration_test_add("/migration/precopy/unix/compress/wait", ++ test_precopy_unix_compress); ++ migration_test_add("/migration/precopy/unix/compress/nowait", ++ test_precopy_unix_compress_nowait); + } + +- qtest_add_func("/migration/precopy/file", +- test_precopy_file); +- qtest_add_func("/migration/precopy/file/offset", +- test_precopy_file_offset); +- qtest_add_func("/migration/precopy/file/offset/bad", +- test_precopy_file_offset_bad); ++ migration_test_add("/migration/precopy/file", ++ test_precopy_file); ++ migration_test_add("/migration/precopy/file/offset", ++ test_precopy_file_offset); ++ migration_test_add("/migration/precopy/file/offset/bad", ++ test_precopy_file_offset_bad); + + /* + * Our CI system has problems with shared memory. + * Don't run this test until we find a workaround. 
+ */ + if (getenv("QEMU_TEST_FLAKY_TESTS")) { +- qtest_add_func("/migration/mode/reboot", test_mode_reboot); ++ migration_test_add("/migration/mode/reboot", test_mode_reboot); + } + + #ifdef CONFIG_GNUTLS +- qtest_add_func("/migration/precopy/unix/tls/psk", +- test_precopy_unix_tls_psk); ++ migration_test_add("/migration/precopy/unix/tls/psk", ++ test_precopy_unix_tls_psk); + + if (has_uffd) { + /* +@@ -3402,110 +3404,114 @@ int main(int argc, char **argv) + * channels are tested under precopy. Here what we want to test is the + * general postcopy path that has TLS channel enabled. + */ +- qtest_add_func("/migration/postcopy/tls/psk", test_postcopy_tls_psk); +- qtest_add_func("/migration/postcopy/recovery/tls/psk", +- test_postcopy_recovery_tls_psk); +- qtest_add_func("/migration/postcopy/preempt/tls/psk", +- test_postcopy_preempt_tls_psk); +- qtest_add_func("/migration/postcopy/preempt/recovery/tls/psk", +- test_postcopy_preempt_all); ++ migration_test_add("/migration/postcopy/tls/psk", ++ test_postcopy_tls_psk); ++ migration_test_add("/migration/postcopy/recovery/tls/psk", ++ test_postcopy_recovery_tls_psk); ++ migration_test_add("/migration/postcopy/preempt/tls/psk", ++ test_postcopy_preempt_tls_psk); ++ migration_test_add("/migration/postcopy/preempt/recovery/tls/psk", ++ test_postcopy_preempt_all); + } + #ifdef CONFIG_TASN1 +- qtest_add_func("/migration/precopy/unix/tls/x509/default-host", +- test_precopy_unix_tls_x509_default_host); +- qtest_add_func("/migration/precopy/unix/tls/x509/override-host", +- test_precopy_unix_tls_x509_override_host); ++ migration_test_add("/migration/precopy/unix/tls/x509/default-host", ++ test_precopy_unix_tls_x509_default_host); ++ migration_test_add("/migration/precopy/unix/tls/x509/override-host", ++ test_precopy_unix_tls_x509_override_host); + #endif /* CONFIG_TASN1 */ + #endif /* CONFIG_GNUTLS */ + +- qtest_add_func("/migration/precopy/tcp/plain", test_precopy_tcp_plain); ++ migration_test_add("/migration/precopy/tcp/plain", 
test_precopy_tcp_plain); + +- qtest_add_func("/migration/precopy/tcp/plain/switchover-ack", +- test_precopy_tcp_switchover_ack); ++ migration_test_add("/migration/precopy/tcp/plain/switchover-ack", ++ test_precopy_tcp_switchover_ack); + + #ifdef CONFIG_GNUTLS +- qtest_add_func("/migration/precopy/tcp/tls/psk/match", +- test_precopy_tcp_tls_psk_match); +- qtest_add_func("/migration/precopy/tcp/tls/psk/mismatch", +- test_precopy_tcp_tls_psk_mismatch); ++ migration_test_add("/migration/precopy/tcp/tls/psk/match", ++ test_precopy_tcp_tls_psk_match); ++ migration_test_add("/migration/precopy/tcp/tls/psk/mismatch", ++ test_precopy_tcp_tls_psk_mismatch); + #ifdef CONFIG_TASN1 +- qtest_add_func("/migration/precopy/tcp/tls/x509/default-host", +- test_precopy_tcp_tls_x509_default_host); +- qtest_add_func("/migration/precopy/tcp/tls/x509/override-host", +- test_precopy_tcp_tls_x509_override_host); +- qtest_add_func("/migration/precopy/tcp/tls/x509/mismatch-host", +- test_precopy_tcp_tls_x509_mismatch_host); +- qtest_add_func("/migration/precopy/tcp/tls/x509/friendly-client", +- test_precopy_tcp_tls_x509_friendly_client); +- qtest_add_func("/migration/precopy/tcp/tls/x509/hostile-client", +- test_precopy_tcp_tls_x509_hostile_client); +- qtest_add_func("/migration/precopy/tcp/tls/x509/allow-anon-client", +- test_precopy_tcp_tls_x509_allow_anon_client); +- qtest_add_func("/migration/precopy/tcp/tls/x509/reject-anon-client", +- test_precopy_tcp_tls_x509_reject_anon_client); ++ migration_test_add("/migration/precopy/tcp/tls/x509/default-host", ++ test_precopy_tcp_tls_x509_default_host); ++ migration_test_add("/migration/precopy/tcp/tls/x509/override-host", ++ test_precopy_tcp_tls_x509_override_host); ++ migration_test_add("/migration/precopy/tcp/tls/x509/mismatch-host", ++ test_precopy_tcp_tls_x509_mismatch_host); ++ migration_test_add("/migration/precopy/tcp/tls/x509/friendly-client", ++ test_precopy_tcp_tls_x509_friendly_client); ++ 
migration_test_add("/migration/precopy/tcp/tls/x509/hostile-client", ++ test_precopy_tcp_tls_x509_hostile_client); ++ migration_test_add("/migration/precopy/tcp/tls/x509/allow-anon-client", ++ test_precopy_tcp_tls_x509_allow_anon_client); ++ migration_test_add("/migration/precopy/tcp/tls/x509/reject-anon-client", ++ test_precopy_tcp_tls_x509_reject_anon_client); + #endif /* CONFIG_TASN1 */ + #endif /* CONFIG_GNUTLS */ + +- /* qtest_add_func("/migration/ignore_shared", test_ignore_shared); */ ++ /* migration_test_add("/migration/ignore_shared", test_ignore_shared); */ + #ifndef _WIN32 +- qtest_add_func("/migration/fd_proto", test_migrate_fd_proto); ++ migration_test_add("/migration/fd_proto", test_migrate_fd_proto); + #endif +- qtest_add_func("/migration/validate_uuid", test_validate_uuid); +- qtest_add_func("/migration/validate_uuid_error", test_validate_uuid_error); +- qtest_add_func("/migration/validate_uuid_src_not_set", +- test_validate_uuid_src_not_set); +- qtest_add_func("/migration/validate_uuid_dst_not_set", +- test_validate_uuid_dst_not_set); ++ migration_test_add("/migration/validate_uuid", test_validate_uuid); ++ migration_test_add("/migration/validate_uuid_error", ++ test_validate_uuid_error); ++ migration_test_add("/migration/validate_uuid_src_not_set", ++ test_validate_uuid_src_not_set); ++ migration_test_add("/migration/validate_uuid_dst_not_set", ++ test_validate_uuid_dst_not_set); + /* + * See explanation why this test is slow on function definition + */ + if (g_test_slow()) { +- qtest_add_func("/migration/auto_converge", test_migrate_auto_converge); ++ migration_test_add("/migration/auto_converge", ++ test_migrate_auto_converge); + if (g_str_equal(arch, "x86_64") && + has_kvm && kvm_dirty_ring_supported()) { +- qtest_add_func("/migration/dirty_limit", test_migrate_dirty_limit); ++ migration_test_add("/migration/dirty_limit", ++ test_migrate_dirty_limit); + } + } +- qtest_add_func("/migration/multifd/tcp/plain/none", +- test_multifd_tcp_none); ++ 
migration_test_add("/migration/multifd/tcp/plain/none", ++ test_multifd_tcp_none); + /* + * This test is flaky and sometimes fails in CI and otherwise: + * don't run unless user opts in via environment variable. + */ + if (getenv("QEMU_TEST_FLAKY_TESTS")) { +- qtest_add_func("/migration/multifd/tcp/plain/cancel", +- test_multifd_tcp_cancel); ++ migration_test_add("/migration/multifd/tcp/plain/cancel", ++ test_multifd_tcp_cancel); + } +- qtest_add_func("/migration/multifd/tcp/plain/zlib", +- test_multifd_tcp_zlib); ++ migration_test_add("/migration/multifd/tcp/plain/zlib", ++ test_multifd_tcp_zlib); + #ifdef CONFIG_ZSTD +- qtest_add_func("/migration/multifd/tcp/plain/zstd", +- test_multifd_tcp_zstd); ++ migration_test_add("/migration/multifd/tcp/plain/zstd", ++ test_multifd_tcp_zstd); + #endif + #ifdef CONFIG_GNUTLS +- qtest_add_func("/migration/multifd/tcp/tls/psk/match", +- test_multifd_tcp_tls_psk_match); +- qtest_add_func("/migration/multifd/tcp/tls/psk/mismatch", +- test_multifd_tcp_tls_psk_mismatch); ++ migration_test_add("/migration/multifd/tcp/tls/psk/match", ++ test_multifd_tcp_tls_psk_match); ++ migration_test_add("/migration/multifd/tcp/tls/psk/mismatch", ++ test_multifd_tcp_tls_psk_mismatch); + #ifdef CONFIG_TASN1 +- qtest_add_func("/migration/multifd/tcp/tls/x509/default-host", +- test_multifd_tcp_tls_x509_default_host); +- qtest_add_func("/migration/multifd/tcp/tls/x509/override-host", +- test_multifd_tcp_tls_x509_override_host); +- qtest_add_func("/migration/multifd/tcp/tls/x509/mismatch-host", +- test_multifd_tcp_tls_x509_mismatch_host); +- qtest_add_func("/migration/multifd/tcp/tls/x509/allow-anon-client", +- test_multifd_tcp_tls_x509_allow_anon_client); +- qtest_add_func("/migration/multifd/tcp/tls/x509/reject-anon-client", +- test_multifd_tcp_tls_x509_reject_anon_client); ++ migration_test_add("/migration/multifd/tcp/tls/x509/default-host", ++ test_multifd_tcp_tls_x509_default_host); ++ 
migration_test_add("/migration/multifd/tcp/tls/x509/override-host", ++ test_multifd_tcp_tls_x509_override_host); ++ migration_test_add("/migration/multifd/tcp/tls/x509/mismatch-host", ++ test_multifd_tcp_tls_x509_mismatch_host); ++ migration_test_add("/migration/multifd/tcp/tls/x509/allow-anon-client", ++ test_multifd_tcp_tls_x509_allow_anon_client); ++ migration_test_add("/migration/multifd/tcp/tls/x509/reject-anon-client", ++ test_multifd_tcp_tls_x509_reject_anon_client); + #endif /* CONFIG_TASN1 */ + #endif /* CONFIG_GNUTLS */ + + if (g_str_equal(arch, "x86_64") && has_kvm && kvm_dirty_ring_supported()) { +- qtest_add_func("/migration/dirty_ring", +- test_precopy_unix_dirty_ring); +- qtest_add_func("/migration/vcpu_dirty_limit", +- test_vcpu_dirty_limit); ++ migration_test_add("/migration/dirty_ring", ++ test_precopy_unix_dirty_ring); ++ migration_test_add("/migration/vcpu_dirty_limit", ++ test_vcpu_dirty_limit); + } + + ret = g_test_run(); +-- +2.33.0 + -- Gitee