From 6e9beed308cf3a252419fe8fde37cae3d1f2e281 Mon Sep 17 00:00:00 2001 From: Zhang Bo Date: Mon, 29 Aug 2022 16:38:09 +0800 Subject: [PATCH] backport nbd related patches to avoid vm crash during migration block-nbd was refacted during release 6.2.0, but we didn't induced all the needed patches within the 6.2.0 baseline, which leads to vm crash during migration. the reasons are as below: when iothread is configured, the coroutines should get back to the exact iothread that was out of. But within the 6.2.0 baseline, patches were missing, nbd related coroutine didn't have its related aio_context. It in fact get to the mainline aio_context, the mistaken context leads to vm crash. --- ...sert-there-are-no-timers-when-closed.patch | 38 +++++ block-nbd-Delete-open-timer-when-done.patch | 48 ++++++ ...lete-reconnect-delay-timer-when-done.patch | 45 ++++++ ...-nbd-Move-s-ioc-on-AioContext-change.patch | 97 ++++++++++++ ...ect-on-open-with-corresponding-new-o.patch | 138 ++++++++++++++++++ qemu.spec | 10 +- 6 files changed, 375 insertions(+), 1 deletion(-) create mode 100644 block-nbd-Assert-there-are-no-timers-when-closed.patch create mode 100644 block-nbd-Delete-open-timer-when-done.patch create mode 100644 block-nbd-Delete-reconnect-delay-timer-when-done.patch create mode 100644 block-nbd-Move-s-ioc-on-AioContext-change.patch create mode 100644 nbd-allow-reconnect-on-open-with-corresponding-new-o.patch diff --git a/block-nbd-Assert-there-are-no-timers-when-closed.patch b/block-nbd-Assert-there-are-no-timers-when-closed.patch new file mode 100644 index 0000000..da3badd --- /dev/null +++ b/block-nbd-Assert-there-are-no-timers-when-closed.patch @@ -0,0 +1,38 @@ +From 8353d0d6a31042ba7c54696ef1ec59eb883d647f Mon Sep 17 00:00:00 2001 +From: Zhang Bo +Date: Mon, 29 Aug 2022 15:37:08 +0800 +Subject: [PATCH 4/5] block/nbd: Assert there are no timers when closed + +Our two timers must not remain armed beyond nbd_clear_bdrvstate(), or +they will access freed data when they fire. + +This patch is separate from the patches that actually fix the issue +(HEAD^^ and HEAD^) so that you can run the associated regression iotest +(281) on a configuration that reproducibly exposes the bug. + +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Zhang Bo +--- + block/nbd.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/block/nbd.c b/block/nbd.c +index 5ff8a57314..dc6c3f3bbc 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -110,6 +110,10 @@ static void nbd_clear_bdrvstate(BlockDriverState *bs) + + yank_unregister_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name)); + ++ /* Must not leave timers behind that would access freed data */ ++ assert(!s->reconnect_delay_timer); ++ assert(!s->open_timer); ++ + object_unref(OBJECT(s->tlscreds)); + qapi_free_SocketAddress(s->saddr); + s->saddr = NULL; +-- +2.27.0 + diff --git a/block-nbd-Delete-open-timer-when-done.patch b/block-nbd-Delete-open-timer-when-done.patch new file mode 100644 index 0000000..f8cebad --- /dev/null +++ b/block-nbd-Delete-open-timer-when-done.patch @@ -0,0 +1,48 @@ +From 6a49e752439f02ef9ccaac30d14acf185e31a261 Mon Sep 17 00:00:00 2001 +From: Zhang Bo +Date: Mon, 29 Aug 2022 15:35:36 +0800 +Subject: [PATCH 3/5] block/nbd: Delete open timer when done + +We start the open timer to cancel the connection attempt after a while. +Once nbd_do_establish_connection() has returned, the attempt is over, +and we no longer need the timer. + +Delete it before returning from nbd_open(), so that it does not persist +for longer. It has no use after nbd_open(), and just like the reconnect +delay timer, it might well be dangerous if it were to fire afterwards. + +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Zhang Bo +--- + block/nbd.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/block/nbd.c b/block/nbd.c +index 16cd7fef77..5ff8a57314 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -1885,11 +1885,19 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags, + goto fail; + } + ++ /* ++ * The connect attempt is done, so we no longer need this timer. ++ * Delete it, because we do not want it to be around when this node ++ * is drained or closed. ++ */ ++ open_timer_del(s); ++ + nbd_client_connection_enable_retry(s->conn); + + return 0; + + fail: ++ open_timer_del(s); + nbd_clear_bdrvstate(bs); + return ret; + } +-- +2.27.0 + diff --git a/block-nbd-Delete-reconnect-delay-timer-when-done.patch b/block-nbd-Delete-reconnect-delay-timer-when-done.patch new file mode 100644 index 0000000..b7ca363 --- /dev/null +++ b/block-nbd-Delete-reconnect-delay-timer-when-done.patch @@ -0,0 +1,45 @@ +From a4d001a08ce1279b121cb870c378ddeb0825f2bc Mon Sep 17 00:00:00 2001 +From: Zhang Bo +Date: Mon, 29 Aug 2022 15:34:07 +0800 +Subject: [PATCH 2/5] block/nbd: Delete reconnect delay timer when done + +We start the reconnect delay timer to cancel the reconnection attempt +after a while. Once nbd_co_do_establish_connection() has returned, this +attempt is over, and we no longer need the timer. + +Delete it before returning from nbd_reconnect_attempt(), so that it does +not persist beyond the I/O request that was paused for reconnecting; we +do not want it to fire in a drained section, because all sort of things +can happen in such a section (e.g. the AioContext might be changed, and +we do not want the timer to fire in the wrong context; or the BDS might +even be deleted, and so the timer CB would access already-freed data). + +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Zhang Bo +--- + block/nbd.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/block/nbd.c b/block/nbd.c +index 63dbfa807d..16cd7fef77 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -381,6 +381,13 @@ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) + } + + nbd_co_do_establish_connection(s->bs, NULL); ++ ++ /* ++ * The reconnect attempt is done (maybe successfully, maybe not), so ++ * we no longer need this timer. Delete it so it will not outlive ++ * this I/O request (so draining removes all timers). ++ */ ++ reconnect_delay_timer_del(s); + } + + static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t handle) +-- +2.27.0 + diff --git a/block-nbd-Move-s-ioc-on-AioContext-change.patch b/block-nbd-Move-s-ioc-on-AioContext-change.patch new file mode 100644 index 0000000..f39430e --- /dev/null +++ b/block-nbd-Move-s-ioc-on-AioContext-change.patch @@ -0,0 +1,97 @@ +From ba810e3b3800f0d084a2dd324a9dea89c9320548 Mon Sep 17 00:00:00 2001 +From: Zhang Bo +Date: Mon, 29 Aug 2022 15:38:19 +0800 +Subject: [PATCH 5/5] block/nbd: Move s->ioc on AioContext change + +s->ioc must always be attached to the NBD node's AioContext. If that +context changes, s->ioc must be attached to the new context. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2033626 +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Reitz +Signed-off-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Zhang Bo +--- + block/nbd.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 45 insertions(+) + +diff --git a/block/nbd.c b/block/nbd.c +index dc6c3f3bbc..5853d85d60 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -2055,6 +2055,42 @@ static void nbd_cancel_in_flight(BlockDriverState *bs) + nbd_co_establish_connection_cancel(s->conn); + } + ++static void nbd_attach_aio_context(BlockDriverState *bs, ++ AioContext *new_context) ++{ ++ BDRVNBDState *s = bs->opaque; ++ ++ /* The open_timer is used only during nbd_open() */ ++ assert(!s->open_timer); ++ ++ /* ++ * The reconnect_delay_timer is scheduled in I/O paths when the ++ * connection is lost, to cancel the reconnection attempt after a ++ * given time. Once this attempt is done (successfully or not), ++ * nbd_reconnect_attempt() ensures the timer is deleted before the ++ * respective I/O request is resumed. ++ * Since the AioContext can only be changed when a node is drained, ++ * the reconnect_delay_timer cannot be active here. ++ */ ++ assert(!s->reconnect_delay_timer); ++ ++ if (s->ioc) { ++ qio_channel_attach_aio_context(s->ioc, new_context); ++ } ++} ++ ++static void nbd_detach_aio_context(BlockDriverState *bs) ++{ ++ BDRVNBDState *s = bs->opaque; ++ ++ assert(!s->open_timer); ++ assert(!s->reconnect_delay_timer); ++ ++ if (s->ioc) { ++ qio_channel_detach_aio_context(s->ioc); ++ } ++} ++ + static BlockDriver bdrv_nbd = { + .format_name = "nbd", + .protocol_name = "nbd", +@@ -2078,6 +2114,9 @@ static BlockDriver bdrv_nbd = { + .bdrv_dirname = nbd_dirname, + .strong_runtime_opts = nbd_strong_runtime_opts, + .bdrv_cancel_in_flight = nbd_cancel_in_flight, ++ ++ .bdrv_attach_aio_context = nbd_attach_aio_context, ++ .bdrv_detach_aio_context = nbd_detach_aio_context, + }; + + static BlockDriver bdrv_nbd_tcp = { +@@ -2103,6 +2142,9 @@ static BlockDriver bdrv_nbd_tcp = { + .bdrv_dirname = nbd_dirname, + .strong_runtime_opts = nbd_strong_runtime_opts, + .bdrv_cancel_in_flight = nbd_cancel_in_flight, ++ ++ .bdrv_attach_aio_context = nbd_attach_aio_context, ++ .bdrv_detach_aio_context = nbd_detach_aio_context, + }; + + static BlockDriver bdrv_nbd_unix = { +@@ -2128,6 +2170,9 @@ static BlockDriver bdrv_nbd_unix = { + .bdrv_dirname = nbd_dirname, + .strong_runtime_opts = nbd_strong_runtime_opts, + .bdrv_cancel_in_flight = nbd_cancel_in_flight, ++ ++ .bdrv_attach_aio_context = nbd_attach_aio_context, ++ .bdrv_detach_aio_context = nbd_detach_aio_context, + }; + + static void bdrv_nbd_init(void) +-- +2.27.0 + diff --git a/nbd-allow-reconnect-on-open-with-corresponding-new-o.patch b/nbd-allow-reconnect-on-open-with-corresponding-new-o.patch new file mode 100644 index 0000000..274536b --- /dev/null +++ b/nbd-allow-reconnect-on-open-with-corresponding-new-o.patch @@ -0,0 +1,138 @@ +From eb42fba27842e3ebc342f15847863b5e812a7919 Mon Sep 17 00:00:00 2001 +From: Zhang Bo +Date: Mon, 29 Aug 2022 15:28:55 +0800 +Subject: [PATCH 1/5] nbd: allow reconnect on open, with corresponding new + options + +It is useful when start of vm and start of nbd server are not +simple to sync. + +Signed-off-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Eric Blake +Signed-off-by: Zhang Bo +--- + block/nbd.c | 45 +++++++++++++++++++++++++++++++++++++++++++- + qapi/block-core.json | 9 ++++++++- + 2 files changed, 52 insertions(+), 2 deletions(-) + +diff --git a/block/nbd.c b/block/nbd.c +index 5ef462db1b..63dbfa807d 100644 +--- a/block/nbd.c ++++ b/block/nbd.c +@@ -80,6 +80,7 @@ typedef struct BDRVNBDState { + NBDClientState state; + + QEMUTimer *reconnect_delay_timer; ++ QEMUTimer *open_timer; + + NBDClientRequest requests[MAX_NBD_REQUESTS]; + NBDReply reply; +@@ -87,6 +88,7 @@ typedef struct BDRVNBDState { + + /* Connection parameters */ + uint32_t reconnect_delay; ++ uint32_t open_timeout; + SocketAddress *saddr; + char *export, *tlscredsid; + QCryptoTLSCreds *tlscreds; +@@ -218,6 +220,32 @@ static void nbd_teardown_connection(BlockDriverState *bs) + s->state = NBD_CLIENT_QUIT; + } + ++static void open_timer_del(BDRVNBDState *s) ++{ ++ if (s->open_timer) { ++ timer_free(s->open_timer); ++ s->open_timer = NULL; ++ } ++} ++ ++static void open_timer_cb(void *opaque) ++{ ++ BDRVNBDState *s = opaque; ++ ++ nbd_co_establish_connection_cancel(s->conn); ++ open_timer_del(s); ++} ++ ++static void open_timer_init(BDRVNBDState *s, uint64_t expire_time_ns) ++{ ++ assert(!s->open_timer); ++ s->open_timer = aio_timer_new(bdrv_get_aio_context(s->bs), ++ QEMU_CLOCK_REALTIME, ++ SCALE_NS, ++ open_timer_cb, s); ++ timer_mod(s->open_timer, expire_time_ns); ++} ++ + static bool nbd_client_connecting(BDRVNBDState *s) + { + NBDClientState state = qatomic_load_acquire(&s->state); +@@ -1742,6 +1770,15 @@ static QemuOptsList nbd_runtime_opts = { + "future requests before a successful reconnect will " + "immediately fail. Default 0", + }, ++ { ++ .name = "open-timeout", ++ .type = QEMU_OPT_NUMBER, ++ .help = "In seconds. If zero, the nbd driver tries the connection " ++ "only once, and fails to open if the connection fails. " ++ "If non-zero, the nbd driver will repeat connection " ++ "attempts until successful or until @open-timeout seconds " ++ "have elapsed. Default 0", ++ }, + { /* end of list */ } + }, + }; +@@ -1797,6 +1834,7 @@ static int nbd_process_options(BlockDriverState *bs, QDict *options, + } + + s->reconnect_delay = qemu_opt_get_number(opts, "reconnect-delay", 0); ++ s->open_timeout = qemu_opt_get_number(opts, "open-timeout", 0); + + ret = 0; + +@@ -1828,7 +1866,12 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags, + s->conn = nbd_client_connection_new(s->saddr, true, s->export, + s->x_dirty_bitmap, s->tlscreds); + +- /* TODO: Configurable retry-until-timeout behaviour. */ ++ if (s->open_timeout) { ++ nbd_client_connection_enable_retry(s->conn); ++ open_timer_init(s, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + ++ s->open_timeout * NANOSECONDS_PER_SECOND); ++ } ++ + s->state = NBD_CLIENT_CONNECTING_WAIT; + ret = nbd_do_establish_connection(bs, errp); + if (ret < 0) { +diff --git a/qapi/block-core.json b/qapi/block-core.json +index e65fabe36d..618e417135 100644 +--- a/qapi/block-core.json ++++ b/qapi/block-core.json +@@ -4096,6 +4096,12 @@ + # future requests before a successful reconnect will + # immediately fail. Default 0 (Since 4.2) + # ++# @open-timeout: In seconds. If zero, the nbd driver tries the connection ++# only once, and fails to open if the connection fails. ++# If non-zero, the nbd driver will repeat connection attempts ++# until successful or until @open-timeout seconds have elapsed. ++# Default 0 (Since 7.0) ++# + # Features: + # @unstable: Member @x-dirty-bitmap is experimental. + # +@@ -4106,7 +4112,8 @@ + '*export': 'str', + '*tls-creds': 'str', + '*x-dirty-bitmap': { 'type': 'str', 'features': [ 'unstable' ] }, +- '*reconnect-delay': 'uint32' } } ++ '*reconnect-delay': 'uint32', ++ '*open-timeout': 'uint32' } } + + ## + # @BlockdevOptionsRaw: +-- +2.27.0 + diff --git a/qemu.spec b/qemu.spec index acaa3fb..8f445d4 100644 --- a/qemu.spec +++ b/qemu.spec @@ -1,6 +1,6 @@ Name: qemu Version: 6.2.0 -Release: 46 +Release: 47 Epoch: 10 Summary: QEMU is a generic and open source machine emulator and virtualizer License: GPLv2 and BSD and MIT and CC-BY-SA-4.0 @@ -294,6 +294,11 @@ Patch0280: target-ppc-exit-1-on-failure-in-kvmppc_get_clockfreq.patch Patch0281: bugfix-pointer-double-free-in-func-qemu_savevm_state.patch Patch0282: vhost-user-remove-VirtQ-notifier-restore.patch Patch0283: vhost-user-fix-VirtQ-notifier-cleanup.patch +Patch0284: nbd-allow-reconnect-on-open-with-corresponding-new-o.patch +Patch0285: block-nbd-Delete-reconnect-delay-timer-when-done.patch +Patch0286: block-nbd-Delete-open-timer-when-done.patch +Patch0287: block-nbd-Assert-there-are-no-timers-when-closed.patch +Patch0288: block-nbd-Move-s-ioc-on-AioContext-change.patch BuildRequires: flex BuildRequires: gcc @@ -806,6 +811,9 @@ getent passwd qemu >/dev/null || \ %endif %changelog +* Mon Aug 29 2022 Zhang Bo - 10:6.2.0-47 +- backport nbd related patches to avoid vm crash during migration + * Thu Aug 25 2022 yezengruan - 10:6.2.0-46 - vhost-user: remove VirtQ notifier restore - vhost-user: fix VirtQ notifier cleanup -- Gitee