diff --git a/aio-wait-delegate-polling-of-main-AioContext-if-BQL-not-held.patch b/aio-wait-delegate-polling-of-main-AioContext-if-BQL-not-held.patch
new file mode 100644
index 0000000000000000000000000000000000000000..d27d763bb19d0b6b3323eb8f7647e5d89cb1df22
--- /dev/null
+++ b/aio-wait-delegate-polling-of-main-AioContext-if-BQL-not-held.patch
@@ -0,0 +1,120 @@
+From a2bae876b7f694b12073bac8ad6668e4d975ad88 Mon Sep 17 00:00:00 2001
+From: Ying Fang
+Date: Fri, 10 Apr 2020 16:08:19 +0000
+Subject: [PATCH 1/2] aio-wait: delegate polling of main AioContext if BQL not
+ held
+
+Any thread that is not an iothread returns NULL for qemu_get_current_aio_context().
+As a result, it would also return true for
+in_aio_context_home_thread(qemu_get_aio_context()), causing
+AIO_WAIT_WHILE to invoke aio_poll() directly. This is incorrect
+if the BQL is not held, because aio_poll() does not expect to
+run concurrently from multiple threads, and it can actually
+happen when savevm writes to the vmstate file from the
+migration thread.
+
+Therefore, restrict in_aio_context_home_thread to return true
+for the main AioContext only if the BQL is held.
+
+The function is moved to aio-wait.h because it is mostly used
+there and to avoid a circular reference between main-loop.h
+and block/aio.h.
+
+upstream_url: https://patchwork.kernel.org/patch/11482099/
+
+Signed-off-by: Paolo Bonzini
+Message-Id: <20200407140746.8041-5-pbonzini@redhat.com>
+Signed-off-by: Stefan Hajnoczi
+---
+ include/block/aio-wait.h | 22 ++++++++++++++++++++++
+ include/block/aio.h      | 29 ++++++++++-------------------
+ 2 files changed, 32 insertions(+), 19 deletions(-)
+
+diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
+index afd0ff7e..d349e7e3 100644
+--- a/include/block/aio-wait.h
++++ b/include/block/aio-wait.h
+@@ -26,6 +26,7 @@
+ #define QEMU_AIO_WAIT_H
+ 
+ #include "block/aio.h"
++#include "qemu/main-loop.h"
+ 
+ /**
+  * AioWait:
+@@ -124,4 +125,25 @@ void aio_wait_kick(void);
+  */
+ void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
+ 
++/**
++ * in_aio_context_home_thread:
++ * @ctx: the aio context
++ *
++ * Return whether we are running in the thread that normally runs @ctx. Note
++ * that acquiring/releasing ctx does not affect the outcome, each AioContext
++ * still only has one home thread that is responsible for running it.
++ */
++static inline bool in_aio_context_home_thread(AioContext *ctx)
++{
++    if (ctx == qemu_get_current_aio_context()) {
++        return true;
++    }
++
++    if (ctx == qemu_get_aio_context()) {
++        return qemu_mutex_iothread_locked();
++    } else {
++        return false;
++    }
++}
++
+ #endif /* QEMU_AIO_WAIT */
+diff --git a/include/block/aio.h b/include/block/aio.h
+index 0ca25dfe..c527893b 100644
+--- a/include/block/aio.h
++++ b/include/block/aio.h
+@@ -61,12 +61,16 @@ struct AioContext {
+     QLIST_HEAD(, AioHandler) aio_handlers;
+ 
+     /* Used to avoid unnecessary event_notifier_set calls in aio_notify;
+-     * accessed with atomic primitives. If this field is 0, everything
+-     * (file descriptors, bottom halves, timers) will be re-evaluated
+-     * before the next blocking poll(), thus the event_notifier_set call
+-     * can be skipped. If it is non-zero, you may need to wake up a
+-     * concurrent aio_poll or the glib main event loop, making
+-     * event_notifier_set necessary.
++     * only written from the AioContext home thread, or under the BQL in
++     * the case of the main AioContext. However, it is read from any
++     * thread so it is still accessed with atomic primitives.
++     *
++     * If this field is 0, everything (file descriptors, bottom halves,
++     * timers) will be re-evaluated before the next blocking poll() or
++     * io_uring wait; therefore, the event_notifier_set call can be
++     * skipped. If it is non-zero, you may need to wake up a concurrent
++     * aio_poll or the glib main event loop, making event_notifier_set
++     * necessary.
+      *
+      * Bit 0 is reserved for GSource usage of the AioContext, and is 1
+      * between a call to aio_ctx_prepare and the next call to aio_ctx_check.
+@@ -581,19 +585,6 @@ void aio_co_enter(AioContext *ctx, struct Coroutine *co);
+  */
+ AioContext *qemu_get_current_aio_context(void);
+ 
+-/**
+- * in_aio_context_home_thread:
+- * @ctx: the aio context
+- *
+- * Return whether we are running in the thread that normally runs @ctx. Note
+- * that acquiring/releasing ctx does not affect the outcome, each AioContext
+- * still only has one home thread that is responsible for running it.
+- */
+-static inline bool in_aio_context_home_thread(AioContext *ctx)
+-{
+-    return ctx == qemu_get_current_aio_context();
+-}
+-
+ /**
+  * aio_context_setup:
+  * @ctx: the aio context
+-- 
+2.25.2
+
diff --git a/async-use-explicit-memory-barriers.patch b/async-use-explicit-memory-barriers.patch
new file mode 100644
index 0000000000000000000000000000000000000000..7fb68c949ad4e95cc0f908c44042096b69cf9295
--- /dev/null
+++ b/async-use-explicit-memory-barriers.patch
@@ -0,0 +1,171 @@
+From 787af8ed2bc86dc8688727d62a251965d9c42e00 Mon Sep 17 00:00:00 2001
+From: Ying Fang
+Date: Fri, 10 Apr 2020 16:19:50 +0000
+Subject: [PATCH 2/2] async: use explicit memory barriers
+
+When using C11 atomics, non-seqcst reads and writes do not participate
+in the total order of seqcst operations. In util/async.c and util/aio-posix.c,
+in particular, the pattern that we use
+
+          write ctx->notify_me                 write bh->scheduled
+          read bh->scheduled                   read ctx->notify_me
+          if !bh->scheduled, sleep             if ctx->notify_me, notify
+
+needs to use seqcst operations for both the write and the read. In
+general this is something that we do not want, because there can be
+many sources that are polled in addition to bottom halves. The
+alternative is to place a seqcst memory barrier between the write
+and the read. This also comes with a disadvantage, in that the
+memory barrier is implicit on strongly-ordered architectures and
+it wastes a few dozen clock cycles.
+
+Fortunately, ctx->notify_me is never written concurrently by two
+threads, so we can assert that and relax the writes to ctx->notify_me.
+The resulting solution works and performs well on both aarch64 and x86.
+
+Note that the atomic_set/atomic_read combination is not an atomic
+read-modify-write, and therefore it is even weaker than C11 ATOMIC_RELAXED;
+on x86, ATOMIC_RELAXED compiles to a locked operation.
+
+upstream_url: https://patchwork.kernel.org/patch/11482103/
+
+Analyzed-by: Ying Fang
+Signed-off-by: Paolo Bonzini
+Tested-by: Ying Fang
+Message-Id: <20200407140746.8041-6-pbonzini@redhat.com>
+Signed-off-by: Stefan Hajnoczi
+---
+ util/aio-posix.c | 16 ++++++++++++++--
+ util/aio-win32.c | 17 ++++++++++++++---
+ util/async.c     | 16 ++++++++++++----
+ 3 files changed, 40 insertions(+), 9 deletions(-)
+
+diff --git a/util/aio-posix.c b/util/aio-posix.c
+index 6fbfa792..ca58b9a4 100644
+--- a/util/aio-posix.c
++++ b/util/aio-posix.c
+@@ -613,6 +613,11 @@ bool aio_poll(AioContext *ctx, bool blocking)
+     int64_t timeout;
+     int64_t start = 0;
+ 
++    /*
++     * There cannot be two concurrent aio_poll calls for the same AioContext (or
++     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
++     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
++     */
+     assert(in_aio_context_home_thread(ctx));
+ 
+     /* aio_notify can avoid the expensive event_notifier_set if
+@@ -623,7 +628,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
+      * so disable the optimization now.
+      */
+     if (blocking) {
+-        atomic_add(&ctx->notify_me, 2);
++        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
++        /*
++         * Write ctx->notify_me before computing the timeout
++         * (reading bottom half flags, etc.). Pairs with
++         * smp_mb in aio_notify().
++         */
++        smp_mb();
+     }
+ 
+     qemu_lockcnt_inc(&ctx->list_lock);
+@@ -668,7 +679,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
+     }
+ 
+     if (blocking) {
+-        atomic_sub(&ctx->notify_me, 2);
++        /* Finish the poll before clearing the flag. */
++        atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
+         aio_notify_accept(ctx);
+     }
+ 
+diff --git a/util/aio-win32.c b/util/aio-win32.c
+index a23b9c36..729d533f 100644
+--- a/util/aio-win32.c
++++ b/util/aio-win32.c
+@@ -321,6 +321,12 @@ bool aio_poll(AioContext *ctx, bool blocking)
+     int count;
+     int timeout;
+ 
++    /*
++     * There cannot be two concurrent aio_poll calls for the same AioContext (or
++     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
++     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
++     */
++    assert(in_aio_context_home_thread(ctx));
+     progress = false;
+ 
+     /* aio_notify can avoid the expensive event_notifier_set if
+@@ -331,7 +337,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
+      * so disable the optimization now.
+      */
+     if (blocking) {
+-        atomic_add(&ctx->notify_me, 2);
++        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
++        /*
++         * Write ctx->notify_me before computing the timeout
++         * (reading bottom half flags, etc.). Pairs with
++         * smp_mb in aio_notify().
++         */
++        smp_mb();
+     }
+ 
+     qemu_lockcnt_inc(&ctx->list_lock);
+@@ -364,8 +376,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
+         ret = WaitForMultipleObjects(count, events, FALSE, timeout);
+         if (blocking) {
+             assert(first);
+-            assert(in_aio_context_home_thread(ctx));
+-            atomic_sub(&ctx->notify_me, 2);
++            atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
+             aio_notify_accept(ctx);
+         }
+ 
+diff --git a/util/async.c b/util/async.c
+index afc17fb3..12b33204 100644
+--- a/util/async.c
++++ b/util/async.c
+@@ -221,7 +221,14 @@ aio_ctx_prepare(GSource *source, gint *timeout)
+ {
+     AioContext *ctx = (AioContext *) source;
+ 
+-    atomic_or(&ctx->notify_me, 1);
++    atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) | 1);
++
++    /*
++     * Write ctx->notify_me before computing the timeout
++     * (reading bottom half flags, etc.). Pairs with
++     * smp_mb in aio_notify().
++     */
++    smp_mb();
+ 
+     /* We assume there is no timeout already supplied */
+     *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
+@@ -239,7 +246,8 @@ aio_ctx_check(GSource *source)
+     AioContext *ctx = (AioContext *) source;
+     QEMUBH *bh;
+ 
+-    atomic_and(&ctx->notify_me, ~1);
++    /* Finish computing the timeout before clearing the flag. */
++    atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) & ~1);
+     aio_notify_accept(ctx);
+ 
+     for (bh = ctx->first_bh; bh; bh = bh->next) {
+@@ -344,10 +352,10 @@ LinuxAioState *aio_get_linux_aio(AioContext *ctx)
+ void aio_notify(AioContext *ctx)
+ {
+     /* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs
+-     * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.
++     * with smp_mb in aio_ctx_prepare or aio_poll.
+      */
+     smp_mb();
+-    if (ctx->notify_me) {
++    if (atomic_read(&ctx->notify_me)) {
+         event_notifier_set(&ctx->notifier);
+         atomic_mb_set(&ctx->notified, true);
+     }
+-- 
+2.25.2
+
diff --git a/qemu.spec b/qemu.spec
index eb3cc8989b070c63b09c4a33f53e6f507c59dc65..c2a390b06ad96488c8acb43fc007dd97e068c8fa 100644
--- a/qemu.spec
+++ b/qemu.spec
@@ -61,6 +61,8 @@ Patch0048: COLO-compare-Fix-incorrect-if-logic.patch
 Patch0049: qcow2-bitmap-Fix-uint64_t-left-shift-overflow.patch
 Patch0050: pcie-Add-pcie-root-port-fast-plug-unplug-feature.patch
 Patch0051: pcie-Compat-with-devices-which-do-not-support-Link-W.patch
+Patch0052: aio-wait-delegate-polling-of-main-AioContext-if-BQL-not-held.patch
+Patch0053: async-use-explicit-memory-barriers.patch
 
 BuildRequires: flex
 BuildRequires: bison
@@ -396,6 +398,10 @@ getent passwd qemu >/dev/null || \
 %endif
 
 %changelog
+* Sat Apr 11 2020 Huawei Technologies Co., Ltd.
+- aio-wait: delegate polling of main AioContext if BQL not held
+- async: use explicit memory barriers
+
 * Wed Mar 18 2020 Huawei Technologies Co., Ltd.
 - pcie: Add pcie-root-port fast plug/unplug feature
 - pcie: Compat with devices which do not support Link Width