diff --git a/qemu.spec b/qemu.spec
index eb3cc8989b070c63b09c4a33f53e6f507c59dc65..cc1eadadb1ffdb55e87b98f55e33fdf0c2760543 100644
--- a/qemu.spec
+++ b/qemu.spec
@@ -61,6 +61,7 @@ Patch0048: COLO-compare-Fix-incorrect-if-logic.patch
 Patch0049: qcow2-bitmap-Fix-uint64_t-left-shift-overflow.patch
 Patch0050: pcie-Add-pcie-root-port-fast-plug-unplug-feature.patch
 Patch0051: pcie-Compat-with-devices-which-do-not-support-Link-W.patch
+Patch0052: util-async-Add-memory-barrier-to-aio_ctx_prepare.patch
 
 BuildRequires: flex
 BuildRequires: bison
@@ -396,6 +397,9 @@ getent passwd qemu >/dev/null || \
 %endif
 
 %changelog
+* Thu Apr 2 2020 Huawei Technologies Co., Ltd.
+- util/async: Add memory barrier to aio_ctx_prepare
+
 * Wed Mar 18 2020 Huawei Technologies Co., Ltd.
 - pcie: Add pcie-root-port fast plug/unplug feature
 - pcie: Compat with devices which do not support Link Width
diff --git a/util-async-Add-memory-barrier-to-aio_ctx_prepare.patch b/util-async-Add-memory-barrier-to-aio_ctx_prepare.patch
new file mode 100644
index 0000000000000000000000000000000000000000..49648291c96d78e5bc33db952c50397076bfb2fc
--- /dev/null
+++ b/util-async-Add-memory-barrier-to-aio_ctx_prepare.patch
@@ -0,0 +1,70 @@
+From 99026ec6a2735c6ff2f094ac247f558f14e3f3b9 Mon Sep 17 00:00:00 2001
+From: Ying Fang
+Date: Thu, 2 Apr 2020 15:53:47 +0800
+Subject: [PATCH] util/async: Add memory barrier to aio_ctx_prepare
+
+The QEMU main thread is found to hang in the main loop when doing
+an image format conversion on the aarch64 platform, and it is highly
+reproducible by executing the following test:
+
+qemu-img convert -f qcow2 -O qcow2 origin.qcow2 converted.qcow2
+
+This mysterious hang can be explained by a race condition between
+the main thread and an I/O worker thread. There is a chance that
+the last worker thread has called aio_bh_schedule_oneshot and is
+checking notify_me to decide whether to deliver a notify event. At
+the same time, the main thread is calling aio_ctx_prepare; due to
+out-of-order execution it evaluates qemu_timeout_ns_to_ms first, so
+the worker thread does not see notify_me as true and does not send
+a notify event. The timeline can be shown in the following way:
+
+  Main Thread
+  ------------------------------------------------
+  aio_ctx_prepare
+      atomic_or(&ctx->notify_me, 1);
+      /* out-of-order execution goes here */
+      *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
+
+  Worker Thread
+  ------------------------------------------------
+  aio_bh_schedule_oneshot -> aio_bh_enqueue
+      aio_notify
+          smp_mb();
+          if (ctx->notify_me) { /* worker thread checks notify_me here */
+              event_notifier_set(&ctx->notifier);
+              atomic_mb_set(&ctx->notified, true);
+          }
+
+Normal VM runtime is not affected by this hang, since there is always
+some timer timeout or a subsequent I/O worker that notifies the main
+thread. To fix this problem, a memory barrier is added to
+aio_ctx_prepare, and it is proven to fix the hang in our tests.
+
+This hang is not observed on the x86 platform, but it can be easily
+reproduced on the aarch64 platform, so it is architecture related.
+Not sure if this is relevant to commit eabc977973103527bbb8fed69c91cfaa6691f8ab
+
+Signed-off-by: Ying Fang
+Signed-off-by: zhanghailiang
+Reported-by: Euler Robot
+---
+ util/async.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/util/async.c b/util/async.c
+index afc17fb3..50dfa9ce 100644
+--- a/util/async.c
++++ b/util/async.c
+@@ -222,7 +222,8 @@ aio_ctx_prepare(GSource *source, gint *timeout)
+     AioContext *ctx = (AioContext *) source;
+ 
+     atomic_or(&ctx->notify_me, 1);
+-
++    /* Make sure notify_me is set before aio_compute_timeout */
++    smp_mb();
+     /* We assume there is no timeout already supplied */
+     *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
+ 
+-- 
+2.23.0
+