From c6a77fb2275a2044fd552e9ce3d164d33dc003ad Mon Sep 17 00:00:00 2001 From: guping Date: Thu, 13 Nov 2025 09:51:04 +0800 Subject: [PATCH 1/2] aio-posix: fix race between io_uring CQE and AioHandler deletion cherry-pick from 0003-aio-posix-fix-race-between-io_uring-CQE-and-AioHandl.patch When an AioHandler is enqueued on ctx->submit_list for removal, the fill_sq_ring() function will submit an io_uring POLL_REMOVE operation to cancel the in-flight POLL_ADD operation. There is a race when another thread enqueues an AioHandler for deletion on ctx->submit_list when the POLL_ADD CQE has already appeared. In that case POLL_REMOVE is unnecessary. The code already handled this, but forgot that the AioHandler itself is still on ctx->submit_list when the POLL_ADD CQE is being processed. It's unsafe to delete the AioHandler at that point in time (use-after-free). Solve this problem by keeping the AioHandler alive but setting a flag so that it will be deleted by fill_sq_ring() when it runs. Signed-off-by: Stefan Hajnoczi Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Message-ID: <20251104022933.618123-2-stefanha@redhat.com> Signed-off-by: Kevin Wolf Signed-off-by: guping --- util/fdmon-io_uring.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c index 16054c5ede..1af7afba6c 100644 --- a/util/fdmon-io_uring.c +++ b/util/fdmon-io_uring.c @@ -52,9 +52,10 @@ enum { FDMON_IO_URING_ENTRIES = 128, /* sq/cq ring size */ /* AioHandler::flags */ - FDMON_IO_URING_PENDING = (1 << 0), - FDMON_IO_URING_ADD = (1 << 1), - FDMON_IO_URING_REMOVE = (1 << 2), + FDMON_IO_URING_PENDING = (1 << 0), + FDMON_IO_URING_ADD = (1 << 1), + FDMON_IO_URING_REMOVE = (1 << 2), + FDMON_IO_URING_DELETE_AIO_HANDLER = (1 << 3), }; static inline int poll_events_from_pfd(int pfd_events) @@ -218,6 +219,16 @@ static void fill_sq_ring(AioContext *ctx) if (flags & FDMON_IO_URING_REMOVE) { add_poll_remove_sqe(ctx, node); } + if (flags & FDMON_IO_URING_DELETE_AIO_HANDLER) { + /* + * process_cqe() sets this flag after ADD and REMOVE have been + * cleared. They cannot be set again, so they must be clear. + */ + assert(!(flags & FDMON_IO_URING_ADD)); + assert(!(flags & FDMON_IO_URING_REMOVE)); + + QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted); + } } } @@ -241,7 +252,12 @@ static bool process_cqe(AioContext *ctx, */ flags = qatomic_fetch_and(&node->flags, ~FDMON_IO_URING_REMOVE); if (flags & FDMON_IO_URING_REMOVE) { - QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted); + if (flags & FDMON_IO_URING_PENDING) { + /* Still on ctx->submit_list, defer deletion until fill_sq_ring() */ + qatomic_or(&node->flags, FDMON_IO_URING_DELETE_AIO_HANDLER); + } else { + QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted); + } return false; } @@ -347,10 +363,13 @@ void fdmon_io_uring_destroy(AioContext *ctx) unsigned flags = qatomic_fetch_and(&node->flags, ~(FDMON_IO_URING_PENDING | FDMON_IO_URING_ADD | - FDMON_IO_URING_REMOVE)); + FDMON_IO_URING_REMOVE | + FDMON_IO_URING_DELETE_AIO_HANDLER)); - if (flags & FDMON_IO_URING_REMOVE) { - QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted); + if ((flags & FDMON_IO_URING_REMOVE) || + (flags & FDMON_IO_URING_DELETE_AIO_HANDLER)) { + QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, + node, node_deleted); } QSLIST_REMOVE_HEAD_RCU(&ctx->submit_list, node_submitted); -- Gitee From e596fd6a29a40d8ffb5901ac24d3e80327054743 Mon Sep 17 00:00:00 2001 From: guping Date: Thu, 13 Nov 2025 09:51:59 +0800 Subject: [PATCH 2/2] aio-posix: fix fdmon-io_uring.c timeout stack variable lifetime io_uring_prep_timeout() stashes a pointer to the timespec struct rather than copying its fields. That means the struct must live until after the SQE has been submitted by io_uring_enter(2). add_timeout_sqe() violates this constraint because the SQE is not submitted within the function. Inline add_timeout_sqe() into fdmon_io_uring_wait() so that the struct lives at least as long as io_uring_enter(2). This fixes random hangs (bogus timeout values) when the kernel loads undefined timespec struct values from userspace after the original struct on the stack has been destroyed. Reported-by: Kevin Wolf Signed-off-by: Stefan Hajnoczi Message-ID: <20251104022933.618123-3-stefanha@redhat.com> Reviewed-by: Kevin Wolf Signed-off-by: Kevin Wolf Signed-off-by: guping --- util/fdmon-io_uring.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c index 1af7afba6c..2da59b9dd2 100644 --- a/util/fdmon-io_uring.c +++ b/util/fdmon-io_uring.c @@ -188,20 +188,6 @@ static void add_poll_remove_sqe(AioContext *ctx, AioHandler *node) io_uring_sqe_set_data(sqe, NULL); } -/* Add a timeout that self-cancels when another cqe becomes ready */ -static void add_timeout_sqe(AioContext *ctx, int64_t ns) -{ - struct io_uring_sqe *sqe; - struct __kernel_timespec ts = { - .tv_sec = ns / NANOSECONDS_PER_SECOND, - .tv_nsec = ns % NANOSECONDS_PER_SECOND, - }; - - sqe = get_sqe(ctx); - io_uring_prep_timeout(sqe, &ts, 1, 0); - io_uring_sqe_set_data(sqe, NULL); -} - /* Add sqes from ctx->submit_list for submission */ static void fill_sq_ring(AioContext *ctx) { @@ -291,13 +277,24 @@ static int process_cq_ring(AioContext *ctx, AioHandlerList *ready_list) static int fdmon_io_uring_wait(AioContext *ctx, AioHandlerList *ready_list, int64_t timeout) { + struct __kernel_timespec ts; unsigned wait_nr = 1; /* block until at least one cqe is ready */ int ret; if (timeout == 0) { wait_nr = 0; /* non-blocking */ } else if (timeout > 0) { - add_timeout_sqe(ctx, timeout); + /* Add a timeout that self-cancels when another cqe becomes ready */ + struct io_uring_sqe *sqe; + + ts = (struct __kernel_timespec){ + .tv_sec = timeout / NANOSECONDS_PER_SECOND, + .tv_nsec = timeout % NANOSECONDS_PER_SECOND, + }; + + sqe = get_sqe(ctx); + io_uring_prep_timeout(sqe, &ts, 1, 0); + io_uring_sqe_set_data(sqe, NULL); } fill_sq_ring(ctx); -- Gitee