diff --git a/0001-add-new-pollfree-test-case.patch b/0001-add-new-pollfree-test-case.patch
new file mode 100644
index 0000000000000000000000000000000000000000..970fa394b0b1c50fbb21f6dd440f6acb68419545
--- /dev/null
+++ b/0001-add-new-pollfree-test-case.patch
@@ -0,0 +1,540 @@
+From 856fd053311faf107081c84f6aaac781ba53e0e8 Mon Sep 17 00:00:00 2001
+From: Liwei Ge
+Date: Tue, 15 Oct 2024 20:10:34 +0800
+Subject: [PATCH] add new pollfree test case
+
+port from upstream
+eb3255f3a614914f2678065e91bd24e5afcb8238
+082c3c01391cd9bdf63d9ad67d166c220f1848a8
+---
+ test/pollfree.c | 481 ++++++++++--------------------------------------
+ 1 file changed, 94 insertions(+), 387 deletions(-)
+
+diff --git a/test/pollfree.c b/test/pollfree.c
+index d753ffe..ca00d09 100644
+--- a/test/pollfree.c
++++ b/test/pollfree.c
+@@ -1,426 +1,133 @@
+ /* SPDX-License-Identifier: MIT */
+-// https://syzkaller.appspot.com/bug?id=5f5a44abb4cba056fe24255c4fcb7e7bbe13de7a
+-// autogenerated by syzkaller (https://github.com/google/syzkaller)
+-
+-#include
+-#include
+-#include
++/*
++ * Description: test pollfree wakeups
++ */
+ #include
+-#include
+-#include
+-#include
+-#include
+-#include
+ #include
+-#include
+ #include
+-#include
+-#include
+ #include
+-#include
+ #include
+-#include
+-#include
++#include
+ #include
++#include
+
+-#include
++#include "liburing.h"
++#include "helpers.h"
+
+-#ifdef __NR_futex
++static int no_signalfd;
+
+-static void sleep_ms(uint64_t ms)
++static int child(int flags)
+ {
+-	usleep(ms * 1000);
+-}
++	struct io_uring_sqe *sqe;
++	struct io_uring ring;
++	struct signalfd_siginfo *si;
++	static unsigned long index;
++	sigset_t mask;
++	int ret, fd;
+
+-static uint64_t current_time_ms(void)
+-{
+-	struct timespec ts;
+-	if (clock_gettime(CLOCK_MONOTONIC, &ts))
+-		exit(1);
+-	return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
+-}
++	ret = io_uring_queue_init(1, &ring, flags);
++	if (ret) {
++		fprintf(stderr, "queue init failed %d\n", ret);
++		return ret;
++	}
+
+-static void thread_start(void* (*fn)(void*), void* arg)
+-{
+-	pthread_t th;
+-	pthread_attr_t attr;
+-	pthread_attr_init(&attr);
+-	pthread_attr_setstacksize(&attr, 128 << 10);
+-	int i = 0;
+-	for (; i < 100; i++) {
+-		if (pthread_create(&th, &attr, fn, arg) == 0) {
+-			pthread_attr_destroy(&attr);
+-			return;
+-		}
+-		if (errno == EAGAIN) {
+-			usleep(50);
+-			continue;
+-		}
+-		break;
+-	}
+-	exit(1);
+-}
++	sigemptyset(&mask);
++	sigaddset(&mask, SIGINT);
+
+-typedef struct {
+-	int state;
+-} event_t;
++	fd = signalfd(-1, &mask, SFD_NONBLOCK);
++	if (fd < 0) {
++		no_signalfd = 1;
++		perror("signalfd");
++		return 1;
++	}
+
+-static void event_init(event_t* ev)
+-{
+-	ev->state = 0;
+-}
++	sqe = io_uring_get_sqe(&ring);
++	si = malloc(sizeof(*si));
++	io_uring_prep_read(sqe, fd, si, sizeof(*si), 0);
++	io_uring_submit(&ring);
+
+-static void event_reset(event_t* ev)
+-{
+-	ev->state = 0;
+-}
++	if (!(++index & 127))
++		usleep(100);
+
+-static void event_set(event_t* ev)
+-{
+-	if (ev->state)
+-		exit(1);
+-	__atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE);
+-	syscall(__NR_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1000000);
++	return 0;
+ }
+
+-static void event_wait(event_t* ev)
++static int run_test(int flags)
+ {
+-	while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
+-		syscall(__NR_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0);
+-}
+-
+-static int event_isset(event_t* ev)
+-{
+-	return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);
+-}
+-
+-static int event_timedwait(event_t* ev, uint64_t timeout)
+-{
+-	uint64_t start = current_time_ms();
+-	uint64_t now = start;
+-	for (;;) {
+-		uint64_t remain = timeout - (now - start);
+-		struct timespec ts;
+-		ts.tv_sec = remain / 1000;
+-		ts.tv_nsec = (remain % 1000) * 1000 * 1000;
+-		syscall(__NR_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts);
+-		if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
+-			return 1;
+-		now = current_time_ms();
+-		if (now - start > timeout)
+-			return 0;
+-	}
+-}
+-
+-#define SIZEOF_IO_URING_SQE 64
+-#define SIZEOF_IO_URING_CQE 16
+-#define SQ_HEAD_OFFSET 0
+-#define SQ_TAIL_OFFSET 64
+-#define SQ_RING_MASK_OFFSET 256
+-#define SQ_RING_ENTRIES_OFFSET 264
+-#define SQ_FLAGS_OFFSET 276
+-#define SQ_DROPPED_OFFSET 272
+-#define CQ_HEAD_OFFSET 128
+-#define CQ_TAIL_OFFSET 192
+-#define CQ_RING_MASK_OFFSET 260
+-#define CQ_RING_ENTRIES_OFFSET 268
+-#define CQ_RING_OVERFLOW_OFFSET 284
+-#define CQ_FLAGS_OFFSET 280
+-#define CQ_CQES_OFFSET 320
+-
+-struct io_sqring_offsets {
+-	uint32_t head;
+-	uint32_t tail;
+-	uint32_t ring_mask;
+-	uint32_t ring_entries;
+-	uint32_t flags;
+-	uint32_t dropped;
+-	uint32_t array;
+-	uint32_t resv1;
+-	uint64_t resv2;
+-};
+-
+-struct io_cqring_offsets {
+-	uint32_t head;
+-	uint32_t tail;
+-	uint32_t ring_mask;
+-	uint32_t ring_entries;
+-	uint32_t overflow;
+-	uint32_t cqes;
+-	uint64_t resv[2];
+-};
+-
+-struct io_uring_params {
+-	uint32_t sq_entries;
+-	uint32_t cq_entries;
+-	uint32_t flags;
+-	uint32_t sq_thread_cpu;
+-	uint32_t sq_thread_idle;
+-	uint32_t features;
+-	uint32_t resv[4];
+-	struct io_sqring_offsets sq_off;
+-	struct io_cqring_offsets cq_off;
+-};
+-
+-#define IORING_OFF_SQ_RING 0
+-#define IORING_OFF_SQES 0x10000000ULL
++	pid_t pid;
++	int ret;
+
+-#define sys_io_uring_setup 425
+-static long syz_io_uring_setup(volatile long a0, volatile long a1,
+-                               volatile long a2, volatile long a3,
+-                               volatile long a4, volatile long a5)
+-{
+-	uint32_t entries = (uint32_t)a0;
+-	struct io_uring_params* setup_params = (struct io_uring_params*)a1;
+-	void* vma1 = (void*)a2;
+-	void* vma2 = (void*)a3;
+-	void** ring_ptr_out = (void**)a4;
+-	void** sqes_ptr_out = (void**)a5;
+-	uint32_t fd_io_uring = syscall(sys_io_uring_setup, entries, setup_params);
+-	uint32_t sq_ring_sz =
+-	    setup_params->sq_off.array + setup_params->sq_entries * sizeof(uint32_t);
+-	uint32_t cq_ring_sz = setup_params->cq_off.cqes +
+-	                      setup_params->cq_entries * SIZEOF_IO_URING_CQE;
+-	uint32_t ring_sz = sq_ring_sz > cq_ring_sz ? sq_ring_sz : cq_ring_sz;
+-	*ring_ptr_out = mmap(vma1, ring_sz, PROT_READ | PROT_WRITE,
+-	                     MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd_io_uring,
+-	                     IORING_OFF_SQ_RING);
+-	uint32_t sqes_sz = setup_params->sq_entries * SIZEOF_IO_URING_SQE;
+-	*sqes_ptr_out =
+-	    mmap(vma2, sqes_sz, PROT_READ | PROT_WRITE,
+-	         MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd_io_uring, IORING_OFF_SQES);
+-	return fd_io_uring;
+-}
++	pid = fork();
++	if (pid < 0) {
++		perror("fork");
++		return 1;
++	} else if (!pid) {
++		ret = child(flags);
++		exit(ret);
++	} else {
++		int wstatus;
++		pid_t childpid;
+
+-static long syz_io_uring_submit(volatile long a0, volatile long a1,
+-                                volatile long a2, volatile long a3)
+-{
+-	char* ring_ptr = (char*)a0;
+-	char* sqes_ptr = (char*)a1;
+-	char* sqe = (char*)a2;
+-	uint32_t sqes_index = (uint32_t)a3;
+-	uint32_t sq_ring_entries = *(uint32_t*)(ring_ptr + SQ_RING_ENTRIES_OFFSET);
+-	uint32_t cq_ring_entries = *(uint32_t*)(ring_ptr + CQ_RING_ENTRIES_OFFSET);
+-	uint32_t sq_array_off =
+-	    (CQ_CQES_OFFSET + cq_ring_entries * SIZEOF_IO_URING_CQE + 63) & ~63;
+-	if (sq_ring_entries)
+-		sqes_index %= sq_ring_entries;
+-	char* sqe_dest = sqes_ptr + sqes_index * SIZEOF_IO_URING_SQE;
+-	memcpy(sqe_dest, sqe, SIZEOF_IO_URING_SQE);
+-	uint32_t sq_ring_mask = *(uint32_t*)(ring_ptr + SQ_RING_MASK_OFFSET);
+-	uint32_t* sq_tail_ptr = (uint32_t*)(ring_ptr + SQ_TAIL_OFFSET);
+-	uint32_t sq_tail = *sq_tail_ptr & sq_ring_mask;
+-	uint32_t sq_tail_next = *sq_tail_ptr + 1;
+-	uint32_t* sq_array = (uint32_t*)(ring_ptr + sq_array_off);
+-	*(sq_array + sq_tail) = sqes_index;
+-	__atomic_store_n(sq_tail_ptr, sq_tail_next, __ATOMIC_RELEASE);
+-	return 0;
+-}
++		do {
++			childpid = waitpid(pid, &wstatus, 0);
++		} while (childpid == (pid_t) -1 && (errno == EINTR));
+
+-static void kill_and_wait(int pid, int* status)
+-{
+-	kill(-pid, SIGKILL);
+-	kill(pid, SIGKILL);
+-	for (int i = 0; i < 100; i++) {
+-		if (waitpid(-1, status, WNOHANG | __WALL) == pid)
+-			return;
+-		usleep(1000);
+-	}
+-	DIR* dir = opendir("/sys/fs/fuse/connections");
+-	if (dir) {
+-		for (;;) {
+-			struct dirent* ent = readdir(dir);
+-			if (!ent)
+-				break;
+-			if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
+-				continue;
+-			char abort[300];
+-			snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort",
+-			         ent->d_name);
+-			int fd = open(abort, O_WRONLY);
+-			if (fd == -1) {
+-				continue;
+-			}
+-			if (write(fd, abort, 1) < 0) {
+-			}
+-			close(fd);
+-		}
+-		closedir(dir);
+-	} else {
+-	}
+-	while (waitpid(-1, status, __WALL) != pid) {
+-	}
++		if (errno == ECHILD)
++			wstatus = 0;
++		return wstatus;
++	}
+ }
+
+-static void setup_test()
++static int test(int flags)
+ {
+-	prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
+-	setpgrp();
+-}
+-
+-struct thread_t {
+-	int created, call;
+-	event_t ready, done;
+-};
++	int i, ret;
+
+-static struct thread_t threads[16];
+-static void execute_call(int call);
+-static int running;
++	for (i = 0; i < 5000; i++) {
++		ret = run_test(flags);
++		if (ret) {
++			fprintf(stderr, "test %d with flags %x failed\n", i, flags);
++			return 1;
++		}
++		if (no_signalfd)
++			break;
++	}
+
+-static void* thr(void* arg)
+-{
+-	struct thread_t* th = (struct thread_t*)arg;
+-	for (;;) {
+-		event_wait(&th->ready);
+-		event_reset(&th->ready);
+-		execute_call(th->call);
+-		__atomic_fetch_sub(&running, 1, __ATOMIC_RELAXED);
+-		event_set(&th->done);
+-	}
+-	return 0;
+-}
+-
+-static void execute_one(void)
+-{
+-	int i, call, thread;
+-	for (call = 0; call < 4; call++) {
+-		for (thread = 0; thread < (int)(sizeof(threads) / sizeof(threads[0]));
+-		     thread++) {
+-			struct thread_t* th = &threads[thread];
+-			if (!th->created) {
+-				th->created = 1;
+-				event_init(&th->ready);
+-				event_init(&th->done);
+-				event_set(&th->done);
+-				thread_start(thr, th);
+-			}
+-			if (!event_isset(&th->done))
+-				continue;
+-			event_reset(&th->done);
+-			th->call = call;
+-			__atomic_fetch_add(&running, 1, __ATOMIC_RELAXED);
+-			event_set(&th->ready);
+-			event_timedwait(&th->done, 50);
+-			break;
+-		}
+-	}
+-	for (i = 0; i < 100 && __atomic_load_n(&running, __ATOMIC_RELAXED); i++)
+-		sleep_ms(1);
++	return 0;
+ }
+
+-static void execute_one(void);
+-
+-#define WAIT_FLAGS __WALL
+-
+-static void loop(void)
++int main(int argc, char *argv[])
+ {
+-	int iter = 0;
+-	for (; iter < 5000; iter++) {
+-		int pid = fork();
+-		if (pid < 0)
+-			exit(1);
+-		if (pid == 0) {
+-			setup_test();
+-			execute_one();
+-			exit(0);
+-		}
+-		int status = 0;
+-		uint64_t start = current_time_ms();
+-		for (;;) {
+-			if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid)
+-				break;
+-			sleep_ms(1);
+-			if (current_time_ms() - start < 5000)
+-				continue;
+-			kill_and_wait(pid, &status);
+-			break;
+-		}
+-	}
+-}
+-
+-#ifndef __NR_io_uring_enter
+-#define __NR_io_uring_enter 426
+-#endif
++	int ret;
+
+-uint64_t r[4] = {0xffffffffffffffff, 0xffffffffffffffff, 0x0, 0x0};
+-
+-void execute_call(int call)
+-{
+-	intptr_t res = 0;
+-	switch (call) {
+-	case 0:
+-		*(uint64_t*)0x200000c0 = 0;
+-		res = syscall(__NR_signalfd4, -1, 0x200000c0ul, 8ul, 0ul);
+-		if (res != -1)
+-			r[0] = res;
+-		break;
+-	case 1:
+-		*(uint32_t*)0x20000a84 = 0;
+-		*(uint32_t*)0x20000a88 = 0;
+-		*(uint32_t*)0x20000a8c = 0;
+-		*(uint32_t*)0x20000a90 = 0;
+-		*(uint32_t*)0x20000a98 = -1;
+-		memset((void*)0x20000a9c, 0, 12);
+-		res = -1;
+-		res = syz_io_uring_setup(0x87, 0x20000a80, 0x206d6000, 0x206d7000,
+-		                         0x20000000, 0x20000040);
+-		if (res != -1) {
+-			r[1] = res;
+-			r[2] = *(uint64_t*)0x20000000;
+-			r[3] = *(uint64_t*)0x20000040;
+-		}
+-		break;
+-	case 2:
+-		*(uint8_t*)0x20002240 = 6;
+-		*(uint8_t*)0x20002241 = 0;
+-		*(uint16_t*)0x20002242 = 0;
+-		*(uint32_t*)0x20002244 = r[0];
+-		*(uint64_t*)0x20002248 = 0;
+-		*(uint64_t*)0x20002250 = 0;
+-		*(uint32_t*)0x20002258 = 0;
+-		*(uint16_t*)0x2000225c = 0;
+-		*(uint16_t*)0x2000225e = 0;
+-		*(uint64_t*)0x20002260 = 0;
+-		*(uint16_t*)0x20002268 = 0;
+-		*(uint16_t*)0x2000226a = 0;
+-		memset((void*)0x2000226c, 0, 20);
+-		syz_io_uring_submit(r[2], r[3], 0x20002240, 0);
+-		break;
+-	case 3:
+-		syscall(__NR_io_uring_enter, r[1], 0x1523a, 0, 0ul, 0ul, 0xaul);
+-		break;
+-	}
+-}
++	if (argc > 1)
++		return T_EXIT_SKIP;
+
+-int main(int argc, char *argv[])
+-{
+-	void *ret;
++	ret = test(0);
++	if (ret) {
++		fprintf(stderr, "test 0 failed: %d\n", ret);
++		return ret;
++	}
+
+-#if !defined(__i386) && !defined(__x86_64__)
+-	return 0;
+-#endif
++	if (no_signalfd)
++		return T_EXIT_SKIP;
+
+-	if (argc > 1)
+-		return 0;
++	ret = test(IORING_SETUP_SQPOLL);
++	if (ret) {
++		fprintf(stderr, "test SQPOLL failed: %d\n", ret);
++		return ret;
++	}
+
+-	ret = mmap((void *)0x1ffff000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);
+-	if (ret == MAP_FAILED)
+-		return 0;
+-	ret = mmap((void *)0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul);
+-	if (ret == MAP_FAILED)
+-		return 0;
+-	ret = mmap((void *)0x21000000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);
+-	if (ret == MAP_FAILED)
+-		return 0;
+-	loop();
+-	return 0;
+-}
++	ret = test(IORING_SETUP_COOP_TASKRUN);
++	if (ret) {
++		fprintf(stderr, "test COOP failed: %d\n", ret);
++		return ret;
++	}
+
+-#else /* __NR_futex */
++	ret = test(IORING_SETUP_DEFER_TASKRUN|IORING_SETUP_SINGLE_ISSUER);
++	if (ret) {
++		fprintf(stderr, "test DEFER failed: %d\n", ret);
++		return ret;
++	}
+
+-int main(int argc, char *argv[])
+-{
+-	return 0;
++	return T_EXIT_PASS;
+ }
+-
+-#endif /* __NR_futex */
+--
+2.39.3
+
diff --git a/0001-test-io_uring_register-fix-poll-testing.patch b/0001-test-io_uring_register-fix-poll-testing.patch
new file mode 100644
index 0000000000000000000000000000000000000000..f8d60c54076463f8d951c84e0f0610de90c0b497
--- /dev/null
+++ b/0001-test-io_uring_register-fix-poll-testing.patch
@@ -0,0 +1,66 @@
+From cbae992b34c8b07061c7fad883567bb444545c56 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Tue, 15 Oct 2024 10:11:05 -0600
+Subject: [PATCH] test/io_uring_register: fix poll testing
+
+The test is buggy in that it sets up a ring with a size of 1, and then
+expects POLLOUT or POLLIN to be set when polling the ring. However,
+POLLIN will only be set if there are CQEs ready to reap, and the test
+doesn't ensure that is the case. Which is fine since it's also
+testing for POLLOUT, however POLLOUT is only true if the ring has
+SQE entries available. But since it's a ring of size 1 AND the only
+SQE is already being used, that is not going to be true either. Hence
+it'll wait forever.
+
+This used to work by accident as the internal io_uring test for
+whether the SQ ring is full or not was buggy, and didn't take into
+account whether they were committed already or not. Bump the ring size
+to 2 so that the test will actually (kind of) work.
+
+While at it, ensure that we test both SQPOLL and !SQPOLL for the poll
+testing.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+---
+ test/io_uring_register.c | 15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+
+diff --git a/test/io_uring_register.c b/test/io_uring_register.c
+index a5e0c7a6a..b53a67d31 100644
+--- a/test/io_uring_register.c
++++ b/test/io_uring_register.c
+@@ -436,14 +436,14 @@ static int ioring_poll(struct io_uring *ring, int fd, int fixed)
+ 	return ret;
+ }
+
+-static int test_poll_ringfd(void)
++static int __test_poll_ringfd(int ring_flags)
+ {
+ 	int status = 0;
+ 	int ret;
+ 	int fd;
+ 	struct io_uring ring;
+
+-	ret = io_uring_queue_init(1, &ring, 0);
++	ret = io_uring_queue_init(2, &ring, ring_flags);
+ 	if (ret) {
+ 		perror("io_uring_queue_init");
+ 		return 1;
+@@ -466,6 +466,17 @@ static int test_poll_ringfd(void)
+ 	return status;
+ }
+
++static int test_poll_ringfd(void)
++{
++	int ret;
++
++	ret = __test_poll_ringfd(0);
++	if (ret)
++		return ret;
++
++	return __test_poll_ringfd(IORING_SETUP_SQPOLL);
++}
++
+ int main(int argc, char **argv)
+ {
+ 	int fd, ret;
diff --git a/dont-always-expect-multishot-recv-to-stop-posting-events.patch b/dont-always-expect-multishot-recv-to-stop-posting-events.patch
new file mode 100644
index 0000000000000000000000000000000000000000..80158d59947d1b2a389ff0da37f9276ad3b7f6f3
--- /dev/null
+++ b/dont-always-expect-multishot-recv-to-stop-posting-events.patch
@@ -0,0 +1,44 @@
+From 0d4fdb416718a70a4a90c5c4722b38cf44849195 Mon Sep 17 00:00:00 2001
+From: Dylan Yudaken <dylany@meta.com>
+Date: Mon, 7 Nov 2022 05:04:04 -0800
+Subject: Do not always expect multishot recv to stop posting events
+
+Later kernels can have a fix that does not stop multishot from posting
+events, and would just continue in overflow mode.
+
+Signed-off-by: Dylan Yudaken <dylany@meta.com>
+Link: https://lore.kernel.org/r/20221107130404.360691-1-dylany@meta.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+---
+ test/recv-multishot.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/test/recv-multishot.c b/test/recv-multishot.c
+index 2cfe689..ed26a5f 100644
+--- a/test/recv-multishot.c
++++ b/test/recv-multishot.c
+@@ -264,11 +264,19 @@ static int test(struct args *args)
+
+ 		bool const is_last = i == recv_cqes - 1;
+
++		/*
++		 * Older kernels could terminate multishot early due to overflow,
++		 * but later ones will not. So discriminate based on the MORE flag.
++		 */
++		bool const early_last = args->early_error == ERROR_EARLY_OVERFLOW &&
++					!args->wait_each &&
++					i == N_CQE_OVERFLOW &&
++					!(cqe->flags & IORING_CQE_F_MORE);
++
+ 		bool const should_be_last =
+ 			(cqe->res <= 0) ||
+ 			(args->stream && is_last) ||
+-			(args->early_error == ERROR_EARLY_OVERFLOW &&
+-			 !args->wait_each && i == N_CQE_OVERFLOW);
++			early_last;
+ 		int *this_recv;
+ 		int orig_payload_size = cqe->res;
+
+--
+cgit v1.2.3-59-gdc87
+
diff --git a/dont-expect-multishot-recv-overflow-backlogging.patch b/dont-expect-multishot-recv-overflow-backlogging.patch
new file mode 100644
index 0000000000000000000000000000000000000000..fc39557c0ccf85ba8b6977e004830485adbf02cd
--- /dev/null
+++ b/dont-expect-multishot-recv-overflow-backlogging.patch
@@ -0,0 +1,31 @@
+From b73e940c9dd4ffa8ac121db046c0788376691b99 Mon Sep 17 00:00:00 2001
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Fri, 11 Aug 2023 13:58:30 +0100
+Subject: tests: don't expect multishot recv overflow backlogging
+
+Multishots may and are likely to complete when there is no space in CQ,
+don't rely on overflows.
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/d078c0f797322bd01d8c91743d652b734e83e9ba.1691758633.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+---
+ test/recv-multishot.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/test/recv-multishot.c b/test/recv-multishot.c
+index e4a07ce..f66f131 100644
+--- a/test/recv-multishot.c
++++ b/test/recv-multishot.c
+@@ -271,7 +271,7 @@ static int test(struct args *args)
+ 	 */
+ 	bool const early_last = args->early_error == ERROR_EARLY_OVERFLOW &&
+ 				!args->wait_each &&
+-				i == N_CQE_OVERFLOW &&
++				i >= N_CQE_OVERFLOW &&
+ 				!(cqe->flags & IORING_CQE_F_MORE);
+
+ 	bool const should_be_last =
+--
+cgit v1.2.3-59-gdc87
+
diff --git a/fix-errno-confusion-and-new-error.patch b/fix-errno-confusion-and-new-error.patch
new file mode 100644
index 0000000000000000000000000000000000000000..5eb75d6b73083cd199c026ee58cec2366799e8c1
--- /dev/null
+++ b/fix-errno-confusion-and-new-error.patch
@@ -0,0 +1,168 @@
+From 2d3368b73b478a737b2247ef5630be56c3b176b5 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Wed, 28 Jun 2023 19:43:02 -0600
+Subject: test/io_uring_register: fix errno confusion and new error
+
+This test suffers from some serious errno vs liburing function return.
+All liburing functions return a negative errno value on failure, there's
+no use of errno at all. This means that rather than check for the return
+being -1 and then relying on errno being set to EINVAL, liburing
+functions return -EINVAL directly instead.
+
+Outside of that, due to recent kernel changes, we may now get -EFAULT
+when trying to register a file backed buffers. Before we always returned
+-EOPNOTSUPP. Correct that as well.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+---
+ test/io_uring_register.c | 51 +++++++++++++++++++++++-------------------------
+ 1 file changed, 24 insertions(+), 27 deletions(-)
+
+diff --git a/test/io_uring_register.c b/test/io_uring_register.c
+index ddd4fe3..0f44af8 100644
+--- a/test/io_uring_register.c
++++ b/test/io_uring_register.c
+@@ -32,7 +32,7 @@ static rlim_t mlock_limit;
+ static int devnull;
+
+ static int expect_fail(int fd, unsigned int opcode, void *arg,
+-		       unsigned int nr_args, int error)
++		       unsigned int nr_args, int error, int error2)
+ {
+ 	int ret;
+
+@@ -55,8 +55,8 @@ static int expect_fail(int fd, unsigned int opcode, void *arg,
+ 		return 1;
+ 	}
+
+-	if (ret != error) {
+-		fprintf(stderr, "expected %d, got %d\n", error, ret);
++	if (ret != error && (error2 && ret != error2)) {
++		fprintf(stderr, "expected %d/%d, got %d\n", error, error2, ret);
+ 		return 1;
+ 	}
+ 	return 0;
+@@ -195,8 +195,7 @@ static int test_max_fds(int uring_fd)
+ 	status = 0;
+ 	ret = io_uring_register(uring_fd, IORING_UNREGISTER_FILES, 0, 0);
+ 	if (ret < 0) {
+-		ret = errno;
+-		errno = ret;
++		errno = -ret;
+ 		perror("io_uring_register UNREGISTER_FILES");
+ 		exit(1);
+ 	}
+@@ -230,22 +229,20 @@ static int test_memlock_exceeded(int fd)
+
+ 	while (iov.iov_len) {
+ 		ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
+-		if (ret < 0) {
+-			if (errno == ENOMEM) {
+-				iov.iov_len /= 2;
+-				continue;
+-			}
+-			if (errno == EFAULT) {
+-				free(buf);
+-				return 0;
+-			}
+-			fprintf(stderr, "expected success or EFAULT, got %d\n", errno);
++		if (ret == -ENOMEM) {
++			iov.iov_len /= 2;
++			continue;
++		} else if (ret == -EFAULT) {
++			free(buf);
++			return 0;
++		} else if (ret) {
++			fprintf(stderr, "expected success or EFAULT, got %d\n", ret);
+ 			free(buf);
+ 			return 1;
+ 		}
+ 		ret = io_uring_register(fd, IORING_UNREGISTER_BUFFERS, NULL, 0);
+ 		if (ret != 0) {
+-			fprintf(stderr, "error: unregister failed with %d\n", errno);
++			fprintf(stderr, "error: unregister failed with %d\n", ret);
+ 			free(buf);
+ 			return 1;
+ 		}
+@@ -277,15 +274,15 @@ static int test_iovec_nr(int fd)
+ 		iovs[i].iov_len = pagesize;
+ 	}
+
+-	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, iovs, nr, -EINVAL);
++	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, iovs, nr, -EINVAL, 0);
+
+ 	/* reduce to UIO_MAXIOV */
+ 	nr = UIO_MAXIOV;
+ 	ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, iovs, nr);
+-	if (ret && (errno == ENOMEM || errno == EPERM) && geteuid()) {
++	if ((ret == -ENOMEM || ret == -EPERM) && geteuid()) {
+ 		fprintf(stderr, "can't register large iovec for regular users, skip\n");
+ 	} else if (ret != 0) {
+-		fprintf(stderr, "expected success, got %d\n", errno);
++		fprintf(stderr, "expected success, got %d\n", ret);
+ 		status = 1;
+ 	} else {
+ 		io_uring_register(fd, IORING_UNREGISTER_BUFFERS, 0, 0);
+@@ -308,12 +305,12 @@ static int test_iovec_size(int fd)
+ 	/* NULL pointer for base */
+ 	iov.iov_base = 0;
+ 	iov.iov_len = 4096;
+-	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT);
++	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0);
+
+ 	/* valid base, 0 length */
+ 	iov.iov_base = &buf;
+ 	iov.iov_len = 0;
+-	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT);
++	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0);
+
+ 	/* valid base, length exceeds size */
+ 	/* this requires an unampped page directly after buf */
+ 	ret =
+ 	assert(ret == 0);
+ 	iov.iov_base = buf;
+ 	iov.iov_len = 2 * pagesize;
+-	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT);
++	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0);
+ 	munmap(buf, pagesize);
+
+ 	/* huge page */
+@@ -372,7 +369,7 @@ static int test_iovec_size(int fd)
+ 		status = 1;
+ 	iov.iov_base = buf;
+ 	iov.iov_len = 2*1024*1024;
+-	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EOPNOTSUPP);
++	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, -EOPNOTSUPP);
+ 	munmap(buf, 2*1024*1024);
+
+ 	/* bump up against the soft limit and make sure we get EFAULT
+@@ -442,7 +439,7 @@ static int test_poll_ringfd(void)
+ 	 * fail, because the kernel does not allow registering of the
+ 	 * ring_fd.
+ 	 */
+-	status |= expect_fail(fd, IORING_REGISTER_FILES, &fd, 1, -EBADF);
++	status |= expect_fail(fd, IORING_REGISTER_FILES, &fd, 1, -EBADF, 0);
+
+ 	/* tear down queue */
+ 	io_uring_queue_exit(&ring);
+@@ -475,14 +472,14 @@ int main(int argc, char **argv)
+ 	}
+
+ 	/* invalid fd */
+-	status |= expect_fail(-1, 0, NULL, 0, -EBADF);
++	status |= expect_fail(-1, 0, NULL, 0, -EBADF, 0);
+ 	/* valid fd that is not an io_uring fd */
+-	status |= expect_fail(devnull, 0, NULL, 0, -EOPNOTSUPP);
++	status |= expect_fail(devnull, 0, NULL, 0, -EOPNOTSUPP, 0);
+
+ 	/* invalid opcode */
+ 	memset(&p, 0, sizeof(p));
+ 	fd = new_io_uring(1, &p);
+-	ret = expect_fail(fd, ~0U, NULL, 0, -EINVAL);
++	ret = expect_fail(fd, ~0U, NULL, 0, -EINVAL, 0);
+ 	if (ret) {
+ 		/* if this succeeds, tear down the io_uring instance
+ 		 * and start clean for the next test. */
+--
+cgit v1.2.3-59-gdc87
+
diff --git a/liburing.spec b/liburing.spec
index 53c5396992d841fbcb5fd7c526da79f5b0a2ba48..cf25d35d0e21a7374681d9829c53fc653612a75e 100644
--- a/liburing.spec
+++ b/liburing.spec
@@ -1,4 +1,4 @@
-%define anolis_release 3
+%define anolis_release 4
 Name: liburing
 Version: 2.3
 Release: %{anolis_release}%{?dist}
@@ -22,6 +22,14 @@
 Patch1009: 1009-test-read-before-exit-handle-IOPOLL-failure-on-older.patch
 Patch1010: 1010-test-sqpoll-idle-us-add-SQ_AFF-flag.patch
 # End: Anolis customized patches
+Patch2001: 0001-add-new-pollfree-test-case.patch
+Patch2002: make-trigger-event-wait-before-reading.patch
+Patch2003: fix-errno-confusion-and-new-error.patch
+Patch2004: dont-always-expect-multishot-recv-to-stop-posting-events.patch
+Patch2005: dont-expect-multishot-recv-overflow-backlogging.patch
+Patch2006: wait-for-the-right-amount-of-CQEs.patch
+Patch2007: 0001-test-io_uring_register-fix-poll-testing.patch
+
 %description
 Provides native async IO for the Linux kernel, in a fast and efficient
 manner, for both buffered and O_DIRECT.
@@ -66,6 +74,13 @@ for the Linux-native io_uring.
 %{_mandir}/man7/*
 
 %changelog
+* Thu May 29 2025 mgb01105731 - 2.3-4
+- test/io_uring_register: fix errno confusion and new error
+- add new pollfree test case
+- multicqes_drain: make trigger event wait before reading
+- fix multishot test failures
+- test/io_uring_register: fix poll testing
+
 * Tue Oct 10 2023 Ferry Meng - 2.3-3
 - update read-before-exit and sqpoll-idle-us tests
 
diff --git a/make-trigger-event-wait-before-reading.patch b/make-trigger-event-wait-before-reading.patch
new file mode 100644
index 0000000000000000000000000000000000000000..2ef94ff5118cad978061ecc67b4894093288722b
--- /dev/null
+++ b/make-trigger-event-wait-before-reading.patch
@@ -0,0 +1,72 @@
+From 313aece03ab7dc7447a19cff5b5f542d0c1b2a1e Mon Sep 17 00:00:00 2001
+From: Dylan Yudaken <dylany@meta.com>
+Date: Mon, 30 Jan 2023 01:49:57 -0800
+Subject: multicqes_drain: make trigger event wait before reading
+
+trigger_event is used to generate CQEs on the poll requests. However there
+is a race if that poll request is running asynchronously, where the
+read_pipe will complete before the poll is run, and the poll result will
+be that there is no data ready.
+
+Instead sleep and force an io_uring_get_events in order to give the poll a
+chance to run before reading from the pipe.
+
+Signed-off-by: Dylan Yudaken <dylany@meta.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+---
+ test/multicqes_drain.c | 16 +++++++---------
+ 1 file changed, 7 insertions(+), 9 deletions(-)
+
+diff --git a/test/multicqes_drain.c b/test/multicqes_drain.c
+index 3755bee..6c4d5f2 100644
+--- a/test/multicqes_drain.c
++++ b/test/multicqes_drain.c
+@@ -71,13 +71,15 @@ static void read_pipe(int pipe)
+ 		perror("read");
+ }
+
+-int trigger_event(int p[])
++int trigger_event(struct io_uring *ring, int p[])
+ {
+ 	int ret;
+ 	if ((ret = write_pipe(p[1], "foo")) != 3) {
+ 		fprintf(stderr, "bad write return %d\n", ret);
+ 		return 1;
+ 	}
++	usleep(1000);
++	io_uring_get_events(ring);
+ 	read_pipe(p[0]);
+ 	return 0;
+ }
+@@ -236,10 +238,8 @@ static int test_generic_drain(struct io_uring *ring)
+ 		if (si[i].op != multi && si[i].op != single)
+ 			continue;
+
+-		if (trigger_event(pipes[i]))
++		if (trigger_event(ring, pipes[i]))
+ 			goto err;
+-
+-		io_uring_get_events(ring);
+ 	}
+ 	sleep(1);
+ 	i = 0;
+@@ -317,13 +317,11 @@ static int test_simple_drain(struct io_uring *ring)
+ 	}
+
+ 	for (i = 0; i < 2; i++) {
+-		if (trigger_event(pipe1))
++		if (trigger_event(ring, pipe1))
+ 			goto err;
+-		io_uring_get_events(ring);
+ 	}
+-	if (trigger_event(pipe2))
+-		goto err;
+-	io_uring_get_events(ring);
++	if (trigger_event(ring, pipe2))
++		goto err;
+
+ 	for (i = 0; i < 2; i++) {
+ 		sqe[i] = io_uring_get_sqe(ring);
+--
+cgit v1.2.3-59-gdc87
+
diff --git a/wait-for-the-right-amount-of-CQEs.patch b/wait-for-the-right-amount-of-CQEs.patch
new file mode 100644
index 0000000000000000000000000000000000000000..64069547941e2f9d374df68463b634b64efa0947
--- /dev/null
+++ b/wait-for-the-right-amount-of-CQEs.patch
@@ -0,0 +1,46 @@
+From a1d5e4b863a60af93d0cab9d4bbf578733337a90 Mon Sep 17 00:00:00 2001
+From: Jens Axboe <axboe@kernel.dk>
+Date: Tue, 6 Feb 2024 13:17:14 -0700
+Subject: test/recv-multishot: wait for the right amount of CQEs
+
+This test assumes that all task_work is a) has already arrived, and
+b) will always be fully run even though the app asked for less, which
+can lead to premature checking of CQEs and hitting end-of-CQEs before
+it should.
+
+Fix it up to wait for what it needs.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+---
+ test/recv-multishot.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/test/recv-multishot.c b/test/recv-multishot.c
+index f66f131..39983e8 100644
+--- a/test/recv-multishot.c
++++ b/test/recv-multishot.c
+@@ -57,7 +57,7 @@ static int test(struct args *args)
+ 	int const N = 8;
+ 	int const N_BUFFS = N * 64;
+ 	int const N_CQE_OVERFLOW = 4;
+-	int const min_cqes = 2;
++	int const min_cqes = args->early_error ? 2 : 8;
+ 	int const NAME_LEN = sizeof(struct sockaddr_storage);
+ 	int const CONTROL_LEN = CMSG_ALIGN(sizeof(struct sockaddr_storage))
+ 				+ sizeof(struct cmsghdr);
+@@ -237,7 +237,11 @@ static int test(struct args *args)
+ 		usleep(1000);
+
+ 	if ((args->stream && !early_error) || recv_cqes < min_cqes) {
+-		ret = io_uring_wait_cqes(&ring, &cqe, 1, &timeout, NULL);
++		unsigned int to_wait = 1;
++
++		if (recv_cqes < min_cqes)
++			to_wait = min_cqes - recv_cqes;
++		ret = io_uring_wait_cqes(&ring, &cqe, to_wait, &timeout, NULL);
+ 		if (ret && ret != -ETIME) {
+ 			fprintf(stderr, "wait final failed: %d\n", ret);
+ 			ret = -1;
+--
+cgit v1.2.3-59-gdc87
+