diff --git a/1001-Add-percpu-io-sq-thread-support.patch b/1001-Add-percpu-io-sq-thread-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..149c071ea8304fea4875aac7b9e64f3df3cdf74d --- /dev/null +++ b/1001-Add-percpu-io-sq-thread-support.patch @@ -0,0 +1,54 @@ +From cd0ee5df307beda0cc8a14af09ffdaafcd5e0e6a Mon Sep 17 00:00:00 2001 +From: Xiaoguang Wang +Date: Thu, 6 Aug 2020 14:23:01 +0800 +Subject: [PATCH 1/4] Add percpu io sq thread support + +Add a new IORING_SETUP_SQPOLL_PERCPU flag, this flag is only meaningful +when IORING_SETUP_SQPOLL and IORING_SETUP_SQ_AFF are both specified, that +means if user creates multiple io_uring instances which are all bound +to one same cpu, only a kernel thread is created for this cpu to perform +these io_uring instances' submission queue polling. + +Signed-off-by: Xiaoguang Wang +Acked-by: Joseph Qi +--- + man/io_uring_setup.2 | 9 +++++++++ + src/include/liburing/io_uring.h | 2 ++ + 2 files changed, 11 insertions(+) + +diff --git a/man/io_uring_setup.2 b/man/io_uring_setup.2 +index cd69994..30a2306 100644 +--- a/man/io_uring_setup.2 ++++ b/man/io_uring_setup.2 +@@ -147,6 +147,15 @@ is specified. When cgroup setting + changes (typically in container environment), the bounded cpu set may be + changed as well. + .TP ++.B IORING_SETUP_SQPOLL_PERCPU ++This flag is only meaningful when ++.B IORING_SETUP_SQPOLL ++and ++.B IORING_SETUP_SQ_AFF ++are both specified, that means if user creates multiple io_uring instances ++which are all bound to one same cpu, only a kernel thread is created for this ++cpu to perform these io_uring instances' submission queue polling. 
++.TP + .B IORING_SETUP_CQSIZE + Create the completion queue with + .IR "struct io_uring_params.cq_entries" +diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h +index a3e0920..d4a0d83 100644 +--- a/src/include/liburing/io_uring.h ++++ b/src/include/liburing/io_uring.h +@@ -166,6 +166,8 @@ enum { + * Only one task is allowed to submit requests + */ + #define IORING_SETUP_SINGLE_ISSUER (1U << 12) ++/* use percpu SQ poll thread */ ++#define IORING_SETUP_SQPOLL_PERCPU (1U << 31) + + /* + * Defer running task work to get events. +-- +2.31.1 + diff --git a/1002-support-us-granularity-of-io_sq_thread_idle.patch b/1002-support-us-granularity-of-io_sq_thread_idle.patch new file mode 100644 index 0000000000000000000000000000000000000000..6f6f85c6d0750ae4bae681a85ed9ca989f800eca --- /dev/null +++ b/1002-support-us-granularity-of-io_sq_thread_idle.patch @@ -0,0 +1,55 @@ +From 5c31734118432639f7758d4671f1f05348914d63 Mon Sep 17 00:00:00 2001 +From: Hao Xu +Date: Mon, 10 May 2021 11:34:29 +0800 +Subject: [PATCH 2/4] support us granularity of io_sq_thread_idle + +add flag IORING_SETUP_IDLE_US to support microsecond granularity +io_sq_thread_idle. + +Signed-off-by: Hao Xu +--- + man/io_uring_setup.2 | 8 +++++++- + src/include/liburing/io_uring.h | 1 + + 2 files changed, 8 insertions(+), 1 deletion(-) + +diff --git a/man/io_uring_setup.2 b/man/io_uring_setup.2 +index 30a2306..082507c 100644 +--- a/man/io_uring_setup.2 ++++ b/man/io_uring_setup.2 +@@ -83,7 +83,9 @@ doing a single system call. + + If the kernel thread is idle for more than + .I sq_thread_idle +-milliseconds, it will set the ++milliseconds (or microseconds, if ++.B IORING_SETUP_IDLE_US ++is set), it will set the + .B IORING_SQ_NEED_WAKEUP + bit in the + .I flags +@@ -287,6 +289,10 @@ Note that if this flag is set then it is the application's responsibility to per + trigger work (for example via any of the CQE waiting functions) or else completions may + not be delivered. 
+ Available since 6.1. ++.B IORING_SETUP_IDLE_US ++If this flag is set, the unit of ++.I sq_thread_idle ++is microsecond, rather than millisecond. + .PP + If no flags are specified, the io_uring instance is setup for + interrupt driven I/O. I/O may be submitted using +diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h +index d4a0d83..8291de7 100644 +--- a/src/include/liburing/io_uring.h ++++ b/src/include/liburing/io_uring.h +@@ -166,6 +166,7 @@ enum { + * Only one task is allowed to submit requests + */ + #define IORING_SETUP_SINGLE_ISSUER (1U << 12) ++#define IORING_SETUP_IDLE_US (1U << 30) /* unit of thread_idle is microsecond */ + /* use percpu SQ poll thread */ + #define IORING_SETUP_SQPOLL_PERCPU (1U << 31) + +-- +2.31.1 + diff --git a/1003-add-IORING_ENTER_SQ_SUBMIT_ON_IDLE-flag.patch b/1003-add-IORING_ENTER_SQ_SUBMIT_ON_IDLE-flag.patch new file mode 100644 index 0000000000000000000000000000000000000000..3056111da9bb66468a6b6b49cbd95d4ad4758973 --- /dev/null +++ b/1003-add-IORING_ENTER_SQ_SUBMIT_ON_IDLE-flag.patch @@ -0,0 +1,134 @@ +From c6488e58ba2ba3b8a49e728143dac428b55f591a Mon Sep 17 00:00:00 2001 +From: Ziyang Zhang +Date: Thu, 1 Dec 2022 10:43:39 +0800 +Subject: [PATCH 3/4] add IORING_ENTER_SQ_SUBMIT_ON_IDLE flag + +add this flag to allow the original task to submit some sqes to reduce +sqthread wakeup latency. 
+ +Signed-off-by: Hao Xu +[Ziyang Zhang: adjust value of IORING_ENTER_SQ_SUBMIT_ON_IDLE] +Signed-off-by: Ziyang Zhang +--- + src/include/liburing.h | 2 ++ + src/include/liburing/io_uring.h | 1 + + src/liburing.map | 2 ++ + src/queue.c | 30 ++++++++++++++++++++++-------- + 4 files changed, 27 insertions(+), 8 deletions(-) + +diff --git a/src/include/liburing.h b/src/include/liburing.h +index 12a703f..531bf2e 100644 +--- a/src/include/liburing.h ++++ b/src/include/liburing.h +@@ -176,12 +176,14 @@ int io_uring_wait_cqe_timeout(struct io_uring *ring, + struct io_uring_cqe **cqe_ptr, + struct __kernel_timespec *ts); + int io_uring_submit(struct io_uring *ring); ++int io_uring_submit_on_idle(struct io_uring *ring); + int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr); + int io_uring_submit_and_wait_timeout(struct io_uring *ring, + struct io_uring_cqe **cqe_ptr, + unsigned wait_nr, + struct __kernel_timespec *ts, + sigset_t *sigmask); ++int io_uring_submit_on_idle_and_wait(struct io_uring *ring, unsigned wait_nr); + + int io_uring_register_buffers(struct io_uring *ring, const struct iovec *iovecs, + unsigned nr_iovecs); +diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h +index 8291de7..4b474e3 100644 +--- a/src/include/liburing/io_uring.h ++++ b/src/include/liburing/io_uring.h +@@ -423,6 +423,7 @@ struct io_cqring_offsets { + #define IORING_ENTER_SQ_WAIT (1U << 2) + #define IORING_ENTER_EXT_ARG (1U << 3) + #define IORING_ENTER_REGISTERED_RING (1U << 4) ++#define IORING_ENTER_SQ_SUBMIT_ON_IDLE (1U << 31) + + /* + * Passed in for io_uring_setup(2). 
Copied back with updated info on success +diff --git a/src/liburing.map b/src/liburing.map +index 06c64f8..e8124a2 100644 +--- a/src/liburing.map ++++ b/src/liburing.map +@@ -66,4 +66,6 @@ LIBURING_2.3 { + io_uring_register; + io_uring_get_events; + io_uring_submit_and_get_events; ++ io_uring_submit_on_idle; ++ io_uring_submit_on_idle_and_wait; + } LIBURING_2.2; +diff --git a/src/queue.c b/src/queue.c +index c06bcc3..563c7c3 100644 +--- a/src/queue.c ++++ b/src/queue.c +@@ -366,13 +366,13 @@ int io_uring_wait_cqe_timeout(struct io_uring *ring, + * Returns number of sqes submitted + */ + static int __io_uring_submit(struct io_uring *ring, unsigned submitted, +- unsigned wait_nr, bool getevents) ++ unsigned wait_nr, bool getevents, unsigned enter_flags) + { + bool cq_needs_enter = getevents || wait_nr || cq_ring_needs_enter(ring); +- unsigned flags; ++ unsigned flags = 0; + int ret; + +- flags = 0; ++ flags |= enter_flags; + if (sq_ring_needs_enter(ring, submitted, &flags) || cq_needs_enter) { + if (cq_needs_enter) + flags |= IORING_ENTER_GETEVENTS; +@@ -387,9 +387,9 @@ static int __io_uring_submit(struct io_uring *ring, unsigned submitted, + return ret; + } + +-static int __io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr) ++static int __io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr, unsigned enter_flags) + { +- return __io_uring_submit(ring, __io_uring_flush_sq(ring), wait_nr, false); ++ return __io_uring_submit(ring, __io_uring_flush_sq(ring), wait_nr, false, enter_flags); + } + + /* +@@ -399,7 +399,16 @@ static int __io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr) + */ + int io_uring_submit(struct io_uring *ring) + { +- return __io_uring_submit_and_wait(ring, 0); ++ return __io_uring_submit_and_wait(ring, 0, 0); ++} ++ ++/* ++ * Similar to io_uring_submit(), but with flag IORING_ENTER_SQ_SUBMIT_ON_IDLE ++ * to allow submitting sqes in original context when waking up sqthread. 
++ */ ++int io_uring_submit_on_idle(struct io_uring *ring) ++{ ++ return __io_uring_submit_and_wait(ring, 0, IORING_ENTER_SQ_SUBMIT_ON_IDLE); + } + + /* +@@ -409,12 +418,17 @@ int io_uring_submit(struct io_uring *ring) + */ + int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr) + { +- return __io_uring_submit_and_wait(ring, wait_nr); ++ return __io_uring_submit_and_wait(ring, wait_nr, 0); ++} ++ ++int io_uring_submit_on_idle_and_wait(struct io_uring *ring, unsigned wait_nr) ++{ ++ return __io_uring_submit_and_wait(ring, wait_nr, IORING_ENTER_SQ_SUBMIT_ON_IDLE); + } + + int io_uring_submit_and_get_events(struct io_uring *ring) + { +- return __io_uring_submit(ring, __io_uring_flush_sq(ring), 0, true); ++ return __io_uring_submit(ring, __io_uring_flush_sq(ring), 0, true, 0); + } + + #ifdef LIBURING_INTERNAL +-- +2.31.1 + diff --git a/1004-Add-a-test-for-sqpoll-sq_thread_idle_us-mode.patch b/1004-Add-a-test-for-sqpoll-sq_thread_idle_us-mode.patch new file mode 100644 index 0000000000000000000000000000000000000000..0add38ee95770a1b523439633c542f1cd72b5b61 --- /dev/null +++ b/1004-Add-a-test-for-sqpoll-sq_thread_idle_us-mode.patch @@ -0,0 +1,132 @@ +From fd1a90edb0c193db13f42cadeb588bc3d73ab18c Mon Sep 17 00:00:00 2001 +From: Hao Xu +Date: Tue, 6 Dec 2022 15:47:57 +0800 +Subject: [PATCH 4/4] Add a test for sqpoll sq_thread_idle_us mode + +Reviewed-by: Xiaoguang Wang +Signed-off-by: Hao Xu +--- + test/Makefile | 1 + + test/sqpoll-idle-us.c | 98 +++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 99 insertions(+) + create mode 100644 test/sqpoll-idle-us.c + +diff --git a/test/Makefile b/test/Makefile +index e14eb51..6efac71 100644 +--- a/test/Makefile ++++ b/test/Makefile +@@ -158,6 +158,7 @@ test_srcs := \ + sqpoll-disable-exit.c \ + sq-poll-dup.c \ + sqpoll-exit-hang.c \ ++ sqpoll-idle-us.c \ + sq-poll-kthread.c \ + sq-poll-share.c \ + sqpoll-sleep.c \ +diff --git a/test/sqpoll-idle-us.c b/test/sqpoll-idle-us.c +new file mode 100644 +index 
0000000..957ea7c +--- /dev/null ++++ b/test/sqpoll-idle-us.c +@@ -0,0 +1,98 @@ ++/* ++ * Test sqpoll sq_thread_idle in us mode ++ */ ++#include ++#include ++#include ++#include ++#include "liburing.h" ++ ++#define DEFAULT_IDLE_US 50 ++ ++static int submit_single_nop(struct io_uring *ring, bool submit_on_idle) ++{ ++ struct io_uring_sqe *sqe; ++ int ret; ++ ++ sqe = io_uring_get_sqe(ring); ++ if (!sqe) { ++ fprintf(stderr, "get sqe failed\n"); ++ goto err; ++ } ++ ++ io_uring_prep_nop(sqe); ++ ++ if (submit_on_idle) ++ ret = io_uring_submit_on_idle(ring); ++ else ++ ret = io_uring_submit(ring); ++ if (ret <= 0) { ++ fprintf(stderr, "sqe submit failed: %d\n", ret); ++ goto err; ++ } ++ ++ return 0; ++err: ++ return 1; ++} ++ ++static int reap_cqes(struct io_uring *ring, int count) ++{ ++ int ret; ++ struct io_uring_cqe *cqe; ++ ++ while(count--) { ++ ret = io_uring_wait_cqe(ring, &cqe); ++ if (ret < 0) { ++ fprintf(stderr, "wait completion %d\n", ret); ++ return 1; ++ } ++ io_uring_cqe_seen(ring, cqe); ++ } ++ return 0; ++} ++ ++static int test_sqpoll_idle_us(int nr) ++{ ++ struct io_uring_params p = {}; ++ struct io_uring ring; ++ int ret, count = nr; ++ ++ srand((unsigned)time(NULL)); ++ p.flags = (IORING_SETUP_SQPOLL | IORING_SETUP_SQPOLL_PERCPU | IORING_SETUP_IDLE_US); ++ p.sq_thread_idle = DEFAULT_IDLE_US; ++ ++ ret = io_uring_queue_init_params(nr + 10, &ring, &p); ++ if (ret) { ++ fprintf(stderr, "queue_init=%d\n", ret); ++ return 1; ++ } ++ while (nr--) { ++ unsigned x = p.sq_thread_idle; ++ /* ++ * sleep some time to make sure sqthread often sleeps. 
++ * [(1 / 2) * sq_thread_idle, (2 + 1 / 2) * sq_thread_idle) ++ */ ++ usleep((x >> 1) + rand() % (x << 1)); ++ ret = submit_single_nop(&ring, nr % 2); ++ if (ret) ++ return ret; ++ } ++ return reap_cqes(&ring, count); ++} ++ ++int main(int argc, char *argv[]) ++{ ++ int ret; ++ ++ if (argc > 1) ++ return 0; ++ ++ ret = test_sqpoll_idle_us(10000); ++ if (ret) { ++ fprintf(stderr, "test failed: %d\n", ret); ++ return 1; ++ } ++ ++ return 0; ++} +-- +2.31.1 + diff --git a/liburing-2.1.tar.gz b/liburing-2.1.tar.gz deleted file mode 100644 index a45022550b67396b176328dd9830afdf99e5e6f8..0000000000000000000000000000000000000000 Binary files a/liburing-2.1.tar.gz and /dev/null differ diff --git a/liburing-2.3.tar.gz b/liburing-2.3.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3f1cef2eff47f8550205be9c9d7e33979290576a Binary files /dev/null and b/liburing-2.3.tar.gz differ diff --git a/liburing.spec b/liburing.spec index eda94975817ddc14d2d74bd20f18bb05a19be52a..5e766857d007d28d628035f6eafd5b937d749c75 100644 --- a/liburing.spec +++ b/liburing.spec @@ -1,14 +1,21 @@ -%define anolis_release 2 +%define anolis_release 1 Name: liburing -Version: 2.1 +Version: 2.3 Release: %{anolis_release}%{?dist} Summary: Linux-native io_uring I/O access library License: (GPLv2 with exceptions and LGPLv2+) or MIT -Source0: https://github.com/axboe/liburing/archive/refs/tags/%{name}-%{version}.tar.gz +Source0: https://brick.kernel.dk/snaps/%{name}-%{version}.tar.gz URL: https://github.com/axboe/liburing BuildRequires: gcc BuildRequires: make +# Begin: Anolis customized patches +Patch1001: 1001-Add-percpu-io-sq-thread-support.patch +Patch1002: 1002-support-us-granularity-of-io_sq_thread_idle.patch +Patch1003: 1003-add-IORING_ENTER_SQ_SUBMIT_ON_IDLE-flag.patch +Patch1004: 1004-Add-a-test-for-sqpoll-sq_thread_idle_us-mode.patch +# End: Anolis customized patches + %description Provides native async IO for the Linux kernel, in a fast and efficient manner, for both 
buffered and O_DIRECT. @@ -23,7 +30,7 @@ This package provides header files to include and libraries to link with for the Linux-native io_uring. %prep -%autosetup +%autosetup -p1 %build %set_build_flags @@ -53,6 +60,9 @@ for the Linux-native io_uring. %{_mandir}/man7/* %changelog +* Thu Dec 1 2022 Ziyang Zhang - 2.3-1 +- upgrade to 2.3 and add anolis liburing features + * Wed Oct 19 2022 mgb01105731 - 2.1-2 - optimise spec file