diff --git a/1020-io_uring.h-add-IORING_SETUP_SQE128.patch b/1020-io_uring.h-add-IORING_SETUP_SQE128.patch new file mode 100644 index 0000000000000000000000000000000000000000..b5e722c2ff2bd3d73976ac24768a73e1bd28cc57 --- /dev/null +++ b/1020-io_uring.h-add-IORING_SETUP_SQE128.patch @@ -0,0 +1,82 @@ +From 01222106355284998baa2e1374401ed0fa013f4c Mon Sep 17 00:00:00 2001 +From: Ziyang Zhang +Date: Fri, 23 Dec 2022 16:44:17 +0800 +Subject: [PATCH 1/7] io_uring.h: add IORING_SETUP_SQE128 + +This asks the kernel to setup a ring with 128-byte SQE entries. May fail +with -EINVAL if the kernel doesn't support this feature. If the kernel +does support the feature, then the ring will support big sqe entries +which some commands may require. + +Signed-off-by: Jens Axboe +Signed-off-by: Ziyang Zhang +--- + src/include/liburing.h | 2 +- + src/include/liburing/io_uring.h | 30 ++++++++++++++++-------------- + 2 files changed, 17 insertions(+), 15 deletions(-) + +diff --git a/src/include/liburing.h b/src/include/liburing.h +index 19dc8b4..5cb3cae 100644 +--- a/src/include/liburing.h ++++ b/src/include/liburing.h +@@ -199,7 +199,7 @@ static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd, + sqe->len = len; + sqe->rw_flags = 0; + sqe->user_data = 0; +- sqe->__pad2[0] = sqe->__pad2[1] = sqe->__pad2[2] = 0; ++ sqe->__pad2[0] = sqe->__pad2[1] = 0; + } + + static inline void io_uring_prep_splice(struct io_uring_sqe *sqe, +diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h +index 64df61b..5306dff 100644 +--- a/src/include/liburing/io_uring.h ++++ b/src/include/liburing/io_uring.h +@@ -44,21 +44,22 @@ struct io_uring_sqe { + __u32 splice_flags; + }; + __u64 user_data; /* data to be passed back at completion time */ ++ /* pack this to avoid bogus arm OABI complaints */ + union { +- struct { +- /* pack this to avoid bogus arm OABI complaints */ +- union { +- /* index into fixed buffers, if used */ +- __u16 buf_index; +- /* for grouped buffer selection */ +- __u16 buf_group; +- } __attribute__((packed)); +- /* personality to use, if used */ +- __u16 personality; +- __s32 splice_fd_in; +- }; +- __u64 __pad2[3]; +- }; ++ /* index into fixed buffers, if used */ ++ __u16 buf_index; ++ /* for grouped buffer selection */ ++ __u16 buf_group; ++ } __attribute__((packed)); ++ /* personality to use, if used */ ++ __u16 personality; ++ __s32 splice_fd_in; ++ __u64 __pad2[2]; ++ /* ++ * If the ring is initialized with IORING_SETUP_SQE128, then this field ++ * contains 64-bytes of padding, doubling the size of the SQE. ++ */ ++ __u64 __big_sqe_pad[0]; + }; + + enum { +@@ -95,6 +96,7 @@ enum { + #define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */ + #define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */ + #define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */ ++#define IORING_SETUP_SQE128 (1U << 10) /* SQEs are 128 byte */ + #define IORING_SETUP_IDLE_US (1U << 30) /* unit of thread_idle is microsecond */ + /* use percpu SQ poll thread */ + #define IORING_SETUP_SQPOLL_PERCPU (1U << 31) +-- +2.31.1 + diff --git a/1021-setup-add-basic-support-for-SQE128.patch b/1021-setup-add-basic-support-for-SQE128.patch new file mode 100644 index 0000000000000000000000000000000000000000..4635b11a03ea9682b683d796f05e022fa38adcba --- /dev/null +++ b/1021-setup-add-basic-support-for-SQE128.patch @@ -0,0 +1,108 @@ +From 7061fff6f9e9024702c7775caedd8aa50c792ee4 Mon Sep 17 00:00:00 2001 +From: Ziyang Zhang +Date: Mon, 26 Dec 2022 16:14:44 +0800 +Subject: [PATCH 2/7] setup: add basic support for SQE128 + +Adjust the helpers for getting ring size and knowing how much memory +we need for a ring of a given size, and basic support for knowing +how much to increment a ring when an SQE is allocated. + +Signed-off-by: Jens Axboe +[Ziyang Zhang: backport for anck 5.10] +Signed-off-by: Ziyang Zhang +--- + src/queue.c | 28 +++++++++++++++------------- + src/setup.c | 17 +++++++++++++---- + 2 files changed, 28 insertions(+), 17 deletions(-) + +diff --git a/src/queue.c b/src/queue.c +index d8ee64f..b64885f 100644 +--- a/src/queue.c ++++ b/src/queue.c +@@ -364,18 +364,6 @@ int io_uring_submit_on_idle_and_wait(struct io_uring *ring, unsigned wait_nr) + return __io_uring_submit_and_wait(ring, wait_nr, IORING_ENTER_SQ_SUBMIT_ON_IDLE); + } + +-static inline struct io_uring_sqe * +-__io_uring_get_sqe(struct io_uring_sq *sq, unsigned int __head) +-{ +- unsigned int __next = (sq)->sqe_tail + 1; +- struct io_uring_sqe *__sqe = NULL; +- +- if (__next - __head <= *(sq)->kring_entries) { +- __sqe = &(sq)->sqes[(sq)->sqe_tail & *(sq)->kring_mask]; +- (sq)->sqe_tail = __next; +- } +- return __sqe; +-} + + /* + * Return an sqe to fill. Application must later call io_uring_submit() +@@ -387,6 +375,20 @@ __io_uring_get_sqe(struct io_uring_sq *sq, unsigned int __head) + struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring) + { + struct io_uring_sq *sq = &ring->sq; ++ unsigned int head = io_uring_smp_load_acquire(sq->khead); ++ unsigned int next = sq->sqe_tail + 1; ++ int shift = 0; ++ ++ if (ring->flags & IORING_SETUP_SQE128) ++ shift = 1; ++ ++ if (next - head <= *sq->kring_entries) { ++ struct io_uring_sqe *sqe; ++ ++ sqe = &sq->sqes[(sq->sqe_tail & *sq->kring_mask) << shift]; ++ sq->sqe_tail = next; ++ return sqe; ++ } + +- return __io_uring_get_sqe(sq, io_uring_smp_load_acquire(sq->khead)); ++ return NULL; + } +diff --git a/src/setup.c b/src/setup.c +index 260dd2b..570eaed 100644 +--- a/src/setup.c ++++ b/src/setup.c +@@ -59,8 +59,10 @@ static int io_uring_mmap(int fd, struct io_uring_params *p, + sq->kdropped = sq->ring_ptr + p->sq_off.dropped; + sq->array = sq->ring_ptr + p->sq_off.array; + +- size = p->sq_entries * sizeof(struct io_uring_sqe); +- sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE, ++ size = sizeof(struct io_uring_sqe); ++ if (p->flags & IORING_SETUP_SQE128) ++ size += 64; ++ sq->sqes = mmap(0, size * p->sq_entries, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, + IORING_OFF_SQES); + if (sq->sqes == MAP_FAILED) { +@@ -112,7 +114,10 @@ int io_uring_ring_dontfork(struct io_uring *ring) + if (!ring->sq.ring_ptr || !ring->sq.sqes || !ring->cq.ring_ptr) + return -EINVAL; + +- len = *ring->sq.kring_entries * sizeof(struct io_uring_sqe); ++ len = sizeof(struct io_uring_sqe); ++ if (ring->flags & IORING_SETUP_SQE128) ++ len += 64; ++ len *= *ring->sq.kring_entries; + ret = madvise(ring->sq.sqes, len, MADV_DONTFORK); + if (ret == -1) + return -errno; +@@ -169,8 +174,12 @@ void io_uring_queue_exit(struct io_uring *ring) + { + struct io_uring_sq *sq = &ring->sq; + struct io_uring_cq *cq = &ring->cq; ++ size_t sqe_size; + +- munmap(sq->sqes, *sq->kring_entries * sizeof(struct io_uring_sqe)); ++ sqe_size = sizeof(struct io_uring_sqe); ++ if (ring->flags & IORING_SETUP_SQE128) ++ sqe_size += 64; ++ munmap(sq->sqes, sqe_size * *sq->kring_entries); + io_uring_unmap_rings(sq, cq); + close(ring->ring_fd); + } +-- +2.31.1 + diff --git a/1022-test-nop-add-basic-IORING_SETUP_SQE128-tests.patch b/1022-test-nop-add-basic-IORING_SETUP_SQE128-tests.patch new file mode 100644 index 0000000000000000000000000000000000000000..550713ee75d0a6c1d2207ac9c5ab43be6a7dd9cb --- /dev/null +++ b/1022-test-nop-add-basic-IORING_SETUP_SQE128-tests.patch @@ -0,0 +1,143 @@ +From 4b7df34a68054bf2007d193ae3c4ca645e670124 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Sun, 20 Feb 2022 12:08:42 -0700 +Subject: [PATCH 3/7] test/nop: add basic IORING_SETUP_SQE128 tests + +Just repeats the original test cases, but using a big ring. Assign and +check for user_data never being NULL, which would be a common issue +with mistakes between big and normal SQEs. + +Signed-off-by: Jens Axboe +--- + test/nop.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++------ + 1 file changed, 57 insertions(+), 7 deletions(-) + +diff --git a/test/nop.c b/test/nop.c +index 82201bd..d477a1b 100644 +--- a/test/nop.c ++++ b/test/nop.c +@@ -12,6 +12,8 @@ + + #include "liburing.h" + ++static int seq; ++ + static int test_single_nop(struct io_uring *ring) + { + struct io_uring_cqe *cqe; +@@ -25,6 +27,7 @@ static int test_single_nop(struct io_uring *ring) + } + + io_uring_prep_nop(sqe); ++ sqe->user_data = ++seq; + + ret = io_uring_submit(ring); + if (ret <= 0) { +@@ -37,7 +40,10 @@ static int test_single_nop(struct io_uring *ring) + fprintf(stderr, "wait completion %d\n", ret); + goto err; + } +- ++ if (!cqe->user_data) { ++ fprintf(stderr, "Unexpected 0 user_data\n"); ++ goto err; ++ } + io_uring_cqe_seen(ring, cqe); + return 0; + err: +@@ -60,6 +66,7 @@ static int test_barrier_nop(struct io_uring *ring) + io_uring_prep_nop(sqe); + if (i == 4) + sqe->flags = IOSQE_IO_DRAIN; ++ sqe->user_data = ++seq; + } + + ret = io_uring_submit(ring); +@@ -77,6 +84,10 @@ static int test_barrier_nop(struct io_uring *ring) + fprintf(stderr, "wait completion %d\n", ret); + goto err; + } ++ if (!cqe->user_data) { ++ fprintf(stderr, "Unexpected 0 user_data\n"); ++ goto err; ++ } + io_uring_cqe_seen(ring, cqe); + } + +@@ -85,15 +96,12 @@ err: + return 1; + } + +-int main(int argc, char *argv[]) ++static int test_p(struct io_uring_params *p) + { + struct io_uring ring; + int ret; + +- if (argc > 1) +- return 0; +- +- ret = io_uring_queue_init(8, &ring, 0); ++ ret = io_uring_queue_init_params(8, &ring, p); + if (ret) { + fprintf(stderr, "ring setup failed: %d\n", ret); + return 1; +@@ -102,12 +110,54 @@ int main(int argc, char *argv[]) + ret = test_single_nop(&ring); + if (ret) { + fprintf(stderr, "test_single_nop failed\n"); +- return ret; ++ goto err; + } + + ret = test_barrier_nop(&ring); + if (ret) { + fprintf(stderr, "test_barrier_nop failed\n"); ++ goto err; ++ } ++ ++ io_uring_queue_exit(&ring); ++ return 0; ++err: ++ io_uring_queue_exit(&ring); ++ return ret; ++} ++ ++static int test_normal_ring(void) ++{ ++ struct io_uring_params p = { }; ++ ++ return test_p(&p); ++} ++ ++static int test_big_ring(void) ++{ ++ struct io_uring_params p = { }; ++ ++ p.flags = IORING_SETUP_SQE128; ++ return test_p(&p); ++} ++ ++ ++int main(int argc, char *argv[]) ++{ ++ int ret; ++ ++ if (argc > 1) ++ return 0; ++ ++ ret = test_normal_ring(); ++ if (ret) { ++ fprintf(stderr, "Normal ring test failed\n"); ++ return ret; ++ } ++ ++ ret = test_big_ring(); ++ if (ret) { ++ fprintf(stderr, "Big ring test failed\n"); + return ret; + } + +-- +2.31.1 + diff --git a/1023-test-nop-make-less-verbose-and-don-t-fail-on-older-k.patch b/1023-test-nop-make-less-verbose-and-don-t-fail-on-older-k.patch new file mode 100644 index 0000000000000000000000000000000000000000..a1f93a3a452ff99cd3fb3ae66731587018e066eb --- /dev/null +++ b/1023-test-nop-make-less-verbose-and-don-t-fail-on-older-k.patch @@ -0,0 +1,30 @@ +From e5b7ad1f5d25d183346ac13f5714310972d5f4a2 Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Fri, 13 May 2022 11:40:55 -0600 +Subject: [PATCH 4/7] test/nop: make less verbose and don't fail on older + kernels + +Signed-off-by: Jens Axboe +Signed-off-by: Ziyang Zhang +--- + test/nop.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/test/nop.c b/test/nop.c +index d477a1b..bba7caa 100644 +--- a/test/nop.c ++++ b/test/nop.c +@@ -103,6 +103,10 @@ static int test_p(struct io_uring_params *p) + + ret = io_uring_queue_init_params(8, &ring, p); + if (ret) { ++ if (ret == -EINVAL) { ++ fprintf(stdout, "IORING_SETUP_SQE128 not supported\n"); ++ return 0; ++ } + fprintf(stderr, "ring setup failed: %d\n", ret); + return 1; + } +-- +2.31.1 + diff --git a/1024-io_uring.h-support-IORING_OP_URING_CMD.patch b/1024-io_uring.h-support-IORING_OP_URING_CMD.patch new file mode 100644 index 0000000000000000000000000000000000000000..fe7f2cf5fe5524397704959efd209a2e7817f4d7 --- /dev/null +++ b/1024-io_uring.h-support-IORING_OP_URING_CMD.patch @@ -0,0 +1,25 @@ +From 28ddb2fd15d69e043ed89b63db44907a8fad0eb8 Mon Sep 17 00:00:00 2001 +From: Ziyang Zhang +Date: Mon, 26 Dec 2022 15:59:26 +0800 +Subject: [PATCH 5/7] io_uring.h: support IORING_OP_URING_CMD + +Signed-off-by: Ziyang Zhang +--- + src/include/liburing/io_uring.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h +index 5306dff..0af8bc4 100644 +--- a/src/include/liburing/io_uring.h ++++ b/src/include/liburing/io_uring.h +@@ -136,6 +136,7 @@ enum { + IORING_OP_PROVIDE_BUFFERS, + IORING_OP_REMOVE_BUFFERS, + IORING_OP_TEE, ++ IORING_OP_URING_CMD, + + /* this goes last, obviously */ + IORING_OP_LAST, +-- +2.31.1 + diff --git a/1025-man-io_uring_setup.2-document-IORING_SETUP_SQE128-fl.patch b/1025-man-io_uring_setup.2-document-IORING_SETUP_SQE128-fl.patch new file mode 100644 index 0000000000000000000000000000000000000000..0bc4a6d8075ab780ced0e20fa8f157b4c8037b50 --- /dev/null +++ b/1025-man-io_uring_setup.2-document-IORING_SETUP_SQE128-fl.patch @@ -0,0 +1,32 @@ +From f2d54142ad04900c7768fcf858fad498e3e7ad6a Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Mon, 16 May 2022 20:14:00 -0600 +Subject: [PATCH 6/7] man/io_uring_setup.2: document IORING_SETUP_SQE128 flag + +Signed-off-by: Jens Axboe +[Ziyang Zhang: note that it is used for uring-cmd by ublk] +Signed-off-by: Ziyang Zhang +--- + man/io_uring_setup.2 | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/man/io_uring_setup.2 b/man/io_uring_setup.2 +index a2a9072..c0da6cd 100644 +--- a/man/io_uring_setup.2 ++++ b/man/io_uring_setup.2 +@@ -149,6 +149,12 @@ entries. The value must be greater than + .IR entries , + and may be rounded up to the next power-of-two. + .TP ++.B IORING_SETUP_SQE128 ++If set, io_uring will use 128-byte SQEs rather than the normal 64-byte sized ++variant. This is a requirement for using ++.B IORING_OP_URING_CMD ++by ublk. ++.TP + .B IORING_SETUP_IDLE_US + If this flag is set, the unit of + .I sq_thread_idle +-- +2.31.1 + diff --git a/1026-io_uring.h-Add-missing-fields-for-uring_cmd-in-struc.patch b/1026-io_uring.h-Add-missing-fields-for-uring_cmd-in-struc.patch new file mode 100644 index 0000000000000000000000000000000000000000..7fe236ebc8683db14442639aa7d1a0f2735cf6c4 --- /dev/null +++ b/1026-io_uring.h-Add-missing-fields-for-uring_cmd-in-struc.patch @@ -0,0 +1,47 @@ +From 7816dac311226fb0d5e6a0cc2b993f0e74c62744 Mon Sep 17 00:00:00 2001 +From: Ziyang Zhang +Date: Fri, 13 Jan 2023 17:06:38 +0800 +Subject: [PATCH 7/7] io_uring.h: Add missing fields for uring_cmd in struct + io_uring_sqe + +Signed-off-by: Ziyang Zhang +--- + src/include/liburing/io_uring.h | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h +index 0af8bc4..1970201 100644 +--- a/src/include/liburing/io_uring.h ++++ b/src/include/liburing/io_uring.h +@@ -22,6 +22,7 @@ struct io_uring_sqe { + union { + __u64 off; /* offset into file */ + __u64 addr2; ++ __u32 cmd_op; + }; + union { + __u64 addr; /* pointer to buffer or iovecs */ +@@ -54,12 +55,14 @@ struct io_uring_sqe { + /* personality to use, if used */ + __u16 personality; + __s32 splice_fd_in; +- __u64 __pad2[2]; +- /* +- * If the ring is initialized with IORING_SETUP_SQE128, then this field +- * contains 64-bytes of padding, doubling the size of the SQE. +- */ +- __u64 __big_sqe_pad[0]; ++ union { ++ __u64 __pad2[2]; ++ /* ++ * If the ring is initialized with IORING_SETUP_SQE128, then ++ * this field is used for 80 bytes of arbitrary command data ++ */ ++ __u8 cmd[0]; ++ }; + }; + + enum { +-- +2.31.1 + diff --git a/liburing.spec b/liburing.spec index 7a74e3747d5004fd640fde81e4e5ce0e9a7fe208..0e0771d38e85ad3f211a90fd1c685bc5e0d76932 100644 --- a/liburing.spec +++ b/liburing.spec @@ -1,4 +1,4 @@ -%define anolis_release .0.2 +%define anolis_release .0.3 Name: liburing Version: 1.0.7 @@ -29,6 +29,13 @@ Patch1016: 1016-liburing-anolis-support-us-granularity-of-io_sq_thread_idle.patc Patch1017: 1017-liburing-anolis-add-IORING_ENTER_SQ_SUBMIT_ON_IDLE-flag.patch Patch1018: 1018-liburing-anolis-adjust-value-for-IORING_ENTER_SQ_SUBMIT_ON_IDLE-to-s.patch Patch1019: 1019-liburing-anolis-Add-a-test-for-sqpoll-sq_thread_idle_us-mode.patch +Patch1020: 1020-io_uring.h-add-IORING_SETUP_SQE128.patch +Patch1021: 1021-setup-add-basic-support-for-SQE128.patch +Patch1022: 1022-test-nop-add-basic-IORING_SETUP_SQE128-tests.patch +Patch1023: 1023-test-nop-make-less-verbose-and-don-t-fail-on-older-k.patch +Patch1024: 1024-io_uring.h-support-IORING_OP_URING_CMD.patch +Patch1025: 1025-man-io_uring_setup.2-document-IORING_SETUP_SQE128-fl.patch +Patch1026: 1026-io_uring.h-Add-missing-fields-for-uring_cmd-in-struc.patch # End: Anolis customized patches %description @@ -68,6 +75,10 @@ for the Linux-native io_uring. %{_mandir}/man2/* %changelog +* Mon Mar 13 2023 Ziyang Zhang - 1.0.7-3.0.3 +- Add IORING_SETUP_SQE128 support and tests +- Add IORING_OP_URING_CMD support + * Tue Dec 6 2022 Ziyang Zhang - 1.0.7-3.0.2 - revert io_uring ioctl support - update uapi for IORING_ENTER_SQ_SUBMIT_ON_IDLE