diff --git a/0024-refactor-pkt-read-send-performance.patch b/0024-refactor-pkt-read-send-performance.patch new file mode 100644 index 0000000000000000000000000000000000000000..3affe2ae4b2d94fe4ad37b56393034485c63b8fb --- /dev/null +++ b/0024-refactor-pkt-read-send-performance.patch @@ -0,0 +1,320 @@ +From 10e21843fc3fde51cb99510792835a65c9b5baad Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: Thu, 7 Jul 2022 20:00:14 +0800 +Subject: [PATCH] refactor pkt read/send + +--- + src/api/api_msg.c | 15 ++++++--------- + src/api/posix_api.c | 4 ++-- + src/api/sockets.c | 11 +++-------- + src/api/sys_arch.c | 11 +++++------ + src/include/arch/sys_arch.h | 46 +++++++++++++++++++++++++++++++++++++++++++++ + src/include/lwipopts.h | 2 +- + src/include/lwipsock.h | 29 +++++++++++----------------- + src/include/posix_api.h | 2 +- + 8 files changed, 75 insertions(+), 45 deletions(-) + +diff --git a/src/api/api_msg.c b/src/api/api_msg.c +index 672f022..7839526 100644 +--- a/src/api/api_msg.c ++++ b/src/api/api_msg.c +@@ -341,13 +341,12 @@ recv_tcp(void *arg, struct tcp_pcb *pcb, struct pbuf *p, err_t err) + #if LWIP_SO_RCVBUF + SYS_ARCH_INC(conn->recv_avail, len); + #endif /* LWIP_SO_RCVBUF */ +- /* Register event with callback */ +- API_EVENT(conn, NETCONN_EVT_RCVPLUS, len); + #if USE_LIBOS +- if (conn->state == NETCONN_WRITE || conn->state == NETCONN_CLOSE || +- conn->state == NETCONN_CONNECT) { + add_recv_list(conn->socket); +- } ++ LWIP_UNUSED_ARG(len); ++#else ++ /* Register event with callback */ ++ API_EVENT(conn, NETCONN_EVT_RCVPLUS, len); + #endif + } + +@@ -479,10 +478,7 @@ err_tcp(void *arg, err_t err) + /* use trypost to prevent deadlock */ + sys_mbox_trypost(&conn->recvmbox, mbox_msg); + #if USE_LIBOS +- if ((old_state == NETCONN_WRITE) || (old_state == NETCONN_CLOSE) || +- (old_state == NETCONN_CONNECT)) { +- add_recv_list(conn->socket); +- } ++ add_recv_list(conn->socket); + #endif + } + /* pass error message to acceptmbox to wake up pending accept */ +@@ -1356,6 +1352,7 @@ lwip_netconn_do_connected(void *arg, struct tcp_pcb *pcb, err_t err) + } + } + SET_CONN_TYPE_LIBOS(conn); ++ add_epoll_event(conn, EPOLLOUT); + #endif + + LWIP_ASSERT("conn->state == NETCONN_CONNECT", conn->state == NETCONN_CONNECT); +diff --git a/src/api/posix_api.c b/src/api/posix_api.c +index 3f85bad..6afb9c6 100644 +--- a/src/api/posix_api.c ++++ b/src/api/posix_api.c +@@ -60,7 +60,7 @@ static struct lwip_sock *chld_get_socket(int fd) + void posix_api_fork(void) + { + /* lstack helper api */ +- posix_api->is_chld = 1; ++ posix_api->ues_posix = 1; + posix_api->is_epfd = chld_is_epfd; + posix_api->get_socket = chld_get_socket; + } +@@ -117,7 +117,7 @@ int posix_api_init(void) + posix_api->epoll_close_fn = lstack_epoll_close; + + /* support fork */ +- posix_api->is_chld = 1; ++ posix_api->ues_posix = 1; + return ERR_OK; + + err_out: +diff --git a/src/api/sockets.c b/src/api/sockets.c +index 3d94454..4d4cea1 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -1039,11 +1039,7 @@ lwip_recv_tcp(struct lwip_sock *sock, void *mem, size_t len, int flags) + { + u8_t apiflags = NETCONN_NOAUTORCVD; + ssize_t recvd = 0; +-#if USE_LIBOS +- apiflags = 0; +-#else + ssize_t recv_left = (len <= SSIZE_MAX) ? (ssize_t)len : SSIZE_MAX; +-#endif + + LWIP_ASSERT("no socket given", sock != NULL); + LWIP_ASSERT("this should be checked internally", NETCONNTYPE_GROUP(netconn_type(sock->conn)) == NETCONN_TCP); +@@ -1134,6 +1130,7 @@ lwip_recv_tcp(struct lwip_sock *sock, void *mem, size_t len, int flags) + + lwip_recv_tcp_done: + #else /* USE_LIBOS */ ++ LWIP_UNUSED_ARG(recv_left); + recvd = read_lwip_data(sock, flags, apiflags); + if (recvd <= 0) { + return recvd; +@@ -2667,10 +2664,8 @@ event_callback(struct netconn *conn, enum netconn_evt evt, u16_t len) + check_waiters = 0; + } + #if USE_LIBOS +- if (conn->state == NETCONN_LISTEN) { +- add_epoll_event(conn, EPOLLIN); +- } else { +- add_recv_list(conn->socket); ++ if (conn->acceptmbox != NULL && !sys_mbox_empty(conn->acceptmbox)) { ++ add_epoll_event(conn, POLLIN); + } + #endif + break; +diff --git a/src/api/sys_arch.c b/src/api/sys_arch.c +index 9a92143..f93a00e 100644 +--- a/src/api/sys_arch.c ++++ b/src/api/sys_arch.c +@@ -37,7 +37,6 @@ + #include + + #include +-#include + + #include "lwip/err.h" + #include "lwip/mem.h" +@@ -134,7 +133,7 @@ err_t sys_mbox_trypost(struct sys_mbox **mb, void *msg) + unsigned int n; + struct sys_mbox *mbox = *mb; + +- n = rte_ring_sp_enqueue_bulk(mbox->ring, &msg, 1, NULL); ++ n = gazelle_st_ring_enqueue_busrt(mbox->ring, &msg, 1); + if (!n) + return ERR_BUF; + return ERR_OK; +@@ -148,7 +147,7 @@ void sys_mbox_post(struct sys_mbox **mb, void *msg) + * If the ring size of mbox is greater than MEMP_NUM_TCPIP_MSG_API, + * enqueue failure will never happen. + * */ +- if (!rte_ring_sp_enqueue_bulk(mbox->ring, &msg, 1, NULL)) { ++ if (!gazelle_st_ring_enqueue_busrt(mbox->ring, &msg, 1)) { + LWIP_ASSERT("It is failed to post msg into mbox", 0); + } + } +@@ -163,7 +162,7 @@ uint32_t sys_arch_mbox_tryfetch(struct sys_mbox **mb, void **msg) + unsigned int n; + struct sys_mbox *mbox = *mb; + +- n = rte_ring_sc_dequeue_bulk(mbox->ring, msg, 1, NULL); ++ n = gazelle_st_ring_dequeue_burst(mbox->ring, msg, 1); + if (!n) { + *msg = NULL; + return SYS_MBOX_EMPTY; +@@ -179,7 +178,7 @@ uint32_t sys_arch_mbox_fetch(struct sys_mbox **mb, void **msg, uint32_t timeout) + uint32_t time_needed = 0; + struct sys_mbox *mbox = *mb; + +- n = rte_ring_sc_dequeue_bulk(mbox->ring, msg, 1, NULL); ++ n = gazelle_st_ring_dequeue_burst(mbox->ring, msg, 1); + + if (timeout > 0) + poll_ts = sys_now(); +@@ -194,7 +193,7 @@ uint32_t sys_arch_mbox_fetch(struct sys_mbox **mb, void **msg, uint32_t timeout) + + (void)mbox->wait_fn(); + +- n = rte_ring_sc_dequeue_bulk(mbox->ring, msg, 1, NULL); ++ n = gazelle_st_ring_dequeue_burst(mbox->ring, msg, 1); + } + + return time_needed; +diff --git a/src/include/arch/sys_arch.h b/src/include/arch/sys_arch.h +index b8a0d28..fc4a9fd 100644 +--- a/src/include/arch/sys_arch.h ++++ b/src/include/arch/sys_arch.h +@@ -76,7 +76,53 @@ int sys_mbox_empty(struct sys_mbox *); + struct sys_thread; + typedef struct sys_thread *sys_thread_t; + ++#if USE_LIBOS + extern int eth_dev_poll(void); ++#include ++ ++/* ++ gazelle custom rte ring interface ++ lightweight ring no atomic. ++ only surpport in single thread. ++ */ ++static __rte_always_inline uint32_t gazelle_st_ring_enqueue_busrt(struct rte_ring *r, void **obj_table, uint32_t n) ++{ ++ uint32_t prod = r->prod.tail; ++ uint32_t cons = r->cons.tail; ++ uint32_t free_entries = r->capacity + cons - prod; ++ ++ if (n > free_entries) { ++ return 0; ++ } ++ ++ ENQUEUE_PTRS(r, &r[1], prod, obj_table, n, void *); ++ ++ r->prod.tail = prod + n; ++ ++ return n; ++} ++ ++static __rte_always_inline uint32_t gazelle_st_ring_dequeue_burst(struct rte_ring *r, void **obj_table, uint32_t n) ++{ ++ uint32_t cons = r->cons.tail; ++ uint32_t prod = r->prod.tail; ++ uint32_t entries = prod - cons; ++ ++ if (n > entries) { ++ n = entries; ++ } ++ ++ if (n == 0) { ++ return 0; ++ } ++ ++ DEQUEUE_PTRS(r, &r[1], cons, obj_table, n, void *); ++ ++ r->cons.tail = cons + n; ++ ++ return n; ++} ++#endif + + void sys_calibrate_tsc(void); + uint32_t sys_now(void); +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index df587c0..75d3c74 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -97,7 +97,7 @@ + + #define TCP_WND (40 * TCP_MSS) + +-#define TCP_SND_BUF (5 * TCP_MSS) ++#define TCP_SND_BUF (40 * TCP_MSS) + + #define TCP_SND_QUEUELEN (8191) + +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index eec4e8e..500292d 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -63,6 +63,7 @@ union lwip_sock_lastdata { + struct protocol_stack; + struct wakeup_poll; + struct rte_ring; ++#include + #endif + /** Contains all internal pointers and states used for a socket */ + struct lwip_sock { +@@ -92,28 +93,21 @@ struct lwip_sock { + #endif + + #if USE_LIBOS ++ volatile uint32_t events __rte_cache_aligned; /* available events */ ++ struct pbuf *recv_lastdata __rte_cache_aligned; /* unread data in one pbuf */ ++ struct list_node recv_list __rte_cache_aligned; ++ struct list_node event_list __rte_cache_aligned; ++ struct list_node send_list __rte_cache_aligned; ++ char pad __rte_cache_aligned; ++ + uint32_t epoll_events; /* registered events */ +- volatile uint32_t events; /* available events */ +- epoll_data_t ep_data; + struct wakeup_poll *wakeup; ++ epoll_data_t ep_data; ++ bool wait_close; ++ struct lwip_sock *listen_next; /* listenfd list */ + struct protocol_stack *stack; + struct rte_ring *recv_ring; +- struct rte_ring *recv_wait_free; +- struct pbuf *recv_lastdata; /* unread data in one pbuf */ +- struct pbuf *send_lastdata; /* unread data in one pbuf */ + struct rte_ring *send_ring; +- struct rte_ring *send_idle_ring; +- int32_t recv_flags; +- int32_t send_flags; +- bool wait_close; +- int32_t attach_fd; +- struct lwip_sock *shadowed_sock; +- struct list_node attach_list; +- struct list_node listen_list; +- struct list_node recv_list; +- struct list_node event_list; +- struct list_node send_list; +- int32_t nextfd; /* listenfd list */ + #endif + }; + +@@ -160,7 +154,6 @@ get_socket_without_errno(int s) + extern void add_recv_list(int32_t fd); + extern ssize_t read_lwip_data(struct lwip_sock *sock, int32_t flags, u8_t apiflags); + extern struct pbuf *write_lwip_data(struct lwip_sock *sock, uint16_t remain_size, uint8_t *apiflags); +-extern void gazelle_clean_sock(int32_t fd); + extern void gazelle_init_sock(int32_t fd); + #endif /* USE_LIBOS */ + +diff --git a/src/include/posix_api.h b/src/include/posix_api.h +index 2afd266..c8f2cf9 100644 +--- a/src/include/posix_api.h ++++ b/src/include/posix_api.h +@@ -76,7 +76,7 @@ typedef struct { + int (*poll_fn)(struct pollfd *fds, nfds_t nfds, int timeout); + int (*ioctl_fn)(int fd, int cmd, ...); + +- int is_chld; ++ int ues_posix; + } posix_api_t; + + extern posix_api_t *posix_api; +-- +2.8.4.windows.1 + diff --git a/lwip.spec b/lwip.spec index 1b831e82be722bec117c760a183f0a2f22d09616..660c8bd441c24be6bc5761ffe7203a5b47b76842 100644 --- a/lwip.spec +++ b/lwip.spec @@ -4,7 +4,7 @@ Summary: lwip is a small independent implementation of the TCP/IP protocol suite Name: lwip Version: 2.1.2 -Release: 3 +Release: 4 License: BSD URL: http://savannah.nongnu.org/projects/lwip/ Source0: http://download.savannah.nongnu.org/releases/lwip/%{name}-%{version}.zip @@ -37,6 +37,7 @@ Patch9020: 0020-remove-chose_dlsym_handle-function-set-handle-to-RTL.patch Patch9021: 0021-refactor-event-if-ring-is-full-the-node-is-added-to-.patch Patch9022: 0022-notify-app-that-sock-state-changes-to-CLOSE_WAIT.patch Patch9023: 0023-refactor-event-and-checksum-offload-support.patch +Patch9024: 0024-refactor-pkt-read-send-performance.patch BuildRequires: gcc-c++ dos2unix dpdk-devel @@ -78,6 +79,7 @@ find %{_builddir}/%{name}-%{version} -type f -exec dos2unix -q {} \; %patch9021 -p1 %patch9022 -p1 %patch9023 -p1 +%patch9024 -p1 %build cd %{_builddir}/%{name}-%{version}/src @@ -93,6 +95,9 @@ cd %{_builddir}/%{name}-%{version}/src %{_libdir}/liblwip.a %changelog +* Fri Jul 8 2022 xiusailong - 2.1.2-4 +- refactor pkt read send performance + * Tue Jun 07 2022 xiusailong - 2.1.2-3 - support gazelle feature