diff --git a/0002-adapt-lstack.patch b/0002-adapt-lstack.patch new file mode 100644 index 0000000000000000000000000000000000000000..87f36cdd89658315e5eaeb3cae0ee98083bd5731 --- /dev/null +++ b/0002-adapt-lstack.patch @@ -0,0 +1,5569 @@ +From 388525230f809bfa61fe31921b54ebfb6aae57ec Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Fri, 31 Dec 2021 17:32:49 +0800 +Subject: [PATCH] adapt lstack + +--- + src/Makefile | 5 +- + src/api/api_lib.c | 2 + + src/api/api_msg.c | 46 +++ + src/api/dir.mk | 2 +- + src/api/perf.c | 182 ++++++++++++ + src/api/posix_api.c | 156 ++++++++++ + src/api/sockets.c | 558 ++++++++++++++++++++++++++++++++++- + src/api/sys_arch.c | 379 ++++++++++++++++++++++++ + src/api/tcpip.c | 34 ++- + src/core/dir.mk | 8 +- + src/core/init.c | 4 +- + src/core/ip.c | 2 +- + src/core/ipv4/ip4.c | 14 + + src/core/ipv6/ip6.c | 10 + + src/core/mem.c | 6 +- + src/core/memp.c | 4 + + src/core/netif.c | 8 +- + src/core/pbuf.c | 4 + + src/core/stats.c | 13 +- + src/core/tcp.c | 196 +++++++++++- + src/core/tcp_in.c | 101 ++++++- + src/core/tcp_out.c | 25 +- + src/core/timeouts.c | 18 +- + src/core/udp.c | 15 + + src/include/arch/cc.h | 80 ++++- + src/include/arch/perf.h | 155 ++++++++++ + src/include/arch/sys_arch.h | 92 +++++- + src/include/eventpoll.h | 72 +++++ + src/include/hlist.h | 233 +++++++++++++++ + src/include/list.h | 110 +++++++ + src/include/lwip/api.h | 35 +++ + src/include/lwip/debug.h | 1 + + src/include/lwip/def.h | 15 + + src/include/lwip/ip.h | 8 +- + src/include/lwip/memp.h | 17 ++ + src/include/lwip/netif.h | 4 +- + src/include/lwip/opt.h | 62 +++- + src/include/lwip/priv/memp_std.h | 7 + + src/include/lwip/priv/sockets_priv.h | 49 +-- + src/include/lwip/priv/tcp_priv.h | 162 +++++++++- + src/include/lwip/prot/ip4.h | 15 + + src/include/lwip/sockets.h | 67 ++++- + src/include/lwip/stats.h | 4 +- + src/include/lwip/tcp.h | 94 +++++- + src/include/lwip/tcpip.h | 2 +- + src/include/lwip/timeouts.h | 4 + + src/include/lwiplog.h | 81 +++++ + 
src/include/lwipopts.h | 253 ++++++++++++---- + src/include/lwipsock.h | 155 ++++++++++ + src/include/memp_def.h | 66 +++++ + src/include/posix_api.h | 88 ++++++ + src/include/reg_sock.h | 62 ++++ + src/netif/dir.mk | 2 +- + 53 files changed, 3581 insertions(+), 206 deletions(-) + create mode 100644 src/api/perf.c + create mode 100644 src/api/posix_api.c + create mode 100644 src/api/sys_arch.c + create mode 100644 src/include/arch/perf.h + create mode 100644 src/include/eventpoll.h + create mode 100644 src/include/hlist.h + create mode 100644 src/include/list.h + create mode 100644 src/include/lwiplog.h + create mode 100644 src/include/lwipsock.h + create mode 100644 src/include/memp_def.h + create mode 100644 src/include/posix_api.h + create mode 100644 src/include/reg_sock.h + +diff --git a/src/Makefile b/src/Makefile +index 3ecf8d2..1676a71 100644 +--- a/src/Makefile ++++ b/src/Makefile +@@ -2,7 +2,7 @@ LWIP_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) + ROOT_DIR := $(dir $(abspath $(LWIP_DIR))) + + LWIP_INC = $(LWIP_DIR)/include +-#DPDK_INCLUDE_FILE ?= /usr/include/dpdk ++DPDK_INCLUDE_FILE ?= /usr/include/dpdk + + SEC_FLAGS = -fstack-protector-strong -Werror -Wall -Wl,-z,relro,-z,now -Wl,-z,noexecstack -Wtrampolines -fPIC + +@@ -10,7 +10,8 @@ CC = gcc + AR = ar + OPTIMIZATION = -O3 + INC = -I$(LWIP_DIR) \ +- -I$(LWIP_INC) ++ -I$(LWIP_INC) \ ++ -I$(DPDK_INCLUDE_FILE) + + CFLAGS = -g $(OPTIMIZATION) $(INC) $(SEC_FLAGS) + ARFLAGS = crDP +diff --git a/src/api/api_lib.c b/src/api/api_lib.c +index ffa14d6..ba9f3c5 100644 +--- a/src/api/api_lib.c ++++ b/src/api/api_lib.c +@@ -1061,7 +1061,9 @@ netconn_write_vectors_partly(struct netconn *conn, struct netvector *vectors, u1 + /* For locking the core: this _can_ be delayed on low memory/low send buffer, + but if it is, this is done inside api_msg.c:do_write(), so we can use the + non-blocking version here. 
*/ ++ PERF_START(PERF_LAYER_TCP, PERF_POINT_TCP_DATA_SEND); + err = netconn_apimsg(lwip_netconn_do_write, &API_MSG_VAR_REF(msg)); ++ PERF_STOP_INCREASE_COUNT("lwip_netconn_do_write", PERF_LAYER_TCP); + if (err == ERR_OK) { + if (bytes_written != NULL) { + *bytes_written = API_MSG_VAR_REF(msg).msg.w.offset; +diff --git a/src/api/api_msg.c b/src/api/api_msg.c +index 3f08e03..d5a738f 100644 +--- a/src/api/api_msg.c ++++ b/src/api/api_msg.c +@@ -54,6 +54,11 @@ + #include "lwip/mld6.h" + #include "lwip/priv/tcpip_priv.h" + ++#if USE_LIBOS ++#include "lwip/sockets.h" ++#include "lwipsock.h" ++#endif ++ + #include + + /* netconns are polled once per second (e.g. continue write on memory error) */ +@@ -452,6 +457,14 @@ err_tcp(void *arg, err_t err) + old_state = conn->state; + conn->state = NETCONN_NONE; + ++#if USE_LIBOS ++ if (CONN_TYPE_IS_HOST(conn)) { ++ LWIP_DEBUGF(API_MSG_DEBUG, ++ ("linux localhost connection already success, ignore lwip err_tcp fd=%d\n", conn->socket)); ++ return; ++ } ++#endif /* USE_LIBOS */ ++ + SYS_ARCH_UNPROTECT(lev); + + /* Notify the user layer about a connection error. Used to signal select. 
*/ +@@ -595,6 +608,10 @@ accept_function(void *arg, struct tcp_pcb *newpcb, err_t err) + API_EVENT(conn, NETCONN_EVT_RCVPLUS, 0); + } + ++#if USE_LIBOS ++ LWIP_DEBUGF(API_MSG_DEBUG, ("libos incoming connection established\n")); ++ SET_CONN_TYPE_LIBOS(newconn); ++#endif + return ERR_OK; + } + #endif /* LWIP_TCP */ +@@ -1315,6 +1332,31 @@ lwip_netconn_do_connected(void *arg, struct tcp_pcb *pcb, err_t err) + return ERR_VAL; + } + ++#if USE_LIBOS ++ if (CONN_TYPE_IS_HOST(conn)) { ++ LWIP_DEBUGF(API_MSG_DEBUG, ++ ("libos outgoing connection abort fd=%d\n", conn->socket)); ++ return ERR_ABRT; ++ } ++ ++ LWIP_DEBUGF(API_MSG_DEBUG, ("libos outgoing connection established\n")); ++ if (CONN_TYPE_HAS_INPRG(conn) && CONN_TYPE_HAS_HOST(conn)) { ++ int s = conn->socket; ++ struct lwip_sock *sock = get_socket_without_errno(s); ++ ++ if (!!sock && !!sock->epoll_data) { ++ struct epoll_event ee = {0}; ++ ee.data.fd = s; ++ ee.events |= EPOLLIN | EPOLLOUT | EPOLLERR; ++ posix_api->epoll_ctl_fn(sock->epoll_data->fd, EPOLL_CTL_DEL, s, &ee); ++ posix_api->shutdown_fn(s, SHUT_RDWR); ++ LWIP_DEBUGF(API_MSG_DEBUG, ++ ("linux outgoing connection abort fd=%d\n", s)); ++ } ++ } ++ SET_CONN_TYPE_LIBOS(conn); ++#endif ++ + LWIP_ASSERT("conn->state == NETCONN_CONNECT", conn->state == NETCONN_CONNECT); + LWIP_ASSERT("(conn->current_msg != NULL) || conn->in_non_blocking_connect", + (conn->current_msg != NULL) || IN_NONBLOCKING_CONNECT(conn)); +@@ -1338,6 +1380,7 @@ lwip_netconn_do_connected(void *arg, struct tcp_pcb *pcb, err_t err) + if (was_blocking) { + sys_sem_signal(op_completed_sem); + } ++ + return ERR_OK; + } + #endif /* LWIP_TCP */ +@@ -1372,6 +1415,7 @@ lwip_netconn_do_connect(void *m) + #endif /* LWIP_UDP */ + #if LWIP_TCP + case NETCONN_TCP: ++ PERF_START(PERF_LAYER_TCP, PERF_POINT_TCP_SYN_SEND); + /* Prevent connect while doing any other action. 
*/ + if (msg->conn->state == NETCONN_CONNECT) { + err = ERR_ALREADY; +@@ -1389,6 +1433,7 @@ lwip_netconn_do_connect(void *m) + err = ERR_INPROGRESS; + } else { + msg->conn->current_msg = msg; ++ PERF_STOP_INCREASE_COUNT("lwip_netconn_do_connect", PERF_LAYER_TCP); + /* sys_sem_signal() is called from lwip_netconn_do_connected (or err_tcp()), + when the connection is established! */ + #if LWIP_TCPIP_CORE_LOCKING +@@ -1402,6 +1447,7 @@ lwip_netconn_do_connect(void *m) + } + } + } ++ PERF_STOP_INCREASE_COUNT("lwip_netconn_do_connect", PERF_LAYER_TCP); + break; + #endif /* LWIP_TCP */ + default: +diff --git a/src/api/dir.mk b/src/api/dir.mk +index 72142ab..afbf863 100644 +--- a/src/api/dir.mk ++++ b/src/api/dir.mk +@@ -1,3 +1,3 @@ +-SRC = api_lib.c api_msg.c err.c netbuf.c netdb.c netifapi.c sockets.c tcpip.c ++SRC = api_lib.c api_msg.c err.c netbuf.c netdb.c netifapi.c sockets.c tcpip.c perf.c posix_api.c sys_arch.c + + $(eval $(call register_dir, api, $(SRC))) +diff --git a/src/api/perf.c b/src/api/perf.c +new file mode 100644 +index 0000000..1c2a273 +--- /dev/null ++++ b/src/api/perf.c +@@ -0,0 +1,182 @@ ++/* ++ * Copyright (c) 2001-2004 Swedish Institute of Computer Science. ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without modification, ++ * are permitted provided that the following conditions are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright notice, ++ * this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright notice, ++ * this list of conditions and the following disclaimer in the documentation ++ * and/or other materials provided with the distribution. ++ * 3. The name of the author may not be used to endorse or promote products ++ * derived from this software without specific prior written permission. 
++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT ++ * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT ++ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING ++ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY ++ * OF SUCH DAMAGE. ++ * ++ * This file is part of the lwIP TCP/IP stack. ++ * ++ * Author: Huawei Technologies ++ * ++ */ ++ ++#include "arch/perf.h" ++ ++#include ++ ++#include ++ ++#if LWIP_RECORD_PERF ++ ++#define SIG_FUNC_NUM 3 ++ ++#define SIG_STATS_DISPLAY 38 ++#define SIG_PERF_BEGIN 39 ++#define SIG_PERF_END 40 ++ ++typedef void (*pSignalFunc) (int); ++static void signal_stats_display(int s); ++static void signal_perf_begin(int s); ++static void signal_perf_end(int s); ++ ++uint32_t g_record_perf; ++__thread uint64_t g_timeTaken[PERF_POINT_END]; ++__thread int g_perfPoint[PERF_LAYER_END]; ++__thread struct timespec tvStart[PERF_LAYER_END]; ++volatile uint64_t g_perfMaxtime[PERF_POINT_END]; ++volatile uint64_t g_astPacketCnt[PERF_POINT_END]; ++volatile uint64_t g_astPacketProcTime[PERF_POINT_END]; ++ ++char *g_ppLayerName[PERF_POINT_END] = { ++ "IP_RECV", ++ "TCP_DATA_RECV", ++ "UDP_PARTIAL", ++ "TCP_SYN_RECV", ++ "TCP_SYN_ACK_SEND", ++ "TCP_ACK_RECV", ++ "TCP_SYN_SEND", ++ "TCP_SYN_ACK_RECV", ++ "TCP_ACK_SEND", ++ "TCP_DATA_SEND", ++ "IP_SEND" ++}; ++ ++static int gsig_arr[SIG_FUNC_NUM] = { ++ SIG_STATS_DISPLAY, ++ SIG_PERF_BEGIN, ++ SIG_PERF_END ++}; ++ ++static pSignalFunc g_Funcs[SIG_FUNC_NUM] = { ++ signal_stats_display, ++ 
signal_perf_begin, ++ signal_perf_end, ++}; ++ ++static void print_perf_data_and_reset() ++{ ++ int i; ++ printf("\n********* PERF DATA START*************\n"); ++ for (i = 0; i < PERF_POINT_END; i++) { ++ printf("%-20s Total: PacketProcTime: %-15"PRIu64", Maxtime: %-15"PRIu64", packetCnt: %-15"PRIu64"\n", ++ g_ppLayerName[i], __sync_fetch_and_or(&g_astPacketProcTime[i], 0), ++ __sync_fetch_and_or(&g_perfMaxtime[i], 0), ++ __sync_fetch_and_or(&g_astPacketCnt[i], 0)); ++ ++ if (__sync_fetch_and_or(&g_astPacketProcTime[i], 0) && __sync_fetch_and_or(&g_astPacketCnt[i], 0)) { ++ printf("%-20s Average: PacketProcTime: %-15lf, MaxTime: %-15"PRIu64"\n", g_ppLayerName[i], ++ (double)__sync_fetch_and_or(&g_astPacketProcTime[i], 0) / (double)__sync_fetch_and_or(&g_astPacketCnt[i], 0), ++ __sync_or_and_fetch(&g_perfMaxtime[i], 0)); ++ } ++ ++ __sync_fetch_and_and (&g_astPacketProcTime[i], 0); ++ __sync_fetch_and_and (&g_astPacketCnt[i], 0); ++ __sync_fetch_and_and (&g_perfMaxtime[i], 0); ++ } ++ printf("\n********* PERF DATA END*************\n"); ++} ++ ++static void signal_stats_display(int s) ++{ ++ struct sigaction s_test; ++ printf("Received signal %d, stats display.\n", s); ++ stats_display(); ++ s_test.sa_handler = (void *) signal_stats_display; ++ if (sigemptyset(&s_test.sa_mask) != 0) { ++ printf("sigemptyset failed.\n"); ++ } ++ s_test.sa_flags = SA_RESETHAND; ++ if (sigaction(s, &s_test, NULL) != 0) { ++ printf("Could not register %d signal handler.\n", s); ++ } ++} ++ ++static void signal_perf_begin(int s) ++{ ++ struct sigaction s_test; ++ printf("Received signal %d, perf_begin.\n", s); ++ g_record_perf = 1; ++ s_test.sa_handler = (void *) signal_perf_begin; ++ if (sigemptyset(&s_test.sa_mask) != 0) { ++ printf("sigemptyset failed.\n"); ++ } ++ s_test.sa_flags = SA_RESETHAND; ++ if (sigaction(s, &s_test, NULL) != 0) { ++ printf("Could not register %d signal handler.\n", s); ++ } ++} ++ ++static void signal_perf_end(int s) ++{ ++ struct sigaction s_test; ++ 
printf("Received signal %d, perf_end\n", s); ++ g_record_perf = 0; ++ print_perf_data_and_reset(); ++ s_test.sa_handler = (void *) signal_perf_end; ++ if (sigemptyset(&s_test.sa_mask) != 0) { ++ printf("sigemptyset failed.\n"); ++ } ++ s_test.sa_flags = SA_RESETHAND; ++ if (sigaction(s, &s_test, NULL) != 0) { ++ printf("Could not register %d signal handler.\n", s); ++ } ++} ++ ++int check_layer_point(int layer, int point) ++{ ++ if (point == g_perfPoint[layer]) { ++ return 1; ++ } ++ return 0; ++} ++ ++int perf_init(void) ++{ ++ int i; ++ struct sigaction s_test; ++ for (i = 0; i < SIG_FUNC_NUM; i++) { ++ s_test.sa_handler = (void *) g_Funcs[i]; ++ if (sigemptyset(&s_test.sa_mask) != 0) { ++ printf("sigemptyset failed.\n"); ++ return 1; ++ } ++ ++ s_test.sa_flags = SA_RESETHAND; ++ if (sigaction(gsig_arr[i], &s_test, NULL) != 0) { ++ printf("Could not register %d signal handler.\n", gsig_arr[i]); ++ return 1; ++ } ++ } ++ return 0; ++} ++#endif +diff --git a/src/api/posix_api.c b/src/api/posix_api.c +new file mode 100644 +index 0000000..a917cea +--- /dev/null ++++ b/src/api/posix_api.c +@@ -0,0 +1,156 @@ ++/* ++ * Copyright (c) 2001-2004 Swedish Institute of Computer Science. ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without modification, ++ * are permitted provided that the following conditions are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright notice, ++ * this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright notice, ++ * this list of conditions and the following disclaimer in the documentation ++ * and/or other materials provided with the distribution. ++ * 3. The name of the author may not be used to endorse or promote products ++ * derived from this software without specific prior written permission. 
++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT ++ * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT ++ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING ++ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY ++ * OF SUCH DAMAGE. ++ * ++ * This file is part of the lwIP TCP/IP stack. ++ * ++ * Author: Huawei Technologies ++ * ++ */ ++ ++#define _GNU_SOURCE ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "lwip/err.h" ++#include "lwipsock.h" ++ ++posix_api_t *posix_api; ++posix_api_t posix_api_val; ++ ++static int chld_is_epfd(int fd) ++{ ++ return 0; ++} ++ ++static struct lwip_sock *chld_get_socket(int fd) ++{ ++ return NULL; ++} ++ ++void posix_api_fork(void) ++{ ++ /* lstack helper api */ ++ posix_api->is_chld = 1; ++ posix_api->is_epfd = chld_is_epfd; ++ posix_api->get_socket = chld_get_socket; ++} ++ ++static int chose_dlsym_handle(void *__restrict* khandle) ++{ ++ void *dlhandle; ++ int (*gazelle_epoll_create)(int size); ++ dlhandle = dlopen ("liblstack.so", RTLD_LAZY); ++ if (dlhandle == NULL) { ++ return ERR_IF; ++ } ++ ++ gazelle_epoll_create = dlsym(dlhandle, "epoll_create"); ++ if (gazelle_epoll_create == NULL) { ++ return ERR_MEM; ++ } ++ ++ dlclose(dlhandle); ++ ++ *khandle = RTLD_NEXT; ++ if (dlsym(*khandle, "epoll_create") == gazelle_epoll_create) { ++ RTE_LOG(ERR, EAL, "posix api use RTLD_DEFAULT\n"); ++ *khandle = RTLD_DEFAULT; ++ } else { ++ RTE_LOG(ERR, EAL, "posix api use 
RTLD_NEXT\n"); ++ } ++ ++ return ERR_OK; ++} ++ ++int posix_api_init(void) ++{ ++/* the symbol we use here won't be NULL, so we don't need dlerror() ++ to test error */ ++#define CHECK_DLSYM_RET_RETURN(ret) do { \ ++ if ((ret) == NULL) \ ++ goto err_out; \ ++ } while (0) ++ ++ posix_api = &posix_api_val; ++ ++ void *__restrict handle; ++ int ret = chose_dlsym_handle(&handle); ++ if (ret != ERR_OK) { ++ return ret; ++ } ++ ++ /* glibc standard api */ ++ CHECK_DLSYM_RET_RETURN(posix_api->socket_fn = dlsym(handle, "socket")); ++ CHECK_DLSYM_RET_RETURN(posix_api->accept_fn = dlsym(handle, "accept")); ++ CHECK_DLSYM_RET_RETURN(posix_api->accept4_fn = dlsym(handle, "accept4")); ++ CHECK_DLSYM_RET_RETURN(posix_api->bind_fn = dlsym(handle, "bind")); ++ CHECK_DLSYM_RET_RETURN(posix_api->listen_fn = dlsym(handle, "listen")); ++ CHECK_DLSYM_RET_RETURN(posix_api->connect_fn = dlsym(handle, "connect")); ++ CHECK_DLSYM_RET_RETURN(posix_api->setsockopt_fn = dlsym(handle, "setsockopt")); ++ CHECK_DLSYM_RET_RETURN(posix_api->getsockopt_fn = dlsym(handle, "getsockopt")); ++ CHECK_DLSYM_RET_RETURN(posix_api->getpeername_fn = dlsym(handle, "getpeername")); ++ CHECK_DLSYM_RET_RETURN(posix_api->getsockname_fn = dlsym(handle, "getsockname")); ++ CHECK_DLSYM_RET_RETURN(posix_api->shutdown_fn = dlsym(handle, "shutdown")); ++ CHECK_DLSYM_RET_RETURN(posix_api->close_fn = dlsym(handle, "close")); ++ CHECK_DLSYM_RET_RETURN(posix_api->read_fn = dlsym(handle, "read")); ++ CHECK_DLSYM_RET_RETURN(posix_api->write_fn = dlsym(handle, "write")); ++ CHECK_DLSYM_RET_RETURN(posix_api->recv_fn = dlsym(handle, "recv")); ++ CHECK_DLSYM_RET_RETURN(posix_api->send_fn = dlsym(handle, "send")); ++ CHECK_DLSYM_RET_RETURN(posix_api->recv_msg = dlsym(handle, "recvmsg")); ++ CHECK_DLSYM_RET_RETURN(posix_api->send_msg = dlsym(handle, "sendmsg")); ++ CHECK_DLSYM_RET_RETURN(posix_api->recv_from = dlsym(handle, "recvfrom")); ++ CHECK_DLSYM_RET_RETURN(posix_api->send_to = dlsym(handle, "sendto")); ++ 
CHECK_DLSYM_RET_RETURN(posix_api->fcntl_fn = dlsym(handle, "fcntl")); ++ CHECK_DLSYM_RET_RETURN(posix_api->fcntl64_fn = dlsym(handle, "fcntl64")); ++ CHECK_DLSYM_RET_RETURN(posix_api->pipe_fn = dlsym(handle, "pipe")); ++ CHECK_DLSYM_RET_RETURN(posix_api->epoll_create_fn = dlsym(handle, "epoll_create")); ++ CHECK_DLSYM_RET_RETURN(posix_api->epoll_ctl_fn = dlsym(handle, "epoll_ctl")); ++ CHECK_DLSYM_RET_RETURN(posix_api->epoll_wait_fn = dlsym(handle, "epoll_wait")); ++ CHECK_DLSYM_RET_RETURN(posix_api->fork_fn = dlsym(handle, "fork")); ++ CHECK_DLSYM_RET_RETURN(posix_api->eventfd_fn = dlsym(handle, "eventfd")); ++ CHECK_DLSYM_RET_RETURN(posix_api->sigaction_fn = dlsym(handle, "sigaction")); ++ CHECK_DLSYM_RET_RETURN(posix_api->poll_fn = dlsym(handle, "poll")); ++ CHECK_DLSYM_RET_RETURN(posix_api->ioctl_fn = dlsym(handle, "ioctl")); ++ ++ /* lstack helper api */ ++ posix_api->get_socket = get_socket; ++ posix_api->is_epfd = lwip_is_epfd; ++ posix_api->epoll_close_fn = lwip_epoll_close; ++ ++ /* support fork */ ++ posix_api->is_chld = 0; ++ return ERR_OK; ++ ++err_out: ++ return ERR_MEM; ++#undef CHECK_DLSYM_RET_RETURN ++} +diff --git a/src/api/sockets.c b/src/api/sockets.c +index 7852635..3262c1b 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -62,6 +62,11 @@ + #include + #endif + ++#if USE_LIBOS ++#include ++#include "lwipsock.h" ++#endif ++ + #include + + #ifdef LWIP_HOOK_FILENAME +@@ -85,13 +90,29 @@ + #define API_SELECT_CB_VAR_ALLOC(name, retblock) API_VAR_ALLOC_EXT(struct lwip_select_cb, MEMP_SELECT_CB, name, retblock) + #define API_SELECT_CB_VAR_FREE(name) API_VAR_FREE(MEMP_SELECT_CB, name) + ++#if USE_LIBOS ++enum KERNEL_LWIP_PATH { ++ PATH_KERNEL = 0, ++ PATH_LWIP, ++ PATH_ERR, ++}; ++#endif ++ + #if LWIP_IPV4 ++#if USE_LIBOS ++#define IP4ADDR_PORT_TO_SOCKADDR(sin, ipaddr, port) do { \ ++ (sin)->sin_family = AF_INET; \ ++ (sin)->sin_port = lwip_htons((port)); \ ++ inet_addr_from_ip4addr(&(sin)->sin_addr, ipaddr); \ ++ memset((sin)->sin_zero, 0, 
SIN_ZERO_LEN); }while(0) ++#else + #define IP4ADDR_PORT_TO_SOCKADDR(sin, ipaddr, port) do { \ + (sin)->sin_len = sizeof(struct sockaddr_in); \ + (sin)->sin_family = AF_INET; \ + (sin)->sin_port = lwip_htons((port)); \ + inet_addr_from_ip4addr(&(sin)->sin_addr, ipaddr); \ + memset((sin)->sin_zero, 0, SIN_ZERO_LEN); }while(0) ++#endif /* USE_LIBOS */ + #define SOCKADDR4_TO_IP4ADDR_PORT(sin, ipaddr, port) do { \ + inet_addr_to_ip4addr(ip_2_ip4(ipaddr), &((sin)->sin_addr)); \ + (port) = lwip_ntohs((sin)->sin_port); }while(0) +@@ -257,7 +278,12 @@ static void lwip_socket_drop_registered_mld6_memberships(int s); + #endif /* LWIP_IPV6_MLD */ + + /** The global array of available sockets */ ++#if USE_LIBOS ++uint32_t sockets_num; ++struct lwip_sock *sockets; ++#else + static struct lwip_sock sockets[NUM_SOCKETS]; ++#endif /* USE_LIBOS */ + + #if LWIP_SOCKET_SELECT || LWIP_SOCKET_POLL + #if LWIP_TCPIP_CORE_LOCKING +@@ -285,7 +311,7 @@ static struct lwip_select_cb *select_cb_list; + + /* Forward declaration of some functions */ + #if LWIP_SOCKET_SELECT || LWIP_SOCKET_POLL +-static void event_callback(struct netconn *conn, enum netconn_evt evt, u16_t len); ++void event_callback(struct netconn *conn, enum netconn_evt evt, u16_t len); + #define DEFAULT_SOCKET_EVENTCB event_callback + static void select_check_waiters(int s, int has_recvevent, int has_sendevent, int has_errevent); + #else +@@ -411,7 +437,13 @@ static struct lwip_sock * + tryget_socket_unconn_nouse(int fd) + { + int s = fd - LWIP_SOCKET_OFFSET; +- if ((s < 0) || (s >= NUM_SOCKETS)) { ++ ++#if USE_LIBOS ++ if ((s < 0) || (s >= sockets_num)) ++#else ++ if ((s < 0) || (s >= NUM_SOCKETS)) ++#endif /* USE_LIBOS */ ++ { + LWIP_DEBUGF(SOCKETS_DEBUG, ("tryget_socket_unconn(%d): invalid\n", fd)); + return NULL; + } +@@ -475,8 +507,13 @@ tryget_socket(int fd) + * @param fd externally used socket index + * @return struct lwip_sock for the socket or NULL if not found + */ ++#if USE_LIBOS ++struct lwip_sock * ++get_socket(int 
fd) ++#else + static struct lwip_sock * + get_socket(int fd) ++#endif /* USE_LIBOS */ + { + struct lwip_sock *sock = tryget_socket(fd); + if (!sock) { +@@ -489,6 +526,24 @@ get_socket(int fd) + return sock; + } + ++#if USE_LIBOS ++/** ++ * Map a externally used socket index to the internal socket representation. ++ * ++ * @param s externally used socket index ++ * @return struct lwip_sock for the socket or NULL if not found without ++ * checking. ++ */ ++struct lwip_sock * ++get_socket_by_fd(int fd) ++{ ++ if ((fd < LWIP_SOCKET_OFFSET) || (fd >= sockets_num + LWIP_SOCKET_OFFSET)) { ++ return NULL; ++ } ++ return &sockets[fd - LWIP_SOCKET_OFFSET]; ++} ++#endif /* USE_LIBOS */ ++ + /** + * Allocate a new socket for a given netconn. + * +@@ -504,6 +559,62 @@ alloc_socket(struct netconn *newconn, int accepted) + SYS_ARCH_DECL_PROTECT(lev); + LWIP_UNUSED_ARG(accepted); + ++#if USE_LIBOS ++ int type, protocol = 0, domain = AF_INET; ++ switch (NETCONNTYPE_GROUP(newconn->type)) { ++ case NETCONN_RAW: ++ type = SOCK_RAW; ++ break; ++ case NETCONN_UDPLITE: ++ case NETCONN_UDP: ++ type = SOCK_DGRAM; ++ break; ++ case NETCONN_TCP: ++ type = SOCK_STREAM; ++ break; ++ default: ++ type = -1; ++ break; ++ } ++ ++ SYS_ARCH_PROTECT(lev); ++ i = posix_api->socket_fn(domain, type, protocol); ++ if (i == -1) { ++ goto err; ++ } ++ ++ if ((i < LWIP_SOCKET_OFFSET) || (i >= sockets_num + LWIP_SOCKET_OFFSET)) { ++ goto err; ++ } ++ ++ if (!sockets[i].conn && (sockets[i].select_waiting == 0)) { ++ /*initialize state as NETCONN_HOST | NETCONN_LIBOS, ++ *if connection accepted and alloc_socket called, it can be only NETCONN_LIBOS*/ ++ if (accepted) ++ SET_CONN_TYPE_LIBOS(newconn); ++ else ++ SET_CONN_TYPE_LIBOS_OR_HOST(newconn); ++ sockets[i].conn = newconn; ++ /* The socket is not yet known to anyone, so no need to protect ++ after having marked it as used. 
*/ ++ SYS_ARCH_UNPROTECT(lev); ++ sockets[i].lastdata.pbuf = NULL; ++ sockets[i].rcvevent = 0; ++ /* TCP sendbuf is empty, but the socket is not yet writable until connected ++ * (unless it has been created by accept()). */ ++ sockets[i].sendevent = (NETCONNTYPE_GROUP(newconn->type) == NETCONN_TCP ? (accepted != 0) : 1); ++ sockets[i].errevent = 0; ++ sockets[i].epoll_data = NULL; ++ init_list_node_null(&sockets[i].list); ++ return i + LWIP_SOCKET_OFFSET; ++ } ++ ++err: ++ posix_api->close_fn(i); ++ SYS_ARCH_UNPROTECT(lev); ++ return -1; ++#else /* USE_LIBOS */ ++ + /* allocate a new socket identifier */ + for (i = 0; i < NUM_SOCKETS; ++i) { + /* Protect socket array */ +@@ -535,6 +646,8 @@ alloc_socket(struct netconn *newconn, int accepted) + SYS_ARCH_UNPROTECT(lev); + } + return -1; ++ ++#endif /* USE_LIBOS */ + } + + /** Free a socket (under lock) +@@ -629,10 +742,43 @@ lwip_accept(int s, struct sockaddr *addr, socklen_t *addrlen) + SYS_ARCH_DECL_PROTECT(lev); + + LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_accept(%d)...\n", s)); ++#if USE_LIBOS ++ int sys_errno = 0; ++ ++ sock = posix_api->get_socket(s); ++ /*AF_UNIX case*/ ++ if (!sock) { ++ if (rearm_accept_fd(s) < 0) { ++ LWIP_DEBUGF(SOCKETS_DEBUG, ++ ("failed to rearm accept fd=%d errno=%d\n", s, errno)); ++ } ++ return posix_api->accept_fn(s, addr, addrlen); ++ } ++ ++ /*for AF_INET, we may try both linux and lwip*/ ++ if (!CONN_TYPE_HAS_LIBOS_AND_HOST(sock->conn)) { ++ LWIP_DEBUGF(SOCKETS_DEBUG, ("conn->type has libos and host bits")); ++ set_errno(EINVAL); ++ return -1; ++ } ++ ++ if (rearm_accept_fd(s) < 0) { ++ LWIP_DEBUGF(SOCKETS_DEBUG, ++ ("failed to rearm accept fd=%d errno=%d\n", s, errno)); ++ } ++ ++ /* raise accept syscall in palce */ ++ newsock = posix_api->accept_fn(s, addr, addrlen); ++ if (newsock >= 0) { ++ return newsock; ++ } ++ sys_errno = errno; ++#else + sock = get_socket(s); + if (!sock) { + return -1; + } ++#endif + + /* wait for a new connection */ + err = netconn_accept(sock->conn, 
&newconn); +@@ -646,6 +792,9 @@ lwip_accept(int s, struct sockaddr *addr, socklen_t *addrlen) + sock_set_errno(sock, err_to_errno(err)); + } + done_socket(sock); ++#if USE_LIBOS ++ set_errno(sys_errno); ++#endif /* USE_LIBOS */ + return -1; + } + LWIP_ASSERT("newconn != NULL", newconn != NULL); +@@ -657,7 +806,11 @@ lwip_accept(int s, struct sockaddr *addr, socklen_t *addrlen) + done_socket(sock); + return -1; + } ++#if USE_LIBOS ++ LWIP_ASSERT("invalid socket index", (newsock >= LWIP_SOCKET_OFFSET) && (newsock < sockets_num + LWIP_SOCKET_OFFSET)); ++#else + LWIP_ASSERT("invalid socket index", (newsock >= LWIP_SOCKET_OFFSET) && (newsock < NUM_SOCKETS + LWIP_SOCKET_OFFSET)); ++#endif /* USE_LIBOS */ + nsock = &sockets[newsock - LWIP_SOCKET_OFFSET]; + + /* See event_callback: If data comes in right away after an accept, even +@@ -695,9 +848,11 @@ lwip_accept(int s, struct sockaddr *addr, socklen_t *addrlen) + } + + IPADDR_PORT_TO_SOCKADDR(&tempaddr, &naddr, port); ++#if !USE_LIBOS + if (*addrlen > tempaddr.sa.sa_len) { + *addrlen = tempaddr.sa.sa_len; + } ++#endif /* USE_LIBOS */ + MEMCPY(addr, &tempaddr, *addrlen); + + LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_accept(%d) returning new sock=%d addr=", s, newsock)); +@@ -720,11 +875,24 @@ lwip_bind(int s, const struct sockaddr *name, socklen_t namelen) + ip_addr_t local_addr; + u16_t local_port; + err_t err; +- ++#if USE_LIBOS ++ sock = posix_api->get_socket(s); ++ /*AF_UNIX case*/ ++ if (!sock) { ++ return posix_api->bind_fn(s, name, namelen); ++ } ++ /*for AF_INET, we may try both linux and lwip*/ ++ if (!CONN_TYPE_HAS_LIBOS_AND_HOST(sock->conn)) { ++ LWIP_DEBUGF(SOCKETS_DEBUG, ("conn->type has libos and host bits")); ++ set_errno(EINVAL); ++ return -1; ++ } ++#else + sock = get_socket(s); + if (!sock) { + return -1; + } ++#endif + + if (!SOCK_ADDR_TYPE_MATCH(name, sock)) { + /* sockaddr does not match socket type (IPv4/IPv6) */ +@@ -744,6 +912,18 @@ lwip_bind(int s, const struct sockaddr *name, socklen_t namelen) + 
ip_addr_debug_print_val(SOCKETS_DEBUG, local_addr); + LWIP_DEBUGF(SOCKETS_DEBUG, (" port=%"U16_F")\n", local_port)); + ++#if USE_LIBOS ++ /* Supports kernel NIC IP address. */ ++ int ret = posix_api->bind_fn(s, name, namelen); ++ if (ret < 0) { ++ LWIP_DEBUGF(SOCKETS_DEBUG, ("bind syscall failed\n")); ++ /* bind must succeed on both linux and libos */ ++ if (!is_host_ipv4(local_addr.addr)) { ++ return ret; ++ } ++ } ++#endif /* USE_LIBOS */ ++ + #if LWIP_IPV4 && LWIP_IPV6 + /* Dual-stack: Unmap IPv4 mapped IPv6 addresses */ + if (IP_IS_V6_VAL(local_addr) && ip6_addr_isipv4mappedipv6(ip_2_ip6(&local_addr))) { +@@ -776,10 +956,29 @@ lwip_close(int s) + + LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_close(%d)\n", s)); + ++#if USE_LIBOS ++ int ret; ++ if (posix_api->is_epfd(s)) { ++ return posix_api->epoll_close_fn(s); ++ } ++ ++ ret = posix_api->close_fn(s); ++ if (ret < 0) ++ return ret; ++ if (posix_api->is_chld == 0) ++ clean_host_fd(s); ++ ++ sock = posix_api->get_socket(s); ++ /*AF_UNIX case*/ ++ if (!sock) { ++ return ret; ++ } ++#else + sock = get_socket(s); + if (!sock) { + return -1; + } ++#endif /* USE_LIBOS */ + + if (sock->conn != NULL) { + is_tcp = NETCONNTYPE_GROUP(netconn_type(sock->conn)) == NETCONN_TCP; +@@ -803,6 +1002,13 @@ lwip_close(int s) + return -1; + } + ++#if USE_LIBOS ++ sock->epoll = LIBOS_EPOLLNONE; ++ sock->events = 0; ++ sock->epoll_data = NULL; ++ list_del_node_null(&sock->list); ++#endif ++ + free_socket(sock, is_tcp); + set_errno(0); + return 0; +@@ -814,10 +1020,28 @@ lwip_connect(int s, const struct sockaddr *name, socklen_t namelen) + struct lwip_sock *sock; + err_t err; + ++#if USE_LIBOS ++ int ret; ++ ++ sock = posix_api->get_socket(s); ++ if (!sock) { ++ return posix_api->connect_fn(s, name, namelen); ++ } ++ ++ /* raise connect syscall in place */ ++ ADD_CONN_TYPE_INPRG(sock->conn); ++ ret = posix_api->connect_fn(s, name, namelen); ++ if (!ret) { ++ SET_CONN_TYPE_HOST(sock->conn); ++ LWIP_DEBUGF(SOCKETS_DEBUG, ("linux connect succeed 
fd=%d\n", s)); ++ return ret; ++ } ++#else + sock = get_socket(s); + if (!sock) { + return -1; + } ++#endif + + if (!SOCK_ADDR_TYPE_MATCH_OR_UNSPEC(name, sock)) { + /* sockaddr does not match socket type (IPv4/IPv6) */ +@@ -862,6 +1086,11 @@ lwip_connect(int s, const struct sockaddr *name, socklen_t namelen) + return -1; + } + ++#if USE_LIBOS ++ LWIP_DEBUGF(SOCKETS_DEBUG, ("libos connect succeed fd=%d\n",s)); ++ SET_CONN_TYPE_LIBOS(sock->conn); ++#endif /* USE_LIBOS */ ++ + LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_connect(%d) succeeded\n", s)); + sock_set_errno(sock, 0); + done_socket(sock); +@@ -884,10 +1113,29 @@ lwip_listen(int s, int backlog) + + LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_listen(%d, backlog=%d)\n", s, backlog)); + ++#if USE_LIBOS ++ int ret; ++ ++ sock = posix_api->get_socket(s); ++ /*AF_UNIX case*/ ++ if (!sock) { ++ return posix_api->listen_fn(s, backlog); ++ } ++ /*for AF_INET, we may try both linux and lwip*/ ++ if (!CONN_TYPE_HAS_LIBOS_AND_HOST(sock->conn)) { ++ LWIP_DEBUGF(SOCKETS_DEBUG, ("conn->type has libos and host bits")); ++ set_errno(EADDRINUSE); ++ return -1; ++ } ++ ++ if ((ret = posix_api->listen_fn(s, backlog)) == -1) ++ return ret; ++#else + sock = get_socket(s); + if (!sock) { + return -1; + } ++#endif + + /* limit the "backlog" parameter to fit in an u8_t */ + backlog = LWIP_MIN(LWIP_MAX(backlog, 0), 0xff); +@@ -919,6 +1167,9 @@ static ssize_t + lwip_recv_tcp(struct lwip_sock *sock, void *mem, size_t len, int flags) + { + u8_t apiflags = NETCONN_NOAUTORCVD; ++#if USE_LIBOS ++ apiflags = 0; ++#endif + ssize_t recvd = 0; + ssize_t recv_left = (len <= SSIZE_MAX) ? (ssize_t)len : SSIZE_MAX; + +@@ -938,6 +1189,13 @@ lwip_recv_tcp(struct lwip_sock *sock, void *mem, size_t len, int flags) + /* Check if there is data left from the last recv operation. 
*/ + if (sock->lastdata.pbuf) { + p = sock->lastdata.pbuf; ++#if USE_LIBOS ++ if ((flags & MSG_PEEK) == 0) { ++ if ((NETCONNTYPE_GROUP(netconn_type(sock->conn)) == NETCONN_TCP)) { ++ del_epoll_event(sock->conn, EPOLLIN); ++ } ++ } ++#endif + } else { + /* No data was left from the previous operation, so we try to get + some from the network. */ +@@ -1008,10 +1266,22 @@ lwip_recv_tcp(struct lwip_sock *sock, void *mem, size_t len, int flags) + /* @todo: do we need to support peeking more than one pbuf? */ + } while ((recv_left > 0) && !(flags & MSG_PEEK)); + lwip_recv_tcp_done: +- if ((recvd > 0) && !(flags & MSG_PEEK)) { +- /* ensure window update after copying all data */ +- netconn_tcp_recvd(sock->conn, (size_t)recvd); ++#if USE_LIBOS ++ if (apiflags & NETCONN_NOAUTORCVD) ++#endif ++ { ++ if ((recvd > 0) && !(flags & MSG_PEEK)) { ++ /* ensure window update after copying all data */ ++ netconn_tcp_recvd(sock->conn, (size_t)recvd); ++ } + } ++#if USE_LIBOS ++ if ((flags & MSG_PEEK) == 0) { ++ if (((NETCONNTYPE_GROUP(netconn_type(sock->conn)) == NETCONN_TCP)) && sock->lastdata.pbuf) { ++ add_epoll_event(sock->conn, EPOLLIN); ++ } ++ } ++#endif + sock_set_errno(sock, 0); + return recvd; + } +@@ -1040,11 +1310,13 @@ lwip_sock_make_addr(struct netconn *conn, ip_addr_t *fromaddr, u16_t port, + #endif /* LWIP_IPV4 && LWIP_IPV6 */ + + IPADDR_PORT_TO_SOCKADDR(&saddr, fromaddr, port); ++#if !USE_LIBOS + if (*fromlen < saddr.sa.sa_len) { + truncated = 1; + } else if (*fromlen > saddr.sa.sa_len) { + *fromlen = saddr.sa.sa_len; + } ++#endif + MEMCPY(from, &saddr, *fromlen); + return truncated; + } +@@ -1194,6 +1466,43 @@ lwip_recvfrom_udp_raw(struct lwip_sock *sock, int flags, struct msghdr *msg, u16 + return ERR_OK; + } + ++#if USE_LIBOS ++static inline enum KERNEL_LWIP_PATH select_path(int s) ++{ ++ struct lwip_sock *sock; ++ ++ sock = posix_api->get_socket(s); ++ /*AF_UNIX case*/ ++ if (!sock) { ++ if (rearm_host_fd(s) < 0) { ++ LWIP_DEBUGF(SOCKETS_DEBUG, ("failed to rearm 
fd=%d errno=%d\n", s, errno)); ++ } ++ return PATH_KERNEL; ++ } ++ ++ if (CONN_TYPE_HAS_INPRG(sock->conn)) { ++ set_errno(EWOULDBLOCK); ++ return PATH_ERR; ++ } ++ ++ /*for AF_INET, we can try either linux or lwip*/ ++ if (CONN_TYPE_IS_HOST(sock->conn)) { ++ if (rearm_host_fd(s) < 0) { ++ LWIP_DEBUGF(SOCKETS_DEBUG, ("failed to rearm read fd=%d errno=%d\n", s, errno)); ++ } ++ return PATH_KERNEL; ++ } ++ ++ if (!CONN_TYPE_IS_LIBOS(sock->conn)) { ++ LWIP_DEBUGF(SOCKETS_DEBUG, ("conn->type is not libos bit type=%x", netconn_type(sock->conn))); ++ set_errno(EINVAL); ++ return PATH_ERR; ++ } ++ ++ return PATH_LWIP; ++} ++#endif ++ + ssize_t + lwip_recvfrom(int s, void *mem, size_t len, int flags, + struct sockaddr *from, socklen_t *fromlen) +@@ -1201,6 +1510,15 @@ lwip_recvfrom(int s, void *mem, size_t len, int flags, + struct lwip_sock *sock; + ssize_t ret; + ++#if USE_LIBOS ++ enum KERNEL_LWIP_PATH path = select_path(s); ++ if (path == PATH_ERR) { ++ return -1; ++ } else if (path == PATH_KERNEL) { ++ return posix_api->recv_from(s, mem, len, flags, from, fromlen); ++ } ++#endif ++ + LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_recvfrom(%d, %p, %"SZT_F", 0x%x, ..)\n", s, mem, len, flags)); + sock = get_socket(s); + if (!sock) { +@@ -1250,6 +1568,14 @@ lwip_recvfrom(int s, void *mem, size_t len, int flags, + ssize_t + lwip_read(int s, void *mem, size_t len) + { ++#if USE_LIBOS ++ enum KERNEL_LWIP_PATH path = select_path(s); ++ if (path == PATH_ERR) { ++ return -1; ++ } else if (path == PATH_KERNEL) { ++ return posix_api->read_fn(s, mem, len); ++ } ++#endif + return lwip_recvfrom(s, mem, len, 0, NULL, NULL); + } + +@@ -1283,6 +1609,15 @@ lwip_recvmsg(int s, struct msghdr *message, int flags) + int i; + ssize_t buflen; + ++#if USE_LIBOS ++ enum KERNEL_LWIP_PATH path = select_path(s); ++ if (path == PATH_ERR) { ++ return -1; ++ } else if (path == PATH_KERNEL) { ++ return posix_api->recv_msg(s, message, flags); ++ } ++#endif ++ + LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_recvmsg(%d,
message=%p, flags=0x%x)\n", s, (void *)message, flags)); + LWIP_ERROR("lwip_recvmsg: invalid message pointer", message != NULL, return ERR_ARG;); + LWIP_ERROR("lwip_recvmsg: unsupported flags", (flags & ~(MSG_PEEK|MSG_DONTWAIT)) == 0, +@@ -1427,6 +1762,15 @@ lwip_sendmsg(int s, const struct msghdr *msg, int flags) + #endif + err_t err = ERR_OK; + ++#if USE_LIBOS ++ enum KERNEL_LWIP_PATH path = select_path(s); ++ if (path == PATH_ERR) { ++ return -1; ++ } else if (path == PATH_KERNEL) { ++ return posix_api->send_msg(s, msg, flags); ++ } ++#endif ++ + sock = get_socket(s); + if (!sock) { + return -1; +@@ -1436,10 +1780,10 @@ lwip_sendmsg(int s, const struct msghdr *msg, int flags) + sock_set_errno(sock, err_to_errno(ERR_ARG)); done_socket(sock); return -1;); + LWIP_ERROR("lwip_sendmsg: invalid msghdr iov", msg->msg_iov != NULL, + sock_set_errno(sock, err_to_errno(ERR_ARG)); done_socket(sock); return -1;); +- LWIP_ERROR("lwip_sendmsg: maximum iovs exceeded", (msg->msg_iovlen > 0) && (msg->msg_iovlen <= IOV_MAX), +- sock_set_errno(sock, EMSGSIZE); done_socket(sock); return -1;); +- LWIP_ERROR("lwip_sendmsg: unsupported flags", (flags & ~(MSG_DONTWAIT | MSG_MORE)) == 0, +- sock_set_errno(sock, EOPNOTSUPP); done_socket(sock); return -1;); ++ //LWIP_ERROR("lwip_sendmsg: maximum iovs exceeded", (msg->msg_iovlen > 0) && (msg->msg_iovlen <= IOV_MAX), ++ // sock_set_errno(sock, EMSGSIZE); done_socket(sock); return -1;); ++ //LWIP_ERROR("lwip_sendmsg: unsupported flags", (flags & ~(MSG_DONTWAIT | MSG_MORE)) == 0, ++ // sock_set_errno(sock, EOPNOTSUPP); done_socket(sock); return -1;); + + LWIP_UNUSED_ARG(msg->msg_control); + LWIP_UNUSED_ARG(msg->msg_controllen); +@@ -1590,6 +1934,15 @@ lwip_sendto(int s, const void *data, size_t size, int flags, + u16_t remote_port; + struct netbuf buf; + ++#if USE_LIBOS ++ enum KERNEL_LWIP_PATH path = select_path(s); ++ if (path == PATH_ERR) { ++ return -1; ++ } else if (path == PATH_KERNEL) { ++ return posix_api->send_to(s, data, size, flags, 
to, tolen); ++ } ++#endif ++ + sock = get_socket(s); + if (!sock) { + return -1; +@@ -1688,6 +2041,11 @@ lwip_socket(int domain, int type, int protocol) + + LWIP_UNUSED_ARG(domain); /* @todo: check this */ + ++#if USE_LIBOS ++ if ((domain != AF_INET && domain != AF_UNSPEC) || posix_api->is_chld) ++ return posix_api->socket_fn(domain, type, protocol); ++#endif ++ + /* create a netconn */ + switch (type) { + case SOCK_RAW: +@@ -1744,6 +2102,14 @@ lwip_socket(int domain, int type, int protocol) + ssize_t + lwip_write(int s, const void *data, size_t size) + { ++#if USE_LIBOS ++ enum KERNEL_LWIP_PATH path = select_path(s); ++ if (path == PATH_ERR) { ++ return -1; ++ } else if (path == PATH_KERNEL) { ++ return posix_api->write_fn(s, data, size); ++ } ++#endif + return lwip_send(s, data, size, 0); + } + +@@ -2479,7 +2845,7 @@ lwip_poll_should_wake(const struct lwip_select_cb *scb, int fd, int has_recveven + * NETCONN_EVT_ERROR + * This requirement will be asserted in select_check_waiters() + */ +-static void ++void + event_callback(struct netconn *conn, enum netconn_evt evt, u16_t len) + { + int s, check_waiters; +@@ -2528,23 +2894,38 @@ event_callback(struct netconn *conn, enum netconn_evt evt, u16_t len) + if (sock->rcvevent > 1) { + check_waiters = 0; + } ++#if USE_LIBOS ++ add_epoll_event(conn, EPOLLIN); ++#endif + break; + case NETCONN_EVT_RCVMINUS: + sock->rcvevent--; + check_waiters = 0; ++#if USE_LIBOS ++ del_epoll_event(conn, EPOLLIN); ++#endif + break; + case NETCONN_EVT_SENDPLUS: + if (sock->sendevent) { + check_waiters = 0; + } + sock->sendevent = 1; ++#if USE_LIBOS ++ add_epoll_event(conn, EPOLLOUT); ++#endif + break; + case NETCONN_EVT_SENDMINUS: + sock->sendevent = 0; + check_waiters = 0; ++#if USE_LIBOS ++ del_epoll_event(conn, EPOLLOUT); ++#endif + break; + case NETCONN_EVT_ERROR: + sock->errevent = 1; ++#if USE_LIBOS ++ add_epoll_event(conn, EPOLLERR); ++#endif + break; + default: + LWIP_ASSERT("unknown event", 0); +@@ -2739,9 +3120,11 @@ 
lwip_getaddrname(int s, struct sockaddr *name, socklen_t *namelen, u8_t local) + ip_addr_debug_print_val(SOCKETS_DEBUG, naddr); + LWIP_DEBUGF(SOCKETS_DEBUG, (" port=%"U16_F")\n", port)); + ++#if !USE_LIBOS + if (*namelen > saddr.sa.sa_len) { + *namelen = saddr.sa.sa_len; + } ++#endif + MEMCPY(name, &saddr, *namelen); + + sock_set_errno(sock, 0); +@@ -2752,12 +3135,41 @@ lwip_getaddrname(int s, struct sockaddr *name, socklen_t *namelen, u8_t local) + int + lwip_getpeername(int s, struct sockaddr *name, socklen_t *namelen) + { ++#if USE_LIBOS ++ struct lwip_sock *sock; ++ ++ sock = posix_api->get_socket(s); ++ if (!sock) { ++ return posix_api->getpeername_fn(s, name, namelen); ++ } ++ /*for AF_INET, if has only host type bit, just call linux api, ++ *if has libos and host type bits, it's a not connected fd, call ++ *linux api and return -1(errno == ENOTCONN) is also ok*/ ++ if (CONN_TYPE_HAS_HOST(sock->conn)) { ++ return posix_api->getpeername_fn(s, name, namelen); ++ } ++#endif ++ + return lwip_getaddrname(s, name, namelen, 0); + } + + int + lwip_getsockname(int s, struct sockaddr *name, socklen_t *namelen) + { ++#if USE_LIBOS ++ struct lwip_sock *sock; ++ ++ sock = posix_api->get_socket(s); ++ if (!sock) { ++ return posix_api->getsockname_fn(s, name, namelen); ++ } ++ /*for AF_INET, if has only host type bit, just call linux api, ++ *if has libos and host type bits, also call linux api*/ ++ if (CONN_TYPE_HAS_HOST(sock->conn)) { ++ return posix_api->getsockname_fn(s, name, namelen); ++ } ++#endif ++ + return lwip_getaddrname(s, name, namelen, 1); + } + +@@ -2765,15 +3177,28 @@ int + lwip_getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen) + { + int err; +- struct lwip_sock *sock = get_socket(s); + #if !LWIP_TCPIP_CORE_LOCKING + err_t cberr; + LWIP_SETGETSOCKOPT_DATA_VAR_DECLARE(data); + #endif /* !LWIP_TCPIP_CORE_LOCKING */ + ++#if USE_LIBOS ++ struct lwip_sock *sock = posix_api->get_socket(s); ++ ++ if (!sock) { ++ return 
posix_api->getsockopt_fn(s, level, optname, optval, optlen); ++ } ++ /*for AF_INET, we return linux result? */ ++ if (CONN_TYPE_HAS_HOST(sock->conn)) { ++ return posix_api->getsockopt_fn(s, level, optname, optval, optlen); ++ } ++#else ++ struct lwip_sock *sock = get_socket(s); ++ + if (!sock) { + return -1; + } ++#endif /* USE_LIBOS */ + + if ((NULL == optval) || (NULL == optlen)) { + sock_set_errno(sock, EFAULT); +@@ -3211,15 +3636,30 @@ int + lwip_setsockopt(int s, int level, int optname, const void *optval, socklen_t optlen) + { + int err = 0; +- struct lwip_sock *sock = get_socket(s); + #if !LWIP_TCPIP_CORE_LOCKING + err_t cberr; + LWIP_SETGETSOCKOPT_DATA_VAR_DECLARE(data); + #endif /* !LWIP_TCPIP_CORE_LOCKING */ + ++#if USE_LIBOS ++ struct lwip_sock *sock = posix_api->get_socket(s); ++ ++ if (!sock) { ++ return posix_api->setsockopt_fn(s, level, optname, optval, optlen); ++ } ++ /*for AF_INET, we may try both linux and lwip*/ ++ if (CONN_TYPE_HAS_HOST(sock->conn)) { ++ if (posix_api->setsockopt_fn(s, level, optname, optval, optlen) < 0) { ++ return -1; ++ } ++ } ++#else ++ struct lwip_sock *sock = get_socket(s); ++ + if (!sock) { + return -1; + } ++#endif /* USE_LIBOS */ + + if (NULL == optval) { + sock_set_errno(sock, EFAULT); +@@ -3333,6 +3773,7 @@ lwip_setsockopt_impl(int s, int level, int optname, const void *optval, socklen_ + case SO_KEEPALIVE: + #if SO_REUSE + case SO_REUSEADDR: ++ case SO_REUSEPORT: + #endif /* SO_REUSE */ + if ((optname == SO_BROADCAST) && + (NETCONNTYPE_GROUP(sock->conn->type) != NETCONN_UDP)) { +@@ -3745,6 +4186,29 @@ lwip_setsockopt_impl(int s, int level, int optname, const void *optval, socklen_ + return err; + } + ++#if USE_LIBOS ++int ++lwip_ioctl(int s, long cmd, ...) 
++{ ++ struct lwip_sock *sock = posix_api->get_socket(s); ++ u8_t val; ++ ++ int ret = -1; ++ void *argp; ++ va_list ap; ++ ++ va_start(ap, cmd); ++ argp = va_arg(ap, void *); ++ va_end(ap); ++ ++ if (!sock) { ++ return posix_api->ioctl_fn(s, cmd, argp); ++ } ++ if (CONN_TYPE_HAS_HOST(sock->conn)) { ++ if ((ret = posix_api->ioctl_fn(s, cmd, argp)) == -1) ++ return ret; ++ } ++#else + int + lwip_ioctl(int s, long cmd, void *argp) + { +@@ -3757,6 +4221,7 @@ lwip_ioctl(int s, long cmd, void *argp) + if (!sock) { + return -1; + } ++#endif /* USE_LIBOS */ + + switch (cmd) { + #if LWIP_SO_RCVBUF || LWIP_FIONREAD_LINUXMODE +@@ -3839,6 +4304,26 @@ lwip_ioctl(int s, long cmd, void *argp) + * the flag O_NONBLOCK is implemented for F_SETFL. + */ + int ++#if USE_LIBOS ++lwip_fcntl(int s, int cmd, ...) ++{ ++ struct lwip_sock *sock = posix_api->get_socket(s); ++ int val, ret = -1; ++ int op_mode = 0; ++ va_list ap; ++ ++ va_start(ap, cmd); ++ val = va_arg(ap, int); ++ va_end(ap); ++ ++ if (!sock) { ++ return posix_api->fcntl_fn(s, cmd, val); ++ } ++ if (CONN_TYPE_HAS_HOST(sock->conn)) { ++ if ((ret = posix_api->fcntl_fn(s, cmd, val)) == -1) ++ return ret; ++ } ++#else /* USE_LIBOS */ + lwip_fcntl(int s, int cmd, int val) + { + struct lwip_sock *sock = get_socket(s); +@@ -3848,6 +4333,7 @@ lwip_fcntl(int s, int cmd, int val) + if (!sock) { + return -1; + } ++#endif /* USE_LIBOS */ + + switch (cmd) { + case F_GETFL: +@@ -4163,4 +4649,50 @@ lwip_socket_drop_registered_mld6_memberships(int s) + } + #endif /* LWIP_IPV6_MLD */ + ++#if USE_LIBOS ++void lwip_sock_init(void) ++{ ++ if (sockets_num == 0) { ++ sockets_num = NUM_SOCKETS; ++ sockets = calloc(sockets_num, sizeof(struct lwip_sock)); ++ LWIP_ASSERT("sockets != NULL", sockets != NULL); ++ memset(sockets, 0, sockets_num * sizeof(struct lwip_sock)); ++ } ++ return; ++} ++ ++//modify from lwip_close ++void lwip_exit(void) ++{ ++ int i, is_tcp; ++ struct lwip_sock *sock; ++ ++ if (memp_pools[MEMP_SYS_MBOX] == NULL) { ++ return; ++ 
} ++ ++ for (i = 0; i < sockets_num; i++) { ++ sock = &sockets[i]; ++ if (!sock->conn) ++ continue; ++#if LWIP_IGMP ++ /* drop all possibly joined IGMP memberships */ ++ lwip_socket_drop_registered_memberships(i); ++#endif /* LWIP_IGMP */ ++ /* ++ * process is exiting, call netconn_delete to ++ * close tcp connection, and ignore the return value ++ */ ++ is_tcp = NETCONNTYPE_GROUP(netconn_type(sock->conn)) == NETCONN_TCP; ++ netconn_delete(sock->conn); ++ free_socket(sock, is_tcp); ++ } ++ ++ free(sockets); ++ sockets = NULL; ++ sockets_num = 0; ++} ++ ++#endif /* USE_LIBOS */ ++ + #endif /* LWIP_SOCKET */ +diff --git a/src/api/sys_arch.c b/src/api/sys_arch.c +new file mode 100644 +index 0000000..55561b1 +--- /dev/null ++++ b/src/api/sys_arch.c +@@ -0,0 +1,379 @@ ++/* ++ * Copyright (c) 2001-2004 Swedish Institute of Computer Science. ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without modification, ++ * are permitted provided that the following conditions are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright notice, ++ * this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright notice, ++ * this list of conditions and the following disclaimer in the documentation ++ * and/or other materials provided with the distribution. ++ * 3. The name of the author may not be used to endorse or promote products ++ * derived from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT ++ * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT ++ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING ++ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY ++ * OF SUCH DAMAGE. ++ * ++ * This file is part of the lwIP TCP/IP stack. ++ * ++ * Author: Huawei Technologies ++ * ++ */ ++ ++#define _GNU_SOURCE ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include "lwip/err.h" ++#include "lwip/mem.h" ++#include "lwip/memp.h" ++#include "lwip/opt.h" ++#include "lwip/sys.h" ++#include "lwip/timeouts.h" ++#include "arch/sys_arch.h" ++ ++struct sys_mutex { ++ volatile unsigned int m; ++}; ++ ++struct sys_mutex lstack_mutex; ++ ++struct sys_sem lstack_sem; ++ ++#define MAX_THREAD_NAME 64 ++#define MBOX_NAME_PREFIX "_mbox_0x" ++#define MAX_MBOX_NAME_LEN (sizeof(MBOX_NAME_PREFIX) + 32) // log(UINT64_MAX) < 32 ++ ++struct sys_thread { ++ struct sys_thread *next; ++ char name[MAX_THREAD_NAME]; ++ lwip_thread_fn fn; ++ void *arg; ++ int stacksize; ++ int prio; ++ pthread_t tid; ++}; ++ ++ ++struct sys_mem_stats { ++ uint32_t tot_len; ++}; ++ ++static PER_THREAD struct sys_mem_stats hugepage_stats; ++ ++static PER_THREAD uint64_t cycles_per_ms __attribute__((aligned(64))); ++static PER_THREAD uint64_t sys_start_ms __attribute__((aligned(64))); ++ ++/* ++ * Mailbox ++ * */ ++static int mbox_wait_func(void) ++{ ++#if LWIP_TIMERS ++ sys_timer_run(); ++#endif /* LWIP_TIMER */ ++ return eth_dev_poll(); ++} ++ ++err_t sys_mbox_new(struct sys_mbox **mb, int size) ++{ ++ int ret; ++ struct sys_mbox *mbox; ++ ++ mbox = (struct sys_mbox *)memp_malloc(MEMP_SYS_MBOX); ++ if (mbox == NULL) { ++ return ERR_MEM; ++ } ++ ++ 
mbox->flags = RING_F_SP_ENQ | RING_F_SC_DEQ; ++ ++ ret = snprintf(mbox->name, sizeof(mbox->name), MBOX_NAME_PREFIX"%"PRIXPTR, (uintptr_t)mbox); ++ if (ret < 0) { ++ memp_free(MEMP_SYS_MBOX, mbox); ++ return ERR_VAL; ++ } ++ ++ mbox->size = size; ++ mbox->socket_id = rte_socket_id(); ++ mbox->ring = rte_ring_create(mbox->name, mbox->size, mbox->socket_id, mbox->flags); ++ if (!mbox->ring) { ++ RTE_LOG(ERR, EAL, "cannot create rte_ring for mbox\n"); ++ memp_free(MEMP_SYS_MBOX, mbox); ++ return ERR_MEM; ++ } ++ mbox->wait_fn = mbox_wait_func; ++ *mb = mbox; ++ ++ return ERR_OK; ++} ++ ++void sys_mbox_free(struct sys_mbox **mb) ++{ ++ struct sys_mbox *mbox = *mb; ++ rte_ring_free(mbox->ring); ++ memp_free(MEMP_SYS_MBOX, mbox); ++} ++ ++err_t sys_mbox_trypost(struct sys_mbox **mb, void *msg) ++{ ++ unsigned int n; ++ struct sys_mbox *mbox = *mb; ++ ++ n = rte_ring_sp_enqueue_bulk(mbox->ring, &msg, 1, NULL); ++ if (!n) ++ return ERR_BUF; ++ return ERR_OK; ++} ++ ++void sys_mbox_post(struct sys_mbox **mb, void *msg) ++{ ++ struct sys_mbox *mbox = *mb; ++ ++ /* NOTE: sys_mbox_post is used on mbox defined in src/api/tcpip.c. ++ * If the ring size of mbox is greater than MEMP_NUM_TCPIP_MSG_API, ++ * enqueue failure will never happen. 
++ * */ ++ if (!rte_ring_sp_enqueue_bulk(mbox->ring, &msg, 1, NULL)) { ++ LWIP_ASSERT("It is failed to post msg into mbox", 0); ++ } ++} ++ ++err_t sys_mbox_trypost_fromisr(sys_mbox_t *q, void *msg) ++{ ++ return sys_mbox_trypost(q, msg); ++} ++ ++uint32_t sys_arch_mbox_tryfetch(struct sys_mbox **mb, void **msg) ++{ ++ unsigned int n; ++ struct sys_mbox *mbox = *mb; ++ ++ n = rte_ring_sc_dequeue_bulk(mbox->ring, msg, 1, NULL); ++ if (!n) { ++ *msg = NULL; ++ return SYS_MBOX_EMPTY; ++ } ++ ++ return 0; ++} ++ ++uint32_t sys_arch_mbox_fetch(struct sys_mbox **mb, void **msg, uint32_t timeout) ++{ ++ unsigned int n; ++ uint32_t poll_ts = 0; ++ uint32_t time_needed = 0; ++ struct sys_mbox *mbox = *mb; ++ ++ n = rte_ring_sc_dequeue_bulk(mbox->ring, msg, 1, NULL); ++ ++ if (timeout > 0) ++ poll_ts = sys_now(); ++ ++ while (!n) { ++ if (timeout > 0) { ++ time_needed = sys_now() - poll_ts; ++ if (time_needed >= timeout) { ++ return SYS_ARCH_TIMEOUT; ++ } ++ } ++ ++ (void)mbox->wait_fn(); ++ ++ n = rte_ring_sc_dequeue_bulk(mbox->ring, msg, 1, NULL); ++ } ++ ++ return time_needed; ++} ++ ++int sys_mbox_empty(struct sys_mbox *mb) ++{ ++ return rte_ring_count(mb->ring) == 0; ++} ++ ++/* ++ * Threads ++ * */ ++sys_thread_t sys_thread_new(const char *name, lwip_thread_fn function, void *arg, int stacksize, int prio) ++{ ++ int err; ++ pthread_t tid; ++ struct sys_thread *thread; ++ ++ thread = (struct sys_thread *)malloc(sizeof(struct sys_thread)); ++ if (thread == NULL) { ++ LWIP_DEBUGF(SYS_DEBUG, ("sys_thread_new: malloc sys_thread failed\n")); ++ rte_exit(EXIT_FAILURE, "malloc sys_thread failed\n"); ++ } ++ ++ err = pthread_create(&tid, NULL, (void*(*)(void *))function, arg); ++ if (err > 0) { ++ LWIP_DEBUGF(SYS_DEBUG, ("sys_thread_new: pthread_create failed\n")); ++ rte_exit(EXIT_FAILURE, "pthread_create failed\n"); ++ } ++ ++ err = pthread_setname_np(tid, name); ++ if (err > 0) { ++ LWIP_DEBUGF(SYS_DEBUG, ("sys_thread_new: pthread_setname_np failed\n")); ++ } ++ thread->tid 
= tid; ++ thread->stacksize = stacksize; ++ thread->prio = prio; ++ ++ return thread; ++} ++ ++/* ++ * Semaphore ++ * */ ++err_t sys_sem_new(struct sys_sem **sem, uint8_t count) ++{ ++ *sem = (struct sys_sem *)memp_malloc(MEMP_SYS_SEM); ++ if ((*sem) == NULL) { ++ return ERR_MEM; ++ } ++ (*sem)->c = 0; ++ (*sem)->wait_fn = mbox_wait_func; ++ return ERR_OK; ++} ++ ++void sys_sem_signal(struct sys_sem **s) ++{ ++ struct sys_sem *sem = NULL; ++ LWIP_ASSERT("invalid sem", (s != NULL) && (*s != NULL)); ++ sem = *s; ++ ++(sem->c); ++} ++ ++static uint32_t cond_wait(struct sys_sem *sem, uint32_t timeout) ++{ ++ uint32_t used_ms = 0; ++ uint32_t poll_ts; ++ ++ if (timeout == 0) { ++ (void)sem->wait_fn(); ++ return 0; ++ } ++ ++ poll_ts = sys_now(); ++ ++ while (used_ms < timeout) { ++ if (sem->c > 0) ++ return timeout - used_ms; ++ ++ (void)sem->wait_fn(); ++ used_ms = sys_now() - poll_ts; ++ } ++ ++ return SYS_ARCH_TIMEOUT; ++} ++ ++uint32_t sys_arch_sem_wait(struct sys_sem **s, uint32_t timeout) ++{ ++ uint32_t time_needed = 0; ++ struct sys_sem *sem = NULL; ++ LWIP_ASSERT("invalid sem", (s != NULL) && (*s != NULL)); ++ sem = *s; ++ ++ while (sem->c <= 0) { ++ if (timeout > 0) { ++ time_needed = cond_wait(sem, timeout); ++ ++ if (time_needed == SYS_ARCH_TIMEOUT) { ++ return SYS_ARCH_TIMEOUT; ++ } ++ } else { ++ cond_wait(sem, 0); ++ } ++ } ++ ++ sem->c--; ++ return time_needed; ++} ++ ++void sys_sem_free(struct sys_sem **s) ++{ ++ if ((s != NULL) && (*s != SYS_SEM_NULL)) ++ memp_free(MEMP_SYS_SEM, *s); ++} ++ ++/* ++ * Mutex ++ * */ ++err_t sys_mutex_new(struct sys_mutex **mutex) ++{ ++ return ERR_OK; ++} ++ ++void sys_mutex_lock(struct sys_mutex **mutex) ++{ ++} ++ ++void sys_mutex_unlock(struct sys_mutex **mutex) ++{ ++} ++ ++void sys_mutex_free(struct sys_mutex **mutex) ++{ ++} ++ ++/* Timer from DPDK */ ++void sys_calibrate_tsc(void) ++{ ++#define MS_PER_SEC 1E3 ++ uint64_t freq = rte_get_tsc_hz(); ++ ++ cycles_per_ms = (freq + MS_PER_SEC - 1) / MS_PER_SEC; ++ 
sys_start_ms = rte_rdtsc() / cycles_per_ms; ++} ++ ++uint32_t sys_now(void) ++{ ++ uint64_t cur_ms = rte_rdtsc() / cycles_per_ms; ++ return (uint32_t)(cur_ms - sys_start_ms); ++} ++ ++/* ++ * Critical section ++ * */ ++sys_prot_t sys_arch_protect(void) ++{ ++ return 0; ++} ++ ++void sys_arch_unprotect(sys_prot_t pval) ++{ ++} ++ ++/* ++ * Hugepage memory manager ++ * */ ++uint8_t *sys_hugepage_malloc(const char *name, uint32_t size) ++{ ++ const struct rte_memzone *mz; ++ ++ mz = rte_memzone_reserve(name, size, rte_socket_id(), 0); ++ if (mz == NULL) { ++ rte_exit(EXIT_FAILURE, "failed to reserver memory for mempool[%s]\n", name); ++ return NULL; ++ } ++ ++ memset(mz->addr, 0, mz->len); ++ hugepage_stats.tot_len += mz->len; ++ ++ return (uint8_t*)mz->addr; ++} +diff --git a/src/api/tcpip.c b/src/api/tcpip.c +index a7e312a..d3d0b55 100644 +--- a/src/api/tcpip.c ++++ b/src/api/tcpip.c +@@ -56,13 +56,13 @@ + #define TCPIP_MSG_VAR_FREE(name) API_VAR_FREE(MEMP_TCPIP_MSG_API, name) + + /* global variables */ +-static tcpip_init_done_fn tcpip_init_done; +-static void *tcpip_init_done_arg; +-static sys_mbox_t tcpip_mbox; ++static PER_THREAD tcpip_init_done_fn tcpip_init_done; ++static PER_THREAD void *tcpip_init_done_arg; ++static PER_THREAD sys_mbox_t tcpip_mbox; + + #if LWIP_TCPIP_CORE_LOCKING + /** The global semaphore to lock the stack. 
*/ +-sys_mutex_t lock_tcpip_core; ++PER_THREAD sys_mutex_t lock_tcpip_core; + #endif /* LWIP_TCPIP_CORE_LOCKING */ + + static void tcpip_thread_handle_msg(struct tcpip_msg *msg); +@@ -123,8 +123,13 @@ again: + * + * @param arg unused argument + */ ++#if USE_LIBOS ++__attribute__((unused)) static void ++tcpip_thread(void *arg) ++#else + static void + tcpip_thread(void *arg) ++#endif /* USE_LIBOS */ + { + struct tcpip_msg *msg; + LWIP_UNUSED_ARG(arg); +@@ -242,6 +247,9 @@ tcpip_inpkt(struct pbuf *p, struct netif *inp, netif_input_fn input_fn) + #if LWIP_TCPIP_CORE_LOCKING_INPUT + err_t ret; + LWIP_DEBUGF(TCPIP_DEBUG, ("tcpip_inpkt: PACKET %p/%p\n", (void *)p, (void *)inp)); ++#if USE_LIBOS && LWIP_TIMERS ++ sys_timer_run(); ++#endif + LOCK_TCPIP_CORE(); + ret = input_fn(p, inp); + UNLOCK_TCPIP_CORE(); +@@ -321,6 +329,9 @@ tcpip_callback(tcpip_callback_fn function, void *ctx) + msg->msg.cb.function = function; + msg->msg.cb.ctx = ctx; + ++#if USE_LIBOS && LWIP_TIMER ++ sys_timer_run(); ++#endif + sys_mbox_post(&tcpip_mbox, msg); + return ERR_OK; + } +@@ -357,6 +368,9 @@ tcpip_try_callback(tcpip_callback_fn function, void *ctx) + msg->msg.cb.function = function; + msg->msg.cb.ctx = ctx; + ++#if USE_LIBOS && LWIP_TIMER ++ sys_timer_run(); ++#endif + if (sys_mbox_trypost(&tcpip_mbox, msg) != ERR_OK) { + memp_free(MEMP_TCPIP_MSG_API, msg); + return ERR_MEM; +@@ -438,6 +452,9 @@ tcpip_send_msg_wait_sem(tcpip_callback_fn fn, void *apimsg, sys_sem_t *sem) + { + #if LWIP_TCPIP_CORE_LOCKING + LWIP_UNUSED_ARG(sem); ++#if USE_LIBOS && LWIP_TIMERS ++ sys_timer_run(); ++#endif + LOCK_TCPIP_CORE(); + fn(apimsg); + UNLOCK_TCPIP_CORE(); +@@ -475,6 +492,9 @@ tcpip_api_call(tcpip_api_call_fn fn, struct tcpip_api_call_data *call) + #if LWIP_TCPIP_CORE_LOCKING + err_t err; + LOCK_TCPIP_CORE(); ++#if USE_LIBOS && LWIP_TIMERS ++ sys_timer_run(); ++#endif + err = fn(call); + UNLOCK_TCPIP_CORE(); + return err; +@@ -537,6 +557,10 @@ tcpip_callbackmsg_new(tcpip_callback_fn function, void *ctx) 
+ msg->type = TCPIP_MSG_CALLBACK_STATIC; + msg->msg.cb.function = function; + msg->msg.cb.ctx = ctx; ++ ++#if USE_LIBOS && LWIP_TIMER ++ sys_timer_run(); ++#endif + return (struct tcpip_callback_msg *)msg; + } + +@@ -614,7 +638,9 @@ tcpip_init(tcpip_init_done_fn initfunc, void *arg) + } + #endif /* LWIP_TCPIP_CORE_LOCKING */ + ++#if !USE_LIBOS + sys_thread_new(TCPIP_THREAD_NAME, tcpip_thread, NULL, TCPIP_THREAD_STACKSIZE, TCPIP_THREAD_PRIO); ++#endif + } + + /** +diff --git a/src/core/dir.mk b/src/core/dir.mk +index e5a055b..ebc01a5 100644 +--- a/src/core/dir.mk ++++ b/src/core/dir.mk +@@ -1,6 +1,6 @@ +-SRC = inet_chksum.c init.c ip.c mem.c memp.c netif.c pbuf.c \ +- raw.c stats.c tcp.c tcp_in.c tcp_out.c timeouts.c udp.c \ +- ipv4/etharp.c ipv4/icmp.c ipv4/ip4_addr.c ipv4/ip4.c \ +- ipv4/ip4_frag.c ++SRC = def.c inet_chksum.c init.c ip.c mem.c memp.c netif.c pbuf.c \ ++ raw.c tcp.c tcp_in.c tcp_out.c timeouts.c udp.c stats.c\ ++ ipv4/icmp.c ipv4/ip4_addr.c ipv4/ip4_frag.c ipv4/etharp.c \ ++ ipv4/ip4.c + + $(eval $(call register_dir, core, $(SRC))) +diff --git a/src/core/init.c b/src/core/init.c +index 3620e1d..60e1c68 100644 +--- a/src/core/init.c ++++ b/src/core/init.c +@@ -343,9 +343,7 @@ lwip_init(void) + + /* Modules initialization */ + stats_init(); +-#if !NO_SYS +- sys_init(); +-#endif /* !NO_SYS */ ++ + mem_init(); + memp_init(); + pbuf_init(); +diff --git a/src/core/ip.c b/src/core/ip.c +index 18514cf..0d39d2d 100644 +--- a/src/core/ip.c ++++ b/src/core/ip.c +@@ -61,7 +61,7 @@ + #include "lwip/ip.h" + + /** Global data for both IPv4 and IPv6 */ +-struct ip_globals ip_data; ++PER_THREAD struct ip_globals ip_data; + + #if LWIP_IPV4 && LWIP_IPV6 + +diff --git a/src/core/ipv4/ip4.c b/src/core/ipv4/ip4.c +index 26c26a9..c83afbe 100644 +--- a/src/core/ipv4/ip4.c ++++ b/src/core/ipv4/ip4.c +@@ -282,7 +282,9 @@ ip4_forward(struct pbuf *p, struct ip_hdr *iphdr, struct netif *inp) + { + struct netif *netif; + ++#ifndef LWIP_PERF + PERF_START; ++#endif + 
LWIP_UNUSED_ARG(inp); + + if (!ip4_canforward(p)) { +@@ -344,7 +346,9 @@ ip4_forward(struct pbuf *p, struct ip_hdr *iphdr, struct netif *inp) + MIB2_STATS_INC(mib2.ipforwdatagrams); + IP_STATS_INC(ip.xmit); + ++#ifndef LWIP_PERF + PERF_STOP("ip4_forward"); ++#endif + /* don't fragment if interface has mtu set to 0 [loopif] */ + if (netif->mtu && (p->tot_len > netif->mtu)) { + if ((IPH_OFFSET(iphdr) & PP_NTOHS(IP_DF)) == 0) { +@@ -438,6 +442,8 @@ ip4_input(struct pbuf *p, struct netif *inp) + + LWIP_ASSERT_CORE_LOCKED(); + ++ PERF_START(PERF_LAYER_IP, PERF_POINT_IP_RECV); ++ + IP_STATS_INC(ip.recv); + MIB2_STATS_INC(mib2.ipinreceives); + +@@ -700,13 +706,19 @@ ip4_input(struct pbuf *p, struct netif *inp) + case IP_PROTO_UDPLITE: + #endif /* LWIP_UDPLITE */ + MIB2_STATS_INC(mib2.ipindelivers); ++ PERF_PAUSE(PERF_LAYER_IP); + udp_input(p, inp); ++ PERF_RESUME(PERF_LAYER_IP, PERF_POINT_IP_RECV); + break; + #endif /* LWIP_UDP */ + #if LWIP_TCP + case IP_PROTO_TCP: + MIB2_STATS_INC(mib2.ipindelivers); ++ PERF_PAUSE(PERF_LAYER_IP); ++ PERF_START(PERF_LAYER_TCP, PERF_POINT_TCP_RECV); + tcp_input(p, inp); ++ PERF_STOP_INCREASE_COUNT("tcp_input", PERF_LAYER_TCP); ++ PERF_RESUME(PERF_LAYER_IP, PERF_POINT_IP_RECV); + break; + #endif /* LWIP_TCP */ + #if LWIP_ICMP +@@ -755,6 +767,8 @@ ip4_input(struct pbuf *p, struct netif *inp) + ip4_addr_set_any(ip4_current_src_addr()); + ip4_addr_set_any(ip4_current_dest_addr()); + ++ PERF_STOP_INCREASE_COUNT("ip4_input", PERF_LAYER_IP); ++ + return ERR_OK; + } + +diff --git a/src/core/ipv6/ip6.c b/src/core/ipv6/ip6.c +index 060d5f3..9d904ec 100644 +--- a/src/core/ipv6/ip6.c ++++ b/src/core/ipv6/ip6.c +@@ -522,6 +522,8 @@ ip6_input(struct pbuf *p, struct netif *inp) + + LWIP_ASSERT_CORE_LOCKED(); + ++ PERF_START(PERF_LAYER_IP, PERF_POINT_IP_RECV); ++ + IP6_STATS_INC(ip6.recv); + + /* identify the IP header */ +@@ -1069,12 +1071,18 @@ options_done: + #if LWIP_UDPLITE + case IP6_NEXTH_UDPLITE: + #endif /* LWIP_UDPLITE */ ++ 
PERF_PAUSE(PERF_LAYER_IP); + udp_input(p, inp); ++ PERF_RESUME(PERF_LAYER_IP, PERF_POINT_IP_RECV); + break; + #endif /* LWIP_UDP */ + #if LWIP_TCP + case IP6_NEXTH_TCP: ++ PERF_PAUSE(PERF_LAYER_IP); ++ PERF_START(PERF_LAYER_TCP, PERF_POINT_TCP_RECV); + tcp_input(p, inp); ++ PERF_STOP_INCREASE_COUNT("tcp_input", PERF_LAYER_TCP); ++ PERF_RESUME(PERF_LAYER_IP, PERF_POINT_IP_RECV); + break; + #endif /* LWIP_TCP */ + #if LWIP_ICMP6 +@@ -1115,6 +1123,8 @@ ip6_input_cleanup: + ip6_addr_set_zero(ip6_current_src_addr()); + ip6_addr_set_zero(ip6_current_dest_addr()); + ++ PERF_STOP_INCREASE_COUNT("ip6_input", PERF_LAYER_IP); ++ + return ERR_OK; + } + +diff --git a/src/core/mem.c b/src/core/mem.c +index 315fb3c..84b3fcc 100644 +--- a/src/core/mem.c ++++ b/src/core/mem.c +@@ -381,9 +381,9 @@ LWIP_DECLARE_MEMORY_ALIGNED(ram_heap, MEM_SIZE_ALIGNED + (2U * SIZEOF_STRUCT_MEM + #endif /* LWIP_RAM_HEAP_POINTER */ + + /** pointer to the heap (ram_heap): for alignment, ram is now a pointer instead of an array */ +-static u8_t *ram; ++static PER_THREAD u8_t *ram; + /** the last entry, always unused! 
*/ +-static struct mem *ram_end; ++static PER_THREAD struct mem *ram_end; + + /** concurrent access protection */ + #if !NO_SYS +@@ -418,7 +418,7 @@ static volatile u8_t mem_free_count; + #endif /* LWIP_ALLOW_MEM_FREE_FROM_OTHER_CONTEXT */ + + /** pointer to the lowest free block, this is used for faster search */ +-static struct mem * LWIP_MEM_LFREE_VOLATILE lfree; ++static PER_THREAD struct mem * LWIP_MEM_LFREE_VOLATILE lfree; + + #if MEM_SANITY_CHECK + static void mem_sanity(void); +diff --git a/src/core/memp.c b/src/core/memp.c +index 352ce5a..454ba32 100644 +--- a/src/core/memp.c ++++ b/src/core/memp.c +@@ -78,10 +78,14 @@ + #define LWIP_MEMPOOL(name,num,size,desc) LWIP_MEMPOOL_DECLARE(name,num,size,desc) + #include "lwip/priv/memp_std.h" + ++#if USE_LIBOS ++PER_THREAD struct memp_desc* memp_pools[MEMP_MAX] = {NULL}; ++#else + const struct memp_desc *const memp_pools[MEMP_MAX] = { + #define LWIP_MEMPOOL(name,num,size,desc) &memp_ ## name, + #include "lwip/priv/memp_std.h" + }; ++#endif /* USE_LIBOS */ + + #ifdef LWIP_HOOK_FILENAME + #include LWIP_HOOK_FILENAME +diff --git a/src/core/netif.c b/src/core/netif.c +index 088b50e..70392cb 100644 +--- a/src/core/netif.c ++++ b/src/core/netif.c +@@ -107,12 +107,12 @@ static netif_ext_callback_t *ext_callback; + #endif + + #if !LWIP_SINGLE_NETIF +-struct netif *netif_list; ++PER_THREAD struct netif *netif_list; + #endif /* !LWIP_SINGLE_NETIF */ +-struct netif *netif_default; ++PER_THREAD struct netif *netif_default; + + #define netif_index_to_num(index) ((index) - 1) +-static u8_t netif_num; ++static PER_THREAD u8_t netif_num; + + #if LWIP_NUM_NETIF_CLIENT_DATA > 0 + static u8_t netif_client_id; +@@ -138,7 +138,7 @@ static err_t netif_loop_output_ipv6(struct netif *netif, struct pbuf *p, const i + #endif + + +-static struct netif loop_netif; ++static PER_THREAD struct netif loop_netif; + + /** + * Initialize a lwip network interface structure for a loopback interface +diff --git a/src/core/pbuf.c b/src/core/pbuf.c 
/**
 * Copy the leading part of the MIB2 counters into a caller supplied buffer.
 *
 * Everything stored in lwip_stats.mib2 in front of the udpindatagrams member
 * is exported (the ip and tcp counters; the rest is not needed).
 *
 * @param buf destination buffer; must be large enough for the returned length
 * @return number of bytes written to buf, or 0 when MIB2_STATS is disabled
 *         or buf is NULL
 */
int get_mib2_stats(char *buf)
{
  int len = 0;
#if MIB2_STATS
  if (buf == NULL) {
    return 0;
  }
  /* Offset of udpindatagrams inside the real object: avoids forming a member
   * address through a null pointer. */
  len = (int)((const char *)&lwip_stats.mib2.udpindatagrams -
              (const char *)&lwip_stats.mib2);
  /* we just need the ip&tcp, others not needed. */
  memcpy(buf, &lwip_stats.mib2, (size_t)len);
#else
  (void)buf;
#endif
  return len;
}
*/ +-u32_t tcp_ticks; +-static const u8_t tcp_backoff[13] = ++PER_THREAD u32_t tcp_ticks; ++static PER_THREAD const u8_t tcp_backoff[13] = + { 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7}; + /* Times per slowtmr hits */ +-static const u8_t tcp_persist_backoff[7] = { 3, 6, 12, 24, 48, 96, 120 }; ++static PER_THREAD const u8_t tcp_persist_backoff[7] = { 3, 6, 12, 24, 48, 96, 120 }; + + /* The TCP PCB lists. */ + + /** List of all TCP PCBs bound but not yet (connected || listening) */ +-struct tcp_pcb *tcp_bound_pcbs; ++PER_THREAD struct tcp_pcb *tcp_bound_pcbs; + /** List of all TCP PCBs in LISTEN state */ +-union tcp_listen_pcbs_t tcp_listen_pcbs; ++PER_THREAD union tcp_listen_pcbs_t tcp_listen_pcbs; + /** List of all TCP PCBs that are in a state in which + * they accept or send data. */ +-struct tcp_pcb *tcp_active_pcbs; ++PER_THREAD struct tcp_pcb *tcp_active_pcbs; + /** List of all TCP PCBs in TIME-WAIT state */ +-struct tcp_pcb *tcp_tw_pcbs; ++PER_THREAD struct tcp_pcb *tcp_tw_pcbs; + + /** An array with all (non-temporary) PCB lists, mainly used for smaller code size */ +-struct tcp_pcb **const tcp_pcb_lists[] = {&tcp_listen_pcbs.pcbs, &tcp_bound_pcbs, +- &tcp_active_pcbs, &tcp_tw_pcbs +-}; ++PER_THREAD struct tcp_pcb ** tcp_pcb_lists[NUM_TCP_PCB_LISTS] = {NULL, NULL, NULL, NULL}; ++ ++#if TCP_PCB_HASH ++#define INIT_TCP_HTABLE(ht_ptr) \ ++ do { \ ++ int _i; \ ++ (ht_ptr)->size = TCP_HTABLE_SIZE; \ ++ for (_i = 0; _i < TCP_HTABLE_SIZE; ++_i) { \ ++ if (sys_mutex_new(&(ht_ptr)->array[_i].mutex) != ERR_OK) \ ++ LWIP_ASSERT("failed to create ht->array[].mutex", 0);\ ++ INIT_HLIST_HEAD(&(ht_ptr)->array[_i].chain); \ ++ }\ ++ } while (0) ++ ++PER_THREAD struct tcp_hash_table *tcp_active_htable; /* key: lport/fport/lip/fip */ ++#endif + +-u8_t tcp_active_pcbs_changed; ++PER_THREAD u8_t tcp_active_pcbs_changed; + + /** Timer counter to handle calling slow-timer from tcp_tmr() */ +-static u8_t tcp_timer; +-static u8_t tcp_timer_ctr; ++static PER_THREAD u8_t tcp_timer; 
++static PER_THREAD u8_t tcp_timer_ctr; + static u16_t tcp_new_port(void); + + static err_t tcp_close_shutdown_fin(struct tcp_pcb *pcb); +@@ -200,9 +215,20 @@ static void tcp_ext_arg_invoke_callbacks_destroyed(struct tcp_pcb_ext_args *ext_ + void + tcp_init(void) + { ++ tcp_pcb_lists[0] = &tcp_listen_pcbs.pcbs; ++ tcp_pcb_lists[1] = &tcp_bound_pcbs; ++ tcp_pcb_lists[2] = &tcp_active_pcbs; ++ tcp_pcb_lists[3] = &tcp_tw_pcbs; ++ + #ifdef LWIP_RAND + tcp_port = TCP_ENSURE_LOCAL_PORT_RANGE(LWIP_RAND()); + #endif /* LWIP_RAND */ ++ ++#if TCP_PCB_HASH ++ tcp_active_htable = (struct tcp_hash_table*)mem_malloc(sizeof(struct tcp_hash_table)); ++ LWIP_ASSERT("malloc tcp_active_htable mem failed.", tcp_active_htable != NULL); ++ INIT_TCP_HTABLE(tcp_active_htable); ++#endif + } + + /** Free a tcp pcb */ +@@ -361,6 +387,9 @@ tcp_close_shutdown(struct tcp_pcb *pcb, u8_t rst_on_unacked_data) + pcb->local_port, pcb->remote_port); + + tcp_pcb_purge(pcb); ++#if TCP_PCB_HASH ++ TCP_RMV_ACTIVE_HASH(pcb); ++#endif + TCP_RMV_ACTIVE(pcb); + /* Deallocate the pcb since we already sent a RST for it */ + if (tcp_input_pcb == pcb) { +@@ -395,6 +424,9 @@ tcp_close_shutdown(struct tcp_pcb *pcb, u8_t rst_on_unacked_data) + tcp_free_listen(pcb); + break; + case SYN_SENT: ++#if TCP_PCB_HASH ++ TCP_PCB_REMOVE_ACTIVE_HASH(pcb); ++#endif + TCP_PCB_REMOVE_ACTIVE(pcb); + tcp_free(pcb); + MIB2_STATS_INC(mib2.tcpattemptfails); +@@ -494,6 +526,7 @@ tcp_close(struct tcp_pcb *pcb) + /* Set a flag not to receive any more data... */ + tcp_set_flags(pcb, TF_RXCLOSED); + } ++ + /* ... 
and close */ + return tcp_close_shutdown(pcb, 1); + } +@@ -599,6 +632,9 @@ tcp_abandon(struct tcp_pcb *pcb, int reset) + } else { + send_rst = reset; + local_port = pcb->local_port; ++#if TCP_PCB_HASH ++ TCP_PCB_REMOVE_ACTIVE_HASH(pcb); ++#endif + TCP_PCB_REMOVE_ACTIVE(pcb); + } + if (pcb->unacked != NULL) { +@@ -880,6 +916,11 @@ tcp_listen_with_backlog_and_err(struct tcp_pcb *pcb, u8_t backlog, err_t *err) + } + } + #endif /* SO_REUSE */ ++ ++#if USE_LIBOS ++ vdev_reg_done(REG_RING_TCP_LISTEN, pcb); ++#endif ++ + lpcb = (struct tcp_pcb_listen *)memp_malloc(MEMP_TCP_PCB_LISTEN); + if (lpcb == NULL) { + res = ERR_MEM; +@@ -1015,6 +1056,7 @@ tcp_new_port(void) + u16_t n = 0; + struct tcp_pcb *pcb; + ++ pthread_mutex_lock(&g_tcp_port_mutex); + again: + tcp_port++; + if (tcp_port == TCP_LOCAL_PORT_RANGE_END) { +@@ -1032,6 +1074,8 @@ again: + } + } + } ++ pthread_mutex_unlock(&g_tcp_port_mutex); ++ + return tcp_port; + } + +@@ -1142,6 +1186,10 @@ tcp_connect(struct tcp_pcb *pcb, const ip_addr_t *ipaddr, u16_t port, + #endif /* SO_REUSE */ + } + ++#if USE_LIBOS ++ vdev_reg_done(REG_RING_TCP_CONNECT, pcb); ++#endif ++ + iss = tcp_next_iss(pcb); + pcb->rcv_nxt = 0; + pcb->snd_nxt = iss; +@@ -1174,6 +1222,9 @@ tcp_connect(struct tcp_pcb *pcb, const ip_addr_t *ipaddr, u16_t port, + if (old_local_port != 0) { + TCP_RMV(&tcp_bound_pcbs, pcb); + } ++#if TCP_PCB_HASH ++ TCP_REG_ACTIVE_HASH(pcb); ++#endif + TCP_REG_ACTIVE(pcb); + MIB2_STATS_INC(mib2.tcpactiveopens); + +@@ -1389,11 +1440,26 @@ tcp_slowtmr_start: + if (prev != NULL) { + LWIP_ASSERT("tcp_slowtmr: middle tcp != tcp_active_pcbs", pcb != tcp_active_pcbs); + prev->next = pcb->next; ++#if USE_LIBOS ++ if (pcb->next) ++ pcb->next->prev = prev; ++ //dont set next NULL, it will be used below ++ pcb->prev = NULL; ++#endif + } else { + /* This PCB was the first. 
*/ + LWIP_ASSERT("tcp_slowtmr: first pcb == tcp_active_pcbs", tcp_active_pcbs == pcb); + tcp_active_pcbs = pcb->next; ++#if USE_LIBOS ++ if (pcb->next) ++ pcb->next->prev = NULL; ++ //dont set next NULL, it will be used below ++ pcb->prev = NULL; ++#endif + } ++#if TCP_PCB_HASH ++ TCP_RMV_ACTIVE_HASH(pcb); ++#endif + + if (pcb_reset) { + tcp_rst(pcb, pcb->snd_nxt, pcb->rcv_nxt, &pcb->local_ip, &pcb->remote_ip, +@@ -1404,6 +1470,9 @@ tcp_slowtmr_start: + last_state = pcb->state; + pcb2 = pcb; + pcb = pcb->next; ++#if USE_LIBOS ++ pcb2->next = NULL; ++#endif + tcp_free(pcb2); + + tcp_active_pcbs_changed = 0; +@@ -1455,13 +1524,28 @@ tcp_slowtmr_start: + if (prev != NULL) { + LWIP_ASSERT("tcp_slowtmr: middle tcp != tcp_tw_pcbs", pcb != tcp_tw_pcbs); + prev->next = pcb->next; ++#if USE_LIBOS ++ if (pcb->next) ++ pcb->next->prev = prev; ++ //dont set next NULL, it will be used below ++ pcb->prev = NULL; ++#endif + } else { + /* This PCB was the first. */ + LWIP_ASSERT("tcp_slowtmr: first pcb == tcp_tw_pcbs", tcp_tw_pcbs == pcb); + tcp_tw_pcbs = pcb->next; ++#if USE_LIBOS ++ if (pcb->next) ++ pcb->next->prev = NULL; ++ //dont set next NULL, it will be used below ++ pcb->prev = NULL; ++#endif + } + pcb2 = pcb; + pcb = pcb->next; ++#if USE_LIBOS ++ pcb2->next = NULL; ++#endif + tcp_free(pcb2); + } else { + prev = pcb; +@@ -2210,6 +2294,14 @@ tcp_pcb_remove(struct tcp_pcb **pcblist, struct tcp_pcb *pcb) + LWIP_ASSERT("tcp_pcb_remove: tcp_pcbs_sane()", tcp_pcbs_sane()); + } + ++#if TCP_PCB_HASH ++void ++tcp_pcb_remove_hash(struct tcp_hash_table *htb, struct tcp_pcb *pcb) ++{ ++ TCP_RMV_HASH(htb, pcb); ++} ++#endif /* TCP_PCB_HASH */ ++ + /** + * Calculates a new initial sequence number for new connections. 
+ * +@@ -2384,6 +2476,84 @@ tcp_tcp_get_tcp_addrinfo(struct tcp_pcb *pcb, int local, ip_addr_t *addr, u16_t + return ERR_VAL; + } + ++uint32_t tcp_get_conn_num(void) ++{ ++ struct tcp_pcb *pcb = NULL; ++ struct tcp_pcb_listen *pcbl = NULL; ++ uint32_t conn_num = 0; ++ ++ for (pcb = tcp_active_pcbs; pcb != NULL; pcb = pcb->next) { ++ conn_num++; ++ } ++ ++ for (pcbl = tcp_listen_pcbs.listen_pcbs; pcbl != NULL; pcbl = pcbl->next) { ++ conn_num++; ++ } ++ ++ for (pcb = tcp_tw_pcbs; pcb != NULL; pcb = pcb->next) { ++ conn_num++; ++ } ++ ++ return conn_num; ++} ++ ++void tcp_get_conn(char *buf, int32_t len, uint32_t *conn_num) ++{ ++ int tmp_len = 0; ++ char *tmp_buf = buf; ++ struct tcp_pcb_dp tdp; ++ struct tcp_pcb *pcb = NULL; ++ struct tcp_pcb_listen *pcbl = NULL; ++ ++#define COPY_TDP(b, l) \ ++ do { \ ++ if (l + sizeof(tdp) <= len) { \ ++ memcpy(b, &tdp, sizeof(tdp)); \ ++ b += sizeof(tdp); \ ++ l += sizeof(tdp); \ ++ *conn_num += 1; \ ++ } else \ ++ return; \ ++ } while(0); ++ ++ *conn_num = 0; ++ ++ for (pcb = tcp_active_pcbs; pcb != NULL; pcb = pcb->next) { ++ tdp.state = ACTIVE_LIST; ++ tdp.lip = pcb->local_ip.addr; ++ tdp.rip = pcb->remote_ip.addr; ++ tdp.l_port = pcb->local_port; ++ tdp.r_port = pcb->remote_port; ++ tdp.s_next = pcb->snd_queuelen; ++ /* lwip not cache rcv buf. Set it to 0. */ ++ tdp.r_next = 0; ++ tdp.tcp_sub_state = pcb->state; ++ COPY_TDP(tmp_buf, tmp_len); ++ } ++ ++ for (pcbl = tcp_listen_pcbs.listen_pcbs; pcbl != NULL; pcbl = pcbl->next) { ++ tdp.state = LISTEN_LIST; ++ tdp.lip = pcbl->local_ip.addr; ++ tdp.rip = pcbl->remote_ip.addr; ++ tdp.l_port = pcbl->local_port; ++ tdp.tcp_sub_state = pcbl->state; ++ COPY_TDP(tmp_buf, tmp_len); ++ } ++ ++ for (pcb = tcp_tw_pcbs; pcb != NULL; pcb = pcb->next) { ++ tdp.state = TIME_WAIT_LIST; ++ tdp.lip = pcb->local_ip.addr; ++ tdp.rip = pcb->remote_ip.addr; ++ tdp.l_port = pcb->local_port; ++ tdp.r_port = pcb->remote_port; ++ tdp.s_next = pcb->snd_queuelen; ++ /* lwip not cache rcv buf. 
Set it to 0. */ ++ tdp.r_next = 0; ++ tdp.tcp_sub_state = pcb->state; ++ COPY_TDP(tmp_buf, tmp_len); ++ } ++} ++ + #if TCP_QUEUE_OOSEQ + /* Free all ooseq pbufs (and possibly reset SACK state) */ + void +diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c +index 2202e38..2b4c160 100644 +--- a/src/core/tcp_in.c ++++ b/src/core/tcp_in.c +@@ -71,21 +71,22 @@ + /* These variables are global to all functions involved in the input + processing of TCP segments. They are set by the tcp_input() + function. */ +-static struct tcp_seg inseg; +-static struct tcp_hdr *tcphdr; +-static u16_t tcphdr_optlen; +-static u16_t tcphdr_opt1len; +-static u8_t *tcphdr_opt2; +-static u16_t tcp_optidx; +-static u32_t seqno, ackno; +-static tcpwnd_size_t recv_acked; +-static u16_t tcplen; +-static u8_t flags; +- +-static u8_t recv_flags; +-static struct pbuf *recv_data; +- +-struct tcp_pcb *tcp_input_pcb; ++static PER_THREAD struct tcp_seg inseg; ++static PER_THREAD struct tcp_hdr *tcphdr; ++static PER_THREAD u16_t tcphdr_optlen; ++static PER_THREAD u16_t tcphdr_opt1len; ++static PER_THREAD u8_t *tcphdr_opt2; ++static PER_THREAD u16_t tcp_optidx; ++static PER_THREAD u32_t seqno; ++static PER_THREAD u32_t ackno; ++static PER_THREAD tcpwnd_size_t recv_acked; ++static PER_THREAD u16_t tcplen; ++static PER_THREAD u8_t flags; ++ ++static PER_THREAD u8_t recv_flags; ++static PER_THREAD struct pbuf *recv_data; ++ ++PER_THREAD struct tcp_pcb *tcp_input_pcb; + + /* Forward declarations. 
*/ + static err_t tcp_process(struct tcp_pcb *pcb); +@@ -126,11 +127,20 @@ tcp_input(struct pbuf *p, struct netif *inp) + u8_t hdrlen_bytes; + err_t err; + ++#if TCP_PCB_HASH ++ u32_t idx; ++ struct hlist_head *head; ++ struct hlist_node *node; ++ pcb = NULL; ++#endif ++ + LWIP_UNUSED_ARG(inp); + LWIP_ASSERT_CORE_LOCKED(); + LWIP_ASSERT("tcp_input: invalid pbuf", p != NULL); + ++#ifndef LWIP_PERF + PERF_START; ++#endif + + TCP_STATS_INC(tcp.recv); + MIB2_STATS_INC(mib2.tcpinsegs); +@@ -247,7 +257,15 @@ tcp_input(struct pbuf *p, struct netif *inp) + for an active connection. */ + prev = NULL; + ++#if TCP_PCB_HASH ++ idx = TUPLE4_HASH_FN( ip_current_dest_addr()->addr, tcphdr->dest, ++ ip_current_src_addr()->addr, tcphdr->src) & ++ (tcp_active_htable->size - 1); ++ head = &tcp_active_htable->array[idx].chain; ++ tcppcb_hlist_for_each(pcb, node, head) { ++#else + for (pcb = tcp_active_pcbs; pcb != NULL; pcb = pcb->next) { ++#endif + LWIP_ASSERT("tcp_input: active pcb->state != CLOSED", pcb->state != CLOSED); + LWIP_ASSERT("tcp_input: active pcb->state != TIME-WAIT", pcb->state != TIME_WAIT); + LWIP_ASSERT("tcp_input: active pcb->state != LISTEN", pcb->state != LISTEN); +@@ -263,6 +281,7 @@ tcp_input(struct pbuf *p, struct netif *inp) + pcb->local_port == tcphdr->dest && + ip_addr_cmp(&pcb->remote_ip, ip_current_src_addr()) && + ip_addr_cmp(&pcb->local_ip, ip_current_dest_addr())) { ++#if !TCP_PCB_HASH + /* Move this PCB to the front of the list so that subsequent + lookups will be faster (we exploit locality in TCP segment + arrivals). */ +@@ -275,9 +294,14 @@ tcp_input(struct pbuf *p, struct netif *inp) + TCP_STATS_INC(tcp.cachehit); + } + LWIP_ASSERT("tcp_input: pcb->next != pcb (after cache)", pcb->next != pcb); ++#endif + break; + } ++#if TCP_PCB_HASH ++ pcb = NULL; ++#else + prev = pcb; ++#endif + } + + if (pcb == NULL) { +@@ -363,8 +387,15 @@ tcp_input(struct pbuf *p, struct netif *inp) + arrivals). 
*/ + if (prev != NULL) { + ((struct tcp_pcb_listen *)prev)->next = lpcb->next; ++#if USE_LIBOS ++ if (lpcb->next) ++ lpcb->next->prev = (struct tcp_pcb_listen *)prev; ++#endif + /* our successor is the remainder of the listening list */ + lpcb->next = tcp_listen_pcbs.listen_pcbs; ++#if USE_LIBOS ++ lpcb->prev = NULL; ++#endif + /* put this listening pcb at the head of the listening list */ + tcp_listen_pcbs.listen_pcbs = lpcb; + } else { +@@ -445,6 +476,9 @@ tcp_input(struct pbuf *p, struct netif *inp) + application that the connection is dead before we + deallocate the PCB. */ + TCP_EVENT_ERR(pcb->state, pcb->errf, pcb->callback_arg, ERR_RST); ++#if TCP_PCB_HASH ++ tcp_pcb_remove_hash(tcp_active_htable, pcb); ++#endif + tcp_pcb_remove(&tcp_active_pcbs, pcb); + tcp_free(pcb); + } else { +@@ -550,7 +584,19 @@ tcp_input(struct pbuf *p, struct netif *inp) + goto aborted; + } + /* Try to send something out. */ ++#if LWIP_RECORD_PERF ++ if (check_layer_point(PERF_LAYER_TCP, PERF_POINT_TCP_SYN_ACK_RECV)) { ++ PERF_PAUSE(PERF_LAYER_TCP); ++ PERF_START(PERF_LAYER_TCP, PERF_POINT_TCP_ACK_SEND); ++ } ++#endif + tcp_output(pcb); ++#if LWIP_RECORD_PERF ++ if (check_layer_point(PERF_LAYER_TCP, PERF_POINT_TCP_ACK_SEND)) { ++ PERF_STOP_INCREASE_COUNT("tcp_in", PERF_LAYER_TCP); ++ PERF_RESUME(PERF_LAYER_TCP, PERF_POINT_TCP_SYN_ACK_RECV); ++ } ++#endif + #if TCP_INPUT_DEBUG + #if TCP_DEBUG + tcp_debug_print_state(pcb->state); +@@ -583,7 +629,9 @@ aborted: + } + + LWIP_ASSERT("tcp_input: tcp_pcbs_sane()", tcp_pcbs_sane()); ++#ifndef LWIP_PERF + PERF_STOP("tcp_input"); ++#endif + return; + dropped: + TCP_STATS_INC(tcp.drop); +@@ -610,6 +658,9 @@ tcp_input_delayed_close(struct tcp_pcb *pcb) + ensure the application doesn't continue using the PCB. 
*/ + TCP_EVENT_ERR(pcb->state, pcb->errf, pcb->callback_arg, ERR_CLSD); + } ++#if TCP_PCB_HASH ++ tcp_pcb_remove_hash(tcp_active_htable, pcb); ++#endif + tcp_pcb_remove(&tcp_active_pcbs, pcb); + tcp_free(pcb); + return 1; +@@ -649,6 +700,7 @@ tcp_listen_input(struct tcp_pcb_listen *pcb) + tcp_rst((const struct tcp_pcb *)pcb, ackno, seqno + tcplen, ip_current_dest_addr(), + ip_current_src_addr(), tcphdr->dest, tcphdr->src); + } else if (flags & TCP_SYN) { ++ PERF_UPDATE_POINT(PERF_LAYER_TCP, PERF_POINT_TCP_SYN_RECV); + LWIP_DEBUGF(TCP_DEBUG, ("TCP connection request %"U16_F" -> %"U16_F".\n", tcphdr->src, tcphdr->dest)); + #if TCP_LISTEN_BACKLOG + if (pcb->accepts_pending >= pcb->backlog) { +@@ -695,6 +747,9 @@ tcp_listen_input(struct tcp_pcb_listen *pcb) + npcb->netif_idx = pcb->netif_idx; + /* Register the new PCB so that we can begin receiving segments + for it. */ ++#if TCP_PCB_HASH ++ TCP_REG_ACTIVE_HASH(npcb); ++#endif + TCP_REG_ACTIVE(npcb); + + /* Parse any options in the SYN. */ +@@ -715,13 +770,18 @@ tcp_listen_input(struct tcp_pcb_listen *pcb) + } + #endif + ++ PERF_PAUSE(PERF_LAYER_TCP); ++ PERF_START(PERF_LAYER_TCP, PERF_POINT_TCP_SYN_ACK_SEND); + /* Send a SYN|ACK together with the MSS option. */ + rc = tcp_enqueue_flags(npcb, TCP_SYN | TCP_ACK); + if (rc != ERR_OK) { + tcp_abandon(npcb, 0); ++ PERF_RESUME(PERF_LAYER_TCP, PERF_POINT_TCP_SYN_RECV); + return; + } + tcp_output(npcb); ++ PERF_STOP_INCREASE_COUNT("tcp_output", PERF_LAYER_TCP); ++ PERF_RESUME(PERF_LAYER_TCP, PERF_POINT_TCP_SYN_RECV); + } + return; + } +@@ -858,6 +918,7 @@ tcp_process(struct tcp_pcb *pcb) + /* received SYN ACK with expected sequence number? */ + if ((flags & TCP_ACK) && (flags & TCP_SYN) + && (ackno == pcb->lastack + 1)) { ++ PERF_UPDATE_POINT(PERF_LAYER_TCP, PERF_POINT_TCP_SYN_ACK_RECV); + pcb->rcv_nxt = seqno + 1; + pcb->rcv_ann_right_edge = pcb->rcv_nxt; + pcb->lastack = ackno; +@@ -925,6 +986,7 @@ tcp_process(struct tcp_pcb *pcb) + /* expected ACK number? 
*/ + if (TCP_SEQ_BETWEEN(ackno, pcb->lastack + 1, pcb->snd_nxt)) { + pcb->state = ESTABLISHED; ++ PERF_UPDATE_POINT(PERF_LAYER_TCP, PERF_POINT_TCP_ACK_RECV); + LWIP_DEBUGF(TCP_DEBUG, ("TCP connection established %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest)); + #if LWIP_CALLBACK_API || TCP_LISTEN_BACKLOG + if (pcb->listener == NULL) { +@@ -995,6 +1057,9 @@ tcp_process(struct tcp_pcb *pcb) + ("TCP connection closed: FIN_WAIT_1 %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest)); + tcp_ack_now(pcb); + tcp_pcb_purge(pcb); ++#if TCP_PCB_HASH ++ TCP_RMV_ACTIVE_HASH(pcb); ++#endif + TCP_RMV_ACTIVE(pcb); + pcb->state = TIME_WAIT; + TCP_REG(&tcp_tw_pcbs, pcb); +@@ -1013,6 +1078,9 @@ tcp_process(struct tcp_pcb *pcb) + LWIP_DEBUGF(TCP_DEBUG, ("TCP connection closed: FIN_WAIT_2 %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest)); + tcp_ack_now(pcb); + tcp_pcb_purge(pcb); ++#if TCP_PCB_HASH ++ TCP_RMV_ACTIVE_HASH(pcb); ++#endif + TCP_RMV_ACTIVE(pcb); + pcb->state = TIME_WAIT; + TCP_REG(&tcp_tw_pcbs, pcb); +@@ -1023,6 +1091,9 @@ tcp_process(struct tcp_pcb *pcb) + if ((flags & TCP_ACK) && ackno == pcb->snd_nxt && pcb->unsent == NULL) { + LWIP_DEBUGF(TCP_DEBUG, ("TCP connection closed: CLOSING %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest)); + tcp_pcb_purge(pcb); ++#if TCP_PCB_HASH ++ TCP_RMV_ACTIVE_HASH(pcb); ++#endif + TCP_RMV_ACTIVE(pcb); + pcb->state = TIME_WAIT; + TCP_REG(&tcp_tw_pcbs, pcb); +diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c +index 8149d39..dac498e 100644 +--- a/src/core/tcp_out.c ++++ b/src/core/tcp_out.c +@@ -271,7 +271,7 @@ tcp_pbuf_prealloc(pbuf_layer layer, u16_t length, u16_t max_length, + return p; + } + #else /* TCP_OVERSIZE */ +-#define tcp_pbuf_prealloc(layer, length, mx, os, pcb, api, fst) pbuf_alloc((layer), (length), PBUF_RAM) ++#define tcp_pbuf_prealloc(layer, length, mx, os, pcb, api, fst) pbuf_alloc((layer), (length), PBUF_POOL) + #endif /* TCP_OVERSIZE */ + + #if 
TCP_CHECKSUM_ON_COPY +@@ -640,7 +640,7 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + ((struct pbuf_rom *)p2)->payload = (const u8_t *)arg + pos; + + /* Second, allocate a pbuf for the headers. */ +- if ((p = pbuf_alloc(PBUF_TRANSPORT, optlen, PBUF_RAM)) == NULL) { ++ if ((p = pbuf_alloc(PBUF_TRANSPORT, optlen, PBUF_POOL)) == NULL) { + /* If allocation fails, we have to deallocate the data pbuf as + * well. */ + pbuf_free(p2); +@@ -1458,6 +1458,11 @@ tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif + err_t err; + u16_t len; + u32_t *opts; ++ ++#if LWIP_RECORD_PERF ++ int tmpPoint; ++#endif ++ + #if TCP_CHECKSUM_ON_COPY + int seg_chksum_was_swapped = 0; + #endif +@@ -1604,6 +1609,9 @@ tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif + #endif /* CHECKSUM_GEN_TCP */ + TCP_STATS_INC(tcp.xmit); + ++ PERF_PAUSE_RETURN_POINT(PERF_LAYER_TCP, tmpPoint); ++ PERF_START(PERF_LAYER_IP, PERF_POINT_IP_SEND); ++ + NETIF_SET_HINTS(netif, &(pcb->netif_hints)); + err = ip_output_if(seg->p, &pcb->local_ip, &pcb->remote_ip, pcb->ttl, + pcb->tos, IP_PROTO_TCP, netif); +@@ -1618,6 +1626,9 @@ tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif + } + #endif + ++ PERF_STOP_INCREASE_COUNT("ip_out", PERF_LAYER_IP); ++ PERF_RESUME(PERF_LAYER_TCP, tmpPoint); ++ + return err; + } + +@@ -2024,6 +2035,10 @@ tcp_send_empty_ack(struct tcp_pcb *pcb) + u8_t optlen, optflags = 0; + u8_t num_sacks = 0; + ++#if LWIP_RECORD_PERF ++ int tmpPoint; ++#endif ++ + LWIP_ASSERT("tcp_send_empty_ack: invalid pcb", pcb != NULL); + + #if LWIP_TCP_TIMESTAMPS +@@ -2040,6 +2055,9 @@ tcp_send_empty_ack(struct tcp_pcb *pcb) + } + #endif + ++ PERF_PAUSE_RETURN_POINT(PERF_LAYER_TCP, tmpPoint); ++ PERF_START(PERF_LAYER_IP, PERF_POINT_IP_SEND); ++ + p = tcp_output_alloc_header(pcb, optlen, 0, lwip_htonl(pcb->snd_nxt)); + if (p == NULL) { + /* let tcp_fasttmr retry sending this ACK */ +@@ -2064,6 
+2082,9 @@ tcp_send_empty_ack(struct tcp_pcb *pcb) + tcp_clear_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW); + } + ++ PERF_STOP_INCREASE_COUNT("ip_out", PERF_LAYER_IP); ++ PERF_RESUME(PERF_LAYER_TCP, tmpPoint); ++ + return err; + } + +diff --git a/src/core/timeouts.c b/src/core/timeouts.c +index f37acfe..0542a32 100644 +--- a/src/core/timeouts.c ++++ b/src/core/timeouts.c +@@ -119,9 +119,9 @@ const int lwip_num_cyclic_timers = LWIP_ARRAYSIZE(lwip_cyclic_timers); + #if LWIP_TIMERS && !LWIP_TIMERS_CUSTOM + + /** The one and only timeout list */ +-static struct sys_timeo *next_timeout; ++static PER_THREAD struct sys_timeo *next_timeout; + +-static u32_t current_timeout_due_time; ++static PER_THREAD u32_t current_timeout_due_time; + + #if LWIP_TESTMODE + struct sys_timeo** +@@ -133,7 +133,7 @@ sys_timeouts_get_next_timeout(void) + + #if LWIP_TCP + /** global variable that shows if the tcp timer is currently scheduled or not */ +-static int tcpip_tcp_timer_active; ++static PER_THREAD int tcpip_tcp_timer_active; + + /** + * Timer callback function that calls tcp_tmr() and reschedules itself. 
#if USE_LIBOS
/**
 * Run sys_check_timeouts() when sys_timeouts_sleeptime() reports a sleep
 * time of zero; otherwise do nothing.
 */
void sys_timer_run(void)
{
  if (sys_timeouts_sleeptime() != 0) {
    return;
  }

  sys_check_timeouts();
}
#endif /* USE_LIBOS */
Redistributions in binary form must reproduce the above copyright notice, ++ * this list of conditions and the following disclaimer in the documentation ++ * and/or other materials provided with the distribution. ++ * 3. The name of the author may not be used to endorse or promote products ++ * derived from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT ++ * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT ++ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING ++ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY ++ * OF SUCH DAMAGE. ++ * ++ * This file is part of the lwIP TCP/IP stack. 
/*
 * Declare the per-thread base pointer of memory pool <name> together with
 * alloc_memp_<name>_base(). That function fills in the pool descriptor
 * memp_<name>, registers it in memp_pools[] and backs the pool with a region
 * obtained from sys_hugepage_malloc(). The region name is "<tid>_<name>".
 *
 * NOTE(review): the result of sys_hugepage_malloc() is stored unchecked;
 * confirm callers cope with a NULL base.
 */
#define LWIP_DECLARE_MEMP_BASE_ALIGNED(name, pool_size)\
PER_THREAD uint8_t *memp_memory_##name##_base; \
void alloc_memp_##name##_base(void) \
{ \
    memp_ ## name.desc = memp_desc_ ## name; \
    memp_ ## name.stats = &memp_stat ## name; \
    memp_ ## name.size = memp_size ## name; \
    memp_ ## name.num = memp_num ## name; \
    memp_ ## name.tab = &memp_tab_ ## name; \
    memp_pools[MEMP_##name] = &memp_ ## name; \
    \
    char mpname[MEMZONE_NAMESIZE] = {0}; \
    /* cast: "%ld" needs a long, gettid() yields a narrower id type */ \
    snprintf(mpname, MEMZONE_NAMESIZE, "%ld_%s", (long)gettid(), #name); \
    memp_memory_##name##_base = \
        sys_hugepage_malloc(mpname, LWIP_MEM_ALIGN_BUFFER(pool_size)); \
    memp_pools[MEMP_##name]->base = memp_memory_##name##_base; \
}

/*
 * Declare the per-thread pointer <variable_name> together with
 * alloc_memory_<variable_name>(), which points it at a region obtained from
 * sys_hugepage_malloc(). The region name is "<tid>_<variable_name>".
 */
#define LWIP_DECLARE_MEMORY_ALIGNED(variable_name, size) \
PER_THREAD uint8_t *variable_name; \
void alloc_memory_##variable_name(void) \
{ \
    char mpname[MEMZONE_NAMESIZE] = {0}; \
    /* cast: "%ld" needs a long, gettid() yields a narrower id type */ \
    snprintf(mpname, MEMZONE_NAMESIZE, "%ld_%s", (long)gettid(), #variable_name); \
    (variable_name) = \
        sys_hugepage_malloc(mpname, LWIP_MEM_ALIGN_BUFFER(size)); \
}
Redistributions of source code must retain the above copyright notice, ++ * this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright notice, ++ * this list of conditions and the following disclaimer in the documentation ++ * and/or other materials provided with the distribution. ++ * 3. The name of the author may not be used to endorse or promote products ++ * derived from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT ++ * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT ++ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING ++ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY ++ * OF SUCH DAMAGE. ++ * ++ * This file is part of the lwIP TCP/IP stack. 
++ * ++ * Author: Huawei Technologies ++ * ++ */ ++ ++#ifndef LWIP_ARCH_PERF_H ++#define LWIP_ARCH_PERF_H ++ ++#include ++ ++#include "lwip/debug.h" ++ ++#if LWIP_RECORD_PERF ++enum PERF_POINT { ++ PERF_POINT_IP_RECV, ++ PERF_POINT_TCP_RECV, ++ PERF_POINT_UDP, ++ PERF_POINT_TCP_SYN_RECV, ++ PERF_POINT_TCP_SYN_ACK_SEND, ++ PERF_POINT_TCP_ACK_RECV, ++ PERF_POINT_TCP_SYN_SEND, ++ PERF_POINT_TCP_SYN_ACK_RECV, ++ PERF_POINT_TCP_ACK_SEND, ++ PERF_POINT_TCP_DATA_SEND, ++ PERF_POINT_IP_SEND, ++ PERF_POINT_END ++}; ++ ++enum PERF_LAYER { ++ PERF_LAYER_IP, ++ PERF_LAYER_TCP, ++ PERF_LAYER_UDP, ++ PERF_LAYER_END ++}; ++ ++extern uint32_t g_record_perf; ++ ++extern __thread uint64_t g_timeTaken[PERF_POINT_END]; ++extern __thread int g_perfPoint[PERF_LAYER_END]; ++extern __thread struct timespec tvStart[PERF_LAYER_END]; ++ ++extern char *g_ppLayerName[PERF_POINT_END]; ++extern volatile uint64_t g_perfMaxtime[PERF_POINT_END]; ++extern volatile uint64_t g_astPacketCnt[PERF_POINT_END]; ++extern volatile uint64_t g_astPacketProcTime[PERF_POINT_END]; ++ ++#define PERF_START(layer, point) do {\ ++ g_perfPoint[(layer)] = (point);\ ++ LWIP_DEBUGF(PERF_OUTPUT_DEBUG, ("set point %d:%s\n", layer, g_ppLayerName[g_perfPoint[(layer)]]));\ ++ clock_gettime(CLOCK_MONOTONIC, &tvStart[(layer)]);\ ++ g_timeTaken[(point)] = 0;\ ++} while (0) ++ ++#define PERF_UPDATE_POINT(layer, point) do {\ ++ LWIP_DEBUGF(PERF_OUTPUT_DEBUG, ("old point %d:%s\n", layer, g_ppLayerName[g_perfPoint[(layer)]]));\ ++ g_timeTaken[(point)] = g_timeTaken[g_perfPoint[(layer)]];\ ++ g_timeTaken[g_perfPoint[(layer)]] = 0;\ ++ g_perfPoint[(layer)] = (point);\ ++ LWIP_DEBUGF(PERF_OUTPUT_DEBUG, ("new point %d:%s\n", layer, g_ppLayerName[g_perfPoint[(layer)]]));\ ++} while (0) ++ ++#define PERF_PAUSE(layer) do {\ ++ struct timespec tvEnd;\ ++ clock_gettime(CLOCK_MONOTONIC, &tvEnd);\ ++ LWIP_DEBUGF(PERF_OUTPUT_DEBUG, ("perf pause layer%d\n", layer));\ ++ g_timeTaken[g_perfPoint[(layer)]] += ((tvEnd.tv_sec - 
/*
 * Stop the measurement of the point currently active on <layer> and publish it.
 *
 * When g_record_perf is set, the time elapsed since tvStart[layer] is added
 * to g_timeTaken[] of the active point. The result is then folded into the
 * shared statistics: g_perfMaxtime[] is raised to it if larger, g_astPacketCnt[]
 * is incremented and g_astPacketProcTime[] is increased by it.
 *
 * x is a prompt (currently unused).
 *
 * All values are kept as uint64_t so a maximum above 2^32 ns can still be
 * compared and replaced. A failed compare-and-swap is retried once.
 */
#define PERF_STOP_INCREASE_COUNT(x, layer) do {\
    if (g_record_perf)\
    {\
        struct timespec tvEnd;\
        int perf_pt_;\
        int perf_retry_ = 2;\
        uint64_t perf_elapsed_;\
        uint64_t perf_max_;\
        clock_gettime(CLOCK_MONOTONIC, &tvEnd);\
        perf_pt_ = g_perfPoint[(layer)];\
        g_timeTaken[perf_pt_] += ((tvEnd.tv_sec - tvStart[(layer)].tv_sec) \
            * (1000000000UL) + (tvEnd.tv_nsec - tvStart[(layer)].tv_nsec));\
        perf_elapsed_ = g_timeTaken[perf_pt_];\
        while (perf_retry_-- > 0)\
        {\
            /* atomic read of the current maximum */\
            perf_max_ = __sync_or_and_fetch(&g_perfMaxtime[perf_pt_], 0);\
            if (perf_max_ >= perf_elapsed_)\
            {\
                break;\
            }\
            if (__sync_bool_compare_and_swap(&g_perfMaxtime[perf_pt_],\
                perf_max_, perf_elapsed_))\
            {\
                break;\
            }\
        }\
        __sync_fetch_and_add(&g_astPacketCnt[perf_pt_], 1);\
        __sync_fetch_and_add(&g_astPacketProcTime[perf_pt_], perf_elapsed_);\
        LWIP_DEBUGF(PERF_OUTPUT_DEBUG, ("Time for %s is: %ld\n",\
            g_ppLayerName[perf_pt_], (long)perf_elapsed_));\
    }\
} while (0)
PERF_UPDATE_POINT(layer, point) do { } while (0) ++#define PERF_PAUSE(layer) do { } while (0) ++#define PERF_PAUSE_RETURN_POINT(layer, pause_point) do { } while (0) ++#define PERF_RESUME(layer, point) do { } while (0) ++#define PERF_STOP_INCREASE_COUNT(x, layer) do { } while (0) ++#endif ++ ++#endif /* LWIP_ARCH_PERF_H */ +diff --git a/src/include/arch/sys_arch.h b/src/include/arch/sys_arch.h +index 3f555ee..b8a0d28 100644 +--- a/src/include/arch/sys_arch.h ++++ b/src/include/arch/sys_arch.h +@@ -1,7 +1,93 @@ +-#ifndef LWIP_SYS_ARCH_H +-#define LWIP_SYS_ARCH_H ++/* ++ * Copyright (c) 2001-2004 Swedish Institute of Computer Science. ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without modification, ++ * are permitted provided that the following conditions are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright notice, ++ * this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright notice, ++ * this list of conditions and the following disclaimer in the documentation ++ * and/or other materials provided with the distribution. ++ * 3. The name of the author may not be used to endorse or promote products ++ * derived from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT ++ * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT ++ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING ++ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY ++ * OF SUCH DAMAGE. ++ * ++ * This file is part of the lwIP TCP/IP stack. ++ * ++ * Author: Huawei Technologies ++ * ++ */ + ++#ifndef LWIP_ARCH_SYS_ARCH_H ++#define LWIP_ARCH_SYS_ARCH_H + ++#include ++#include + +-#endif /* LWIP_SYS_ARCH_H */ ++#define SYS_MBOX_NULL NULL ++#define SYS_SEM_NULL NULL ++typedef uint32_t sys_prot_t; + ++struct sys_sem { ++ volatile unsigned int c; ++ int (*wait_fn)(void); ++}; ++ ++#define MBOX_NAME_LEN 64 ++struct sys_mbox { ++ struct rte_ring *ring; ++ char name[MBOX_NAME_LEN]; ++ int size; ++ int socket_id; ++ unsigned flags; ++ int (*wait_fn)(void); ++}; ++ ++typedef struct sys_sem *sys_sem_t; ++#define sys_sem_valid(sem) (((sem) != NULL) && (*(sem) != NULL)) ++#define sys_sem_valid_val(sem) ((sem) != NULL) ++#define sys_sem_set_invalid(sem) do { if ((sem) != NULL) { *(sem) = NULL; }} while(0) ++#define sys_sem_set_invalid_val(sem) do { (sem) = NULL; } while(0) ++ ++struct sys_mutex; ++typedef struct sys_mutex *sys_mutex_t; ++#define sys_mutex_valid(mutex) sys_sem_valid(mutex) ++#define sys_mutex_set_invalid(mutex) sys_sem_set_invalid(mutex) ++ ++typedef struct sys_mbox *sys_mbox_t; ++#define sys_mbox_valid(mbox) sys_sem_valid(mbox) ++#define sys_mbox_valid_val(mbox) sys_sem_valid_val(mbox) ++#define sys_mbox_set_invalid(mbox) sys_sem_set_invalid(mbox) ++#define sys_mbox_set_invalid_val(mbox) sys_sem_set_invalid_val(mbox) ++int sys_mbox_empty(struct sys_mbox *); ++ ++struct sys_thread; ++typedef struct sys_thread *sys_thread_t; ++ 
++extern int eth_dev_poll(void); ++ ++void sys_calibrate_tsc(void); ++uint32_t sys_now(void); ++__attribute__((always_inline)) inline int update_timeout(int timeout, uint32_t poll_ts) ++{ ++ uint32_t used_ms = sys_now() - poll_ts; ++ if (timeout > 0 && used_ms < timeout) { ++ return timeout; ++ } else { ++ return 0; ++ } ++} ++ ++#endif /* LWIP_ARCH_SYS_ARCH_H */ +diff --git a/src/include/eventpoll.h b/src/include/eventpoll.h +new file mode 100644 +index 0000000..01f8d64 +--- /dev/null ++++ b/src/include/eventpoll.h +@@ -0,0 +1,72 @@ ++/* ++ * Copyright (c) 2001-2004 Swedish Institute of Computer Science. ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without modification, ++ * are permitted provided that the following conditions are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright notice, ++ * this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright notice, ++ * this list of conditions and the following disclaimer in the documentation ++ * and/or other materials provided with the distribution. ++ * 3. The name of the author may not be used to endorse or promote products ++ * derived from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT ++ * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT ++ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING ++ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY ++ * OF SUCH DAMAGE. ++ * ++ * This file is part of the lwIP TCP/IP stack. ++ * ++ * Author: Huawei Technologies ++ * ++ */ ++ ++#ifndef __EVENTPOLL_H__ ++#define __EVENTPOLL_H__ ++ ++#include ++ ++#include "lwip/api.h" ++#include "list.h" ++ ++#define MAX_EPOLLFDS 32 ++ ++#define LIBOS_EPOLLNONE (0x0) ++#define LIBOS_BADEP (NULL) ++ ++struct event_queue { ++ struct list_node events; ++ /* total number of sockets have events */ ++ int num_events; ++}; ++ ++struct event_array { ++ sys_mbox_t mbox; ++ volatile int num_events; ++ struct epoll_event events[0]; ++}; ++ ++struct libos_epoll { ++ struct event_queue *libos_queue; ++ struct event_array *host_queue; ++ int num_hostfds; ++ int hints; ++ int fd; /* self fd */ ++ int efd; /* eventfd */ ++}; ++ ++extern int add_epoll_event(struct netconn*, uint32_t); ++extern int del_epoll_event(struct netconn*, uint32_t); ++extern int lwip_epoll_close(int); ++extern int lwip_is_epfd(int); ++ ++#endif /* __EVENTPOLL_H__ */ +diff --git a/src/include/hlist.h b/src/include/hlist.h +new file mode 100644 +index 0000000..7059488 +--- /dev/null ++++ b/src/include/hlist.h +@@ -0,0 +1,233 @@ ++/* ++ * Copyright (c) 2001-2004 Swedish Institute of Computer Science. ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without modification, ++ * are permitted provided that the following conditions are met: ++ * ++ * 1. 
Redistributions of source code must retain the above copyright notice, ++ * this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright notice, ++ * this list of conditions and the following disclaimer in the documentation ++ * and/or other materials provided with the distribution. ++ * 3. The name of the author may not be used to endorse or promote products ++ * derived from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT ++ * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT ++ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING ++ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY ++ * OF SUCH DAMAGE. ++ * ++ * This file is part of the lwIP TCP/IP stack. 
++ * ++ * Author: Huawei Technologies ++ * ++ */ ++ ++#ifndef __HLIST_H__ ++#define __HLIST_H__ ++ ++#include "list.h" ++ ++//#if TCP_PCB_HASH ++struct hlist_node { ++ /** ++ * @pprev: point the previous node's next pointer ++ */ ++ struct hlist_node *next; ++ struct hlist_node **pprev; ++}; ++ ++struct hlist_head { ++ struct hlist_node *first; ++}; ++ ++struct hlist_tail { ++ struct hlist_node *end; ++}; ++ ++struct hlist_ctl { ++ struct hlist_head head; ++ struct hlist_tail tail; ++}; ++ ++#define INIT_HLIST_CTRL(ptr) {(ptr)->head.first = NULL; (ptr)->tail.end = NULL;} ++#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL) ++#define INIT_HLIST_NODE(ptr) {(ptr)->next = NULL; (ptr)->pprev = NULL;} ++#define hlist_entry(ptr, type, member) \ ++ container_of(ptr, type, member) ++ ++/** ++ * hlist_for_each_entry - iterate over list of given type ++ * @tpos: the type * to use as a loop cursor. ++ * @pos: the &struct hlist_node to use as a loop cursor. ++ * @head: the head for your list. ++ * @member: the name of the hlist_node within the struct. 
++ */ ++#define hlist_for_each_entry(tpos, pos, head, member) \ ++ for (pos = (head)->first; \ ++ pos && ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ ++ pos = (pos)->next) ++ ++/** ++ * next must be != NULL ++ * add n node before next node ++ * ++ * @n: new node ++ * @next: node in the hlist ++ */ ++static inline void hlist_add_before(struct hlist_node *n, struct hlist_node *next) ++{ ++ n->pprev = next->pprev; ++ n->next = next; ++ next->pprev = &n->next; ++ *(n->pprev) = n; ++} ++ ++static inline int hlist_empty(const struct hlist_head *h) ++{ ++ return !h->first; ++} ++ ++static inline int hlist_unhashed(const struct hlist_node *h) ++{ ++ return !h->pprev; ++} ++ ++static inline void hlist_del_init(struct hlist_node *n) ++{ ++ struct hlist_node *next = n->next; ++ struct hlist_node **pprev = n->pprev; ++ ++ if (pprev == NULL) { ++ return; ++ } ++ ++ *pprev = next; ++ if (next != NULL) { ++ next->pprev = pprev; ++ } ++ ++ n->next = NULL; ++ n->pprev = NULL; ++} ++ ++static inline void hlist_ctl_del(struct hlist_ctl *ctl, struct hlist_node *n) ++{ ++ if (ctl->head.first == ctl->tail.end) { ++ hlist_del_init(n); /* reset n as well, so hlist_unhashed(n) holds afterwards */ ++ ctl->head.first = ctl->tail.end = NULL; ++ return; ++ } ++ ++ if (ctl->tail.end == n) { ++ ctl->tail.end = (struct hlist_node *)n->pprev; ++ } ++ ++ hlist_del_init(n); ++} ++ ++static inline struct hlist_node *hlist_pop_tail(struct hlist_ctl *ctl) ++{ ++ if (hlist_empty(&ctl->head)) { ++ return NULL; ++ } ++ ++ if (ctl->head.first == ctl->tail.end) { ++ struct hlist_node *ret = ctl->tail.end; ++ hlist_del_init(ret); /* single node: unlink it and clear its next/pprev */ ++ ctl->head.first = ctl->tail.end = NULL; ++ return ret; ++ } ++ ++ struct hlist_node *temp = ctl->tail.end; ++ ++ struct hlist_node **ptailPrev = ctl->tail.end->pprev; ++ *ptailPrev = NULL; ++ ++ ctl->tail.end = (struct hlist_node *)ptailPrev; ++ temp->pprev = NULL; ++ return temp; ++} ++ ++static inline void hlist_add_after(struct hlist_node *n, struct hlist_node *next) ++{ ++ next->next = n->next; ++ n->next = next; ++ next->pprev = &n->next; ++ if 
(next->next) { ++ next->next->pprev = &next->next; ++ } ++} ++ ++static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) ++{ ++ struct hlist_node *first = h->first; ++ ++ n->next = first; ++ if (first != NULL) { ++ first->pprev = &n->next; ++ } ++ ++ h->first = n; ++ n->pprev = &h->first; ++} ++ ++static inline struct hlist_node *hlist_pop_head(struct hlist_ctl *ctl) ++{ ++ if (hlist_empty(&ctl->head)) { ++ return NULL; ++ } ++ ++ struct hlist_node *temp = ctl->head.first; ++ hlist_ctl_del(ctl, temp); ++ return temp; ++} ++ ++static inline void hlist_ctl_add_tail(struct hlist_ctl *ctl, struct hlist_node *node) ++{ ++ if (hlist_empty(&ctl->head)) { ++ hlist_add_head(node, &ctl->head); ++ ctl->tail.end = ctl->head.first; ++ return; ++ } ++ ++ ctl->tail.end->next = node; ++ ++ node->pprev = &(ctl->tail.end->next); ++ node->next = NULL; ++ ctl->tail.end = node; ++} ++ ++static inline void hlist_ctl_add_head(struct hlist_node *node, struct hlist_ctl *ctl) ++{ ++ hlist_add_head(node, &ctl->head); ++ if (ctl->tail.end == NULL) { ++ ctl->tail.end = ctl->head.first; ++ } ++} ++ ++static inline void hlist_ctl_add_before(struct hlist_node *n, struct hlist_node *next, struct hlist_ctl *ctl) ++{ ++ hlist_add_before(n, next); ++ if (next == ctl->head.first) { ++ ctl->head.first = n; ++ } ++} ++ ++static inline void hlist_ctl_add_after(struct hlist_node *n, struct hlist_node *next, struct hlist_ctl *ctl) ++{ ++ hlist_add_after(n, next); ++ if (n == ctl->tail.end) { ++ ctl->tail.end = next; ++ } ++} ++//#endif /* TCP_PCB_HASH */ ++ ++#endif /* __HLIST_H__ */ +diff --git a/src/include/list.h b/src/include/list.h +new file mode 100644 +index 0000000..11f94c2 +--- /dev/null ++++ b/src/include/list.h +@@ -0,0 +1,110 @@ ++/* ++ * Copyright (c) 2001-2004 Swedish Institute of Computer Science. ++ * All rights reserved. 
++ * ++ * Redistribution and use in source and binary forms, with or without modification, ++ * are permitted provided that the following conditions are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright notice, ++ * this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright notice, ++ * this list of conditions and the following disclaimer in the documentation ++ * and/or other materials provided with the distribution. ++ * 3. The name of the author may not be used to endorse or promote products ++ * derived from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT ++ * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT ++ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING ++ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY ++ * OF SUCH DAMAGE. ++ * ++ * This file is part of the lwIP TCP/IP stack. 
++ * ++ * Author: Huawei Technologies ++ * ++ */ ++ ++#ifndef __LIST_H__ ++#define __LIST_H__ ++ ++#ifndef NULL ++#ifdef __cplusplus ++#define NULL 0 ++#else ++#define NULL ((void *)0) ++#endif ++#endif ++ ++struct list_node { ++ struct list_node *prev; ++ struct list_node *next; ++}; ++ ++static inline void init_list_node_null(struct list_node *n) ++{ ++ n->prev = NULL; ++ n->next = NULL; ++} ++ ++static inline void init_list_node(struct list_node *n) ++{ ++ n->prev = n; ++ n->next = n; ++} ++ ++static inline void list_add_node(struct list_node *h, struct list_node *n) ++{ ++ n->next = h; ++ n->prev = h->prev; ++ h->prev->next = n; ++ h->prev = n; ++} ++ ++static inline void list_del_node(struct list_node *n) ++{ ++ struct list_node *prev = n->prev; ++ struct list_node *next = n->next; ++ next->prev = prev; ++ prev->next = next; ++} ++ ++static inline void list_del_node_init(struct list_node *n) ++{ ++ list_del_node(n); ++ init_list_node(n); ++} ++ ++static inline void list_del_node_null(struct list_node *n) ++{ ++ if ((n->next) && (n->prev)) { ++ list_del_node(n); ++ } ++ init_list_node_null(n); ++} ++ ++static inline int list_is_null(const struct list_node *n) ++{ ++ return (n->prev == NULL) && (n->next == NULL); ++} ++ ++static inline int list_is_empty(const struct list_node *h) ++{ ++ return h == h->next; ++} ++ ++#define list_for_each_safe(pos, n, head) \ ++ for (pos = (head)->next, n = (pos)->next; pos != (head); pos = n, n = (pos)->next) ++ ++#ifndef container_of ++#define container_of(ptr, type, member) ({ \ ++ typeof( ((type *)0)->member ) *__mptr = (ptr); \ ++ (type *)((char *)__mptr - offsetof(type,member));}) ++#endif /* container_of */ ++ ++#endif /* __LIST_H__ */ +diff --git a/src/include/lwip/api.h b/src/include/lwip/api.h +index c2afaf2..6dec8c0 100644 +--- a/src/include/lwip/api.h ++++ b/src/include/lwip/api.h +@@ -140,8 +140,43 @@ enum netconn_type { + /** Raw connection IPv6 (dual-stack by default, unless you call @ref netconn_set_ipv6only) */ + 
, NETCONN_RAW_IPV6 = NETCONN_RAW | NETCONN_TYPE_IPV6 /* 0x48 */ + #endif /* LWIP_IPV6 */ ++ ++#if USE_LIBOS ++ /* these must be bigger than 0xff, because (type & 0xff) is reserved for lwip internal use */ ++ , NETCONN_LIBOS = 0x100 ++ , NETCONN_HOST = 0x200 ++ , NETCONN_INPRG = 0x400 ++ , NETCONN_STACK = NETCONN_LIBOS | NETCONN_HOST | NETCONN_INPRG ++#endif /* USE_LIBOS */ + }; + ++#if USE_LIBOS ++#define SET_CONN_TYPE_LIBOS_OR_HOST(conn) do { \ ++ (conn)->type &= ~(NETCONN_STACK); \ ++ (conn)->type |= (NETCONN_LIBOS | NETCONN_HOST); } while (0) ++#define SET_CONN_TYPE_LIBOS(conn) do { \ ++ (conn)->type &= ~(NETCONN_STACK); \ ++ (conn)->type |= NETCONN_LIBOS; } while (0) ++#define SET_CONN_TYPE_HOST(conn) do { \ ++ (conn)->type &= ~(NETCONN_STACK); \ ++ (conn)->type |= NETCONN_HOST; } while (0) ++#define ADD_CONN_TYPE_INPRG(conn) do { \ ++ (conn)->type |= NETCONN_INPRG; } while(0) ++#define CONN_TYPE_HAS_LIBOS_AND_HOST(conn) (((conn)->type & (NETCONN_LIBOS | NETCONN_HOST)) == (NETCONN_LIBOS | NETCONN_HOST)) ++#define CONN_TYPE_HAS_LIBOS(conn) ((conn)->type & NETCONN_LIBOS) ++#define CONN_TYPE_HAS_HOST(conn) ((conn)->type & NETCONN_HOST) ++#define CONN_TYPE_HAS_INPRG(conn) (!!((conn)->type & NETCONN_INPRG)) ++#define CONN_TYPE_IS_LIBOS(conn) (!!(NETCONN_LIBOS == ((conn)->type & NETCONN_STACK))) ++#define CONN_TYPE_IS_HOST(conn) (!!(NETCONN_HOST == ((conn)->type & NETCONN_STACK))) ++#else ++#define SET_CONN_TYPE_LIBOS_OR_HOST(conn) do {} while (0) ++#define SET_CONN_TYPE_LIBOS(conn) do {} while (0) ++#define SET_CONN_TYPE_HOST(conn) do {} while (0) ++#define CONN_TYPE_HAS_LIBOS_AND_HOST(conn) (0) ++#define CONN_TYPE_HAS_LIBOS(conn) (0) ++#define CONN_TYPE_HAS_HOST(conn) (0) ++#endif /* USE_LIBOS */ ++ + /** Current state of the netconn. Non-TCP netconns are always + * in state NETCONN_NONE! 
*/ + enum netconn_state { +diff --git a/src/include/lwip/debug.h b/src/include/lwip/debug.h +index 579fd24..f47cbfe 100644 +--- a/src/include/lwip/debug.h ++++ b/src/include/lwip/debug.h +@@ -145,6 +145,7 @@ + ((debug) & LWIP_DBG_ON) && \ + ((debug) & LWIP_DBG_TYPES_ON) && \ + ((s16_t)((debug) & LWIP_DBG_MASK_LEVEL) >= LWIP_DBG_MIN_LEVEL)) { \ ++ LWIP_PLATFORM_LOG(debug, STRIP_BRACES(ESC_ARGS message)); \ + LWIP_PLATFORM_DIAG(message); \ + if ((debug) & LWIP_DBG_HALT) { \ + while(1); \ +diff --git a/src/include/lwip/def.h b/src/include/lwip/def.h +index dfb266d..fea7187 100644 +--- a/src/include/lwip/def.h ++++ b/src/include/lwip/def.h +@@ -116,6 +116,21 @@ u32_t lwip_htonl(u32_t x); + + /* Provide usual function names as macros for users, but this can be turned off */ + #ifndef LWIP_DONT_PROVIDE_BYTEORDER_FUNCTIONS ++ ++/* avoid conflicts with netinet/in.h */ ++#ifdef htons ++#undef htons ++#endif ++#ifdef ntohs ++#undef ntohs ++#endif ++#ifdef htonl ++#undef htonl ++#endif ++#ifdef ntohl ++#undef ntohl ++#endif ++ + #define htons(x) lwip_htons(x) + #define ntohs(x) lwip_ntohs(x) + #define htonl(x) lwip_htonl(x) +diff --git a/src/include/lwip/ip.h b/src/include/lwip/ip.h +index 653c3b2..d560f6b 100644 +--- a/src/include/lwip/ip.h ++++ b/src/include/lwip/ip.h +@@ -96,9 +96,15 @@ struct ip_pcb { + /* + * Option flags per-socket. These are the same like SO_XXX in sockets.h + */ ++#if USE_LIBOS ++#define SOF_REUSEADDR 0x02U /* allow local address reuse */ ++#define SOF_KEEPALIVE 0x09U /* keep connections alive */ ++#define SOF_BROADCAST 0x06U /* permit to send and to receive broadcast messages (see IP_SOF_BROADCAST option) */ ++#else + #define SOF_REUSEADDR 0x04U /* allow local address reuse */ + #define SOF_KEEPALIVE 0x08U /* keep connections alive */ + #define SOF_BROADCAST 0x20U /* permit to send and to receive broadcast messages (see IP_SOF_BROADCAST option) */ ++#endif /* USE_LIBOS */ + + /* These flags are inherited (e.g. 
from a listen-pcb to a connection-pcb): */ + #define SOF_INHERITED (SOF_REUSEADDR|SOF_KEEPALIVE) +@@ -125,7 +131,7 @@ struct ip_globals + /** Destination IP address of current_header */ + ip_addr_t current_iphdr_dest; + }; +-extern struct ip_globals ip_data; ++extern PER_THREAD struct ip_globals ip_data; + + + /** Get the interface that accepted the current packet. +diff --git a/src/include/lwip/memp.h b/src/include/lwip/memp.h +index 1630b26..64d8f31 100644 +--- a/src/include/lwip/memp.h ++++ b/src/include/lwip/memp.h +@@ -58,7 +58,11 @@ typedef enum { + #include "lwip/priv/memp_priv.h" + #include "lwip/stats.h" + ++#if USE_LIBOS ++extern PER_THREAD struct memp_desc* memp_pools[MEMP_MAX]; ++#else + extern const struct memp_desc* const memp_pools[MEMP_MAX]; ++#endif /* USE_LIBOS */ + + /** + * @ingroup mempool +@@ -92,6 +96,18 @@ extern const struct memp_desc* const memp_pools[MEMP_MAX]; + * To relocate a pool, declare it as extern in cc.h. Example for GCC: + * extern u8_t \_\_attribute\_\_((section(".onchip_mem"))) memp_memory_my_private_pool_base[]; + */ ++#if USE_LIBOS ++#define LWIP_MEMPOOL_DECLARE(name,num,size,desc) \ ++ PER_THREAD struct memp_desc memp_ ## name = {0}; \ ++ PER_THREAD char memp_desc_ ## name[] = desc; \ ++ PER_THREAD struct stats_mem memp_stat ## name = {0}; \ ++ PER_THREAD u16_t memp_size ## name = size; \ ++ PER_THREAD u16_t memp_num ## name = num; \ ++ PER_THREAD struct memp *memp_tab_ ## name = NULL; \ ++ LWIP_DECLARE_MEMP_BASE_ALIGNED(name, ((num) * (MEMP_SIZE + MEMP_ALIGN_SIZE(size)))); ++ ++#else /* USE_LIBOS */ ++ + #define LWIP_MEMPOOL_DECLARE(name,num,size,desc) \ + LWIP_DECLARE_MEMORY_ALIGNED(memp_memory_ ## name ## _base, ((num) * (MEMP_SIZE + MEMP_ALIGN_SIZE(size)))); \ + \ +@@ -108,6 +124,7 @@ extern const struct memp_desc* const memp_pools[MEMP_MAX]; + &memp_tab_ ## name \ + }; + ++#endif /* USE_LIBOS */ + #endif /* MEMP_MEM_MALLOC */ + + /** +diff --git a/src/include/lwip/netif.h b/src/include/lwip/netif.h +index 
9a16ded..057c51f 100644 +--- a/src/include/lwip/netif.h ++++ b/src/include/lwip/netif.h +@@ -406,11 +406,11 @@ struct netif { + #define NETIF_FOREACH(netif) if (((netif) = netif_default) != NULL) + #else /* LWIP_SINGLE_NETIF */ + /** The list of network interfaces. */ +-extern struct netif *netif_list; ++extern PER_THREAD struct netif *netif_list; + #define NETIF_FOREACH(netif) for ((netif) = netif_list; (netif) != NULL; (netif) = (netif)->next) + #endif /* LWIP_SINGLE_NETIF */ + /** The default network interface. */ +-extern struct netif *netif_default; ++extern PER_THREAD struct netif *netif_default; + + void netif_init(void); + +diff --git a/src/include/lwip/opt.h b/src/include/lwip/opt.h +index d8c82d1..8294cdd 100644 +--- a/src/include/lwip/opt.h ++++ b/src/include/lwip/opt.h +@@ -533,6 +533,22 @@ + #endif + + /** ++ * MEMP_NUM_SYS_SEM: the number of struct sys_sems. ++ * (only needed if you use the sequential API, like api_lib.c) ++ */ ++#if !defined MEMP_NUM_SYS_SEM || defined __DOXYGEN__ ++#define MEMP_NUM_SYS_SEM 128 ++#endif ++ ++/** ++ * MEMP_NUM_SYS_MBOX: the number of struct sys_mboxes. ++ * (only needed if you use the sequential API, like api_lib.c) ++ */ ++#if !defined MEMP_NUM_SYS_MBOX || defined __DOXYGEN__ ++#define MEMP_NUM_SYS_MBOX 128 ++#endif ++ ++/** + * MEMP_NUM_SELECT_CB: the number of struct lwip_select_cb. + * (Only needed if you have LWIP_MPU_COMPATIBLE==1 and use the socket API. + * In that case, you need one per thread calling lwip_select.) +@@ -2232,7 +2248,7 @@ + * MIB2_STATS==1: Stats for SNMP MIB2. + */ + #if !defined MIB2_STATS || defined __DOXYGEN__ +-#define MIB2_STATS 0 ++#define MIB2_STATS 1 + #endif + + #else +@@ -3422,6 +3438,10 @@ + #define TCP_OUTPUT_DEBUG LWIP_DBG_OFF + #endif + ++#ifndef PERF_OUTPUT_DEBUG ++ #define PERF_OUTPUT_DEBUG LWIP_DBG_OFF ++#endif ++ + /** + * TCP_RST_DEBUG: Enable debugging for TCP with the RST message. 
+ */ +@@ -3502,6 +3522,46 @@ + #define LWIP_TESTMODE 0 + #endif + ++/** ++ * EPOLL_DEBUG: Enable debugging in epoll.c. ++ */ ++#if !defined EPOLL_DEBUG || defined __DOXYGEN__ && USE_LIBOS ++#define EPOLL_DEBUG LWIP_DBG_OFF ++#endif ++/** ++ * @} ++ */ ++ ++/** ++ * ETHDEV_DEBUG: Enable debugging in ethdev.c. ++ */ ++#if !defined ETHDEV_DEBUG || defined __DOXYGEN__ && USE_LIBOS ++#define ETHDEV_DEBUG LWIP_DBG_OFF ++#endif ++/** ++ * @} ++ */ ++ ++/** ++ * SYSCALL_DEBUG: Enable debugging of syscalls. ++ */ ++#if !defined SYSCALL_DEBUG || defined __DOXYGEN__ && USE_LIBOS ++#define SYSCALL_DEBUG LWIP_DBG_OFF ++#endif ++/** ++ * @} ++ */ ++ ++/** ++ * CONTROL_DEBUG: Enable debugging in control_plane.c. ++ */ ++#if !defined CONTROL_DEBUG || defined __DOXYGEN__ && USE_LIBOS ++#define CONTROL_DEBUG LWIP_DBG_ON ++#endif ++/** ++ * @} ++ */ ++ + /* + -------------------------------------------------- + ---------- Performance tracking options ---------- +diff --git a/src/include/lwip/priv/memp_std.h b/src/include/lwip/priv/memp_std.h +index 669ad4d..395ac0c 100644 +--- a/src/include/lwip/priv/memp_std.h ++++ b/src/include/lwip/priv/memp_std.h +@@ -122,6 +122,13 @@ LWIP_MEMPOOL(MLD6_GROUP, MEMP_NUM_MLD6_GROUP, sizeof(struct mld_group), + #endif /* LWIP_IPV6 && LWIP_IPV6_MLD */ + + ++#if USE_LIBOS ++#if !LWIP_NETCONN_SEM_PER_THREAD ++LWIP_MEMPOOL(SYS_SEM, MEMP_NUM_SYS_SEM, sizeof(struct sys_sem), "SYS_SEM") ++#endif ++ ++LWIP_MEMPOOL(SYS_MBOX, MEMP_NUM_SYS_MBOX, sizeof(struct sys_mbox), "SYS_MBOX") ++#endif /* USE_LIBOS */ + /* + * A list of pools of pbuf's used by LWIP. 
+ * +diff --git a/src/include/lwip/priv/sockets_priv.h b/src/include/lwip/priv/sockets_priv.h +index d8f9904..7268a17 100644 +--- a/src/include/lwip/priv/sockets_priv.h ++++ b/src/include/lwip/priv/sockets_priv.h +@@ -45,56 +45,17 @@ + #include "lwip/sockets.h" + #include "lwip/sys.h" + ++/* move some definitions to the lwipsock.h for libnet to use, and ++ * at the same time avoid conflict between lwip/sockets.h and sys/socket.h ++ */ ++#include "lwipsock.h" ++ + #ifdef __cplusplus + extern "C" { + #endif + + #define NUM_SOCKETS MEMP_NUM_NETCONN + +-/** This is overridable for the rare case where more than 255 threads +- * select on the same socket... +- */ +-#ifndef SELWAIT_T +-#define SELWAIT_T u8_t +-#endif +- +-union lwip_sock_lastdata { +- struct netbuf *netbuf; +- struct pbuf *pbuf; +-}; +- +-/** Contains all internal pointers and states used for a socket */ +-struct lwip_sock { +- /** sockets currently are built on netconns, each socket has one netconn */ +- struct netconn *conn; +- /** data that was left from the previous read */ +- union lwip_sock_lastdata lastdata; +-#if LWIP_SOCKET_SELECT || LWIP_SOCKET_POLL +- /** number of times data was received, set by event_callback(), +- tested by the receive and select functions */ +- s16_t rcvevent; +- /** number of times data was ACKed (free send buffer), set by event_callback(), +- tested by select */ +- u16_t sendevent; +- /** error happened for this socket, set by event_callback(), tested by select */ +- u16_t errevent; +- /** counter of how many threads are waiting for this socket using select */ +- SELWAIT_T select_waiting; +-#endif /* LWIP_SOCKET_SELECT || LWIP_SOCKET_POLL */ +-#if LWIP_NETCONN_FULLDUPLEX +- /* counter of how many threads are using a struct lwip_sock (not the 'int') */ +- u8_t fd_used; +- /* status of pending close/delete actions */ +- u8_t fd_free_pending; +-#define LWIP_SOCK_FD_FREE_TCP 1 +-#define LWIP_SOCK_FD_FREE_FREE 2 +-#endif +-}; +- +-#ifndef set_errno +-#define set_errno(err) do 
{ if (err) { errno = (err); } } while(0) +-#endif +- + #if !LWIP_TCPIP_CORE_LOCKING + /** Maximum optlen used by setsockopt/getsockopt */ + #define LWIP_SETGETSOCKOPT_MAXOPTLEN LWIP_MAX(16, sizeof(struct ifreq)) +diff --git a/src/include/lwip/priv/tcp_priv.h b/src/include/lwip/priv/tcp_priv.h +index 72f9126..192edc4 100644 +--- a/src/include/lwip/priv/tcp_priv.h ++++ b/src/include/lwip/priv/tcp_priv.h +@@ -323,25 +323,42 @@ struct tcp_seg { + #endif /* LWIP_WND_SCALE */ + + /* Global variables: */ +-extern struct tcp_pcb *tcp_input_pcb; +-extern u32_t tcp_ticks; +-extern u8_t tcp_active_pcbs_changed; ++extern PER_THREAD struct tcp_pcb *tcp_input_pcb; ++extern PER_THREAD u32_t tcp_ticks; ++extern PER_THREAD u8_t tcp_active_pcbs_changed; + + /* The TCP PCB lists. */ + union tcp_listen_pcbs_t { /* List of all TCP PCBs in LISTEN state. */ + struct tcp_pcb_listen *listen_pcbs; + struct tcp_pcb *pcbs; + }; +-extern struct tcp_pcb *tcp_bound_pcbs; +-extern union tcp_listen_pcbs_t tcp_listen_pcbs; +-extern struct tcp_pcb *tcp_active_pcbs; /* List of all TCP PCBs that are in a ++extern PER_THREAD struct tcp_pcb *tcp_bound_pcbs; ++extern PER_THREAD union tcp_listen_pcbs_t tcp_listen_pcbs; ++extern PER_THREAD struct tcp_pcb *tcp_active_pcbs; /* List of all TCP PCBs that are in a + state in which they accept or send + data. */ +-extern struct tcp_pcb *tcp_tw_pcbs; /* List of all TCP PCBs in TIME-WAIT. */ ++extern PER_THREAD struct tcp_pcb *tcp_tw_pcbs; /* List of all TCP PCBs in TIME-WAIT. 
*/ + + #define NUM_TCP_PCB_LISTS_NO_TIME_WAIT 3 + #define NUM_TCP_PCB_LISTS 4 +-extern struct tcp_pcb ** const tcp_pcb_lists[NUM_TCP_PCB_LISTS]; ++extern PER_THREAD struct tcp_pcb ** tcp_pcb_lists[NUM_TCP_PCB_LISTS]; ++ ++#if USE_LIBOS ++#include "reg_sock.h" ++static inline int vdev_reg_done(enum reg_ring_type reg_type, const struct tcp_pcb *pcb) ++{ ++ LWIP_ASSERT("Invalid parameter", pcb != NULL); ++ ++ struct libnet_quintuple qtuple; ++ qtuple.protocol = 0; ++ qtuple.src_ip = pcb->local_ip.addr; ++ qtuple.src_port = lwip_htons(pcb->local_port); ++ qtuple.dst_ip = pcb->remote_ip.addr; ++ qtuple.dst_port = lwip_htons(pcb->remote_port); ++ ++ return vdev_reg_xmit(reg_type, &qtuple); ++} ++#endif + + /* Axioms about the above lists: + 1) Every TCP PCB that is not CLOSED is in one of the lists. +@@ -355,6 +372,54 @@ extern struct tcp_pcb ** const tcp_pcb_lists[NUM_TCP_PCB_LISTS]; + #define TCP_DEBUG_PCB_LISTS 0 + #endif + #if TCP_DEBUG_PCB_LISTS ++#if USE_LIBOS ++#define TCP_REG(pcbs, npcb) do {\ ++ struct tcp_pcb *tcp_tmp_pcb; \ ++ LWIP_DEBUGF(TCP_DEBUG, ("TCP_REG %p local port %d\n", (npcb), (npcb)->local_port)); \ ++ for (tcp_tmp_pcb = *(pcbs); \ ++ tcp_tmp_pcb != NULL; \ ++ tcp_tmp_pcb = tcp_tmp_pcb->next) { \ ++ LWIP_ASSERT("TCP_REG: already registered\n", tcp_tmp_pcb != (npcb)); \ ++ } \ ++ LWIP_ASSERT("TCP_REG: pcb->state != CLOSED", ((pcbs) == &tcp_bound_pcbs) || ((npcb)->state != CLOSED)); \ ++ if (*pcbs) \ ++ (*pcbs)->prev = npcb; \ ++ (npcb)->prev = NULL; \ ++ (npcb)->next = *(pcbs); \ ++ LWIP_ASSERT("TCP_REG: npcb->next != npcb", (npcb)->next != (npcb)); \ ++ *(pcbs) = (npcb); \ ++ LWIP_ASSERT("TCP_RMV: tcp_pcbs sane", tcp_pcbs_sane()); \ ++ tcp_timer_needed(); \ ++ } while(0) ++#define TCP_RMV(pcbs, npcb) do { \ ++ if ((npcb)->state == LISTEN) /* test the macro argument, not a caller-scope 'pcb' */ \ ++ vdev_reg_done(REG_RING_TCP_LISTEN_CLOSE, npcb); \ ++ else \ ++ vdev_reg_done(REG_RING_TCP_CONNECT_CLOSE, npcb);\ ++ struct tcp_pcb *tcp_tmp_pcb; \ ++ LWIP_ASSERT("TCP_RMV: pcbs != NULL", *(pcbs) != NULL); \ ++ 
LWIP_DEBUGF(TCP_DEBUG, ("TCP_RMV: removing %p from %p\n", (npcb), *(pcbs))); \ ++ if(*(pcbs) == (npcb)) { \ ++ *(pcbs) = (*pcbs)->next; \ ++ if (*pcbs) \ ++ (*pcbs)->prev = NULL; \ ++ } else { \ ++ struct tcp_pcb *prev, *next; \ ++ prev = npcb->prev; \ ++ next = npcb->next; \ ++ if (prev) \ ++ prev->next = next; \ ++ if (next) { /* braced so the two closing braces below balance */ \ ++ next->prev = prev; \ ++ } \ ++ } \ ++ (npcb)->prev = NULL; \ ++ (npcb)->next = NULL; \ ++ LWIP_ASSERT("TCP_RMV: tcp_pcbs sane", tcp_pcbs_sane()); \ ++ LWIP_DEBUGF(TCP_DEBUG, ("TCP_RMV: removed %p from %p\n", (npcb), *(pcbs))); \ ++ } while(0) ++ ++#else /* USE_LIBOS */ + #define TCP_REG(pcbs, npcb) do {\ + struct tcp_pcb *tcp_tmp_pcb; \ + LWIP_DEBUGF(TCP_DEBUG, ("TCP_REG %p local port %"U16_F"\n", (void *)(npcb), (npcb)->local_port)); \ +@@ -387,8 +452,65 @@ extern struct tcp_pcb ** const tcp_pcb_lists[NUM_TCP_PCB_LISTS]; + LWIP_DEBUGF(TCP_DEBUG, ("TCP_RMV: removed %p from %p\n", (void *)(npcb), (void *)(*(pcbs)))); \ + } while(0) + ++#endif /* USE_LIBOS */ + #else /* LWIP_DEBUG */ + ++#if TCP_PCB_HASH ++#define TCP_REG_HASH(pcbs, npcb) \ ++ do { \ ++ u32_t idx; \ ++ struct hlist_head *hd; \ ++ struct tcp_hash_table *htb = pcbs; \ ++ idx = TUPLE4_HASH_FN((npcb)->local_ip.addr, (npcb)->local_port, \ ++ (npcb)->remote_ip.addr, (npcb)->remote_port) & \ ++ (htb->size - 1); \ ++ hd = &htb->array[idx].chain; \ ++ hlist_add_head(&(npcb)->tcp_node, hd); \ ++ tcp_timer_needed(); \ ++ } while (0) ++ ++#define TCP_RMV_HASH(pcbs, npcb) \ ++ do { \ ++ hlist_del_init(&(npcb)->tcp_node); \ ++ } while (0) ++#endif /* TCP_PCB_HASH */ ++ ++#if USE_LIBOS ++#define TCP_REG(pcbs, npcb) \ ++ do { \ ++ if (*pcbs) \ ++ (*pcbs)->prev = npcb; \ ++ (npcb)->prev = NULL; \ ++ (npcb)->next = *pcbs; \ ++ *(pcbs) = (npcb); \ ++ tcp_timer_needed(); \ ++ } while (0) ++ ++#define TCP_RMV(pcbs, npcb) \ ++ do { \ ++ if ((npcb)->state == LISTEN) /* test the macro argument, not a caller-scope 'pcb' */ \ ++ vdev_reg_done(REG_RING_TCP_LISTEN_CLOSE, npcb); \ ++ else \ ++ vdev_reg_done(REG_RING_TCP_CONNECT_CLOSE, npcb);\ ++ if(*(pcbs) == 
(npcb)) { \ ++ (*(pcbs)) = (*pcbs)->next; \ ++ if (*pcbs) \ ++ (*pcbs)->prev = NULL; \ ++ } \ ++ else { \ ++ struct tcp_pcb *prev, *next; \ ++ prev = npcb->prev; \ ++ next = npcb->next; \ ++ if (prev) \ ++ prev->next = next; \ ++ if (next) \ ++ next->prev = prev; \ ++ } \ ++ (npcb)->prev = NULL; \ ++ (npcb)->next = NULL; \ ++ } while(0) ++ ++#else /* USE_LIBOS */ + #define TCP_REG(pcbs, npcb) \ + do { \ + (npcb)->next = *pcbs; \ +@@ -415,8 +537,32 @@ extern struct tcp_pcb ** const tcp_pcb_lists[NUM_TCP_PCB_LISTS]; + (npcb)->next = NULL; \ + } while(0) + ++#endif /* USE_LIBOS */ + #endif /* LWIP_DEBUG */ + ++ ++#if TCP_PCB_HASH ++#define TCP_REG_ACTIVE_HASH(npcb) \ ++ do { \ ++ TCP_REG_HASH(tcp_active_htable, npcb); \ ++ tcp_active_pcbs_changed = 1; \ ++ } while (0) ++ ++#define TCP_RMV_ACTIVE_HASH(npcb) \ ++ do { \ ++ TCP_RMV_HASH(tcp_active_htable, npcb); \ ++ tcp_active_pcbs_changed = 1; \ ++ } while (0) ++ ++#define TCP_PCB_REMOVE_ACTIVE_HASH(pcb) \ ++ do { \ ++ tcp_pcb_remove_hash(tcp_active_htable, pcb); \ ++ tcp_active_pcbs_changed = 1; \ ++ } while (0) ++ ++void tcp_pcb_remove_hash(struct tcp_hash_table *htb, struct tcp_pcb *pcb); ++#endif /* TCP_PCB_HASH */ ++ + #define TCP_REG_ACTIVE(npcb) \ + do { \ + TCP_REG(&tcp_active_pcbs, npcb); \ +diff --git a/src/include/lwip/prot/ip4.h b/src/include/lwip/prot/ip4.h +index 9347461..c9ad89c 100644 +--- a/src/include/lwip/prot/ip4.h ++++ b/src/include/lwip/prot/ip4.h +@@ -81,6 +81,21 @@ struct ip_hdr { + PACK_STRUCT_FIELD(u16_t _id); + /* fragment offset field */ + PACK_STRUCT_FIELD(u16_t _offset); ++ ++/* avoid conflicts with netinet/ip.h */ ++#ifdef IP_RF ++#undef IP_RF ++#endif ++#ifdef IP_DF ++#undef IP_DF ++#endif ++#ifdef IP_MF ++#undef IP_MF ++#endif ++#ifdef IP_OFFMASK ++#undef IP_OFFMASK ++#endif ++ + #define IP_RF 0x8000U /* reserved fragment flag */ + #define IP_DF 0x4000U /* don't fragment flag */ + #define IP_MF 0x2000U /* more fragments flag */ +diff --git a/src/include/lwip/sockets.h 
b/src/include/lwip/sockets.h +index d70d36c..345e26c 100644 +--- a/src/include/lwip/sockets.h ++++ b/src/include/lwip/sockets.h +@@ -57,6 +57,11 @@ extern "C" { + + /* If your port already typedef's sa_family_t, define SA_FAMILY_T_DEFINED + to prevent this code from redefining it. */ ++#if USE_LIBOS ++#define SA_FAMILY_T_DEFINED ++ typedef u16_t sa_family_t; ++#endif ++ + #if !defined(sa_family_t) && !defined(SA_FAMILY_T_DEFINED) + typedef u8_t sa_family_t; + #endif +@@ -69,7 +74,9 @@ typedef u16_t in_port_t; + #if LWIP_IPV4 + /* members are in network byte order */ + struct sockaddr_in { ++#if !USE_LIBOS + u8_t sin_len; ++#endif + sa_family_t sin_family; + in_port_t sin_port; + struct in_addr sin_addr; +@@ -90,7 +97,9 @@ struct sockaddr_in6 { + #endif /* LWIP_IPV6 */ + + struct sockaddr { ++#if !USE_LIBOS + u8_t sa_len; ++#endif + sa_family_t sa_family; + char sa_data[14]; + }; +@@ -189,6 +198,9 @@ struct ifreq { + #define SOCK_DGRAM 2 + #define SOCK_RAW 3 + ++#if USE_LIBOS ++#include ++#else + /* + * Option flags per-socket. These must match the SOF_ flags in ip.h (checked in init.c) + */ +@@ -221,6 +233,12 @@ struct ifreq { + #define SO_BINDTODEVICE 0x100b /* bind to device */ + + /* ++ * Level number for (get/set)sockopt() to apply to socket itself. ++ */ ++#define SOL_SOCKET 0xfff /* options for socket level */ ++#endif /* USE_LIBOS */ ++ ++/* + * Structure used for manipulating linger option. + */ + struct linger { +@@ -228,11 +246,6 @@ struct linger { + int l_linger; /* linger time in seconds */ + }; + +-/* +- * Level number for (get/set)sockopt() to apply to socket itself. 
+- */ +-#define SOL_SOCKET 0xfff /* options for socket level */ +- + + #define AF_UNSPEC 0 + #define AF_INET 2 +@@ -276,11 +289,20 @@ struct linger { + /* + * Options for level IPPROTO_TCP + */ ++#if USE_LIBOS ++/* come from netinet/tcp.h */ ++#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */ ++#define TCP_KEEPALIVE 0x24 /* send KEEPALIVE probes when idle for pcb->keep_idle milliseconds */ ++#define TCP_KEEPIDLE 0x04 /* set pcb->keep_idle - Same as TCP_KEEPALIVE, but use seconds for get/setsockopt */ ++#define TCP_KEEPINTVL 0x05 /* set pcb->keep_intvl - Use seconds for get/setsockopt */ ++#define TCP_KEEPCNT 0x06 /* set pcb->keep_cnt - Use number of probes sent for get/setsockopt */ ++#else /* USE_LIBOS */ + #define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */ + #define TCP_KEEPALIVE 0x02 /* send KEEPALIVE probes when idle for pcb->keep_idle milliseconds */ + #define TCP_KEEPIDLE 0x03 /* set pcb->keep_idle - Same as TCP_KEEPALIVE, but use seconds for get/setsockopt */ + #define TCP_KEEPINTVL 0x04 /* set pcb->keep_intvl - Use seconds for get/setsockopt */ + #define TCP_KEEPCNT 0x05 /* set pcb->keep_cnt - Use number of probes sent for get/setsockopt */ ++#endif /* USE_LIBOS */ + #endif /* LWIP_TCP */ + + #if LWIP_IPV6 +@@ -483,12 +505,30 @@ typedef struct fd_set + unsigned char fd_bits [(FD_SETSIZE+7)/8]; + } fd_set; + +-#elif FD_SETSIZE < (LWIP_SOCKET_OFFSET + MEMP_NUM_NETCONN) ++#elif FD_SETSIZE < (LWIP_SOCKET_OFFSET + MEMP_NUM_NETCONN) && !USE_LIBOS + #error "external FD_SETSIZE too small for number of sockets" + #else + #define LWIP_SELECT_MAXNFDS FD_SETSIZE + #endif /* FD_SET */ + ++#if USE_LIBOS ++#if !defined(POLLIN) && !defined(POLLOUT) ++/* come from bits/poll.h */ ++#define POLLIN 0x001 ++#define POLLOUT 0x004 ++#define POLLERR 0x008 ++#define POLLNVAL 0x020 ++/* Below values are unimplemented */ ++#define POLLRDNORM 0x040 ++#define POLLRDBAND 0x080 ++#define POLLPRI 0x002 ++#define POLLWRNORM 0x100 ++#define POLLWRBAND 
0x200 ++#define POLLHUP 0x010 ++#endif ++#endif /* USE_LIBOS */ ++ ++#if LWIP_SOCKET_POLL + /* poll-related defines and types */ + /* @todo: find a better way to guard the definition of these defines and types if already defined */ + #if !defined(POLLIN) && !defined(POLLOUT) +@@ -511,6 +551,7 @@ struct pollfd + short revents; + }; + #endif ++#endif /* LWIP_SOCKET_POLL */ + + /** LWIP_TIMEVAL_PRIVATE: if you want to use the struct timeval provided + * by your system, set this to 0 and include in cc.h */ +@@ -603,8 +644,15 @@ int lwip_select(int maxfdp1, fd_set *readset, fd_set *writeset, fd_set *exceptse + #if LWIP_SOCKET_POLL + int lwip_poll(struct pollfd *fds, nfds_t nfds, int timeout); + #endif ++ ++#if USE_LIBOS ++int lwip_ioctl(int s, long cmd, ...); ++int lwip_fcntl(int s, int cmd, ...); ++#else + int lwip_ioctl(int s, long cmd, void *argp); + int lwip_fcntl(int s, int cmd, int val); ++#endif /* USE_LIBOS */ ++ + const char *lwip_inet_ntop(int af, const void *src, char *dst, socklen_t size); + int lwip_inet_pton(int af, const char *src, void *dst); + +@@ -670,10 +718,17 @@ int lwip_inet_pton(int af, const char *src, void *dst); + #define writev(s,iov,iovcnt) lwip_writev(s,iov,iovcnt) + /** @ingroup socket */ + #define close(s) lwip_close(s) ++ ++#if USE_LIBOS ++#define fcntl(s,cmd...) lwip_fcntl(s,cmd) ++#define ioctl(s,cmd...) lwip_ioctl(s,cmd) ++#else + /** @ingroup socket */ + #define fcntl(s,cmd,val) lwip_fcntl(s,cmd,val) + /** @ingroup socket */ + #define ioctl(s,cmd,argp) lwip_ioctl(s,cmd,argp) ++#endif /* USE_LIBOS */ ++ + #endif /* LWIP_POSIX_SOCKETS_IO_NAMES */ + #endif /* LWIP_COMPAT_SOCKETS != 2 */ + +diff --git a/src/include/lwip/stats.h b/src/include/lwip/stats.h +index b570dba..4470531 100644 +--- a/src/include/lwip/stats.h ++++ b/src/include/lwip/stats.h +@@ -301,7 +301,7 @@ struct stats_ { + }; + + /** Global variable containing lwIP internal statistics. Add this to your debugger's watchlist. 
*/ +-extern struct stats_ lwip_stats; ++extern PER_THREAD struct stats_ lwip_stats; + + /** Init statistics */ + void stats_init(void); +@@ -467,6 +467,8 @@ void stats_init(void); + #define MIB2_STATS_INC(x) + #endif + ++int get_mib2_stats(char *buf); ++ + /* Display of statistics */ + #if LWIP_STATS_DISPLAY + void stats_display(void); +diff --git a/src/include/lwip/tcp.h b/src/include/lwip/tcp.h +index daf7599..4f86b46 100644 +--- a/src/include/lwip/tcp.h ++++ b/src/include/lwip/tcp.h +@@ -51,6 +51,11 @@ + #include "lwip/ip6.h" + #include "lwip/ip6_addr.h" + ++#if TCP_PCB_HASH ++#include "lwip/sys.h" ++#include "hlist.h" ++#endif ++ + #ifdef __cplusplus + extern "C" { + #endif +@@ -209,15 +214,27 @@ typedef u16_t tcpflags_t; + /** + * members common to struct tcp_pcb and struct tcp_listen_pcb + */ ++#if USE_LIBOS + #define TCP_PCB_COMMON(type) \ + type *next; /* for the linked list */ \ ++ type *prev; /* for the linked list */ \ + void *callback_arg; \ + TCP_PCB_EXTARGS \ + enum tcp_state state; /* TCP state */ \ + u8_t prio; \ + /* ports are in host byte order */ \ + u16_t local_port +- ++ ++#else /* USE_LIBOS */ ++#define TCP_PCB_COMMON(type) \ ++ type *next; /* for the linked list */ \ ++ void *callback_arg; \ ++ TCP_PCB_EXTARGS \ ++ enum tcp_state state; /* TCP state */ \ ++ u8_t prio; \ ++ /* ports are in host byte order */ \ ++ u16_t local_port ++#endif /* USE_LIBOS */ + + /** the TCP protocol control block for listening pcbs */ + struct tcp_pcb_listen { +@@ -244,6 +261,9 @@ struct tcp_pcb { + IP_PCB; + /** protocol specific PCB members */ + TCP_PCB_COMMON(struct tcp_pcb); ++#if TCP_PCB_HASH ++ struct hlist_node tcp_node; ++#endif + + /* ports are in host byte order */ + u16_t remote_port; +@@ -388,6 +408,58 @@ struct tcp_pcb { + #endif + }; + ++#if TCP_PCB_HASH ++#define TCP_HTABLE_SIZE (MEMP_NUM_NETCONN * 12) /* parenthesized so it expands safely inside larger expressions */ ++ ++struct tcp_hashbucket ++{ ++ sys_mutex_t mutex; ++ struct hlist_head chain; ++}; ++ ++struct tcp_hash_table ++{ ++ u32_t size; ++ struct
tcp_hashbucket array[TCP_HTABLE_SIZE]; ++}; ++ ++extern PER_THREAD struct tcp_hash_table *tcp_active_htable; /* key: lport/fport/lip/fip */ ++ ++#define JHASH_INITVAL 0xdeadbeef ++ ++static inline unsigned int rol32(unsigned int word, unsigned int shift) ++{ ++ return (word << (shift & 31)) | (word >> ((32 - shift) & 31)); /* masked: shift == 0 must not shift by 32 */ ++} ++ ++#define __jhash_final(a, b, c) \ ++{ \ ++ c ^= b; c -= rol32(b, 14); \ ++ a ^= c; a -= rol32(c, 11); \ ++ b ^= a; b -= rol32(a, 25); \ ++ c ^= b; c -= rol32(b, 16); \ ++ a ^= c; a -= rol32(c, 4); \ ++ b ^= a; b -= rol32(a, 14); \ ++ c ^= b; c -= rol32(b, 24); \ ++} ++ ++static inline unsigned int jhash_3words(unsigned int a, unsigned int b, unsigned int c) ++{ ++ a += JHASH_INITVAL; ++ b += JHASH_INITVAL; ++ ++ __jhash_final(a, b, c); ++ ++ return c; ++} ++ ++#define TUPLE4_HASH_FN(laddr, lport, faddr, fport) jhash_3words((laddr), (faddr), (unsigned int)(lport) | ((unsigned int)(fport) << 16)) /* widen before shifting: a promoted 16-bit port << 16 can overflow int */ ++ ++#define tcppcb_hlist_for_each(tcppcb, node, list) \ ++ hlist_for_each_entry(tcppcb, node, list, tcp_node) ++ ++#endif /* TCP_PCB_HASH */ ++ + #if LWIP_EVENT_API + + enum lwip_event { +@@ -481,6 +553,26 @@ err_t tcp_tcp_get_tcp_addrinfo(struct tcp_pcb *pcb, int local, ip_add + + #define tcp_dbg_get_tcp_state(pcb) ((pcb)->state) + ++enum tcp_list_state { ++ ACTIVE_LIST, ++ LISTEN_LIST, ++ TIME_WAIT_LIST, ++}; ++ ++struct tcp_pcb_dp { ++ uint32_t state; ++ uint32_t lip; ++ uint32_t rip; ++ uint16_t l_port; ++ uint16_t r_port; ++ uint32_t r_next; ++ uint32_t s_next; ++ uint32_t tcp_sub_state; ++}; ++ ++void tcp_get_conn(char *buf, int32_t len, uint32_t *conn_num); ++uint32_t tcp_get_conn_num(void); ++ + /* for compatibility with older implementation */ + #define tcp_new_ip6() tcp_new_ip_type(IPADDR_TYPE_V6) + +diff --git a/src/include/lwip/tcpip.h b/src/include/lwip/tcpip.h +index 0b8880a..d2c2440 100644 +--- a/src/include/lwip/tcpip.h ++++ b/src/include/lwip/tcpip.h +@@ -51,7 +51,7 @@ extern "C" { + + #if LWIP_TCPIP_CORE_LOCKING + /** The global semaphore to lock the stack.
*/ +-extern sys_mutex_t lock_tcpip_core; ++extern PER_THREAD sys_mutex_t lock_tcpip_core; + #if !defined LOCK_TCPIP_CORE || defined __DOXYGEN__ + /** Lock lwIP core mutex (needs @ref LWIP_TCPIP_CORE_LOCKING 1) */ + #define LOCK_TCPIP_CORE() sys_mutex_lock(&lock_tcpip_core) +diff --git a/src/include/lwip/timeouts.h b/src/include/lwip/timeouts.h +index b601f9e..b451554 100644 +--- a/src/include/lwip/timeouts.h ++++ b/src/include/lwip/timeouts.h +@@ -119,6 +119,10 @@ struct sys_timeo** sys_timeouts_get_next_timeout(void); + void lwip_cyclic_timer(void *arg); + #endif + ++#if USE_LIBOS ++void sys_timer_run(void); ++#endif /* USE_LIBOS */ ++ + #endif /* LWIP_TIMERS */ + + #ifdef __cplusplus +diff --git a/src/include/lwiplog.h b/src/include/lwiplog.h +new file mode 100644 +index 0000000..363e516 +--- /dev/null ++++ b/src/include/lwiplog.h +@@ -0,0 +1,81 @@ ++/* ++ * Copyright (c) 2001-2004 Swedish Institute of Computer Science. ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without modification, ++ * are permitted provided that the following conditions are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright notice, ++ * this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright notice, ++ * this list of conditions and the following disclaimer in the documentation ++ * and/or other materials provided with the distribution. ++ * 3. The name of the author may not be used to endorse or promote products ++ * derived from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT ++ * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT ++ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING ++ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY ++ * OF SUCH DAMAGE. ++ * ++ * This file is part of the lwIP TCP/IP stack. ++ * ++ * Author: Huawei Technologies ++ * ++ */ ++ ++#ifndef __LWIPLOG_H__ ++#define __LWIPLOG_H__ ++ ++#include ++#include ++ ++#include ++ ++#include "lwipopts.h" ++ ++#define gettid() syscall(__NR_gettid) ++ ++#if USE_DPDK_LOG ++ ++#define LWIP_LOG_WARN LWIP_DBG_LEVEL_WARNING ++#define LWIP_LOG_ERROR LWIP_DBG_LEVEL_SERIOUS ++#define LWIP_LOG_FATAL LWIP_DBG_LEVEL_SEVERE ++ ++#define LWIP_PLATFORM_LOG(level, fmt, ...) \ ++do { \ ++ if ((level) & LWIP_LOG_FATAL) { \ ++ RTE_LOG(ERR, EAL, fmt, ##__VA_ARGS__); \ ++ abort(); \ ++ } else if ((level) & LWIP_LOG_ERROR) { \ ++ RTE_LOG(ERR, EAL, fmt, ##__VA_ARGS__); \ ++ } else if ((level) & LWIP_LOG_WARN) { \ ++ RTE_LOG(WARNING, EAL, fmt, ##__VA_ARGS__); \ ++ } else { \ ++ RTE_LOG(INFO, EAL, fmt, ##__VA_ARGS__); \ ++ } \ ++} while(0) ++ ++ ++#define LWIP_PLATFORM_DIAG(x) ++ ++#define ESC_ARGS(...) 
__VA_ARGS__ ++#define STRIP_BRACES(args) args ++ ++#define LWIP_PLATFORM_ASSERT(x) \ ++do { LWIP_PLATFORM_LOG(LWIP_LOG_FATAL, "Assertion \"%s\" failed at line %d in %s\n", \ ++ x, __LINE__, __FILE__); abort();} while(0) ++ ++#else ++ ++#define LWIP_PLATFORM_LOG(level, fmt, ...) /* no-op; accepts the same arguments as the USE_DPDK_LOG variant */ ++ ++#endif /* USE_DPDK_LOG */ ++ ++#endif /* __LWIPLOG_H__ */ +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index 4ab26f2..8893a5f 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -1,8 +1,8 @@ + /* +- * Copyright (c) 2001-2003 Swedish Institute of Computer Science. +- * All rights reserved. +- * +- * Redistribution and use in source and binary forms, with or without modification, ++ * Copyright (c) 2001-2004 Swedish Institute of Computer Science. ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, +@@ -11,70 +11,193 @@ + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products +- * derived from this software without specific prior written permission. ++ * derived from this software without specific prior written permission. + * +- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT +- * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT ++ * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT ++ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING ++ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY + * OF SUCH DAMAGE. + * + * This file is part of the lwIP TCP/IP stack. 
+- * +- * Author: Simon Goldschmidt ++ * ++ * Author: Huawei Technologies + * + */ +-#ifndef LWIP_HDR_LWIPOPTS_H__ +-#define LWIP_HDR_LWIPOPTS_H__ +- +-/* Prevent having to link sys_arch.c (we don't test the API layers in unit tests) */ +-#define NO_SYS 1 +-#define LWIP_NETCONN 0 +-#define LWIP_SOCKET 0 +-#define SYS_LIGHTWEIGHT_PROT 0 +- +-#define LWIP_IPV6 1 +-#define IPV6_FRAG_COPYHEADER 1 +-#define LWIP_IPV6_DUP_DETECT_ATTEMPTS 0 +- +-/* Enable some protocols to test them */ +-#define LWIP_DHCP 1 +-#define LWIP_AUTOIP 1 +- +-#define LWIP_IGMP 1 +-#define LWIP_DNS 1 +- +-#define LWIP_ALTCP 1 +- +-/* Turn off checksum verification of fuzzed data */ +-#define CHECKSUM_CHECK_IP 0 +-#define CHECKSUM_CHECK_UDP 0 +-#define CHECKSUM_CHECK_TCP 0 +-#define CHECKSUM_CHECK_ICMP 0 +-#define CHECKSUM_CHECK_ICMP6 0 +- +-/* Minimal changes to opt.h required for tcp unit tests: */ +-#define MEM_SIZE 16000 +-#define TCP_SND_QUEUELEN 40 +-#define MEMP_NUM_TCP_SEG TCP_SND_QUEUELEN +-#define TCP_OVERSIZE 1 +-#define TCP_SND_BUF (12 * TCP_MSS) +-#define TCP_WND (10 * TCP_MSS) +-#define LWIP_WND_SCALE 1 +-#define TCP_RCV_SCALE 2 +-#define PBUF_POOL_SIZE 400 /* pbuf tests need ~200KByte */ +- +-/* Minimal changes to opt.h required for etharp unit tests: */ +-#define ETHARP_SUPPORT_STATIC_ENTRIES 1 +- +-#define LWIP_NUM_NETIF_CLIENT_DATA 1 +-#define LWIP_SNMP 1 +-#define MIB2_STATS 1 +-#define LWIP_MDNS_RESPONDER 1 +- +-#endif /* LWIP_HDR_LWIPOPTS_H__ */ ++ ++#ifndef __LWIPOPTS_H__ ++#define __LWIPOPTS_H__ ++ ++#define LWIP_TCPIP_CORE_LOCKING 1 ++ ++#define LWIP_NETCONN_SEM_PER_THREAD 0 ++ ++#define LWIP_TCP 1 ++ ++#define LWIP_SO_SNDTIMEO 0 /* option name as spelled in lwip/opt.h */ ++ ++#define LWIP_SO_LINGER 0 /* option name as spelled in lwip/opt.h */ ++ ++#define MEMP_USE_CUSTOM_POOLS 0 ++#define MEM_USE_POOLS 0 ++ ++#define PER_TCP_PCB_BUFFER (16 * 128) ++ ++#define MAX_CLIENTS (20000) ++ ++#define RESERVED_CLIENTS (2000) ++ ++#define MEMP_NUM_TCP_PCB (MAX_CLIENTS + RESERVED_CLIENTS) ++ ++/* we use PBUF_POOL instead of PBUF_RAM in tcp_write, so reduce PBUF_RAM
size, ++ * and do NOT let PBUF_POOL_BUFSIZE less then TCP_MSS ++*/ ++#define MEM_SIZE (((PER_TCP_PCB_BUFFER + 128) * MEMP_NUM_TCP_SEG) >> 2) ++ ++#define MEMP_NUM_TCP_PCB_LISTEN 3000 ++ ++#define MEMP_NUM_TCP_SEG (128 * 128 * 2) ++ ++#define MEMP_NUM_NETCONN (MAX_CLIENTS + RESERVED_CLIENTS) ++ ++#define MEMP_NUM_SYS_SEM (MAX_CLIENTS + RESERVED_CLIENTS) ++ ++#define MEMP_NUM_SYS_MBOX (MAX_CLIENTS + RESERVED_CLIENTS) ++ ++#define PBUF_POOL_SIZE (MAX_CLIENTS * 2) ++ ++#define MEMP_MEM_MALLOC 0 ++ ++#define LWIP_ARP 1 ++ ++#define ETHARP_SUPPORT_STATIC_ENTRIES 1 ++ ++#define LWIP_IPV4 1 ++ ++#define IP_FORWARD 0 ++ ++#define IP_REASSEMBLY 1 ++ ++#define LWIP_UDP 0 ++ ++#define LWIP_TCP 1 ++ ++#define IP_HLEN 20 ++ ++#define TCP_HLEN 20 ++ ++#define FRAME_MTU 1500 ++ ++#define TCP_MSS (FRAME_MTU - IP_HLEN - TCP_HLEN) ++ ++#define TCP_WND (40 * TCP_MSS) ++ ++#define TCP_SND_BUF (5 * TCP_MSS) ++ ++#define TCP_SND_QUEUELEN (8191) ++ ++#define TCP_SNDLOWAT (TCP_SND_BUF / 5) ++ ++#define TCP_SNDQUEUELOWAT (TCP_SND_QUEUELEN / 5) ++ ++#define TCP_LISTEN_BACKLOG 1 ++ ++#define TCP_DEFAULT_LISTEN_BACKLOG 0xff ++ ++#define TCP_OVERSIZE 0 ++ ++#define LWIP_NETIF_API 1 ++ ++#define DEFAULT_TCP_RECVMBOX_SIZE 128 ++ ++#define DEFAULT_ACCEPTMBOX_SIZE 1024 ++ ++#define LWIP_NETCONN 1 ++ ++#define LWIP_TCPIP_TIMEOUT 0 ++ ++#define LWIP_SOCKET 1 ++ ++#define LWIP_TCP_KEEPALIVE 1 ++ ++#define LWIP_STATS 1 ++ ++#define LWIP_STATS_DISPLAY 1 ++ ++#define CHECKSUM_GEN_IP 1 /* master switch */ ++ ++#define CHECKSUM_GEN_TCP 1 /* master switch */ ++ ++#define CHECKSUM_CHECK_IP 1 /* master switch */ ++ ++#define CHECKSUM_CHECK_TCP 1 /* master switch */ ++ ++#define LWIP_TIMEVAL_PRIVATE 0 ++ ++#define USE_LIBOS 1 ++ ++#define LWIP_DEBUG 1 ++ ++#define LWIP_PERF 1 ++ ++#define LWIP_RECORD_PERF 0 ++ ++#define LWIP_SOCKET_POLL 0 ++ ++#define USE_LIBOS_ZC_RING 0 ++ ++#define SO_REUSE 1 ++ ++#define SIOCSHIWAT 1 ++ ++#define O_NONBLOCK 04000 /* same as define in bits/fcntl-linux.h */ ++ ++#define 
O_NDELAY O_NONBLOCK ++ ++#define FIONBIO 0x5421 /* same as define in asm-generic/ioctls.h */ ++ ++#define LWIP_SUPPORT_CUSTOM_PBUF 1 ++ ++#define MEM_LIBC_MALLOC 0 ++ ++#define LWIP_TIMERS 1 ++ ++#define TCPIP_MBOX_SIZE (MEMP_NUM_TCPIP_MSG_API) ++ ++#define TCP_PCB_HASH 1 ++ ++#define USE_DPDK_LOG 1 ++ ++#define LWIP_EPOOL_WAIT_MAX_EVENTS 30 ++ ++#define ARP_TABLE_SIZE 512 ++ ++/* ++ --------------------------------------- ++ ------- Syscall thread options -------- ++ --------------------------------------- ++*/ ++#define USE_SYSCALL_THREAD 1 ++ ++#define MAX_BLOCKING_ACCEPT_FD (100) ++ ++#define MAX_BLOCKING_CONNECT_FD (100) ++ ++#define MAX_BLOCKING_EPOLL_FD (100) ++ ++#define MAX_SYSCALL_EVENTS (MAX_BLOCKING_ACCEPT_FD + MAX_BLOCKING_CONNECT_FD + MAX_BLOCKING_EPOLL_FD) ++ ++#define MAX_HOST_FD (MAX_CLIENTS + RESERVED_CLIENTS) ++ ++#if USE_LIBOS ++#define PER_THREAD __thread ++#else ++#define PER_THREAD ++#endif ++ ++#endif /* __LWIPOPTS_H__ */ +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +new file mode 100644 +index 0000000..dbc67b9 +--- /dev/null ++++ b/src/include/lwipsock.h +@@ -0,0 +1,155 @@ ++/* ++ * Copyright (c) 2001-2004 Swedish Institute of Computer Science. ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without modification, ++ * are permitted provided that the following conditions are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright notice, ++ * this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright notice, ++ * this list of conditions and the following disclaimer in the documentation ++ * and/or other materials provided with the distribution. ++ * 3. The name of the author may not be used to endorse or promote products ++ * derived from this software without specific prior written permission. 
++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT ++ * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT ++ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING ++ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY ++ * OF SUCH DAMAGE. ++ * ++ * This file is part of the lwIP TCP/IP stack. ++ * ++ * Author: Huawei Technologies ++ * ++ */ ++ ++#ifndef __LWIPSOCK_H__ ++#define __LWIPSOCK_H__ ++ ++#include "lwip/opt.h" ++#include "lwip/api.h" ++ ++#include "posix_api.h" ++#include "eventpoll.h" ++ ++/* move some definitions to the lwipsock.h for libnet to use, and ++ * at the same time avoid conflict between lwip/sockets.h and sys/socket.h ++ */ ++ ++/* -------------------------------------------------- ++ * the following definition is copied from lwip/priv/tcpip_priv.h ++ * -------------------------------------------------- ++ */ ++ ++/** This is overridable for the rare case where more than 255 threads ++ * select on the same socket... 
++ */ ++#ifndef SELWAIT_T ++#define SELWAIT_T u8_t ++#endif ++ ++union lwip_sock_lastdata { ++ struct netbuf *netbuf; ++ struct pbuf *pbuf; ++}; ++ ++/** Contains all internal pointers and states used for a socket */ ++struct lwip_sock { ++ /** sockets currently are built on netconns, each socket has one netconn */ ++ struct netconn *conn; ++ /** data that was left from the previous read */ ++ union lwip_sock_lastdata lastdata; ++#if LWIP_SOCKET_SELECT || LWIP_SOCKET_POLL ++ /** number of times data was received, set by event_callback(), ++ tested by the receive and select functions */ ++ s16_t rcvevent; ++ /** number of times data was ACKed (free send buffer), set by event_callback(), ++ tested by select */ ++ u16_t sendevent; ++ /** error happened for this socket, set by event_callback(), tested by select */ ++ u16_t errevent; ++ /** counter of how many threads are waiting for this socket using select */ ++ SELWAIT_T select_waiting; ++#endif /* LWIP_SOCKET_SELECT || LWIP_SOCKET_POLL */ ++#if LWIP_NETCONN_FULLDUPLEX ++ /* counter of how many threads are using a struct lwip_sock (not the 'int') */ ++ u8_t fd_used; ++ /* status of pending close/delete actions */ ++ u8_t fd_free_pending; ++#define LWIP_SOCK_FD_FREE_TCP 1 ++#define LWIP_SOCK_FD_FREE_FREE 2 ++#endif ++ ++#if USE_LIBOS ++ struct list_node list; ++ /* registered events */ ++ uint32_t epoll; ++ /* available events */ ++ uint32_t events; ++ epoll_data_t ep_data; ++ /* libos_epoll pointer in use */ ++ struct libos_epoll *epoll_data; ++#endif ++}; ++ ++#ifndef set_errno ++#define set_errno(err) do { if (err) { errno = (err); } } while(0) ++#endif ++ ++ ++/* -------------------------------------------------- ++ * --------------- LIBNET references ---------------- ++ * -------------------------------------------------- ++ */ ++#if USE_LIBOS ++extern uint32_t sockets_num; ++extern struct lwip_sock *sockets; ++/** ++ * Map a externally used socket index to the internal socket representation. 
++ * ++ * @param s externally used socket index ++ * @return struct lwip_sock for the socket or NULL if not found ++ */ ++static inline struct lwip_sock * ++get_socket_without_errno(int s) ++{ ++ struct lwip_sock *sock = NULL; ++ ++ s -= LWIP_SOCKET_OFFSET; ++ ++ if ((s < 0) || (s >= sockets_num)) { ++ LWIP_DEBUGF(SOCKETS_DEBUG, ("get_socket(%d): invalid\n", s + LWIP_SOCKET_OFFSET)); ++ return NULL; ++ } ++ ++ sock = &sockets[s]; ++ ++ if (!sock->conn) { ++ LWIP_DEBUGF(SOCKETS_DEBUG, ("get_socket(%d): not active\n", s + LWIP_SOCKET_OFFSET)); ++ return NULL; ++ } ++ ++ return sock; ++} ++#endif /* USE_LIBOS */ ++ ++struct lwip_sock *get_socket(int s); ++struct lwip_sock *get_socket_by_fd(int s); ++void lwip_sock_init(void); ++void lwip_exit(void); ++ ++extern int is_host_ipv4(uint32_t ipv4); ++extern int rearm_host_fd(int fd); ++extern int rearm_accept_fd(int fd); ++extern void unarm_host_fd(int fd); ++extern void clean_host_fd(int fd); ++extern int arm_host_fd(struct libos_epoll *ep, int op, int fd, struct epoll_event *event); ++ ++#endif /* __LWIPSOCK_H__ */ +diff --git a/src/include/memp_def.h b/src/include/memp_def.h +new file mode 100644 +index 0000000..082f685 +--- /dev/null ++++ b/src/include/memp_def.h +@@ -0,0 +1,66 @@ ++/* ++ * Copyright (c) 2001-2004 Swedish Institute of Computer Science. ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without modification, ++ * are permitted provided that the following conditions are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright notice, ++ * this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright notice, ++ * this list of conditions and the following disclaimer in the documentation ++ * and/or other materials provided with the distribution. ++ * 3. 
The name of the author may not be used to endorse or promote products ++ * derived from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT ++ * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT ++ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING ++ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY ++ * OF SUCH DAMAGE. ++ * ++ * This file is part of the lwIP TCP/IP stack. ++ * ++ * Author: Huawei Technologies ++ * ++ */ ++ ++#ifndef __MEMP_DEF_H__ ++#define __MEMP_DEF_H__ ++ ++#include "lwip/opt.h" ++#include "arch/cc.h" ++ ++#define LWIP_MEMPOOL_BASE_DECLARE(name) \ ++ extern void alloc_memp_##name##_base(void); ++ ++#define LWIP_MEM_MEMORY_DECLARE(name) \ ++ extern void alloc_memory_##name(void); ++ ++#define LWIP_MEMPOOL_BASE_INIT(name) \ ++ alloc_memp_##name##_base(); ++ ++#define LWIP_MEM_MEMORY_INIT(name) \ ++ alloc_memory_##name(); ++ ++#define LWIP_MEMPOOL(name, num, size, desc) LWIP_MEMPOOL_BASE_DECLARE(name) ++#include ++#undef LWIP_MEMPOOL ++ ++static inline void hugepage_init(void) ++{ ++#define LWIP_MEMPOOL(name,num,size,desc) LWIP_MEMPOOL_BASE_INIT(name) ++#include "lwip/priv/memp_std.h" ++ ++#if !MEM_LIBC_MALLOC ++ LWIP_MEM_MEMORY_DECLARE(ram_heap) ++ LWIP_MEM_MEMORY_INIT(ram_heap) ++#endif /* MEM_LIBC_MALLOC */ ++} ++ ++#endif /* __MEMP_DEF_H__ */ +diff --git a/src/include/posix_api.h b/src/include/posix_api.h +new file mode 100644 +index 0000000..8aa8516 +--- /dev/null ++++ 
b/src/include/posix_api.h +@@ -0,0 +1,88 @@ ++/* ++ * Copyright (c) 2001-2004 Swedish Institute of Computer Science. ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without modification, ++ * are permitted provided that the following conditions are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright notice, ++ * this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright notice, ++ * this list of conditions and the following disclaimer in the documentation ++ * and/or other materials provided with the distribution. ++ * 3. The name of the author may not be used to endorse or promote products ++ * derived from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT ++ * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT ++ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING ++ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY ++ * OF SUCH DAMAGE. ++ * ++ * This file is part of the lwIP TCP/IP stack. 
++ * ++ * Author: Huawei Technologies ++ * ++ */ ++ ++#ifndef __POSIX_API_H__ ++#define __POSIX_API_H__ ++ ++#include ++#include ++#include ++#include ++ ++typedef struct { ++ void *handle; ++ int (*socket_fn)(int domain, int type, int protocol); ++ int (*accept_fn)(int s, struct sockaddr*, socklen_t*); ++ int (*accept4_fn)(int s, struct sockaddr *addr, socklen_t *addrlen, int flags); ++ int (*bind_fn)(int s, const struct sockaddr*, socklen_t); ++ int (*listen_fn)(int s, int backlog); ++ int (*connect_fn)(int s, const struct sockaddr *name, socklen_t namelen); ++ int (*getpeername_fn)(int s, struct sockaddr *name, socklen_t *namelen); ++ int (*getsockname_fn)(int s, struct sockaddr *name, socklen_t *namelen); ++ int (*setsockopt_fn)(int s, int level, int optname, const void *optval, socklen_t optlen); ++ int (*getsockopt_fn)(int s, int level, int optname, void *optval, socklen_t *optlen); ++ int (*shutdown_fn)(int s, int how); ++ int (*close_fn)(int fd); ++ pid_t (*fork_fn)(void); ++ ssize_t (*read_fn)(int fd, void *mem, size_t len); ++ ssize_t (*write_fn)(int fd, const void *data, size_t len); ++ ssize_t (*recv_fn)(int sockfd, void *buf, size_t len, int flags); ++ ssize_t (*send_fn)(int sockfd, const void *buf, size_t len, int flags); ++ ssize_t (*recv_msg)(int sockfd, const struct msghdr *msg, int flags); ++ ssize_t (*send_msg)(int sockfd, const struct msghdr *msg, int flags); ++ ssize_t (*recv_from)(int sockfd, void *buf, size_t len, int flags, struct sockaddr *src_addr, socklen_t *addrlen); ++ ssize_t (*send_to)(int sockfd, const void *buf, size_t len, int flags, const struct sockaddr *dest_addr, ++ socklen_t addrlen); ++ int (*fcntl_fn)(int fd, int cmd, ...); ++ int (*fcntl64_fn)(int fd, int cmd, ...); ++ int (*pipe_fn)(int pipefd[2]); ++ int (*epoll_create_fn)(int size); ++ int (*epoll_ctl_fn)(int epfd, int op, int fd, struct epoll_event *event); ++ int (*epoll_wait_fn)(int epfd, struct epoll_event *events, int maxevents, int timeout); ++ int 
(*epoll_close_fn)(int epfd); ++ int (*eventfd_fn)(unsigned int initval, int flags); ++ int (*is_epfd)(int fd); ++ struct lwip_sock* (*get_socket)(int fd); ++ int (*sigaction_fn)(int signum, const struct sigaction *act, struct sigaction *oldact); ++ int (*poll_fn)(struct pollfd *fds, nfds_t nfds, int timeout); ++ int (*ioctl_fn)(int fd, int cmd, ...); ++ ++ int is_chld; ++} posix_api_t; ++ ++posix_api_t *posix_api; ++ ++int posix_api_init(void); ++void posix_api_free(void); ++void posix_api_fork(void); ++ ++#endif /* __POSIX_API_H__ */ +diff --git a/src/include/reg_sock.h b/src/include/reg_sock.h +new file mode 100644 +index 0000000..76d4c48 +--- /dev/null ++++ b/src/include/reg_sock.h +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 2001-2004 Swedish Institute of Computer Science. ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without modification, ++ * are permitted provided that the following conditions are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright notice, ++ * this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright notice, ++ * this list of conditions and the following disclaimer in the documentation ++ * and/or other materials provided with the distribution. ++ * 3. The name of the author may not be used to endorse or promote products ++ * derived from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT ++ * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT ++ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING ++ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY ++ * OF SUCH DAMAGE. ++ * ++ * This file is part of the lwIP TCP/IP stack. ++ * ++ * Author: Huawei Technologies ++ * ++ */ ++ ++#ifndef __REG_SOCK_H__ ++#define __REG_SOCK_H__ ++ ++enum reg_ring_type { ++ REG_RING_TCP_LISTEN = 0, ++ REG_RING_TCP_LISTEN_CLOSE, ++ REG_RING_TCP_CONNECT, ++ REG_RING_TCP_CONNECT_CLOSE, ++ RING_REG_MAX, ++}; ++ ++struct libnet_quintuple { ++ uint32_t protocol; ++ /* net byte order */ ++ uint16_t src_port; ++ uint16_t dst_port; ++ uint32_t src_ip; ++ uint32_t dst_ip; ++}; ++ ++struct reg_ring_msg { ++ enum reg_ring_type type; ++ ++ uint32_t tid; ++ struct libnet_quintuple qtuple; ++}; ++ ++extern int vdev_reg_xmit(enum reg_ring_type type, struct libnet_quintuple *qtuple); ++ ++#endif /* __REG_SOCK_H__ */ +\ No newline at end of file +diff --git a/src/netif/dir.mk b/src/netif/dir.mk +index 233c79a..f585d5e 100644 +--- a/src/netif/dir.mk ++++ b/src/netif/dir.mk +@@ -1,3 +1,3 @@ +-SRC = ethernet.c ++SRC = ethernet.c + + $(eval $(call register_dir, netif, $(SRC))) +-- +1.8.3.1 + diff --git a/0003-fix-the-occasional-coredump-when-the-lwip-exits.patch b/0003-fix-the-occasional-coredump-when-the-lwip-exits.patch new file mode 100644 index 0000000000000000000000000000000000000000..a540728e505561062ba85e1a12a0cdb6ebdc4019 --- /dev/null +++ b/0003-fix-the-occasional-coredump-when-the-lwip-exits.patch @@ -0,0 +1,63 @@ +From 0d5070b4a40912a7921e0101461a9c7d61919acd Mon Sep 17 00:00:00 2001 +From: HuangLiming +Date: Tue, 25 May 2021 03:08:33 -0400 +Subject: 
[PATCH] fix the occasional coredump when the lwip exits + +Signed-off-by: HuangLiming +--- + src/api/sockets.c | 37 +++++++++---------------------------- + 1 file changed, 9 insertions(+), 28 deletions(-) + +diff --git a/src/api/sockets.c b/src/api/sockets.c +index d62e55b..658f762 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -4655,36 +4655,17 @@ void lwip_sock_init(void) + return; + } + +-//modify from lwip_close + void lwip_exit(void) + { +- int i, is_tcp; +- struct lwip_sock *sock; +- +- if (memp_pools[MEMP_SYS_MBOX] == NULL) { +- return; +- } +- +- for (i = 0; i < sockets_num; i++) { +- sock = &sockets[i]; +- if (!sock->conn) +- continue; +-#if LWIP_IGMP +- /* drop all possibly joined IGMP memberships */ +- lwip_socket_drop_registered_memberships(i); +-#endif /* LWIP_IGMP */ +- /* +- * process is exiting, call netconn_delete to +- * close tcp connection, and ignore the return value +- */ +- is_tcp = NETCONNTYPE_GROUP(netconn_type(sock->conn)) == NETCONN_TCP; +- netconn_delete(sock->conn); +- free_socket(sock, is_tcp); +- } +- +- free(sockets); +- sockets = NULL; +- sockets_num = 0; ++ /* ++ * LwIP allocates the following two kinds of memory, but ++ * it is unnecessary to release all of it in sequence, ++ * which would only increase complexity. Therefore, we rely on the process ++ * reclamation mechanism of the system to release memory. ++ * 1. a sockets table of the process. ++ * 2. a batch of hugepage memory of each thread.
++ */ ++ return; + } + + #endif /* USE_LIBOS */ +-- +2.23.0 + diff --git a/0004-fix-error-of-deleting-conn-table-in-connect.patch b/0004-fix-error-of-deleting-conn-table-in-connect.patch new file mode 100644 index 0000000000000000000000000000000000000000..32081820c642ce73423290fc8cb9965e45a6b707 --- /dev/null +++ b/0004-fix-error-of-deleting-conn-table-in-connect.patch @@ -0,0 +1,79 @@ +From ed999b65aac44fcb68fc533e8bd5a23cf2d09e7c Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: Wed, 26 May 2021 19:09:41 +0800 +Subject: [PATCH] fix-error-of-deleting-conn-table-in-connect + +--- + src/include/lwip/priv/tcp_priv.h | 42 ++++++++++++++++++++++++++------ + 1 file changed, 34 insertions(+), 8 deletions(-) + +diff --git a/src/include/lwip/priv/tcp_priv.h b/src/include/lwip/priv/tcp_priv.h +index 192edc4..599289f 100644 +--- a/src/include/lwip/priv/tcp_priv.h ++++ b/src/include/lwip/priv/tcp_priv.h +@@ -358,6 +358,28 @@ static inline int vdev_reg_done(enum reg_ring_type reg_type, const struct tcp_pc + + return vdev_reg_xmit(reg_type, &qtuple); + } ++ ++/* TCP_RMV pcb whether to call vdev_reg_xmit to reg conn-sock table. ++ fix the error of adding conn table in connect func and deleting conn table ++ when moving pcb from tcp_bound_pcbs to tcp_listen_pcbs */ ++static inline int need_vdev_reg(struct tcp_pcb *pcb_list, const struct tcp_pcb *pcb) ++{ ++ /* tw_pcbs_list and tcp_listen_pcbs will not change pcb to other list always reg */ ++ if ((pcb_list == tcp_tw_pcbs) || (pcb_list == tcp_listen_pcbs.pcbs)) { ++ return 1; ++ } ++ ++ /* tcp_active_pcbs in FIN_WAIT_1,FIN_WAIT_2,CLOSING state will change pcb to tw_pcbs_list don't reg. 
++ detail info see func tcp_process in tcp_in.c */ ++ if (pcb_list == tcp_active_pcbs) { ++ if ((pcb->state != FIN_WAIT_1) && (pcb->state != FIN_WAIT_2) && (pcb->state != CLOSING)) { ++ return 1; ++ } ++ } ++ ++ /* tcp_bound_pcbs and others don't reg */ ++ return 0; ++} + #endif + + /* Axioms about the above lists: +@@ -392,10 +414,12 @@ static inline int vdev_reg_done(enum reg_ring_type reg_type, const struct tcp_pc + tcp_timer_needed(); \ + } while(0) + #define TCP_RMV(pcbs, npcb) do { \ +- if (pcb->state == LISTEN) \ +- vdev_reg_done(REG_RING_TCP_LISTEN_CLOSE, npcb); \ +- else \ +- vdev_reg_done(REG_RING_TCP_CONNECT_CLOSE, npcb);\ ++ if (need_vdev_reg(*pcbs, npcb)) { \ ++ if (npcb->state == LISTEN) \ ++ vdev_reg_done(REG_RING_TCP_LISTEN_CLOSE, npcb); \ ++ else \ ++ vdev_reg_done(REG_RING_TCP_CONNECT_CLOSE, npcb); \ ++ } \ + struct tcp_pcb *tcp_tmp_pcb; \ + LWIP_ASSERT("TCP_RMV: pcbs != NULL", *(pcbs) != NULL); \ + LWIP_DEBUGF(TCP_DEBUG, ("TCP_RMV: removing %p from %p\n", (npcb), *(pcbs))); \ +@@ -488,10 +512,12 @@ static inline int vdev_reg_done(enum reg_ring_type reg_type, const struct tcp_pc + + #define TCP_RMV(pcbs, npcb) \ + do { \ +- if (pcb->state == LISTEN) \ +- vdev_reg_done(REG_RING_TCP_LISTEN_CLOSE, npcb); \ +- else \ +- vdev_reg_done(REG_RING_TCP_CONNECT_CLOSE, npcb);\ ++ if (need_vdev_reg(*pcbs, npcb)) { \ ++ if (npcb->state == LISTEN) \ ++ vdev_reg_done(REG_RING_TCP_LISTEN_CLOSE, npcb); \ ++ else \ ++ vdev_reg_done(REG_RING_TCP_CONNECT_CLOSE, npcb);\ ++ } \ + if(*(pcbs) == (npcb)) { \ + (*(pcbs)) = (*pcbs)->next; \ + if (*pcbs) \ +-- +2.23.0 + diff --git a/0005-syn-rcvd-state-reg-conn-into-conntable.patch b/0005-syn-rcvd-state-reg-conn-into-conntable.patch new file mode 100644 index 0000000000000000000000000000000000000000..2634f11570cb718d480b44f2f9af1db00760222a --- /dev/null +++ b/0005-syn-rcvd-state-reg-conn-into-conntable.patch @@ -0,0 +1,27 @@ +From 19c51d7baf7eeeae72525f6b716253557be2b31c Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: 
Tue, 29 Jun 2021 14:12:25 +0800 +Subject: [PATCH] add-conn-check + +--- + src/core/tcp_in.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c +index c3d1f54..57186c7 100644 +--- a/src/core/tcp_in.c ++++ b/src/core/tcp_in.c +@@ -752,6 +752,10 @@ tcp_listen_input(struct tcp_pcb_listen *pcb) + #endif + TCP_REG_ACTIVE(npcb); + ++#if USE_LIBOS ++ vdev_reg_done(REG_RING_TCP_CONNECT, npcb); ++#endif ++ + /* Parse any options in the SYN. */ + tcp_parseopt(npcb); + npcb->snd_wnd = tcphdr->wnd; +-- +2.23.0 + diff --git a/0006-fix-coredump-in-etharp.patch b/0006-fix-coredump-in-etharp.patch new file mode 100644 index 0000000000000000000000000000000000000000..d361649228ab8d302cdfc937da09a78f8360d6fd --- /dev/null +++ b/0006-fix-coredump-in-etharp.patch @@ -0,0 +1,29 @@ +From a066306d783693d3f78b9c5e84feca7d690cf27a Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Fri, 2 Jul 2021 16:54:43 +0800 +Subject: [PATCH] fix coredump in etharp + +--- + src/core/ipv4/etharp.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/core/ipv4/etharp.c b/src/core/ipv4/etharp.c +index c3a5a10..effb7db 100644 +--- a/src/core/ipv4/etharp.c ++++ b/src/core/ipv4/etharp.c +@@ -102,10 +102,10 @@ struct etharp_entry { + u8_t state; + }; + +-static struct etharp_entry arp_table[ARP_TABLE_SIZE]; ++static PER_THREAD struct etharp_entry arp_table[ARP_TABLE_SIZE]; + + #if !LWIP_NETIF_HWADDRHINT +-static netif_addr_idx_t etharp_cached_entry; ++static PER_THREAD netif_addr_idx_t etharp_cached_entry; + #endif /* !LWIP_NETIF_HWADDRHINT */ + + /** Try hard to create a new entry - we want the IP address to appear in +-- +2.23.0 + diff --git a/0007-gazelle-fix-epoll_ctl-EPOLLET-mode-error.patch b/0007-gazelle-fix-epoll_ctl-EPOLLET-mode-error.patch new file mode 100644 index 0000000000000000000000000000000000000000..97b5d782a98d7acc488e98f38c033c8f28ebdec7 --- /dev/null +++ b/0007-gazelle-fix-epoll_ctl-EPOLLET-mode-error.patch @@ -0,0 +1,102 
@@ +From b867f6901773def31884a9ae527a1282d274a85d Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: Sat, 10 Jul 2021 22:27:19 +0800 +Subject: [PATCH] fix epoll_ctl EPOLLET mode error +--- + src/api/sockets.c | 33 +++++++++++++++++++++++---------- + 1 file changed, 23 insertions(+), 10 deletions(-) + +diff --git a/src/api/sockets.c b/src/api/sockets.c +index 658f762..eccc7f9 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -714,6 +714,13 @@ free_socket(struct lwip_sock *sock, int is_tcp) + /* Protect socket array */ + SYS_ARCH_PROTECT(lev); + ++#if USE_LIBOS ++ sock->epoll = LIBOS_EPOLLNONE; ++ sock->events = 0; ++ sock->epoll_data = NULL; ++ list_del_node_null(&sock->list); ++#endif ++ + freed = free_socket_locked(sock, is_tcp, &conn, &lastdata); + SYS_ARCH_UNPROTECT(lev); + /* don't use 'sock' after this line, as another task might have allocated it */ +@@ -1003,13 +1010,6 @@ lwip_close(int s) + return -1; + } + +-#if USE_LIBOS +- sock->epoll = LIBOS_EPOLLNONE; +- sock->events = 0; +- sock->epoll_data = NULL; +- list_del_node_null(&sock->list); +-#endif +- + free_socket(sock, is_tcp); + set_errno(0); + return 0; +@@ -1191,7 +1191,7 @@ lwip_recv_tcp(struct lwip_sock *sock, void *mem, size_t len, int flags) + if (sock->lastdata.pbuf) { + p = sock->lastdata.pbuf; + #if USE_LIBOS +- if ((flags & MSG_PEEK) == 0) { ++ if (((flags & MSG_PEEK) == 0) && ((sock->epoll & EPOLLET) == 0)) { + if ((NETCONNTYPE_GROUP(netconn_type(sock->conn)) == NETCONN_TCP)) { + del_epoll_event(sock->conn, EPOLLIN); + } +@@ -2889,6 +2889,9 @@ event_callback(struct netconn *conn, enum netconn_evt evt, u16_t len) + check_waiters = 0; + } + #if USE_LIBOS ++ if (sock->epoll & EPOLLET) { ++ list_del_node_null(&sock->list); ++ } + add_epoll_event(conn, EPOLLIN); + #endif + break; +@@ -2896,7 +2899,9 @@ event_callback(struct netconn *conn, enum netconn_evt evt, u16_t len) + sock->rcvevent--; + check_waiters = 0; + #if USE_LIBOS +- del_epoll_event(conn, EPOLLIN); ++ if ((sock->epoll & 
EPOLLET) == 0) { ++ del_epoll_event(conn, EPOLLIN); ++ } + #endif + break; + case NETCONN_EVT_SENDPLUS: +@@ -2905,6 +2910,9 @@ event_callback(struct netconn *conn, enum netconn_evt evt, u16_t len) + } + sock->sendevent = 1; + #if USE_LIBOS ++ if (sock->epoll & EPOLLET) { ++ list_del_node_null(&sock->list); ++ } + add_epoll_event(conn, EPOLLOUT); + #endif + break; +@@ -2912,12 +2920,17 @@ event_callback(struct netconn *conn, enum netconn_evt evt, u16_t len) + sock->sendevent = 0; + check_waiters = 0; + #if USE_LIBOS +- del_epoll_event(conn, EPOLLOUT); ++ if ((sock->epoll & EPOLLET) == 0) { ++ del_epoll_event(conn, EPOLLOUT); ++ } + #endif + break; + case NETCONN_EVT_ERROR: + sock->errevent = 1; + #if USE_LIBOS ++ if (sock->epoll & EPOLLET) { ++ list_del_node_null(&sock->list); ++ } + add_epoll_event(conn, EPOLLERR); + #endif + break; +-- +2.23.0 + diff --git a/0008-gazelle-fix-lwip_accept-memcpy-sockaddr-large.patch b/0008-gazelle-fix-lwip_accept-memcpy-sockaddr-large.patch new file mode 100644 index 0000000000000000000000000000000000000000..94eec7f42725814a3772415816480f5a2e9c43d6 --- /dev/null +++ b/0008-gazelle-fix-lwip_accept-memcpy-sockaddr-large.patch @@ -0,0 +1,25 @@ +From bf1c7febb9f6c3a2336f18f658694393dea451ae Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Fri, 16 Jul 2021 14:44:03 +0800 +Subject: [PATCH] [Huawei]gazelle: fix lwip_accept memcpy sockaddr larger than + actual +--- + src/api/sockets.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/api/sockets.c b/src/api/sockets.c +index eccc7f9..e640945 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -860,6 +860,8 @@ lwip_accept(int s, struct sockaddr *addr, socklen_t *addrlen) + if (*addrlen > tempaddr.sa.sa_len) { + *addrlen = tempaddr.sa.sa_len; + } ++#else ++ *addrlen = LWIP_MIN(*addrlen, sizeof(tempaddr)); + #endif /* USE_LIBOS */ + MEMCPY(addr, &tempaddr, *addrlen); + +-- +2.23.0 + diff --git a/0009-fix-stack-buffer-overflow-when-memcpy-addr.patch 
b/0009-fix-stack-buffer-overflow-when-memcpy-addr.patch new file mode 100644 index 0000000000000000000000000000000000000000..38f97ee5fbee8acb3c145ae0bb20310f3739db20 --- /dev/null +++ b/0009-fix-stack-buffer-overflow-when-memcpy-addr.patch @@ -0,0 +1,35 @@ +From d1f9ccd5da1712477f30bf2662e8888395ed95cd Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: Wed, 21 Jul 2021 20:01:47 +0800 +Subject: [PATCH] fix stack-buffer-overflow in lwip_sock_make_addr and + lwip_getaddrname + +--- + src/api/sockets.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/src/api/sockets.c b/src/api/sockets.c +index e640945..7ce9378 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -1319,6 +1319,8 @@ lwip_sock_make_addr(struct netconn *conn, ip_addr_t *fromaddr, u16_t port, + } else if (*fromlen > saddr.sa.sa_len) { + *fromlen = saddr.sa.sa_len; + } ++#else ++ *fromlen = LWIP_MIN(*fromlen, sizeof(saddr)); + #endif + MEMCPY(from, &saddr, *fromlen); + return truncated; +@@ -3133,6 +3135,8 @@ lwip_getaddrname(int s, struct sockaddr *name, socklen_t *namelen, u8_t local) + if (*namelen > saddr.sa.sa_len) { + *namelen = saddr.sa.sa_len; + } ++#else ++ *namelen = LWIP_MIN(*namelen, sizeof(saddr)); + #endif + MEMCPY(name, &saddr, *namelen); + +-- +2.23.0 + diff --git a/0010-fix-the-incomplete-release-of-the-conntable.patch b/0010-fix-the-incomplete-release-of-the-conntable.patch new file mode 100644 index 0000000000000000000000000000000000000000..8c7c4f091877d21e78695eb788f742289eefadc7 --- /dev/null +++ b/0010-fix-the-incomplete-release-of-the-conntable.patch @@ -0,0 +1,115 @@ +From 70a1cdd2618f117c9f7da17b111a6c51db242f4b Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: Tue, 3 Aug 2021 11:23:10 +0800 +Subject: [PATCH] fix-the-incomplete-release-of-the-conntable + +--- + src/core/tcp.c | 12 +++++++++++ + src/include/lwip/priv/tcp_priv.h | 37 ++++++-------------------------- + 2 files changed, 19 insertions(+), 30 deletions(-) + +diff --git a/src/core/tcp.c 
b/src/core/tcp.c +index 0aafa9b..2cfbce2 100644 +--- a/src/core/tcp.c ++++ b/src/core/tcp.c +@@ -235,6 +235,9 @@ tcp_init(void) + void + tcp_free(struct tcp_pcb *pcb) + { ++#if USE_LIBOS ++ vdev_unreg_done(pcb); ++#endif + LWIP_ASSERT("tcp_free: LISTEN", pcb->state != LISTEN); + #if LWIP_TCP_PCB_NUM_EXT_ARGS + tcp_ext_arg_invoke_callbacks_destroyed(pcb->ext_args); +@@ -943,6 +946,11 @@ tcp_listen_with_backlog_and_err(struct tcp_pcb *pcb, u8_t backlog, err_t *err) + #if LWIP_TCP_PCB_NUM_EXT_ARGS + /* copy over ext_args to listening pcb */ + memcpy(&lpcb->ext_args, &pcb->ext_args, sizeof(pcb->ext_args)); ++#endif ++#if USE_LIBOS ++ /* pcb transfer to lpcb and reg into tcp_listen_pcbs. freeing pcb shouldn't release sock table in here. ++ * local_port=0 avoid to release sock table in tcp_free */ ++ pcb->local_port = 0; + #endif + tcp_free(pcb); + #if LWIP_CALLBACK_API +@@ -2263,6 +2271,10 @@ tcp_pcb_remove(struct tcp_pcb **pcblist, struct tcp_pcb *pcb) + LWIP_ASSERT("tcp_pcb_remove: invalid pcb", pcb != NULL); + LWIP_ASSERT("tcp_pcb_remove: invalid pcblist", pcblist != NULL); + ++#if USE_LIBOS ++ vdev_unreg_done(pcb); ++#endif ++ + TCP_RMV(pcblist, pcb); + + tcp_pcb_purge(pcb); +diff --git a/src/include/lwip/priv/tcp_priv.h b/src/include/lwip/priv/tcp_priv.h +index 599289f..f771725 100644 +--- a/src/include/lwip/priv/tcp_priv.h ++++ b/src/include/lwip/priv/tcp_priv.h +@@ -358,27 +358,16 @@ static inline int vdev_reg_done(enum reg_ring_type reg_type, const struct tcp_pc + + return vdev_reg_xmit(reg_type, &qtuple); + } +- +-/* TCP_RMV pcb whether to call vdev_reg_xmit to reg conn-sock table. 
+- fix the error of adding conn table in connect func and deleting conn table +- when moving pcb from tcp_bound_pcbs to tcp_listen_pcbs */ +-static inline int need_vdev_reg(struct tcp_pcb *pcb_list, const struct tcp_pcb *pcb) ++static inline void vdev_unreg_done(const struct tcp_pcb *pcb) + { +- /* tw_pcbs_list and tcp_listen_pcbs will not change pcb to other list always reg */ +- if ((pcb_list == tcp_tw_pcbs) || (pcb_list == tcp_listen_pcbs.pcbs)) { +- return 1; ++ if (pcb->local_port == 0) { ++ return; + } +- +- /* tcp_active_pcbs in FIN_WAIT_1,FIN_WAIT_2,CLOSING state will change pcb to tw_pcbs_list don't reg. +- detail info see func tcp_process in tcp_in.c */ +- if (pcb_list == tcp_active_pcbs) { +- if ((pcb->state != FIN_WAIT_1) && (pcb->state != FIN_WAIT_2) && (pcb->state != CLOSING)) { +- return 1; +- } ++ if (pcb->state == LISTEN) { ++ vdev_reg_done(REG_RING_TCP_LISTEN_CLOSE, pcb); ++ } else { ++ vdev_reg_done(REG_RING_TCP_CONNECT_CLOSE, pcb); + } +- +- /* tcp_bound_pcbs and others don't reg */ +- return 0; + } + #endif + +@@ -414,12 +403,6 @@ static inline int need_vdev_reg(struct tcp_pcb *pcb_list, const struct tcp_pcb * + tcp_timer_needed(); \ + } while(0) + #define TCP_RMV(pcbs, npcb) do { \ +- if (need_vdev_reg(*pcbs, npcb)) { \ +- if (npcb->state == LISTEN) \ +- vdev_reg_done(REG_RING_TCP_LISTEN_CLOSE, npcb); \ +- else \ +- vdev_reg_done(REG_RING_TCP_CONNECT_CLOSE, npcb); \ +- } \ + struct tcp_pcb *tcp_tmp_pcb; \ + LWIP_ASSERT("TCP_RMV: pcbs != NULL", *(pcbs) != NULL); \ + LWIP_DEBUGF(TCP_DEBUG, ("TCP_RMV: removing %p from %p\n", (npcb), *(pcbs))); \ +@@ -512,12 +495,6 @@ static inline int need_vdev_reg(struct tcp_pcb *pcb_list, const struct tcp_pcb * + + #define TCP_RMV(pcbs, npcb) \ + do { \ +- if (need_vdev_reg(*pcbs, npcb)) { \ +- if (npcb->state == LISTEN) \ +- vdev_reg_done(REG_RING_TCP_LISTEN_CLOSE, npcb); \ +- else \ +- vdev_reg_done(REG_RING_TCP_CONNECT_CLOSE, npcb);\ +- } \ + if(*(pcbs) == (npcb)) { \ + (*(pcbs)) = (*pcbs)->next; \ + if 
(*pcbs) \ +-- +2.23.0 + diff --git a/0011-remove-gazelle-tcp-conn-func.patch b/0011-remove-gazelle-tcp-conn-func.patch new file mode 100644 index 0000000000000000000000000000000000000000..08a3dd393653a3db87960e83853e2d86325ffbb6 --- /dev/null +++ b/0011-remove-gazelle-tcp-conn-func.patch @@ -0,0 +1,116 @@ +From fdccb3a2c430c6270ff5272220cf471bf760fda7 Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: Sat, 21 Aug 2021 15:22:52 +0800 +Subject: [PATCH] del tcp_conn + +--- + src/core/tcp.c | 78 ------------------------------------------ + src/include/lwip/tcp.h | 3 -- + 2 files changed, 81 deletions(-) + +diff --git a/src/core/tcp.c b/src/core/tcp.c +index 2cfbce2..0f3e830 100644 +--- a/src/core/tcp.c ++++ b/src/core/tcp.c +@@ -2484,84 +2484,6 @@ tcp_tcp_get_tcp_addrinfo(struct tcp_pcb *pcb, int local, ip_addr_t *addr, u16_t + return ERR_VAL; + } + +-uint32_t tcp_get_conn_num(void) +-{ +- struct tcp_pcb *pcb = NULL; +- struct tcp_pcb_listen *pcbl = NULL; +- uint32_t conn_num = 0; +- +- for (pcb = tcp_active_pcbs; pcb != NULL; pcb = pcb->next) { +- conn_num++; +- } +- +- for (pcbl = tcp_listen_pcbs.listen_pcbs; pcbl != NULL; pcbl = pcbl->next) { +- conn_num++; +- } +- +- for (pcb = tcp_tw_pcbs; pcb != NULL; pcb = pcb->next) { +- conn_num++; +- } +- +- return conn_num; +-} +- +-void tcp_get_conn(char *buf, int32_t len, uint32_t *conn_num) +-{ +- int tmp_len = 0; +- char *tmp_buf = buf; +- struct tcp_pcb_dp tdp; +- struct tcp_pcb *pcb = NULL; +- struct tcp_pcb_listen *pcbl = NULL; +- +-#define COPY_TDP(b, l) \ +- do { \ +- if (l + sizeof(tdp) <= len) { \ +- memcpy(b, &tdp, sizeof(tdp)); \ +- b += sizeof(tdp); \ +- l += sizeof(tdp); \ +- *conn_num += 1; \ +- } else \ +- return; \ +- } while(0); +- +- *conn_num = 0; +- +- for (pcb = tcp_active_pcbs; pcb != NULL; pcb = pcb->next) { +- tdp.state = ACTIVE_LIST; +- tdp.lip = pcb->local_ip.addr; +- tdp.rip = pcb->remote_ip.addr; +- tdp.l_port = pcb->local_port; +- tdp.r_port = pcb->remote_port; +- tdp.s_next = 
pcb->snd_queuelen; +- /* lwip not cache rcv buf. Set it to 0. */ +- tdp.r_next = 0; +- tdp.tcp_sub_state = pcb->state; +- COPY_TDP(tmp_buf, tmp_len); +- } +- +- for (pcbl = tcp_listen_pcbs.listen_pcbs; pcbl != NULL; pcbl = pcbl->next) { +- tdp.state = LISTEN_LIST; +- tdp.lip = pcbl->local_ip.addr; +- tdp.rip = pcbl->remote_ip.addr; +- tdp.l_port = pcbl->local_port; +- tdp.tcp_sub_state = pcbl->state; +- COPY_TDP(tmp_buf, tmp_len); +- } +- +- for (pcb = tcp_tw_pcbs; pcb != NULL; pcb = pcb->next) { +- tdp.state = TIME_WAIT_LIST; +- tdp.lip = pcb->local_ip.addr; +- tdp.rip = pcb->remote_ip.addr; +- tdp.l_port = pcb->local_port; +- tdp.r_port = pcb->remote_port; +- tdp.s_next = pcb->snd_queuelen; +- /* lwip not cache rcv buf. Set it to 0. */ +- tdp.r_next = 0; +- tdp.tcp_sub_state = pcb->state; +- COPY_TDP(tmp_buf, tmp_len); +- } +-} +- + #if TCP_QUEUE_OOSEQ + /* Free all ooseq pbufs (and possibly reset SACK state) */ + void +diff --git a/src/include/lwip/tcp.h b/src/include/lwip/tcp.h +index 4f86b46..b36bf33 100644 +--- a/src/include/lwip/tcp.h ++++ b/src/include/lwip/tcp.h +@@ -570,9 +570,6 @@ struct tcp_pcb_dp { + uint32_t tcp_sub_state; + }; + +-void tcp_get_conn(char *buf, int32_t len, uint32_t *conn_num); +-uint32_t tcp_get_conn_num(void); +- + /* for compatibility with older implementation */ + #define tcp_new_ip6() tcp_new_ip_type(IPADDR_TYPE_V6) + +-- +2.23.0 + diff --git a/0012-fix-incomplete-resource-release-in-lwip-close.patch b/0012-fix-incomplete-resource-release-in-lwip-close.patch new file mode 100644 index 0000000000000000000000000000000000000000..5ea83689da60b3f8bdade738122a088dd4e48b81 --- /dev/null +++ b/0012-fix-incomplete-resource-release-in-lwip-close.patch @@ -0,0 +1,49 @@ +From c5db70bef7f1ac6627b278fdf06be57bce0ef00b Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: Thu, 19 Aug 2021 14:53:14 +0800 +Subject: [PATCH] fix event.data.ptr double free due to socket don't free in +lwip_close + +--- +src/api/sockets.c | 10 ++++++---- +1 file 
changed, 6 insertions(+), 4 deletions(-) + +diff --git a/src/api/sockets.c b/src/api/sockets.c +index 7ce9378..ac4cccb 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -963,18 +963,20 @@ lwip_close(int s) + struct lwip_sock *sock; + int is_tcp = 0; + err_t err; ++ int ret = 0; + + LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_close(%d)\n", s)); + + #if USE_LIBOS +- int ret; + if (posix_api->is_epfd(s)) { + return posix_api->epoll_close_fn(s); + } + ++ /* No matter what the result of close, lwip_sock resources should be released ++ * to prevent the potential double free problem caused by reporting events after the close */ + ret = posix_api->close_fn(s); +- if (ret < 0) +- return ret; ++ if ((ret < 0) && (errno == EINTR)) ++ ret = posix_api->close_fn(s); + if (posix_api->is_chld == 0) + clean_host_fd(s); + +@@ -1014,7 +1016,7 @@ lwip_close(int s) + + free_socket(sock, is_tcp); + set_errno(0); +- return 0; ++ return ret; + } + + int +-- +2.23.0 diff --git a/0013-remove-gazelle-syscall-thread.patch b/0013-remove-gazelle-syscall-thread.patch new file mode 100644 index 0000000000000000000000000000000000000000..64e0c0d69dd0c24b74d899cbb41d9fd43a18997f --- /dev/null +++ b/0013-remove-gazelle-syscall-thread.patch @@ -0,0 +1,126 @@ +From afd0d39d31196a74d6808120d1ca5664825d477c Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: Mon, 6 Sep 2021 22:52:41 +0800 +Subject: [PATCH] aaa + +--- + src/api/sockets.c | 17 ----------------- + src/include/eventpoll.h | 1 - + src/include/lwipopts.h | 17 ----------------- + src/include/lwipsock.h | 5 ----- + 4 files changed, 40 deletions(-) + +diff --git a/src/api/sockets.c b/src/api/sockets.c +index ac4cccb..8719568 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -755,10 +755,6 @@ lwip_accept(int s, struct sockaddr *addr, socklen_t *addrlen) + sock = posix_api->get_socket(s); + /*AF_UNIX case*/ + if (!sock) { +- if (rearm_accept_fd(s) < 0) { +- LWIP_DEBUGF(SOCKETS_DEBUG, +- ("failed to rearm accept fd=%d errno=%d\n", s,
errno)); +- } + return posix_api->accept_fn(s, addr, addrlen); + } + +@@ -769,11 +765,6 @@ lwip_accept(int s, struct sockaddr *addr, socklen_t *addrlen) + return -1; + } + +- if (rearm_accept_fd(s) < 0) { +- LWIP_DEBUGF(SOCKETS_DEBUG, +- ("failed to rearm accept fd=%d errno=%d\n", s, errno)); +- } +- + /* raise accept syscall in palce */ + newsock = posix_api->accept_fn(s, addr, addrlen); + if (newsock >= 0) { +@@ -977,8 +968,6 @@ lwip_close(int s) + ret = posix_api->close_fn(s); + if ((ret < 0) && (errno == EINTR)) + ret = posix_api->close_fn(s); +- if (posix_api->is_chld == 0) +- clean_host_fd(s); + + sock = posix_api->get_socket(s); + /*AF_UNIX case*/ +@@ -1481,9 +1470,6 @@ static inline enum KERNEL_LWIP_PATH select_path(int s) + sock = posix_api->get_socket(s); + /*AF_UNIX case*/ + if (!sock) { +- if (rearm_host_fd(s) < 0) { +- LWIP_DEBUGF(SOCKETS_DEBUG, ("failed to rearm fd=%d errno=%d\n", s, errno)); +- } + return PATH_KERNEL; + } + +@@ -1494,9 +1480,6 @@ static inline enum KERNEL_LWIP_PATH select_path(int s) + + /*for AF_INET, we can try erther linux or lwip*/ + if (CONN_TYPE_IS_HOST(sock->conn)) { +- if (rearm_host_fd(s) < 0) { +- LWIP_DEBUGF(SOCKETS_DEBUG, ("failed to rearm read fd=%d errno=%d\n", s, errno)); +- } + return PATH_KERNEL; + } + +diff --git a/src/include/eventpoll.h b/src/include/eventpoll.h +index 01f8d64..f525bc2 100644 +--- a/src/include/eventpoll.h ++++ b/src/include/eventpoll.h +@@ -57,7 +57,6 @@ struct event_array { + + struct libos_epoll { + struct event_queue *libos_queue; +- struct event_array *host_queue; + int num_hostfds; + int hints; + int fd; /* self fd */ +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index 8893a5f..e0364a2 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -177,23 +177,6 @@ + + #define ARP_TABLE_SIZE 512 + +-/* +- --------------------------------------- +- ------- Syscall thread options -------- +- --------------------------------------- +-*/ +-#define USE_SYSCALL_THREAD 1 +- 
+-#define MAX_BLOCKING_ACCEPT_FD (100) +- +-#define MAX_BLOCKING_CONNECT_FD (100) +- +-#define MAX_BLOCKING_EPOLL_FD (100) +- +-#define MAX_SYSCALL_EVENTS (MAX_BLOCKING_ACCEPT_FD + MAX_BLOCKING_CONNECT_FD + MAX_BLOCKING_EPOLL_FD) +- +-#define MAX_HOST_FD (MAX_CLIENTS + RESERVED_CLIENTS) +- + #if USE_LIBOS + #define PER_THREAD __thread + #else +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index dbc67b9..e9ffbb1 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -146,10 +146,5 @@ void lwip_sock_init(void); + void lwip_exit(void); + + extern int is_host_ipv4(uint32_t ipv4); +-extern int rearm_host_fd(int fd); +-extern int rearm_accept_fd(int fd); +-extern void unarm_host_fd(int fd); +-extern void clean_host_fd(int fd); +-extern int arm_host_fd(struct libos_epoll *ep, int op, int fd, struct epoll_event *event); + + #endif /* __LWIPSOCK_H__ */ +-- +2.23.0 + diff --git a/0014-fix-some-compile-errors.patch b/0014-fix-some-compile-errors.patch new file mode 100644 index 0000000000000000000000000000000000000000..5be2bdfc4dba02893f85e5d276ab212cfb532ef2 --- /dev/null +++ b/0014-fix-some-compile-errors.patch @@ -0,0 +1,62 @@ +From 4970d00fecf52a472a28d55243f87142d3d08268 Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Tue, 4 Jan 2022 17:23:03 +0800 +Subject: [PATCH] fix some compile errors + +--- + src/include/arch/cc.h | 4 ++-- + src/include/lwiplog.h | 2 +- + src/include/posix_api.h | 2 +- + 3 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/src/include/arch/cc.h b/src/include/arch/cc.h +index 33c24b4..222b0c9 100644 +--- a/src/include/arch/cc.h ++++ b/src/include/arch/cc.h +@@ -62,7 +62,7 @@ void alloc_memp_##name##_base(void) \ + memp_pools[MEMP_##name] = &memp_ ## name; \ + \ + char mpname[MEMZONE_NAMESIZE] = {0}; \ +- snprintf(mpname, MEMZONE_NAMESIZE, "%ld_%s", gettid(), #name); \ ++ snprintf(mpname, MEMZONE_NAMESIZE, "%d_%s", gettid(), #name); \ + memp_memory_##name##_base = \ + sys_hugepage_malloc(mpname, 
LWIP_MEM_ALIGN_BUFFER(__size)); \ + memp_pools[MEMP_##name]->base = memp_memory_##name##_base; \ +@@ -73,7 +73,7 @@ PER_THREAD uint8_t *variable_name; \ + void alloc_memory_##variable_name(void) \ + { \ + char mpname[MEMZONE_NAMESIZE] = {0}; \ +- snprintf(mpname, MEMZONE_NAMESIZE, "%ld_%s", gettid(), #variable_name); \ ++ snprintf(mpname, MEMZONE_NAMESIZE, "%d_%s", gettid(), #variable_name); \ + (variable_name) = \ + sys_hugepage_malloc(mpname, LWIP_MEM_ALIGN_BUFFER(size)); \ + } +diff --git a/src/include/lwiplog.h b/src/include/lwiplog.h +index 363e516..6fccac8 100644 +--- a/src/include/lwiplog.h ++++ b/src/include/lwiplog.h +@@ -40,7 +40,7 @@ + + #include "lwipopts.h" + +-#define gettid() syscall(__NR_gettid) ++extern int gettid(void); + + #if USE_DPDK_LOG + +diff --git a/src/include/posix_api.h b/src/include/posix_api.h +index 8aa8516..0dca8eb 100644 +--- a/src/include/posix_api.h ++++ b/src/include/posix_api.h +@@ -79,7 +79,7 @@ typedef struct { + int is_chld; + } posix_api_t; + +-posix_api_t *posix_api; ++extern posix_api_t *posix_api; + + int posix_api_init(void); + void posix_api_free(void); +-- +1.8.3.1 + diff --git a/0015-fix-tcp-port-alloc-issue.patch b/0015-fix-tcp-port-alloc-issue.patch new file mode 100644 index 0000000000000000000000000000000000000000..4576af88ea7ecbc8a87aabb57eb470a1ab69d417 --- /dev/null +++ b/0015-fix-tcp-port-alloc-issue.patch @@ -0,0 +1,36 @@ +From bd0fdaf755544da1a276820a7cc3f664a2765194 Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Tue, 18 Jan 2022 10:34:42 +0800 +Subject: [PATCH] fix tcp port alloc issue + +--- + src/core/tcp.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/src/core/tcp.c b/src/core/tcp.c +index a9a91fd..b65ab33 100644 +--- a/src/core/tcp.c ++++ b/src/core/tcp.c +@@ -1062,6 +1062,7 @@ tcp_new_port(void) + { + u8_t i; + u16_t n = 0; ++ u16_t tmp_port; + struct tcp_pcb *pcb; + + pthread_mutex_lock(&g_tcp_port_mutex); +@@ -1082,9 +1083,10 @@ again: + } + } + } ++ tmp_port = tcp_port; 
+ pthread_mutex_unlock(&g_tcp_port_mutex); + +- return tcp_port; ++ return tmp_port; + } + + /** +-- +1.8.3.1 + diff --git a/0016-lstack-support-mysql-mode.patch b/0016-lstack-support-mysql-mode.patch new file mode 100644 index 0000000000000000000000000000000000000000..0ac7fe1816b84e8190e849c4d478c2f74f22836f --- /dev/null +++ b/0016-lstack-support-mysql-mode.patch @@ -0,0 +1,943 @@ +From 1f0f3742019e2fa62ba1669c5a880fb63a3fee12 Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Thu, 24 Feb 2022 20:08:46 +0800 +Subject: [PATCH] lstack support mysql mode + +--- + src/api/api_msg.c | 26 +-- + src/api/posix_api.c | 5 +- + src/api/sockets.c | 350 ++----------------------------- + src/api/sys_arch.c | 12 +- + src/core/tcp_out.c | 13 ++ + src/include/eventpoll.h | 6 +- + src/include/lwip/priv/tcp_priv.h | 2 +- + src/include/lwip/sockets.h | 2 +- + src/include/lwipsock.h | 29 ++- + src/include/posix_api.h | 2 +- + src/include/reg_sock.h | 8 +- + 11 files changed, 85 insertions(+), 370 deletions(-) + +diff --git a/src/api/api_msg.c b/src/api/api_msg.c +index d5a738f..3072dd9 100644 +--- a/src/api/api_msg.c ++++ b/src/api/api_msg.c +@@ -342,6 +342,12 @@ recv_tcp(void *arg, struct tcp_pcb *pcb, struct pbuf *p, err_t err) + #endif /* LWIP_SO_RCVBUF */ + /* Register event with callback */ + API_EVENT(conn, NETCONN_EVT_RCVPLUS, len); ++#if USE_LIBOS ++ if (conn->state == NETCONN_WRITE || conn->state == NETCONN_CLOSE || ++ conn->state == NETCONN_CONNECT) { ++ add_recv_list(conn->socket); ++ } ++#endif + } + + return ERR_OK; +@@ -457,14 +463,6 @@ err_tcp(void *arg, err_t err) + old_state = conn->state; + conn->state = NETCONN_NONE; + +-#if USE_LIBOS +- if (CONN_TYPE_IS_HOST(conn)) { +- LWIP_DEBUGF(API_MSG_DEBUG, +- ("linux localhost connection already success, ignore lwip err_tcp fd=%d\n", conn->socket)); +- return; +- } +-#endif /* USE_LIBOS */ +- + SYS_ARCH_UNPROTECT(lev); + + /* Notify the user layer about a connection error. Used to signal select. 
*/ +@@ -479,6 +477,12 @@ err_tcp(void *arg, err_t err) + if (NETCONN_MBOX_VALID(conn, &conn->recvmbox)) { + /* use trypost to prevent deadlock */ + sys_mbox_trypost(&conn->recvmbox, mbox_msg); ++#if USE_LIBOS ++ if ((old_state == NETCONN_WRITE) || (old_state == NETCONN_CLOSE) || ++ (old_state == NETCONN_CONNECT)) { ++ add_recv_list(conn->socket); ++ } ++#endif + } + /* pass error message to acceptmbox to wake up pending accept */ + if (NETCONN_MBOX_VALID(conn, &conn->acceptmbox)) { +@@ -1344,11 +1348,7 @@ lwip_netconn_do_connected(void *arg, struct tcp_pcb *pcb, err_t err) + int s = conn->socket; + struct lwip_sock *sock = get_socket_without_errno(s); + +- if (!!sock && !!sock->epoll_data) { +- struct epoll_event ee = {0}; +- ee.data.fd = s; +- ee.events |= EPOLLIN | EPOLLOUT | EPOLLERR; +- posix_api->epoll_ctl_fn(sock->epoll_data->fd, EPOLL_CTL_DEL, s, &ee); ++ if (!!sock) { + posix_api->shutdown_fn(s, SHUT_RDWR); + LWIP_DEBUGF(API_MSG_DEBUG, + ("linux outgoing connection abort fd=%d\n", s)); +diff --git a/src/api/posix_api.c b/src/api/posix_api.c +index a917cea..eff9f46 100644 +--- a/src/api/posix_api.c ++++ b/src/api/posix_api.c +@@ -143,11 +143,10 @@ int posix_api_init(void) + + /* lstack helper api */ + posix_api->get_socket = get_socket; +- posix_api->is_epfd = lwip_is_epfd; +- posix_api->epoll_close_fn = lwip_epoll_close; ++ posix_api->epoll_close_fn = lstack_epoll_close; + + /* support fork */ +- posix_api->is_chld = 0; ++ posix_api->is_chld = 1; + return ERR_OK; + + err_out: +diff --git a/src/api/sockets.c b/src/api/sockets.c +index f44c34f..b032ce9 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -90,14 +90,6 @@ + #define API_SELECT_CB_VAR_ALLOC(name, retblock) API_VAR_ALLOC_EXT(struct lwip_select_cb, MEMP_SELECT_CB, name, retblock) + #define API_SELECT_CB_VAR_FREE(name) API_VAR_FREE(MEMP_SELECT_CB, name) + +-#if USE_LIBOS +-enum KERNEL_LWIP_PATH { +- PATH_KERNEL = 0, +- PATH_LWIP, +- PATH_ERR, +-}; +-#endif +- + #if LWIP_IPV4 + #if USE_LIBOS 
+ #define IP4ADDR_PORT_TO_SOCKADDR(sin, ipaddr, port) do { \ +@@ -604,8 +596,6 @@ alloc_socket(struct netconn *newconn, int accepted) + * (unless it has been created by accept()). */ + sockets[i].sendevent = (NETCONNTYPE_GROUP(newconn->type) == NETCONN_TCP ? (accepted != 0) : 1); + sockets[i].errevent = 0; +- sockets[i].epoll_data = NULL; +- init_list_node_null(&sockets[i].list); + return i + LWIP_SOCKET_OFFSET; + } + +@@ -714,13 +704,6 @@ free_socket(struct lwip_sock *sock, int is_tcp) + /* Protect socket array */ + SYS_ARCH_PROTECT(lev); + +-#if USE_LIBOS +- sock->epoll = LIBOS_EPOLLNONE; +- sock->events = 0; +- sock->epoll_data = NULL; +- list_del_node_null(&sock->list); +-#endif +- + freed = free_socket_locked(sock, is_tcp, &conn, &lastdata); + SYS_ARCH_UNPROTECT(lev); + /* don't use 'sock' after this line, as another task might have allocated it */ +@@ -749,34 +732,11 @@ lwip_accept(int s, struct sockaddr *addr, socklen_t *addrlen) + SYS_ARCH_DECL_PROTECT(lev); + + LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_accept(%d)...\n", s)); +-#if USE_LIBOS +- int sys_errno = 0; +- +- sock = posix_api->get_socket(s); +- /*AF_UNIX case*/ +- if (!sock) { +- return posix_api->accept_fn(s, addr, addrlen); +- } +- +- /*for AF_INET, we may try both linux and lwip*/ +- if (!CONN_TYPE_HAS_LIBOS_AND_HOST(sock->conn)) { +- LWIP_DEBUGF(SOCKETS_DEBUG, ("conn->type has libos and host bits")); +- set_errno(EINVAL); +- return -1; +- } +- +- /* raise accept syscall in palce */ +- newsock = posix_api->accept_fn(s, addr, addrlen); +- if (newsock >= 0) { +- return newsock; +- } +- sys_errno = errno; +-#else ++ + sock = get_socket(s); + if (!sock) { + return -1; + } +-#endif + + /* wait for a new connection */ + err = netconn_accept(sock->conn, &newconn); +@@ -790,9 +750,6 @@ lwip_accept(int s, struct sockaddr *addr, socklen_t *addrlen) + sock_set_errno(sock, err_to_errno(err)); + } + done_socket(sock); +-#if USE_LIBOS +- set_errno(sys_errno); +-#endif /* USE_LIBOS */ + return -1; + } + 
LWIP_ASSERT("newconn != NULL", newconn != NULL); +@@ -875,24 +832,11 @@ lwip_bind(int s, const struct sockaddr *name, socklen_t namelen) + ip_addr_t local_addr; + u16_t local_port; + err_t err; +-#if USE_LIBOS +- sock = posix_api->get_socket(s); +- /*AF_UNIX case*/ +- if (!sock) { +- return posix_api->bind_fn(s, name, namelen); +- } +- /*for AF_INET, we may try both linux and lwip*/ +- if (!CONN_TYPE_HAS_LIBOS_AND_HOST(sock->conn)) { +- LWIP_DEBUGF(SOCKETS_DEBUG, ("conn->type has libos and host bits")); +- set_errno(EINVAL); +- return -1; +- } +-#else ++ + sock = get_socket(s); + if (!sock) { + return -1; + } +-#endif + + if (!SOCK_ADDR_TYPE_MATCH(name, sock)) { + /* sockaddr does not match socket type (IPv4/IPv6) */ +@@ -912,18 +856,6 @@ lwip_bind(int s, const struct sockaddr *name, socklen_t namelen) + ip_addr_debug_print_val(SOCKETS_DEBUG, local_addr); + LWIP_DEBUGF(SOCKETS_DEBUG, (" port=%"U16_F")\n", local_port)); + +-#if USE_LIBOS +- /* Supports kernel NIC IP address. */ +- int ret = posix_api->bind_fn(s, name, namelen); +- if (ret < 0) { +- LWIP_DEBUGF(SOCKETS_DEBUG, ("bind syscall failed\n")); +- /* bind must succeed on both linux and libos */ +- if (!is_host_ipv4(local_addr.addr)) { +- return ret; +- } +- } +-#endif /* USE_LIBOS */ +- + #if LWIP_IPV4 && LWIP_IPV6 + /* Dual-stack: Unmap IPv4 mapped IPv6 addresses */ + if (IP_IS_V6_VAL(local_addr) && ip6_addr_isipv4mappedipv6(ip_2_ip6(&local_addr))) { +@@ -953,32 +885,13 @@ lwip_close(int s) + struct lwip_sock *sock; + int is_tcp = 0; + err_t err; +- int ret = 0; + + LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_close(%d)\n", s)); + +-#if USE_LIBOS +- if (posix_api->is_epfd(s)) { +- return posix_api->epoll_close_fn(s); +- } +- +- /* No matter what the result of close, lwip_sock resources should release +- * to prevent the potential double freee problem caused by reporting events after the close */ +- ret = posix_api->close_fn(s); +- if ((ret < 0) && (errno == EINTR)) +- ret = posix_api->close_fn(s); +- +- sock = 
posix_api->get_socket(s); +- /*AF_UNIX case*/ +- if (!sock) { +- return ret; +- } +-#else + sock = get_socket(s); + if (!sock) { + return -1; + } +-#endif /* USE_LIBOS */ + + if (sock->conn != NULL) { + is_tcp = NETCONNTYPE_GROUP(netconn_type(sock->conn)) == NETCONN_TCP; +@@ -1004,7 +917,7 @@ lwip_close(int s) + + free_socket(sock, is_tcp); + set_errno(0); +- return ret; ++ return 0; + } + + int +@@ -1013,28 +926,10 @@ lwip_connect(int s, const struct sockaddr *name, socklen_t namelen) + struct lwip_sock *sock; + err_t err; + +-#if USE_LIBOS +- int ret; +- +- sock = posix_api->get_socket(s); +- if (!sock) { +- return posix_api->connect_fn(s, name, namelen); +- } +- +- /* raise connect syscall in place */ +- ADD_CONN_TYPE_INPRG(sock->conn); +- ret = posix_api->connect_fn(s, name, namelen); +- if (!ret) { +- SET_CONN_TYPE_HOST(sock->conn); +- LWIP_DEBUGF(SOCKETS_DEBUG, ("linux connect succeed fd=%d\n", s)); +- return ret; +- } +-#else + sock = get_socket(s); + if (!sock) { + return -1; + } +-#endif + + if (!SOCK_ADDR_TYPE_MATCH_OR_UNSPEC(name, sock)) { + /* sockaddr does not match socket type (IPv4/IPv6) */ +@@ -1106,29 +1001,10 @@ lwip_listen(int s, int backlog) + + LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_listen(%d, backlog=%d)\n", s, backlog)); + +-#if USE_LIBOS +- int ret; +- +- sock = posix_api->get_socket(s); +- /*AF_UNIX case*/ +- if (!sock) { +- return posix_api->listen_fn(s, backlog); +- } +- /*for AF_INET, we may try both linux and lwip*/ +- if (!CONN_TYPE_HAS_LIBOS_AND_HOST(sock->conn)) { +- LWIP_DEBUGF(SOCKETS_DEBUG, ("conn->type has libos and host bits")); +- set_errno(EADDRINUSE); +- return -1; +- } +- +- if ((ret = posix_api->listen_fn(s, backlog)) == -1) +- return ret; +-#else + sock = get_socket(s); + if (!sock) { + return -1; + } +-#endif + + /* limit the "backlog" parameter to fit in an u8_t */ + backlog = LWIP_MIN(LWIP_MAX(backlog, 0), 0xff); +@@ -1160,11 +1036,12 @@ static ssize_t + lwip_recv_tcp(struct lwip_sock *sock, void *mem, size_t len, int flags) 
+ { + u8_t apiflags = NETCONN_NOAUTORCVD; ++ ssize_t recvd = 0; + #if USE_LIBOS + apiflags = 0; +-#endif +- ssize_t recvd = 0; ++#else + ssize_t recv_left = (len <= SSIZE_MAX) ? (ssize_t)len : SSIZE_MAX; ++#endif + + LWIP_ASSERT("no socket given", sock != NULL); + LWIP_ASSERT("this should be checked internally", NETCONNTYPE_GROUP(netconn_type(sock->conn)) == NETCONN_TCP); +@@ -1173,6 +1050,7 @@ lwip_recv_tcp(struct lwip_sock *sock, void *mem, size_t len, int flags) + apiflags |= NETCONN_DONTBLOCK; + } + ++#if !USE_LIBOS + do { + struct pbuf *p; + err_t err; +@@ -1182,13 +1060,6 @@ lwip_recv_tcp(struct lwip_sock *sock, void *mem, size_t len, int flags) + /* Check if there is data left from the last recv operation. */ + if (sock->lastdata.pbuf) { + p = sock->lastdata.pbuf; +-#if USE_LIBOS +- if (((flags & MSG_PEEK) == 0) && ((sock->epoll & EPOLLET) == 0)) { +- if ((NETCONNTYPE_GROUP(netconn_type(sock->conn)) == NETCONN_TCP)) { +- del_epoll_event(sock->conn, EPOLLIN); +- } +- } +-#endif + } else { + /* No data was left from the previous operation, so we try to get + some from the network. */ +@@ -1258,23 +1129,21 @@ lwip_recv_tcp(struct lwip_sock *sock, void *mem, size_t len, int flags) + apiflags |= NETCONN_DONTBLOCK | NETCONN_NOFIN; + /* @todo: do we need to support peeking more than one pbuf? 
*/ + } while ((recv_left > 0) && !(flags & MSG_PEEK)); ++ + lwip_recv_tcp_done: +-#if USE_LIBOS +- if (apiflags & NETCONN_NOAUTORCVD) +-#endif +- { ++#else /* USE_LIBOS */ ++ recvd = read_lwip_data(sock, flags, apiflags); ++ if (recvd <= 0) { ++ return recvd; ++ } ++#endif /* USE_LIBOS */ ++ if (apiflags & NETCONN_NOAUTORCVD) { + if ((recvd > 0) && !(flags & MSG_PEEK)) { + /* ensure window update after copying all data */ + netconn_tcp_recvd(sock->conn, (size_t)recvd); + } + } +-#if USE_LIBOS +- if ((flags & MSG_PEEK) == 0) { +- if (((NETCONNTYPE_GROUP(netconn_type(sock->conn)) == NETCONN_TCP)) && sock->lastdata.pbuf) { +- add_epoll_event(sock->conn, EPOLLIN); +- } +- } +-#endif ++ + sock_set_errno(sock, 0); + return recvd; + } +@@ -1461,37 +1330,6 @@ lwip_recvfrom_udp_raw(struct lwip_sock *sock, int flags, struct msghdr *msg, u16 + return ERR_OK; + } + +-#if USE_LIBOS +-static inline enum KERNEL_LWIP_PATH select_path(int s) +-{ +- struct lwip_sock *sock; +- +- sock = posix_api->get_socket(s); +- /*AF_UNIX case*/ +- if (!sock) { +- return PATH_KERNEL; +- } +- +- if (CONN_TYPE_HAS_INPRG(sock->conn)) { +- set_errno(EWOULDBLOCK); +- return PATH_ERR; +- } +- +- /*for AF_INET, we can try erther linux or lwip*/ +- if (CONN_TYPE_IS_HOST(sock->conn)) { +- return PATH_KERNEL; +- } +- +- if (!CONN_TYPE_IS_LIBOS(sock->conn)) { +- LWIP_DEBUGF(SOCKETS_DEBUG, ("conn->type is not libos bit type=%x", netconn_type(sock->conn))); +- set_errno(EINVAL); +- return PATH_ERR; +- } +- +- return PATH_LWIP; +-} +-#endif +- + ssize_t + lwip_recvfrom(int s, void *mem, size_t len, int flags, + struct sockaddr *from, socklen_t *fromlen) +@@ -1499,15 +1337,6 @@ lwip_recvfrom(int s, void *mem, size_t len, int flags, + struct lwip_sock *sock; + ssize_t ret; + +-#if USE_LIBOS +- enum KERNEL_LWIP_PATH path = select_path(s); +- if (path == PATH_ERR) { +- return -1; +- } else if (path == PATH_KERNEL) { +- return posix_api->recv_from(s, mem, len, flags, from, fromlen); +- } +-#endif +- + 
LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_recvfrom(%d, %p, %"SZT_F", 0x%x, ..)\n", s, mem, len, flags)); + sock = get_socket(s); + if (!sock) { +@@ -1557,14 +1386,6 @@ lwip_recvfrom(int s, void *mem, size_t len, int flags, + ssize_t + lwip_read(int s, void *mem, size_t len) + { +-#if USE_LIBOS +- enum KERNEL_LWIP_PATH path = select_path(s); +- if (path == PATH_ERR) { +- return -1; +- } else if (path == PATH_KERNEL) { +- return posix_api->read_fn(s, mem, len); +- } +-#endif + return lwip_recvfrom(s, mem, len, 0, NULL, NULL); + } + +@@ -1598,15 +1419,6 @@ lwip_recvmsg(int s, struct msghdr *message, int flags) + int i; + ssize_t buflen; + +-#if USE_LIBOS +- enum KERNEL_LWIP_PATH path = select_path(s); +- if (path == PATH_ERR) { +- return -1; +- } else if (path == PATH_KERNEL) { +- return posix_api->recv_msg(s, message, flags); +- } +-#endif +- + LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_recvmsg(%d, message=%p, flags=0x%x)\n", s, (void *)message, flags)); + LWIP_ERROR("lwip_recvmsg: invalid message pointer", message != NULL, return ERR_ARG;); + LWIP_ERROR("lwip_recvmsg: unsupported flags", (flags & ~(MSG_PEEK|MSG_DONTWAIT)) == 0, +@@ -1751,15 +1563,6 @@ lwip_sendmsg(int s, const struct msghdr *msg, int flags) + #endif + err_t err = ERR_OK; + +-#if USE_LIBOS +- enum KERNEL_LWIP_PATH path = select_path(s); +- if (path == PATH_ERR) { +- return -1; +- } else if (path == PATH_KERNEL) { +- return posix_api->send_msg(s, msg, flags); +- } +-#endif +- + sock = get_socket(s); + if (!sock) { + return -1; +@@ -1923,15 +1726,6 @@ lwip_sendto(int s, const void *data, size_t size, int flags, + u16_t remote_port; + struct netbuf buf; + +-#if USE_LIBOS +- enum KERNEL_LWIP_PATH path = select_path(s); +- if (path == PATH_ERR) { +- return -1; +- } else if (path == PATH_KERNEL) { +- return posix_api->send_to(s, data, size, flags, to, tolen); +- } +-#endif +- + sock = get_socket(s); + if (!sock) { + return -1; +@@ -2030,11 +1824,6 @@ lwip_socket(int domain, int type, int protocol) + + 
LWIP_UNUSED_ARG(domain); /* @todo: check this */ + +-#if USE_LIBOS +- if ((domain != AF_INET && domain != AF_UNSPEC) || posix_api->is_chld) +- return posix_api->socket_fn(domain, type, protocol); +-#endif +- + /* create a netconn */ + switch (type) { + case SOCK_RAW: +@@ -2091,14 +1880,6 @@ lwip_socket(int domain, int type, int protocol) + ssize_t + lwip_write(int s, const void *data, size_t size) + { +-#if USE_LIBOS +- enum KERNEL_LWIP_PATH path = select_path(s); +- if (path == PATH_ERR) { +- return -1; +- } else if (path == PATH_KERNEL) { +- return posix_api->write_fn(s, data, size); +- } +-#endif + return lwip_send(s, data, size, 0); + } + +@@ -2884,20 +2665,16 @@ event_callback(struct netconn *conn, enum netconn_evt evt, u16_t len) + check_waiters = 0; + } + #if USE_LIBOS +- if (sock->epoll & EPOLLET) { +- list_del_node_null(&sock->list); ++ if (conn->state == NETCONN_LISTEN) { ++ add_epoll_event(conn, EPOLLIN); ++ } else { ++ add_recv_list(conn->socket); + } +- add_epoll_event(conn, EPOLLIN); + #endif + break; + case NETCONN_EVT_RCVMINUS: + sock->rcvevent--; + check_waiters = 0; +-#if USE_LIBOS +- if ((sock->epoll & EPOLLET) == 0) { +- del_epoll_event(conn, EPOLLIN); +- } +-#endif + break; + case NETCONN_EVT_SENDPLUS: + if (sock->sendevent) { +@@ -2905,27 +2682,16 @@ event_callback(struct netconn *conn, enum netconn_evt evt, u16_t len) + } + sock->sendevent = 1; + #if USE_LIBOS +- if (sock->epoll & EPOLLET) { +- list_del_node_null(&sock->list); +- } + add_epoll_event(conn, EPOLLOUT); + #endif + break; + case NETCONN_EVT_SENDMINUS: + sock->sendevent = 0; + check_waiters = 0; +-#if USE_LIBOS +- if ((sock->epoll & EPOLLET) == 0) { +- del_epoll_event(conn, EPOLLOUT); +- } +-#endif + break; + case NETCONN_EVT_ERROR: + sock->errevent = 1; + #if USE_LIBOS +- if (sock->epoll & EPOLLET) { +- list_del_node_null(&sock->list); +- } + add_epoll_event(conn, EPOLLERR); + #endif + break; +@@ -3139,41 +2905,12 @@ lwip_getaddrname(int s, struct sockaddr *name, socklen_t 
*namelen, u8_t local) + int + lwip_getpeername(int s, struct sockaddr *name, socklen_t *namelen) + { +-#if USE_LIBOS +- struct lwip_sock *sock; +- +- sock = posix_api->get_socket(s); +- if (!sock) { +- return posix_api->getpeername_fn(s, name, namelen); +- } +- /*for AF_INET, if has only host type bit, just call linux api, +- *if has libos and host type bits, it's a not connected fd, call +- *linux api and return -1(errno == ENOTCONN) is also ok*/ +- if (CONN_TYPE_HAS_HOST(sock->conn)) { +- return posix_api->getpeername_fn(s, name, namelen); +- } +-#endif +- + return lwip_getaddrname(s, name, namelen, 0); + } + + int + lwip_getsockname(int s, struct sockaddr *name, socklen_t *namelen) + { +-#if USE_LIBOS +- struct lwip_sock *sock; +- +- sock = posix_api->get_socket(s); +- if (!sock) { +- return posix_api->getsockname_fn(s, name, namelen); +- } +- /*for AF_INET, if has only host type bit, just call linux api, +- *if has libos and host type bits, also call linux api*/ +- if (CONN_TYPE_HAS_HOST(sock->conn)) { +- return posix_api->getsockname_fn(s, name, namelen); +- } +-#endif +- + return lwip_getaddrname(s, name, namelen, 1); + } + +@@ -3186,23 +2923,11 @@ lwip_getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen) + LWIP_SETGETSOCKOPT_DATA_VAR_DECLARE(data); + #endif /* !LWIP_TCPIP_CORE_LOCKING */ + +-#if USE_LIBOS +- struct lwip_sock *sock = posix_api->get_socket(s); +- +- if (!sock) { +- return posix_api->getsockopt_fn(s, level, optname, optval, optlen); +- } +- /*for AF_INET, we return linux result? 
*/ +- if (CONN_TYPE_HAS_HOST(sock->conn)) { +- return posix_api->getsockopt_fn(s, level, optname, optval, optlen); +- } +-#else + struct lwip_sock *sock = get_socket(s); + + if (!sock) { + return -1; + } +-#endif /* USE_LIBOS */ + + if ((NULL == optval) || (NULL == optlen)) { + sock_set_errno(sock, EFAULT); +@@ -3645,25 +3370,11 @@ lwip_setsockopt(int s, int level, int optname, const void *optval, socklen_t opt + LWIP_SETGETSOCKOPT_DATA_VAR_DECLARE(data); + #endif /* !LWIP_TCPIP_CORE_LOCKING */ + +-#if USE_LIBOS +- struct lwip_sock *sock = posix_api->get_socket(s); +- +- if (!sock) { +- return posix_api->setsockopt_fn(s, level, optname, optval, optlen); +- } +- /*for AF_INET, we may try both linux and lwip*/ +- if (CONN_TYPE_HAS_HOST(sock->conn)) { +- if (posix_api->setsockopt_fn(s, level, optname, optval, optlen) < 0) { +- return -1; +- } +- } +-#else + struct lwip_sock *sock = get_socket(s); + + if (!sock) { + return -1; + } +-#endif /* USE_LIBOS */ + + if (NULL == optval) { + sock_set_errno(sock, EFAULT); +@@ -4308,26 +4019,6 @@ lwip_ioctl(int s, long cmd, void *argp) + * the flag O_NONBLOCK is implemented for F_SETFL. + */ + int +-#if USE_LIBOS +-lwip_fcntl(int s, int cmd, ...) 
+-{ +- struct lwip_sock *sock = posix_api->get_socket(s); +- int val, ret = -1; +- int op_mode = 0; +- va_list ap; +- +- va_start(ap, cmd); +- val = va_arg(ap, int); +- va_end(ap); +- +- if (!sock) { +- return posix_api->fcntl_fn(s, cmd, val); +- } +- if (CONN_TYPE_HAS_HOST(sock->conn)) { +- if ((ret = posix_api->fcntl_fn(s, cmd, val)) == -1) +- return ret; +- } +-#else /* USE_LIBOS */ + lwip_fcntl(int s, int cmd, int val) + { + struct lwip_sock *sock = get_socket(s); +@@ -4337,7 +4028,6 @@ lwip_fcntl(int s, int cmd, int val) + if (!sock) { + return -1; + } +-#endif /* USE_LIBOS */ + + switch (cmd) { + case F_GETFL: +diff --git a/src/api/sys_arch.c b/src/api/sys_arch.c +index 55561b1..9a92143 100644 +--- a/src/api/sys_arch.c ++++ b/src/api/sys_arch.c +@@ -76,8 +76,8 @@ struct sys_mem_stats { + + static PER_THREAD struct sys_mem_stats hugepage_stats; + +-static PER_THREAD uint64_t cycles_per_ms __attribute__((aligned(64))); +-static PER_THREAD uint64_t sys_start_ms __attribute__((aligned(64))); ++static uint64_t cycles_per_ms __attribute__((aligned(64))); ++static uint64_t sys_start_ms __attribute__((aligned(64))); + + /* + * Mailbox +@@ -337,8 +337,12 @@ void sys_calibrate_tsc(void) + #define MS_PER_SEC 1E3 + uint64_t freq = rte_get_tsc_hz(); + +- cycles_per_ms = (freq + MS_PER_SEC - 1) / MS_PER_SEC; +- sys_start_ms = rte_rdtsc() / cycles_per_ms; ++ if (cycles_per_ms == 0) { ++ cycles_per_ms = (freq + MS_PER_SEC - 1) / MS_PER_SEC; ++ } ++ if (sys_start_ms == 0) { ++ sys_start_ms = rte_rdtsc() / cycles_per_ms; ++ } + } + + uint32_t sys_now(void) +diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c +index dac498e..b99974d 100644 +--- a/src/core/tcp_out.c ++++ b/src/core/tcp_out.c +@@ -472,6 +472,7 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + * pos records progress as data is segmented. + */ + ++#if !USE_LIBOS + /* Find the tail of the unsent queue. 
*/ + if (pcb->unsent != NULL) { + u16_t space; +@@ -587,6 +588,13 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + pcb->unsent_oversize == 0); + #endif /* TCP_OVERSIZE */ + } ++#else /* USE_LIBOS */ ++ if (pcb->unsent != NULL) { ++ /* @todo: this could be sped up by keeping last_unsent in the pcb */ ++ for (last_unsent = pcb->unsent; last_unsent->next != NULL; ++ last_unsent = last_unsent->next); ++ } ++#endif /* USE_LIBOS */ + + /* + * Phase 3: Create new segments. +@@ -604,6 +612,7 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + u8_t chksum_swapped = 0; + #endif /* TCP_CHECKSUM_ON_COPY */ + ++#if !USE_LIBOS + if (apiflags & TCP_WRITE_FLAG_COPY) { + /* If copy is set, memory should be allocated and data copied + * into pbuf */ +@@ -650,6 +659,10 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + /* Concatenate the headers and data pbufs together. */ + pbuf_cat(p/*header*/, p2/*data*/); + } ++#else /* USE_LIBOS */ ++ p = (struct pbuf *)arg; ++ seglen = p->len; ++#endif /* USE_LIBOS */ + + queuelen += pbuf_clen(p); + +diff --git a/src/include/eventpoll.h b/src/include/eventpoll.h +index f525bc2..aacc1d2 100644 +--- a/src/include/eventpoll.h ++++ b/src/include/eventpoll.h +@@ -63,9 +63,7 @@ struct libos_epoll { + int efd; /* eventfd */ + }; + +-extern int add_epoll_event(struct netconn*, uint32_t); +-extern int del_epoll_event(struct netconn*, uint32_t); +-extern int lwip_epoll_close(int); +-extern int lwip_is_epfd(int); ++extern void add_epoll_event(struct netconn*, uint32_t); ++extern int32_t lstack_epoll_close(int32_t); + + #endif /* __EVENTPOLL_H__ */ +diff --git a/src/include/lwip/priv/tcp_priv.h b/src/include/lwip/priv/tcp_priv.h +index f771725..83208bf 100644 +--- a/src/include/lwip/priv/tcp_priv.h ++++ b/src/include/lwip/priv/tcp_priv.h +@@ -349,7 +349,7 @@ static inline int vdev_reg_done(enum reg_ring_type reg_type, const struct tcp_pc + { + LWIP_ASSERT("Invalid 
parameter", pcb != NULL); + +- struct libnet_quintuple qtuple; ++ struct gazelle_quintuple qtuple; + qtuple.protocol = 0; + qtuple.src_ip = pcb->local_ip.addr; + qtuple.src_port = lwip_htons(pcb->local_port); +diff --git a/src/include/lwip/sockets.h b/src/include/lwip/sockets.h +index 345e26c..4e7e671 100644 +--- a/src/include/lwip/sockets.h ++++ b/src/include/lwip/sockets.h +@@ -647,7 +647,7 @@ int lwip_poll(struct pollfd *fds, nfds_t nfds, int timeout); + + #if USE_LIBOS + int lwip_ioctl(int s, long cmd, ...); +-int lwip_fcntl(int s, int cmd, ...); ++int lwip_fcntl(int s, int cmd, int val); + #else + int lwip_ioctl(int s, long cmd, void *argp); + int lwip_fcntl(int s, int cmd, int val); +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index e9ffbb1..069cdcb 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -60,6 +60,10 @@ union lwip_sock_lastdata { + struct pbuf *pbuf; + }; + ++#if USE_LIBOS ++struct protocol_stack; ++struct weakup_poll; ++#endif + /** Contains all internal pointers and states used for a socket */ + struct lwip_sock { + /** sockets currently are built on netconns, each socket has one netconn */ +@@ -88,14 +92,19 @@ struct lwip_sock { + #endif + + #if USE_LIBOS +- struct list_node list; +- /* registered events */ +- uint32_t epoll; +- /* available events */ +- uint32_t events; ++ uint32_t epoll_events; /* registered events */ ++ uint32_t events; /* available events */ ++ int32_t in_event; /* avoid recurring events */ + epoll_data_t ep_data; +- /* libos_epoll pointer in use */ +- struct libos_epoll *epoll_data; ++ struct weakup_poll *weakup; ++ struct protocol_stack *stack; ++ void *recv_ring; ++ struct pbuf *recv_lastdata; /* unread data in one pbuf */ ++ struct pbuf *send_lastdata; /* unread data in one pbuf */ ++ void *send_ring; ++ int32_t recv_flags; ++ int32_t nextfd; /* listenfd list */ ++ struct list_node recv_list; + #endif + }; + +@@ -138,6 +147,10 @@ get_socket_without_errno(int s) + + return sock; + } 
++ ++extern void add_recv_list(int32_t fd); ++extern ssize_t read_lwip_data(struct lwip_sock *sock, int32_t flags, u8_t apiflags); ++extern void gazelle_clean_sock(int32_t fd); + #endif /* USE_LIBOS */ + + struct lwip_sock *get_socket(int s); +@@ -145,6 +158,4 @@ struct lwip_sock *get_socket_by_fd(int s); + void lwip_sock_init(void); + void lwip_exit(void); + +-extern int is_host_ipv4(uint32_t ipv4); +- + #endif /* __LWIPSOCK_H__ */ +diff --git a/src/include/posix_api.h b/src/include/posix_api.h +index 0dca8eb..2afd266 100644 +--- a/src/include/posix_api.h ++++ b/src/include/posix_api.h +@@ -34,7 +34,7 @@ + #define __POSIX_API_H__ + + #include +-#include ++#include + #include + #include + +diff --git a/src/include/reg_sock.h b/src/include/reg_sock.h +index 76d4c48..76673da 100644 +--- a/src/include/reg_sock.h ++++ b/src/include/reg_sock.h +@@ -41,7 +41,7 @@ enum reg_ring_type { + RING_REG_MAX, + }; + +-struct libnet_quintuple { ++struct gazelle_quintuple { + uint32_t protocol; + /* net byte order */ + uint16_t src_port; +@@ -54,9 +54,9 @@ struct reg_ring_msg { + enum reg_ring_type type; + + uint32_t tid; +- struct libnet_quintuple qtuple; ++ struct gazelle_quintuple qtuple; + }; + +-extern int vdev_reg_xmit(enum reg_ring_type type, struct libnet_quintuple *qtuple); ++extern int vdev_reg_xmit(enum reg_ring_type type, struct gazelle_quintuple *qtuple); + +-#endif /* __REG_SOCK_H__ */ +\ No newline at end of file ++#endif /* __REG_SOCK_H__ */ +-- +2.30.0 + diff --git a/0017-support-REUSEPOR-option.patch b/0017-support-REUSEPOR-option.patch new file mode 100644 index 0000000000000000000000000000000000000000..c6c8b0f2acc621ad48cff5334f67423d4bd6f01f --- /dev/null +++ b/0017-support-REUSEPOR-option.patch @@ -0,0 +1,58 @@ +From 670f888704c7bbb1121e63bc380ca34b83c43464 Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Thu, 3 Mar 2022 17:06:03 +0800 +Subject: [PATCH] support REUSEPOR option fix rpc msg too much + fix recurring events + +--- + src/api/sockets.c | 4 ++++ + 
src/include/lwipsock.h | 10 ++++++++-- + 2 files changed, 12 insertions(+), 2 deletions(-) + +diff --git a/src/api/sockets.c b/src/api/sockets.c +index b032ce9..4b682f3 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -3029,6 +3029,10 @@ lwip_sockopt_to_ipopt(int optname) + return SOF_KEEPALIVE; + case SO_REUSEADDR: + return SOF_REUSEADDR; ++#if USE_LIBOS ++ case SO_REUSEPORT: ++ return SO_REUSEPORT; ++#endif + default: + LWIP_ASSERT("Unknown socket option", 0); + return 0; +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index 069cdcb..e2519ff 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -94,7 +94,8 @@ struct lwip_sock { + #if USE_LIBOS + uint32_t epoll_events; /* registered events */ + uint32_t events; /* available events */ +- int32_t in_event; /* avoid recurring events */ ++ volatile bool have_event; /* avoid recurring events */ ++ volatile bool have_rpc_send; /* avoid recurring rpc_send */ + epoll_data_t ep_data; + struct weakup_poll *weakup; + struct protocol_stack *stack; +@@ -103,8 +104,13 @@ struct lwip_sock { + struct pbuf *send_lastdata; /* unread data in one pbuf */ + void *send_ring; + int32_t recv_flags; +- int32_t nextfd; /* listenfd list */ ++ bool wait_close; ++ int32_t attach_fd; ++ struct lwip_sock *shadowed_sock; ++ struct list_node attach_list; ++ struct list_node listen_list; + struct list_node recv_list; ++ int32_t nextfd; /* listenfd list */ + #endif + }; + +-- +1.8.3.1 + diff --git a/0018-exec-gazelle_init_sock-before-read-event.patch b/0018-exec-gazelle_init_sock-before-read-event.patch new file mode 100644 index 0000000000000000000000000000000000000000..11d4db0d0ab86c90d3bcc319c04e4f3b090457e3 --- /dev/null +++ b/0018-exec-gazelle_init_sock-before-read-event.patch @@ -0,0 +1,37 @@ +From 544bf45ec99c853ad5e9ec2607669df01b4e0572 Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Mon, 7 Mar 2022 21:06:39 +0800 +Subject: [PATCH] exec gazelle_init_sock() before read event + +--- + 
src/api/sockets.c | 1 + + src/include/lwipsock.h | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/src/api/sockets.c b/src/api/sockets.c +index 4b682f3..21de5d9 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -763,6 +763,7 @@ lwip_accept(int s, struct sockaddr *addr, socklen_t *addrlen) + } + #if USE_LIBOS + LWIP_ASSERT("invalid socket index", (newsock >= LWIP_SOCKET_OFFSET) && (newsock < sockets_num + LWIP_SOCKET_OFFSET)); ++ gazelle_init_sock(newsock); + #else + LWIP_ASSERT("invalid socket index", (newsock >= LWIP_SOCKET_OFFSET) && (newsock < NUM_SOCKETS + LWIP_SOCKET_OFFSET)); + #endif /* USE_LIBOS */ +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index e2519ff..355bf47 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -157,6 +157,7 @@ get_socket_without_errno(int s) + extern void add_recv_list(int32_t fd); + extern ssize_t read_lwip_data(struct lwip_sock *sock, int32_t flags, u8_t apiflags); + extern void gazelle_clean_sock(int32_t fd); ++extern void gazelle_init_sock(int32_t fd); + #endif /* USE_LIBOS */ + + struct lwip_sock *get_socket(int s); +-- +1.8.3.1 + diff --git a/0019-gazelle-reduce-copy-in-send.patch b/0019-gazelle-reduce-copy-in-send.patch new file mode 100644 index 0000000000000000000000000000000000000000..fffa8b7f96d4a375390d3d19246af6e9e3222d1b --- /dev/null +++ b/0019-gazelle-reduce-copy-in-send.patch @@ -0,0 +1,50 @@ +From 05bfdb54fc744d835c8b3b50b54d220fe7e87277 Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: Mon, 7 Mar 2022 21:10:06 +0800 +Subject: [PATCH] reduce copy in send + +--- + src/core/pbuf.c | 5 +++++ + src/include/lwip/pbuf.h | 3 +++ + 2 files changed, 8 insertions(+) + +diff --git a/src/core/pbuf.c b/src/core/pbuf.c +index 27afc28..cd6b558 100644 +--- a/src/core/pbuf.c ++++ b/src/core/pbuf.c +@@ -281,6 +281,10 @@ pbuf_alloc(pbuf_layer layer, u16_t length, pbuf_type type) + } + + /* If pbuf is to be allocated in RAM, allocate memory for it. 
*/ ++#if USE_LIBOS ++ /* alloc mbuf to reduce copy in sending */ ++ p = lwip_alloc_pbuf(layer, length, type); ++#else + p = (struct pbuf *)mem_malloc(alloc_len); + if (p == NULL) { + return NULL; +@@ -289,6 +293,7 @@ pbuf_alloc(pbuf_layer layer, u16_t length, pbuf_type type) + length, length, type, 0); + LWIP_ASSERT("pbuf_alloc: pbuf->payload properly aligned", + ((mem_ptr_t)p->payload % MEM_ALIGNMENT) == 0); ++#endif + break; + } + default: +diff --git a/src/include/lwip/pbuf.h b/src/include/lwip/pbuf.h +index e5daf96..3894574 100644 +--- a/src/include/lwip/pbuf.h ++++ b/src/include/lwip/pbuf.h +@@ -272,6 +272,9 @@ void pbuf_free_ooseq(void); + /* Initializes the pbuf module. This call is empty for now, but may not be in future. */ + #define pbuf_init() + ++#if USE_LIBOS ++struct pbuf *lwip_alloc_pbuf(pbuf_layer l, u16_t length, pbuf_type type); ++#endif + struct pbuf *pbuf_alloc(pbuf_layer l, u16_t length, pbuf_type type); + struct pbuf *pbuf_alloc_reference(void *payload, u16_t length, pbuf_type type); + #if LWIP_SUPPORT_CUSTOM_PBUF +-- +2.30.0 + diff --git a/0020-remove-chose_dlsym_handle-function-set-handle-to-RTL.patch b/0020-remove-chose_dlsym_handle-function-set-handle-to-RTL.patch new file mode 100644 index 0000000000000000000000000000000000000000..54243bc4ee25c5e33605d7e169327bcf59340117 --- /dev/null +++ b/0020-remove-chose_dlsym_handle-function-set-handle-to-RTL.patch @@ -0,0 +1,63 @@ +From 970d9d6fd15c433af20bbbd7418c5e9773d58471 Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Mon, 7 Mar 2022 21:08:13 +0800 +Subject: [PATCH] remove chose_dlsym_handle function, set handle to RTLD_NEXT + +--- + src/api/posix_api.c | 33 +-------------------------------- + 1 file changed, 1 insertion(+), 32 deletions(-) + +diff --git a/src/api/posix_api.c b/src/api/posix_api.c +index eff9f46..bce07f5 100644 +--- a/src/api/posix_api.c ++++ b/src/api/posix_api.c +@@ -64,33 +64,6 @@ void posix_api_fork(void) + posix_api->get_socket = chld_get_socket; + } + +-static int 
chose_dlsym_handle(void *__restrict* khandle) +-{ +- void *dlhandle; +- int (*gazelle_epoll_create)(int size); +- dlhandle = dlopen ("liblstack.so", RTLD_LAZY); +- if (dlhandle == NULL) { +- return ERR_IF; +- } +- +- gazelle_epoll_create = dlsym(dlhandle, "epoll_create"); +- if (gazelle_epoll_create == NULL) { +- return ERR_MEM; +- } +- +- dlclose(dlhandle); +- +- *khandle = RTLD_NEXT; +- if (dlsym(*khandle, "epoll_create") == gazelle_epoll_create) { +- RTE_LOG(ERR, EAL, "posix api use RTLD_DEFAULT\n"); +- *khandle = RTLD_DEFAULT; +- } else { +- RTE_LOG(ERR, EAL, "posix api use RTLD_NEXT\n"); +- } +- +- return ERR_OK; +-} +- + int posix_api_init(void) + { + /* the symbol we use here won't be NULL, so we don't need dlerror() +@@ -102,11 +75,7 @@ int posix_api_init(void) + + posix_api = &posix_api_val; + +- void *__restrict handle; +- int ret = chose_dlsym_handle(&handle); +- if (ret != ERR_OK) { +- return ret; +- } ++ void *__restrict handle = RTLD_NEXT; + + /* glibc standard api */ + CHECK_DLSYM_RET_RETURN(posix_api->socket_fn = dlsym(handle, "socket")); +-- +1.8.3.1 + diff --git a/0021-refactor-event-if-ring-is-full-the-node-is-added-to-.patch b/0021-refactor-event-if-ring-is-full-the-node-is-added-to-.patch new file mode 100644 index 0000000000000000000000000000000000000000..b5b5e290c9d249049bf95274853a0610c3853c9b --- /dev/null +++ b/0021-refactor-event-if-ring-is-full-the-node-is-added-to-.patch @@ -0,0 +1,33 @@ +From b7faf0800631668d4d23cb497f1ceeb5948e4a41 Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Tue, 15 Mar 2022 19:22:22 +0800 +Subject: [PATCH] refactor event, if ring is full, the node is added to list + +--- + src/include/lwipsock.h | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index 355bf47..36bcaed 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -104,12 +104,16 @@ struct lwip_sock { + struct pbuf *send_lastdata; /* unread data in one pbuf */ + void *send_ring; + 
int32_t recv_flags; ++ int32_t send_flags; + bool wait_close; + int32_t attach_fd; + struct lwip_sock *shadowed_sock; + struct list_node attach_list; + struct list_node listen_list; + struct list_node recv_list; ++ struct list_node event_list; ++ struct list_node wakeup_list; ++ struct list_node send_list; + int32_t nextfd; /* listenfd list */ + #endif + }; +-- +1.8.3.1 + diff --git a/0022-notify-app-that-sock-state-changes-to-CLOSE_WAIT.patch b/0022-notify-app-that-sock-state-changes-to-CLOSE_WAIT.patch new file mode 100644 index 0000000000000000000000000000000000000000..be4b0e78ce9ce06a77c2cc6eef7e9b9266e533b9 --- /dev/null +++ b/0022-notify-app-that-sock-state-changes-to-CLOSE_WAIT.patch @@ -0,0 +1,56 @@ +From 05159c41efdc2f07ddbe3520330faf2675baa3d6 Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Tue, 15 Mar 2022 20:10:07 +0800 +Subject: [PATCH] notify app that sock changes to CLOSE_WAIT + +--- + src/core/tcp_in.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c +index 1652b86..0d3a2f1 100644 +--- a/src/core/tcp_in.c ++++ b/src/core/tcp_in.c +@@ -58,6 +58,9 @@ + #if LWIP_ND6_TCP_REACHABILITY_HINTS + #include "lwip/nd6.h" + #endif /* LWIP_ND6_TCP_REACHABILITY_HINTS */ ++#if USE_LIBOS ++#include "lwip/api.h" ++#endif + + #include + +@@ -1032,6 +1035,9 @@ tcp_process(struct tcp_pcb *pcb) + if (recv_flags & TF_GOT_FIN) { + tcp_ack_now(pcb); + pcb->state = CLOSE_WAIT; ++#if USE_LIBOS ++ API_EVENT(((struct netconn *)pcb->callback_arg), NETCONN_EVT_ERROR, 0); ++#endif + } + } else { + /* incorrect ACK number, send RST */ +@@ -1050,6 +1056,9 @@ tcp_process(struct tcp_pcb *pcb) + if (recv_flags & TF_GOT_FIN) { /* passive close */ + tcp_ack_now(pcb); + pcb->state = CLOSE_WAIT; ++#if USE_LIBOS ++ API_EVENT(((struct netconn *)pcb->callback_arg), NETCONN_EVT_ERROR, 0); ++#endif + } + break; + case FIN_WAIT_1: +@@ -1676,6 +1685,9 @@ tcp_receive(struct tcp_pcb *pcb) + recv_flags |= TF_GOT_FIN; + if (pcb->state == 
ESTABLISHED) { /* force passive close or we can move to active close */ + pcb->state = CLOSE_WAIT; ++#if USE_LIBOS ++ API_EVENT(((struct netconn *)pcb->callback_arg), NETCONN_EVT_ERROR, 0); ++#endif + } + } + +-- +1.8.3.1 + diff --git a/0023-refactor-event-and-checksum-offload-support.patch b/0023-refactor-event-and-checksum-offload-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..b092d04e6f708187ee7d2fd37e7014eb5bdc1ac7 --- /dev/null +++ b/0023-refactor-event-and-checksum-offload-support.patch @@ -0,0 +1,698 @@ +From 8dd0a15e60cfee7e7f1be1ea051d0e09031f8fdd Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Tue, 29 Mar 2022 21:33:17 +0800 +Subject: [PATCH] refactor event and add HW checksum offload + +--- + src/api/api_msg.c | 9 ++++ + src/api/posix_api.c | 2 + + src/api/sockets.c | 4 +- + src/core/ipv4/icmp.c | 13 +++++ + src/core/ipv4/ip4.c | 24 ++++++++- + src/core/ipv4/ip4_frag.c | 23 +++++++++ + src/core/pbuf.c | 9 +++- + src/core/tcp_in.c | 17 +++++++ + src/core/tcp_out.c | 72 +++++++++++++++++++++++++- + src/include/dpdk_cksum.h | 107 +++++++++++++++++++++++++++++++++++++++ + src/include/lwip/pbuf.h | 12 ++++- + src/include/lwipopts.h | 30 ++++++++--- + src/include/lwipsock.h | 18 +++---- + src/netif/ethernet.c | 8 +++ + 14 files changed, 322 insertions(+), 26 deletions(-) + create mode 100644 src/include/dpdk_cksum.h + +diff --git a/src/api/api_msg.c b/src/api/api_msg.c +index 3072dd9..672f022 100644 +--- a/src/api/api_msg.c ++++ b/src/api/api_msg.c +@@ -57,6 +57,7 @@ + #if USE_LIBOS + #include "lwip/sockets.h" + #include "lwipsock.h" ++#include "posix_api.h" + #endif + + #include +@@ -1758,7 +1759,15 @@ lwip_netconn_do_writemore(struct netconn *conn WRITE_DELAYED_PARAM) + } else { + write_more = 0; + } ++#if USE_LIBOS ++ /* vector->ptr is private arg sock */ ++ LWIP_UNUSED_ARG(dataptr); ++ write_more = 0; ++ err = tcp_write(conn->pcb.tcp, conn->current_msg->msg.w.vector->ptr, len, apiflags); ++ conn->current_msg->msg.w.len 
= len; ++#else + err = tcp_write(conn->pcb.tcp, dataptr, len, apiflags); ++#endif + if (err == ERR_OK) { + conn->current_msg->msg.w.offset += len; + conn->current_msg->msg.w.vector_off += len; +diff --git a/src/api/posix_api.c b/src/api/posix_api.c +index bce07f5..3f85bad 100644 +--- a/src/api/posix_api.c ++++ b/src/api/posix_api.c +@@ -42,6 +42,7 @@ + + #include "lwip/err.h" + #include "lwipsock.h" ++#include "posix_api.h" + + posix_api_t *posix_api; + posix_api_t posix_api_val; +@@ -64,6 +65,7 @@ void posix_api_fork(void) + posix_api->get_socket = chld_get_socket; + } + ++ + int posix_api_init(void) + { + /* the symbol we use here won't be NULL, so we don't need dlerror() +diff --git a/src/api/sockets.c b/src/api/sockets.c +index 21de5d9..3d94454 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -65,6 +65,7 @@ + #if USE_LIBOS + #include + #include "lwipsock.h" ++#include "posix_api.h" + #endif + + #include +@@ -2682,9 +2683,6 @@ event_callback(struct netconn *conn, enum netconn_evt evt, u16_t len) + check_waiters = 0; + } + sock->sendevent = 1; +-#if USE_LIBOS +- add_epoll_event(conn, EPOLLOUT); +-#endif + break; + case NETCONN_EVT_SENDMINUS: + sock->sendevent = 0; +diff --git a/src/core/ipv4/icmp.c b/src/core/ipv4/icmp.c +index 59b493a..c58ae25 100644 +--- a/src/core/ipv4/icmp.c ++++ b/src/core/ipv4/icmp.c +@@ -51,6 +51,10 @@ + + #include + ++#if USE_LIBOS && CHECKSUM_GEN_IP_HW ++#include "dpdk_cksum.h" ++#endif ++ + #ifdef LWIP_HOOK_FILENAME + #include LWIP_HOOK_FILENAME + #endif +@@ -236,7 +240,16 @@ icmp_input(struct pbuf *p, struct netif *inp) + IPH_CHKSUM_SET(iphdr, 0); + #if CHECKSUM_GEN_IP + IF__NETIF_CHECKSUM_ENABLED(inp, NETIF_CHECKSUM_GEN_IP) { ++#if CHECKSUM_GEN_IP_HW ++ if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_IPV4_CKSUM) { ++ iph_cksum_set(p, hlen, 1); ++ } else { ++ iph_cksum_set(p, hlen, 0); ++ IPH_CHKSUM_SET(iphdr, inet_chksum(iphdr, hlen)); ++ } ++#else + IPH_CHKSUM_SET(iphdr, inet_chksum(iphdr, hlen)); ++#endif + } + #endif /* 
CHECKSUM_GEN_IP */ + +diff --git a/src/core/ipv4/ip4.c b/src/core/ipv4/ip4.c +index c83afbe..1334cdc 100644 +--- a/src/core/ipv4/ip4.c ++++ b/src/core/ipv4/ip4.c +@@ -59,6 +59,10 @@ + + #include + ++#if USE_LIBOS && (CHECKSUM_CHECK_IP_HW || CHECKSUM_GEN_IP_HW) ++#include "dpdk_cksum.h" ++#endif ++ + #ifdef LWIP_HOOK_FILENAME + #include LWIP_HOOK_FILENAME + #endif +@@ -503,8 +507,17 @@ ip4_input(struct pbuf *p, struct netif *inp) + /* verify checksum */ + #if CHECKSUM_CHECK_IP + IF__NETIF_CHECKSUM_ENABLED(inp, NETIF_CHECKSUM_CHECK_IP) { ++#if CHECKSUM_CHECK_IP_HW ++ u64_t ret; ++ if (get_eth_params_rx_ol() & DEV_RX_OFFLOAD_IPV4_CKSUM) { ++ ret = is_cksum_ipbad(p); ++ } else { ++ ret = (u64_t)inet_chksum(iphdr, iphdr_hlen); ++ } ++ if (ret != 0) { ++#else + if (inet_chksum(iphdr, iphdr_hlen) != 0) { +- ++#endif + LWIP_DEBUGF(IP_DEBUG | LWIP_DBG_LEVEL_SERIOUS, + ("Checksum (0x%"X16_F") failed, IP packet dropped.\n", inet_chksum(iphdr, iphdr_hlen))); + ip4_debug_print(p); +@@ -972,7 +985,16 @@ ip4_output_if_opt_src(struct pbuf *p, const ip4_addr_t *src, const ip4_addr_t *d + IPH_CHKSUM_SET(iphdr, 0); + #if CHECKSUM_GEN_IP + IF__NETIF_CHECKSUM_ENABLED(netif, NETIF_CHECKSUM_GEN_IP) { ++#if CHECKSUM_GEN_IP_HW ++ if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_IPV4_CKSUM) { ++ iph_cksum_set(p, ip_hlen, 1); ++ } else { ++ iph_cksum_set(p, ip_hlen, 0); ++ IPH_CHKSUM_SET(iphdr, inet_chksum(iphdr, ip_hlen)); ++ } ++#else + IPH_CHKSUM_SET(iphdr, inet_chksum(iphdr, ip_hlen)); ++#endif + } + #endif /* CHECKSUM_GEN_IP */ + #endif /* CHECKSUM_GEN_IP_INLINE */ +diff --git a/src/core/ipv4/ip4_frag.c b/src/core/ipv4/ip4_frag.c +index a445530..17a4ccd 100644 +--- a/src/core/ipv4/ip4_frag.c ++++ b/src/core/ipv4/ip4_frag.c +@@ -51,6 +51,10 @@ + + #include + ++#if USE_LIBOS && CHECKSUM_GEN_IP_HW ++#include "dpdk_cksum.h" ++#endif ++ + #if IP_REASSEMBLY + /** + * The IP reassembly code currently has the following limitations: +@@ -632,8 +636,17 @@ ip4_reass(struct pbuf *p) + /* @todo: do we 
need to set/calculate the correct checksum? */ + #if CHECKSUM_GEN_IP + IF__NETIF_CHECKSUM_ENABLED(ip_current_input_netif(), NETIF_CHECKSUM_GEN_IP) { ++#if CHECKSUM_GEN_IP_HW ++ if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_IPV4_CKSUM) { ++ iph_cksum_set(p, IP_HLEN, 1); ++ } else { ++ iph_cksum_set(p, IP_HLEN, 0); + IPH_CHKSUM_SET(fraghdr, inet_chksum(fraghdr, IP_HLEN)); + } ++#else ++ IPH_CHKSUM_SET(fraghdr, inet_chksum(fraghdr, IP_HLEN)); ++#endif ++ } + #endif /* CHECKSUM_GEN_IP */ + + p = ipr->p; +@@ -862,8 +875,18 @@ ip4_frag(struct pbuf *p, struct netif *netif, const ip4_addr_t *dest) + IPH_CHKSUM_SET(iphdr, 0); + #if CHECKSUM_GEN_IP + IF__NETIF_CHECKSUM_ENABLED(netif, NETIF_CHECKSUM_GEN_IP) { ++#if CHECKSUM_GEN_IP_HW ++ if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_IPV4_CKSUM) { ++ iph_cksum_set(p, IP_HLEN, 1); ++ } else { ++ iph_cksum_set(p, IP_HLEN, 0); + IPH_CHKSUM_SET(iphdr, inet_chksum(iphdr, IP_HLEN)); + } ++ ++#else ++ IPH_CHKSUM_SET(iphdr, inet_chksum(iphdr, IP_HLEN)); ++#endif ++ } + #endif /* CHECKSUM_GEN_IP */ + + /* No need for separate header pbuf - we allowed room for it in rambuf +diff --git a/src/core/pbuf.c b/src/core/pbuf.c +index cd6b558..247681d 100644 +--- a/src/core/pbuf.c ++++ b/src/core/pbuf.c +@@ -282,7 +282,7 @@ pbuf_alloc(pbuf_layer layer, u16_t length, pbuf_type type) + + /* If pbuf is to be allocated in RAM, allocate memory for it. 
*/ + #if USE_LIBOS +- /* alloc mbuf to reduce copy in sending */ ++ /* alloc mbuf avoid send copy */ + p = lwip_alloc_pbuf(layer, length, type); + #else + p = (struct pbuf *)mem_malloc(alloc_len); +@@ -1019,6 +1019,13 @@ pbuf_copy_partial_pbuf(struct pbuf *p_to, const struct pbuf *p_from, u16_t copy_ + /* current p_from does not fit into current p_to */ + len_calc = p_to->len - offset_to; + } ++ ++#if USE_LIBOS && (CHECKSUM_GEN_IP_HW || CHECKSUM_GEN_TCP_HW) ++ p_to->l2_len = p_from->l2_len; ++ p_to->l3_len = p_from->l3_len; ++ p_to->ol_flags = p_from->ol_flags; ++#endif ++ + len = (u16_t)LWIP_MIN(copy_len, len_calc); + MEMCPY((u8_t *)p_to->payload + offset_to, (u8_t *)p_from->payload + offset_from, len); + offset_to += len; +diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c +index 0d3a2f1..b1bbe00 100644 +--- a/src/core/tcp_in.c ++++ b/src/core/tcp_in.c +@@ -64,6 +64,10 @@ + + #include + ++#if USE_LIBOS && CHECKSUM_CHECK_TCP_HW ++#include ++#endif /* CHECKSUM_CHECK_TCP_HW */ ++ + #ifdef LWIP_HOOK_FILENAME + #include LWIP_HOOK_FILENAME + #endif +@@ -172,11 +176,24 @@ tcp_input(struct pbuf *p, struct netif *inp) + #if CHECKSUM_CHECK_TCP + IF__NETIF_CHECKSUM_ENABLED(inp, NETIF_CHECKSUM_CHECK_TCP) { + /* Verify TCP checksum. 
*/ ++#if CHECKSUM_CHECK_TCP_HW ++ u64_t ret; ++ if (get_eth_params_rx_ol() & DEV_RX_OFFLOAD_TCP_CKSUM) { ++ ret = is_cksum_tcpbad(p); ++ } else { ++ ret = (u64_t)ip_chksum_pseudo(p, IP_PROTO_TCP, p->tot_len, ++ ip_current_src_addr(), ip_current_dest_addr()); ++ ++ } ++ if (ret != 0) { ++ LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: packet discarded due to failing checksum\n")); ++#else + u16_t chksum = ip_chksum_pseudo(p, IP_PROTO_TCP, p->tot_len, + ip_current_src_addr(), ip_current_dest_addr()); + if (chksum != 0) { + LWIP_DEBUGF(TCP_INPUT_DEBUG, ("tcp_input: packet discarded due to failing checksum 0x%04"X16_F"\n", + chksum)); ++#endif + tcp_debug_print(tcphdr); + TCP_STATS_INC(tcp.chkerr); + goto dropped; +diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c +index b99974d..1b0af8d 100644 +--- a/src/core/tcp_out.c ++++ b/src/core/tcp_out.c +@@ -80,6 +80,13 @@ + + #include + ++#if USE_LIBOS ++#include "lwipsock.h" ++#if CHECKSUM_GEN_TCP_HW ++#include "dpdk_cksum.h" ++#endif ++#endif ++ + #ifdef LWIP_HOOK_FILENAME + #include LWIP_HOOK_FILENAME + #endif +@@ -660,8 +667,11 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + pbuf_cat(p/*header*/, p2/*data*/); + } + #else /* USE_LIBOS */ +- p = (struct pbuf *)arg; +- seglen = p->len; ++ p = write_lwip_data((struct lwip_sock *)arg, len - pos, &apiflags); ++ if (p == NULL) { ++ break; ++ } ++ seglen = p->tot_len; + #endif /* USE_LIBOS */ + + queuelen += pbuf_clen(p); +@@ -789,8 +799,13 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + /* + * Finally update the pcb state. 
+ */ ++#if USE_LIBOS ++ pcb->snd_lbb += pos; ++ pcb->snd_buf -= pos; ++#else + pcb->snd_lbb += len; + pcb->snd_buf -= len; ++#endif + pcb->snd_queuelen = queuelen; + + LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_write: %"S16_F" (after enqueued)\n", +@@ -1584,6 +1599,11 @@ tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif + + #if CHECKSUM_GEN_TCP + IF__NETIF_CHECKSUM_ENABLED(netif, NETIF_CHECKSUM_GEN_TCP) { ++#if CHECKSUM_GEN_TCP_HW ++ if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_CKSUM) { ++ tcph_cksum_set(seg->p, TCP_HLEN); ++ seg->tcphdr->chksum = ip_chksum_pseudo_offload(IP_PROTO_TCP,seg->p->tot_len, &pcb->local_ip, &pcb->remote_ip); ++ } else { + #if TCP_CHECKSUM_ON_COPY + u32_t acc; + #if TCP_CHECKSUM_ON_COPY_SANITY_CHECK +@@ -1618,6 +1638,44 @@ tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif + seg->tcphdr->chksum = ip_chksum_pseudo(seg->p, IP_PROTO_TCP, + seg->p->tot_len, &pcb->local_ip, &pcb->remote_ip); + #endif /* TCP_CHECKSUM_ON_COPY */ ++ ++ } ++#else ++#if TCP_CHECKSUM_ON_COPY ++ u32_t acc; ++#if TCP_CHECKSUM_ON_COPY_SANITY_CHECK ++ u16_t chksum_slow = ip_chksum_pseudo(seg->p, IP_PROTO_TCP, ++ seg->p->tot_len, &pcb->local_ip, &pcb->remote_ip); ++#endif /* TCP_CHECKSUM_ON_COPY_SANITY_CHECK */ ++ if ((seg->flags & TF_SEG_DATA_CHECKSUMMED) == 0) { ++ LWIP_ASSERT("data included but not checksummed", ++ seg->p->tot_len == TCPH_HDRLEN_BYTES(seg->tcphdr)); ++ } ++ ++ /* rebuild TCP header checksum (TCP header changes for retransmissions!) 
*/ ++ acc = ip_chksum_pseudo_partial(seg->p, IP_PROTO_TCP, ++ seg->p->tot_len, TCPH_HDRLEN_BYTES(seg->tcphdr), &pcb->local_ip, &pcb->remote_ip); ++ /* add payload checksum */ ++ if (seg->chksum_swapped) { ++ seg_chksum_was_swapped = 1; ++ seg->chksum = SWAP_BYTES_IN_WORD(seg->chksum); ++ seg->chksum_swapped = 0; ++ } ++ acc = (u16_t)~acc + seg->chksum; ++ seg->tcphdr->chksum = (u16_t)~FOLD_U32T(acc); ++#if TCP_CHECKSUM_ON_COPY_SANITY_CHECK ++ if (chksum_slow != seg->tcphdr->chksum) { ++ TCP_CHECKSUM_ON_COPY_SANITY_CHECK_FAIL( ++ ("tcp_output_segment: calculated checksum is %"X16_F" instead of %"X16_F"\n", ++ seg->tcphdr->chksum, chksum_slow)); ++ seg->tcphdr->chksum = chksum_slow; ++ } ++#endif /* TCP_CHECKSUM_ON_COPY_SANITY_CHECK */ ++#else /* TCP_CHECKSUM_ON_COPY */ ++ seg->tcphdr->chksum = ip_chksum_pseudo(seg->p, IP_PROTO_TCP, ++ seg->p->tot_len, &pcb->local_ip, &pcb->remote_ip); ++#endif /* TCP_CHECKSUM_ON_COPY */ ++#endif /* CHECKSUM_GEN_TCP_HW */ + } + #endif /* CHECKSUM_GEN_TCP */ + TCP_STATS_INC(tcp.xmit); +@@ -1959,8 +2017,18 @@ tcp_output_control_segment(const struct tcp_pcb *pcb, struct pbuf *p, + #if CHECKSUM_GEN_TCP + IF__NETIF_CHECKSUM_ENABLED(netif, NETIF_CHECKSUM_GEN_TCP) { + struct tcp_hdr *tcphdr = (struct tcp_hdr *)p->payload; ++#if CHECKSUM_GEN_TCP_HW ++ if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_CKSUM) { ++ tcph_cksum_set(p, TCP_HLEN); ++ tcphdr->chksum = ip_chksum_pseudo_offload(IP_PROTO_TCP, p->tot_len, src, dst); ++ } else { ++ tcphdr->chksum = ip_chksum_pseudo(p, IP_PROTO_TCP, p->tot_len, ++ src, dst); ++ } ++#else + tcphdr->chksum = ip_chksum_pseudo(p, IP_PROTO_TCP, p->tot_len, + src, dst); ++#endif + } + #endif + if (pcb != NULL) { +diff --git a/src/include/dpdk_cksum.h b/src/include/dpdk_cksum.h +new file mode 100644 +index 0000000..e57be4d +--- /dev/null ++++ b/src/include/dpdk_cksum.h +@@ -0,0 +1,107 @@ ++/* ++ * Copyright (c) 2001-2004 Swedish Institute of Computer Science. ++ * All rights reserved. 
++ * ++ * Redistribution and use in source and binary forms, with or without modification, ++ * are permitted provided that the following conditions are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright notice, ++ * this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright notice, ++ * this list of conditions and the following disclaimer in the documentation ++ * and/or other materials provided with the distribution. ++ * 3. The name of the author may not be used to endorse or promote products ++ * derived from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT ++ * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT ++ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING ++ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY ++ * OF SUCH DAMAGE. ++ * ++ * This file is part of the lwIP TCP/IP stack. 
++ * ++ * Author: Huawei Technologies ++ * ++ */ ++ ++#ifndef __DPDK_CKSUM_H__ ++#define __DPDK_CKSUM_H__ ++ ++#include "lwipopts.h" ++#if USE_LIBOS ++#include ++ ++#if CHECKSUM_OFFLOAD_ALL ++#include ++#include "lwip/pbuf.h" ++#endif ++ ++extern uint64_t get_eth_params_rx_ol(void); ++extern uint64_t get_eth_params_tx_ol(void); ++#if CHECKSUM_CHECK_IP_HW ++// for ip4_input ++static inline u64_t is_cksum_ipbad(struct pbuf *p) { ++ return p->ol_flags & (RTE_MBUF_F_RX_IP_CKSUM_BAD); ++} ++#endif /* CHECKSUM_CHECK_IP_HW */ ++ ++#if CHECKSUM_CHECK_TCP_HW ++// for tcp_input ++static inline u64_t is_cksum_tcpbad(struct pbuf *p) { ++ return p->ol_flags & (RTE_MBUF_F_RX_L4_CKSUM_BAD); ++} ++#endif /* CHECKSUM_CHECK_TCP_HW */ ++ ++#if CHECKSUM_GEN_IP_HW ++static inline void ethh_cksum_set(struct pbuf *p, u16_t len) { ++ p->l2_len = len; ++} ++ ++// replaces IPH_CHKSUM_SET ++static inline void iph_cksum_set(struct pbuf *p, u16_t len, bool do_ipcksum) { ++ p->ol_flags |= RTE_MBUF_F_TX_IPV4; ++ if (do_ipcksum) { ++ p->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM; ++ } ++ p->l3_len = len; ++} ++#endif /* CHECKSUM_GEN_IP_HW */ ++ ++// replace ip_chksum_pseudo ++#if CHECKSUM_GEN_TCP_HW ++#include ++ ++static inline void tcph_cksum_set(struct pbuf *p, u16_t len) { ++ (void)len; ++ p->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM; ++} ++ ++static inline u16_t ip_chksum_pseudo_offload(u8_t proto, u16_t proto_len, ++ const ip_addr_t *src, const ip_addr_t *dst) ++{ ++ struct ipv4_psd_header { ++ uint32_t src_addr; /* IP address of source host. */ ++ uint32_t dst_addr; /* IP address of destination host. */ ++ uint8_t zero; /* zero. */ ++ uint8_t proto; /* L4 protocol type. */ ++ uint16_t len; /* L4 length. 
*/ ++ } psd_hdr; ++ ++ psd_hdr.src_addr = ip4_addr_get_u32(src); ++ psd_hdr.dst_addr = ip4_addr_get_u32(dst); ++ psd_hdr.proto = proto; ++ psd_hdr.len = lwip_htons(proto_len); ++ psd_hdr.zero = 0; ++ ++ return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr)); ++} ++#endif /* CHECKSUM_GEN_TCP_HW */ ++ ++#endif /* USE_LIBOS */ ++#endif /* __DPDK_CKSUM_H__ */ +diff --git a/src/include/lwip/pbuf.h b/src/include/lwip/pbuf.h +index 3894574..87cd960 100644 +--- a/src/include/lwip/pbuf.h ++++ b/src/include/lwip/pbuf.h +@@ -220,6 +220,15 @@ struct pbuf { + /** For incoming packets, this contains the input netif's index */ + u8_t if_idx; + ++#if USE_LIBOS && CHECKSUM_OFFLOAD_ALL ++ /** checksum offload ol_flags */ ++ u64_t ol_flags; ++ /** checksum offload l2_len */ ++ u64_t l2_len:7; ++ /** checksum offload l3_len */ ++ u64_t l3_len:9; ++#endif /* USE_LIBOS CHECKSUM_OFFLOAD_SWITCH */ ++ + /** In case the user needs to store data custom data on a pbuf */ + LWIP_PBUF_CUSTOM_DATA + }; +@@ -271,9 +280,8 @@ void pbuf_free_ooseq(void); + + /* Initializes the pbuf module. This call is empty for now, but may not be in future. 
*/ + #define pbuf_init() +- + #if USE_LIBOS +-struct pbuf *lwip_alloc_pbuf(pbuf_layer l, u16_t length, pbuf_type type); ++struct pbuf *lwip_alloc_pbuf(pbuf_layer layer, uint16_t length, pbuf_type type); + #endif + struct pbuf *pbuf_alloc(pbuf_layer l, u16_t length, pbuf_type type); + struct pbuf *pbuf_alloc_reference(void *payload, u16_t length, pbuf_type type); +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index e0364a2..df587c0 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -129,14 +129,6 @@ + + #define LWIP_STATS_DISPLAY 1 + +-#define CHECKSUM_GEN_IP 1 /* master switch */ +- +-#define CHECKSUM_GEN_TCP 1 /* master switch */ +- +-#define CHECKSUM_CHECK_IP 1 /* master switch */ +- +-#define CHECKSUM_CHECK_TCP 1 /* master switch */ +- + #define LWIP_TIMEVAL_PRIVATE 0 + + #define USE_LIBOS 1 +@@ -177,6 +169,28 @@ + + #define ARP_TABLE_SIZE 512 + ++/* --------------------------------------- ++ * ------- NIC offloads -------- ++ * --------------------------------------- ++ */ ++#define LWIP_CHECKSUM_CTRL_PER_NETIF 1 /* checksum ability check before checksum*/ ++ ++// rx cksum ++#define CHECKSUM_CHECK_IP 1 /* master switch */ ++#define CHECKSUM_CHECK_TCP 1 /* master switch */ ++// tx cksum ++#define CHECKSUM_GEN_IP 1 /* master switch */ ++#define CHECKSUM_GEN_TCP 1 /* master switch */ ++ ++// rx offload cksum ++#define CHECKSUM_CHECK_IP_HW (1 && CHECKSUM_CHECK_IP) /* hardware switch */ ++#define CHECKSUM_CHECK_TCP_HW (1 && CHECKSUM_CHECK_TCP) /* hardware switch */ ++// tx offload cksum ++#define CHECKSUM_GEN_IP_HW (1 && CHECKSUM_GEN_IP) /* hardware switch */ ++#define CHECKSUM_GEN_TCP_HW (1 && CHECKSUM_GEN_TCP) /* hardware switch */ ++ ++#define CHECKSUM_OFFLOAD_ALL (CHECKSUM_GEN_IP_HW || CHECKSUM_GEN_TCP_HW || CHECKSUM_CHECK_IP_HW || CHECKSUM_CHECK_TCP_HW) ++ + #if USE_LIBOS + #define PER_THREAD __thread + #else +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index 36bcaed..eec4e8e 100644 +--- 
a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -36,7 +36,6 @@ + #include "lwip/opt.h" + #include "lwip/api.h" + +-#include "posix_api.h" + #include "eventpoll.h" + + /* move some definitions to the lwipsock.h for libnet to use, and +@@ -62,7 +61,8 @@ union lwip_sock_lastdata { + + #if USE_LIBOS + struct protocol_stack; +-struct weakup_poll; ++struct wakeup_poll; ++struct rte_ring; + #endif + /** Contains all internal pointers and states used for a socket */ + struct lwip_sock { +@@ -93,16 +93,16 @@ struct lwip_sock { + + #if USE_LIBOS + uint32_t epoll_events; /* registered events */ +- uint32_t events; /* available events */ +- volatile bool have_event; /* avoid recurring events */ +- volatile bool have_rpc_send; /* avoid recurring rpc_send */ ++ volatile uint32_t events; /* available events */ + epoll_data_t ep_data; +- struct weakup_poll *weakup; ++ struct wakeup_poll *wakeup; + struct protocol_stack *stack; +- void *recv_ring; ++ struct rte_ring *recv_ring; ++ struct rte_ring *recv_wait_free; + struct pbuf *recv_lastdata; /* unread data in one pbuf */ + struct pbuf *send_lastdata; /* unread data in one pbuf */ +- void *send_ring; ++ struct rte_ring *send_ring; ++ struct rte_ring *send_idle_ring; + int32_t recv_flags; + int32_t send_flags; + bool wait_close; +@@ -112,7 +112,6 @@ struct lwip_sock { + struct list_node listen_list; + struct list_node recv_list; + struct list_node event_list; +- struct list_node wakeup_list; + struct list_node send_list; + int32_t nextfd; /* listenfd list */ + #endif +@@ -160,6 +159,7 @@ get_socket_without_errno(int s) + + extern void add_recv_list(int32_t fd); + extern ssize_t read_lwip_data(struct lwip_sock *sock, int32_t flags, u8_t apiflags); ++extern struct pbuf *write_lwip_data(struct lwip_sock *sock, uint16_t remain_size, uint8_t *apiflags); + extern void gazelle_clean_sock(int32_t fd); + extern void gazelle_init_sock(int32_t fd); + #endif /* USE_LIBOS */ +diff --git a/src/netif/ethernet.c b/src/netif/ethernet.c 
+index dd171e2..ab976a8 100644 +--- a/src/netif/ethernet.c ++++ b/src/netif/ethernet.c +@@ -56,6 +56,10 @@ + #include "netif/ppp/pppoe.h" + #endif /* PPPOE_SUPPORT */ + ++#if USE_LIBOS && (CHECKSUM_GEN_TCP_HW || CHECKSUM_GEN_IP_HW) ++#include "dpdk_cksum.h" ++#endif ++ + #ifdef LWIP_HOOK_FILENAME + #include LWIP_HOOK_FILENAME + #endif +@@ -308,6 +312,10 @@ ethernet_output(struct netif * netif, struct pbuf * p, + LWIP_DEBUGF(ETHARP_DEBUG | LWIP_DBG_TRACE, + ("ethernet_output: sending packet %p\n", (void *)p)); + ++#if CHECKSUM_GEN_IP_HW || CHECKSUM_GEN_TCP_HW ++ ethh_cksum_set(p, sizeof(*ethhdr)); ++#endif ++ + /* send the packet */ + return netif->linkoutput(netif, p); + +-- +2.23.0 diff --git a/0024-refactor-pkt-read-send-performance.patch b/0024-refactor-pkt-read-send-performance.patch new file mode 100644 index 0000000000000000000000000000000000000000..529738aaa57be403909d4339043f0005c0423aa6 --- /dev/null +++ b/0024-refactor-pkt-read-send-performance.patch @@ -0,0 +1,320 @@ +From 10e21843fc3fde51cb99510792835a65c9b5baad Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: Thu, 7 Jul 2022 20:00:14 +0800 +Subject: [PATCH] refactor pkt read/send + +--- + src/api/api_msg.c | 15 ++++++--------- + src/api/posix_api.c | 4 ++-- + src/api/sockets.c | 11 +++-------- + src/api/sys_arch.c | 11 +++++------ + src/include/arch/sys_arch.h | 46 +++++++++++++++++++++++++++++++++++++++++++++ + src/include/lwipopts.h | 2 +- + src/include/lwipsock.h | 29 +++++++++++----------------- + src/include/posix_api.h | 2 +- + 8 files changed, 75 insertions(+), 45 deletions(-) + +diff --git a/src/api/api_msg.c b/src/api/api_msg.c +index 672f022..7839526 100644 +--- a/src/api/api_msg.c ++++ b/src/api/api_msg.c +@@ -341,13 +341,12 @@ recv_tcp(void *arg, struct tcp_pcb *pcb, struct pbuf *p, err_t err) + #if LWIP_SO_RCVBUF + SYS_ARCH_INC(conn->recv_avail, len); + #endif /* LWIP_SO_RCVBUF */ +- /* Register event with callback */ +- API_EVENT(conn, NETCONN_EVT_RCVPLUS, len); + #if USE_LIBOS +- if 
(conn->state == NETCONN_WRITE || conn->state == NETCONN_CLOSE || +- conn->state == NETCONN_CONNECT) { + add_recv_list(conn->socket); +- } ++ LWIP_UNUSED_ARG(len); ++#else ++ /* Register event with callback */ ++ API_EVENT(conn, NETCONN_EVT_RCVPLUS, len); + #endif + } + +@@ -479,10 +478,7 @@ err_tcp(void *arg, err_t err) + /* use trypost to prevent deadlock */ + sys_mbox_trypost(&conn->recvmbox, mbox_msg); + #if USE_LIBOS +- if ((old_state == NETCONN_WRITE) || (old_state == NETCONN_CLOSE) || +- (old_state == NETCONN_CONNECT)) { +- add_recv_list(conn->socket); +- } ++ add_recv_list(conn->socket); + #endif + } + /* pass error message to acceptmbox to wake up pending accept */ +@@ -1356,6 +1352,7 @@ lwip_netconn_do_connected(void *arg, struct tcp_pcb *pcb, err_t err) + } + } + SET_CONN_TYPE_LIBOS(conn); ++ add_epoll_event(conn, EPOLLOUT); + #endif + + LWIP_ASSERT("conn->state == NETCONN_CONNECT", conn->state == NETCONN_CONNECT); +diff --git a/src/api/posix_api.c b/src/api/posix_api.c +index 3f85bad..6afb9c6 100644 +--- a/src/api/posix_api.c ++++ b/src/api/posix_api.c +@@ -60,7 +60,7 @@ static struct lwip_sock *chld_get_socket(int fd) + void posix_api_fork(void) + { + /* lstack helper api */ +- posix_api->is_chld = 1; ++ posix_api->ues_posix = 1; + posix_api->is_epfd = chld_is_epfd; + posix_api->get_socket = chld_get_socket; + } +@@ -117,7 +117,7 @@ int posix_api_init(void) + posix_api->epoll_close_fn = lstack_epoll_close; + + /* support fork */ +- posix_api->is_chld = 1; ++ posix_api->ues_posix = 1; + return ERR_OK; + + err_out: +diff --git a/src/api/sockets.c b/src/api/sockets.c +index 3d94454..4d4cea1 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -1039,11 +1039,7 @@ lwip_recv_tcp(struct lwip_sock *sock, void *mem, size_t len, int flags) + { + u8_t apiflags = NETCONN_NOAUTORCVD; + ssize_t recvd = 0; +-#if USE_LIBOS +- apiflags = 0; +-#else + ssize_t recv_left = (len <= SSIZE_MAX) ? 
(ssize_t)len : SSIZE_MAX; +-#endif + + LWIP_ASSERT("no socket given", sock != NULL); + LWIP_ASSERT("this should be checked internally", NETCONNTYPE_GROUP(netconn_type(sock->conn)) == NETCONN_TCP); +@@ -1134,6 +1130,7 @@ lwip_recv_tcp(struct lwip_sock *sock, void *mem, size_t len, int flags) + + lwip_recv_tcp_done: + #else /* USE_LIBOS */ ++ LWIP_UNUSED_ARG(recv_left); + recvd = read_lwip_data(sock, flags, apiflags); + if (recvd <= 0) { + return recvd; +@@ -2667,10 +2664,8 @@ event_callback(struct netconn *conn, enum netconn_evt evt, u16_t len) + check_waiters = 0; + } + #if USE_LIBOS +- if (conn->state == NETCONN_LISTEN) { +- add_epoll_event(conn, EPOLLIN); +- } else { +- add_recv_list(conn->socket); ++ if (conn->acceptmbox != NULL && !sys_mbox_empty(conn->acceptmbox)) { ++ add_epoll_event(conn, POLLIN); + } + #endif + break; +diff --git a/src/api/sys_arch.c b/src/api/sys_arch.c +index 9a92143..f93a00e 100644 +--- a/src/api/sys_arch.c ++++ b/src/api/sys_arch.c +@@ -37,7 +37,6 @@ + #include + + #include +-#include + + #include "lwip/err.h" + #include "lwip/mem.h" +@@ -134,7 +133,7 @@ err_t sys_mbox_trypost(struct sys_mbox **mb, void *msg) + unsigned int n; + struct sys_mbox *mbox = *mb; + +- n = rte_ring_sp_enqueue_bulk(mbox->ring, &msg, 1, NULL); ++ n = gazelle_st_ring_enqueue_busrt(mbox->ring, &msg, 1); + if (!n) + return ERR_BUF; + return ERR_OK; +@@ -148,7 +147,7 @@ void sys_mbox_post(struct sys_mbox **mb, void *msg) + * If the ring size of mbox is greater than MEMP_NUM_TCPIP_MSG_API, + * enqueue failure will never happen. 
+ * */ +- if (!rte_ring_sp_enqueue_bulk(mbox->ring, &msg, 1, NULL)) { ++ if (!gazelle_st_ring_enqueue_busrt(mbox->ring, &msg, 1)) { + LWIP_ASSERT("It is failed to post msg into mbox", 0); + } + } +@@ -163,7 +162,7 @@ uint32_t sys_arch_mbox_tryfetch(struct sys_mbox **mb, void **msg) + unsigned int n; + struct sys_mbox *mbox = *mb; + +- n = rte_ring_sc_dequeue_bulk(mbox->ring, msg, 1, NULL); ++ n = gazelle_st_ring_dequeue_burst(mbox->ring, msg, 1); + if (!n) { + *msg = NULL; + return SYS_MBOX_EMPTY; +@@ -179,7 +178,7 @@ uint32_t sys_arch_mbox_fetch(struct sys_mbox **mb, void **msg, uint32_t timeout) + uint32_t time_needed = 0; + struct sys_mbox *mbox = *mb; + +- n = rte_ring_sc_dequeue_bulk(mbox->ring, msg, 1, NULL); ++ n = gazelle_st_ring_dequeue_burst(mbox->ring, msg, 1); + + if (timeout > 0) + poll_ts = sys_now(); +@@ -194,7 +193,7 @@ uint32_t sys_arch_mbox_fetch(struct sys_mbox **mb, void **msg, uint32_t timeout) + + (void)mbox->wait_fn(); + +- n = rte_ring_sc_dequeue_bulk(mbox->ring, msg, 1, NULL); ++ n = gazelle_st_ring_dequeue_burst(mbox->ring, msg, 1); + } + + return time_needed; +diff --git a/src/include/arch/sys_arch.h b/src/include/arch/sys_arch.h +index b8a0d28..fc4a9fd 100644 +--- a/src/include/arch/sys_arch.h ++++ b/src/include/arch/sys_arch.h +@@ -76,7 +76,53 @@ int sys_mbox_empty(struct sys_mbox *); + struct sys_thread; + typedef struct sys_thread *sys_thread_t; + ++#if USE_LIBOS + extern int eth_dev_poll(void); ++#include ++ ++/* ++ gazelle custom rte ring interface ++ lightweight ring no atomic. ++ only surpport in single thread. 
++ */ ++static __rte_always_inline uint32_t gazelle_st_ring_enqueue_busrt(struct rte_ring *r, void **obj_table, uint32_t n) ++{ ++ uint32_t prod = r->prod.tail; ++ uint32_t cons = r->cons.tail; ++ uint32_t free_entries = r->capacity + cons - prod; ++ ++ if (n > free_entries) { ++ return 0; ++ } ++ ++ __rte_ring_enqueue_elems(r, prod, obj_table, sizeof(void *), n); ++ ++ r->prod.tail = prod + n; ++ ++ return n; ++} ++ ++static __rte_always_inline uint32_t gazelle_st_ring_dequeue_burst(struct rte_ring *r, void **obj_table, uint32_t n) ++{ ++ uint32_t cons = r->cons.tail; ++ uint32_t prod = r->prod.tail; ++ uint32_t entries = prod - cons; ++ ++ if (n > entries) { ++ n = entries; ++ } ++ ++ if (n == 0) { ++ return 0; ++ } ++ ++ __rte_ring_dequeue_elems(r, cons, obj_table, sizeof(void *), n); ++ ++ r->cons.tail = cons + n; ++ ++ return n; ++} ++#endif + + void sys_calibrate_tsc(void); + uint32_t sys_now(void); +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index df587c0..75d3c74 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -97,7 +97,7 @@ + + #define TCP_WND (40 * TCP_MSS) + +-#define TCP_SND_BUF (5 * TCP_MSS) ++#define TCP_SND_BUF (40 * TCP_MSS) + + #define TCP_SND_QUEUELEN (8191) + +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index eec4e8e..500292d 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -63,6 +63,7 @@ union lwip_sock_lastdata { + struct protocol_stack; + struct wakeup_poll; + struct rte_ring; ++#include + #endif + /** Contains all internal pointers and states used for a socket */ + struct lwip_sock { +@@ -92,28 +93,21 @@ struct lwip_sock { + #endif + + #if USE_LIBOS ++ volatile uint32_t events __rte_cache_aligned; /* available events */ ++ struct pbuf *recv_lastdata __rte_cache_aligned; /* unread data in one pbuf */ ++ struct list_node recv_list __rte_cache_aligned; ++ struct list_node event_list __rte_cache_aligned; ++ struct list_node send_list __rte_cache_aligned; ++ char pad 
__rte_cache_aligned; ++ + uint32_t epoll_events; /* registered events */ +- volatile uint32_t events; /* available events */ +- epoll_data_t ep_data; + struct wakeup_poll *wakeup; ++ epoll_data_t ep_data; ++ bool wait_close; ++ struct lwip_sock *listen_next; /* listenfd list */ + struct protocol_stack *stack; + struct rte_ring *recv_ring; +- struct rte_ring *recv_wait_free; +- struct pbuf *recv_lastdata; /* unread data in one pbuf */ +- struct pbuf *send_lastdata; /* unread data in one pbuf */ + struct rte_ring *send_ring; +- struct rte_ring *send_idle_ring; +- int32_t recv_flags; +- int32_t send_flags; +- bool wait_close; +- int32_t attach_fd; +- struct lwip_sock *shadowed_sock; +- struct list_node attach_list; +- struct list_node listen_list; +- struct list_node recv_list; +- struct list_node event_list; +- struct list_node send_list; +- int32_t nextfd; /* listenfd list */ + #endif + }; + +@@ -160,7 +154,6 @@ get_socket_without_errno(int s) + extern void add_recv_list(int32_t fd); + extern ssize_t read_lwip_data(struct lwip_sock *sock, int32_t flags, u8_t apiflags); + extern struct pbuf *write_lwip_data(struct lwip_sock *sock, uint16_t remain_size, uint8_t *apiflags); +-extern void gazelle_clean_sock(int32_t fd); + extern void gazelle_init_sock(int32_t fd); + #endif /* USE_LIBOS */ + +diff --git a/src/include/posix_api.h b/src/include/posix_api.h +index 2afd266..c8f2cf9 100644 +--- a/src/include/posix_api.h ++++ b/src/include/posix_api.h +@@ -76,7 +76,7 @@ typedef struct { + int (*poll_fn)(struct pollfd *fds, nfds_t nfds, int timeout); + int (*ioctl_fn)(int fd, int cmd, ...); + +- int is_chld; ++ int ues_posix; + } posix_api_t; + + extern posix_api_t *posix_api; +-- +2.8.4.windows.1 + diff --git a/0025-Replace-gettid-with-syscall-SYS_gettid.patch b/0025-Replace-gettid-with-syscall-SYS_gettid.patch new file mode 100644 index 0000000000000000000000000000000000000000..92abd14d374db2f218bb9f5b015cab379a5d8a32 --- /dev/null +++ 
b/0025-Replace-gettid-with-syscall-SYS_gettid.patch @@ -0,0 +1,57 @@ +From 35300925c26ce9eba9f4f1c9a4181708da771392 Mon Sep 17 00:00:00 2001 +From: Honggang LI +Date: Tue, 12 Jul 2022 10:15:36 +0800 +Subject: [PATCH] Replace gettid() with syscall(SYS_gettid) + +Remove gettid() to address a backport issue for gazelle library. + +Signed-off-by: Honggang LI +--- + src/include/arch/cc.h | 4 ++-- + src/include/lwiplog.h | 3 +-- + 2 files changed, 3 insertions(+), 4 deletions(-) + +diff --git a/src/include/arch/cc.h b/src/include/arch/cc.h +index 222b0c9..aa18573 100644 +--- a/src/include/arch/cc.h ++++ b/src/include/arch/cc.h +@@ -62,7 +62,7 @@ void alloc_memp_##name##_base(void) \ + memp_pools[MEMP_##name] = &memp_ ## name; \ + \ + char mpname[MEMZONE_NAMESIZE] = {0}; \ +- snprintf(mpname, MEMZONE_NAMESIZE, "%d_%s", gettid(), #name); \ ++ snprintf(mpname, MEMZONE_NAMESIZE, "%d_%s", (int)syscall(SYS_gettid), #name); \ + memp_memory_##name##_base = \ + sys_hugepage_malloc(mpname, LWIP_MEM_ALIGN_BUFFER(__size)); \ + memp_pools[MEMP_##name]->base = memp_memory_##name##_base; \ +@@ -73,7 +73,7 @@ PER_THREAD uint8_t *variable_name; \ + void alloc_memory_##variable_name(void) \ + { \ + char mpname[MEMZONE_NAMESIZE] = {0}; \ +- snprintf(mpname, MEMZONE_NAMESIZE, "%d_%s", gettid(), #variable_name); \ ++ snprintf(mpname, MEMZONE_NAMESIZE, "%d_%s", (int)syscall(SYS_gettid), #variable_name); \ + (variable_name) = \ + sys_hugepage_malloc(mpname, LWIP_MEM_ALIGN_BUFFER(size)); \ + } +diff --git a/src/include/lwiplog.h b/src/include/lwiplog.h +index 6fccac8..011ed21 100644 +--- a/src/include/lwiplog.h ++++ b/src/include/lwiplog.h +@@ -35,13 +35,12 @@ + + #include + #include ++#include + + #include + + #include "lwipopts.h" + +-extern int gettid(void); +- + #if USE_DPDK_LOG + + #define LWIP_LOG_WARN LWIP_DBG_LEVEL_WARNING +-- +2.31.1 + diff --git a/0026-del-redundant-wait_close-and-move-epoll_events-pos.patch b/0026-del-redundant-wait_close-and-move-epoll_events-pos.patch new file mode 
100644 index 0000000000000000000000000000000000000000..cee127b172236cbeecff52375ff619c1c7cc4b73 --- /dev/null +++ b/0026-del-redundant-wait_close-and-move-epoll_events-pos.patch @@ -0,0 +1,30 @@ +From ab62f970793c257c712c357a6976b9aca2e63b98 Mon Sep 17 00:00:00 2001 +From: wu-changsheng +Date: Tue, 26 Jul 2022 17:36:29 +0800 +Subject: [PATCH] del redundant wait_close and move epoll_events pos + +--- + src/include/lwipsock.h | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index 500292d..16e0dd3 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -98,12 +98,11 @@ struct lwip_sock { + struct list_node recv_list __rte_cache_aligned; + struct list_node event_list __rte_cache_aligned; + struct list_node send_list __rte_cache_aligned; ++ uint32_t epoll_events; /* registered events, EPOLLONESHOT write frequently */ + char pad __rte_cache_aligned; + +- uint32_t epoll_events; /* registered events */ + struct wakeup_poll *wakeup; + epoll_data_t ep_data; +- bool wait_close; + struct lwip_sock *listen_next; /* listenfd list */ + struct protocol_stack *stack; + struct rte_ring *recv_ring; +-- +2.23.0 + diff --git a/0027-modify-EISCONN-condition.patch b/0027-modify-EISCONN-condition.patch new file mode 100644 index 0000000000000000000000000000000000000000..2693e91a75668b93dc8f63d229d4b7a51be74751 --- /dev/null +++ b/0027-modify-EISCONN-condition.patch @@ -0,0 +1,39 @@ +From b8c388a7adef4dc53d3bb135102da64bf8a08b76 Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: Thu, 6 Oct 2022 15:57:33 +0800 +Subject: [PATCH] modify-EISCONN-condition + +--- + src/api/api_msg.c | 2 +- + src/include/lwipsock.h | 2 ++ + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/src/api/api_msg.c b/src/api/api_msg.c +index 7839526..2dded75 100644 +--- a/src/api/api_msg.c ++++ b/src/api/api_msg.c +@@ -1417,7 +1417,7 @@ lwip_netconn_do_connect(void *m) + /* Prevent connect while doing any other action. 
*/ + if (msg->conn->state == NETCONN_CONNECT) { + err = ERR_ALREADY; +- } else if (msg->conn->state != NETCONN_NONE) { ++ } else if (msg->conn->pcb.tcp->state != ESTABLISHED) { + err = ERR_ISCONN; + } else { + setup_tcp(msg->conn); +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index 16e0dd3..3c5c44b 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -98,6 +98,8 @@ struct lwip_sock { + struct list_node recv_list __rte_cache_aligned; + struct list_node event_list __rte_cache_aligned; + struct list_node send_list __rte_cache_aligned; ++ uint32_t in_send __rte_cache_aligned; /* avoid sock too much send rpc msg*/ ++ uint32_t send_flag __rte_cache_aligned; /* avoid sock too much send rpc msg*/ + uint32_t epoll_events; /* registered events, EPOLLONESHOT write frequently */ + char pad __rte_cache_aligned; + +-- +2.27.0 + diff --git a/0028-per-thread-reassdata-variables.patch b/0028-per-thread-reassdata-variables.patch new file mode 100644 index 0000000000000000000000000000000000000000..28eff780779b12f2507977f0c405f9dc53453516 --- /dev/null +++ b/0028-per-thread-reassdata-variables.patch @@ -0,0 +1,30 @@ +From a554661e9dd189f2d4b5dee8970fd009db89d9aa Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: Thu, 6 Oct 2022 17:33:16 +0800 +Subject: [PATCH] per thread reassdata variables + +--- + src/core/ipv4/ip4_frag.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/src/core/ipv4/ip4_frag.c b/src/core/ipv4/ip4_frag.c +index 17a4ccd..c60523d 100644 +--- a/src/core/ipv4/ip4_frag.c ++++ b/src/core/ipv4/ip4_frag.c +@@ -115,8 +115,13 @@ PACK_STRUCT_END + IPH_ID(iphdrA) == IPH_ID(iphdrB)) ? 
1 : 0 + + /* global variables */ ++#if USE_LIBOS ++static PER_THREAD struct ip_reassdata *reassdatagrams; ++static PER_THREAD u16_t ip_reass_pbufcount; ++#else + static struct ip_reassdata *reassdatagrams; + static u16_t ip_reass_pbufcount; ++#endif + + /* function prototypes */ + static void ip_reass_dequeue_datagram(struct ip_reassdata *ipr, struct ip_reassdata *prev); +-- +2.27.0 + diff --git a/0029-fix-EISCONN-err-and-remove-same-customized-modificat.patch b/0029-fix-EISCONN-err-and-remove-same-customized-modificat.patch new file mode 100644 index 0000000000000000000000000000000000000000..91ac15ef74201b88990a33a2691d65b553343836 --- /dev/null +++ b/0029-fix-EISCONN-err-and-remove-same-customized-modificat.patch @@ -0,0 +1,114 @@ +From ec2f5414c6c98b63376e4bce9534abc5c01ce13c Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: Thu, 6 Oct 2022 18:47:06 +0800 +Subject: [PATCH] fix EISCONN err and remove same customized modification + +--- + src/api/api_msg.c | 22 ++-------------------- + src/include/lwipsock.h | 33 ++++----------------------------- + 2 files changed, 6 insertions(+), 49 deletions(-) + +diff --git a/src/api/api_msg.c b/src/api/api_msg.c +index 2dded75..1fedaad 100644 +--- a/src/api/api_msg.c ++++ b/src/api/api_msg.c +@@ -1334,25 +1334,7 @@ lwip_netconn_do_connected(void *arg, struct tcp_pcb *pcb, err_t err) + } + + #if USE_LIBOS +- if (CONN_TYPE_IS_HOST(conn)) { +- LWIP_DEBUGF(API_MSG_DEBUG, +- ("libos outgoing connection abort fd=%d\n", conn->socket)); +- return ERR_ABRT; +- } +- +- LWIP_DEBUGF(API_MSG_DEBUG, ("libos outgoing connection established\n")); +- if (CONN_TYPE_HAS_INPRG(conn) && CONN_TYPE_HAS_HOST(conn)) { +- int s = conn->socket; +- struct lwip_sock *sock = get_socket_without_errno(s); +- +- if (!!sock) { +- posix_api->shutdown_fn(s, SHUT_RDWR); +- LWIP_DEBUGF(API_MSG_DEBUG, +- ("linux outgoing connection abort fd=%d\n", s)); +- } +- } +- SET_CONN_TYPE_LIBOS(conn); +- add_epoll_event(conn, EPOLLOUT); ++ 
gazelle_connected_callback(conn); + #endif + + LWIP_ASSERT("conn->state == NETCONN_CONNECT", conn->state == NETCONN_CONNECT); +@@ -1417,7 +1399,7 @@ lwip_netconn_do_connect(void *m) + /* Prevent connect while doing any other action. */ + if (msg->conn->state == NETCONN_CONNECT) { + err = ERR_ALREADY; +- } else if (msg->conn->pcb.tcp->state != ESTABLISHED) { ++ } else if (msg->conn->pcb.tcp->state == ESTABLISHED) { + err = ERR_ISCONN; + } else { + setup_tcp(msg->conn); +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index 3c5c44b..912d471 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -93,13 +93,14 @@ struct lwip_sock { + #endif + + #if USE_LIBOS ++ struct pbuf *send_lastdata; ++ uint16_t send_datalen; + volatile uint32_t events __rte_cache_aligned; /* available events */ + struct pbuf *recv_lastdata __rte_cache_aligned; /* unread data in one pbuf */ + struct list_node recv_list __rte_cache_aligned; + struct list_node event_list __rte_cache_aligned; + struct list_node send_list __rte_cache_aligned; + uint32_t in_send __rte_cache_aligned; /* avoid sock too much send rpc msg*/ +- uint32_t send_flag __rte_cache_aligned; /* avoid sock too much send rpc msg*/ + uint32_t epoll_events; /* registered events, EPOLLONESHOT write frequently */ + char pad __rte_cache_aligned; + +@@ -124,38 +125,12 @@ struct lwip_sock { + #if USE_LIBOS + extern uint32_t sockets_num; + extern struct lwip_sock *sockets; +-/** +- * Map a externally used socket index to the internal socket representation. 
+- * +- * @param s externally used socket index +- * @return struct lwip_sock for the socket or NULL if not found +- */ +-static inline struct lwip_sock * +-get_socket_without_errno(int s) +-{ +- struct lwip_sock *sock = NULL; +- +- s -= LWIP_SOCKET_OFFSET; +- +- if ((s < 0) || (s >= sockets_num)) { +- LWIP_DEBUGF(SOCKETS_DEBUG, ("get_socket(%d): invalid\n", s + LWIP_SOCKET_OFFSET)); +- return NULL; +- } +- +- sock = &sockets[s]; +- +- if (!sock->conn) { +- LWIP_DEBUGF(SOCKETS_DEBUG, ("get_socket(%d): not active\n", s + LWIP_SOCKET_OFFSET)); +- return NULL; +- } +- +- return sock; +-} +- ++extern void gazelle_connected_callback(struct netconn *conn); + extern void add_recv_list(int32_t fd); + extern ssize_t read_lwip_data(struct lwip_sock *sock, int32_t flags, u8_t apiflags); + extern struct pbuf *write_lwip_data(struct lwip_sock *sock, uint16_t remain_size, uint8_t *apiflags); + extern void gazelle_init_sock(int32_t fd); ++extern void write_lwip_over(struct lwip_sock *sock, uint32_t n); + #endif /* USE_LIBOS */ + + struct lwip_sock *get_socket(int s); +-- +2.27.0 + diff --git a/0030-refactor-tcp-new-port.patch b/0030-refactor-tcp-new-port.patch new file mode 100644 index 0000000000000000000000000000000000000000..8d59fbcc80166d17192bbbe304687b920c21f193 --- /dev/null +++ b/0030-refactor-tcp-new-port.patch @@ -0,0 +1,195 @@ +From 68c1fe8794077eab032b542094608338947f3d4f Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: Thu, 6 Oct 2022 19:27:41 +0800 +Subject: [PATCH] fix tcp new port + +--- + src/core/tcp.c | 87 +++++++++++++++++++++++++++++------------- + src/include/reg_sock.h | 1 + + 2 files changed, 61 insertions(+), 27 deletions(-) + +diff --git a/src/core/tcp.c b/src/core/tcp.c +index b65ab33..436ef85 100644 +--- a/src/core/tcp.c ++++ b/src/core/tcp.c +@@ -202,13 +202,26 @@ PER_THREAD u8_t tcp_active_pcbs_changed; + /** Timer counter to handle calling slow-timer from tcp_tmr() */ + static PER_THREAD u8_t tcp_timer; + static PER_THREAD u8_t tcp_timer_ctr; 
++#if USE_LIBOS ++static u16_t tcp_new_port(struct tcp_pcb *pcb); ++#else + static u16_t tcp_new_port(void); ++#endif + + static err_t tcp_close_shutdown_fin(struct tcp_pcb *pcb); + #if LWIP_TCP_PCB_NUM_EXT_ARGS + static void tcp_ext_arg_invoke_callbacks_destroyed(struct tcp_pcb_ext_args *ext_args); + #endif + ++#if USE_LIBOS ++static u8_t port_state[TCP_LOCAL_PORT_RANGE_END - TCP_LOCAL_PORT_RANGE_START + 1] = {0}; ++void release_port(u16_t port) ++{ ++ if (port >= TCP_LOCAL_PORT_RANGE_START && port <= TCP_LOCAL_PORT_RANGE_END) { ++ port_state[port - TCP_LOCAL_PORT_RANGE_START] = 0; ++ } ++} ++#endif + /** + * Initialize this module. + */ +@@ -237,6 +250,7 @@ tcp_free(struct tcp_pcb *pcb) + { + #if USE_LIBOS + vdev_unreg_done(pcb); ++ release_port(pcb->local_port); + #endif + LWIP_ASSERT("tcp_free: LISTEN", pcb->state != LISTEN); + #if LWIP_TCP_PCB_NUM_EXT_ARGS +@@ -746,7 +760,11 @@ tcp_bind(struct tcp_pcb *pcb, const ip_addr_t *ipaddr, u16_t port) + #endif /* LWIP_IPV6 && LWIP_IPV6_SCOPES */ + + if (port == 0) { ++#if USE_LIBOS ++ port = tcp_new_port(pcb); ++#else + port = tcp_new_port(); ++#endif + if (port == 0) { + return ERR_BUF; + } +@@ -1057,33 +1075,43 @@ tcp_recved(struct tcp_pcb *pcb, u16_t len) + * + * @return a new (free) local TCP port number + */ ++#if USE_LIBOS ++static u16_t ++tcp_new_port(struct tcp_pcb *pcb) ++#else + static u16_t + tcp_new_port(void) ++#endif + { +- u8_t i; + u16_t n = 0; +- u16_t tmp_port; +- struct tcp_pcb *pcb; ++ u16_t tmp_port = 0; + + pthread_mutex_lock(&g_tcp_port_mutex); +-again: +- tcp_port++; +- if (tcp_port == TCP_LOCAL_PORT_RANGE_END) { +- tcp_port = TCP_LOCAL_PORT_RANGE_START; +- } +- /* Check all PCB lists. 
*/ +- for (i = 0; i < NUM_TCP_PCB_LISTS; i++) { +- for (pcb = *tcp_pcb_lists[i]; pcb != NULL; pcb = pcb->next) { +- if (pcb->local_port == tcp_port) { +- n++; +- if (n > (TCP_LOCAL_PORT_RANGE_END - TCP_LOCAL_PORT_RANGE_START)) { +- return 0; ++ do { ++ tcp_port++; ++ if (tcp_port == TCP_LOCAL_PORT_RANGE_END) { ++ tcp_port = TCP_LOCAL_PORT_RANGE_START; ++ } ++ ++ if (__atomic_load_n(&port_state[tcp_port - TCP_LOCAL_PORT_RANGE_START], __ATOMIC_ACQUIRE) == 0) { ++#if USE_LIBOS ++ if (port_in_stack_queue(pcb->remote_ip.addr, pcb->local_ip.addr, pcb->remote_port, tcp_port)) { ++ tmp_port = tcp_port; ++ __atomic_store_n(&port_state[tcp_port - TCP_LOCAL_PORT_RANGE_START], 1, __ATOMIC_RELEASE); ++ break; + } +- goto again; ++#else ++ __atomic_store_n(&port_state[tcp_port - TCP_LOCAL_PORT_RANGE_START], 1, __ATOMIC_RELEASE); ++ break; ++#endif + } +- } +- } +- tmp_port = tcp_port; ++ ++ n++; ++ if (n > TCP_LOCAL_PORT_RANGE_END - TCP_LOCAL_PORT_RANGE_START) { ++ break; ++ } ++ } while (tmp_port == 0); ++ + pthread_mutex_unlock(&g_tcp_port_mutex); + + return tmp_port; +@@ -1169,7 +1197,11 @@ tcp_connect(struct tcp_pcb *pcb, const ip_addr_t *ipaddr, u16_t port, + + old_local_port = pcb->local_port; + if (pcb->local_port == 0) { ++#if USE_LIBOS ++ pcb->local_port = tcp_new_port(pcb); ++#else + pcb->local_port = tcp_new_port(); ++#endif + if (pcb->local_port == 0) { + return ERR_BUF; + } +@@ -1196,10 +1228,6 @@ tcp_connect(struct tcp_pcb *pcb, const ip_addr_t *ipaddr, u16_t port, + #endif /* SO_REUSE */ + } + +-#if USE_LIBOS +- vdev_reg_done(REG_RING_TCP_CONNECT, pcb); +-#endif +- + iss = tcp_next_iss(pcb); + pcb->rcv_nxt = 0; + pcb->snd_nxt = iss; +@@ -1227,6 +1255,10 @@ tcp_connect(struct tcp_pcb *pcb, const ip_addr_t *ipaddr, u16_t port, + /* Send a SYN together with the MSS option. 
*/ + ret = tcp_enqueue_flags(pcb, TCP_SYN); + if (ret == ERR_OK) { ++#if USE_LIBOS ++ vdev_reg_done(REG_RING_TCP_CONNECT, pcb); ++#endif ++ + /* SYN segment was enqueued, changed the pcbs state now */ + pcb->state = SYN_SENT; + if (old_local_port != 0) { +@@ -2277,10 +2309,6 @@ tcp_pcb_remove(struct tcp_pcb **pcblist, struct tcp_pcb *pcb) + LWIP_ASSERT("tcp_pcb_remove: invalid pcb", pcb != NULL); + LWIP_ASSERT("tcp_pcb_remove: invalid pcblist", pcblist != NULL); + +-#if USE_LIBOS +- vdev_unreg_done(pcb); +-#endif +- + TCP_RMV(pcblist, pcb); + + tcp_pcb_purge(pcb); +@@ -2301,6 +2329,11 @@ tcp_pcb_remove(struct tcp_pcb **pcblist, struct tcp_pcb *pcb) + #endif /* TCP_QUEUE_OOSEQ */ + } + ++#if USE_LIBOS ++ vdev_unreg_done(pcb); ++ release_port(pcb->local_port); ++#endif ++ + pcb->state = CLOSED; + /* reset the local port to prevent the pcb from being 'bound' */ + pcb->local_port = 0; +diff --git a/src/include/reg_sock.h b/src/include/reg_sock.h +index 76673da..e349e85 100644 +--- a/src/include/reg_sock.h ++++ b/src/include/reg_sock.h +@@ -58,5 +58,6 @@ struct reg_ring_msg { + }; + + extern int vdev_reg_xmit(enum reg_ring_type type, struct gazelle_quintuple *qtuple); ++extern bool port_in_stack_queue(uint32_t src_ip, uint32_t dst_ip, uint16_t src_port, uint16_t dst_port); + + #endif /* __REG_SOCK_H__ */ +-- +2.27.0 + diff --git a/0031-refactor-add-event-limit-send-pkts-num.patch b/0031-refactor-add-event-limit-send-pkts-num.patch new file mode 100644 index 0000000000000000000000000000000000000000..63ba30347b7228303bff89a9a530bf1b70ef37d7 --- /dev/null +++ b/0031-refactor-add-event-limit-send-pkts-num.patch @@ -0,0 +1,71 @@ +From 87166f699e0febd36b81d914713b770119ead471 Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: Thu, 6 Oct 2022 20:16:06 +0800 +Subject: [PATCH] refactor add event, limit send pkts num + +--- + src/api/sockets.c | 4 ++-- + src/core/tcp_out.c | 8 ++++++++ + src/include/eventpoll.h | 3 ++- + 3 files changed, 12 insertions(+), 3 deletions(-) + +diff 
--git a/src/api/sockets.c b/src/api/sockets.c +index 4d4cea1..d5b69eb 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -2665,7 +2665,7 @@ event_callback(struct netconn *conn, enum netconn_evt evt, u16_t len) + } + #if USE_LIBOS + if (conn->acceptmbox != NULL && !sys_mbox_empty(conn->acceptmbox)) { +- add_epoll_event(conn, POLLIN); ++ add_sock_event(sock, POLLIN); + } + #endif + break; +@@ -2686,7 +2686,7 @@ event_callback(struct netconn *conn, enum netconn_evt evt, u16_t len) + case NETCONN_EVT_ERROR: + sock->errevent = 1; + #if USE_LIBOS +- add_epoll_event(conn, EPOLLERR); ++ add_sock_event(sock, EPOLLERR); + #endif + break; + default: +diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c +index 1b0af8d..dd780d3 100644 +--- a/src/core/tcp_out.c ++++ b/src/core/tcp_out.c +@@ -1358,8 +1358,16 @@ tcp_output(struct tcp_pcb *pcb) + for (; useg->next != NULL; useg = useg->next); + } + /* data available and window allows it to be sent? */ ++#if USE_LIBOS ++ /* avoid send cose too much time, limit send pkts num max 10 */ ++ uint16_t send_pkt = 0; ++ while (seg != NULL && send_pkt < 10 && ++ lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len <= wnd) { ++ send_pkt++; ++#else + while (seg != NULL && + lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len <= wnd) { ++#endif + LWIP_ASSERT("RST not expected here!", + (TCPH_FLAGS(seg->tcphdr) & TCP_RST) == 0); + /* Stop sending if the nagle algorithm would prevent it +diff --git a/src/include/eventpoll.h b/src/include/eventpoll.h +index aacc1d2..a10c84b 100644 +--- a/src/include/eventpoll.h ++++ b/src/include/eventpoll.h +@@ -63,7 +63,8 @@ struct libos_epoll { + int efd; /* eventfd */ + }; + +-extern void add_epoll_event(struct netconn*, uint32_t); ++struct lwip_sock; ++extern void add_sock_event(struct lwip_sock *sock, uint32_t event); + extern int32_t lstack_epoll_close(int32_t); + + #endif /* __EVENTPOLL_H__ */ +-- +2.27.0 + diff --git a/0032-fix-free-pbuf-miss-data.patch 
b/0032-fix-free-pbuf-miss-data.patch new file mode 100644 index 0000000000000000000000000000000000000000..08161272f2ff9e746a3eb49dc5cf3e1371d023a9 --- /dev/null +++ b/0032-fix-free-pbuf-miss-data.patch @@ -0,0 +1,80 @@ +From 0c7d7ad7f9a79a557a867a6009aa2aac067d454e Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: Thu, 6 Oct 2022 21:07:12 +0800 +Subject: [PATCH] fix free pbuf miss data + +--- + src/core/tcp_out.c | 18 ++++++++++++++++++ + src/include/lwipopts.h | 2 +- + 2 files changed, 19 insertions(+), 1 deletion(-) + +diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c +index dd780d3..2834ba3 100644 +--- a/src/core/tcp_out.c ++++ b/src/core/tcp_out.c +@@ -682,11 +682,24 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + if (queuelen > LWIP_MIN(TCP_SND_QUEUELEN, TCP_SNDQUEUELEN_OVERFLOW)) { + LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_write: queue too long %"U16_F" (%d)\n", + queuelen, (int)TCP_SND_QUEUELEN)); ++#if USE_LIBOS ++ if (pos > 0) { ++ queuelen -= pbuf_clen(p); ++ break; ++ } ++#else + pbuf_free(p); ++#endif + goto memerr; + } + + if ((seg = tcp_create_segment(pcb, p, 0, pcb->snd_lbb + pos, optflags)) == NULL) { ++#if USE_LIBOS ++ if (pos > 0) { ++ queuelen -= pbuf_clen(p); ++ break; ++ } ++#endif + goto memerr; + } + #if TCP_OVERSIZE_DBGCHECK +@@ -714,6 +727,9 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + lwip_ntohl(seg->tcphdr->seqno) + TCP_TCPLEN(seg))); + + pos += seglen; ++#if USE_LIBOS ++ write_lwip_over((struct lwip_sock*)arg, 1); ++#endif + } + + /* +@@ -825,12 +841,14 @@ memerr: + tcp_set_flags(pcb, TF_NAGLEMEMERR); + TCP_STATS_INC(tcp.memerr); + ++#if !USE_LIBOS + if (concat_p != NULL) { + pbuf_free(concat_p); + } + if (queue != NULL) { + tcp_segs_free(queue); + } ++#endif + if (pcb->snd_queuelen != 0) { + LWIP_ASSERT("tcp_write: valid queue length", pcb->unacked != NULL || + pcb->unsent != NULL); +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h 
+index 75d3c74..7459991 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -133,7 +133,7 @@ + + #define USE_LIBOS 1 + +-#define LWIP_DEBUG 1 ++//#define LWIP_DEBUG 1 + + #define LWIP_PERF 1 + +-- +2.27.0 + diff --git a/0033-alloc-socket-fail-clean-sock.patch b/0033-alloc-socket-fail-clean-sock.patch new file mode 100644 index 0000000000000000000000000000000000000000..0a932dd07d21ce38371bbb4c0d3c0b2386008b4c --- /dev/null +++ b/0033-alloc-socket-fail-clean-sock.patch @@ -0,0 +1,39 @@ +From fc2a5b52f7d9f02eb43931414767635e5cf4c8c1 Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Tue, 11 Oct 2022 21:47:24 +0800 +Subject: [PATCH] alloc socket fail clean sock + +--- + src/api/sockets.c | 3 +++ + src/include/lwipsock.h | 1 + + 2 files changed, 4 insertions(+) + +diff --git a/src/api/sockets.c b/src/api/sockets.c +index 14f2b35..3552599 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -598,6 +598,9 @@ alloc_socket(struct netconn *newconn, int accepted) + sockets[i].sendevent = (NETCONNTYPE_GROUP(newconn->type) == NETCONN_TCP ? 
(accepted != 0) : 1); + sockets[i].errevent = 0; + return i + LWIP_SOCKET_OFFSET; ++ } else { ++ lwip_close(i); ++ gazelle_clean_sock(i); + } + + err: +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index 912d471..2ffb077 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -130,6 +130,7 @@ extern void add_recv_list(int32_t fd); + extern ssize_t read_lwip_data(struct lwip_sock *sock, int32_t flags, u8_t apiflags); + extern struct pbuf *write_lwip_data(struct lwip_sock *sock, uint16_t remain_size, uint8_t *apiflags); + extern void gazelle_init_sock(int32_t fd); ++extern void gazelle_clean_sock(int32_t fd); + extern void write_lwip_over(struct lwip_sock *sock, uint32_t n); + #endif /* USE_LIBOS */ + +-- +2.23.0 + diff --git a/0034-add-accept4-and-epoll_create1.patch b/0034-add-accept4-and-epoll_create1.patch new file mode 100644 index 0000000000000000000000000000000000000000..37594ebaa81cbf7185b049cce82f88d34eeaa3a0 --- /dev/null +++ b/0034-add-accept4-and-epoll_create1.patch @@ -0,0 +1,182 @@ +From 547f316821a3b24e028d539f7f48b5e3e5ba5c36 Mon Sep 17 00:00:00 2001 +From: compile_success <980965867@qq.com> +Date: Wed, 19 Oct 2022 12:14:08 +0000 +Subject: [PATCH] add epoll_create1 and accept4 + +--- + src/api/posix_api.c | 1 + + src/api/sockets.c | 34 ++++++++++++++++++++++++++++++---- + src/include/lwip/sockets.h | 21 +++++++++++++++++++++ + src/include/posix_api.h | 1 + + 4 files changed, 53 insertions(+), 4 deletions(-) + +diff --git a/src/api/posix_api.c b/src/api/posix_api.c +index 6afb9c6..e721381 100644 +--- a/src/api/posix_api.c ++++ b/src/api/posix_api.c +@@ -104,6 +104,7 @@ int posix_api_init(void) + CHECK_DLSYM_RET_RETURN(posix_api->fcntl64_fn = dlsym(handle, "fcntl64")); + CHECK_DLSYM_RET_RETURN(posix_api->pipe_fn = dlsym(handle, "pipe")); + CHECK_DLSYM_RET_RETURN(posix_api->epoll_create_fn = dlsym(handle, "epoll_create")); ++ CHECK_DLSYM_RET_RETURN(posix_api->epoll_create1_fn = dlsym(handle, "epoll_create1")); + 
CHECK_DLSYM_RET_RETURN(posix_api->epoll_ctl_fn = dlsym(handle, "epoll_ctl")); + CHECK_DLSYM_RET_RETURN(posix_api->epoll_wait_fn = dlsym(handle, "epoll_wait")); + CHECK_DLSYM_RET_RETURN(posix_api->fork_fn = dlsym(handle, "fork")); +diff --git a/src/api/sockets.c b/src/api/sockets.c +index 4d4cea1..c939899 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -543,10 +543,11 @@ get_socket_by_fd(int fd) + * @param newconn the netconn for which to allocate a socket + * @param accepted 1 if socket has been created by accept(), + * 0 if socket has been created by socket() ++ * @param flags only support SOCK_CLOEXEC and SOCK_NONBLOCK + * @return the index of the new socket; -1 on error + */ + static int +-alloc_socket(struct netconn *newconn, int accepted) ++alloc_socket(struct netconn *newconn, int accepted, int flags) + { + int i; + SYS_ARCH_DECL_PROTECT(lev); +@@ -570,12 +571,19 @@ alloc_socket(struct netconn *newconn, int accepted) + break; + } + ++ /*add CLOEXEC OR NONBLOCK OR NONE*/ ++ type |= flags; ++ + SYS_ARCH_PROTECT(lev); + i = posix_api->socket_fn(domain, type, protocol); + if (i == -1) { + goto err; + } + ++ if ((flags & O_NONBLOCK) != 0){ ++ netconn_set_nonblocking(newconn, flags & O_NONBLOCK); ++ } ++ + if ((i < LWIP_SOCKET_OFFSET) || (i >= sockets_num + LWIP_SOCKET_OFFSET)) { + goto err; + } +@@ -721,7 +729,7 @@ free_socket(struct lwip_sock *sock, int is_tcp) + */ + + int +-lwip_accept(int s, struct sockaddr *addr, socklen_t *addrlen) ++lwip_accept4(int s, struct sockaddr *addr, socklen_t *addrlen, int flags) + { + struct lwip_sock *sock, *nsock; + struct netconn *newconn; +@@ -755,7 +763,7 @@ lwip_accept(int s, struct sockaddr *addr, socklen_t *addrlen) + } + LWIP_ASSERT("newconn != NULL", newconn != NULL); + +- newsock = alloc_socket(newconn, 1); ++ newsock = alloc_socket(newconn, 1, flags); + if (newsock == -1) { + netconn_delete(newconn); + sock_set_errno(sock, ENFILE); +@@ -827,6 +835,12 @@ lwip_accept(int s, struct sockaddr *addr, socklen_t 
*addrlen) + return newsock; + } + ++int ++lwip_accept(int s, struct sockaddr *addr, socklen_t *addrlen) ++{ ++ return lwip_accept4(s, addr, addrlen, 0); ++} ++ + int + lwip_bind(int s, const struct sockaddr *name, socklen_t namelen) + { +@@ -1823,6 +1837,10 @@ lwip_socket(int domain, int type, int protocol) + + LWIP_UNUSED_ARG(domain); /* @todo: check this */ + ++ int flags = type & ~SOCK_TYPE_MASK; ++ type &= SOCK_TYPE_MASK; ++ ++ + /* create a netconn */ + switch (type) { + case SOCK_RAW: +@@ -1862,7 +1880,15 @@ lwip_socket(int domain, int type, int protocol) + return -1; + } + +- i = alloc_socket(conn, 0); ++ if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)){ ++ set_errno(EINVAL); ++ return -1; ++ } ++ ++ if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) ++ flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; ++ ++ i = alloc_socket(conn, 0, flags); + + if (i == -1) { + netconn_delete(conn); +diff --git a/src/include/lwip/sockets.h b/src/include/lwip/sockets.h +index 4e7e671..3c5b87b 100644 +--- a/src/include/lwip/sockets.h ++++ b/src/include/lwip/sockets.h +@@ -573,6 +573,7 @@ void lwip_socket_thread_cleanup(void); /* LWIP_NETCONN_SEM_PER_THREAD==1: destro + #if LWIP_COMPAT_SOCKETS == 2 + /* This helps code parsers/code completion by not having the COMPAT functions as defines */ + #define lwip_accept accept ++#define lwip_accept4 accept4 + #define lwip_bind bind + #define lwip_shutdown shutdown + #define lwip_getpeername getpeername +@@ -614,7 +615,25 @@ int fcntl(int s, int cmd, ...); + #endif /* LWIP_POSIX_SOCKETS_IO_NAMES */ + #endif /* LWIP_COMPAT_SOCKETS == 2 */ + ++#ifndef O_CLOEXEC ++#define O_CLOEXEC 02000000 ++#endif ++ ++#ifndef SOCK_TYPE_MASK ++#define SOCK_TYPE_MASK 0xf ++#endif ++ ++#ifndef SOCK_CLOEXEC ++#define SOCK_CLOEXEC O_CLOEXEC ++#endif ++ ++#ifndef SOCK_NONBLOCK ++#define SOCK_NONBLOCK O_NONBLOCK ++#endif ++ ++ + int lwip_accept(int s, struct sockaddr *addr, socklen_t *addrlen); ++int lwip_accept4(int s, struct sockaddr *addr, socklen_t 
*addrlen, int flags); + int lwip_bind(int s, const struct sockaddr *name, socklen_t namelen); + int lwip_shutdown(int s, int how); + int lwip_getpeername (int s, struct sockaddr *name, socklen_t *namelen); +@@ -661,6 +680,8 @@ int lwip_inet_pton(int af, const char *src, void *dst); + /** @ingroup socket */ + #define accept(s,addr,addrlen) lwip_accept(s,addr,addrlen) + /** @ingroup socket */ ++#define accept4(s,addr,addrlen,flags) lwip_accept4(s,addr,addrlen,flags) ++/** @ingroup socket */ + #define bind(s,name,namelen) lwip_bind(s,name,namelen) + /** @ingroup socket */ + #define shutdown(s,how) lwip_shutdown(s,how) +diff --git a/src/include/posix_api.h b/src/include/posix_api.h +index c8f2cf9..e958ded 100644 +--- a/src/include/posix_api.h ++++ b/src/include/posix_api.h +@@ -66,6 +66,7 @@ typedef struct { + int (*fcntl64_fn)(int fd, int cmd, ...); + int (*pipe_fn)(int pipefd[2]); + int (*epoll_create_fn)(int size); ++ int (*epoll_create1_fn)(int size); + int (*epoll_ctl_fn)(int epfd, int op, int fd, struct epoll_event *event); + int (*epoll_wait_fn)(int epfd, struct epoll_event *events, int maxevents, int timeout); + int (*epoll_close_fn)(int epfd); +-- +2.33.0 + diff --git a/0035-add-writev-and-readv.patch b/0035-add-writev-and-readv.patch new file mode 100644 index 0000000000000000000000000000000000000000..fc43028429f66a23c312baf36a5bbcf207e625d8 --- /dev/null +++ b/0035-add-writev-and-readv.patch @@ -0,0 +1,41 @@ +From d5aca360e7518791d21ca63b44ae2dfaa6c35072 Mon Sep 17 00:00:00 2001 +From: compile_success <980965867@qq.com> +Date: Mon, 17 Oct 2022 14:18:53 +0000 +Subject: [PATCH] add writev and readv + +--- + src/api/posix_api.c | 2 ++ + src/include/posix_api.h | 2 ++ + 2 files changed, 4 insertions(+) + +diff --git a/src/api/posix_api.c b/src/api/posix_api.c +index e721381..b7334da 100644 +--- a/src/api/posix_api.c ++++ b/src/api/posix_api.c +@@ -93,7 +93,9 @@ int posix_api_init(void) + CHECK_DLSYM_RET_RETURN(posix_api->shutdown_fn = dlsym(handle, "shutdown")); 
+ CHECK_DLSYM_RET_RETURN(posix_api->close_fn = dlsym(handle, "close")); + CHECK_DLSYM_RET_RETURN(posix_api->read_fn = dlsym(handle, "read")); ++ CHECK_DLSYM_RET_RETURN(posix_api->readv_fn = dlsym(handle, "readv")); + CHECK_DLSYM_RET_RETURN(posix_api->write_fn = dlsym(handle, "write")); ++ CHECK_DLSYM_RET_RETURN(posix_api->writev_fn = dlsym(handle, "writev")); + CHECK_DLSYM_RET_RETURN(posix_api->recv_fn = dlsym(handle, "recv")); + CHECK_DLSYM_RET_RETURN(posix_api->send_fn = dlsym(handle, "send")); + CHECK_DLSYM_RET_RETURN(posix_api->recv_msg = dlsym(handle, "recvmsg")); +diff --git a/src/include/posix_api.h b/src/include/posix_api.h +index e958ded..a73e2ec 100644 +--- a/src/include/posix_api.h ++++ b/src/include/posix_api.h +@@ -54,7 +54,9 @@ typedef struct { + int (*close_fn)(int fd); + pid_t (*fork_fn)(void); + ssize_t (*read_fn)(int fd, void *mem, size_t len); ++ ssize_t (*readv_fn)(int s, const struct iovec *iov, int iovcnt); + ssize_t (*write_fn)(int fd, const void *data, size_t len); ++ ssize_t (*writev_fn)(int s, const struct iovec *iov, int iovcnt); + ssize_t (*recv_fn)(int sockfd, void *buf, size_t len, int flags); + ssize_t (*send_fn)(int sockfd, const void *buf, size_t len, int flags); + ssize_t (*recv_msg)(int sockfd, const struct msghdr *msg, int flags); +-- +2.33.0 + diff --git a/0036-add-fs-secure-compilation-option.patch b/0036-add-fs-secure-compilation-option.patch new file mode 100644 index 0000000000000000000000000000000000000000..3021f6e62dbe652d6e3bc97ebf018b87f1c7b108 --- /dev/null +++ b/0036-add-fs-secure-compilation-option.patch @@ -0,0 +1,25 @@ +From c2c7c2f5bbf84f62acc6468113b1f11cdc6b8410 Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Sat, 22 Oct 2022 16:05:37 +0800 +Subject: [PATCH] add fs secure compilation option + +--- + src/Makefile | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/Makefile b/src/Makefile +index 1676a71..f445601 100644 +--- a/src/Makefile ++++ b/src/Makefile +@@ -4,7 +4,7 @@ ROOT_DIR := 
$(dir $(abspath $(LWIP_DIR))) + LWIP_INC = $(LWIP_DIR)/include + DPDK_INCLUDE_FILE ?= /usr/include/dpdk + +-SEC_FLAGS = -fstack-protector-strong -Werror -Wall -Wl,-z,relro,-z,now -Wl,-z,noexecstack -Wtrampolines -fPIC ++SEC_FLAGS = -fstack-protector-strong -Werror -Wall -Wl,-z,relro,-z,now -Wl,-z,noexecstack -Wtrampolines -fPIC -D_FORTIRY_SOURCE=2 -O2 + + CC = gcc + AR = ar +-- +2.23.0 + diff --git a/0037-enable-ARP-QUEUE-to-avoid-sync-packet-dropped.patch b/0037-enable-ARP-QUEUE-to-avoid-sync-packet-dropped.patch new file mode 100644 index 0000000000000000000000000000000000000000..ea99146f1b9655e7b84b401fef132194a1ba068a --- /dev/null +++ b/0037-enable-ARP-QUEUE-to-avoid-sync-packet-dropped.patch @@ -0,0 +1,27 @@ +From 2acba8aecef2140045a8ae50b05f9f36f5dc865f Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Wed, 23 Nov 2022 09:37:34 +0800 +Subject: [PATCH] enable ARP QUEUE to avoid sync packet dropped + +--- + src/include/lwipopts.h | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index 7459991..a5add21 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -169,6 +169,10 @@ + + #define ARP_TABLE_SIZE 512 + ++#define ARP_QUEUEING 1 ++ ++#define ARP_QUEUE_LEN 32 ++ + /* --------------------------------------- + * ------- NIC offloads -------- + * --------------------------------------- +-- +2.23.0 + diff --git a/0038-add-tso.patch b/0038-add-tso.patch new file mode 100644 index 0000000000000000000000000000000000000000..d6a9841c920ee392de78e80432f50f49b886aa54 --- /dev/null +++ b/0038-add-tso.patch @@ -0,0 +1,380 @@ +From af8ac36acb103aa27b498dafa0ae8ba4332faac8 Mon Sep 17 00:00:00 2001 +From: wu-changsheng +Date: Sat, 3 Dec 2022 21:38:09 +0800 +Subject: [PATCH] add-tso + +--- + src/core/ipv4/etharp.c | 17 +++- + src/core/ipv4/ip4.c | 10 ++- + src/core/tcp.c | 6 ++ + src/core/tcp_out.c | 178 +++++++++++++++++++++++++++++++++++++-- + src/include/dpdk_cksum.h | 2 +- + src/include/lwip/pbuf.h | 
8 +- + src/include/lwipopts.h | 4 + + 7 files changed, 211 insertions(+), 14 deletions(-) + +diff --git a/src/core/ipv4/etharp.c b/src/core/ipv4/etharp.c +index effb7db..f1903e4 100644 +--- a/src/core/ipv4/etharp.c ++++ b/src/core/ipv4/etharp.c +@@ -482,6 +482,13 @@ etharp_update_arp_entry(struct netif *netif, const ip4_addr_t *ipaddr, struct et + struct pbuf *p = arp_table[i].q; + arp_table[i].q = NULL; + #endif /* ARP_QUEUEING */ ++#if USE_LIBOS ++ struct pbuf *tmp = p->next; ++ while (tmp != NULL) { ++ tmp->ref--; ++ tmp = tmp->next; ++ } ++#endif + /* send the queued IP packet */ + ethernet_output(netif, p, (struct eth_addr *)(netif->hwaddr), ethaddr, ETHTYPE_IP); + /* free the queued IP packet */ +@@ -1027,7 +1034,15 @@ etharp_query(struct netif *netif, const ip4_addr_t *ipaddr, struct pbuf *q) + } else { + /* referencing the old pbuf is enough */ + p = q; +- pbuf_ref(p); ++#if USE_LIBOS ++ struct pbuf *tmp = p; ++ while (tmp != NULL) { ++ pbuf_ref(tmp); ++ tmp = tmp->next; ++ } ++#else ++ pbuf_ref(p); ++#endif + } + /* packet could be taken over? 
*/ + if (p != NULL) { +diff --git a/src/core/ipv4/ip4.c b/src/core/ipv4/ip4.c +index 1334cdc..d823491 100644 +--- a/src/core/ipv4/ip4.c ++++ b/src/core/ipv4/ip4.c +@@ -1034,9 +1034,15 @@ ip4_output_if_opt_src(struct pbuf *p, const ip4_addr_t *src, const ip4_addr_t *d + #endif /* ENABLE_LOOPBACK */ + #if IP_FRAG + /* don't fragment if interface has mtu set to 0 [loopif] */ +- if (netif->mtu && (p->tot_len > netif->mtu)) { +- return ip4_frag(p, netif, dest); ++#if USE_LIBOS ++ if (!(get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO)) { ++#endif ++ if (netif->mtu && (p->tot_len > netif->mtu)) { ++ return ip4_frag(p, netif, dest); ++ } ++#if USE_LIBOS + } ++#endif + #endif /* IP_FRAG */ + + LWIP_DEBUGF(IP_DEBUG, ("ip4_output_if: call netif->output()\n")); +diff --git a/src/core/tcp.c b/src/core/tcp.c +index 7c18408..51ada38 100644 +--- a/src/core/tcp.c ++++ b/src/core/tcp.c +@@ -1756,7 +1756,9 @@ tcp_seg_free(struct tcp_seg *seg) + seg->p = NULL; + #endif /* TCP_DEBUG */ + } ++#if !USE_LIBOS + memp_free(MEMP_TCP_SEG, seg); ++#endif + } + } + +@@ -1792,10 +1794,14 @@ tcp_seg_copy(struct tcp_seg *seg) + + LWIP_ASSERT("tcp_seg_copy: invalid seg", seg != NULL); + ++#if USE_LIBOS ++ cseg = (struct tcp_seg *)((uint8_t *)seg->p + sizeof(struct pbuf_custom)); ++#else + cseg = (struct tcp_seg *)memp_malloc(MEMP_TCP_SEG); + if (cseg == NULL) { + return NULL; + } ++#endif + SMEMCPY((u8_t *)cseg, (const u8_t *)seg, sizeof(struct tcp_seg)); + pbuf_ref(cseg->p); + return cseg; +diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c +index 2834ba3..ee6f40b 100644 +--- a/src/core/tcp_out.c ++++ b/src/core/tcp_out.c +@@ -161,6 +161,40 @@ tcp_route(const struct tcp_pcb *pcb, const ip_addr_t *src, const ip_addr_t *dst) + * The TCP header is filled in except ackno and wnd. + * p is freed on failure. 
+ */ ++#if USE_LIBOS ++void tcp_init_segment(struct tcp_seg *seg, const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, ++ u32_t seqno, u8_t optflags) ++{ ++ u8_t optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(optflags, pcb); ++ ++ seg->flags = optflags; ++ seg->next = NULL; ++ seg->p = p; ++ seg->len = p->tot_len - optlen; ++ ++ /* build TCP header */ ++ pbuf_add_header(p, TCP_HLEN); ++ seg->tcphdr = (struct tcp_hdr *)seg->p->payload; ++ seg->tcphdr->src = lwip_htons(pcb->local_port); ++ seg->tcphdr->dest = lwip_htons(pcb->remote_port); ++ seg->tcphdr->seqno = lwip_htonl(seqno); ++ ++ TCPH_HDRLEN_FLAGS_SET(seg->tcphdr, (TCP_HLEN + optlen) / 4, hdrflags); ++ seg->tcphdr->urgp = 0; ++} ++ ++static struct tcp_seg * ++tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32_t seqno, u8_t optflags) ++{ ++ struct tcp_seg *seg; ++ ++ seg = (struct tcp_seg *)((uint8_t *)p + sizeof(struct pbuf_custom)); ++ ++ tcp_init_segment(seg, pcb, p, hdrflags, seqno, optflags); ++ ++ return seg; ++} ++#else + static struct tcp_seg * + tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32_t seqno, u8_t optflags) + { +@@ -210,6 +244,7 @@ tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32 + seg->tcphdr->urgp = 0; + return seg; + } ++#endif + + /** + * Allocate a PBUF_RAM pbuf, perhaps with extra space at the end. 
+@@ -1272,6 +1307,60 @@ tcp_build_wnd_scale_option(u32_t *opts) + } + #endif + ++#if USE_LIBOS ++static struct tcp_seg *tcp_output_over(struct tcp_pcb *pcb, struct tcp_seg *seg, struct tcp_seg *useg) ++{ ++ if (TCP_TCPLEN(seg) > 0) { ++ seg->next = NULL; ++ if (useg == NULL) { ++ pcb->unacked = seg; ++ useg = seg; ++ } else { ++ if (TCP_SEQ_LT(lwip_ntohl(seg->tcphdr->seqno), lwip_ntohl(useg->tcphdr->seqno))) { ++ /* add segment to before tail of unacked list, keeping the list sorted */ ++ struct tcp_seg **cur_seg = &(pcb->unacked); ++ while (*cur_seg && ++ TCP_SEQ_LT(lwip_ntohl((*cur_seg)->tcphdr->seqno), lwip_ntohl(seg->tcphdr->seqno))) { ++ cur_seg = &((*cur_seg)->next ); ++ } ++ seg->next = (*cur_seg); ++ (*cur_seg) = seg; ++ } else { ++ /* add segment to tail of unacked list */ ++ useg->next = seg; ++ useg = seg; ++ } ++ } ++ } else { ++ tcp_seg_free(seg); ++ } ++ ++ return useg; ++} ++static err_t tcp_output_seg(struct tcp_pcb *pcb, struct tcp_seg *seg, struct netif *netif, u32_t snd_nxt) ++{ ++ if (pcb->state != SYN_SENT) { ++ TCPH_SET_FLAG(seg->tcphdr, TCP_ACK); ++ } ++ ++ err_t err = tcp_output_segment(seg, pcb, netif); ++ if (err != ERR_OK) { ++ /* segment could not be sent, for whatever reason */ ++ tcp_set_flags(pcb, TF_NAGLEMEMERR); ++ return err; ++ } ++ ++ if (pcb->state != SYN_SENT) { ++ tcp_clear_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW); ++ } ++ ++ if (TCP_SEQ_LT(pcb->snd_nxt, snd_nxt)) { ++ pcb->snd_nxt = snd_nxt; ++ } ++ ++ return ERR_OK; ++} ++#endif + /** + * @ingroup tcp_raw + * Find out what we can send and send it +@@ -1376,16 +1465,88 @@ tcp_output(struct tcp_pcb *pcb) + for (; useg->next != NULL; useg = useg->next); + } + /* data available and window allows it to be sent? 
*/ ++ + #if USE_LIBOS +- /* avoid send cose too much time, limit send pkts num max 10 */ +- uint16_t send_pkt = 0; +- while (seg != NULL && send_pkt < 10 && +- lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len <= wnd) { +- send_pkt++; +-#else ++ if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO) { ++ while(seg) { ++ /** ++ * 1)遍历unsent队列,找到所有的待发送seg. 将seg的buf串起来 ++ * 2) 生成新的seg, 调用tcp_output_segment, 新的seg释放掉 ++ * 3) 若成功,则更新snd_nxt, unacked队列,和unsent队列。 ++ */ ++ struct tcp_seg *start_seg = seg; ++ struct pbuf *first_pbuf = NULL; ++ struct pbuf *pre_pbuf = NULL; ++ u8_t pbuf_chain_len = 0; ++ u32_t next_seqno = lwip_ntohl(seg->tcphdr->seqno); ++ while (seg != NULL && pbuf_chain_len < MAX_PBUF_CHAIN_LEN) { ++ u32_t seg_seqno = lwip_ntohl(seg->tcphdr->seqno); ++ if (seg_seqno - pcb->lastack + seg->len > wnd) { ++ if (first_pbuf) ++ break; ++ else ++ goto output_done; ++ } ++ ++ if ((tcp_do_output_nagle(pcb) == 0) && ((pcb->flags & (TF_NAGLEMEMERR | TF_FIN)) == 0)) { ++ if (first_pbuf) ++ break; ++ else ++ goto output_done; ++ } ++ ++ if (seg->len < TCP_MSS || next_seqno != seg_seqno || pbuf_chain_len >= MAX_PBUF_CHAIN_LEN) { ++ break; ++ } ++ if (first_pbuf == NULL && (seg->next == NULL || seg->next->len < TCP_MSS)) { ++ break; ++ } ++ ++ pbuf_remove_header(seg->p, seg->p->tot_len - seg->len); ++ if (first_pbuf == NULL) { ++ first_pbuf = seg->p; ++ } else { ++ first_pbuf->tot_len += seg->p->len; ++ pre_pbuf->next = seg->p; ++ } ++ ++ pre_pbuf = seg->p; ++ next_seqno = seg_seqno + TCP_TCPLEN(seg); ++ seg = seg->next; ++ pcb->unsent = seg; ++ pbuf_chain_len++; ++ } ++ ++ if (first_pbuf == NULL) { ++ err = tcp_output_seg(pcb, seg, netif, next_seqno + seg->len); ++ if (err != ERR_OK) ++ return err; ++ pcb->unsent = seg->next; ++ useg = tcp_output_over(pcb, seg, useg); ++ seg = pcb->unsent; ++ continue; ++ } ++ ++ struct tcp_seg new_seg; ++ tcp_init_segment(&new_seg, pcb, first_pbuf, 0, lwip_ntohl(start_seg->tcphdr->seqno), 0); ++ ++ err = tcp_output_seg(pcb, 
&new_seg, netif, next_seqno); ++ ++ for (u32_t i = 0; i < pbuf_chain_len; i++) { ++ struct tcp_seg *next_seg = start_seg->next; ++ start_seg->p->next = NULL; ++ useg = tcp_output_over(pcb, start_seg, useg); ++ start_seg = next_seg; ++ } ++ ++ pbuf_remove_header(new_seg.p, new_seg.p->tot_len - new_seg.len - TCPH_HDRLEN_BYTES(new_seg.tcphdr)); ++ new_seg.p->tot_len = new_seg.p->len; ++ } ++ } else ++#endif ++{ + while (seg != NULL && + lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len <= wnd) { +-#endif + LWIP_ASSERT("RST not expected here!", + (TCPH_FLAGS(seg->tcphdr) & TCP_RST) == 0); + /* Stop sending if the nagle algorithm would prevent it +@@ -1462,6 +1623,7 @@ tcp_output(struct tcp_pcb *pcb) + } + seg = pcb->unsent; + } ++} + #if TCP_OVERSIZE + if (pcb->unsent == NULL) { + /* last unsent has been removed, reset unsent_oversize */ +@@ -1627,7 +1789,7 @@ tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif + IF__NETIF_CHECKSUM_ENABLED(netif, NETIF_CHECKSUM_GEN_TCP) { + #if CHECKSUM_GEN_TCP_HW + if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_CKSUM) { +- tcph_cksum_set(seg->p, TCP_HLEN); ++ tcph_cksum_set(seg->p, TCPH_HDRLEN_BYTES(seg->tcphdr)); + seg->tcphdr->chksum = ip_chksum_pseudo_offload(IP_PROTO_TCP,seg->p->tot_len, &pcb->local_ip, &pcb->remote_ip); + } else { + #if TCP_CHECKSUM_ON_COPY +diff --git a/src/include/dpdk_cksum.h b/src/include/dpdk_cksum.h +index e57be4d..83c9c38 100644 +--- a/src/include/dpdk_cksum.h ++++ b/src/include/dpdk_cksum.h +@@ -78,7 +78,7 @@ static inline void iph_cksum_set(struct pbuf *p, u16_t len, bool do_ipcksum) { + #include + + static inline void tcph_cksum_set(struct pbuf *p, u16_t len) { +- (void)len; ++ p->l4_len = len; + p->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM; + } + +diff --git a/src/include/lwip/pbuf.h b/src/include/lwip/pbuf.h +index 87cd960..ef879da 100644 +--- a/src/include/lwip/pbuf.h ++++ b/src/include/lwip/pbuf.h +@@ -223,10 +223,14 @@ struct pbuf { + #if USE_LIBOS && 
CHECKSUM_OFFLOAD_ALL + /** checksum offload ol_flags */ + u64_t ol_flags; +- /** checksum offload l2_len */ ++ /* < L2 (MAC) Header Length for non-tunneling pkt. */ + u64_t l2_len:7; +- /** checksum offload l3_len */ ++ /* < L3 (IP) Header Length. */ + u64_t l3_len:9; ++ /* < L4 (TCP/UDP) Header Length. */ ++ u64_t l4_len:8; ++ u16_t header_off; ++ u8_t rexmit; + #endif /* USE_LIBOS CHECKSUM_OFFLOAD_SWITCH */ + + /** In case the user needs to store data custom data on a pbuf */ +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index a5add21..7c819d0 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -173,6 +173,10 @@ + + #define ARP_QUEUE_LEN 32 + ++#define MAX_PBUF_CHAIN_LEN 40 ++ ++#define MIN_TSO_SEG_LEN 256 ++ + /* --------------------------------------- + * ------- NIC offloads -------- + * --------------------------------------- +-- +2.23.0 + diff --git a/0039-optimize-app-thread-write-buff-block.patch b/0039-optimize-app-thread-write-buff-block.patch new file mode 100644 index 0000000000000000000000000000000000000000..42280e32072a072241a5e3611883f56d6405c82a --- /dev/null +++ b/0039-optimize-app-thread-write-buff-block.patch @@ -0,0 +1,94 @@ +From be541628552ccc3a8dcd3c6ad6e5a1aed07c4928 Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: Sat, 3 Dec 2022 20:35:34 +0800 +Subject: [PATCH 2/2] fix app thread write fail + +--- + src/core/tcp_out.c | 2 +- + src/include/lwip/pbuf.h | 3 +++ + src/include/lwipsock.h | 33 +++++++++++++++++++++++---------- + 3 files changed, 27 insertions(+), 11 deletions(-) + +diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c +index ee6f40b..f53750b 100644 +--- a/src/core/tcp_out.c ++++ b/src/core/tcp_out.c +@@ -763,7 +763,7 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + + pos += seglen; + #if USE_LIBOS +- write_lwip_over((struct lwip_sock*)arg, 1); ++ write_lwip_over((struct lwip_sock*)arg); + #endif + } + +diff --git a/src/include/lwip/pbuf.h 
b/src/include/lwip/pbuf.h +index ef879da..10e2af9 100644 +--- a/src/include/lwip/pbuf.h ++++ b/src/include/lwip/pbuf.h +@@ -231,6 +231,9 @@ struct pbuf { + u64_t l4_len:8; + u16_t header_off; + u8_t rexmit; ++ u8_t in_write; ++ u8_t head; ++ struct pbuf *last; + #endif /* USE_LIBOS CHECKSUM_OFFLOAD_SWITCH */ + + /** In case the user needs to store data custom data on a pbuf */ +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index 2ffb077..f919330 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -93,17 +93,30 @@ struct lwip_sock { + #endif + + #if USE_LIBOS +- struct pbuf *send_lastdata; +- uint16_t send_datalen; +- volatile uint32_t events __rte_cache_aligned; /* available events */ +- struct pbuf *recv_lastdata __rte_cache_aligned; /* unread data in one pbuf */ +- struct list_node recv_list __rte_cache_aligned; +- struct list_node event_list __rte_cache_aligned; +- struct list_node send_list __rte_cache_aligned; +- uint32_t in_send __rte_cache_aligned; /* avoid sock too much send rpc msg*/ ++ char pad0 __rte_cache_aligned; ++ /* app thread use */ ++ struct pbuf *recv_lastdata; /* unread data in one pbuf */ ++ uint16_t remain_len; + uint32_t epoll_events; /* registered events, EPOLLONESHOT write frequently */ +- char pad __rte_cache_aligned; ++ volatile uint32_t events; /* available events */ ++ struct list_node event_list; ++ ++ char pad1 __rte_cache_aligned; ++ /* app and stack thread all use */ ++ uint32_t in_send; /* avoid sock too much send rpc msg*/ ++ pthread_spinlock_t sock_lock; ++ ++ char pad2 __rte_cache_aligned; ++ /* stack thread all use */ ++ struct list_node recv_list; ++ struct list_node send_list; ++ struct pbuf *send_lastdata; ++ struct pbuf *send_pre_del; ++ uint64_t recv_all; ++ uint64_t send_all; + ++ char pad3 __rte_cache_aligned; ++ /* nerver change */ + struct wakeup_poll *wakeup; + epoll_data_t ep_data; + struct lwip_sock *listen_next; /* listenfd list */ +@@ -131,7 +144,7 @@ extern ssize_t 
read_lwip_data(struct lwip_sock *sock, int32_t flags, u8_t apifla + extern struct pbuf *write_lwip_data(struct lwip_sock *sock, uint16_t remain_size, uint8_t *apiflags); + extern void gazelle_init_sock(int32_t fd); + extern void gazelle_clean_sock(int32_t fd); +-extern void write_lwip_over(struct lwip_sock *sock, uint32_t n); ++extern void write_lwip_over(struct lwip_sock *sock); + #endif /* USE_LIBOS */ + + struct lwip_sock *get_socket(int s); +-- +2.8.4.windows.1 + diff --git a/0040-add-huge-snd_buf.patch b/0040-add-huge-snd_buf.patch new file mode 100644 index 0000000000000000000000000000000000000000..73fa142f56c1c77af51accd6160a6cf885266bdc --- /dev/null +++ b/0040-add-huge-snd_buf.patch @@ -0,0 +1,86 @@ +diff -Nur lwip-2.1.3-org/src/core/init.c lwip-2.1.3/src/core/init.c +--- lwip-2.1.3-org/src/core/init.c 2022-12-06 14:40:45.280000000 +0000 ++++ lwip-2.1.3/src/core/init.c 2022-12-06 14:41:01.452000000 +0000 +@@ -306,7 +306,7 @@ + #if TCP_SNDLOWAT >= TCP_SND_BUF + #error "lwip_sanity_check: WARNING: TCP_SNDLOWAT must be less than TCP_SND_BUF. If you know what you are doing, define LWIP_DISABLE_TCP_SANITY_CHECKS to 1 to disable this error." + #endif +-#if TCP_SNDLOWAT >= (0xFFFF - (4 * TCP_MSS)) ++#if TCP_SNDLOWAT >= (0xFFFFFFFF - (4 * TCP_MSS)) + #error "lwip_sanity_check: WARNING: TCP_SNDLOWAT must at least be 4*MSS below u16_t overflow!" 
+ #endif + #if TCP_SNDQUEUELOWAT >= TCP_SND_QUEUELEN +diff -Nur lwip-2.1.3-org/src/core/pbuf.c lwip-2.1.3/src/core/pbuf.c +--- lwip-2.1.3-org/src/core/pbuf.c 2022-12-06 14:40:45.280000000 +0000 ++++ lwip-2.1.3/src/core/pbuf.c 2022-12-06 14:46:04.860000000 +0000 +@@ -869,13 +869,13 @@ + /* proceed to last pbuf of chain */ + for (p = h; p->next != NULL; p = p->next) { + /* add total length of second chain to all totals of first chain */ +- p->tot_len = (u16_t)(p->tot_len + t->tot_len); ++ p->tot_len = p->tot_len + t->tot_len; + } + /* { p is last pbuf of first h chain, p->next == NULL } */ + LWIP_ASSERT("p->tot_len == p->len (of last pbuf in chain)", p->tot_len == p->len); + LWIP_ASSERT("p->next == NULL", p->next == NULL); + /* add total length of second chain to last pbuf total of first chain */ +- p->tot_len = (u16_t)(p->tot_len + t->tot_len); ++ p->tot_len = p->tot_len + t->tot_len; + /* chain last pbuf of head (p) with first of tail (t) */ + p->next = t; + /* p->next now references t, but the caller will drop its reference to t, +@@ -1181,7 +1181,7 @@ + if (r != NULL) { + /* Update the tot_len field in the first part */ + for (i = p; i != NULL; i = i->next) { +- i->tot_len = (u16_t)(i->tot_len - r->tot_len); ++ i->tot_len = tot_len_front; + LWIP_ASSERT("tot_len/len mismatch in last pbuf", + (i->next != NULL) || (i->tot_len == i->len)); + } +@@ -1192,6 +1192,9 @@ + /* tot_len field in rest does not need modifications */ + /* reference counters do not need modifications */ + *rest = r; ++ r->tot_len = r->len; ++ }else{ ++ p->tot_len = tot_len_front; + } + } + } +diff -Nur lwip-2.1.3-org/src/include/lwip/opt.h lwip-2.1.3/src/include/lwip/opt.h +--- lwip-2.1.3-org/src/include/lwip/opt.h 2022-12-06 14:40:45.292000000 +0000 ++++ lwip-2.1.3/src/include/lwip/opt.h 2022-12-06 14:41:01.456000000 +0000 +@@ -1482,7 +1482,7 @@ + * send window while having a small receive window only. 
+ */ + #if !defined LWIP_WND_SCALE || defined __DOXYGEN__ +-#define LWIP_WND_SCALE 0 ++#define LWIP_WND_SCALE 1 + #define TCP_RCV_SCALE 0 + #endif + +diff -Nur lwip-2.1.3-org/src/include/lwip/pbuf.h lwip-2.1.3/src/include/lwip/pbuf.h +--- lwip-2.1.3-org/src/include/lwip/pbuf.h 2022-12-06 14:40:45.284000000 +0000 ++++ lwip-2.1.3/src/include/lwip/pbuf.h 2022-12-06 14:46:36.720000000 +0000 +@@ -197,7 +197,7 @@ + * For non-queue packet chains this is the invariant: + * p->tot_len == p->len + (p->next? p->next->tot_len: 0) + */ +- u16_t tot_len; ++ u32_t tot_len; + + /** length of this buffer */ + u16_t len; +diff -Nur lwip-2.1.3-org/src/include/lwipopts.h lwip-2.1.3/src/include/lwipopts.h +--- lwip-2.1.3-org/src/include/lwipopts.h 2022-12-06 14:40:45.292000000 +0000 ++++ lwip-2.1.3/src/include/lwipopts.h 2022-12-06 14:41:01.456000000 +0000 +@@ -97,7 +97,7 @@ + + #define TCP_WND (40 * TCP_MSS) + +-#define TCP_SND_BUF (40 * TCP_MSS) ++#define TCP_SND_BUF (2500 * TCP_MSS) + + #define TCP_SND_QUEUELEN (8191) + diff --git a/0041-optimite-pcb-list-limit-send-size-and-ack-now.patch b/0041-optimite-pcb-list-limit-send-size-and-ack-now.patch new file mode 100644 index 0000000000000000000000000000000000000000..2c47b1514e5239be68ca9123d5f08fd56dfa5b29 --- /dev/null +++ b/0041-optimite-pcb-list-limit-send-size-and-ack-now.patch @@ -0,0 +1,374 @@ +From 08716b71ccb93c6d998d1654c1fac137f29d2851 Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: Tue, 13 Dec 2022 22:27:33 +0800 +Subject: [PATCH] optimite pcb-list limit , send size and ack now + +--- + src/core/tcp.c | 1 + + src/core/tcp_in.c | 16 +++++++- + src/core/tcp_out.c | 103 ++++++++++++++++++++++++++++++------------------- + src/include/lwip/opt.h | 2 +- + src/include/lwip/tcp.h | 2 + + src/include/lwipsock.h | 2 - + 6 files changed, 83 insertions(+), 43 deletions(-) + +diff --git a/src/core/tcp.c b/src/core/tcp.c +index 51ada38..cb08f95 100644 +--- a/src/core/tcp.c ++++ b/src/core/tcp.c +@@ -2297,6 +2297,7 @@ 
tcp_pcb_purge(struct tcp_pcb *pcb) + tcp_segs_free(pcb->unsent); + tcp_segs_free(pcb->unacked); + pcb->unacked = pcb->unsent = NULL; ++ pcb->last_unacked = pcb->last_unsent = NULL; + #if TCP_OVERSIZE + pcb->unsent_oversize = 0; + #endif /* TCP_OVERSIZE */ +diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c +index 2d6cb6a..78954bd 100644 +--- a/src/core/tcp_in.c ++++ b/src/core/tcp_in.c +@@ -976,8 +976,14 @@ tcp_process(struct tcp_pcb *pcb) + rseg = pcb->unsent; + LWIP_ASSERT("no segment to free", rseg != NULL); + pcb->unsent = rseg->next; ++ if (pcb->last_unsent == rseg) { ++ pcb->last_unsent = rseg->next; ++ } + } else { + pcb->unacked = rseg->next; ++ if (pcb->last_unacked == rseg) { ++ pcb->last_unacked = rseg->next; ++ } + } + tcp_seg_free(rseg); + +@@ -1393,6 +1399,8 @@ tcp_receive(struct tcp_pcb *pcb) + /* Remove segment from the unacknowledged list if the incoming + ACK acknowledges them. */ + pcb->unacked = tcp_free_acked_segments(pcb, pcb->unacked, "unacked", pcb->unsent); ++ if (pcb->unacked == NULL) ++ pcb->last_unacked = NULL; + /* We go through the ->unsent list to see if any of the segments + on the list are acknowledged by the ACK. This may seem + strange since an "unsent" segment shouldn't be acked. The +@@ -1400,6 +1408,8 @@ tcp_receive(struct tcp_pcb *pcb) + ->unsent list after a retransmission, so these segments may + in fact have been sent once. */ + pcb->unsent = tcp_free_acked_segments(pcb, pcb->unsent, "unsent", pcb->unacked); ++ if (pcb->unsent == NULL) ++ pcb->last_unsent = NULL; + + /* If there's nothing left to acknowledge, stop the retransmit + timer, otherwise reset it to start again */ +@@ -1736,7 +1746,11 @@ tcp_receive(struct tcp_pcb *pcb) + + + /* Acknowledge the segment(s). 
*/ +- tcp_ack(pcb); ++ if (flags & TCP_PSH) { ++ tcp_ack_now(pcb); ++ } else { ++ tcp_ack(pcb); ++ } + + #if LWIP_TCP_SACK_OUT + if (LWIP_TCP_SACK_VALID(pcb, 0)) { +diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c +index f53750b..55053d8 100644 +--- a/src/core/tcp_out.c ++++ b/src/core/tcp_out.c +@@ -631,11 +631,7 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + #endif /* TCP_OVERSIZE */ + } + #else /* USE_LIBOS */ +- if (pcb->unsent != NULL) { +- /* @todo: this could be sped up by keeping last_unsent in the pcb */ +- for (last_unsent = pcb->unsent; last_unsent->next != NULL; +- last_unsent = last_unsent->next); +- } ++ last_unsent = pcb->last_unsent; + #endif /* USE_LIBOS */ + + /* +@@ -851,6 +847,9 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + * Finally update the pcb state. + */ + #if USE_LIBOS ++ if (queue) { ++ pcb->last_unsent = prev_seg; ++ } + pcb->snd_lbb += pos; + pcb->snd_buf -= pos; + #else +@@ -1050,6 +1049,8 @@ tcp_split_unsent_seg(struct tcp_pcb *pcb, u16_t split) + /* Finally insert remainder into queue after split (which stays head) */ + seg->next = useg->next; + useg->next = seg; ++ if (pcb->last_unsent == useg) ++ pcb->last_unsent = seg; + + #if TCP_OVERSIZE + /* If remainder is last segment on the unsent, ensure we clear the oversize amount +@@ -1086,9 +1087,7 @@ tcp_send_fin(struct tcp_pcb *pcb) + + /* first, try to add the fin to the last unsent segment */ + if (pcb->unsent != NULL) { +- struct tcp_seg *last_unsent; +- for (last_unsent = pcb->unsent; last_unsent->next != NULL; +- last_unsent = last_unsent->next); ++ struct tcp_seg *last_unsent = pcb->unsent; + + if ((TCPH_FLAGS(last_unsent->tcphdr) & (TCP_SYN | TCP_FIN | TCP_RST)) == 0) { + /* no SYN/FIN/RST flag in the header, we can add the FIN flag */ +@@ -1182,10 +1181,10 @@ tcp_enqueue_flags(struct tcp_pcb *pcb, u8_t flags) + if (pcb->unsent == NULL) { + pcb->unsent = seg; + } else { +- struct tcp_seg *useg; +- for (useg = 
pcb->unsent; useg->next != NULL; useg = useg->next); ++ struct tcp_seg *useg = pcb->last_unsent; + useg->next = seg; + } ++ pcb->last_unsent = seg; + #if TCP_OVERSIZE + /* The new unsent tail has no space */ + pcb->unsent_oversize = 0; +@@ -1314,6 +1313,7 @@ static struct tcp_seg *tcp_output_over(struct tcp_pcb *pcb, struct tcp_seg *seg, + seg->next = NULL; + if (useg == NULL) { + pcb->unacked = seg; ++ pcb->last_unacked = seg; + useg = seg; + } else { + if (TCP_SEQ_LT(lwip_ntohl(seg->tcphdr->seqno), lwip_ntohl(useg->tcphdr->seqno))) { +@@ -1329,6 +1329,7 @@ static struct tcp_seg *tcp_output_over(struct tcp_pcb *pcb, struct tcp_seg *seg, + /* add segment to tail of unacked list */ + useg->next = seg; + useg = seg; ++ pcb->last_unacked = seg; + } + } + } else { +@@ -1460,15 +1461,14 @@ tcp_output(struct tcp_pcb *pcb) + pcb->persist_backoff = 0; + + /* useg should point to last segment on unacked queue */ +- useg = pcb->unacked; +- if (useg != NULL) { +- for (; useg->next != NULL; useg = useg->next); +- } ++ useg = pcb->last_unacked; ++ + /* data available and window allows it to be sent? */ + ++ u32_t send_len = 0; + #if USE_LIBOS + if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO) { +- while(seg) { ++ while(seg && send_len < 0xffff) { + /** + * 1)遍历unsent队列,找到所有的待发送seg. 
将seg的buf串起来 + * 2) 生成新的seg, 调用tcp_output_segment, 新的seg释放掉 +@@ -1510,6 +1510,7 @@ tcp_output(struct tcp_pcb *pcb) + pre_pbuf->next = seg->p; + } + ++ send_len += seg->len; + pre_pbuf = seg->p; + next_seqno = seg_seqno + TCP_TCPLEN(seg); + seg = seg->next; +@@ -1519,8 +1520,11 @@ tcp_output(struct tcp_pcb *pcb) + + if (first_pbuf == NULL) { + err = tcp_output_seg(pcb, seg, netif, next_seqno + seg->len); +- if (err != ERR_OK) ++ if (err != ERR_OK) { ++ if (pcb->unsent == NULL) ++ pcb->last_unsent = NULL; + return err; ++ } + pcb->unsent = seg->next; + useg = tcp_output_over(pcb, seg, useg); + seg = pcb->unsent; +@@ -1545,7 +1549,7 @@ tcp_output(struct tcp_pcb *pcb) + } else + #endif + { +- while (seg != NULL && ++ while (seg != NULL && send_len < 0xffff && + lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len <= wnd) { + LWIP_ASSERT("RST not expected here!", + (TCPH_FLAGS(seg->tcphdr) & TCP_RST) == 0); +@@ -1560,6 +1564,7 @@ tcp_output(struct tcp_pcb *pcb) + ((pcb->flags & (TF_NAGLEMEMERR | TF_FIN)) == 0)) { + break; + } ++ send_len += seg->len; + #if TCP_CWND_DEBUG + LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_output: snd_wnd %"TCPWNDSIZE_F", cwnd %"TCPWNDSIZE_F", wnd %"U32_F", effwnd %"U32_F", seq %"U32_F", ack %"U32_F", i %"S16_F"\n", + pcb->snd_wnd, pcb->cwnd, wnd, +@@ -1577,6 +1582,8 @@ tcp_output(struct tcp_pcb *pcb) + if (err != ERR_OK) { + /* segment could not be sent, for whatever reason */ + tcp_set_flags(pcb, TF_NAGLEMEMERR); ++ if (pcb->unsent == NULL) ++ pcb->last_unsent = NULL; + return err; + } + #if TCP_OVERSIZE_DBGCHECK +@@ -1596,6 +1603,7 @@ tcp_output(struct tcp_pcb *pcb) + /* unacked list is empty? */ + if (pcb->unacked == NULL) { + pcb->unacked = seg; ++ pcb->last_unacked = seg; + useg = seg; + /* unacked list is not empty? 
*/ + } else { +@@ -1615,6 +1623,7 @@ tcp_output(struct tcp_pcb *pcb) + /* add segment to tail of unacked list */ + useg->next = seg; + useg = useg->next; ++ pcb->last_unacked = seg; + } + } + /* do not queue empty segments on the unacked list */ +@@ -1632,6 +1641,8 @@ tcp_output(struct tcp_pcb *pcb) + #endif /* TCP_OVERSIZE */ + + output_done: ++ if (pcb->unsent == NULL) ++ pcb->last_unsent = NULL; + tcp_clear_flags(pcb, TF_NAGLEMEMERR); + return ERR_OK; + } +@@ -1932,9 +1943,13 @@ tcp_rexmit_rto_prepare(struct tcp_pcb *pcb) + } + #endif /* TCP_OVERSIZE_DBGCHECK */ + /* unsent queue is the concatenated queue (of unacked, unsent) */ ++ if (pcb->unsent == NULL) { ++ pcb->last_unsent = pcb->last_unacked; ++ } + pcb->unsent = pcb->unacked; + /* unacked queue is now empty */ + pcb->unacked = NULL; ++ pcb->last_unacked = NULL; + + /* Mark RTO in-progress */ + tcp_set_flags(pcb, TF_RTO); +@@ -2004,32 +2019,42 @@ tcp_rexmit(struct tcp_pcb *pcb) + } + + seg = pcb->unacked; ++ while (seg) { ++ /* Give up if the segment is still referenced by the netif driver ++ due to deferred transmission. */ ++ if (tcp_output_segment_busy(seg)) { ++ LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_rexmit busy\n")); ++ if (seg == pcb->unacked) ++ return ERR_VAL; ++ else ++ break; ++ } + +- /* Give up if the segment is still referenced by the netif driver +- due to deferred transmission. */ +- if (tcp_output_segment_busy(seg)) { +- LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_rexmit busy\n")); +- return ERR_VAL; +- } +- +- /* Move the first unacked segment to the unsent queue */ +- /* Keep the unsent queue sorted. */ +- pcb->unacked = seg->next; ++ /* Move the first unacked segment to the unsent queue */ ++ /* Keep the unsent queue sorted. 
*/ ++ if (pcb->last_unacked == pcb->unacked) ++ pcb->last_unacked = pcb->unacked->next; ++ pcb->unacked = pcb->unacked->next; + +- cur_seg = &(pcb->unsent); +- while (*cur_seg && +- TCP_SEQ_LT(lwip_ntohl((*cur_seg)->tcphdr->seqno), lwip_ntohl(seg->tcphdr->seqno))) { +- cur_seg = &((*cur_seg)->next ); +- } +- seg->next = *cur_seg; +- *cur_seg = seg; ++ cur_seg = &(pcb->unsent); ++ while (*cur_seg && ++ TCP_SEQ_LT(lwip_ntohl((*cur_seg)->tcphdr->seqno), lwip_ntohl(seg->tcphdr->seqno))) { ++ cur_seg = &((*cur_seg)->next); ++ } ++ if (*cur_seg == NULL) ++ pcb->last_unsent = seg; ++ seg->next = *cur_seg; ++ *cur_seg = seg; + #if TCP_OVERSIZE +- if (seg->next == NULL) { +- /* the retransmitted segment is last in unsent, so reset unsent_oversize */ +- pcb->unsent_oversize = 0; +- } ++ if (seg->next == NULL) { ++ /* the retransmitted segment is last in unsent, so reset unsent_oversize */ ++ pcb->unsent_oversize = 0; ++ } + #endif /* TCP_OVERSIZE */ + ++ seg = pcb->unacked; ++ } ++ + if (pcb->nrtx < 0xFF) { + ++pcb->nrtx; + } +@@ -2207,7 +2232,7 @@ tcp_output_control_segment(const struct tcp_pcb *pcb, struct pbuf *p, + struct tcp_hdr *tcphdr = (struct tcp_hdr *)p->payload; + #if CHECKSUM_GEN_TCP_HW + if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_CKSUM) { +- tcph_cksum_set(p, TCP_HLEN); ++ tcph_cksum_set(p, TCPH_HDRLEN_BYTES(tcphdr)); + tcphdr->chksum = ip_chksum_pseudo_offload(IP_PROTO_TCP, p->tot_len, src, dst); + } else { + tcphdr->chksum = ip_chksum_pseudo(p, IP_PROTO_TCP, p->tot_len, +diff --git a/src/include/lwip/opt.h b/src/include/lwip/opt.h +index 8294cdd..83e7e93 100644 +--- a/src/include/lwip/opt.h ++++ b/src/include/lwip/opt.h +@@ -1281,7 +1281,7 @@ + * LWIP_TCP_SACK_OUT==1: TCP will support sending selective acknowledgements (SACKs). 
+ */ + #if !defined LWIP_TCP_SACK_OUT || defined __DOXYGEN__ +-#define LWIP_TCP_SACK_OUT 0 ++#define LWIP_TCP_SACK_OUT 1 + #endif + + /** +diff --git a/src/include/lwip/tcp.h b/src/include/lwip/tcp.h +index b36bf33..b0ae02c 100644 +--- a/src/include/lwip/tcp.h ++++ b/src/include/lwip/tcp.h +@@ -356,7 +356,9 @@ struct tcp_pcb { + + /* These are ordered by sequence number: */ + struct tcp_seg *unsent; /* Unsent (queued) segments. */ ++ struct tcp_seg *last_unsent; + struct tcp_seg *unacked; /* Sent but unacknowledged segments. */ ++ struct tcp_seg *last_unacked; + #if TCP_QUEUE_OOSEQ + struct tcp_seg *ooseq; /* Received out of sequence segments. */ + #endif /* TCP_QUEUE_OOSEQ */ +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index f919330..bf0d753 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -112,8 +112,6 @@ struct lwip_sock { + struct list_node send_list; + struct pbuf *send_lastdata; + struct pbuf *send_pre_del; +- uint64_t recv_all; +- uint64_t send_all; + + char pad3 __rte_cache_aligned; + /* nerver change */ +-- +2.8.4.windows.1 + diff --git a/0042-expand-recv-win.patch b/0042-expand-recv-win.patch new file mode 100644 index 0000000000000000000000000000000000000000..38438ac032b8ac1b691eec46d820e1e4fe96d855 --- /dev/null +++ b/0042-expand-recv-win.patch @@ -0,0 +1,39 @@ +From 288d56ebd68f366d3fa2ee1521120016fb21bf31 Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: Sat, 17 Dec 2022 19:14:36 +0800 +Subject: [PATCH 1/2] expand recv win + +--- + src/include/lwip/opt.h | 2 +- + src/include/lwipopts.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/include/lwip/opt.h b/src/include/lwip/opt.h +index 9d41a09..718816b 100644 +--- a/src/include/lwip/opt.h ++++ b/src/include/lwip/opt.h +@@ -1483,7 +1483,7 @@ + */ + #if !defined LWIP_WND_SCALE || defined __DOXYGEN__ + #define LWIP_WND_SCALE 1 +-#define TCP_RCV_SCALE 0 ++#define TCP_RCV_SCALE 6 + #endif + + /** +diff --git a/src/include/lwipopts.h 
b/src/include/lwipopts.h +index d7b9635..907c630 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -95,7 +95,7 @@ + + #define TCP_MSS (FRAME_MTU - IP_HLEN - TCP_HLEN) + +-#define TCP_WND (40 * TCP_MSS) ++#define TCP_WND (2500 * TCP_MSS) + + #define TCP_SND_BUF (2500 * TCP_MSS) + +-- +2.8.4.windows.1 + diff --git a/0043-add-prefetch.patch b/0043-add-prefetch.patch new file mode 100644 index 0000000000000000000000000000000000000000..082503fbdce83781948a8f3be464d5f67a7b0bc4 --- /dev/null +++ b/0043-add-prefetch.patch @@ -0,0 +1,62 @@ +From 3d63cb611a0b7a3cde0bb9e74b0ec83501966c51 Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: Sun, 18 Dec 2022 18:50:33 +0800 +Subject: [PATCH 2/2] add prefetch + +--- + src/core/pbuf.c | 7 +++++++ + src/core/tcp_out.c | 6 ++++++ + 2 files changed, 13 insertions(+) + +diff --git a/src/core/pbuf.c b/src/core/pbuf.c +index 404c8a7..ad75aa6 100644 +--- a/src/core/pbuf.c ++++ b/src/core/pbuf.c +@@ -83,6 +83,9 @@ + #if LWIP_CHECKSUM_ON_COPY + #include "lwip/inet_chksum.h" + #endif ++#if USE_LIBOS ++#include ++#endif + + #include + +@@ -750,6 +753,10 @@ pbuf_free(struct pbuf *p) + /* de-allocate all consecutive pbufs from the head of the chain that + * obtain a zero reference count after decrementing*/ + while (p != NULL) { ++#if USE_LIBOS ++ if (p->next) ++ rte_prefetch0(p->next); ++#endif + LWIP_PBUF_REF_T ref; + SYS_ARCH_DECL_PROTECT(old_level); + /* Since decrementing ref cannot be guaranteed to be a single machine operation +diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c +index 55053d8..3c4dc4b 100644 +--- a/src/core/tcp_out.c ++++ b/src/core/tcp_out.c +@@ -82,6 +82,7 @@ + + #if USE_LIBOS + #include "lwipsock.h" ++#include + #if CHECKSUM_GEN_TCP_HW + #include "dpdk_cksum.h" + #endif +@@ -1762,6 +1763,11 @@ tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif + } + #endif + ++#if USE_LIBOS ++ /* pbuf into mbuf. 
ref dpdk_common.h */ ++ rte_prefetch0((uint8_t *)(seg->p) - sizeof(struct rte_mbuf) - sizeof(uint64_t) * 2); ++#endif ++ + /* Set retransmission timer running if it is not currently enabled + This must be set before checking the route. */ + if (pcb->rtime < 0) { +-- +2.8.4.windows.1 + diff --git a/0044-skip-unnecessary-tcp_route.patch b/0044-skip-unnecessary-tcp_route.patch new file mode 100644 index 0000000000000000000000000000000000000000..b93b7d523e5d1aacbafd24f68bbecec922189333 --- /dev/null +++ b/0044-skip-unnecessary-tcp_route.patch @@ -0,0 +1,103 @@ +From b23520dcddbdf088ededeac7a0a1611db73db191 Mon Sep 17 00:00:00 2001 +From: kircher +Date: Mon, 19 Dec 2022 19:23:42 +0800 +Subject: [PATCH] skip unnecessary tcp_route + +--- + src/core/tcp.c | 1 + + src/core/tcp_out.c | 20 ++++++++++++++++---- + src/include/lwip/tcp.h | 1 + + src/include/lwipsock.h | 1 + + 4 files changed, 19 insertions(+), 4 deletions(-) + +diff --git a/src/core/tcp.c b/src/core/tcp.c +index 252f27f..abfcc00 100644 +--- a/src/core/tcp.c ++++ b/src/core/tcp.c +@@ -2294,6 +2294,7 @@ tcp_pcb_purge(struct tcp_pcb *pcb) + tcp_segs_free(pcb->unacked); + pcb->unacked = pcb->unsent = NULL; + pcb->last_unacked = pcb->last_unsent = NULL; ++ pcb->pcb_if = NULL; + #if TCP_OVERSIZE + pcb->unsent_oversize = 0; + #endif /* TCP_OVERSIZE */ +diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c +index 25aeb23..1c5734b 100644 +--- a/src/core/tcp_out.c ++++ b/src/core/tcp_out.c +@@ -1425,7 +1425,12 @@ tcp_output(struct tcp_pcb *pcb) + lwip_ntohl(seg->tcphdr->seqno), pcb->lastack)); + } + +- netif = tcp_route(pcb, &pcb->local_ip, &pcb->remote_ip); ++ if (pcb->pcb_if == NULL) { ++ netif = tcp_route(pcb, &pcb->local_ip, &pcb->remote_ip); ++ pcb->pcb_if = netif; ++ } else { ++ netif = pcb->pcb_if; ++ } + if (netif == NULL) { + return ERR_RTE; + } +@@ -2220,7 +2225,7 @@ tcp_output_fill_options(const struct tcp_pcb *pcb, struct pbuf *p, u8_t optflags + * header checksum and calling ip_output_if while handling netif 
hints and stats. + */ + static err_t +-tcp_output_control_segment(const struct tcp_pcb *pcb, struct pbuf *p, ++tcp_output_control_segment(struct tcp_pcb *pcb, struct pbuf *p, + const ip_addr_t *src, const ip_addr_t *dst) + { + err_t err; +@@ -2228,7 +2233,14 @@ tcp_output_control_segment(const struct tcp_pcb *pcb, struct pbuf *p, + + LWIP_ASSERT("tcp_output_control_segment: invalid pbuf", p != NULL); + +- netif = tcp_route(pcb, src, dst); ++ if (pcb == NULL || pcb->pcb_if == NULL) { ++ netif = tcp_route(pcb, src, dst); ++ if (pcb) { ++ pcb->pcb_if = netif; ++ } ++ } else { ++ netif = pcb->pcb_if; ++ } + if (netif == NULL) { + err = ERR_RTE; + } else { +@@ -2318,7 +2330,7 @@ tcp_rst(const struct tcp_pcb *pcb, u32_t seqno, u32_t ackno, + + MIB2_STATS_INC(mib2.tcpoutrsts); + +- tcp_output_control_segment(pcb, p, local_ip, remote_ip); ++ tcp_output_control_segment((struct tcp_pcb*)pcb, p, local_ip, remote_ip); + LWIP_DEBUGF(TCP_RST_DEBUG, ("tcp_rst: seqno %"U32_F" ackno %"U32_F".\n", seqno, ackno)); + } + +diff --git a/src/include/lwip/tcp.h b/src/include/lwip/tcp.h +index b0ae02c..2a61776 100644 +--- a/src/include/lwip/tcp.h ++++ b/src/include/lwip/tcp.h +@@ -408,6 +408,7 @@ struct tcp_pcb { + u8_t snd_scale; + u8_t rcv_scale; + #endif ++ struct netif* pcb_if; + }; + + #if TCP_PCB_HASH +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index 62e5bf1..ec4d78c 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -111,6 +111,7 @@ struct lwip_sock { + /* stack thread all use */ + struct list_node recv_list; + struct list_node send_list; ++ struct pbuf *lwip_lastdata; + struct pbuf *send_lastdata; + struct pbuf *send_pre_del; + +-- +2.33.0 + diff --git a/0045-add-variable-in-struct-sock.patch b/0045-add-variable-in-struct-sock.patch new file mode 100644 index 0000000000000000000000000000000000000000..8fcb6daaf73fc058a78de0ff34ab7439ae28bfa3 --- /dev/null +++ b/0045-add-variable-in-struct-sock.patch @@ -0,0 +1,26 @@ +From 
1ede4a00c1eca575314af02374846cb086798c08 Mon Sep 17 00:00:00 2001 +From: wu-changsheng +Date: Tue, 20 Dec 2022 14:37:21 +0800 +Subject: [PATCH] add variable in struct sock + +--- + src/include/lwipsock.h | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index ec4d78c..8924728 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -105,8 +105,7 @@ struct lwip_sock { + char pad1 __rte_cache_aligned; + /* app and stack thread all use */ + uint32_t in_send; /* avoid sock too much send rpc msg*/ +- pthread_spinlock_t sock_lock; +- ++ bool read_wait; + char pad2 __rte_cache_aligned; + /* stack thread all use */ + struct list_node recv_list; +-- +2.23.0 + diff --git a/0046-add-dataack-when-recv-too-many-acks-with-data.patch b/0046-add-dataack-when-recv-too-many-acks-with-data.patch new file mode 100644 index 0000000000000000000000000000000000000000..70131e546bcb547271099d665a95651874f6607a --- /dev/null +++ b/0046-add-dataack-when-recv-too-many-acks-with-data.patch @@ -0,0 +1,91 @@ +From 1aa27395a4c4b73b6db472c4ae75ed91637a11bf Mon Sep 17 00:00:00 2001 +From: kircher +Date: Wed, 21 Dec 2022 17:50:50 +0800 +Subject: [PATCH] add dataack when recv too many acks with data + +--- + src/core/tcp_in.c | 22 ++++++++++++++++++++++ + src/include/lwip/tcp.h | 1 + + src/include/lwipopts.h | 2 ++ + 3 files changed, 25 insertions(+) + +diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c +index 78954bd..35ec6d9 100644 +--- a/src/core/tcp_in.c ++++ b/src/core/tcp_in.c +@@ -1260,6 +1260,7 @@ tcp_receive(struct tcp_pcb *pcb) + s16_t m; + u32_t right_wnd_edge; + int found_dupack = 0; ++ int found_dataack = 0; + + LWIP_ASSERT("tcp_receive: invalid pcb", pcb != NULL); + LWIP_ASSERT("tcp_receive: wrong state", pcb->state >= ESTABLISHED); +@@ -1337,11 +1338,31 @@ tcp_receive(struct tcp_pcb *pcb) + } + } + } ++ /* fast rexmit when receive too many acks with data */ ++ if (TCP_SEQ_LT(ackno + 1, pcb->snd_nxt)) { 
++ if (pcb->snd_wl2 + pcb->snd_wnd == right_wnd_edge) { ++ if (pcb->rtime >= 0) { ++ if (pcb->lastack == ackno) { ++ found_dataack = 1; ++ ++pcb->dataacks; ++ if (pcb->dataacks > MAX_DATA_ACK_NUM) { ++ if (tcp_rexmit(pcb) == ERR_OK) { ++ pcb->rtime = 0; ++ pcb->dataacks = 0; ++ } ++ } ++ } ++ } ++ } ++ } + /* If Clause (1) or more is true, but not a duplicate ack, reset + * count of consecutive duplicate acks */ + if (!found_dupack) { + pcb->dupacks = 0; + } ++ if (!found_dataack) { ++ pcb->dataacks = 0; ++ } + } else if (TCP_SEQ_BETWEEN(ackno, pcb->lastack + 1, pcb->snd_nxt)) { + /* We come here when the ACK acknowledges new data. */ + tcpwnd_size_t acked; +@@ -1367,6 +1388,7 @@ tcp_receive(struct tcp_pcb *pcb) + /* Reset the fast retransmit variables. */ + pcb->dupacks = 0; + pcb->lastack = ackno; ++ pcb->dataacks = 0; + + /* Update the congestion control variables (cwnd and + ssthresh). */ +diff --git a/src/include/lwip/tcp.h b/src/include/lwip/tcp.h +index 2a61776..97cb882 100644 +--- a/src/include/lwip/tcp.h ++++ b/src/include/lwip/tcp.h +@@ -326,6 +326,7 @@ struct tcp_pcb { + + /* fast retransmit/recovery */ + u8_t dupacks; ++ u32_t dataacks; + u32_t lastack; /* Highest acknowledged seqno. 
*/ + + /* congestion avoidance/control variables */ +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index 907c630..405cf11 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -177,6 +177,8 @@ + + #define MIN_TSO_SEG_LEN 256 + ++#define MAX_DATA_ACK_NUM 256 ++ + /* --------------------------------------- + * ------- NIC offloads -------- + * --------------------------------------- +-- +2.33.0 + diff --git a/0047-reduce-struct-pbuf-size.patch b/0047-reduce-struct-pbuf-size.patch new file mode 100644 index 0000000000000000000000000000000000000000..fed1ba50b8a450943d75ff9406c08df6fa283e84 --- /dev/null +++ b/0047-reduce-struct-pbuf-size.patch @@ -0,0 +1,25 @@ +From 5527e02b7ae7f27db8964ad55747326b98e33634 Mon Sep 17 00:00:00 2001 +From: wu-changsheng +Date: Wed, 21 Dec 2022 22:47:04 +0800 +Subject: [PATCH] reduce struct pbuf size + +--- + src/include/lwip/pbuf.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/include/lwip/pbuf.h b/src/include/lwip/pbuf.h +index 380ac8e..1124408 100644 +--- a/src/include/lwip/pbuf.h ++++ b/src/include/lwip/pbuf.h +@@ -229,7 +229,7 @@ struct pbuf { + u64_t l3_len:9; + /* < L4 (TCP/UDP) Header Length. 
*/ + u64_t l4_len:8; +- u16_t header_off; ++ u8_t header_off; + u8_t rexmit; + u8_t in_write; + u8_t head; +-- +2.23.0 + diff --git a/0048-listen-pcb-also-use-pcb_if.patch b/0048-listen-pcb-also-use-pcb_if.patch new file mode 100644 index 0000000000000000000000000000000000000000..81ff5baa70159bb9581780ca0b4da1da59385e36 --- /dev/null +++ b/0048-listen-pcb-also-use-pcb_if.patch @@ -0,0 +1,37 @@ +From 11d8e9f6d54a606163a452f1c8beaa5bc90e949c Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Wed, 28 Dec 2022 21:25:36 +0800 +Subject: [PATCH] listen pcb also use pcb_if + +--- + src/include/lwip/ip.h | 1 + + src/include/lwip/tcp.h | 1 - + 2 files changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/include/lwip/ip.h b/src/include/lwip/ip.h +index d560f6b..4cf5f5e 100644 +--- a/src/include/lwip/ip.h ++++ b/src/include/lwip/ip.h +@@ -79,6 +79,7 @@ extern "C" { + ip_addr_t remote_ip; \ + /* Bound netif index */ \ + u8_t netif_idx; \ ++ struct netif *pcb_if; \ + /* Socket options */ \ + u8_t so_options; \ + /* Type Of Service */ \ +diff --git a/src/include/lwip/tcp.h b/src/include/lwip/tcp.h +index 97cb882..0b65b01 100644 +--- a/src/include/lwip/tcp.h ++++ b/src/include/lwip/tcp.h +@@ -409,7 +409,6 @@ struct tcp_pcb { + u8_t snd_scale; + u8_t rcv_scale; + #endif +- struct netif* pcb_if; + }; + + #if TCP_PCB_HASH +-- +2.23.0 + diff --git a/0049-expand-recv-mbox-size.patch b/0049-expand-recv-mbox-size.patch new file mode 100644 index 0000000000000000000000000000000000000000..b8a9e765ac5efc661250a769f36922dbbdce262f --- /dev/null +++ b/0049-expand-recv-mbox-size.patch @@ -0,0 +1,61 @@ +From c18ca96d1c2ef02153ce9fb22386b4e8a44b60e9 Mon Sep 17 00:00:00 2001 +From: wuchangsheng +Date: Fri, 30 Dec 2022 11:34:31 +0800 +Subject: [PATCH] expand recv mbox size + +--- + src/core/tcp_out.c | 6 +++--- + src/include/lwipopts.h | 2 +- + src/include/lwipsock.h | 2 -- + 3 files changed, 4 insertions(+), 6 deletions(-) + +diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c +index 
4feeb69..f84ff57 100644 +--- a/src/core/tcp_out.c ++++ b/src/core/tcp_out.c +@@ -1476,9 +1476,9 @@ tcp_output(struct tcp_pcb *pcb) + if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO) { + while(seg && send_len < 0xffff) { + /** +- * 1)遍历unsent队列,找到所有的待发送seg. 将seg的buf串起来 +- * 2) 生成新的seg, 调用tcp_output_segment, 新的seg释放掉 +- * 3) 若成功,则更新snd_nxt, unacked队列,和unsent队列。 ++ * 1) walk unsent queue, find all seg witch wait to send. chain buf in these segs. ++ * 2) create new segment, send and free new segment. ++ * 3) update snd_nxt, unacked queue, and unsent queue + */ + struct tcp_seg *start_seg = seg; + struct pbuf *first_pbuf = NULL; +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index 405cf11..fedded9 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -113,7 +113,7 @@ + + #define LWIP_NETIF_API 1 + +-#define DEFAULT_TCP_RECVMBOX_SIZE 128 ++#define DEFAULT_TCP_RECVMBOX_SIZE 4096 + + #define DEFAULT_ACCEPTMBOX_SIZE 1024 + +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index fef5b6d..a807e3e 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -104,12 +104,10 @@ struct lwip_sock { + char pad1 __rte_cache_aligned; + /* app and stack thread all use */ + uint32_t in_send; /* avoid sock too much send rpc msg*/ +- bool read_wait; + char pad2 __rte_cache_aligned; + /* stack thread all use */ + struct list_node recv_list; + struct list_node send_list; +- struct pbuf *lwip_lastdata; + struct pbuf *send_lastdata; + struct pbuf *send_pre_del; + +-- +2.8.4.windows.1 + diff --git a/0050-lwip-reuse-ip-port.patch b/0050-lwip-reuse-ip-port.patch new file mode 100644 index 0000000000000000000000000000000000000000..08cdcddee7e8d9feac6fb86d518f4f0bb8ea1ed1 --- /dev/null +++ b/0050-lwip-reuse-ip-port.patch @@ -0,0 +1,254 @@ +From 28f8ba80cd733e14e0540c414a18134b3c3fcc94 Mon Sep 17 00:00:00 2001 +From: FanBin +Date: Wed, 15 Feb 2023 10:09:39 +0800 +Subject: [PATCH] lwip reuse ip port + +--- + src/core/tcp.c | 40 
+++++++++++++++++++++++++++++--- + src/core/tcp_in.c | 32 +++++++++++++++++++++++++ + src/include/lwip/api.h | 4 ++++ + src/include/lwip/priv/tcp_priv.h | 19 +++++++++++++++ + src/include/lwip/tcp.h | 8 +++++++ + src/include/lwipopts.h | 4 ++++ + 6 files changed, 104 insertions(+), 3 deletions(-) + +diff --git a/src/core/tcp.c b/src/core/tcp.c +index f75d214..3171c5e 100644 +--- a/src/core/tcp.c ++++ b/src/core/tcp.c +@@ -111,6 +111,7 @@ + #include "lwip/ip6.h" + #include "lwip/ip6_addr.h" + #include "lwip/nd6.h" ++#include "lwip/api.h" + + #include + #include +@@ -772,6 +773,9 @@ tcp_bind(struct tcp_pcb *pcb, const ip_addr_t *ipaddr, u16_t port) + /* Check if the address already is in use (on all lists) */ + for (i = 0; i < max_pcb_list; i++) { + for (cpcb = *tcp_pcb_lists[i]; cpcb != NULL; cpcb = cpcb->next) { ++#if REUSE_IPPORT ++ continue; ++#else + if (cpcb->local_port == port) { + #if SO_REUSE + /* Omit checking for the same port if both pcbs have REUSEADDR set. +@@ -790,6 +794,7 @@ tcp_bind(struct tcp_pcb *pcb, const ip_addr_t *ipaddr, u16_t port) + } + } + } ++#endif /* REUSE_IPORT */ + } + } + } +@@ -921,7 +926,18 @@ tcp_listen_with_backlog_and_err(struct tcp_pcb *pcb, u8_t backlog, err_t *err) + res = ERR_ALREADY; + goto done; + } +-#if SO_REUSE ++ ++#if REUSE_IPPORT ++ struct tcp_pcb_listen *first_same_port_pcb = NULL; ++ for (lpcb = tcp_listen_pcbs.listen_pcbs; lpcb != NULL; lpcb = lpcb->next) { ++ if ((lpcb->local_port == pcb->local_port) && ++ ip_addr_cmp(&lpcb->local_ip, &pcb->local_ip)) { ++ /* this address/port is already used */ ++ first_same_port_pcb = lpcb; ++ break; ++ } ++ } ++#else + if (ip_get_option(pcb, SOF_REUSEADDR)) { + /* Since SOF_REUSEADDR allows reusing a local address before the pcb's usage + is declared (listen-/connection-pcb), we have to make sure now that +@@ -936,7 +952,7 @@ tcp_listen_with_backlog_and_err(struct tcp_pcb *pcb, u8_t backlog, err_t *err) + } + } + } +-#endif /* SO_REUSE */ ++#endif /* REUSE_IPPORT */ + + #if 
USE_LIBOS + vdev_reg_done(REG_RING_TCP_LISTEN, pcb); +@@ -955,6 +971,16 @@ tcp_listen_with_backlog_and_err(struct tcp_pcb *pcb, u8_t backlog, err_t *err) + lpcb->netif_idx = pcb->netif_idx; + lpcb->ttl = pcb->ttl; + lpcb->tos = pcb->tos; ++ ++#if REUSE_IPPORT ++ lpcb->connect_num = 0; ++ lpcb->next_same_port_pcb = NULL; ++ ++ struct netconn* conn = pcb->callback_arg; ++ lpcb->socket_fd = conn->socket; ++ lpcb->master_lpcb = conn->is_master_fd; ++#endif ++ + #if LWIP_IPV4 && LWIP_IPV6 + IP_SET_TYPE_VAL(lpcb->remote_ip, pcb->local_ip.type); + #endif /* LWIP_IPV4 && LWIP_IPV6 */ +@@ -979,7 +1005,15 @@ tcp_listen_with_backlog_and_err(struct tcp_pcb *pcb, u8_t backlog, err_t *err) + lpcb->accepts_pending = 0; + tcp_backlog_set(lpcb, backlog); + #endif /* TCP_LISTEN_BACKLOG */ +- TCP_REG(&tcp_listen_pcbs.pcbs, (struct tcp_pcb *)lpcb); ++ ++#if REUSE_IPPORT ++ if (first_same_port_pcb != NULL) { ++ TCP_REG_SAMEPORT((struct tcp_pcb_listen *)first_same_port_pcb, (struct tcp_pcb_listen *)lpcb); ++ } else ++#endif ++ { ++ TCP_REG(&tcp_listen_pcbs.pcbs, (struct tcp_pcb *)lpcb); ++ } + res = ERR_OK; + done: + if (err != NULL) { +diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c +index 35ec6d9..9f5c34a 100644 +--- a/src/core/tcp_in.c ++++ b/src/core/tcp_in.c +@@ -356,6 +356,9 @@ tcp_input(struct pbuf *p, struct netif *inp) + } + } + ++#if REUSE_IPPORT ++ struct tcp_pcb_listen *min_cnts_lpcb = NULL; ++#endif + /* Finally, if we still did not get a match, we check all PCBs that + are LISTENing for incoming connections. 
*/ + prev = NULL; +@@ -379,6 +382,30 @@ tcp_input(struct pbuf *p, struct netif *inp) + } else if (IP_ADDR_PCB_VERSION_MATCH_EXACT(lpcb, ip_current_dest_addr())) { + if (ip_addr_cmp(&lpcb->local_ip, ip_current_dest_addr())) { + /* found an exact match */ ++#if REUSE_IPPORT ++ // check master fd ++ struct tcp_pcb_listen *tmp_lpcb = lpcb; ++ u8_t have_master_fd = 0; ++ while (tmp_lpcb != NULL) { ++ if (tmp_lpcb->master_lpcb) { ++ have_master_fd = 1; ++ } ++ tmp_lpcb = tmp_lpcb->next_same_port_pcb; ++ } ++ ++ tmp_lpcb = lpcb; ++ min_cnts_lpcb = lpcb; ++ u16_t min_conn_num = MAX_CONN_NUM_PER_THREAD; ++ while (tmp_lpcb != NULL) { ++ if (!have_master_fd || tmp_lpcb->master_lpcb) { ++ if (tmp_lpcb->connect_num < min_conn_num) { ++ min_cnts_lpcb = tmp_lpcb; ++ min_conn_num = tmp_lpcb->connect_num; ++ } ++ } ++ tmp_lpcb = tmp_lpcb->next_same_port_pcb; ++ } ++#endif + break; + } else if (ip_addr_isany(&lpcb->local_ip)) { + /* found an ANY-match */ +@@ -428,7 +455,12 @@ tcp_input(struct pbuf *p, struct netif *inp) + tcphdr_opt1len, tcphdr_opt2, p) == ERR_OK) + #endif + { ++#if REUSE_IPPORT ++ tcp_listen_input(min_cnts_lpcb); ++ min_cnts_lpcb->connect_num++; ++#else + tcp_listen_input(lpcb); ++#endif + } + pbuf_free(p); + return; +diff --git a/src/include/lwip/api.h b/src/include/lwip/api.h +index 6dec8c0..430a7a0 100644 +--- a/src/include/lwip/api.h ++++ b/src/include/lwip/api.h +@@ -318,6 +318,10 @@ struct netconn { + #endif /* LWIP_TCP */ + /** A callback function that is informed about events for this netconn */ + netconn_callback callback; ++ ++#if REUSE_IPPORT ++ u8_t is_master_fd; ++#endif + }; + + /** This vector type is passed to @ref netconn_write_vectors_partly to send +diff --git a/src/include/lwip/priv/tcp_priv.h b/src/include/lwip/priv/tcp_priv.h +index b242428..97f799e 100644 +--- a/src/include/lwip/priv/tcp_priv.h ++++ b/src/include/lwip/priv/tcp_priv.h +@@ -353,6 +353,15 @@ static inline int vdev_reg_done(enum reg_ring_type reg_type, const struct tcp_pc + 
qtuple.dst_ip = pcb->remote_ip.addr; + qtuple.dst_port = lwip_htons(pcb->remote_port); + ++#if REUSE_IPPORT ++ if (reg_type == REG_RING_TCP_CONNECT_CLOSE) { ++ struct tcp_pcb_listen* lpcb = pcb->listener; ++ if (lpcb != NULL) { ++ lpcb->connect_num--; ++ } ++ } ++#endif ++ + return vdev_reg_xmit(reg_type, &qtuple); + } + static inline void vdev_unreg_done(const struct tcp_pcb *pcb) +@@ -473,6 +482,16 @@ static inline void vdev_unreg_done(const struct tcp_pcb *pcb) + tcp_timer_needed(); \ + } while (0) + ++#define TCP_REG_SAMEPORT(first_pcb, lpcb) \ ++ do { \ ++ struct tcp_pcb_listen *tmp_pcb = first_pcb; \ ++ while (tmp_pcb->next_same_port_pcb != NULL) { \ ++ tmp_pcb = tmp_pcb->next_same_port_pcb; \ ++ }; \ ++ tmp_pcb->next_same_port_pcb = lpcb; \ ++ tcp_timer_needed(); \ ++ } while (0) ++ + #define TCP_RMV_HASH(pcbs, npcb) \ + do { \ + hlist_del_init(&(npcb)->tcp_node); \ +diff --git a/src/include/lwip/tcp.h b/src/include/lwip/tcp.h +index 0b65b01..312320b 100644 +--- a/src/include/lwip/tcp.h ++++ b/src/include/lwip/tcp.h +@@ -252,6 +252,14 @@ struct tcp_pcb_listen { + u8_t backlog; + u8_t accepts_pending; + #endif /* TCP_LISTEN_BACKLOG */ ++ ++#if REUSE_IPPORT ++ struct tcp_pcb_listen* next_same_port_pcb; ++ u16_t connect_num; ++ int socket_fd; ++ u8_t master_lpcb; ++#endif ++ + }; + + +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index fedded9..be58ec3 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -143,6 +143,10 @@ + + #define USE_LIBOS_ZC_RING 0 + ++#define REUSE_IPPORT 1 ++ ++#define MAX_CONN_NUM_PER_THREAD 65535 ++ + #define SO_REUSE 1 + + #define SIOCSHIWAT 1 +-- +2.33.0 + diff --git a/0051-lwip-add-need_tso_send.patch b/0051-lwip-add-need_tso_send.patch new file mode 100644 index 0000000000000000000000000000000000000000..5e45768991c0d2ed86651d4b382a4faf4ae90d49 --- /dev/null +++ b/0051-lwip-add-need_tso_send.patch @@ -0,0 +1,76 @@ +From 590873482f9b6a5e2635a95720acb37b5f516ab0 Mon Sep 17 00:00:00 2001 +From: kircher 
+Date: Tue, 21 Feb 2023 15:05:41 +0800 +Subject: [PATCH] lwip add need_tso_send + +--- + src/api/api_msg.c | 1 + + src/core/tcp_out.c | 5 ++++- + src/include/lwip/tcp.h | 2 ++ + 3 files changed, 7 insertions(+), 1 deletion(-) + +diff --git a/src/api/api_msg.c b/src/api/api_msg.c +index 1fedaad..3a4a473 100644 +--- a/src/api/api_msg.c ++++ b/src/api/api_msg.c +@@ -1744,6 +1744,7 @@ lwip_netconn_do_writemore(struct netconn *conn WRITE_DELAYED_PARAM) + write_more = 0; + err = tcp_write(conn->pcb.tcp, conn->current_msg->msg.w.vector->ptr, len, apiflags); + conn->current_msg->msg.w.len = len; ++ conn->pcb.tcp->need_tso_send = 1; + #else + err = tcp_write(conn->pcb.tcp, dataptr, len, apiflags); + #endif +diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c +index c538f2a..bf23381 100644 +--- a/src/core/tcp_out.c ++++ b/src/core/tcp_out.c +@@ -1473,7 +1473,7 @@ tcp_output(struct tcp_pcb *pcb) + + u32_t send_len = 0; + #if USE_LIBOS +- if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO) { ++ if ((get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO) && pcb->need_tso_send) { + while(seg && send_len < 0xffff) { + /** + * 1) walk unsent queue, find all seg witch wait to send. chain buf in these segs. 
+@@ -1529,6 +1529,7 @@ tcp_output(struct tcp_pcb *pcb) + if (err != ERR_OK) { + if (pcb->unsent == NULL) + pcb->last_unsent = NULL; ++ pcb->need_tso_send = 0; + return err; + } + pcb->unsent = seg->next; +@@ -1552,6 +1553,7 @@ tcp_output(struct tcp_pcb *pcb) + pbuf_remove_header(new_seg.p, new_seg.p->tot_len - new_seg.len - TCPH_HDRLEN_BYTES(new_seg.tcphdr)); + new_seg.p->tot_len = new_seg.p->len; + } ++ pcb->need_tso_send = 0; + } else + #endif + { +@@ -1647,6 +1649,7 @@ tcp_output(struct tcp_pcb *pcb) + #endif /* TCP_OVERSIZE */ + + output_done: ++ pcb->need_tso_send = 0; + if (pcb->unsent == NULL) + pcb->last_unsent = NULL; + tcp_clear_flags(pcb, TF_NAGLEMEMERR); +diff --git a/src/include/lwip/tcp.h b/src/include/lwip/tcp.h +index 0b65b01..2fc683d 100644 +--- a/src/include/lwip/tcp.h ++++ b/src/include/lwip/tcp.h +@@ -409,6 +409,8 @@ struct tcp_pcb { + u8_t snd_scale; + u8_t rcv_scale; + #endif ++ ++ u8_t need_tso_send; + }; + + #if TCP_PCB_HASH +-- +2.33.0 + diff --git a/0052-lwip_fnctl-only-support-F_SETFL-F_GETFL.patch b/0052-lwip_fnctl-only-support-F_SETFL-F_GETFL.patch new file mode 100644 index 0000000000000000000000000000000000000000..02af1405352796912431a266545efd5f841a4b79 --- /dev/null +++ b/0052-lwip_fnctl-only-support-F_SETFL-F_GETFL.patch @@ -0,0 +1,30 @@ +From 40bd7d38bd7a15d22459c4b35cfc7480205a57d9 Mon Sep 17 00:00:00 2001 +From: jiangheng12 +Date: Wed, 22 Feb 2023 20:20:35 +0800 +Subject: [PATCH] lwip_cnctl only support F_SETFL,F_GETFL, other opt return 0 + for compitable + +--- + src/api/sockets.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/src/api/sockets.c b/src/api/sockets.c +index 9b3f514..2cb6f22 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -4107,7 +4107,12 @@ lwip_fcntl(int s, int cmd, int val) + break; + default: + LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_fcntl(%d, UNIMPL: %d, %d)\n", s, cmd, val)); ++#if USE_LIBOS ++ sock_set_errno(sock, 0); /* not yet implemented, but we return 0 for compatilbe with app */ ++ 
ret = 0; ++#else + sock_set_errno(sock, ENOSYS); /* not yet implemented */ ++#endif + break; + } + done_socket(sock); +-- +2.23.0 + diff --git a/0053-cleancode-improve-lwipopts.h-readability.patch b/0053-cleancode-improve-lwipopts.h-readability.patch new file mode 100644 index 0000000000000000000000000000000000000000..6996746840522e5ada20926898ddcbbc7ecc14e7 --- /dev/null +++ b/0053-cleancode-improve-lwipopts.h-readability.patch @@ -0,0 +1,2252 @@ +From b42299206a917ed5876c27617de59fb71f8437a7 Mon Sep 17 00:00:00 2001 +From: Lemmy Huang +Date: Thu, 9 Mar 2023 10:57:16 +0800 +Subject: [PATCH] cleancode: improve lwipopts.h readability + +Signed-off-by: Lemmy Huang +--- + src/api/api_msg.c | 12 +- + src/api/sockets.c | 66 ++++----- + src/api/tcpip.c | 18 +-- + src/core/ipv4/etharp.c | 4 +- + src/core/ipv4/icmp.c | 2 +- + src/core/ipv4/ip4.c | 6 +- + src/core/ipv4/ip4_frag.c | 4 +- + src/core/memp.c | 4 +- + src/core/pbuf.c | 8 +- + src/core/tcp.c | 66 ++++----- + src/core/tcp_in.c | 46 +++--- + src/core/tcp_out.c | 36 ++--- + src/core/timeouts.c | 4 +- + src/include/arch/sys_arch.h | 2 +- + src/include/dpdk_cksum.h | 4 +- + src/include/hlist.h | 4 +- + src/include/lwip/api.h | 10 +- + src/include/lwip/ip.h | 4 +- + src/include/lwip/memp.h | 10 +- + src/include/lwip/opt.h | 8 +- + src/include/lwip/pbuf.h | 6 +- + src/include/lwip/priv/memp_std.h | 4 +- + src/include/lwip/priv/tcp_priv.h | 24 ++-- + src/include/lwip/sockets.h | 30 ++-- + src/include/lwip/tcp.h | 16 +-- + src/include/lwip/timeouts.h | 4 +- + src/include/lwiplog.h | 4 +- + src/include/lwipopts.h | 240 +++++++++++++++++-------------- + src/include/lwipsock.h | 8 +- + src/netif/ethernet.c | 2 +- + 30 files changed, 339 insertions(+), 317 deletions(-) + +diff --git a/src/api/api_msg.c b/src/api/api_msg.c +index 3a4a473..1840c9d 100644 +--- a/src/api/api_msg.c ++++ b/src/api/api_msg.c +@@ -54,7 +54,7 @@ + #include "lwip/mld6.h" + #include "lwip/priv/tcpip_priv.h" + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + 
#include "lwip/sockets.h" + #include "lwipsock.h" + #include "posix_api.h" +@@ -341,7 +341,7 @@ recv_tcp(void *arg, struct tcp_pcb *pcb, struct pbuf *p, err_t err) + #if LWIP_SO_RCVBUF + SYS_ARCH_INC(conn->recv_avail, len); + #endif /* LWIP_SO_RCVBUF */ +-#if USE_LIBOS ++#if GAZELLE_ENABLE + add_recv_list(conn->socket); + LWIP_UNUSED_ARG(len); + #else +@@ -477,7 +477,7 @@ err_tcp(void *arg, err_t err) + if (NETCONN_MBOX_VALID(conn, &conn->recvmbox)) { + /* use trypost to prevent deadlock */ + sys_mbox_trypost(&conn->recvmbox, mbox_msg); +-#if USE_LIBOS ++#if GAZELLE_ENABLE + add_recv_list(conn->socket); + #endif + } +@@ -609,7 +609,7 @@ accept_function(void *arg, struct tcp_pcb *newpcb, err_t err) + API_EVENT(conn, NETCONN_EVT_RCVPLUS, 0); + } + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + LWIP_DEBUGF(API_MSG_DEBUG, ("libos incoming connection established\n")); + SET_CONN_TYPE_LIBOS(newconn); + #endif +@@ -1333,7 +1333,7 @@ lwip_netconn_do_connected(void *arg, struct tcp_pcb *pcb, err_t err) + return ERR_VAL; + } + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + gazelle_connected_callback(conn); + #endif + +@@ -1738,7 +1738,7 @@ lwip_netconn_do_writemore(struct netconn *conn WRITE_DELAYED_PARAM) + } else { + write_more = 0; + } +-#if USE_LIBOS ++#if GAZELLE_ENABLE + /* vector->ptr is private arg sock */ + LWIP_UNUSED_ARG(dataptr); + write_more = 0; +diff --git a/src/api/sockets.c b/src/api/sockets.c +index 2cb6f22..356e345 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -62,7 +62,7 @@ + #include + #endif + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + #include + #include "lwipsock.h" + #include "posix_api.h" +@@ -92,7 +92,7 @@ + #define API_SELECT_CB_VAR_FREE(name) API_VAR_FREE(MEMP_SELECT_CB, name) + + #if LWIP_IPV4 +-#if USE_LIBOS ++#if GAZELLE_ENABLE + #define IP4ADDR_PORT_TO_SOCKADDR(sin, ipaddr, port) do { \ + (sin)->sin_family = AF_INET; \ + (sin)->sin_port = lwip_htons((port)); \ +@@ -105,7 +105,7 @@ + (sin)->sin_port = lwip_htons((port)); \ + 
inet_addr_from_ip4addr(&(sin)->sin_addr, ipaddr); \ + memset((sin)->sin_zero, 0, SIN_ZERO_LEN); }while(0) +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + #define SOCKADDR4_TO_IP4ADDR_PORT(sin, ipaddr, port) do { \ + inet_addr_to_ip4addr(ip_2_ip4(ipaddr), &((sin)->sin_addr)); \ + (port) = lwip_ntohs((sin)->sin_port); }while(0) +@@ -271,12 +271,12 @@ static void lwip_socket_drop_registered_mld6_memberships(int s); + #endif /* LWIP_IPV6_MLD */ + + /** The global array of available sockets */ +-#if USE_LIBOS ++#if GAZELLE_ENABLE + uint32_t sockets_num; + struct lwip_sock *sockets; + #else + static struct lwip_sock sockets[NUM_SOCKETS]; +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + + #if LWIP_SOCKET_SELECT || LWIP_SOCKET_POLL + #if LWIP_TCPIP_CORE_LOCKING +@@ -431,11 +431,11 @@ tryget_socket_unconn_nouse(int fd) + { + int s = fd - LWIP_SOCKET_OFFSET; + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + if ((s < 0) || (s >= sockets_num)) + #else + if ((s < 0) || (s >= NUM_SOCKETS)) +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + { + LWIP_DEBUGF(SOCKETS_DEBUG, ("tryget_socket_unconn(%d): invalid\n", fd)); + return NULL; +@@ -500,13 +500,13 @@ tryget_socket(int fd) + * @param fd externally used socket index + * @return struct lwip_sock for the socket or NULL if not found + */ +-#if USE_LIBOS ++#if GAZELLE_ENABLE + struct lwip_sock * + get_socket(int fd) + #else + static struct lwip_sock * + get_socket(int fd) +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + { + struct lwip_sock *sock = tryget_socket(fd); + if (!sock) { +@@ -519,7 +519,7 @@ get_socket(int fd) + return sock; + } + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + /** + * Map a externally used socket index to the internal socket representation. + * +@@ -535,7 +535,7 @@ get_socket_by_fd(int fd) + } + return &sockets[fd - LWIP_SOCKET_OFFSET]; + } +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + + /** + * Allocate a new socket for a given netconn. 
+@@ -553,7 +553,7 @@ alloc_socket(struct netconn *newconn, int accepted, int flags) + SYS_ARCH_DECL_PROTECT(lev); + LWIP_UNUSED_ARG(accepted); + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + int type, protocol = 0, domain = AF_INET; + switch (NETCONNTYPE_GROUP(newconn->type)) { + case NETCONN_RAW: +@@ -615,7 +615,7 @@ err: + posix_api->close_fn(i); + SYS_ARCH_UNPROTECT(lev); + return -1; +-#else /* USE_LIBOS */ ++#else /* GAZELLE_ENABLE */ + + /* allocate a new socket identifier */ + for (i = 0; i < NUM_SOCKETS; ++i) { +@@ -649,7 +649,7 @@ err: + } + return -1; + +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + } + + /** Free a socket (under lock) +@@ -773,12 +773,12 @@ lwip_accept4(int s, struct sockaddr *addr, socklen_t *addrlen, int flags) + done_socket(sock); + return -1; + } +-#if USE_LIBOS ++#if GAZELLE_ENABLE + LWIP_ASSERT("invalid socket index", (newsock >= LWIP_SOCKET_OFFSET) && (newsock < sockets_num + LWIP_SOCKET_OFFSET)); + gazelle_init_sock(newsock); + #else + LWIP_ASSERT("invalid socket index", (newsock >= LWIP_SOCKET_OFFSET) && (newsock < NUM_SOCKETS + LWIP_SOCKET_OFFSET)); +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + nsock = &sockets[newsock - LWIP_SOCKET_OFFSET]; + + /* See event_callback: If data comes in right away after an accept, even +@@ -816,13 +816,13 @@ lwip_accept4(int s, struct sockaddr *addr, socklen_t *addrlen, int flags) + } + + IPADDR_PORT_TO_SOCKADDR(&tempaddr, &naddr, port); +-#if !USE_LIBOS ++#if !GAZELLE_ENABLE + if (*addrlen > tempaddr.sa.sa_len) { + *addrlen = tempaddr.sa.sa_len; + } + #else + *addrlen = LWIP_MIN(*addrlen, sizeof(tempaddr)); +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + MEMCPY(addr, &tempaddr, *addrlen); + + LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_accept(%d) returning new sock=%d addr=", s, newsock)); +@@ -993,10 +993,10 @@ lwip_connect(int s, const struct sockaddr *name, socklen_t namelen) + return -1; + } + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + LWIP_DEBUGF(SOCKETS_DEBUG, ("libos connect 
succeed fd=%d\n",s)); + SET_CONN_TYPE_LIBOS(sock->conn); +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + + LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_connect(%d) succeeded\n", s)); + sock_set_errno(sock, 0); +@@ -1065,7 +1065,7 @@ lwip_recv_tcp(struct lwip_sock *sock, void *mem, size_t len, int flags) + apiflags |= NETCONN_DONTBLOCK; + } + +-#if !USE_LIBOS ++#if !GAZELLE_ENABLE + do { + struct pbuf *p; + err_t err; +@@ -1146,13 +1146,13 @@ lwip_recv_tcp(struct lwip_sock *sock, void *mem, size_t len, int flags) + } while ((recv_left > 0) && !(flags & MSG_PEEK)); + + lwip_recv_tcp_done: +-#else /* USE_LIBOS */ ++#else /* GAZELLE_ENABLE */ + LWIP_UNUSED_ARG(recv_left); + recvd = read_lwip_data(sock, flags, apiflags); + if (recvd <= 0) { + return recvd; + } +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + if (apiflags & NETCONN_NOAUTORCVD) { + if ((recvd > 0) && !(flags & MSG_PEEK)) { + /* ensure window update after copying all data */ +@@ -1188,7 +1188,7 @@ lwip_sock_make_addr(struct netconn *conn, ip_addr_t *fromaddr, u16_t port, + #endif /* LWIP_IPV4 && LWIP_IPV6 */ + + IPADDR_PORT_TO_SOCKADDR(&saddr, fromaddr, port); +-#if !USE_LIBOS ++#if !GAZELLE_ENABLE + if (*fromlen < saddr.sa.sa_len) { + truncated = 1; + } else if (*fromlen > saddr.sa.sa_len) { +@@ -2692,7 +2692,7 @@ event_callback(struct netconn *conn, enum netconn_evt evt, u16_t len) + if (sock->rcvevent > 1) { + check_waiters = 0; + } +-#if USE_LIBOS ++#if GAZELLE_ENABLE + if (conn->acceptmbox != NULL && !sys_mbox_empty(conn->acceptmbox)) { + add_sock_event(sock, POLLIN); + } +@@ -2714,7 +2714,7 @@ event_callback(struct netconn *conn, enum netconn_evt evt, u16_t len) + break; + case NETCONN_EVT_ERROR: + sock->errevent = 1; +-#if USE_LIBOS ++#if GAZELLE_ENABLE + add_sock_event(sock, EPOLLERR); + #endif + break; +@@ -2911,7 +2911,7 @@ lwip_getaddrname(int s, struct sockaddr *name, socklen_t *namelen, u8_t local) + ip_addr_debug_print_val(SOCKETS_DEBUG, naddr); + LWIP_DEBUGF(SOCKETS_DEBUG, (" 
port=%"U16_F")\n", port)); + +-#if !USE_LIBOS ++#if !GAZELLE_ENABLE + if (*namelen > saddr.sa.sa_len) { + *namelen = saddr.sa.sa_len; + } +@@ -3052,7 +3052,7 @@ lwip_sockopt_to_ipopt(int optname) + return SOF_KEEPALIVE; + case SO_REUSEADDR: + return SOF_REUSEADDR; +-#if USE_LIBOS ++#if GAZELLE_ENABLE + case SO_REUSEPORT: + return SO_REUSEPORT; + #endif +@@ -3928,7 +3928,7 @@ lwip_setsockopt_impl(int s, int level, int optname, const void *optval, socklen_ + return err; + } + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + int + lwip_ioctl(int s, long cmd, ...) + { +@@ -3963,7 +3963,7 @@ lwip_ioctl(int s, long cmd, void *argp) + if (!sock) { + return -1; + } +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + + switch (cmd) { + #if LWIP_SO_RCVBUF || LWIP_FIONREAD_LINUXMODE +@@ -4107,7 +4107,7 @@ lwip_fcntl(int s, int cmd, int val) + break; + default: + LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_fcntl(%d, UNIMPL: %d, %d)\n", s, cmd, val)); +-#if USE_LIBOS ++#if GAZELLE_ENABLE + sock_set_errno(sock, 0); /* not yet implemented, but we return 0 for compatilbe with app */ + ret = 0; + #else +@@ -4375,7 +4375,7 @@ lwip_socket_drop_registered_mld6_memberships(int s) + } + #endif /* LWIP_IPV6_MLD */ + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + void lwip_sock_init(void) + { + if (sockets_num == 0) { +@@ -4400,6 +4400,6 @@ void lwip_exit(void) + return; + } + +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + + #endif /* LWIP_SOCKET */ +diff --git a/src/api/tcpip.c b/src/api/tcpip.c +index d3d0b55..fe7a7bd 100644 +--- a/src/api/tcpip.c ++++ b/src/api/tcpip.c +@@ -123,13 +123,13 @@ again: + * + * @param arg unused argument + */ +-#if USE_LIBOS ++#if GAZELLE_ENABLE + __attribute__((unused)) static void + tcpip_thread(void *arg) + #else + static void + tcpip_thread(void *arg) +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + { + struct tcpip_msg *msg; + LWIP_UNUSED_ARG(arg); +@@ -247,7 +247,7 @@ tcpip_inpkt(struct pbuf *p, struct netif *inp, netif_input_fn input_fn) + #if 
LWIP_TCPIP_CORE_LOCKING_INPUT + err_t ret; + LWIP_DEBUGF(TCPIP_DEBUG, ("tcpip_inpkt: PACKET %p/%p\n", (void *)p, (void *)inp)); +-#if USE_LIBOS && LWIP_TIMERS ++#if GAZELLE_ENABLE && LWIP_TIMERS + sys_timer_run(); + #endif + LOCK_TCPIP_CORE(); +@@ -329,7 +329,7 @@ tcpip_callback(tcpip_callback_fn function, void *ctx) + msg->msg.cb.function = function; + msg->msg.cb.ctx = ctx; + +-#if USE_LIBOS && LWIP_TIMER ++#if GAZELLE_ENABLE && LWIP_TIMER + sys_timer_run(); + #endif + sys_mbox_post(&tcpip_mbox, msg); +@@ -368,7 +368,7 @@ tcpip_try_callback(tcpip_callback_fn function, void *ctx) + msg->msg.cb.function = function; + msg->msg.cb.ctx = ctx; + +-#if USE_LIBOS && LWIP_TIMER ++#if GAZELLE_ENABLE && LWIP_TIMER + sys_timer_run(); + #endif + if (sys_mbox_trypost(&tcpip_mbox, msg) != ERR_OK) { +@@ -452,7 +452,7 @@ tcpip_send_msg_wait_sem(tcpip_callback_fn fn, void *apimsg, sys_sem_t *sem) + { + #if LWIP_TCPIP_CORE_LOCKING + LWIP_UNUSED_ARG(sem); +-#if USE_LIBOS && LWIP_TIMERS ++#if GAZELLE_ENABLE && LWIP_TIMERS + sys_timer_run(); + #endif + LOCK_TCPIP_CORE(); +@@ -492,7 +492,7 @@ tcpip_api_call(tcpip_api_call_fn fn, struct tcpip_api_call_data *call) + #if LWIP_TCPIP_CORE_LOCKING + err_t err; + LOCK_TCPIP_CORE(); +-#if USE_LIBOS && LWIP_TIMERS ++#if GAZELLE_ENABLE && LWIP_TIMERS + sys_timer_run(); + #endif + err = fn(call); +@@ -558,7 +558,7 @@ tcpip_callbackmsg_new(tcpip_callback_fn function, void *ctx) + msg->msg.cb.function = function; + msg->msg.cb.ctx = ctx; + +-#if USE_LIBOS && LWIP_TIMER ++#if GAZELLE_ENABLE && LWIP_TIMER + sys_timer_run(); + #endif + return (struct tcpip_callback_msg *)msg; +@@ -638,7 +638,7 @@ tcpip_init(tcpip_init_done_fn initfunc, void *arg) + } + #endif /* LWIP_TCPIP_CORE_LOCKING */ + +-#if !USE_LIBOS ++#if !GAZELLE_ENABLE + sys_thread_new(TCPIP_THREAD_NAME, tcpip_thread, NULL, TCPIP_THREAD_STACKSIZE, TCPIP_THREAD_PRIO); + #endif + } +diff --git a/src/core/ipv4/etharp.c b/src/core/ipv4/etharp.c +index f1903e4..5a1a834 100644 +--- 
a/src/core/ipv4/etharp.c ++++ b/src/core/ipv4/etharp.c +@@ -482,7 +482,7 @@ etharp_update_arp_entry(struct netif *netif, const ip4_addr_t *ipaddr, struct et + struct pbuf *p = arp_table[i].q; + arp_table[i].q = NULL; + #endif /* ARP_QUEUEING */ +-#if USE_LIBOS ++#if GAZELLE_ENABLE + struct pbuf *tmp = p->next; + while (tmp != NULL) { + tmp->ref--; +@@ -1034,7 +1034,7 @@ etharp_query(struct netif *netif, const ip4_addr_t *ipaddr, struct pbuf *q) + } else { + /* referencing the old pbuf is enough */ + p = q; +-#if USE_LIBOS ++#if GAZELLE_ENABLE + struct pbuf *tmp = p; + while (tmp != NULL) { + pbuf_ref(tmp); +diff --git a/src/core/ipv4/icmp.c b/src/core/ipv4/icmp.c +index c58ae25..402ba69 100644 +--- a/src/core/ipv4/icmp.c ++++ b/src/core/ipv4/icmp.c +@@ -51,7 +51,7 @@ + + #include + +-#if USE_LIBOS && CHECKSUM_GEN_IP_HW ++#if GAZELLE_ENABLE && CHECKSUM_GEN_IP_HW + #include "dpdk_cksum.h" + #endif + +diff --git a/src/core/ipv4/ip4.c b/src/core/ipv4/ip4.c +index d823491..1b70bb5 100644 +--- a/src/core/ipv4/ip4.c ++++ b/src/core/ipv4/ip4.c +@@ -59,7 +59,7 @@ + + #include + +-#if USE_LIBOS && (CHECKSUM_CHECK_IP_HW || CHECKSUM_GEN_IP_HW) ++#if GAZELLE_ENABLE && (CHECKSUM_CHECK_IP_HW || CHECKSUM_GEN_IP_HW) + #include "dpdk_cksum.h" + #endif + +@@ -1034,13 +1034,13 @@ ip4_output_if_opt_src(struct pbuf *p, const ip4_addr_t *src, const ip4_addr_t *d + #endif /* ENABLE_LOOPBACK */ + #if IP_FRAG + /* don't fragment if interface has mtu set to 0 [loopif] */ +-#if USE_LIBOS ++#if GAZELLE_ENABLE + if (!(get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO)) { + #endif + if (netif->mtu && (p->tot_len > netif->mtu)) { + return ip4_frag(p, netif, dest); + } +-#if USE_LIBOS ++#if GAZELLE_ENABLE + } + #endif + #endif /* IP_FRAG */ +diff --git a/src/core/ipv4/ip4_frag.c b/src/core/ipv4/ip4_frag.c +index c60523d..f15b798 100644 +--- a/src/core/ipv4/ip4_frag.c ++++ b/src/core/ipv4/ip4_frag.c +@@ -51,7 +51,7 @@ + + #include + +-#if USE_LIBOS && CHECKSUM_GEN_IP_HW ++#if GAZELLE_ENABLE && 
CHECKSUM_GEN_IP_HW + #include "dpdk_cksum.h" + #endif + +@@ -115,7 +115,7 @@ PACK_STRUCT_END + IPH_ID(iphdrA) == IPH_ID(iphdrB)) ? 1 : 0 + + /* global variables */ +-#if USE_LIBOS ++#if GAZELLE_ENABLE + static PER_THREAD struct ip_reassdata *reassdatagrams; + static PER_THREAD u16_t ip_reass_pbufcount; + #else +diff --git a/src/core/memp.c b/src/core/memp.c +index 454ba32..fca1b0c 100644 +--- a/src/core/memp.c ++++ b/src/core/memp.c +@@ -78,14 +78,14 @@ + #define LWIP_MEMPOOL(name,num,size,desc) LWIP_MEMPOOL_DECLARE(name,num,size,desc) + #include "lwip/priv/memp_std.h" + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + PER_THREAD struct memp_desc* memp_pools[MEMP_MAX] = {NULL}; + #else + const struct memp_desc *const memp_pools[MEMP_MAX] = { + #define LWIP_MEMPOOL(name,num,size,desc) &memp_ ## name, + #include "lwip/priv/memp_std.h" + }; +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + + #ifdef LWIP_HOOK_FILENAME + #include LWIP_HOOK_FILENAME +diff --git a/src/core/pbuf.c b/src/core/pbuf.c +index ad75aa6..dd71519 100644 +--- a/src/core/pbuf.c ++++ b/src/core/pbuf.c +@@ -83,7 +83,7 @@ + #if LWIP_CHECKSUM_ON_COPY + #include "lwip/inet_chksum.h" + #endif +-#if USE_LIBOS ++#if GAZELLE_ENABLE + #include + #endif + +@@ -284,7 +284,7 @@ pbuf_alloc(pbuf_layer layer, u16_t length, pbuf_type type) + } + + /* If pbuf is to be allocated in RAM, allocate memory for it. 
*/ +-#if USE_LIBOS ++#if GAZELLE_ENABLE + /* alloc mbuf avoid send copy */ + p = lwip_alloc_pbuf(layer, length, type); + #else +@@ -753,7 +753,7 @@ pbuf_free(struct pbuf *p) + /* de-allocate all consecutive pbufs from the head of the chain that + * obtain a zero reference count after decrementing*/ + while (p != NULL) { +-#if USE_LIBOS ++#if GAZELLE_ENABLE + if (p->next) + rte_prefetch0(p->next); + #endif +@@ -1027,7 +1027,7 @@ pbuf_copy_partial_pbuf(struct pbuf *p_to, const struct pbuf *p_from, u16_t copy_ + len_calc = p_to->len - offset_to; + } + +-#if USE_LIBOS && (CHECKSUM_GEN_IP_HW || CHECKSUM_GEN_TCP_HW) ++#if GAZELLE_ENABLE && (CHECKSUM_GEN_IP_HW || CHECKSUM_GEN_TCP_HW) + p_to->l2_len = p_from->l2_len; + p_to->l3_len = p_from->l3_len; + p_to->ol_flags = p_from->ol_flags; +diff --git a/src/core/tcp.c b/src/core/tcp.c +index 3171c5e..69a39f6 100644 +--- a/src/core/tcp.c ++++ b/src/core/tcp.c +@@ -183,7 +183,7 @@ PER_THREAD struct tcp_pcb *tcp_tw_pcbs; + /** An array with all (non-temporary) PCB lists, mainly used for smaller code size */ + PER_THREAD struct tcp_pcb ** tcp_pcb_lists[NUM_TCP_PCB_LISTS] = {NULL, NULL, NULL, NULL}; + +-#if TCP_PCB_HASH ++#if GAZELLE_TCP_PCB_HASH + #define INIT_TCP_HTABLE(ht_ptr) \ + do { \ + int _i; \ +@@ -203,7 +203,7 @@ PER_THREAD u8_t tcp_active_pcbs_changed; + /** Timer counter to handle calling slow-timer from tcp_tmr() */ + static PER_THREAD u8_t tcp_timer; + static PER_THREAD u8_t tcp_timer_ctr; +-#if USE_LIBOS ++#if GAZELLE_ENABLE + static u16_t tcp_new_port(struct tcp_pcb *pcb); + #else + static u16_t tcp_new_port(void); +@@ -214,7 +214,7 @@ static err_t tcp_close_shutdown_fin(struct tcp_pcb *pcb); + static void tcp_ext_arg_invoke_callbacks_destroyed(struct tcp_pcb_ext_args *ext_args); + #endif + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + static u8_t port_state[TCP_LOCAL_PORT_RANGE_END - TCP_LOCAL_PORT_RANGE_START + 1] = {0}; + void release_port(u16_t port) + { +@@ -238,7 +238,7 @@ tcp_init(void) + tcp_port = 
TCP_ENSURE_LOCAL_PORT_RANGE(LWIP_RAND()); + #endif /* LWIP_RAND */ + +-#if TCP_PCB_HASH ++#if GAZELLE_TCP_PCB_HASH + tcp_active_htable = (struct tcp_hash_table*)mem_malloc(sizeof(struct tcp_hash_table)); + LWIP_ASSERT("malloc tcp_active_htable mem failed.", tcp_active_htable != NULL); + INIT_TCP_HTABLE(tcp_active_htable); +@@ -249,7 +249,7 @@ tcp_init(void) + void + tcp_free(struct tcp_pcb *pcb) + { +-#if USE_LIBOS ++#if GAZELLE_ENABLE + vdev_unreg_done(pcb); + release_port(pcb->local_port); + #endif +@@ -405,7 +405,7 @@ tcp_close_shutdown(struct tcp_pcb *pcb, u8_t rst_on_unacked_data) + pcb->local_port, pcb->remote_port); + + tcp_pcb_purge(pcb); +-#if TCP_PCB_HASH ++#if GAZELLE_TCP_PCB_HASH + TCP_RMV_ACTIVE_HASH(pcb); + #endif + TCP_RMV_ACTIVE(pcb); +@@ -442,7 +442,7 @@ tcp_close_shutdown(struct tcp_pcb *pcb, u8_t rst_on_unacked_data) + tcp_free_listen(pcb); + break; + case SYN_SENT: +-#if TCP_PCB_HASH ++#if GAZELLE_TCP_PCB_HASH + TCP_PCB_REMOVE_ACTIVE_HASH(pcb); + #endif + TCP_PCB_REMOVE_ACTIVE(pcb); +@@ -650,7 +650,7 @@ tcp_abandon(struct tcp_pcb *pcb, int reset) + } else { + send_rst = reset; + local_port = pcb->local_port; +-#if TCP_PCB_HASH ++#if GAZELLE_TCP_PCB_HASH + TCP_PCB_REMOVE_ACTIVE_HASH(pcb); + #endif + TCP_PCB_REMOVE_ACTIVE(pcb); +@@ -761,7 +761,7 @@ tcp_bind(struct tcp_pcb *pcb, const ip_addr_t *ipaddr, u16_t port) + #endif /* LWIP_IPV6 && LWIP_IPV6_SCOPES */ + + if (port == 0) { +-#if USE_LIBOS ++#if GAZELLE_ENABLE + port = tcp_new_port(pcb); + #else + port = tcp_new_port(); +@@ -773,7 +773,7 @@ tcp_bind(struct tcp_pcb *pcb, const ip_addr_t *ipaddr, u16_t port) + /* Check if the address already is in use (on all lists) */ + for (i = 0; i < max_pcb_list; i++) { + for (cpcb = *tcp_pcb_lists[i]; cpcb != NULL; cpcb = cpcb->next) { +-#if REUSE_IPPORT ++#if GAZELLE_TCP_REUSE_IPPORT + continue; + #else + if (cpcb->local_port == port) { +@@ -927,7 +927,7 @@ tcp_listen_with_backlog_and_err(struct tcp_pcb *pcb, u8_t backlog, err_t *err) + goto done; + } + 
+-#if REUSE_IPPORT ++#if GAZELLE_TCP_REUSE_IPPORT + struct tcp_pcb_listen *first_same_port_pcb = NULL; + for (lpcb = tcp_listen_pcbs.listen_pcbs; lpcb != NULL; lpcb = lpcb->next) { + if ((lpcb->local_port == pcb->local_port) && +@@ -952,9 +952,9 @@ tcp_listen_with_backlog_and_err(struct tcp_pcb *pcb, u8_t backlog, err_t *err) + } + } + } +-#endif /* REUSE_IPPORT */ ++#endif /* GAZELLE_TCP_REUSE_IPPORT */ + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + vdev_reg_done(REG_RING_TCP_LISTEN, pcb); + #endif + +@@ -972,7 +972,7 @@ tcp_listen_with_backlog_and_err(struct tcp_pcb *pcb, u8_t backlog, err_t *err) + lpcb->ttl = pcb->ttl; + lpcb->tos = pcb->tos; + +-#if REUSE_IPPORT ++#if GAZELLE_TCP_REUSE_IPPORT + lpcb->connect_num = 0; + lpcb->next_same_port_pcb = NULL; + +@@ -992,7 +992,7 @@ tcp_listen_with_backlog_and_err(struct tcp_pcb *pcb, u8_t backlog, err_t *err) + /* copy over ext_args to listening pcb */ + memcpy(&lpcb->ext_args, &pcb->ext_args, sizeof(pcb->ext_args)); + #endif +-#if USE_LIBOS ++#if GAZELLE_ENABLE + /* pcb transfer to lpcb and reg into tcp_listen_pcbs. freeing pcb shouldn't release sock table in here. 
+ * local_port=0 avoid to release sock table in tcp_free */ + pcb->local_port = 0; +@@ -1006,7 +1006,7 @@ tcp_listen_with_backlog_and_err(struct tcp_pcb *pcb, u8_t backlog, err_t *err) + tcp_backlog_set(lpcb, backlog); + #endif /* TCP_LISTEN_BACKLOG */ + +-#if REUSE_IPPORT ++#if GAZELLE_TCP_REUSE_IPPORT + if (first_same_port_pcb != NULL) { + TCP_REG_SAMEPORT((struct tcp_pcb_listen *)first_same_port_pcb, (struct tcp_pcb_listen *)lpcb); + } else +@@ -1109,7 +1109,7 @@ tcp_recved(struct tcp_pcb *pcb, u16_t len) + * + * @return a new (free) local TCP port number + */ +-#if USE_LIBOS ++#if GAZELLE_ENABLE + static u16_t + tcp_new_port(struct tcp_pcb *pcb) + #else +@@ -1128,7 +1128,7 @@ tcp_new_port(void) + } + + if (__atomic_load_n(&port_state[tcp_port - TCP_LOCAL_PORT_RANGE_START], __ATOMIC_ACQUIRE) == 0) { +-#if USE_LIBOS ++#if GAZELLE_ENABLE + if (port_in_stack_queue(pcb->remote_ip.addr, pcb->local_ip.addr, pcb->remote_port, tcp_port)) { + tmp_port = tcp_port; + __atomic_store_n(&port_state[tcp_port - TCP_LOCAL_PORT_RANGE_START], 1, __ATOMIC_RELEASE); +@@ -1231,7 +1231,7 @@ tcp_connect(struct tcp_pcb *pcb, const ip_addr_t *ipaddr, u16_t port, + + old_local_port = pcb->local_port; + if (pcb->local_port == 0) { +-#if USE_LIBOS ++#if GAZELLE_ENABLE + pcb->local_port = tcp_new_port(pcb); + #else + pcb->local_port = tcp_new_port(); +@@ -1289,7 +1289,7 @@ tcp_connect(struct tcp_pcb *pcb, const ip_addr_t *ipaddr, u16_t port, + /* Send a SYN together with the MSS option. 
*/ + ret = tcp_enqueue_flags(pcb, TCP_SYN); + if (ret == ERR_OK) { +-#if USE_LIBOS ++#if GAZELLE_ENABLE + vdev_reg_done(REG_RING_TCP_CONNECT, pcb); + #endif + +@@ -1298,7 +1298,7 @@ tcp_connect(struct tcp_pcb *pcb, const ip_addr_t *ipaddr, u16_t port, + if (old_local_port != 0) { + TCP_RMV(&tcp_bound_pcbs, pcb); + } +-#if TCP_PCB_HASH ++#if GAZELLE_TCP_PCB_HASH + TCP_REG_ACTIVE_HASH(pcb); + #endif + TCP_REG_ACTIVE(pcb); +@@ -1516,7 +1516,7 @@ tcp_slowtmr_start: + if (prev != NULL) { + LWIP_ASSERT("tcp_slowtmr: middle tcp != tcp_active_pcbs", pcb != tcp_active_pcbs); + prev->next = pcb->next; +-#if USE_LIBOS ++#if GAZELLE_ENABLE + if (pcb->next) + pcb->next->prev = prev; + //dont set next NULL, it will be used below +@@ -1526,14 +1526,14 @@ tcp_slowtmr_start: + /* This PCB was the first. */ + LWIP_ASSERT("tcp_slowtmr: first pcb == tcp_active_pcbs", tcp_active_pcbs == pcb); + tcp_active_pcbs = pcb->next; +-#if USE_LIBOS ++#if GAZELLE_ENABLE + if (pcb->next) + pcb->next->prev = NULL; + //dont set next NULL, it will be used below + pcb->prev = NULL; + #endif + } +-#if TCP_PCB_HASH ++#if GAZELLE_TCP_PCB_HASH + TCP_RMV_ACTIVE_HASH(pcb); + #endif + +@@ -1546,7 +1546,7 @@ tcp_slowtmr_start: + last_state = pcb->state; + pcb2 = pcb; + pcb = pcb->next; +-#if USE_LIBOS ++#if GAZELLE_ENABLE + pcb2->next = NULL; + #endif + tcp_free(pcb2); +@@ -1600,7 +1600,7 @@ tcp_slowtmr_start: + if (prev != NULL) { + LWIP_ASSERT("tcp_slowtmr: middle tcp != tcp_tw_pcbs", pcb != tcp_tw_pcbs); + prev->next = pcb->next; +-#if USE_LIBOS ++#if GAZELLE_ENABLE + if (pcb->next) + pcb->next->prev = prev; + //dont set next NULL, it will be used below +@@ -1610,7 +1610,7 @@ tcp_slowtmr_start: + /* This PCB was the first. 
*/ + LWIP_ASSERT("tcp_slowtmr: first pcb == tcp_tw_pcbs", tcp_tw_pcbs == pcb); + tcp_tw_pcbs = pcb->next; +-#if USE_LIBOS ++#if GAZELLE_ENABLE + if (pcb->next) + pcb->next->prev = NULL; + //dont set next NULL, it will be used below +@@ -1619,7 +1619,7 @@ tcp_slowtmr_start: + } + pcb2 = pcb; + pcb = pcb->next; +-#if USE_LIBOS ++#if GAZELLE_ENABLE + pcb2->next = NULL; + #endif + tcp_free(pcb2); +@@ -1790,7 +1790,7 @@ tcp_seg_free(struct tcp_seg *seg) + seg->p = NULL; + #endif /* TCP_DEBUG */ + } +-#if !USE_LIBOS ++#if !GAZELLE_ENABLE + memp_free(MEMP_TCP_SEG, seg); + #endif + } +@@ -1828,7 +1828,7 @@ tcp_seg_copy(struct tcp_seg *seg) + + LWIP_ASSERT("tcp_seg_copy: invalid seg", seg != NULL); + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + cseg = (struct tcp_seg *)((uint8_t *)seg->p + sizeof(struct pbuf_custom)); + #else + cseg = (struct tcp_seg *)memp_malloc(MEMP_TCP_SEG); +@@ -2371,7 +2371,7 @@ tcp_pcb_remove(struct tcp_pcb **pcblist, struct tcp_pcb *pcb) + #endif /* TCP_QUEUE_OOSEQ */ + } + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + vdev_unreg_done(pcb); + release_port(pcb->local_port); + #endif +@@ -2383,13 +2383,13 @@ tcp_pcb_remove(struct tcp_pcb **pcblist, struct tcp_pcb *pcb) + LWIP_ASSERT("tcp_pcb_remove: tcp_pcbs_sane()", tcp_pcbs_sane()); + } + +-#if TCP_PCB_HASH ++#if GAZELLE_TCP_PCB_HASH + void + tcp_pcb_remove_hash(struct tcp_hash_table *htb, struct tcp_pcb *pcb) + { + TCP_RMV_HASH(htb, pcb); + } +-#endif /* TCP_PCB_HASH */ ++#endif /* GAZELLE_TCP_PCB_HASH */ + + /** + * Calculates a new initial sequence number for new connections. 
+diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c +index 9f5c34a..dd83260 100644 +--- a/src/core/tcp_in.c ++++ b/src/core/tcp_in.c +@@ -58,13 +58,13 @@ + #if LWIP_ND6_TCP_REACHABILITY_HINTS + #include "lwip/nd6.h" + #endif /* LWIP_ND6_TCP_REACHABILITY_HINTS */ +-#if USE_LIBOS ++#if GAZELLE_ENABLE + #include "lwip/api.h" + #endif + + #include + +-#if USE_LIBOS && CHECKSUM_CHECK_TCP_HW ++#if GAZELLE_ENABLE && CHECKSUM_CHECK_TCP_HW + #include + #endif /* CHECKSUM_CHECK_TCP_HW */ + +@@ -134,7 +134,7 @@ tcp_input(struct pbuf *p, struct netif *inp) + u8_t hdrlen_bytes; + err_t err; + +-#if TCP_PCB_HASH ++#if GAZELLE_TCP_PCB_HASH + u32_t idx; + struct hlist_head *head; + struct hlist_node *node; +@@ -277,7 +277,7 @@ tcp_input(struct pbuf *p, struct netif *inp) + for an active connection. */ + prev = NULL; + +-#if TCP_PCB_HASH ++#if GAZELLE_TCP_PCB_HASH + idx = TUPLE4_HASH_FN( ip_current_dest_addr()->addr, tcphdr->dest, + ip_current_src_addr()->addr, tcphdr->src) & + (tcp_active_htable->size - 1); +@@ -301,7 +301,7 @@ tcp_input(struct pbuf *p, struct netif *inp) + pcb->local_port == tcphdr->dest && + ip_addr_cmp(&pcb->remote_ip, ip_current_src_addr()) && + ip_addr_cmp(&pcb->local_ip, ip_current_dest_addr())) { +-#if !TCP_PCB_HASH ++#if !GAZELLE_TCP_PCB_HASH + /* Move this PCB to the front of the list so that subsequent + lookups will be faster (we exploit locality in TCP segment + arrivals). 
*/ +@@ -317,7 +317,7 @@ tcp_input(struct pbuf *p, struct netif *inp) + #endif + break; + } +-#if TCP_PCB_HASH ++#if GAZELLE_TCP_PCB_HASH + pcb = NULL; + #else + prev = pcb; +@@ -356,7 +356,7 @@ tcp_input(struct pbuf *p, struct netif *inp) + } + } + +-#if REUSE_IPPORT ++#if GAZELLE_TCP_REUSE_IPPORT + struct tcp_pcb_listen *min_cnts_lpcb = NULL; + #endif + /* Finally, if we still did not get a match, we check all PCBs that +@@ -382,7 +382,7 @@ tcp_input(struct pbuf *p, struct netif *inp) + } else if (IP_ADDR_PCB_VERSION_MATCH_EXACT(lpcb, ip_current_dest_addr())) { + if (ip_addr_cmp(&lpcb->local_ip, ip_current_dest_addr())) { + /* found an exact match */ +-#if REUSE_IPPORT ++#if GAZELLE_TCP_REUSE_IPPORT + // check master fd + struct tcp_pcb_listen *tmp_lpcb = lpcb; + u8_t have_master_fd = 0; +@@ -395,7 +395,7 @@ tcp_input(struct pbuf *p, struct netif *inp) + + tmp_lpcb = lpcb; + min_cnts_lpcb = lpcb; +- u16_t min_conn_num = MAX_CONN_NUM_PER_THREAD; ++ u16_t min_conn_num = GAZELLE_TCP_MAX_CONN_PER_THREAD; + while (tmp_lpcb != NULL) { + if (!have_master_fd || tmp_lpcb->master_lpcb) { + if (tmp_lpcb->connect_num < min_conn_num) { +@@ -434,13 +434,13 @@ tcp_input(struct pbuf *p, struct netif *inp) + arrivals). 
*/ + if (prev != NULL) { + ((struct tcp_pcb_listen *)prev)->next = lpcb->next; +-#if USE_LIBOS ++#if GAZELLE_ENABLE + if (lpcb->next) + lpcb->next->prev = (struct tcp_pcb_listen *)prev; + #endif + /* our successor is the remainder of the listening list */ + lpcb->next = tcp_listen_pcbs.listen_pcbs; +-#if USE_LIBOS ++#if GAZELLE_ENABLE + lpcb->prev = NULL; + #endif + /* put this listening pcb at the head of the listening list */ +@@ -455,7 +455,7 @@ tcp_input(struct pbuf *p, struct netif *inp) + tcphdr_opt1len, tcphdr_opt2, p) == ERR_OK) + #endif + { +-#if REUSE_IPPORT ++#if GAZELLE_TCP_REUSE_IPPORT + tcp_listen_input(min_cnts_lpcb); + min_cnts_lpcb->connect_num++; + #else +@@ -528,7 +528,7 @@ tcp_input(struct pbuf *p, struct netif *inp) + application that the connection is dead before we + deallocate the PCB. */ + TCP_EVENT_ERR(pcb->state, pcb->errf, pcb->callback_arg, ERR_RST); +-#if TCP_PCB_HASH ++#if GAZELLE_TCP_PCB_HASH + tcp_pcb_remove_hash(tcp_active_htable, pcb); + #endif + tcp_pcb_remove(&tcp_active_pcbs, pcb); +@@ -710,7 +710,7 @@ tcp_input_delayed_close(struct tcp_pcb *pcb) + ensure the application doesn't continue using the PCB. */ + TCP_EVENT_ERR(pcb->state, pcb->errf, pcb->callback_arg, ERR_CLSD); + } +-#if TCP_PCB_HASH ++#if GAZELLE_TCP_PCB_HASH + tcp_pcb_remove_hash(tcp_active_htable, pcb); + #endif + tcp_pcb_remove(&tcp_active_pcbs, pcb); +@@ -799,12 +799,12 @@ tcp_listen_input(struct tcp_pcb_listen *pcb) + npcb->netif_idx = pcb->netif_idx; + /* Register the new PCB so that we can begin receiving segments + for it. 
*/ +-#if TCP_PCB_HASH ++#if GAZELLE_TCP_PCB_HASH + TCP_REG_ACTIVE_HASH(npcb); + #endif + TCP_REG_ACTIVE(npcb); + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + vdev_reg_done(REG_RING_TCP_CONNECT, npcb); + #endif + +@@ -1102,7 +1102,7 @@ tcp_process(struct tcp_pcb *pcb) + if (recv_flags & TF_GOT_FIN) { + tcp_ack_now(pcb); + pcb->state = CLOSE_WAIT; +-#if USE_LIBOS ++#if GAZELLE_ENABLE + API_EVENT(((struct netconn *)pcb->callback_arg), NETCONN_EVT_ERROR, 0); + #endif + } +@@ -1120,7 +1120,7 @@ tcp_process(struct tcp_pcb *pcb) + if (recv_flags & TF_GOT_FIN) { /* passive close */ + tcp_ack_now(pcb); + pcb->state = CLOSE_WAIT; +-#if USE_LIBOS ++#if GAZELLE_ENABLE + API_EVENT(((struct netconn *)pcb->callback_arg), NETCONN_EVT_ERROR, 0); + #endif + } +@@ -1134,7 +1134,7 @@ tcp_process(struct tcp_pcb *pcb) + ("TCP connection closed: FIN_WAIT_1 %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest)); + tcp_ack_now(pcb); + tcp_pcb_purge(pcb); +-#if TCP_PCB_HASH ++#if GAZELLE_TCP_PCB_HASH + TCP_RMV_ACTIVE_HASH(pcb); + #endif + TCP_RMV_ACTIVE(pcb); +@@ -1155,7 +1155,7 @@ tcp_process(struct tcp_pcb *pcb) + LWIP_DEBUGF(TCP_DEBUG, ("TCP connection closed: FIN_WAIT_2 %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest)); + tcp_ack_now(pcb); + tcp_pcb_purge(pcb); +-#if TCP_PCB_HASH ++#if GAZELLE_TCP_PCB_HASH + TCP_RMV_ACTIVE_HASH(pcb); + #endif + TCP_RMV_ACTIVE(pcb); +@@ -1168,7 +1168,7 @@ tcp_process(struct tcp_pcb *pcb) + if ((flags & TCP_ACK) && ackno == pcb->snd_nxt && pcb->unsent == NULL) { + LWIP_DEBUGF(TCP_DEBUG, ("TCP connection closed: CLOSING %"U16_F" -> %"U16_F".\n", inseg.tcphdr->src, inseg.tcphdr->dest)); + tcp_pcb_purge(pcb); +-#if TCP_PCB_HASH ++#if GAZELLE_TCP_PCB_HASH + TCP_RMV_ACTIVE_HASH(pcb); + #endif + TCP_RMV_ACTIVE(pcb); +@@ -1377,7 +1377,7 @@ tcp_receive(struct tcp_pcb *pcb) + if (pcb->lastack == ackno) { + found_dataack = 1; + ++pcb->dataacks; +- if (pcb->dataacks > MAX_DATA_ACK_NUM) { ++ if (pcb->dataacks > GAZELLE_TCP_MAX_DATA_ACK_NUM) { + 
if (tcp_rexmit(pcb) == ERR_OK) { + pcb->rtime = 0; + pcb->dataacks = 0; +@@ -1775,7 +1775,7 @@ tcp_receive(struct tcp_pcb *pcb) + recv_flags |= TF_GOT_FIN; + if (pcb->state == ESTABLISHED) { /* force passive close or we can move to active close */ + pcb->state = CLOSE_WAIT; +-#if USE_LIBOS ++#if GAZELLE_ENABLE + API_EVENT(((struct netconn *)pcb->callback_arg), NETCONN_EVT_ERROR, 0); + #endif + } +diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c +index bf23381..1b3c5af 100644 +--- a/src/core/tcp_out.c ++++ b/src/core/tcp_out.c +@@ -80,7 +80,7 @@ + + #include + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + #include "lwipsock.h" + #include + #if CHECKSUM_GEN_TCP_HW +@@ -162,7 +162,7 @@ tcp_route(const struct tcp_pcb *pcb, const ip_addr_t *src, const ip_addr_t *dst) + * The TCP header is filled in except ackno and wnd. + * p is freed on failure. + */ +-#if USE_LIBOS ++#if GAZELLE_ENABLE + void tcp_init_segment(struct tcp_seg *seg, const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, + u32_t seqno, u8_t optflags) + { +@@ -515,7 +515,7 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + * pos records progress as data is segmented. + */ + +-#if !USE_LIBOS ++#if !GAZELLE_ENABLE + /* Find the tail of the unsent queue. */ + if (pcb->unsent != NULL) { + u16_t space; +@@ -631,9 +631,9 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + pcb->unsent_oversize == 0); + #endif /* TCP_OVERSIZE */ + } +-#else /* USE_LIBOS */ ++#else /* GAZELLE_ENABLE */ + last_unsent = pcb->last_unsent; +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + + /* + * Phase 3: Create new segments. 
+@@ -651,7 +651,7 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + u8_t chksum_swapped = 0; + #endif /* TCP_CHECKSUM_ON_COPY */ + +-#if !USE_LIBOS ++#if !GAZELLE_ENABLE + if (apiflags & TCP_WRITE_FLAG_COPY) { + /* If copy is set, memory should be allocated and data copied + * into pbuf */ +@@ -698,13 +698,13 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + /* Concatenate the headers and data pbufs together. */ + pbuf_cat(p/*header*/, p2/*data*/); + } +-#else /* USE_LIBOS */ ++#else /* GAZELLE_ENABLE */ + p = write_lwip_data((struct lwip_sock *)arg, len - pos, &apiflags); + if (p == NULL) { + break; + } + seglen = p->tot_len; +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + + queuelen += pbuf_clen(p); + +@@ -714,7 +714,7 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + if (queuelen > LWIP_MIN(TCP_SND_QUEUELEN, TCP_SNDQUEUELEN_OVERFLOW)) { + LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_write: queue too long %"U16_F" (%d)\n", + queuelen, (int)TCP_SND_QUEUELEN)); +-#if USE_LIBOS ++#if GAZELLE_ENABLE + if (pos > 0) { + queuelen -= pbuf_clen(p); + break; +@@ -726,7 +726,7 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + } + + if ((seg = tcp_create_segment(pcb, p, 0, pcb->snd_lbb + pos, optflags)) == NULL) { +-#if USE_LIBOS ++#if GAZELLE_ENABLE + if (pos > 0) { + queuelen -= pbuf_clen(p); + break; +@@ -759,7 +759,7 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + lwip_ntohl(seg->tcphdr->seqno) + TCP_TCPLEN(seg))); + + pos += seglen; +-#if USE_LIBOS ++#if GAZELLE_ENABLE + write_lwip_over((struct lwip_sock*)arg); + #endif + } +@@ -847,7 +847,7 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + /* + * Finally update the pcb state. 
+ */ +-#if USE_LIBOS ++#if GAZELLE_ENABLE + if (queue) { + pcb->last_unsent = prev_seg; + } +@@ -876,7 +876,7 @@ memerr: + tcp_set_flags(pcb, TF_NAGLEMEMERR); + TCP_STATS_INC(tcp.memerr); + +-#if !USE_LIBOS ++#if !GAZELLE_ENABLE + if (concat_p != NULL) { + pbuf_free(concat_p); + } +@@ -1307,7 +1307,7 @@ tcp_build_wnd_scale_option(u32_t *opts) + } + #endif + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + static struct tcp_seg *tcp_output_over(struct tcp_pcb *pcb, struct tcp_seg *seg, struct tcp_seg *useg) + { + if (TCP_TCPLEN(seg) > 0) { +@@ -1472,7 +1472,7 @@ tcp_output(struct tcp_pcb *pcb) + /* data available and window allows it to be sent? */ + + u32_t send_len = 0; +-#if USE_LIBOS ++#if GAZELLE_ENABLE + if ((get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO) && pcb->need_tso_send) { + while(seg && send_len < 0xffff) { + /** +@@ -1485,7 +1485,7 @@ tcp_output(struct tcp_pcb *pcb) + struct pbuf *pre_pbuf = NULL; + u8_t pbuf_chain_len = 0; + u32_t next_seqno = lwip_ntohl(seg->tcphdr->seqno); +- while (seg != NULL && pbuf_chain_len < MAX_PBUF_CHAIN_LEN) { ++ while (seg != NULL && pbuf_chain_len < GAZELLE_TCP_MAX_PBUF_CHAIN_LEN) { + u32_t seg_seqno = lwip_ntohl(seg->tcphdr->seqno); + if (seg_seqno - pcb->lastack + seg->len > wnd) { + if (first_pbuf) +@@ -1501,7 +1501,7 @@ tcp_output(struct tcp_pcb *pcb) + goto output_done; + } + +- if (seg->len < TCP_MSS || next_seqno != seg_seqno || pbuf_chain_len >= MAX_PBUF_CHAIN_LEN) { ++ if (seg->len < TCP_MSS || next_seqno != seg_seqno || pbuf_chain_len >= GAZELLE_TCP_MAX_PBUF_CHAIN_LEN) { + break; + } + if (first_pbuf == NULL && (seg->next == NULL || seg->next->len < TCP_MSS)) { +@@ -1771,7 +1771,7 @@ tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif + } + #endif + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + /* pbuf into mbuf. 
ref dpdk_common.h */ + rte_prefetch0((uint8_t *)(seg->p) - sizeof(struct rte_mbuf) - sizeof(uint64_t) * 2); + #endif +diff --git a/src/core/timeouts.c b/src/core/timeouts.c +index 0542a32..2b80b0a 100644 +--- a/src/core/timeouts.c ++++ b/src/core/timeouts.c +@@ -442,7 +442,7 @@ sys_timeouts_sleeptime(void) + } + } + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + void sys_timer_run(void) + { + u32_t sleeptime; +@@ -452,7 +452,7 @@ void sys_timer_run(void) + sys_check_timeouts(); + } + } +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + + #else /* LWIP_TIMERS && !LWIP_TIMERS_CUSTOM */ + /* Satisfy the TCP code which calls this function */ +diff --git a/src/include/arch/sys_arch.h b/src/include/arch/sys_arch.h +index fc4a9fd..04e3192 100644 +--- a/src/include/arch/sys_arch.h ++++ b/src/include/arch/sys_arch.h +@@ -76,7 +76,7 @@ int sys_mbox_empty(struct sys_mbox *); + struct sys_thread; + typedef struct sys_thread *sys_thread_t; + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + extern int eth_dev_poll(void); + #include + +diff --git a/src/include/dpdk_cksum.h b/src/include/dpdk_cksum.h +index 83c9c38..df2e2a5 100644 +--- a/src/include/dpdk_cksum.h ++++ b/src/include/dpdk_cksum.h +@@ -34,7 +34,7 @@ + #define __DPDK_CKSUM_H__ + + #include "lwipopts.h" +-#if USE_LIBOS ++#if GAZELLE_ENABLE + #include + + #if CHECKSUM_OFFLOAD_ALL +@@ -103,5 +103,5 @@ static inline u16_t ip_chksum_pseudo_offload(u8_t proto, u16_t proto_len, + } + #endif /* CHECKSUM_GEN_TCP_HW */ + +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + #endif /* __DPDK_CKSUM_H__ */ +diff --git a/src/include/hlist.h b/src/include/hlist.h +index 7059488..988b017 100644 +--- a/src/include/hlist.h ++++ b/src/include/hlist.h +@@ -35,7 +35,7 @@ + + #include "list.h" + +-//#if TCP_PCB_HASH ++//#if GAZELLE_TCP_PCB_HASH + struct hlist_node { + /** + * @pprev: point the previous node's next pointer +@@ -228,6 +228,6 @@ static inline void hlist_ctl_add_after(struct hlist_node *n, struct hlist_node * + ctl->tail.end = next; + } 
+ } +-//#endif /* TCP_PCB_HASH */ ++//#endif /* GAZELLE_TCP_PCB_HASH */ + + #endif /* __HLIST_H__ */ +diff --git a/src/include/lwip/api.h b/src/include/lwip/api.h +index 430a7a0..197faef 100644 +--- a/src/include/lwip/api.h ++++ b/src/include/lwip/api.h +@@ -141,16 +141,16 @@ enum netconn_type { + , NETCONN_RAW_IPV6 = NETCONN_RAW | NETCONN_TYPE_IPV6 /* 0x48 */ + #endif /* LWIP_IPV6 */ + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + /*here must bigger than 0xff, because (type & 0xff) is for lwip inner use*/ + , NETCONN_LIBOS = 0x100 + , NETCONN_HOST = 0x200 + , NETCONN_INPRG = 0x400 + , NETCONN_STACK = NETCONN_LIBOS | NETCONN_HOST | NETCONN_INPRG +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + }; + +-#ifdef USE_LIBOS ++#ifdef GAZELLE_ENABLE + #define SET_CONN_TYPE_LIBOS_OR_HOST(conn) do { \ + conn->type &= ~(NETCONN_STACK); \ + conn->type |= (NETCONN_LIBOS | NETCONN_HOST); } while (0) +@@ -175,7 +175,7 @@ enum netconn_type { + #define CONN_TYPE_HAS_LIBOS_AND_HOST(conn) (0) + #define CONN_TYPE_HAS_LIBOS(conn) (0) + #define CONN_TYPE_HAS_HOST(conn) (0) +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + + /** Current state of the netconn. Non-TCP netconns are always + * in state NETCONN_NONE! */ +@@ -319,7 +319,7 @@ struct netconn { + /** A callback function that is informed about events for this netconn */ + netconn_callback callback; + +-#if REUSE_IPPORT ++#if GAZELLE_TCP_REUSE_IPPORT + u8_t is_master_fd; + #endif + }; +diff --git a/src/include/lwip/ip.h b/src/include/lwip/ip.h +index 7f55fb3..1c6988b 100644 +--- a/src/include/lwip/ip.h ++++ b/src/include/lwip/ip.h +@@ -97,7 +97,7 @@ struct ip_pcb { + /* + * Option flags per-socket. 
These are the same like SO_XXX in sockets.h + */ +-#if USE_LIBOS ++#if GAZELLE_ENABLE + #define SOF_REUSEADDR 0x02U /* allow local address reuse */ + #define SOF_KEEPALIVE 0x09U /* keep connections alive */ + #define SOF_BROADCAST 0x06U /* permit to send and to receive broadcast messages (see IP_SOF_BROADCAST option) */ +@@ -105,7 +105,7 @@ struct ip_pcb { + #define SOF_REUSEADDR 0x04U /* allow local address reuse */ + #define SOF_KEEPALIVE 0x08U /* keep connections alive */ + #define SOF_BROADCAST 0x20U /* permit to send and to receive broadcast messages (see IP_SOF_BROADCAST option) */ +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + + /* These flags are inherited (e.g. from a listen-pcb to a connection-pcb): */ + #define SOF_INHERITED (SOF_REUSEADDR|SOF_KEEPALIVE) +diff --git a/src/include/lwip/memp.h b/src/include/lwip/memp.h +index 64d8f31..1763836 100644 +--- a/src/include/lwip/memp.h ++++ b/src/include/lwip/memp.h +@@ -58,11 +58,11 @@ typedef enum { + #include "lwip/priv/memp_priv.h" + #include "lwip/stats.h" + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + extern PER_THREAD struct memp_desc* memp_pools[MEMP_MAX]; + #else + extern const struct memp_desc* const memp_pools[MEMP_MAX]; +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + + /** + * @ingroup mempool +@@ -96,7 +96,7 @@ extern const struct memp_desc* const memp_pools[MEMP_MAX]; + * To relocate a pool, declare it as extern in cc.h. 
Example for GCC: + * extern u8_t \_\_attribute\_\_((section(".onchip_mem"))) memp_memory_my_private_pool_base[]; + */ +-#if USE_LIBOS ++#if GAZELLE_ENABLE + #define LWIP_MEMPOOL_DECLARE(name,num,size,desc) \ + PER_THREAD struct memp_desc memp_ ## name = {0}; \ + PER_THREAD char memp_desc_ ## name[] = desc; \ +@@ -106,7 +106,7 @@ extern const struct memp_desc* const memp_pools[MEMP_MAX]; + PER_THREAD struct memp *memp_tab_ ## name = NULL; \ + LWIP_DECLARE_MEMP_BASE_ALIGNED(name, ((num) * (MEMP_SIZE + MEMP_ALIGN_SIZE(size)))); + +-#else /* USE_LIBOS */ ++#else /* GAZELLE_ENABLE */ + + #define LWIP_MEMPOOL_DECLARE(name,num,size,desc) \ + LWIP_DECLARE_MEMORY_ALIGNED(memp_memory_ ## name ## _base, ((num) * (MEMP_SIZE + MEMP_ALIGN_SIZE(size)))); \ +@@ -124,7 +124,7 @@ extern const struct memp_desc* const memp_pools[MEMP_MAX]; + &memp_tab_ ## name \ + }; + +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + #endif /* MEMP_MEM_MALLOC */ + + /** +diff --git a/src/include/lwip/opt.h b/src/include/lwip/opt.h +index 718816b..0376f60 100644 +--- a/src/include/lwip/opt.h ++++ b/src/include/lwip/opt.h +@@ -3525,7 +3525,7 @@ + /** + * EPOLL_DEBUG: Enable debugging in epoll.c. + */ +-#if !defined EPOLL_DEBUG || defined __DOXYGEN__ && USE_LIBOS ++#if !defined EPOLL_DEBUG || defined __DOXYGEN__ && GAZELLE_ENABLE + #define EPOLL_DEBUG LWIP_DBG_OFF + #endif + /** +@@ -3535,7 +3535,7 @@ + /** + * ETHDEV_DEBUG: Enable debugging in ethdev.c. + */ +-#if !defined ETHDEV_DEBUG || defined __DOXYGEN__ && USE_LIBOS ++#if !defined ETHDEV_DEBUG || defined __DOXYGEN__ && GAZELLE_ENABLE + #define ETHDEV_DEBUG LWIP_DBG_OFF + #endif + /** +@@ -3545,7 +3545,7 @@ + /** + * ETHDEV_DEBUG: Enable debugging in ethdev.c. + */ +-#if !defined SYSCALL_DEBUG || defined __DOXYGEN__ && USE_LIBOS ++#if !defined SYSCALL_DEBUG || defined __DOXYGEN__ && GAZELLE_ENABLE + #define SYSCALL_DEBUG LWIP_DBG_OFF + #endif + /** +@@ -3555,7 +3555,7 @@ + /** + * CONTROL_DEBUG: Enable debugging in control_plane.c. 
+ */ +-#if !defined CONTROL_DEBUG || defined __DOXYGEN__ && USE_LIBOS ++#if !defined CONTROL_DEBUG || defined __DOXYGEN__ && GAZELLE_ENABLE + #define CONTROL_DEBUG LWIP_DBG_ON + #endif + /** +diff --git a/src/include/lwip/pbuf.h b/src/include/lwip/pbuf.h +index 1124408..a2e8e01 100644 +--- a/src/include/lwip/pbuf.h ++++ b/src/include/lwip/pbuf.h +@@ -220,7 +220,7 @@ struct pbuf { + /** For incoming packets, this contains the input netif's index */ + u8_t if_idx; + +-#if USE_LIBOS && CHECKSUM_OFFLOAD_ALL ++#if GAZELLE_ENABLE && CHECKSUM_OFFLOAD_ALL + /** checksum offload ol_flags */ + u64_t ol_flags; + /* < L2 (MAC) Header Length for non-tunneling pkt. */ +@@ -234,7 +234,7 @@ struct pbuf { + u8_t in_write; + u8_t head; + struct pbuf *last; +-#endif /* USE_LIBOS CHECKSUM_OFFLOAD_SWITCH */ ++#endif /* GAZELLE_ENABLE CHECKSUM_OFFLOAD_SWITCH */ + + /** In case the user needs to store data custom data on a pbuf */ + LWIP_PBUF_CUSTOM_DATA +@@ -287,7 +287,7 @@ void pbuf_free_ooseq(void); + + /* Initializes the pbuf module. This call is empty for now, but may not be in future. */ + #define pbuf_init() +-#if USE_LIBOS ++#if GAZELLE_ENABLE + struct pbuf *lwip_alloc_pbuf(pbuf_layer layer, uint16_t length, pbuf_type type); + #endif + struct pbuf *pbuf_alloc(pbuf_layer l, u16_t length, pbuf_type type); +diff --git a/src/include/lwip/priv/memp_std.h b/src/include/lwip/priv/memp_std.h +index 395ac0c..66d7e4e 100644 +--- a/src/include/lwip/priv/memp_std.h ++++ b/src/include/lwip/priv/memp_std.h +@@ -122,13 +122,13 @@ LWIP_MEMPOOL(MLD6_GROUP, MEMP_NUM_MLD6_GROUP, sizeof(struct mld_group), + #endif /* LWIP_IPV6 && LWIP_IPV6_MLD */ + + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + #if !LWIP_NETCONN_SEM_PER_THREAD + LWIP_MEMPOOL(SYS_SEM, MEMP_NUM_SYS_SEM, sizeof(struct sys_sem), "SYS_SEM") + #endif + + LWIP_MEMPOOL(SYS_MBOX, MEMP_NUM_SYS_MBOX, sizeof(struct sys_mbox), "SYS_MBOX") +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + /* + * A list of pools of pbuf's used by LWIP. 
+ * +diff --git a/src/include/lwip/priv/tcp_priv.h b/src/include/lwip/priv/tcp_priv.h +index 97f799e..ddae3fd 100644 +--- a/src/include/lwip/priv/tcp_priv.h ++++ b/src/include/lwip/priv/tcp_priv.h +@@ -340,7 +340,7 @@ extern PER_THREAD struct tcp_pcb *tcp_tw_pcbs; /* List of all TCP PCBs in T + #define NUM_TCP_PCB_LISTS 4 + extern PER_THREAD struct tcp_pcb ** tcp_pcb_lists[NUM_TCP_PCB_LISTS]; + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + #include "reg_sock.h" + static inline int vdev_reg_done(enum reg_ring_type reg_type, const struct tcp_pcb *pcb) + { +@@ -353,7 +353,7 @@ static inline int vdev_reg_done(enum reg_ring_type reg_type, const struct tcp_pc + qtuple.dst_ip = pcb->remote_ip.addr; + qtuple.dst_port = lwip_htons(pcb->remote_port); + +-#if REUSE_IPPORT ++#if GAZELLE_TCP_REUSE_IPPORT + if (reg_type == REG_RING_TCP_CONNECT_CLOSE) { + struct tcp_pcb_listen* lpcb = pcb->listener; + if (lpcb != NULL) { +@@ -389,7 +389,7 @@ static inline void vdev_unreg_done(const struct tcp_pcb *pcb) + #define TCP_DEBUG_PCB_LISTS 0 + #endif + #if TCP_DEBUG_PCB_LISTS +-#if USE_LIBOS ++#if GAZELLE_ENABLE + #define TCP_REG(pcbs, npcb) do {\ + struct tcp_pcb *tcp_tmp_pcb; \ + LWIP_DEBUGF(TCP_DEBUG, ("TCP_REG %p local port %d\n", (npcb), (npcb)->local_port)); \ +@@ -432,7 +432,7 @@ static inline void vdev_unreg_done(const struct tcp_pcb *pcb) + LWIP_DEBUGF(TCP_DEBUG, ("TCP_RMV: removed %p from %p\n", (npcb), *(pcbs))); \ + } while(0) + +-#else /* USE_LIBOS */ ++#else /* GAZELLE_ENABLE */ + #define TCP_REG(pcbs, npcb) do {\ + struct tcp_pcb *tcp_tmp_pcb; \ + LWIP_DEBUGF(TCP_DEBUG, ("TCP_REG %p local port %"U16_F"\n", (void *)(npcb), (npcb)->local_port)); \ +@@ -465,10 +465,10 @@ static inline void vdev_unreg_done(const struct tcp_pcb *pcb) + LWIP_DEBUGF(TCP_DEBUG, ("TCP_RMV: removed %p from %p\n", (void *)(npcb), (void *)(*(pcbs)))); \ + } while(0) + +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + #else /* LWIP_DEBUG */ + +-#if TCP_PCB_HASH ++#if GAZELLE_TCP_PCB_HASH + #define 
TCP_REG_HASH(pcbs, npcb) \ + do { \ + u32_t idx; \ +@@ -496,9 +496,9 @@ static inline void vdev_unreg_done(const struct tcp_pcb *pcb) + do { \ + hlist_del_init(&(npcb)->tcp_node); \ + } while (0) +-#endif /* TCP_PCB_HASH */ ++#endif /* GAZELLE_TCP_PCB_HASH */ + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + #define TCP_REG(pcbs, npcb) \ + do { \ + if (*pcbs) \ +@@ -529,7 +529,7 @@ static inline void vdev_unreg_done(const struct tcp_pcb *pcb) + (npcb)->next = NULL; \ + } while(0) + +-#else /* USE_LIBOS */ ++#else /* GAZELLE_ENABLE */ + #define TCP_REG(pcbs, npcb) \ + do { \ + (npcb)->next = *pcbs; \ +@@ -556,11 +556,11 @@ static inline void vdev_unreg_done(const struct tcp_pcb *pcb) + (npcb)->next = NULL; \ + } while(0) + +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + #endif /* LWIP_DEBUG */ + + +-#if TCP_PCB_HASH ++#if GAZELLE_TCP_PCB_HASH + #define TCP_REG_ACTIVE_HASH(npcb) \ + do { \ + TCP_REG_HASH(tcp_active_htable, npcb); \ +@@ -580,7 +580,7 @@ static inline void vdev_unreg_done(const struct tcp_pcb *pcb) + } while (0) + + void tcp_pcb_remove_hash(struct tcp_hash_table *htb, struct tcp_pcb *pcb); +-#endif /* TCP_PCB_HASH */ ++#endif /* GAZELLE_TCP_PCB_HASH */ + + #define TCP_REG_ACTIVE(npcb) \ + do { \ +diff --git a/src/include/lwip/sockets.h b/src/include/lwip/sockets.h +index 3c5b87b..58acf0f 100644 +--- a/src/include/lwip/sockets.h ++++ b/src/include/lwip/sockets.h +@@ -57,7 +57,7 @@ extern "C" { + + /* If your port already typedef's sa_family_t, define SA_FAMILY_T_DEFINED + to prevent this code from redefining it. 
*/ +-#if USE_LIBOS ++#if GAZELLE_ENABLE + #define SA_FAMILY_T_DEFINED + typedef u16_t sa_family_t; + #endif +@@ -74,7 +74,7 @@ typedef u16_t in_port_t; + #if LWIP_IPV4 + /* members are in network byte order */ + struct sockaddr_in { +-#if !USE_LIBOS ++#if !GAZELLE_ENABLE + u8_t sin_len; + #endif + sa_family_t sin_family; +@@ -97,7 +97,7 @@ struct sockaddr_in6 { + #endif /* LWIP_IPV6 */ + + struct sockaddr { +-#if !USE_LIBOS ++#if !GAZELLE_ENABLE + u8_t sa_len; + #endif + sa_family_t sa_family; +@@ -198,7 +198,7 @@ struct ifreq { + #define SOCK_DGRAM 2 + #define SOCK_RAW 3 + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + #include + #else + /* +@@ -236,7 +236,7 @@ struct ifreq { + * Level number for (get/set)sockopt() to apply to socket itself. + */ + #define SOL_SOCKET 0xfff /* options for socket level */ +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + + /* + * Structure used for manipulating linger option. +@@ -289,20 +289,20 @@ struct linger { + /* + * Options for level IPPROTO_TCP + */ +-#if USE_LIBOS ++#if GAZELLE_ENABLE + /* come from netinet/tcp.h */ + #define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */ + #define TCP_KEEPALIVE 0x24 /* send KEEPALIVE probes when idle for pcb->keep_idle milliseconds */ + #define TCP_KEEPIDLE 0x04 /* set pcb->keep_idle - Same as TCP_KEEPALIVE, but use seconds for get/setsockopt */ + #define TCP_KEEPINTVL 0x05 /* set pcb->keep_intvl - Use seconds for get/setsockopt */ + #define TCP_KEEPCNT 0x06 /* set pcb->keep_cnt - Use number of probes sent for get/setsockopt */ +-#else /* USE_LIBOS */ ++#else /* GAZELLE_ENABLE */ + #define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */ + #define TCP_KEEPALIVE 0x02 /* send KEEPALIVE probes when idle for pcb->keep_idle milliseconds */ + #define TCP_KEEPIDLE 0x03 /* set pcb->keep_idle - Same as TCP_KEEPALIVE, but use seconds for get/setsockopt */ + #define TCP_KEEPINTVL 0x04 /* set pcb->keep_intvl - Use seconds for get/setsockopt */ + #define TCP_KEEPCNT 0x05 /* set 
pcb->keep_cnt - Use number of probes sent for get/setsockopt */ +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + #endif /* LWIP_TCP */ + + #if LWIP_IPV6 +@@ -505,13 +505,13 @@ typedef struct fd_set + unsigned char fd_bits [(FD_SETSIZE+7)/8]; + } fd_set; + +-#elif FD_SETSIZE < (LWIP_SOCKET_OFFSET + MEMP_NUM_NETCONN) && !USE_LIBOS ++#elif FD_SETSIZE < (LWIP_SOCKET_OFFSET + MEMP_NUM_NETCONN) && !GAZELLE_ENABLE + #error "external FD_SETSIZE too small for number of sockets" + #else + #define LWIP_SELECT_MAXNFDS FD_SETSIZE + #endif /* FD_SET */ + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + #if !defined(POLLIN) && !defined(POLLOUT) + /* come from bits/poll.h */ + #define POLLIN 0x001 +@@ -526,7 +526,7 @@ typedef struct fd_set + #define POLLWRBAND 0x200 + #define POLLHUP 0x010 + #endif +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + + #if LWIP_SOCKET_POLL + /* poll-related defines and types */ +@@ -664,13 +664,13 @@ int lwip_select(int maxfdp1, fd_set *readset, fd_set *writeset, fd_set *exceptse + int lwip_poll(struct pollfd *fds, nfds_t nfds, int timeout); + #endif + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + int lwip_ioctl(int s, long cmd, ...); + int lwip_fcntl(int s, int cmd, int val); + #else + int lwip_ioctl(int s, long cmd, void *argp); + int lwip_fcntl(int s, int cmd, int val); +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + + const char *lwip_inet_ntop(int af, const void *src, char *dst, socklen_t size); + int lwip_inet_pton(int af, const char *src, void *dst); +@@ -740,7 +740,7 @@ int lwip_inet_pton(int af, const char *src, void *dst); + /** @ingroup socket */ + #define close(s) lwip_close(s) + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + #define fcntl(s,cmd...) lwip_fcntl(s,cmd) + #define ioctl(s,cmd...) 
lwip_ioctl(s,cmd) + #else +@@ -748,7 +748,7 @@ int lwip_inet_pton(int af, const char *src, void *dst); + #define fcntl(s,cmd,val) lwip_fcntl(s,cmd,val) + /** @ingroup socket */ + #define ioctl(s,cmd,argp) lwip_ioctl(s,cmd,argp) +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + + #endif /* LWIP_POSIX_SOCKETS_IO_NAMES */ + #endif /* LWIP_COMPAT_SOCKETS != 2 */ +diff --git a/src/include/lwip/tcp.h b/src/include/lwip/tcp.h +index c2018cb..b822f40 100644 +--- a/src/include/lwip/tcp.h ++++ b/src/include/lwip/tcp.h +@@ -51,7 +51,7 @@ + #include "lwip/ip6.h" + #include "lwip/ip6_addr.h" + +-#if TCP_PCB_HASH ++#if GAZELLE_TCP_PCB_HASH + #include "lwip/sys.h" + #include "hlist.h" + #endif +@@ -214,7 +214,7 @@ typedef u16_t tcpflags_t; + /** + * members common to struct tcp_pcb and struct tcp_listen_pcb + */ +-#if USE_LIBOS ++#if GAZELLE_ENABLE + #define TCP_PCB_COMMON(type) \ + type *next; /* for the linked list */ \ + type *prev; /* for the linked list */ \ +@@ -225,7 +225,7 @@ typedef u16_t tcpflags_t; + /* ports are in host byte order */ \ + u16_t local_port + +-#else /* USE_LIBOS */ ++#else /* GAZELLE_ENABLE */ + #define TCP_PCB_COMMON(type) \ + type *next; /* for the linked list */ \ + void *callback_arg; \ +@@ -234,7 +234,7 @@ typedef u16_t tcpflags_t; + u8_t prio; \ + /* ports are in host byte order */ \ + u16_t local_port +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + + /** the TCP protocol control block for listening pcbs */ + struct tcp_pcb_listen { +@@ -253,7 +253,7 @@ struct tcp_pcb_listen { + u8_t accepts_pending; + #endif /* TCP_LISTEN_BACKLOG */ + +-#if REUSE_IPPORT ++#if GAZELLE_TCP_REUSE_IPPORT + struct tcp_pcb_listen* next_same_port_pcb; + u16_t connect_num; + int socket_fd; +@@ -269,7 +269,7 @@ struct tcp_pcb { + IP_PCB; + /** protocol specific PCB members */ + TCP_PCB_COMMON(struct tcp_pcb); +-#if TCP_PCB_HASH ++#if GAZELLE_TCP_PCB_HASH + struct hlist_node tcp_node; + #endif + +@@ -421,7 +421,7 @@ struct tcp_pcb { + u8_t need_tso_send; + 
}; + +-#if TCP_PCB_HASH ++#if GAZELLE_TCP_PCB_HASH + #define TCP_HTABLE_SIZE MEMP_NUM_NETCONN*12 + + struct tcp_hashbucket +@@ -471,7 +471,7 @@ static inline unsigned int jhash_3words(unsigned int a, unsigned int b, unsigned + #define tcppcb_hlist_for_each(tcppcb, node, list) \ + hlist_for_each_entry(tcppcb, node, list, tcp_node) + +-#endif /* TCP_PCB_HASH */ ++#endif /* GAZELLE_TCP_PCB_HASH */ + + #if LWIP_EVENT_API + +diff --git a/src/include/lwip/timeouts.h b/src/include/lwip/timeouts.h +index b451554..f7ffc5e 100644 +--- a/src/include/lwip/timeouts.h ++++ b/src/include/lwip/timeouts.h +@@ -119,9 +119,9 @@ struct sys_timeo** sys_timeouts_get_next_timeout(void); + void lwip_cyclic_timer(void *arg); + #endif + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + void sys_timer_run(void); +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + + #endif /* LWIP_TIMERS */ + +diff --git a/src/include/lwiplog.h b/src/include/lwiplog.h +index 011ed21..f278ff4 100644 +--- a/src/include/lwiplog.h ++++ b/src/include/lwiplog.h +@@ -41,7 +41,7 @@ + + #include "lwipopts.h" + +-#if USE_DPDK_LOG ++#if GAZELLE_USE_DPDK_LOG + + #define LWIP_LOG_WARN LWIP_DBG_LEVEL_WARNING + #define LWIP_LOG_ERROR LWIP_DBG_LEVEL_SERIOUS +@@ -75,6 +75,6 @@ do { LWIP_PLATFORM_LOG(LWIP_LOG_FATAL, "Assertion \"%s\" failed at line %d in %s + + #define LWIP_PLATFORM_LOG(debug, message) + +-#endif /* USE_DPDK_LOG */ ++#endif /* GAZELLE_USE_DPDK_LOG */ + + #endif /* __LWIPLOG_H__ */ +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index be58ec3..9cc93bc 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -33,182 +33,204 @@ + #ifndef __LWIPOPTS_H__ + #define __LWIPOPTS_H__ + +-#define LWIP_TCPIP_CORE_LOCKING 1 +- +-#define LWIP_NETCONN_SEM_PER_THREAD 0 +- +-#define LWIP_TCP 1 +- +-#define LWIP_SO_SENTIMEO 0 +- +-#define LIP_SO_LINGER 0 +- +-#define MEMP_USE_CUSTOM_POOLS 0 +-#define MEM_USE_POOLS 0 +- +-#define PER_TCP_PCB_BUFFER (16 * 128) +- +-#define MAX_CLIENTS (20000) +- +-#define 
RESERVED_CLIENTS (2000) +- +-#define MEMP_NUM_TCP_PCB (MAX_CLIENTS + RESERVED_CLIENTS) +- +-/* we use PBUF_POOL instead of PBUF_RAM in tcp_write, so reduce PBUF_RAM size, +- * and do NOT let PBUF_POOL_BUFSIZE less then TCP_MSS ++/* ++ ------------------------------------- ++ ---------- gazelle options ---------- ++ ------------------------------------- + */ +-#define MEM_SIZE (((PER_TCP_PCB_BUFFER + 128) * MEMP_NUM_TCP_SEG) >> 2) ++#define LWIP_PERF 1 ++#define LWIP_RECORD_PERF 0 + +-#define MEMP_NUM_TCP_PCB_LISTEN 3000 ++//#define LWIP_DEBUG 1 ++#define GAZELLE_USE_DPDK_LOG 1 + +-#define MEMP_NUM_TCP_SEG (128 * 128 * 2) ++#define GAZELLE_ENABLE 1 ++#define PER_THREAD __thread + +-#define MEMP_NUM_NETCONN (MAX_CLIENTS + RESERVED_CLIENTS) ++#define FRAME_MTU 1500 + +-#define MEMP_NUM_SYS_SEM (MAX_CLIENTS + RESERVED_CLIENTS) ++#define GAZELLE_TCP_PCB_HASH 1 + +-#define MEMP_NUM_SYS_MBOX (MAX_CLIENTS + RESERVED_CLIENTS) ++#define GAZELLE_TCP_MAX_DATA_ACK_NUM 256 + +-#define PBUF_POOL_SIZE (MAX_CLIENTS * 2) ++#define GAZELLE_TCP_MAX_PBUF_CHAIN_LEN 40 + +-#define MEMP_MEM_MALLOC 0 ++/* ++ ---------------------------------- ++ ---------- NIC offloads ---------- ++ ---------------------------------- ++*/ ++#define LWIP_CHECKSUM_CTRL_PER_NETIF 1 /* checksum ability check before checksum*/ + +-#define LWIP_ARP 1 ++// rx cksum ++#define CHECKSUM_CHECK_IP 1 /* master switch */ ++#define CHECKSUM_CHECK_TCP 1 /* master switch */ ++// tx cksum ++#define CHECKSUM_GEN_IP 1 /* master switch */ ++#define CHECKSUM_GEN_TCP 1 /* master switch */ + +-#define ETHARP_SUPPORT_STATIC_ENTRIES 1 ++// rx offload cksum ++#define CHECKSUM_CHECK_IP_HW (1 && CHECKSUM_CHECK_IP) /* hardware switch */ ++#define CHECKSUM_CHECK_TCP_HW (1 && CHECKSUM_CHECK_TCP) /* hardware switch */ ++// tx offload cksum ++#define CHECKSUM_GEN_IP_HW (1 && CHECKSUM_GEN_IP) /* hardware switch */ ++#define CHECKSUM_GEN_TCP_HW (1 && CHECKSUM_GEN_TCP) /* hardware switch */ + +-#define LWIP_IPV4 1 ++#define 
CHECKSUM_OFFLOAD_ALL (CHECKSUM_GEN_IP_HW || CHECKSUM_GEN_TCP_HW || CHECKSUM_CHECK_IP_HW || CHECKSUM_CHECK_TCP_HW) + +-#define IP_FORWARD 0 + +-#define IP_REASSEMBLY 1 ++/* ++ --------------------------------------- ++ ---------- lwIP APIs options ---------- ++ --------------------------------------- ++*/ ++#define LWIP_TCPIP_CORE_LOCKING 1 + +-#define LWIP_UDP 0 ++#define LWIP_TCPIP_TIMEOUT 0 + +-#define LWIP_TCP 1 ++#define TCPIP_MBOX_SIZE (MEMP_NUM_TCPIP_MSG_API) + +-#define IP_HLEN 20 ++#define LWIP_NETCONN 1 + +-#define TCP_HLEN 20 ++#define LWIP_NETCONN_SEM_PER_THREAD 0 + +-#define FRAME_MTU 1500 ++#define LWIP_STATS 1 + +-#define TCP_MSS (FRAME_MTU - IP_HLEN - TCP_HLEN) ++#define LWIP_STATS_DISPLAY 1 + +-#define TCP_WND (2500 * TCP_MSS) ++#define LWIP_TIMERS 1 + +-#define TCP_SND_BUF (2500 * TCP_MSS) ++#define LWIP_TIMEVAL_PRIVATE 0 + +-#define TCP_SND_QUEUELEN (8191) + +-#define TCP_SNDLOWAT (TCP_SND_BUF / 5) ++/* ++ ------------------------------------------------ ++ ---------- Internal Memory Pool Sizes ---------- ++ ------------------------------------------------ ++*/ ++#define GAZELLE_MAX_CLIENTS (20000) ++#define GAZELLE_RESERVED_CLIENTS (2000) + +-#define TCP_SNDQUEUELOWAT (TCP_SND_QUEUELEN / 5) ++#define LWIP_SUPPORT_CUSTOM_PBUF 1 + +-#define TCP_LISTEN_BACKLOG 1 ++#define MEMP_MEM_MALLOC 0 ++#define MEM_LIBC_MALLOC 0 ++#define MEM_USE_POOLS 0 ++#define MEMP_USE_CUSTOM_POOLS 0 + +-#define TCP_DEFAULT_LISTEN_BACKLOG 0xff ++#define MEMP_NUM_TCP_PCB_LISTEN 3000 + +-#define TCP_OVERSIZE 0 ++#define MEMP_NUM_TCP_PCB (GAZELLE_MAX_CLIENTS + GAZELLE_RESERVED_CLIENTS) + +-#define LWIP_NETIF_API 1 ++#define MEMP_NUM_NETCONN (GAZELLE_MAX_CLIENTS + GAZELLE_RESERVED_CLIENTS) + +-#define DEFAULT_TCP_RECVMBOX_SIZE 4096 ++#define MEMP_NUM_SYS_SEM (GAZELLE_MAX_CLIENTS + GAZELLE_RESERVED_CLIENTS) + +-#define DEFAULT_ACCEPTMBOX_SIZE 1024 ++#define MEMP_NUM_SYS_MBOX (GAZELLE_MAX_CLIENTS + GAZELLE_RESERVED_CLIENTS) + +-#define LWIP_NETCONN 1 ++#define PBUF_POOL_SIZE 
(GAZELLE_MAX_CLIENTS * 2) + +-#define LWIP_TCPIP_TIMEOUT 0 ++/* we use PBUF_POOL instead of PBUF_RAM in tcp_write, so reduce PBUF_RAM size, ++ * and do NOT let PBUF_POOL_BUFSIZE less then TCP_MSS ++*/ ++#define MEMP_NUM_TCP_SEG (128 * 128 * 2) ++#define PER_TCP_PCB_BUFFER (16 * 128) ++#define MEM_SIZE (((PER_TCP_PCB_BUFFER + 128) * MEMP_NUM_TCP_SEG) >> 2) + +-#define LWIP_SOCKET 1 + +-#define LWIP_TCP_KEEPALIVE 1 ++/* ++ --------------------------------- ++ ---------- ARP options ---------- ++ --------------------------------- ++*/ ++#define LWIP_ARP 1 + +-#define LWIP_STATS 1 ++#define ARP_TABLE_SIZE 512 + +-#define LWIP_STATS_DISPLAY 1 ++#define ARP_QUEUEING 1 + +-#define LWIP_TIMEVAL_PRIVATE 0 ++#define ARP_QUEUE_LEN 32 + +-#define USE_LIBOS 1 ++#define ETHARP_SUPPORT_STATIC_ENTRIES 1 + +-//#define LWIP_DEBUG 1 + +-#define LWIP_PERF 1 ++/* ++ --------------------------------- ++ ---------- IP options ---------- ++ --------------------------------- ++*/ ++#define LWIP_IPV4 1 + +-#define LWIP_RECORD_PERF 0 ++#define IP_FORWARD 0 + +-#define LWIP_SOCKET_POLL 0 ++#define IP_REASSEMBLY 1 + +-#define USE_LIBOS_ZC_RING 0 ++#define IP_HLEN 20 + +-#define REUSE_IPPORT 1 + +-#define MAX_CONN_NUM_PER_THREAD 65535 ++/* ++ --------------------------------- ++ ---------- UDP options ---------- ++ --------------------------------- ++*/ ++#define LWIP_UDP 0 + +-#define SO_REUSE 1 + +-#define SIOCSHIWAT 1 ++/* ++ --------------------------------- ++ ---------- TCP options ---------- ++ --------------------------------- ++*/ ++#define LWIP_TCP 1 + +-#define O_NONBLOCK 04000 /* same as define in bits/fcntl-linux.h */ ++#define TCP_HLEN 20 + +-#define O_NDELAY O_NONBLOCK ++#define DEFAULT_ACCEPTMBOX_SIZE 1024 ++#define DEFAULT_TCP_RECVMBOX_SIZE 4096 + +-#define FIONBIO 0x5421 /* same as define in asm-generic/ioctls.h */ ++#define TCP_LISTEN_BACKLOG 1 ++#define TCP_DEFAULT_LISTEN_BACKLOG 0xff + +-#define LWIP_SUPPORT_CUSTOM_PBUF 1 ++#define TCP_OVERSIZE 0 ++#define 
LWIP_NETIF_TX_SINGLE_PBUF 0 + +-#define MEM_LIBC_MALLOC 0 ++#define TCP_MSS (FRAME_MTU - IP_HLEN - TCP_HLEN) + +-#define LWIP_TIMERS 1 ++#define TCP_WND (2500 * TCP_MSS) + +-#define TCPIP_MBOX_SIZE (MEMP_NUM_TCPIP_MSG_API) ++#define TCP_SND_BUF (2500 * TCP_MSS) + +-#define TCP_PCB_HASH 1 ++#define TCP_SND_QUEUELEN (8191) + +-#define USE_DPDK_LOG 1 ++#define TCP_SNDLOWAT (TCP_SND_BUF / 5) + +-#define LWIP_EPOOL_WAIT_MAX_EVENTS 30 ++#define TCP_SNDQUEUELOWAT (TCP_SND_QUEUELEN / 5) + +-#define ARP_TABLE_SIZE 512 ++#define LWIP_TCP_KEEPALIVE 1 + +-#define ARP_QUEUEING 1 ++#define GAZELLE_TCP_MAX_CONN_PER_THREAD 65535 ++#define GAZELLE_TCP_REUSE_IPPORT 1 + +-#define ARP_QUEUE_LEN 32 + +-#define MAX_PBUF_CHAIN_LEN 40 ++/* ++ ------------------------------------ ++ ---------- Socket options ---------- ++ ------------------------------------ ++*/ ++#define LWIP_SOCKET 1 + +-#define MIN_TSO_SEG_LEN 256 ++#define LWIP_SOCKET_POLL 0 + +-#define MAX_DATA_ACK_NUM 256 ++#define LWIP_SO_SNDTIMEO 0 + +-/* --------------------------------------- +- * ------- NIC offloads -------- +- * --------------------------------------- +- */ +-#define LWIP_CHECKSUM_CTRL_PER_NETIF 1 /* checksum ability check before checksum*/ ++#define LWIP_SO_LINGER 0 + +-// rx cksum +-#define CHECKSUM_CHECK_IP 1 /* master switch */ +-#define CHECKSUM_CHECK_TCP 1 /* master switch */ +-// tx cksum +-#define CHECKSUM_GEN_IP 1 /* master switch */ +-#define CHECKSUM_GEN_TCP 1 /* master switch */ ++#define SO_REUSE 1 + +-// rx offload cksum +-#define CHECKSUM_CHECK_IP_HW (1 && CHECKSUM_CHECK_IP) /* hardware switch */ +-#define CHECKSUM_CHECK_TCP_HW (1 && CHECKSUM_CHECK_TCP) /* hardware switch */ +-// tx offload cksum +-#define CHECKSUM_GEN_IP_HW (1 && CHECKSUM_GEN_IP) /* hardware switch */ +-#define CHECKSUM_GEN_TCP_HW (1 && CHECKSUM_GEN_TCP) /* hardware switch */ ++#define FIONBIO 0x5421 /* same as define in asm-generic/ioctls.h */ + +-#define CHECKSUM_OFFLOAD_ALL (CHECKSUM_GEN_IP_HW || CHECKSUM_GEN_TCP_HW || 
CHECKSUM_CHECK_IP_HW || CHECKSUM_CHECK_TCP_HW) ++#define O_NONBLOCK 04000 /* same as define in bits/fcntl-linux.h */ + +-#if USE_LIBOS +-#define PER_THREAD __thread +-#else +-#define PER_THREAD +-#endif ++#define SIOCSHIWAT 1 + + #endif /* __LWIPOPTS_H__ */ +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index a807e3e..f78c9cf 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -59,7 +59,7 @@ union lwip_sock_lastdata { + struct pbuf *pbuf; + }; + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + struct protocol_stack; + struct wakeup_poll; + struct rte_ring; +@@ -92,7 +92,7 @@ struct lwip_sock { + #define LWIP_SOCK_FD_FREE_FREE 2 + #endif + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + char pad0 __rte_cache_aligned; + /* app thread use */ + struct pbuf *recv_lastdata; /* unread data in one pbuf */ +@@ -131,7 +131,7 @@ struct lwip_sock { + * --------------- LIBNET references ---------------- + * -------------------------------------------------- + */ +-#if USE_LIBOS ++#if GAZELLE_ENABLE + extern uint32_t sockets_num; + extern struct lwip_sock *sockets; + extern void gazelle_connected_callback(struct netconn *conn); +@@ -141,7 +141,7 @@ extern struct pbuf *write_lwip_data(struct lwip_sock *sock, uint16_t remain_size + extern void gazelle_init_sock(int32_t fd); + extern void gazelle_clean_sock(int32_t fd); + extern void write_lwip_over(struct lwip_sock *sock); +-#endif /* USE_LIBOS */ ++#endif /* GAZELLE_ENABLE */ + + struct lwip_sock *get_socket(int s); + struct lwip_sock *get_socket_by_fd(int s); +diff --git a/src/netif/ethernet.c b/src/netif/ethernet.c +index ab976a8..fd13f00 100644 +--- a/src/netif/ethernet.c ++++ b/src/netif/ethernet.c +@@ -56,7 +56,7 @@ + #include "netif/ppp/pppoe.h" + #endif /* PPPOE_SUPPORT */ + +-#if USE_LIBOS && (CHECKSUM_GEN_TCP_HW || CHECKSUM_GEN_IP_HW) ++#if GAZELLE_ENABLE && (CHECKSUM_GEN_TCP_HW || CHECKSUM_GEN_IP_HW) + #include "dpdk_cksum.h" + #endif + +-- +2.33.0 + diff --git a/0054-reduce-cpu-usage-when-send.patch 
b/0054-reduce-cpu-usage-when-send.patch new file mode 100644 index 0000000000000000000000000000000000000000..94f394f9d8b43182accee08e5fd1948e1e808722 --- /dev/null +++ b/0054-reduce-cpu-usage-when-send.patch @@ -0,0 +1,32 @@ +From d3d6f7fa6e755992fd4b75b56681b5e14aa8ba14 Mon Sep 17 00:00:00 2001 +From: jiangheng12 +Date: Fri, 10 Mar 2023 19:32:48 +0800 +Subject: [PATCH] reduce cpu usage when send + +--- + src/include/lwipsock.h | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index f78c9cf..810e98f 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -33,6 +33,7 @@ + #ifndef __LWIPSOCK_H__ + #define __LWIPSOCK_H__ + ++#include + #include "lwip/opt.h" + #include "lwip/api.h" + +@@ -110,6 +111,7 @@ struct lwip_sock { + struct list_node send_list; + struct pbuf *send_lastdata; + struct pbuf *send_pre_del; ++ sem_t snd_ring_sem; + + char pad3 __rte_cache_aligned; + /* nerver change */ +-- +2.23.0 + diff --git a/0055-add-pbuf-lock-when-aggregate-pbuf.patch b/0055-add-pbuf-lock-when-aggregate-pbuf.patch new file mode 100644 index 0000000000000000000000000000000000000000..7df6243c12e7b098e14f90c2733c8fb6a9beb736 --- /dev/null +++ b/0055-add-pbuf-lock-when-aggregate-pbuf.patch @@ -0,0 +1,46 @@ +From a9906aabda21b9d2912377352ef0058eb4fb76e0 Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Mon, 13 Mar 2023 10:00:12 +0800 +Subject: [PATCH] add pbuf lock when aggregate pbuf + +--- + src/include/lwip/pbuf.h | 3 ++- + src/include/lwipsock.h | 3 +-- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/src/include/lwip/pbuf.h b/src/include/lwip/pbuf.h +index a2e8e01..8807a49 100644 +--- a/src/include/lwip/pbuf.h ++++ b/src/include/lwip/pbuf.h +@@ -231,9 +231,10 @@ struct pbuf { + u64_t l4_len:8; + u8_t header_off; + u8_t rexmit; +- u8_t in_write; ++ volatile u8_t allow_in; + u8_t head; + struct pbuf *last; ++ pthread_spinlock_t pbuf_lock; + #endif /* GAZELLE_ENABLE CHECKSUM_OFFLOAD_SWITCH */ + 
+ /** In case the user needs to store data custom data on a pbuf */ +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index 810e98f..7e16ec8 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -104,11 +104,10 @@ struct lwip_sock { + + char pad1 __rte_cache_aligned; + /* app and stack thread all use */ +- uint32_t in_send; /* avoid sock too much send rpc msg*/ ++ uint32_t call_num; /* avoid sock too much send rpc msg*/ + char pad2 __rte_cache_aligned; + /* stack thread all use */ + struct list_node recv_list; +- struct list_node send_list; + struct pbuf *send_lastdata; + struct pbuf *send_pre_del; + sem_t snd_ring_sem; +-- +2.29.0.windows.1 + diff --git a/0056-fix-tso-small-packet-drop-in-kernel-server.patch b/0056-fix-tso-small-packet-drop-in-kernel-server.patch new file mode 100644 index 0000000000000000000000000000000000000000..997c865184f6a6f73e3aa4ea6f45ea3191f2c3b1 --- /dev/null +++ b/0056-fix-tso-small-packet-drop-in-kernel-server.patch @@ -0,0 +1,325 @@ +From abeef0770f76cd0eff8e5c6e50de0b280079d7f0 Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Mon, 13 Mar 2023 19:25:42 +0800 +Subject: [PATCH] fix tso small packet drop in kernel server + +--- + src/core/tcp_out.c | 254 +++++++++++++++++++++-------------------- + src/include/lwipopts.h | 2 + + 2 files changed, 130 insertions(+), 126 deletions(-) + +diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c +index 8a0d653..b1c317d 100644 +--- a/src/core/tcp_out.c ++++ b/src/core/tcp_out.c +@@ -1312,60 +1312,33 @@ tcp_build_wnd_scale_option(u32_t *opts) + #endif + + #if GAZELLE_ENABLE +-static struct tcp_seg *tcp_output_over(struct tcp_pcb *pcb, struct tcp_seg *seg, struct tcp_seg *useg) +-{ +- if (TCP_TCPLEN(seg) > 0) { +- seg->next = NULL; +- if (useg == NULL) { +- pcb->unacked = seg; +- pcb->last_unacked = seg; +- useg = seg; +- } else { +- if (TCP_SEQ_LT(lwip_ntohl(seg->tcphdr->seqno), lwip_ntohl(useg->tcphdr->seqno))) { +- /* add segment to before tail of unacked list, keeping 
the list sorted */ +- struct tcp_seg **cur_seg = &(pcb->unacked); +- while (*cur_seg && +- TCP_SEQ_LT(lwip_ntohl((*cur_seg)->tcphdr->seqno), lwip_ntohl(seg->tcphdr->seqno))) { +- cur_seg = &((*cur_seg)->next ); +- } +- seg->next = (*cur_seg); +- (*cur_seg) = seg; +- } else { +- /* add segment to tail of unacked list */ +- useg->next = seg; +- useg = seg; +- pcb->last_unacked = seg; +- } +- } +- } else { +- tcp_seg_free(seg); +- } +- +- return useg; +-} +-static err_t tcp_output_seg(struct tcp_pcb *pcb, struct tcp_seg *seg, struct netif *netif, u32_t snd_nxt) +-{ +- if (pcb->state != SYN_SENT) { +- TCPH_SET_FLAG(seg->tcphdr, TCP_ACK); +- } +- +- err_t err = tcp_output_segment(seg, pcb, netif); +- if (err != ERR_OK) { +- /* segment could not be sent, for whatever reason */ +- tcp_set_flags(pcb, TF_NAGLEMEMERR); +- return err; +- } +- +- if (pcb->state != SYN_SENT) { +- tcp_clear_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW); +- } +- +- if (TCP_SEQ_LT(pcb->snd_nxt, snd_nxt)) { +- pcb->snd_nxt = snd_nxt; +- } +- +- return ERR_OK; +-} ++u32_t start_seqno = 0; ++#define TCP_INIT_SEGMENT(tem_seg, _pcb, _p, _hdrflags, _seqno, _optflags) \ ++do { \ ++ struct tcp_seg *_seg = tem_seg; \ ++ u8_t _optlen; \ ++ rte_prefetch2(_p); \ ++ \ ++ _optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(_optflags, _pcb); \ ++ _seg->flags = _optflags; \ ++ _seg->next = NULL; \ ++ _seg->p = _p; \ ++ _seg->len = _p->tot_len - _optlen; \ ++ /* build TCP header */ \ ++ pbuf_add_header(_p, TCP_HLEN); \ ++ _seg->tcphdr = (struct tcp_hdr *)_seg->p->payload; \ ++ _seg->tcphdr->src = lwip_htons(_pcb->local_port); \ ++ _seg->tcphdr->dest = lwip_htons(_pcb->remote_port); \ ++ /* _seg->tcphdr->src = lwip_htons(_pcb->local_port); \ */ \ ++ /* _seg->tcphdr->dest = lwip_htons(_pcb->remote_port); \ */ \ ++ _seg->tcphdr->seqno = lwip_htonl(_seqno); \ ++ \ ++ if (start_seqno == 0) {\ ++ start_seqno = _seqno; \ ++ } \ ++ TCPH_HDRLEN_FLAGS_SET(_seg->tcphdr, (5 + _optlen / 4), _hdrflags); \ ++ _seg->tcphdr->urgp = 0; \ ++} while(0) + 
#endif + /** + * @ingroup tcp_raw +@@ -1471,97 +1444,127 @@ tcp_output(struct tcp_pcb *pcb) + pcb->persist_backoff = 0; + + /* useg should point to last segment on unacked queue */ +- useg = pcb->last_unacked; ++ useg = pcb->unacked; ++ if (useg != NULL) { ++ for (; useg->next != NULL; useg = useg->next); ++ } + + /* data available and window allows it to be sent? */ +- +- u32_t send_len = 0; + #if GAZELLE_ENABLE + if ((get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO) && pcb->need_tso_send) { +- while(seg && send_len < 0xffff) { +- /** +- * 1) walk unsent queue, find all seg witch wait to send. chain buf in these segs. +- * 2) create new segment, send and free new segment. +- * 3) update snd_nxt, unacked queue, and unsent queue +- */ +- struct tcp_seg *start_seg = seg; +- struct pbuf *first_pbuf = NULL; +- struct pbuf *pre_pbuf = NULL; +- u8_t pbuf_chain_len = 0; +- u32_t next_seqno = lwip_ntohl(seg->tcphdr->seqno); +- while (seg != NULL && pbuf_chain_len < GAZELLE_TCP_MAX_PBUF_CHAIN_LEN) { ++ uint16_t send_pkt = 0; ++ ++ do { ++ struct tcp_seg * start_seg = seg; ++ struct pbuf *new_pbuf = NULL; ++ ++ struct pbuf *tmp_pbuf = NULL; + u32_t seg_seqno = lwip_ntohl(seg->tcphdr->seqno); +- if (seg_seqno - pcb->lastack + seg->len > wnd) { +- if (first_pbuf) +- break; +- else +- goto output_done; ++ u32_t last_seg_seqno = seg_seqno; ++ ++ struct tcp_seg *last_seg = NULL; ++ u16_t last_seg_len = 0; ++ u8_t pbuf_chain_len = 0; ++ while (seg != NULL && seg_seqno - pcb->lastack + seg->len <= wnd && pbuf_chain_len < GAZELLE_TCP_MAX_PBUF_CHAIN_LEN) { ++ if (last_seg_len != 0 && (last_seg_len + seg->len < 1460) && seg->len < GAZELLE_TCP_MIN_TSO_SEG_LEN) { ++ break; ++ } ++ ++ if ((tcp_do_output_nagle(pcb) == 0) && ++ ((pcb->flags & (TF_NAGLEMEMERR | TF_FIN)) == 0)) { ++ break; ++ } ++ if (last_seg_seqno + last_seg_len == seg_seqno) { ++ pbuf_remove_header(seg->p, seg->p->tot_len - seg->len); ++ if (new_pbuf == NULL) { ++ new_pbuf = seg->p; ++ tmp_pbuf = new_pbuf; ++ } else { ++ 
new_pbuf->tot_len += seg->p->len; ++ tmp_pbuf->next = seg->p; ++ tmp_pbuf = tmp_pbuf->next; ++ } ++ } else { ++ break; ++ } ++ ++ last_seg = seg; ++ last_seg_len = seg->len; ++ last_seg_seqno = seg_seqno; ++ seg = seg->next; ++ seg_seqno = (seg != NULL) ? lwip_ntohl(seg->tcphdr->seqno) : seg_seqno; ++ pbuf_chain_len++; + } + +- if ((tcp_do_output_nagle(pcb) == 0) && ((pcb->flags & (TF_NAGLEMEMERR | TF_FIN)) == 0)) { +- if (first_pbuf) +- break; +- else +- goto output_done; ++ // tcp_do_output_nagle, break ++ if (new_pbuf == NULL) { ++ goto end_loop; + } + +- if (seg->len < TCP_MSS || next_seqno != seg_seqno || pbuf_chain_len >= GAZELLE_TCP_MAX_PBUF_CHAIN_LEN) { +- break; +- } +- if (first_pbuf == NULL && (seg->next == NULL || seg->next->len < TCP_MSS)) { +- break; +- } ++ struct tcp_seg new_seg; ++ TCP_INIT_SEGMENT(&new_seg, pcb, new_pbuf, 0, lwip_ntohl(start_seg->tcphdr->seqno), 0); + +- pbuf_remove_header(seg->p, seg->p->tot_len - seg->len); +- if (first_pbuf == NULL) { +- first_pbuf = seg->p; +- } else { +- first_pbuf->tot_len += seg->p->len; +- pre_pbuf->next = seg->p; ++ if (pcb->state != SYN_SENT) { ++ TCPH_SET_FLAG(new_seg.tcphdr, TCP_ACK); + } + +- send_len += seg->len; +- pre_pbuf = seg->p; +- next_seqno = seg_seqno + TCP_TCPLEN(seg); +- seg = seg->next; +- pcb->unsent = seg; +- pbuf_chain_len++; +- } +- +- if (first_pbuf == NULL) { +- err = tcp_output_seg(pcb, seg, netif, next_seqno + seg->len); ++ err = tcp_output_segment(&new_seg, pcb, netif); + if (err != ERR_OK) { +- if (pcb->unsent == NULL) +- pcb->last_unsent = NULL; +- pcb->need_tso_send = 0; +- return err; ++ /* segment could not be sent, for whatever reason */ ++ tcp_set_flags(pcb, TF_NAGLEMEMERR); ++ return err; + } +- pcb->unsent = seg->next; +- useg = tcp_output_over(pcb, seg, useg); +- seg = pcb->unsent; +- continue; +- } +- +- struct tcp_seg new_seg; +- tcp_init_segment(&new_seg, pcb, first_pbuf, 0, lwip_ntohl(start_seg->tcphdr->seqno), 0); + +- err = tcp_output_seg(pcb, &new_seg, netif, 
next_seqno); ++ pcb->unsent = last_seg->next; ++ if (pcb->state != SYN_SENT) { ++ tcp_clear_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW); ++ } + +- for (u32_t i = 0; i < pbuf_chain_len; i++) { +- struct tcp_seg *next_seg = start_seg->next; +- start_seg->p->next = NULL; +- useg = tcp_output_over(pcb, start_seg, useg); +- start_seg = next_seg; +- } ++ snd_nxt = last_seg_seqno + TCP_TCPLEN(last_seg); ++ if (TCP_SEQ_LT(pcb->snd_nxt, snd_nxt)) { ++ pcb->snd_nxt = snd_nxt; ++ } + +- pbuf_remove_header(new_seg.p, new_seg.p->tot_len - new_seg.len - TCPH_HDRLEN_BYTES(new_seg.tcphdr)); +- new_seg.p->tot_len = new_seg.p->len; +- } +- pcb->need_tso_send = 0; ++ pbuf_remove_header(new_seg.p, new_seg.p->tot_len - new_seg.len - TCP_HLEN); ++ new_seg.p->tot_len = new_seg.p->len; ++ ++ for (int start = pbuf_chain_len; start > 0; start--) { ++ struct tcp_seg *tmp_seg = start_seg; ++ start_seg = start_seg->next; ++ tmp_seg->p->next = NULL; ++ if (TCP_TCPLEN(tmp_seg) > 0) { ++ tmp_seg->next = NULL; ++ if (pcb->unacked == NULL) { ++ pcb->unacked = tmp_seg; ++ useg = tmp_seg; ++ } else { ++ if (TCP_SEQ_LT(lwip_ntohl(tmp_seg->tcphdr->seqno), lwip_ntohl(useg->tcphdr->seqno))) { ++ /* add segment to before tail of unacked list, keeping the list sorted */ ++ struct tcp_seg **cur_seg = &(pcb->unacked); ++ while (*cur_seg && ++ TCP_SEQ_LT(lwip_ntohl((*cur_seg)->tcphdr->seqno), lwip_ntohl(tmp_seg->tcphdr->seqno))) { ++ cur_seg = &((*cur_seg)->next ); ++ } ++ tmp_seg->next = (*cur_seg); ++ (*cur_seg) = tmp_seg; ++ } else { ++ /* add segment to tail of unacked list */ ++ useg->next = tmp_seg; ++ useg = useg->next; ++ } ++ } ++ } else { ++ tcp_seg_free(tmp_seg); ++ } ++ } ++ } while(seg != NULL && lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len <= wnd && send_pkt++ < 10); ++end_loop: ++ pcb->need_tso_send = 0; + } else + #endif + { +- while (seg != NULL && send_len < 0xffff && ++ uint16_t send_pkt = 0; ++ while (seg != NULL && send_pkt++ < 10 && + lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + 
seg->len <= wnd) { + LWIP_ASSERT("RST not expected here!", + (TCPH_FLAGS(seg->tcphdr) & TCP_RST) == 0); +@@ -1576,7 +1579,6 @@ tcp_output(struct tcp_pcb *pcb) + ((pcb->flags & (TF_NAGLEMEMERR | TF_FIN)) == 0)) { + break; + } +- send_len += seg->len; + #if TCP_CWND_DEBUG + LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_output: snd_wnd %"TCPWNDSIZE_F", cwnd %"TCPWNDSIZE_F", wnd %"U32_F", effwnd %"U32_F", seq %"U32_F", ack %"U32_F", i %"S16_F"\n", + pcb->snd_wnd, pcb->cwnd, wnd, +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index 742b4a9..0d2a6d9 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -55,6 +55,8 @@ + + #define GAZELLE_TCP_MAX_PBUF_CHAIN_LEN 40 + ++#define GAZELLE_TCP_MIN_TSO_SEG_LEN 256 ++ + /* + ---------------------------------- + ---------- NIC offloads ---------- +-- +2.33.0 + diff --git a/0057-same-node-gazellectl-a.patch b/0057-same-node-gazellectl-a.patch new file mode 100644 index 0000000000000000000000000000000000000000..0061f5d923049ba24d81d89a538a26c638211270 --- /dev/null +++ b/0057-same-node-gazellectl-a.patch @@ -0,0 +1,456 @@ +From 8a68ee510f5da20edf7fa06da701713ef10db930 Mon Sep 17 00:00:00 2001 +From: jiangheng12 +Date: Thu, 16 Mar 2023 19:59:26 +0800 +Subject: [PATCH] same node & gazellectl -a + +--- + src/api/sockets.c | 21 +++++++++++++++++++++ + src/core/ipv4/ip4_frag.c | 4 ++++ + src/core/netif.c | 7 ++++--- + src/core/pbuf.c | 6 ++++++ + src/core/tcp.c | 39 +++++++++++++++++++++++++++++++++++++++ + src/core/tcp_in.c | 6 ++++++ + src/core/tcp_out.c | 11 +++++++++++ + src/include/lwip/pbuf.h | 3 +++ + src/include/lwip/tcp.h | 10 ++++++++++ + src/include/lwipopts.h | 7 +++++++ + src/include/lwipsock.h | 37 +++++++++++++++++++++++++++++++++++++ + 11 files changed, 148 insertions(+), 3 deletions(-) + +diff --git a/src/api/sockets.c b/src/api/sockets.c +index 356e345..7a5da26 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -605,6 +605,10 @@ alloc_socket(struct netconn *newconn, int accepted, int flags) 
+ * (unless it has been created by accept()). */ + sockets[i].sendevent = (NETCONNTYPE_GROUP(newconn->type) == NETCONN_TCP ? (accepted != 0) : 1); + sockets[i].errevent = 0; ++ sockets[i].same_node_rx_ring = NULL; ++ sockets[i].same_node_rx_ring_mz = NULL; ++ sockets[i].same_node_tx_ring = NULL; ++ sockets[i].same_node_tx_ring_mz = NULL; + return i + LWIP_SOCKET_OFFSET; + } else { + lwip_close(i); +@@ -716,6 +720,11 @@ free_socket(struct lwip_sock *sock, int is_tcp) + /* Protect socket array */ + SYS_ARCH_PROTECT(lev); + ++#if GAZELLE_ENABLE ++ /* remove sock from same_node_recv_lit */ ++ list_del_node_null(&sock->recv_list); ++#endif ++ + freed = free_socket_locked(sock, is_tcp, &conn, &lastdata); + SYS_ARCH_UNPROTECT(lev); + /* don't use 'sock' after this line, as another task might have allocated it */ +@@ -780,6 +789,18 @@ lwip_accept4(int s, struct sockaddr *addr, socklen_t *addrlen, int flags) + LWIP_ASSERT("invalid socket index", (newsock >= LWIP_SOCKET_OFFSET) && (newsock < NUM_SOCKETS + LWIP_SOCKET_OFFSET)); + #endif /* GAZELLE_ENABLE */ + nsock = &sockets[newsock - LWIP_SOCKET_OFFSET]; ++#if GAZELLE_ENABLE ++ struct tcp_pcb *pcb = newconn->pcb.tcp; ++ if (pcb->client_rx_ring != NULL && pcb->client_tx_ring != NULL) { ++ if (find_same_node_memzone(pcb, nsock) != 0) { ++ netconn_delete(newconn); ++ free_socket(nsock, 1); ++ sock_set_errno(sock, ENOTCONN); ++ done_socket(sock); ++ return -1; ++ } ++ } ++#endif + + /* See event_callback: If data comes in right away after an accept, even + * though the server task might not have created a new socket yet. +diff --git a/src/core/ipv4/ip4_frag.c b/src/core/ipv4/ip4_frag.c +index f15b798..e01ea51 100644 +--- a/src/core/ipv4/ip4_frag.c ++++ b/src/core/ipv4/ip4_frag.c +@@ -729,6 +729,7 @@ ip_frag_free_pbuf_custom_ref(struct pbuf_custom_ref *p) + + /** Free-callback function to free a 'struct pbuf_custom_ref', called by + * pbuf_free. 
*/ ++#if !GAZELLE_ENABLE + static void + ipfrag_free_pbuf_custom(struct pbuf *p) + { +@@ -740,6 +741,7 @@ ipfrag_free_pbuf_custom(struct pbuf *p) + } + ip_frag_free_pbuf_custom_ref(pcr); + } ++#endif + #endif /* !LWIP_NETIF_TX_SINGLE_PBUF */ + + /** +@@ -851,7 +853,9 @@ ip4_frag(struct pbuf *p, struct netif *netif, const ip4_addr_t *dest) + } + pbuf_ref(p); + pcr->original = p; ++#if !GAZELLE_ENABLE + pcr->pc.custom_free_function = ipfrag_free_pbuf_custom; ++#endif + + /* Add it to end of rambuf's chain, but using pbuf_cat, not pbuf_chain + * so that it is removed when pbuf_dechain is later called on rambuf. +diff --git a/src/core/netif.c b/src/core/netif.c +index 70392cb..86b74a0 100644 +--- a/src/core/netif.c ++++ b/src/core/netif.c +@@ -1065,7 +1065,7 @@ netif_set_link_callback(struct netif *netif, netif_status_callback_fn link_callb + } + #endif /* LWIP_NETIF_LINK_CALLBACK */ + +-#if ENABLE_LOOPBACK ++#if !GAZELLE_ENABLE + /** + * @ingroup netif + * Send an IP packet to be received on the same netif (loopif-like). +@@ -1184,6 +1184,7 @@ netif_loop_output(struct netif *netif, struct pbuf *p) + + return ERR_OK; + } ++#endif + + #if LWIP_HAVE_LOOPIF + #if LWIP_IPV4 +@@ -1205,7 +1206,7 @@ netif_loop_output_ipv6(struct netif *netif, struct pbuf *p, const ip6_addr_t *ad + #endif /* LWIP_IPV6 */ + #endif /* LWIP_HAVE_LOOPIF */ + +- ++#if !GAZELLE_ENABLE + /** + * Call netif_poll() in the main loop of your application. This is to prevent + * reentering non-reentrant functions like tcp_input(). 
Packets passed to +@@ -1277,6 +1278,7 @@ netif_poll(struct netif *netif) + } + SYS_ARCH_UNPROTECT(lev); + } ++#endif + + #if !LWIP_NETIF_LOOPBACK_MULTITHREADING + /** +@@ -1292,7 +1294,6 @@ netif_poll_all(void) + } + } + #endif /* !LWIP_NETIF_LOOPBACK_MULTITHREADING */ +-#endif /* ENABLE_LOOPBACK */ + + #if LWIP_NUM_NETIF_CLIENT_DATA > 0 + /** +diff --git a/src/core/pbuf.c b/src/core/pbuf.c +index dd71519..2385e57 100644 +--- a/src/core/pbuf.c ++++ b/src/core/pbuf.c +@@ -69,6 +69,7 @@ + */ + + #include "lwip/opt.h" ++#include "lwipsock.h" + + #include "lwip/pbuf.h" + #include "lwip/stats.h" +@@ -189,6 +190,7 @@ pbuf_init_alloced_pbuf(struct pbuf *p, void *payload, u16_t tot_len, u16_t len, + p->flags = flags; + p->ref = 1; + p->if_idx = NETIF_NO_INDEX; ++ p->pcb = NULL; + } + + /** +@@ -777,9 +779,13 @@ pbuf_free(struct pbuf *p) + #if LWIP_SUPPORT_CUSTOM_PBUF + /* is this a custom pbuf? */ + if ((p->flags & PBUF_FLAG_IS_CUSTOM) != 0) { ++#if GAZELLE_ENABLE ++ gazelle_free_pbuf(p); ++#else + struct pbuf_custom *pc = (struct pbuf_custom *)p; + LWIP_ASSERT("pc->custom_free_function != NULL", pc->custom_free_function != NULL); + pc->custom_free_function(p); ++#endif + } else + #endif /* LWIP_SUPPORT_CUSTOM_PBUF */ + { +diff --git a/src/core/tcp.c b/src/core/tcp.c +index 69a39f6..538a664 100644 +--- a/src/core/tcp.c ++++ b/src/core/tcp.c +@@ -116,6 +116,8 @@ + #include + #include + ++#include "lwipsock.h" ++ + #ifdef LWIP_HOOK_FILENAME + #include LWIP_HOOK_FILENAME + #endif +@@ -250,6 +252,18 @@ void + tcp_free(struct tcp_pcb *pcb) + { + #if GAZELLE_ENABLE ++ if (pcb->free_ring == 1) { ++ struct netconn *netconn = NULL; ++ struct lwip_sock *sock = NULL; ++ rte_ring_free(pcb->client_rx_ring); ++ rte_ring_free(pcb->client_tx_ring); ++ netconn = (struct netconn *)pcb->callback_arg; ++ sock = get_socket(netconn->socket); ++ rte_memzone_free(sock->same_node_rx_ring->mz); ++ rte_memzone_free(sock->same_node_rx_ring_mz); ++ rte_memzone_free(sock->same_node_tx_ring->mz); ++ 
rte_memzone_free(sock->same_node_tx_ring_mz); ++ } + vdev_unreg_done(pcb); + release_port(pcb->local_port); + #endif +@@ -996,6 +1010,15 @@ tcp_listen_with_backlog_and_err(struct tcp_pcb *pcb, u8_t backlog, err_t *err) + /* pcb transfer to lpcb and reg into tcp_listen_pcbs. freeing pcb shouldn't release sock table in here. + * local_port=0 avoid to release sock table in tcp_free */ + pcb->local_port = 0; ++ ++ char name[RING_NAME_LEN]; ++ snprintf(name, sizeof(name), "listen_rx_ring_%u", lpcb->local_port); ++ if (rte_ring_lookup(name) != NULL) { ++ /* port reuse */ ++ lpcb->listen_rx_ring = NULL; ++ } else { ++ same_node_ring_create(&lpcb->listen_rx_ring, SAME_NODE_RING_SIZE, lpcb->local_port, "listen", "rx"); ++ } + #endif + tcp_free(pcb); + #if LWIP_CALLBACK_API +@@ -1262,6 +1285,16 @@ tcp_connect(struct tcp_pcb *pcb, const ip_addr_t *ipaddr, u16_t port, + #endif /* SO_REUSE */ + } + ++#if GAZELLE_ENABLE ++ /* communication between processes on the same node */ ++ if (ip_addr_cmp(&pcb->local_ip, &pcb->remote_ip)) { ++ ret = create_same_node_ring(pcb); ++ if (ret != 0) { ++ return ret; ++ } ++ } ++#endif ++ + iss = tcp_next_iss(pcb); + pcb->rcv_nxt = 0; + pcb->snd_nxt = iss; +@@ -2090,7 +2123,13 @@ tcp_alloc(u8_t prio) + pcb->keep_intvl = TCP_KEEPINTVL_DEFAULT; + pcb->keep_cnt = TCP_KEEPCNT_DEFAULT; + #endif /* LWIP_TCP_KEEPALIVE */ ++#if GAZELLE_ENABLE ++ pcb->client_rx_ring = NULL; ++ pcb->client_tx_ring = NULL; ++ pcb->free_ring = 0; ++#endif + } ++ + return pcb; + } + +diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c +index dd83260..719cf04 100644 +--- a/src/core/tcp_in.c ++++ b/src/core/tcp_in.c +@@ -42,6 +42,7 @@ + */ + + #include "lwip/opt.h" ++#include "lwipsock.h" + + #if LWIP_TCP /* don't build if not configured for use in lwipopts.h */ + +@@ -806,6 +807,11 @@ tcp_listen_input(struct tcp_pcb_listen *pcb) + + #if GAZELLE_ENABLE + vdev_reg_done(REG_RING_TCP_CONNECT, npcb); ++ if (ip_addr_cmp(&npcb->local_ip, &npcb->remote_ip)) { ++ if 
(find_same_node_ring(npcb) != 0) { ++ return; ++ } ++ } + #endif + + /* Parse any options in the SYN. */ +diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c +index 8100e18..b1c317d 100644 +--- a/src/core/tcp_out.c ++++ b/src/core/tcp_out.c +@@ -725,6 +725,10 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + goto memerr; + } + ++#if GAZELLE_ENABLE ++ lstack_calculate_aggregate(2, p->tot_len); ++#endif ++ + if ((seg = tcp_create_segment(pcb, p, 0, pcb->snd_lbb + pos, optflags)) == NULL) { + #if GAZELLE_ENABLE + if (pos > 0) { +@@ -1705,6 +1709,10 @@ tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif + int seg_chksum_was_swapped = 0; + #endif + ++#if USE_LIBOS ++ lstack_calculate_aggregate(1, seg->len); ++#endif ++ + LWIP_ASSERT("tcp_output_segment: invalid seg", seg != NULL); + LWIP_ASSERT("tcp_output_segment: invalid pcb", pcb != NULL); + LWIP_ASSERT("tcp_output_segment: invalid netif", netif != NULL); +@@ -1899,6 +1907,8 @@ tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif + PERF_START(PERF_LAYER_IP, PERF_POINT_IP_SEND); + + NETIF_SET_HINTS(netif, &(pcb->netif_hints)); ++ ++ seg->p->pcb = pcb; + err = ip_output_if(seg->p, &pcb->local_ip, &pcb->remote_ip, pcb->ttl, + pcb->tos, IP_PROTO_TCP, netif); + NETIF_RESET_HINTS(netif); +@@ -2236,6 +2246,7 @@ tcp_output_control_segment(struct tcp_pcb *pcb, struct pbuf *p, + err_t err; + struct netif *netif; + ++ p->pcb = pcb; + LWIP_ASSERT("tcp_output_control_segment: invalid pbuf", p != NULL); + + if (pcb == NULL || pcb->pcb_if == NULL) { +diff --git a/src/include/lwip/pbuf.h b/src/include/lwip/pbuf.h +index 6c4ca44..9321afc 100644 +--- a/src/include/lwip/pbuf.h ++++ b/src/include/lwip/pbuf.h +@@ -235,6 +235,7 @@ struct pbuf { + u8_t head; + struct pbuf *last; + pthread_spinlock_t pbuf_lock; ++ struct tcp_pcb *pcb; + #endif /* GAZELLE_ENABLE CHECKSUM_OFFLOAD_SWITCH */ + + /** In case the user needs to store data custom data on a pbuf 
*/ +@@ -263,7 +264,9 @@ struct pbuf_custom { + /** The actual pbuf */ + struct pbuf pbuf; + /** This function is called when pbuf_free deallocates this pbuf(_custom) */ ++#if !GAZELLE_ENABLE + pbuf_free_custom_fn custom_free_function; ++#endif + }; + #endif /* LWIP_SUPPORT_CUSTOM_PBUF */ + +diff --git a/src/include/lwip/tcp.h b/src/include/lwip/tcp.h +index b822f40..e13099c 100644 +--- a/src/include/lwip/tcp.h ++++ b/src/include/lwip/tcp.h +@@ -260,6 +260,9 @@ struct tcp_pcb_listen { + u8_t master_lpcb; + #endif + ++#if GAZELLE_ENABLE ++ struct rte_ring *listen_rx_ring; ++#endif + }; + + +@@ -418,6 +421,13 @@ struct tcp_pcb { + u8_t rcv_scale; + #endif + ++#if GAZELLE_ENABLE ++#define SAME_NODE_RING_SIZE 512 ++ struct rte_ring *client_rx_ring; ++ struct rte_ring *client_tx_ring; ++ u8_t free_ring; ++#endif ++ + u8_t need_tso_send; + }; + +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index 414ead4..0d2a6d9 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -235,4 +235,11 @@ + + #define SIOCSHIWAT 1 + ++/* ++ ------------------------------------ ++ ---------- Netif options ---------- ++ ------------------------------------ ++*/ ++#define LWIP_NETIF_LOOPBACK 1 ++ + #endif /* __LWIPOPTS_H__ */ +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index 7e16ec8..f917d8a 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -65,7 +65,19 @@ struct protocol_stack; + struct wakeup_poll; + struct rte_ring; + #include ++#include ++ ++// 8M ++#define SAME_NODE_RING_LEN (unsigned long long)(8388608) ++#define SAME_NODE_RING_MASK (unsigned long long)(8388608 - 1) ++#define RING_NAME_LEN 32 ++struct same_node_ring { ++ const struct rte_memzone *mz; ++ unsigned long long sndbegin; ++ unsigned long long sndend; ++}; + #endif ++ + /** Contains all internal pointers and states used for a socket */ + struct lwip_sock { + /** sockets currently are built on netconns, each socket has one netconn */ +@@ -120,9 +132,25 @@ 
struct lwip_sock { + struct protocol_stack *stack; + struct rte_ring *recv_ring; + struct rte_ring *send_ring; ++ ++ /* same node send data ring */ ++ struct same_node_ring *same_node_rx_ring; ++ const struct rte_memzone *same_node_rx_ring_mz; ++ struct same_node_ring *same_node_tx_ring; ++ const struct rte_memzone *same_node_tx_ring_mz; + #endif + }; + ++#if GAZELLE_ENABLE ++static inline unsigned same_node_ring_count(struct lwip_sock *sock) ++{ ++ const unsigned long long cur_begin = __atomic_load_n(&sock->same_node_rx_ring->sndbegin, __ATOMIC_RELAXED); ++ const unsigned long long cur_end = __atomic_load_n(&sock->same_node_rx_ring->sndend, __ATOMIC_RELAXED); ++ ++ return cur_end - cur_begin; ++} ++#endif ++ + #ifndef set_errno + #define set_errno(err) do { if (err) { errno = (err); } } while(0) + #endif +@@ -142,6 +170,15 @@ extern struct pbuf *write_lwip_data(struct lwip_sock *sock, uint16_t remain_size + extern void gazelle_init_sock(int32_t fd); + extern void gazelle_clean_sock(int32_t fd); + extern void write_lwip_over(struct lwip_sock *sock); ++extern void netif_poll(struct netif *netif); ++extern err_t netif_loop_output(struct netif *netif, struct pbuf *p); ++extern err_t find_same_node_memzone(struct tcp_pcb *pcb, struct lwip_sock *nsock); ++extern err_t same_node_memzone_create(const struct rte_memzone **zone, int size, int port, char *name, char *); ++extern err_t same_node_ring_create(struct rte_ring **ring, int size, int port, char *name, char *rx); ++extern err_t create_same_node_ring(struct tcp_pcb *pcb); ++extern err_t find_same_node_ring(struct tcp_pcb *pcb); ++extern void gazelle_free_pbuf(struct pbuf *pbuf); ++extern void lstack_calculate_aggregate(int type, uint32_t len); + #endif /* GAZELLE_ENABLE */ + + struct lwip_sock *get_socket(int s); +-- +2.23.0 + diff --git a/0058-lwip-send-recv-thread-bind-numa.patch b/0058-lwip-send-recv-thread-bind-numa.patch new file mode 100644 index 
0000000000000000000000000000000000000000..070b32711559b601e6d6e716ace7eff8769c9050 --- /dev/null +++ b/0058-lwip-send-recv-thread-bind-numa.patch @@ -0,0 +1,24 @@ +From 0e16f4ec71b0794f48cb7b9e99712c36e40d4d48 Mon Sep 17 00:00:00 2001 +From: kircher +Date: Wed, 22 Mar 2023 15:16:04 +0800 +Subject: [PATCH] lwip-send-recv-thread-bind-numa + +--- + src/include/lwipsock.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index f917d8a..f8480c5 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -138,6 +138,7 @@ struct lwip_sock { + const struct rte_memzone *same_node_rx_ring_mz; + struct same_node_ring *same_node_tx_ring; + const struct rte_memzone *same_node_tx_ring_mz; ++ uint8_t already_bind_numa; + #endif + }; + +-- +2.33.0 + diff --git a/0059-fix-last_unsent-last_unacked.patch b/0059-fix-last_unsent-last_unacked.patch new file mode 100644 index 0000000000000000000000000000000000000000..e51d645c2f8a421c9e24c2ad9078119085a9e0b7 --- /dev/null +++ b/0059-fix-last_unsent-last_unacked.patch @@ -0,0 +1,114 @@ +From f1692b0c380241699f70adbf7796cb2c7b3a5c94 Mon Sep 17 00:00:00 2001 +From: jiangheng12 +Date: Sat, 1 Apr 2023 16:59:28 +0800 +Subject: [PATCH] fix last_unsent/last_unacked + +--- + src/core/tcp_in.c | 25 +++++++++++++------------ + src/core/tcp_out.c | 18 +++++++++++++----- + 2 files changed, 26 insertions(+), 17 deletions(-) + +diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c +index 719cf04..7e7d70a 100644 +--- a/src/core/tcp_in.c ++++ b/src/core/tcp_in.c +@@ -1375,18 +1375,19 @@ tcp_receive(struct tcp_pcb *pcb) + } + } + } +- } +- /* fast rexmit when receive too many acks with data */ +- if (TCP_SEQ_LT(ackno + 1, pcb->snd_nxt)) { +- if (pcb->snd_wl2 + pcb->snd_wnd == right_wnd_edge) { +- if (pcb->rtime >= 0) { +- if (pcb->lastack == ackno) { +- found_dataack = 1; +- ++pcb->dataacks; +- if (pcb->dataacks > GAZELLE_TCP_MAX_DATA_ACK_NUM) { +- if (tcp_rexmit(pcb) == ERR_OK) { +- pcb->rtime 
= 0; +- pcb->dataacks = 0; ++ } else { ++ /* fast rexmit when receive too many acks with data */ ++ if (TCP_SEQ_LT(ackno + 1, pcb->snd_nxt)) { ++ if (pcb->snd_wl2 + pcb->snd_wnd == right_wnd_edge) { ++ if (pcb->rtime >= 0) { ++ if (pcb->lastack == ackno) { ++ found_dataack = 1; ++ ++pcb->dataacks; ++ if ((pcb->dataacks > GAZELLE_TCP_MAX_DATA_ACK_NUM) && (pcb->nrtx < (TCP_MAXRTX / 2))) { ++ if (tcp_rexmit(pcb) == ERR_OK) { ++ pcb->rtime = 0; ++ pcb->dataacks = 0; ++ } + } + } + } +diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c +index b1c317d..6250e6b 100644 +--- a/src/core/tcp_out.c ++++ b/src/core/tcp_out.c +@@ -1444,10 +1444,7 @@ tcp_output(struct tcp_pcb *pcb) + pcb->persist_backoff = 0; + + /* useg should point to last segment on unacked queue */ +- useg = pcb->unacked; +- if (useg != NULL) { +- for (; useg->next != NULL; useg = useg->next); +- } ++ useg = pcb->last_unacked; + + /* data available and window allows it to be sent? */ + #if GAZELLE_ENABLE +@@ -1515,7 +1512,11 @@ tcp_output(struct tcp_pcb *pcb) + return err; + } + ++ if (pcb->last_unsent == pcb->unsent) { ++ pcb->last_unsent = last_seg->next; ++ } + pcb->unsent = last_seg->next; ++ + if (pcb->state != SYN_SENT) { + tcp_clear_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW); + } +@@ -1535,6 +1536,7 @@ tcp_output(struct tcp_pcb *pcb) + if (TCP_TCPLEN(tmp_seg) > 0) { + tmp_seg->next = NULL; + if (pcb->unacked == NULL) { ++ pcb->last_unacked = tmp_seg; + pcb->unacked = tmp_seg; + useg = tmp_seg; + } else { +@@ -1550,6 +1552,9 @@ tcp_output(struct tcp_pcb *pcb) + } else { + /* add segment to tail of unacked list */ + useg->next = tmp_seg; ++ if (pcb->last_unacked == useg) { ++ pcb->last_unacked = tmp_seg; ++ } + useg = useg->next; + } + } +@@ -1603,6 +1608,9 @@ end_loop: + #if TCP_OVERSIZE_DBGCHECK + seg->oversize_left = 0; + #endif /* TCP_OVERSIZE_DBGCHECK */ ++ if (pcb->last_unsent == pcb->unsent) { ++ pcb->last_unsent = seg->next; ++ } + pcb->unsent = seg->next; + if (pcb->state != SYN_SENT) { + 
tcp_clear_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW); +@@ -1709,7 +1717,7 @@ tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif + int seg_chksum_was_swapped = 0; + #endif + +-#if USE_LIBOS ++#if GAZELLE_ENABLE + lstack_calculate_aggregate(1, seg->len); + #endif + +-- +2.23.0 + diff --git a/0060-lwip-add-udp-multicast.patch b/0060-lwip-add-udp-multicast.patch new file mode 100644 index 0000000000000000000000000000000000000000..94e4e307193631bb66ffe460eb01a1785836adcb --- /dev/null +++ b/0060-lwip-add-udp-multicast.patch @@ -0,0 +1,337 @@ +From d9ef907e03f44c30e26190b0c5ad895de716ac5c Mon Sep 17 00:00:00 2001 +From: kircher +Date: Fri, 12 May 2023 20:54:51 +0800 +Subject: [PATCH] add udp multicast in support + +--- + src/api/api_msg.c | 5 +++++ + src/api/sockets.c | 21 ++++++++++++++++++++- + src/core/dir.mk | 2 +- + src/core/udp.c | 28 +++++++++++++++++++++++++--- + src/include/dpdk_cksum.h | 4 ++++ + src/include/lwip/opt.h | 5 +++-- + src/include/lwip/pbuf.h | 4 ++++ + src/include/lwip/sockets.h | 15 +++++++++++++++ + src/include/lwipopts.h | 2 +- + 9 files changed, 78 insertions(+), 8 deletions(-) + +diff --git a/src/api/api_msg.c b/src/api/api_msg.c +index 1840c9d..0287c06 100644 +--- a/src/api/api_msg.c ++++ b/src/api/api_msg.c +@@ -282,8 +282,13 @@ recv_udp(void *arg, struct udp_pcb *pcb, struct pbuf *p, + #if LWIP_SO_RCVBUF + SYS_ARCH_INC(conn->recv_avail, len); + #endif /* LWIP_SO_RCVBUF */ ++#if GAZELLE_ENABLE ++ add_recv_list(conn->socket); ++ LWIP_UNUSED_ARG(len); ++#else + /* Register event with callback */ + API_EVENT(conn, NETCONN_EVT_RCVPLUS, len); ++#endif + } + } + #endif /* LWIP_UDP */ +diff --git a/src/api/sockets.c b/src/api/sockets.c +index 7a5da26..a0f9d50 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -54,6 +54,7 @@ + #include "lwip/netif.h" + #include "lwip/priv/tcpip_priv.h" + #include "lwip/mld6.h" ++#include "lwip/api.h" + #if LWIP_CHECKSUM_ON_COPY + #include "lwip/inet_chksum.h" + #endif +@@ -1187,7 
+1188,7 @@ lwip_recv_tcp_done: + #endif + + /* Convert a netbuf's address data to struct sockaddr */ +-static int ++int + lwip_sock_make_addr(struct netconn *conn, ip_addr_t *fromaddr, u16_t port, + struct sockaddr *from, socklen_t *fromlen) + { +@@ -1274,6 +1275,7 @@ lwip_recvfrom_udp_raw(struct lwip_sock *sock, int flags, struct msghdr *msg, u16 + apiflags = 0; + } + ++#if !GAZELLE_ENABLE + LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_recvfrom_udp_raw[UDP/RAW]: top sock->lastdata=%p\n", (void *)sock->lastdata.netbuf)); + /* Check if there is data left from the last recv operation. */ + buf = sock->lastdata.netbuf; +@@ -1361,6 +1363,18 @@ lwip_recvfrom_udp_raw(struct lwip_sock *sock, int flags, struct msghdr *msg, u16 + sock->lastdata.netbuf = NULL; + netbuf_delete(buf); + } ++#else /* GAZELLE_ENABLE */ ++ LWIP_UNUSED_ARG(copylen); ++ LWIP_UNUSED_ARG(buf); ++ LWIP_UNUSED_ARG(err); ++ LWIP_UNUSED_ARG(copied); ++ LWIP_UNUSED_ARG(i); ++ buflen = read_lwip_data(sock, flags, apiflags); ++ if (buflen <= 0) { ++ return ERR_BUF; ++ } ++ ++#endif /* GAZELLE_ENABLE */ + if (datagram_len) { + *datagram_len = buflen; + } +@@ -1409,6 +1423,7 @@ lwip_recvfrom(int s, void *mem, size_t len, int flags, + done_socket(sock); + return -1; + } ++ + ret = (ssize_t)LWIP_MIN(LWIP_MIN(len, datagram_len), SSIZE_MAX); + if (fromlen) { + *fromlen = msg.msg_namelen; +@@ -3956,6 +3971,10 @@ lwip_ioctl(int s, long cmd, ...) 
+ struct lwip_sock *sock = posix_api->get_socket(s); + u8_t val; + ++#if LWIP_SO_RCVBUF ++ int recv_avail; ++#endif /* LWIP_SO_RCVBUF */ ++ + int ret = -1; + void *argp; + va_list ap; +diff --git a/src/core/dir.mk b/src/core/dir.mk +index ebc01a5..57a9670 100644 +--- a/src/core/dir.mk ++++ b/src/core/dir.mk +@@ -1,6 +1,6 @@ + SRC = def.c inet_chksum.c init.c ip.c mem.c memp.c netif.c pbuf.c \ + raw.c tcp.c tcp_in.c tcp_out.c timeouts.c udp.c stats.c\ + ipv4/icmp.c ipv4/ip4_addr.c ipv4/ip4_frag.c ipv4/etharp.c \ +- ipv4/ip4.c ++ ipv4/ip4.c ipv4/igmp.c + + $(eval $(call register_dir, core, $(SRC))) +diff --git a/src/core/udp.c b/src/core/udp.c +index a5f76b9..1398537 100644 +--- a/src/core/udp.c ++++ b/src/core/udp.c +@@ -65,6 +65,12 @@ + + #include + ++#if GAZELLE_ENABLE ++#include "lwipsock.h" ++#include ++#include "dpdk_cksum.h" ++#endif ++ + #ifndef UDP_LOCAL_PORT_RANGE_START + /* From http://www.iana.org/assignments/port-numbers: + "The Dynamic and/or Private Ports are those from 49152 through 65535" */ +@@ -210,7 +216,7 @@ udp_input(struct pbuf *p, struct netif *inp) + #if LWIP_RECORD_PERF + PERF_START(PERF_LAYER_UDP, PERF_POINT_UDP); + #else +- PERF_START; ++ //PERF_START; + #endif + + UDP_STATS_INC(udp.recv); +@@ -435,7 +441,7 @@ end: + #if LWIP_RECORD_PERF + PERF_STOP_INCREASE_COUNT("udp_input", PERF_LAYER_UDP); + #else +- PERF_STOP("udp_input"); ++ //PERF_STOP("udp_input"); + #endif + + return; +@@ -451,7 +457,7 @@ chkerr: + #if LWIP_RECORD_PERF + PERF_STOP_INCREASE_COUNT("udp_input", PERF_LAYER_UDP); + #else +- PERF_STOP("udp_input"); ++ //PERF_STOP("udp_input"); + #endif + + #endif /* CHECKSUM_CHECK_UDP */ +@@ -608,11 +614,26 @@ udp_sendto_chksum(struct udp_pcb *pcb, struct pbuf *p, const ip_addr_t *dst_ip, + UDP_STATS_INC(udp.rterr); + return ERR_RTE; + } ++ uint8_t apiflags = 0; ++ ++ struct pbuf *udp_pbuf = write_lwip_data((struct lwip_sock *)(p->payload), p->tot_len, &apiflags); ++ write_lwip_over((struct lwip_sock *)(p->payload)); ++ ++ pbuf_free(p); 
++ p = udp_pbuf; ++ if (p == NULL) { ++ return ERR_MEM; ++ } ++ ++ if (p->port) { ++ return udp_sendto_if(pcb, p, &(p->addr), p->port, netif); ++ } else { + #if LWIP_CHECKSUM_ON_COPY && CHECKSUM_GEN_UDP + return udp_sendto_if_chksum(pcb, p, dst_ip, dst_port, netif, have_chksum, chksum); + #else /* LWIP_CHECKSUM_ON_COPY && CHECKSUM_GEN_UDP */ + return udp_sendto_if(pcb, p, dst_ip, dst_port, netif); + #endif /* LWIP_CHECKSUM_ON_COPY && CHECKSUM_GEN_UDP */ ++ } + } + + /** +@@ -905,6 +926,7 @@ udp_sendto_if_src_chksum(struct udp_pcb *pcb, struct pbuf *p, const ip_addr_t *d + LWIP_DEBUGF(UDP_DEBUG, ("udp_send: ip_output_if (,,,,0x%02"X16_F",)\n", (u16_t)ip_proto)); + /* output to IP */ + NETIF_SET_HINTS(netif, &(pcb->netif_hints)); ++ udph_cksum_set(q, UDP_HLEN); + err = ip_output_if_src(q, src_ip, dst_ip, ttl, pcb->tos, ip_proto, netif); + NETIF_RESET_HINTS(netif); + +diff --git a/src/include/dpdk_cksum.h b/src/include/dpdk_cksum.h +index df2e2a5..e41644b 100644 +--- a/src/include/dpdk_cksum.h ++++ b/src/include/dpdk_cksum.h +@@ -82,6 +82,10 @@ static inline void tcph_cksum_set(struct pbuf *p, u16_t len) { + p->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM; + } + ++static inline void udph_cksum_set(struct pbuf *p, u16_t len) { ++ p->l4_len = len; ++} ++ + static inline u16_t ip_chksum_pseudo_offload(u8_t proto, u16_t proto_len, + const ip_addr_t *src, const ip_addr_t *dst) + { +diff --git a/src/include/lwip/opt.h b/src/include/lwip/opt.h +index 0376f60..38c6e9b 100644 +--- a/src/include/lwip/opt.h ++++ b/src/include/lwip/opt.h +@@ -133,6 +133,7 @@ + * MEMCPY: override this if you have a faster implementation at hand than the + * one included in your C library + */ ++//#include + #if !defined MEMCPY || defined __DOXYGEN__ + #define MEMCPY(dst,src,len) memcpy(dst,src,len) + #endif +@@ -1083,7 +1084,7 @@ + * LWIP_IGMP==1: Turn on IGMP module. 
+ */ + #if !defined LWIP_IGMP || defined __DOXYGEN__ +-#define LWIP_IGMP 0 ++#define LWIP_IGMP 1 + #endif + #if !LWIP_IPV4 + #undef LWIP_IGMP +@@ -2030,7 +2031,7 @@ + * LWIP_SO_RCVBUF==1: Enable SO_RCVBUF processing. + */ + #if !defined LWIP_SO_RCVBUF || defined __DOXYGEN__ +-#define LWIP_SO_RCVBUF 0 ++#define LWIP_SO_RCVBUF 1 + #endif + + /** +diff --git a/src/include/lwip/pbuf.h b/src/include/lwip/pbuf.h +index 9321afc..fb21134 100644 +--- a/src/include/lwip/pbuf.h ++++ b/src/include/lwip/pbuf.h +@@ -40,6 +40,8 @@ + + #include "lwip/opt.h" + #include "lwip/err.h" ++#include "lwip/ip_addr.h" ++#include "lwip/ip6_addr.h" + + #ifdef __cplusplus + extern "C" { +@@ -236,6 +238,8 @@ struct pbuf { + struct pbuf *last; + pthread_spinlock_t pbuf_lock; + struct tcp_pcb *pcb; ++ ip_addr_t addr; ++ u16_t port; + #endif /* GAZELLE_ENABLE CHECKSUM_OFFLOAD_SWITCH */ + + /** In case the user needs to store data custom data on a pbuf */ +diff --git a/src/include/lwip/sockets.h b/src/include/lwip/sockets.h +index 58acf0f..36a47eb 100644 +--- a/src/include/lwip/sockets.h ++++ b/src/include/lwip/sockets.h +@@ -48,6 +48,7 @@ + #include "lwip/err.h" + #include "lwip/inet.h" + #include "lwip/errno.h" ++#include "lwip/api.h" + + #include + +@@ -323,20 +324,31 @@ struct linger { + + + #if LWIP_MULTICAST_TX_OPTIONS ++#if GAZELLE_ENABLE ++#define IP_MULTICAST_IF 32 ++#define IP_MULTICAST_TTL 33 ++#define IP_MULTICAST_LOOP 34 ++#else + /* + * Options and types for UDP multicast traffic handling + */ + #define IP_MULTICAST_TTL 5 + #define IP_MULTICAST_IF 6 + #define IP_MULTICAST_LOOP 7 ++#endif /* GAZELLE_ENABLE */ + #endif /* LWIP_MULTICAST_TX_OPTIONS */ + + #if LWIP_IGMP ++#if GAZELLE_ENABLE ++#define IP_ADD_MEMBERSHIP 35 ++#define IP_DROP_MEMBERSHIP 36 ++#else + /* + * Options and types related to multicast membership + */ + #define IP_ADD_MEMBERSHIP 3 + #define IP_DROP_MEMBERSHIP 4 ++#endif /* GAZELLE_ENABLE */ + + typedef struct ip_mreq { + struct in_addr imr_multiaddr; /* IP multicast 
address of group */ +@@ -656,6 +668,7 @@ ssize_t lwip_sendto(int s, const void *dataptr, size_t size, int flags, + int lwip_socket(int domain, int type, int protocol); + ssize_t lwip_write(int s, const void *dataptr, size_t size); + ssize_t lwip_writev(int s, const struct iovec *iov, int iovcnt); ++ + #if LWIP_SOCKET_SELECT + int lwip_select(int maxfdp1, fd_set *readset, fd_set *writeset, fd_set *exceptset, + struct timeval *timeout); +@@ -667,6 +680,8 @@ int lwip_poll(struct pollfd *fds, nfds_t nfds, int timeout); + #if GAZELLE_ENABLE + int lwip_ioctl(int s, long cmd, ...); + int lwip_fcntl(int s, int cmd, int val); ++int lwip_sock_make_addr(struct netconn *conn, ip_addr_t *fromaddr, u16_t port, ++ struct sockaddr *from, socklen_t *fromlen); + #else + int lwip_ioctl(int s, long cmd, void *argp); + int lwip_fcntl(int s, int cmd, int val); +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index 0d2a6d9..bcb0879 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -175,7 +175,7 @@ + ---------- UDP options ---------- + --------------------------------- + */ +-#define LWIP_UDP 0 ++#define LWIP_UDP 1 + + + /* +-- +2.33.0 + diff --git a/0061-fix-pbuf-leak-in-udp-connection.patch b/0061-fix-pbuf-leak-in-udp-connection.patch new file mode 100644 index 0000000000000000000000000000000000000000..a75e3fd9e0ef9fba43cb2359d68ebaa981a14c0d --- /dev/null +++ b/0061-fix-pbuf-leak-in-udp-connection.patch @@ -0,0 +1,29 @@ +From 21f7f9a5bdfd5d2f592af19e73647a48fdbb7bf1 Mon Sep 17 00:00:00 2001 +From: kircher +Date: Tue, 16 May 2023 19:07:42 +0800 +Subject: [PATCH] fix pbuf leak in udp connection + +--- + src/core/udp.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/src/core/udp.c b/src/core/udp.c +index 1398537..9c3cdaa 100644 +--- a/src/core/udp.c ++++ b/src/core/udp.c +@@ -933,8 +933,11 @@ udp_sendto_if_src_chksum(struct udp_pcb *pcb, struct pbuf *p, const ip_addr_t *d + /* @todo: must this be increased even if error 
occurred? */ + MIB2_STATS_INC(mib2.udpoutdatagrams); + ++#if !GAZELLE_ENABLE + /* did we chain a separate header pbuf earlier? */ +- if (q != p) { ++ if (q != p) ++#endif ++ { + /* free the header pbuf */ + pbuf_free(q); + q = NULL; +-- +2.33.0 + diff --git a/0062-drop-netbuf-in-recv_udp-to-fix-mem-overflow.patch b/0062-drop-netbuf-in-recv_udp-to-fix-mem-overflow.patch new file mode 100644 index 0000000000000000000000000000000000000000..7022817d22a71d806087d667e4c4c18fbe68dff8 --- /dev/null +++ b/0062-drop-netbuf-in-recv_udp-to-fix-mem-overflow.patch @@ -0,0 +1,249 @@ +From 2e51934e230013c9df58971df53a08dad108becf Mon Sep 17 00:00:00 2001 +From: kircher +Date: Mon, 29 May 2023 19:58:52 +0800 +Subject: [PATCH] drop-netbuf-in-recv_udp-to-fix-mem-overflow + +--- + src/api/api_lib.c | 14 ++++++++++++++ + src/api/api_msg.c | 15 ++++++++++++--- + src/api/sockets.c | 6 +++--- + src/core/udp.c | 8 ++++++++ + src/include/lwip/api.h | 3 +++ + src/include/lwip/pbuf.h | 4 ++++ + src/include/lwip/sockets.h | 8 ++++---- + src/include/lwipopts.h | 4 ++++ + 8 files changed, 52 insertions(+), 10 deletions(-) + +diff --git a/src/api/api_lib.c b/src/api/api_lib.c +index ffa14d6..afdfc11 100644 +--- a/src/api/api_lib.c ++++ b/src/api/api_lib.c +@@ -655,7 +655,11 @@ netconn_recv_data(struct netconn *conn, void **new_buf, u8_t apiflags) + #if (LWIP_UDP || LWIP_RAW) + { + LWIP_ASSERT("buf != NULL", buf != NULL); ++#if GAZELLE_UDP_ENABLE ++ len = ((struct pbuf *)buf)->tot_len; ++#else /* GAZELLE_UDP_ENABLE */ + len = netbuf_len((struct netbuf *)buf); ++#endif /* GAZELLE_UDP_ENABLE */ + } + #endif /* (LWIP_UDP || LWIP_RAW) */ + +@@ -827,6 +831,16 @@ netconn_recv_udp_raw_netbuf(struct netconn *conn, struct netbuf **new_buf) + return netconn_recv_data(conn, (void **)new_buf, 0); + } + ++#if GAZELLE_UDP_ENABLE ++err_t ++netconn_recv_udp_raw_pbuf_flags(struct netconn *conn, struct pbuf **new_buf, u8_t apiflags) ++{ ++ LWIP_ERROR("netconn_recv_udp_raw_pbuf: invalid conn", (conn != NULL) && ++ 
NETCONNTYPE_GROUP(netconn_type(conn)) != NETCONN_TCP, return ERR_ARG;); ++ return netconn_recv_data(conn, (void **)new_buf, apiflags); ++} ++#endif /* GAZELLE_UDP_ENABLE */ ++ + /** + * Receive data (in form of a netbuf) from a UDP or RAW netconn + * +diff --git a/src/api/api_msg.c b/src/api/api_msg.c +index 30929be..b82ebf2 100644 +--- a/src/api/api_msg.c ++++ b/src/api/api_msg.c +@@ -253,6 +253,14 @@ recv_udp(void *arg, struct udp_pcb *pcb, struct pbuf *p, + return; + } + ++#if GAZELLE_UDP_ENABLE ++ LWIP_UNUSED_ARG(buf); ++ ip_addr_set(&p->addr, addr); ++ p->port = port; ++ len = p->tot_len; ++ if (sys_mbox_trypost(&conn->recvmbox, p) != ERR_OK) { ++ return; ++#else /* GAZELLE_UDP_ENABLE */ + buf = (struct netbuf *)memp_malloc(MEMP_NETBUF); + if (buf == NULL) { + pbuf_free(p); +@@ -277,17 +285,18 @@ recv_udp(void *arg, struct udp_pcb *pcb, struct pbuf *p, + if (sys_mbox_trypost(&conn->recvmbox, buf) != ERR_OK) { + netbuf_delete(buf); + return; ++#endif /* GAZELLE_UDP_ENABLE */ + } else { + #if LWIP_SO_RCVBUF + SYS_ARCH_INC(conn->recv_avail, len); + #endif /* LWIP_SO_RCVBUF */ +-#if GAZELLE_ENABLE ++#if GAZELLE_UDP_ENABLE + add_recv_list(conn->socket); + LWIP_UNUSED_ARG(len); +-#else ++#else /* GAZELLE_UDP_ENABLE */ + /* Register event with callback */ + API_EVENT(conn, NETCONN_EVT_RCVPLUS, len); +-#endif ++#endif /* GAZELLE_UDP_ENABLE */ + } + } + #endif /* LWIP_UDP */ +diff --git a/src/api/sockets.c b/src/api/sockets.c +index dee9230..17691f7 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -1179,7 +1179,7 @@ lwip_recvfrom_udp_raw(struct lwip_sock *sock, int flags, struct msghdr *msg, u16 + apiflags = 0; + } + +-#if !GAZELLE_ENABLE ++#if !GAZELLE_UDP_ENABLE + LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_recvfrom_udp_raw[UDP/RAW]: top sock->lastdata=%p\n", (void *)sock->lastdata.netbuf)); + /* Check if there is data left from the last recv operation. 
*/ + buf = sock->lastdata.netbuf; +@@ -1267,7 +1267,7 @@ lwip_recvfrom_udp_raw(struct lwip_sock *sock, int flags, struct msghdr *msg, u16 + sock->lastdata.netbuf = NULL; + netbuf_delete(buf); + } +-#else /* GAZELLE_ENABLE */ ++#else /* GAZELLE_UDP_ENABLE */ + LWIP_UNUSED_ARG(copylen); + LWIP_UNUSED_ARG(buf); + LWIP_UNUSED_ARG(err); +@@ -1278,7 +1278,7 @@ lwip_recvfrom_udp_raw(struct lwip_sock *sock, int flags, struct msghdr *msg, u16 + return ERR_BUF; + } + +-#endif /* GAZELLE_ENABLE */ ++#endif /* GAZELLE_UDP_ENABLE */ + if (datagram_len) { + *datagram_len = buflen; + } +diff --git a/src/core/udp.c b/src/core/udp.c +index 170c911..1eb459d 100644 +--- a/src/core/udp.c ++++ b/src/core/udp.c +@@ -599,6 +599,7 @@ udp_sendto_chksum(struct udp_pcb *pcb, struct pbuf *p, const ip_addr_t *dst_ip, + UDP_STATS_INC(udp.rterr); + return ERR_RTE; + } ++#if GAZELLE_UDP_ENABLE + uint8_t apiflags = 0; + + struct pbuf *udp_pbuf = write_lwip_data((struct lwip_sock *)(p->payload), p->tot_len, &apiflags); +@@ -611,14 +612,21 @@ udp_sendto_chksum(struct udp_pcb *pcb, struct pbuf *p, const ip_addr_t *dst_ip, + } + + if (p->port) { ++#if LWIP_CHECKSUM_ON_COPY && CHECKSUM_GEN_UDP ++ return udp_sendto_if_chksum(pcb, p, &(p->addr), p->port, netif, have_chksum, chksum); ++#else /* LWIP_CHECKSUM_ON_COPY && CHECKSUM_GEN_UDP */ + return udp_sendto_if(pcb, p, &(p->addr), p->port, netif); ++#endif /* LWIP_CHECKSUM_ON_COPY && CHECKSUM_GEN_UDP */ + } else { ++#endif /* GAZELLE_UDP_ENABLE */ + #if LWIP_CHECKSUM_ON_COPY && CHECKSUM_GEN_UDP + return udp_sendto_if_chksum(pcb, p, dst_ip, dst_port, netif, have_chksum, chksum); + #else /* LWIP_CHECKSUM_ON_COPY && CHECKSUM_GEN_UDP */ + return udp_sendto_if(pcb, p, dst_ip, dst_port, netif); + #endif /* LWIP_CHECKSUM_ON_COPY && CHECKSUM_GEN_UDP */ ++#if GAZELLE_UDP_ENABLE + } ++#endif /* GAZELLE_UDP_ENABLE */ + } + + /** +diff --git a/src/include/lwip/api.h b/src/include/lwip/api.h +index d3c4f02..6090cab 100644 +--- a/src/include/lwip/api.h ++++ 
b/src/include/lwip/api.h +@@ -338,6 +338,9 @@ err_t netconn_accept(struct netconn *conn, struct netconn **new_conn); + err_t netconn_recv(struct netconn *conn, struct netbuf **new_buf); + err_t netconn_recv_udp_raw_netbuf(struct netconn *conn, struct netbuf **new_buf); + err_t netconn_recv_udp_raw_netbuf_flags(struct netconn *conn, struct netbuf **new_buf, u8_t apiflags); ++#if GAZELLE_UDP_ENABLE ++err_t netconn_recv_udp_raw_pbuf_flags(struct netconn *conn, struct pbuf **new_buf, u8_t apiflags); ++#endif /* GAZELLE_UDP_ENABLE */ + err_t netconn_recv_tcp_pbuf(struct netconn *conn, struct pbuf **new_buf); + err_t netconn_recv_tcp_pbuf_flags(struct netconn *conn, struct pbuf **new_buf, u8_t apiflags); + err_t netconn_tcp_recvd(struct netconn *conn, size_t len); +diff --git a/src/include/lwip/pbuf.h b/src/include/lwip/pbuf.h +index 728c5e4..4747f39 100644 +--- a/src/include/lwip/pbuf.h ++++ b/src/include/lwip/pbuf.h +@@ -40,8 +40,10 @@ + + #include "lwip/opt.h" + #include "lwip/err.h" ++#if GAZELLE_UDP_ENABLE + #include "lwip/ip_addr.h" + #include "lwip/ip6_addr.h" ++#endif /* GAZELLE_UDP_ENABLE */ + + #ifdef __cplusplus + extern "C" { +@@ -238,8 +240,10 @@ struct pbuf { + struct pbuf *last; + pthread_spinlock_t pbuf_lock; + struct tcp_pcb *pcb; ++#if GAZELLE_UDP_ENABLE + ip_addr_t addr; + u16_t port; ++#endif /* GAZELLE_UDP_ENABLE */ + #endif /* GAZELLE_ENABLE CHECKSUM_OFFLOAD_SWITCH */ + + /** In case the user needs to store data custom data on a pbuf */ +diff --git a/src/include/lwip/sockets.h b/src/include/lwip/sockets.h +index 643093a..2b6e6be 100644 +--- a/src/include/lwip/sockets.h ++++ b/src/include/lwip/sockets.h +@@ -330,7 +330,7 @@ struct linger { + + + #if LWIP_MULTICAST_TX_OPTIONS +-#if GAZELLE_ENABLE ++#if GAZELLE_UDP_ENABLE + #define IP_MULTICAST_IF 32 + #define IP_MULTICAST_TTL 33 + #define IP_MULTICAST_LOOP 34 +@@ -341,11 +341,11 @@ struct linger { + #define IP_MULTICAST_TTL 5 + #define IP_MULTICAST_IF 6 + #define IP_MULTICAST_LOOP 7 +-#endif /* 
GAZELLE_ENABLE */ ++#endif /* GAZELLE_UDP_ENABLE */ + #endif /* LWIP_MULTICAST_TX_OPTIONS */ + + #if LWIP_IGMP +-#if GAZELLE_ENABLE ++#if GAZELLE_UDP_ENABLE + #define IP_ADD_MEMBERSHIP 35 + #define IP_DROP_MEMBERSHIP 36 + #else +@@ -354,7 +354,7 @@ struct linger { + */ + #define IP_ADD_MEMBERSHIP 3 + #define IP_DROP_MEMBERSHIP 4 +-#endif /* GAZELLE_ENABLE */ ++#endif /* GAZELLE_UDP_ENABLE */ + + typedef struct ip_mreq { + struct in_addr imr_multiaddr; /* IP multicast address of group */ +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index 6b5a2d1..9804aed 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -63,6 +63,10 @@ + + #define GAZELLE_TCP_MIN_TSO_SEG_LEN 256 + ++ ++#define GAZELLE_UDP_ENABLE 1 ++ ++ + /* + ---------------------------------- + ---------- NIC offloads ---------- +-- +2.33.0 + diff --git a/0063-optimize-avoid-too-many-empty-acks-in-tcp_input.patch b/0063-optimize-avoid-too-many-empty-acks-in-tcp_input.patch new file mode 100644 index 0000000000000000000000000000000000000000..0fa456ec3f6bc096c20af560959b6933d98b254c --- /dev/null +++ b/0063-optimize-avoid-too-many-empty-acks-in-tcp_input.patch @@ -0,0 +1,30 @@ +From 30f5815c847060c5ad4075e81581771b8d0cbb72 Mon Sep 17 00:00:00 2001 +From: Lemmy Huang +Date: Thu, 8 Jun 2023 15:15:07 +0800 +Subject: [PATCH] optimize: avoid too many empty acks in tcp_input + +Signed-off-by: Lemmy Huang +--- + src/core/tcp_in.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c +index 7e7d70ab..0abee303 100644 +--- a/src/core/tcp_in.c ++++ b/src/core/tcp_in.c +@@ -1807,11 +1807,7 @@ tcp_receive(struct tcp_pcb *pcb) + + + /* Acknowledge the segment(s). 
*/ +- if (flags & TCP_PSH) { +- tcp_ack_now(pcb); +- } else { +- tcp_ack(pcb); +- } ++ tcp_ack(pcb); + + #if LWIP_TCP_SACK_OUT + if (LWIP_TCP_SACK_VALID(pcb, 0)) { +-- +2.22.0.windows.1 + diff --git a/0064-fix-udp-send-recv-in-multiple-queue.patch b/0064-fix-udp-send-recv-in-multiple-queue.patch new file mode 100644 index 0000000000000000000000000000000000000000..1b5139fbfe412e43305c13d8d2b090421e32907b --- /dev/null +++ b/0064-fix-udp-send-recv-in-multiple-queue.patch @@ -0,0 +1,169 @@ +From 71d82a830005540ef92b2bcd7c121c9ff85beb64 Mon Sep 17 00:00:00 2001 +From: j00660176 +Date: Mon, 12 Jun 2023 20:21:23 +0800 +Subject: [PATCH] fix udp send/recv in multiple queue + +--- + src/core/udp.c | 73 +++++++++++++++++++++++++++++++++++++++--- + src/include/lwip/udp.h | 4 +++ + 2 files changed, 73 insertions(+), 4 deletions(-) + +diff --git a/src/core/udp.c b/src/core/udp.c +index fba645b..0b1fa65 100644 +--- a/src/core/udp.c ++++ b/src/core/udp.c +@@ -65,10 +65,12 @@ + + #include + +-#if GAZELLE_ENABLE +-#include "lwipsock.h" ++#if GAZELLE_UDP_ENABLE ++#include + #include ++#include "lwipsock.h" + #include "dpdk_cksum.h" ++#include "reg_sock.h" + #endif + + #ifndef UDP_LOCAL_PORT_RANGE_START +@@ -81,10 +83,24 @@ + + /* last local UDP port */ + static u16_t udp_port = UDP_LOCAL_PORT_RANGE_START; ++#if GAZELLE_UDP_ENABLE ++static pthread_mutex_t g_udp_port_mutex = PTHREAD_MUTEX_INITIALIZER; ++static u8_t port_state[UDP_LOCAL_PORT_RANGE_END - UDP_LOCAL_PORT_RANGE_START + 1] = {0}; ++static void udp_release_port(u16_t port) ++{ ++ if (port >= UDP_LOCAL_PORT_RANGE_START && port <= UDP_LOCAL_PORT_RANGE_END) { ++ port_state[port - UDP_LOCAL_PORT_RANGE_START] = 0; ++ } ++} ++#endif + + /* The list of UDP PCBs */ + /* exported in udp.h (was static) */ ++#if GAZELLE_UDP_ENABLE ++PER_THREAD struct udp_pcb *udp_pcbs; ++#else + struct udp_pcb *udp_pcbs; ++#endif + + /** + * Initialize this module. 
+@@ -102,6 +118,37 @@ udp_init(void) + * + * @return a new (free) local UDP port number + */ ++#if GAZELLE_UDP_ENABLE ++static u16_t ++udp_new_port(struct udp_pcb *dst_pcb) ++{ ++ u16_t n = 0; ++ u16_t tmp_port = 0; ++ ++ pthread_mutex_lock(&g_udp_port_mutex); ++ do { ++ if (udp_port++ == UDP_LOCAL_PORT_RANGE_END) { ++ udp_port = UDP_LOCAL_PORT_RANGE_START; ++ } ++ ++ if (__atomic_load_n(&port_state[udp_port - UDP_LOCAL_PORT_RANGE_START], __ATOMIC_ACQUIRE) == 0) { ++ if (port_in_stack_queue(dst_pcb->remote_ip.addr, dst_pcb->local_ip.addr, dst_pcb->remote_port, udp_port)) { ++ tmp_port = udp_port; ++ __atomic_store_n(&port_state[udp_port - UDP_LOCAL_PORT_RANGE_START], 1, __ATOMIC_RELEASE); ++ break; ++ } ++ } ++ n++; ++ if (n > UDP_LOCAL_PORT_RANGE_END - UDP_LOCAL_PORT_RANGE_START) { ++ break; ++ } ++ } while (tmp_port == 0); ++ ++ pthread_mutex_unlock(&g_udp_port_mutex); ++ ++ return tmp_port; ++} ++#else + static u16_t + udp_new_port(void) + { +@@ -123,6 +170,7 @@ again: + } + return udp_port; + } ++#endif + + /** Common code to see if the current input packet matches the pcb + * (current input packet is accessed via ip(4/6)_current_* macros) +@@ -789,7 +837,21 @@ udp_sendto_if_src_chksum(struct udp_pcb *pcb, struct pbuf *p, const ip_addr_t *d + /* if the PCB is not yet bound to a port, bind it here */ + if (pcb->local_port == 0) { + LWIP_DEBUGF(UDP_DEBUG | LWIP_DBG_TRACE, ("udp_send: not yet bound to a port, binding now\n")); ++#if GAZELLE_UDP_ENABLE ++ ip_addr_t tmp_local_ip = pcb->local_ip; ++ ip_addr_t tmp_remote_ip = pcb->remote_ip; ++ u16_t tmp_remote_port = pcb->remote_port; ++ ++ pcb->local_ip = netif->ip_addr; ++ pcb->remote_port = dst_port; ++ pcb->remote_ip = *dst_ip; ++#endif + err = udp_bind(pcb, &pcb->local_ip, pcb->local_port); ++#if GAZELLE_UDP_ENABLE ++ pcb->local_ip = tmp_local_ip; ++ pcb->remote_ip = tmp_remote_ip; ++ pcb->remote_port = tmp_remote_port; ++#endif + if (err != ERR_OK) { + LWIP_DEBUGF(UDP_DEBUG | LWIP_DBG_TRACE | 
LWIP_DBG_LEVEL_SERIOUS, ("udp_send: forced port bind failed\n")); + return err; +@@ -941,7 +1003,7 @@ udp_sendto_if_src_chksum(struct udp_pcb *pcb, struct pbuf *p, const ip_addr_t *d + /* @todo: must this be increased even if error occurred? */ + MIB2_STATS_INC(mib2.udpoutdatagrams); + +-#if !GAZELLE_ENABLE ++#if !GAZELLE_UDP_ENABLE + /* did we chain a separate header pbuf earlier? */ + if (q != p) + #endif +@@ -1026,7 +1088,7 @@ udp_bind(struct udp_pcb *pcb, const ip_addr_t *ipaddr, u16_t port) + + /* no port specified? */ + if (port == 0) { +- port = udp_new_port(); ++ port = udp_new_port(pcb); + if (port == 0) { + /* no more ports available in local range */ + LWIP_DEBUGF(UDP_DEBUG, ("udp_bind: out of free UDP ports\n")); +@@ -1252,6 +1314,9 @@ udp_remove(struct udp_pcb *pcb) + } + } + } ++#if GAZELLE_UDP_ENABLE ++ udp_release_port(pcb->local_port); ++#endif + memp_free(MEMP_UDP_PCB, pcb); + } + +diff --git a/src/include/lwip/udp.h b/src/include/lwip/udp.h +index b1c78e5..f588d90 100644 +--- a/src/include/lwip/udp.h ++++ b/src/include/lwip/udp.h +@@ -112,7 +112,11 @@ struct udp_pcb { + void *recv_arg; + }; + /* udp_pcbs export for external reference (e.g. SNMP agent) */ ++#if GAZELLE_UDP_ENABLE ++extern PER_THREAD struct udp_pcb *udp_pcbs; ++#else + extern struct udp_pcb *udp_pcbs; ++#endif + + /* The following functions is the application layer interface to the + UDP code. 
*/ +-- +2.33.0 + diff --git a/0065-fix-udp-recvmbox-size-not-set.patch b/0065-fix-udp-recvmbox-size-not-set.patch new file mode 100644 index 0000000000000000000000000000000000000000..e8e56638ba6e456fa8c124ad538011bd50e68626 --- /dev/null +++ b/0065-fix-udp-recvmbox-size-not-set.patch @@ -0,0 +1,24 @@ +From b94a7024bc7dc4984039b4f54aff3dbdcd21d8b8 Mon Sep 17 00:00:00 2001 +From: jiangheng12 +Date: Wed, 14 Jun 2023 18:34:12 +0800 +Subject: [PATCH] fix udp recvmbox size not set + +--- + src/include/lwipopts.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index 6b5c769..f0df0e3 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -180,6 +180,7 @@ + --------------------------------- + */ + #define LWIP_UDP 1 ++#define DEFAULT_UDP_RECVMBOX_SIZE 4096 + + + /* +-- +2.23.0 + diff --git a/0066-adapt-to-dpdk-19.11-and-dpdk-21.11.patch b/0066-adapt-to-dpdk-19.11-and-dpdk-21.11.patch new file mode 100644 index 0000000000000000000000000000000000000000..ebd81afe70a0f0d09ea939ad445d0f6b8337b480 --- /dev/null +++ b/0066-adapt-to-dpdk-19.11-and-dpdk-21.11.patch @@ -0,0 +1,144 @@ +From a8ca1b0361d5b31e437fd70d17860248dd44ddf7 Mon Sep 17 00:00:00 2001 +From: Lemmy Huang +Date: Thu, 15 Jun 2023 09:06:58 +0800 +Subject: [PATCH] adapt to dpdk-19.11 and dpdk-21.11 + +Signed-off-by: Lemmy Huang +--- + src/Makefile | 3 +++ + src/include/arch/sys_arch.h | 1 + + src/include/dpdk_cksum.h | 3 +++ + src/include/dpdk_version.h | 52 +++++++++++++++++++++++++++++++++++++ + src/include/eventpoll.h | 1 + + src/include/reg_sock.h | 2 ++ + 6 files changed, 62 insertions(+) + create mode 100644 src/include/dpdk_version.h + +diff --git a/src/Makefile b/src/Makefile +index f445601b..480470fb 100644 +--- a/src/Makefile ++++ b/src/Makefile +@@ -19,6 +19,9 @@ ARFLAGS = crDP + ifeq ($(shell $(CC) -dumpmachine | cut -d"-" -f1), x86_64) + CFLAGS += -mssse3 + endif ++ifeq ($(DPDK_VERSION_1911), 1) ++ CFLAGS += -DDPDK_VERSION_1911=1 
++endif + + SRCS = + DIRS = api core netif +diff --git a/src/include/arch/sys_arch.h b/src/include/arch/sys_arch.h +index 04e3192a..5e95f3d3 100644 +--- a/src/include/arch/sys_arch.h ++++ b/src/include/arch/sys_arch.h +@@ -79,6 +79,7 @@ typedef struct sys_thread *sys_thread_t; + #if GAZELLE_ENABLE + extern int eth_dev_poll(void); + #include ++#include "dpdk_version.h" + + /* + gazelle custom rte ring interface +diff --git a/src/include/dpdk_cksum.h b/src/include/dpdk_cksum.h +index e41644b5..b48c9267 100644 +--- a/src/include/dpdk_cksum.h ++++ b/src/include/dpdk_cksum.h +@@ -34,8 +34,11 @@ + #define __DPDK_CKSUM_H__ + + #include "lwipopts.h" ++ + #if GAZELLE_ENABLE ++#include + #include ++#include "dpdk_version.h" + + #if CHECKSUM_OFFLOAD_ALL + #include +diff --git a/src/include/dpdk_version.h b/src/include/dpdk_version.h +new file mode 100644 +index 00000000..43b254a8 +--- /dev/null ++++ b/src/include/dpdk_version.h +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2001-2004 Swedish Institute of Computer Science. ++ * All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without modification, ++ * are permitted provided that the following conditions are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright notice, ++ * this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright notice, ++ * this list of conditions and the following disclaimer in the documentation ++ * and/or other materials provided with the distribution. ++ * 3. The name of the author may not be used to endorse or promote products ++ * derived from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT ++ * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ++ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT ++ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING ++ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY ++ * OF SUCH DAMAGE. ++ * ++ * This file is part of the lwIP TCP/IP stack. ++ * ++ * Author: Huawei Technologies ++ * ++ */ ++ ++#ifndef __DPDK_VERSION_H__ ++#define __DPDK_VERSION_H__ ++ ++#if DPDK_VERSION_1911 ++#define __rte_ring_enqueue_elems(r, prod_head, obj_table, esize, n) \ ++ ENQUEUE_PTRS(r, &r[1], prod_head, (obj_table), n, void *) ++ ++#define __rte_ring_dequeue_elems(r, cons_head, obj_table, esize, n) \ ++ DEQUEUE_PTRS(r, &r[1], cons_head, (obj_table), n, void *) ++ ++#define RTE_MBUF_F_RX_IP_CKSUM_BAD PKT_RX_IP_CKSUM_BAD ++#define RTE_MBUF_F_RX_L4_CKSUM_BAD PKT_RX_L4_CKSUM_BAD ++#define RTE_MBUF_F_TX_IPV4 PKT_TX_IPV4 ++#define RTE_MBUF_F_TX_IP_CKSUM PKT_TX_IP_CKSUM ++#define RTE_MBUF_F_TX_TCP_CKSUM PKT_TX_TCP_CKSUM ++#define RTE_MBUF_F_TX_TCP_SEG PKT_TX_TCP_SEG ++ ++#endif /* DPDK_VERSION_1911 */ ++ ++#endif /* __DPDK_VERSION_H__ */ +diff --git a/src/include/eventpoll.h b/src/include/eventpoll.h +index a10c84bf..dd65a4d5 100644 +--- a/src/include/eventpoll.h ++++ b/src/include/eventpoll.h +@@ -35,6 +35,7 @@ + + #include + ++#include "arch/sys_arch.h" + #include "lwip/api.h" + #include "list.h" + +diff --git a/src/include/reg_sock.h b/src/include/reg_sock.h +index e349e854..5d5710d7 100644 +--- a/src/include/reg_sock.h ++++ b/src/include/reg_sock.h +@@ -33,6 +33,8 @@ + #ifndef __REG_SOCK_H__ + #define __REG_SOCK_H__ + ++#include ++ + enum reg_ring_type { + REG_RING_TCP_LISTEN = 0, + REG_RING_TCP_LISTEN_CLOSE, +-- +2.22.0.windows.1 + diff --git 
a/0067-fix-null-pointer-when-all-zero-address-listen.patch b/0067-fix-null-pointer-when-all-zero-address-listen.patch new file mode 100644 index 0000000000000000000000000000000000000000..aa58a8fe11c04fe3667463f8721f0e634bc1c7a6 --- /dev/null +++ b/0067-fix-null-pointer-when-all-zero-address-listen.patch @@ -0,0 +1,100 @@ +From b4a2b2799c199fb2955ecaae72e7b7dbe79e593b Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Thu, 15 Jun 2023 21:42:04 +0800 +Subject: [PATCH] fix null pointer when all zero address listen + +--- + src/core/tcp_in.c | 58 ++++++++++++++++++++++++++++------------------- + 1 file changed, 35 insertions(+), 23 deletions(-) + +diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c +index 0abee30..c20c9b5 100644 +--- a/src/core/tcp_in.c ++++ b/src/core/tcp_in.c +@@ -114,6 +114,36 @@ static void tcp_remove_sacks_gt(struct tcp_pcb *pcb, u32_t seq); + #endif /* TCP_OOSEQ_BYTES_LIMIT || TCP_OOSEQ_PBUFS_LIMIT */ + #endif /* LWIP_TCP_SACK_OUT */ + ++#if GAZELLE_TCP_REUSE_IPPORT ++struct tcp_pcb_listen *min_cnts_lpcb_get(struct tcp_pcb_listen *lpcb) ++{ ++ struct tcp_pcb_listen *min_cnts_lpcb; ++ struct tcp_pcb_listen *tmp_lpcb = lpcb; ++ u16_t min_conn_num = GAZELLE_TCP_MAX_CONN_PER_THREAD; ++ u8_t have_master_fd = 0; ++ ++ while (tmp_lpcb != NULL) { ++ if (tmp_lpcb->master_lpcb) { ++ have_master_fd = 1; ++ } ++ tmp_lpcb = tmp_lpcb->next_same_port_pcb; ++ } ++ ++ tmp_lpcb = lpcb; ++ min_cnts_lpcb = lpcb; ++ while (tmp_lpcb != NULL) { ++ if (!have_master_fd || tmp_lpcb->master_lpcb) { ++ if (tmp_lpcb->connect_num < min_conn_num) { ++ min_cnts_lpcb = tmp_lpcb; ++ min_conn_num = tmp_lpcb->connect_num; ++ } ++ } ++ tmp_lpcb = tmp_lpcb->next_same_port_pcb; /* advance on every iteration, including skipped non-master entries, so the walk always terminates */ ++ } ++ return min_cnts_lpcb; ++} ++#endif ++ + /** + * The initial input processing of TCP.
It verifies the TCP header, demultiplexes + * the segment between the PCBs and passes it on to tcp_process(), which implements +@@ -384,33 +414,15 @@ tcp_input(struct pbuf *p, struct netif *inp) + if (ip_addr_cmp(&lpcb->local_ip, ip_current_dest_addr())) { + /* found an exact match */ + #if GAZELLE_TCP_REUSE_IPPORT +- // check master fd +- struct tcp_pcb_listen *tmp_lpcb = lpcb; +- u8_t have_master_fd = 0; +- while (tmp_lpcb != NULL) { +- if (tmp_lpcb->master_lpcb) { +- have_master_fd = 1; +- } +- tmp_lpcb = tmp_lpcb->next_same_port_pcb; +- } +- +- tmp_lpcb = lpcb; +- min_cnts_lpcb = lpcb; +- u16_t min_conn_num = GAZELLE_TCP_MAX_CONN_PER_THREAD; +- while (tmp_lpcb != NULL) { +- if (!have_master_fd || tmp_lpcb->master_lpcb) { +- if (tmp_lpcb->connect_num < min_conn_num) { +- min_cnts_lpcb = tmp_lpcb; +- min_conn_num = tmp_lpcb->connect_num; +- } +- } +- tmp_lpcb = tmp_lpcb->next_same_port_pcb; +- } ++ min_cnts_lpcb = min_cnts_lpcb_get(lpcb); + #endif + break; + } else if (ip_addr_isany(&lpcb->local_ip)) { + /* found an ANY-match */ + #if SO_REUSE ++#if GAZELLE_TCP_REUSE_IPPORT ++ min_cnts_lpcb = min_cnts_lpcb_get(lpcb); ++#endif + lpcb_any = lpcb; + lpcb_prev = prev; + #else /* SO_REUSE */ +@@ -458,7 +470,7 @@ tcp_input(struct pbuf *p, struct netif *inp) + { + #if GAZELLE_TCP_REUSE_IPPORT + tcp_listen_input(min_cnts_lpcb); +- min_cnts_lpcb->connect_num++; ++ min_cnts_lpcb->connect_num++; + #else + tcp_listen_input(lpcb); + #endif +-- +2.27.0 + diff --git a/0068-enable-UDP-CKSUM-in-lwip.patch b/0068-enable-UDP-CKSUM-in-lwip.patch new file mode 100644 index 0000000000000000000000000000000000000000..9f5d0d92f3d5b85e3c55d284f7ff35aa95f3e45d --- /dev/null +++ b/0068-enable-UDP-CKSUM-in-lwip.patch @@ -0,0 +1,42 @@ +From 73d78d322ba8bb997d74c92727d1ec8b8640607f Mon Sep 17 00:00:00 2001 +From: kircher +Date: Wed, 21 Jun 2023 16:59:34 +0800 +Subject: [PATCH] enable UDP CKSUM in lwip + +--- + src/include/dpdk_cksum.h | 1 + + src/include/lwipopts.h | 6 ++++++ + 2 files 
changed, 7 insertions(+) + +diff --git a/src/include/dpdk_cksum.h b/src/include/dpdk_cksum.h +index b48c926..2c5b31e 100644 +--- a/src/include/dpdk_cksum.h ++++ b/src/include/dpdk_cksum.h +@@ -87,6 +87,7 @@ static inline void tcph_cksum_set(struct pbuf *p, u16_t len) { + + static inline void udph_cksum_set(struct pbuf *p, u16_t len) { + p->l4_len = len; ++ p->ol_flags |= RTE_MBUF_F_TX_UDP_CKSUM; + } + + static inline u16_t ip_chksum_pseudo_offload(u8_t proto, u16_t proto_len, +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index f0df0e3..2ba1e4c 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -180,6 +180,12 @@ + --------------------------------- + */ + #define LWIP_UDP 1 ++ ++#define UDP_HLEN 8 ++ ++#define MEMP_NUM_UDP_PCB 16 ++#define MEMP_NUM_IGMP_GROUP 16 ++ + #define DEFAULT_UDP_RECVMBOX_SIZE 4096 + + +-- +2.33.0 + diff --git a/0069-add-error-check-in-hugepage_init-and-sys_mbox_free.patch b/0069-add-error-check-in-hugepage_init-and-sys_mbox_free.patch new file mode 100644 index 0000000000000000000000000000000000000000..55710e8058fa284b5a3b1156ed996c1e613c4eed --- /dev/null +++ b/0069-add-error-check-in-hugepage_init-and-sys_mbox_free.patch @@ -0,0 +1,72 @@ +From cbeb07ef6238a719a2bb84837835ebc228ac4fde Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Tue, 20 Jun 2023 14:56:28 +0800 +Subject: [PATCH] add error check in hugepage_init and sys_mbox_free + +--- + src/api/sys_arch.c | 8 ++++++-- + src/include/memp_def.h | 13 ++++++++++++- + 2 files changed, 18 insertions(+), 3 deletions(-) + +diff --git a/src/api/sys_arch.c b/src/api/sys_arch.c +index f93a00e..1bc3aee 100644 +--- a/src/api/sys_arch.c ++++ b/src/api/sys_arch.c +@@ -124,8 +124,12 @@ err_t sys_mbox_new(struct sys_mbox **mb, int size) + void sys_mbox_free(struct sys_mbox **mb) + { + struct sys_mbox *mbox = *mb; +- rte_ring_free(mbox->ring); ++ if (mbox->ring != NULL) { ++ rte_ring_free(mbox->ring); ++ mbox->ring = NULL; ++ } + memp_free(MEMP_SYS_MBOX, mbox); ++ 
sys_mbox_set_invalid(mb); + } + + err_t sys_mbox_trypost(struct sys_mbox **mb, void *msg) +@@ -371,7 +375,7 @@ uint8_t *sys_hugepage_malloc(const char *name, uint32_t size) + + mz = rte_memzone_reserve(name, size, rte_socket_id(), 0); + if (mz == NULL) { +- rte_exit(EXIT_FAILURE, "failed to reserver memory for mempool[%s]\n", name); ++ LWIP_DEBUGF(SYS_DEBUG, ("sys_hugepage_malloc: failed to reserve memory for mempool\n")); + return NULL; + } + +diff --git a/src/include/memp_def.h b/src/include/memp_def.h +index 082f685..3408c60 100644 +--- a/src/include/memp_def.h ++++ b/src/include/memp_def.h +@@ -52,15 +52,26 @@ + #include + #undef LWIP_MEMPOOL + +-static inline void hugepage_init(void) ++extern PER_THREAD uint8_t *ram_heap; ++static inline int hugepage_init(void) + { + #define LWIP_MEMPOOL(name,num,size,desc) LWIP_MEMPOOL_BASE_INIT(name) + #include "lwip/priv/memp_std.h" ++ u16_t i; ++ for (i = 0; i < LWIP_ARRAYSIZE(memp_pools); i++) { ++ if (memp_pools[i]->base == NULL) { ++ return -1; ++ } ++ } + + #if !MEM_LIBC_MALLOC + LWIP_MEM_MEMORY_DECLARE(ram_heap) + LWIP_MEM_MEMORY_INIT(ram_heap) ++ if (ram_heap == NULL) { ++ return -1; ++ } + #endif /* MEM_LIBC_MALLOC */ ++ return 0; + } + + #endif /* __MEMP_DEF_H__ */ +-- +2.27.0 + diff --git a/0070-add-CHECKSUM_UDP-when-not-support-OFFLOAD_UDP_CHECKS.patch b/0070-add-CHECKSUM_UDP-when-not-support-OFFLOAD_UDP_CHECKS.patch new file mode 100644 index 0000000000000000000000000000000000000000..55b1779e060d8de84c4abc3cade3ee8b2aee6d7f --- /dev/null +++ b/0070-add-CHECKSUM_UDP-when-not-support-OFFLOAD_UDP_CHECKS.patch @@ -0,0 +1,180 @@ +From 4ea38ff354eb4dad54be3c056b884ff7920135da Mon Sep 17 00:00:00 2001 +From: kircher +Date: Tue, 27 Jun 2023 11:18:58 +0800 +Subject: [PATCH] add CHECKSUM_UDP when not support OFFLOAD_UDP_CHECKSUM + +--- + src/core/tcp_in.c | 2 +- + src/core/udp.c | 26 +++++++++++++++++++++++++- + src/include/dpdk_cksum.h | 16 ++++++++++------ + src/include/dpdk_version.h | 1 + + src/include/lwipopts.h | 8 
++++++-- + 5 files changed, 43 insertions(+), 10 deletions(-) + +diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c +index 62a6511..5014a21 100644 +--- a/src/core/tcp_in.c ++++ b/src/core/tcp_in.c +@@ -210,7 +210,7 @@ tcp_input(struct pbuf *p, struct netif *inp) + #if CHECKSUM_CHECK_TCP_HW + u64_t ret; + if (get_eth_params_rx_ol() & DEV_RX_OFFLOAD_TCP_CKSUM) { +- ret = is_cksum_tcpbad(p); ++ ret = is_cksum_bad(p); + } else { + ret = (u64_t)ip_chksum_pseudo(p, IP_PROTO_TCP, p->tot_len, + ip_current_src_addr(), ip_current_dest_addr()); +diff --git a/src/core/udp.c b/src/core/udp.c +index 0b1fa65..d9db535 100644 +--- a/src/core/udp.c ++++ b/src/core/udp.c +@@ -412,9 +412,21 @@ udp_input(struct pbuf *p, struct netif *inp) + #endif /* LWIP_UDPLITE */ + { + if (udphdr->chksum != 0) { ++#if CHECKSUM_CHECK_UDP_HW ++ u64_t ret = 0; ++ if (get_eth_params_rx_ol() & DEV_RX_OFFLOAD_UDP_CKSUM) { ++ ret = is_cksum_bad(p); ++ } else { ++ ret = ip_chksum_pseudo(p, IP_PROTO_UDP, p->tot_len, ++ ip_current_src_addr(), ++ ip_current_dest_addr()); ++ } ++ if (ret != 0) { ++#else /* CHECKSUM_CHECK_UDP_HW */ + if (ip_chksum_pseudo(p, IP_PROTO_UDP, p->tot_len, + ip_current_src_addr(), + ip_current_dest_addr()) != 0) { ++#endif /* CHECKSUM_CHECK_UDP_HW */ + goto chkerr; + } + } +@@ -970,8 +982,18 @@ udp_sendto_if_src_chksum(struct udp_pcb *pcb, struct pbuf *p, const ip_addr_t *d + } else + #endif /* LWIP_CHECKSUM_ON_COPY */ + { ++#if CHECKSUM_GEN_UDP_HW ++ if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_UDP_CKSUM) { ++ udph_cksum_set(q, UDP_HLEN); ++ udpchksum = ip_chksum_pseudo_offload(IP_PROTO_UDP, q->tot_len, &pcb->local_ip, &pcb->remote_ip); ++ } else { ++ udpchksum = ip_chksum_pseudo(q, IP_PROTO_UDP, q->tot_len, ++ src_ip, dst_ip); ++ } ++#else /* CHECKSUM_GEN_UDP_HW */ + udpchksum = ip_chksum_pseudo(q, IP_PROTO_UDP, q->tot_len, + src_ip, dst_ip); ++#endif /* CHECKSUM_GEN_UDP_HW */ + } + + /* chksum zero must become 0xffff, as zero means 'no checksum' */ +@@ -996,7 +1018,9 @@ 
udp_sendto_if_src_chksum(struct udp_pcb *pcb, struct pbuf *p, const ip_addr_t *d + LWIP_DEBUGF(UDP_DEBUG, ("udp_send: ip_output_if (,,,,0x%02"X16_F",)\n", (u16_t)ip_proto)); + /* output to IP */ + NETIF_SET_HINTS(netif, &(pcb->netif_hints)); +- udph_cksum_set(q, UDP_HLEN); ++#if GAZELLE_UDP_ENABLE ++ q->l4_len = UDP_HLEN; ++#endif /* GAZELLE_UDP_ENABLE */ + err = ip_output_if_src(q, src_ip, dst_ip, ttl, pcb->tos, ip_proto, netif); + NETIF_RESET_HINTS(netif); + +diff --git a/src/include/dpdk_cksum.h b/src/include/dpdk_cksum.h +index 2c5b31e..d092a1d 100644 +--- a/src/include/dpdk_cksum.h ++++ b/src/include/dpdk_cksum.h +@@ -54,12 +54,12 @@ static inline u64_t is_cksum_ipbad(struct pbuf *p) { + } + #endif /* CHECKSUM_CHECK_IP_HW */ + +-#if CHECKSUM_CHECK_TCP_HW +-// for tcp_input +-static inline u64_t is_cksum_tcpbad(struct pbuf *p) { ++#if (CHECKSUM_CHECK_TCP_HW || CHECKSUM_CHECK_UDP_HW) ++// for tcp_input and udp_input ++static inline u64_t is_cksum_bad(struct pbuf *p) { + return p->ol_flags & (RTE_MBUF_F_RX_L4_CKSUM_BAD); + } +-#endif /* CHECKSUM_CHECK_TCP_HW */ ++#endif /* (CHECKSUM_CHECK_TCP_HW || CHECKSUM_CHECK_UDP_HW) */ + + #if CHECKSUM_GEN_IP_HW + static inline void ethh_cksum_set(struct pbuf *p, u16_t len) { +@@ -77,18 +77,22 @@ static inline void iph_cksum_set(struct pbuf *p, u16_t len, bool do_ipcksum) { + #endif /* CHECKSUM_GEN_IP_HW */ + + // replace ip_chksum_pseudo +-#if CHECKSUM_GEN_TCP_HW ++#if (CHECKSUM_GEN_TCP_HW || CHECKSUM_GEN_UDP_HW) + #include + ++#if CHECKSUM_GEN_TCP_HW + static inline void tcph_cksum_set(struct pbuf *p, u16_t len) { + p->l4_len = len; + p->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM; + } ++#endif /* CHECKSUM_GEN_TCP_HW */ + ++#if CHECKSUM_GEN_UDP_HW + static inline void udph_cksum_set(struct pbuf *p, u16_t len) { + p->l4_len = len; + p->ol_flags |= RTE_MBUF_F_TX_UDP_CKSUM; + } ++#endif /* CHECKSUM_GEN_UDP_HW */ + + static inline u16_t ip_chksum_pseudo_offload(u8_t proto, u16_t proto_len, + const ip_addr_t *src, const ip_addr_t *dst) 
+@@ -109,7 +113,7 @@ static inline u16_t ip_chksum_pseudo_offload(u8_t proto, u16_t proto_len, + + return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr)); + } +-#endif /* CHECKSUM_GEN_TCP_HW */ ++#endif /* (CHECKSUM_GEN_TCP_HW || CHECKSUM_GEN_UDP_HW) */ + + #endif /* GAZELLE_ENABLE */ + #endif /* __DPDK_CKSUM_H__ */ +diff --git a/src/include/dpdk_version.h b/src/include/dpdk_version.h +index 43b254a..c90ddb8 100644 +--- a/src/include/dpdk_version.h ++++ b/src/include/dpdk_version.h +@@ -46,6 +46,7 @@ + #define RTE_MBUF_F_TX_IP_CKSUM PKT_TX_IP_CKSUM + #define RTE_MBUF_F_TX_TCP_CKSUM PKT_TX_TCP_CKSUM + #define RTE_MBUF_F_TX_TCP_SEG PKT_TX_TCP_SEG ++#define RTE_MBUF_F_TX_UDP_CKSUM PKT_TX_UDP_CKSUM + + #endif /* DPDK_VERSION_1911 */ + +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index 2ba1e4c..5ba123f 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -71,18 +71,22 @@ + // rx cksum + #define CHECKSUM_CHECK_IP 1 /* master switch */ + #define CHECKSUM_CHECK_TCP 1 /* master switch */ ++#define CHECKSUM_CHECK_UDP 1 /* master switch */ + // tx cksum + #define CHECKSUM_GEN_IP 1 /* master switch */ +-#define CHECKSUM_GEN_TCP 1 /* master switch */ ++#define CHECKSUM_GEN_TCP 1 /* master switch */ ++#define CHECKSUM_GEN_UDP 1 /* master switch */ + + // rx offload cksum + #define CHECKSUM_CHECK_IP_HW (1 && CHECKSUM_CHECK_IP) /* hardware switch */ + #define CHECKSUM_CHECK_TCP_HW (1 && CHECKSUM_CHECK_TCP) /* hardware switch */ ++#define CHECKSUM_CHECK_UDP_HW (1 && CHECKSUM_CHECK_UDP) /* hardware switch */ + // tx offload cksum + #define CHECKSUM_GEN_IP_HW (1 && CHECKSUM_GEN_IP) /* hardware switch */ + #define CHECKSUM_GEN_TCP_HW (1 && CHECKSUM_GEN_TCP) /* hardware switch */ ++#define CHECKSUM_GEN_UDP_HW (1 && CHECKSUM_GEN_UDP) /* hardware switch */ + +-#define CHECKSUM_OFFLOAD_ALL (CHECKSUM_GEN_IP_HW || CHECKSUM_GEN_TCP_HW || CHECKSUM_CHECK_IP_HW || CHECKSUM_CHECK_TCP_HW) ++#define CHECKSUM_OFFLOAD_ALL (CHECKSUM_GEN_IP_HW || CHECKSUM_GEN_TCP_HW || 
CHECKSUM_CHECK_IP_HW || CHECKSUM_CHECK_TCP_HW || CHECKSUM_CHECK_UDP_HW || CHECKSUM_GEN_UDP_HW) + + + /* +-- +2.28.0.windows.1 + diff --git a/0071-fix-pbuf-tot_len-incorrect-after-pbuf_split_64k-is-c.patch b/0071-fix-pbuf-tot_len-incorrect-after-pbuf_split_64k-is-c.patch new file mode 100644 index 0000000000000000000000000000000000000000..7bf4711db2d50476ed19232b7f7d33dfced90b6f --- /dev/null +++ b/0071-fix-pbuf-tot_len-incorrect-after-pbuf_split_64k-is-c.patch @@ -0,0 +1,35 @@ +From 339ad47548236f2b11ee6161a419db8aa664138c Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Fri, 15 Sep 2023 09:33:56 +0800 +Subject: [PATCH] fix pbuf->tot_len incorrect after pbuf_split_64k is called + +--- + src/core/pbuf.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +diff --git a/src/core/pbuf.c b/src/core/pbuf.c +index 2385e57..8a55463 100644 +--- a/src/core/pbuf.c ++++ b/src/core/pbuf.c +@@ -1194,7 +1194,7 @@ void pbuf_split_64k(struct pbuf *p, struct pbuf **rest) + if (r != NULL) { + /* Update the tot_len field in the first part */ + for (i = p; i != NULL; i = i->next) { +- i->tot_len = tot_len_front; ++ i->tot_len = (u16_t)(i->tot_len - r->tot_len); + LWIP_ASSERT("tot_len/len mismatch in last pbuf", + (i->next != NULL) || (i->tot_len == i->len)); + } +@@ -1205,9 +1205,6 @@ void pbuf_split_64k(struct pbuf *p, struct pbuf **rest) + /* tot_len field in rest does not need modifications */ + /* reference counters do not need modifications */ + *rest = r; +- r->tot_len = r->len; +- }else{ +- p->tot_len = tot_len_front; + } + } + } +-- +2.27.0 + diff --git a/0072-add-O_NONBLOCK-and-FIONBIO-when-not-defined.patch b/0072-add-O_NONBLOCK-and-FIONBIO-when-not-defined.patch new file mode 100644 index 0000000000000000000000000000000000000000..d4dda8ba9e4a8c90f646f24c629392c2087d6b92 --- /dev/null +++ b/0072-add-O_NONBLOCK-and-FIONBIO-when-not-defined.patch @@ -0,0 +1,18 @@ +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index 5ba123f..baf739e 100644 +--- 
a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -244,9 +244,13 @@ + + #define SO_REUSE 1 + ++#ifndef FIONBIO + #define FIONBIO 0x5421 /* same as define in asm-generic/ioctls.h */ ++#endif + ++#ifndef O_NONBLOCK + #define O_NONBLOCK 04000 /* same as define in bits/fcntl-linux.h */ ++#endif + + #define SIOCSHIWAT 1 + diff --git a/0073-lstack_lwip-external-api-start-with-do_lwip_-prefix.patch b/0073-lstack_lwip-external-api-start-with-do_lwip_-prefix.patch new file mode 100644 index 0000000000000000000000000000000000000000..a6b8741e988d2c07210dd340e1abb9cc51b4e750 --- /dev/null +++ b/0073-lstack_lwip-external-api-start-with-do_lwip_-prefix.patch @@ -0,0 +1,208 @@ +From 39f06e9ef0929da16282b23ec606c3893f394b1e Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Mon, 25 Sep 2023 15:23:51 +0800 +Subject: [PATCH] lstack_lwip: external api start with do_lwip_ prefix + +--- + src/api/api_msg.c | 8 ++++---- + src/api/sockets.c | 8 ++++---- + src/core/pbuf.c | 4 ++-- + src/core/tcp_out.c | 4 ++-- + src/core/udp.c | 4 ++-- + src/include/lwip/pbuf.h | 3 ++- + src/include/lwipsock.h | 19 +++++++++++-------- + 7 files changed, 27 insertions(+), 23 deletions(-) + +diff --git a/src/api/api_msg.c b/src/api/api_msg.c +index 869d6bc..3e982ab 100644 +--- a/src/api/api_msg.c ++++ b/src/api/api_msg.c +@@ -292,7 +292,7 @@ recv_udp(void *arg, struct udp_pcb *pcb, struct pbuf *p, + SYS_ARCH_INC(conn->recv_avail, len); + #endif /* LWIP_SO_RCVBUF */ + #if GAZELLE_UDP_ENABLE +- add_recv_list(conn->socket); ++ do_lwip_add_recvlist(conn->socket); + LWIP_UNUSED_ARG(len); + #else /* GAZELLE_UDP_ENABLE */ + /* Register event with callback */ +@@ -356,7 +356,7 @@ recv_tcp(void *arg, struct tcp_pcb *pcb, struct pbuf *p, err_t err) + SYS_ARCH_INC(conn->recv_avail, len); + #endif /* LWIP_SO_RCVBUF */ + #if GAZELLE_ENABLE +- add_recv_list(conn->socket); ++ do_lwip_add_recvlist(conn->socket); + LWIP_UNUSED_ARG(len); + #else + /* Register event with callback */ +@@ -492,7 +492,7 @@ err_tcp(void 
*arg, err_t err) + /* use trypost to prevent deadlock */ + sys_mbox_trypost(&conn->recvmbox, mbox_msg); + #if GAZELLE_ENABLE +- add_recv_list(conn->socket); ++ do_lwip_add_recvlist(conn->socket); + #endif + } + /* pass error message to acceptmbox to wake up pending accept */ +@@ -1348,7 +1348,7 @@ lwip_netconn_do_connected(void *arg, struct tcp_pcb *pcb, err_t err) + } + + #if GAZELLE_ENABLE +- gazelle_connected_callback(conn); ++ do_lwip_connected_callback(conn); + #endif + + LWIP_ASSERT("conn->state == NETCONN_CONNECT", conn->state == NETCONN_CONNECT); +diff --git a/src/api/sockets.c b/src/api/sockets.c +index f9b7a8f..8d573aa 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -613,7 +613,7 @@ alloc_socket(struct netconn *newconn, int accepted, int flags) + return i + LWIP_SOCKET_OFFSET; + } else { + lwip_close(i); +- gazelle_clean_sock(i); ++ do_lwip_clean_sock(i); + } + + err: +@@ -785,7 +785,7 @@ lwip_accept4(int s, struct sockaddr *addr, socklen_t *addrlen, int flags) + } + #if GAZELLE_ENABLE + LWIP_ASSERT("invalid socket index", (newsock >= LWIP_SOCKET_OFFSET) && (newsock < sockets_num + LWIP_SOCKET_OFFSET)); +- gazelle_init_sock(newsock); ++ do_lwip_init_sock(newsock); + #else + LWIP_ASSERT("invalid socket index", (newsock >= LWIP_SOCKET_OFFSET) && (newsock < NUM_SOCKETS + LWIP_SOCKET_OFFSET)); + #endif /* GAZELLE_ENABLE */ +@@ -1170,7 +1170,7 @@ lwip_recv_tcp(struct lwip_sock *sock, void *mem, size_t len, int flags) + lwip_recv_tcp_done: + #else /* GAZELLE_ENABLE */ + LWIP_UNUSED_ARG(recv_left); +- recvd = read_lwip_data(sock, flags, apiflags); ++ recvd = do_lwip_read_from_lwip(sock, flags, apiflags); + if (recvd <= 0) { + return recvd; + } +@@ -1369,7 +1369,7 @@ lwip_recvfrom_udp_raw(struct lwip_sock *sock, int flags, struct msghdr *msg, u16 + LWIP_UNUSED_ARG(err); + LWIP_UNUSED_ARG(copied); + LWIP_UNUSED_ARG(i); +- buflen = read_lwip_data(sock, flags, apiflags); ++ buflen = do_lwip_read_from_lwip(sock, flags, apiflags); + if (buflen <= 0) { + 
return ERR_BUF; + } +diff --git a/src/core/pbuf.c b/src/core/pbuf.c +index 8a55463..975e240 100644 +--- a/src/core/pbuf.c ++++ b/src/core/pbuf.c +@@ -288,7 +288,7 @@ pbuf_alloc(pbuf_layer layer, u16_t length, pbuf_type type) + /* If pbuf is to be allocated in RAM, allocate memory for it. */ + #if GAZELLE_ENABLE + /* alloc mbuf avoid send copy */ +- p = lwip_alloc_pbuf(layer, length, type); ++ p = do_lwip_alloc_pbuf(layer, length, type); + #else + p = (struct pbuf *)mem_malloc(alloc_len); + if (p == NULL) { +@@ -780,7 +780,7 @@ pbuf_free(struct pbuf *p) + /* is this a custom pbuf? */ + if ((p->flags & PBUF_FLAG_IS_CUSTOM) != 0) { + #if GAZELLE_ENABLE +- gazelle_free_pbuf(p); ++ do_lwip_free_pbuf(p); + #else + struct pbuf_custom *pc = (struct pbuf_custom *)p; + LWIP_ASSERT("pc->custom_free_function != NULL", pc->custom_free_function != NULL); +diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c +index 6250e6b..547d01e 100644 +--- a/src/core/tcp_out.c ++++ b/src/core/tcp_out.c +@@ -699,7 +699,7 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + pbuf_cat(p/*header*/, p2/*data*/); + } + #else /* GAZELLE_ENABLE */ +- p = write_lwip_data((struct lwip_sock *)arg, len - pos, &apiflags); ++ p = do_lwip_get_from_sendring((struct lwip_sock *)arg, len - pos, &apiflags); + if (p == NULL) { + break; + } +@@ -764,7 +764,7 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + + pos += seglen; + #if GAZELLE_ENABLE +- write_lwip_over((struct lwip_sock*)arg); ++ do_lwip_get_from_sendring_over((struct lwip_sock*)arg); + #endif + } + +diff --git a/src/core/udp.c b/src/core/udp.c +index d9db535..5c6dadb 100644 +--- a/src/core/udp.c ++++ b/src/core/udp.c +@@ -677,8 +677,8 @@ udp_sendto_chksum(struct udp_pcb *pcb, struct pbuf *p, const ip_addr_t *dst_ip, + #if GAZELLE_UDP_ENABLE + uint8_t apiflags = 0; + +- struct pbuf *udp_pbuf = write_lwip_data((struct lwip_sock *)(p->payload), p->tot_len, &apiflags); +- write_lwip_over((struct 
lwip_sock *)(p->payload)); ++ struct pbuf *udp_pbuf = do_lwip_get_from_sendring((struct lwip_sock *)(p->payload), p->tot_len, &apiflags); ++ do_lwip_get_from_sendring_over((struct lwip_sock *)(p->payload)); + + pbuf_free(p); + p = udp_pbuf; +diff --git a/src/include/lwip/pbuf.h b/src/include/lwip/pbuf.h +index 2639b37..e1f2e50 100644 +--- a/src/include/lwip/pbuf.h ++++ b/src/include/lwip/pbuf.h +@@ -300,7 +300,8 @@ void pbuf_free_ooseq(void); + /* Initializes the pbuf module. This call is empty for now, but may not be in future. */ + #define pbuf_init() + #if GAZELLE_ENABLE +-struct pbuf *lwip_alloc_pbuf(pbuf_layer layer, uint16_t length, pbuf_type type); ++extern struct pbuf *do_lwip_alloc_pbuf(pbuf_layer layer, uint16_t length, pbuf_type type); ++extern void do_lwip_free_pbuf(struct pbuf *pbuf); + #endif + struct pbuf *pbuf_alloc(pbuf_layer l, u16_t length, pbuf_type type); + struct pbuf *pbuf_alloc_reference(void *payload, u16_t length, pbuf_type type); +diff --git a/src/include/lwipsock.h b/src/include/lwipsock.h +index f8480c5..ccc8c43 100644 +--- a/src/include/lwipsock.h ++++ b/src/include/lwipsock.h +@@ -164,13 +164,17 @@ static inline unsigned same_node_ring_count(struct lwip_sock *sock) + #if GAZELLE_ENABLE + extern uint32_t sockets_num; + extern struct lwip_sock *sockets; +-extern void gazelle_connected_callback(struct netconn *conn); +-extern void add_recv_list(int32_t fd); +-extern ssize_t read_lwip_data(struct lwip_sock *sock, int32_t flags, u8_t apiflags); +-extern struct pbuf *write_lwip_data(struct lwip_sock *sock, uint16_t remain_size, uint8_t *apiflags); +-extern void gazelle_init_sock(int32_t fd); +-extern void gazelle_clean_sock(int32_t fd); +-extern void write_lwip_over(struct lwip_sock *sock); ++ ++extern void do_lwip_init_sock(int32_t fd); ++extern void do_lwip_clean_sock(int32_t fd); ++extern void do_lwip_connected_callback(struct netconn *conn); ++ ++extern struct pbuf *do_lwip_get_from_sendring(struct lwip_sock *sock, uint16_t remain_size, 
uint8_t *apiflags); ++extern void do_lwip_get_from_sendring_over(struct lwip_sock *sock); ++extern ssize_t do_lwip_read_from_lwip(struct lwip_sock *sock, int32_t flags, u8_t apiflags); ++ ++extern void do_lwip_add_recvlist(int32_t fd); ++ + extern void netif_poll(struct netif *netif); + extern err_t netif_loop_output(struct netif *netif, struct pbuf *p); + extern err_t find_same_node_memzone(struct tcp_pcb *pcb, struct lwip_sock *nsock); +@@ -178,7 +182,6 @@ extern err_t same_node_memzone_create(const struct rte_memzone **zone, int size, + extern err_t same_node_ring_create(struct rte_ring **ring, int size, int port, char *name, char *rx); + extern err_t create_same_node_ring(struct tcp_pcb *pcb); + extern err_t find_same_node_ring(struct tcp_pcb *pcb); +-extern void gazelle_free_pbuf(struct pbuf *pbuf); + extern void lstack_calculate_aggregate(int type, uint32_t len); + #endif /* GAZELLE_ENABLE */ + +-- +2.27.0 + diff --git a/0074-gazelle-offloads-are-registered-to-lwip.patch b/0074-gazelle-offloads-are-registered-to-lwip.patch new file mode 100644 index 0000000000000000000000000000000000000000..0bef4d8ab021c2adede4d7f36cf35bf95aa25b9e --- /dev/null +++ b/0074-gazelle-offloads-are-registered-to-lwip.patch @@ -0,0 +1,241 @@ +From cc35c455bb52f78546d7b7216b30203863c017fb Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Tue, 24 Oct 2023 17:32:17 +0800 +Subject: [PATCH] gazelle offloads are registered to lwip + +--- + src/core/ipv4/icmp.c | 2 +- + src/core/ipv4/ip4.c | 6 +++--- + src/core/ipv4/ip4_frag.c | 4 ++-- + src/core/netif.c | 20 ++++++++++++++++++++ + src/core/tcp_in.c | 2 +- + src/core/tcp_out.c | 6 +++--- + src/core/udp.c | 4 ++-- + src/include/dpdk_cksum.h | 2 -- + src/include/lwip/netif.h | 20 ++++++++++++++++++++ + 9 files changed, 52 insertions(+), 14 deletions(-) + +diff --git a/src/core/ipv4/icmp.c b/src/core/ipv4/icmp.c +index 402ba69..c3a877c 100644 +--- a/src/core/ipv4/icmp.c ++++ b/src/core/ipv4/icmp.c +@@ -241,7 +241,7 @@ icmp_input(struct pbuf 
*p, struct netif *inp) + #if CHECKSUM_GEN_IP + IF__NETIF_CHECKSUM_ENABLED(inp, NETIF_CHECKSUM_GEN_IP) { + #if CHECKSUM_GEN_IP_HW +- if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_IPV4_CKSUM) { ++ if (netif_get_txol_flags(inp) & DEV_TX_OFFLOAD_IPV4_CKSUM) { + iph_cksum_set(p, hlen, 1); + } else { + iph_cksum_set(p, hlen, 0); +diff --git a/src/core/ipv4/ip4.c b/src/core/ipv4/ip4.c +index 1b70bb5..1e3690f 100644 +--- a/src/core/ipv4/ip4.c ++++ b/src/core/ipv4/ip4.c +@@ -509,7 +509,7 @@ ip4_input(struct pbuf *p, struct netif *inp) + IF__NETIF_CHECKSUM_ENABLED(inp, NETIF_CHECKSUM_CHECK_IP) { + #if CHECKSUM_CHECK_IP_HW + u64_t ret; +- if (get_eth_params_rx_ol() & DEV_RX_OFFLOAD_IPV4_CKSUM) { ++ if (netif_get_rxol_flags(inp) & DEV_RX_OFFLOAD_IPV4_CKSUM) { + ret = is_cksum_ipbad(p); + } else { + ret = (u64_t)inet_chksum(iphdr, iphdr_hlen); +@@ -986,7 +986,7 @@ ip4_output_if_opt_src(struct pbuf *p, const ip4_addr_t *src, const ip4_addr_t *d + #if CHECKSUM_GEN_IP + IF__NETIF_CHECKSUM_ENABLED(netif, NETIF_CHECKSUM_GEN_IP) { + #if CHECKSUM_GEN_IP_HW +- if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_IPV4_CKSUM) { ++ if (netif_get_txol_flags(netif) & DEV_TX_OFFLOAD_IPV4_CKSUM) { + iph_cksum_set(p, ip_hlen, 1); + } else { + iph_cksum_set(p, ip_hlen, 0); +@@ -1035,7 +1035,7 @@ ip4_output_if_opt_src(struct pbuf *p, const ip4_addr_t *src, const ip4_addr_t *d + #if IP_FRAG + /* don't fragment if interface has mtu set to 0 [loopif] */ + #if GAZELLE_ENABLE +- if (!(get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO)) { ++ if (!(netif_get_txol_flags(netif) & DEV_TX_OFFLOAD_TCP_TSO)) { + #endif + if (netif->mtu && (p->tot_len > netif->mtu)) { + return ip4_frag(p, netif, dest); +diff --git a/src/core/ipv4/ip4_frag.c b/src/core/ipv4/ip4_frag.c +index e01ea51..f63a99e 100644 +--- a/src/core/ipv4/ip4_frag.c ++++ b/src/core/ipv4/ip4_frag.c +@@ -642,7 +642,7 @@ ip4_reass(struct pbuf *p) + #if CHECKSUM_GEN_IP + IF__NETIF_CHECKSUM_ENABLED(ip_current_input_netif(), NETIF_CHECKSUM_GEN_IP) { + #if 
CHECKSUM_GEN_IP_HW +- if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_IPV4_CKSUM) { ++ if (netif_get_txol_flags(ip_current_input_netif()) & DEV_TX_OFFLOAD_IPV4_CKSUM) { + iph_cksum_set(p, IP_HLEN, 1); + } else { + iph_cksum_set(p, IP_HLEN, 0); +@@ -885,7 +885,7 @@ ip4_frag(struct pbuf *p, struct netif *netif, const ip4_addr_t *dest) + #if CHECKSUM_GEN_IP + IF__NETIF_CHECKSUM_ENABLED(netif, NETIF_CHECKSUM_GEN_IP) { + #if CHECKSUM_GEN_IP_HW +- if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_IPV4_CKSUM) { ++ if (netif_get_txol_flags(netif) & DEV_TX_OFFLOAD_IPV4_CKSUM) { + iph_cksum_set(p, IP_HLEN, 1); + } else { + iph_cksum_set(p, IP_HLEN, 0); +diff --git a/src/core/netif.c b/src/core/netif.c +index 86b74a0..eb59fbc 100644 +--- a/src/core/netif.c ++++ b/src/core/netif.c +@@ -1049,6 +1049,26 @@ netif_set_link_down(struct netif *netif) + } + } + ++#if GAZELLE_ENABLE ++void ++netif_set_rtc_mode(struct netif *netif) ++{ ++ if (!(netif->flags & NETIF_FLAG_RTC_MODE)) { ++ netif_set_flags(netif, NETIF_FLAG_RTC_MODE); ++ } ++} ++void ++netif_set_rxol_flags(struct netif *netif, u64_t flags) ++{ ++ netif->rxol_flags |= flags; ++} ++void ++netif_set_txol_flags(struct netif *netif, u64_t flags) ++{ ++ netif->txol_flags |= flags; ++} ++#endif ++ + #if LWIP_NETIF_LINK_CALLBACK + /** + * @ingroup netif +diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c +index 736845c..07203e5 100644 +--- a/src/core/tcp_in.c ++++ b/src/core/tcp_in.c +@@ -209,7 +209,7 @@ tcp_input(struct pbuf *p, struct netif *inp) + /* Verify TCP checksum. 
*/ + #if CHECKSUM_CHECK_TCP_HW + u64_t ret; +- if (get_eth_params_rx_ol() & DEV_RX_OFFLOAD_TCP_CKSUM) { ++ if (netif_get_rxol_flags(inp) & DEV_RX_OFFLOAD_TCP_CKSUM) { + ret = is_cksum_bad(p); + } else { + ret = (u64_t)ip_chksum_pseudo(p, IP_PROTO_TCP, p->tot_len, +diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c +index 547d01e..e2c9d63 100644 +--- a/src/core/tcp_out.c ++++ b/src/core/tcp_out.c +@@ -1448,7 +1448,7 @@ tcp_output(struct tcp_pcb *pcb) + + /* data available and window allows it to be sent? */ + #if GAZELLE_ENABLE +- if ((get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_TSO) && pcb->need_tso_send) { ++ if ((netif_get_txol_flags(netif) & DEV_TX_OFFLOAD_TCP_TSO) && pcb->need_tso_send) { + uint16_t send_pkt = 0; + + do { +@@ -1831,7 +1831,7 @@ tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif + #if CHECKSUM_GEN_TCP + IF__NETIF_CHECKSUM_ENABLED(netif, NETIF_CHECKSUM_GEN_TCP) { + #if CHECKSUM_GEN_TCP_HW +- if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_CKSUM) { ++ if (netif_get_txol_flags(netif) & DEV_TX_OFFLOAD_TCP_CKSUM) { + tcph_cksum_set(seg->p, TCPH_HDRLEN_BYTES(seg->tcphdr)); + seg->tcphdr->chksum = ip_chksum_pseudo_offload(IP_PROTO_TCP,seg->p->tot_len, &pcb->local_ip, &pcb->remote_ip); + } else { +@@ -2273,7 +2273,7 @@ tcp_output_control_segment(struct tcp_pcb *pcb, struct pbuf *p, + IF__NETIF_CHECKSUM_ENABLED(netif, NETIF_CHECKSUM_GEN_TCP) { + struct tcp_hdr *tcphdr = (struct tcp_hdr *)p->payload; + #if CHECKSUM_GEN_TCP_HW +- if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_TCP_CKSUM) { ++ if (netif_get_txol_flags(netif) & DEV_TX_OFFLOAD_TCP_CKSUM) { + tcph_cksum_set(p, TCPH_HDRLEN_BYTES(tcphdr)); + tcphdr->chksum = ip_chksum_pseudo_offload(IP_PROTO_TCP, p->tot_len, src, dst); + } else { +diff --git a/src/core/udp.c b/src/core/udp.c +index 5c6dadb..937a045 100644 +--- a/src/core/udp.c ++++ b/src/core/udp.c +@@ -414,7 +414,7 @@ udp_input(struct pbuf *p, struct netif *inp) + if (udphdr->chksum != 0) { + #if 
CHECKSUM_CHECK_UDP_HW + u64_t ret = 0; +- if (get_eth_params_rx_ol() & DEV_RX_OFFLOAD_UDP_CKSUM) { ++ if (netif_get_rxol_flags(inp) & DEV_RX_OFFLOAD_UDP_CKSUM) { + ret = is_cksum_bad(p); + } else { + ret = ip_chksum_pseudo(p, IP_PROTO_UDP, p->tot_len, +@@ -983,7 +983,7 @@ udp_sendto_if_src_chksum(struct udp_pcb *pcb, struct pbuf *p, const ip_addr_t *d + #endif /* LWIP_CHECKSUM_ON_COPY */ + { + #if CHECKSUM_GEN_UDP_HW +- if (get_eth_params_tx_ol() & DEV_TX_OFFLOAD_UDP_CKSUM) { ++ if (netif_get_txol_flags(netif) & DEV_TX_OFFLOAD_UDP_CKSUM) { + udph_cksum_set(q, UDP_HLEN); + udpchksum = ip_chksum_pseudo_offload(IP_PROTO_UDP, q->tot_len, &pcb->local_ip, &pcb->remote_ip); + } else { +diff --git a/src/include/dpdk_cksum.h b/src/include/dpdk_cksum.h +index 5b1b6f6..b8056f9 100644 +--- a/src/include/dpdk_cksum.h ++++ b/src/include/dpdk_cksum.h +@@ -45,8 +45,6 @@ + #include "lwip/pbuf.h" + #endif + +-extern uint64_t get_eth_params_rx_ol(void); +-extern uint64_t get_eth_params_tx_ol(void); + #if CHECKSUM_CHECK_IP_HW + // for ip4_input + static inline u64_t is_cksum_ipbad(struct pbuf *p) { +diff --git a/src/include/lwip/netif.h b/src/include/lwip/netif.h +index 057c51f..75f8d50 100644 +--- a/src/include/lwip/netif.h ++++ b/src/include/lwip/netif.h +@@ -106,6 +106,11 @@ extern "C" { + * Set by the netif driver in its init function. */ + #define NETIF_FLAG_MLD6 0x40U + ++#if GAZELLE_ENABLE ++/** If set, use run to completion mode */ ++#define NETIF_FLAG_RTC_MODE 0x80U ++#endif ++ + /** + * @} + */ +@@ -343,6 +348,10 @@ struct netif { + u8_t hwaddr_len; + /** flags (@see @ref netif_flags) */ + u8_t flags; ++#if GAZELLE_ENABLE ++ u64_t rxol_flags; ++ u64_t txol_flags; ++#endif + /** descriptive abbreviation */ + char name[2]; + /** number of this interface. Used for @ref if_api and @ref netifapi_netif, +@@ -464,6 +473,17 @@ void netif_set_down(struct netif *netif); + */ + #define netif_is_up(netif) (((netif)->flags & NETIF_FLAG_UP) ? 
(u8_t)1 : (u8_t)0) + ++#if GAZELLE_ENABLE ++#define netif_is_rtc_mode(netif) (((netif)->flags & NETIF_FLAG_RTC_MODE) ? (u8_t)1 : (u8_t)0) ++#define netif_get_rxol_flags(netif) ((netif)->rxol_flags) ++#define netif_get_txol_flags(netif) ((netif)->txol_flags) ++ ++void netif_set_rtc_mode(struct netif *netif); ++void netif_set_rxol_flags(struct netif *netif, u64_t flags); ++void netif_set_txol_flags(struct netif *netif, u64_t flags); ++ ++#endif ++ + #if LWIP_NETIF_STATUS_CALLBACK + void netif_set_status_callback(struct netif *netif, netif_status_callback_fn status_callback); + #endif /* LWIP_NETIF_STATUS_CALLBACK */ +-- +2.27.0 + diff --git a/0075-adapt-read-write-for-rtc-mode.patch b/0075-adapt-read-write-for-rtc-mode.patch new file mode 100644 index 0000000000000000000000000000000000000000..9da56560a8b8261bf65ce6cf4a6edf06d8390701 --- /dev/null +++ b/0075-adapt-read-write-for-rtc-mode.patch @@ -0,0 +1,457 @@ +From be56e9eed8acf82a862d19ef4f890f309018ddde Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Sat, 28 Oct 2023 17:21:46 +0800 +Subject: [PATCH] adapt read/write for rtc mode + +--- + src/api/api_msg.c | 14 ++-- + src/api/sockets.c | 21 ++--- + src/core/init.c | 2 +- + src/core/pbuf.c | 7 ++ + src/core/tcp_out.c | 171 +++++++++++++++++++++++++++++++++-------- + src/core/udp.c | 2 +- + src/include/lwip/tcp.h | 4 + + src/include/lwipopts.h | 6 +- + 8 files changed, 174 insertions(+), 53 deletions(-) + +diff --git a/src/api/api_msg.c b/src/api/api_msg.c +index 3e982ab..d8b99ee 100644 +--- a/src/api/api_msg.c ++++ b/src/api/api_msg.c +@@ -1753,11 +1753,15 @@ lwip_netconn_do_writemore(struct netconn *conn WRITE_DELAYED_PARAM) + write_more = 0; + } + #if GAZELLE_ENABLE +- /* vector->ptr is private arg sock */ +- LWIP_UNUSED_ARG(dataptr); +- write_more = 0; +- err = tcp_write(conn->pcb.tcp, conn->current_msg->msg.w.vector->ptr, len, apiflags); +- conn->current_msg->msg.w.len = len; ++ if (netif_is_rtc_mode(netif_default)) { ++ err = tcp_write(conn->pcb.tcp, dataptr, 
len, apiflags); ++ } else { ++ /* vector->ptr is private arg sock */ ++ LWIP_UNUSED_ARG(dataptr); ++ write_more = 0; ++ err = tcp_write_from_stack(conn->pcb.tcp, conn->current_msg->msg.w.vector->ptr, len, apiflags); ++ conn->current_msg->msg.w.len = len; ++ } + conn->pcb.tcp->need_tso_send = 1; + #else + err = tcp_write(conn->pcb.tcp, dataptr, len, apiflags); +diff --git a/src/api/sockets.c b/src/api/sockets.c +index 8d573aa..e374f96 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -1087,7 +1087,15 @@ lwip_recv_tcp(struct lwip_sock *sock, void *mem, size_t len, int flags) + apiflags |= NETCONN_DONTBLOCK; + } + +-#if !GAZELLE_ENABLE ++#if GAZELLE_ENABLE ++ if (!netif_is_rtc_mode(netif_default)) { ++ LWIP_UNUSED_ARG(recv_left); ++ recvd = do_lwip_read_from_lwip(sock, flags, apiflags); ++ if (recvd <= 0) { ++ return recvd; ++ } ++ } else { ++#endif + do { + struct pbuf *p; + err_t err; +@@ -1166,15 +1174,10 @@ lwip_recv_tcp(struct lwip_sock *sock, void *mem, size_t len, int flags) + apiflags |= NETCONN_DONTBLOCK | NETCONN_NOFIN; + /* @todo: do we need to support peeking more than one pbuf? */ + } while ((recv_left > 0) && !(flags & MSG_PEEK)); +- +-lwip_recv_tcp_done: +-#else /* GAZELLE_ENABLE */ +- LWIP_UNUSED_ARG(recv_left); +- recvd = do_lwip_read_from_lwip(sock, flags, apiflags); +- if (recvd <= 0) { +- return recvd; ++#if GAZELLE_ENABLE + } +-#endif /* GAZELLE_ENABLE */ ++#endif ++lwip_recv_tcp_done: + if (apiflags & NETCONN_NOAUTORCVD) { + if ((recvd > 0) && !(flags & MSG_PEEK)) { + /* ensure window update after copying all data */ +diff --git a/src/core/init.c b/src/core/init.c +index 7b6214f..60e1c68 100644 +--- a/src/core/init.c ++++ b/src/core/init.c +@@ -306,7 +306,7 @@ PACK_STRUCT_END + #if TCP_SNDLOWAT >= TCP_SND_BUF + #error "lwip_sanity_check: WARNING: TCP_SNDLOWAT must be less than TCP_SND_BUF. If you know what you are doing, define LWIP_DISABLE_TCP_SANITY_CHECKS to 1 to disable this error." 
+ #endif +-#if TCP_SNDLOWAT >= (0xFFFFFFFF - (4 * TCP_MSS)) ++#if TCP_SNDLOWAT >= (0xFFFF - (4 * TCP_MSS)) + #error "lwip_sanity_check: WARNING: TCP_SNDLOWAT must at least be 4*MSS below u16_t overflow!" + #endif + #if TCP_SNDQUEUELOWAT >= TCP_SND_QUEUELEN +diff --git a/src/core/pbuf.c b/src/core/pbuf.c +index 975e240..61690ff 100644 +--- a/src/core/pbuf.c ++++ b/src/core/pbuf.c +@@ -117,6 +117,7 @@ pbuf_skip_const(const struct pbuf *in, u16_t in_offset, u16_t *out_offset); + volatile u8_t pbuf_free_ooseq_pending; + #define PBUF_POOL_IS_EMPTY() pbuf_pool_is_empty() + ++#if !GAZELLE_ENABLE + /** + * Attempt to reclaim some memory from queued out-of-sequence TCP segments + * if we run out of pool pbufs. It's better to give priority to new packets +@@ -176,6 +177,7 @@ pbuf_pool_is_empty(void) + } + #endif /* PBUF_POOL_FREE_OOSEQ_QUEUE_CALL */ + } ++#endif /* GAZELLE_ENABLE */ + #endif /* !LWIP_TCP || !TCP_QUEUE_OOSEQ || !PBUF_POOL_FREE_OOSEQ */ + + /* Initialize members of struct pbuf after allocation */ +@@ -238,6 +240,10 @@ pbuf_alloc(pbuf_layer layer, u16_t length, pbuf_type type) + p = pbuf_alloc_reference(NULL, length, type); + break; + case PBUF_POOL: { ++#if GAZELLE_ENABLE ++ // alloc from pktmbuf pool, one pbuf is enough ++ p = do_lwip_alloc_pbuf(layer, length, type); ++#else + struct pbuf *q, *last; + u16_t rem_len; /* remaining length */ + p = NULL; +@@ -273,6 +279,7 @@ pbuf_alloc(pbuf_layer layer, u16_t length, pbuf_type type) + rem_len = (u16_t)(rem_len - qlen); + offset = 0; + } while (rem_len > 0); ++#endif /* GAZELLE_ENABLE */ + break; + } + case PBUF_RAM: { +diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c +index e2c9d63..073d989 100644 +--- a/src/core/tcp_out.c ++++ b/src/core/tcp_out.c +@@ -515,15 +515,18 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + * pos records progress as data is segmented. + */ + +-#if !GAZELLE_ENABLE + /* Find the tail of the unsent queue. 
*/ + if (pcb->unsent != NULL) { + u16_t space; + u16_t unsent_optlen; + ++#if GAZELLE_ENABLE ++ last_unsent = pcb->last_unsent; ++#else + /* @todo: this could be sped up by keeping last_unsent in the pcb */ + for (last_unsent = pcb->unsent; last_unsent->next != NULL; + last_unsent = last_unsent->next); ++#endif + + /* Usable space at the end of the last unsent segment */ + unsent_optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(last_unsent->flags, pcb); +@@ -631,9 +634,6 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + pcb->unsent_oversize == 0); + #endif /* TCP_OVERSIZE */ + } +-#else /* GAZELLE_ENABLE */ +- last_unsent = pcb->last_unsent; +-#endif /* GAZELLE_ENABLE */ + + /* + * Phase 3: Create new segments. +@@ -651,7 +651,6 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + u8_t chksum_swapped = 0; + #endif /* TCP_CHECKSUM_ON_COPY */ + +-#if !GAZELLE_ENABLE + if (apiflags & TCP_WRITE_FLAG_COPY) { + /* If copy is set, memory should be allocated and data copied + * into pbuf */ +@@ -698,13 +697,6 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + /* Concatenate the headers and data pbufs together. 
*/ + pbuf_cat(p/*header*/, p2/*data*/); + } +-#else /* GAZELLE_ENABLE */ +- p = do_lwip_get_from_sendring((struct lwip_sock *)arg, len - pos, &apiflags); +- if (p == NULL) { +- break; +- } +- seglen = p->tot_len; +-#endif /* GAZELLE_ENABLE */ + + queuelen += pbuf_clen(p); + +@@ -714,14 +706,7 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + if (queuelen > LWIP_MIN(TCP_SND_QUEUELEN, TCP_SNDQUEUELEN_OVERFLOW)) { + LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_write: queue too long %"U16_F" (%d)\n", + queuelen, (int)TCP_SND_QUEUELEN)); +-#if GAZELLE_ENABLE +- if (pos > 0) { +- queuelen -= pbuf_clen(p); +- break; +- } +-#else + pbuf_free(p); +-#endif + goto memerr; + } + +@@ -730,12 +715,6 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + #endif + + if ((seg = tcp_create_segment(pcb, p, 0, pcb->snd_lbb + pos, optflags)) == NULL) { +-#if GAZELLE_ENABLE +- if (pos > 0) { +- queuelen -= pbuf_clen(p); +- break; +- } +-#endif + goto memerr; + } + #if TCP_OVERSIZE_DBGCHECK +@@ -763,9 +742,6 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + lwip_ntohl(seg->tcphdr->seqno) + TCP_TCPLEN(seg))); + + pos += seglen; +-#if GAZELLE_ENABLE +- do_lwip_get_from_sendring_over((struct lwip_sock*)arg); +-#endif + } + + /* +@@ -855,12 +831,9 @@ tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) + if (queue) { + pcb->last_unsent = prev_seg; + } +- pcb->snd_lbb += pos; +- pcb->snd_buf -= pos; +-#else ++#endif + pcb->snd_lbb += len; + pcb->snd_buf -= len; +-#endif + pcb->snd_queuelen = queuelen; + + LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_write: %"S16_F" (after enqueued)\n", +@@ -880,14 +853,12 @@ memerr: + tcp_set_flags(pcb, TF_NAGLEMEMERR); + TCP_STATS_INC(tcp.memerr); + +-#if !GAZELLE_ENABLE + if (concat_p != NULL) { + pbuf_free(concat_p); + } + if (queue != NULL) { + tcp_segs_free(queue); + } +-#endif + if (pcb->snd_queuelen != 0) { + LWIP_ASSERT("tcp_write: valid queue 
length", pcb->unacked != NULL || + pcb->unsent != NULL); +@@ -896,6 +867,137 @@ memerr: + return ERR_MEM; + } + ++#if GAZELLE_ENABLE ++err_t ++tcp_write_from_stack(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags) ++{ ++ struct tcp_seg *last_unsent = NULL, *seg = NULL, *prev_seg = NULL, *queue = NULL; ++ u16_t pos = 0; /* position in 'arg' data */ ++ u16_t queuelen; ++ u8_t optlen; ++ u8_t optflags = 0; ++ err_t err; ++ u16_t mss_local; ++ ++ /* don't allocate segments bigger than half the maximum window we ever received */ ++ mss_local = LWIP_MIN(pcb->mss, TCPWND_MIN16(pcb->snd_wnd_max / 2)); ++ mss_local = mss_local ? mss_local : pcb->mss; ++ ++ err = tcp_write_checks(pcb, len); ++ if (err != ERR_OK) { ++ return err; ++ } ++ queuelen = pcb->snd_queuelen; ++ ++ optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(0, pcb); ++ ++ last_unsent = pcb->last_unsent; ++ ++ /* ++ * get pbuf from sendring and create new segments. ++ */ ++ while (pos < len) { ++ struct pbuf *p; ++ u16_t left = len - pos; ++ u16_t max_len = mss_local - optlen; ++ u16_t seglen = LWIP_MIN(left, max_len); ++ ++ p = do_lwip_get_from_sendring((struct lwip_sock *)arg, len - pos, &apiflags); ++ if (p == NULL) { ++ break; ++ } ++ seglen = p->tot_len; ++ ++ queuelen += pbuf_clen(p); ++ ++ /* Now that there are more segments queued, we check again if the ++ * length of the queue exceeds the configured maximum or ++ * overflows. */ ++ if (queuelen > LWIP_MIN(TCP_SND_QUEUELEN, TCP_SNDQUEUELEN_OVERFLOW)) { ++ LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_write: queue too long %"U16_F" (%d)\n", ++ queuelen, (int)TCP_SND_QUEUELEN)); ++ if (pos > 0) { ++ queuelen -= pbuf_clen(p); ++ break; ++ } ++ goto memerr; ++ } ++ ++ lstack_calculate_aggregate(2, p->tot_len); ++ ++ if ((seg = tcp_create_segment(pcb, p, 0, pcb->snd_lbb + pos, optflags)) == NULL) { ++ if (pos > 0) { ++ queuelen -= pbuf_clen(p); ++ break; ++ } ++ goto memerr; ++ } ++ ++ /* first segment of to-be-queued data? 
*/ ++ if (queue == NULL) { ++ queue = seg; ++ } else { ++ /* Attach the segment to the end of the queued segments */ ++ LWIP_ASSERT("prev_seg != NULL", prev_seg != NULL); ++ prev_seg->next = seg; ++ } ++ /* remember last segment of to-be-queued data for next iteration */ ++ prev_seg = seg; ++ ++ LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_TRACE, ("tcp_write: queueing %"U32_F":%"U32_F"\n", ++ lwip_ntohl(seg->tcphdr->seqno), ++ lwip_ntohl(seg->tcphdr->seqno) + TCP_TCPLEN(seg))); ++ ++ pos += seglen; ++ do_lwip_get_from_sendring_over((struct lwip_sock*)arg); ++ } ++ ++ /* ++ * Phase 3: Append queue to pcb->unsent. Queue may be NULL, but that ++ * is harmless ++ */ ++ if (last_unsent == NULL) { ++ pcb->unsent = queue; ++ } else { ++ last_unsent->next = queue; ++ } ++ ++ /* ++ * Finally update the pcb state. ++ */ ++ if (queue) { ++ pcb->last_unsent = prev_seg; ++ } ++ pcb->snd_lbb += pos; ++ pcb->snd_buf -= pos; ++ pcb->snd_queuelen = queuelen; ++ ++ LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_write: %"S16_F" (after enqueued)\n", ++ pcb->snd_queuelen)); ++ if (pcb->snd_queuelen != 0) { ++ LWIP_ASSERT("tcp_write: valid queue length", ++ pcb->unacked != NULL || pcb->unsent != NULL); ++ } ++ ++ /* Set the PSH flag in the last segment that we enqueued. */ ++ if (seg != NULL && seg->tcphdr != NULL && ((apiflags & TCP_WRITE_FLAG_MORE) == 0)) { ++ TCPH_SET_FLAG(seg->tcphdr, TCP_PSH); ++ } ++ ++ return ERR_OK; ++memerr: ++ tcp_set_flags(pcb, TF_NAGLEMEMERR); ++ TCP_STATS_INC(tcp.memerr); ++ ++ if (pcb->snd_queuelen != 0) { ++ LWIP_ASSERT("tcp_write: valid queue length", pcb->unacked != NULL || ++ pcb->unsent != NULL); ++ } ++ LWIP_DEBUGF(TCP_QLEN_DEBUG | LWIP_DBG_STATE, ("tcp_write: %"S16_F" (with mem err)\n", pcb->snd_queuelen)); ++ return ERR_MEM; ++} ++#endif ++ + /** + * Split segment on the head of the unsent queue. 
If return is not + * ERR_OK, existing head remains intact +@@ -2095,6 +2197,7 @@ tcp_rexmit(struct tcp_pcb *pcb) + + /* Don't take any rtt measurements after retransmitting. */ + pcb->rttest = 0; ++ pcb->need_tso_send = 1; + + /* Do the actual retransmission. */ + MIB2_STATS_INC(mib2.tcpretranssegs); +diff --git a/src/core/udp.c b/src/core/udp.c +index 937a045..828a489 100644 +--- a/src/core/udp.c ++++ b/src/core/udp.c +@@ -414,7 +414,7 @@ udp_input(struct pbuf *p, struct netif *inp) + if (udphdr->chksum != 0) { + #if CHECKSUM_CHECK_UDP_HW + u64_t ret = 0; +- if (netif_get_txol_flags(inp) & DEV_RX_OFFLOAD_UDP_CKSUM) { ++ if (netif_get_rxol_flags(inp) & DEV_RX_OFFLOAD_UDP_CKSUM) { + ret = is_cksum_bad(p); + } else { + ret = ip_chksum_pseudo(p, IP_PROTO_UDP, p->tot_len, +diff --git a/src/include/lwip/tcp.h b/src/include/lwip/tcp.h +index e13099c..959df3e 100644 +--- a/src/include/lwip/tcp.h ++++ b/src/include/lwip/tcp.h +@@ -567,6 +567,10 @@ err_t tcp_shutdown(struct tcp_pcb *pcb, int shut_rx, int shut_tx); + + err_t tcp_write (struct tcp_pcb *pcb, const void *dataptr, u16_t len, + u8_t apiflags); ++#if GAZELLE_ENABLE ++err_t tcp_write_from_stack (struct tcp_pcb *pcb, const void *dataptr, u16_t len, ++ u8_t apiflags); ++#endif + + void tcp_setprio (struct tcp_pcb *pcb, u8_t prio); + +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index baf739e..fdd4f87 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -208,8 +208,8 @@ + #define TCP_LISTEN_BACKLOG 1 + #define TCP_DEFAULT_LISTEN_BACKLOG 0xff + +-#define TCP_OVERSIZE 0 +-#define LWIP_NETIF_TX_SINGLE_PBUF 0 ++#define TCP_OVERSIZE TCP_MSS ++#define LWIP_NETIF_TX_SINGLE_PBUF 1 + + #define TCP_MSS (FRAME_MTU - IP_HLEN - TCP_HLEN) + +@@ -219,7 +219,7 @@ + + #define TCP_SND_QUEUELEN (8191) + +-#define TCP_SNDLOWAT (TCP_SND_BUF / 5) ++#define TCP_SNDLOWAT (32768) + + #define TCP_SNDQUEUELOWAT (TCP_SND_QUEUELEN / 5) + +-- +2.27.0 + diff --git a/0076-fix-recvmsg-return-EINVAL.patch 
b/0076-fix-recvmsg-return-EINVAL.patch new file mode 100644 index 0000000000000000000000000000000000000000..32c2830130e9f9ae526273f1a82e195977256102 --- /dev/null +++ b/0076-fix-recvmsg-return-EINVAL.patch @@ -0,0 +1,38 @@ +From 92091a697ae8dac4026fd75a421ad9464aaa253e Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Wed, 25 Oct 2023 15:44:19 +0800 +Subject: [PATCH 76/77] fix recvmsg return EINVAL + +--- + src/api/sockets.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/src/api/sockets.c b/src/api/sockets.c +index b6c7b05..1d71427 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -1492,9 +1492,21 @@ lwip_recvmsg(int s, struct msghdr *message, int flags) + /* check for valid vectors */ + buflen = 0; + for (i = 0; i < message->msg_iovlen; i++) { ++#if GAZELLE_ENABLE ++ /* msg_iov[i].iov_len == 0 dont return ERRVAL ++ * According to the Single Unix Specification we should return EINVAL if an elment length is < 0 ++ * when cast to ssize_t ++ */ ++ if ((message->msg_iov[i].iov_base == NULL) || ((ssize_t)message->msg_iov[i].iov_len < 0) || ++#else + if ((message->msg_iov[i].iov_base == NULL) || ((ssize_t)message->msg_iov[i].iov_len <= 0) || ++#endif + ((size_t)(ssize_t)message->msg_iov[i].iov_len != message->msg_iov[i].iov_len) || ++#if GAZELLE_ENABLE ++ ((ssize_t)(buflen + (ssize_t)message->msg_iov[i].iov_len) < 0)) { ++#else + ((ssize_t)(buflen + (ssize_t)message->msg_iov[i].iov_len) <= 0)) { ++#endif + sock_set_errno(sock, err_to_errno(ERR_VAL)); + done_socket(sock); + return -1; +-- +2.27.0 + diff --git a/0077-adpat-event-for-rtc-mode.patch b/0077-adpat-event-for-rtc-mode.patch new file mode 100644 index 0000000000000000000000000000000000000000..32bfd22db4e9bebab09f747472f6688655142303 --- /dev/null +++ b/0077-adpat-event-for-rtc-mode.patch @@ -0,0 +1,123 @@ +From e719fde7f90a1dff8daeb23170febb5ff38903c9 Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Fri, 27 Oct 2023 19:19:42 +0800 +Subject: [PATCH 77/77] adpat event for rtc mode + 
+--- + src/api/api_msg.c | 8 ++------ + src/api/sockets.c | 33 ++++++++++++++++++++++++++++++--- + src/include/eventpoll.h | 4 ++++ + 3 files changed, 36 insertions(+), 9 deletions(-) + +diff --git a/src/api/api_msg.c b/src/api/api_msg.c +index 9a8992a..5e09505 100644 +--- a/src/api/api_msg.c ++++ b/src/api/api_msg.c +@@ -293,11 +293,9 @@ recv_udp(void *arg, struct udp_pcb *pcb, struct pbuf *p, + #endif /* LWIP_SO_RCVBUF */ + #if GAZELLE_UDP_ENABLE + do_lwip_add_recvlist(conn->socket); +- LWIP_UNUSED_ARG(len); +-#else /* GAZELLE_UDP_ENABLE */ ++#endif /* GAZELLE_UDP_ENABLE */ + /* Register event with callback */ + API_EVENT(conn, NETCONN_EVT_RCVPLUS, len); +-#endif /* GAZELLE_UDP_ENABLE */ + } + } + #endif /* LWIP_UDP */ +@@ -357,11 +355,9 @@ recv_tcp(void *arg, struct tcp_pcb *pcb, struct pbuf *p, err_t err) + #endif /* LWIP_SO_RCVBUF */ + #if GAZELLE_ENABLE + do_lwip_add_recvlist(conn->socket); +- LWIP_UNUSED_ARG(len); +-#else ++#endif + /* Register event with callback */ + API_EVENT(conn, NETCONN_EVT_RCVPLUS, len); +-#endif + } + + return ERR_OK; +diff --git a/src/api/sockets.c b/src/api/sockets.c +index 1d71427..d62290d 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -2744,29 +2744,56 @@ event_callback(struct netconn *conn, enum netconn_evt evt, u16_t len) + check_waiters = 0; + } + #if GAZELLE_ENABLE +- if (conn->acceptmbox != NULL && !sys_mbox_empty(conn->acceptmbox)) { +- add_sock_event(sock, POLLIN); ++ if (netif_is_rtc_mode(netif_default)) { ++ if (sock->rcvevent == 1) { ++ add_sock_event_nolock(sock, POLLIN); ++ } ++ } else { ++ if (conn->acceptmbox != NULL && !sys_mbox_empty(conn->acceptmbox)) { ++ add_sock_event(sock, POLLIN); ++ } + } + #endif + break; + case NETCONN_EVT_RCVMINUS: + sock->rcvevent--; + check_waiters = 0; ++#if GAZELLE_ENABLE ++ if (netif_is_rtc_mode(netif_default)) { ++ if (sock->rcvevent == 0) { ++ del_sock_event_nolock(sock, POLLIN); ++ } ++ } ++#endif + break; + case NETCONN_EVT_SENDPLUS: + if (sock->sendevent) { + 
check_waiters = 0; + } + sock->sendevent = 1; ++#if GAZELLE_ENABLE ++ if (netif_is_rtc_mode(netif_default)) { ++ add_sock_event_nolock(sock, POLLOUT); ++ } ++#endif + break; + case NETCONN_EVT_SENDMINUS: + sock->sendevent = 0; + check_waiters = 0; ++#if GAZELLE_ENABLE ++ if (netif_is_rtc_mode(netif_default)) { ++ del_sock_event_nolock(sock, POLLOUT); ++ } ++#endif + break; + case NETCONN_EVT_ERROR: + sock->errevent = 1; + #if GAZELLE_ENABLE +- add_sock_event(sock, EPOLLERR); ++ if (netif_is_rtc_mode(netif_default)) { ++ add_sock_event_nolock(sock, EPOLLERR); ++ } else { ++ add_sock_event(sock, EPOLLERR); ++ } + #endif + break; + default: +diff --git a/src/include/eventpoll.h b/src/include/eventpoll.h +index dd65a4d..5bc5206 100644 +--- a/src/include/eventpoll.h ++++ b/src/include/eventpoll.h +@@ -66,6 +66,10 @@ struct libos_epoll { + + struct lwip_sock; + extern void add_sock_event(struct lwip_sock *sock, uint32_t event); ++extern void add_sock_event_nolock(struct lwip_sock *sock, uint32_t event); ++extern void del_sock_event(struct lwip_sock *sock, uint32_t event); ++extern void del_sock_event_nolock(struct lwip_sock *sock, uint32_t event); ++ + extern int32_t lstack_epoll_close(int32_t); + + #endif /* __EVENTPOLL_H__ */ +-- +2.27.0 + diff --git a/0078-posix_api-support-select.patch b/0078-posix_api-support-select.patch new file mode 100644 index 0000000000000000000000000000000000000000..93cb17700ab4e8346178db4debf939128a990bff --- /dev/null +++ b/0078-posix_api-support-select.patch @@ -0,0 +1,37 @@ +From 189022c0a2438f099caa6cbfcac1a7ca76cf1e71 Mon Sep 17 00:00:00 2001 +From: yangchen +Date: Fri, 3 Nov 2023 14:55:15 +0800 +Subject: [PATCH] posix_api: support select + +--- + src/api/posix_api.c | 1 + + src/include/posix_api.h | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/src/api/posix_api.c b/src/api/posix_api.c +index b7334da..0dc6ad1 100644 +--- a/src/api/posix_api.c ++++ b/src/api/posix_api.c +@@ -114,6 +114,7 @@ int posix_api_init(void) + 
CHECK_DLSYM_RET_RETURN(posix_api->sigaction_fn = dlsym(handle, "sigaction")); + CHECK_DLSYM_RET_RETURN(posix_api->poll_fn = dlsym(handle, "poll")); + CHECK_DLSYM_RET_RETURN(posix_api->ioctl_fn = dlsym(handle, "ioctl")); ++ CHECK_DLSYM_RET_RETURN(posix_api->select_fn = dlsym(handle, "select")); + + /* lstack helper api */ + posix_api->get_socket = get_socket; +diff --git a/src/include/posix_api.h b/src/include/posix_api.h +index a73e2ec..cde37e3 100644 +--- a/src/include/posix_api.h ++++ b/src/include/posix_api.h +@@ -78,6 +78,7 @@ typedef struct { + int (*sigaction_fn)(int signum, const struct sigaction *act, struct sigaction *oldact); + int (*poll_fn)(struct pollfd *fds, nfds_t nfds, int timeout); + int (*ioctl_fn)(int fd, int cmd, ...); ++ int (*select_fn)(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout); + + int ues_posix; + } posix_api_t; +-- +2.23.0 + diff --git a/0079-enable-vlan-define.patch b/0079-enable-vlan-define.patch new file mode 100644 index 0000000000000000000000000000000000000000..da751bf651a011a5c0b783d31247894858af5e90 --- /dev/null +++ b/0079-enable-vlan-define.patch @@ -0,0 +1,41 @@ +From a7be70b7c210fbe17d4b6a90b8c8a155dd489035 Mon Sep 17 00:00:00 2001 +From: compile_success <980965867@qq.com> +Date: Sat, 4 Nov 2023 14:16:06 +0000 +Subject: [PATCH] enable vlan define + +--- + src/include/lwipopts.h | 3 +++ + src/netif/ethernet.c | 4 ++++ + 2 files changed, 7 insertions(+) + +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index fdd4f87..a18179e 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -261,4 +261,7 @@ + */ + #define LWIP_NETIF_LOOPBACK 1 + ++#define ETHARP_SUPPORT_VLAN 1 ++#define LWIP_VLAN_PCP 1 ++ + #endif /* __LWIPOPTS_H__ */ +diff --git a/src/netif/ethernet.c b/src/netif/ethernet.c +index cbe298e..d411892 100644 +--- a/src/netif/ethernet.c ++++ b/src/netif/ethernet.c +@@ -321,7 +321,11 @@ ethernet_output(struct netif * netif, struct pbuf * p, + 
("ethernet_output: sending packet %p\n", (void *)p)); + + #if CHECKSUM_GEN_IP_HW || CHECKSUM_GEN_TCP_HW ++#if LWIP_VLAN_PCP ++ ethh_cksum_set(p, sizeof(*ethhdr)+SIZEOF_VLAN_HDR); ++#else + ethh_cksum_set(p, sizeof(*ethhdr)); ++#endif + #endif + + /* send the packet */ +-- +2.33.0 + diff --git a/0080-enable-ipv6.patch b/0080-enable-ipv6.patch new file mode 100644 index 0000000000000000000000000000000000000000..1b75b4a929c51a2435d6bfb7f55fffca997e244f --- /dev/null +++ b/0080-enable-ipv6.patch @@ -0,0 +1,425 @@ +From 5d9613fe21e2e02863517dbd9d5db539336351b9 Mon Sep 17 00:00:00 2001 +From: zhengjiebing +Date: Fri, 17 Nov 2023 20:37:56 +0800 +Subject: [PATCH] enable ipv6 + +--- + src/api/sockets.c | 12 +++++++++++- + src/core/dir.mk | 4 +++- + src/core/init.c | 2 ++ + src/core/ipv6/ip6.c | 18 ++++++++++++++++-- + src/core/ipv6/ip6_frag.c | 4 ++++ + src/core/tcp.c | 2 +- + src/core/tcp_in.c | 4 ++-- + src/core/tcp_out.c | 2 ++ + src/core/udp.c | 2 +- + src/include/dpdk_cksum.h | 12 ++++++------ + src/include/dpdk_version.h | 1 + + src/include/lwip/priv/tcp_priv.h | 27 ++++++++++++++++++++------- + src/include/lwip/sockets.h | 2 ++ + src/include/lwip/tcp.h | 21 ++++++++++++++++++++- + src/include/lwipopts.h | 11 ++++++++++- + src/include/reg_sock.h | 7 ++++++- + 16 files changed, 107 insertions(+), 24 deletions(-) + +diff --git a/src/api/sockets.c b/src/api/sockets.c +index 6cff4cb..62052f2 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -113,6 +113,14 @@ + #endif /* LWIP_IPV4 */ + + #if LWIP_IPV6 ++#if GAZELLE_ENABLE ++#define IP6ADDR_PORT_TO_SOCKADDR(sin6, ipaddr, port) do { \ ++ (sin6)->sin6_family = AF_INET6; \ ++ (sin6)->sin6_port = lwip_htons((port)); \ ++ (sin6)->sin6_flowinfo = 0; \ ++ inet6_addr_from_ip6addr(&(sin6)->sin6_addr, ipaddr); \ ++ (sin6)->sin6_scope_id = ip6_addr_zone(ipaddr); }while(0) ++#else + #define IP6ADDR_PORT_TO_SOCKADDR(sin6, ipaddr, port) do { \ + (sin6)->sin6_len = sizeof(struct sockaddr_in6); \ + (sin6)->sin6_family = 
AF_INET6; \ +@@ -120,6 +128,7 @@ + (sin6)->sin6_flowinfo = 0; \ + inet6_addr_from_ip6addr(&(sin6)->sin6_addr, ipaddr); \ + (sin6)->sin6_scope_id = ip6_addr_zone(ipaddr); }while(0) ++#endif /* GAZELLE_ENABLE */ + #define SOCKADDR6_TO_IP6ADDR_PORT(sin6, ipaddr, port) do { \ + inet6_addr_to_ip6addr(ip_2_ip6(ipaddr), &((sin6)->sin6_addr)); \ + if (ip6_addr_has_scope(ip_2_ip6(ipaddr), IP6_UNKNOWN)) { \ +@@ -555,7 +564,8 @@ alloc_socket(struct netconn *newconn, int accepted, int flags) + LWIP_UNUSED_ARG(accepted); + + #if GAZELLE_ENABLE +- int type, protocol = 0, domain = AF_INET; ++ int type, protocol = 0; ++ int domain = NETCONNTYPE_ISIPV6(newconn->type) ? AF_INET6 : AF_INET; + switch (NETCONNTYPE_GROUP(newconn->type)) { + case NETCONN_RAW: + type = SOCK_RAW; +diff --git a/src/core/dir.mk b/src/core/dir.mk +index 57a9670..69b43d1 100644 +--- a/src/core/dir.mk ++++ b/src/core/dir.mk +@@ -1,6 +1,8 @@ + SRC = def.c inet_chksum.c init.c ip.c mem.c memp.c netif.c pbuf.c \ + raw.c tcp.c tcp_in.c tcp_out.c timeouts.c udp.c stats.c\ + ipv4/icmp.c ipv4/ip4_addr.c ipv4/ip4_frag.c ipv4/etharp.c \ +- ipv4/ip4.c ipv4/igmp.c ++ ipv4/ip4.c ipv4/igmp.c ipv6/icmp6.c ipv6/ip6_addr.c ipv6/ip6_frag.c \ ++ ipv6/ethip6.c ipv6/ip6.c ipv6/dhcp6.c ipv6/inet6.c \ ++ ipv6/mld6.c ipv6/nd6.c + + $(eval $(call register_dir, core, $(SRC))) +diff --git a/src/core/init.c b/src/core/init.c +index 60e1c68..6880fd3 100644 +--- a/src/core/init.c ++++ b/src/core/init.c +@@ -347,7 +347,9 @@ lwip_init(void) + mem_init(); + memp_init(); + pbuf_init(); ++#if !GAZELLE_ENABLE + netif_init(); ++#endif /* GAZELLE_ENABLE */ + #if LWIP_IPV4 + ip_init(); + #if LWIP_ARP +diff --git a/src/core/ipv6/ip6.c b/src/core/ipv6/ip6.c +index 9d904ec..101e599 100644 +--- a/src/core/ipv6/ip6.c ++++ b/src/core/ipv6/ip6.c +@@ -60,6 +60,10 @@ + #include "lwip/debug.h" + #include "lwip/stats.h" + ++#if GAZELLE_ENABLE && (CHECKSUM_CHECK_IP_HW || CHECKSUM_GEN_IP_HW) ++#include "dpdk_cksum.h" ++#endif ++ + #ifdef LWIP_HOOK_FILENAME + 
#include LWIP_HOOK_FILENAME + #endif +@@ -1232,6 +1236,10 @@ ip6_output_if_src(struct pbuf *p, const ip6_addr_t *src, const ip6_addr_t *dest, + /* src cannot be NULL here */ + ip6_addr_copy_to_packed(ip6hdr->src, *src); + ++#if CHECKSUM_GEN_IP_HW ++ iph_cksum_set(p, IP6_HLEN, 0); ++#endif /* CHECKSUM_GEN_IP_HW */ ++ + } else { + /* IP header already included in p */ + ip6hdr = (struct ip6_hdr *)p->payload; +@@ -1270,9 +1278,15 @@ ip6_output_if_src(struct pbuf *p, const ip6_addr_t *src, const ip6_addr_t *dest, + #endif /* ENABLE_LOOPBACK */ + #if LWIP_IPV6_FRAG + /* don't fragment if interface has mtu set to 0 [loopif] */ +- if (netif_mtu6(netif) && (p->tot_len > nd6_get_destination_mtu(dest, netif))) { +- return ip6_frag(p, netif, dest); ++#if GAZELLE_ENABLE ++ if (!(netif_get_txol_flags(netif) & DEV_TX_OFFLOAD_TCP_TSO)) { ++#endif ++ if (netif_mtu6(netif) && (p->tot_len > nd6_get_destination_mtu(dest, netif))) { ++ return ip6_frag(p, netif, dest); ++ } ++#if GAZELLE_ENABLE + } ++#endif + #endif /* LWIP_IPV6_FRAG */ + + LWIP_DEBUGF(IP6_DEBUG, ("netif->output_ip6()\n")); +diff --git a/src/core/ipv6/ip6_frag.c b/src/core/ipv6/ip6_frag.c +index 8b352f5..67e36bf 100644 +--- a/src/core/ipv6/ip6_frag.c ++++ b/src/core/ipv6/ip6_frag.c +@@ -689,6 +689,7 @@ ip6_frag_free_pbuf_custom_ref(struct pbuf_custom_ref* p) + memp_free(MEMP_FRAG_PBUF, p); + } + ++#if !GAZELLE_ENABLE + /** Free-callback function to free a 'struct pbuf_custom_ref', called by + * pbuf_free. 
*/ + static void +@@ -702,6 +703,7 @@ ip6_frag_free_pbuf_custom(struct pbuf *p) + } + ip6_frag_free_pbuf_custom_ref(pcr); + } ++#endif /* !GAZELLE_ENABLE */ + #endif /* !LWIP_NETIF_TX_SINGLE_PBUF */ + + /** +@@ -816,7 +818,9 @@ ip6_frag(struct pbuf *p, struct netif *netif, const ip6_addr_t *dest) + } + pbuf_ref(p); + pcr->original = p; ++#if !GAZELLE_ENABLE + pcr->pc.custom_free_function = ip6_frag_free_pbuf_custom; ++#endif /* !GAZELLE_ENABLE */ + + /* Add it to end of rambuf's chain, but using pbuf_cat, not pbuf_chain + * so that it is removed when pbuf_dechain is later called on rambuf. +diff --git a/src/core/tcp.c b/src/core/tcp.c +index c44664e..963b8a4 100644 +--- a/src/core/tcp.c ++++ b/src/core/tcp.c +@@ -1155,7 +1155,7 @@ tcp_new_port(void) + + if (__atomic_load_n(&port_state[tcp_port - TCP_LOCAL_PORT_RANGE_START], __ATOMIC_ACQUIRE) == 0) { + #if GAZELLE_ENABLE +- if (port_in_stack_queue(pcb->remote_ip.addr, pcb->local_ip.addr, pcb->remote_port, tcp_port)) { ++ if (port_in_stack_queue(pcb->remote_ip, pcb->local_ip, pcb->remote_port, tcp_port)) { + tmp_port = tcp_port; + __atomic_store_n(&port_state[tcp_port - TCP_LOCAL_PORT_RANGE_START], 1, __ATOMIC_RELEASE); + break; +diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c +index ecbd616..7154659 100644 +--- a/src/core/tcp_in.c ++++ b/src/core/tcp_in.c +@@ -309,8 +309,8 @@ tcp_input(struct pbuf *p, struct netif *inp) + prev = NULL; + + #if GAZELLE_TCP_PCB_HASH +- idx = TUPLE4_HASH_FN( ip_current_dest_addr()->addr, tcphdr->dest, +- ip_current_src_addr()->addr, tcphdr->src) & ++ idx = TUPLE4_HASH_FN( ip_current_dest_addr(), tcphdr->dest, ++ ip_current_src_addr(), tcphdr->src) & + (tcp_active_htable->size - 1); + head = &tcp_active_htable->array[idx].chain; + tcppcb_hlist_for_each(pcb, node, head) { +diff --git a/src/core/tcp_out.c b/src/core/tcp_out.c +index 073d989..137e3cf 100644 +--- a/src/core/tcp_out.c ++++ b/src/core/tcp_out.c +@@ -139,7 +139,9 @@ static err_t tcp_output_segment(struct tcp_seg *seg, struct 
tcp_pcb *pcb, struct + static struct netif * + tcp_route(const struct tcp_pcb *pcb, const ip_addr_t *src, const ip_addr_t *dst) + { ++#if LWIP_IPV6 + LWIP_UNUSED_ARG(src); /* in case IPv4-only and source-based routing is disabled */ ++#endif /* LWIP_IPV6 */ + + if ((pcb != NULL) && (pcb->netif_idx != NETIF_NO_INDEX)) { + return netif_get_by_index(pcb->netif_idx); +diff --git a/src/core/udp.c b/src/core/udp.c +index 828a489..727a705 100644 +--- a/src/core/udp.c ++++ b/src/core/udp.c +@@ -132,7 +132,7 @@ udp_new_port(struct udp_pcb *dst_pcb) + } + + if (__atomic_load_n(&port_state[udp_port - UDP_LOCAL_PORT_RANGE_START], __ATOMIC_ACQUIRE) == 0) { +- if (port_in_stack_queue(dst_pcb->remote_ip.addr, dst_pcb->local_ip.addr, dst_pcb->remote_port, udp_port)) { ++ if (port_in_stack_queue(dst_pcb->remote_ip, dst_pcb->local_ip, dst_pcb->remote_port, udp_port)) { + tmp_port = udp_port; + __atomic_store_n(&port_state[udp_port - UDP_LOCAL_PORT_RANGE_START], 1, __ATOMIC_RELEASE); + break; +diff --git a/src/include/dpdk_cksum.h b/src/include/dpdk_cksum.h +index b8056f9..38cfb96 100644 +--- a/src/include/dpdk_cksum.h ++++ b/src/include/dpdk_cksum.h +@@ -66,7 +66,7 @@ static inline void ethh_cksum_set(struct pbuf *p, u16_t len) { + + // replaces IPH_CHKSUM_SET + static inline void iph_cksum_set(struct pbuf *p, u16_t len, bool do_ipcksum) { +- p->ol_flags |= RTE_MBUF_F_TX_IPV4; ++ p->ol_flags |= ((len == IP_HLEN) ? RTE_MBUF_F_TX_IPV4 : RTE_MBUF_F_TX_IPV6); + if (do_ipcksum) { + p->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM; + } +@@ -95,16 +95,16 @@ static inline void udph_cksum_set(struct pbuf *p, u16_t len) { + static inline u16_t ip_chksum_pseudo_offload(u8_t proto, u16_t proto_len, + const ip_addr_t *src, const ip_addr_t *dst) + { +- struct ipv4_psd_header { +- uint32_t src_addr; /* IP address of source host. */ +- uint32_t dst_addr; /* IP address of destination host. */ ++ struct ip_psd_header { ++ ip_addr_t src_addr; /* IP address of source host. 
*/ ++ ip_addr_t dst_addr; /* IP address of destination host. */ + uint8_t zero; /* zero. */ + uint8_t proto; /* L4 protocol type. */ + uint16_t len; /* L4 length. */ + } psd_hdr; + +- psd_hdr.src_addr = ip4_addr_get_u32(src); +- psd_hdr.dst_addr = ip4_addr_get_u32(dst); ++ ip_addr_copy(psd_hdr.src_addr, *src); ++ ip_addr_copy(psd_hdr.dst_addr, *dst); + psd_hdr.proto = proto; + psd_hdr.len = lwip_htons(proto_len); + psd_hdr.zero = 0; +diff --git a/src/include/dpdk_version.h b/src/include/dpdk_version.h +index c90ddb8..e61d0b3 100644 +--- a/src/include/dpdk_version.h ++++ b/src/include/dpdk_version.h +@@ -43,6 +43,7 @@ + #define RTE_MBUF_F_RX_IP_CKSUM_BAD PKT_RX_IP_CKSUM_BAD + #define RTE_MBUF_F_RX_L4_CKSUM_BAD PKT_RX_L4_CKSUM_BAD + #define RTE_MBUF_F_TX_IPV4 PKT_TX_IPV4 ++#define RTE_MBUF_F_TX_IPV6 PKT_TX_IPV6 + #define RTE_MBUF_F_TX_IP_CKSUM PKT_TX_IP_CKSUM + #define RTE_MBUF_F_TX_TCP_CKSUM PKT_TX_TCP_CKSUM + #define RTE_MBUF_F_TX_TCP_SEG PKT_TX_TCP_SEG +diff --git a/src/include/lwip/priv/tcp_priv.h b/src/include/lwip/priv/tcp_priv.h +index ddae3fd..9b1341c 100644 +--- a/src/include/lwip/priv/tcp_priv.h ++++ b/src/include/lwip/priv/tcp_priv.h +@@ -347,11 +347,24 @@ static inline int vdev_reg_done(enum reg_ring_type reg_type, const struct tcp_pc + LWIP_ASSERT("Invalid parameter", pcb != NULL); + + struct gazelle_quintuple qtuple; +- qtuple.protocol = 0; +- qtuple.src_ip = pcb->local_ip.addr; +- qtuple.src_port = lwip_htons(pcb->local_port); +- qtuple.dst_ip = pcb->remote_ip.addr; +- qtuple.dst_port = lwip_htons(pcb->remote_port); ++ if (IP_IS_V4_VAL(pcb->local_ip)) { ++ qtuple.protocol = 0; ++ qtuple.src_ip = ip_2_ip4(&pcb->local_ip)->addr; ++ qtuple.src_port = lwip_htons(pcb->local_port); ++ qtuple.dst_ip = ip_2_ip4(&pcb->remote_ip)->addr; ++ qtuple.dst_port = lwip_htons(pcb->remote_port); ++ } else { ++#if LWIP_IPV6 ++ qtuple.protocol = 1; ++ qtuple.src_port = lwip_htons(pcb->local_port); ++ qtuple.dst_port = lwip_htons(pcb->remote_port); ++ ++ for (int i = 0; i < 
4; i++) { ++ qtuple.src_ip6[i] = pcb->local_ip.u_addr.ip6.addr[i]; ++ qtuple.dst_ip6[i] = pcb->remote_ip.u_addr.ip6.addr[i]; ++ } ++#endif ++ } + + #if GAZELLE_TCP_REUSE_IPPORT + if (reg_type == REG_RING_TCP_CONNECT_CLOSE) { +@@ -474,8 +487,8 @@ static inline void vdev_unreg_done(const struct tcp_pcb *pcb) + u32_t idx; \ + struct hlist_head *hd; \ + struct tcp_hash_table *htb = pcbs; \ +- idx = TUPLE4_HASH_FN((npcb)->local_ip.addr, (npcb)->local_port, \ +- (npcb)->remote_ip.addr, (npcb)->remote_port) & \ ++ idx = TUPLE4_HASH_FN(&((npcb)->local_ip), (npcb)->local_port, \ ++ &((npcb)->remote_ip), (npcb)->remote_port) & \ + (htb->size - 1); \ + hd = &htb->array[idx].chain; \ + hlist_add_head(&(npcb)->tcp_node, hd); \ +diff --git a/src/include/lwip/sockets.h b/src/include/lwip/sockets.h +index cfec6a5..5715df4 100644 +--- a/src/include/lwip/sockets.h ++++ b/src/include/lwip/sockets.h +@@ -88,7 +88,9 @@ struct sockaddr_in { + + #if LWIP_IPV6 + struct sockaddr_in6 { ++#if !GAZELLE_ENABLE + u8_t sin6_len; /* length of this structure */ ++#endif /* GAZELLE_ENABLE */ + sa_family_t sin6_family; /* AF_INET6 */ + in_port_t sin6_port; /* Transport layer port # */ + u32_t sin6_flowinfo; /* IPv6 flow information */ +diff --git a/src/include/lwip/tcp.h b/src/include/lwip/tcp.h +index 959df3e..91a86c9 100644 +--- a/src/include/lwip/tcp.h ++++ b/src/include/lwip/tcp.h +@@ -476,7 +476,26 @@ static inline unsigned int jhash_3words(unsigned int a, unsigned int b, unsigned + return c; + } + +-#define TUPLE4_HASH_FN(laddr, lport, faddr, fport) jhash_3words(laddr, faddr,lport|(fport<<16)) ++static inline unsigned int jhash_3words6(const unsigned int *a, const unsigned int *b, unsigned int c) /* mixes the four 32-bit words of a and b into c; a and b are only read */ ++{ ++ for (int i = 0; i < 4; i++) { ++ unsigned int e = a[i] + JHASH_INITVAL; ++ unsigned int f = b[i] + JHASH_INITVAL; ++ ++ __jhash_final(e, f, c); ++ } ++ ++ return c; ++} ++ ++#if LWIP_IPV4 && LWIP_IPV6 ++#define TUPLE4_HASH_FN(laddr, lport, faddr, fport) /* fport is widened before the shift: a promoted int would overflow for ports >= 0x8000 */ \ ++ 
(IP_IS_V4(laddr) ? jhash_3words(ip_2_ip4(laddr)->addr, ip_2_ip4(faddr)->addr, (lport) | ((unsigned int)(fport) << 16)) \ ++ : jhash_3words6(ip_2_ip6(laddr)->addr, ip_2_ip6(faddr)->addr, (lport) | ((unsigned int)(fport) << 16))) ++#elif LWIP_IPV4 ++#define TUPLE4_HASH_FN(laddr, lport, faddr, fport) \ ++ jhash_3words(ip_2_ip4(laddr)->addr, ip_2_ip4(faddr)->addr, (lport) | ((unsigned int)(fport) << 16)) ++#endif + + #define tcppcb_hlist_for_each(tcppcb, node, list) \ + hlist_for_each_entry(tcppcb, node, list, tcp_node) +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index a18179e..9ab5cde 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -178,6 +178,14 @@ + #define IP_HLEN 20 + + ++/* ++ ------------------------------------- ++ ----------- IPv6 options ----------- ++ ------------------------------------- ++*/ ++#define LWIP_IPV6 1 ++#define IP6_HLEN 40 ++ + /* + --------------------------------- + ---------- UDP options ---------- +@@ -211,7 +219,7 @@ + #define TCP_OVERSIZE TCP_MSS + #define LWIP_NETIF_TX_SINGLE_PBUF 1 + +-#define TCP_MSS (FRAME_MTU - IP_HLEN - TCP_HLEN) ++#define TCP_MSS (FRAME_MTU - IP6_HLEN - TCP_HLEN - VLAN_LEN) + + #define TCP_WND (2500 * TCP_MSS) + +@@ -263,5 +271,6 @@ + + #define ETHARP_SUPPORT_VLAN 1 + #define LWIP_VLAN_PCP 1 ++#define VLAN_LEN 4 + + #endif /* __LWIPOPTS_H__ */ +diff --git a/src/include/reg_sock.h b/src/include/reg_sock.h +index 5d5710d..5a5e971 100644 +--- a/src/include/reg_sock.h ++++ b/src/include/reg_sock.h +@@ -34,6 +34,7 @@ + #define __REG_SOCK_H__ + + #include ++#include "lwip/ip_addr.h" + + enum reg_ring_type { + REG_RING_TCP_LISTEN = 0, +@@ -50,6 +51,10 @@ struct gazelle_quintuple { + uint16_t dst_port; + uint32_t src_ip; + uint32_t dst_ip; ++#if LWIP_IPV6 ++ uint32_t src_ip6[4]; ++ uint32_t dst_ip6[4]; ++#endif + }; + + struct reg_ring_msg { +@@ -60,6 +65,6 @@ struct reg_ring_msg { + }; + + extern int vdev_reg_xmit(enum reg_ring_type type, struct gazelle_quintuple *qtuple); +-extern bool port_in_stack_queue(uint32_t src_ip, uint32_t dst_ip, 
uint16_t src_port, uint16_t dst_port); ++extern bool port_in_stack_queue(ip_addr_t src_ip, ip_addr_t dst_ip, uint16_t src_port, uint16_t dst_port); + + #endif /* __REG_SOCK_H__ */ +-- +2.27.0 + diff --git a/0081-ip6-hdr.patch b/0081-ip6-hdr.patch new file mode 100644 index 0000000000000000000000000000000000000000..a35fe542a3bc57f270974b2c7e2a06d59fa45eb8 --- /dev/null +++ b/0081-ip6-hdr.patch @@ -0,0 +1,354 @@ +diff -Nur lwip-ipv6/src/core/ipv6/ip6.c lwip-ipv6-hdr/src/core/ipv6/ip6.c +--- lwip-ipv6/src/core/ipv6/ip6.c 2023-11-15 19:48:02.864481010 +0800 ++++ lwip-ipv6-hdr/src/core/ipv6/ip6.c 2023-11-15 20:05:30.388481010 +0800 +@@ -367,7 +367,7 @@ + * @param inp the netif on which this packet was received + */ + static void +-ip6_forward(struct pbuf *p, struct ip6_hdr *iphdr, struct netif *inp) ++ip6_forward(struct pbuf *p, struct ip6hdr *iphdr, struct netif *inp) + { + struct netif *netif; + +@@ -512,7 +512,7 @@ + err_t + ip6_input(struct pbuf *p, struct netif *inp) + { +- struct ip6_hdr *ip6hdr; ++ struct ip6hdr *ip6hdr; + struct netif *netif; + const u8_t *nexth; + u16_t hlen, hlen_tot; /* the current header length */ +@@ -531,7 +531,7 @@ + IP6_STATS_INC(ip6.recv); + + /* identify the IP header */ +- ip6hdr = (struct ip6_hdr *)p->payload; ++ ip6hdr = (struct ip6hdr *)p->payload; + if (IP6H_V(ip6hdr) != 6) { + LWIP_DEBUGF(IP6_DEBUG | LWIP_DBG_LEVEL_WARNING, ("IPv6 packet dropped due to bad version number %"U32_F"\n", + IP6H_V(ip6hdr))); +@@ -1015,7 +1015,7 @@ + + /* Returned p point to IPv6 header. + * Update all our variables and pointers and continue. 
*/ +- ip6hdr = (struct ip6_hdr *)p->payload; ++ ip6hdr = (struct ip6hdr *)p->payload; + nexth = &IP6H_NEXTH(ip6hdr); + hlen = hlen_tot = IP6_HLEN; + pbuf_remove_header(p, IP6_HLEN); +@@ -1188,7 +1188,7 @@ + u8_t hl, u8_t tc, + u8_t nexth, struct netif *netif) + { +- struct ip6_hdr *ip6hdr; ++ struct ip6hdr *ip6hdr; + ip6_addr_t dest_addr; + + LWIP_ASSERT_CORE_LOCKED(); +@@ -1217,9 +1217,9 @@ + return ERR_BUF; + } + +- ip6hdr = (struct ip6_hdr *)p->payload; +- LWIP_ASSERT("check that first pbuf can hold struct ip6_hdr", +- (p->len >= sizeof(struct ip6_hdr))); ++ ip6hdr = (struct ip6hdr *)p->payload; ++ LWIP_ASSERT("check that first pbuf can hold struct ip6hdr", ++ (p->len >= sizeof(struct ip6hdr))); + + IP6H_HOPLIM_SET(ip6hdr, hl); + IP6H_NEXTH_SET(ip6hdr, nexth); +@@ -1242,7 +1242,7 @@ + + } else { + /* IP header already included in p */ +- ip6hdr = (struct ip6_hdr *)p->payload; ++ ip6hdr = (struct ip6hdr *)p->payload; + ip6_addr_copy_from_packed(dest_addr, ip6hdr->dest); + ip6_addr_assign_zone(&dest_addr, IP6_UNKNOWN, netif); + dest = &dest_addr; +@@ -1316,7 +1316,7 @@ + u8_t hl, u8_t tc, u8_t nexth) + { + struct netif *netif; +- struct ip6_hdr *ip6hdr; ++ struct ip6hdr *ip6hdr; + ip6_addr_t src_addr, dest_addr; + + LWIP_IP_CHECK_PBUF_REF_COUNT_FOR_TX(p); +@@ -1325,7 +1325,7 @@ + netif = ip6_route(src, dest); + } else { + /* IP header included in p, read addresses. */ +- ip6hdr = (struct ip6_hdr *)p->payload; ++ ip6hdr = (struct ip6hdr *)p->payload; + ip6_addr_copy_from_packed(src_addr, ip6hdr->src); + ip6_addr_copy_from_packed(dest_addr, ip6hdr->dest); + netif = ip6_route(&src_addr, &dest_addr); +@@ -1375,7 +1375,7 @@ + u8_t hl, u8_t tc, u8_t nexth, struct netif_hint *netif_hint) + { + struct netif *netif; +- struct ip6_hdr *ip6hdr; ++ struct ip6hdr *ip6hdr; + ip6_addr_t src_addr, dest_addr; + err_t err; + +@@ -1385,7 +1385,7 @@ + netif = ip6_route(src, dest); + } else { + /* IP header included in p, read addresses. 
*/ +- ip6hdr = (struct ip6_hdr *)p->payload; ++ ip6hdr = (struct ip6hdr *)p->payload; + ip6_addr_copy_from_packed(src_addr, ip6hdr->src); + ip6_addr_copy_from_packed(dest_addr, ip6hdr->dest); + netif = ip6_route(&src_addr, &dest_addr); +@@ -1476,7 +1476,7 @@ + void + ip6_debug_print(struct pbuf *p) + { +- struct ip6_hdr *ip6hdr = (struct ip6_hdr *)p->payload; ++ struct ip6hdr *ip6hdr = (struct ip6hdr *)p->payload; + + LWIP_DEBUGF(IP6_DEBUG, ("IPv6 header:\n")); + LWIP_DEBUGF(IP6_DEBUG, ("+-------------------------------+\n")); +diff -Nur lwip-ipv6/src/core/ipv6/ip6_frag.c lwip-ipv6-hdr/src/core/ipv6/ip6_frag.c +--- lwip-ipv6/src/core/ipv6/ip6_frag.c 2023-11-15 19:48:02.864481010 +0800 ++++ lwip-ipv6-hdr/src/core/ipv6/ip6_frag.c 2023-11-15 20:01:41.668481010 +0800 +@@ -551,7 +551,7 @@ + + if (valid) { + /* All fragments have been received */ +- struct ip6_hdr* iphdr_ptr; ++ struct ip6hdr* iphdr_ptr; + + /* chain together the pbufs contained within the ip6_reassdata list. */ + iprh = (struct ip6_reass_helper*) ipr->p->payload; +@@ -565,7 +565,7 @@ + pbuf_remove_header(next_pbuf, IP6_FRAG_HLEN); + #if IPV6_FRAG_COPYHEADER + if (IPV6_FRAG_REQROOM > 0) { +- /* hide the extra bytes borrowed from ip6_hdr for struct ip6_reass_helper */ ++ /* hide the extra bytes borrowed from ip6hdr for struct ip6_reass_helper */ + u8_t hdrerr = pbuf_remove_header(next_pbuf, IPV6_FRAG_REQROOM); + LWIP_UNUSED_ARG(hdrerr); /* in case of LWIP_NOASSERT */ + LWIP_ASSERT("no room for struct ip6_reass_helper", hdrerr == 0); +@@ -610,7 +610,7 @@ + (size_t)((u8_t*)p->payload - (u8_t*)ipr->iphdr)); + + /* This is where the IPv6 header is now. */ +- iphdr_ptr = (struct ip6_hdr*)((u8_t*)ipr->iphdr + ++ iphdr_ptr = (struct ip6hdr*)((u8_t*)ipr->iphdr + + sizeof(struct ip6_frag_hdr)); + + /* Adjust datagram length by adding header lengths. 
*/ +@@ -721,8 +721,8 @@ + err_t + ip6_frag(struct pbuf *p, struct netif *netif, const ip6_addr_t *dest) + { +- struct ip6_hdr *original_ip6hdr; +- struct ip6_hdr *ip6hdr; ++ struct ip6hdr *original_ip6hdr; ++ struct ip6hdr *ip6hdr; + struct ip6_frag_hdr *frag_hdr; + struct pbuf *rambuf; + #if !LWIP_NETIF_TX_SINGLE_PBUF +@@ -740,7 +740,7 @@ + + identification++; + +- original_ip6hdr = (struct ip6_hdr *)p->payload; ++ original_ip6hdr = (struct ip6hdr *)p->payload; + + /* @todo we assume there are no options in the unfragmentable part (IPv6 header). */ + LWIP_ASSERT("p->tot_len >= IP6_HLEN", p->tot_len >= IP6_HLEN); +@@ -769,7 +769,7 @@ + } + /* fill in the IP header */ + SMEMCPY(rambuf->payload, original_ip6hdr, IP6_HLEN); +- ip6hdr = (struct ip6_hdr *)rambuf->payload; ++ ip6hdr = (struct ip6hdr *)rambuf->payload; + frag_hdr = (struct ip6_frag_hdr *)((u8_t*)rambuf->payload + IP6_HLEN); + #else + /* When not using a static buffer, create a chain of pbufs. +@@ -785,7 +785,7 @@ + LWIP_ASSERT("this needs a pbuf in one piece!", + (rambuf->len >= (IP6_HLEN))); + SMEMCPY(rambuf->payload, original_ip6hdr, IP6_HLEN); +- ip6hdr = (struct ip6_hdr *)rambuf->payload; ++ ip6hdr = (struct ip6hdr *)rambuf->payload; + frag_hdr = (struct ip6_frag_hdr *)((u8_t*)rambuf->payload + IP6_HLEN); + + /* Can just adjust p directly for needed offset. 
*/ +diff -Nur lwip-ipv6/src/core/ipv6/nd6.c lwip-ipv6-hdr/src/core/ipv6/nd6.c +--- lwip-ipv6/src/core/ipv6/nd6.c 2023-11-15 19:48:02.864481010 +0800 ++++ lwip-ipv6-hdr/src/core/ipv6/nd6.c 2023-11-15 20:06:47.036481010 +0800 +@@ -895,7 +895,7 @@ + case ICMP6_TYPE_PTB: /* Packet too big */ + { + struct icmp6_hdr *icmp6hdr; /* Packet too big message */ +- struct ip6_hdr *ip6hdr; /* IPv6 header of the packet which caused the error */ ++ struct ip6hdr *ip6hdr; /* IPv6 header of the packet which caused the error */ + u32_t pmtu; + ip6_addr_t destination_address; + +@@ -909,7 +909,7 @@ + } + + icmp6hdr = (struct icmp6_hdr *)p->payload; +- ip6hdr = (struct ip6_hdr *)((u8_t*)p->payload + sizeof(struct icmp6_hdr)); ++ ip6hdr = (struct ip6hdr *)((u8_t*)p->payload + sizeof(struct icmp6_hdr)); + + /* Create an aligned, zoned copy of the destination address. */ + ip6_addr_copy_from_packed(destination_address, ip6hdr->dest); +@@ -2187,7 +2187,7 @@ + static void + nd6_send_q(s8_t i) + { +- struct ip6_hdr *ip6hdr; ++ struct ip6hdr *ip6hdr; + ip6_addr_t dest; + #if LWIP_ND6_QUEUEING + struct nd6_q_entry *q; +@@ -2204,7 +2204,7 @@ + /* pop first item off the queue */ + neighbor_cache[i].q = q->next; + /* Get ipv6 header. */ +- ip6hdr = (struct ip6_hdr *)(q->p->payload); ++ ip6hdr = (struct ip6hdr *)(q->p->payload); + /* Create an aligned copy. */ + ip6_addr_copy_from_packed(dest, ip6hdr->dest); + /* Restore the zone, if applicable. */ +@@ -2219,7 +2219,7 @@ + #else /* LWIP_ND6_QUEUEING */ + if (neighbor_cache[i].q != NULL) { + /* Get ipv6 header. */ +- ip6hdr = (struct ip6_hdr *)(neighbor_cache[i].q->payload); ++ ip6hdr = (struct ip6hdr *)(neighbor_cache[i].q->payload); + /* Create an aligned copy. */ + ip6_addr_copy_from_packed(dest, ip6hdr->dest); + /* Restore the zone, if applicable. 
*/ +diff -Nur lwip-ipv6/src/core/raw.c lwip-ipv6-hdr/src/core/raw.c +--- lwip-ipv6/src/core/raw.c 2023-11-15 19:48:02.860481010 +0800 ++++ lwip-ipv6-hdr/src/core/raw.c 2023-11-15 19:49:53.468481010 +0800 +@@ -146,7 +146,7 @@ + if (IP_HDR_GET_VERSION(p->payload) == 6) + #endif /* LWIP_IPV4 */ + { +- struct ip6_hdr *ip6hdr = (struct ip6_hdr *)p->payload; ++ struct ip6hdr *ip6hdr = (struct ip6hdr *)p->payload; + proto = IP6H_NEXTH(ip6hdr); + } + #if LWIP_IPV4 +diff -Nur lwip-ipv6/src/include/lwip/ip6_frag.h lwip-ipv6-hdr/src/include/lwip/ip6_frag.h +--- lwip-ipv6/src/include/lwip/ip6_frag.h 2023-11-15 19:48:02.864481010 +0800 ++++ lwip-ipv6-hdr/src/include/lwip/ip6_frag.h 2023-11-15 20:13:40.008481010 +0800 +@@ -90,7 +90,7 @@ + struct ip6_reassdata { + struct ip6_reassdata *next; + struct pbuf *p; +- struct ip6_hdr *iphdr; /* pointer to the first (original) IPv6 header */ ++ struct ip6hdr *iphdr; /* pointer to the first (original) IPv6 header */ + #if IPV6_FRAG_COPYHEADER + ip6_addr_p_t src; /* copy of the source address in the IP header */ + ip6_addr_p_t dest; /* copy of the destination address in the IP header */ +diff -Nur lwip-ipv6/src/include/lwip/ip.h lwip-ipv6-hdr/src/include/lwip/ip.h +--- lwip-ipv6/src/include/lwip/ip.h 2023-11-15 19:48:02.864481010 +0800 ++++ lwip-ipv6-hdr/src/include/lwip/ip.h 2023-11-15 20:12:42.796481010 +0800 +@@ -123,7 +123,7 @@ + #endif /* LWIP_IPV4 */ + #if LWIP_IPV6 + /** Header of the input IPv6 packet currently being processed. */ +- struct ip6_hdr *current_ip6_header; ++ struct ip6hdr *current_ip6_header; + #endif /* LWIP_IPV6 */ + /** Total header length of current_ip4/6_header (i.e. after this, the UDP/TCP header starts) */ + u16_t current_ip_header_tot_len; +@@ -159,7 +159,7 @@ + /** Get the IPv6 header of the current packet. + * This function must only be called from a receive callback (udp_recv, + * raw_recv, tcp_accept). It will return NULL otherwise. 
*/ +-#define ip6_current_header() ((const struct ip6_hdr*)(ip_data.current_ip6_header)) ++#define ip6_current_header() ((const struct ip6hdr*)(ip_data.current_ip6_header)) + /** Returns TRUE if the current IP input packet is IPv6, FALSE if it is IPv4 */ + #define ip_current_is_v6() (ip6_current_header() != NULL) + /** Source IPv6 address of current_header */ +@@ -201,7 +201,7 @@ + /** Get the IPv6 header of the current packet. + * This function must only be called from a receive callback (udp_recv, + * raw_recv, tcp_accept). It will return NULL otherwise. */ +-#define ip6_current_header() ((const struct ip6_hdr*)(ip_data.current_ip6_header)) ++#define ip6_current_header() ((const struct ip6hdr*)(ip_data.current_ip6_header)) + /** Always returns TRUE when only supporting IPv6 only */ + #define ip_current_is_v6() 1 + /** Get the transport layer protocol */ +diff -Nur lwip-ipv6/src/include/lwip/prot/ip6.h lwip-ipv6-hdr/src/include/lwip/prot/ip6.h +--- lwip-ipv6/src/include/lwip/prot/ip6.h 2023-11-15 19:48:02.868481010 +0800 ++++ lwip-ipv6-hdr/src/include/lwip/prot/ip6.h 2023-11-17 13:24:56.832481010 +0800 +@@ -79,7 +79,7 @@ + # include "arch/bpstruct.h" + #endif + PACK_STRUCT_BEGIN +-struct ip6_hdr { ++struct ip6hdr { + /** version / traffic class / flow label */ + PACK_STRUCT_FIELD(u32_t _v_tc_fl); + /** payload length */ +diff -Nur lwip-ipv6/src/netif/lowpan6.c lwip-ipv6-hdr/src/netif/lowpan6.c +--- lwip-ipv6/src/netif/lowpan6.c 2023-11-15 19:48:02.868481010 +0800 ++++ lwip-ipv6-hdr/src/netif/lowpan6.c 2023-11-15 20:16:23.836481010 +0800 +@@ -570,12 +570,12 @@ + struct lowpan6_link_addr src, dest; + #if LWIP_6LOWPAN_INFER_SHORT_ADDRESS + ip6_addr_t ip6_src; +- struct ip6_hdr *ip6_hdr; ++ struct ip6hdr *ip6_hdr; + #endif /* LWIP_6LOWPAN_INFER_SHORT_ADDRESS */ + + #if LWIP_6LOWPAN_INFER_SHORT_ADDRESS + /* Check if we can compress source address (use aligned copy) */ +- ip6_hdr = (struct ip6_hdr *)q->payload; ++ ip6_hdr = (struct ip6hdr *)q->payload; + 
ip6_addr_copy_from_packed(ip6_src, ip6_hdr->src); + ip6_addr_assign_zone(&ip6_src, IP6_UNICAST, netif); + if (lowpan6_get_address_mode(&ip6_src, &short_mac_addr) == 3) { +diff -Nur lwip-ipv6/src/netif/lowpan6_common.c lwip-ipv6-hdr/src/netif/lowpan6_common.c +--- lwip-ipv6/src/netif/lowpan6_common.c 2023-11-15 19:48:02.868481010 +0800 ++++ lwip-ipv6-hdr/src/netif/lowpan6_common.c 2023-11-15 20:15:44.460481010 +0800 +@@ -137,7 +137,7 @@ + u8_t lowpan6_header_len; + u8_t hidden_header_len = 0; + s8_t i; +- struct ip6_hdr *ip6hdr; ++ struct ip6hdr *ip6hdr; + ip_addr_t ip6src, ip6dst; + + LWIP_ASSERT("netif != NULL", netif != NULL); +@@ -160,7 +160,7 @@ + } + + /* Point to ip6 header and align copies of src/dest addresses. */ +- ip6hdr = (struct ip6_hdr *)inptr; ++ ip6hdr = (struct ip6hdr *)inptr; + ip_addr_copy_from_ip6_packed(ip6dst, ip6hdr->dest); + ip6_addr_assign_zone(ip_2_ip6(&ip6dst), IP6_UNKNOWN, netif); + ip_addr_copy_from_ip6_packed(ip6src, ip6hdr->src); +@@ -396,7 +396,7 @@ + struct lowpan6_link_addr *src, struct lowpan6_link_addr *dest) + { + u16_t lowpan6_offset; +- struct ip6_hdr *ip6hdr; ++ struct ip6hdr *ip6hdr; + s8_t i; + u32_t header_temp; + u16_t ip6_offset = IP6_HLEN; +@@ -408,7 +408,7 @@ + LWIP_ASSERT("hdr_size_comp != NULL", hdr_size_comp != NULL); + LWIP_ASSERT("dehdr_size_decompst != NULL", hdr_size_decomp != NULL); + +- ip6hdr = (struct ip6_hdr *)decomp_buffer; ++ ip6hdr = (struct ip6hdr *)decomp_buffer; + if (decomp_bufsize < IP6_HLEN) { + return ERR_MEM; + } diff --git a/0082-add-vlanid-in-netif.patch b/0082-add-vlanid-in-netif.patch new file mode 100644 index 0000000000000000000000000000000000000000..49bffd3af4fc6c1e04d7e148ea8fb20900c3ebe4 --- /dev/null +++ b/0082-add-vlanid-in-netif.patch @@ -0,0 +1,109 @@ +diff -Nur lwip-org/src/core/netif.c lwip-vlan/src/core/netif.c +--- lwip-org/src/core/netif.c 2023-11-24 17:38:29.428481010 +0800 ++++ lwip-vlan/src/core/netif.c 2023-11-27 18:35:00.172481010 +0800 +@@ -355,6 +355,11 @@ + netif->input 
= input; + + NETIF_RESET_HINTS(netif); ++ ++#if GAZELLE_ENABLE ++ netif->vlan_enable=false; ++#endif ++ + #if ENABLE_LOOPBACK + netif->loop_first = NULL; + netif->loop_last = NULL; +@@ -441,6 +446,15 @@ + return netif; + } + ++#if GAZELLE_ENABLE ++void ++netif_set_vlan_tci(struct netif *netif, u16_t vlan_tci) ++{ ++ netif->vlan_enable = true; ++ netif->vlan_tci = vlan_tci; ++} ++#endif ++ + static void + netif_do_ip_addr_changed(const ip_addr_t *old_addr, const ip_addr_t *new_addr) + { +diff -Nur lwip-org/src/core/tcp.c lwip-vlan/src/core/tcp.c +--- lwip-org/src/core/tcp.c 2023-11-24 17:38:29.448481010 +0800 ++++ lwip-vlan/src/core/tcp.c 2023-11-27 10:42:33.228481010 +0800 +@@ -987,7 +987,9 @@ + lpcb->tos = pcb->tos; + + #if LWIP_VLAN_PCP ++#if !GAZELLE_ENABLE + lpcb->netif_hints.tci = pcb->netif_hints.tci; ++#endif + #endif /* LWIP_VLAN_PCP */ + #if GAZELLE_TCP_REUSE_IPPORT + lpcb->connect_num = 0; +diff -Nur lwip-org/src/core/tcp_in.c lwip-vlan/src/core/tcp_in.c +--- lwip-org/src/core/tcp_in.c 2023-11-24 17:38:29.448481010 +0800 ++++ lwip-vlan/src/core/tcp_in.c 2023-11-27 10:42:33.228481010 +0800 +@@ -808,7 +808,9 @@ + npcb->listener = pcb; + #endif /* LWIP_CALLBACK_API || TCP_LISTEN_BACKLOG */ + #if LWIP_VLAN_PCP ++#if !GAZELLE_ENABLE + npcb->netif_hints.tci = pcb->netif_hints.tci; ++#endif + #endif /* LWIP_VLAN_PCP */ + /* inherit socket options */ + npcb->so_options = pcb->so_options & SOF_INHERITED; +diff -Nur lwip-org/src/include/lwip/netif.h lwip-vlan/src/include/lwip/netif.h +--- lwip-org/src/include/lwip/netif.h 2023-11-24 17:38:29.440481010 +0800 ++++ lwip-vlan/src/include/lwip/netif.h 2023-11-27 18:33:07.936481010 +0800 +@@ -45,6 +45,10 @@ + + #include "lwip/ip_addr.h" + ++#if GAZELLE_ENABLE ++#include ++#endif ++ + #include "lwip/def.h" + #include "lwip/pbuf.h" + #include "lwip/stats.h" +@@ -357,6 +361,10 @@ + #if GAZELLE_ENABLE + u64_t rxol_flags; + u64_t txol_flags; ++ bool vlan_enable; ++ /** vlan id is an attribute of NIC. 
The variable 'netif_hints' is not used because it is assigned by pcb, ++ * while non transport layers without pcb cannot be enabled */ ++ u16_t vlan_tci; + #endif + /** descriptive abbreviation */ + char name[2]; +@@ -484,6 +492,7 @@ + #define netif_get_rxol_flags(netif) ((netif)->rxol_flags) + #define netif_get_txol_flags(netif) ((netif)->txol_flags) + ++void netif_set_vlan_tci(struct netif *netif, u16_t vlan_tci); + void netif_set_rtc_mode(struct netif *netif); + void netif_set_rxol_flags(struct netif *netif, u64_t flags); + void netif_set_txol_flags(struct netif *netif, u64_t flags); +diff -Nur lwip-org/src/netif/ethernet.c lwip-vlan/src/netif/ethernet.c +--- lwip-org/src/netif/ethernet.c 2023-11-24 17:38:29.444481010 +0800 ++++ lwip-vlan/src/netif/ethernet.c 2023-11-27 11:07:48.464481010 +0800 +@@ -283,9 +283,15 @@ + vlan_prio_vid = LWIP_HOOK_VLAN_SET(netif, p, src, dst, eth_type); + #elif LWIP_VLAN_PCP + vlan_prio_vid = -1; ++#if !GAZELLE_ENABLE + if (netif->hints && (netif->hints->tci >= 0)) { + vlan_prio_vid = (u16_t)netif->hints->tci; + } ++#else ++ if (netif->vlan_enable) { ++ vlan_prio_vid = netif->vlan_tci; ++ } ++#endif /* GAZELLE_ENABLE */ + #endif + if (vlan_prio_vid >= 0) { + struct eth_vlan_hdr *vlanhdr; diff --git a/0083-lwipopts-add-lwip-debug-log-macro.patch b/0083-lwipopts-add-lwip-debug-log-macro.patch new file mode 100644 index 0000000000000000000000000000000000000000..d6fda4f12dfa25f58953355ce27b242766fe4b6b --- /dev/null +++ b/0083-lwipopts-add-lwip-debug-log-macro.patch @@ -0,0 +1,75 @@ +From afa156f10bdabe937e37080918f669937343eb54 Mon Sep 17 00:00:00 2001 +From: yangchen +Date: Tue, 28 Nov 2023 09:22:34 +0800 +Subject: [PATCH] lwipopts: add lwip debug log macro + +--- + src/include/lwiplog.h | 15 ++++++++------- + src/include/lwipopts.h | 10 +++++++++- + 2 files changed, 17 insertions(+), 8 deletions(-) + +diff --git a/src/include/lwiplog.h b/src/include/lwiplog.h +index f278ff4..80ed0fc 100644 +--- a/src/include/lwiplog.h ++++ 
b/src/include/lwiplog.h +@@ -43,21 +43,22 @@ + + #if GAZELLE_USE_DPDK_LOG + +-#define LWIP_LOG_WARN LWIP_DBG_LEVEL_WARNING +-#define LWIP_LOG_ERROR LWIP_DBG_LEVEL_SERIOUS +-#define LWIP_LOG_FATAL LWIP_DBG_LEVEL_SEVERE ++#define LWIP_LOG_WARN LWIP_DBG_LEVEL_WARNING ++#define LWIP_LOG_ERROR LWIP_DBG_LEVEL_SERIOUS ++#define LWIP_LOG_FATAL LWIP_DBG_LEVEL_SEVERE ++#define RTE_LOGTYPE_LWIP RTE_LOGTYPE_USER2 + + #define LWIP_PLATFORM_LOG(level, fmt, ...) \ + do { \ + if ((level) & LWIP_LOG_FATAL) { \ +- RTE_LOG(ERR, EAL, fmt, ##__VA_ARGS__); \ ++ RTE_LOG(ERR, LWIP, fmt, ##__VA_ARGS__); \ + abort(); \ + } else if ((level) & LWIP_LOG_ERROR) { \ +- RTE_LOG(ERR, EAL, fmt, ##__VA_ARGS__); \ ++ RTE_LOG(ERR, LWIP, fmt, ##__VA_ARGS__); \ + } else if ((level) & LWIP_LOG_WARN) { \ +- RTE_LOG(WARNING, EAL, fmt, ##__VA_ARGS__); \ ++ RTE_LOG(WARNING, LWIP, fmt, ##__VA_ARGS__); \ + } else { \ +- RTE_LOG(INFO, EAL, fmt, ##__VA_ARGS__); \ ++ RTE_LOG(INFO, LWIP, fmt, ##__VA_ARGS__); \ + } \ + } while(0) + +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index 9ab5cde..06b3ae5 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -41,7 +41,6 @@ + #define LWIP_PERF 1 + #define LWIP_RECORD_PERF 0 + +-//#define LWIP_DEBUG 1 + #define GAZELLE_USE_DPDK_LOG 1 + + #define GAZELLE_ENABLE 1 +@@ -262,6 +261,15 @@ + + #define SIOCSHIWAT 1 + ++/* ++ ------------------------------------ ++ --------- Debug log options -------- ++ ------------------------------------ ++*/ ++#define LWIP_DEBUG 1 ++ ++#define GAZELLE_DEBUG LWIP_DBG_ON ++ + /* + ------------------------------------ + ---------- Netif options ---------- +-- +2.23.0 + diff --git a/0084-add-tcpslowtmr-log-and-tcpfasttmr-cnt.patch b/0084-add-tcpslowtmr-log-and-tcpfasttmr-cnt.patch new file mode 100644 index 0000000000000000000000000000000000000000..c28c2a3671c3e6b5ef0c9e86b87d81183abb2a01 --- /dev/null +++ b/0084-add-tcpslowtmr-log-and-tcpfasttmr-cnt.patch @@ -0,0 +1,135 @@ +From 
d0edabb1ebfe0cc1f32e91834589b16b209dcfc9 Mon Sep 17 00:00:00 2001 +From: hantwofish +Date: Tue, 28 Nov 2023 04:34:02 +0800 +Subject: [PATCH] add tcpslowtmr log and tcpfasttmr cnt + +--- + src/core/tcp.c | 18 +++++++++++------- + src/include/lwip/stats.h | 3 +++ + src/include/lwipopts.h | 3 +++ + 3 files changed, 17 insertions(+), 7 deletions(-) + +diff --git a/src/core/tcp.c b/src/core/tcp.c +index 9f1e636..c1b64a3 100644 +--- a/src/core/tcp.c ++++ b/src/core/tcp.c +@@ -1393,16 +1393,17 @@ tcp_slowtmr_start: + + if (pcb->state == SYN_SENT && pcb->nrtx >= TCP_SYNMAXRTX) { + ++pcb_remove; +- LWIP_DEBUGF(TCP_DEBUG, ("tcp_slowtmr: max SYN retries reached\n")); ++ LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: max SYN retries reached local_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); + } else if (pcb->nrtx >= TCP_MAXRTX) { + ++pcb_remove; +- LWIP_DEBUGF(TCP_DEBUG, ("tcp_slowtmr: max DATA retries reached\n")); ++ LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: max DATA retries reached local_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); + } else { + if (pcb->persist_backoff > 0) { + LWIP_ASSERT("tcp_slowtimr: persist ticking with in-flight data", pcb->unacked == NULL); + LWIP_ASSERT("tcp_slowtimr: persist ticking with empty send buffer", pcb->unsent != NULL); + if (pcb->persist_probe >= TCP_MAXRTX) { + ++pcb_remove; /* max probes reached */ ++ LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: max persist probes reached local_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); + } else { + u8_t backoff_cnt = tcp_persist_backoff[pcb->persist_backoff - 1]; + if (pcb->persist_cnt < backoff_cnt) { +@@ -1486,7 +1487,7 @@ tcp_slowtmr_start: + if ((u32_t)(tcp_ticks - pcb->tmr) > + TCP_FIN_WAIT_TIMEOUT / TCP_SLOW_INTERVAL) { + ++pcb_remove; +- LWIP_DEBUGF(TCP_DEBUG, ("tcp_slowtmr: removing pcb stuck in FIN-WAIT-2\n")); ++ LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: 
removing pcb stuck in FIN-WAIT-2 local_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); + } + } + } +@@ -1497,7 +1498,7 @@ tcp_slowtmr_start: + (pcb->state == CLOSE_WAIT))) { + if ((u32_t)(tcp_ticks - pcb->tmr) > + (pcb->keep_idle + TCP_KEEP_DUR(pcb)) / TCP_SLOW_INTERVAL) { +- LWIP_DEBUGF(TCP_DEBUG, ("tcp_slowtmr: KEEPALIVE timeout. Aborting connection to ")); ++ LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: KEEPALIVE timeout. Aborting connection local_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); + ip_addr_debug_print_val(TCP_DEBUG, pcb->remote_ip); + LWIP_DEBUGF(TCP_DEBUG, ("\n")); + +@@ -1519,7 +1520,7 @@ tcp_slowtmr_start: + #if TCP_QUEUE_OOSEQ + if (pcb->ooseq != NULL && + (tcp_ticks - pcb->tmr >= (u32_t)pcb->rto * TCP_OOSEQ_TIMEOUT)) { +- LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_slowtmr: dropping OOSEQ queued data\n")); ++ LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: dropping OOSEQ queued data local_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); + tcp_free_ooseq(pcb); + } + #endif /* TCP_QUEUE_OOSEQ */ +@@ -1529,7 +1530,7 @@ tcp_slowtmr_start: + if ((u32_t)(tcp_ticks - pcb->tmr) > + TCP_SYN_RCVD_TIMEOUT / TCP_SLOW_INTERVAL) { + ++pcb_remove; +- LWIP_DEBUGF(TCP_DEBUG, ("tcp_slowtmr: removing pcb stuck in SYN-RCVD\n")); ++ LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: removing pcb stuck in SYN-RCVD local_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); + } + } + +@@ -1537,7 +1538,7 @@ tcp_slowtmr_start: + if (pcb->state == LAST_ACK) { + if ((u32_t)(tcp_ticks - pcb->tmr) > 2 * TCP_MSL / TCP_SLOW_INTERVAL) { + ++pcb_remove; +- LWIP_DEBUGF(TCP_DEBUG, ("tcp_slowtmr: removing pcb stuck in LAST-ACK\n")); ++ LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: removing pcb stuck in LAST-ACK local_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); + } + } + +@@ -1691,6 +1692,7 @@ tcp_fasttmr_start: + /* send delayed ACKs */ + if 
(pcb->flags & TF_ACK_DELAY) { + LWIP_DEBUGF(TCP_DEBUG, ("tcp_fasttmr: delayed ACK\n")); ++ MIB2_STATS_INC(mib2.tcpdelayackcnt); + tcp_ack_now(pcb); + tcp_output(pcb); + tcp_clear_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW); +@@ -1698,6 +1700,7 @@ tcp_fasttmr_start: + /* send pending FIN */ + if (pcb->flags & TF_CLOSEPEND) { + LWIP_DEBUGF(TCP_DEBUG, ("tcp_fasttmr: pending FIN\n")); ++ MIB2_STATS_INC(mib2.tcpfinackcnt); + tcp_clear_flags(pcb, TF_CLOSEPEND); + tcp_close_shutdown_fin(pcb); + } +@@ -1707,6 +1710,7 @@ tcp_fasttmr_start: + /* If there is data which was previously "refused" by upper layer */ + if (pcb->refused_data != NULL) { + tcp_active_pcbs_changed = 0; ++ MIB2_STATS_INC(mib2.tcpredusedcnt); + tcp_process_refused_data(pcb); + if (tcp_active_pcbs_changed) { + /* application callback has changed the pcb list: restart the loop */ +diff --git a/src/include/lwip/stats.h b/src/include/lwip/stats.h +index 4470531..5953a74 100644 +--- a/src/include/lwip/stats.h ++++ b/src/include/lwip/stats.h +@@ -150,6 +150,9 @@ struct stats_mib2 { + u32_t tcpinsegs; + u32_t tcpinerrs; + u32_t tcpoutrsts; ++ u32_t tcpfinackcnt; ++ u32_t tcpdelayackcnt; ++ u32_t tcpredusedcnt; + + /* UDP */ + u32_t udpindatagrams; +diff --git a/src/include/lwipopts.h b/src/include/lwipopts.h +index 06b3ae5..5fe647f 100644 +--- a/src/include/lwipopts.h ++++ b/src/include/lwipopts.h +@@ -269,6 +269,9 @@ + #define LWIP_DEBUG 1 + + #define GAZELLE_DEBUG LWIP_DBG_ON ++#define GAZELLE_DEBUG_WARNING (LWIP_DBG_ON | LWIP_DBG_LEVEL_WARNING) ++#define GAZELLE_DEBUG_SERIOUS (LWIP_DBG_ON | LWIP_DBG_LEVEL_SERIOUS) ++#define GAZELLE_DEBUG_SEVERE (LWIP_DBG_ON | LWIP_DBG_LEVEL_SEVERE) + + /* + ------------------------------------ +-- +2.33.0 + diff --git a/0085-add-lwip-log-tcp_rst-tcp_abandon-tcp_abort.patch b/0085-add-lwip-log-tcp_rst-tcp_abandon-tcp_abort.patch new file mode 100644 index 0000000000000000000000000000000000000000..0785dbf7c85d0c7c9a9fb6ed6081f240a595b013 --- /dev/null +++ 
b/0085-add-lwip-log-tcp_rst-tcp_abandon-tcp_abort.patch @@ -0,0 +1,192 @@ +From 4ab4406f6e59ee09d893e31104236518fc81e991 Mon Sep 17 00:00:00 2001 +From: yangchen +Date: Tue, 28 Nov 2023 16:11:09 +0800 +Subject: [PATCH] add lwip log: tcp_rst & tcp_abandon & tcp_abort + +--- + src/core/tcp.c | 24 ++++++++++++++++-------- + src/core/tcp_in.c | 19 +++++++++++++++++-- + src/include/lwip/debug.h | 4 ++-- + 3 files changed, 35 insertions(+), 12 deletions(-) + +diff --git a/src/core/tcp.c b/src/core/tcp.c +index 963b8a4..a4f82a3 100644 +--- a/src/core/tcp.c ++++ b/src/core/tcp.c +@@ -415,6 +415,9 @@ tcp_close_shutdown(struct tcp_pcb *pcb, u8_t rst_on_unacked_data) + + /* don't call tcp_abort here: we must not deallocate the pcb since + that might not be expected when calling tcp_close */ ++ LWIP_DEBUGF(GAZELLE_DEBUG_SERIOUS, ++ ("tcp_close_shutdown: Not all data received by app, send RST, local_port=%d, remote_port=%d\n", ++ pcb->local_port, pcb->remote_port)); + tcp_rst(pcb, pcb->snd_nxt, pcb->rcv_nxt, &pcb->local_ip, &pcb->remote_ip, + pcb->local_port, pcb->remote_port); + +@@ -682,7 +685,8 @@ tcp_abandon(struct tcp_pcb *pcb, int reset) + #endif /* TCP_QUEUE_OOSEQ */ + tcp_backlog_accepted(pcb); + if (send_rst) { +- LWIP_DEBUGF(TCP_RST_DEBUG, ("tcp_abandon: sending RST\n")); ++ LWIP_DEBUGF(TCP_RST_DEBUG | GAZELLE_DEBUG_SERIOUS, ++ ("tcp_abandon: send RST, local port=%d, remote port=%d\n", local_port, pcb->remote_port)); + tcp_rst(pcb, seqno, ackno, &pcb->local_ip, &pcb->remote_ip, local_port, pcb->remote_port); + } + last_state = pcb->state; +@@ -1574,6 +1578,9 @@ tcp_slowtmr_start: + #endif + + if (pcb_reset) { ++ LWIP_DEBUGF(GAZELLE_DEBUG_SERIOUS, ++ ("tcp_slowtmr: KEEPALIVE timeout, send RST, local port=%d, remote port=%d\n", ++ pcb->local_port, pcb->remote_port)); + tcp_rst(pcb, pcb->snd_nxt, pcb->rcv_nxt, &pcb->local_ip, &pcb->remote_ip, + pcb->local_port, pcb->remote_port); + } +@@ -1941,8 +1948,8 @@ tcp_kill_prio(u8_t prio) + } + } + if (inactive != NULL) { +- 
LWIP_DEBUGF(TCP_DEBUG, ("tcp_kill_prio: killing oldest PCB %p (%"S32_F")\n", +- (void *)inactive, inactivity)); ++ LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ++ ("tcp_kill_prio: killing oldest PCB (%"S32_F")\n", inactivity)); + tcp_abort(inactive); + } + } +@@ -1972,8 +1979,8 @@ tcp_kill_state(enum tcp_state state) + } + } + if (inactive != NULL) { +- LWIP_DEBUGF(TCP_DEBUG, ("tcp_kill_closing: killing oldest %s PCB %p (%"S32_F")\n", +- tcp_state_str[state], (void *)inactive, inactivity)); ++ LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ++ ("tcp_kill_closing: killing oldest %s PCB (%"S32_F")\n", tcp_state_str[state], inactivity)); + /* Don't send a RST, since no data is lost. */ + tcp_abandon(inactive, 0); + } +@@ -1999,8 +2006,8 @@ tcp_kill_timewait(void) + } + } + if (inactive != NULL) { +- LWIP_DEBUGF(TCP_DEBUG, ("tcp_kill_timewait: killing oldest TIME-WAIT PCB %p (%"S32_F")\n", +- (void *)inactive, inactivity)); ++ LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ++ ("tcp_kill_timewait: killing oldest TIME-WAIT PCB (%"S32_F")\n", inactivity)); + tcp_abort(inactive); + } + } +@@ -2540,7 +2547,8 @@ tcp_netif_ip_addr_changed_pcblist(const ip_addr_t *old_addr, struct tcp_pcb *pcb + ) { + /* this connection must be aborted */ + struct tcp_pcb *next = pcb->next; +- LWIP_DEBUGF(NETIF_DEBUG | LWIP_DBG_STATE, ("netif_set_ipaddr: aborting TCP pcb %p\n", (void *)pcb)); ++ LWIP_DEBUGF(NETIF_DEBUG | LWIP_DBG_STATE | GAZELLE_DEBUG_SERIOUS, ++ ("netif_set_ipaddr: aborting TCP\n")); + tcp_abort(pcb); + pcb = next; + } else { +diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c +index 7154659..700a64c 100644 +--- a/src/core/tcp_in.c ++++ b/src/core/tcp_in.c +@@ -592,6 +592,7 @@ tcp_input(struct pbuf *p, struct netif *inp) + pbuf_free(rest); + } + #endif /* TCP_QUEUE_OOSEQ && LWIP_WND_SCALE */ ++ LWIP_DEBUGF(GAZELLE_DEBUG_SERIOUS, ("tcp_input: received data although already closed, send RST\n")); + tcp_abort(pcb); + goto aborted; + } +@@ -683,10 +684,12 @@ aborted: + } else { 
+ /* If no matching PCB was found, send a TCP RST (reset) to the + sender. */ +- LWIP_DEBUGF(TCP_RST_DEBUG, ("tcp_input: no PCB match found, resetting.\n")); + if (!(TCPH_FLAGS(tcphdr) & TCP_RST)) { + TCP_STATS_INC(tcp.proterr); + TCP_STATS_INC(tcp.drop); ++ LWIP_DEBUGF(TCP_RST_DEBUG | GAZELLE_DEBUG_SERIOUS, ++ ("tcp_input: no PCB match found, send RST, dest port=%d, src port=%d\n", ++ lwip_ntohs(tcphdr->dest), lwip_ntohs(tcphdr->src))); + tcp_rst(NULL, ackno, seqno + tcplen, ip_current_dest_addr(), + ip_current_src_addr(), tcphdr->dest, tcphdr->src); + } +@@ -761,7 +764,9 @@ tcp_listen_input(struct tcp_pcb_listen *pcb) + if (flags & TCP_ACK) { + /* For incoming segments with the ACK flag set, respond with a + RST. */ +- LWIP_DEBUGF(TCP_RST_DEBUG, ("tcp_listen_input: ACK in LISTEN, sending reset\n")); ++ LWIP_DEBUGF(TCP_RST_DEBUG | GAZELLE_DEBUG_SERIOUS, ++ ("tcp_listen_input: ACK in LISTEN, send reset, dest port=%d, src port=%d\n", ++ lwip_ntohs(tcphdr->dest), lwip_ntohs(tcphdr->src))); + tcp_rst((const struct tcp_pcb *)pcb, ackno, seqno + tcplen, ip_current_dest_addr(), + ip_current_src_addr(), tcphdr->dest, tcphdr->src); + } else if (flags & TCP_SYN) { +@@ -852,6 +857,7 @@ tcp_listen_input(struct tcp_pcb_listen *pcb) + /* Send a SYN|ACK together with the MSS option. 
*/ + rc = tcp_enqueue_flags(npcb, TCP_SYN | TCP_ACK); + if (rc != ERR_OK) { ++ LWIP_DEBUGF(GAZELLE_DEBUG_SERIOUS, ("tcp_listen_input: send SYN or ACK failed\n")); + tcp_abandon(npcb, 0); + PERF_RESUME(PERF_LAYER_TCP, PERF_POINT_TCP_SYN_RECV); + return; +@@ -892,6 +898,9 @@ tcp_timewait_input(struct tcp_pcb *pcb) + should be sent in reply */ + if (TCP_SEQ_BETWEEN(seqno, pcb->rcv_nxt, pcb->rcv_nxt + pcb->rcv_wnd)) { + /* If the SYN is in the window it is an error, send a reset */ ++ LWIP_DEBUGF(GAZELLE_DEBUG_SERIOUS, ++ ("tcp_timewait_input: SYN in TIME_WAIT, send RST, dest port=%d, src port=%d\n", ++ lwip_ntohs(tcphdr->dest), lwip_ntohs(tcphdr->src))); + tcp_rst(pcb, ackno, seqno + tcplen, ip_current_dest_addr(), + ip_current_src_addr(), tcphdr->dest, tcphdr->src); + return; +@@ -1060,6 +1069,8 @@ tcp_process(struct tcp_pcb *pcb) + /* received ACK? possibly a half-open connection */ + else if (flags & TCP_ACK) { + /* send a RST to bring the other side in a non-synchronized state. */ ++ LWIP_DEBUGF(GAZELLE_DEBUG_SERIOUS, ("tcp_process: ACK in SYN_SENT, send RST, dest port=%d, src port=%d\n", ++ lwip_ntohs(tcphdr->dest), lwip_ntohs(tcphdr->src))); + tcp_rst(pcb, ackno, seqno + tcplen, ip_current_dest_addr(), + ip_current_src_addr(), tcphdr->dest, tcphdr->src); + /* Resend SYN immediately (don't wait for rto timeout) to establish +@@ -1102,6 +1113,7 @@ tcp_process(struct tcp_pcb *pcb) + * the connection. */ + /* Already aborted? 
*/ + if (err != ERR_ABRT) { ++ LWIP_DEBUGF(GAZELLE_DEBUG_SERIOUS, ("tcp_process: accept function returns with an error %d, send RST\n", err)); + tcp_abort(pcb); + } + return ERR_ABRT; +@@ -1129,6 +1141,9 @@ tcp_process(struct tcp_pcb *pcb) + } + } else { + /* incorrect ACK number, send RST */ ++ LWIP_DEBUGF(GAZELLE_DEBUG_SERIOUS, ++ ("tcp_process: incorrect ACK number in SYN_RCVD, send RST, ackno=%d, lastack=%d, snd_nxt=%d, dest port=%d, src port=%d\n", ++ ackno, pcb->lastack, pcb->snd_nxt, lwip_ntohs(tcphdr->dest), lwip_ntohs(tcphdr->src))); + tcp_rst(pcb, ackno, seqno + tcplen, ip_current_dest_addr(), + ip_current_src_addr(), tcphdr->dest, tcphdr->src); + } +diff --git a/src/include/lwip/debug.h b/src/include/lwip/debug.h +index f47cbfe..6abed9f 100644 +--- a/src/include/lwip/debug.h ++++ b/src/include/lwip/debug.h +@@ -56,12 +56,12 @@ + /** Debug level: Serious. memory allocation failures, ... */ + #define LWIP_DBG_LEVEL_SERIOUS 0x02 + /** Debug level: Severe */ +-#define LWIP_DBG_LEVEL_SEVERE 0x03 ++#define LWIP_DBG_LEVEL_SEVERE 0x04 + /** + * @} + */ + +-#define LWIP_DBG_MASK_LEVEL 0x03 ++#define LWIP_DBG_MASK_LEVEL 0x07 + /* compatibility define only */ + #define LWIP_DBG_LEVEL_OFF LWIP_DBG_LEVEL_ALL + +-- +2.23.0 + diff --git a/0086-log-add-errevent-log-and-tcp-exception-statistics.patch b/0086-log-add-errevent-log-and-tcp-exception-statistics.patch new file mode 100644 index 0000000000000000000000000000000000000000..17cba3bdc399807922d9b81c18999051ca2541fb --- /dev/null +++ b/0086-log-add-errevent-log-and-tcp-exception-statistics.patch @@ -0,0 +1,77 @@ +From d7b0ffc7604075639f3eedbfe63fc0f12b87d23f Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Tue, 28 Nov 2023 16:34:16 +0800 +Subject: [PATCH] log: add errevent log and tcp exception statistics + +--- + src/api/api_msg.c | 1 + + src/api/sockets.c | 3 ++- + src/core/tcp_in.c | 2 ++ + src/include/lwip/stats.h | 3 +++ + 4 files changed, 8 insertions(+), 1 deletion(-) + +diff --git a/src/api/api_msg.c 
b/src/api/api_msg.c +index 8d98be6..531da40 100644 +--- a/src/api/api_msg.c ++++ b/src/api/api_msg.c +@@ -613,6 +613,7 @@ accept_function(void *arg, struct tcp_pcb *newpcb, err_t err) + sys_mbox_free(&newconn->recvmbox); + sys_mbox_set_invalid(&newconn->recvmbox); + netconn_free(newconn); ++ MIB2_STATS_INC(mib2.tcpacceptmboxfull); + return ERR_MEM; + } else { + /* Register event with callback */ +diff --git a/src/api/sockets.c b/src/api/sockets.c +index 62052f2..65c69d4 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -2797,9 +2797,10 @@ event_callback(struct netconn *conn, enum netconn_evt evt, u16_t len) + #endif + break; + case NETCONN_EVT_ERROR: ++ LWIP_DEBUGF(GAZELLE_DEBUG_SERIOUS, ("event_callback: have errevent, err=%d\n", conn->pending_err)); + sock->errevent = 1; + #if GAZELLE_ENABLE +- if (netif_is_rtc_mode(netif_default)) { ++ if (netif_is_rtc_mode(netif_default)) { + add_sock_event_nolock(sock, EPOLLERR); + } else { + add_sock_event(sock, EPOLLERR); +diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c +index 1076f20..03b9942 100644 +--- a/src/core/tcp_in.c ++++ b/src/core/tcp_in.c +@@ -770,6 +770,7 @@ tcp_listen_input(struct tcp_pcb_listen *pcb) + #if TCP_LISTEN_BACKLOG + if (pcb->accepts_pending >= pcb->backlog) { + LWIP_DEBUGF(TCP_DEBUG, ("tcp_listen_input: listen backlog exceeded for port %"U16_F"\n", tcphdr->dest)); ++ MIB2_STATS_INC(mib2.tcplistendrops); + return; + } + #endif /* TCP_LISTEN_BACKLOG */ +@@ -1845,6 +1846,7 @@ tcp_receive(struct tcp_pcb *pcb) + + } else { + /* We get here if the incoming segment is out-of-sequence. */ ++ MIB2_STATS_INC(mib2.tcpoutofseq); + + #if TCP_QUEUE_OOSEQ + /* We queue the segment on the ->ooseq queue. 
*/ +diff --git a/src/include/lwip/stats.h b/src/include/lwip/stats.h +index 5953a74..805836c 100644 +--- a/src/include/lwip/stats.h ++++ b/src/include/lwip/stats.h +@@ -153,6 +153,9 @@ struct stats_mib2 { + u32_t tcpfinackcnt; + u32_t tcpdelayackcnt; + u32_t tcpredusedcnt; ++ u32_t tcpoutofseq; ++ u32_t tcpacceptmboxfull; ++ u32_t tcplistendrops; + + /* UDP */ + u32_t udpindatagrams; +-- +2.27.0 + diff --git a/0087-support-vlan-offload.patch b/0087-support-vlan-offload.patch new file mode 100644 index 0000000000000000000000000000000000000000..756db5d1adf8d1540e111b4863c6647411a9fb8e --- /dev/null +++ b/0087-support-vlan-offload.patch @@ -0,0 +1,56 @@ +diff -Nur lwip-82/src/include/dpdk_version.h lwip-offload/src/include/dpdk_version.h +--- lwip-82/src/include/dpdk_version.h 2023-11-28 14:17:02.432481010 +0800 ++++ lwip-offload/src/include/dpdk_version.h 2023-11-28 14:34:21.208481010 +0800 +@@ -48,6 +48,7 @@ + #define RTE_MBUF_F_TX_TCP_CKSUM PKT_TX_TCP_CKSUM + #define RTE_MBUF_F_TX_TCP_SEG PKT_TX_TCP_SEG + #define RTE_MBUF_F_TX_UDP_CKSUM PKT_TX_UDP_CKSUM ++#define RTE_MBUF_F_TX_VLAN PKT_TX_VLAN_PKT + + #endif /* DPDK_VERSION_1911 */ + +diff -Nur lwip-82/src/include/lwip/pbuf.h lwip-offload/src/include/lwip/pbuf.h +--- lwip-82/src/include/lwip/pbuf.h 2023-11-28 14:17:02.408481010 +0800 ++++ lwip-offload/src/include/lwip/pbuf.h 2023-11-28 14:41:31.580481010 +0800 +@@ -240,6 +240,7 @@ + struct pbuf *last; + pthread_spinlock_t pbuf_lock; + struct tcp_pcb *pcb; ++ u16_t vlan_tci; + #if GAZELLE_UDP_ENABLE + ip_addr_t addr; + u16_t port; +diff -Nur lwip-82/src/netif/ethernet.c lwip-offload/src/netif/ethernet.c +--- lwip-82/src/netif/ethernet.c 2023-11-28 14:17:02.440481010 +0800 ++++ lwip-offload/src/netif/ethernet.c 2023-11-28 16:35:36.536481010 +0800 +@@ -289,7 +289,12 @@ + } + #else + if (netif->vlan_enable) { +- vlan_prio_vid = netif->vlan_tci; ++ if (netif->txol_flags & DEV_TX_OFFLOAD_VLAN_INSERT) { ++ p->ol_flags |= RTE_MBUF_F_TX_VLAN; ++ p->vlan_tci = 
netif->vlan_tci; ++ } else { ++ vlan_prio_vid = netif->vlan_tci; ++ } + } + #endif /* GAZELLE_ENABLE */ + #endif +@@ -327,11 +332,11 @@ + ("ethernet_output: sending packet %p\n", (void *)p)); + + #if CHECKSUM_GEN_IP_HW || CHECKSUM_GEN_TCP_HW +-#if LWIP_VLAN_PCP +- ethh_cksum_set(p, sizeof(*ethhdr)+SIZEOF_VLAN_HDR); +-#else +- ethh_cksum_set(p, sizeof(*ethhdr)); +-#endif ++ if (netif->vlan_enable && !(netif->txol_flags & DEV_TX_OFFLOAD_VLAN_INSERT)) { ++ ethh_cksum_set(p, sizeof(*ethhdr) + SIZEOF_VLAN_HDR); ++ } else { ++ ethh_cksum_set(p, sizeof(*ethhdr)); ++ } + #endif + + /* send the packet */ diff --git a/0088-modify-log-info-err.patch b/0088-modify-log-info-err.patch new file mode 100644 index 0000000000000000000000000000000000000000..43bafbfe6dbc4371aa4379b1ca1506a6ca9b0aef --- /dev/null +++ b/0088-modify-log-info-err.patch @@ -0,0 +1,82 @@ +From f1b954fd737ca0e7571019f88b18a926ce849e6b Mon Sep 17 00:00:00 2001 +From: hantwofish +Date: Tue, 5 Dec 2023 10:57:46 +0800 +Subject: [PATCH] modify log info err + +--- + src/core/tcp.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/src/core/tcp.c b/src/core/tcp.c +index 3c7bbd0..2cfee11 100644 +--- a/src/core/tcp.c ++++ b/src/core/tcp.c +@@ -1397,17 +1397,17 @@ tcp_slowtmr_start: + + if (pcb->state == SYN_SENT && pcb->nrtx >= TCP_SYNMAXRTX) { + ++pcb_remove; +- LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: max SYN retries reached loac_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); ++ LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: max SYN retries reached local_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); + } else if (pcb->nrtx >= TCP_MAXRTX) { + ++pcb_remove; +- LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: max DATA retries reached loac_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); ++ LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: max DATA retries reached 
local_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); + } else { + if (pcb->persist_backoff > 0) { + LWIP_ASSERT("tcp_slowtimr: persist ticking with in-flight data", pcb->unacked == NULL); + LWIP_ASSERT("tcp_slowtimr: persist ticking with empty send buffer", pcb->unsent != NULL); + if (pcb->persist_probe >= TCP_MAXRTX) { + ++pcb_remove; /* max probes reached */ +- LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: persist_probe is greater TCP_MAXRTX loac_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); ++ LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: persist_probe is greater TCP_MAXRTX local_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); + } else { + u8_t backoff_cnt = tcp_persist_backoff[pcb->persist_backoff - 1]; + if (pcb->persist_cnt < backoff_cnt) { +@@ -1491,7 +1491,7 @@ tcp_slowtmr_start: + if ((u32_t)(tcp_ticks - pcb->tmr) > + TCP_FIN_WAIT_TIMEOUT / TCP_SLOW_INTERVAL) { + ++pcb_remove; +- LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: removing pcb stuck in FIN-WAIT-2 loac_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); ++ LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: removing pcb stuck in FIN-WAIT-2 local_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); + } + } + } +@@ -1502,7 +1502,7 @@ tcp_slowtmr_start: + (pcb->state == CLOSE_WAIT))) { + if ((u32_t)(tcp_ticks - pcb->tmr) > + (pcb->keep_idle + TCP_KEEP_DUR(pcb)) / TCP_SLOW_INTERVAL) { +- LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: KEEPALIVE timeout. Aborting connection to loac_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); ++ LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: KEEPALIVE timeout. 
Aborting connection to local_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); + ip_addr_debug_print_val(TCP_DEBUG, pcb->remote_ip); + LWIP_DEBUGF(TCP_DEBUG, ("\n")); + +@@ -1524,7 +1524,7 @@ tcp_slowtmr_start: + #if TCP_QUEUE_OOSEQ + if (pcb->ooseq != NULL && + (tcp_ticks - pcb->tmr >= (u32_t)pcb->rto * TCP_OOSEQ_TIMEOUT)) { +- LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: dropping OOSEQ queued data loac_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); ++ LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: dropping OOSEQ queued data local_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); + tcp_free_ooseq(pcb); + } + #endif /* TCP_QUEUE_OOSEQ */ +@@ -1534,7 +1534,7 @@ tcp_slowtmr_start: + if ((u32_t)(tcp_ticks - pcb->tmr) > + TCP_SYN_RCVD_TIMEOUT / TCP_SLOW_INTERVAL) { + ++pcb_remove; +- LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: removing pcb stuck in SYN-RCVD loac_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); ++ LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: removing pcb stuck in SYN-RCVD local_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); + } + } + +@@ -1542,7 +1542,7 @@ tcp_slowtmr_start: + if (pcb->state == LAST_ACK) { + if ((u32_t)(tcp_ticks - pcb->tmr) > 2 * TCP_MSL / TCP_SLOW_INTERVAL) { + ++pcb_remove; +- LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: removing pcb stuck in LAST-ACK loac_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); ++ LWIP_DEBUGF(TCP_DEBUG | GAZELLE_DEBUG_SERIOUS, ("tcp_slowtmr: removing pcb stuck in LAST-ACK local_port=%u, remote_port=%u\n", pcb->local_port, pcb->remote_port)); + } + } + +-- +2.33.0 + diff --git a/0089-add-struct-gz-addr.patch b/0089-add-struct-gz-addr.patch new file mode 100644 index 0000000000000000000000000000000000000000..09729fd66cc26c5ee52f7ea4ab3b7c3de98bab02 --- /dev/null +++ b/0089-add-struct-gz-addr.patch @@ -0,0 +1,97 @@ +diff -Nur 
lwip-org/src/core/tcp.c lwip-gz-addr/src/core/tcp.c +--- lwip-org/src/core/tcp.c 2023-12-04 14:10:25.364481010 +0800 ++++ lwip-gz-addr/src/core/tcp.c 2023-12-04 14:33:31.712481010 +0800 +@@ -1161,7 +1161,7 @@ + + if (__atomic_load_n(&port_state[tcp_port - TCP_LOCAL_PORT_RANGE_START], __ATOMIC_ACQUIRE) == 0) { + #if GAZELLE_ENABLE +- if (port_in_stack_queue(pcb->remote_ip, pcb->local_ip, pcb->remote_port, tcp_port)) { ++ if (port_in_stack_queue((gz_addr_t *)&pcb->remote_ip, (gz_addr_t *)&pcb->local_ip, pcb->remote_port, tcp_port)) { + tmp_port = tcp_port; + __atomic_store_n(&port_state[tcp_port - TCP_LOCAL_PORT_RANGE_START], 1, __ATOMIC_RELEASE); + break; +diff -Nur lwip-org/src/core/udp.c lwip-gz-addr/src/core/udp.c +--- lwip-org/src/core/udp.c 2023-12-04 14:10:25.364481010 +0800 ++++ lwip-gz-addr/src/core/udp.c 2023-12-04 14:19:58.832481010 +0800 +@@ -132,7 +132,7 @@ + } + + if (__atomic_load_n(&port_state[udp_port - UDP_LOCAL_PORT_RANGE_START], __ATOMIC_ACQUIRE) == 0) { +- if (port_in_stack_queue(dst_pcb->remote_ip, dst_pcb->local_ip, dst_pcb->remote_port, udp_port)) { ++ if (port_in_stack_queue((gz_addr_t *)&dst_pcb->remote_ip, (gz_addr_t *)&dst_pcb->local_ip, dst_pcb->remote_port, udp_port)) { + tmp_port = udp_port; + __atomic_store_n(&port_state[udp_port - UDP_LOCAL_PORT_RANGE_START], 1, __ATOMIC_RELEASE); + break; +diff -Nur lwip-org/src/include/lwipopts.h lwip-gz-addr/src/include/lwipopts.h +--- lwip-org/src/include/lwipopts.h 2023-12-04 14:10:25.368481010 +0800 ++++ lwip-gz-addr/src/include/lwipopts.h 2023-12-06 19:29:24.520481010 +0800 +@@ -184,6 +184,7 @@ + */ + #define LWIP_IPV6 1 + #define IP6_HLEN 40 ++#define LWIP_IPV6_SCOPES 1 + + /* + --------------------------------- +diff -Nur lwip-org/src/include/reg_sock.h lwip-gz-addr/src/include/reg_sock.h +--- lwip-org/src/include/reg_sock.h 2023-12-04 14:10:25.368481010 +0800 ++++ lwip-gz-addr/src/include/reg_sock.h 2023-12-06 19:41:19.792481010 +0800 +@@ -34,7 +34,35 @@ + #define __REG_SOCK_H__ + + #include 
+-#include "lwip/ip_addr.h" ++ ++#include "lwipopts.h" ++ ++/* compatible with ip4_addr_t */ ++struct gz_ip4 { ++ uint32_t addr; ++}; ++ ++/* compatible with ip6_addr_t */ ++#if LWIP_IPV6 ++struct gz_ip6 { ++ uint32_t addr[4]; ++#if LWIP_IPV6_SCOPES ++ uint8_t zone; ++#endif /* LWIP_IPV6_SCOPES */ ++}; ++#endif /* LWIP_IPV6 */ ++ ++/* gazelle ip address, compatible with ip_addr_t */ ++typedef struct gz_addr { ++ union { ++#if LWIP_IPV6 ++ struct gz_ip6 ip6; ++#endif /* LWIP_IPV6 */ ++ struct gz_ip4 ip4; ++ } u_addr; ++ /** @ref lwip_ip_addr_type */ ++ uint8_t type; ++} gz_addr_t; + + enum reg_ring_type { + REG_RING_TCP_LISTEN = 0, +@@ -45,10 +73,12 @@ + }; + + struct gazelle_quintuple { +- uint32_t protocol; ++ uint32_t protocol; + /* net byte order */ + uint16_t src_port; + uint16_t dst_port; ++ ++ /* TODO: replace with gz_addr_t */ + uint32_t src_ip; + uint32_t dst_ip; + #if LWIP_IPV6 +@@ -65,6 +95,6 @@ + }; + + extern int vdev_reg_xmit(enum reg_ring_type type, struct gazelle_quintuple *qtuple); +-extern bool port_in_stack_queue(ip_addr_t src_ip, ip_addr_t dst_ip, uint16_t src_port, uint16_t dst_port); ++extern bool port_in_stack_queue(gz_addr_t *src_ip, gz_addr_t *dst_ip, uint16_t src_port, uint16_t dst_port); + + #endif /* __REG_SOCK_H__ */ diff --git a/0090-frag-fix-coredump-when-get-netif.patch b/0090-frag-fix-coredump-when-get-netif.patch new file mode 100644 index 0000000000000000000000000000000000000000..e5159c4bf173823709269632aad1f4adac803ed6 --- /dev/null +++ b/0090-frag-fix-coredump-when-get-netif.patch @@ -0,0 +1,28 @@ +From 5e5195ad88819a77d59038ec49004d1e11801d08 Mon Sep 17 00:00:00 2001 +From: jiangheng +Date: Fri, 8 Dec 2023 11:14:15 +0800 +Subject: [PATCH] frag: fix coredump when get netif + +--- + src/core/ipv4/ip4_frag.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/core/ipv4/ip4_frag.c b/src/core/ipv4/ip4_frag.c +index f63a99e..b4a183a 100644 +--- a/src/core/ipv4/ip4_frag.c ++++ b/src/core/ipv4/ip4_frag.c +@@ 
-640,9 +640,9 @@ ip4_reass(struct pbuf *p) + IPH_CHKSUM_SET(fraghdr, 0); + /* @todo: do we need to set/calculate the correct checksum? */ + #if CHECKSUM_GEN_IP +- IF__NETIF_CHECKSUM_ENABLED(ip_current_input_netif(), NETIF_CHECKSUM_GEN_IP) { ++ IF__NETIF_CHECKSUM_ENABLED(netif_default, NETIF_CHECKSUM_GEN_IP) { + #if CHECKSUM_GEN_IP_HW +- if (netif_get_txol_flags(ip_current_input_netif()) & DEV_TX_OFFLOAD_IPV4_CKSUM) { ++ if (netif_get_txol_flags(netif_default) & DEV_TX_OFFLOAD_IPV4_CKSUM) { + iph_cksum_set(p, IP_HLEN, 1); + } else { + iph_cksum_set(p, IP_HLEN, 0); +-- +2.27.0 + diff --git a/0091-add-fd-log-info-and-fix-wrong-port-log-info.patch b/0091-add-fd-log-info-and-fix-wrong-port-log-info.patch new file mode 100644 index 0000000000000000000000000000000000000000..a2a8f9dd79ca18cd3cd1741fea126223eac13d5b --- /dev/null +++ b/0091-add-fd-log-info-and-fix-wrong-port-log-info.patch @@ -0,0 +1,85 @@ +From 9ef6e86cbbd7bff2d1980f9b88f3b6f9ec1457b5 Mon Sep 17 00:00:00 2001 +From: yangchen +Date: Wed, 6 Dec 2023 08:43:23 +0800 +Subject: [PATCH] add fd log info and fix wrong port log info + +--- + src/api/sockets.c | 2 +- + src/core/tcp_in.c | 20 ++++++++++---------- + 2 files changed, 11 insertions(+), 11 deletions(-) + +diff --git a/src/api/sockets.c b/src/api/sockets.c +index 65c69d4..d488b5b 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -2797,7 +2797,7 @@ event_callback(struct netconn *conn, enum netconn_evt evt, u16_t len) + #endif + break; + case NETCONN_EVT_ERROR: +- LWIP_DEBUGF(GAZELLE_DEBUG_SERIOUS, ("event_callback: have errevent, err=%d\n", conn->pending_err)); ++ LWIP_DEBUGF(GAZELLE_DEBUG_SERIOUS, ("event_callback: have errevent, err=%d, fd=%d\n", conn->pending_err, conn->socket)); + sock->errevent = 1; + #if GAZELLE_ENABLE + if (netif_is_rtc_mode(netif_default)) { +diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c +index 8ed91b5..24706c1 100644 +--- a/src/core/tcp_in.c ++++ b/src/core/tcp_in.c +@@ -688,8 +688,8 @@ aborted: + 
TCP_STATS_INC(tcp.proterr); + TCP_STATS_INC(tcp.drop); + LWIP_DEBUGF(TCP_RST_DEBUG | GAZELLE_DEBUG_SERIOUS, +- ("tcp_input: no PCB match found, send RST, dest port=%d, src port=%d\n", +- lwip_ntohs(tcphdr->dest), lwip_ntohs(tcphdr->src))); ++ ("tcp_input: no PCB match found, send RST, local_port=%d, remote_port=%d\n", ++ tcphdr->src, tcphdr->dest)); + tcp_rst(NULL, ackno, seqno + tcplen, ip_current_dest_addr(), + ip_current_src_addr(), tcphdr->dest, tcphdr->src); + } +@@ -765,8 +765,8 @@ tcp_listen_input(struct tcp_pcb_listen *pcb) + /* For incoming segments with the ACK flag set, respond with a + RST. */ + LWIP_DEBUGF(TCP_RST_DEBUG | GAZELLE_DEBUG_SERIOUS, +- ("tcp_listen_input: ACK in LISTEN, send reset, dest port=%d, src port=%d\n", +- lwip_ntohs(tcphdr->dest), lwip_ntohs(tcphdr->src))); ++ ("tcp_listen_input: ACK in LISTEN, send reset, local_port=%d, remote_port=%d\n", ++ tcphdr->src, tcphdr->dest)); + tcp_rst((const struct tcp_pcb *)pcb, ackno, seqno + tcplen, ip_current_dest_addr(), + ip_current_src_addr(), tcphdr->dest, tcphdr->src); + } else if (flags & TCP_SYN) { +@@ -902,8 +902,8 @@ tcp_timewait_input(struct tcp_pcb *pcb) + if (TCP_SEQ_BETWEEN(seqno, pcb->rcv_nxt, pcb->rcv_nxt + pcb->rcv_wnd)) { + /* If the SYN is in the window it is an error, send a reset */ + LWIP_DEBUGF(GAZELLE_DEBUG_SERIOUS, +- ("tcp_timewait_input: SYN in TIME_WAIT, send RST, dest port=%d, src port=%d\n", +- lwip_ntohs(tcphdr->dest), lwip_ntohs(tcphdr->src))); ++ ("tcp_timewait_input: SYN in TIME_WAIT, send RST, local_port=%d, remote_port=%d\n", ++ tcphdr->src, tcphdr->dest)); + tcp_rst(pcb, ackno, seqno + tcplen, ip_current_dest_addr(), + ip_current_src_addr(), tcphdr->dest, tcphdr->src); + return; +@@ -1072,8 +1072,8 @@ tcp_process(struct tcp_pcb *pcb) + /* received ACK? possibly a half-open connection */ + else if (flags & TCP_ACK) { + /* send a RST to bring the other side in a non-synchronized state. 
*/ +- LWIP_DEBUGF(GAZELLE_DEBUG_SERIOUS, ("tcp_process: ACK in SYN_SENT, send RST, dest port=%d, src port=%d\n", +- lwip_ntohs(tcphdr->dest), lwip_ntohs(tcphdr->src))); ++ LWIP_DEBUGF(GAZELLE_DEBUG_SERIOUS, ("tcp_process: ACK in SYN_SENT, send RST, local_port=%d, remote_port=%d\n", ++ tcphdr->src, tcphdr->dest)); + tcp_rst(pcb, ackno, seqno + tcplen, ip_current_dest_addr(), + ip_current_src_addr(), tcphdr->dest, tcphdr->src); + /* Resend SYN immediately (don't wait for rto timeout) to establish +@@ -1145,8 +1145,8 @@ tcp_process(struct tcp_pcb *pcb) + } else { + /* incorrect ACK number, send RST */ + LWIP_DEBUGF(GAZELLE_DEBUG_SERIOUS, +- ("tcp_process: incorrect ACK number in SYN_RCVD, send RST, ackno=%d, lastack=%d, snd_nxt=%d, dest port=%d, src port=%d\n", +- ackno, pcb->lastack, pcb->snd_nxt, lwip_ntohs(tcphdr->dest), lwip_ntohs(tcphdr->src))); ++ ("tcp_process: incorrect ACK number in SYN_RCVD, send RST, ackno=%d, lastack=%d, snd_nxt=%d, local_port=%d, remote_port=%d\n", ++ ackno, pcb->lastack, pcb->snd_nxt, tcphdr->src, tcphdr->dest)); + tcp_rst(pcb, ackno, seqno + tcplen, ip_current_dest_addr(), + ip_current_src_addr(), tcphdr->dest, tcphdr->src); + } +-- +2.33.0 + diff --git a/0092-fix-the-coredump-issue-when-UDP-traffic-is-sent.patch b/0092-fix-the-coredump-issue-when-UDP-traffic-is-sent.patch new file mode 100644 index 0000000000000000000000000000000000000000..5da581984ebb91554f0190115dca074344994db3 --- /dev/null +++ b/0092-fix-the-coredump-issue-when-UDP-traffic-is-sent.patch @@ -0,0 +1,30 @@ +From defef8f57ee35b510c4a542e54237f664ac31d5d Mon Sep 17 00:00:00 2001 +From: root +Date: Sat, 9 Dec 2023 16:54:47 +0800 +Subject: [PATCH] fix the coredump issue when UDP traffic is sent + +--- + src/api/sockets.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/src/api/sockets.c b/src/api/sockets.c +index d488b5b..a9d39ae 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -1865,7 +1865,13 @@ lwip_sendto(int s, const void *data, size_t size, 
int flags, + } else + #endif /* LWIP_CHECKSUM_ON_COPY */ + { ++#if GAZELLE_ENABLE ++ /* In the gazelle scenario, the payload is stored in send_ring, ++ and the payload stores the sock pointer information. */ ++ buf.p->payload = (void *)sock; ++#else + MEMCPY(buf.p->payload, data, short_size); ++#endif + } + err = ERR_OK; + } +-- +2.30.0 + diff --git a/0093-modfiy-accept-null-pointer-when-new-conn-receive-RST-packet-in-listening.patch b/0093-modfiy-accept-null-pointer-when-new-conn-receive-RST-packet-in-listening.patch new file mode 100644 index 0000000000000000000000000000000000000000..df7ee1c22586f33c643f868efb23c0bc42b90147 --- /dev/null +++ b/0093-modfiy-accept-null-pointer-when-new-conn-receive-RST-packet-in-listening.patch @@ -0,0 +1,37 @@ +From dcdd7d73c6083a63fe966a68f11eddcafa3fd743 Mon Sep 17 00:00:00 2001 +From: hantwofish +Date: Thu, 14 Dec 2023 14:51:37 +0800 +Subject: [PATCH] resolve null pointer + +--- + src/api/sockets.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/src/api/sockets.c b/src/api/sockets.c +index a9d39ae..0b3e4ea 100644 +--- a/src/api/sockets.c ++++ b/src/api/sockets.c +@@ -801,15 +801,17 @@ lwip_accept4(int s, struct sockaddr *addr, socklen_t *addrlen, int flags) + #endif /* GAZELLE_ENABLE */ + nsock = &sockets[newsock - LWIP_SOCKET_OFFSET]; + #if GAZELLE_ENABLE ++ int ret = 0; + struct tcp_pcb *pcb = newconn->pcb.tcp; +- if (pcb->client_rx_ring != NULL && pcb->client_tx_ring != NULL) { +- if (find_same_node_memzone(pcb, nsock) != 0) { ++ if (pcb != NULL && pcb->client_rx_ring != NULL && pcb->client_tx_ring != NULL) { ++ ret = find_same_node_memzone(pcb, nsock); ++ } ++ if (pcb == NULL || ret != 0) { + netconn_delete(newconn); + free_socket(nsock, 1); + sock_set_errno(sock, ENOTCONN); + done_socket(sock); + return -1; +- } + } + #endif + +-- +2.33.0 + diff --git a/backport-Add-outgoing-VLAN-PCP-support.patch b/backport-Add-outgoing-VLAN-PCP-support.patch new file mode 100644 index 
0000000000000000000000000000000000000000..7f89e9c45fa75025f39b8434ca9a424c291f5741 --- /dev/null +++ b/backport-Add-outgoing-VLAN-PCP-support.patch @@ -0,0 +1,143 @@ +From 95aba99f41333ad430496eab2596bc8b489ae731 Mon Sep 17 00:00:00 2001 +From: Dirk Ziegelmeier +Date: Fri, 19 Oct 2018 22:30:17 +0200 +Subject: [PATCH] Implement task #11620: Add outgoing VLAN PCP support for + Ethernet level QoS + +Apply rebased patch from Timmy Brolin +--- + src/core/tcp.c | 29 ++++++++++++++++------------- + src/core/tcp_in.c | 3 +++ + src/include/lwip/netif.h | 22 ++++++++++++++-------- + src/include/lwip/opt.h | 34 +++++++++++++++++++++++----------- + src/netif/ethernet.c | 12 ++++++++++-- + 5 files changed, 66 insertions(+), 34 deletions(-) + +diff --git a/src/core/tcp.c b/src/core/tcp.c +index ce03c8161..1f91d24ba 100644 +--- a/src/core/tcp.c ++++ b/src/core/tcp.c +@@ -892,6 +892,9 @@ tcp_listen_with_backlog_and_err(struct tcp_pcb *pcb, u8_t backlog, err_t *err) + lpcb->ttl = pcb->ttl; + lpcb->tos = pcb->tos; + ++#if LWIP_VLAN_PCP ++ lpcb->netif_hints.tci = pcb->netif_hints.tci; ++#endif /* LWIP_VLAN_PCP */ + #if GAZELLE_TCP_REUSE_IPPORT + lpcb->connect_num = 0; + lpcb->next_same_port_pcb = NULL; +index 428a6f48d..d1fe067a4 100644 +--- a/src/core/tcp_in.c ++++ b/src/core/tcp_in.c +@@ -690,6 +690,9 @@ tcp_listen_input(struct tcp_pcb_listen *pcb) + #if LWIP_CALLBACK_API || TCP_LISTEN_BACKLOG + npcb->listener = pcb; + #endif /* LWIP_CALLBACK_API || TCP_LISTEN_BACKLOG */ ++#if LWIP_VLAN_PCP ++ npcb->netif_hints.tci = pcb->netif_hints.tci; ++#endif /* LWIP_VLAN_PCP */ + /* inherit socket options */ + npcb->so_options = pcb->so_options & SOF_INHERITED; + npcb->netif_idx = pcb->netif_idx; +diff --git a/src/include/lwip/netif.h b/src/include/lwip/netif.h +index 9e2007a64..013a69b5a 100644 +--- a/src/include/lwip/netif.h ++++ b/src/include/lwip/netif.h +@@ -248,14 +248,20 @@ typedef u8_t netif_addr_idx_t; + #define NETIF_ADDR_IDX_MAX 0x7F + #endif + ++#if LWIP_NETIF_HWADDRHINT || 
LWIP_VLAN_PCP ++ #define LWIP_NETIF_USE_HINTS 1 ++ struct netif_hint { + #if LWIP_NETIF_HWADDRHINT +-#define LWIP_NETIF_USE_HINTS 1 +-struct netif_hint { +- netif_addr_idx_t addr_hint; +-}; +-#else /* LWIP_NETIF_HWADDRHINT */ +-#define LWIP_NETIF_USE_HINTS 0 +-#endif /* LWIP_NETIF_HWADDRHINT */ ++ u8_t addr_hint; ++#endif ++#if LWIP_VLAN_PCP ++ /** VLAN hader is set if this is >= 0 (but must be <= 0xFFFF) */ ++ s32_t tci; ++#endif ++ }; ++#else /* LWIP_NETIF_HWADDRHINT || LWIP_VLAN_PCP */ ++ #define LWIP_NETIF_USE_HINTS 0 ++#endif /* LWIP_NETIF_HWADDRHINT || LWIP_VLAN_PCP*/ + + /** Generic data structure used for all lwIP network interfaces. + * The following fields should be filled in by the initialization + #if LWIP_IPV6_AUTOCONFIG +diff --git a/src/include/lwip/opt.h b/src/include/lwip/opt.h +index 90fce4f05..fb4b10c8b 100644 +--- a/src/include/lwip/opt.h ++++ b/src/include/lwip/opt.h +@@ -677,6 +677,18 @@ + #define ETHARP_SUPPORT_VLAN 0 + #endif + ++/** ++ * LWIP_VLAN_PCP==1: Enable outgoing VLAN taggning of frames on a per-PCB basis ++ * for QoS purposes. With this feature enabled, each PCB has a new variable: "tci". ++ * (Tag Control Identifier). The TCI contains three fields: VID, CFI and PCP. ++ * VID is the VLAN ID, which should be set to zero. ++ * The "CFI" bit is used to enable or disable VLAN tags for the PCB. ++ * PCP (Priority Code Point) is a 3 bit field used for Ethernet level QoS. ++ */ ++#ifndef LWIP_VLAN_PCP ++#define LWIP_VLAN_PCP 0 ++#endif ++ + /** LWIP_ETHERNET==1: enable ethernet support even though ARP might be disabled + */ + #if !defined LWIP_ETHERNET || defined __DOXYGEN__ +@@ -1548,13 +1560,13 @@ + * link level header. The default is 14, the standard value for + * Ethernet. 
+ */ +-#if !defined PBUF_LINK_HLEN || defined __DOXYGEN__ +-#if defined LWIP_HOOK_VLAN_SET && !defined __DOXYGEN__ +-#define PBUF_LINK_HLEN (18 + ETH_PAD_SIZE) +-#else /* LWIP_HOOK_VLAN_SET */ +-#define PBUF_LINK_HLEN (14 + ETH_PAD_SIZE) +-#endif /* LWIP_HOOK_VLAN_SET */ +-#endif ++ #if !defined PBUF_LINK_HLEN || defined __DOXYGEN__ ++#if (defined LWIP_HOOK_VLAN_SET || LWIP_VLAN_PCP) && !defined __DOXYGEN__ ++ #define PBUF_LINK_HLEN (18 + ETH_PAD_SIZE) ++#else /* LWIP_HOOK_VLAN_SET || LWIP_VLAN_PCP */ ++ #define PBUF_LINK_HLEN (14 + ETH_PAD_SIZE) ++#endif /* LWIP_HOOK_VLAN_SET || LWIP_VLAN_PCP */ ++ #endif + + /** + * PBUF_LINK_ENCAPSULATION_HLEN: the number of bytes that should be allocated +diff --git a/src/netif/ethernet.c b/src/netif/ethernet.c +index dd171e280..9e367f8cc 100644 +--- a/src/netif/ethernet.c ++++ b/src/netif/ethernet.c +@@ -273,8 +273,16 @@ ethernet_output(struct netif * netif, struct pbuf * p, + struct eth_hdr *ethhdr; + u16_t eth_type_be = lwip_htons(eth_type); + +-#if ETHARP_SUPPORT_VLAN && defined(LWIP_HOOK_VLAN_SET) +- s32_t vlan_prio_vid = LWIP_HOOK_VLAN_SET(netif, p, src, dst, eth_type); ++#if ETHARP_SUPPORT_VLAN ++ s32_t vlan_prio_vid; ++#ifdef LWIP_HOOK_VLAN_SET ++ vlan_prio_vid = LWIP_HOOK_VLAN_SET(netif, p, src, dst, eth_type); ++#elif LWIP_VLAN_PCP ++ vlan_prio_vid = -1; ++ if (netif->hints && (netif->hints->tci >= 0)) { ++ vlan_prio_vid = (u16_t)netif->hints->tci; ++ } ++#endif + if (vlan_prio_vid >= 0) { + struct eth_vlan_hdr *vlanhdr; + + diff --git a/backport-fix-compiling-ETHARP_SUPPORT_VLAN.patch b/backport-fix-compiling-ETHARP_SUPPORT_VLAN.patch new file mode 100644 index 0000000000000000000000000000000000000000..d6014278d0706a1c51eee496801a81ae584c30c0 --- /dev/null +++ b/backport-fix-compiling-ETHARP_SUPPORT_VLAN.patch @@ -0,0 +1,33 @@ +From f72227aadcc1d0d8c46a8b4fe62ba3d03ffa42c3 Mon Sep 17 00:00:00 2001 +From: Simon Goldschmidt +Date: Wed, 7 Nov 2018 10:49:06 +0100 +Subject: [PATCH] fix compiling ETHARP_SUPPORT_VLAN 
without LWIP_HOOK_VLAN_SET + and LWIP_VLAN_PCP + +--- + src/netif/ethernet.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/netif/ethernet.c b/src/netif/ethernet.c +index 9e367f8cc..6db434b46 100644 +--- a/src/netif/ethernet.c ++++ b/src/netif/ethernet.c +@@ -273,7 +273,7 @@ ethernet_output(struct netif * netif, struct pbuf * p, + struct eth_hdr *ethhdr; + u16_t eth_type_be = lwip_htons(eth_type); + +-#if ETHARP_SUPPORT_VLAN ++#if ETHARP_SUPPORT_VLAN && (defined(LWIP_HOOK_VLAN_SET) || LWIP_VLAN_PCP) + s32_t vlan_prio_vid; + #ifdef LWIP_HOOK_VLAN_SET + vlan_prio_vid = LWIP_HOOK_VLAN_SET(netif, p, src, dst, eth_type); +@@ -297,7 +297,7 @@ ethernet_output(struct netif * netif, struct pbuf * p, + + eth_type_be = PP_HTONS(ETHTYPE_VLAN); + } else +-#endif /* ETHARP_SUPPORT_VLAN && defined(LWIP_HOOK_VLAN_SET) */ ++#endif /* ETHARP_SUPPORT_VLAN && (defined(LWIP_HOOK_VLAN_SET) || LWIP_VLAN_PCP) */ + { + if (pbuf_add_header(p, SIZEOF_ETH_HDR) != 0) { + goto pbuf_header_failed; + diff --git a/backport-tcp-fix-sequence-number-comparison.patch b/backport-tcp-fix-sequence-number-comparison.patch new file mode 100644 index 0000000000000000000000000000000000000000..5c0f960a721ac32863cc2b0a05d92165cabb7cb8 --- /dev/null +++ b/backport-tcp-fix-sequence-number-comparison.patch @@ -0,0 +1,36 @@ +From 003d34eebd223c16a3dbf6a970bb6e23cb7d1a24 Mon Sep 17 00:00:00 2001 +From: Simon Goldschmidt +Date: Fri, 27 Mar 2020 22:59:05 +0100 +Subject: [PATCH] tcp: fix sequence number comparison +This fixes both undefined behavior (see bug #51447) as well as a possible bug +where sequence numbers in 31 bit distance may come through. 
+Conflict: NA +Reference: https://git.savannah.gnu.org/cgit/lwip.git/commit/?id=003d34eebd223c16a3dbf6a970bb6e23cb7d1a24 +--- + src/include/lwip/priv/tcp_priv.h | 11 ++++------- + 1 file changed, 4 insertions(+), 7 deletions(-) +diff --git a/src/include/lwip/priv/tcp_priv.h b/src/include/lwip/priv/tcp_priv.h +index 72f9126d..c84b5be8 100644 +--- a/src/include/lwip/priv/tcp_priv.h ++++ b/src/include/lwip/priv/tcp_priv.h +@@ -106,14 +106,11 @@ err_t tcp_process_refused_data(struct tcp_pcb *pcb); + #define tcp_output_nagle(tpcb) (tcp_do_output_nagle(tpcb) ? tcp_output(tpcb) : ERR_OK) + + +-#define TCP_SEQ_LT(a,b) ((s32_t)((u32_t)(a) - (u32_t)(b)) < 0) +-#define TCP_SEQ_LEQ(a,b) ((s32_t)((u32_t)(a) - (u32_t)(b)) <= 0) +-#define TCP_SEQ_GT(a,b) ((s32_t)((u32_t)(a) - (u32_t)(b)) > 0) +-#define TCP_SEQ_GEQ(a,b) ((s32_t)((u32_t)(a) - (u32_t)(b)) >= 0) ++#define TCP_SEQ_LT(a,b) (((u32_t)((u32_t)(a) - (u32_t)(b)) & 0x80000000u) != 0) ++#define TCP_SEQ_LEQ(a,b) (!(TCP_SEQ_LT(b,a))) ++#define TCP_SEQ_GT(a,b) TCP_SEQ_LT(b,a) ++#define TCP_SEQ_GEQ(a,b) TCP_SEQ_LEQ(b,a) + /* is b<=a<=c? */ +-#if 0 /* see bug #10548 */ +-#define TCP_SEQ_BETWEEN(a,b,c) ((c)-(b) >= (a)-(b)) +-#endif + #define TCP_SEQ_BETWEEN(a,b,c) (TCP_SEQ_GEQ(a,b) && TCP_SEQ_LEQ(a,c)) + + #ifndef TCP_TMR_INTERVAL +-- +2.28.0.windows.1 diff --git a/backport-tcp-tighten-up-checks-for-received-SYN.patch b/backport-tcp-tighten-up-checks-for-received-SYN.patch new file mode 100644 index 0000000000000000000000000000000000000000..0892cbf4e1033e88a3784a5552b4335b706c8e36 --- /dev/null +++ b/backport-tcp-tighten-up-checks-for-received-SYN.patch @@ -0,0 +1,58 @@ +From adbc5b5f716d108966bcf606e61de60b83f525a5 Mon Sep 17 00:00:00 2001 +From: Simon Goldschmidt +Date: Thu, 5 Mar 2020 21:20:35 +0100 +Subject: [PATCH] tcp: tighten up checks for received SYN +Any malicous segment could contain a SYN up to now (no check). +A SYN in the wrong segment could break OOSEQ queueing. 
+Fix this by allowing SYN only in states where it is required. +See bug #56397: Assert "tcp_receive: ooseq tcplen > rcv_wnd" +Signed-off-by: Simon Goldschmidt +Conflict: NA +Reference: https://git.savannah.gnu.org/cgit/lwip.git/commit/?id=adbc5b5f716d108966bcf606e61de60b83f525a5 +--- + src/core/tcp_in.c | 17 +++++++++++++---- + 1 file changed, 13 insertions(+), 4 deletions(-) +diff --git a/src/core/tcp_in.c b/src/core/tcp_in.c +index 4bfba85f..90061281 100644 +--- a/src/core/tcp_in.c ++++ b/src/core/tcp_in.c +@@ -852,6 +852,13 @@ tcp_process(struct tcp_pcb *pcb) + + tcp_parseopt(pcb); + ++ if (flags & TCP_SYN) { ++ /* accept SYN only in 2 states: */ ++ if ((pcb->state != SYN_SENT) && (pcb->state != SYN_RCVD)) { ++ return ERR_OK; ++ } ++ } ++ + /* Do different things depending on the TCP state. */ + switch (pcb->state) { + case SYN_SENT: +@@ -924,7 +931,12 @@ tcp_process(struct tcp_pcb *pcb) + } + break; + case SYN_RCVD: +- if (flags & TCP_ACK) { ++ if (flags & TCP_SYN) { ++ if (seqno == pcb->rcv_nxt - 1) { ++ /* Looks like another copy of the SYN - retransmit our SYN-ACK */ ++ tcp_rexmit(pcb); ++ } ++ } else if (flags & TCP_ACK) { + /* expected ACK number? 
*/ + if (TCP_SEQ_BETWEEN(ackno, pcb->lastack + 1, pcb->snd_nxt)) { + pcb->state = ESTABLISHED; +@@ -975,9 +987,6 @@ tcp_process(struct tcp_pcb *pcb) + tcp_rst(pcb, ackno, seqno + tcplen, ip_current_dest_addr(), + ip_current_src_addr(), tcphdr->dest, tcphdr->src); + } +- } else if ((flags & TCP_SYN) && (seqno == pcb->rcv_nxt - 1)) { +- /* Looks like another copy of the SYN - retransmit our SYN-ACK */ +- tcp_rexmit(pcb); + } + break; + case CLOSE_WAIT: +-- +2.28.0.windows.1 diff --git a/lwip-2.1.2.zip b/lwip-2.1.3.zip similarity index 36% rename from lwip-2.1.2.zip rename to lwip-2.1.3.zip index d728ab590c215283e429dc108fb47654dfc0c4a4..9f897f7cd3ef410487b4019bde4f0591c8855eea 100644 Binary files a/lwip-2.1.2.zip and b/lwip-2.1.3.zip differ diff --git a/lwip.spec b/lwip.spec index 3904f22a05bc297d1b90bc07ea87710832e45999..d8825b316b580ff86f94de059d585d70f8efecd1 100644 --- a/lwip.spec +++ b/lwip.spec @@ -3,19 +3,116 @@ Summary: lwip is a small independent implementation of the TCP/IP protocol suite Name: lwip -Version: 2.1.2 -Release: 1 +Version: 2.1.3 +Release: 101 License: BSD URL: http://savannah.nongnu.org/projects/lwip/ Source0: http://download.savannah.nongnu.org/releases/lwip/%{name}-%{version}.zip -Patch0: 0001-add-makefile.patch +Patch6001: backport-tcp-fix-sequence-number-comparison.patch +Patch6002: backport-tcp-tighten-up-checks-for-received-SYN.patch -BuildRequires: gcc-c++ dos2unix +Patch9000: 0001-add-makefile.patch +Patch9001: 0002-adapt-lstack.patch +Patch9002: 0003-fix-the-occasional-coredump-when-the-lwip-exits.patch +Patch9003: 0004-fix-error-of-deleting-conn-table-in-connect.patch +Patch9004: 0005-syn-rcvd-state-reg-conn-into-conntable.patch +Patch9005: 0006-fix-coredump-in-etharp.patch +Patch9006: 0007-gazelle-fix-epoll_ctl-EPOLLET-mode-error.patch +Patch9007: 0008-gazelle-fix-lwip_accept-memcpy-sockaddr-large.patch +Patch9008: 0009-fix-stack-buffer-overflow-when-memcpy-addr.patch +Patch9009: 
0010-fix-the-incomplete-release-of-the-conntable.patch +Patch9010: 0011-remove-gazelle-tcp-conn-func.patch +Patch9011: 0012-fix-incomplete-resource-release-in-lwip-close.patch +Patch9012: 0013-remove-gazelle-syscall-thread.patch +Patch9013: 0014-fix-some-compile-errors.patch +Patch9014: 0015-fix-tcp-port-alloc-issue.patch +Patch9015: 0016-lstack-support-mysql-mode.patch +Patch9016: 0017-support-REUSEPOR-option.patch +Patch9017: 0018-exec-gazelle_init_sock-before-read-event.patch +Patch9018: 0019-gazelle-reduce-copy-in-send.patch +Patch9019: 0020-remove-chose_dlsym_handle-function-set-handle-to-RTL.patch +Patch9020: 0021-refactor-event-if-ring-is-full-the-node-is-added-to-.patch +Patch9021: 0022-notify-app-that-sock-state-changes-to-CLOSE_WAIT.patch +Patch9022: 0023-refactor-event-and-checksum-offload-support.patch +Patch9023: 0024-refactor-pkt-read-send-performance.patch +Patch9024: 0025-Replace-gettid-with-syscall-SYS_gettid.patch +Patch9025: 0026-del-redundant-wait_close-and-move-epoll_events-pos.patch +Patch9026: 0027-modify-EISCONN-condition.patch +Patch9027: 0028-per-thread-reassdata-variables.patch +Patch9028: 0029-fix-EISCONN-err-and-remove-same-customized-modificat.patch +Patch9029: 0030-refactor-tcp-new-port.patch +Patch9030: 0031-refactor-add-event-limit-send-pkts-num.patch +Patch9031: 0032-fix-free-pbuf-miss-data.patch +Patch9032: 0033-alloc-socket-fail-clean-sock.patch +Patch9033: 0034-add-accept4-and-epoll_create1.patch +Patch9034: 0035-add-writev-and-readv.patch +Patch9035: 0036-add-fs-secure-compilation-option.patch +Patch9036: 0037-enable-ARP-QUEUE-to-avoid-sync-packet-dropped.patch +Patch9037: 0038-add-tso.patch +Patch9038: 0039-optimize-app-thread-write-buff-block.patch +Patch9039: 0040-add-huge-snd_buf.patch +Patch9040: 0041-optimite-pcb-list-limit-send-size-and-ack-now.patch +Patch9041: 0042-expand-recv-win.patch +Patch9042: 0043-add-prefetch.patch +Patch9043: 0044-skip-unnecessary-tcp_route.patch +Patch9044: 
0045-add-variable-in-struct-sock.patch +Patch9045: 0046-add-dataack-when-recv-too-many-acks-with-data.patch +Patch9046: 0047-reduce-struct-pbuf-size.patch +Patch9047: 0048-listen-pcb-also-use-pcb_if.patch +Patch9048: 0049-expand-recv-mbox-size.patch +Patch9049: 0050-lwip-reuse-ip-port.patch +Patch9050: 0051-lwip-add-need_tso_send.patch +Patch9051: 0052-lwip_fnctl-only-support-F_SETFL-F_GETFL.patch +Patch9052: 0053-cleancode-improve-lwipopts.h-readability.patch +Patch9053: 0054-reduce-cpu-usage-when-send.patch +Patch9054: 0055-add-pbuf-lock-when-aggregate-pbuf.patch +Patch9055: 0056-fix-tso-small-packet-drop-in-kernel-server.patch +Patch9056: 0057-same-node-gazellectl-a.patch +Patch9057: 0058-lwip-send-recv-thread-bind-numa.patch +Patch9058: 0059-fix-last_unsent-last_unacked.patch +Patch9059: 0060-lwip-add-udp-multicast.patch +Patch9060: 0061-fix-pbuf-leak-in-udp-connection.patch +Patch9061: 0062-drop-netbuf-in-recv_udp-to-fix-mem-overflow.patch +Patch9062: 0063-optimize-avoid-too-many-empty-acks-in-tcp_input.patch +Patch9063: 0064-fix-udp-send-recv-in-multiple-queue.patch +Patch9064: 0065-fix-udp-recvmbox-size-not-set.patch +Patch9065: 0066-adapt-to-dpdk-19.11-and-dpdk-21.11.patch +Patch9066: 0067-fix-null-pointer-when-all-zero-address-listen.patch +Patch9067: 0068-enable-UDP-CKSUM-in-lwip.patch +Patch9068: 0069-add-error-check-in-hugepage_init-and-sys_mbox_free.patch +Patch9069: 0070-add-CHECKSUM_UDP-when-not-support-OFFLOAD_UDP_CHECKS.patch +Patch9070: 0071-fix-pbuf-tot_len-incorrect-after-pbuf_split_64k-is-c.patch +Patch9071: 0072-add-O_NONBLOCK-and-FIONBIO-when-not-defined.patch +Patch9072: 0073-lstack_lwip-external-api-start-with-do_lwip_-prefix.patch +Patch9073: 0074-gazelle-offloads-are-registered-to-lwip.patch +Patch9074: 0075-adapt-read-write-for-rtc-mode.patch +Patch9075: 0076-fix-recvmsg-return-EINVAL.patch +Patch9076: 0077-adpat-event-for-rtc-mode.patch +Patch9077: 0078-posix_api-support-select.patch +Patch6003: 
backport-Add-outgoing-VLAN-PCP-support.patch +Patch6004: backport-fix-compiling-ETHARP_SUPPORT_VLAN.patch +Patch9078: 0079-enable-vlan-define.patch +Patch9079: 0080-enable-ipv6.patch +Patch9080: 0081-ip6-hdr.patch +Patch9081: 0082-add-vlanid-in-netif.patch +Patch9082: 0083-lwipopts-add-lwip-debug-log-macro.patch +Patch9083: 0084-add-tcpslowtmr-log-and-tcpfasttmr-cnt.patch +Patch9084: 0085-add-lwip-log-tcp_rst-tcp_abandon-tcp_abort.patch +Patch9085: 0086-log-add-errevent-log-and-tcp-exception-statistics.patch +Patch9086: 0087-support-vlan-offload.patch +Patch9087: 0088-modify-log-info-err.patch +Patch9088: 0089-add-struct-gz-addr.patch +Patch9089: 0090-frag-fix-coredump-when-get-netif.patch +Patch9090: 0091-add-fd-log-info-and-fix-wrong-port-log-info.patch +Patch9091: 0092-fix-the-coredump-issue-when-UDP-traffic-is-sent.patch +Patch9092: 0093-modfiy-accept-null-pointer-when-new-conn-receive-RST-packet-in-listening.patch + +BuildRequires: gcc-c++ dos2unix dpdk-devel #Requires: -ExclusiveArch: x86_64 aarch64 +ExclusiveArch: x86_64 aarch64 loongarch64 sw_64 %description lwip is a small independent implementation of the TCP/IP protocol suite. @@ -23,16 +120,16 @@ lwip is a small independent implementation of the TCP/IP protocol suite. 
%prep %setup -n %{name}-%{version} -q find %{_builddir}/%{name}-%{version} -type f -exec dos2unix -q {} \; - -%patch0 -p1 +%autopatch -p1 %build +#export DPDK_VERSION_1911=1 cd %{_builddir}/%{name}-%{version}/src %make_build %install cd %{_builddir}/%{name}-%{version}/src -%make_install +%make_install INSTALL_LIB=%{buildroot}%{_libdir} %files %defattr(0644,root,root) @@ -40,6 +137,327 @@ cd %{_builddir}/%{name}-%{version}/src %{_libdir}/liblwip.a %changelog +* Wed Dec 27 2023 yangchen - 2.1.3-101 +- rolling back unrelated patches + +* Mon Dec 25 2023 hankangkang - 2.1.3-100 +- Mod the issue that 2w connection unable to establish + +* Sat Dec 23 2023 yangchen - 2.1.3-99 +- tcp_send_fin: add the fin to the last unsent segment + +* Wed Dec 20 2023 yangchen - 2.1.3-98 +- event_callback: del errevent log if err is ERR_OK + +* Fri Dec 15 2023 yangchen - 2.1.3-97 +- lwip log: fix reversed port in tcp_input + +* Thu Dec 14 2023 hankangkang - 2.1.3-96 +- modfiy-accept-null-pointer-when-new-conn-receive-RST-packet-in-listening + +* Sat Dec 9 2023 wuchangye - 2.1.3-95 +- fix the coredump issue when UDP traffic is sent + +* Fri Dec 8 2023 yangchen - 2.1.3-94 +- add fd log info and fix wrong port log info + +* Fri Dec 8 2023 jiangheng - 2.1.3-93 +- ip4 frag: fix coredump when get netif + +* Wed Dec 6 2023 zhengjiebing - 2.1.3-92 +- add struct gz_addr_t + +* Tue Dec 5 2023 hankangkang - 2.1.3-91 +- modify-log-info-err + +* Tue Nov 28 2023 zhengjiebing - 2.1.3-90 +- support vlan offload + +* Tue Nov 28 2023 jiangheng - 2.1.3-89 +- log: add errevent log and tcp exception statistics + +* Tue Nov 28 2023 yangchen - 2.1.3-88 +- add lwip log: tcp_rst & tcp_abandon & tcp_abort + +* Tue Nov 28 2023 hankangkang - 2.1.3-87 +- lwipopts: add tcpslowtmr log and tcpfasttmr cnt + +* Mon Nov 27 2023 yangchen - 2.1.3-86 +- lwipopts: add lwip debug log macro + +* Mon Nov 27 2023 zhengjiebing - 2.1.3-85 +- add vlan_id in netif + +* Fri Nov 24 2023 zhangxingrong - 2.1.3-84 +- modify error date + +* 
Fri Nov 17 2023 zhengjiebing - 2.1.3-83 +- enable ipv6 in lwip + +* Fri Nov 03 2023 zhujunhao - 2.1.3-82 +- add support vlan + +* Fri Nov 03 2023 yangchen - 2.1.3-81 +- posix_api support select + +* Fri Oct 27 2023 jiangheng - 2.1.3-80 +- adapt read/write for rtc mode +- fix recvmsg return EINVAL +- adapt event for rtc mode + +* Tue Oct 24 2023 jiangheng - 2.1.3-79 +- gazelle offloads are registered to lwip + +* Sun Oct 08 2023 jiangheng - 2.1.3-78 +- lstack_lwip: external api start with do_lwip_ prefix + +* Sun Oct 08 2023 panchenbo - 2.1.3-77 +- add O_NONBLOCK and FIONBIO when not defined + +* Fri Sep 15 2023 jiangheng - 2.1.3-76 +- fix pbuf->tot_len incorrect after pbuf_split_64k is called + +* Tue Jun 27 2023 kircher - 2.1.3-75 +- add CHECKSUM_UDP when not support OFFLOAD_UDP_CHECKS + +* Sun Jun 25 2023 jiangheng - 2.1.3-74 +- add error check in hugepage_init and sys_mbox_free + +* Wed Jun 21 2023 kircher - 2.1.3-73 +- enable udp cksum in lwip + +* Thu Jun 15 2023 Lemmy Huang - 2.1.3-72 +- fix null pointer when zero port listen + +* Thu Jun 15 2023 Lemmy Huang - 2.1.3-71 +- patch -p1 automation in lwip.spec + +* Thu Jun 15 2023 Lemmy Huang - 2.1.3-70 +- adapt to dpdk-19.11 and dpdk-21.11 + +* Wed Jun 14 2023 jiangheng - 2.1.3-69 +- fix udp recvmbox size not set + +* Wed Jun 14 2023 jiangheng - 2.1.3-68 +- fix udp send/recv in multiple queue + +* Wed Jun 07 2023 Lemmy Huang - 2.1.3-67 +- optimize: avoid too many empty acks in tcp_input + +* Tue Jun 06 2023 jiangheng - 2.1.3-66 +- revert cleancode series patches + +* Mon May 29 2023 kircher - 2.1.3-65 +- drop netbuf in recv_udp to fix mem overflow + +* Mon May 29 2023 Lemmy Huang - 2.1.3-64 +- cleancode: refactor memp + +* Mon May 29 2023 Lemmy Huang - 2.1.3-63 +- cleancode: refactor OFFLOAD_CHECKSUM GAZELLE_TCP_DATAACKS_REXMIT GAZELLE_TCP_NEW_PORT + +* Mon May 29 2023 Lemmy Huang - 2.1.3-62 +- fix spec patch9069 + +* Mon May 29 2023 Lemmy Huang - 2.1.3-61 +- cleancode: refactor sys_now and lwip_ioctl + +* Mon May
29 2023 Lemmy Huang - 2.1.3-60 +- cleancode: refactor GAZELLE_TCP_PCB_HASH + +* Mon May 29 2023 Lemmy Huang - 2.1.3-59 +- cleancode: refactor options define + +* Thu May 25 2023 Lemmy Huang - 2.1.3-58 +- cleancode: refactor gazelle_hlist.h + +* Thu May 25 2023 Lemmy Huang - 2.1.3-57 +- cleancode: refactor gazelle_list.h + +* Wed May 24 2023 Lemmy Huang - 2.1.3-56 +- cleancode: refactor gazelle_posix_api.h + +* Tue May 23 2023 Lemmy Huang - 2.1.3-55 +- cleancode: refactor lwipsock.h + +* Tue May 23 2023 Lemmy Huang - 2.1.3-54 +- cleancode: remove perf +- cleancode: rename gazelle files in lwip + +* Tue May 23 2023 Lemmy Huang - 2.1.3-53 +- cleancode: improving makefile readability + +* Tue May 16 2023 kircher - 2.1.3-52 +- fix pbuf leak in udp connection + +* Fri May 12 2023 kircher - 2.1.3-51 +- add udp multicast support in lwip + +* Sat Apr 01 2023 jiangheng - 2.1.3-50 +- fix last_unsent/last_unacked error +- fix send failed due to pcb->nrtx > TCP_MAXRTX + +* Wed Mar 22 2023 kircher - 2.1.3-49 +- lwip send recv thread bind numa + +* Mon Mar 13 2023 jiangheng - 2.1.3-48 +- add same node ring & gazellectl -a + +* Mon Mar 13 2023 jiangheng - 2.1.3-47 +- fix tso small packet drop in kernel server + +* Mon Mar 13 2023 jiangheng - 2.1.3-46 +- use pbuf lock when aggregate pbuf + +* Fri Mar 10 2023 jiangheng - 2.1.3-45 +- reduce cpu usage when send + +* Thu Mar 9 2023 Lemmy Huang - 2.1.3-44 +- cleancode: improve lwipopts.h readability + +* Wed Feb 22 2023 jiangheng - 2.1.3-43 +- lwip_fnctl only support F_SETFL,F_GETFL, other opt return 0 for compatibility + +* Tue Feb 21 2023 majun - 2.1.3-42 +- add lwip need_tso_send + +* Tue Feb 14 2023 majun - 2.1.3-41 +- add lwip reuse ip port + +* Sat Feb 11 2023 majun - 2.1.3-40 +- fix TSO snd_nxt incorrectly update + +* Fri Dec 30 2022 wuchangsheng - 2.1.3-39 +- expand recv mbox size + +* Wed Dec 21 2022 jiangheng - 2.1.3-38 +- move pcb_if to ip_pcb to let listen pcb can use it + +* Wed Dec 21 2022 wuchangsheng - 2.1.3-37 +- reduce
struct pbuf size + +* Wed Dec 21 2022 kircher - 2.1.3-36 +- do not update cwnd when send dataack + +* Tue Dec 20 2022 kircher - 2.1.3-35 +- fix the dataack is always lower than 256 + +* Tue Dec 20 2022 kircher - 2.1.3-34 +- add dataack when recv too many acks with data + +* Tue Dec 20 2022 wuchangsheng - 2.1.3-33 +- add variable in struct sock + +* Mon Dec 19 2022 kircher - 2.1.3-32 +- skip unnecessary tcp_route + +* Sun Dec 18 2022 wuchangsheng - 2.1.3-31 +- expand rcv wnd size and add prefetch + +* Tue Dec 13 2022 wuchangsheng - 2.1.3-30 +- optimize pcb unsent and unacked list + fast rexmit all pkts + +* Tue Dec 6 2022 zhujunhao - 2.1.3-29 +- add huge snd_buf + +* Sat Dec 3 2022 wuchangsheng - 2.1.3-28 +- add tso define + +* Thu Dec 01 2022 jiangheng - 2.1.3-27 +- remove lwip-2.1.3.tar.gz + +* Sat Nov 26 2022 jiangheng - 2.1.3-26 +- replace lwip-2.1.3.tar.gz to lwip-2.1.3.zip + +* Wed Nov 23 2022 jiangheng - 2.1.3-25 +- enable ARP QUEUE to avoid packet dropped + +* Sat Oct 22 2022 jiangheng - 2.1.3-24 +- add fs secure compilation option + +* Wed Oct 19 2022 zhujunhao - 2.1.3-23 +- add writev and readv + +* Sat Oct 15 2022 zhujunhao - 2.1.3-22 +- add epoll_create1 and accept4 + +* Tue Oct 11 2022 wuchangsheng - 2.1.3-21 +- alloc socket fail clean sock + +* Thu Oct 6 2022 wuchangsheng - 2.1.3-20 +- fix miss data due to free pbuf + close debug + +* Thu Oct 6 2022 wuchangsheng - 2.1.3-19 +- refactor add event + limit send pkts num max 10 + +* Thu Oct 6 2022 wuchangsheng - 2.1.3-18 +- fix multithread duplicate port num + support select appropriate port num to rss same as nic + +* Thu Oct 6 2022 wuchangsheng - 2.1.3-17 +- fix EISCONN condition err + remove same customized modification + +* Thu Oct 6 2022 wuchangsheng - 2.1.3-16 +- per thread reassdata variables + +* Thu Oct 6 2022 wuchangsheng - 2.1.3-15 +- modify EISCONN path condition + add in_send and send_flag value in sock + +* Tue Jul 26 2022 wuchangsheng - 2.1.3-14 +- del redundant wait_close in lwip_sock + move
epoll_events into cache aligned area + +* Tue Jul 12 2022 Honggang Li - 2.1.3-13 +- Replace gettid() with syscall() + +* Fri Jul 8 2022 xiusailong - 2.1.3-12 +- sync two patches from 20.03-LTS-SP1 + +* Thu Jul 7 2022 wuchangsheng - 2.1.3-11 +- refactor pkt read send performance + +* Tue Mar 29 2022 jiangheng - 2.1.3-10 +- refactor event +- add HW checksum offload support + +* Tue Mar 15 2022 jiangheng - 2.1.3-9 +- notify app that sock state changes to CLOSE_WAIT + +* Tue Mar 15 2022 jiangheng - 2.1.3-8 +- refactor event,if ring is full, node is added to list + +* Mon Mar 07 2022 jiangheng - 2.1.3-7 +- remove chose_dlsym_handle function as it is redundant + +* Mon Mar 07 2022 wu-changsheng - 2.1.3-6 +- gazelle reduce copy in send + +* Mon Mar 07 2022 jiangheng - 2.1.3-5 +- exec gazelle_sock_init before read event + +* Thu Mar 03 2022 jiangheng - 2.1.3-4 +- support REUSEPORT option +- fix rpc msg too much +- fix recurring events + +* Thu Feb 24 2022 jiangheng - 2.1.3-3 +- remove kernel socket interface +- support the mode that listen and accept thread be separated + +* Fri Dec 31 2021 jiangheng - 2.1.3-2 +- adapt to lstack + +* Fri Nov 26 2021 jiangheng - 2.1.3-1 +- update to 2.1.3 + +* Mon Sep 06 2021 jiangheng - 2.1.2-2 +- backport some patches from community + * Mon Nov 30 2020 peanut_huang - 2.1.2-1 - remove README