diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index c4b5ba51ee838dbe87dd28028aa5a4d5937768f5..51fc22e0e0eba40fac373cb3c487138b01aade70 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -364,9 +364,6 @@ struct tcp_sock {
 #define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) (TP->bpf_sock_ops_cb_flags & ARG)
 #else
 #define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) 0
-#endif
-#if defined(CONFIG_TCP_NATA_URC) || defined(CONFIG_TCP_NATA_STL)
-	u16	nata_data_retries;
 #endif
 
 	u16	timeout_rehash;	/* Timeout-triggered rehash attempts */
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c1ac02f9a063f9803f83c7d29a67c012f03c5606..8e54b58fb09c90320756817a9aa2462d95dd4281 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -20,6 +20,9 @@
 
 #include
 #include
+#if defined(CONFIG_TCP_NATA_URC) || defined(CONFIG_TCP_NATA_STL)
+#include <net/nata.h>
+#endif
 
 /* Cancel timers, when they are not required. */
 #undef INET_CSK_CLEAR_TIMERS
@@ -53,14 +56,6 @@ struct inet_connection_sock_af_ops {
 	void	    (*mtu_reduced)(struct sock *sk);
 };
 
-#if defined(CONFIG_TCP_NATA_URC) || defined(CONFIG_TCP_NATA_STL)
-enum nata_retries_type_t {
-	NATA_NA = 0,
-	NATA_URC = 1,
-	NATA_STL = 2,
-};
-#endif
-
 /** inet_connection_sock - INET connection oriented sock
  *
  * @icsk_accept_queue:	   FIFO of established children
@@ -119,6 +114,7 @@ struct inet_connection_sock {
 #if defined(CONFIG_TCP_NATA_URC) || defined(CONFIG_TCP_NATA_STL)
 	__u8			  nata_retries_enabled:1,
 				  nata_reserved:7;
+	__u8			  nata_data_retries;
 	__u8			  nata_retries_type;
 	__u32			  nata_syn_rto;
 	__u32			  nata_data_rto;
@@ -230,18 +226,28 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
 #if defined(CONFIG_TCP_NATA_URC) || defined(CONFIG_TCP_NATA_STL)
 static inline unsigned long get_nata_rto(struct sock *sk,
 					 struct inet_connection_sock *icsk,
-					 unsigned long when)
+					 unsigned long when, const int what)
 {
-	if (!icsk->nata_retries_enabled)
+	unsigned long when_nata;
+	unsigned long shift;
+
+	if (!icsk->nata_retries_enabled || what != ICSK_TIME_RETRANS)
 		return when;
 
 	if (icsk->nata_retries_type == NATA_STL)
 		return sk->sk_state == TCP_SYN_SENT ?
-			icsk->nata_syn_rto : icsk->nata_data_rto;
-	if (icsk->nata_retries_type == NATA_URC)
-		return when >= icsk->nata_data_rto ? icsk->nata_data_rto : when;
-
-	return when;
+		       icsk->nata_syn_rto : icsk->nata_data_rto;
+
+	when_nata = icsk->nata_data_rto;
+	if (icsk->icsk_retransmits > icsk->nata_data_retries) {
+		shift = icsk->icsk_retransmits - icsk->nata_data_retries;
+		if (shift > MAX_SHIFT) {
+			when_nata = NATA_RTO_MAX;
+		} else {
+			when_nata <<= shift;
+		}
+	}
+	return min(when, when_nata);
 }
 #endif
 
@@ -255,7 +261,7 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
 #if defined(CONFIG_TCP_NATA_URC) || defined(CONFIG_TCP_NATA_STL)
-	when = get_nata_rto(sk, icsk, when);
+	when = get_nata_rto(sk, icsk, when, what);
 #endif
 
 	if (when > max_when) {
diff --git a/include/net/nata.h b/include/net/nata.h
new file mode 100644
index 0000000000000000000000000000000000000000..2e18c0e86aa9c72b31d2604762050c2b60fb92aa
--- /dev/null
+++ b/include/net/nata.h
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2024 Huawei Device Co., Ltd.
+ *
+ * Network and Application-driven Transport Augmentation (NATA).
+ * Authors: yangyanjun
+ */
+#ifndef _INET_NATA_H
+#define _INET_NATA_H
+#if defined(CONFIG_TCP_NATA_URC) || defined(CONFIG_TCP_NATA_STL)
+#include
+#include
+
+#define NATA_DATA_RETRIES_MAX	50
+#define NATA_SYN_RETRIES_MAX	50
+#define BITS_PRE_BYTE		8
+#define NATA_RTO_MAX_SHIFT	17
+#define NATA_RTO_MAX		((unsigned)(120*HZ))
+#define MAX_SHIFT		(sizeof(unsigned long) * BITS_PER_BYTE - NATA_RTO_MAX_SHIFT)
+
+enum nata_retries_type_t {
+	NATA_NA = 0,
+	NATA_URC = 1,
+	NATA_STL = 2,
+};
+
+#ifdef CONFIG_TCP_NATA_URC
+#define NATA_URC_RTO_MS_MIN	200	// 200ms
+#define NATA_URC_RTO_MS_MAX	120000	// 120s
+#define NATA_URC_RTO_MS_TO_HZ	1000
+int tcp_set_nata_urc(struct sock *sk, sockptr_t optval, int optlen);
+#endif /* CONFIG_TCP_NATA_URC */
+
+#ifdef CONFIG_TCP_NATA_STL
+#define NATA_STL_SYN_RTO_MS_MIN		800	// 800ms
+#define NATA_STL_DATA_RTO_MS_MIN	1800	// 1800ms
+#define NATA_STL_RTO_MS_MAX		120000	// 120s
+#define NATA_STL_RTO_MS_TO_HZ		1000
+int tcp_set_nata_stl(struct sock *sk, sockptr_t optval, int optlen);
+#endif /* CONFIG_TCP_NATA_STL */
+
+#endif
+#endif /* _INET_NATA_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e9c9262ba34b58361d4d5c8406ef2e6b3feadfeb..f11753a65e4c1c5768184a3211d9e096146bb316 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2415,16 +2415,4 @@ static inline u64 tcp_transmit_time(const struct sock *sk)
 	return 0;
 }
 
-#if defined(CONFIG_TCP_NATA_URC) || defined(CONFIG_TCP_NATA_STL)
-static inline int tcp_get_retries_limit(struct sock *sk)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (inet_csk(sk)->nata_retries_enabled)
-		return tp->nata_data_retries;
-
-	return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
-}
-#endif
-
 #endif	/* _TCP_H */
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 5b77a46885b958f99b7a7886d96922843495baaf..71e50f94d3c0b33573716f7bd654c81d33b51fe9 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -64,6 +64,7 @@ obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
 obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o
 obj-$(CONFIG_BPF_STREAM_PARSER) += udp_bpf.o
 obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
+obj-$(findstring y, $(CONFIG_TCP_NATA_URC) $(CONFIG_TCP_NATA_STL)) += nata.o
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
 	xfrm4_output.o xfrm4_protocol.o
 
diff --git a/net/ipv4/nata.c b/net/ipv4/nata.c
new file mode 100644
index 0000000000000000000000000000000000000000..074610a007df6ebd4824845c931cbaba9481e3b0
--- /dev/null
+++ b/net/ipv4/nata.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2024 Huawei Device Co., Ltd.
+ *
+ * Network and Application-driven Transport Augmentation (NATA).
+ * Authors: yangyanjun
+ */
+#if defined(CONFIG_TCP_NATA_URC) || defined(CONFIG_TCP_NATA_STL)
+#include
+#include
+#include
+#include
+
+#ifdef CONFIG_TCP_NATA_URC
+int tcp_set_nata_urc(struct sock *sk, sockptr_t optval, int optlen)
+{
+	int err = -EINVAL;
+	struct tcp_nata_urc opt = {};
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	if (optlen != sizeof(struct tcp_nata_urc))
+		return err;
+
+	if (copy_from_sockptr(&opt, optval, optlen))
+		return err;
+
+	if (!opt.nata_urc_enabled) {
+		icsk->nata_retries_enabled = opt.nata_urc_enabled;
+		icsk->nata_retries_type = NATA_NA;
+		icsk->icsk_syn_retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syn_retries);
+		icsk->nata_data_retries = 0;
+		icsk->nata_syn_rto = TCP_TIMEOUT_INIT;
+		icsk->nata_data_rto = TCP_TIMEOUT_INIT;
+		return 0;
+	}
+
+	if (opt.nata_rto_ms < NATA_URC_RTO_MS_MIN ||
+	    opt.nata_rto_ms > NATA_URC_RTO_MS_MAX)
+		return err;
+
+	if (opt.nata_data_retries > NATA_DATA_RETRIES_MAX ||
+	    opt.nata_syn_retries > NATA_SYN_RETRIES_MAX)
+		return err;
+
+	icsk->nata_retries_enabled = opt.nata_urc_enabled;
+	icsk->nata_retries_type = NATA_URC;
+	icsk->icsk_syn_retries = opt.nata_syn_retries;
+	icsk->nata_data_retries = opt.nata_data_retries;
+	icsk->nata_data_rto = opt.nata_rto_ms * HZ / NATA_URC_RTO_MS_TO_HZ;
+	icsk->nata_syn_rto = icsk->nata_data_rto;
+	return 0;
+}
+#endif /* CONFIG_TCP_NATA_URC */
+
+#ifdef CONFIG_TCP_NATA_STL
+int tcp_set_nata_stl(struct sock *sk, sockptr_t optval, int optlen)
+{
+	int err = -EINVAL;
+	struct tcp_nata_stl opt = {};
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	if (optlen != sizeof(struct tcp_nata_stl))
+		return err;
+
+	if (copy_from_sockptr(&opt, optval, optlen))
+		return err;
+
+	if (!opt.nata_stl_enabled) {
+		icsk->nata_retries_enabled = opt.nata_stl_enabled;
+		icsk->nata_retries_type = NATA_NA;
+		icsk->icsk_syn_retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syn_retries);
+		icsk->nata_data_retries = 0;
+		icsk->nata_syn_rto = TCP_TIMEOUT_INIT;
+		icsk->nata_data_rto = TCP_TIMEOUT_INIT;
+		return 0;
+	}
+
+	if ((opt.nata_syn_rto_ms < NATA_STL_SYN_RTO_MS_MIN ||
+	     opt.nata_syn_rto_ms > NATA_STL_RTO_MS_MAX ||
+	     opt.nata_data_rto_ms < NATA_STL_DATA_RTO_MS_MIN ||
+	     opt.nata_data_rto_ms > NATA_STL_RTO_MS_MAX))
+		return err;
+
+	if (opt.nata_data_retries > NATA_DATA_RETRIES_MAX ||
+	    opt.nata_syn_retries > NATA_SYN_RETRIES_MAX)
+		return err;
+
+	icsk->nata_retries_enabled = opt.nata_stl_enabled;
+	icsk->nata_retries_type = NATA_STL;
+	icsk->icsk_syn_retries = opt.nata_syn_retries;
+	icsk->nata_data_retries = opt.nata_data_retries;
+	icsk->nata_syn_rto = opt.nata_syn_rto_ms * HZ / NATA_STL_RTO_MS_TO_HZ;
+	icsk->nata_data_rto = opt.nata_data_rto_ms * HZ / NATA_STL_RTO_MS_TO_HZ;
+	return 0;
+}
+#endif /* CONFIG_TCP_NATA_STL */
+#endif
\ No newline at end of file
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ee2b3084968c77ea82d1e1a9a7b92dd3cad26627..deb84d431e5a71023b43c1ce9d7c9fe82d4c83fc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -282,6 +282,9 @@
 #ifdef CONFIG_LOWPOWER_PROTOCOL
 #include
 #endif /* CONFIG_LOWPOWER_PROTOCOL */
+#if defined(CONFIG_TCP_NATA_URC) || defined(CONFIG_TCP_NATA_STL)
+#include <net/nata.h>
+#endif
 
 DEFINE_PER_CPU(unsigned int, tcp_orphan_count);
 EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count);
@@ -465,7 +468,7 @@ void tcp_init_sock(struct sock *sk)
 	icsk->nata_retries_type = NATA_NA;
 	icsk->nata_syn_rto = TCP_TIMEOUT_INIT;
 	icsk->nata_data_rto = TCP_TIMEOUT_INIT;
-	tp->nata_data_retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
+	icsk->nata_data_retries = 0;
 #endif
 }
 EXPORT_SYMBOL(tcp_init_sock);
@@ -2856,7 +2859,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	icsk->nata_retries_type = NATA_NA;
 	icsk->nata_syn_rto = TCP_TIMEOUT_INIT;
 	icsk->nata_data_rto = TCP_TIMEOUT_INIT;
-	tp->nata_data_retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
+	icsk->nata_data_retries = 0;
 #endif
 	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 	tp->snd_cwnd = TCP_INIT_CWND;
@@ -3196,89 +3199,6 @@ int tcp_sock_set_keepcnt(struct sock *sk, int val)
 }
 EXPORT_SYMBOL(tcp_sock_set_keepcnt);
 
-#ifdef CONFIG_TCP_NATA_URC
-#define NATA_URC_RTO_MS_MIN	200	// 200ms
-#define NATA_URC_RTO_MS_MAX	120000	// 12s
-#define NATA_URC_RTO_MS_TO_HZ	1000
-static int tcp_set_nata_urc(struct sock *sk, sockptr_t optval, int optlen)
-{
-	int err = -EINVAL;
-	struct tcp_nata_urc opt = {};
-	struct inet_connection_sock *icsk = inet_csk(sk);
-
-	if (optlen != sizeof(struct tcp_nata_urc))
-		return err;
-
-	if (copy_from_sockptr(&opt, optval, optlen))
-		return err;
-
-	if (!opt.nata_urc_enabled) {
-		icsk->nata_retries_enabled = opt.nata_urc_enabled;
-		icsk->nata_retries_type = NATA_NA;
-		icsk->icsk_syn_retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syn_retries);
-		tcp_sk(sk)->nata_data_retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
-		icsk->nata_syn_rto = TCP_TIMEOUT_INIT;
-		icsk->nata_data_rto = TCP_TIMEOUT_INIT;
-		return 0;
-	}
-
-	if (opt.nata_rto_ms < NATA_URC_RTO_MS_MIN ||
-	    opt.nata_rto_ms > NATA_URC_RTO_MS_MAX)
-		return err;
-
-	icsk->nata_retries_enabled = opt.nata_urc_enabled;
-	icsk->nata_retries_type = NATA_URC;
-	icsk->icsk_syn_retries = opt.nata_syn_retries;
-	tcp_sk(sk)->nata_data_retries = opt.nata_data_retries;
-	icsk->nata_data_rto = opt.nata_rto_ms * HZ / NATA_URC_RTO_MS_TO_HZ;
-	icsk->nata_syn_rto = icsk->nata_data_rto;
-	return 0;
-}
-#endif
-
-#ifdef CONFIG_TCP_NATA_STL
-#define NATA_STL_SYN_RTO_MS_MIN		800	// 800ms
-#define NATA_STL_DATA_RTO_MS_MIN	1800	// 1800ms
-#define NATA_STL_RTO_MS_MAX		120000	// 12s
-#define NATA_STL_RTO_MS_TO_HZ		1000
-static int tcp_set_nata_stl(struct sock *sk, sockptr_t optval, int optlen)
-{
-	int err = -EINVAL;
-	struct tcp_nata_stl opt = {};
-	struct inet_connection_sock *icsk = inet_csk(sk);
-
-	if (optlen != sizeof(struct tcp_nata_stl))
-		return err;
-
-	if (copy_from_sockptr(&opt, optval, optlen))
-		return err;
-
-	if (!opt.nata_stl_enabled) {
-		icsk->nata_retries_enabled = opt.nata_stl_enabled;
-		icsk->nata_retries_type = NATA_NA;
-		icsk->icsk_syn_retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syn_retries);
-		tcp_sk(sk)->nata_data_retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
-		icsk->nata_syn_rto = TCP_TIMEOUT_INIT;
-		icsk->nata_data_rto = TCP_TIMEOUT_INIT;
-		return 0;
-	}
-
-	if ((opt.nata_syn_rto_ms < NATA_STL_SYN_RTO_MS_MIN ||
-	     opt.nata_syn_rto_ms > NATA_STL_RTO_MS_MAX ||
-	     opt.nata_data_rto_ms < NATA_STL_DATA_RTO_MS_MIN ||
-	     opt.nata_data_rto_ms > NATA_STL_RTO_MS_MAX))
-		return err;
-
-	icsk->nata_retries_enabled = opt.nata_stl_enabled;
-	icsk->nata_retries_type = NATA_STL;
-	icsk->icsk_syn_retries = opt.nata_syn_retries;
-	tcp_sk(sk)->nata_data_retries = opt.nata_data_retries;
-	icsk->nata_syn_rto = opt.nata_syn_rto_ms * HZ / NATA_STL_RTO_MS_TO_HZ;
-	icsk->nata_data_rto = opt.nata_data_rto_ms * HZ / NATA_STL_RTO_MS_TO_HZ;
-	return 0;
-}
-#endif
-
 /*
  *	Socket option code for TCP.
  */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ddfe2ba3ea5d0dc29a2a3a3b0a83eadee6c9f72e..aff28cf00a6703c56e17c058aa031fa022b87699 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -4122,11 +4122,7 @@ void tcp_send_probe0(struct sock *sk)
 
 	icsk->icsk_probes_out++;
 	if (err <= 0) {
-#if defined(CONFIG_TCP_NATA_URC) || defined(CONFIG_TCP_NATA_STL)
-		if (icsk->icsk_backoff < tcp_get_retries_limit(sk))
-#else
 		if (icsk->icsk_backoff < READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2))
-#endif
 			icsk->icsk_backoff++;
 		timeout = tcp_probe0_when(sk, TCP_RTO_MAX);
 	} else {
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index bf4d38b524c4c1b50ab2784a546b1551d382f2bd..5c7e10939dd90ee07b41e44c79ba203e0abb35bc 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -250,11 +250,7 @@ static int tcp_write_timeout(struct sock *sk)
 			__dst_negative_advice(sk);
 		}
 
-#if defined(CONFIG_TCP_NATA_URC) || defined(CONFIG_TCP_NATA_STL)
-		retry_until = tcp_get_retries_limit(sk);
-#else
 		retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2);
-#endif
 		if (sock_flag(sk, SOCK_DEAD)) {
 			const bool alive = icsk->icsk_rto < TCP_RTO_MAX;
 
@@ -385,11 +381,7 @@ static void tcp_probe_timer(struct sock *sk)
 		    msecs_to_jiffies(icsk->icsk_user_timeout))
 			goto abort;
 
-#if defined(CONFIG_TCP_NATA_URC) || defined(CONFIG_TCP_NATA_STL)
-	max_probes = tcp_get_retries_limit(sk);
-#else
 	max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
-#endif
 	if (sock_flag(sk, SOCK_DEAD)) {
 		const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;