diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 3db92d9a030b9051e1337661b017a719ac998293..ff73b2c17be538a48dbe267424d3e72e404f8ae4 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -19,6 +19,8 @@ #include "dm.h" #define DM_RESERVED_MAX_IOS 1024 +#define DM_MAX_TARGETS 1048576 +#define DM_MAX_TARGET_PARAMS 1024 struct dm_kobject_holder { struct kobject kobj; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 5f9b9178c647e2e64632b78c70c3538ef1513f8e..2e3f24ef1d13fea367f81d12ce2de66b1b8b8db3 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1761,6 +1761,7 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern return -EFAULT; - if (param_kernel->data_size < minimum_data_size) + if (unlikely(param_kernel->data_size < minimum_data_size) || + unlikely(param_kernel->data_size > DM_MAX_TARGETS * DM_MAX_TARGET_PARAMS)) return -EINVAL; secure_data = param_kernel->flags & DM_SECURE_DATA_FLAG; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 5c590895c14c37b68fbd544ac7c92402f6a43d45..31bcdcd93c7a873e4cfaad7fe488d8537a404c39 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -144,7 +144,12 @@ static int alloc_targets(struct dm_table *t, unsigned int num) int dm_table_create(struct dm_table **result, fmode_t mode, unsigned num_targets, struct mapped_device *md) { - struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL); + struct dm_table *t; + + if (num_targets > DM_MAX_TARGETS) + return -EOVERFLOW; + + t = kzalloc(sizeof(*t), GFP_KERNEL); if (!t) return -ENOMEM; @@ -158,7 +163,7 @@ int dm_table_create(struct dm_table **result, fmode_t mode, if (!num_targets) { kfree(t); - return -ENOMEM; + return -EOVERFLOW; } if (alloc_targets(t, num_targets)) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5a114cad988a65130f70c6c05c93f469af9bc1b3..f2f42ffdbfddfd1ba15f274767091388f174fdeb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1553,8 +1553,17 @@ 
static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, again: root = btrfs_lookup_fs_root(fs_info, objectid); if (root) { - /* Shouldn't get preallocated anon_dev for cached roots */ - ASSERT(!anon_dev); + /* + * Some other caller may have read out the newly inserted + * subvolume already (for things like backref walk etc). Not + * that common but still possible. In that case, we just need + * to free the anon_dev. + */ + if (unlikely(anon_dev)) { + free_anon_bdev(anon_dev); + anon_dev = 0; + } + if (check_ref && btrfs_root_refs(&root->root_item) == 0) { btrfs_put_root(root); return ERR_PTR(-ENOENT); diff --git a/include/linux/tcp.h b/include/linux/tcp.h index e62bf28b8ffd94c526a1b2eda87b7a5d8f82e8fc..8c7dc2ecbd58d48a202589f0929d438383142ba7 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -365,6 +365,9 @@ struct tcp_sock { #else #define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) 0 #endif +#ifdef CONFIG_TCP_NB_URC + u16 tcp_retries2; +#endif /* CONFIG_TCP_NB_URC */ u16 timeout_rehash; /* Timeout-triggered rehash attempts */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index ff901aade442f9701a29bad5c57c8988d32d3a37..751720b5c62dbbeb7e60f471a93a038f96cba936 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -108,6 +108,11 @@ struct inet_connection_sock { __u8 icsk_syn_retries; __u8 icsk_probes_out; __u16 icsk_ext_hdr_len; +#ifdef CONFIG_TCP_NB_URC + __u8 icsk_nb_urc_enabled:1, + icsk_nb_urc_reserved:7; + __u32 icsk_nb_urc_rto; +#endif /* CONFIG_TCP_NB_URC */ struct { __u8 pending; /* ACK is pending */ __u8 quick; /* Scheduled number of quick acks */ @@ -220,6 +225,11 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, { struct inet_connection_sock *icsk = inet_csk(sk); +#ifdef CONFIG_TCP_NB_URC + if (icsk->icsk_nb_urc_enabled) + when = icsk->icsk_nb_urc_rto; +#endif /* CONFIG_TCP_NB_URC */ + if (when > max_when) { 
pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, (void *)_THIS_IP_); diff --git a/include/net/tcp.h b/include/net/tcp.h index 114ed8a65a887c4a673045a31aebd0743ec8ecd2..fe849f81458233af7b3ff1cbb40bc81ab251fd01 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2393,4 +2393,16 @@ static inline u64 tcp_transmit_time(const struct sock *sk) return 0; } +#ifdef CONFIG_TCP_NB_URC +static inline int tcp_get_retries_limit(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (inet_csk(sk)->icsk_nb_urc_enabled) + return tp->tcp_retries2; + + return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2); +} +#endif /* CONFIG_TCP_NB_URC */ + #endif /* _TCP_H */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 62db78b9c1a0a21963c8cdd61212c0c09ce8a58e..27d542fa014b613faad3f765ed277c2bc09dd3db 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -128,12 +128,19 @@ enum { #define TCP_CM_INQ TCP_INQ #define TCP_TX_DELAY 37 /* delay outgoing packets by XX usec */ - +#define TCP_NB_URC 101 /* netibooster ultra-reliable connection */ #define TCP_REPAIR_ON 1 #define TCP_REPAIR_OFF 0 #define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */ +struct tcp_nb_urc { + __u8 nb_urc_enabled; + __u8 syn_retries; + __u16 tcp_retries2; + __u32 nb_urc_rto; +}; + struct tcp_repair_opt { __u32 opt_code; __u32 opt_val; diff --git a/net/Kconfig b/net/Kconfig index e64a689c1f27e8177d4709b8da173e90056fd936..b07ee3b024abd9fe83e84a040e45a58eb56e5e1d 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -459,6 +459,13 @@ config ETHTOOL_NETLINK source "net/newip/Kconfig" +config TCP_NB_URC + bool "NetiBooster Ultra-Reliable Connection Feature" + default n + help + This option enables the NetiBooster Ultra-Reliable Connection feature. + When enabled, sockets may override TCP retransmission limits and the retransmission timeout via the TCP_NB_URC socket option. + endif # if NET # Used by archs to tell that they support BPF JIT compiler plus which flavour. 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a1de705e2ddcc6f9fc374508778abc253b511e82..c7f22c43dd1a395739b13260b0a7a6ec2e810de7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -457,6 +457,11 @@ void tcp_init_sock(struct sock *sk) sk_sockets_allocated_inc(sk); sk->sk_route_forced_caps = NETIF_F_GSO; +#ifdef CONFIG_TCP_NB_URC + icsk->icsk_nb_urc_enabled = 0; + icsk->icsk_nb_urc_rto = TCP_TIMEOUT_INIT; + tp->tcp_retries2 = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2); +#endif /* CONFIG_TCP_NB_URC */ } EXPORT_SYMBOL(tcp_init_sock); @@ -2807,6 +2812,11 @@ int tcp_disconnect(struct sock *sk, int flags) icsk->icsk_rto = TCP_TIMEOUT_INIT; icsk->icsk_rto_min = TCP_RTO_MIN; icsk->icsk_delack_max = TCP_DELACK_MAX; +#ifdef CONFIG_TCP_NB_URC + icsk->icsk_nb_urc_enabled = 0; + icsk->icsk_nb_urc_rto = TCP_TIMEOUT_INIT; + tp->tcp_retries2 = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2); +#endif /* CONFIG_TCP_NB_URC */ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd = TCP_INIT_CWND; tp->snd_cwnd_cnt = 0; @@ -3143,6 +3153,30 @@ int tcp_sock_set_keepcnt(struct sock *sk, int val) } EXPORT_SYMBOL(tcp_sock_set_keepcnt); +#ifdef CONFIG_TCP_NB_URC +static int tcp_set_nb_urc(struct sock *sk, sockptr_t optval, int optlen) { + int err = 0; + struct tcp_nb_urc opt = {}; + struct inet_connection_sock *icsk = inet_csk(sk); + + if (optlen != sizeof(struct tcp_nb_urc)) { + err = -EINVAL; + return err; + } + + if (copy_from_sockptr(&opt, optval, sizeof(struct tcp_nb_urc))) { + err = -EFAULT; + return err; + } + + icsk->icsk_syn_retries = opt.syn_retries; + tcp_sk(sk)->tcp_retries2 = opt.tcp_retries2; + icsk->icsk_nb_urc_enabled = opt.nb_urc_enabled; + icsk->icsk_nb_urc_rto = opt.nb_urc_rto; + + return err; +} +#endif /* CONFIG_TCP_NB_URC */ /* * Socket option code for TCP. 
*/ @@ -3449,6 +3483,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, tcp_enable_tx_delay(); tp->tcp_tx_delay = val; break; +#ifdef CONFIG_TCP_NB_URC + case TCP_NB_URC: + err = tcp_set_nb_urc(sk, optval, optlen); + break; +#endif /* CONFIG_TCP_NB_URC */ default: err = -ENOPROTOOPT; break; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e4ad274ec7a30b9cb47c4dd0fb1dd11da51a33e3..619d92b3ac2e67aa7c0777d59a5965b2fb8e49c8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -4088,7 +4088,6 @@ void tcp_send_probe0(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct net *net = sock_net(sk); unsigned long timeout; int err; @@ -4104,7 +4103,11 @@ void tcp_send_probe0(struct sock *sk) icsk->icsk_probes_out++; if (err <= 0) { - if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2)) +#ifdef CONFIG_TCP_NB_URC + if (icsk->icsk_backoff < tcp_get_retries_limit(sk)) +#else + if (icsk->icsk_backoff < READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2)) +#endif /* CONFIG_TCP_NB_URC */ icsk->icsk_backoff++; timeout = tcp_probe0_when(sk, TCP_RTO_MAX); } else { diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 888683f2ff3ee3ac3fc4a5c2eb454e58f4df1809..58b2555c76f910ac05c8fe55e9116c55e239109f 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -249,7 +249,11 @@ static int tcp_write_timeout(struct sock *sk) __dst_negative_advice(sk); } +#ifdef CONFIG_TCP_NB_URC + retry_until = tcp_get_retries_limit(sk); +#else retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2); +#endif /* CONFIG_TCP_NB_URC */ if (sock_flag(sk, SOCK_DEAD)) { const bool alive = icsk->icsk_rto < TCP_RTO_MAX; @@ -380,7 +384,11 @@ static void tcp_probe_timer(struct sock *sk) msecs_to_jiffies(icsk->icsk_user_timeout)) goto abort; +#ifdef CONFIG_TCP_NB_URC + max_probes = tcp_get_retries_limit(sk); +#else max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2); +#endif 
/* CONFIG_TCP_NB_URC */ if (sock_flag(sk, SOCK_DEAD)) { const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 48f340ef492aed5243faddf6703150a45738d26d..50306a9af6f70d1c72593b453facd1c1321ecd10 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8688,16 +8688,10 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); switch (data->verdict.code) { - default: - switch (data->verdict.code & NF_VERDICT_MASK) { - case NF_ACCEPT: - case NF_DROP: - case NF_QUEUE: - break; - default: - return -EINVAL; - } - fallthrough; + case NF_ACCEPT: + case NF_DROP: + case NF_QUEUE: + break; case NFT_CONTINUE: case NFT_BREAK: case NFT_RETURN: @@ -8731,6 +8725,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, chain->use++; data->verdict.chain = chain; break; + default: + return -EINVAL; } desc->len = sizeof(data->verdict); diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index b239120dd9ca691df8a561418dd91dbdaa319a77..0ec0ae1483492d93da2aa01947685e1cd737398c 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -419,7 +419,7 @@ static int rds_recv_track_latency(struct rds_sock *rs, sockptr_t optval, rs->rs_rx_traces = trace.rx_traces; for (i = 0; i < rs->rs_rx_traces; i++) { - if (trace.rx_trace_pos[i] > RDS_MSG_RX_DGRAM_TRACE_MAX) { + if (trace.rx_trace_pos[i] >= RDS_MSG_RX_DGRAM_TRACE_MAX) { rs->rs_rx_traces = 0; return -EFAULT; } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 50eae668578a7916baa6cf0f83de5e143699b215..dd980438f201f093cec2205a365d3e97713dbf3d 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1215,6 +1215,8 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page, } sk_msg_page_add(msg_pl, page, copy, offset); + msg_pl->sg.copybreak = 0; + msg_pl->sg.curr = msg_pl->sg.end; sk_mem_charge(sk, 
copy); offset += copy;