From 200a488cd5d43c255a4cf45f70c21fdb1df252dc Mon Sep 17 00:00:00 2001 From: renxiaohui Date: Mon, 30 Jun 2025 16:41:04 +0800 Subject: [PATCH 1/2] Fix race for duplicate reqsk on identical SYN commit ff46e3b4421923937b7f6e44ffcd3549a074f321 upstream When bonding is configured in BOND_MODE_BROADCAST mode, if two identical SYN packets are received at the same time and processed on different CPUs, it can potentially create the same sk (sock) but two different reqsk (request_sock) in tcp_conn_request(). These two different reqsk will respond with two SYNACK packets, and since the generation of the seq (ISN) incorporates a timestamp, the final two SYNACK packets will have different seq values. The consequence is that when the Client receives and replies with an ACK to the earlier SYNACK packet, we will reset(RST) it. ======================================================================== This behavior is consistently reproducible in my local setup, which comprises: | NETA1 ------ NETB1 | PC_A --- bond --- | | --- bond --- PC_B | NETA2 ------ NETB2 | - PC_A is the Server and has two network cards, NETA1 and NETA2. I have bonded these two cards using BOND_MODE_BROADCAST mode and configured them to be handled by different CPU. - PC_B is the Client, also equipped with two network cards, NETB1 and NETB2, which are also bonded and configured in BOND_MODE_BROADCAST mode. If the client attempts a TCP connection to the server, it might encounter a failure. Capturing packets from the server side reveals: 10.10.10.10.45182 > localhost: Flags [S], seq 320236027, 10.10.10.10.45182 > localhost: Flags [S], seq 320236027, localhost > 10.10.10.10.45182: Flags [S.], seq 2967855116, localhost > 10.10.10.10.45182: Flags [S.], seq 2967855123, <== 10.10.10.10.45182 > localhost: Flags [.], ack 4294967290, 10.10.10.10.45182 > localhost: Flags [.], ack 4294967290, localhost > 10.10.10.10.45182: Flags [R], seq 2967855117, <== localhost > 10.10.10.10.45182: Flags [R], seq 2967855117, Two SYNACKs with different seq numbers are sent by localhost, resulting in an anomaly. ======================================================================== The attempted solution is as follows: Add a return value to inet_csk_reqsk_queue_hash_add() to confirm if the ehash insertion is successful (Up to now, the reason for unsuccessful insertion is that a reqsk for the same connection has already been inserted). If the insertion fails, release the reqsk. Due to the refcnt, Kuniyuki suggests also adding a return value check for the DCCP module; if ehash insertion fails, indicating a successful insertion of the same connection, simply release the reqsk as well. Simultaneously, In the reqsk_queue_hash_req(), the start of the req->rsk_timer is adjusted to be after successful insertion. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: luoxuanqiang Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240621013929.1386815-1-luoxuanqiang@kylinos.cn Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin Signed-off-by: Xiaohui Ren Signed-off-by: Wenya Zhang Reviewed-by: Xuexin Jiang --- include/net/inet_connection_sock.h | 2 +- net/dccp/ipv4.c | 7 +++++-- net/dccp/ipv6.c | 7 +++++-- net/ipv4/inet_connection_sock.c | 17 +++++++++++++---- net/ipv4/tcp_input.c | 7 ++++++- 5 files changed, 30 insertions(+), 10 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 01a73bf74fa1..298fa55c6c82 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -263,7 +263,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, struct sock *inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, struct sock *child); -void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, +bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, unsigned long timeout); struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child, struct request_sock *req, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 524b7e581a03..65a6733fc897 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -655,8 +655,11 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (dccp_v4_send_response(sk, req)) goto drop_and_free; - inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); - reqsk_put(req); + if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT))) + reqsk_free(req); + else + reqsk_put(req); + return 0; drop_and_free: diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6f5a556f4f6d..683e4291b348 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -398,8 +398,11 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (dccp_v6_send_response(sk, req)) goto drop_and_free; - inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); - reqsk_put(req); + if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT))) + reqsk_free(req); + else + reqsk_put(req); + return 0; drop_and_free: diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 762817d6c8d7..3f3824144847 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1099,25 +1099,34 @@ static void reqsk_timer_handler(struct timer_list *t) inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq); } -static void reqsk_queue_hash_req(struct request_sock *req, +static bool reqsk_queue_hash_req(struct request_sock *req, unsigned long timeout) { + bool found_dup_sk = false; + + if (!inet_ehash_insert(req_to_sk(req), NULL, &found_dup_sk)) + return false; + + /* The timer needs to be setup after a successful insertion. */ timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED); mod_timer(&req->rsk_timer, jiffies + timeout); - inet_ehash_insert(req_to_sk(req), NULL, NULL); /* before letting lookups find us, make sure all req fields * are committed to memory and refcnt initialized. */ smp_wmb(); refcount_set(&req->rsk_refcnt, 2 + 1); + return true; } -void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, +bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, unsigned long timeout) { - reqsk_queue_hash_req(req, timeout); + if (!reqsk_queue_hash_req(req, timeout)) + return false; + inet_csk_reqsk_queue_added(sk); + return true; } EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e6c492954942..92108968d45d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -7088,7 +7088,12 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tcp_rsk(req)->tfo_listener = false; if (!want_cookie) { req->timeout = tcp_timeout_init((struct sock *)req); - inet_csk_reqsk_queue_hash_add(sk, req, req->timeout); + if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, + req->timeout))) { + reqsk_free(req); + return 0; + } + } af_ops->send_synack(sk, dst, &fl, req, &foc, !want_cookie ? TCP_SYNACK_NORMAL : -- Gitee From 2fec2195ab585b7755f5df0439f5ba144bbd3f38 Mon Sep 17 00:00:00 2001 From: renxiaohui Date: Mon, 30 Jun 2025 16:42:54 +0800 Subject: [PATCH 2/2] net: fix memory leak in tcp_conn_request() commit 4f4aa4aa28142d53f8b06585c478476cfe325cfc upstream If inet_csk_reqsk_queue_hash_add() return false, tcp_conn_request() will return without free the dst memory, which allocated in af_ops->route_req. Here is the kmemleak stack: unreferenced object 0xffff8881198631c0 (size 240): comm "softirq", pid 0, jiffies 4299266571 (age 1802.392s) hex dump (first 32 bytes): 00 10 9b 03 81 88 ff ff 80 98 da bc ff ff ff ff ................ 81 55 18 bb ff ff ff ff 00 00 00 00 00 00 00 00 .U.............. backtrace: [] kmem_cache_alloc+0x60c/0xa80 [] dst_alloc+0x55/0x250 [] rt_dst_alloc+0x46/0x1d0 [] __mkroute_output+0x29a/0xa50 [] ip_route_output_key_hash+0x10b/0x240 [] ip_route_output_flow+0x1d/0x90 [] inet_csk_route_req+0x2c5/0x500 [] tcp_conn_request+0x691/0x12c0 [] tcp_rcv_state_process+0x3c8/0x11b0 [] tcp_v4_do_rcv+0x156/0x3b0 [] tcp_v4_rcv+0x1cf8/0x1d80 [] ip_protocol_deliver_rcu+0xf6/0x360 [] ip_local_deliver_finish+0xe6/0x1e0 [] ip_local_deliver+0xee/0x360 [] ip_rcv+0xad/0x2f0 [] __netif_receive_skb_one_core+0x123/0x140 Call dst_release() to free the dst memory when inet_csk_reqsk_queue_hash_add() return false in tcp_conn_request(). Fixes: ff46e3b44219 ("Fix race for duplicate reqsk on identical SYN") Signed-off-by: Wang Liang Link: https://patch.msgid.link/20241219072859.3783576-1-wangliang74@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: Xiaohui Ren Signed-off-by: Wenya Zhang Reviewed-by: Huang Jian --- net/ipv4/tcp_input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 92108968d45d..7f5b9f98cacd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -7091,6 +7091,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, req->timeout))) { reqsk_free(req); + dst_release(dst); return 0; } -- Gitee