From 4782207a22887c585d3bedbc2060c619f444c30f Mon Sep 17 00:00:00 2001
From: Chengfeng Ye
Date: Tue, 27 Jun 2023 12:03:40 +0000
Subject: [PATCH 01/10] sctp: fix potential deadlock on &net->sctp.addr_wq_lock

stable inclusion
from stable-v5.10.188
commit 6d2243ab783bf79d1d674ff0ca26229233c56508
category: bugfix
issue: #I8YTJU
CVE: CVE-2024-0639

Signed-off-by: Ywenrui44091
---------------------------------------

[ Upstream commit 6feb37b3b06e9049e20dcf7e23998f92c9c5be9a ]

As &net->sctp.addr_wq_lock is also acquired by the timer
sctp_addr_wq_timeout_handler() in protocol.c, the same lock acquisition
in sctp_auto_asconf_init() should disable BHs, since it is called from
sctp_accept() in process context.

Possible deadlock scenario:

sctp_accept()
-> sctp_sock_migrate()
-> sctp_auto_asconf_init()
-> spin_lock(&net->sctp.addr_wq_lock)
   <timer interrupt>
   -> sctp_addr_wq_timeout_handler()
   -> spin_lock_bh(&net->sctp.addr_wq_lock); (deadlock here)

This flaw was found using an experimental static analysis tool we are
developing for irq-related deadlocks.

The patch fixes the potential deadlock by using spin_lock_bh().

Signed-off-by: Chengfeng Ye
Fixes: 34e5b0118685 ("sctp: delay auto_asconf init until binding the first addr")
Acked-by: Xin Long
Link: https://lore.kernel.org/r/20230627120340.19432-1-dg573847474@gmail.com
Signed-off-by: Paolo Abeni
Signed-off-by: Sasha Levin
Signed-off-by: wanxiaoqing
---
 net/sctp/socket.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e9b4ea3d934f..02526c46dddc 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -362,9 +362,9 @@ static void sctp_auto_asconf_init(struct sctp_sock *sp)
 	struct net *net = sock_net(&sp->inet.sk);
 
 	if (net->sctp.default_auto_asconf) {
-		spin_lock(&net->sctp.addr_wq_lock);
+		spin_lock_bh(&net->sctp.addr_wq_lock);
 		list_add_tail(&sp->auto_asconf_list,
 			      &net->sctp.auto_asconf_splist);
-		spin_unlock(&net->sctp.addr_wq_lock);
+		spin_unlock_bh(&net->sctp.addr_wq_lock);
 		sp->do_auto_asconf = 1;
 	}
 }
-- Gitee
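For readers unfamiliar with this bug class (which PATCH 01 above and PATCH 02 below both fix), here is a minimal sketch of the pattern; all demo_* identifiers are hypothetical and not taken from the kernel. A lock shared between process context and a timer must be taken with the BH-disabling variant in process context, otherwise the timer can fire on the same CPU while the lock is held and spin on it forever.

#include <linux/list.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/timer.h>

static DEFINE_SPINLOCK(demo_lock);
static struct timer_list demo_timer;
static LIST_HEAD(demo_list);

/* Timer callback: runs in softirq context. */
static void demo_timeout(struct timer_list *t)
{
	spin_lock_bh(&demo_lock);
	/* ... walk demo_list ... */
	spin_unlock_bh(&demo_lock);
	mod_timer(&demo_timer, jiffies + HZ);
}

/* Called from a syscall path, i.e. process context. */
static void demo_add_entry(struct list_head *entry)
{
	/*
	 * Plain spin_lock() here would be the bug: demo_timeout()
	 * could interrupt us on this CPU and spin on demo_lock,
	 * which we hold, a self-deadlock. spin_lock_bh() keeps the
	 * softirq from running until the lock is dropped.
	 */
	spin_lock_bh(&demo_lock);
	list_add_tail(entry, &demo_list);
	spin_unlock_bh(&demo_lock);
}

static int __init demo_init(void)
{
	timer_setup(&demo_timer, demo_timeout, 0);
	mod_timer(&demo_timer, jiffies + HZ);
	return 0;
}

static void __exit demo_exit(void)
{
	del_timer_sync(&demo_timer);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");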
From 8ba6dfda34afcd45eb2aa8526882d8aea6d7d7fe Mon Sep 17 00:00:00 2001
From: Chengfeng Ye
Date: Wed, 27 Sep 2023 18:14:14 +0000
Subject: [PATCH 02/10] tipc: fix a potential deadlock on &tx->lock

stable inclusion
from stable-v5.10.198
commit 6a24d0661fa389c241d935da38e0f6a5ee8eb1ae
category: bugfix
issue: #I8YTKF
CVE: CVE-2024-0641

Signed-off-by: Ywenrui44091
---------------------------------------

[ Upstream commit 08e50cf071847323414df0835109b6f3560d44f5 ]

tipc_crypto_key_revoke() can be invoked both by the workqueue
tipc_crypto_work_rx() in process context and by timer/rx callbacks in
softirq context, so the acquisition of &tx->lock should use
spin_lock_bh() to prevent a possible deadlock.

This flaw was found by an experimental static analysis tool I am
developing for irq-related deadlocks.

tipc_crypto_work_rx() (workqueue, process context)
--> tipc_crypto_key_distr()
--> tipc_bcast_xmit()
--> tipc_bcbase_xmit()
--> tipc_bearer_bc_xmit()
--> tipc_crypto_xmit()
--> tipc_ehdr_build()
--> tipc_crypto_key_revoke()
--> spin_lock(&tx->lock)
    <timer interrupt>
    --> tipc_disc_timeout()
    --> tipc_bearer_xmit_skb()
    --> tipc_crypto_xmit()
    --> tipc_ehdr_build()
    --> tipc_crypto_key_revoke()
    --> spin_lock(&tx->lock) (deadlock here)

Signed-off-by: Chengfeng Ye
Reviewed-by: Jacob Keller
Acked-by: Jon Maloy
Fixes: fc1b6d6de220 ("tipc: introduce TIPC encryption & authentication")
Link: https://lore.kernel.org/r/20230927181414.59928-1-dg573847474@gmail.com
Signed-off-by: Jakub Kicinski
Signed-off-by: Sasha Levin
Signed-off-by: wanxiaoqing
---
 net/tipc/crypto.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index de63d6d41645..7d26d0cb207a 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -1445,14 +1445,14 @@ static int tipc_crypto_key_revoke(struct net *net, u8 tx_key)
 	struct tipc_crypto *tx = tipc_net(net)->crypto_tx;
 	struct tipc_key key;
 
-	spin_lock(&tx->lock);
+	spin_lock_bh(&tx->lock);
 	key = tx->key;
 	WARN_ON(!key.active || tx_key != key.active);
 
 	/* Free the active key */
 	tipc_crypto_key_set_state(tx, key.passive, 0, key.pending);
 	tipc_crypto_key_detach(tx->aead[key.active], &tx->lock);
-	spin_unlock(&tx->lock);
+	spin_unlock_bh(&tx->lock);
 
 	pr_warn("%s: key is revoked\n", tx->name);
 	return -EKEYREVOKED;
-- Gitee
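A related rule of thumb, shown as a hand-written sketch rather than kernel code (demo_lock is hypothetical): the spinlock variant used in process context is dictated by the most restrictive other context that can take the same lock on this CPU.

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);

void demo_process_context(void)
{
	unsigned long flags;

	/* Lock shared only with other process context: */
	spin_lock(&demo_lock);
	spin_unlock(&demo_lock);

	/*
	 * Lock also taken from softirq (timers, rx callbacks), the
	 * situation in PATCH 01 and PATCH 02:
	 */
	spin_lock_bh(&demo_lock);
	spin_unlock_bh(&demo_lock);

	/* Lock also taken from hardirq handlers: */
	spin_lock_irqsave(&demo_lock, flags);
	spin_unlock_irqrestore(&demo_lock, flags);
}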
From 8d2b1b527fe624375ad4a83831f5897540dbbd69 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Thu, 21 Dec 2023 16:53:57 +0000
Subject: [PATCH 03/10] ida: Fix crash in ida_free when the bitmap is empty

mainline inclusion
from mainline-v6.7-rc7
commit af73483f4e8b6f5c68c9aa63257bdd929a9c194a
category: bugfix
issue: #I8YTHN
CVE: CVE-2023-6915

Signed-off-by: Ywenrui44091
---------------------------------------

The IDA usually detects double-frees, but that detection failed to
consider the case when there are no nearby IDs allocated and so we have
a NULL bitmap rather than simply having a clear bit. Add some tests to
the test-suite to be sure we don't inadvertently reintroduce this
problem. Unfortunately they're quite noisy, so include a message to
disregard the warnings.

Reported-by: Zhenghan Wang
Signed-off-by: Matthew Wilcox (Oracle)
Signed-off-by: Linus Torvalds
Signed-off-by: wanxiaoqing
---
 lib/idr.c      |  2 +-
 lib/test_ida.c | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/lib/idr.c b/lib/idr.c
index 7ecdfdb5309e..8331b44dd39e 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -508,7 +508,7 @@ void ida_free(struct ida *ida, unsigned int id)
 			goto delete;
 		xas_store(&xas, xa_mk_value(v));
 	} else {
-		if (!test_bit(bit, bitmap->bitmap))
+		if (!bitmap || !test_bit(bit, bitmap->bitmap))
 			goto err;
 		__clear_bit(bit, bitmap->bitmap);
 		xas_set_mark(&xas, XA_FREE_MARK);
diff --git a/lib/test_ida.c b/lib/test_ida.c
index b06880625961..55105baa19da 100644
--- a/lib/test_ida.c
+++ b/lib/test_ida.c
@@ -150,6 +150,45 @@ static void ida_check_conv(struct ida *ida)
 	IDA_BUG_ON(ida, !ida_is_empty(ida));
 }
 
+/*
+ * Check various situations where we attempt to free an ID we don't own.
+ */
+static void ida_check_bad_free(struct ida *ida)
+{
+	unsigned long i;
+
+	printk("vvv Ignore \"not allocated\" warnings\n");
+	/* IDA is empty; all of these will fail */
+	ida_free(ida, 0);
+	for (i = 0; i < 31; i++)
+		ida_free(ida, 1 << i);
+
+	/* IDA contains a single value entry */
+	IDA_BUG_ON(ida, ida_alloc_min(ida, 3, GFP_KERNEL) != 3);
+	ida_free(ida, 0);
+	for (i = 0; i < 31; i++)
+		ida_free(ida, 1 << i);
+
+	/* IDA contains a single bitmap */
+	IDA_BUG_ON(ida, ida_alloc_min(ida, 1023, GFP_KERNEL) != 1023);
+	ida_free(ida, 0);
+	for (i = 0; i < 31; i++)
+		ida_free(ida, 1 << i);
+
+	/* IDA contains a tree */
+	IDA_BUG_ON(ida, ida_alloc_min(ida, (1 << 20) - 1, GFP_KERNEL) != (1 << 20) - 1);
+	ida_free(ida, 0);
+	for (i = 0; i < 31; i++)
+		ida_free(ida, 1 << i);
+	printk("^^^ \"not allocated\" warnings over\n");
+
+	ida_free(ida, 3);
+	ida_free(ida, 1023);
+	ida_free(ida, (1 << 20) - 1);
+
+	IDA_BUG_ON(ida, !ida_is_empty(ida));
+}
+
 static DEFINE_IDA(ida);
 
 static int ida_checks(void)
@@ -162,6 +201,7 @@ static int ida_checks(void)
 	ida_check_leaf(&ida, 1024 * 64);
 	ida_check_max(&ida);
 	ida_check_conv(&ida);
+	ida_check_bad_free(&ida);
 
 	printk("IDA: %u of %u tests passed\n", tests_passed, tests_run);
 	return (tests_run != tests_passed) ? 0 : -EINVAL;
-- Gitee
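As a usage-level illustration of the fixed crash (demo_* names are hypothetical; DEFINE_IDA, ida_alloc and ida_free are the real kernel API): before this fix, freeing an ID from a region of the IDA with no allocations at all dereferenced a NULL bitmap instead of just hitting the "not allocated" warning.

#include <linux/idr.h>
#include <linux/module.h>

static DEFINE_IDA(demo_ida);

static int __init demo_init(void)
{
	int id;

	/*
	 * IDA completely empty: this free is bogus and should only
	 * warn, but before the fix it could crash on a NULL bitmap.
	 */
	ida_free(&demo_ida, 64);

	/* Normal paired usage for contrast. */
	id = ida_alloc(&demo_ida, GFP_KERNEL);
	if (id < 0)
		return id;
	ida_free(&demo_ida, id);
	return 0;
}
module_init(demo_init);
MODULE_LICENSE("GPL");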
From 84e2d43aa9342c57f7858fde006100edf52428cf Mon Sep 17 00:00:00 2001
From: Daniel Vetter
Date: Fri, 21 Jul 2023 15:58:38 +0200
Subject: [PATCH 04/10] drm/atomic: Fix potential use-after-free in nonblocking commits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

stable inclusion
from stable-v5.10.188
commit f09c0ac142c59495262dd80545f261b2aeeba538
category: bugfix
issue: #I90483
CVE: CVE-2023-51043

Signed-off-by: Ywenrui44091
---------------------------------------

commit 4e076c73e4f6e90816b30fcd4a0d7ab365087255 upstream.

This requires a bit of background. Properly done, a modeset driver's
unload/remove sequence should be

drm_dev_unplug();
drm_atomic_helper_shutdown();
drm_dev_put();

The trouble is that the drm_dev_unplugged() checks are by design racy:
they do not synchronize against all outstanding ioctls. This is because
those ioctls could block forever (both for modeset and for driver
specific ioctls), leading to deadlocks in hotunplug. Instead the code
sections that touch the hardware need to be annotated with
drm_dev_enter/exit, to avoid accessing hardware resources after the
unload/remove has finished.

To avoid use-after-free issues, all the involved userspace visible
objects are supposed to hold a reference on the underlying drm_device,
like drm_file does.

The issue now is that we missed one: the atomic modeset ioctl can be
run in a nonblocking fashion, and in that case it cannot rely on the
implied drm_device reference provided by the ioctl calling context.
This can result in a use-after-free if a nonblocking atomic commit is
carefully raced against a driver unload.

Fix this by unconditionally grabbing a drm_device reference for any
drm_atomic_state structures. Strictly speaking this isn't required for
blocking commits and TEST_ONLY calls, but it's the simpler approach.

Thanks to shanzhulig for the initial idea of grabbing an unconditional
reference; I just added comments, a condensed commit message and fixed
a minor potential issue in where exactly we drop the final reference.

Reported-by: shanzhulig
Suggested-by: shanzhulig
Reviewed-by: Maxime Ripard
Cc: Maarten Lankhorst
Cc: Thomas Zimmermann
Cc: David Airlie
Cc: stable@kernel.org
Signed-off-by: Daniel Vetter
Signed-off-by: Daniel Vetter
Signed-off-by: Linus Torvalds
Signed-off-by: Greg Kroah-Hartman
Signed-off-by: wanxiaoqing
---
 drivers/gpu/drm/drm_atomic.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 58527f151984..23a645a7e439 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -98,6 +98,12 @@ drm_atomic_state_init(struct drm_device *dev, struct drm_atomic_state *state)
 	if (!state->planes)
 		goto fail;
 
+	/*
+	 * Because drm_atomic_state can be committed asynchronously we need our
+	 * own reference and cannot rely on the one implied by drm_file in the
+	 * ioctl call.
+	 */
+	drm_dev_get(dev);
 	state->dev = dev;
 
 	DRM_DEBUG_ATOMIC("Allocated atomic state %p\n", state);
@@ -257,7 +263,8 @@ EXPORT_SYMBOL(drm_atomic_state_clear);
 void __drm_atomic_state_free(struct kref *ref)
 {
 	struct drm_atomic_state *state = container_of(ref, typeof(*state), ref);
-	struct drm_mode_config *config = &state->dev->mode_config;
+	struct drm_device *dev = state->dev;
+	struct drm_mode_config *config = &dev->mode_config;
 
 	drm_atomic_state_clear(state);
 
@@ -269,6 +276,8 @@ void __drm_atomic_state_free(struct kref *ref)
 		drm_atomic_state_default_release(state);
 		kfree(state);
 	}
+
+	drm_dev_put(dev);
 }
 EXPORT_SYMBOL(__drm_atomic_state_free);
-- Gitee
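The lifetime rule applied by the patch above generalizes beyond DRM. A minimal sketch with hypothetical demo_* types (not the DRM API): any object that can outlive the syscall that created it must hold its own reference on the device it points into, and must drop that reference only after the object itself is fully freed.

#include <linux/kref.h>
#include <linux/slab.h>

struct demo_dev {
	struct kref ref;
	/* ... device state ... */
};

struct demo_state {
	struct kref ref;
	struct demo_dev *dev;	/* pinned in demo_state_alloc() */
};

static void demo_dev_release(struct kref *ref)
{
	kfree(container_of(ref, struct demo_dev, ref));
}

static struct demo_state *demo_state_alloc(struct demo_dev *dev)
{
	struct demo_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

	if (!state)
		return NULL;
	kref_init(&state->ref);
	kref_get(&dev->ref);	/* our own reference, not the caller's */
	state->dev = dev;
	return state;
}

static void demo_state_release(struct kref *ref)
{
	struct demo_state *state = container_of(ref, struct demo_state, ref);
	struct demo_dev *dev = state->dev;

	kfree(state);
	/* Drop the device ref only after the state is fully gone. */
	kref_put(&dev->ref, demo_dev_release);
}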
From 940c931df488d7a6d7a93901fd6550d1f22537d3 Mon Sep 17 00:00:00 2001
From: shanzhulig
Date: Tue, 27 Jun 2023 18:10:47 -0700
Subject: [PATCH 05/10] drm/amdgpu: Fix potential fence use-after-free v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

stable inclusion
from stable-v5.10.192
commit b870b9a47fdba29bd6828f690e4817c950fa3430
category: bugfix
issue: #I9035Y
CVE: CVE-2023-51042

Signed-off-by: Ywenrui44091
---------------------------------------

[ Upstream commit 2e54154b9f27262efd0cb4f903cc7d5ad1fe9628 ]

Decrement the fence reference count before exiting, to avoid a race
that can lead to a fence use-after-free.

v2 (chk): actually fix the use-after-free and not just move it.

Signed-off-by: shanzhulig
Signed-off-by: Christian König
Reviewed-by: Alex Deucher
Signed-off-by: Alex Deucher
Signed-off-by: Sasha Levin
Signed-off-by: wanxiaoqing
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index ffd8f5601e28..e25c3387bcf8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1517,15 +1517,15 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
 			continue;
 
 		r = dma_fence_wait_timeout(fence, true, timeout);
+		if (r > 0 && fence->error)
+			r = fence->error;
+
 		dma_fence_put(fence);
 		if (r < 0)
 			return r;
 
 		if (r == 0)
 			break;
-
-		if (fence->error)
-			return fence->error;
 	}
 
 	memset(wait, 0, sizeof(*wait));
-- Gitee
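A condensed sketch of the ordering rule this fix enforces (demo_wait() is hypothetical; dma_fence_wait_timeout(), fence->error and dma_fence_put() are the real kernel interfaces): read everything you need from the fence while you still hold a reference, and only then drop it.

#include <linux/dma-fence.h>

static long demo_wait(struct dma_fence *fence, long timeout)
{
	long r = dma_fence_wait_timeout(fence, true, timeout);

	/*
	 * The buggy ordering was: dma_fence_put(fence); ...;
	 * return fence->error;  which is a use-after-free once the
	 * refcount hits zero. Consume fence->error first.
	 */
	if (r > 0 && fence->error)
		r = fence->error;

	dma_fence_put(fence);	/* last use of fence was above */
	return r;
}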
From 445c0e1695fb55e26021742a5aa914d292e0c6b4 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski
Date: Fri, 13 Oct 2023 20:41:29 +0200
Subject: [PATCH 06/10] nfc: nci: fix possible NULL pointer dereference in send_acknowledge()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

stable inclusion
from stable-v5.10.199
commit c95fa5b20fe03609e0894656fa43c18045b5097e
category: bugfix
issue: #I90352
CVE: CVE-2023-46343

Signed-off-by: Ywenrui44091
---------------------------------------

commit 7937609cd387246aed994e81aa4fa951358fba41 upstream.

Handle memory allocation failure from nci_skb_alloc() (calling
alloc_skb()) to avoid a possible NULL pointer dereference.

Reported-by: 黄思聪
Fixes: 391d8a2da787 ("NFC: Add NCI over SPI receive")
Cc:
Signed-off-by: Krzysztof Kozlowski
Reviewed-by: Simon Horman
Link: https://lore.kernel.org/r/20231013184129.18738-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Jakub Kicinski
Signed-off-by: Greg Kroah-Hartman
Signed-off-by: wanxiaoqing
---
 net/nfc/nci/spi.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c
index 7d8e10e27c20..0651640d6868 100644
--- a/net/nfc/nci/spi.c
+++ b/net/nfc/nci/spi.c
@@ -151,6 +151,8 @@ static int send_acknowledge(struct nci_spi *nspi, u8 acknowledge)
 	int ret;
 
 	skb = nci_skb_alloc(nspi->ndev, 0, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
 
 	/* add the NCI SPI header to the start of the buffer */
 	hdr = skb_push(skb, NCI_SPI_HDR_LEN);
-- Gitee

From df995b8c5e3a710d62fd5a140f9dc8be8adba16d Mon Sep 17 00:00:00 2001
From: Florian Westphal
Date: Sat, 20 Jan 2024 22:50:04 +0100
Subject: [PATCH 07/10] netfilter: nf_tables: reject QUEUE/DROP verdict parameters

mainline inclusion
from mainline-v6.8-rc2
commit f342de4e2f33e0e39165d8639387aa6c19dff660
category: bugfix
issue: #I917B9
CVE: CVE-2024-1086

Signed-off-by: Ywenrui44091
---------------------------------------

This reverts commit e0abdadcc6e1.

core.c:nf_hook_slow assumes that the upper 16 bits of NF_DROP verdicts
contain a valid errno, i.e. -EPERM, -EHOSTUNREACH or similar, or 0.

Due to the reverted commit, it's possible to provide a positive value,
e.g. NF_ACCEPT (1), which results in a use-after-free.

It's not clear to me why this commit was made. NF_QUEUE is not used by
nftables; "queue" rules in nftables will result in use of the
"nft_queue" expression.

If we later need to allow specifying errno values from userspace (I do
not know why), this has to use NF_DROP_GETERR and check that
"err <= 0" holds true.

Fixes: e0abdadcc6e1 ("netfilter: nf_tables: accept QUEUE/DROP verdict parameters")
Cc: stable@vger.kernel.org
Reported-by: Notselwyn
Signed-off-by: Florian Westphal
Signed-off-by: Pablo Neira Ayuso
Signed-off-by: wanxiaoqing
---
 net/netfilter/nf_tables_api.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index beda7bf75a14..1a2f8c6fb6dd 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -8692,16 +8692,10 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
 	data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
 
 	switch (data->verdict.code) {
-	default:
-		switch (data->verdict.code & NF_VERDICT_MASK) {
-		case NF_ACCEPT:
-		case NF_DROP:
-		case NF_QUEUE:
-			break;
-		default:
-			return -EINVAL;
-		}
-		fallthrough;
+	case NF_ACCEPT:
+	case NF_DROP:
+	case NF_QUEUE:
+		break;
 	case NFT_CONTINUE:
 	case NFT_BREAK:
 	case NFT_RETURN:
@@ -8735,6 +8729,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
 		chain->use++;
 		data->verdict.chain = chain;
 		break;
+	default:
+		return -EINVAL;
 	}
 
 	desc->len = sizeof(data->verdict);
-- Gitee
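The shape of the check restored by the revert above, as a stand-alone sketch (demo_* names hypothetical, not the nftables API): userspace-supplied codes are compared against an explicit allowlist of exact values, rather than masked and passed through with extra bits attached.

#include <errno.h>
#include <stdint.h>

enum demo_verdict { DEMO_ACCEPT, DEMO_DROP, DEMO_QUEUE };

static int demo_verdict_init(uint32_t code)
{
	switch (code) {
	case DEMO_ACCEPT:
	case DEMO_DROP:
	case DEMO_QUEUE:
		return 0;	/* exact, known values only */
	default:
		return -EINVAL;	/* no masked pass-through of extra bits */
	}
}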
From 6129bd1b331f4b2dcaa281918cad6d9e6ef35726 Mon Sep 17 00:00:00 2001
From: John Fastabend
Date: Wed, 6 Dec 2023 15:27:05 -0800
Subject: [PATCH 08/10] net: tls, update curr on splice as well

stable inclusion
from stable-v5.10.208
commit c6b2a6b827d4b2d0f36b520e54e083df9b330a7b
category: bugfix
issue: #I91BE9
CVE: CVE-2024-0646

Signed-off-by: Ywenrui44091
---------------------------------------

commit c5a595000e2677e865a39f249c056bc05d6e55fd upstream.

The curr pointer must also be updated on the splice, similar to how we
do this for other copy types.

Fixes: d829e9c4112b ("tls: convert to generic sk_msg interface")
Signed-off-by: John Fastabend
Reported-by: Jann Horn
Link: https://lore.kernel.org/r/20231206232706.374377-2-john.fastabend@gmail.com
Signed-off-by: Jakub Kicinski
Signed-off-by: Greg Kroah-Hartman
Signed-off-by: wanxiaoqing
---
 net/tls/tls_sw.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 9708805f46eb..a7d4a770236d 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1213,6 +1213,8 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
 		}
 
 		sk_msg_page_add(msg_pl, page, copy, offset);
+		msg_pl->sg.copybreak = 0;
+		msg_pl->sg.curr = msg_pl->sg.end;
 		sk_mem_charge(sk, copy);
 
 		offset += copy;
-- Gitee

From aa14a63c92b6db72fdc74a018c37dcdb8d7e7b42 Mon Sep 17 00:00:00 2001
From: Jan Beulich
Date: Mon, 8 Jan 2024 10:38:12 +0100
Subject: [PATCH 09/10] xen-netback: don't produce zero-size SKB frags

stable inclusion
from stable-v5.10.209
commit cce8ba6fa4ec43ad778d64823a2f8ca120d362c1
category: bugfix
issue: #I917CU
CVE: CVE-2023-46838

Signed-off-by: Ywenrui44091
---------------------------------------

commit c7ec4f2d684e17d69bbdd7c4324db0ef5daac26a upstream.

While frontends may submit zero-size requests (wasting a precious
slot), core networking code as of at least 3ece782693c4b ("sock:
skb_copy_ubufs support for compound pages") can't deal with SKBs that
have all zero-size fragments.

Respond to empty requests right when populating fragments; all further
processing is fragment-based and hence won't encounter these empty
requests anymore.

In a way this should have been done from the beginning: when no data
is to be transferred for a particular request, there's not even a
point in validating the respective grant ref. That's no different from
e.g. passing NULL into memcpy() when at the same time the size is 0.

This is XSA-448 / CVE-2023-46838.

Cc: stable@vger.kernel.org
Signed-off-by: Jan Beulich
Reviewed-by: Juergen Gross
Reviewed-by: Paul Durrant
Signed-off-by: Greg Kroah-Hartman
Signed-off-by: wanxiaoqing
---
 drivers/net/xen-netback/netback.c | 44 ++++++++++++++++++++++++++-----
 1 file changed, 38 insertions(+), 6 deletions(-)

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 24b4f73fb336..9875c3bf0aac 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -449,12 +449,25 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
 	}
 
 	for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS;
-	     shinfo->nr_frags++, gop++, nr_slots--) {
+	     nr_slots--) {
+		if (unlikely(!txp->size)) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&queue->response_lock, flags);
+			make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY);
+			push_tx_responses(queue);
+			spin_unlock_irqrestore(&queue->response_lock, flags);
+			++txp;
+			continue;
+		}
+
 		index = pending_index(queue->pending_cons++);
 		pending_idx = queue->pending_ring[index];
 		xenvif_tx_create_map_op(queue, pending_idx, txp,
 					txp == first ? extra_count : 0, gop);
 		frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
+		++shinfo->nr_frags;
+		++gop;
 
 		if (txp == first)
 			txp = txfrags;
@@ -467,20 +480,39 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
 		shinfo = skb_shinfo(nskb);
 		frags = shinfo->frags;
 
-		for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots;
-		     shinfo->nr_frags++, txp++, gop++) {
+		for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; ++txp) {
+			if (unlikely(!txp->size)) {
+				unsigned long flags;
+
+				spin_lock_irqsave(&queue->response_lock, flags);
+				make_tx_response(queue, txp, 0,
+						 XEN_NETIF_RSP_OKAY);
+				push_tx_responses(queue);
+				spin_unlock_irqrestore(&queue->response_lock,
+						       flags);
+				continue;
+			}
+
 			index = pending_index(queue->pending_cons++);
 			pending_idx = queue->pending_ring[index];
 			xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop);
 			frag_set_pending_idx(&frags[shinfo->nr_frags],
 					     pending_idx);
+			++shinfo->nr_frags;
+			++gop;
 		}
 
-		skb_shinfo(skb)->frag_list = nskb;
-	} else if (nskb) {
+		if (shinfo->nr_frags) {
+			skb_shinfo(skb)->frag_list = nskb;
+			nskb = NULL;
+		}
+	}
+
+	if (nskb) {
 		/* A frag_list skb was allocated but it is no longer needed
-		 * because enough slots were converted to copy ops above.
+		 * because enough slots were converted to copy ops above or some
+		 * were empty.
 		 */
 		kfree_skb(nskb);
 	}
-- Gitee
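A userspace-style analog of the xen-netback change above (demo_* names hypothetical): when turning a sequence of untrusted, possibly zero-size requests into fragments, empty requests are answered and skipped up front, so the fragment array never contains zero-size entries.

#include <stddef.h>

struct demo_req  { size_t size; };
struct demo_frag { size_t size; };

static size_t demo_fill_frags(const struct demo_req *reqs, size_t n,
			      struct demo_frag *frags)
{
	size_t nr_frags = 0;
	size_t i;

	for (i = 0; i < n; i++) {
		if (reqs[i].size == 0)
			continue;	/* "respond" and skip: no empty frags */
		frags[nr_frags++].size = reqs[i].size;
	}
	return nr_frags;	/* only non-empty fragments produced */
}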
[300] Route cache is full: consider increasing sysctl net.ipv6.route.max_size.

When this happens the packet is dropped and sendto() gets a "network is
unreachable" error:

remaining pkt 200557 errno 101
remaining pkt 196462 errno 101
.
.
remaining pkt 126821 errno 101

Implement David Ahern's suggestion to remove the max_size check, seeing
that IPv6 has a GC to manage memory usage. IPv4 already does not check
max_size.

Here are some memory comparisons for IPv4 vs IPv6 with the patch:

Test by running 5 instances of a program that sends UDP packets to a
raw socket 5000000 times. Compare IPv4 and IPv6 performance with a
similar program.

IPv4:

Before test:

MemFree: 29427108 kB
Slab: 237612 kB

ip6_dst_cache 1912 2528 256 32 2 : tunables 0 0 0
xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0
ip_dst_cache 2881 3990 192 42 2 : tunables 0 0 0

During test:

MemFree: 29417608 kB
Slab: 247712 kB

ip6_dst_cache 1912 2528 256 32 2 : tunables 0 0 0
xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0
ip_dst_cache 44394 44394 192 42 2 : tunables 0 0 0

After test:

MemFree: 29422308 kB
Slab: 238104 kB

ip6_dst_cache 1912 2528 256 32 2 : tunables 0 0 0
xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0
ip_dst_cache 3048 4116 192 42 2 : tunables 0 0 0

IPv6 with the patch:

Errno 101 errors are not observed anymore with the patch.

Before test:

MemFree: 29422308 kB
Slab: 238104 kB

ip6_dst_cache 1912 2528 256 32 2 : tunables 0 0 0
xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0
ip_dst_cache 3048 4116 192 42 2 : tunables 0 0 0

During test:

MemFree: 29431516 kB
Slab: 240940 kB

ip6_dst_cache 11980 12064 256 32 2 : tunables 0 0 0
xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0
ip_dst_cache 3048 4116 192 42 2 : tunables 0 0 0

After test:

MemFree: 29441816 kB
Slab: 238132 kB

ip6_dst_cache 1902 2432 256 32 2 : tunables 0 0 0
xfrm_dst_cache 0 0 320 25 2 : tunables 0 0 0
ip_dst_cache 3048 4116 192 42 2 : tunables 0 0 0

Tested-by: Andrea Mayer
Signed-off-by: Jon Maxwell
Reviewed-by: David Ahern
Link: https://lore.kernel.org/r/20230112012532.311021-1-jmaxwell37@gmail.com
Signed-off-by: Jakub Kicinski
Cc: "Jitindar Singh, Suraj"
Signed-off-by: Greg Kroah-Hartman
Signed-off-by: wanxiaoqing
---
 include/net/dst_ops.h |  2 +-
 net/core/dst.c        |  8 ++------
 net/ipv6/route.c      | 13 +++++--------
 3 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 88ff7bb2bb9b..632086b2f644 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -16,7 +16,7 @@ struct dst_ops {
 	unsigned short family;
 
 	unsigned int gc_thresh;
-	int (*gc)(struct dst_ops *ops);
+	void (*gc)(struct dst_ops *ops);
 	struct dst_entry * (*check)(struct dst_entry *, __u32 cookie);
 	unsigned int (*default_advmss)(const struct dst_entry *);
 	unsigned int (*mtu)(const struct dst_entry *);
diff --git a/net/core/dst.c b/net/core/dst.c
index fb3bcba87744..453ec8aafc4a 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -83,12 +83,8 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
 
 	if (ops->gc &&
 	    !(flags & DST_NOCOUNT) &&
-	    dst_entries_get_fast(ops) > ops->gc_thresh) {
-		if (ops->gc(ops)) {
-			pr_notice_ratelimited("Route cache is full: consider increasing sysctl net.ipv6.route.max_size.\n");
-			return NULL;
-		}
-	}
+	    dst_entries_get_fast(ops) > ops->gc_thresh)
+		ops->gc(ops);
 
 	dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
 	if (!dst)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 803d1aa83140..92b87009966d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -89,7 +89,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *);
 static void ip6_dst_destroy(struct dst_entry *);
 static void ip6_dst_ifdown(struct dst_entry *,
			    struct net_device *dev, int how);
-static int ip6_dst_gc(struct dst_ops *ops);
+static void ip6_dst_gc(struct dst_ops *ops);
 
 static int ip6_pkt_discard(struct sk_buff *skb);
 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
@@ -3184,11 +3184,10 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	return dst;
 }
 
-static int ip6_dst_gc(struct dst_ops *ops)
+static void ip6_dst_gc(struct dst_ops *ops)
 {
 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
-	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
@@ -3196,11 +3195,10 @@ static int ip6_dst_gc(struct dst_ops *ops)
 	int entries;
 
 	entries = dst_entries_get_fast(ops);
-	if (entries > rt_max_size)
+	if (entries > ops->gc_thresh)
 		entries = dst_entries_get_slow(ops);
 
-	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
-	    entries <= rt_max_size)
+	if (time_after(rt_last_gc + rt_min_interval, jiffies))
 		goto out;
 
 	fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true);
@@ -3210,7 +3208,6 @@ static int ip6_dst_gc(struct dst_ops *ops)
 out:
 	val = atomic_read(&net->ipv6.ip6_rt_gc_expire);
 	atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity));
-	return entries > rt_max_size;
 }
 
 static int ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg,
@@ -6362,7 +6359,7 @@ static int __net_init ip6_route_net_init(struct net *net)
 #endif
 
 	net->ipv6.sysctl.flush_delay = 0;
-	net->ipv6.sysctl.ip6_rt_max_size = 4096;
+	net->ipv6.sysctl.ip6_rt_max_size = INT_MAX;
 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
-- Gitee
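To make the behavioral change of PATCH 10 concrete, a condensed stand-alone sketch (demo_* names hypothetical) of the allocation-time logic before and after the patch: previously, a GC failure above max_size failed the allocation (the errno 101 drops shown above); now crossing gc_thresh only triggers best-effort GC and the allocation proceeds.

#include <stddef.h>

struct demo_ops {
	unsigned int gc_thresh;
	int (*gc_old)(struct demo_ops *);	/* could report failure */
	void (*gc_new)(struct demo_ops *);	/* best effort, cannot fail */
};

/* Before: GC failure above max_size made the allocation fail. */
static void *demo_alloc_old(struct demo_ops *ops, unsigned int entries,
			    void *(*alloc)(void))
{
	if (entries > ops->gc_thresh && ops->gc_old(ops))
		return NULL;	/* "Route cache is full", sendto() errno 101 */
	return alloc();
}

/* After: crossing gc_thresh only triggers GC; allocation proceeds. */
static void *demo_alloc_new(struct demo_ops *ops, unsigned int entries,
			    void *(*alloc)(void))
{
	if (entries > ops->gc_thresh)
		ops->gc_new(ops);
	return alloc();
}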