From eb37b348f7fa1cd772d06215fa8349003906f876 Mon Sep 17 00:00:00 2001 From: Tze-nan Wu Date: Mon, 5 Aug 2024 13:59:22 +0800 Subject: [PATCH 01/18] tracing: Fix overflow in get_free_elt() stable inclusion from stable-v5.10.224 commit eb223bf01e688dfe37e813c8988ee11c8c9f8d0a category: bugfix issue: #IAODNQ CVE: NA Signed-off-by: wukuan --------------------------------------- commit bcf86c01ca4676316557dd482c8416ece8c2e143 upstream. "tracing_map->next_elt" in get_free_elt() is at risk of overflowing. Once it overflows, new elements can still be inserted into the tracing_map even though the maximum number of elements (`max_elts`) has been reached. Continuing to insert elements after the overflow could result in the tracing_map containing "tracing_map->max_size" elements, leaving no empty entries. If any attempt is made to insert an element into a full tracing_map using `__tracing_map_insert()`, it will cause an infinite loop with preemption disabled, leading to a CPU hang problem. Fix this by preventing any further increments to "tracing_map->next_elt" once it reaches "tracing_map->max_elt". Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Fixes: 08d43a5fa063e ("tracing: Add lock-free tracing_map") Co-developed-by: Cheng-Jui Wang Link: https://lore.kernel.org/20240805055922.6277-1-Tze-nan.Wu@mediatek.com Signed-off-by: Cheng-Jui Wang Signed-off-by: Tze-nan Wu Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman Signed-off-by: wukuan Signed-off-by: wangxin --- kernel/trace/tracing_map.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index 51a9d1185033..1c98dd49298a 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -454,7 +454,7 @@ static struct tracing_map_elt *get_free_elt(struct tracing_map *map) struct tracing_map_elt *elt = NULL; int idx; - idx = atomic_inc_return(&map->next_elt); + idx = atomic_fetch_add_unless(&map->next_elt, 1, map->max_elts); if (idx < map->max_elts) { elt = *(TRACING_MAP_ELT(map->elts, idx)); if (map->ops && map->ops->elt_init) @@ -694,7 +694,7 @@ void tracing_map_clear(struct tracing_map *map) { unsigned int i; - atomic_set(&map->next_elt, -1); + atomic_set(&map->next_elt, 0); atomic64_set(&map->hits, 0); atomic64_set(&map->drops, 0); @@ -778,7 +778,7 @@ struct tracing_map *tracing_map_create(unsigned int map_bits, map->map_bits = map_bits; map->max_elts = (1 << map_bits); - atomic_set(&map->next_elt, -1); + atomic_set(&map->next_elt, 0); map->map_size = (1 << (map_bits + 1)); map->ops = ops; -- Gitee From ffc948a0f39a18685bf8ae85b0fadd4acb174e5f Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Mon, 12 Aug 2024 00:28:21 +0100 Subject: [PATCH 02/18] Squashfs: sanity check symbolic link size stable inclusion from stable-v6.11 commit 810ee43d9cd245d138a2733d87a24858a23f577d category: bugfix issue: NA CVE: NA Signed-off-by: wukuan --------------------------------------- Syzkiller reports a "KMSAN: uninit-value in pick_link" bug. This is caused by an uninitialised page, which is ultimately caused by a corrupted symbolic link size read from disk. The reason why the corrupted symlink size causes an uninitialised page is due to the following sequence of events: 1. squashfs_read_inode() is called to read the symbolic link from disk. This assigns the corrupted value 3875536935 to inode->i_size. 2. Later squashfs_symlink_read_folio() is called, which assigns this corrupted value to the length variable, which being a signed int, overflows producing a negative number. 3. The following loop that fills in the page contents checks that the copied bytes is less than length, which being negative means the loop is skipped, producing an uninitialised page. This patch adds a sanity check which checks that the symbolic link size is not larger than expected. -- Signed-off-by: Phillip Lougher Link: https://lore.kernel.org/r/20240811232821.13903-1-phillip@squashfs.org.uk Reported-by: Lizhi Xu Reported-by: syzbot+24ac24ff58dc5b0d26b9@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000a90e8c061e86a76b@google.com/ V2: fix spelling mistake. Signed-off-by: Christian Brauner Signed-off-by: wukuan Signed-off-by: wangxin --- fs/squashfs/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c index 9fb8e9fd4c00..95a9ff9e2399 100644 --- a/fs/squashfs/inode.c +++ b/fs/squashfs/inode.c @@ -279,8 +279,13 @@ int squashfs_read_inode(struct inode *inode, long long ino) if (err < 0) goto failed_read; - set_nlink(inode, le32_to_cpu(sqsh_ino->nlink)); inode->i_size = le32_to_cpu(sqsh_ino->symlink_size); + if (inode->i_size > PAGE_SIZE) { + ERROR("Corrupted symlink\n"); + return -EINVAL; + } + + set_nlink(inode, le32_to_cpu(sqsh_ino->nlink)); inode->i_op = &squashfs_symlink_inode_ops; inode_nohighmem(inode); inode->i_data.a_ops = &squashfs_symlink_aops; -- Gitee From 7bb1d9f85040950c39214dccff11fae4148e63db Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Thu, 1 Aug 2024 15:55:12 +0200 Subject: [PATCH 03/18] net: usb: qmi_wwan: fix memory leak for not ip packets stable inclusion from stable-v5.10.224 commit e87f52225e04a7001bf55bbd7a330fa4252327b5 category: bugfix issue: NA CVE: NA Signed-off-by: wukuan --------------------------------------- [ Upstream commit 7ab107544b777c3bd7feb9fe447367d8edd5b202 ] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Free the unused skb when not ip packets arrive. Fixes: c6adf77953bc ("net: usb: qmi_wwan: add qmap mux protocol support") Signed-off-by: Daniele Palmas Acked-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: wukuan Signed-off-by: wangxin --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 53f1cd0bfaf4..f7b7c1759d55 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -216,6 +216,7 @@ static int qmimux_rx_fixup(struct usbnet *dev, struct sk_buff *skb) break; default: /* not ip - do not know what to do */ + kfree_skb(skbn); goto skip; } -- Gitee From d9c8ccefa6aba6aacda89ec27a5ad7da98d4eaee Mon Sep 17 00:00:00 2001 From: Joe Hattori Date: Tue, 6 Aug 2024 10:13:27 +0900 Subject: [PATCH 04/18] net: dsa: bcm_sf2: Fix a possible memory leak in bcm_sf2_mdio_register() stable inclusion from stable-v5.10.224 commit b7b8d9f5e679af60c94251fd6728dde34be69a71 category: bugfix issue: NA CVE: NA Signed-off-by: wukuan --------------------------------------- [ Upstream commit e3862093ee93fcfbdadcb7957f5f8974fffa806a ] bcm_sf2_mdio_register() calls of_phy_find_device() and then phy_device_remove() in a loop to remove existing PHY devices. of_phy_find_device() eventually calls bus_find_device(), which calls get_device() on the returned struct device * to increment the refcount. The current implementation does not decrement the refcount, which causes memory leak. This commit adds the missing phy_device_free() call to decrement the refcount via put_device() to balance the refcount. Fixes: 771089c2a485 ("net: dsa: bcm_sf2: Ensure that MDIO diversion is used") Signed-off-by: Joe Hattori Tested-by: Florian Fainelli Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20240806011327.3817861-1-joe@pf.is.s.u-tokyo.ac.jp Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: wukuan Signed-off-by: wangxin --- drivers/net/dsa/bcm_sf2.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index affe2e1d8f8e..e55712fd79ef 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -559,8 +559,10 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) of_remove_property(child, prop); phydev = of_phy_find_device(child); - if (phydev) + if (phydev) { phy_device_remove(phydev); + phy_device_free(phydev); + } } err = mdiobus_register(priv->slave_mii_bus); -- Gitee From 70894a82ff4244cbe1de94fcc171d12261abf43d Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 6 Aug 2024 13:46:47 -0400 Subject: [PATCH 05/18] padata: Fix possible divide-by-0 panic in padata_mt_helper() stable inclusion from stable-v5.10.224 commit ab8b397d5997d8c37610252528edc54bebf9f6d3 category: bugfix issue: NA CVE: NA Signed-off-by: wukuan --------------------------------------- commit 6d45e1c948a8b7ed6ceddb14319af69424db730c upstream. We are hit with a not easily reproducible divide-by-0 panic in padata.c at bootup time. [ 10.017908] Oops: divide error: 0000 1 PREEMPT SMP NOPTI [ 10.017908] CPU: 26 PID: 2627 Comm: kworker/u1666:1 Not tainted 6.10.0-15.el10.x86_64 #1 [ 10.017908] Hardware name: Lenovo ThinkSystem SR950 [7X12CTO1WW]/[7X12CTO1WW], BIOS [PSE140J-2.30] 07/20/2021 [ 10.017908] Workqueue: events_unbound padata_mt_helper [ 10.017908] RIP: 0010:padata_mt_helper+0x39/0xb0 : [ 10.017963] Call Trace: [ 10.017968] [ 10.018004] ? padata_mt_helper+0x39/0xb0 [ 10.018084] process_one_work+0x174/0x330 [ 10.018093] worker_thread+0x266/0x3a0 [ 10.018111] kthread+0xcf/0x100 [ 10.018124] ret_from_fork+0x31/0x50 [ 10.018138] ret_from_fork_asm+0x1a/0x30 [ 10.018147] Looking at the padata_mt_helper() function, the only way a divide-by-0 panic can happen is when ps->chunk_size is 0. The way that chunk_size is initialized in padata_do_multithreaded(), chunk_size can be 0 when the min_chunk in the passed-in padata_mt_job structure is 0. Fix this divide-by-0 panic by making sure that chunk_size will be at least 1 no matter what the input parameters are. Link: https://lkml.kernel.org/r/20240806174647.1050398-1-longman@redhat.com Fixes: 004ed42638f4 ("padata: add basic support for multithreaded jobs") Signed-off-by: Waiman Long Cc: Daniel Jordan Cc: Steffen Klassert Cc: Waiman Long Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman Signed-off-by: wukuan Signed-off-by: wangxin --- kernel/padata.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/padata.c b/kernel/padata.c index 11ca3ebd8b12..e199f6bb5a85 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -521,6 +521,13 @@ void __init padata_do_multithreaded(struct padata_mt_job *job) ps.chunk_size = max(ps.chunk_size, job->min_chunk); ps.chunk_size = roundup(ps.chunk_size, job->align); + /* + * chunk_size can be 0 if the caller sets min_chunk to 0. So force it + * to at least 1 to prevent divide-by-0 panic in padata_mt_helper().` + */ + if (!ps.chunk_size) + ps.chunk_size = 1U; + list_for_each_entry(pw, &works, pw_list) queue_work(system_unbound_wq, &pw->pw_work); -- Gitee From e365452ea568951ec8d05534338b1c5a0b4ed94e Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Wed, 19 Jun 2024 21:42:45 +0200 Subject: [PATCH 06/18] USB: serial: garmin_gps: use struct_size() to allocate pkt stable inclusion from stable-v6.11 commit df8c0b8a03e871431587a13a6765cb4c601e1573 category: bugfix issue: NA CVE: NA Signed-off-by: wukuan --------------------------------------- Use the struct_size macro to calculate the size of the pkt, which includes a trailing flexible array. Suggested-by: Nathan Chancellor Signed-off-by: Javier Carrasco Reviewed-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Signed-off-by: Johan Hovold Signed-off-by: wukuan Signed-off-by: wangxin --- drivers/usb/serial/garmin_gps.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c index c02c19bb1183..c9c90dab505a 100644 --- a/drivers/usb/serial/garmin_gps.c +++ b/drivers/usb/serial/garmin_gps.c @@ -267,8 +267,7 @@ static int pkt_add(struct garmin_data *garmin_data_p, /* process only packets containing data ... */ if (data_length) { - pkt = kmalloc(sizeof(struct garmin_packet)+data_length, - GFP_ATOMIC); + pkt = kmalloc(struct_size(pkt, data, data_length), GFP_ATOMIC); if (!pkt) return 0; -- Gitee From aecba9f93f3c13bc4e8072d5a7e3cdd12f286321 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Fri, 2 Aug 2024 12:47:36 +0800 Subject: [PATCH 07/18] drm/client: fix null pointer dereference in drm_client_modeset_probe stable inclusion from stable-v5.10.224 commit 5291d4f73452c91e8a11f71207617e3e234d418e category: bugfix issue: NA CVE: NA Signed-off-by: wukuan --------------------------------------- commit 113fd6372a5bb3689aba8ef5b8a265ed1529a78f upstream. In drm_client_modeset_probe(), the return value of drm_mode_duplicate() is assigned to modeset->mode, which will lead to a possible NULL pointer dereference on failure of drm_mode_duplicate(). Add a check to avoid npd. Cc: stable@vger.kernel.org Fixes: cf13909aee05 ("drm/fb-helper: Move out modeset config code") Signed-off-by: Ma Ke Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240802044736.1570345-1-make24@iscas.ac.cn Signed-off-by: Greg Kroah-Hartman Signed-off-by: wukuan Signed-off-by: wangxin --- drivers/gpu/drm/drm_client_modeset.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index 0df45940c3ea..6695960925bd 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -860,6 +860,11 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, } modeset->mode = drm_mode_duplicate(dev, mode); + if (!modeset->mode) { + ret = -ENOMEM; + break; + } + drm_connector_get(connector); modeset->connectors[modeset->num_connectors++] = connector; modeset->x = offset->x; -- Gitee From 8926a5404f2acaeabbba7e4af91d95aeb2705b21 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 30 Jul 2024 12:26:55 +0200 Subject: [PATCH 08/18] PCI/DPC: Fix use-after-free on concurrent DPC and hot-removal stable inclusion from stable-v5.10.224 commit c52f9e1a9eb40f13993142c331a6cfd334d4b91d category: bugfix issue: NA CVE: CVE-2024-42302 Signed-off-by: wukuan --------------------------------------- commit 11a1f4bc47362700fcbde717292158873fb847ed upstream. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keith reports a use-after-free when a DPC event occurs concurrently to hot-removal of the same portion of the hierarchy: The dpc_handler() awaits readiness of the secondary bus below the Downstream Port where the DPC event occurred. To do so, it polls the config space of the first child device on the secondary bus. If that child device is concurrently removed, accesses to its struct pci_dev cause the kernel to oops. That's because pci_bridge_wait_for_secondary_bus() neglects to hold a reference on the child device. Before v6.3, the function was only called on resume from system sleep or on runtime resume. Holding a reference wasn't necessary back then because the pciehp IRQ thread could never run concurrently. (On resume from system sleep, IRQs are not enabled until after the resume_noirq phase. And runtime resume is always awaited before a PCI device is removed.) However starting with v6.3, pci_bridge_wait_for_secondary_bus() is also called on a DPC event. Commit 53b54ad074de ("PCI/DPC: Await readiness of secondary bus after reset"), which introduced that, failed to appreciate that pci_bridge_wait_for_secondary_bus() now needs to hold a reference on the child device because dpc_handler() and pciehp may indeed run concurrently. The commit was backported to v5.10+ stable kernels, so that's the oldest one affected. Add the missing reference acquisition. Abridged stack trace: BUG: unable to handle page fault for address: 00000000091400c0 CPU: 15 PID: 2464 Comm: irq/53-pcie-dpc 6.9.0 RIP: pci_bus_read_config_dword+0x17/0x50 pci_dev_wait() pci_bridge_wait_for_secondary_bus() dpc_reset_link() pcie_do_recovery() dpc_handler() Fixes: 53b54ad074de ("PCI/DPC: Await readiness of secondary bus after reset") Closes: https://lore.kernel.org/r/20240612181625.3604512-3-kbusch@meta.com/ Link: https://lore.kernel.org/linux-pci/8e4bcd4116fd94f592f2bf2749f168099c480ddf.1718707743.git.lukas@wunner.de Reported-by: Keith Busch Tested-by: Keith Busch Signed-off-by: Lukas Wunner Signed-off-by: Krzysztof Wilczyński Reviewed-by: Keith Busch Reviewed-by: Mika Westerberg Cc: stable@vger.kernel.org # v5.10+ Signed-off-by: Greg Kroah-Hartman Signed-off-by: wukuan Signed-off-by: wangxin --- drivers/pci/pci.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d37013d007b6..36bf25bf69e7 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4807,7 +4807,7 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type, int timeout) { struct pci_dev *child; - int delay; + int delay, ret = 0; if (pci_dev_is_disconnected(dev)) return 0; @@ -4835,8 +4835,8 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type, return 0; } - child = list_first_entry(&dev->subordinate->devices, struct pci_dev, - bus_list); + child = pci_dev_get(list_first_entry(&dev->subordinate->devices, + struct pci_dev, bus_list)); up_read(&pci_bus_sem); /* @@ -4846,7 +4846,7 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type, if (!pci_is_pcie(dev)) { pci_dbg(dev, "waiting %d ms for secondary bus\n", 1000 + delay); msleep(1000 + delay); - return 0; + goto put_child; } /* @@ -4867,7 +4867,7 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type, * until the timeout expires. */ if (!pcie_downstream_port(dev)) - return 0; + goto put_child; if (pcie_get_speed_cap(dev) <= PCIE_SPEED_5_0GT) { pci_dbg(dev, "waiting %d ms for downstream link\n", delay); @@ -4878,11 +4878,16 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type, if (!pcie_wait_for_link_delay(dev, true, delay)) { /* Did not train, no need to wait any further */ pci_info(dev, "Data Link Layer Link Active not set in 1000 msec\n"); - return -ENOTTY; + ret = -ENOTTY; + goto put_child; } } - return pci_dev_wait(child, reset_type, timeout - delay); + ret = pci_dev_wait(child, reset_type, timeout - delay); + +put_child: + pci_dev_put(child); + return ret; } void pci_reset_secondary_bus(struct pci_dev *dev) -- Gitee From df9fddedc1ec4b519d82a845de2721ce6277f5d4 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 21 May 2024 16:01:00 +0200 Subject: [PATCH 09/18] net: relax socket state check at accept time. stable inclusion from stable-v5.10.223 commit 6e03006548c66b979f4e5e9fc797aac4dad82822 category: bugfix issue: NA CVE: CVE-2024-36484 Signed-off-by: wukuan --------------------------------------- commit 26afda78cda3da974fd4c287962c169e9462c495 upstream. Christoph reported the following splat: WARNING: CPU: 1 PID: 772 at net/ipv4/af_inet.c:761 __inet_accept+0x1f4/0x4a0 Modules linked in: CPU: 1 PID: 772 Comm: syz-executor510 Not tainted 6.9.0-rc7-g7da7119fe22b #56 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 RIP: 0010:__inet_accept+0x1f4/0x4a0 net/ipv4/af_inet.c:759 Code: 04 38 84 c0 0f 85 87 00 00 00 41 c7 04 24 03 00 00 00 48 83 c4 10 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc e8 ec b7 da fd <0f> 0b e9 7f fe ff ff e8 e0 b7 da fd 0f 0b e9 fe fe ff ff 89 d9 80 RSP: 0018:ffffc90000c2fc58 EFLAGS: 00010293 RAX: ffffffff836bdd14 RBX: 0000000000000000 RCX: ffff888104668000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: dffffc0000000000 R08: ffffffff836bdb89 R09: fffff52000185f64 R10: dffffc0000000000 R11: fffff52000185f64 R12: dffffc0000000000 R13: 1ffff92000185f98 R14: ffff88810754d880 R15: ffff8881007b7800 FS: 000000001c772880(0000) GS:ffff88811b280000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fb9fcf2e178 CR3: 00000001045d2002 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: inet_accept+0x138/0x1d0 net/ipv4/af_inet.c:786 do_accept+0x435/0x620 net/socket.c:1929 __sys_accept4_file net/socket.c:1969 [inline] __sys_accept4+0x9b/0x110 net/socket.c:1999 __do_sys_accept net/socket.c:2016 [inline] __se_sys_accept net/socket.c:2013 [inline] __x64_sys_accept+0x7d/0x90 net/socket.c:2013 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x58/0x100 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x4315f9 Code: fd ff 48 81 c4 80 00 00 00 e9 f1 fe ff ff 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 ab b4 fd ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ffdb26d9c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002b RAX: ffffffffffffffda RBX: 0000000000400300 RCX: 00000000004315f9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000004 RBP: 00000000006e1018 R08: 0000000000400300 R09: 0000000000400300 R10: 0000000000400300 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000040cdf0 R14: 000000000040ce80 R15: 0000000000000055 The reproducer invokes shutdown() before entering the listener status. After commit 94062790aedb ("tcp: defer shutdown(SEND_SHUTDOWN) for TCP_SYN_RECV sockets"), the above causes the child to reach the accept syscall in FIN_WAIT1 status. Eric noted we can relax the existing assertion in __inet_accept() Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/490 Suggested-by: Eric Dumazet Fixes: 94062790aedb ("tcp: defer shutdown(SEND_SHUTDOWN) for TCP_SYN_RECV sockets") Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/23ab880a44d8cfd967e84de8b93dbf48848e3d8c.1716299669.git.pabeni@redhat.com Signed-off-by: Paolo Abeni Signed-off-by: Nikolay Kuratov Signed-off-by: Greg Kroah-Hartman Signed-off-by: wukuan Signed-off-by: wangxin --- net/ipv4/af_inet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 475a19db3713..ce42626663de 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -758,7 +758,9 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags, sock_rps_record_flow(sk2); WARN_ON(!((1 << sk2->sk_state) & (TCPF_ESTABLISHED | TCPF_SYN_RECV | - TCPF_CLOSE_WAIT | TCPF_CLOSE))); + TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | + TCPF_CLOSING | TCPF_CLOSE_WAIT | + TCPF_CLOSE))); sock_graft(sk2, newsock); -- Gitee From 685692191e729f74db1536dde710e7feb14445b9 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Tue, 9 Jul 2024 19:33:11 +0800 Subject: [PATCH 10/18] drm/gma500: fix null pointer dereference in cdv_intel_lvds_get_modes stable inclusion from stable-5.10.224 commit b6ac46a00188cde50ffba233e6efb366354a1de5 category: bugfix issue: NA CVE: NA Signed-off-by: wangxin --------------------------------------- commit cb520c3f366c77e8d69e4e2e2781a8ce48d98e79 upstream. In cdv_intel_lvds_get_modes(), the return value of drm_mode_duplicate() is assigned to mode, which will lead to a NULL pointer dereference on failure of drm_mode_duplicate(). Add a check to avoid npd. Cc: stable@vger.kernel.org Fixes: 6a227d5fd6c4 ("gma500: Add support for Cedarview") Signed-off-by: Ma Ke Signed-off-by: Patrik Jakobsson Link: https://patchwork.freedesktop.org/patch/msgid/20240709113311.37168-1-make24@iscas.ac.cn Signed-off-by: Greg Kroah-Hartman Signed-off-by: wangxin --- drivers/gpu/drm/gma500/cdv_intel_lvds.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/gma500/cdv_intel_lvds.c b/drivers/gpu/drm/gma500/cdv_intel_lvds.c index eaaf4efec217..b13c34fa29ed 100644 --- a/drivers/gpu/drm/gma500/cdv_intel_lvds.c +++ b/drivers/gpu/drm/gma500/cdv_intel_lvds.c @@ -310,6 +310,9 @@ static int cdv_intel_lvds_get_modes(struct drm_connector *connector) if (mode_dev->panel_fixed_mode != NULL) { struct drm_display_mode *mode = drm_mode_duplicate(dev, mode_dev->panel_fixed_mode); + if (!mode) + return 0; + drm_mode_probed_add(connector, mode); return 1; } -- Gitee From 5b0ee8608ab8802f7f15c50a6053c9930fcec60c Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Fri, 17 May 2024 00:47:10 +0000 Subject: [PATCH 11/18] ntp: Safeguard against time_constant overflow stable inclusion from stable-5.10.224 commit f3405f4997b0233719afdec7289593f9e2ac9fd0 category: bugfix issue: NA CVE: NA Signed-off-by: wangxin --------------------------------------- commit 06c03c8edce333b9ad9c6b207d93d3a5ae7c10c0 upstream. Using syzkaller with the recently reintroduced signed integer overflow sanitizer produces this UBSAN report: UBSAN: signed-integer-overflow in ../kernel/time/ntp.c:738:18 9223372036854775806 + 4 cannot be represented in type 'long' Call Trace: handle_overflow+0x171/0x1b0 __do_adjtimex+0x1236/0x1440 do_adjtimex+0x2be/0x740 The user supplied time_constant value is incremented by four and then clamped to the operating range. Before commit eea83d896e31 ("ntp: NTP4 user space bits update") the user supplied value was sanity checked to be in the operating range. That change removed the sanity check and relied on clamping after incrementing which does not work correctly when the user supplied value is in the overflow zone of the '+ 4' operation. The operation requires CAP_SYS_TIME and the side effect of the overflow is NTP getting out of sync. Similar to the fixups for time_maxerror and time_esterror, clamp the user space supplied value to the operating range. [ tglx: Switch to clamping ] Fixes: eea83d896e31 ("ntp: NTP4 user space bits update") Signed-off-by: Justin Stitt Signed-off-by: Thomas Gleixner Cc: Miroslav Lichvar Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240517-b4-sio-ntp-c-v2-1-f3a80096f36f@google.com Closes: https://github.com/KSPP/linux/issues/352 Signed-off-by: Greg Kroah-Hartman Signed-off-by: wangxin --- kernel/time/ntp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 069ca78fb0bf..0966a36ef3ca 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -685,11 +685,10 @@ static inline void process_adjtimex_modes(const struct __kernel_timex *txc, time_esterror = txc->esterror; if (txc->modes & ADJ_TIMECONST) { - time_constant = txc->constant; + time_constant = clamp(txc->constant, (long long)0, (long long)MAXTC); if (!(time_status & STA_NANO)) time_constant += 4; - time_constant = min(time_constant, (long)MAXTC); - time_constant = max(time_constant, 0l); + time_constant = clamp(time_constant, (long)0, (long)MAXTC); } if (txc->modes & ADJ_TAI && -- Gitee From 4d2951c5a92efddde2c420ddacc2585730480105 Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Fri, 17 May 2024 20:22:44 +0000 Subject: [PATCH 12/18] ntp: Clamp maxerror and esterror to operating range stable inclusion from stable-5.10.224 commit dc335b92e5f1c92f0d9904afff5bc0d9cc112a4c category: bugfix issue: NA CVE: NA Signed-off-by: wangxin --------------------------------------- [ Upstream commit 87d571d6fb77ec342a985afa8744bb9bb75b3622 ] Using syzkaller alongside the newly reintroduced signed integer overflow sanitizer spits out this report: UBSAN: signed-integer-overflow in ../kernel/time/ntp.c:461:16 9223372036854775807 + 500 cannot be represented in type 'long' Call Trace: handle_overflow+0x171/0x1b0 second_overflow+0x2d6/0x500 accumulate_nsecs_to_secs+0x60/0x160 timekeeping_advance+0x1fe/0x890 update_wall_time+0x10/0x30 time_maxerror is unconditionally incremented and the result is checked against NTP_PHASE_LIMIT, but the increment itself can overflow, resulting in wrap-around to negative space. Before commit eea83d896e31 ("ntp: NTP4 user space bits update") the user supplied value was sanity checked to be in the operating range. That change removed the sanity check and relied on clamping in handle_overflow() which does not work correctly when the user supplied value is in the overflow zone of the '+ 500' operation. The operation requires CAP_SYS_TIME and the side effect of the overflow is NTP getting out of sync. Miroslav confirmed that the input value should be clamped to the operating range and the same applies to time_esterror. The latter is not used by the kernel, but the value still should be in the operating range as it was before the sanity check got removed. Clamp them to the operating range. [ tglx: Changed it to clamping and included time_esterror ] Fixes: eea83d896e31 ("ntp: NTP4 user space bits update") Signed-off-by: Justin Stitt Signed-off-by: Thomas Gleixner Cc: Miroslav Lichvar Link: https://lore.kernel.org/all/20240517-b4-sio-ntp-usec-v2-1-d539180f2b79@google.com Closes: https://github.com/KSPP/linux/issues/354 Signed-off-by: Sasha Levin [ cast things to long long to fix compiler warnings - gregkh ] Signed-off-by: Greg Kroah-Hartman Signed-off-by: wangxin --- kernel/time/ntp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 0966a36ef3ca..02d96c007673 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -679,10 +679,10 @@ static inline void process_adjtimex_modes(const struct __kernel_timex *txc, } if (txc->modes & ADJ_MAXERROR) - time_maxerror = txc->maxerror; + time_maxerror = clamp(txc->maxerror, (long long)0, (long long)NTP_PHASE_LIMIT); if (txc->modes & ADJ_ESTERROR) - time_esterror = txc->esterror; + time_esterror = clamp(txc->esterror, (long long)0, (long long)NTP_PHASE_LIMIT); if (txc->modes & ADJ_TIMECONST) { time_constant = clamp(txc->constant, (long long)0, (long long)MAXTC); -- Gitee From 5faaf5cb9eda4098f6b657d38ab4efe969d7053a Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 9 Jul 2024 13:38:41 +0200 Subject: [PATCH 13/18] usb: vhci-hcd: Do not drop references before new references are gained stable inclusion from stable-5.10.224 commit 4dacdb9720aaab10b6be121eae55820174d97174 category: bugfix issue: NA CVE: NA Signed-off-by: wangxin --------------------------------------- commit afdcfd3d6fcdeca2735ca8d994c5f2d24a368f0a upstream. At a few places the driver carries stale pointers to references that can still be used. Make sure that does not happen. This strictly speaking closes ZDI-CAN-22273, though there may be similar races in the driver. Signed-off-by: Oliver Neukum Cc: stable Acked-by: Shuah Khan Link: https://lore.kernel.org/r/20240709113851.14691-1-oneukum@suse.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: wangxin --- drivers/usb/usbip/vhci_hcd.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index b07b2925ff78..affcb928771d 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -745,6 +745,7 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag * */ if (usb_pipedevice(urb->pipe) == 0) { + struct usb_device *old; __u8 type = usb_pipetype(urb->pipe); struct usb_ctrlrequest *ctrlreq = (struct usb_ctrlrequest *) urb->setup_packet; @@ -755,14 +756,15 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag goto no_need_xmit; } + old = vdev->udev; switch (ctrlreq->bRequest) { case USB_REQ_SET_ADDRESS: /* set_address may come when a device is reset */ dev_info(dev, "SetAddress Request (%d) to port %d\n", ctrlreq->wValue, vdev->rhport); - usb_put_dev(vdev->udev); vdev->udev = usb_get_dev(urb->dev); + usb_put_dev(old); spin_lock(&vdev->ud.lock); vdev->ud.status = VDEV_ST_USED; @@ -781,8 +783,8 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag usbip_dbg_vhci_hc( "Not yet?:Get_Descriptor to device 0 (get max pipe size)\n"); - usb_put_dev(vdev->udev); vdev->udev = usb_get_dev(urb->dev); + usb_put_dev(old); goto out; default: @@ -1095,6 +1097,7 @@ static void vhci_shutdown_connection(struct usbip_device *ud) static void vhci_device_reset(struct usbip_device *ud) { struct vhci_device *vdev = container_of(ud, struct vhci_device, ud); + struct usb_device *old = vdev->udev; unsigned long flags; spin_lock_irqsave(&ud->lock, flags); @@ -1102,8 +1105,8 @@ static void vhci_device_reset(struct usbip_device *ud) vdev->speed = 0; vdev->devid = 0; - usb_put_dev(vdev->udev); vdev->udev = NULL; + usb_put_dev(old); if (ud->tcp_socket) { sockfd_put(ud->tcp_socket); -- Gitee From 41ae579265be0da3fa22d561971d558f69ba8b5f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 12 Jul 2024 12:42:09 -0700 Subject: [PATCH 14/18] driver core: Fix uevent_show() vs driver detach race stable inclusion from stable-5.10.224 commit f098e8fc7227166206256c18d56ab622039108b1 category: bugfix issue: NA CVE: NA Signed-off-by: wangxin --------------------------------------- commit 15fffc6a5624b13b428bb1c6e9088e32a55eb82c upstream. uevent_show() wants to de-reference dev->driver->name. There is no clean way for a device attribute to de-reference dev->driver unless that attribute is defined via (struct device_driver).dev_groups. Instead, the anti-pattern of taking the device_lock() in the attribute handler risks deadlocks with code paths that remove device attributes while holding the lock. This deadlock is typically invisible to lockdep given the device_lock() is marked lockdep_set_novalidate_class(), but some subsystems allocate a local lockdep key for @dev->mutex to reveal reports of the form: ====================================================== WARNING: possible circular locking dependency detected 6.10.0-rc7+ #275 Tainted: G OE N ------------------------------------------------------ modprobe/2374 is trying to acquire lock: ffff8c2270070de0 (kn->active#6){++++}-{0:0}, at: __kernfs_remove+0xde/0x220 but task is already holding lock: ffff8c22016e88f8 (&cxl_root_key){+.+.}-{3:3}, at: device_release_driver_internal+0x39/0x210 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&cxl_root_key){+.+.}-{3:3}: __mutex_lock+0x99/0xc30 uevent_show+0xac/0x130 dev_attr_show+0x18/0x40 sysfs_kf_seq_show+0xac/0xf0 seq_read_iter+0x110/0x450 vfs_read+0x25b/0x340 ksys_read+0x67/0xf0 do_syscall_64+0x75/0x190 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #0 (kn->active#6){++++}-{0:0}: __lock_acquire+0x121a/0x1fa0 lock_acquire+0xd6/0x2e0 kernfs_drain+0x1e9/0x200 __kernfs_remove+0xde/0x220 kernfs_remove_by_name_ns+0x5e/0xa0 device_del+0x168/0x410 device_unregister+0x13/0x60 devres_release_all+0xb8/0x110 device_unbind_cleanup+0xe/0x70 device_release_driver_internal+0x1c7/0x210 driver_detach+0x47/0x90 bus_remove_driver+0x6c/0xf0 cxl_acpi_exit+0xc/0x11 [cxl_acpi] __do_sys_delete_module.isra.0+0x181/0x260 do_syscall_64+0x75/0x190 entry_SYSCALL_64_after_hwframe+0x76/0x7e The observation though is that driver objects are typically much longer lived than device objects. It is reasonable to perform lockless de-reference of a @driver pointer even if it is racing detach from a device. Given the infrequency of driver unregistration, use synchronize_rcu() in module_remove_driver() to close any potential races. It is potentially overkill to suffer synchronize_rcu() just to handle the rare module removal racing uevent_show() event. Thanks to Tetsuo Handa for the debug analysis of the syzbot report [1]. Fixes: c0a40097f0bc ("drivers: core: synchronize really_probe() and dev_uevent()") Reported-by: syzbot+4762dd74e32532cda5ff@syzkaller.appspotmail.com Reported-by: Tetsuo Handa Closes: http://lore.kernel.org/5aa5558f-90a4-4864-b1b1-5d6784c5607d@I-love.SAKURA.ne.jp [1] Link: http://lore.kernel.org/669073b8ea479_5fffa294c1@dwillia2-xfh.jf.intel.com.notmuch Cc: stable@vger.kernel.org Cc: Ashish Sangwan Cc: Namjae Jeon Cc: Dirk Behme Cc: Greg Kroah-Hartman Cc: Rafael J. Wysocki Signed-off-by: Dan Williams Link: https://lore.kernel.org/r/172081332794.577428.9738802016494057132.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: wangxin --- drivers/base/core.c | 13 ++++++++----- drivers/base/module.c | 4 ++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 0b9a2b2f8c45..1886b55e0231 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -1894,6 +1895,7 @@ static int dev_uevent(struct kset *kset, struct kobject *kobj, struct kobj_uevent_env *env) { struct device *dev = kobj_to_dev(kobj); + struct device_driver *driver; int retval = 0; /* add device node properties if present */ @@ -1922,8 +1924,12 @@ static int dev_uevent(struct kset *kset, struct kobject *kobj, if (dev->type && dev->type->name) add_uevent_var(env, "DEVTYPE=%s", dev->type->name); - if (dev->driver) - add_uevent_var(env, "DRIVER=%s", dev->driver->name); + /* Synchronize with module_remove_driver() */ + rcu_read_lock(); + driver = READ_ONCE(dev->driver); + if (driver) + add_uevent_var(env, "DRIVER=%s", driver->name); + rcu_read_unlock(); /* Add common DT information about the device */ of_device_uevent(dev, env); @@ -1993,11 +1999,8 @@ static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, if (!env) return -ENOMEM; - /* Synchronize with really_probe() */ - device_lock(dev); /* let the kset specific function add its keys */ retval = kset->uevent_ops->uevent(kset, &dev->kobj, env); - device_unlock(dev); if (retval) goto out; diff --git a/drivers/base/module.c b/drivers/base/module.c index 46ad4d636731..851cc5367c04 100644 --- a/drivers/base/module.c +++ b/drivers/base/module.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "base.h" static char *make_driver_name(struct device_driver *drv) @@ -77,6 +78,9 @@ void module_remove_driver(struct device_driver *drv) if (!drv) return; + /* Synchronize with dev_uevent() */ + synchronize_rcu(); + sysfs_remove_link(&drv->p->kobj, "module"); if (drv->owner) -- Gitee From a339b0ed072f076cd57ffa3762d59c546caa532e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Aug 2024 13:24:55 +0000 Subject: [PATCH 15/18] gtp: pull network headers in gtp_dev_xmit() mainline inclusion from mainline-v6.11 commit 3a3be7ff9224f424e485287b54be00d2c6bd9c40 category: bugfix issue: NA CVE: NA Signed-off-by: liujia --------------------------------------- syzbot/KMSAN reported use of uninit-value in get_dev_xmit() [1] We must make sure the IPv4 or Ipv6 header is pulled in skb->head before accessing fields in them. Use pskb_inet_may_pull() to fix this issue. [1] BUG: KMSAN: uninit-value in ipv6_pdp_find drivers/net/gtp.c:220 [inline] BUG: KMSAN: uninit-value in gtp_build_skb_ip6 drivers/net/gtp.c:1229 [inline] BUG: KMSAN: uninit-value in gtp_dev_xmit+0x1424/0x2540 drivers/net/gtp.c:1281 ipv6_pdp_find drivers/net/gtp.c:220 [inline] gtp_build_skb_ip6 drivers/net/gtp.c:1229 [inline] gtp_dev_xmit+0x1424/0x2540 drivers/net/gtp.c:1281 __netdev_start_xmit include/linux/netdevice.h:4913 [inline] netdev_start_xmit include/linux/netdevice.h:4922 [inline] xmit_one net/core/dev.c:3580 [inline] dev_hard_start_xmit+0x247/0xa20 net/core/dev.c:3596 __dev_queue_xmit+0x358c/0x5610 net/core/dev.c:4423 dev_queue_xmit include/linux/netdevice.h:3105 [inline] packet_xmit+0x9c/0x6c0 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3145 [inline] packet_sendmsg+0x90e3/0xa3a0 net/packet/af_packet.c:3177 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x30f/0x380 net/socket.c:745 __sys_sendto+0x685/0x830 net/socket.c:2204 __do_sys_sendto net/socket.c:2216 [inline] __se_sys_sendto net/socket.c:2212 [inline] __x64_sys_sendto+0x125/0x1d0 net/socket.c:2212 x64_sys_call+0x3799/0x3c10 arch/x86/include/generated/asm/syscalls_64.h:45 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Uninit was created at: slab_post_alloc_hook mm/slub.c:3994 [inline] slab_alloc_node mm/slub.c:4037 [inline] kmem_cache_alloc_node_noprof+0x6bf/0xb80 mm/slub.c:4080 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:583 __alloc_skb+0x363/0x7b0 net/core/skbuff.c:674 alloc_skb include/linux/skbuff.h:1320 [inline] alloc_skb_with_frags+0xc8/0xbf0 net/core/skbuff.c:6526 sock_alloc_send_pskb+0xa81/0xbf0 net/core/sock.c:2815 packet_alloc_skb net/packet/af_packet.c:2994 [inline] packet_snd net/packet/af_packet.c:3088 [inline] packet_sendmsg+0x749c/0xa3a0 net/packet/af_packet.c:3177 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x30f/0x380 net/socket.c:745 __sys_sendto+0x685/0x830 net/socket.c:2204 __do_sys_sendto net/socket.c:2216 [inline] __se_sys_sendto net/socket.c:2212 [inline] __x64_sys_sendto+0x125/0x1d0 net/socket.c:2212 x64_sys_call+0x3799/0x3c10 arch/x86/include/generated/asm/syscalls_64.h:45 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f CPU: 0 UID: 0 PID: 7115 Comm: syz.1.515 Not tainted 6.11.0-rc1-syzkaller-00043-g94ede2a3e913 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 06/27/2024 Fixes: 999cb275c807 ("gtp: add IPv6 support") Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Signed-off-by: Eric Dumazet Cc: Harald Welte Reviewed-by: Pablo Neira Ayuso Link: https://patch.msgid.link/20240808132455.3413916-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: liujia <1184918406@qq.com> Signed-off-by: wangxin --- drivers/net/gtp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 1c46bc4d2705..ce0fbdea9c9c 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -564,6 +564,9 @@ static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev) if (skb_cow_head(skb, dev->needed_headroom)) goto tx_err; + if (!pskb_inet_may_pull(skb)) + goto tx_err; + skb_reset_inner_headers(skb); /* PDP context lookups in gtp_build_skb_*() need rcu read-side lock. */ -- Gitee From 280363312c4ea287673e9a170d108bbbbf105743 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Fri, 9 Aug 2024 11:16:28 +0800 Subject: [PATCH 16/18] vfs: Don't evict inode under the inode lru traversing context stable inclusion from stable-6.11 commit 2a0629834cd82f05d424bbc193374f9a43d1f87d category: bugfix issue: NA CVE: NA Signed-off-by: wangxin --------------------------------------- The inode reclaiming process(See function prune_icache_sb) collects all reclaimable inodes and mark them with I_FREEING flag at first, at that time, other processes will be stuck if they try getting these inodes (See function find_inode_fast), then the reclaiming process destroy the inodes by function dispose_list(). Some filesystems(eg. ext4 with ea_inode feature, ubifs with xattr) may do inode lookup in the inode evicting callback function, if the inode lookup is operated under the inode lru traversing context, deadlock problems may happen. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Case 1: In function ext4_evict_inode(), the ea inode lookup could happen if ea_inode feature is enabled, the lookup process will be stuck under the evicting context like this: 1. File A has inode i_reg and an ea inode i_ea 2. getfattr(A, xattr_buf) // i_ea is added into lru // lru->i_ea 3. Then, following three processes running like this: PA PB echo 2 > /proc/sys/vm/drop_caches shrink_slab prune_dcache_sb // i_reg is added into lru, lru->i_ea->i_reg prune_icache_sb list_lru_walk_one inode_lru_isolate i_ea->i_state |= I_FREEING // set inode state inode_lru_isolate __iget(i_reg) spin_unlock(&i_reg->i_lock) spin_unlock(lru_lock) rm file A i_reg->nlink = 0 iput(i_reg) // i_reg->nlink is 0, do evict ext4_evict_inode ext4_xattr_delete_inode ext4_xattr_inode_dec_ref_all ext4_xattr_inode_iget ext4_iget(i_ea->i_ino) iget_locked find_inode_fast __wait_on_freeing_inode(i_ea) ----→ AA deadlock dispose_list // cannot be executed by prune_icache_sb wake_up_bit(&i_ea->i_state) Case 2: In deleted inode writing function ubifs_jnl_write_inode(), file deleting process holds BASEHD's wbuf->io_mutex while getting the xattr inode, which could race with inode reclaiming process(The reclaiming process could try locking BASEHD's wbuf->io_mutex in inode evicting function), then an ABBA deadlock problem would happen as following: 1. File A has inode ia and a xattr(with inode ixa), regular file B has inode ib and a xattr. 2. getfattr(A, xattr_buf) // ixa is added into lru // lru->ixa 3. Then, following three processes running like this: PA PB PC echo 2 > /proc/sys/vm/drop_caches shrink_slab prune_dcache_sb // ib and ia are added into lru, lru->ixa->ib->ia prune_icache_sb list_lru_walk_one inode_lru_isolate ixa->i_state |= I_FREEING // set inode state inode_lru_isolate __iget(ib) spin_unlock(&ib->i_lock) spin_unlock(lru_lock) rm file B ib->nlink = 0 rm file A iput(ia) ubifs_evict_inode(ia) ubifs_jnl_delete_inode(ia) ubifs_jnl_write_inode(ia) make_reservation(BASEHD) // Lock wbuf->io_mutex ubifs_iget(ixa->i_ino) iget_locked find_inode_fast __wait_on_freeing_inode(ixa) | iput(ib) // ib->nlink is 0, do evict | ubifs_evict_inode | ubifs_jnl_delete_inode(ib) ↓ ubifs_jnl_write_inode ABBA deadlock ←-----make_reservation(BASEHD) dispose_list // cannot be executed by prune_icache_sb wake_up_bit(&ixa->i_state) Fix the possible deadlock by using new inode state flag I_LRU_ISOLATING to pin the inode in memory while inode_lru_isolate() reclaims its pages instead of using ordinary inode reference. This way inode deletion cannot be triggered from inode_lru_isolate() thus avoiding the deadlock. evict() is made to wait for I_LRU_ISOLATING to be cleared before proceeding with inode cleanup. Link: https://lore.kernel.org/all/37c29c42-7685-d1f0-067d-63582ffac405@huaweicloud.com/ Link: https://bugzilla.kernel.org/show_bug.cgi?id=219022 Fixes: e50e5129f384 ("ext4: xattr-in-inode support") Fixes: 7959cf3a7506 ("ubifs: journal: Handle xattrs like files") Cc: stable@vger.kernel.org Signed-off-by: Zhihao Cheng Link: https://lore.kernel.org/r/20240809031628.1069873-1-chengzhihao@huaweicloud.com Reviewed-by: Jan Kara Suggested-by: Jan Kara Suggested-by: Mateusz Guzik Signed-off-by: Christian Brauner Signed-off-by: wangxin --- fs/inode.c | 39 +++++++++++++++++++++++++++++++++++++-- include/linux/fs.h | 5 +++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index c698a02d3e54..22337ec5d3a0 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -455,6 +455,39 @@ static void inode_lru_list_del(struct inode *inode) this_cpu_dec(nr_unused); } +static void inode_pin_lru_isolating(struct inode *inode) +{ + lockdep_assert_held(&inode->i_lock); + WARN_ON(inode->i_state & (I_LRU_ISOLATING | I_FREEING | I_WILL_FREE)); + inode->i_state |= I_LRU_ISOLATING; +} + +static void inode_unpin_lru_isolating(struct inode *inode) +{ + spin_lock(&inode->i_lock); + WARN_ON(!(inode->i_state & I_LRU_ISOLATING)); + inode->i_state &= ~I_LRU_ISOLATING; + smp_mb(); + wake_up_bit(&inode->i_state, __I_LRU_ISOLATING); + spin_unlock(&inode->i_lock); +} + +static void inode_wait_for_lru_isolating(struct inode *inode) +{ + spin_lock(&inode->i_lock); + if (inode->i_state & I_LRU_ISOLATING) { + DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING); + wait_queue_head_t *wqh; + + wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING); + spin_unlock(&inode->i_lock); + __wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE); + spin_lock(&inode->i_lock); + WARN_ON(inode->i_state & I_LRU_ISOLATING); + } + spin_unlock(&inode->i_lock); +} + /** * inode_sb_list_add - add inode to the superblock list of inodes * @inode: inode to add @@ -567,6 +600,8 @@ static void evict(struct inode *inode) inode_sb_list_del(inode); + inode_wait_for_lru_isolating(inode); + /* * Wait for flusher thread to be done with the inode so that filesystem * does not start destroying it while writeback is still running. Since @@ -766,7 +801,7 @@ static enum lru_status inode_lru_isolate(struct list_head *item, } if (inode_has_buffers(inode) || inode->i_data.nrpages) { - __iget(inode); + inode_pin_lru_isolating(inode); spin_unlock(&inode->i_lock); spin_unlock(lru_lock); if (remove_inode_buffers(inode)) { @@ -779,7 +814,7 @@ static enum lru_status inode_lru_isolate(struct list_head *item, if (current->reclaim_state) current->reclaim_state->reclaimed_slab += reap; } - iput(inode); + inode_unpin_lru_isolating(inode); spin_lock(lru_lock); return LRU_RETRY; } diff --git a/include/linux/fs.h b/include/linux/fs.h index a0c79a5de2a8..f243e2021312 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2168,6 +2168,9 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * Used to detect that mark_inode_dirty() should not move * inode between dirty lists. * + * I_LRU_ISOLATING Inode is pinned being isolated from LRU without holding + * i_count. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? */ #define I_DIRTY_SYNC (1 << 0) @@ -2190,6 +2193,8 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, #define I_CREATING (1 << 15) #define I_DONTCACHE (1 << 16) #define I_SYNC_QUEUED (1 << 17) +#define __I_LRU_ISOLATING 19 +#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) -- Gitee From 26cf54177d3f7a8db866ee7b86d376cb530cc3e1 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Fri, 2 Aug 2024 16:58:22 -0700 Subject: [PATCH 17/18] memcg: protect concurrent access to mem_cgroup_idr stable inclusion from stable-6.11 commit 9972605a238339b85bd16b084eed5f18414d22db category: bugfix issue: NA CVE: NA Signed-off-by: wangxin --------------------------------------- Commit 73f576c04b94 ("mm: memcontrol: fix cgroup creation failure after many small jobs") decoupled the memcg IDs from the CSS ID space to fix the cgroup creation failures. It introduced IDR to maintain the memcg ID space. The IDR depends on external synchronization mechanisms for modifications. For the mem_cgroup_idr, the idr_alloc() and idr_replace() happen within css callback and thus are protected through cgroup_mutex from concurrent modifications. However idr_remove() for mem_cgroup_idr was not protected against concurrency and can be run concurrently for different memcgs when they hit their refcnt to zero. Fix that. We have been seeing list_lru based kernel crashes at a low frequency in our fleet for a long time. These crashes were in different part of list_lru code including list_lru_add(), list_lru_del() and reparenting code. Upon further inspection, it looked like for a given object (dentry and inode), the super_block's list_lru didn't have list_lru_one for the memcg of that object. The initial suspicions were either the object is not allocated through kmem_cache_alloc_lru() or somehow memcg_list_lru_alloc() failed to allocate list_lru_one() for a memcg but returned success. No evidence were found for these cases. Looking more deeply, we started seeing situations where valid memcg's id is not present in mem_cgroup_idr and in some cases multiple valid memcgs have same id and mem_cgroup_idr is pointing to one of them. So, the most reasonable explanation is that these situations can happen due to race between multiple idr_remove() calls or race between idr_alloc()/idr_replace() and idr_remove(). These races are causing multiple memcgs to acquire the same ID and then offlining of one of them would cleanup list_lrus on the system for all of them. Later access from other memcgs to the list_lru cause crashes due to missing list_lru_one. Link: https://lkml.kernel.org/r/20240802235822.1830976-1-shakeel.butt@linux.dev Fixes: 73f576c04b94 ("mm: memcontrol: fix cgroup creation failure after many small jobs") Signed-off-by: Shakeel Butt Acked-by: Muchun Song Reviewed-by: Roman Gushchin Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: wangxin --- mm/memcontrol.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6a7b65ec2069..7dd408f8f44c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5198,11 +5198,28 @@ static struct cftype mem_cgroup_legacy_files[] = { */ static DEFINE_IDR(mem_cgroup_idr); +static DEFINE_SPINLOCK(memcg_idr_lock); + +static int mem_cgroup_alloc_id(void) +{ + int ret; + + idr_preload(GFP_KERNEL); + spin_lock(&memcg_idr_lock); + ret = idr_alloc(&mem_cgroup_idr, NULL, 1, MEM_CGROUP_ID_MAX, + GFP_NOWAIT); + spin_unlock(&memcg_idr_lock); + idr_preload_end(); + return ret; +} static void mem_cgroup_id_remove(struct mem_cgroup *memcg) { if (memcg->id.id > 0) { + spin_lock(&memcg_idr_lock); idr_remove(&mem_cgroup_idr, memcg->id.id); + spin_unlock(&memcg_idr_lock); + memcg->id.id = 0; } } @@ -5337,9 +5354,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void) if (!memcg) return ERR_PTR(error); - memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL, - 1, MEM_CGROUP_ID_MAX, - GFP_KERNEL); + memcg->id.id = mem_cgroup_alloc_id(); if (memcg->id.id < 0) { error = memcg->id.id; goto fail; -- Gitee From 1e733bfc29466ceb1fcd69d38d5eaed2b3939483 Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Wed, 17 Jul 2024 15:00:23 +0200 Subject: [PATCH 18/18] refcount: Report UAF for refcount_sub_and_test(0) when counter==0 stable inclusion from stable-6.11 commit f91f7ac900e7342e0fd66093dfbf7cb8cb585a99 category: bugfix issue: NA CVE: NA Signed-off-by: wangxin --------------------------------------- When a reference counter is at zero and refcount_sub_and_test() is invoked to subtract zero, the function accepts this request without any warning and returns true. This behavior does not seem ideal because the counter being already at zero indicates a use-after-free. Furthermore, returning true by refcount_sub_and_test() in this case potentially results in a double-free done by its caller. Modify the underlying function __refcount_sub_and_test() to warn about this case as a use-after-free and have it return false to avoid the potential double-free. Signed-off-by: Petr Pavlu Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240717130023.5675-1-petr.pavlu@suse.com Signed-off-by: Kees Cook Signed-off-by: wangxin --- drivers/misc/lkdtm/refcount.c | 15 +++++++++++++++ include/linux/refcount.h | 4 ++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/misc/lkdtm/refcount.c b/drivers/misc/lkdtm/refcount.c index de7c5ab528d9..af384fd6935c 100644 --- a/drivers/misc/lkdtm/refcount.c +++ b/drivers/misc/lkdtm/refcount.c @@ -182,6 +182,21 @@ void lkdtm_REFCOUNT_SUB_AND_TEST_NEGATIVE(void) check_negative(&neg, 3); } +/* + * A refcount_sub_and_test() by zero when the counter is at zero should act like + * refcount_sub_and_test() above when going negative. + */ +static void lkdtm_REFCOUNT_SUB_AND_TEST_ZERO(void) +{ + refcount_t neg = REFCOUNT_INIT(0); + + pr_info("attempting bad refcount_sub_and_test() at zero\n"); + if (refcount_sub_and_test(0, &neg)) + pr_warn("Weird: refcount_sub_and_test() reported zero\n"); + + check_negative(&neg, 0); +} + static void check_from_zero(refcount_t *ref) { switch (refcount_read(ref)) { diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 497990c69b0b..3f17de9f7de4 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -274,12 +274,12 @@ static inline __must_check bool __refcount_sub_and_test(int i, refcount_t *r, in if (oldp) *oldp = old; - if (old == i) { + if (old > 0 && old == i) { smp_acquire__after_ctrl_dep(); return true; } - if (unlikely(old < 0 || old - i < 0)) + if (unlikely(old <= 0 || old - i < 0)) refcount_warn_saturate(r, REFCOUNT_SUB_UAF); return false; -- Gitee