From b8eaab217275ecdc9308f45e5d1963b09e74f122 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 Sep 2024 08:31:47 +0000 Subject: [PATCH 1/7] ipv6: avoid possible NULL deref in rt6_uncached_list_flush_dev() mainline inclusion from mainline-v6.12-rc1 commit 04ccecfa959d3b9ae7348780d8e379c6486176ac category: bugfix issue: #IB3U9S CVE: CVE-2024-47707 Signed-off-by: zhangshuqi --------------------------------------- Blamed commit accidentally removed a check for rt->rt6i_idev being NULL, as spotted by syzbot: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 1 UID: 0 PID: 10998 Comm: syz-executor Not tainted 6.11.0-rc6-syzkaller-00208-g625403177711 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 RIP: 0010:rt6_uncached_list_flush_dev net/ipv6/route.c:177 [inline] RIP: 0010:rt6_disable_ip+0x33e/0x7e0 net/ipv6/route.c:4914 Code: 41 80 3c 04 00 74 0a e8 90 d0 9b f7 48 8b 7c 24 08 48 8b 07 48 89 44 24 10 4c 89 f0 48 c1 e8 03 48 b9 00 00 00 00 00 fc ff df <80> 3c 08 00 74 08 4c 89 f7 e8 64 d0 9b f7 48 8b 44 24 18 49 39 06 RSP: 0018:ffffc900047374e0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 1ffff1100fdf8f33 RCX: dffffc0000000000 RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffff88807efc78c0 RBP: ffffc900047375d0 R08: 0000000000000003 R09: fffff520008e6e8c R10: dffffc0000000000 R11: fffff520008e6e8c R12: 1ffff1100fdf8f18 R13: ffff88807efc7998 R14: 0000000000000000 R15: ffff88807efc7930 FS: 0000000000000000(0000) GS:ffff8880b8900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020002a80 CR3: 0000000022f62000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: addrconf_ifdown+0x15d/0x1bd0 net/ipv6/addrconf.c:3856 addrconf_notify+0x3cb/0x1020 notifier_call_chain+0x19f/0x3e0 kernel/notifier.c:93 call_netdevice_notifiers_extack net/core/dev.c:2032 [inline] call_netdevice_notifiers net/core/dev.c:2046 [inline] unregister_netdevice_many_notify+0xd81/0x1c40 net/core/dev.c:11352 unregister_netdevice_many net/core/dev.c:11414 [inline] unregister_netdevice_queue+0x303/0x370 net/core/dev.c:11289 unregister_netdevice include/linux/netdevice.h:3129 [inline] __tun_detach+0x6b9/0x1600 drivers/net/tun.c:685 tun_detach drivers/net/tun.c:701 [inline] tun_chr_close+0x108/0x1b0 drivers/net/tun.c:3510 __fput+0x24a/0x8a0 fs/file_table.c:422 task_work_run+0x24f/0x310 kernel/task_work.c:228 exit_task_work include/linux/task_work.h:40 [inline] do_exit+0xa2f/0x27f0 kernel/exit.c:882 do_group_exit+0x207/0x2c0 kernel/exit.c:1031 __do_sys_exit_group kernel/exit.c:1042 [inline] __se_sys_exit_group kernel/exit.c:1040 [inline] __x64_sys_exit_group+0x3f/0x40 kernel/exit.c:1040 x64_sys_call+0x2634/0x2640 arch/x86/include/generated/asm/syscalls_64.h:232 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f1acc77def9 Code: Unable to access opcode bytes at 0x7f1acc77decf. RSP: 002b:00007ffeb26fa738 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f1acc77def9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000043 RBP: 00007f1acc7dd508 R08: 00007ffeb26f84d7 R09: 0000000000000003 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 R13: 0000000000000003 R14: 00000000ffffffff R15: 00007ffeb26fa8e0 Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:rt6_uncached_list_flush_dev net/ipv6/route.c:177 [inline] RIP: 0010:rt6_disable_ip+0x33e/0x7e0 net/ipv6/route.c:4914 Code: 41 80 3c 04 00 74 0a e8 90 d0 9b f7 48 8b 7c 24 08 48 8b 07 48 89 44 24 10 4c 89 f0 48 c1 e8 03 48 b9 00 00 00 00 00 fc ff df <80> 3c 08 00 74 08 4c 89 f7 e8 64 d0 9b f7 48 8b 44 24 18 49 39 06 RSP: 0018:ffffc900047374e0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 1ffff1100fdf8f33 RCX: dffffc0000000000 RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffff88807efc78c0 RBP: ffffc900047375d0 R08: 0000000000000003 R09: fffff520008e6e8c R10: dffffc0000000000 R11: fffff520008e6e8c R12: 1ffff1100fdf8f18 R13: ffff88807efc7998 R14: 0000000000000000 R15: ffff88807efc7930 FS: 0000000000000000(0000) GS:ffff8880b8900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020002a80 CR3: 0000000022f62000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: e332bc67cf5e ("ipv6: Don't call with rt6_uncached_list_flush_dev") Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Reviewed-by: David Ahern Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/20240913083147.3095442-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: zhangshuqi --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7e3eb0af4b0e..ac746c1a26e1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -174,7 +174,7 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) struct inet6_dev *rt_idev = rt->rt6i_idev; struct net_device *rt_dev = rt->dst.dev; - if (rt_idev->dev == dev) { + if (rt_idev && rt_idev->dev == dev) { rt->rt6i_idev = in6_dev_get(loopback_dev); in6_dev_put(rt_idev); } -- Gitee From c62d8c2ce3a4e6cceca53c28d84677c9b2e09198 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 22 Aug 2024 10:35:23 +0800 Subject: [PATCH 2/7] ext4: fix slab-use-after-free in ext4_split_extent_at() mainline inclusion from mainline-6.12 commit c26ab35702f8cd0cdc78f96aa5856bfb77be798f category: bugfix issue: #IB2ZTL CVE: CVE-2024-49881 Signed-off-by: zhangshuqi --------------------------------------- We hit the following use-after-free: ================================================================== BUG: KASAN: slab-use-after-free in ext4_split_extent_at+0xba8/0xcc0 Read of size 2 at addr ffff88810548ed08 by task kworker/u20:0/40 CPU: 0 PID: 40 Comm: kworker/u20:0 Not tainted 6.9.0-dirty #724 Call Trace: kasan_report+0x93/0xc0 ext4_split_extent_at+0xba8/0xcc0 ext4_split_extent.isra.0+0x18f/0x500 ext4_split_convert_extents+0x275/0x750 ext4_ext_handle_unwritten_extents+0x73e/0x1580 ext4_ext_map_blocks+0xe20/0x2dc0 ext4_map_blocks+0x724/0x1700 ext4_do_writepages+0x12d6/0x2a70 [...] Allocated by task 40: __kmalloc_noprof+0x1ac/0x480 ext4_find_extent+0xf3b/0x1e70 ext4_ext_map_blocks+0x188/0x2dc0 ext4_map_blocks+0x724/0x1700 ext4_do_writepages+0x12d6/0x2a70 [...] Freed by task 40: kfree+0xf1/0x2b0 ext4_find_extent+0xa71/0x1e70 ext4_ext_insert_extent+0xa22/0x3260 ext4_split_extent_at+0x3ef/0xcc0 ext4_split_extent.isra.0+0x18f/0x500 ext4_split_convert_extents+0x275/0x750 ext4_ext_handle_unwritten_extents+0x73e/0x1580 ext4_ext_map_blocks+0xe20/0x2dc0 ext4_map_blocks+0x724/0x1700 ext4_do_writepages+0x12d6/0x2a70 [...] ================================================================== The flow of issue triggering is as follows: ext4_split_extent_at path = *ppath ext4_ext_insert_extent(ppath) ext4_ext_create_new_leaf(ppath) ext4_find_extent(orig_path) path = *orig_path read_extent_tree_block // return -ENOMEM or -EIO ext4_free_ext_path(path) kfree(path) *orig_path = NULL a. If err is -ENOMEM: ext4_ext_dirty(path + path->p_depth) // path use-after-free !!! b. If err is -EIO and we have EXT_DEBUG defined: ext4_ext_show_leaf(path) eh = path[depth].p_hdr // path also use-after-free !!! So when trying to zeroout or fix the extent length, call ext4_find_extent() to update the path. In addition we use *ppath directly as an ext4_ext_show_leaf() input to avoid possible use-after-free when EXT_DEBUG is defined, and to avoid unnecessary path updates. Fixes: dfe5080939ea ("ext4: drop EXT4_EX_NOFREE_ON_ERR from rest of extents handling code") Cc: stable@kernel.org Signed-off-by: Baokun Li Reviewed-by: Jan Kara Reviewed-by: Ojaswin Mujoo Tested-by: Ojaswin Mujoo Link: https://patch.msgid.link/20240822023545.1994557-4-libaokun@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: zhangshuqi --- fs/ext4/extents.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e458995f2b5d..fff2a814036b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3260,6 +3260,25 @@ static int ext4_split_extent_at(handle_t *handle, if (err != -ENOSPC && err != -EDQUOT) goto out; + /* + * Update path is required because previous ext4_ext_insert_extent() + * may have freed or reallocated the path. Using EXT4_EX_NOFAIL + * guarantees that ext4_find_extent() will not return -ENOMEM, + * otherwise -ENOMEM will cause a retry in do_writepages(), and a + * WARN_ON may be triggered in ext4_da_update_reserve_space() due to + * an incorrect ee_len causing the i_reserved_data_blocks exception. + */ + path = ext4_find_extent(inode, ee_block, ppath, + flags | EXT4_EX_NOFAIL); + if (IS_ERR(path)) { + EXT4_ERROR_INODE(inode, "Failed split extent on %u, err %ld", + split, PTR_ERR(path)); + return PTR_ERR(path); + } + depth = ext_depth(inode); + ex = path[depth].p_ext; + *ppath = path; + if (EXT4_EXT_MAY_ZEROOUT & split_flag) { if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { if (split_flag & EXT4_EXT_DATA_VALID1) { @@ -3312,7 +3331,7 @@ static int ext4_split_extent_at(handle_t *handle, ext4_ext_dirty(handle, inode, path + path->p_depth); return err; out: - ext4_ext_show_leaf(inode, path); + ext4_ext_show_leaf(inode, *ppath); return err; } -- Gitee From d092eb4fa2fd823543d6b6df833599894c964d0f Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 22 Aug 2024 10:35:25 +0800 Subject: [PATCH 3/7] ext4: update orig_path in ext4_find_extent() mainline inclusion from mainline-5.10.227 commit 6766937d0327000ac1b87c97bbecdd28b0dd6599 category: bugfix issue: #IB2ZTL CVE: CVE-2024-49881 Signed-off-by: zhangshuqi --------------------------------------- commit 5b4b2dcace35f618fe361a87bae6f0d13af31bc1 upstream. In ext4_find_extent(), if the path is not big enough, we free it and set *orig_path to NULL. But after reallocating and successfully initializing the path, we don't update *orig_path, in which case the caller gets a valid path but a NULL ppath, and this may cause a NULL pointer dereference or a path memory leak. For example: ext4_split_extent path = *ppath = 2000 ext4_find_extent if (depth > path[0].p_maxdepth) kfree(path = 2000); *orig_path = path = NULL; path = kcalloc() = 3000 ext4_split_extent_at(*ppath = NULL) path = *ppath; ex = path[depth].p_ext; // NULL pointer dereference! ================================================================== BUG: kernel NULL pointer dereference, address: 0000000000000010 CPU: 6 UID: 0 PID: 576 Comm: fsstress Not tainted 6.11.0-rc2-dirty #847 RIP: 0010:ext4_split_extent_at+0x6d/0x560 Call Trace: ext4_split_extent.isra.0+0xcb/0x1b0 ext4_ext_convert_to_initialized+0x168/0x6c0 ext4_ext_handle_unwritten_extents+0x325/0x4d0 ext4_ext_map_blocks+0x520/0xdb0 ext4_map_blocks+0x2b0/0x690 ext4_iomap_begin+0x20e/0x2c0 [...] ================================================================== Therefore, *orig_path is updated when the extent lookup succeeds, so that the caller can safely use path or *ppath. Fixes: 10809df84a4d ("ext4: teach ext4_ext_find_extent() to realloc path if necessary") Cc: stable@kernel.org Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://patch.msgid.link/20240822023545.1994557-6-libaokun@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman Signed-off-by: zhangshuqi --- fs/ext4/extents.c | 3 ++- fs/ext4/move_extent.c | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index fff2a814036b..76c30f0f7c77 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -960,6 +960,8 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, ext4_ext_show_path(inode, path); + if (orig_path) + *orig_path = path; return path; err: @@ -3277,7 +3279,6 @@ static int ext4_split_extent_at(handle_t *handle, } depth = ext_depth(inode); ex = path[depth].p_ext; - *ppath = path; if (EXT4_EXT_MAY_ZEROOUT & split_flag) { if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index f8dd5d972c33..661a8544d781 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -36,7 +36,6 @@ get_ext_path(struct inode *inode, ext4_lblk_t lblock, *ppath = NULL; return -ENODATA; } - *ppath = path; return 0; } -- Gitee From 77811e49616c62da7fd8c2a31f3395c43f298b83 Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Tue, 4 May 2021 18:38:06 -0700 Subject: [PATCH 4/7] mm/mmap.c: don't unlock VMAs in remap_file_pages() mainline inclusion from mainline-5.13 commit fce000b1bc08c64c0cff4bb705b3970bd6fc1e34 category: bugfix issue: #IB2ZTL CVE: CVE-2024-47745 Signed-off-by: zhangshuqi --------------------------------------- Since this call uses MAP_FIXED, do_mmap() will munlock the necessary range. There is also an error in the loop test expression which will evaluate as false and the loop body has never execute. Link: https://lkml.kernel.org/r/20210223235010.2296915-1-Liam.Howlett@Oracle.com Signed-off-by: Liam R. Howlett Acked-by: Hugh Dickins Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: zhangshuqi --- mm/mmap.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 14e6bfdebbb8..6c171b1567a5 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3095,25 +3095,9 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, flags &= MAP_NONBLOCK; flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE; - if (vma->vm_flags & VM_LOCKED) { - struct vm_area_struct *tmp; + if (vma->vm_flags & VM_LOCKED) flags |= MAP_LOCKED; - /* drop PG_Mlocked flag for over-mapped range */ - for (tmp = vma; tmp->vm_start >= start + size; - tmp = tmp->vm_next) { - /* - * Split pmd and munlock page on the border - * of the range. - */ - vma_adjust_trans_huge(tmp, start, start + size, 0); - - munlock_vma_pages_range(tmp, - max(tmp->vm_start, start), - min(tmp->vm_end, start + size)); - } - } - file = get_file(vma->vm_file); ret = do_mmap(vma->vm_file, start, size, prot, flags, pgoff, &populate, NULL); -- Gitee From 64b575e33872a72368dbc2ea21f01598b7eb3388 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Thu, 2 Sep 2021 14:56:49 -0700 Subject: [PATCH 5/7] remap_file_pages: Use vma_lookup() instead of find_vma() mainline inclusion from mainline-5.15 commit 9b593cb20283e68e5e65b09ca10038935297f05b category: bugfix issue: #IB3U9S CVE: CVE-2024-47745 Signed-off-by: zhangshuqi --------------------------------------- Using vma_lookup() verifies the start address is contained in the found vma. This results in easier to read code. Link: https://lkml.kernel.org/r/20210817135234.1550204-1-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: zhangshuqi --- mm/mmap.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 6c171b1567a5..a12d6d24e9df 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3059,14 +3059,11 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, if (mmap_write_lock_killable(mm)) return -EINTR; - vma = find_vma(mm, start); + vma = vma_lookup(mm, start); if (!vma || !(vma->vm_flags & VM_SHARED)) goto out; - if (start < vma->vm_start) - goto out; - if (start + size > vma->vm_end) { struct vm_area_struct *next; -- Gitee From 896950a2ff6b3c71f45db429bf9f0b49ff52b30d Mon Sep 17 00:00:00 2001 From: Shu Han Date: Tue, 17 Sep 2024 17:41:04 +0800 Subject: [PATCH 6/7] mm: call the security_mmap_file() LSM hook in remap_file_pages() mainline inclusion from mainline-v6.12-rc1 commit ea7e2d5e49c05e5db1922387b09ca74aa40f46e2 category: bugfix issue: #IB3U9S CVE: CVE-2024-47745 Signed-off-by: zhangshuqi --------------------------------------- The remap_file_pages syscall handler calls do_mmap() directly, which doesn't contain the LSM security check. And if the process has called personality(READ_IMPLIES_EXEC) before and remap_file_pages() is called for RW pages, this will actually result in remapping the pages to RWX, bypassing a W^X policy enforced by SELinux. So we should check prot by security_mmap_file LSM hook in the remap_file_pages syscall handler before do_mmap() is called. Otherwise, it potentially permits an attacker to bypass a W^X policy enforced by SELinux. The bypass is similar to CVE-2016-10044, which bypass the same thing via AIO and can be found in [1]. The PoC: $ cat > test.c int main(void) { size_t pagesz = sysconf(_SC_PAGE_SIZE); int mfd = syscall(SYS_memfd_create, "test", 0); const char *buf = mmap(NULL, 4 * pagesz, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0); unsigned int old = syscall(SYS_personality, 0xffffffff); syscall(SYS_personality, READ_IMPLIES_EXEC | old); syscall(SYS_remap_file_pages, buf, pagesz, 0, 2, 0); syscall(SYS_personality, old); // show the RWX page exists even if W^X policy is enforced int fd = open("/proc/self/maps", O_RDONLY); unsigned char buf2[1024]; while (1) { int ret = read(fd, buf2, 1024); if (ret <= 0) break; write(1, buf2, ret); } close(fd); } $ gcc test.c -o test $ ./test | grep rwx 7f1836c34000-7f1836c35000 rwxs 00002000 00:01 2050 /memfd:test (deleted) Link: https://project-zero.issues.chromium.org/issues/42452389 [1] Cc: stable@vger.kernel.org Signed-off-by: Shu Han Acked-by: Stephen Smalley [PM: subject line tweaks] Signed-off-by: Paul Moore Signed-off-by: zhangshuqi --- mm/mmap.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/mmap.c b/mm/mmap.c index a12d6d24e9df..da6d91afdd78 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3096,8 +3096,12 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, flags |= MAP_LOCKED; file = get_file(vma->vm_file); + ret = security_mmap_file(vma->vm_file, prot, flags); + if (ret) + goto out_fput; ret = do_mmap(vma->vm_file, start, size, prot, flags, pgoff, &populate, NULL); +out_fput: fput(file); out: mmap_write_unlock(mm); -- Gitee From db020692b31bab7b7349fdb0986c5166562ce729 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 18 Oct 2024 18:14:15 +0200 Subject: [PATCH 7/7] mm: split critical region in remap_file_pages() and invoke LSMs in between mainline inclusion from mainline-v6.12-rc1 commit 58a039e679fe72bd0efa8b2abe669a7914bb4429 category: bugfix issue: #IB3U9S CVE: CVE-2024-47745 Signed-off-by: zhangshuqi --------------------------------------- Commit ea7e2d5e49c0 ("mm: call the security_mmap_file() LSM hook in remap_file_pages()") fixed a security issue, it added an LSM check when trying to remap file pages, so that LSMs have the opportunity to evaluate such action like for other memory operations such as mmap() and mprotect(). However, that commit called security_mmap_file() inside the mmap_lock lock, while the other calls do it before taking the lock, after commit 8b3ec6814c83 ("take security_mmap_file() outside of ->mmap_sem"). This caused lock inversion issue with IMA which was taking the mmap_lock and i_mutex lock in the opposite way when the remap_file_pages() system call was called. Solve the issue by splitting the critical region in remap_file_pages() in two regions: the first takes a read lock of mmap_lock, retrieves the VMA and the file descriptor associated, and calculates the 'prot' and 'flags' variables; the second takes a write lock on mmap_lock, checks that the VMA flags and the VMA file descriptor are the same as the ones obtained in the first critical region (otherwise the system call fails), and calls do_mmap(). In between, after releasing the read lock and before taking the write lock, call security_mmap_file(), and solve the lock inversion issue. Link: https://lkml.kernel.org/r/20241018161415.3845146-1-roberto.sassu@huaweicloud.com Fixes: ea7e2d5e49c0 ("mm: call the security_mmap_file() LSM hook in remap_file_pages()") Signed-off-by: Kirill A. Shutemov Signed-off-by: Roberto Sassu Reported-by: syzbot+1cd571a672400ef3a930@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-security-module/66f7b10e.050a0220.46d20.0036.GAE@google.com/ Tested-by: Roberto Sassu Reviewed-by: Roberto Sassu Reviewed-by: Jann Horn Reviewed-by: Lorenzo Stoakes Reviewed-by: Liam R. Howlett Reviewed-by: Paul Moore Tested-by: syzbot+1cd571a672400ef3a930@syzkaller.appspotmail.com Cc: Jarkko Sakkinen Cc: Dmitry Kasatkin Cc: Eric Snowberg Cc: James Morris Cc: Mimi Zohar Cc: "Serge E. Hallyn" Cc: Shu Han Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: zhangshuqi --- mm/mmap.c | 69 +++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 17 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index da6d91afdd78..04919dc9fbbf 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3040,6 +3040,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, unsigned long populate = 0; unsigned long ret = -EINVAL; struct file *file; + vm_flags_t vm_flags; pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/vm/remap_file_pages.rst.\n", current->comm, current->pid); @@ -3056,12 +3057,60 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, if (pgoff + (size >> PAGE_SHIFT) < pgoff) return ret; - if (mmap_write_lock_killable(mm)) + if (mmap_read_lock_killable(mm)) + return -EINTR; + + /* + * Look up VMA under read lock first so we can perform the security + * without holding locks (which can be problematic). We reacquire a + * write lock later and check nothing changed underneath us. + */ + vma = vma_lookup(mm, start); + + if (!vma || !(vma->vm_flags & VM_SHARED)) { + mmap_read_unlock(mm); + return -EINVAL; + } + + prot |= vma->vm_flags & VM_READ ? PROT_READ : 0; + prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0; + prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0; + + flags &= MAP_NONBLOCK; + flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE; + if (vma->vm_flags & VM_LOCKED) + flags |= MAP_LOCKED; + + /* Save vm_flags used to calculate prot and flags, and recheck later. */ + vm_flags = vma->vm_flags; + file = get_file(vma->vm_file); + + mmap_read_unlock(mm); + + /* Call outside mmap_lock to be consistent with other callers. */ + ret = security_mmap_file(file, prot, flags); + if (ret) { + fput(file); + return ret; + } + + ret = -EINVAL; + + /* OK security check passed, take write lock + let it rip. */ + if (mmap_write_lock_killable(mm)) { + fput(file); return -EINTR; + } vma = vma_lookup(mm, start); - if (!vma || !(vma->vm_flags & VM_SHARED)) + if (!vma) + goto out; + + /* Make sure things didn't change under us. */ + if (vma->vm_flags != vm_flags) + goto out; + if (vma->vm_file != file) goto out; if (start + size > vma->vm_end) { @@ -3086,25 +3135,11 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, goto out; } - prot |= vma->vm_flags & VM_READ ? PROT_READ : 0; - prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0; - prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0; - - flags &= MAP_NONBLOCK; - flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE; - if (vma->vm_flags & VM_LOCKED) - flags |= MAP_LOCKED; - - file = get_file(vma->vm_file); - ret = security_mmap_file(vma->vm_file, prot, flags); - if (ret) - goto out_fput; ret = do_mmap(vma->vm_file, start, size, prot, flags, pgoff, &populate, NULL); -out_fput: - fput(file); out: mmap_write_unlock(mm); + fput(file); if (populate) mm_populate(ret, populate); if (!IS_ERR_VALUE(ret)) -- Gitee