From 8f9ef72e6503201ba72cfb86b1af8ae9001e04c5 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso
Date: Mon, 15 Jul 2024 16:14:42 +0200
Subject: [PATCH 1/5] net: flow_dissector: use DEBUG_NET_WARN_ON_ONCE

mainline inclusion
from mainline-6.11
commit 120f1c857a73e52132e473dee89b340440cb692b
category: bugfix
issue: #IBNET5
CVE: CVE-2024-42321

Signed-off-by: zhangshuqi

---------------------------------------

The following splat is easy to reproduce upstream as well as in -stable
kernels. Florian Westphal provided the following commit:

  d1dab4f71d37 ("net: add and use __skb_get_hash_symmetric_net")

but this complementary fix has also been suggested by Willem de Bruijn
and can easily be backported to -stable kernels: use
DEBUG_NET_WARN_ON_ONCE instead to silence the following splat, given
that __skb_get_hash() is used by the nftables tracing infrastructure to
identify packets in traces.

[69133.561393] ------------[ cut here ]------------
[69133.561404] WARNING: CPU: 0 PID: 43576 at net/core/flow_dissector.c:1104 __skb_flow_dissect+0x134f/ [...]
[69133.561944] CPU: 0 PID: 43576 Comm: socat Not tainted 6.10.0-rc7+ #379
[69133.561959] RIP: 0010:__skb_flow_dissect+0x134f/0x2ad0
[69133.561970] Code: 83 f9 04 0f 84 b3 00 00 00 45 85 c9 0f 84 aa 00 00 00 41 83 f9 02 0f 84 81 fc ff ff 44 0f b7 b4 24 80 00 00 00 e9 8b f9 ff ff <0f> 0b e9 20 f3 ff ff 41 f6 c6 20 0f 84 e4 ef ff ff 48 8d 7b 12 e8
[69133.561979] RSP: 0018:ffffc90000006fc0 EFLAGS: 00010246
[69133.561988] RAX: 0000000000000000 RBX: ffffffff82f33e20 RCX: ffffffff81ab7e19
[69133.561994] RDX: dffffc0000000000 RSI: ffffc90000007388 RDI: ffff888103a1b418
[69133.562001] RBP: ffffc90000007310 R08: 0000000000000000 R09: 0000000000000000
[69133.562007] R10: ffffc90000007388 R11: ffffffff810cface R12: ffff888103a1b400
[69133.562013] R13: 0000000000000000 R14: ffffffff82f33e2a R15: ffffffff82f33e28
[69133.562020] FS:  00007f40f7131740(0000) GS:ffff888390800000(0000) knlGS:0000000000000000
[69133.562027] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[69133.562033] CR2: 00007f40f7346ee0 CR3: 000000015d200001 CR4: 00000000001706f0
[69133.562040] Call Trace:
[69133.562044]  <IRQ>
[69133.562049]  ? __warn+0x9f/0x1a0
[ 1211.841384]  ? __skb_flow_dissect+0x107e/0x2860
[...]
[ 1211.841496]  ? bpf_flow_dissect+0x160/0x160
[ 1211.841753]  __skb_get_hash+0x97/0x280
[ 1211.841765]  ? __skb_get_hash_symmetric+0x230/0x230
[ 1211.841776]  ? mod_find+0xbf/0xe0
[ 1211.841786]  ? get_stack_info_noinstr+0x12/0xe0
[ 1211.841798]  ? bpf_ksym_find+0x56/0xe0
[ 1211.841807]  ? __rcu_read_unlock+0x2a/0x70
[ 1211.841819]  nft_trace_init+0x1b9/0x1c0 [nf_tables]
[ 1211.841895]  ? nft_trace_notify+0x830/0x830 [nf_tables]
[ 1211.841964]  ? get_stack_info+0x2b/0x80
[ 1211.841975]  ? nft_do_chain_arp+0x80/0x80 [nf_tables]
[ 1211.842044]  nft_do_chain+0x79c/0x850 [nf_tables]

Fixes: 9b52e3f267a6 ("flow_dissector: handle no-skb use case")
Suggested-by: Willem de Bruijn
Signed-off-by: Pablo Neira Ayuso
Reviewed-by: Willem de Bruijn
Link: https://patch.msgid.link/20240715141442.43775-1-pablo@netfilter.org
Signed-off-by: Paolo Abeni

Conflicts:
	include/linux/netdevice.h

Signed-off-by: zhangshuqi
---
 include/linux/netdevice.h | 6 ++++++
 net/core/flow_dissector.c | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a3817aff662c..b32841bcc836 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -5261,6 +5261,12 @@ do {						\
 })
 #endif
 
+#if defined(CONFIG_DEBUG_NET)
+#define DEBUG_NET_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
+#else
+#define DEBUG_NET_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
+#endif
+
 /*
  *	The list of packet types we will receive (as opposed to discard)
  *	and the routines to invoke.
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 3d5192177560..097d0d7c6cf8 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -972,7 +972,7 @@ bool __skb_flow_dissect(const struct net *net,
 		}
 	}
 
-	WARN_ON_ONCE(!net);
+	DEBUG_NET_WARN_ON_ONCE(!net);
 	if (net) {
 		enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;
 		struct bpf_prog_array *run_array;
-- 
Gitee
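A quick way to convince yourself of the macro semantics above: the sketch
below mimics the two DEBUG_NET_WARN_ON_ONCE() definitions in plain
userspace C (GNU C, for the statement expression). WARN_ON_ONCE() and
BUILD_BUG_ON_INVALID() here are simplified stand-ins modelled on their
kernel counterparts, not the real implementations. With CONFIG_DEBUG_NET
defined, the condition warns once at runtime; without it, the condition is
only type-checked at compile time and generates no code, which is why the
splat disappears on production builds.

#include <stdio.h>

#define WARN_ON_ONCE(cond)						\
	({								\
		static int warned;					\
		int __ret = !!(cond);					\
		if (__ret && !warned) {					\
			warned = 1;					\
			fprintf(stderr, "WARNING: %s\n", #cond);	\
		}							\
		__ret;							\
	})

/* Type-checks the expression but emits no code, like the kernel's. */
#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((long)(e))))

#if defined(CONFIG_DEBUG_NET)
#define DEBUG_NET_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
#else
#define DEBUG_NET_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
#endif

int main(void)
{
	const void *net = NULL;	/* stand-in for the dissector's net pointer */
	int i;

	/* Build with -DCONFIG_DEBUG_NET: one warning. Without: silence. */
	for (i = 0; i < 3; i++)
		DEBUG_NET_WARN_ON_ONCE(!net);

	return 0;
}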
From f299740f1b5e7bc7e590de5a462600608db028bf Mon Sep 17 00:00:00 2001
From: Trond Myklebust
Date: Fri, 13 Sep 2024 13:57:04 -0400
Subject: [PATCH 2/5] filemap: Fix bounds checking in filemap_read()

mainline inclusion
from mainline-6.12
commit ace149e0830c380ddfce7e466fe860ca502fe4ee
category: bugfix
issue: #IBNET5
CVE: CVE-2024-50272

Signed-off-by: zhangshuqi

---------------------------------------

If the caller supplies an iocb->ki_pos value that is close to the
filesystem upper limit, and an iterator with a count that causes us to
overflow that limit, then filemap_read() enters an infinite loop.

This behaviour was discovered when testing xfstests generic/525 with the
"localio" optimisation for loopback NFS mounts.

Reported-by: Mike Snitzer
Fixes: c2a9737f45e2 ("vfs,mm: fix a dead loop in truncate_inode_pages_range()")
Tested-by: Mike Snitzer
Signed-off-by: Trond Myklebust
Signed-off-by: Linus Torvalds
Signed-off-by: zhangshuqi
---
 mm/filemap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 3b0d8c6dd587..0ee312dfedf0 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2206,7 +2206,7 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
 	if (unlikely(!iov_iter_count(iter)))
 		return 0;
 
-	iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
+	iov_iter_truncate(iter, inode->i_sb->s_maxbytes - iocb->ki_pos);
 
 	index = *ppos >> PAGE_SHIFT;
 	prev_index = ra->prev_pos >> PAGE_SHIFT;
-- 
Gitee
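The arithmetic behind this one-line fix is easy to check in isolation.
The sketch below is illustrative userspace C, not kernel code; the
filesystem limit and request sizes are invented. With the old bound, the
iterator can keep a positive count even though every remaining byte lies
beyond s_maxbytes, so the read loop never makes progress; the new bound
shrinks the count to exactly what remains below the limit.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t s_maxbytes = 1ULL << 44;		/* invented fs limit */
	const uint64_t ki_pos = s_maxbytes - 512;	/* read starts near it */
	const uint64_t count = 4096;			/* caller asks for 4 KiB */

	/* Old bound: caps the request against the limit alone. */
	uint64_t old_cap = count < s_maxbytes ? count : s_maxbytes;

	/* New bound: caps the request against what remains below the limit. */
	uint64_t remaining = s_maxbytes - ki_pos;
	uint64_t new_cap = count < remaining ? count : remaining;

	printf("old cap: %llu bytes (overshoots the limit by 3584)\n",
	       (unsigned long long)old_cap);
	printf("new cap: %llu bytes (read terminates at s_maxbytes)\n",
	       (unsigned long long)new_cap);
	return 0;
}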
From f207f2757461c6278eaf3c0552d1484ca8a32e53 Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes
Date: Fri, 15 Nov 2024 12:36:51 +0000
Subject: [PATCH 3/5] mm: avoid unsafe VMA hook invocation when error arises
 on mmap hook

mainline inclusion
from mainline-5.10.231
commit f68a0236337e39b77e0c4976fb80f0b680d8d652
category: bugfix
issue: #IBNET5
CVE: CVE-2024-53096

Signed-off-by: zhangshuqi

---------------------------------------

[ Upstream commit 3dd6ed34ce1f2356a77fb88edafb5ec96784e3cf ]

Patch series "fix error handling in mmap_region() and refactor
(hotfixes)", v4.

mmap_region() is somewhat terrifying, with spaghetti-like control flow
and numerous means by which issues can arise and incomplete state, memory
leaks and other unpleasantness can occur.

A large amount of the complexity arises from trying to handle errors late
in the process of mapping a VMA, which forms the basis of recently
observed issues with resource leaks and observable inconsistent state.

This series goes to great lengths to simplify how mmap_region() works and
to avoid unwinding errors late on in the process of setting up the VMA
for the new mapping, and equally avoids such operations occurring while
the VMA is in an inconsistent state.

The patches in this series comprise the minimal changes required to
resolve existing issues in mmap_region() error handling, in order that
they can be hotfixed and backported. There is additionally a follow-up
series which goes further, separated out from the v1 series and sent and
updated separately.

This patch (of 5):

After an attempted mmap() fails, we are no longer in a situation where we
can safely interact with VMA hooks. This is currently not enforced,
meaning that we need complicated handling to ensure we do not incorrectly
call these hooks.

We can avoid the whole issue by treating the VMA as suspect the moment
that the file->f_ops->mmap() function reports an error by replacing
whatever VMA operations were installed with a dummy empty set of VMA
operations.

We do so through a new helper function internal to mm - mmap_file() -
which is both more logically named than the existing call_mmap() function
and correctly isolates handling of the vm_op reassignment to mm.

All the existing invocations of call_mmap() outside of mm are ultimately
nested within the call_mmap() from mm, which we now replace.

It is therefore safe to leave call_mmap() in place as a convenience
function (and to avoid churn). The invokers are:

 ovl_file_operations -> mmap -> ovl_mmap() -> backing_file_mmap()
 coda_file_operations -> mmap -> coda_file_mmap()
 shm_file_operations -> shm_mmap()
 shm_file_operations_huge -> shm_mmap()
 dma_buf_fops -> dma_buf_mmap_internal -> i915_dmabuf_ops -> i915_gem_dmabuf_mmap()

None of these callers interact with vm_ops or mappings in a problematic
way on error, quickly exiting out.

Link: https://lkml.kernel.org/r/cover.1730224667.git.lorenzo.stoakes@oracle.com
Link: https://lkml.kernel.org/r/d41fd763496fd0048a962f3fd9407dc72dd4fd86.1730224667.git.lorenzo.stoakes@oracle.com
Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails")
Signed-off-by: Lorenzo Stoakes
Reported-by: Jann Horn
Reviewed-by: Liam R. Howlett
Reviewed-by: Vlastimil Babka
Reviewed-by: Jann Horn
Cc: Andreas Larsson
Cc: Catalin Marinas
Cc: David S. Miller
Cc: Helge Deller
Cc: James E.J. Bottomley
Cc: Linus Torvalds
Cc: Mark Brown
Cc: Peter Xu
Cc: Will Deacon
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Lorenzo Stoakes
Signed-off-by: Greg Kroah-Hartman
Signed-off-by: zhangshuqi
---
 mm/internal.h | 12 ++++++++++++
 mm/mmap.c     |  4 ++--
 mm/nommu.c    |  4 ++--
 mm/util.c     | 18 ++++++++++++++++++
 4 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index e85e249a4d9b..3f64cddbabd6 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -153,6 +153,18 @@ enum scan_balance {
 
 void page_writeback_init(void);
 
+/*
+ * This is a file-backed mapping, and is about to be memory mapped - invoke its
+ * mmap hook and safely handle error conditions. On error, VMA hooks will be
+ * mutated.
+ *
+ * @file: File which backs the mapping.
+ * @vma:  VMA which we are mapping.
+ *
+ * Returns: 0 if success, error otherwise.
+ */
+int mmap_file(struct file *file, struct vm_area_struct *vma);
+
 vm_fault_t do_swap_page(struct vm_fault *vmf);
 
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
diff --git a/mm/mmap.c b/mm/mmap.c
index 14e6bfdebbb8..270eefbe70d2 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1842,7 +1842,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		 * new file must not have been exposed to user-space, yet.
 		 */
 		vma->vm_file = get_file(file);
-		error = call_mmap(file, vma);
+		error = mmap_file(file, vma);
 		if (error)
 			goto unmap_and_free_vma;
 
@@ -1857,7 +1857,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 
 		addr = vma->vm_start;
 
-		/* If vm_flags changed after call_mmap(), we should try merge vma again
+		/* If vm_flags changed after mmap_file(), we should try merge vma again
 		 * as we may succeed this time.
 		 */
 		if (unlikely(vm_flags != vma->vm_flags && prev)) {
diff --git a/mm/nommu.c b/mm/nommu.c
index 0faf39b32cdb..fdacc3d119c3 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -955,7 +955,7 @@ static int do_mmap_shared_file(struct vm_area_struct *vma)
 {
 	int ret;
 
-	ret = call_mmap(vma->vm_file, vma);
+	ret = mmap_file(vma->vm_file, vma);
 	if (ret == 0) {
 		vma->vm_region->vm_top = vma->vm_region->vm_end;
 		return 0;
@@ -986,7 +986,7 @@ static int do_mmap_private(struct vm_area_struct *vma,
 	 * - VM_MAYSHARE will be set if it may attempt to share
 	 */
 	if (capabilities & NOMMU_MAP_DIRECT) {
-		ret = call_mmap(vma->vm_file, vma);
+		ret = mmap_file(vma->vm_file, vma);
 		if (ret == 0) {
 			/* shouldn't return success if we're not sharing */
 			BUG_ON(!(vma->vm_flags & VM_MAYSHARE));
diff --git a/mm/util.c b/mm/util.c
index 7fd3c2bb3e4f..481d6cbae4c3 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1073,3 +1073,21 @@ int __weak memcmp_pages(struct page *page1, struct page *page2)
 	kunmap_atomic(addr1);
 	return ret;
 }
+
+int mmap_file(struct file *file, struct vm_area_struct *vma)
+{
+	static const struct vm_operations_struct dummy_vm_ops = {};
+	int err = call_mmap(file, vma);
+
+	if (likely(!err))
+		return 0;
+
+	/*
+	 * OK, we tried to call the file hook for mmap(), but an error
+	 * arose. The mapping is in an inconsistent state and we must not
+	 * invoke any further hooks on it.
+	 */
+	vma->vm_ops = &dummy_vm_ops;
+
+	return err;
+}
-- 
Gitee
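The core trick of this patch - neutering vm_ops the moment the driver's
mmap hook fails - can be illustrated outside the kernel. In the sketch
below, struct vm_ops, struct vma, driver_mmap() and friends are
simplified hypothetical stand-ins, not kernel types; only the shape of
mmap_file() mirrors the hunk above.

#include <stdio.h>

struct vma;

struct vm_ops {
	void (*close)(struct vma *vma);
};

struct vma {
	const struct vm_ops *vm_ops;
};

static void driver_close(struct vma *vma)
{
	(void)vma;
	puts("driver close hook");
}

static const struct vm_ops driver_ops = { .close = driver_close };

/* A hypothetical driver mmap hook: installs its ops, then fails. */
static int driver_mmap(struct vma *vma)
{
	vma->vm_ops = &driver_ops;	/* state partially set up... */
	return -1;			/* ...then the hook reports failure */
}

static int mmap_file(struct vma *vma)
{
	static const struct vm_ops dummy_vm_ops = {};
	int err = driver_mmap(vma);

	if (!err)
		return 0;

	/*
	 * The VMA is now suspect: neuter its hooks so later teardown paths
	 * cannot call back into half-initialized driver state.
	 */
	vma->vm_ops = &dummy_vm_ops;
	return err;
}

int main(void)
{
	struct vma vma = { .vm_ops = NULL };

	if (mmap_file(&vma) && vma.vm_ops->close == NULL)
		puts("error path: close hook safely absent");
	return 0;
}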
From 80db2e183717511f7e4b7a9e642f404ad973ce56 Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes
Date: Fri, 15 Nov 2024 12:36:52 +0000
Subject: [PATCH 4/5] mm: unconditionally close VMAs on error

mainline inclusion
from mainline-5.10.231
commit 7a450540c82f4fa99f60727acd5b402f3d1786f7
category: bugfix
issue: #IBNET5
CVE: CVE-2024-53096

Signed-off-by: zhangshuqi

---------------------------------------

[ Upstream commit 4080ef1579b2413435413988d14ac8c68e4d42c8 ]

Incorrect invocation of VMA callbacks when the VMA is no longer in a
consistent state is bug prone and risky to perform.

With regards to the important vm_ops->close() callback, we have gone to
great lengths to try to track whether or not we ought to close VMAs.

Rather than doing so and risking making a mistake somewhere, instead
unconditionally close and reset vma->vm_ops to an empty dummy operations
set with a NULL .close operator.

We introduce a new function to do so - vma_close() - and simplify the
existing logic which tracked whether we needed to close or not.

This simplifies the logic, avoids incorrect double-calling of the
.close() callback and allows us to update error paths to simply call
vma_close() unconditionally - making VMA closure idempotent.

Link: https://lkml.kernel.org/r/28e89dda96f68c505cb6f8e9fc9b57c3e9f74b42.1730224667.git.lorenzo.stoakes@oracle.com
Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails")
Signed-off-by: Lorenzo Stoakes
Reported-by: Jann Horn
Reviewed-by: Vlastimil Babka
Reviewed-by: Liam R. Howlett
Reviewed-by: Jann Horn
Cc: Andreas Larsson
Cc: Catalin Marinas
Cc: David S. Miller
Cc: Helge Deller
Cc: James E.J. Bottomley
Cc: Linus Torvalds
Cc: Mark Brown
Cc: Peter Xu
Cc: Will Deacon
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Lorenzo Stoakes
Signed-off-by: Greg Kroah-Hartman
Signed-off-by: zhangshuqi
---
 mm/internal.h |  7 +++++++
 mm/mmap.c     |  9 +++------
 mm/nommu.c    |  3 +--
 mm/util.c     | 15 +++++++++++++++
 4 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index 3f64cddbabd6..61f3d1697072 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -165,6 +165,13 @@ void page_writeback_init(void);
  */
 int mmap_file(struct file *file, struct vm_area_struct *vma);
 
+/*
+ * If the VMA has a close hook then close it, and since closing it might leave
+ * it in an inconsistent state which makes the use of any hooks suspect, clear
+ * them down by installing dummy empty hooks.
+ */
+void vma_close(struct vm_area_struct *vma);
+
 vm_fault_t do_swap_page(struct vm_fault *vmf);
 
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
diff --git a/mm/mmap.c b/mm/mmap.c
index 270eefbe70d2..56035ee6e354 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -183,8 +183,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 	struct vm_area_struct *next = vma->vm_next;
 
 	might_sleep();
-	if (vma->vm_ops && vma->vm_ops->close)
-		vma->vm_ops->close(vma);
+	vma_close(vma);
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	mpol_put(vma_policy(vma));
@@ -1936,8 +1935,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	return addr;
 
 close_and_free_vma:
-	if (vma->vm_ops && vma->vm_ops->close)
-		vma->vm_ops->close(vma);
+	vma_close(vma);
 unmap_and_free_vma:
 	vma->vm_file = NULL;
 	fput(file);
@@ -2838,8 +2836,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	return 0;
 
 	/* Clean everything up if vma_adjust failed. */
-	if (new->vm_ops && new->vm_ops->close)
-		new->vm_ops->close(new);
+	vma_close(new);
 	if (new->vm_file)
 		fput(new->vm_file);
 	unlink_anon_vmas(new);
diff --git a/mm/nommu.c b/mm/nommu.c
index fdacc3d119c3..f46a883e93e4 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -662,8 +662,7 @@ static void delete_vma_from_mm(struct vm_area_struct *vma)
 */
 static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
 {
-	if (vma->vm_ops && vma->vm_ops->close)
-		vma->vm_ops->close(vma);
+	vma_close(vma);
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	put_nommu_region(vma->vm_region);
diff --git a/mm/util.c b/mm/util.c
index 481d6cbae4c3..dd88af382e81 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1091,3 +1091,18 @@ int mmap_file(struct file *file, struct vm_area_struct *vma)
 
 	return err;
 }
+
+void vma_close(struct vm_area_struct *vma)
+{
+	static const struct vm_operations_struct dummy_vm_ops = {};
+
+	if (vma->vm_ops && vma->vm_ops->close) {
+		vma->vm_ops->close(vma);
+
+		/*
+		 * The mapping is in an inconsistent state, and no further
+		 * hooks may be invoked upon it.
+		 */
+		vma->vm_ops = &dummy_vm_ops;
+	}
+}
-- 
Gitee
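The idempotency argument is worth seeing concretely: after the first
vma_close(), the installed dummy ops have a NULL .close, so any later
call falls through the guard and does nothing. The sketch below reuses
simplified stand-in types (not kernel code) to demonstrate the
double-call case that previously had to be tracked by hand.

#include <stdio.h>

struct vma;

struct vm_ops {
	void (*close)(struct vma *vma);
};

struct vma {
	const struct vm_ops *vm_ops;
};

static void driver_close(struct vma *vma)
{
	(void)vma;
	puts("driver close hook runs");
}

static const struct vm_ops driver_ops = { .close = driver_close };

static void vma_close(struct vma *vma)
{
	static const struct vm_ops dummy_vm_ops = {};

	if (vma->vm_ops && vma->vm_ops->close) {
		vma->vm_ops->close(vma);
		/* No further hooks may be invoked on this mapping. */
		vma->vm_ops = &dummy_vm_ops;
	}
}

int main(void)
{
	struct vma vma = { .vm_ops = &driver_ops };

	vma_close(&vma);	/* the close hook runs exactly once */
	vma_close(&vma);	/* dummy ops installed: a harmless no-op */
	return 0;
}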
From 1351de97635ef09a9466f911feb2139f7281d692 Mon Sep 17 00:00:00 2001
From: Lorenzo Stoakes
Date: Fri, 15 Nov 2024 12:36:54 +0000
Subject: [PATCH 5/5] mm: resolve faulty mmap_region() error path behaviour

mainline inclusion
from mainline-5.10.231
commit 43323a4e5b3f8ccc08e2f835abfdc7ee9da8f6ed
category: bugfix
issue: #IBNET5
CVE: CVE-2024-53096

Signed-off-by: zhangshuqi

---------------------------------------

[ Upstream commit 5de195060b2e251a835f622759550e6202167641 ]

The mmap_region() function is somewhat terrifying, with spaghetti-like
control flow and numerous means by which issues can arise and incomplete
state, memory leaks and other unpleasantness can occur.

A large amount of the complexity arises from trying to handle errors late
in the process of mapping a VMA, which forms the basis of recently
observed issues with resource leaks and observable inconsistent state.

Taking advantage of previous patches in this series we move a number of
checks earlier in the code, simplifying things by moving the core of the
logic into a static internal function __mmap_region().

Doing this allows us to perform a number of checks up front before we do
any real work, and allows us to unwind the writable unmap check
unconditionally as required and to perform a CONFIG_DEBUG_VM_MAPLE_TREE
validation unconditionally also.

We move a number of things here:

1. We preallocate memory for the iterator before we call the file-backed
   memory hook, allowing us to exit early and avoid having to perform
   complicated and error-prone close/free logic. We carefully free
   iterator state on both success and error paths.

2. The enclosing mmap_region() function handles the mapping_map_writable()
   logic early. Previously the logic had the mapping_map_writable() at the
   point of mapping a newly allocated file-backed VMA, and a matching
   mapping_unmap_writable() on success and error paths.

   We now do this unconditionally if this is a file-backed, shared
   writable mapping. If a driver changes the flags to eliminate
   VM_MAYWRITE, however, doing so does not invalidate the seal check we
   just performed, and we in any case always decrement the counter in the
   wrapper.

   We perform a debug assert to ensure a driver does not attempt to do
   the opposite.

3. We also move arch_validate_flags() up into the mmap_region() function.
   This is only relevant on arm64 and sparc64, and the check is only
   meaningful for SPARC with ADI enabled. We explicitly add a warning for
   this arch if a driver invalidates this check, though the code ought
   eventually to be fixed to eliminate the need for this.

With all of these measures in place, we no longer need to explicitly
close the VMA on error paths, as we place all checks which might fail
prior to a call to any driver mmap hook.

This eliminates an entire class of errors, makes the code easier to
reason about and more robust.

Link: https://lkml.kernel.org/r/6e0becb36d2f5472053ac5d544c0edfe9b899e25.1730224667.git.lorenzo.stoakes@oracle.com
Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails")
Signed-off-by: Lorenzo Stoakes
Reported-by: Jann Horn
Reviewed-by: Liam R. Howlett
Reviewed-by: Vlastimil Babka
Tested-by: Mark Brown
Cc: Andreas Larsson
Cc: Catalin Marinas
Cc: David S. Miller
Cc: Helge Deller
Cc: James E.J. Bottomley
Cc: Linus Torvalds
Cc: Peter Xu
Cc: Will Deacon
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Lorenzo Stoakes
Signed-off-by: Greg Kroah-Hartman
Signed-off-by: zhangshuqi
---
 mm/mmap.c | 70 +++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 45 insertions(+), 25 deletions(-)

diff --git a/mm/mmap.c b/mm/mmap.c
index 56035ee6e354..59ee5bcc757d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1760,7 +1760,7 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
 	return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
 }
 
-unsigned long mmap_region(struct file *file, unsigned long addr,
+static unsigned long __mmap_region(struct file *file, unsigned long addr,
 		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
 		struct list_head *uf)
 {
@@ -1829,11 +1829,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 			if (error)
 				goto free_vma;
 		}
-		if (vm_flags & VM_SHARED) {
-			error = mapping_map_writable(file->f_mapping);
-			if (error)
-				goto allow_write_and_free_vma;
-		}
 
 		/* ->mmap() can change vma->vm_file, but must guarantee that
 		 * vma_link() below can deny write-access if VM_DENYWRITE is set
@@ -1843,7 +1838,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		vma->vm_file = get_file(file);
 		error = mmap_file(file, vma);
 		if (error)
-			goto unmap_and_free_vma;
+			goto unmap_and_free_file_vma;
 
 		/* Can addr have changed??
 		 *
@@ -1854,6 +1849,14 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		 */
 		WARN_ON_ONCE(addr != vma->vm_start);
 
+		/*
+		 * Drivers should not permit writability when previously it was
+		 * disallowed.
+		 */
+		VM_WARN_ON_ONCE(vm_flags != vma->vm_flags &&
+				!(vm_flags & VM_MAYWRITE) &&
+				(vma->vm_flags & VM_MAYWRITE));
+
 		addr = vma->vm_start;
 
 		/* If vm_flags changed after mmap_file(), we should try merge vma again
@@ -1885,22 +1888,14 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		vma_set_anonymous(vma);
 	}
 
-	/* Allow architectures to sanity-check the vma */
-	if (security_mmap_region(vma) ||
-	    !arch_validate_flags(vma->vm_flags)) {
-		error = -EINVAL;
-		if (file)
-			goto close_and_free_vma;
-		else
-			goto free_vma;
-	}
+#ifdef CONFIG_SPARC64
+	/* TODO: Fix SPARC ADI! */
+	WARN_ON_ONCE(!arch_validate_flags(vm_flags));
+#endif
 
 	vma_link(mm, vma, prev, rb_link, rb_parent);
-	/* Once vma denies write, undo our temporary denial count */
 	if (file) {
 unmap_writable:
-		if (vm_flags & VM_SHARED)
-			mapping_unmap_writable(file->f_mapping);
 		if (vm_flags & VM_DENYWRITE)
 			allow_write_access(file);
 	}
@@ -1934,17 +1929,12 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	return addr;
 
-close_and_free_vma:
-	vma_close(vma);
-unmap_and_free_vma:
+unmap_and_free_file_vma:
 	vma->vm_file = NULL;
 	fput(file);
 
 	/* Undo any partial mapping done by a device driver. */
 	unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
 
-	if (vm_flags & VM_SHARED)
-		mapping_unmap_writable(file->f_mapping);
-allow_write_and_free_vma:
 	if (vm_flags & VM_DENYWRITE)
 		allow_write_access(file);
 free_vma:
@@ -2986,6 +2976,36 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
 	return __do_munmap(mm, start, len, uf, false);
 }
 
+unsigned long mmap_region(struct file *file, unsigned long addr,
+		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
+		struct list_head *uf)
+{
+	unsigned long ret;
+	bool writable_file_mapping = false;
+
+	/* Allow architectures to sanity-check the vm_flags. */
+	if (!arch_validate_flags(vm_flags))
+		return -EINVAL;
+
+	/* Map writable and ensure this isn't a sealed memfd. */
+	if (file && (vm_flags & VM_SHARED)) {
+		int error = mapping_map_writable(file->f_mapping);
+
+		if (error)
+			return error;
+		writable_file_mapping = true;
+	}
+
+	ret = __mmap_region(file, addr, len, vm_flags, pgoff, uf);
+
+	/* Clear our write mapping regardless of error. */
+	if (writable_file_mapping)
+		mapping_unmap_writable(file->f_mapping);
+
+	validate_mm(current->mm);
+	return ret;
+}
+
 static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
 {
 	int ret;
-- 
Gitee
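The shape of the resulting code is a classic acquire/release wrapper:
mmap_region() takes the writable-mapping reference before any fallible
work and drops it unconditionally afterwards, so __mmap_region() needs no
unwinding for it on any error path. Below is a minimal stand-alone sketch
of that structure with invented names and a fake error code, assuming
nothing from the kernel.

#include <stdbool.h>
#include <stdio.h>

static int map_writable_refs;	/* stand-in for i_mmap_writable accounting */

static int mapping_map_writable(void)
{
	map_writable_refs++;	/* a real memfd seal check could fail here */
	return 0;
}

static void mapping_unmap_writable(void)
{
	map_writable_refs--;
}

/* Core mapping work; may fail without ever touching the counter. */
static long __mmap_region(bool fail)
{
	return fail ? -22 /* -EINVAL */ : 0;
}

static long mmap_region(bool shared_file_mapping, bool fail)
{
	bool writable_file_mapping = false;
	long ret;

	if (shared_file_mapping) {
		int error = mapping_map_writable();

		if (error)
			return error;
		writable_file_mapping = true;
	}

	ret = __mmap_region(fail);

	/* Drop the reference on success *and* error: no special cases. */
	if (writable_file_mapping)
		mapping_unmap_writable();

	return ret;
}

int main(void)
{
	mmap_region(true, false);	/* success path */
	mmap_region(true, true);	/* failure path */
	printf("refs after success + failure: %d\n", map_writable_refs);
	return 0;
}

This is also why the error labels in the hunk above collapse: the only
cleanup left inside __mmap_region() concerns state it created itself.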