diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index c64ad1fca4e4af4dd75ab2bcd7e73d93dec12542..d616b7777eef8ccaef7c8761c3473b1077488262 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -976,7 +976,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) new_flags, vma->anon_vma, vma->vm_file, vma->vm_pgoff, vma_policy(vma), - NULL_VM_UFFD_CTX, anon_vma_name(vma)); + NULL_VM_UFFD_CTX, anon_vma_name(vma), false); if (prev) { vma = prev; } else { @@ -1547,7 +1547,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), ((struct vm_userfaultfd_ctx){ ctx }), - anon_vma_name(vma)); + anon_vma_name(vma), false); if (prev) { /* vma_merge() invalidated the mas */ vma = prev; @@ -1732,7 +1732,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, prev = vma_merge(&vmi, mm, prev, start, vma_end, new_flags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - NULL_VM_UFFD_CTX, anon_vma_name(vma)); + NULL_VM_UFFD_CTX, anon_vma_name(vma), false); if (prev) { vma = prev; goto next; diff --git a/include/linux/mm.h b/include/linux/mm.h index 77a7d7c4c88c517365c5f338f980a625a41f4cba..886569d56066fe4611479b5b21fb2ed1cd400f22 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3350,7 +3350,7 @@ extern struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, struct mempolicy *, struct vm_userfaultfd_ctx, - struct anon_vma_name *); + struct anon_vma_name *, bool skip_vma_uprobe); extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); extern int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *, unsigned long addr, int new_below); diff --git a/mm/internal.h b/mm/internal.h index ff96a9f53f016b6ae305af55480cec2c5260458f..ba2346a4144726aaa2fb38aec5ec1c2ab06fab99 100644 --- a/mm/internal.h +++ 
b/mm/internal.h @@ -1481,6 +1481,8 @@ struct vma_prepare { struct vm_area_struct *insert; struct vm_area_struct *remove; struct vm_area_struct *remove2; + + bool skip_vma_uprobe; }; void __meminit __init_single_page(struct page *page, unsigned long pfn, diff --git a/mm/madvise.c b/mm/madvise.c index bfbb48519259531d3e8f6d3d98a982f0526f028b..b3a1500decca373a8b4c3a550f62612d8c55ecd3 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -157,7 +157,7 @@ static int madvise_update_vma(struct vm_area_struct *vma, pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); *prev = vma_merge(&vmi, mm, *prev, start, end, new_flags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, anon_name); + vma->vm_userfaultfd_ctx, anon_name, false); if (*prev) { vma = *prev; goto success; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 1587efaf777e3b1fc40eb35f51d012dab3533133..a82aab7ab47a5ea444b2cc01c77520dea817b683 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -844,7 +844,7 @@ static int mbind_range(struct vma_iterator *vmi, struct vm_area_struct *vma, pgoff = vma->vm_pgoff + ((vmstart - vma->vm_start) >> PAGE_SHIFT); merged = vma_merge(vmi, vma->vm_mm, *prev, vmstart, vmend, vma->vm_flags, vma->anon_vma, vma->vm_file, pgoff, new_pol, - vma->vm_userfaultfd_ctx, anon_vma_name(vma)); + vma->vm_userfaultfd_ctx, anon_vma_name(vma), false); if (merged) { *prev = merged; return vma_replace_policy(merged, new_pol); diff --git a/mm/mlock.c b/mm/mlock.c index f9653d30d02567d2fb58050c53fba73b22a601e9..31288fda9decd4a19964187db2f17c9798c1bdee 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -493,7 +493,7 @@ static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma, pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); *prev = vma_merge(vmi, mm, *prev, start, end, newflags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, anon_vma_name(vma)); + vma->vm_userfaultfd_ctx, anon_vma_name(vma), false); if 
(*prev) { vma = *prev; goto success; diff --git a/mm/mmap.c b/mm/mmap.c index 32799ed58022740bb08d25e62c5aa300ea286b1e..fb54df419ea2c360f1e0c23f921f54627a08c5b5 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -587,10 +587,13 @@ static inline void vma_complete(struct vma_prepare *vp, if (vp->file) { i_mmap_unlock_write(vp->mapping); - uprobe_mmap(vp->vma); - if (vp->adj_next) - uprobe_mmap(vp->adj_next); + if (!vp->skip_vma_uprobe) { + uprobe_mmap(vp->vma); + + if (vp->adj_next) + uprobe_mmap(vp->adj_next); + } } if (vp->remove) { @@ -908,6 +911,9 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags, * NNNN is represented by *next or not represented at all (NULL) * **** is not represented - it will be merged and the vma containing the * area is returned, or the function will return NULL + * + * @skip_vma_uprobe: only meaningful when called from copy_vma(); all other + * callers must pass false. */ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, struct vm_area_struct *prev, unsigned long addr, @@ -915,7 +921,8 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, struct anon_vma *anon_vma, struct file *file, pgoff_t pgoff, struct mempolicy *policy, struct vm_userfaultfd_ctx vm_userfaultfd_ctx, - struct anon_vma_name *anon_name) + struct anon_vma_name *anon_name, + bool skip_vma_uprobe) { struct vm_area_struct *curr, *next, *res; struct vm_area_struct *vma, *adjust, *remove, *remove2; @@ -1062,6 +1069,8 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm, VM_WARN_ON(vp.anon_vma && adjust && adjust->anon_vma && vp.anon_vma != adjust->anon_vma); + vp.skip_vma_uprobe = skip_vma_uprobe; + vma_prepare(&vp); vma_adjust_trans_huge(vma, vma_start, vma_end, adj_start); @@ -2873,7 +2882,7 @@ static unsigned long __mmap_region(struct mm_struct *mm, struct file *file, merge = vma_merge(&vmi, mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags, NULL, vma->vm_file, vma->vm_pgoff, NULL, - 
NULL_VM_UFFD_CTX, NULL); + NULL_VM_UFFD_CTX, NULL, false); if (merge) { /* @@ -3457,6 +3466,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *new_vma, *prev; bool faulted_in_anon_vma = true; + bool skip_vma_uprobe = false; VMA_ITERATOR(vmi, mm, addr); /* @@ -3472,9 +3482,17 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, if (new_vma && new_vma->vm_start < addr + len) return NULL; /* should never get here */ + /* + * If the VMA we are copying might contain a uprobe PTE, ensure + * that we do not establish one upon merge. Otherwise, when mremap() + * moves page tables, it will orphan the newly created PTE. + */ + if (vma->vm_file) + skip_vma_uprobe = true; + new_vma = vma_merge(&vmi, mm, prev, addr, addr + len, vma->vm_flags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, anon_vma_name(vma)); + vma->vm_userfaultfd_ctx, anon_vma_name(vma), skip_vma_uprobe); if (new_vma) { /* * Source vma may have been merged into new_vma diff --git a/mm/mprotect.c b/mm/mprotect.c index ed08f87e39c44e8f5f7149c2f2eb2acace593dd3..e65363eb603e64c46fd742cf01087f88c73491e5 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -633,7 +633,7 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb, pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); *pprev = vma_merge(vmi, mm, *pprev, start, end, newflags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, anon_vma_name(vma)); + vma->vm_userfaultfd_ctx, anon_vma_name(vma), false); if (*pprev) { vma = *pprev; VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY); diff --git a/mm/mremap.c b/mm/mremap.c index e990bb8c891816fed55c61cb681f3d08ec501181..a3fdb878d503374eab25a68e5ea6492a5fc5ebbb 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -1073,7 +1073,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, vma = vma_merge(&vmi, mm, vma, extension_start, 
extension_end, vma->vm_flags, vma->anon_vma, vma->vm_file, extension_pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, anon_vma_name(vma)); + vma->vm_userfaultfd_ctx, anon_vma_name(vma), false); if (!vma) { vm_unacct_memory(pages); ret = -ENOMEM;