diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 803c61f66d54e6e970fcad1942ed2e885f0168e5..3e11e976352736162fb596f30929d0947e8da90a 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -925,7 +925,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 				 new_flags, vma->anon_vma,
 				 vma->vm_file, vma->vm_pgoff,
 				 vma_policy(vma),
-				 NULL_VM_UFFD_CTX);
+				 NULL_VM_UFFD_CTX, false);
 		if (prev)
 			vma = prev;
 		else
@@ -1492,7 +1492,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 		prev = vma_merge(mm, prev, start, vma_end, new_flags,
 				 vma->anon_vma, vma->vm_file, vma->vm_pgoff,
 				 vma_policy(vma),
-				 ((struct vm_userfaultfd_ctx){ ctx }));
+				 ((struct vm_userfaultfd_ctx){ ctx }), false);
 		if (prev) {
 			vma = prev;
 			goto next;
@@ -1654,7 +1654,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 		prev = vma_merge(mm, prev, start, vma_end, new_flags,
 				 vma->anon_vma, vma->vm_file, vma->vm_pgoff,
 				 vma_policy(vma),
-				 NULL_VM_UFFD_CTX);
+				 NULL_VM_UFFD_CTX, false);
 		if (prev) {
 			vma = prev;
 			goto next;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 67e299374ac8aa1d7f1e2c23de79034500a1f3fa..8cae7fb6542ad017afc83cfb3b294a697b095941 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2367,16 +2367,16 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node);
 extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
 extern int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
-	struct vm_area_struct *expand);
+	struct vm_area_struct *expand, bool skip_vma_uprobe);
 static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
 {
-	return __vma_adjust(vma, start, end, pgoff, insert, NULL);
+	return __vma_adjust(vma, start, end, pgoff, insert, NULL, false);
 }
 extern struct vm_area_struct *vma_merge(struct mm_struct *,
 	struct vm_area_struct *prev, unsigned long addr, unsigned long end,
 	unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
-	struct mempolicy *, struct vm_userfaultfd_ctx);
+	struct mempolicy *, struct vm_userfaultfd_ctx, bool skip_vma_uprobe);
 extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
 extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
 	unsigned long addr, int new_below);
diff --git a/mm/madvise.c b/mm/madvise.c
index 263c2c68b0defe3913e6041eb73e95f1b94f4eb7..0670786a23a59008397d18917fbd256d4919b0bf 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -145,7 +145,7 @@ static long madvise_behavior(struct vm_area_struct *vma,
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
 	*prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
 			  vma->vm_file, pgoff, vma_policy(vma),
-			  vma->vm_userfaultfd_ctx);
+			  vma->vm_userfaultfd_ctx, false);
 	if (*prev) {
 		vma = *prev;
 		goto success;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e8c82f3235e2b9909b80f6266c7e466c3e4f8785..882310ef3d9348638d94615341dbc997678997ff 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -818,7 +818,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
 			((vmstart - vma->vm_start) >> PAGE_SHIFT);
 		prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
 				 vma->anon_vma, vma->vm_file, pgoff,
-				 new_pol, vma->vm_userfaultfd_ctx);
+				 new_pol, vma->vm_userfaultfd_ctx, false);
 		if (prev) {
 			vma = prev;
 			goto replace;
diff --git a/mm/mlock.c b/mm/mlock.c
index d9040aea2be00dfcba3e537ed939ccc73774ce1d..cb4e1207169afd6df146f09e9103eb6b685f8a88 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -540,7 +540,7 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
 	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
 			  vma->vm_file, pgoff, vma_policy(vma),
-			  vma->vm_userfaultfd_ctx);
+			  vma->vm_userfaultfd_ctx, false);
 	if (*prev) {
 		vma = *prev;
 		goto success;
diff --git a/mm/mmap.c b/mm/mmap.c
index ad643f9dd3a4422f7938dae43ddcd083de119b44..c27bbe5ea5ca289e990cb7e0f3bfdae90f153de9 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -684,10 +684,13 @@ static inline void __vma_unlink_prev(struct mm_struct *mm,
  * The following helper function should be used when such adjustments
  * are necessary. The "insert" vma (if any) is to be inserted
  * before we drop the necessary locks.
+ *
+ * @skip_vma_uprobe: only used by the copy_vma() path; all other
+ * callers must pass false.
  */
 int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert,
-	struct vm_area_struct *expand)
+	struct vm_area_struct *expand, bool skip_vma_uprobe)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct *next = vma->vm_next, *orig_vma = vma;
@@ -894,10 +897,12 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 		i_mmap_unlock_write(mapping);
 
 	if (root) {
-		uprobe_mmap(vma);
+		if (!skip_vma_uprobe) {
+			uprobe_mmap(vma);
 
-		if (adjust_next)
-			uprobe_mmap(next);
+			if (adjust_next)
+				uprobe_mmap(next);
+		}
 	}
 
 	if (remove_next) {
@@ -1102,6 +1107,9 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
  * or other rmap walkers (if working on addresses beyond the "end"
  * parameter) may establish ptes with the wrong permissions of NNNN
  * instead of the right permissions of XXXX.
+ *
+ * @skip_vma_uprobe: only used by the copy_vma() path; all other
+ * callers must pass false.
  */
 struct vm_area_struct *vma_merge(struct mm_struct *mm,
 			struct vm_area_struct *prev, unsigned long addr,
@@ -1160,10 +1168,11 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 							/* cases 1, 6 */
 			err = __vma_adjust(prev, prev->vm_start,
 					 next->vm_end, prev->vm_pgoff, NULL,
-					 prev);
+					 prev, skip_vma_uprobe);
 		} else					/* cases 2, 5, 7 */
 			err = __vma_adjust(prev, prev->vm_start,
-					 end, prev->vm_pgoff, NULL, prev);
+					 end, prev->vm_pgoff, NULL,
+					 prev, skip_vma_uprobe);
 		if (err)
 			return NULL;
 		khugepaged_enter_vma_merge(prev, vm_flags);
@@ -1180,10 +1189,12 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 					     vm_userfaultfd_ctx)) {
 		if (prev && addr < prev->vm_end)	/* case 4 */
 			err = __vma_adjust(prev, prev->vm_start,
-					 addr, prev->vm_pgoff, NULL, next);
+					 addr, prev->vm_pgoff, NULL,
+					 next, skip_vma_uprobe);
 		else {					/* cases 3, 8 */
 			err = __vma_adjust(area, addr, next->vm_end,
-					 next->vm_pgoff - pglen, NULL, next);
+					 next->vm_pgoff - pglen, NULL,
+					 next, skip_vma_uprobe);
 			/*
 			 * In case 3 area is already equal to next and
 			 * this is a noop, but in case 8 "area" has
@@ -1974,7 +1985,7 @@ static unsigned long __mmap_region(struct mm_struct *mm, struct file *file,
 	 * Can we just expand an old mapping?
 	 */
 	vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
-			NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX);
+			NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX, false);
 	if (vma)
 		goto out;
 
@@ -3381,7 +3392,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla
 
 	/* Can we just expand an old private anonymous mapping? */
 	vma = vma_merge(mm, prev, addr, addr + len, flags,
-			NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX);
+			NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX, false);
 	if (vma)
 		goto out;
 
@@ -3566,6 +3577,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 	struct vm_area_struct *new_vma, *prev;
 	struct rb_node **rb_link, *rb_parent;
 	bool faulted_in_anon_vma = true;
+	bool skip_vma_uprobe = false;
 
 	/*
 	 * If anonymous vma has not yet been faulted, update new pgoff
@@ -3578,9 +3590,18 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 
 	if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
 		return NULL;	/* should never get here */
+
+	/*
+	 * If the VMA we are copying might contain a uprobe PTE, ensure
+	 * that we do not establish one upon merge. Otherwise, when mremap()
+	 * moves page tables, it will orphan the newly created PTE.
+	 */
+	if (vma->vm_file)
+		skip_vma_uprobe = true;
+
 	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
 			    vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
-			    vma->vm_userfaultfd_ctx);
+			    vma->vm_userfaultfd_ctx, skip_vma_uprobe);
 	if (new_vma) {
 		/*
 		 * Source vma may have been merged into new_vma
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 86837f25055b59918e84b8335cc37c837fdb7d8a..4c7370fd3e9062e5a1e6d998a41b2063896430c6 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -432,7 +432,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
 	*pprev = vma_merge(mm, *pprev, start, end, newflags, vma->anon_vma,
 			   vma->vm_file, pgoff, vma_policy(vma),
-			   vma->vm_userfaultfd_ctx);
+			   vma->vm_userfaultfd_ctx, false);
 	if (*pprev) {
 		vma = *pprev;
 		VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY);
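
As a minimal, standalone illustration (not kernel code) of the decision this patch adds in copy_vma(): the names toy_vma and toy_merge below are hypothetical stand-ins for vm_area_struct and for the new vma_merge(..., bool skip_vma_uprobe) parameter. The sketch only shows that the copy_vma()/mremap() path suppresses uprobe_mmap() for file-backed source VMAs, while every other call site in the patch passes false and keeps the existing behaviour.

/* toy_skip_uprobe.c - standalone model of the call-site logic, NOT kernel code */
#include <stdbool.h>
#include <stdio.h>

struct toy_vma {
	bool has_file;	/* hypothetical stand-in for vm_area_struct::vm_file != NULL */
};

/* hypothetical stand-in for vma_merge(..., bool skip_vma_uprobe) */
static void toy_merge(const char *caller, bool skip_vma_uprobe)
{
	if (skip_vma_uprobe)
		printf("%s: merge skips uprobe_mmap(); mremap moves the existing uprobe PTEs\n",
		       caller);
	else
		printf("%s: merge runs uprobe_mmap() as before\n", caller);
}

int main(void)
{
	struct toy_vma anon = { .has_file = false };
	struct toy_vma file_backed = { .has_file = true };

	/* copy_vma() path: mirrors "if (vma->vm_file) skip_vma_uprobe = true;" */
	toy_merge("copy_vma(anonymous vma)", anon.has_file);
	toy_merge("copy_vma(file-backed vma)", file_backed.has_file);

	/* every other call site in the patch passes false unconditionally */
	toy_merge("mprotect_fixup", false);
	return 0;
}

Uprobes are registered against an inode and offset, so only file-backed mappings can carry uprobe breakpoint PTEs; that is why copy_vma() keys the new flag off vma->vm_file and why anonymous mappings are unaffected.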