From 534b3e2e5c6ea40c03705b85af8808f37579d233 Mon Sep 17 00:00:00 2001
From: Kaihao Bai
Date: Mon, 15 Aug 2022 12:30:17 +0800
Subject: [PATCH 1/4] anolis: mm: support unmapping zeropage of the same file
 offset

ANBZ: #2510

If a file is mapped by different processes, it can have shared and
private mappings at the same time. When the VMA with the shared mapping
allocates a new page in the page cache, the zero page filled in by the
private mapping needs to be unmapped so that the private mapping
re-acquires the page from the page cache.

Signed-off-by: Kaihao Bai
Reviewed-by: zhong jiang
Link: https://gitee.com/anolis/cloud-kernel/pulls/785
Reviewed-by: Xu Yu
---
 include/linux/rmap.h |  9 +++++++++
 mm/page_vma_mapped.c |  3 +++
 mm/rmap.c            | 43 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+)

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index e27baa0abc1a..33e98d86331a 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -103,6 +103,7 @@ enum ttu_flags {
					 * do a final flush if necessary */
	TTU_RMAP_LOCKED		= 0x80,	/* do not grab rmap lock:
					 * caller holds it */
+	TTU_ZEROPAGE		= 0x100,/* unmap zero pages of the same offset */
 };

 #ifdef CONFIG_MMU
@@ -638,6 +639,7 @@ int folio_referenced(struct folio *, int is_locked,

 void try_to_migrate(struct folio *folio, enum ttu_flags flags);
 void try_to_unmap(struct folio *, enum ttu_flags flags);
+void try_to_unmap_zeropage(struct folio *folio, enum ttu_flags flags);

 int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
		unsigned long end, struct page **pages,
@@ -647,6 +649,8 @@ int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
 #define PVMW_SYNC		(1 << 0)
 /* Look for migration entries rather than present PTEs */
 #define PVMW_MIGRATION		(1 << 1)
+/* Look for the zero page rather than the target folio */
+#define PVMW_ZEROPAGE		(1 << 2)

 struct page_vma_mapped_walk {
	unsigned long pfn;
@@ -765,6 +769,11 @@ static inline void try_to_unmap(struct folio *folio, enum ttu_flags flags)
 {
 }

+static inline void try_to_unmap_zeropage(struct folio *folio,
+					 enum ttu_flags flags)
+{
+}
+
 static inline int folio_mkclean(struct folio *folio)
 {
	return 0;
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 032d71f9876b..d4a3ac283db5 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -125,6 +125,9 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw, unsigned long pte_nr)
			return false;

		pfn = pte_pfn(ptent);
+
+		if (pvmw->flags & PVMW_ZEROPAGE)
+			return is_zero_pfn(pfn);
	}

	if ((pfn + pte_nr - 1) < pvmw->pfn)
diff --git a/mm/rmap.c b/mm/rmap.c
index 994c459ba684..8c6634f1e9aa 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1619,6 +1619,13 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
	if (flags & TTU_SYNC)
		pvmw.flags = PVMW_SYNC;

+	/*
+	 * If the same mapping offset of the file is already mapped with the
+	 * zero page, the zero page mapping needs to be unmapped.
+	 */
+	if (flags & TTU_ZEROPAGE)
+		pvmw.flags = PVMW_ZEROPAGE;
+
	if (flags & TTU_SPLIT_HUGE_PMD)
		split_huge_pmd_address(vma, address, false, folio);

@@ -1650,6 +1657,16 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
		/* Unexpected PMD-mapped THP? */
		VM_BUG_ON_FOLIO(!pvmw.pte, folio);

+		/*
+		 * If the page is the zero page, just unmap it and return,
+		 * because compound pages are not supported yet.
+		 */
+		if (flags & TTU_ZEROPAGE) {
+			ptep_clear_flush(vma, pvmw.address, pvmw.pte);
+			page_vma_mapped_walk_done(&pvmw);
+			break;
+		}
+
		/*
		 * If the folio is in an mlock()d vma, we must not swap it out.
		 */
@@ -1914,6 +1931,32 @@ static int folio_not_mapped(struct folio *folio)
	return !folio_mapped(folio);
 }

+/**
+ * try_to_unmap_zeropage - try to remove all page table mappings of the
+ * zero page at the same file offset. If @flags does not contain
+ * TTU_ZEROPAGE, this function does nothing.
+ * @folio: the folio whose file offset is checked for zero page mappings
+ * @flags: action and flags
+ *
+ * Tries to remove all the page table entries which map the zero page at
+ * the same offset, used in the pageout path. The caller must hold the
+ * page lock of the newly allocated page.
+ *
+ * During rmap_walk(), the i_mmap_rwsem is held to avoid unexpected VMA modifications.
+ */
+void try_to_unmap_zeropage(struct folio *folio, enum ttu_flags flags)
+{
+	struct rmap_walk_control rwc = {
+		.rmap_one = try_to_unmap_one,
+		.arg = (void *)flags,
+	};
+
+	if (!(flags & TTU_ZEROPAGE))
+		return;
+
+	rmap_walk(folio, &rwc);
+}
+
 /**
  * try_to_unmap - Try to remove all page table mappings to a folio.
  * @folio: The folio to unmap.
--
Gitee

From 1b39ae6a23d853ad843f8b70fe6430964ac2bbb7 Mon Sep 17 00:00:00 2001
From: Kaihao Bai
Date: Mon, 15 Aug 2022 14:32:27 +0800
Subject: [PATCH 2/4] anolis: mm: support filling in zero page if read fault
 in file hole

ANBZ: #2510

The valid data might occupy only a small part of a sparse file; the
rest of the ranges are filled with 0 and are called holes. For a file
mapping, a read fault in a file hole still allocates a page and adds it
to the page cache, which wastes memory.

If a read fault occurs in a tmpfs file hole, fill it with the system
zero page to avoid this waste: if the corresponding VMA is MAP_PRIVATE
and the fault is not a write fault, the PTE can be mapped to the system
zero page.

Signed-off-by: Kaihao Bai
Reviewed-by: zhong jiang
Link: https://gitee.com/anolis/cloud-kernel/pulls/785
Reviewed-by: Xu Yu
---
 mm/memory.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++-----
 mm/shmem.c  | 18 +++++++++++
 2 files changed, 100 insertions(+), 7 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 7accc5c96baf..38a1f3c0e182 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5242,6 +5242,10 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
		return poisonret;
	}

+	/* Do not lock the zero page */
+	if (unlikely(is_zero_page(vmf->page)))
+		return ret;
+
	if (unlikely(!(ret & VM_FAULT_LOCKED)))
		lock_page(vmf->page);
	else
@@ -5345,6 +5349,69 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 }
 #endif

+/**
+ * set_zero_pte - Map a range of PTEs to the shared zero page.
+ */
+vm_fault_t set_zero_pte(struct vm_fault *vmf, struct folio *folio,
+		struct page *page, unsigned int nr, unsigned long addr)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct folio *new_folio;
+	bool uffd_wp = vmf_orig_pte_uffd_wp(vmf);
+	bool write = vmf->flags & FAULT_FLAG_WRITE;
+	bool prefault = !in_range(vmf->address, addr, nr * PAGE_SIZE);
+	pte_t entry;
+
+	flush_icache_pages(vma, page, nr);
+	entry = mk_pte(page, vma->vm_page_prot);
+
+	if (prefault && arch_wants_old_prefaulted_pte())
+		entry = pte_mkold(entry);
+	else
+		entry = pte_sw_mkyoung(entry);
+
+	if (write)
+		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+	if (unlikely(uffd_wp))
+		entry = pte_mkuffd_wp(entry);
+
+	/*
+	 * For the zero page, vmf->ptl should be held to prevent other VMAs
+	 * that share the same xarray from handling the same page fault
+	 * simultaneously, which may lead to wrong MAP_PRIVATE semantics.
+	 *
+	 * E.g.:
+	 *   MAP_PRIVATE                MAP_SHARED
+	 *   do_read_fault
+	 *                              do_shared_fault
+	 *   check pagecache
+	 *                              alloc_page
+	 *                              add_to_pagecache
+	 *                              try_to_unmap_zeropage
+	 *   set_pte
+	 *
+	 * In this scenario, the zero page cannot be unmapped.
+	 * If a page cache entry is found here, the corresponding page cache
+	 * page has already been installed. Thus retry to get the valid page
+	 * cache page instead of the zero page.
+	 */
+	new_folio = filemap_get_entry(vma->vm_file->f_mapping, vmf->pgoff);
+
+	if (new_folio) {
+		folio_put(new_folio);
+		return VM_FAULT_RETRY;
+	}
+
+	entry = pte_mkspecial(entry);
+
+	set_ptes(vma->vm_mm, addr, vmf->pte, entry, nr);
+
+	/* no need to invalidate: a not-present page won't be cached */
+	update_mmu_cache_range(vmf, vma, addr, vmf->pte, nr);
+
+	return 0;
+}
+
 /**
  * set_pte_range - Set a range of PTEs to point to pages in a folio.
  * @vmf: Fault decription.
@@ -5507,11 +5574,15 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
		goto fallback;
	}

-	folio_ref_add(folio, nr_pages - 1);
-	set_pte_range(vmf, folio, page, nr_pages, addr);
-	type = is_cow ? MM_ANONPAGES : mm_counter_file(page);
-	add_mm_counter(vma->vm_mm, type, nr_pages);
-	ret = 0;
+	if (likely(!is_zero_page(vmf->page))) {
+		folio_ref_add(folio, nr_pages - 1);
+		set_pte_range(vmf, folio, page, nr_pages, addr);
+		type = is_cow ? MM_ANONPAGES : mm_counter_file(page);
+		add_mm_counter(vma->vm_mm, type, nr_pages);
+		ret = 0;
+	} else {
+		ret = set_zero_pte(vmf, folio, page, nr_pages, addr);
+	}

 unlock:
	pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -5661,6 +5732,8 @@ static vm_fault_t do_read_fault(struct vm_fault *vmf)
 #endif

	ret |= finish_fault(vmf);
+	if (unlikely(is_zero_folio(folio)))
+		return ret;
	folio_unlock(folio);
 #ifdef CONFIG_DUPTEXT
	if (d_folio) {
@@ -5711,8 +5784,10 @@ static vm_fault_t do_cow_fault(struct vm_fault *vmf)
	__folio_mark_uptodate(folio);

	ret |= finish_fault(vmf);
-	unlock_page(vmf->page);
-	put_page(vmf->page);
+	if (unlikely(!is_zero_page(vmf->page))) {
+		unlock_page(vmf->page);
+		put_page(vmf->page);
+	}
	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
		goto uncharge_out;
	return ret;
diff --git a/mm/shmem.c b/mm/shmem.c
index 2a3ebbf94ba7..6289cb1ee4df 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2577,6 +2577,12 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
		goto repeat;
	}

+	if (vmf && !mm_forbids_zeropage(vma->vm_mm) &&
+	    !(vma->vm_flags & VM_SHARED)) {
+		folio = page_folio(ZERO_PAGE(0));
+		goto out;
+	}
+
	folio = shmem_alloc_and_add_folio(vmf, gfp, inode, index, fault_mm, 0);
	if (IS_ERR(folio)) {
		error = PTR_ERR(folio);
@@ -2638,6 +2644,15 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
		error = -EINVAL;
		goto unlock;
	}
+
+	/*
+	 * If the VMA that the faulting page belongs to is VM_SHARED, unmap
+	 * all zero page mappings so that the MAP_PRIVATE VMAs fault again
+	 * and pick up the page cache page.
+	 */
+	if (folio && vmf && (vma->vm_flags & VM_SHARED))
+		try_to_unmap_zeropage(folio, TTU_ZEROPAGE);
+
 out:
	*foliop = folio;
	return 0;
@@ -2653,6 +2668,9 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
		folio_unlock(folio);
		folio_put(folio);
	}
+	if (folio && vmf && (vma->vm_flags & VM_SHARED))
+		try_to_unmap_zeropage(folio, TTU_ZEROPAGE);
+
	return error;
 }
--
Gitee

From 668a9bede4c39b7fe2217d24acbc700b28104db7 Mon Sep 17 00:00:00 2001
From: Kaihao Bai
Date: Mon, 29 Aug 2022 11:11:26 +0800
Subject: [PATCH 3/4] anolis: mm: avoid MMAP_POPULATE filling zero page

ANBZ: #2510

When a file is mmap()ed with MAP_POPULATE or MAP_LOCKED, the page cache
should be pre-allocated, so filling the mapping with the zero page must
be avoided.

Signed-off-by: Kaihao Bai
Reviewed-by: zhong jiang
Link: https://gitee.com/anolis/cloud-kernel/pulls/785
Reviewed-by: Xu Yu
---
 include/linux/mm_types.h | 3 ++-
 mm/gup.c                 | 2 ++
 mm/shmem.c               | 3 ++-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index aff540cf1f01..8a38abbbd675 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1367,7 +1367,7 @@ enum tlb_flush_reason {
  * @FAULT_FLAG_ORIG_PTE_VALID: whether the fault has vmf->orig_pte cached.
  *                        We should only access orig_pte if this flag set.
  * @FAULT_FLAG_VMA_LOCK: The fault is handled under VMA lock.
- *
+ * @FAULT_FLAG_NONZEROPAGE: The fault cannot be filled with the zero page.
  * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify
  * whether we would allow page faults to retry by specifying these two
  * fault flags correctly. Currently there can be three legal combinations:
@@ -1405,6 +1405,7 @@ enum fault_flag {
	FAULT_FLAG_UNSHARE =		1 << 10,
	FAULT_FLAG_ORIG_PTE_VALID =	1 << 11,
	FAULT_FLAG_VMA_LOCK =		1 << 12,
+	FAULT_FLAG_NONZEROPAGE =	1 << 13,
 };

 typedef unsigned int __bitwise zap_flags_t;
diff --git a/mm/gup.c b/mm/gup.c
index 023072526639..2c12b7fa208c 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -892,6 +892,8 @@ static int faultin_page(struct vm_area_struct *vma,
	if (*flags & FOLL_NOFAULT)
		return -EFAULT;
+	if (*flags & FOLL_MADV_POPULATE)
+		fault_flags |= FAULT_FLAG_NONZEROPAGE;
	if (*flags & FOLL_WRITE)
		fault_flags |= FAULT_FLAG_WRITE;
	if (*flags & FOLL_REMOTE)
diff --git a/mm/shmem.c b/mm/shmem.c
index 6289cb1ee4df..4329b6108118 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2578,7 +2578,8 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
	}

	if (vmf && !mm_forbids_zeropage(vma->vm_mm) &&
-	    !(vma->vm_flags & VM_SHARED)) {
+	    !(vma->vm_flags & VM_SHARED) &&
+	    !(vmf->flags & FAULT_FLAG_NONZEROPAGE)) {
		folio = page_folio(ZERO_PAGE(0));
		goto out;
	}
--
Gitee

From 7f1998a61cf7795d856780705d7081dd777dbae1 Mon Sep 17 00:00:00 2001
From: Kaihao Bai
Date: Mon, 29 Aug 2022 11:26:42 +0800
Subject: [PATCH 4/4] anolis: mm: add switch for file zero page

ANBZ: #2510

On top of filling file holes with the zero page, add a switch so that
the feature can be turned on/off at runtime. When it is turned off, all
zero page mappings are evicted to ensure correctness.
Signed-off-by: Kaihao Bai
Reviewed-by: zhong jiang
Link: https://gitee.com/anolis/cloud-kernel/pulls/785
Reviewed-by: Xu Yu
---
 include/linux/file_zeropage.h | 26 +++++++++
 include/linux/pagemap.h       | 11 ++++
 include/linux/rmap.h          |  6 +--
 mm/Makefile                   |  1 +
 mm/file_zeropage.c            | 99 +++++++++++++++++++++++++++++++++++
 mm/rmap.c                     |  2 +-
 mm/shmem.c                    | 18 +++----
 7 files changed, 148 insertions(+), 15 deletions(-)
 create mode 100644 include/linux/file_zeropage.h
 create mode 100644 mm/file_zeropage.c

diff --git a/include/linux/file_zeropage.h b/include/linux/file_zeropage.h
new file mode 100644
index 000000000000..1bb7e9746ab1
--- /dev/null
+++ b/include/linux/file_zeropage.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_FILE_ZEROPAGE_H_
+#define _LINUX_FILE_ZEROPAGE_H_
+
+#include
+#include
+
+DECLARE_STATIC_KEY_FALSE(file_zeropage_enabled_key);
+static inline bool file_zeropage_enabled(void)
+{
+	return static_branch_unlikely(&file_zeropage_enabled_key);
+}
+
+extern struct folio *__alloc_zero_folio(struct vm_area_struct *vma, struct vm_fault *vmf);
+
+static inline struct folio *alloc_zero_folio(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	if (file_zeropage_enabled())
+		return __alloc_zero_folio(vma, vmf);
+	return NULL;
+}
+
+inline void unmap_zero_folio(struct folio *folio, struct vm_area_struct *vma,
+		struct address_space *mapping);
+
+#endif /* _LINUX_FILE_ZEROPAGE_H_ */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a7c2e23dda23..534ab89fb271 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -220,6 +220,7 @@ enum mapping_flags {
	AS_FOLIO_ORDER_BITS = 5,
	AS_FOLIO_ORDER_MIN = 16,
	AS_FOLIO_ORDER_MAX = AS_FOLIO_ORDER_MIN + AS_FOLIO_ORDER_BITS,
+	AS_ZERO_FOLIO = 26,	/* Filled file hole with zero page */
	AS_FSDAX_NORMAP = 30,
 };

@@ -483,6 +484,16 @@ static inline size_t mapping_max_folio_size(const struct address_space *mapping)
	return PAGE_SIZE << mapping_max_folio_order(mapping);
 }

+static inline void mapping_set_zero_folio(struct address_space *mapping)
+{
+	test_and_set_bit(AS_ZERO_FOLIO, &mapping->flags);
+}
+
+static inline bool mapping_zero_folio(struct address_space *mapping)
+{
+	return test_bit(AS_ZERO_FOLIO, &mapping->flags);
+}
+
 static inline int filemap_nr_thps(struct address_space *mapping)
 {
 #ifdef CONFIG_READ_ONLY_THP_FOR_FS
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 33e98d86331a..f96a0d71a332 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -639,7 +639,7 @@ int folio_referenced(struct folio *, int is_locked,

 void try_to_migrate(struct folio *folio, enum ttu_flags flags);
 void try_to_unmap(struct folio *, enum ttu_flags flags);
-void try_to_unmap_zeropage(struct folio *folio, enum ttu_flags flags);
+void try_to_unmap_zero_folio(struct folio *folio, enum ttu_flags flags);

 int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,
		unsigned long end, struct page **pages,
@@ -769,8 +769,8 @@ static inline void try_to_unmap(struct folio *folio, enum ttu_flags flags)
 {
 }

-static inline void try_to_unmap_zeropage(struct folio *folio,
-					 enum ttu_flags flags)
+static inline void try_to_unmap_zero_folio(struct folio *folio,
+					   enum ttu_flags flags)
 {
 }
diff --git a/mm/Makefile b/mm/Makefile
index 895400493b02..67af22e50f12 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -143,6 +143,7 @@ obj-$(CONFIG_DUPTEXT) += page_dup.o
 obj-$(CONFIG_ASYNC_FORK) += async_fork.o
 obj-$(CONFIG_PAGECACHE_LIMIT) += pagecache_limit.o
 obj-y += unevictable.o
+obj-y += file_zeropage.o
 obj-$(CONFIG_PGTABLE_BIND) += pgtable_bind.o
 obj-$(CONFIG_PT_RECLAIM) += pt_reclaim.o
 obj-$(CONFIG_PRE_OOM) += pre_oom.o
diff --git a/mm/file_zeropage.c b/mm/file_zeropage.c
new file mode 100644
index 000000000000..113372b261d0
--- /dev/null
+++ b/mm/file_zeropage.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+DEFINE_STATIC_KEY_FALSE(file_zeropage_enabled_key);
+
+struct folio *__alloc_zero_folio(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct folio *folio = NULL;
+
+	if (vmf && !mm_forbids_zeropage(vma->vm_mm) &&
+	    !(vma->vm_flags & VM_SHARED) &&
+	    !(vmf->flags & FAULT_FLAG_NONZEROPAGE))
+		folio = page_folio(ZERO_PAGE(0));
+
+	return folio;
+}
+
+inline void unmap_zero_folio(struct folio *folio, struct vm_area_struct *vma,
+		struct address_space *mapping)
+{
+	if (mapping_zero_folio(mapping) && folio && vma && (vma->vm_flags & VM_SHARED))
+		try_to_unmap_zero_folio(folio, TTU_ZEROPAGE);
+}
+
+static int __init setup_file_zeropage(char *s)
+{
+	if (!strcmp(s, "1"))
+		static_branch_enable(&file_zeropage_enabled_key);
+	else if (!strcmp(s, "0"))
+		static_branch_disable(&file_zeropage_enabled_key);
+	return 1;
+}
+__setup("file_zeropage=", setup_file_zeropage);
+
+static ssize_t file_zeropage_enabled_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", !!static_branch_unlikely(&file_zeropage_enabled_key));
+}
+
+static ssize_t file_zeropage_enabled_store(struct kobject *kobj,
+		struct kobj_attribute *attr,
+		const char *buf, size_t count)
+{
+	static DEFINE_MUTEX(mutex);
+	ssize_t ret = count;
+
+	mutex_lock(&mutex);
+
+	if (!strncmp(buf, "1", 1))
+		static_branch_enable(&file_zeropage_enabled_key);
+	else if (!strncmp(buf, "0", 1))
+		static_branch_disable(&file_zeropage_enabled_key);
+	else
+		ret = -EINVAL;
+
+	mutex_unlock(&mutex);
+	return ret;
+}
+
+static struct kobj_attribute file_zeropage_enabled_attr =
+	__ATTR(enabled, 0644, file_zeropage_enabled_show,
+	       file_zeropage_enabled_store);
+
+static struct attribute *file_zeropage_attrs[] = {
+	&file_zeropage_enabled_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group file_zeropage_attr_group = {
+	.attrs = file_zeropage_attrs,
+	.name = "file_zeropage",
+};
+
+static int __init file_zeropage_init(void)
+{
+	int err;
+
+	err = sysfs_create_group(mm_kobj, &file_zeropage_attr_group);
+	if (err) {
+		pr_err("file_zeropage: register sysfs failed\n");
+		return err;
+	}
+	return 0;
+}
+subsys_initcall(file_zeropage_init);
diff --git a/mm/rmap.c b/mm/rmap.c
index 8c6634f1e9aa..a18b0286e22a 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1944,7 +1944,7 @@ static int folio_not_mapped(struct folio *folio)
  *
  * During rmap_walk(), the i_mmap_rwsem is held to avoid unexpected VMA modifications.
  */
-void try_to_unmap_zeropage(struct folio *folio, enum ttu_flags flags)
+void try_to_unmap_zero_folio(struct folio *folio, enum ttu_flags flags)
 {
	struct rmap_walk_control rwc = {
		.rmap_one = try_to_unmap_one,
diff --git a/mm/shmem.c b/mm/shmem.c
index 4329b6108118..74ccb816962b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -41,6 +41,7 @@
 #include
 #include
 #include
+#include
 #include "swap.h"

 static struct vfsmount *shm_mnt;
@@ -2577,12 +2578,9 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
		goto repeat;
	}

-	if (vmf && !mm_forbids_zeropage(vma->vm_mm) &&
-	    !(vma->vm_flags & VM_SHARED) &&
-	    !(vmf->flags & FAULT_FLAG_NONZEROPAGE)) {
-		folio = page_folio(ZERO_PAGE(0));
+	folio = alloc_zero_folio(vma, vmf);
+	if (folio)
		goto out;
-	}

	folio = shmem_alloc_and_add_folio(vmf, gfp, inode, index, fault_mm, 0);
	if (IS_ERR(folio)) {
@@ -2651,9 +2649,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
	 * all zero page mappings so that the MAP_PRIVATE VMAs fault again
	 * and pick up the page cache page.
	 */
-	if (folio && vmf && (vma->vm_flags & VM_SHARED))
-		try_to_unmap_zeropage(folio, TTU_ZEROPAGE);
-
+	unmap_zero_folio(folio, vma, inode->i_mapping);
 out:
	*foliop = folio;
	return 0;
@@ -2666,12 +2662,10 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
	filemap_remove_folio(folio);
	shmem_recalc_inode(inode, 0, 0);
	if (folio) {
+		unmap_zero_folio(folio, vma, inode->i_mapping);
		folio_unlock(folio);
		folio_put(folio);
	}
-	if (folio && vmf && (vma->vm_flags & VM_SHARED))
-		try_to_unmap_zeropage(folio, TTU_ZEROPAGE);
-
	return error;
 }

@@ -2779,6 +2773,8 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
	if (folio) {
		vmf->page = folio_file_page(folio, vmf->pgoff);
		ret |= VM_FAULT_LOCKED;
+		if (is_zero_page(vmf->page))
+			mapping_set_zero_folio(inode->i_mapping);
	}
	return ret;
 }
--
Gitee
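
For illustration, a minimal userspace sketch (not part of the series) of the access
pattern these patches optimize: read-only MAP_PRIVATE faults in a tmpfs hole, which
with the series applied are backed by the shared zero page instead of freshly
allocated page cache. The file path and length below are arbitrary assumptions, and
tmpfs is assumed to be mounted on /dev/shm.

/*
 * Illustrative sketch, not part of the patch series. Every read fault below
 * hits a hole in a sparse tmpfs file; without the series each fault allocates
 * and inserts a page cache page, with it the PTEs map the shared zero page.
 * The path and length are arbitrary; tmpfs is assumed at /dev/shm.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	const size_t len = 64UL << 20;	/* 64 MiB, never written: all holes */
	const char *path = "/dev/shm/zeropage-demo";
	unsigned long sum = 0;
	unsigned char *p;
	size_t off;
	int fd;

	fd = open(path, O_CREAT | O_RDWR, 0600);
	if (fd < 0 || ftruncate(fd, len) < 0) {
		perror("open/ftruncate");
		return 1;
	}

	/* Private, read-only faults: the case shmem_get_folio_gfp() can now
	 * satisfy with ZERO_PAGE(0) instead of a fresh page cache folio. */
	p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	for (off = 0; off < len; off += 4096)	/* touch every page with a read */
		sum += p[off];

	printf("sum=%lu (expected 0 for a hole)\n", sum);

	munmap(p, len);
	close(fd);
	unlink(path);
	return 0;
}

With patch 4 applied, the behaviour can be toggled at runtime through the enabled
attribute of the file_zeropage sysfs group registered on mm_kobj (normally exposed
under /sys/kernel/mm), or selected at boot with the file_zeropage= parameter.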