From a278ab5ec09c53a9b96a14c90c6f5e9160cdd7f1 Mon Sep 17 00:00:00 2001
From: Weilin Tong
Date: Tue, 1 Jul 2025 11:18:07 +0800
Subject: [PATCH] anolis: mm: base min_free_kbytes on mTHP setting

ANBZ: #22265

On arm64 systems with 64K PAGE_SIZE, min_free_kbytes and the watermarks
are often set too high because the calculation uses MAX_ORDER, even when
only smaller mTHP sizes (like 2M) are enabled. This results in
unnecessarily large watermarks (e.g., >11GB min watermark on 250GB
systems), which can hurt performance and latency.

Update set_recommended_min_free_kbytes() to use the highest enabled
hugepage order for the watermark calculation, making min_free_kbytes
more appropriate for the system's actual mTHP usage. This reduces the
watermarks when only smaller THP sizes are enabled, while keeping the
original values when large sizes (e.g., 512M) are used.

Signed-off-by: Weilin Tong
---
 include/linux/shmem_fs.h |  6 ++++++
 mm/huge_memory.c         |  8 ++++++++
 mm/khugepaged.c          | 31 ++++++++++++++++++++++++++++---
 mm/shmem.c               | 16 ++++++++++++++++
 4 files changed, 58 insertions(+), 3 deletions(-)

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 49bd6ccee23a..d34874291b12 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -119,6 +119,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
 				struct vm_area_struct *vma, pgoff_t index,
 				loff_t write_end, bool shmem_huge_force);
 bool shmem_hpage_pmd_enabled(void);
+int shmem_allowable_huge_highest_order(void);
 #else
 static inline unsigned long shmem_allowable_huge_orders(struct inode *inode,
 				struct vm_area_struct *vma, pgoff_t index,
@@ -131,6 +132,11 @@ static inline bool shmem_hpage_pmd_enabled(void)
 {
 	return false;
 }
+
+static inline int shmem_allowable_huge_highest_order(void)
+{
+	return 0;
+}
 #endif
 
 #ifdef CONFIG_SHMEM
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 92556dfe8451..e889f292d310 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -578,6 +578,14 @@ static ssize_t file_enabled_store(struct kobject *kobj,
 	}
 
 	spin_unlock(&huge_file_orders_lock);
+
+	if (ret > 0) {
+		int err;
+
+		err = start_stop_khugepaged();
+		if (err)
+			ret = err;
+	}
 	return ret;
 }
 
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 3a2d250ce3d7..b34a0ca13caf 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2607,11 +2607,32 @@ static int khugepaged(void *none)
 	return 0;
 }
 
+static int anon_allowable_huge_highest_order(void)
+{
+	unsigned long orders = READ_ONCE(huge_anon_orders_always) |
+			READ_ONCE(huge_anon_orders_madvise);
+
+	if (hugepage_global_enabled())
+		orders |= READ_ONCE(huge_anon_orders_inherit);
+
+	return orders == 0 ? 0 : fls(orders) - 1;
+}
+
+static unsigned long mthp_max_allowable_nr_pages(void)
+{
+	int anon_highest_order = anon_allowable_huge_highest_order();
+	int shmem_highest_order = shmem_allowable_huge_highest_order();
+	int file_highest_order = file_orders_always() ? fls(file_orders_always()) - 1 : 0;
+
+	return 1UL << max3(anon_highest_order, shmem_highest_order, file_highest_order);
+}
+
 static void set_recommended_min_free_kbytes(void)
 {
 	struct zone *zone;
 	int nr_zones = 0;
 	unsigned long recommended_min;
+	unsigned long recommended_nr_pages;
 
 	if (!hugepage_pmd_enabled()) {
 		calculate_min_free_kbytes();
@@ -2629,8 +2650,12 @@ static void set_recommended_min_free_kbytes(void)
 			nr_zones++;
 	}
 
-	/* Ensure 2 pageblocks are free to assist fragmentation avoidance */
-	recommended_min = pageblock_nr_pages * nr_zones * 2;
+	/* Restrict min_free_kbytes reserve to mthp maximum */
+	recommended_nr_pages = min(mthp_max_allowable_nr_pages(),
+			(unsigned long)pageblock_nr_pages);
+
+	/* Ensure 2 * recommended_nr_pages are free to assist fragmentation avoidance */
+	recommended_min = recommended_nr_pages * nr_zones * 2;
 
 	/*
 	 * Make sure that on average at least two pageblocks are almost free
@@ -2638,7 +2663,7 @@ static void set_recommended_min_free_kbytes(void)
 	 * second to avoid subsequent fallbacks of other types There are 3
 	 * MIGRATE_TYPES we care about.
 	 */
-	recommended_min += pageblock_nr_pages * nr_zones *
+	recommended_min += recommended_nr_pages * nr_zones *
 			   MIGRATE_PCPTYPES * MIGRATE_PCPTYPES;
 
 	/* don't ever allow to reserve more than 5% of the lowmem */
diff --git a/mm/shmem.c b/mm/shmem.c
index e2a7889fbbc3..a7a45e7ec26f 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1763,6 +1763,22 @@ bool shmem_hpage_pmd_enabled(void)
 	return false;
 }
 
+int shmem_allowable_huge_highest_order(void)
+{
+	unsigned long orders;
+
+	if (shmem_huge == SHMEM_HUGE_DENY)
+		return 0;
+
+	orders = READ_ONCE(huge_shmem_orders_always) | READ_ONCE(huge_shmem_orders_madvise)
+		| READ_ONCE(huge_shmem_orders_within_size);
+
+	if (shmem_huge != SHMEM_HUGE_NEVER)
+		orders |= READ_ONCE(huge_shmem_orders_inherit);
+
+	return orders == 0 ? 0 : fls(orders) - 1;
+}
+
 unsigned long shmem_allowable_huge_orders(struct inode *inode,
 				struct vm_area_struct *vma, pgoff_t index,
 				loff_t write_end, bool shmem_huge_force)
-- 
Gitee
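
Note (not part of the patch): the figures quoted in the commit message can be sanity-checked with a small userspace sketch of the same reserve arithmetic. The values below are assumptions for illustration only: 64K PAGE_SIZE, a 512M PMD-sized pageblock, 2 populated zones, MIGRATE_PCPTYPES == 3, and the later 5%-of-lowmem cap is ignored.

/*
 * Illustrative userspace sketch, not kernel code: reproduces the
 * "2 blocks per zone" + "MIGRATE_PCPTYPES^2 blocks per zone" formula
 * used by set_recommended_min_free_kbytes(), for assumed parameters.
 */
#include <stdio.h>

#define MIGRATE_PCPTYPES	3	/* assumed, matches current mainline */

static unsigned long recommended_kbytes(unsigned long block_pages,
					unsigned long page_size,
					int nr_zones)
{
	/* 2 blocks per zone to assist fragmentation avoidance */
	unsigned long pages = block_pages * nr_zones * 2;

	/* plus MIGRATE_PCPTYPES^2 blocks per zone */
	pages += block_pages * nr_zones * MIGRATE_PCPTYPES * MIGRATE_PCPTYPES;

	return pages * (page_size / 1024);	/* pages -> KiB */
}

int main(void)
{
	const unsigned long page_size = 64UL * 1024;			/* 64K pages */
	const unsigned long pmd_block = (512UL << 20) / page_size;	/* 512M pageblock */
	const unsigned long mthp_2m   = (2UL << 20) / page_size;	/* 2M mTHP */
	const int nr_zones = 2;						/* assumed */

	printf("PMD-based reserve : %lu MiB\n",
	       recommended_kbytes(pmd_block, page_size, nr_zones) >> 10);
	printf("2M mTHP reserve   : %lu MiB\n",
	       recommended_kbytes(mthp_2m, page_size, nr_zones) >> 10);
	return 0;
}

With these assumed values the PMD-based formula yields roughly 11 GiB, in line with the ">11GB min watermark" cited above, while basing the reserve on a 2M mTHP order yields on the order of 44 MiB.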