diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index b508acfdde2e27bccda57d147934baca3ed51d8d..0880f769f4dd2c8158eceb3c84e1956125086a81 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -80,6 +80,7 @@ Currently, these files are in /proc/sys/vm:
 - cache_reclaim_weight
 - cache_reclaim_enable
 - cache_limit_mbytes
+- readahead_early_break
 
 
 admin_reserve_kbytes
@@ -1089,3 +1090,9 @@ cache_limit_mbytes
 
 This is used to set the upper limit of page cache in megabytes.
 Page cache will be reclaimed periodically if page cache is over limit.
+
+readahead_early_break
+=====================
+
+This is used to break readahead early when the memcg limit is reached or
+when too many recently evicted folios are encountered.
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 0bfa9cce65890d449443512509116852b826b0bf..2568ff96c13ad80952f7fadfb3b9af30c3b24096 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -817,6 +817,8 @@ struct readahead_control {
 
 #define VM_READAHEAD_PAGES	(SZ_128K / PAGE_SIZE)
 
+extern int vm_readahead_early_break;
+
 void page_cache_ra_unbounded(struct readahead_control *,
 		unsigned long nr_to_read, unsigned long lookahead_count);
 void page_cache_sync_ra(struct readahead_control *, struct file_ra_state *,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f3f43b2def7f243653d73bda4d5cec29d0c44bbf..97dda5113657f824e9003fe03c053dd804fe438c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -3335,6 +3335,15 @@ static struct ctl_table vm_table[] = {
 		.extra2		= SYSCTL_ONE,
 	},
 #endif
+	{
+		.procname	= "readahead_early_break",
+		.data		= &vm_readahead_early_break,
+		.maxlen		= sizeof(vm_readahead_early_break),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
 	{ }
 };
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 2eeb9978f39e429237c315269952aff60a43c08d..dbf5379d74c72a1edbf8c1e713ea275acbe434c6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3168,7 +3168,14 @@ void filemap_map_pages(struct vm_fault *vmf,
 		if (xas.xa_index >= max_idx)
 			goto unlock;
 
-		if (mmap_miss > 0)
+		/*
+		 * If too many folios in a file were recently evicted,
+		 * they will probably continue to be evicted.  In that
+		 * situation read-ahead is just a waste of IO, so don't
+		 * decrease mmap_miss here; that makes sure read-ahead
+		 * can be stopped.
+		 */
+		if (mmap_miss > 0 && !(vm_readahead_early_break && PageWorkingset(page)))
 			mmap_miss--;
 
 		vmf->address += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
diff --git a/mm/readahead.c b/mm/readahead.c
index ed23d5dec12387fd862caad619b9d072d44649fc..b2652bc20623a938f61935bf8326117eb52a68de 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -27,6 +27,9 @@
 #include "internal.h"
 
 #define READAHEAD_FIRST_SIZE (2 * 1024 * 1024)
+
+int vm_readahead_early_break;
+
 /*
  * Initialise a struct file's readahead state.  Assumes that the caller has
  * memset *ra to zero.
@@ -220,11 +223,18 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
 		if (mapping->a_ops->readpages) {
 			page->index = index + i;
 			list_add(&page->lru, &page_pool);
-		} else if (add_to_page_cache_lru(page, mapping, index + i,
-					gfp_mask) < 0) {
-			put_page(page);
-			read_pages(ractl, &page_pool, true);
-			continue;
+		} else {
+			int ret;
+
+			ret = add_to_page_cache_lru(page, mapping, index + i,
+						    gfp_mask);
+			if (ret < 0) {
+				put_page(page);
+				read_pages(ractl, &page_pool, true);
+				if (vm_readahead_early_break && (ret == -ENOMEM))
+					break;
+				continue;
+			}
 		}
 		if (i == nr_to_read - lookahead_size)
 			SetPageReadahead(page);
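
For completeness, here is a minimal userspace sketch of how an administrator
could flip the new knob at runtime on a kernel built with this patch. The
file path follows from the sysctl table entry above, and the knob defaults
to 0 (disabled) since vm_readahead_early_break has no initializer; this is
illustrative only and is equivalent to "sysctl -w vm.readahead_early_break=1".

/*
 * Usage sketch (assumes a kernel built with this patch): enable the
 * early readahead break by writing "1" to the new sysctl file.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/vm/readahead_early_break", "w");

	if (!f) {
		perror("/proc/sys/vm/readahead_early_break");
		return 1;
	}
	fputs("1\n", f);	/* 1 = break readahead early, 0 = default */
	return fclose(f) != 0;
}

Because the table entry uses proc_dointvec_minmax with extra1 = SYSCTL_ZERO
and extra2 = SYSCTL_ONE, writes outside the 0..1 range are rejected with
-EINVAL.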