From 8d3bf2c49e228b43dd6bcd811451666231b90bec Mon Sep 17 00:00:00 2001 From: zhongjiang-ali Date: Wed, 22 Apr 2020 20:54:20 +0800 Subject: [PATCH 01/14] anolis: mm: kidled: provide a unified interface in preparation for scanning slab ANBZ: #22925 ANBZ: #1702 Currently, kidled_mem_cgroup_account use 'nr_pages' to store the lru pages, but the slab size will less than an page when scanning cold slab. In order to unify the two cases, we use the size instead of the number of the pages. Reviewed-by: Xu Yu Signed-off-by: zhongjiang-ali Signed-off-by: Rongwei Wang Acked-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/565 Signed-off-by: Weilin Tong --- include/linux/kidled.h | 4 ++-- mm/kidled.c | 23 ++++++++++++----------- mm/memcontrol.c | 5 ++--- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/include/linux/kidled.h b/include/linux/kidled.h index e0ae7700fa19..8c7d5a382b53 100644 --- a/include/linux/kidled.h +++ b/include/linux/kidled.h @@ -224,7 +224,7 @@ extern const int kidled_default_buckets[NUM_KIDLED_BUCKETS]; void kidled_mem_cgroup_move_stats(struct mem_cgroup *from, struct mem_cgroup *to, struct folio *folio, - unsigned int nr_pages); + unsigned long size); #endif /* CONFIG_MEMCG */ #ifdef KIDLED_AGE_NOT_IN_PAGE_FLAGS @@ -237,7 +237,7 @@ void kidled_free_folio_age(pg_data_t *pgdat); static inline void kidled_mem_cgroup_move_stats(struct mem_cgroup *from, struct mem_cgroup *to, struct folio *folio, - unsigned int nr_pages) + unsigned long size) { } #endif /* CONFIG_MEMCG */ diff --git a/mm/kidled.c b/mm/kidled.c index a1db033f2bb9..2a44eeff7f7e 100644 --- a/mm/kidled.c +++ b/mm/kidled.c @@ -156,7 +156,7 @@ EXPORT_SYMBOL_GPL(kidled_set_folio_age); #ifdef CONFIG_MEMCG static inline void kidled_mem_cgroup_account(struct folio *folio, int age, - int nr_pages) + unsigned long size) { struct mem_cgroup *memcg; struct idle_page_stats *stats; @@ -177,7 +177,7 @@ static inline void kidled_mem_cgroup_account(struct folio *folio, stats = mem_cgroup_get_unstable_idle_stats(memcg); bucket = kidled_get_bucket(stats->buckets, age); if (bucket >= 0) - stats->count[type][bucket] += nr_pages; + stats->count[type][bucket] += size; folio_memcg_unlock(folio); } @@ -185,7 +185,7 @@ static inline void kidled_mem_cgroup_account(struct folio *folio, void kidled_mem_cgroup_move_stats(struct mem_cgroup *from, struct mem_cgroup *to, struct folio *folio, - unsigned int nr_pages) + unsigned long size) { pg_data_t *pgdat = folio_pgdat(folio); unsigned long pfn = folio_pfn(folio); @@ -220,13 +220,13 @@ void kidled_mem_cgroup_move_stats(struct mem_cgroup *from, return; /* Remove from the source memory cgroup */ - if (stats[0]->count[type][bucket] > nr_pages) - stats[0]->count[type][bucket] -= nr_pages; + if (stats[0]->count[type][bucket] > size) + stats[0]->count[type][bucket] -= size; else stats[0]->count[type][bucket] = 0; if (pgdat->node_idle_scan_pfn >= pfn) { - if (stats[1]->count[type][bucket] > nr_pages) - stats[1]->count[type][bucket] -= nr_pages; + if (stats[1]->count[type][bucket] > size) + stats[1]->count[type][bucket] -= size; else stats[1]->count[type][bucket] = 0; } @@ -239,9 +239,9 @@ void kidled_mem_cgroup_move_stats(struct mem_cgroup *from, if (bucket < 0) return; - stats[2]->count[type][bucket] += nr_pages; + stats[2]->count[type][bucket] += size; if (pgdat->node_idle_scan_pfn >= pfn) - stats[3]->count[type][bucket] += nr_pages; + stats[3]->count[type][bucket] += size; } EXPORT_SYMBOL_GPL(kidled_mem_cgroup_move_stats); @@ -304,7 +304,7 @@ static inline void 
kidled_mem_cgroup_reset(void) #else /* !CONFIG_MEMCG */ static inline void kidled_mem_cgroup_account(struct folio *folio, int age, - int nr_pages) + unsigned long size) { } static inline void kidled_mem_cgroup_scan_done(struct kidled_scan_period @@ -424,7 +424,8 @@ static inline int kidled_scan_folio(pg_data_t *pgdat, unsigned long pfn) if (idle) { age = kidled_inc_folio_age(pgdat, pfn); if (age > 0) - kidled_mem_cgroup_account(folio, age, nr_pages); + kidled_mem_cgroup_account(folio, age, + nr_pages << PAGE_SHIFT); else age = 0; } else { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6bdeda54c0ae..e65f6cd3ed34 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4461,8 +4461,7 @@ static int mem_cgroup_idle_page_stats_show(struct seq_file *m, void *v) seq_printf(m, " %-8s", kidled_type_str); for (i = 0; i < j; i++) { - seq_printf(m, " %14lu", - stats->count[t][i] << PAGE_SHIFT); + seq_printf(m, " %14lu", stats->count[t][i]); } seq_puts(m, "\n"); @@ -7733,7 +7732,7 @@ static int mem_cgroup_move_account(struct page *page, ret = 0; nid = folio_nid(folio); - kidled_mem_cgroup_move_stats(from, to, folio, nr_pages); + kidled_mem_cgroup_move_stats(from, to, folio, nr_pages << PAGE_SHIFT); local_irq_disable(); mem_cgroup_charge_statistics(to, nr_pages); -- Gitee From b5fbe16474933b06ff99c080f106ec1111417678 Mon Sep 17 00:00:00 2001 From: zhongjiang-ali Date: Mon, 16 Mar 2020 00:57:53 +0800 Subject: [PATCH 02/14] anolis: mm: kidled: make kidled support to identify cold slab ANBZ: #22925 ANBZ: #1702 In some cases, we can see a large of reclaimable slab in the machine. It can result in some memory fragment to make the system fluctuation. Meanwhile, reclaim the slab can increase the memory to be used for offline business. The patch is adapted to kidle frame, thus the kidled is able to monitor the free slab for an long time. It is an indicator for user to reap the long free slab. Reviewed-by: Xu Yu Signed-off-by: zhongjiang-ali Signed-off-by: Rongwei Wang Acked-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/565 Signed-off-by: Weilin Tong --- Documentation/vm/kidled.rst | 77 ++++++++++++++---------- fs/dcache.c | 48 +++++++++++++++ fs/inode.c | 62 +++++++++++++++++++ fs/internal.h | 41 +++++++++++++ fs/namei.c | 10 +++- fs/super.c | 38 ++++++++++++ include/linux/dcache.h | 4 ++ include/linux/fs.h | 3 + include/linux/kidled.h | 29 ++++++++- include/linux/list_lru.h | 3 + include/linux/shrinker.h | 4 ++ include/linux/swap.h | 3 + mm/kidled.c | 66 +++++++++++++++----- mm/list_lru.c | 21 +++++++ mm/memcontrol.c | 19 +++--- mm/shrinker.c | 116 ++++++++++++++++++++++++++++++++++++ 16 files changed, 487 insertions(+), 57 deletions(-) diff --git a/Documentation/vm/kidled.rst b/Documentation/vm/kidled.rst index f5ce2fed2a8c..527d93cef15c 100644 --- a/Documentation/vm/kidled.rst +++ b/Documentation/vm/kidled.rst @@ -7,18 +7,24 @@ kidled Introduction ============ -kidled uses a kernel thread to scan the pages on LRU list, and supports to -output statistics for each memory cgroup (process is not supported yet). -kidled scans pages round to round indexed by pfn, and will try to finish each -round in a fixed duration which is named as scan period. Of course, users can -set the scan period whose unit is seconds. Each page has an attribute named -as 'idle age', which represents how long the page is kept in idle state, the -age's unit is in one scan period. 
The idle aging information (field) consumes
+kidled uses a kernel thread to scan the pages and slab objects on LRU lists,
+and supports outputting statistics for each memory cgroup (process is not
+supported yet). kidled scans pages round by round, indexed by pfn, but
+scanning slab objects works differently: a slab LRU list is not stable over
+time, so the LRU size observed when the list is first visited is taken as the
+number of objects of that slab which kidled should scan in the round. kidled
+tries to finish each round in a fixed duration which is named the scan
+period. Of course, users can set the scan period, whose unit is seconds.
+Each scanned object has an attribute named 'idle age', which represents how
+long the object has been kept in the idle state; the age's unit is one scan
+period. The idle aging information (field) of a page consumes
 one byte, which is stored in dynamically allocated array, tied with the NUMA
-node or flags field of page descriptor (struct page). So the maximal age is
-255. kidled eventually shows the histogram statistics through memory cgroup
-files (``memory.idle_page_stats``). The statistics could be used to evaluate
-the working-set size of that memory cgroup or the hierarchy.
+node or flags field of page descriptor (struct page). Meanwhile, slab objects
+use two bytes: the lower byte stores the idle aging information and the upper
+byte carries a mark so that an object is not visited more than once in a
+round. So the maximal age is 255. kidled eventually shows the histogram
+statistics through memory cgroup files (``memory.idle_page_stats``). The
+statistics can be used to evaluate the working-set size of that memory cgroup
+or the hierarchy.

 Note: The implementation of kidled had referred to Michel Lespinasse's patch:
 https://lore.kernel.org/lkml/20110922161448.91a2e2b2.akpm@google.com/T/
 Thanks for Michel Lespinasse's idea about page age and buckets!
@@ -63,7 +69,14 @@ Here are their functions:
   statistics, but it won't be very odd due to the duration are the same at least.

-* ``memory.idle_page_stats.local`` (memory cgroup v1/v2)
+* ``/sys/kernel/mm/kidled/slab_scan_enabled``
+
+  It controls whether kidled scans slab objects. By default, kidled does not
+  scan slab because the CPU load can become very high if the system has a lot
+  of reclaimable slabs. But we need to enable it when userspace pages have
+  been reclaimed and many reclaimable slabs remain in the system; it is
+  better to mark and reclaim the cold slab ahead of the memory reclaim
+  triggered by allocation requests.
+
   It shows histogram of idle statistics for the corresponding memory cgroup.
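The two-byte slab age described above packs two things into one u16: the lower byte is the idle age in scan rounds, and the upper byte records the low 8 bits of the round in which the object was last visited, so an object is never aged twice in the same round. Below is a minimal user-space sketch of that encoding; it is not kernel code, and the helper names are invented, only mirroring the semantics of the KIDLED_*_SLAB_AGE / *_SLAB_SCANNED macros this patch adds to fs/internal.h.

#include <stdint.h>
#include <stdio.h>

#define KIDLED_MAX_IDLE_AGE	255u
#define KIDLED_ACCESS_SHIFT	8
#define KIDLED_ACCESS_MASK	0xff00u

/* Lower byte: idle age in scan rounds; upper byte: round the object was last visited. */
static uint8_t slab_age(uint16_t v)    { return (uint8_t)(v & 0xff); }
static uint8_t last_round(uint16_t v)  { return (uint8_t)(v >> KIDLED_ACCESS_SHIFT); }

static int scanned_this_round(uint16_t v, unsigned long round)
{
	return last_round(v) == (round & 0xff);
}

static uint16_t mark_scanned(uint16_t v, unsigned long round)
{
	return (uint16_t)((v & ~KIDLED_ACCESS_MASK) |
			  ((round & 0xff) << KIDLED_ACCESS_SHIFT));
}

static uint16_t inc_age(uint16_t v)
{
	uint8_t age = slab_age(v);

	if (age < KIDLED_MAX_IDLE_AGE)	/* saturate: the age never wraps past 255 */
		age++;
	return (uint16_t)((v & KIDLED_ACCESS_MASK) | age);
}

int main(void)
{
	uint16_t obj = 0;	/* a freshly allocated dentry/inode starts with age 0 */
	unsigned long round;

	for (round = 1; round <= 3; round++) {
		/* analogous to the "dentry_age && kidled_is_slab_scanned()" guard */
		if (slab_age(obj) && scanned_this_round(obj, round))
			continue;
		obj = inc_age(obj);		/* still idle on the LRU: grow its age */
		obj = mark_scanned(obj, round);	/* remember we visited it this round */
		printf("round %lu: age=%u, last scanned in round %u\n",
		       round, (unsigned)slab_age(obj), (unsigned)last_round(obj));
	}
	return 0;
}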
@@ -77,7 +90,7 @@ Here are their functions: ----------------------------- snapshot start ----------------------------- # version: 1.0 - # scans: 1380 + # scans: 92 # scan_period_in_seconds: 120 # buckets: 1,2,5,15,30,60,120,240 # @@ -85,24 +98,26 @@ Here are their functions: # / _----=> swap/file # | / _---=> evict/unevict # || / _--=> inactive/active - # ||| / - # |||| [1,2) [2,5) [5,15) [15,30) [30,60) [60,120) [120,240) [240,+inf) - csei 0 0 0 0 0 0 0 0 - dsei 0 0 442368 49152 0 49152 212992 7741440 - cfei 4096 233472 1171456 1032192 28672 65536 122880 147550208 - dfei 0 0 4096 20480 4096 0 12288 12288 - csui 0 0 0 0 0 0 0 0 - dsui 0 0 0 0 0 0 0 0 - cfui 0 0 0 0 0 0 0 0 - dfui 0 0 0 0 0 0 0 0 - csea 77824 331776 1216512 1069056 217088 372736 327680 33284096 - dsea 0 0 0 0 0 0 0 139264 - cfea 4096 57344 606208 13144064 53248 135168 1683456 48357376 - dfea 0 0 0 0 0 0 0 0 - csua 0 0 0 0 0 0 0 0 - dsua 0 0 0 0 0 0 0 0 - cfua 0 0 0 0 0 0 0 0 - dfua 0 0 0 0 0 0 0 0 + # ||| / _-=> slab + # |||| / + # ||||| [1,2) [2,5) [5,15) [15,30) [30,60) [60,120) [120,240) [240,+inf) + csei 0 0 0 0 0 0 0 0 + dsei 0 16384 0 0 0 360448 0 0 + cfei 774144 3624960 1744896 1298432 20676608 161087488 0 0 + dfei 0 0 16384 0 24576 0 0 0 + csui 0 0 0 0 0 0 0 0 + dsui 0 0 0 0 0 0 0 0 + cfui 0 0 0 0 0 0 0 0 + dfui 0 0 0 0 0 0 0 0 + csea 278528 3510272 389120 872448 806912 22716416 0 0 + dsea 0 12288 0 0 0 196608 0 0 + cfea 1298432 12115968 3510272 10518528 78409728 1503793152 0 0 + dfea 0 0 0 0 0 4096 0 0 + csua 0 0 0 0 0 0 0 0 + dsua 0 0 0 0 0 0 0 0 + cfua 0 0 0 0 0 0 0 0 + dfua 0 0 0 0 0 0 0 0 + slab 2704 832 15600 20800 70720 763819160 0 0 ----------------------------- snapshot end ----------------------------- ``scans`` means how many rounds current cgroup has been scanned. diff --git a/fs/dcache.c b/fs/dcache.c index 4030c010a768..4595e86009d3 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "internal.h" #include "mount.h" @@ -1288,6 +1289,52 @@ long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc) return freed; } +#ifdef CONFIG_KIDLED +/* + * It will takes a lot of time in spin_trylock and spin_unlock when + * scanning the slab. I remove the lock operation directly even if + * it can bring in some inaccuracy in statistics. Meanwhile, it is + * safe because the dentry will not be released when lru lock is hold. 
+ */ +static enum lru_status dentry_lru_cold_count(struct list_head *item, + struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) +{ + struct dentry *dentry = container_of(item, struct dentry, d_lru); + static int dentry_size; + u16 dentry_age = KIDLED_GET_SLAB_AGE(dentry); + + /* avoid an object to scan twice in an round */ + if (dentry_age && + kidled_is_slab_scanned(dentry_age, kidled_scan_rounds)) + goto out; + + if (READ_ONCE(dentry->d_lockref.count) || + (dentry->d_flags & DCACHE_REFERENCED)) { + if (dentry_age) + KIDLED_SET_SLAB_AGE(dentry, 0); + goto out; + } + + KIDLED_CLEAR_SLAB_SCANNED(dentry); + if (unlikely(!dentry_size)) + dentry_size = ksize(dentry); + dentry_age = KIDLED_INC_SLAB_AGE(dentry); + kidled_mem_cgroup_slab_account(dentry, dentry_age, dentry_size); + KIDLED_MARK_SLAB_SCANNED(dentry, kidled_scan_rounds); +out: + return LRU_ROTATE_DELAY; +} + +void cold_dcache_sb(struct super_block *sb, struct shrink_control *sc) +{ + unsigned long nr_to_walk = sc->nr_to_scan; + + list_lru_walk_node(&sb->s_dentry_lru, sc->nid, + dentry_lru_cold_count, + NULL, &nr_to_walk); +} +#endif + static enum lru_status dentry_lru_isolate_shrink(struct list_head *item, struct list_lru_one *lru, spinlock_t *lru_lock, void *arg) { @@ -1810,6 +1857,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) dentry->d_lockref.count = 1; dentry->d_flags = 0; + KIDLED_SET_SLAB_AGE(dentry, 0); spin_lock_init(&dentry->d_lock); seqcount_spinlock_init(&dentry->d_seq, &dentry->d_lock); dentry->d_inode = NULL; diff --git a/fs/inode.c b/fs/inode.c index 2c44dda61a69..c5ed92ecbdbd 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -19,6 +19,7 @@ #include /* for inode_has_buffers */ #include #include +#include #include #include #include "internal.h" @@ -162,6 +163,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_sb = sb; inode->i_blkbits = sb->s_blocksize_bits; inode->i_flags = 0; + KIDLED_SET_SLAB_AGE(inode, 0); atomic64_set(&inode->i_sequence, 0); atomic_set(&inode->i_count, 1); inode->i_op = &empty_iops; @@ -928,6 +930,58 @@ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc) return freed; } +#ifdef CONFIG_KIDLED +/* + * The implementation of principle is similar to the dentry. It will + * takes a lot of time in spin_lock/spin_unlock. it is useless that + * we only want to know the real free slab. 
+ */ +static enum lru_status inode_lru_cold_count(struct list_head *item, + struct list_lru_one *lru, spinlock_t *lock, void *arg) +{ + struct inode *inode = container_of(item, struct inode, i_lru); + static int inode_size; + u16 inode_age = KIDLED_GET_SLAB_AGE(inode); + + if (inode_age && + kidled_is_slab_scanned(inode_age, kidled_scan_rounds)) + goto out; + + if (atomic_read(&inode->i_count) || + (inode->i_state & I_REFERENCED)) { + if (unlikely(inode_age)) + KIDLED_SET_SLAB_AGE(inode, 0); + goto out; + } + + if (inode->i_data.nrpages || + !list_empty(&inode->i_data.private_list)) { + if (unlikely(inode_age)) + KIDLED_SET_SLAB_AGE(inode, 0); + goto out; + } + + KIDLED_CLEAR_SLAB_SCANNED(inode); + if (unlikely(!inode_size)) + inode_size = ksize(inode); + inode_age = KIDLED_INC_SLAB_AGE(inode); + kidled_mem_cgroup_slab_account(inode, inode_age, inode_size); + KIDLED_MARK_SLAB_SCANNED(inode, kidled_scan_rounds); +out: + return LRU_ROTATE_DELAY; +} + +void cold_icache_sb(struct super_block *sb, + struct shrink_control *sc) +{ + unsigned long nr_to_walk = sc->nr_to_scan; + + list_lru_walk_node(&sb->s_inode_lru, sc->nid, + inode_lru_cold_count, NULL, + &nr_to_walk); +} +#endif + static void __wait_on_freeing_inode(struct inode *inode); /* * Called with the inode lock held. @@ -1533,6 +1587,10 @@ struct inode *ilookup5(struct super_block *sb, unsigned long hashval, if (unlikely(inode_unhashed(inode))) { iput(inode); goto again; + } else { + /* reset its age if it has already had an age */ + if (KIDLED_GET_SLAB_AGE(inode)) + KIDLED_SET_SLAB_AGE(inode, 0); } } return inode; @@ -1563,6 +1621,10 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino) if (unlikely(inode_unhashed(inode))) { iput(inode); goto again; + } else { + /* reset its age if it has already had an age */ + if (KIDLED_GET_SLAB_AGE(inode)) + KIDLED_SET_SLAB_AGE(inode, 0); } } return inode; diff --git a/fs/internal.h b/fs/internal.h index d64ae03998cc..d9f078f1bd29 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -4,6 +4,9 @@ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) */ +#ifdef CONFIG_KIDLED +#include +#endif struct super_block; struct file_system_type; @@ -207,6 +210,12 @@ extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *) extern char *simple_dname(struct dentry *, char *, int); extern void dput_to_list(struct dentry *, struct list_head *); extern void shrink_dentry_list(struct list_head *); +#ifdef CONFIG_KIDLED +extern void cold_dcache_sb(struct super_block *sb, + struct shrink_control *sc); +extern void cold_icache_sb(struct super_block *sb, + struct shrink_control *sc); +#endif /* * pipe.c @@ -298,3 +307,35 @@ ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *po struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns); struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap); void mnt_idmap_put(struct mnt_idmap *idmap); + +#ifdef CONFIG_KIDLED +#define KIDLED_GET_SLAB_AGE(object) (object->age) +#define KIDLED_SET_SLAB_AGE(object, slab_age) (object->age = slab_age) +#define KIDLED_INC_SLAB_AGE(object) \ +({ \ + u16 slab_age = KIDLED_GET_SLAB_AGE(object); \ + \ + if (slab_age < KIDLED_MAX_IDLE_AGE) { \ + slab_age++; \ + KIDLED_SET_SLAB_AGE(object, slab_age); \ + } \ + slab_age; \ +}) +#define KIDLED_CLEAR_SLAB_SCANNED(object) \ +({ \ + u16 slab_age = KIDLED_GET_SLAB_AGE(object); \ + \ + slab_age &= ~KIDLED_SLAB_ACCESS_MASK; \ + KIDLED_SET_SLAB_AGE(object, slab_age); \ +}) +#define KIDLED_MARK_SLAB_SCANNED(object, scan_rounds) \ +({ \ + u16 slab_age = KIDLED_GET_SLAB_AGE(object); \ + \ + slab_age |= (scan_rounds & 0xff) << KIDLED_SLAB_ACCESS_SHIFT; \ + KIDLED_SET_SLAB_AGE(object, slab_age); \ +}) +#else +#define KIDLED_GET_SLAB_AGE(object) 0 +#define KIDLED_SET_SLAB_AGE(object, slab_age) +#endif diff --git a/fs/namei.c b/fs/namei.c index 887e8bfaf25a..7c96d1e95a88 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -858,10 +858,14 @@ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry) static inline int d_revalidate(struct dentry *dentry, unsigned int flags) { + int status = 1; + if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) - return dentry->d_op->d_revalidate(dentry, flags); - else - return 1; + status = dentry->d_op->d_revalidate(dentry, flags); + /* Reset the age when lookuping the dentry successfully */ + if (status > 0 && KIDLED_GET_SLAB_AGE(dentry)) + KIDLED_SET_SLAB_AGE(dentry, 0); + return status; } /** diff --git a/fs/super.c b/fs/super.c index 992a6f80252d..49774f1814a0 100644 --- a/fs/super.c +++ b/fs/super.c @@ -277,6 +277,41 @@ static unsigned long super_cache_count(struct shrinker *shrink, return total_objects; } +#ifdef CONFIG_KIDLED +static unsigned long super_cache_cold(struct shrinker *shrinker, + struct shrink_control *sc) +{ + struct super_block *sb; + unsigned long dentry_objects, inode_objects; + unsigned long dentries, inodes; + unsigned long total_objects; + unsigned long nr_to_scan = sc->nr_to_scan; + + sb = shrinker->private_data; + + if (!super_trylock_shared(sb)) + return SHRINK_STOP; + + dentry_objects = list_lru_shrink_count(&sb->s_dentry_lru, sc); + inode_objects = list_lru_shrink_count(&sb->s_inode_lru, sc); + total_objects = dentry_objects + inode_objects; + if (!total_objects) + total_objects = 1; + + /* make sure dentries and inodes scan at least one object */ + dentries = mult_frac(nr_to_scan, dentry_objects, total_objects); + inodes = mult_frac(nr_to_scan, inode_objects, total_objects); + + sc->nr_to_scan = dentries + 1; + cold_dcache_sb(sb, sc); + sc->nr_to_scan = inodes + 
1; + cold_icache_sb(sb, sc); + super_unlock_shared(sb); + + return nr_to_scan; +} +#endif + static void destroy_super_work(struct work_struct *work) { struct super_block *s = container_of(work, struct super_block, @@ -390,6 +425,9 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, s->s_shrink->scan_objects = super_cache_scan; s->s_shrink->count_objects = super_cache_count; +#ifdef CONFIG_KIDLED + s->s_shrink->cold_objects = super_cache_cold; +#endif s->s_shrink->batch = 1024; s->s_shrink->private_data = s; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 05b9b6e86c3a..29063d8076d0 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -113,6 +113,10 @@ struct dentry { struct rcu_head d_rcu; } d_u; +#ifdef CONFIG_KIDLED + unsigned short age; +#endif + CK_KABI_RESERVE(1) CK_KABI_RESERVE(2) } __randomize_layout; diff --git a/include/linux/fs.h b/include/linux/fs.h index 0cc029af9359..4f858f5a4331 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -758,6 +758,9 @@ struct inode { void *i_private; /* fs or device private pointer */ +#ifdef CONFIG_KIDLED + unsigned short age; +#endif CK_KABI_RESERVE(1) CK_KABI_RESERVE(2) } __randomize_layout; diff --git a/include/linux/kidled.h b/include/linux/kidled.h index 8c7d5a382b53..229481750c2e 100644 --- a/include/linux/kidled.h +++ b/include/linux/kidled.h @@ -5,8 +5,10 @@ #ifdef CONFIG_KIDLED #include +#include #define KIDLED_VERSION "1.0" +struct mem_cgroup; /* * We want to get more info about a specified idle page, whether it's @@ -17,17 +19,19 @@ * KIDLE_FILE : page is a page cache or not; * KIDLE_UNEVIT : page is unevictable or evictable; * KIDLE_ACTIVE : page is in active LRU list or not. + * KIDLE_SLAB : whether it belongs to a slab or not. * * Each KIDLE_ occupies one bit position in a specified idle type. - * There exist total 2^4=16 idle types. + * There exist total 2^4+1=17 idle types. */ #define KIDLE_BASE 0 #define KIDLE_DIRTY (1 << 0) #define KIDLE_FILE (1 << 1) #define KIDLE_UNEVICT (1 << 2) #define KIDLE_ACTIVE (1 << 3) +#define KIDLE_SLAB (1 << 4) -#define KIDLE_NR_TYPE 16 +#define KIDLE_NR_TYPE 17 /* * Each page has an idle age which means how long the page is keeping @@ -68,6 +72,9 @@ * kidled_get_bucket(). User shouldn't use KIDLED_INVALID_BUCKET directly. */ #define KIDLED_INVALID_BUCKET (KIDLED_MAX_IDLE_AGE + 1) +/* Mark the higher byte as an sign of slab objects access in a round */ +#define KIDLED_SLAB_ACCESS_MASK 0xff00 +#define KIDLED_SLAB_ACCESS_SHIFT 0x8 #define KIDLED_MARK_BUCKET_INVALID(buckets) \ (buckets[0] = KIDLED_INVALID_BUCKET) @@ -76,6 +83,12 @@ DECLARE_STATIC_KEY_FALSE(kidled_enabled_key); +static inline bool kidled_is_slab_scanned(unsigned short slab_age, + unsigned long scan_rounds) +{ + return slab_age >> KIDLED_SLAB_ACCESS_SHIFT == (scan_rounds & 0xff); +} + /* * We account number of idle pages depending on idle type and buckets * for a specified instance (e.g. one memory cgroup or one process...) 
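To make the accounting described above concrete: every scanned object contributes its size in bytes to count[type][bucket], where type is the KIDLE_* bit combination (the new KIDLE_SLAB type is index 16) and the bucket is chosen from the default boundaries 1, 2, 5, 15, 30, 60, 120, 240 shown in the documentation. The sketch below is a user-space illustration only; bucket_for_age() is a hypothetical stand-in for kidled_get_bucket(), assuming the behaviour implied by the callers, namely that an age below the first boundary yields a negative bucket and is not accounted.

#include <stdio.h>

#define NUM_KIDLED_BUCKETS	8
#define KIDLE_NR_TYPE		17	/* 16 page bit combinations plus the extra slab type */

/* Default histogram boundaries; bucket i covers ages in [buckets[i], buckets[i+1]). */
static const int buckets[NUM_KIDLED_BUCKETS] = { 1, 2, 5, 15, 30, 60, 120, 240 };

/* Map an idle age to a bucket index; ages below buckets[0] are not accounted. */
static int bucket_for_age(int age)
{
	int i;

	for (i = NUM_KIDLED_BUCKETS - 1; i >= 0; i--) {
		if (age >= buckets[i])
			return i;
	}
	return -1;
}

int main(void)
{
	/* count[type][bucket] accumulates bytes (not pages) after patch 01 */
	unsigned long count[KIDLE_NR_TYPE][NUM_KIDLED_BUCKETS] = { { 0 } };
	const int slab_type = KIDLE_NR_TYPE - 1;	/* the "slab" row of idle_page_stats */
	const int ages[] = { 0, 1, 3, 30, 250 };	/* sample idle ages, in scan rounds */
	const unsigned long bytes[] = { 192, 192, 600, 1048576, 4096 };
	int i;

	for (i = 0; i < 5; i++) {
		int b = bucket_for_age(ages[i]);

		if (b >= 0)	/* age 0 has not completed a full round yet */
			count[slab_type][b] += bytes[i];
	}

	for (i = 0; i < NUM_KIDLED_BUCKETS; i++)
		printf("bucket [%d,...): %lu bytes\n", buckets[i], count[slab_type][i]);
	return 0;
}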
@@ -108,10 +121,22 @@ struct kidled_scan_period { }; }; extern struct kidled_scan_period kidled_scan_period; +extern unsigned long kidled_scan_rounds; #define KIDLED_OP_SET_DURATION (1 << 0) #define KIDLED_OP_INC_SEQ (1 << 1) +extern void kidled_mem_cgroup_account(struct folio *folio, + void *ptr, int age, unsigned long size); +static inline void kidled_mem_cgroup_slab_account(void *object, + int age, int size) +{ + struct folio *folio; + + folio = virt_to_folio(object); + kidled_mem_cgroup_account(folio, object, age, size); +} + static inline struct kidled_scan_period kidled_get_current_scan_period(void) { struct kidled_scan_period scan_period; diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index 069d0515b50c..b3ece2916b5a 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -21,6 +21,9 @@ enum lru_status { LRU_REMOVED_RETRY, /* item removed, but lock has been dropped and reacquired */ LRU_ROTATE, /* item referenced, give another pass */ +#ifdef CONFIG_KIDLED + LRU_ROTATE_DELAY, /* item rotate, but not execute immediately */ +#endif LRU_SKIP, /* item cannot be locked, skip */ LRU_RETRY, /* item not freeable. May drop the lock internally, but has to return locked. */ diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 2ee653b4b3e1..bfc79f579bf5 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -86,6 +86,10 @@ struct shrinker { unsigned long (*scan_objects)(struct shrinker *, struct shrink_control *sc); +#ifdef CONFIG_KIDLED + unsigned long (*cold_objects)(struct shrinker *, + struct shrink_control *sc); +#endif long batch; /* reclaim batch size, 0 = default */ int seeks; /* seeks to recreate an obj */ unsigned flags; diff --git a/include/linux/swap.h b/include/linux/swap.h index b19ff4234661..a1b6a7ee0761 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -437,6 +437,9 @@ extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, pg_data_t *pgdat, unsigned long *nr_scanned); extern unsigned long shrink_all_memory(unsigned long nr_pages); +#ifdef CONFIG_KIDLED +extern void kidled_scan_slab(int nid, struct mem_cgroup *memcg); +#endif extern int vm_swappiness; long remove_mapping(struct address_space *mapping, struct folio *folio); diff --git a/mm/kidled.c b/mm/kidled.c index 2a44eeff7f7e..1c127f6132a1 100644 --- a/mm/kidled.c +++ b/mm/kidled.c @@ -11,6 +11,9 @@ #include #include #include +#include +#include "slab.h" +#include #include #include @@ -79,7 +82,7 @@ struct kidled_scan_period kidled_scan_period; const int kidled_default_buckets[NUM_KIDLED_BUCKETS] = { 1, 2, 5, 15, 30, 60, 120, 240 }; static DECLARE_WAIT_QUEUE_HEAD(kidled_wait); -static unsigned long kidled_scan_rounds __read_mostly; +unsigned long kidled_scan_rounds __read_mostly; static inline int kidled_get_bucket(int *idle_buckets, int age) { @@ -100,6 +103,11 @@ static inline int kidled_get_idle_type(struct folio *folio) { int idle_type = KIDLE_BASE; + if (folio_test_slab(folio)) { + idle_type |= KIDLE_SLAB; + goto out; + } + if (folio_test_dirty(folio) || folio_test_writeback(folio)) idle_type |= KIDLE_DIRTY; if (folio_is_file_lru(folio)) @@ -113,6 +121,7 @@ static inline int kidled_get_idle_type(struct folio *folio) idle_type |= KIDLE_UNEVICT; if (folio_test_active(folio)) idle_type |= KIDLE_ACTIVE; +out: return idle_type; } @@ -154,24 +163,31 @@ EXPORT_SYMBOL_GPL(kidled_set_folio_age); #endif /* !KIDLED_AGE_NOT_IN_PAGE_FLAGS */ #ifdef CONFIG_MEMCG -static inline void kidled_mem_cgroup_account(struct folio *folio, - int age, - 
unsigned long size) +void kidled_mem_cgroup_account(struct folio *folio, + void *ptr, int age, unsigned long size) { struct mem_cgroup *memcg; struct idle_page_stats *stats; int type, bucket; + bool locked = false; if (mem_cgroup_disabled()) return; type = kidled_get_idle_type(folio); - - folio_memcg_lock(folio); - memcg = folio_memcg(folio); - if (unlikely(!memcg)) { - folio_memcg_unlock(folio); - return; + if (type == KIDLE_SLAB) { + if (!memcg_kmem_online()) + memcg = root_mem_cgroup; + else + memcg = mem_cgroup_from_obj(ptr); + } else { + folio_memcg_lock(folio); + memcg = folio_memcg(folio); + if (unlikely(!memcg)) { + folio_memcg_unlock(folio); + return; + } + locked = true; } stats = mem_cgroup_get_unstable_idle_stats(memcg); @@ -179,7 +195,8 @@ static inline void kidled_mem_cgroup_account(struct folio *folio, if (bucket >= 0) stats->count[type][bucket] += size; - folio_memcg_unlock(folio); + if (locked) + folio_memcg_unlock(folio); } void kidled_mem_cgroup_move_stats(struct mem_cgroup *from, @@ -303,8 +320,7 @@ static inline void kidled_mem_cgroup_reset(void) } #else /* !CONFIG_MEMCG */ static inline void kidled_mem_cgroup_account(struct folio *folio, - int age, - unsigned long size) + void *ptr, int age, unsigned long size) { } static inline void kidled_mem_cgroup_scan_done(struct kidled_scan_period @@ -424,7 +440,7 @@ static inline int kidled_scan_folio(pg_data_t *pgdat, unsigned long pfn) if (idle) { age = kidled_inc_folio_age(pgdat, pfn); if (age > 0) - kidled_mem_cgroup_account(folio, age, + kidled_mem_cgroup_account(folio, NULL, age, nr_pages << PAGE_SHIFT); else age = 0; @@ -601,6 +617,26 @@ void kidled_free_folio_age(pg_data_t *pgdat) } #endif +static inline void kidled_scan_slab_node(int nid) +{ + struct mem_cgroup *memcg; + + memcg = mem_cgroup_iter(NULL, NULL, NULL); + do { + kidled_scan_slab(nid, memcg); + if (!memcg_kmem_online()) + break; + } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL); +} + +static inline void kidled_scan_slabs(void) +{ + int nid; + + for_each_online_node(nid) + kidled_scan_slab_node(nid); +} + static inline void kidled_scan_done(struct kidled_scan_period scan_period) { kidled_mem_cgroup_scan_done(scan_period); @@ -735,9 +771,11 @@ static int kidled(void *dummy) put_online_mems(); if (scan_done) { + kidled_scan_slabs(); kidled_scan_done(scan_period); restart = true; } else { + kidled_scan_slabs(); restart = false; } diff --git a/mm/list_lru.c b/mm/list_lru.c index a05e5bef3b40..3c2d3ce29f65 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -211,6 +211,10 @@ __list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx, struct list_lru_one *l; struct list_head *item, *n; unsigned long isolated = 0; +#ifdef CONFIG_KIDLED + bool kidled_slab_scan = false; + LIST_HEAD(head_temp); +#endif restart: l = list_lru_from_memcg_idx(lru, nid, memcg_idx); @@ -247,6 +251,12 @@ __list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx, case LRU_ROTATE: list_move_tail(item, &l->list); break; +#ifdef CONFIG_KIDLED + case LRU_ROTATE_DELAY: + if (unlikely(!kidled_slab_scan)) + kidled_slab_scan = true; + /* fall through */ +#endif case LRU_SKIP: break; case LRU_RETRY: @@ -260,6 +270,17 @@ __list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx, BUG(); } } +#ifdef CONFIG_KIDLED + if (kidled_slab_scan) { + struct list_head *head = &l->list; + struct list_head *entry = item->prev; + + if (item != head) { + list_cut_position(&head_temp, head, entry); + list_splice_tail(&head_temp, head); + } + } +#endif out: return isolated; } diff --git 
a/mm/memcontrol.c b/mm/memcontrol.c index e65f6cd3ed34..ba7672f601c6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4431,9 +4431,10 @@ static int mem_cgroup_idle_page_stats_show(struct seq_file *m, void *v) seq_puts(m, "# / _----=> swap/file\n"); seq_puts(m, "# | / _---=> evict/unevict\n"); seq_puts(m, "# || / _--=> inactive/active\n"); - seq_puts(m, "# ||| /\n"); + seq_puts(m, "# ||| / _-=> slab\n"); + seq_puts(m, "# |||| /\n"); - seq_printf(m, "# %-8s", "||||"); + seq_printf(m, "# %-8s", "|||||"); for (i = 0; i < j; i++) { char region[20]; @@ -4453,11 +4454,15 @@ static int mem_cgroup_idle_page_stats_show(struct seq_file *m, void *v) for (t = 0; t < KIDLE_NR_TYPE; t++) { char kidled_type_str[5]; - kidled_type_str[0] = t & KIDLE_DIRTY ? 'd' : 'c'; - kidled_type_str[1] = t & KIDLE_FILE ? 'f' : 's'; - kidled_type_str[2] = t & KIDLE_UNEVICT ? 'u' : 'e'; - kidled_type_str[3] = t & KIDLE_ACTIVE ? 'a' : 'i'; - kidled_type_str[4] = '\0'; + if (t & KIDLE_SLAB) + memcpy(kidled_type_str, "slab", 5); + else { + kidled_type_str[0] = t & KIDLE_DIRTY ? 'd' : 'c'; + kidled_type_str[1] = t & KIDLE_FILE ? 'f' : 's'; + kidled_type_str[2] = t & KIDLE_UNEVICT ? 'u' : 'e'; + kidled_type_str[3] = t & KIDLE_ACTIVE ? 'a' : 'i'; + kidled_type_str[4] = '\0'; + } seq_printf(m, " %-8s", kidled_type_str); for (i = 0; i < j; i++) { diff --git a/mm/shrinker.c b/mm/shrinker.c index ea6b5289e073..67233aaaf1ac 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -381,6 +381,122 @@ static long add_nr_deferred(long nr, struct shrinker *shrinker, #define SHRINK_BATCH 128 +#ifdef CONFIG_KIDLED +static void kidled_scan_slab_common(struct shrinker *shrinker, + struct shrink_control *sc) +{ + long batch_size = shrinker->batch ?: SHRINK_BATCH; + long freeable, nr_free; + unsigned int scan_duration = kidled_get_current_scan_duration(); + + if (!shrinker->cold_objects) + return; + freeable = shrinker->count_objects(shrinker, sc); + if (freeable == 0 || freeable == SHRINK_EMPTY) + return; + + nr_free = DIV_ROUND_UP(freeable, scan_duration); + while (nr_free > 0) { + unsigned long nr_scanned; + + sc->nr_to_scan = min(nr_free, batch_size); + nr_scanned = shrinker->cold_objects(shrinker, sc); + if (nr_scanned == SHRINK_STOP) + break; + nr_free -= nr_scanned; + cond_resched(); + } +} + +#ifdef CONFIG_MEMCG +static void kidled_scan_slab_memcg(int nid, struct mem_cgroup *memcg) +{ + struct shrinker_info *info; + int offset, index = 0; + + if (!mem_cgroup_online(memcg)) + return; + + rcu_read_lock(); + info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info); + if (unlikely(!info)) + goto out; + +again: + if (index < shrinker_id_to_index(info->map_nr_max)) { + struct shrinker_info_unit *unit; + + unit = info->unit[index]; + + rcu_read_unlock(); + + for_each_set_bit(offset, unit->map, SHRINKER_UNIT_BITS) { + struct shrink_control sc = { + .gfp_mask = GFP_KERNEL, + .nid = nid, + .memcg = memcg, + }; + struct shrinker *shrinker; + int shrinker_id = calc_shrinker_id(index, offset); + + rcu_read_lock(); + shrinker = idr_find(&shrinker_idr, shrinker_id); + if (unlikely(!shrinker || !shrinker_try_get(shrinker))) { + clear_bit(offset, unit->map); + rcu_read_unlock(); + continue; + } + rcu_read_unlock(); + + /* Call non-slab shrinkers even though kmem is disabled */ + if (!memcg_kmem_online() && + !(shrinker->flags & SHRINKER_NONSLAB)) + continue; + + kidled_scan_slab_common(shrinker, &sc); + shrinker_put(shrinker); + } + + index++; + goto again; + } +out: + rcu_read_unlock(); +} +#else /* !CONFIG_MEMCG */ +static void 
kidled_scan_slab_memcg(int nid, struct mem_cgroup *memcg) +{ +} +#endif /* CONFIG_MEMCG */ + +void kidled_scan_slab(int nid, struct mem_cgroup *memcg) +{ + struct shrinker *shrinker; + + if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg)) + return kidled_scan_slab_memcg(nid, memcg); + + rcu_read_lock(); + list_for_each_entry(shrinker, &shrinker_list, list) { + struct shrink_control sc = { + .gfp_mask = GFP_KERNEL, + .nid = nid, + .memcg = memcg, + }; + if (!shrinker_try_get(shrinker)) + continue; + + rcu_read_unlock(); + + kidled_scan_slab_common(shrinker, &sc); + rcu_read_lock(); + shrinker_put(shrinker); + } + rcu_read_unlock(); + cond_resched(); +} +#endif + static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, struct shrinker *shrinker, int priority) { -- Gitee From 91bf0aae0ca75840e497fe9a0909c0081f112782 Mon Sep 17 00:00:00 2001 From: zhongjiang-ali Date: Tue, 26 May 2020 01:34:55 +0800 Subject: [PATCH 03/14] anolis: mm: kidled: isolate slab scan with page scan ANBZ: #22925 ANBZ: #1702 Currently, page scan only use 2/HZ to scan the part of memory to avoid the cpu load. but As to slab scan, Generally, It will take more time to scan an round when there are a lot of slab objects in the system. Idlemd will enable the slab scan and acceptable for cpu load for a while. Meanwhile, we should not scale up the scan period. Because it will disturb the page scan. Reviewed-by: Xu Yu Signed-off-by: zhongjiang-ali Signed-off-by: Rongwei Wang Acked-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/565 Signed-off-by: Weilin Tong --- Documentation/vm/kidled.rst | 5 ++ include/linux/kidled.h | 13 ++++++ include/linux/swap.h | 3 +- mm/kidled.c | 93 ++++++++++++++++++++++++++++++------- mm/memcontrol.c | 5 ++ mm/shrinker.c | 25 ++++++---- 6 files changed, 116 insertions(+), 28 deletions(-) diff --git a/Documentation/vm/kidled.rst b/Documentation/vm/kidled.rst index 527d93cef15c..e230d8f1fe9a 100644 --- a/Documentation/vm/kidled.rst +++ b/Documentation/vm/kidled.rst @@ -26,6 +26,11 @@ than one time. So the maximal age is 255. kidled eventually shows the histogram statistics through memory cgroup files (``memory.idle_page_stats``). The statistics could be used to evaluate the working-set size of that memory cgroup or the hierarchy. +Especially, we add a switch to control whether slab scan or not. That isolate +page scan and slab scan effectively to avoid too many slab objects interfering +with page scan. Because it is important for us to reap cold userspace page, which +reclaim more memory at the lower cost. + Note: The implementation of kidled had referred to Michel Lespinasse's patch: https://lore.kernel.org/lkml/20110922161448.91a2e2b2.akpm@google.com/T/ Thanks for Michel Lespinasse's idea about page age and buckets! 
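Patch 03's concern about CPU load is easiest to see in the budgeting loop of kidled_scan_slab_common(), stripped of the shrinker plumbing: each pass asks the shrinker how many objects are freeable, divides that by the scan duration so a whole LRU is covered roughly once per scan period, and walks the result in shrinker-batch-sized chunks, bailing out if the user changes the settings midway. The following is a user-space sketch under those assumptions; count_objects(), cold_objects() and settings_changed() are stand-ins, not real kernel calls.

#include <stdio.h>

#define SHRINK_BATCH	128
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

/* Stand-ins for shrinker->count_objects() / ->cold_objects(); not real kernel APIs. */
static long count_objects(void)		{ return 100000; }
static long cold_objects(long nr)	{ return nr; }	/* pretend every requested object gets aged */
static int settings_changed(void)	{ return 0; }	/* did the user rewrite the sysfs knobs? */

/*
 * One pass ages roughly freeable/duration objects, walked in batch-sized
 * chunks, so a slab LRU is covered about once per scan period without
 * burning a long stretch of CPU in a single call.
 */
static void scan_slab_once(unsigned int duration_secs)
{
	long freeable = count_objects();
	long budget;

	if (freeable <= 0)
		return;

	budget = DIV_ROUND_UP(freeable, (long)duration_secs);
	while (budget > 0) {
		long chunk = budget < SHRINK_BATCH ? budget : SHRINK_BATCH;
		long scanned = cold_objects(chunk);

		if (scanned <= 0)	/* the kernel stops on SHRINK_STOP here */
			break;
		budget -= scanned;

		if (settings_changed())	/* patch 03: stop if the period or target changed */
			break;
	}
}

int main(void)
{
	scan_slab_once(120);	/* e.g. a 120-second scan period */
	printf("issued one cold-slab aging pass\n");
	return 0;
}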
diff --git a/include/linux/kidled.h b/include/linux/kidled.h index 229481750c2e..70bfd62c8c65 100644 --- a/include/linux/kidled.h +++ b/include/linux/kidled.h @@ -119,8 +119,10 @@ struct kidled_scan_period { u16 duration; /* in seconds */ }; }; + bool slab_scan_enabled; /* whether scan slab or not */ }; extern struct kidled_scan_period kidled_scan_period; +extern bool kidled_slab_scan_enabled; extern unsigned long kidled_scan_rounds; #define KIDLED_OP_SET_DURATION (1 << 0) @@ -142,6 +144,10 @@ static inline struct kidled_scan_period kidled_get_current_scan_period(void) struct kidled_scan_period scan_period; atomic_set(&scan_period.val, atomic_read(&kidled_scan_period.val)); + if (kidled_slab_scan_enabled) + scan_period.slab_scan_enabled = true; + else + scan_period.slab_scan_enabled = false; return scan_period; } @@ -156,6 +162,7 @@ static inline unsigned int kidled_get_current_scan_duration(void) static inline void kidled_reset_scan_period(struct kidled_scan_period *p) { atomic_set(&p->val, 0); + p->slab_scan_enabled = false; } /* @@ -166,6 +173,12 @@ static inline bool kidled_is_scan_period_equal(struct kidled_scan_period *p) return atomic_read(&p->val) == atomic_read(&kidled_scan_period.val); } +static inline bool +kidled_is_slab_scan_enabled_equal(struct kidled_scan_period *p) +{ + return kidled_slab_scan_enabled == p->slab_scan_enabled; +} + static inline bool kidled_set_scan_period(int op, u16 duration, struct kidled_scan_period *orig) { diff --git a/include/linux/swap.h b/include/linux/swap.h index a1b6a7ee0761..e253f9ec822a 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -438,7 +438,8 @@ extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, unsigned long *nr_scanned); extern unsigned long shrink_all_memory(unsigned long nr_pages); #ifdef CONFIG_KIDLED -extern void kidled_scan_slab(int nid, struct mem_cgroup *memcg); +extern void kidled_scan_slab(int nid, struct mem_cgroup *memcg, + struct kidled_scan_period scan_period); #endif extern int vm_swappiness; long remove_mapping(struct address_space *mapping, struct folio *folio); diff --git a/mm/kidled.c b/mm/kidled.c index 1c127f6132a1..f87b27ebf1fe 100644 --- a/mm/kidled.c +++ b/mm/kidled.c @@ -71,6 +71,7 @@ DEFINE_STATIC_KEY_FALSE(kidled_enabled_key); struct kidled_scan_period kidled_scan_period; +bool kidled_slab_scan_enabled __read_mostly; /* * These bucket values are copied from Michel Lespinasse's patch, they are * the default buckets to do histogram sampling. 
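The kidled_scan_period helpers touched above rely on the sequence number and the duration sharing a single 32-bit atomic word, so the scanner can snapshot the settings at the start of a round and later detect a user change with one load-and-compare. The C11 user-space sketch below only illustrates that pattern: the packing order is an assumption and the helper names are invented (the kernel simply overlays two u16 fields on an atomic_t). Note that this patch keeps slab_scan_enabled outside the packed word, which is why it adds the separate kidled_is_slab_scan_enabled_equal() check.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint32_t live_period;	/* the live, user-visible setting */

static uint32_t pack(uint16_t seq, uint16_t duration)
{
	return (uint32_t)seq << 16 | duration;
}

/* Writing the sysfs knob bumps the sequence even if the duration is unchanged. */
static void user_set_duration(uint16_t seconds)
{
	uint16_t seq = (uint16_t)(atomic_load(&live_period) >> 16);

	atomic_store(&live_period, pack((uint16_t)(seq + 1), seconds));
}

/* The scanner snapshots the word once at the start of a round... */
static uint32_t scanner_snapshot(void)
{
	return atomic_load(&live_period);
}

/* ...and restarts the round when the snapshot no longer matches the live value. */
static bool period_unchanged(uint32_t snap)
{
	return snap == atomic_load(&live_period);
}

int main(void)
{
	uint32_t snap;

	user_set_duration(120);
	snap = scanner_snapshot();
	printf("mid-round check: %s\n",
	       period_unchanged(snap) ? "keep scanning" : "restart round");

	user_set_duration(300);		/* user tweaks the knob mid-round */
	printf("mid-round check: %s\n",
	       period_unchanged(snap) ? "keep scanning" : "restart round");
	return 0;
}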
@@ -297,7 +298,7 @@ static inline void kidled_mem_cgroup_scan_done(struct kidled_scan_period period) } } -static inline void kidled_mem_cgroup_reset(void) +static inline void kidled_mem_cgroup_reset(bool slab) { struct mem_cgroup *memcg; struct idle_page_stats *stable_stats, *unstable_stats; @@ -308,14 +309,27 @@ static inline void kidled_mem_cgroup_reset(void) down_write(&memcg->idle_stats_rwsem); stable_stats = mem_cgroup_get_stable_idle_stats(memcg); unstable_stats = mem_cgroup_get_unstable_idle_stats(memcg); - memset(&stable_stats->count, 0, sizeof(stable_stats->count)); + if (slab) { + memset(&stable_stats->count[KIDLE_SLAB], 0, + sizeof(stable_stats->count[KIDLE_SLAB])); + memcg->scan_period.slab_scan_enabled = false; + up_write(&memcg->idle_stats_rwsem); + memset(&unstable_stats->count[KIDLE_SLAB], 0, + sizeof(unstable_stats->count[KIDLE_SLAB])); + + if (!memcg_kmem_online()) + break; + } else { + memset(&stable_stats->count, 0, + sizeof(stable_stats->count)); - memcg->idle_scans = 0; - kidled_reset_scan_period(&memcg->scan_period); - up_write(&memcg->idle_stats_rwsem); + memcg->idle_scans = 0; + kidled_reset_scan_period(&memcg->scan_period); + up_write(&memcg->idle_stats_rwsem); - memset(&unstable_stats->count, 0, - sizeof(unstable_stats->count)); + memset(&unstable_stats->count, 0, + sizeof(unstable_stats->count)); + } } } #else /* !CONFIG_MEMCG */ @@ -327,7 +341,7 @@ static inline void kidled_mem_cgroup_scan_done(struct kidled_scan_period scan_period) { } -static inline void kidled_mem_cgroup_reset(void) +static inline void kidled_mem_cgroup_reset(bool slab) { } #endif /* CONFIG_MEMCG */ @@ -617,24 +631,28 @@ void kidled_free_folio_age(pg_data_t *pgdat) } #endif -static inline void kidled_scan_slab_node(int nid) +static inline void kidled_scan_slab_node(int nid, + struct kidled_scan_period scan_period) { struct mem_cgroup *memcg; memcg = mem_cgroup_iter(NULL, NULL, NULL); do { - kidled_scan_slab(nid, memcg); + kidled_scan_slab(nid, memcg, scan_period); if (!memcg_kmem_online()) break; } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL); } -static inline void kidled_scan_slabs(void) +static inline void kidled_scan_slabs(struct kidled_scan_period scan_period) { int nid; + if (!kidled_slab_scan_enabled) + return; + for_each_online_node(nid) - kidled_scan_slab_node(nid); + kidled_scan_slab_node(nid, scan_period); } static inline void kidled_scan_done(struct kidled_scan_period scan_period) @@ -648,7 +666,7 @@ static void kidled_reset(bool free) { pg_data_t *pgdat; - kidled_mem_cgroup_reset(); + kidled_mem_cgroup_reset(false); get_online_mems(); @@ -673,7 +691,7 @@ static __kidled_ref void kidled_reset(void) pg_data_t *pgdat; int i, nid; - kidled_mem_cgroup_reset(); + kidled_mem_cgroup_reset(false); get_online_mems(); for_each_online_node(nid) { @@ -697,7 +715,7 @@ static void kidled_reset(void) { pg_data_t *pgdat; - kidled_mem_cgroup_reset(); + kidled_mem_cgroup_reset(false); get_online_mems(); for_each_online_pgdat(pgdat) { @@ -733,6 +751,12 @@ static inline bool kidled_should_run(struct kidled_scan_period *p, bool *new) *p = scan_period; *new = true; + } else if (unlikely(!kidled_is_slab_scan_enabled_equal(p))) { + if (p->slab_scan_enabled) + kidled_mem_cgroup_reset(true); + else + p->slab_scan_enabled = true; + *new = false; } else { *new = false; } @@ -771,11 +795,11 @@ static int kidled(void *dummy) put_online_mems(); if (scan_done) { - kidled_scan_slabs(); + kidled_scan_slabs(scan_period); kidled_scan_done(scan_period); restart = true; } else { - kidled_scan_slabs(); 
+ kidled_scan_slabs(scan_period); restart = false; } @@ -799,7 +823,13 @@ static int kidled(void *dummy) * * We thought it's busy when elapsed >= (HZ / 2), and if keep * busy for several consecutive times, we'll scale up the - * scan duration. + * scan duration, But except in one case when we enable the + * slab scan. It's acceptable that the cpu load is very high + * for a while and we can not scale up the scan duration. + * Otherwise it will takes a lot of time to scan an round. + * + * Because kidled is the lowest priority, and it can be + * scheduled easily when other task want to run in current cpu. * * NOTE it's a simple guard, not a promise. */ @@ -865,12 +895,39 @@ static ssize_t kidled_scan_period_store(struct kobject *kobj, return count; } +static ssize_t kidled_slab_scan_enabled_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%u\n", kidled_slab_scan_enabled); +} + +static ssize_t kidled_slab_scan_enabled_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + unsigned long val; + int ret; + + ret = kstrtoul(buf, 10, &val); + if (ret || val > 1) + return -EINVAL; + + WRITE_ONCE(kidled_slab_scan_enabled, val); + return count; +} + static struct kobj_attribute kidled_scan_period_attr = __ATTR(scan_period_in_seconds, 0644, kidled_scan_period_show, kidled_scan_period_store); +static struct kobj_attribute kidled_slab_scan_enabled_attr = + __ATTR(slab_scan_enabled, 0644, + kidled_slab_scan_enabled_show, kidled_slab_scan_enabled_store); + static struct attribute *kidled_attrs[] = { &kidled_scan_period_attr.attr, + &kidled_slab_scan_enabled_attr.attr, NULL }; static struct attribute_group kidled_attr_group = { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ba7672f601c6..c3ccda5fd59c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4362,6 +4362,11 @@ static int mem_cgroup_idle_page_stats_show(struct seq_file *m, void *v) goto output; } + if (scan_period.slab_scan_enabled && + !kidled_is_slab_scan_enabled_equal(&scan_period)) + memset(&stats->count[KIDLE_SLAB], 0, + sizeof(stats->count[KIDLE_SLAB])); + if (has_hierarchy) { for_each_mem_cgroup_tree(iter, memcg) { /* The root memcg was just accounted */ diff --git a/mm/shrinker.c b/mm/shrinker.c index 67233aaaf1ac..47ec1e978759 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -383,11 +383,11 @@ static long add_nr_deferred(long nr, struct shrinker *shrinker, #ifdef CONFIG_KIDLED static void kidled_scan_slab_common(struct shrinker *shrinker, - struct shrink_control *sc) + struct shrink_control *sc, + struct kidled_scan_period scan_period) { long batch_size = shrinker->batch ?: SHRINK_BATCH; long freeable, nr_free; - unsigned int scan_duration = kidled_get_current_scan_duration(); if (!shrinker->cold_objects) return; @@ -395,7 +395,7 @@ static void kidled_scan_slab_common(struct shrinker *shrinker, if (freeable == 0 || freeable == SHRINK_EMPTY) return; - nr_free = DIV_ROUND_UP(freeable, scan_duration); + nr_free = DIV_ROUND_UP(freeable, scan_period.duration); while (nr_free > 0) { unsigned long nr_scanned; @@ -405,11 +405,16 @@ static void kidled_scan_slab_common(struct shrinker *shrinker, break; nr_free -= nr_scanned; cond_resched(); + + if (unlikely(!kidled_is_scan_period_equal(&scan_period) || + !kidled_is_slab_scan_enabled_equal(&scan_period))) + break; } } #ifdef CONFIG_MEMCG -static void kidled_scan_slab_memcg(int nid, struct mem_cgroup *memcg) +static void kidled_scan_slab_memcg(int nid, struct mem_cgroup *memcg, + struct 
kidled_scan_period scan_period) { struct shrinker_info *info; int offset, index = 0; @@ -453,7 +458,7 @@ static void kidled_scan_slab_memcg(int nid, struct mem_cgroup *memcg) !(shrinker->flags & SHRINKER_NONSLAB)) continue; - kidled_scan_slab_common(shrinker, &sc); + kidled_scan_slab_common(shrinker, &sc, scan_period); shrinker_put(shrinker); } @@ -464,17 +469,19 @@ static void kidled_scan_slab_memcg(int nid, struct mem_cgroup *memcg) rcu_read_unlock(); } #else /* !CONFIG_MEMCG */ -static void kidled_scan_slab_memcg(int nid, struct mem_cgroup *memcg) +static void kidled_scan_slab_memcg(int nid, struct mem_cgroup *memcg, + struct kidled_scan_period scan_period) { } #endif /* CONFIG_MEMCG */ -void kidled_scan_slab(int nid, struct mem_cgroup *memcg) +void kidled_scan_slab(int nid, struct mem_cgroup *memcg, + struct kidled_scan_period scan_period) { struct shrinker *shrinker; if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg)) - return kidled_scan_slab_memcg(nid, memcg); + return kidled_scan_slab_memcg(nid, memcg, scan_period); rcu_read_lock(); list_for_each_entry(shrinker, &shrinker_list, list) { @@ -488,7 +495,7 @@ void kidled_scan_slab(int nid, struct mem_cgroup *memcg) rcu_read_unlock(); - kidled_scan_slab_common(shrinker, &sc); + kidled_scan_slab_common(shrinker, &sc, scan_period); rcu_read_lock(); shrinker_put(shrinker); } -- Gitee From 9f541fc25e792575a9d1915a2b8ad6d0701c3c79 Mon Sep 17 00:00:00 2001 From: zhongjiang-ali Date: Mon, 22 Jun 2020 21:43:45 +0800 Subject: [PATCH 04/14] anolis: mm: kidled: make kidled to support scan slab separately ANBZ: #22925 ANBZ: #1702 Currently, kidled scans pages and slab. kidled_scan_target decides how kidled either scan pages or pages and slabs together. The patch expands kidled_scan_target to support scan slab separately. And we rename kidled_scan_period to kidled_scan_control, because it is not only used by scan_period, but also used by scan type. Meanwhile, we use an local variable "count_slab_scan" to count in a round and drop out when it reach the scan_control.duration, but it is restricted in scan slab only. Reviewed-by: Xu Yu Signed-off-by: zhongjiang-ali Signed-off-by: Rongwei Wang Acked-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/565 Signed-off-by: Weilin Tong --- Documentation/vm/kidled.rst | 25 ++++-- include/linux/kidled.h | 112 +++++++++++++++++-------- include/linux/memcontrol.h | 2 +- include/linux/swap.h | 2 +- mm/kidled.c | 159 +++++++++++++++++++++--------------- mm/memcontrol.c | 33 +++++--- mm/shrinker.c | 20 ++--- 7 files changed, 226 insertions(+), 127 deletions(-) diff --git a/Documentation/vm/kidled.rst b/Documentation/vm/kidled.rst index e230d8f1fe9a..bd667151a06d 100644 --- a/Documentation/vm/kidled.rst +++ b/Documentation/vm/kidled.rst @@ -74,13 +74,24 @@ Here are their functions: statistics, but it won't be very odd due to the duration are the same at least. -* ``/sys/kernel/mm/kidled/slab_scan_enabled`` - - It controls whether slab scan or not. By default, kidled will not scan slab - because the cpu load will very high if the system has a lot of reclaimable - slabs. But we need to enable it when userspace pages have been reclaimed and - a lot of reclaimable slabs is in the system. we'd better mark and reclaim the - cold slab in front of the memory reclaim triggered by allocating memory request. +* ``/sys/kernel/mm/kidled/scan_target`` + + It controls which type kidled will scan, there are three kinds of type + could be selected: scan page only, scan slab only, scan both page and + slab. 
The users can enable them as follows. Other value will be invalid. + + To scan user page only + echo 1 > ``/sys/kernel/mm/kidled/scan_target`` + To scan slab only + echo 2 > ``/sys/kernel/mm/kidled/scan_target`` + Both scan page and slab + echo 3 > ``/sys/kernel/mm/kidled/scan_target`` + + By default, kidled will not scan slab because the cpu load will very + high if the system has a lot of reclaimable slabs. But we need to enable + it when userspace pages have been reclaimed and a lot of reclaimable + slabs is in the system. We'd better mark and reclaim the cold slab in + front of the memory reclaim triggered by allocating memory request. It shows histogram of idle statistics for the corresponding memory cgroup. diff --git a/include/linux/kidled.h b/include/linux/kidled.h index 70bfd62c8c65..2e450ce2def1 100644 --- a/include/linux/kidled.h +++ b/include/linux/kidled.h @@ -10,6 +10,21 @@ #define KIDLED_VERSION "1.0" struct mem_cgroup; +/* + * Kidled_scan_type define the scan type that kidled will + * work at. The default option is to scan page only, but + * it can be modified by a specified interface at any time. + */ +enum kidled_scan_type { + SCAN_TARGET_PAGE = 0, + SCAN_TARGET_SLAB, + SCAN_TARGET_ALL +}; + +#define KIDLED_SCAN_PAGE (1 << SCAN_TARGET_PAGE) +#define KIDLED_SCAN_SLAB (1 << SCAN_TARGET_SLAB) +#define KIDLED_SCAN_ALL (KIDLED_SCAN_PAGE | KIDLED_SCAN_SLAB) + /* * We want to get more info about a specified idle page, whether it's * a page cache or in active LRU list and so on. We use KIDLE_ @@ -111,7 +126,7 @@ struct idle_page_stats { * least. */ #define KIDLED_MAX_SCAN_DURATION U16_MAX /* max 65536 seconds */ -struct kidled_scan_period { +struct kidled_scan_control { union { atomic_t val; struct { @@ -119,10 +134,10 @@ struct kidled_scan_period { u16 duration; /* in seconds */ }; }; - bool slab_scan_enabled; /* whether scan slab or not */ + unsigned int scan_target; /* decide how kidled to scan */ }; -extern struct kidled_scan_period kidled_scan_period; -extern bool kidled_slab_scan_enabled; +extern struct kidled_scan_control kidled_scan_control; +extern unsigned int kidled_scan_target; extern unsigned long kidled_scan_rounds; #define KIDLED_OP_SET_DURATION (1 << 0) @@ -139,63 +154,93 @@ static inline void kidled_mem_cgroup_slab_account(void *object, kidled_mem_cgroup_account(folio, object, age, size); } -static inline struct kidled_scan_period kidled_get_current_scan_period(void) +static inline struct kidled_scan_control kidled_get_current_scan_control(void) { - struct kidled_scan_period scan_period; - - atomic_set(&scan_period.val, atomic_read(&kidled_scan_period.val)); - if (kidled_slab_scan_enabled) - scan_period.slab_scan_enabled = true; - else - scan_period.slab_scan_enabled = false; - return scan_period; + struct kidled_scan_control scan_control; + + atomic_set(&scan_control.val, atomic_read(&kidled_scan_control.val)); + scan_control.scan_target = kidled_scan_target; + return scan_control; } static inline unsigned int kidled_get_current_scan_duration(void) { - struct kidled_scan_period scan_period = - kidled_get_current_scan_period(); + struct kidled_scan_control scan_control = + kidled_get_current_scan_control(); - return scan_period.duration; + return scan_control.duration; } -static inline void kidled_reset_scan_period(struct kidled_scan_period *p) +static inline void kidled_reset_scan_control(struct kidled_scan_control *p) { atomic_set(&p->val, 0); - p->slab_scan_enabled = false; + p->scan_target = KIDLED_SCAN_PAGE; } /* - * Compare with global kidled_scan_period, 
return true if equals. + * Compare with global kidled_scan_control, return true if equals. */ -static inline bool kidled_is_scan_period_equal(struct kidled_scan_period *p) +static inline bool kidled_is_scan_period_equal(struct kidled_scan_control *p) +{ + return atomic_read(&p->val) == atomic_read(&kidled_scan_control.val); +} + +static inline bool kidled_has_slab_target(struct kidled_scan_control *p) +{ + return p->scan_target & KIDLED_SCAN_SLAB; +} + +static inline bool kidled_has_page_target(struct kidled_scan_control *p) +{ + return p->scan_target & KIDLED_SCAN_PAGE; +} + +static inline bool kidled_has_slab_target_equal(struct kidled_scan_control *p) +{ + if (!kidled_has_slab_target(p)) + return false; + + return kidled_scan_target & KIDLED_SCAN_SLAB; +} + +static inline bool +kidled_is_scan_target_equal(struct kidled_scan_control *p) { - return atomic_read(&p->val) == atomic_read(&kidled_scan_period.val); + return p->scan_target == kidled_scan_target; } static inline bool -kidled_is_slab_scan_enabled_equal(struct kidled_scan_period *p) +kidled_is_slab_target(struct kidled_scan_control *p) { - return kidled_slab_scan_enabled == p->slab_scan_enabled; + return p->scan_target == KIDLED_SCAN_SLAB; } -static inline bool kidled_set_scan_period(int op, u16 duration, - struct kidled_scan_period *orig) +static inline bool +kidled_has_page_target_equal(struct kidled_scan_control *p) +{ + if (!kidled_has_page_target(p)) + return false; + + return kidled_scan_target & KIDLED_SCAN_PAGE; +} + +static inline bool kidled_set_scan_control(int op, u16 duration, + struct kidled_scan_control *orig) { bool retry = false; /* - * atomic_cmpxchg() tries to update kidled_scan_period, shouldn't + * atomic_cmpxchg() tries to update kidled_scan_control, shouldn't * retry to avoid endless loop when caller specify a period. */ if (!orig) { - orig = &kidled_scan_period; + orig = &kidled_scan_control; retry = true; } while (true) { int new_period_val, old_period_val; - struct kidled_scan_period new_period; + struct kidled_scan_control new_period; old_period_val = atomic_read(&orig->val); atomic_set(&new_period.val, old_period_val); @@ -205,7 +250,7 @@ static inline bool kidled_set_scan_period(int op, u16 duration, new_period.duration = duration; new_period_val = atomic_read(&new_period.val); - if (atomic_cmpxchg(&kidled_scan_period.val, + if (atomic_cmpxchg(&kidled_scan_control.val, old_period_val, new_period_val) == old_period_val) return true; @@ -217,7 +262,7 @@ static inline bool kidled_set_scan_period(int op, u16 duration, static inline void kidled_set_scan_duration(u16 duration) { - kidled_set_scan_period(KIDLED_OP_INC_SEQ | + kidled_set_scan_control(KIDLED_OP_INC_SEQ | KIDLED_OP_SET_DURATION, duration, NULL); } @@ -231,7 +276,8 @@ static inline bool is_kidled_enabled(void) * Caller must specify the original scan period, avoid the race between * the double operation and user's updates through sysfs interface. 
*/ -static inline bool kidled_try_double_scan_period(struct kidled_scan_period orig) +static inline bool +kidled_try_double_scan_control(struct kidled_scan_control orig) { u16 duration = orig.duration; @@ -241,7 +287,7 @@ static inline bool kidled_try_double_scan_period(struct kidled_scan_period orig) duration <<= 1; if (duration < orig.duration) duration = KIDLED_MAX_SCAN_DURATION; - return kidled_set_scan_period(KIDLED_OP_INC_SEQ | + return kidled_set_scan_control(KIDLED_OP_INC_SEQ | KIDLED_OP_SET_DURATION, duration, &orig); @@ -253,7 +299,7 @@ static inline bool kidled_try_double_scan_period(struct kidled_scan_period orig) */ static inline void kidled_inc_scan_seq(void) { - kidled_set_scan_period(KIDLED_OP_INC_SEQ, 0, NULL); + kidled_set_scan_control(KIDLED_OP_INC_SEQ, 0, NULL); } extern const int kidled_default_buckets[NUM_KIDLED_BUCKETS]; diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 79dac3a65e69..7438c7ea3f40 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -446,7 +446,7 @@ struct mem_cgroup { #ifdef CONFIG_KIDLED struct rw_semaphore idle_stats_rwsem; unsigned long idle_scans; - struct kidled_scan_period scan_period; + struct kidled_scan_control scan_control; int idle_stable_idx; struct idle_page_stats idle_stats[KIDLED_STATS_NR_TYPE]; #endif diff --git a/include/linux/swap.h b/include/linux/swap.h index e253f9ec822a..0cf9dafde89b 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -439,7 +439,7 @@ extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, extern unsigned long shrink_all_memory(unsigned long nr_pages); #ifdef CONFIG_KIDLED extern void kidled_scan_slab(int nid, struct mem_cgroup *memcg, - struct kidled_scan_period scan_period); + struct kidled_scan_control scan_control); #endif extern int vm_swappiness; long remove_mapping(struct address_space *mapping, struct folio *folio); diff --git a/mm/kidled.c b/mm/kidled.c index f87b27ebf1fe..43fc5d8365af 100644 --- a/mm/kidled.c +++ b/mm/kidled.c @@ -70,8 +70,8 @@ DEFINE_STATIC_KEY_FALSE(kidled_enabled_key); -struct kidled_scan_period kidled_scan_period; -bool kidled_slab_scan_enabled __read_mostly; +unsigned int kidled_scan_target __read_mostly = KIDLED_SCAN_PAGE; +struct kidled_scan_control kidled_scan_control; /* * These bucket values are copied from Michel Lespinasse's patch, they are * the default buckets to do histogram sampling. @@ -263,7 +263,8 @@ void kidled_mem_cgroup_move_stats(struct mem_cgroup *from, } EXPORT_SYMBOL_GPL(kidled_mem_cgroup_move_stats); -static inline void kidled_mem_cgroup_scan_done(struct kidled_scan_period period) +static inline void +kidled_mem_cgroup_scan_done(struct kidled_scan_control scan_control) { struct mem_cgroup *memcg; struct idle_page_stats *stable_stats, *unstable_stats; @@ -289,7 +290,7 @@ static inline void kidled_mem_cgroup_scan_done(struct kidled_scan_period period) sizeof(unstable_stats->buckets)); } - memcg->scan_period = period; + memcg->scan_control = scan_control; up_write(&memcg->idle_stats_rwsem); unstable_stats = mem_cgroup_get_unstable_idle_stats(memcg); @@ -298,7 +299,14 @@ static inline void kidled_mem_cgroup_scan_done(struct kidled_scan_period period) } } -static inline void kidled_mem_cgroup_reset(bool slab) +/* + * Reset the specified statistics by scan_type when users want to + * change the scan target. For example, we should clear the slab + * statistics when we only want to scan the page and vice versa. + * Otherwise it will mislead the user about the statistics. 
+ */ +static inline void +kidled_mem_cgroup_reset(enum kidled_scan_type scan_type) { struct mem_cgroup *memcg; struct idle_page_stats *stable_stats, *unstable_stats; @@ -309,10 +317,21 @@ static inline void kidled_mem_cgroup_reset(bool slab) down_write(&memcg->idle_stats_rwsem); stable_stats = mem_cgroup_get_stable_idle_stats(memcg); unstable_stats = mem_cgroup_get_unstable_idle_stats(memcg); - if (slab) { + if (scan_type == SCAN_TARGET_PAGE) { + int i; + + for (i = 0; i < KIDLE_NR_TYPE - 1; i++) + memset(&stable_stats->count[i], 0, + sizeof(stable_stats->count[i])); + memcg->scan_control.scan_target = kidled_scan_target; + up_write(&memcg->idle_stats_rwsem); + for (i = 0; i < KIDLE_NR_TYPE - 1; i++) + memset(&unstable_stats->count[i], 0, + sizeof(unstable_stats->count[i])); + } else if (scan_type == SCAN_TARGET_SLAB) { memset(&stable_stats->count[KIDLE_SLAB], 0, sizeof(stable_stats->count[KIDLE_SLAB])); - memcg->scan_period.slab_scan_enabled = false; + memcg->scan_control.scan_target = kidled_scan_target; up_write(&memcg->idle_stats_rwsem); memset(&unstable_stats->count[KIDLE_SLAB], 0, sizeof(unstable_stats->count[KIDLE_SLAB])); @@ -322,11 +341,9 @@ static inline void kidled_mem_cgroup_reset(bool slab) } else { memset(&stable_stats->count, 0, sizeof(stable_stats->count)); - memcg->idle_scans = 0; - kidled_reset_scan_period(&memcg->scan_period); + kidled_reset_scan_control(&memcg->scan_control); up_write(&memcg->idle_stats_rwsem); - memset(&unstable_stats->count, 0, sizeof(unstable_stats->count)); } @@ -337,11 +354,11 @@ static inline void kidled_mem_cgroup_account(struct folio *folio, void *ptr, int age, unsigned long size) { } -static inline void kidled_mem_cgroup_scan_done(struct kidled_scan_period - scan_period) +static inline void kidled_mem_cgroup_scan_done(struct kidled_scan_control + scan_control) { } -static inline void kidled_mem_cgroup_reset(bool slab) +static inline void kidled_mem_cgroup_reset(enum kidled_scan_type scan_type) { } #endif /* CONFIG_MEMCG */ @@ -473,7 +490,7 @@ static inline int kidled_scan_folio(pg_data_t *pgdat, unsigned long pfn) } static bool kidled_scan_node(pg_data_t *pgdat, - struct kidled_scan_period scan_period, + struct kidled_scan_control scan_control, unsigned long start_pfn, unsigned long end_pfn) { unsigned long pfn = start_pfn; @@ -483,6 +500,9 @@ static bool kidled_scan_node(pg_data_t *pgdat, int nr_nodes = num_online_nodes(); #endif + if (kidled_is_slab_target(&scan_control)) + return false; + #ifdef KIDLED_AGE_NOT_IN_PAGE_FLAGS if (unlikely(!pgdat->node_folio_age)) { u8 *age; @@ -499,7 +519,8 @@ static bool kidled_scan_node(pg_data_t *pgdat, while (pfn < end_pfn) { /* Restart new scanning when user updates the period */ - if (unlikely(!kidled_is_scan_period_equal(&scan_period))) + if (unlikely(!kidled_is_scan_period_equal(&scan_control) || + !kidled_has_page_target_equal(&scan_control))) break; #if !defined(CONFIG_ARCH_KEEP_MEMBLOCK) && !defined(CONFIG_MEMORY_HOTPLUG) @@ -529,7 +550,7 @@ static bool kidled_scan_node(pg_data_t *pgdat, * happen if caller executes to them. 
*/ #if defined(CONFIG_ARCH_KEEP_MEMBLOCK) || defined(CONFIG_MEMORY_HOTPLUG) -static __kidled_ref bool kidled_scan_nodes(struct kidled_scan_period scan_period, +static __kidled_ref bool kidled_scan_nodes(struct kidled_scan_control scan_control, bool restart) { int i, nid; @@ -539,7 +560,7 @@ static __kidled_ref bool kidled_scan_nodes(struct kidled_scan_period scan_period for_each_online_node(nid) { pg_data_t *pgdat = NODE_DATA(nid); unsigned long pages_to_scan = DIV_ROUND_UP(pgdat->node_present_pages, - scan_period.duration); + scan_control.duration); bool init = !restart; if (restart) @@ -575,7 +596,7 @@ static __kidled_ref bool kidled_scan_nodes(struct kidled_scan_period scan_period if ((end_pfn - start_pfn) > pages_to_scan) end_pfn = start_pfn + pages_to_scan; - scan_done &= kidled_scan_node(pgdat, scan_period, + scan_done &= kidled_scan_node(pgdat, scan_control, start_pfn, end_pfn); /* * That empirical value mainly to ensure that @@ -590,7 +611,7 @@ static __kidled_ref bool kidled_scan_nodes(struct kidled_scan_period scan_period return scan_done; } #else -static bool kidled_scan_nodes(struct kidled_scan_period scan_period, +static bool kidled_scan_nodes(struct kidled_scan_control scan_control, bool restart) { unsigned long start_pfn, end_pfn; @@ -608,8 +629,8 @@ static bool kidled_scan_nodes(struct kidled_scan_period scan_period, pgdat->node_idle_scan_pfn = pgdat->node_start_pfn; start_pfn = pgdat->node_idle_scan_pfn; end_pfn = min(start_pfn + DIV_ROUND_UP(pgdat->node_spanned_pages, - scan_period.duration), node_end); - scan_done &= kidled_scan_node(pgdat, scan_period, start_pfn, + scan_control.duration), node_end); + scan_done &= kidled_scan_node(pgdat, scan_control, start_pfn, end_pfn); } @@ -632,32 +653,32 @@ void kidled_free_folio_age(pg_data_t *pgdat) #endif static inline void kidled_scan_slab_node(int nid, - struct kidled_scan_period scan_period) + struct kidled_scan_control scan_control) { struct mem_cgroup *memcg; memcg = mem_cgroup_iter(NULL, NULL, NULL); do { - kidled_scan_slab(nid, memcg, scan_period); + kidled_scan_slab(nid, memcg, scan_control); if (!memcg_kmem_online()) break; } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL); } -static inline void kidled_scan_slabs(struct kidled_scan_period scan_period) +static inline void kidled_scan_slabs(struct kidled_scan_control scan_control) { int nid; - if (!kidled_slab_scan_enabled) + if (!kidled_has_slab_target(&scan_control)) return; for_each_online_node(nid) - kidled_scan_slab_node(nid, scan_period); + kidled_scan_slab_node(nid, scan_control); } -static inline void kidled_scan_done(struct kidled_scan_period scan_period) +static inline void kidled_scan_done(struct kidled_scan_control scan_control) { - kidled_mem_cgroup_scan_done(scan_period); + kidled_mem_cgroup_scan_done(scan_control); kidled_scan_rounds++; } @@ -666,7 +687,7 @@ static void kidled_reset(bool free) { pg_data_t *pgdat; - kidled_mem_cgroup_reset(false); + kidled_mem_cgroup_reset(SCAN_TARGET_ALL); get_online_mems(); @@ -691,7 +712,7 @@ static __kidled_ref void kidled_reset(void) pg_data_t *pgdat; int i, nid; - kidled_mem_cgroup_reset(false); + kidled_mem_cgroup_reset(SCAN_TARGET_ALL); get_online_mems(); for_each_online_node(nid) { @@ -715,7 +736,7 @@ static void kidled_reset(void) { pg_data_t *pgdat; - kidled_mem_cgroup_reset(false); + kidled_mem_cgroup_reset(SCAN_TARGET_ALL); get_online_mems(); for_each_online_pgdat(pgdat) { @@ -733,30 +754,37 @@ static void kidled_reset(void) } #endif -static inline bool kidled_should_run(struct kidled_scan_period 
*p, bool *new) +static inline bool kidled_should_run(struct kidled_scan_control *p, bool *new) { if (unlikely(!kidled_is_scan_period_equal(p))) { - struct kidled_scan_period scan_period; + struct kidled_scan_control scan_control; - scan_period = kidled_get_current_scan_period(); + scan_control = kidled_get_current_scan_control(); if (p->duration) { #ifdef KIDLED_AGE_NOT_IN_PAGE_FLAGS - kidled_reset(!scan_period.duration); + kidled_reset(!scan_control.duration); #else kidled_reset(); #endif } - if (!scan_period.duration) + if (!scan_control.duration) static_branch_disable(&kidled_enabled_key); - *p = scan_period; + *p = scan_control; *new = true; - } else if (unlikely(!kidled_is_slab_scan_enabled_equal(p))) { - if (p->slab_scan_enabled) - kidled_mem_cgroup_reset(true); + } else if (unlikely(!kidled_is_scan_target_equal(p))) { + struct kidled_scan_control scan_control; + + scan_control = kidled_get_current_scan_control(); + if (!kidled_has_page_target_equal(p)) + kidled_mem_cgroup_reset(SCAN_TARGET_PAGE); + else if (!kidled_has_slab_target_equal(p)) + kidled_mem_cgroup_reset(SCAN_TARGET_SLAB); + if (kidled_is_slab_target(p)) + *new = true; else - p->slab_scan_enabled = true; - *new = false; + *new = false; + *p = scan_control; } else { *new = false; } @@ -771,36 +799,39 @@ static int kidled(void *dummy) { int busy_loop = 0; bool restart = true; - struct kidled_scan_period scan_period; + struct kidled_scan_control scan_control; + int count_slab_scan = 0; - kidled_reset_scan_period(&scan_period); + kidled_reset_scan_control(&scan_control); while (!kthread_should_stop()) { u64 start_jiffies, elapsed; bool new, scan_done = true; wait_event_interruptible(kidled_wait, - kidled_should_run(&scan_period, &new)); + kidled_should_run(&scan_control, &new)); if (unlikely(new)) { restart = true; busy_loop = 0; } - if (unlikely(scan_period.duration == 0)) + if (unlikely(scan_control.duration == 0)) continue; start_jiffies = jiffies_64; get_online_mems(); - scan_done = kidled_scan_nodes(scan_period, restart); + scan_done = kidled_scan_nodes(scan_control, restart); put_online_mems(); - if (scan_done) { - kidled_scan_slabs(scan_period); - kidled_scan_done(scan_period); + kidled_scan_slabs(scan_control); + if (scan_done || (kidled_is_slab_target(&scan_control) && + count_slab_scan + 1 >= scan_control.duration)) { + kidled_scan_done(scan_control); restart = true; + count_slab_scan = 0; } else { - kidled_scan_slabs(scan_period); restart = false; + count_slab_scan++; } /* @@ -810,7 +841,7 @@ static int kidled(void *dummy) * neighbors (e.g. cause spike latency). 
* * We hope kidled can scan specified pages which depends on - * scan_period in each slice, and supposed to finish each + * scan_control in each slice, and supposed to finish each * slice in one second: * * pages_to_scan = total_pages / scan_duration @@ -841,7 +872,7 @@ static int kidled(void *dummy) schedule_timeout_interruptible(HZ - elapsed); } else if (++busy_loop == KIDLED_BUSY_LOOP_THRESHOLD) { busy_loop = 0; - if (kidled_try_double_scan_period(scan_period)) { + if (kidled_try_double_scan_control(scan_control)) { pr_warn_ratelimited("%s: period -> %u\n", __func__, kidled_get_current_scan_duration()); @@ -895,25 +926,25 @@ static ssize_t kidled_scan_period_store(struct kobject *kobj, return count; } -static ssize_t kidled_slab_scan_enabled_show(struct kobject *kobj, +static ssize_t kidled_scan_target_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "%u\n", kidled_slab_scan_enabled); + return sprintf(buf, "%u\n", kidled_scan_target); } -static ssize_t kidled_slab_scan_enabled_store(struct kobject *kobj, +static ssize_t kidled_scan_target_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - unsigned long val; int ret; + unsigned int val; - ret = kstrtoul(buf, 10, &val); - if (ret || val > 1) + ret = kstrtouint(buf, 10, &val); + if (ret || !val || val > KIDLED_SCAN_ALL) return -EINVAL; - WRITE_ONCE(kidled_slab_scan_enabled, val); + WRITE_ONCE(kidled_scan_target, val); return count; } @@ -921,13 +952,13 @@ static struct kobj_attribute kidled_scan_period_attr = __ATTR(scan_period_in_seconds, 0644, kidled_scan_period_show, kidled_scan_period_store); -static struct kobj_attribute kidled_slab_scan_enabled_attr = - __ATTR(slab_scan_enabled, 0644, - kidled_slab_scan_enabled_show, kidled_slab_scan_enabled_store); +static struct kobj_attribute kidled_scan_target_attr = + __ATTR(scan_target, 0644, + kidled_scan_target_show, kidled_scan_target_store); static struct attribute *kidled_attrs[] = { &kidled_scan_period_attr.attr, - &kidled_slab_scan_enabled_attr.attr, + &kidled_scan_target_attr.attr, NULL }; static struct attribute_group kidled_attr_group = { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c3ccda5fd59c..91c38ef372c1 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4329,7 +4329,7 @@ static ssize_t mem_cgroup_reset(struct kernfs_open_file *of, char *buf, static int mem_cgroup_idle_page_stats_show(struct seq_file *m, void *v) { struct mem_cgroup *iter, *memcg = mem_cgroup_from_css(seq_css(m)); - struct kidled_scan_period scan_period, period; + struct kidled_scan_control scan_control; struct idle_page_stats *stats, *cache; unsigned long scans; bool has_hierarchy = !!seq_cft(m)->private; @@ -4344,7 +4344,7 @@ static int mem_cgroup_idle_page_stats_show(struct seq_file *m, void *v) down_read(&memcg->idle_stats_rwsem); *stats = memcg->idle_stats[memcg->idle_stable_idx]; scans = memcg->idle_scans; - scan_period = memcg->scan_period; + scan_control = memcg->scan_control; up_read(&memcg->idle_stats_rwsem); /* Nothing will be outputed with invalid buckets */ @@ -4355,34 +4355,45 @@ static int mem_cgroup_idle_page_stats_show(struct seq_file *m, void *v) } /* Zeroes will be output with mismatched scan period */ - if (!kidled_is_scan_period_equal(&scan_period)) { + if (!kidled_is_scan_period_equal(&scan_control)) { memset(&stats->count, 0, sizeof(stats->count)); - scan_period = kidled_get_current_scan_period(); + scan_control = kidled_get_current_scan_control(); scans = 0; goto output; } - if 
(scan_period.slab_scan_enabled && - !kidled_is_slab_scan_enabled_equal(&scan_period)) - memset(&stats->count[KIDLE_SLAB], 0, - sizeof(stats->count[KIDLE_SLAB])); + /* Zeroes will be output with mismatched scan type */ + if (!kidled_is_scan_target_equal(&scan_control)) { + if (!kidled_has_page_target_equal(&scan_control)) { + int i; + + for (i = 0; i < KIDLE_NR_TYPE - 1; i++) + memset(&stats->count[i], 0, + sizeof(stats->count[i])); + } else if (!kidled_has_slab_target_equal(&scan_control)) { + memset(&stats->count[KIDLE_SLAB], 0, + sizeof(stats->count[KIDLE_SLAB])); + } + } if (has_hierarchy) { for_each_mem_cgroup_tree(iter, memcg) { + struct kidled_scan_control scan_control; + /* The root memcg was just accounted */ if (iter == memcg) continue; down_read(&iter->idle_stats_rwsem); *cache = iter->idle_stats[iter->idle_stable_idx]; - period = memcg->scan_period; + scan_control = memcg->scan_control; up_read(&iter->idle_stats_rwsem); /* * Skip to account if the scan period is mismatched * or buckets are invalid. */ - if (!kidled_is_scan_period_equal(&period) || + if (!kidled_is_scan_period_equal(&scan_control) || KIDLED_IS_BUCKET_INVALID(cache->buckets)) continue; @@ -4412,7 +4423,7 @@ static int mem_cgroup_idle_page_stats_show(struct seq_file *m, void *v) output: seq_printf(m, "# version: %s\n", KIDLED_VERSION); seq_printf(m, "# scans: %lu\n", scans); - seq_printf(m, "# scan_period_in_seconds: %u\n", scan_period.duration); + seq_printf(m, "# scan_period_in_seconds: %u\n", scan_control.duration); seq_puts(m, "# buckets: "); if (no_buckets) { seq_puts(m, "no valid bucket available\n"); diff --git a/mm/shrinker.c b/mm/shrinker.c index 47ec1e978759..45d36086e44c 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -384,7 +384,7 @@ static long add_nr_deferred(long nr, struct shrinker *shrinker, #ifdef CONFIG_KIDLED static void kidled_scan_slab_common(struct shrinker *shrinker, struct shrink_control *sc, - struct kidled_scan_period scan_period) + struct kidled_scan_control scan_control) { long batch_size = shrinker->batch ?: SHRINK_BATCH; long freeable, nr_free; @@ -395,7 +395,7 @@ static void kidled_scan_slab_common(struct shrinker *shrinker, if (freeable == 0 || freeable == SHRINK_EMPTY) return; - nr_free = DIV_ROUND_UP(freeable, scan_period.duration); + nr_free = DIV_ROUND_UP(freeable, scan_control.duration); while (nr_free > 0) { unsigned long nr_scanned; @@ -406,15 +406,15 @@ static void kidled_scan_slab_common(struct shrinker *shrinker, nr_free -= nr_scanned; cond_resched(); - if (unlikely(!kidled_is_scan_period_equal(&scan_period) || - !kidled_is_slab_scan_enabled_equal(&scan_period))) + if (unlikely(!kidled_is_scan_period_equal(&scan_control) || + !kidled_has_slab_target_equal(&scan_control))) break; } } #ifdef CONFIG_MEMCG static void kidled_scan_slab_memcg(int nid, struct mem_cgroup *memcg, - struct kidled_scan_period scan_period) + struct kidled_scan_control scan_control) { struct shrinker_info *info; int offset, index = 0; @@ -458,7 +458,7 @@ static void kidled_scan_slab_memcg(int nid, struct mem_cgroup *memcg, !(shrinker->flags & SHRINKER_NONSLAB)) continue; - kidled_scan_slab_common(shrinker, &sc, scan_period); + kidled_scan_slab_common(shrinker, &sc, scan_control); shrinker_put(shrinker); } @@ -470,18 +470,18 @@ static void kidled_scan_slab_memcg(int nid, struct mem_cgroup *memcg, } #else /* !CONFIG_MEMCG */ static void kidled_scan_slab_memcg(int nid, struct mem_cgroup *memcg, - struct kidled_scan_period scan_period) + struct kidled_scan_control scan_control) { } #endif /* CONFIG_MEMCG 
*/ void kidled_scan_slab(int nid, struct mem_cgroup *memcg, - struct kidled_scan_period scan_period) + struct kidled_scan_control scan_control) { struct shrinker *shrinker; if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg)) - return kidled_scan_slab_memcg(nid, memcg, scan_period); + return kidled_scan_slab_memcg(nid, memcg, scan_control); rcu_read_lock(); list_for_each_entry(shrinker, &shrinker_list, list) { @@ -495,7 +495,7 @@ void kidled_scan_slab(int nid, struct mem_cgroup *memcg, rcu_read_unlock(); - kidled_scan_slab_common(shrinker, &sc, scan_period); + kidled_scan_slab_common(shrinker, &sc, scan_control); rcu_read_lock(); shrinker_put(shrinker); } -- Gitee From 30bf1656d2a40ca3866e14fcf01e555d3dde4e9f Mon Sep 17 00:00:00 2001 From: zhongjiang-ali Date: Sun, 28 Jun 2020 22:13:34 +0800 Subject: [PATCH 05/14] anolis: mm: kidled: Make kidled scan both page and slab equally ANBZ: #22925 ANBZ: #1702 Currently, We force on page scan even if kidled has supported slab scan, but due to introducing in scanning slab only. Hence, We can treat the page and slab equally. The patch will make sure that available page and slab have scanned completely only if kidled select the specified scan_target. Reviewed-by: Xu Yu Signed-off-by: zhongjiang-ali Signed-off-by: Rongwei Wang Acked-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/565 Signed-off-by: Weilin Tong --- include/linux/kidled.h | 6 ++++++ mm/kidled.c | 30 +++++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/include/linux/kidled.h b/include/linux/kidled.h index 2e450ce2def1..f6d1e5cbd72f 100644 --- a/include/linux/kidled.h +++ b/include/linux/kidled.h @@ -215,6 +215,12 @@ kidled_is_slab_target(struct kidled_scan_control *p) return p->scan_target == KIDLED_SCAN_SLAB; } +static inline bool +kidled_is_page_target(struct kidled_scan_control *p) +{ + return p->scan_target == KIDLED_SCAN_PAGE; +} + static inline bool kidled_has_page_target_equal(struct kidled_scan_control *p) { diff --git a/mm/kidled.c b/mm/kidled.c index 43fc5d8365af..dde4d4e8b713 100644 --- a/mm/kidled.c +++ b/mm/kidled.c @@ -502,6 +502,8 @@ static bool kidled_scan_node(pg_data_t *pgdat, if (kidled_is_slab_target(&scan_control)) return false; + else if (pgdat->node_idle_scan_pfn >= node_end) + return true; #ifdef KIDLED_AGE_NOT_IN_PAGE_FLAGS if (unlikely(!pgdat->node_folio_age)) { @@ -754,7 +756,8 @@ static void kidled_reset(void) } #endif -static inline bool kidled_should_run(struct kidled_scan_control *p, bool *new) +static inline bool kidled_should_run(struct kidled_scan_control *p, + bool *new, int *count_slab_scan) { if (unlikely(!kidled_is_scan_period_equal(p))) { struct kidled_scan_control scan_control; @@ -778,8 +781,10 @@ static inline bool kidled_should_run(struct kidled_scan_control *p, bool *new) scan_control = kidled_get_current_scan_control(); if (!kidled_has_page_target_equal(p)) kidled_mem_cgroup_reset(SCAN_TARGET_PAGE); - else if (!kidled_has_slab_target_equal(p)) + else if (!kidled_has_slab_target_equal(p)) { kidled_mem_cgroup_reset(SCAN_TARGET_SLAB); + *count_slab_scan = 0; + } if (kidled_is_slab_target(p)) *new = true; else @@ -795,6 +800,20 @@ static inline bool kidled_should_run(struct kidled_scan_control *p, bool *new) return false; } +static inline bool is_kidled_scan_done(bool scan_done, + int count_slab_scan, + struct kidled_scan_control scan_control) +{ + u16 duration = scan_control.duration; + + if (kidled_is_slab_target(&scan_control)) + return count_slab_scan >= duration; + else if 
(kidled_is_page_target(&scan_control)) + return scan_done; + else + return scan_done && (count_slab_scan >= duration); +} + static int kidled(void *dummy) { int busy_loop = 0; @@ -809,7 +828,8 @@ static int kidled(void *dummy) bool new, scan_done = true; wait_event_interruptible(kidled_wait, - kidled_should_run(&scan_control, &new)); + kidled_should_run(&scan_control, + &new, &count_slab_scan)); if (unlikely(new)) { restart = true; busy_loop = 0; @@ -824,8 +844,8 @@ static int kidled(void *dummy) put_online_mems(); kidled_scan_slabs(scan_control); - if (scan_done || (kidled_is_slab_target(&scan_control) && - count_slab_scan + 1 >= scan_control.duration)) { + if (is_kidled_scan_done(scan_done, + count_slab_scan + 1, scan_control)) { kidled_scan_done(scan_control); restart = true; count_slab_scan = 0; -- Gitee From 0ebc1a557e6bb487f40b3dbe67458b16120e6453 Mon Sep 17 00:00:00 2001 From: zhongjiang-ali Date: Fri, 31 Jul 2020 10:02:09 +0800 Subject: [PATCH 06/14] anolis: mm: kidled: use different variables to store the rounds ANBZ: #22925 ANBZ: #1702 Currently, scan_target can be controlled to switch different scan type for kidled, hence we can not use an same variable to record their idle rounds. Meanwhile, we do not clear the the specified statistics when user disable the related scan type, we can reuse it when user restart the specified scan. Even though the previous statistics maybe not stable at present, but we will restart the newly scan to replace the old value. Reviewed-by: Xu Yu Signed-off-by: zhongjiang-ali Signed-off-by: Rongwei Wang Acked-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/565 Signed-off-by: Weilin Tong --- Documentation/vm/kidled.rst | 9 +++++--- include/linux/kidled.h | 13 ++++++++++-- include/linux/memcontrol.h | 3 ++- mm/kidled.c | 36 ++++++++++++++++++++++++-------- mm/memcontrol.c | 41 ++++++++++++++++++++++++++----------- 5 files changed, 75 insertions(+), 27 deletions(-) diff --git a/Documentation/vm/kidled.rst b/Documentation/vm/kidled.rst index bd667151a06d..7aa9a07ff773 100644 --- a/Documentation/vm/kidled.rst +++ b/Documentation/vm/kidled.rst @@ -106,7 +106,8 @@ Here are their functions: ----------------------------- snapshot start ----------------------------- # version: 1.0 - # scans: 92 + # page_scans: 92 + # slab_scans: 92 # scan_period_in_seconds: 120 # buckets: 1,2,5,15,30,60,120,240 # @@ -136,7 +137,8 @@ Here are their functions: slab 2704 832 15600 20800 70720 763819160 0 0 ----------------------------- snapshot end ----------------------------- - ``scans`` means how many rounds current cgroup has been scanned. + ``page_scans`` means how many rounds current cgroup's pagecache has been scanned. + ``slab_scans`` means how many rounds current cgroup's slab has been scanned. ``scan_period_in_seconds`` means kidled will take how long to finish one round. ``buckets`` is to allow scripts parsing easily. 
The table shows how many bytes are in idle state, the row is indexed by idle @@ -163,7 +165,8 @@ Here are their functions: $ sudo bash -c "echo '' > /sys/fs/cgroup/memory/test/memory.idle_page_stats" $ cat /sys/fs/cgroup/memory/test/memory.idle_page_stats # version: 1.0 - # scans: 0 + # page_scans: 0 + # slab_scans: 0 # scan_period_in_seconds: 1 # buckets: no valid bucket available ----------------------------- snapshot end ----------------------------- diff --git a/include/linux/kidled.h b/include/linux/kidled.h index f6d1e5cbd72f..9d5400ccaacc 100644 --- a/include/linux/kidled.h +++ b/include/linux/kidled.h @@ -210,13 +210,13 @@ kidled_is_scan_target_equal(struct kidled_scan_control *p) } static inline bool -kidled_is_slab_target(struct kidled_scan_control *p) +kidled_has_slab_target_only(struct kidled_scan_control *p) { return p->scan_target == KIDLED_SCAN_SLAB; } static inline bool -kidled_is_page_target(struct kidled_scan_control *p) +kidled_has_page_target_only(struct kidled_scan_control *p) { return p->scan_target == KIDLED_SCAN_PAGE; } @@ -230,6 +230,15 @@ kidled_has_page_target_equal(struct kidled_scan_control *p) return kidled_scan_target & KIDLED_SCAN_PAGE; } +static inline void kidled_get_reset_type(struct kidled_scan_control *p, + bool *page_disabled, bool *slab_disabled) +{ + if (kidled_has_page_target(p) && !kidled_has_page_target_equal(p)) + *page_disabled = 1; + if (kidled_has_slab_target(p) && !kidled_has_slab_target_equal(p)) + *slab_disabled = 1; +} + static inline bool kidled_set_scan_control(int op, u16 duration, struct kidled_scan_control *orig) { diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 7438c7ea3f40..7953d16014ce 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -445,7 +445,8 @@ struct mem_cgroup { #ifdef CONFIG_KIDLED struct rw_semaphore idle_stats_rwsem; - unsigned long idle_scans; + unsigned long idle_page_scans; + unsigned long idle_slab_scans; struct kidled_scan_control scan_control; int idle_stable_idx; struct idle_page_stats idle_stats[KIDLED_STATS_NR_TYPE]; diff --git a/mm/kidled.c b/mm/kidled.c index dde4d4e8b713..c7a8a393006a 100644 --- a/mm/kidled.c +++ b/mm/kidled.c @@ -268,6 +268,7 @@ kidled_mem_cgroup_scan_done(struct kidled_scan_control scan_control) { struct mem_cgroup *memcg; struct idle_page_stats *stable_stats, *unstable_stats; + bool slab_only = false; for (memcg = mem_cgroup_iter(NULL, NULL, NULL); memcg != NULL; @@ -284,7 +285,13 @@ kidled_mem_cgroup_scan_done(struct kidled_scan_control scan_control) */ if (!KIDLED_IS_BUCKET_INVALID(unstable_stats->buckets)) { mem_cgroup_idle_page_stats_switch(memcg); - memcg->idle_scans++; + if (kidled_has_page_target(&scan_control)) + memcg->idle_page_scans++; + if (kidled_has_slab_target(&scan_control) && + (memcg_kmem_online() || mem_cgroup_is_root(memcg))) + memcg->idle_slab_scans++; + + slab_only = kidled_has_slab_target_only(&scan_control); } else { memcpy(unstable_stats->buckets, stable_stats->buckets, sizeof(unstable_stats->buckets)); @@ -296,6 +303,9 @@ kidled_mem_cgroup_scan_done(struct kidled_scan_control scan_control) unstable_stats = mem_cgroup_get_unstable_idle_stats(memcg); memset(&unstable_stats->count, 0, sizeof(unstable_stats->count)); + + if (slab_only && !memcg_kmem_online()) + break; } } @@ -341,7 +351,7 @@ kidled_mem_cgroup_reset(enum kidled_scan_type scan_type) } else { memset(&stable_stats->count, 0, sizeof(stable_stats->count)); - memcg->idle_scans = 0; + memcg->idle_page_scans = 0; 
kidled_reset_scan_control(&memcg->scan_control); up_write(&memcg->idle_stats_rwsem); memset(&unstable_stats->count, 0, @@ -500,7 +510,7 @@ static bool kidled_scan_node(pg_data_t *pgdat, int nr_nodes = num_online_nodes(); #endif - if (kidled_is_slab_target(&scan_control)) + if (kidled_has_slab_target_only(&scan_control)) return false; else if (pgdat->node_idle_scan_pfn >= node_end) return true; @@ -777,15 +787,23 @@ static inline bool kidled_should_run(struct kidled_scan_control *p, *new = true; } else if (unlikely(!kidled_is_scan_target_equal(p))) { struct kidled_scan_control scan_control; + bool page_disabled = false; + bool slab_disabled = false; scan_control = kidled_get_current_scan_control(); - if (!kidled_has_page_target_equal(p)) - kidled_mem_cgroup_reset(SCAN_TARGET_PAGE); - else if (!kidled_has_slab_target_equal(p)) { + kidled_get_reset_type(p, &page_disabled, &slab_disabled); + if (slab_disabled) { kidled_mem_cgroup_reset(SCAN_TARGET_SLAB); *count_slab_scan = 0; } - if (kidled_is_slab_target(p)) + if (page_disabled) + kidled_mem_cgroup_reset(SCAN_TARGET_PAGE); + + /* + * It need to restart the page scan when user enable + * the specified scan type again. + */ + if (kidled_has_slab_target_only(p)) *new = true; else *new = false; @@ -806,9 +824,9 @@ static inline bool is_kidled_scan_done(bool scan_done, { u16 duration = scan_control.duration; - if (kidled_is_slab_target(&scan_control)) + if (kidled_has_slab_target_only(&scan_control)) return count_slab_scan >= duration; - else if (kidled_is_page_target(&scan_control)) + else if (kidled_has_page_target_only(&scan_control)) return scan_done; else return scan_done && (count_slab_scan >= duration); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 91c38ef372c1..ddebbb173980 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4331,7 +4331,7 @@ static int mem_cgroup_idle_page_stats_show(struct seq_file *m, void *v) struct mem_cgroup *iter, *memcg = mem_cgroup_from_css(seq_css(m)); struct kidled_scan_control scan_control; struct idle_page_stats *stats, *cache; - unsigned long scans; + unsigned long page_scans, slab_scans; bool has_hierarchy = !!seq_cft(m)->private; bool no_buckets = false; int i, j, t; @@ -4343,14 +4343,16 @@ static int mem_cgroup_idle_page_stats_show(struct seq_file *m, void *v) down_read(&memcg->idle_stats_rwsem); *stats = memcg->idle_stats[memcg->idle_stable_idx]; - scans = memcg->idle_scans; + page_scans = memcg->idle_page_scans; + slab_scans = memcg->idle_slab_scans; scan_control = memcg->scan_control; up_read(&memcg->idle_stats_rwsem); /* Nothing will be outputed with invalid buckets */ if (KIDLED_IS_BUCKET_INVALID(stats->buckets)) { no_buckets = true; - scans = 0; + page_scans = 0; + slab_scans = 0; goto output; } @@ -4358,22 +4360,35 @@ static int mem_cgroup_idle_page_stats_show(struct seq_file *m, void *v) if (!kidled_is_scan_period_equal(&scan_control)) { memset(&stats->count, 0, sizeof(stats->count)); scan_control = kidled_get_current_scan_control(); - scans = 0; + page_scans = 0; + slab_scans = 0; goto output; } /* Zeroes will be output with mismatched scan type */ if (!kidled_is_scan_target_equal(&scan_control)) { - if (!kidled_has_page_target_equal(&scan_control)) { - int i; + bool page_disabled = false; + bool slab_disabled = false; - for (i = 0; i < KIDLE_NR_TYPE - 1; i++) - memset(&stats->count[i], 0, - sizeof(stats->count[i])); - } else if (!kidled_has_slab_target_equal(&scan_control)) { + kidled_get_reset_type(&scan_control, &page_disabled, &slab_disabled); + if (slab_disabled) { 
memset(&stats->count[KIDLE_SLAB], 0, sizeof(stats->count[KIDLE_SLAB])); + slab_scans = 0; + } + if (page_disabled) { + int i; + + for (i = 0; i < KIDLE_NR_TYPE - 1; i++) { + memset(&stats->count[i], 0, sizeof(stats->count[i])); + page_scans = 0; + } } + } else { + if (kidled_has_slab_target_only(&scan_control) && page_scans != 0) + page_scans = 0; + if (kidled_has_page_target_only(&scan_control) && slab_scans != 0) + slab_scans = 0; } if (has_hierarchy) { @@ -4422,7 +4437,8 @@ static int mem_cgroup_idle_page_stats_show(struct seq_file *m, void *v) output: seq_printf(m, "# version: %s\n", KIDLED_VERSION); - seq_printf(m, "# scans: %lu\n", scans); + seq_printf(m, "# page_scans: %lu\n", page_scans); + seq_printf(m, "# slab_scans: %lu\n", slab_scans); seq_printf(m, "# scan_period_in_seconds: %u\n", scan_control.duration); seq_puts(m, "# buckets: "); if (no_buckets) { @@ -4546,7 +4562,8 @@ static ssize_t mem_cgroup_idle_page_stats_write(struct kernfs_open_file *of, * holding any read side locks. */ KIDLED_MARK_BUCKET_INVALID(unstable_stats->buckets); - memcg->idle_scans = 0; + memcg->idle_page_scans = 0; + memcg->idle_slab_scans = 0; up_write(&memcg->idle_stats_rwsem); return nbytes; -- Gitee From 974fd6c325cf6e2eedaf10b68d5e3987b5b5b30d Mon Sep 17 00:00:00 2001 From: zhongjiang-ali Date: Sat, 6 Nov 2021 17:49:23 +0800 Subject: [PATCH 07/14] anolis: mm: kidled: refactor cold slab ANBZ: #22925 ANBZ: #1702 The previous cold slab use an internal field to record the age, it will increase the object size and bring in fragmentation. The patch refactor the implementation to cut down the defect. It will reuse the page->mem_cgroup because slab will not in the memcg when kmem accouting disable, and use an extra pointer to store the age when kmem accouting enable, it is an specal obj_cgroup pointer allocated by memcg_alloc_page_obj_cgroups. [Backport note] In kernel 6.6, the association of slab and memcg is managed uniformly through the memcg_data structure. For this backport, we made the following modifications to align with our older kernel version: 1. When kmem memcg accounting is disabled, we reuse the memcg_data field to store the slab age pointer, and mark its usage by MEMCG_DATA_SLAB_AGE. 2. When kmem memcg accounting is enabled, we extend the (struct obj_cgroup *) array allocated by memcg_alloc_slab_cgroups, reserving an extra pointer to store the age for each slab object. These adjustments ensure correct slab age management compatible with our kernel version and reduce memory fragmentation. 
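As a reading aid, the lookup path described in the backport note condenses to the
sketch below (adapted from the kidled_slab_age()/kidled_get_slab_age_array()
helpers added by this patch; error handling and the later fixes in this series
are omitted):

	/* kmem accounting disabled: slab->memcg_data holds a tagged pointer
	 * to the per-object age array, marked by MEMCG_DATA_SLAB_AGE.
	 */
	static inline unsigned short *kidled_slab_age(struct slab *slab)
	{
		return (unsigned short *)((unsigned long)slab->memcg_data &
					  ~MEMCG_DATA_SLAB_AGE);
	}

	/* kmem accounting enabled: one slot past the obj_cgroup array,
	 * reserved by memcg_alloc_slab_cgroups(), holds the age array.
	 */
	static unsigned short *kidled_get_slab_age_array(void *object)
	{
		struct slab *slab = virt_to_slab(object);
		unsigned int objects = objs_per_slab(slab->slab_cache, slab);

		if (memcg_kmem_online())
			return (unsigned short *)slab_objcgs(slab)[objects];
		return kidled_slab_age(slab);
	}

Each object then indexes that array via obj_to_index(), which is why the
per-object 'age' fields in struct dentry and struct inode can be dropped.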
Reviewed-by: Xu Yu Signed-off-by: zhongjiang-ali Signed-off-by: Rongwei Wang Acked-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/565 Signed-off-by: Weilin Tong --- fs/dcache.c | 12 ++--- fs/inode.c | 22 ++++---- fs/internal.h | 31 ----------- fs/namei.c | 5 +- include/linux/dcache.h | 3 -- include/linux/fs.h | 3 -- include/linux/kidled.h | 55 +++++++++++++++++++ include/linux/memcontrol.h | 6 ++- mm/kidled.c | 106 +++++++++++++++++++++++++++++++++++++ mm/memcontrol.c | 6 ++- mm/slab.h | 7 ++- mm/slub.c | 2 + 12 files changed, 198 insertions(+), 60 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 4595e86009d3..713e686de0cc 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1301,7 +1301,7 @@ static enum lru_status dentry_lru_cold_count(struct list_head *item, { struct dentry *dentry = container_of(item, struct dentry, d_lru); static int dentry_size; - u16 dentry_age = KIDLED_GET_SLAB_AGE(dentry); + u16 dentry_age = kidled_get_slab_age(dentry); /* avoid an object to scan twice in an round */ if (dentry_age && @@ -1311,16 +1311,16 @@ static enum lru_status dentry_lru_cold_count(struct list_head *item, if (READ_ONCE(dentry->d_lockref.count) || (dentry->d_flags & DCACHE_REFERENCED)) { if (dentry_age) - KIDLED_SET_SLAB_AGE(dentry, 0); + kidled_set_slab_age(dentry, 0); goto out; } - KIDLED_CLEAR_SLAB_SCANNED(dentry); + kidled_clear_slab_scanned(dentry); if (unlikely(!dentry_size)) dentry_size = ksize(dentry); - dentry_age = KIDLED_INC_SLAB_AGE(dentry); + dentry_age = kidled_inc_slab_age(dentry); kidled_mem_cgroup_slab_account(dentry, dentry_age, dentry_size); - KIDLED_MARK_SLAB_SCANNED(dentry, kidled_scan_rounds); + kidled_mark_slab_scanned(dentry, kidled_scan_rounds); out: return LRU_ROTATE_DELAY; } @@ -1857,7 +1857,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) dentry->d_lockref.count = 1; dentry->d_flags = 0; - KIDLED_SET_SLAB_AGE(dentry, 0); + kidled_set_slab_age(dentry, 0); spin_lock_init(&dentry->d_lock); seqcount_spinlock_init(&dentry->d_seq, &dentry->d_lock); dentry->d_inode = NULL; diff --git a/fs/inode.c b/fs/inode.c index c5ed92ecbdbd..6606fa242b28 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -163,7 +163,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_sb = sb; inode->i_blkbits = sb->s_blocksize_bits; inode->i_flags = 0; - KIDLED_SET_SLAB_AGE(inode, 0); + kidled_set_slab_age(inode, 0); atomic64_set(&inode->i_sequence, 0); atomic_set(&inode->i_count, 1); inode->i_op = &empty_iops; @@ -941,7 +941,7 @@ static enum lru_status inode_lru_cold_count(struct list_head *item, { struct inode *inode = container_of(item, struct inode, i_lru); static int inode_size; - u16 inode_age = KIDLED_GET_SLAB_AGE(inode); + u16 inode_age = kidled_get_slab_age(inode); if (inode_age && kidled_is_slab_scanned(inode_age, kidled_scan_rounds)) @@ -950,23 +950,23 @@ static enum lru_status inode_lru_cold_count(struct list_head *item, if (atomic_read(&inode->i_count) || (inode->i_state & I_REFERENCED)) { if (unlikely(inode_age)) - KIDLED_SET_SLAB_AGE(inode, 0); + kidled_set_slab_age(inode, 0); goto out; } if (inode->i_data.nrpages || !list_empty(&inode->i_data.private_list)) { if (unlikely(inode_age)) - KIDLED_SET_SLAB_AGE(inode, 0); + kidled_set_slab_age(inode, 0); goto out; } - KIDLED_CLEAR_SLAB_SCANNED(inode); + kidled_clear_slab_scanned(inode); if (unlikely(!inode_size)) inode_size = ksize(inode); - inode_age = KIDLED_INC_SLAB_AGE(inode); + inode_age = kidled_inc_slab_age(inode); kidled_mem_cgroup_slab_account(inode, 
inode_age, inode_size); - KIDLED_MARK_SLAB_SCANNED(inode, kidled_scan_rounds); + kidled_mark_slab_scanned(inode, kidled_scan_rounds); out: return LRU_ROTATE_DELAY; } @@ -1589,8 +1589,8 @@ struct inode *ilookup5(struct super_block *sb, unsigned long hashval, goto again; } else { /* reset its age if it has already had an age */ - if (KIDLED_GET_SLAB_AGE(inode)) - KIDLED_SET_SLAB_AGE(inode, 0); + if (kidled_get_slab_age(inode)) + kidled_set_slab_age(inode, 0); } } return inode; @@ -1623,8 +1623,8 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino) goto again; } else { /* reset its age if it has already had an age */ - if (KIDLED_GET_SLAB_AGE(inode)) - KIDLED_SET_SLAB_AGE(inode, 0); + if (kidled_get_slab_age(inode)) + kidled_set_slab_age(inode, 0); } } return inode; diff --git a/fs/internal.h b/fs/internal.h index d9f078f1bd29..e527ff2f732a 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -308,34 +308,3 @@ struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns); struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap); void mnt_idmap_put(struct mnt_idmap *idmap); -#ifdef CONFIG_KIDLED -#define KIDLED_GET_SLAB_AGE(object) (object->age) -#define KIDLED_SET_SLAB_AGE(object, slab_age) (object->age = slab_age) -#define KIDLED_INC_SLAB_AGE(object) \ -({ \ - u16 slab_age = KIDLED_GET_SLAB_AGE(object); \ - \ - if (slab_age < KIDLED_MAX_IDLE_AGE) { \ - slab_age++; \ - KIDLED_SET_SLAB_AGE(object, slab_age); \ - } \ - slab_age; \ -}) -#define KIDLED_CLEAR_SLAB_SCANNED(object) \ -({ \ - u16 slab_age = KIDLED_GET_SLAB_AGE(object); \ - \ - slab_age &= ~KIDLED_SLAB_ACCESS_MASK; \ - KIDLED_SET_SLAB_AGE(object, slab_age); \ -}) -#define KIDLED_MARK_SLAB_SCANNED(object, scan_rounds) \ -({ \ - u16 slab_age = KIDLED_GET_SLAB_AGE(object); \ - \ - slab_age |= (scan_rounds & 0xff) << KIDLED_SLAB_ACCESS_SHIFT; \ - KIDLED_SET_SLAB_AGE(object, slab_age); \ -}) -#else -#define KIDLED_GET_SLAB_AGE(object) 0 -#define KIDLED_SET_SLAB_AGE(object, slab_age) -#endif diff --git a/fs/namei.c b/fs/namei.c index 7c96d1e95a88..bc3cc6554112 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "internal.h" #include "mount.h" @@ -863,8 +864,8 @@ static inline int d_revalidate(struct dentry *dentry, unsigned int flags) if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) status = dentry->d_op->d_revalidate(dentry, flags); /* Reset the age when lookuping the dentry successfully */ - if (status > 0 && KIDLED_GET_SLAB_AGE(dentry)) - KIDLED_SET_SLAB_AGE(dentry, 0); + if (status > 0 && kidled_get_slab_age(dentry)) + kidled_set_slab_age(dentry, 0); return status; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 29063d8076d0..224fa7e22795 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -113,9 +113,6 @@ struct dentry { struct rcu_head d_rcu; } d_u; -#ifdef CONFIG_KIDLED - unsigned short age; -#endif CK_KABI_RESERVE(1) CK_KABI_RESERVE(2) diff --git a/include/linux/fs.h b/include/linux/fs.h index 4f858f5a4331..0cc029af9359 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -758,9 +758,6 @@ struct inode { void *i_private; /* fs or device private pointer */ -#ifdef CONFIG_KIDLED - unsigned short age; -#endif CK_KABI_RESERVE(1) CK_KABI_RESERVE(2) } __randomize_layout; diff --git a/include/linux/kidled.h b/include/linux/kidled.h index 9d5400ccaacc..63947c5c1bc4 100644 --- a/include/linux/kidled.h +++ b/include/linux/kidled.h @@ -143,6 +143,8 @@ extern unsigned long kidled_scan_rounds; #define KIDLED_OP_SET_DURATION (1 
<< 0) #define KIDLED_OP_INC_SEQ (1 << 1) +extern int kidled_alloc_slab_age(struct slab *slab, struct kmem_cache *s, gfp_t flags); +extern void kidled_free_slab_age(struct slab *slab); extern void kidled_mem_cgroup_account(struct folio *folio, void *ptr, int age, unsigned long size); static inline void kidled_mem_cgroup_slab_account(void *object, @@ -317,6 +319,37 @@ static inline void kidled_inc_scan_seq(void) kidled_set_scan_control(KIDLED_OP_INC_SEQ, 0, NULL); } +extern bool page_has_slab_age(struct slab *slab); +extern unsigned short kidled_get_slab_age(void *object); +extern void kidled_set_slab_age(void *object, unsigned short age); +static inline unsigned short kidled_inc_slab_age(void *object) +{ + unsigned short slab_age = kidled_get_slab_age(object); + + if (slab_age < KIDLED_MAX_IDLE_AGE) { + slab_age++; + kidled_set_slab_age(object, slab_age); + } + + return slab_age; +} + +static inline void kidled_clear_slab_scanned(void *object) +{ + unsigned short slab_age = kidled_get_slab_age(object); + + slab_age &= ~KIDLED_SLAB_ACCESS_MASK; + kidled_set_slab_age(object, slab_age); +} + +static inline void kidled_mark_slab_scanned(void *object, unsigned long scan_rounds) +{ + unsigned short slab_age = kidled_get_slab_age(object); + + slab_age |= (scan_rounds & 0xff) << KIDLED_SLAB_ACCESS_SHIFT; + kidled_set_slab_age(object, slab_age); +} + extern const int kidled_default_buckets[NUM_KIDLED_BUCKETS]; #ifdef CONFIG_MEMCG @@ -341,6 +374,28 @@ static inline void kidled_mem_cgroup_move_stats(struct mem_cgroup *from, } #endif /* CONFIG_MEMCG */ +static inline unsigned short kidled_get_slab_age(void *object) +{ + return 0; +} + +static inline void kidled_set_slab_age(void *object, unsigned short age) +{ +} + +static inline int kidled_alloc_slab_age(struct slab *slab, struct kmem_cache *s, gfp_t flags) +{ + return 0; +} + +static inline void kidled_free_slab_age(struct slab *slab) +{ +} + +static inline bool page_has_slab_age(struct slab *slab) +{ + return false; +} static inline unsigned int kidled_get_current_scan_duration(void) { return 0; diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 7953d16014ce..fc07bd75557b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -478,8 +478,10 @@ enum page_memcg_data_flags { MEMCG_DATA_OBJCGS = (1UL << 0), /* page has been accounted as a non-slab kernel page */ MEMCG_DATA_KMEM = (1UL << 1), + /* page->memcg_data is a pointer to the slab age */ + MEMCG_DATA_SLAB_AGE = (1UL << 2), /* the next bit after the last actual flag */ - __NR_MEMCG_DATA_FLAGS = (1UL << 2), + __NR_MEMCG_DATA_FLAGS = (1UL << 3), }; #define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1) @@ -631,7 +633,7 @@ static inline struct mem_cgroup *folio_memcg_check(struct folio *folio) */ unsigned long memcg_data = READ_ONCE(folio->memcg_data); - if (memcg_data & MEMCG_DATA_OBJCGS) + if ((memcg_data & MEMCG_DATA_OBJCGS) || (memcg_data & MEMCG_DATA_SLAB_AGE)) return NULL; if (memcg_data & MEMCG_DATA_KMEM) { diff --git a/mm/kidled.c b/mm/kidled.c index c7a8a393006a..df7d6e83adde 100644 --- a/mm/kidled.c +++ b/mm/kidled.c @@ -924,6 +924,112 @@ static int kidled(void *dummy) return 0; } +static inline unsigned short *kidled_slab_age(struct slab *slab) +{ + return (unsigned short *)((unsigned long)slab->memcg_data & ~MEMCG_DATA_SLAB_AGE); +} + +bool page_has_slab_age(struct slab *slab) +{ + return (((unsigned long)slab->memcg_data & MEMCG_DATA_FLAGS_MASK) == MEMCG_DATA_SLAB_AGE); +} + +static unsigned short *kidled_get_slab_age_array(void *object) +{ 
+ struct slab *slab = virt_to_slab(object); + unsigned int objects = objs_per_slab(slab->slab_cache, slab); + unsigned short *slab_age; + + if (memcg_kmem_online()) + slab_age = (unsigned short *)slab_objcgs(slab)[objects]; + else + slab_age = kidled_slab_age(slab); + + return slab_age; +} + +unsigned short kidled_get_slab_age(void *object) +{ + unsigned short *slab_age = kidled_get_slab_age_array(object); + struct slab *slab = virt_to_slab(object); + unsigned int off = obj_to_index(slab->slab_cache, slab, object); + + if (unlikely(!slab_age)) + return 0; + + return *(slab_age + off); +} + +void kidled_set_slab_age(void *object, unsigned short age) +{ + unsigned short *slab_age = kidled_get_slab_age_array(object); + struct slab *slab = virt_to_slab(object); + unsigned int off = obj_to_index(slab->slab_cache, slab, object); + + if (unlikely(!slab_age)) + return; + + *(slab_age + off) = age; +} + +static inline bool kidled_available_slab(struct kmem_cache *s) +{ + if (!strcmp(s->name, "inode_cache") || + !strcmp(s->name, "ext4_inode_cache") || + !strcmp(s->name, "dentry")) + return true; + + return false; +} + +/* + * each slab object pointer to an memcg respectively when kmem account enable, + * slab page can be used by root mem_cgroup and children memcg. slab object + * age is recorded in slab_age of page when kmem account disable. Otherwise, + * an special obj_cgroups pointer will store the value. + */ +#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | __GFP_ACCOUNT) +int kidled_alloc_slab_age(struct slab *slab, struct kmem_cache *s, gfp_t flags) +{ + unsigned int objects = objs_per_slab(s, slab); + void *ver; + int ret; + + if (!kidled_available_slab(s)) + return 0; + + /* void count the memory to kmem accounting when kmem enable */ + flags &= ~OBJCGS_CLEAR_MASK; + ver = kzalloc_node(objects * sizeof(unsigned short), flags, slab_nid(slab)); + if (!ver) + return -ENOMEM; + + if (memcg_kmem_online()) { + if (!slab_objcgs(slab)) { + ret = memcg_alloc_slab_cgroups(slab, s, flags, true); + + if (!ret) + slab_objcgs(slab)[objects] = ver; + else { + kfree(ver); + return -ENOMEM; + } + } else { + slab_objcgs(slab)[objects] = ver; + } + return 0; + } + + slab->memcg_data = ((unsigned long)ver | MEMCG_DATA_SLAB_AGE); + return 0; +} + +void kidled_free_slab_age(struct slab *slab) +{ + kfree(kidled_slab_age(slab)); + slab->memcg_data = 0; +} + static ssize_t kidled_scan_period_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ddebbb173980..4174c53a3069 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3226,6 +3226,10 @@ int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s, unsigned long memcg_data; void *vec; +#ifdef CONFIG_KIDLED + /* extra allocate an special pointer for cold slab */ + objects += 1; +#endif gfp &= ~OBJCGS_CLEAR_MASK; vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp, slab_nid(slab)); @@ -3262,7 +3266,7 @@ struct mem_cgroup *mem_cgroup_from_obj_folio(struct folio *folio, void *p) * Memcg membership data for each individual object is saved in * slab->memcg_data. 
*/ - if (folio_test_slab(folio)) { + if (folio_test_slab(folio) && !page_has_slab_age(folio_slab(folio))) { struct obj_cgroup **objcgs; struct slab *slab; unsigned int off; diff --git a/mm/slab.h b/mm/slab.h index 319a2a35aeab..853f80fc031b 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -452,7 +452,8 @@ static inline struct obj_cgroup **slab_objcgs(struct slab *slab) VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), slab_page(slab)); - VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, slab_page(slab)); + VM_BUG_ON_PAGE((memcg_data & MEMCG_DATA_FLAGS_MASK) != MEMCG_DATA_KMEM, + slab_page(slab)); return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } @@ -653,6 +654,10 @@ static __always_inline void unaccount_slab(struct slab *slab, int order, { if (memcg_kmem_online()) memcg_free_slab_cgroups(slab); + else { + VM_BUG_ON_PAGE(!page_has_slab_age(slab), slab); + kidled_free_slab_age(slab); + } mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s), -(PAGE_SIZE << order)); diff --git a/mm/slub.c b/mm/slub.c index 30a1f3fb88c6..df3d4fc61335 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2084,6 +2084,8 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) } set_freepointer(s, p, NULL); } + if (unlikely(kidled_alloc_slab_age(slab, s, alloc_gfp))) + pr_warn("Fails to trace %s:%p cold slab distribution.\n", s->name, slab); return slab; } -- Gitee From 6b6e1f6471efd2080112259850c83e06c3c5f2c0 Mon Sep 17 00:00:00 2001 From: zhongjiang-ali Date: Wed, 10 Nov 2021 17:53:19 +0800 Subject: [PATCH 08/14] anolis: mm: kidled: allocate slab age for specified kmem cache ANBZ: #22925 ANBZ: #1702 Currently, slab age pointer is allocated with obj_cgroup array when kmem accouting enable. hence, all slab page should be allocated, but some specify slab is filtered by us. however, we just allocate slab age for specified slab when kmem accouting disable. Reviewed-by: Gang Deng Signed-off-by: zhongjiang-ali Signed-off-by: Rongwei Wang Link: https://gitee.com/anolis/cloud-kernel/pulls/565 Signed-off-by: Weilin Tong --- mm/kidled.c | 10 ---------- mm/memcontrol.c | 6 +++--- mm/slab.h | 20 ++++++++++++++++++-- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/mm/kidled.c b/mm/kidled.c index df7d6e83adde..98a248ce5e65 100644 --- a/mm/kidled.c +++ b/mm/kidled.c @@ -972,16 +972,6 @@ void kidled_set_slab_age(void *object, unsigned short age) *(slab_age + off) = age; } -static inline bool kidled_available_slab(struct kmem_cache *s) -{ - if (!strcmp(s->name, "inode_cache") || - !strcmp(s->name, "ext4_inode_cache") || - !strcmp(s->name, "dentry")) - return true; - - return false; -} - /* * each slab object pointer to an memcg respectively when kmem account enable, * slab page can be used by root mem_cgroup and children memcg. 
slab object diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4174c53a3069..a1100bd1353f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3226,10 +3226,10 @@ int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s, unsigned long memcg_data; void *vec; -#ifdef CONFIG_KIDLED /* extra allocate an special pointer for cold slab */ - objects += 1; -#endif + if (kidled_available_slab(s)) + objects += 1; + gfp &= ~OBJCGS_CLEAR_MASK; vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp, slab_nid(slab)); diff --git a/mm/slab.h b/mm/slab.h index 853f80fc031b..c50b8b742035 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -402,6 +402,22 @@ static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s) NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B; } +#ifdef CONFIG_KIDLED +static inline bool kidled_available_slab(struct kmem_cache *s) +{ + if (!strcmp(s->name, "inode_cache") || + !strcmp(s->name, "ext4_inode_cache") || + !strcmp(s->name, "dentry")) + return true; + return false; +} +#else +static inline bool kidled_available_slab(struct kmem_cache *s) +{ + return false; +} +#endif + #ifdef CONFIG_SLUB_DEBUG #ifdef CONFIG_SLUB_DEBUG_ON DECLARE_STATIC_KEY_TRUE(slub_debug_enabled); @@ -655,8 +671,8 @@ static __always_inline void unaccount_slab(struct slab *slab, int order, if (memcg_kmem_online()) memcg_free_slab_cgroups(slab); else { - VM_BUG_ON_PAGE(!page_has_slab_age(slab), slab); - kidled_free_slab_age(slab); + if (page_has_slab_age(slab)) + kidled_free_slab_age(slab); } mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s), -- Gitee From 38205be59241cf5f74151ec709220abccd547550 Mon Sep 17 00:00:00 2001 From: zhongjiang-ali Date: Fri, 10 Dec 2021 16:10:36 +0800 Subject: [PATCH 09/14] anolis: mm: kidled: Do not check the slab age when it disable the slab scan ANBZ: #22925 ANBZ: #1702 Currently, we always check the slab age in some code path whether mark it as the hot slab when KIDLED enable. but it can bring in some performance regression when kidled do not work or slab scan is turned off. The patch use an static key to control the access denial, will-it-scale has proved that it will not bring in performance regression any more after appling the patch. 
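For readers skimming the diff below, the guard is the standard static-branch
pattern; a condensed sketch of the helpers this patch adds (the accessor
resolves the age array exactly as in the earlier patches, and the toggle is
driven from the sysfs stores):

	static DEFINE_STATIC_KEY_FALSE(kidled_slab_key);

	unsigned short kidled_get_slab_age(void *object)
	{
		unsigned short *slab_age;
		struct slab *slab;
		unsigned int off;

		/* patched-out jump: near-zero cost while slab scan is off */
		if (!static_branch_unlikely(&kidled_slab_key))
			return 0;

		slab_age = kidled_get_slab_age_array(object);
		if (!slab_age)
			return 0;

		slab = virt_to_slab(object);
		off = obj_to_index(slab->slab_cache, slab, object);
		return *(slab_age + off);
	}

	/* re-evaluated whenever scan_period_in_seconds or scan_target changes */
	static inline void kidled_slab_scan_enabled(void)
	{
		if (!static_key_enabled(&kidled_slab_key)) {
			if (kidled_allow_scan_slab())
				static_branch_enable(&kidled_slab_key);
		} else if (!kidled_allow_scan_slab()) {
			static_branch_disable(&kidled_slab_key);
		}
	}

kidled_set_slab_age() gets the same early return, so the common allocation and
lookup paths never touch the age array unless slab scanning is actually enabled.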
Reviewed-by: Xu Yu Signed-off-by: zhongjiang-ali Signed-off-by: Rongwei Wang Acked-by: Gang Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/565 Signed-off-by: Weilin Tong --- mm/kidled.c | 58 +++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 8 deletions(-) diff --git a/mm/kidled.c b/mm/kidled.c index 98a248ce5e65..32e399e13350 100644 --- a/mm/kidled.c +++ b/mm/kidled.c @@ -83,6 +83,7 @@ struct kidled_scan_control kidled_scan_control; const int kidled_default_buckets[NUM_KIDLED_BUCKETS] = { 1, 2, 5, 15, 30, 60, 120, 240 }; static DECLARE_WAIT_QUEUE_HEAD(kidled_wait); +static DEFINE_STATIC_KEY_FALSE(kidled_slab_key); unsigned long kidled_scan_rounds __read_mostly; static inline int kidled_get_bucket(int *idle_buckets, int age) @@ -924,6 +925,31 @@ static int kidled(void *dummy) return 0; } +static inline bool kidled_allow_scan_slab(void) +{ + struct kidled_scan_control scan_control = + kidled_get_current_scan_control(); + + if (!scan_control.duration) + return false; + + if (!kidled_has_slab_target(&scan_control)) + return false; + + return true; +} + +static inline void kidled_slab_scan_enabled(void) +{ + if (!static_key_enabled(&kidled_slab_key)) { + if (kidled_allow_scan_slab()) + static_branch_enable(&kidled_slab_key); + } else { + if (!kidled_allow_scan_slab()) + static_branch_disable(&kidled_slab_key); + } +} + static inline unsigned short *kidled_slab_age(struct slab *slab) { return (unsigned short *)((unsigned long)slab->memcg_data & ~MEMCG_DATA_SLAB_AGE); @@ -950,25 +976,39 @@ static unsigned short *kidled_get_slab_age_array(void *object) unsigned short kidled_get_slab_age(void *object) { - unsigned short *slab_age = kidled_get_slab_age_array(object); - struct slab *slab = virt_to_slab(object); - unsigned int off = obj_to_index(slab->slab_cache, slab, object); + unsigned short *slab_age; + struct slab *slab; + unsigned int off; + + if (!static_branch_unlikely(&kidled_slab_key)) + return 0; - if (unlikely(!slab_age)) + slab_age = kidled_get_slab_age_array(object); + if (!slab_age) return 0; + slab = virt_to_slab(object); + off = obj_to_index(slab->slab_cache, slab, object); + return *(slab_age + off); } void kidled_set_slab_age(void *object, unsigned short age) { - unsigned short *slab_age = kidled_get_slab_age_array(object); - struct slab *slab = virt_to_slab(object); - unsigned int off = obj_to_index(slab->slab_cache, slab, object); + unsigned short *slab_age; + struct slab *slab; + unsigned int off; - if (unlikely(!slab_age)) + if (!static_branch_unlikely(&kidled_slab_key)) return; + slab_age = kidled_get_slab_age_array(object); + if (!slab_age) + return; + + slab = virt_to_slab(object); + off = obj_to_index(slab->slab_cache, slab, object); + *(slab_age + off) = age; } @@ -1057,6 +1097,7 @@ static ssize_t kidled_scan_period_store(struct kobject *kobj, kidled_set_scan_duration(secs); wake_up_interruptible(&kidled_wait); + kidled_slab_scan_enabled(); return count; } @@ -1079,6 +1120,7 @@ static ssize_t kidled_scan_target_store(struct kobject *kobj, return -EINVAL; WRITE_ONCE(kidled_scan_target, val); + kidled_slab_scan_enabled(); return count; } -- Gitee From 60673625e4a7c6cf22aa9316f4231596410cbc60 Mon Sep 17 00:00:00 2001 From: zhongjiang-ali Date: Mon, 1 Aug 2022 08:43:11 +0800 Subject: [PATCH 10/14] anolis: mm: fix an memory exception issue when kmem accouting ANBZ: #22925 ANBZ: #1746 Cold slab will reuse page->obj_cgroups fields, but it fails to use memcg_kmem_enabled to estimate because it will make effect just create the child 
memcg. hence the kidled_get/set_slab_age will use the obj_cgroups mistakenly, it will result in an unexpected result. Signed-off-by: zhongjiang-ali Signed-off-by: Rongwei Wang Reviewed-by: Xu Yu Link: https://gitee.com/anolis/cloud-kernel/pulls/593 Signed-off-by: Weilin Tong --- include/linux/memcontrol.h | 2 ++ mm/kidled.c | 15 +++++++++++---- mm/memcontrol.c | 2 +- mm/slab.h | 2 +- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index fc07bd75557b..3b8c6b2ab5cc 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1105,6 +1105,8 @@ void folio_memcg_unlock(struct folio *folio); void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val); +extern bool cgroup_memory_nokmem; + /* try to stablize folio_memcg() for all the pages in a memcg */ static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) { diff --git a/mm/kidled.c b/mm/kidled.c index 32e399e13350..dff673ca6375 100644 --- a/mm/kidled.c +++ b/mm/kidled.c @@ -180,8 +180,11 @@ void kidled_mem_cgroup_account(struct folio *folio, if (type == KIDLE_SLAB) { if (!memcg_kmem_online()) memcg = root_mem_cgroup; - else + else { memcg = mem_cgroup_from_obj(ptr); + if (!memcg) + return; + } } else { folio_memcg_lock(folio); memcg = folio_memcg(folio); @@ -964,13 +967,17 @@ static unsigned short *kidled_get_slab_age_array(void *object) { struct slab *slab = virt_to_slab(object); unsigned int objects = objs_per_slab(slab->slab_cache, slab); - unsigned short *slab_age; + unsigned short *slab_age = NULL; - if (memcg_kmem_online()) + if (!kidled_available_slab(slab->slab_cache)) + goto out; + + if (!cgroup_memory_nokmem) slab_age = (unsigned short *)slab_objcgs(slab)[objects]; else slab_age = kidled_slab_age(slab); +out: return slab_age; } @@ -1034,7 +1041,7 @@ int kidled_alloc_slab_age(struct slab *slab, struct kmem_cache *s, gfp_t flags) if (!ver) return -ENOMEM; - if (memcg_kmem_online()) { + if (!cgroup_memory_nokmem) { if (!slab_objcgs(slab)) { ret = memcg_alloc_slab_cgroups(slab, s, flags, true); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a1100bd1353f..bc7f8f17d391 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -97,7 +97,7 @@ EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg); static bool cgroup_memory_nosocket __ro_after_init; /* Kernel memory accounting disabled? */ -static bool cgroup_memory_nokmem __ro_after_init; +bool cgroup_memory_nokmem __ro_after_init; #ifdef CONFIG_MEMSLI /* Cgroup memory SLI disabled? */ diff --git a/mm/slab.h b/mm/slab.h index c50b8b742035..33f4bfbc910b 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -668,7 +668,7 @@ static __always_inline void account_slab(struct slab *slab, int order, static __always_inline void unaccount_slab(struct slab *slab, int order, struct kmem_cache *s) { - if (memcg_kmem_online()) + if (!cgroup_memory_nokmem) memcg_free_slab_cgroups(slab); else { if (page_has_slab_age(slab)) -- Gitee From 430fba4226ca50c3ddf1cd748952fe7f834fc4b5 Mon Sep 17 00:00:00 2001 From: zhongjiang-ali Date: Sat, 30 Jul 2022 14:34:51 +0800 Subject: [PATCH 11/14] anolis: mm: fix cold slab memory leak ANBZ: #22925 ANBZ: #1746 the cold slab use an extra memory to record the age of slab, it fails to free the memory when page->obj_cgroup release the memory. hence the patch will free the slab memory as well. 
Signed-off-by: zhongjiang-ali Signed-off-by: Rongwei Wang Reviewed-by: Xu Yu Link: https://gitee.com/anolis/cloud-kernel/pulls/593 Signed-off-by: Weilin Tong --- mm/slab.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/mm/slab.h b/mm/slab.h index 33f4bfbc910b..184f1d6b1b7d 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -479,8 +479,13 @@ int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s, void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, enum node_stat_item idx, int nr); -static inline void memcg_free_slab_cgroups(struct slab *slab) +static inline void memcg_free_slab_cgroups(struct slab *slab, struct kmem_cache *s) { + unsigned int objects = objs_per_slab(s, slab); + + if (kidled_available_slab(s)) + kfree(slab_objcgs(slab)[objects]); + kfree(slab_objcgs(slab)); slab->memcg_data = 0; } @@ -619,7 +624,7 @@ static inline int memcg_alloc_slab_cgroups(struct slab *slab, return 0; } -static inline void memcg_free_slab_cgroups(struct slab *slab) +static inline void memcg_free_slab_cgroups(struct slab *slab, struct kmem_cache *s) { } @@ -669,7 +674,7 @@ static __always_inline void unaccount_slab(struct slab *slab, int order, struct kmem_cache *s) { if (!cgroup_memory_nokmem) - memcg_free_slab_cgroups(slab); + memcg_free_slab_cgroups(slab, s); else { if (page_has_slab_age(slab)) kidled_free_slab_age(slab); -- Gitee From 3a0c567c9587861b9c41dacb3e13fe1017bcb433 Mon Sep 17 00:00:00 2001 From: zhongjiang-ali Date: Sat, 30 Jul 2022 14:55:26 +0800 Subject: [PATCH 12/14] anolis: mm: Do not scan cold slab when memory allocation fails ANBZ: #22925 ANBZ: #1746 Cold slab uses extra allocated memory to record the age of each slab, but that allocation can fail when memory pressure is too heavy. Hence we need to check for this case even though it is unlikely to happen. Signed-off-by: zhongjiang-ali Signed-off-by: Rongwei Wang Reviewed-by: Xu Yu Link: https://gitee.com/anolis/cloud-kernel/pulls/593 Signed-off-by: Weilin Tong --- mm/kidled.c | 8 +++++--- mm/slab.h | 7 +++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/mm/kidled.c b/mm/kidled.c index dff673ca6375..b56a81327089 100644 --- a/mm/kidled.c +++ b/mm/kidled.c @@ -972,9 +972,11 @@ static unsigned short *kidled_get_slab_age_array(void *object) if (!kidled_available_slab(slab->slab_cache)) goto out; - if (!cgroup_memory_nokmem) - slab_age = (unsigned short *)slab_objcgs(slab)[objects]; - else + if (!cgroup_memory_nokmem) { + /* In case fail to allocate memory for cold slab */ + if (likely(slab_objcgs(slab))) + slab_age = (unsigned short *)slab_objcgs(slab)[objects]; + } else slab_age = kidled_slab_age(slab); out: diff --git a/mm/slab.h b/mm/slab.h index 184f1d6b1b7d..46c171e52e6e 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -483,8 +483,11 @@ static inline void memcg_free_slab_cgroups(struct slab *slab, struct kmem_cache { unsigned int objects = objs_per_slab(s, slab); - if (kidled_available_slab(s)) - kfree(slab_objcgs(slab)[objects]); + if (kidled_available_slab(s)) { + /* In case fail to allocate memory for cold slab */ + if (likely(slab_objcgs(slab))) + kfree(slab_objcgs(slab)[objects]); + } kfree(slab_objcgs(slab)); slab->memcg_data = 0; -- Gitee From 41eba4bf01e5baae45d352cd8dcb7adc843316bf Mon Sep 17 00:00:00 2001 From: Tianchen Ding Date: Tue, 6 Feb 2024 10:23:42 +0800 Subject: [PATCH 13/14] anolis: mm/kidled: do not monitor kfence memory ANBZ: #22925 ANBZ: #8176 Since memory from the kfence pool cannot be reclaimed, it is meaningless to scan kfence pages.
Mark them as unavailable to kidled and prepare for the next patch, which lets kfence free page->obj_cgroups. Signed-off-by: Tianchen Ding Reviewed-by: Xu Yu Link: https://gitee.com/anolis/cloud-kernel/pulls/2759 Signed-off-by: Weilin Tong --- mm/kidled.c | 4 ++-- mm/memcontrol.c | 2 +- mm/slab.h | 11 ++++++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/mm/kidled.c b/mm/kidled.c index b56a81327089..a7de9b18e4f1 100644 --- a/mm/kidled.c +++ b/mm/kidled.c @@ -969,7 +969,7 @@ static unsigned short *kidled_get_slab_age_array(void *object) unsigned int objects = objs_per_slab(slab->slab_cache, slab); unsigned short *slab_age = NULL; - if (!kidled_available_slab(slab->slab_cache)) + if (!kidled_available_slab(slab_folio(slab), slab->slab_cache)) goto out; if (!cgroup_memory_nokmem) { @@ -1034,7 +1034,7 @@ int kidled_alloc_slab_age(struct slab *slab, struct kmem_cache *s, gfp_t flags) void *ver; int ret; - if (!kidled_available_slab(s)) + if (!kidled_available_slab(slab_folio(slab), s)) return 0; /* void count the memory to kmem accounting when kmem enable */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index bc7f8f17d391..8db51adc5776 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3227,7 +3227,7 @@ int memcg_alloc_slab_cgroups(struct slab *slab, struct kmem_cache *s, void *vec; /* extra allocate an special pointer for cold slab */ - if (kidled_available_slab(s)) + if (kidled_available_slab(slab_folio(slab), s)) objects += 1; gfp &= ~OBJCGS_CLEAR_MASK; diff --git a/mm/slab.h b/mm/slab.h index 46c171e52e6e..afe0b9c3aa06 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -403,8 +403,13 @@ static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s) } #ifdef CONFIG_KIDLED -static inline bool kidled_available_slab(struct kmem_cache *s) +static inline bool kidled_available_slab(struct folio *folio, struct kmem_cache *s) { +#ifdef CONFIG_KFENCE + /* Do not monitor kfence memory. */ + if (unlikely(PageKfence(&folio->page))) + return false; +#endif if (!strcmp(s->name, "inode_cache") || !strcmp(s->name, "ext4_inode_cache") || !strcmp(s->name, "dentry")) @@ -412,7 +417,7 @@ static inline bool kidled_available_slab(struct kmem_cache *s) return false; } #else -static inline bool kidled_available_slab(struct folio *folio, struct kmem_cache *s) { return false; } @@ -483,7 +488,7 @@ static inline void memcg_free_slab_cgroups(struct slab *slab, struct kmem_cache { unsigned int objects = objs_per_slab(s, slab); - if (kidled_available_slab(s)) { + if (kidled_available_slab(slab_folio(slab), s)) { /* In case fail to allocate memory for cold slab */ if (likely(slab_objcgs(slab))) kfree(slab_objcgs(slab)[objects]); -- Gitee From 072d2f57b4db2d8f79bf9398cf525fb940210b47 Mon Sep 17 00:00:00 2001 From: zhongjiang-ali Date: Mon, 1 Aug 2022 13:54:48 +0800 Subject: [PATCH 14/14] anolis: mm: fix a compile issue when memcg is disabled ANBZ: #22925 ANBZ: #1746 slab_age is embedded in struct page, and it disappears when CONFIG_MEMCG is disabled, so cold slab fails to work. This patch makes sure cold slab works whether memcg is on or off.
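As a rough userspace illustration of the lookup that has to keep working in both configurations, the sketch below models how the age array location depends on whether kmem accounting is active; struct fake_slab, OBJS_PER_SLAB and kmem_accounting_on are invented stand-ins, not the kernel implementation.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define OBJS_PER_SLAB 16		/* hypothetical objects per slab */

struct fake_slab {
	void **objcgs;			/* stands in for slab_objcgs(slab) */
	unsigned short *slab_age;	/* stands in for the dedicated age storage */
};

static bool kmem_accounting_on;		/* models !cgroup_memory_nokmem */

/*
 * Mirrors the idea behind kidled_get_slab_age_array(): with kmem accounting
 * on, the ages live in the extra obj_cgroups slot; otherwise they live in a
 * separate allocation that must exist even when memcg is compiled out.
 */
static unsigned short *get_age_array(struct fake_slab *slab)
{
	if (kmem_accounting_on) {
		if (!slab->objcgs)	/* the vector allocation may have failed */
			return NULL;
		return (unsigned short *)slab->objcgs[OBJS_PER_SLAB];
	}
	return slab->slab_age;
}

int main(void)
{
	struct fake_slab slab = { 0 };

	kmem_accounting_on = false;	/* e.g. memcg disabled or nokmem */
	slab.slab_age = calloc(OBJS_PER_SLAB, sizeof(unsigned short));
	if (!slab.slab_age)
		return 1;

	slab.slab_age[3] = 7;		/* object 3 idle for 7 scan rounds */
	printf("age of object 3: %hu\n", get_age_array(&slab)[3]);

	free(slab.slab_age);
	return 0;
}

The design point is that the selector has to be available in every configuration, which is what the kidled_kmem_enabled() helper introduced in the diff below provides.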
Signed-off-by: zhongjiang-ali Signed-off-by: Rongwei Wang Reviewed-by: Xu Yu Link: https://gitee.com/anolis/cloud-kernel/pulls/593 Signed-off-by: Weilin Tong --- include/linux/kidled.h | 5 +++++ include/linux/memcontrol.h | 2 -- include/linux/mm_types.h | 4 ++-- mm/slab.h | 15 +++++++++++++-- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/include/linux/kidled.h b/include/linux/kidled.h index 63947c5c1bc4..0fc766b91371 100644 --- a/include/linux/kidled.h +++ b/include/linux/kidled.h @@ -143,6 +143,11 @@ extern unsigned long kidled_scan_rounds; #define KIDLED_OP_SET_DURATION (1 << 0) #define KIDLED_OP_INC_SEQ (1 << 1) +#ifdef CONFIG_MEMCG_KMEM +extern bool cgroup_memory_nokmem; +#else +#define cgroup_memory_nokmem 1 +#endif extern int kidled_alloc_slab_age(struct slab *slab, struct kmem_cache *s, gfp_t flags); extern void kidled_free_slab_age(struct slab *slab); extern void kidled_mem_cgroup_account(struct folio *folio, diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 3b8c6b2ab5cc..fc07bd75557b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1105,8 +1105,6 @@ void folio_memcg_unlock(struct folio *folio); void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val); -extern bool cgroup_memory_nokmem; - /* try to stablize folio_memcg() for all the pages in a memcg */ static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) { diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index a49e6d6aecbf..aff540cf1f01 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -180,7 +180,7 @@ struct page { /* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */ atomic_t _refcount; -#ifdef CONFIG_MEMCG +#if defined(CONFIG_MEMCG) || defined(CONFIG_KIDLED) unsigned long memcg_data; #endif @@ -342,7 +342,7 @@ struct folio { }; atomic_t _mapcount; atomic_t _refcount; -#ifdef CONFIG_MEMCG +#if defined(CONFIG_MEMCG) || defined(CONFIG_KIDLED) unsigned long memcg_data; #endif #if defined(WANT_PAGE_VIRTUAL) diff --git a/mm/slab.h b/mm/slab.h index afe0b9c3aa06..350d35ffa37d 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -101,7 +101,7 @@ struct slab { #endif atomic_t __page_refcount; -#ifdef CONFIG_MEMCG +#if defined(CONFIG_MEMCG) || defined(CONFIG_KIDLED) unsigned long memcg_data; #endif }; @@ -416,11 +416,22 @@ static inline bool kidled_available_slab(struct folio *folio, struct kmem_cache return true; return false; } + +/* cold slab will need the special condition */ +static inline bool kidled_kmem_enabled(void) +{ + return !cgroup_memory_nokmem; +} #else static inline bool kidled_available_slab(struct folio *folio, struct kmem_cache *s) { return false; } + +static inline bool kidled_kmem_enabled(void) +{ + return memcg_kmem_online(); +} #endif #ifdef CONFIG_SLUB_DEBUG @@ -681,7 +692,7 @@ static __always_inline void account_slab(struct slab *slab, int order, static __always_inline void unaccount_slab(struct slab *slab, int order, struct kmem_cache *s) { - if (!cgroup_memory_nokmem) + if (kidled_kmem_enabled()) memcg_free_slab_cgroups(slab, s); else { if (page_has_slab_age(slab)) -- Gitee
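Stepping back from the individual fixes, slab age lookups in this series are gated by a static key that is re-evaluated whenever the scan period or the scan target is written. The following userspace sketch only models that gating logic; the flag and helper names are stand-ins, and a real static branch patches the code path instead of testing a variable.

#include <stdbool.h>
#include <stdio.h>

/* Models kidled_slab_key; here it is just a flag. */
static bool slab_scan_enabled;

/*
 * Models kidled_allow_scan_slab(): slab scanning needs a non-zero scan
 * duration and a scan target that includes slab.
 */
static bool allow_scan_slab(unsigned int duration, bool slab_target)
{
	return duration != 0 && slab_target;
}

/*
 * Models kidled_slab_scan_enabled(): re-evaluate the gate whenever the scan
 * period or the scan target sysfs file is written.
 */
static void update_slab_scan_gate(unsigned int duration, bool slab_target)
{
	slab_scan_enabled = allow_scan_slab(duration, slab_target);
}

/*
 * Models kidled_get_slab_age(): bail out early while the gate is off so the
 * backing array is never touched.
 */
static unsigned short get_slab_age(const unsigned short *ages, unsigned int idx)
{
	if (!slab_scan_enabled)
		return 0;
	return ages[idx];
}

int main(void)
{
	unsigned short ages[4] = { 0, 2, 5, 1 };

	update_slab_scan_gate(0, true);		/* duration 0: gate stays off */
	printf("gated: %hu\n", get_slab_age(ages, 2));

	update_slab_scan_gate(120, true);	/* period set + slab target: gate on */
	printf("enabled: %hu\n", get_slab_age(ages, 2));
	return 0;
}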