From 9c11e8fbcd5886f59856f0c2764380b3007b0f9c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 27 Mar 2021 17:57:06 +0800 Subject: [PATCH 1/3] f2fs: introduce gc_merge mount option mainline inclusion from v5.13-rc1 commit 5911d2d1d1a38b26585383478bd71d9254e48bdf category:feature issue: #I4TEGS CVE:N/A -------------------------------- In this patch, we will add two new mount options: "gc_merge" and "nogc_merge", when background_gc is on, "gc_merge" option can be set to let background GC thread to handle foreground GC requests, it can eliminate the sluggish issue caused by slow foreground GC operation when GC is triggered from a process with limited I/O and CPU resources. Original idea is from Xiang. Signed-off-by: Wang Xiaojun Signed-off-by: Chao Yu --- Documentation/filesystems/f2fs.rst | 6 ++++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/gc.c | 26 ++++++++++++++++++++++---- fs/f2fs/gc.h | 6 ++++++ fs/f2fs/segment.c | 15 +++++++++++++-- fs/f2fs/super.c | 19 +++++++++++++++++-- 6 files changed, 65 insertions(+), 8 deletions(-) diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 8c0fbdd8ce6f..cb18f7c1bea3 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -110,6 +110,12 @@ background_gc=%s Turn on/off cleaning operations, namely garbage on synchronous garbage collection running in background. Default value for this option is on. So garbage collection is on by default. +gc_merge When background_gc is on, this option can be enabled to + let background GC thread to handle foreground GC requests, + it can eliminate the sluggish issue caused by slow foreground + GC operation when GC is triggered from a process with limited + I/O and CPU resources. +nogc_merge Disable GC merge feature. 
disable_roll_forward Disable the roll-forward recovery routine norecovery Disable the roll-forward recovery routine, mounted read- only (i.e., -o ro,disable_roll_forward) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2d7799bd30b1..b2d734438aff 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -99,6 +99,7 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000 #define F2FS_MOUNT_NORECOVERY 0x04000000 #define F2FS_MOUNT_ATGC 0x08000000 +#define F2FS_MOUNT_GC_MERGE 0x20000000 #define F2FS_OPTION(sbi) ((sbi)->mount_opt) #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 72f227f6ebad..cb3e7808d0e4 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -31,19 +31,24 @@ static int gc_thread_func(void *data) struct f2fs_sb_info *sbi = data; struct f2fs_gc_kthread *gc_th = sbi->gc_thread; wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head; + wait_queue_head_t *fggc_wq = &sbi->gc_thread->fggc_wq; unsigned int wait_ms; wait_ms = gc_th->min_sleep_time; set_freezable(); do { - bool sync_mode; + bool sync_mode, foreground = false; wait_event_interruptible_timeout(*wq, kthread_should_stop() || freezing(current) || + waitqueue_active(fggc_wq) || gc_th->gc_wake, msecs_to_jiffies(wait_ms)); + if (test_opt(sbi, GC_MERGE) && waitqueue_active(fggc_wq)) + foreground = true; + /* give it a try one time */ if (gc_th->gc_wake) gc_th->gc_wake = 0; @@ -90,7 +95,10 @@ static int gc_thread_func(void *data) goto do_gc; } - if (!down_write_trylock(&sbi->gc_lock)) { + if (foreground) { + down_write(&sbi->gc_lock); + goto do_gc; + } else if (!down_write_trylock(&sbi->gc_lock)) { stat_other_skip_bggc_count(sbi); goto next; } @@ -107,14 +115,22 @@ static int gc_thread_func(void *data) else increase_sleep_time(gc_th, &wait_ms); do_gc: - stat_inc_bggc_count(sbi->stat_info); + if (!foreground) + stat_inc_bggc_count(sbi->stat_info); sync_mode = F2FS_OPTION(sbi).bggc_mode == 
BGGC_MODE_SYNC; + /* foreground GC was been triggered via f2fs_balance_fs() */ + if (foreground) + sync_mode = false; + /* if return value is not zero, no victim was selected */ - if (f2fs_gc(sbi, sync_mode, true, false, NULL_SEGNO)) + if (f2fs_gc(sbi, sync_mode, !foreground, false, NULL_SEGNO)) wait_ms = gc_th->no_gc_sleep_time; + if (foreground) + wake_up_all(&gc_th->fggc_wq); + trace_f2fs_background_gc(sbi->sb, wait_ms, prefree_segments(sbi), free_segments(sbi)); @@ -148,6 +164,7 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) sbi->gc_thread = gc_th; init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); + init_waitqueue_head(&sbi->gc_thread->fggc_wq); sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(gc_th->f2fs_gc_task)) { @@ -165,6 +182,7 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi) if (!gc_th) return; kthread_stop(gc_th->f2fs_gc_task); + wake_up_all(&gc_th->fggc_wq); kfree(gc_th); sbi->gc_thread = NULL; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 0c8dae12dc51..3fe145e8e594 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -42,6 +42,12 @@ struct f2fs_gc_kthread { /* for changing gc mode */ unsigned int gc_wake; + + /* for GC_MERGE mount option */ + wait_queue_head_t fggc_wq; /* + * caller of f2fs_balance_fs() + * will wait on this wait queue. + */ }; struct gc_inode_list { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d04b449978aa..ecbfb63809f9 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -510,8 +510,19 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) * dir/node pages without enough free segments. 
*/ if (has_not_enough_free_secs(sbi, 0, 0)) { - down_write(&sbi->gc_lock); - f2fs_gc(sbi, false, false, false, NULL_SEGNO); + if (test_opt(sbi, GC_MERGE) && sbi->gc_thread && + sbi->gc_thread->f2fs_gc_task) { + DEFINE_WAIT(wait); + + prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait, + TASK_UNINTERRUPTIBLE); + wake_up(&sbi->gc_thread->gc_wait_queue_head); + io_schedule(); + finish_wait(&sbi->gc_thread->fggc_wq, &wait); + } else { + down_write(&sbi->gc_lock); + f2fs_gc(sbi, false, false, false, NULL_SEGNO); + } } } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index de543168b370..36b8d03ded95 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -147,6 +147,8 @@ enum { Opt_compress_log_size, Opt_compress_extension, Opt_atgc, + Opt_gc_merge, + Opt_nogc_merge, Opt_err, }; @@ -215,6 +217,8 @@ static match_table_t f2fs_tokens = { {Opt_compress_log_size, "compress_log_size=%u"}, {Opt_compress_extension, "compress_extension=%s"}, {Opt_atgc, "atgc"}, + {Opt_gc_merge, "gc_merge"}, + {Opt_nogc_merge, "nogc_merge"}, {Opt_err, NULL}, }; @@ -944,6 +948,12 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) case Opt_atgc: set_opt(sbi, ATGC); break; + case Opt_gc_merge: + set_opt(sbi, GC_MERGE); + break; + case Opt_nogc_merge: + clear_opt(sbi, GC_MERGE); + break; default: f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", p); @@ -1536,6 +1546,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) else if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF) seq_printf(seq, ",background_gc=%s", "off"); + if (test_opt(sbi, GC_MERGE)) + seq_puts(seq, ",gc_merge"); + if (test_opt(sbi, DISABLE_ROLL_FORWARD)) seq_puts(seq, ",disable_roll_forward"); if (test_opt(sbi, NORECOVERY)) @@ -1902,7 +1915,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * option. Also sync the filesystem. 
*/ if ((*flags & SB_RDONLY) || - F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF) { + (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF && + !test_opt(sbi, GC_MERGE))) { if (sbi->gc_thread) { f2fs_stop_gc_thread(sbi); need_restart_gc = true; @@ -3872,7 +3886,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) * If filesystem is not mounted as read-only then * do start the gc_thread. */ - if (F2FS_OPTION(sbi).bggc_mode != BGGC_MODE_OFF && !f2fs_readonly(sb)) { + if ((F2FS_OPTION(sbi).bggc_mode != BGGC_MODE_OFF || + test_opt(sbi, GC_MERGE)) && !f2fs_readonly(sb)) { /* After POR, we can run background GC thread.*/ err = f2fs_start_gc_thread(sbi); if (err) -- Gitee From f7733970e7dba8cc0bcf1693c27c6a98d25e4d39 Mon Sep 17 00:00:00 2001 From: Wang Xiaojun Date: Wed, 9 Oct 2019 10:49:18 +0800 Subject: [PATCH 2/3] f2fs: introduce a policy to optimize discard ohos inclusion category:feature issue: #I4TEGS CVE:N/A -------------------------------- This patch introduces a new discard policy. This policy adjusts the sending frequency and discard granularity based on the current disk fragmentation and I/O. This avoids interference with foreground I/Os and prolongs the component lifespan. Signed-off-by: Wang Xiaojun --- fs/f2fs/f2fs.h | 38 +++++++++- fs/f2fs/segment.c | 189 +++++++++++++++++++++++++++++++--------------- fs/f2fs/sysfs.c | 66 ++++++++++++++++ 3 files changed, 232 insertions(+), 61 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b2d734438aff..c7724661dd5e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -276,11 +276,17 @@ struct discard_entry { /* default discard granularity of inner discard thread, unit: block count */ #define DEFAULT_DISCARD_GRANULARITY 16 +#define DISCARD_GRAN_BL 16 +#define DISCARD_GRAN_BG 512 +#define DISCARD_GRAN_FORCE 1 /* max discard pend list number */ #define MAX_PLIST_NUM 512 #define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? 
\ (MAX_PLIST_NUM - 1) : ((blk_num) - 1)) +#define FS_FREE_SPACE_PERCENT 20 +#define DEVICE_FREE_SPACE_PERCENT 10 +#define HUNDRED_PERCENT 100 enum { D_PREP, /* initial */ @@ -319,24 +325,37 @@ struct discard_cmd { enum { DPOLICY_BG, + DPOLICY_BALANCE, DPOLICY_FORCE, DPOLICY_FSTRIM, DPOLICY_UMOUNT, MAX_DPOLICY, }; +enum { + SUB_POLICY_BIG, + SUB_POLICY_MID, + SUB_POLICY_SMALL, + NR_SUB_POLICY, +}; + +struct discard_sub_policy { + unsigned int max_requests; + int interval; +}; + struct discard_policy { int type; /* type of discard */ unsigned int min_interval; /* used for candidates exist */ unsigned int mid_interval; /* used for device busy */ unsigned int max_interval; /* used for candidates not exist */ - unsigned int max_requests; /* # of discards issued per round */ unsigned int io_aware_gran; /* minimum granularity discard not be aware of I/O */ bool io_aware; /* issue discard in idle time */ bool sync; /* submit discard with REQ_SYNC flag */ bool ordered; /* issue discard by lba order */ bool timeout; /* discard timeout for put_super */ unsigned int granularity; /* discard granularity */ + struct discard_sub_policy sub_policy[NR_SUB_POLICY]; }; struct discard_cmd_control { @@ -358,6 +377,7 @@ struct discard_cmd_control { atomic_t discard_cmd_cnt; /* # of cached cmd count */ struct rb_root_cached root; /* root of discard rb-tree */ bool rbtree_check; /* config for consistence check */ + int discard_type; /* discard type */ }; /* for the list of fsync inodes, used only during recovery */ @@ -3050,6 +3070,18 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, f2fs_record_iostat(sbi); } +static inline block_t fs_free_space_threshold(struct f2fs_sb_info *sbi) +{ + return (block_t)(SM_I(sbi)->main_segments * sbi->blocks_per_seg * + FS_FREE_SPACE_PERCENT) / HUNDRED_PERCENT; +} + +static inline block_t device_free_space_threshold(struct f2fs_sb_info *sbi) +{ + return (block_t)(SM_I(sbi)->main_segments * sbi->blocks_per_seg * + DEVICE_FREE_SPACE_PERCENT) 
/ HUNDRED_PERCENT; +} + #define __is_large_section(sbi) ((sbi)->segs_per_sec > 1) #define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META) @@ -3259,6 +3291,10 @@ void f2fs_destroy_node_manager_caches(void); /* * segment.c */ +unsigned long find_rev_next_bit(const unsigned long *addr, + unsigned long size, unsigned long offset); +unsigned long find_rev_next_zero_bit(const unsigned long *addr, + unsigned long size, unsigned long offset); bool f2fs_need_SSR(struct f2fs_sb_info *sbi); void f2fs_register_inmem_page(struct inode *inode, struct page *page); void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ecbfb63809f9..3fafcc0c5f7e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -30,6 +30,24 @@ static struct kmem_cache *discard_cmd_slab; static struct kmem_cache *sit_entry_set_slab; static struct kmem_cache *inmem_entry_slab; +static struct discard_policy dpolicys[MAX_DPOLICY] = { + {DPOLICY_BG, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME, + MAX_PLIST_NUM, false, true, false, false, DISCARD_GRAN_BG, + {{1, 0}, {0, 0}, {0, 0}}}, + {DPOLICY_BALANCE, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME, + MAX_PLIST_NUM - 1, true, true, false, false, DISCARD_GRAN_BL, + {{1, 0}, {2, 50}, {0, 0}}}, + {DPOLICY_FORCE, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME, + MAX_PLIST_NUM - 1, true, true, false, false, DISCARD_GRAN_FORCE, + {{1, 0}, {2, 50}, {4, 2000}}}, + {DPOLICY_FSTRIM, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME, + MAX_PLIST_NUM, false, true, false, false, DISCARD_GRAN_FORCE, + {{8, 0}, {8, 0}, {8, 0}}}, + {DPOLICY_UMOUNT, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME, + MAX_PLIST_NUM, false, true, false, false, DISCARD_GRAN_BG, + {{UINT_MAX, 0}, {0, 0}, {0, 0}}} +}; + static unsigned long __reverse_ulong(unsigned char *str) { unsigned long tmp = 0; @@ -93,7 +111,7 @@ static inline unsigned long 
__reverse_ffs(unsigned long word) * f2fs_set_bit(0, bitmap) => 1000 0000 * f2fs_set_bit(7, bitmap) => 0000 0001 */ -static unsigned long __find_rev_next_bit(const unsigned long *addr, +unsigned long find_rev_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { const unsigned long *p = addr + BIT_WORD(offset); @@ -129,7 +147,7 @@ static unsigned long __find_rev_next_bit(const unsigned long *addr, return result - size + __reverse_ffs(tmp); } -static unsigned long __find_rev_next_zero_bit(const unsigned long *addr, +unsigned long find_rev_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { const unsigned long *p = addr + BIT_WORD(offset); @@ -1109,7 +1127,7 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi, else size = max_blocks; map = (unsigned long *)(sentry->cur_valid_map); - offset = __find_rev_next_bit(map, size, offset); + offset = find_rev_next_bit(map, size, offset); f2fs_bug_on(sbi, offset != size); blk = START_BLOCK(sbi, segno + 1); } @@ -1117,43 +1135,41 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi, } static void __init_discard_policy(struct f2fs_sb_info *sbi, - struct discard_policy *dpolicy, + struct discard_policy *policy, int discard_type, unsigned int granularity) { - /* common policy */ - dpolicy->type = discard_type; - dpolicy->sync = true; - dpolicy->ordered = false; - dpolicy->granularity = granularity; - - dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; - dpolicy->io_aware_gran = MAX_PLIST_NUM; - dpolicy->timeout = false; + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; if (discard_type == DPOLICY_BG) { - dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; - dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME; - dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; - dpolicy->io_aware = true; - dpolicy->sync = false; - dpolicy->ordered = true; - if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) { - dpolicy->granularity = 1; - dpolicy->max_interval = 
DEF_MIN_DISCARD_ISSUE_TIME; - } + *policy = dpolicys[DPOLICY_BG]; + } else if (discard_type == DPOLICY_BALANCE) { + *policy = dpolicys[DPOLICY_BALANCE]; } else if (discard_type == DPOLICY_FORCE) { - dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; - dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME; - dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; - dpolicy->io_aware = false; + *policy = dpolicys[DPOLICY_FORCE]; } else if (discard_type == DPOLICY_FSTRIM) { - dpolicy->io_aware = false; + *policy = dpolicys[DPOLICY_FSTRIM]; + if (policy->granularity != granularity) + policy->granularity = granularity; } else if (discard_type == DPOLICY_UMOUNT) { - dpolicy->io_aware = false; - /* we need to issue all to keep CP_TRIMMED_FLAG */ - dpolicy->granularity = 1; - dpolicy->timeout = true; + *policy = dpolicys[DPOLICY_UMOUNT]; } + dcc->discard_type = discard_type; +} + +static void select_sub_discard_policy(struct discard_sub_policy **spolicy, + int index, struct discard_policy *dpolicy) +{ + if (dpolicy->type == DPOLICY_FSTRIM) { + *spolicy = &dpolicy->sub_policy[SUB_POLICY_BIG]; + return; + } + + if ((index + 1) >= DISCARD_GRAN_BG) + *spolicy = &dpolicy->sub_policy[SUB_POLICY_BIG]; + else if ((index + 1) >= DISCARD_GRAN_BL) + *spolicy = &dpolicy->sub_policy[SUB_POLICY_MID]; + else + *spolicy = &dpolicy->sub_policy[SUB_POLICY_SMALL]; } static void __update_discard_tree_range(struct f2fs_sb_info *sbi, @@ -1162,6 +1178,7 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy, + int spolicy_index, struct discard_cmd *dc, unsigned int *issued) { @@ -1173,9 +1190,12 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ? &(dcc->fstrim_list) : &(dcc->wait_list); int flag = dpolicy->sync ? 
REQ_SYNC : 0; + struct discard_sub_policy *spolicy = NULL; block_t lstart, start, len, total_len; int err = 0; + select_sub_discard_policy(&spolicy, spolicy_index, dpolicy); + if (dc->state != D_PREP) return 0; @@ -1191,7 +1211,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, dc->len = 0; - while (total_len && *issued < dpolicy->max_requests && !err) { + while (total_len && *issued < spolicy->max_requests && !err) { struct bio *bio = NULL; unsigned long flags; bool last = true; @@ -1202,7 +1222,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, } (*issued)++; - if (*issued == dpolicy->max_requests) + if (*issued == spolicy->max_requests) last = true; dc->len += len; @@ -1449,7 +1469,8 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, } static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, - struct discard_policy *dpolicy) + struct discard_policy *dpolicy, + int spolicy_index) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_cmd *prev_dc = NULL, *next_dc = NULL; @@ -1459,8 +1480,11 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, unsigned int pos = dcc->next_pos; unsigned int issued = 0; bool io_interrupted = false; + struct discard_sub_policy *spolicy = NULL; + select_sub_discard_policy(&spolicy, spolicy_index, dpolicy); mutex_lock(&dcc->cmd_lock); + dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root, NULL, pos, (struct rb_entry **)&prev_dc, @@ -1484,9 +1508,9 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, } dcc->next_pos = dc->lstart + dc->len; - err = __submit_discard_cmd(sbi, dpolicy, dc, &issued); + err = __submit_discard_cmd(sbi, dpolicy, spolicy_index, dc, &issued); - if (issued >= dpolicy->max_requests) + if (issued >= spolicy->max_requests) break; next: node = rb_next(&dc->rb_node); @@ -1519,11 +1543,19 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, struct blk_plug plug; int i, issued; bool 
io_interrupted = false; + struct discard_sub_policy *spolicy = NULL; if (dpolicy->timeout) f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT); + /* only do this check in CHECK_FS, may be time consumed */ + if (unlikely(dcc->rbtree_check)) { + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, &dcc->root, false)); + mutex_unlock(&dcc->cmd_lock); + } retry: + blk_start_plug(&plug); issued = 0; for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { if (dpolicy->timeout && @@ -1533,8 +1565,13 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, if (i + 1 < dpolicy->granularity) break; - if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered) - return __issue_discard_cmd_orderly(sbi, dpolicy); + select_sub_discard_policy(&spolicy, i, dpolicy); + + if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered) { + issued = __issue_discard_cmd_orderly(sbi, dpolicy, i); + blk_finish_plug(&plug); + return issued; + } pend_list = &dcc->pend_list[i]; @@ -1544,7 +1581,6 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, if (unlikely(dcc->rbtree_check)) f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, &dcc->root, false)); - blk_start_plug(&plug); list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); @@ -1555,22 +1591,24 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, if (dpolicy->io_aware && i < dpolicy->io_aware_gran && !is_idle(sbi, DISCARD_TIME)) { io_interrupted = true; - break; + goto skip; } - - __submit_discard_cmd(sbi, dpolicy, dc, &issued); - - if (issued >= dpolicy->max_requests) + __submit_discard_cmd(sbi, dpolicy, i, dc, &issued); +skip: + if (issued >= spolicy->max_requests) break; } - blk_finish_plug(&plug); next: mutex_unlock(&dcc->cmd_lock); - if (issued >= dpolicy->max_requests || io_interrupted) + if (issued >= spolicy->max_requests || io_interrupted) break; } + blk_finish_plug(&plug); + if (spolicy) + dpolicy->min_interval = spolicy->interval; + if (dpolicy->type == 
DPOLICY_UMOUNT && issued) { __wait_all_discard_cmd(sbi, dpolicy); goto retry; @@ -1731,8 +1769,7 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi) struct discard_policy dpolicy; bool dropped; - __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT, - dcc->discard_granularity); + __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT, 0); __issue_discard_cmd(sbi, &dpolicy); dropped = __drop_discard_cmd(sbi); @@ -1743,6 +1780,29 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi) return dropped; } +static int select_discard_type(struct f2fs_sb_info *sbi) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + block_t user_block_count = sbi->user_block_count; + block_t ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg; + block_t fs_available_blocks = user_block_count - + valid_user_blocks(sbi) + ovp_count; + int discard_type; + + if (fs_available_blocks >= fs_free_space_threshold(sbi) && + fs_available_blocks - dcc->undiscard_blks >= + device_free_space_threshold(sbi)) { + discard_type = DPOLICY_BG; + } else if (fs_available_blocks < fs_free_space_threshold(sbi) && + fs_available_blocks - dcc->undiscard_blks < + device_free_space_threshold(sbi)) { + discard_type = DPOLICY_FORCE; + } else { + discard_type = DPOLICY_BALANCE; + } + return discard_type; +} + static int issue_discard_thread(void *data) { struct f2fs_sb_info *sbi = data; @@ -1750,13 +1810,13 @@ static int issue_discard_thread(void *data) wait_queue_head_t *q = &dcc->discard_wait_queue; struct discard_policy dpolicy; unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; - int issued; + int issued, discard_type; set_freezable(); do { - __init_discard_policy(sbi, &dpolicy, DPOLICY_BG, - dcc->discard_granularity); + discard_type = select_discard_type(sbi); + __init_discard_policy(sbi, &dpolicy, discard_type, 0); wait_event_interruptible_timeout(*q, kthread_should_stop() || freezing(current) || @@ -1782,7 +1842,7 @@ static int issue_discard_thread(void *data) } if (sbi->gc_mode == 
GC_URGENT_HIGH) - __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1); + __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 0); sb_start_intwrite(sbi->sb); @@ -1927,11 +1987,11 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, while (force || SM_I(sbi)->dcc_info->nr_discards <= SM_I(sbi)->dcc_info->max_discards) { - start = __find_rev_next_bit(dmap, max_blocks, end + 1); + start = find_rev_next_bit(dmap, max_blocks, end + 1); if (start >= max_blocks) break; - end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); + end = find_rev_next_zero_bit(dmap, max_blocks, start + 1); if (force && start && end != max_blocks && (end - start) < cpc->trim_minlen) continue; @@ -2099,7 +2159,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) if (!dcc) return -ENOMEM; - dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY; + dcc->discard_granularity = DISCARD_GRAN_BG; INIT_LIST_HEAD(&dcc->entry_list); for (i = 0; i < MAX_PLIST_NUM; i++) INIT_LIST_HEAD(&dcc->pend_list[i]); @@ -2642,7 +2702,7 @@ static void __next_free_blkoff(struct f2fs_sb_info *sbi, for (i = 0; i < entries; i++) target_map[i] = ckpt_map[i] | cur_map[i]; - pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start); + pos = find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start); seg->next_blkoff = pos; } @@ -2673,7 +2733,7 @@ bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno) for (i = 0; i < entries; i++) target_map[i] = ckpt_map[i] | cur_map[i]; - pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, 0); + pos = find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, 0); return pos < sbi->blocks_per_seg; } @@ -3014,8 +3074,17 @@ static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi, struct rb_node **insert_p = NULL, *insert_parent = NULL; struct discard_cmd *dc; struct blk_plug plug; + struct discard_sub_policy *spolicy = NULL; int issued; unsigned int trimmed = 0; + /* fstrim each time 8 
discard without no interrupt */ + select_sub_discard_policy(&spolicy, 0, dpolicy); + + if (dcc->rbtree_check) { + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, &dcc->root, false)); + mutex_unlock(&dcc->cmd_lock); + } next: issued = 0; @@ -3047,9 +3116,9 @@ static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi, goto skip; } - err = __submit_discard_cmd(sbi, dpolicy, dc, &issued); + err = __submit_discard_cmd(sbi, dpolicy, 0, dc, &issued); - if (issued >= dpolicy->max_requests) { + if (issued >= spolicy->max_requests) { start = dc->lstart + dc->len; if (err) diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index b8850c81068a..f2eb96c2fd37 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -542,6 +542,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_type, discard_type); F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); @@ -631,6 +632,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(main_blkaddr), ATTR_LIST(max_small_discards), ATTR_LIST(discard_granularity), + ATTR_LIST(discard_type), ATTR_LIST(batched_trim_sections), ATTR_LIST(ipu_policy), ATTR_LIST(min_ipu_util), @@ -908,6 +910,66 @@ static int __maybe_unused victim_bits_seq_show(struct seq_file *seq, return 0; } +static int undiscard_info_seq_show(struct seq_file *seq, void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + struct sit_info *sit_i = SIT_I(sbi); + unsigned int total_segs = 
le32_to_cpu(sbi->raw_super->segment_count_main); + unsigned int total = 0; + unsigned int i, j; + + if (!f2fs_realtime_discard_enable(sbi)) + goto out; + + for (i = 0; i < total_segs; i++) { + struct seg_entry *se = get_seg_entry(sbi, i); + unsigned int entries = SIT_VBLOCK_MAP_SIZE / + sizeof(unsigned long); + unsigned int max_blocks = sbi->blocks_per_seg; + unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; + unsigned long *discard_map = (unsigned long *)se->discard_map; + unsigned long *dmap = SIT_I(sbi)->tmp_map; + int start = 0, end = -1; + + down_write(&sit_i->sentry_lock); + if (se->valid_blocks == max_blocks) { + up_write(&sit_i->sentry_lock); + continue; + } + + if (se->valid_blocks == 0) { + mutex_lock(&dirty_i->seglist_lock); + if (test_bit((int)i, dirty_i->dirty_segmap[PRE])) + total += 512; + mutex_unlock(&dirty_i->seglist_lock); + } else { + for (j = 0; j < entries; j++) + dmap[j] = ~ckpt_map[j] & ~discard_map[j]; + while (1) { + start = (int)find_rev_next_bit(dmap, + (unsigned long)max_blocks, + (unsigned long)(end + 1)); + + if ((unsigned int)start >= max_blocks) + break; + + end = (int)find_rev_next_zero_bit(dmap, + (unsigned long)max_blocks, + (unsigned long)(start + 1)); + total += (unsigned int)(end - start); + } + } + + up_write(&sit_i->sentry_lock); + } + +out: + seq_printf(seq, "total undiscard:%u K\n", total * 4); + return 0; +} + int __init f2fs_init_sysfs(void) { int ret; @@ -964,6 +1026,9 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) iostat_info_seq_show, sb); proc_create_single_data("victim_bits", S_IRUGO, sbi->s_proc, victim_bits_seq_show, sb); + proc_create_single_data("undiscard_info", S_IRUGO, sbi->s_proc, + undiscard_info_seq_show, sb); + } return 0; } @@ -975,6 +1040,7 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi) remove_proc_entry("segment_info", sbi->s_proc); remove_proc_entry("segment_bits", sbi->s_proc); remove_proc_entry("victim_bits", sbi->s_proc); + remove_proc_entry("undiscard_info", 
sbi->s_proc); remove_proc_entry(sbi->sb->s_id, f2fs_proc_root); } kobject_del(&sbi->s_kobj); -- Gitee From ed88bae9cf7caee4fff8650f6bc8af01b7d31570 Mon Sep 17 00:00:00 2001 From: Wang Xiaojun Date: Thu, 21 Nov 2019 09:51:15 +0800 Subject: [PATCH 3/3] f2fs: add f2fs grading ssr feature ohos inclusion category:feature issue: #I4TEGS CVE:N/A -------------------------------- In order to reduce segment fragmentation and improve I/O performance, we can use SSR for small files and LFS for big files. Since small files will use SSR, segment fragmentation is reduced and more sequential segments are left for big files, so I/O performance is improved. Signed-off-by: Wang Xiaojun --- fs/f2fs/Kconfig | 7 +++ fs/f2fs/data.c | 12 ++-- fs/f2fs/f2fs.h | 20 ++++++- fs/f2fs/gc.c | 2 +- fs/f2fs/segment.c | 107 +++++++++++++++++++++++++++++++++--- fs/f2fs/segment.h | 31 ++++++++++- fs/f2fs/super.c | 46 +++++++++++++++- fs/f2fs/sysfs.c | 42 +++++++++++++- include/trace/events/f2fs.h | 33 +++++++++++ 9 files changed, 282 insertions(+), 18 deletions(-) diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index d13c5c6a9787..3dfc4f60de0c 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -137,3 +137,10 @@ config F2FS_FS_LZORLE default y help Support LZO-RLE compress algorithm, if unsure, say Y. 
+ +config F2FS_GRADING_SSR + bool "F2FS grading ssr" + depends on F2FS_FS + default y + help + use grading ssr to improve the end performance diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1b11a42847c4..f54de04e6b87 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1390,7 +1390,7 @@ struct page *f2fs_get_new_data_page(struct inode *inode, return page; } -static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) +static int __allocate_data_block(struct dnode_of_data *dn, int seg_type, int contig_level) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_summary sum; @@ -1417,7 +1417,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); old_blkaddr = dn->data_blkaddr; f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr, - &sum, seg_type, NULL); + &sum, seg_type, NULL, contig_level); if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) invalidate_mapping_pages(META_MAPPING(sbi), old_blkaddr, old_blkaddr); @@ -1511,6 +1511,10 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, struct extent_info ei = {0,0,0}; block_t blkaddr; unsigned int start_pgofs; + int contig_level = SEQ_NONE; +#ifdef CONFIG_F2FS_GRADING_SSR + contig_level = check_io_seq(maxblocks); +#endif if (!maxblocks) return 0; @@ -1594,7 +1598,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, /* use out-place-update for driect IO under LFS mode */ if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && map->m_may_create) { - err = __allocate_data_block(&dn, map->m_seg_type); + err = __allocate_data_block(&dn, map->m_seg_type, contig_level); if (err) goto sync_out; blkaddr = dn.data_blkaddr; @@ -1615,7 +1619,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO && flag != F2FS_GET_BLOCK_DIO); err = __allocate_data_block(&dn, - map->m_seg_type); + map->m_seg_type, contig_level); if 
(!err) set_inode_flag(inode, FI_APPEND_WRITE); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c7724661dd5e..41222b59e596 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1363,6 +1363,20 @@ struct decompress_io_ctx { #define MAX_COMPRESS_LOG_SIZE 8 #define MAX_COMPRESS_WINDOW_SIZE(log_size) ((PAGE_SIZE) << (log_size)) +#ifdef CONFIG_F2FS_GRADING_SSR +struct f2fs_hot_cold_params { + unsigned int enable; + unsigned int hot_data_lower_limit; + unsigned int hot_data_waterline; + unsigned int warm_data_lower_limit; + unsigned int warm_data_waterline; + unsigned int hot_node_lower_limit; + unsigned int hot_node_waterline; + unsigned int warm_node_lower_limit; + unsigned int warm_node_waterline; +}; +#endif + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ @@ -1569,6 +1583,10 @@ struct f2fs_sb_info { struct kmem_cache *page_array_slab; /* page array entry */ unsigned int page_array_slab_size; /* default page array slab size */ #endif + +#ifdef CONFIG_F2FS_GRADING_SSR + struct f2fs_hot_cold_params hot_cold_params; +#endif }; struct f2fs_private_dio { @@ -3352,7 +3370,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, - struct f2fs_io_info *fio); + struct f2fs_io_info *fio, int contig_level); void f2fs_wait_on_page_writeback(struct page *page, enum page_type type, bool ordered, bool locked); void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index cb3e7808d0e4..a981e466cc7d 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1238,7 +1238,7 @@ static int move_data_block(struct inode *inode, block_t bidx, } f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, - &sum, type, NULL); + &sum, type, NULL, SEQ_NONE); fio.encrypted_page = 
f2fs_pagecache_get_page(META_MAPPING(fio.sbi), newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3fafcc0c5f7e..24d22c2954b5 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -201,6 +201,75 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi) SM_I(sbi)->min_ssr_sections + reserved_sections(sbi)); } +#ifdef CONFIG_F2FS_GRADING_SSR +static bool need_ssr_by_type(struct f2fs_sb_info *sbi, int type, int contig_level) +{ + int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); + int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); + int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA); + u64 valid_blocks = sbi->total_valid_block_count; + u64 total_blocks = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; + u64 left_space = (total_blocks - valid_blocks) << 2; + unsigned int free_segs = free_segments(sbi); + unsigned int ovp_segments = overprovision_segments(sbi); + unsigned int lower_limit = 0; + unsigned int waterline = 0; + int dirty_sum = node_secs + 2 * dent_secs + imeta_secs; + + if (sbi->hot_cold_params.enable == GRADING_SSR_OFF) + return f2fs_need_SSR(sbi); + if (f2fs_lfs_mode(sbi)) + return false; + if (sbi->gc_mode == GC_URGENT_HIGH) + return true; + if (contig_level == SEQ_256BLKS && type == CURSEG_WARM_DATA && + free_sections(sbi) > dirty_sum + 3 * reserved_sections(sbi) / 2) + return false; + if (free_sections(sbi) <= (unsigned int)(dirty_sum + 2 * reserved_sections(sbi))) + return true; + if (contig_level >= SEQ_32BLKS || total_blocks <= SSR_MIN_BLKS_LIMIT) + return false; + + left_space -= ovp_segments * KBS_PER_SEGMENT; + if (unlikely(left_space == 0)) + return false; + + switch (type) { + case CURSEG_HOT_DATA: + lower_limit = sbi->hot_cold_params.hot_data_lower_limit; + waterline = sbi->hot_cold_params.hot_data_waterline; + break; + case CURSEG_WARM_DATA: + lower_limit = sbi->hot_cold_params.warm_data_lower_limit; + waterline = sbi->hot_cold_params.warm_data_waterline; + break; + case 
CURSEG_HOT_NODE: + lower_limit = sbi->hot_cold_params.hot_node_lower_limit; + waterline = sbi->hot_cold_params.hot_node_waterline; + break; + case CURSEG_WARM_NODE: + lower_limit = sbi->hot_cold_params.warm_node_lower_limit; + waterline = sbi->hot_cold_params.warm_node_waterline; + break; + default: + return false; + } + + if (left_space > lower_limit) + return false; + + if (div_u64((free_segs - ovp_segments) * 100, (left_space / KBS_PER_SEGMENT)) + <= waterline) { + trace_f2fs_grading_ssr_allocate( + (le64_to_cpu(sbi->raw_super->block_count) - sbi->total_valid_block_count), + free_segments(sbi), contig_level); + return true; + } else { + return false; + } +} +#endif + void f2fs_register_inmem_page(struct inode *inode, struct page *page) { struct inmem_pages *new; @@ -2940,7 +3009,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, * This function should be returned with success, otherwise BUG */ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, - int type, bool force) + int type, bool force, int contig_level) { struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -2953,8 +3022,12 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, is_next_segment_free(sbi, curseg, type) && likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) new_curseg(sbi, type, false); +#ifdef CONFIG_F2FS_GRADING_SSR + else if (need_ssr_by_type(sbi, type, contig_level) && get_ssr_segment(sbi, type, SSR, 0)) +#else else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0)) +#endif change_curseg(sbi, type, true); else new_curseg(sbi, type, false); @@ -3012,7 +3085,7 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, return; alloc: old_segno = curseg->segno; - SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true); + SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true, SEQ_NONE); locate_dirty_segment(sbi, old_segno); } @@ -3412,13 +3485,17 @@ static int __get_segment_type(struct f2fs_io_info *fio) void 
f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, - struct f2fs_io_info *fio) + struct f2fs_io_info *fio, int contig_level) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned long long old_mtime; bool from_gc = (type == CURSEG_ALL_DATA_ATGC); struct seg_entry *se = NULL; +#ifdef CONFIG_F2FS_GRADING_SSR + struct inode *inode = NULL; +#endif + int contig = SEQ_NONE; down_read(&SM_I(sbi)->curseg_lock); @@ -3465,11 +3542,25 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, update_sit_entry(sbi, old_blkaddr, -1); if (!__has_curseg_space(sbi, curseg)) { - if (from_gc) + if (from_gc) { get_atssr_segment(sbi, type, se->type, AT_SSR, se->mtime); - else - sit_i->s_ops->allocate_segment(sbi, type, false); + } else { +#ifdef CONFIG_F2FS_GRADING_SSR + if (contig_level != SEQ_NONE) { + contig = contig_level; + goto allocate_label; + } + + if (page && page->mapping && page->mapping != NODE_MAPPING(sbi) && + page->mapping != META_MAPPING(sbi)) { + inode = page->mapping->host; + contig = check_io_seq(get_dirty_pages(inode)); + } +allocate_label: +#endif + sit_i->s_ops->allocate_segment(sbi, type, false, contig); + } } /* * segment dirty status should be updated after segment allocation, @@ -3536,7 +3627,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) down_read(&fio->sbi->io_order_lock); reallocate: f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, - &fio->new_blkaddr, sum, type, fio); + &fio->new_blkaddr, sum, type, fio, SEQ_NONE); if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) invalidate_mapping_pages(META_MAPPING(fio->sbi), fio->old_blkaddr, fio->old_blkaddr); @@ -4905,7 +4996,7 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) f2fs_notice(sbi, "Assign new section to curseg[%d]: " "curseg[0x%x,0x%x]", type, cs->segno, 
cs->next_blkoff); - allocate_segment_by_default(sbi, type, true); + allocate_segment_by_default(sbi, type, true, SEQ_NONE); /* check consistency of the zone curseg pointed to */ if (check_zone_write_pointer(sbi, zbd, &zone)) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 1bf33fc27b8f..fa18a6b6fc4c 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -130,7 +130,18 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, (((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK) #define SECTOR_TO_BLOCK(sectors) \ ((sectors) >> F2FS_LOG_SECTORS_PER_BLOCK) +#ifdef CONFIG_F2FS_GRADING_SSR +#define KBS_PER_SEGMENT 2048 +#define SSR_MIN_BLKS_LIMIT (16 << 18) /* 16G */ +#define SSR_CONTIG_DIRTY_NUMS 32 /* Dirty pages for LFS alloction in grading ssr. */ +#define SSR_CONTIG_LARGE 256 /* Larege files */ +#endif +enum { + SEQ_NONE, + SEQ_32BLKS, + SEQ_256BLKS +}; /* * indicate a block allocation direction: RIGHT and LEFT. * RIGHT means allocating new sections towards the end of volume. 
@@ -180,6 +191,13 @@ enum { FORCE_FG_GC, }; +#ifdef CONFIG_F2FS_GRADING_SSR +enum { + GRADING_SSR_OFF = 0, + GRADING_SSR_ON +}; +#endif + /* for a function parameter to select a victim segment */ struct victim_sel_policy { int alloc_mode; /* LFS or SSR */ @@ -221,7 +239,7 @@ struct sec_entry { }; struct segment_allocation { - void (*allocate_segment)(struct f2fs_sb_info *, int, bool); + void (*allocate_segment)(struct f2fs_sb_info *, int, bool, int); }; #define MAX_SKIP_GC_COUNT 16 @@ -913,3 +931,14 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force) dcc->discard_wake = 1; wake_up_interruptible_all(&dcc->discard_wait_queue); } + +#ifdef CONFIG_F2FS_GRADING_SSR +static inline int check_io_seq(int blks) +{ + if (blks >= SSR_CONTIG_LARGE) + return SEQ_256BLKS; + if (blks >= SSR_CONTIG_DIRTY_NUMS) + return SEQ_32BLKS; + return SEQ_NONE; +} +#endif diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 36b8d03ded95..b305e024d46f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -36,6 +36,19 @@ #define CREATE_TRACE_POINTS #include +#ifdef CONFIG_F2FS_GRADING_SSR +#define SSR_DEFALT_SPACE_LIMIT (5<<20) /* 5G default space limit */ +#define SSR_DEFALT_WATERLINE 80 /* 80% default waterline */ +#define SSR_HN_SAPCE_LIMIT_128G (8<<20) /* 8G default sapce limit for 128G devices */ +#define SSR_HN_WATERLINE_128G 80 /* 80% default hot node waterline for 128G devices */ +#define SSR_WN_SAPCE_LIMIT_128G (5<<20) /* 5G default warm node sapce limit for 128G devices */ +#define SSR_WN_WATERLINE_128G 70 /* 70% default warm node waterline for 128G devices */ +#define SSR_HD_SAPCE_LIMIT_128G (8<<20) /* 8G default hot data sapce limit for 128G devices */ +#define SSR_HD_WATERLINE_128G 65 /* 65% default hot data waterline for 128G devices */ +#define SSR_WD_SAPCE_LIMIT_128G (5<<20) /* 5G default warm data sapce limit for 128G devices */ +#define SSR_WD_WATERLINE_128G 60 /* 60% default warm data waterline for 128G devices */ +#endif + static struct 
kmem_cache *f2fs_inode_cachep; #ifdef CONFIG_F2FS_FAULT_INJECTION @@ -3503,6 +3516,35 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) sbi->readdir_ra = 1; } +#ifdef CONFIG_F2FS_GRADING_SSR +static void f2fs_init_grading_ssr(struct f2fs_sb_info *sbi) +{ + u32 total_blocks = le64_to_cpu(sbi->raw_super->block_count) >> 18; + + if (total_blocks > 64) { /* 64G */ + sbi->hot_cold_params.hot_data_lower_limit = SSR_HD_SAPCE_LIMIT_128G; + sbi->hot_cold_params.hot_data_waterline = SSR_HD_WATERLINE_128G; + sbi->hot_cold_params.warm_data_lower_limit = SSR_WD_SAPCE_LIMIT_128G; + sbi->hot_cold_params.warm_data_waterline = SSR_WD_WATERLINE_128G; + sbi->hot_cold_params.hot_node_lower_limit = SSR_HD_SAPCE_LIMIT_128G; + sbi->hot_cold_params.hot_node_waterline = SSR_HN_WATERLINE_128G; + sbi->hot_cold_params.warm_node_lower_limit = SSR_WN_SAPCE_LIMIT_128G; + sbi->hot_cold_params.warm_node_waterline = SSR_WN_WATERLINE_128G; + sbi->hot_cold_params.enable = GRADING_SSR_OFF; + } else { + sbi->hot_cold_params.hot_data_lower_limit = SSR_DEFALT_SPACE_LIMIT; + sbi->hot_cold_params.hot_data_waterline = SSR_DEFALT_WATERLINE; + sbi->hot_cold_params.warm_data_lower_limit = SSR_DEFALT_SPACE_LIMIT; + sbi->hot_cold_params.warm_data_waterline = SSR_DEFALT_WATERLINE; + sbi->hot_cold_params.hot_node_lower_limit = SSR_DEFALT_SPACE_LIMIT; + sbi->hot_cold_params.hot_node_waterline = SSR_DEFALT_WATERLINE; + sbi->hot_cold_params.warm_node_lower_limit = SSR_DEFALT_SPACE_LIMIT; + sbi->hot_cold_params.warm_node_waterline = SSR_DEFALT_WATERLINE; + sbi->hot_cold_params.enable = GRADING_SSR_OFF; + } +} +#endif + static int f2fs_fill_super(struct super_block *sb, void *data, int silent) { struct f2fs_sb_info *sbi; @@ -3795,7 +3837,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) err = -ENOMEM; goto free_node_inode; } - +#ifdef CONFIG_F2FS_GRADING_SSR + f2fs_init_grading_ssr(sbi); +#endif err = f2fs_register_sysfs(sbi); if (err) goto free_root_inode; diff --git 
a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index f2eb96c2fd37..c90280c3168f 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -34,6 +34,9 @@ enum { FAULT_INFO_TYPE, /* struct f2fs_fault_info */ #endif RESERVED_BLOCKS, /* struct f2fs_sb_info */ +#ifdef CONFIG_F2FS_GRADING_SSR + F2FS_HOT_COLD_PARAMS, /* struct f2fs_hot_cold_params */ +#endif }; struct f2fs_attr { @@ -61,6 +64,10 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) return (unsigned char *)NM_I(sbi); else if (struct_type == F2FS_SBI || struct_type == RESERVED_BLOCKS) return (unsigned char *)sbi; +#ifdef CONFIG_F2FS_GRADING_SSR + else if (struct_type == F2FS_HOT_COLD_PARAMS) + return (unsigned char *)&sbi->hot_cold_params; +#endif #ifdef CONFIG_F2FS_FAULT_INJECTION else if (struct_type == FAULT_INFO_RATE || struct_type == FAULT_INFO_TYPE) @@ -569,6 +576,26 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_period_ms, iostat_period_ms); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list); +#ifdef CONFIG_F2FS_GRADING_SSR +F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params, + hc_hot_data_lower_limit, hot_data_lower_limit); +F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params, + hc_hot_data_waterline, hot_data_waterline); +F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params, + hc_warm_data_lower_limit, warm_data_lower_limit); +F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params, + hc_warm_data_waterline, warm_data_waterline); +F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params, + hc_hot_node_lower_limit, hot_node_lower_limit); +F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params, + hc_hot_node_waterline, hot_node_waterline); +F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params, + hc_warm_node_lower_limit, warm_node_lower_limit); +F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params, + 
hc_warm_node_waterline, warm_node_waterline); +F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params, + hc_enable, enable); +#endif #ifdef CONFIG_F2FS_FAULT_INJECTION F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); @@ -679,6 +706,17 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(moved_blocks_foreground), ATTR_LIST(moved_blocks_background), ATTR_LIST(avg_vblocks), +#endif +#ifdef CONFIG_F2FS_GRADING_SSR + ATTR_LIST(hc_hot_data_lower_limit), + ATTR_LIST(hc_hot_data_waterline), + ATTR_LIST(hc_warm_data_lower_limit), + ATTR_LIST(hc_warm_data_waterline), + ATTR_LIST(hc_hot_node_lower_limit), + ATTR_LIST(hc_hot_node_waterline), + ATTR_LIST(hc_warm_node_lower_limit), + ATTR_LIST(hc_warm_node_waterline), + ATTR_LIST(hc_enable), #endif NULL, }; @@ -919,6 +957,8 @@ static int undiscard_info_seq_show(struct seq_file *seq, void *offset) unsigned int total_segs = le32_to_cpu(sbi->raw_super->segment_count_main); unsigned int total = 0; unsigned int i, j; + unsigned int max_blocks = sbi->blocks_per_seg; + unsigned long *dmap = SIT_I(sbi)->tmp_map; if (!f2fs_realtime_discard_enable(sbi)) goto out; @@ -927,10 +967,8 @@ static int undiscard_info_seq_show(struct seq_file *seq, void *offset) struct seg_entry *se = get_seg_entry(sbi, i); unsigned int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); - unsigned int max_blocks = sbi->blocks_per_seg; unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; unsigned long *discard_map = (unsigned long *)se->discard_map; - unsigned long *dmap = SIT_I(sbi)->tmp_map; int start = 0, end = -1; down_write(&sit_i->sentry_lock); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 56b113e3cd6a..b4fe1db78eae 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1957,6 +1957,39 @@ TRACE_EVENT(f2fs_fiemap, __entry->ret) ); +#ifdef CONFIG_F2FS_GRADING_SSR 
+DECLARE_EVENT_CLASS(f2fs_grading_ssr, + + TP_PROTO(unsigned int left, unsigned int free, + unsigned int seq), + + TP_ARGS(left, free, seq), + + TP_STRUCT__entry( + __field(unsigned int, left) + __field(unsigned int, free) + __field(unsigned int, seq) + ), + + TP_fast_assign( + __entry->left = left; + __entry->free = free; + __entry->seq = seq; + ), + + TP_printk("ssr: left_space %u free_segments: %u is_seq: %u ", + __entry->left, __entry->free, __entry->seq) +); + +DEFINE_EVENT(f2fs_grading_ssr, f2fs_grading_ssr_allocate, + + TP_PROTO(unsigned int left, unsigned int free, + unsigned int seq), + + TP_ARGS(left, free, seq) +); +#endif + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ -- Gitee