From 896c1a4aa3926ffad805bdb05a6dee4a874b7728 Mon Sep 17 00:00:00 2001
From: Chao Yu
Date: Sat, 27 Mar 2021 17:57:06 +0800
Subject: [PATCH 1/2] f2fs: introduce gc_merge mount option

mainline inclusion
from v5.13-rc1
commit 5911d2d1d1a38b26585383478bd71d9254e48bdf
category: feature
issue: I4STY9
CVE: N/A

Signed-off-by: Wang Xiaojun

--------------------------------

In this patch, we add two new mount options: "gc_merge" and
"nogc_merge". When background_gc is on, the "gc_merge" option can be
set to let the background GC thread handle foreground GC requests;
this eliminates the sluggishness caused by a slow foreground GC
operation when GC is triggered from a process with limited I/O and
CPU resources.

Original idea is from Xiang.

Signed-off-by: Chao Yu
---
 Documentation/filesystems/f2fs.rst |  6 ++++++
 fs/f2fs/f2fs.h                     |  1 +
 fs/f2fs/gc.c                       | 26 ++++++++++++++++++++++----
 fs/f2fs/gc.h                       |  6 ++++++
 fs/f2fs/segment.c                  | 15 +++++++++++++--
 fs/f2fs/super.c                    | 19 +++++++++++++++++--
 6 files changed, 65 insertions(+), 8 deletions(-)
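Note (illustrative only, not part of the patch): the gc_merge handshake
between f2fs_balance_fs() and the GC thread can be pictured as a plain
producer/consumer pairing. The sketch below is a userspace analogue in
which pthread condition variables stand in for the kernel waitqueues
(gc_wait_queue_head and fggc_wq); all names and the condvar scheme are
this note's own, not f2fs API.

/*
 * Userspace analogue of the gc_merge handshake. A "foreground" task
 * posts a request and sleeps; the "GC thread" runs one pass and wakes
 * all waiters, mirroring wake_up_all(&gc_th->fggc_wq) at do_gc.
 * Build with: cc sketch.c -lpthread
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t gc_wait = PTHREAD_COND_INITIALIZER;   /* ~gc_wait_queue_head */
static pthread_cond_t fggc_done = PTHREAD_COND_INITIALIZER; /* ~fggc_wq */
static bool fggc_pending;

static void *gc_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while (!fggc_pending)			/* ~wait_event_interruptible_timeout() */
		pthread_cond_wait(&gc_wait, &lock);
	/* ... one f2fs_gc() pass would run here ... */
	fggc_pending = false;
	pthread_cond_broadcast(&fggc_done);	/* ~wake_up_all(&gc_th->fggc_wq) */
	pthread_mutex_unlock(&lock);
	return NULL;
}

static void balance_fs(void)			/* caller side of f2fs_balance_fs() */
{
	pthread_mutex_lock(&lock);
	fggc_pending = true;
	pthread_cond_signal(&gc_wait);		/* ~wake_up(&gc_th->gc_wait_queue_head) */
	while (fggc_pending)			/* ~prepare_to_wait()/io_schedule() */
		pthread_cond_wait(&fggc_done, &lock);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, gc_thread, NULL);
	balance_fs();
	pthread_join(t, NULL);
	puts("foreground request served by GC thread");
	return 0;
}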
diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index 8c0fbdd8ce6f..cb18f7c1bea3 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -110,6 +110,12 @@ background_gc=%s	 Turn on/off cleaning operations, namely garbage
 			 on synchronous garbage collection running in background.
 			 Default value for this option is on. So garbage collection is on
 			 by default.
+gc_merge		 When background_gc is on, this option can be enabled to
+			 let the background GC thread handle foreground GC requests;
+			 this can eliminate the sluggishness caused by a slow
+			 foreground GC operation when GC is triggered from a process
+			 with limited I/O and CPU resources.
+nogc_merge		 Disable the GC merge feature.
 disable_roll_forward	 Disable the roll-forward recovery routine
 norecovery		 Disable the roll-forward recovery routine, mounted read-
 			 only (i.e., -o ro,disable_roll_forward)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 2d7799bd30b1..b2d734438aff 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -99,6 +99,7 @@ extern const char *f2fs_fault_name[FAULT_MAX];
 #define F2FS_MOUNT_DISABLE_CHECKPOINT	0x02000000
 #define F2FS_MOUNT_NORECOVERY		0x04000000
 #define F2FS_MOUNT_ATGC			0x08000000
+#define F2FS_MOUNT_GC_MERGE		0x20000000
 
 #define F2FS_OPTION(sbi)	((sbi)->mount_opt)
 #define clear_opt(sbi, option)	(F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 72f227f6ebad..cb3e7808d0e4 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -31,19 +31,24 @@ static int gc_thread_func(void *data)
 	struct f2fs_sb_info *sbi = data;
 	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
 	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
+	wait_queue_head_t *fggc_wq = &sbi->gc_thread->fggc_wq;
 	unsigned int wait_ms;
 
 	wait_ms = gc_th->min_sleep_time;
 
 	set_freezable();
 	do {
-		bool sync_mode;
+		bool sync_mode, foreground = false;
 
 		wait_event_interruptible_timeout(*wq,
 				kthread_should_stop() || freezing(current) ||
+				waitqueue_active(fggc_wq) ||
 				gc_th->gc_wake,
 				msecs_to_jiffies(wait_ms));
 
+		if (test_opt(sbi, GC_MERGE) && waitqueue_active(fggc_wq))
+			foreground = true;
+
 		/* give it a try one time */
 		if (gc_th->gc_wake)
 			gc_th->gc_wake = 0;
@@ -90,7 +95,10 @@ static int gc_thread_func(void *data)
 			goto do_gc;
 		}
 
-		if (!down_write_trylock(&sbi->gc_lock)) {
+		if (foreground) {
+			down_write(&sbi->gc_lock);
+			goto do_gc;
+		} else if (!down_write_trylock(&sbi->gc_lock)) {
 			stat_other_skip_bggc_count(sbi);
 			goto next;
 		}
@@ -107,14 +115,22 @@ static int gc_thread_func(void *data)
 		else
 			increase_sleep_time(gc_th, &wait_ms);
 do_gc:
-		stat_inc_bggc_count(sbi->stat_info);
+		if (!foreground)
+			stat_inc_bggc_count(sbi->stat_info);
 
 		sync_mode = F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC;
 
+		/* foreground GC was triggered via f2fs_balance_fs() */
+		if (foreground)
+			sync_mode = false;
+
 		/* if return value is not zero, no victim was selected */
-		if (f2fs_gc(sbi, sync_mode, true, false, NULL_SEGNO))
+		if (f2fs_gc(sbi, sync_mode, !foreground, false, NULL_SEGNO))
 			wait_ms = gc_th->no_gc_sleep_time;
 
+		if (foreground)
+			wake_up_all(&gc_th->fggc_wq);
+
 		trace_f2fs_background_gc(sbi->sb, wait_ms,
 				prefree_segments(sbi), free_segments(sbi));
@@ -148,6 +164,7 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
 
 	sbi->gc_thread = gc_th;
 	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
+	init_waitqueue_head(&sbi->gc_thread->fggc_wq);
 	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
 			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
 	if (IS_ERR(gc_th->f2fs_gc_task)) {
@@ -165,6 +182,7 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
 	if (!gc_th)
 		return;
 	kthread_stop(gc_th->f2fs_gc_task);
+	wake_up_all(&gc_th->fggc_wq);
 	kfree(gc_th);
 	sbi->gc_thread = NULL;
 }
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 0c8dae12dc51..3fe145e8e594 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -42,6 +42,12 @@ struct f2fs_gc_kthread {
 
 	/* for changing gc mode */
 	unsigned int gc_wake;
+
+	/* for GC_MERGE mount option */
+	wait_queue_head_t fggc_wq;	/*
+					 * caller of f2fs_balance_fs()
+					 * will wait on this wait queue.
+					 */
 };
 
 struct gc_inode_list {
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index d04b449978aa..ecbfb63809f9 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -510,8 +510,19 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
 	 * dir/node pages without enough free segments.
 	 */
 	if (has_not_enough_free_secs(sbi, 0, 0)) {
-		down_write(&sbi->gc_lock);
-		f2fs_gc(sbi, false, false, false, NULL_SEGNO);
+		if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
+					sbi->gc_thread->f2fs_gc_task) {
+			DEFINE_WAIT(wait);
+
+			prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
+						TASK_UNINTERRUPTIBLE);
+			wake_up(&sbi->gc_thread->gc_wait_queue_head);
+			io_schedule();
+			finish_wait(&sbi->gc_thread->fggc_wq, &wait);
+		} else {
+			down_write(&sbi->gc_lock);
+			f2fs_gc(sbi, false, false, false, NULL_SEGNO);
+		}
 	}
 }
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index de543168b370..36b8d03ded95 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -147,6 +147,8 @@ enum {
 	Opt_compress_log_size,
 	Opt_compress_extension,
 	Opt_atgc,
+	Opt_gc_merge,
+	Opt_nogc_merge,
 	Opt_err,
 };
 
@@ -215,6 +217,8 @@ static match_table_t f2fs_tokens = {
 	{Opt_compress_log_size, "compress_log_size=%u"},
 	{Opt_compress_extension, "compress_extension=%s"},
 	{Opt_atgc, "atgc"},
+	{Opt_gc_merge, "gc_merge"},
+	{Opt_nogc_merge, "nogc_merge"},
 	{Opt_err, NULL},
 };
 
@@ -944,6 +948,12 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
 		case Opt_atgc:
 			set_opt(sbi, ATGC);
 			break;
+		case Opt_gc_merge:
+			set_opt(sbi, GC_MERGE);
+			break;
+		case Opt_nogc_merge:
+			clear_opt(sbi, GC_MERGE);
+			break;
 		default:
 			f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
				 p);
@@ -1536,6 +1546,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 	else if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF)
 		seq_printf(seq, ",background_gc=%s", "off");
 
+	if (test_opt(sbi, GC_MERGE))
+		seq_puts(seq, ",gc_merge");
+
 	if (test_opt(sbi, DISABLE_ROLL_FORWARD))
 		seq_puts(seq, ",disable_roll_forward");
 	if (test_opt(sbi, NORECOVERY))
@@ -1902,7 +1915,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	 * option. Also sync the filesystem.
 	 */
 	if ((*flags & SB_RDONLY) ||
-			F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF) {
+			(F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF &&
+			!test_opt(sbi, GC_MERGE))) {
 		if (sbi->gc_thread) {
 			f2fs_stop_gc_thread(sbi);
 			need_restart_gc = true;
@@ -3872,7 +3886,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	 * If filesystem is not mounted as read-only then
 	 * do start the gc_thread.
 	 */
-	if (F2FS_OPTION(sbi).bggc_mode != BGGC_MODE_OFF && !f2fs_readonly(sb)) {
+	if ((F2FS_OPTION(sbi).bggc_mode != BGGC_MODE_OFF ||
+		test_opt(sbi, GC_MERGE)) && !f2fs_readonly(sb)) {
 		/* After POR, we can run background GC thread.*/
 		err = f2fs_start_gc_thread(sbi);
 		if (err)
-- 
Gitee

From 4505f1296b20326d51dd5739dad76edc5e5e0d57 Mon Sep 17 00:00:00 2001
From: Wang Xiaojun
Date: Wed, 9 Oct 2019 10:49:18 +0800
Subject: [PATCH 2/2] f2fs: introduce a policy to optimize discard gc and ssr

ohos inclusion
category: feature
issue: I4STY9
CVE: N/A

--------------------------------

This patch introduces a new discard and GC policy. The policy adjusts
the discard issuing frequency and granularity based on the current
disk fragmentation and I/O load, which avoids interference with
foreground I/O and prolongs the device lifespan.

To reduce fragmented segments and improve I/O performance, we use SSR
allocation for small files and LFS allocation for big files. Because
small files are written with SSR, fragmented segments get reused, and
more sequential segments remain available for big files, so overall
I/O performance improves.
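As a rough illustration of the space-watermark selection described
above (illustrative only: the standalone program below mirrors
select_discard_type() added in this patch, using the 20%/10% thresholds
from FS_FREE_SPACE_PERCENT and DEVICE_FREE_SPACE_PERCENT; everything
else is made up for the example):

/*
 * Pick a discard policy from free-space watermarks, the way the
 * discard thread does below: lazy background discard when space is
 * plentiful, aggressive fine-grained discard when space is tight,
 * and a balanced policy in between.
 */
#include <stdio.h>

enum { DPOLICY_BG, DPOLICY_BALANCE, DPOLICY_FORCE };

static int select_discard_type(unsigned long long main_blocks,
			       unsigned long long avail_blocks,
			       unsigned long long undiscard_blocks)
{
	unsigned long long fs_thresh = main_blocks * 20 / 100;	/* FS_FREE_SPACE_PERCENT */
	unsigned long long dev_thresh = main_blocks * 10 / 100;	/* DEVICE_FREE_SPACE_PERCENT */

	if (avail_blocks >= fs_thresh &&
	    avail_blocks - undiscard_blocks >= dev_thresh)
		return DPOLICY_BG;	/* plenty of space: lazy, large granularity */
	if (avail_blocks < fs_thresh &&
	    avail_blocks - undiscard_blocks < dev_thresh)
		return DPOLICY_FORCE;	/* tight space: aggressive, fine granularity */
	return DPOLICY_BALANCE;		/* in between */
}

int main(void)
{
	/* hypothetical 64GiB main area in 4KiB blocks, 12% still available */
	unsigned long long main_blocks = 64ULL << 18;
	unsigned long long avail = main_blocks * 12 / 100;
	unsigned long long undiscard = main_blocks * 5 / 100;

	printf("policy = %d\n",
	       select_discard_type(main_blocks, avail, undiscard));
	return 0;
}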
Signed-off-by: Wang Xiaojun
---
 fs/f2fs/Kconfig             |   7 +
 fs/f2fs/data.c              |  18 +-
 fs/f2fs/f2fs.h              |  90 +++++++-
 fs/f2fs/gc.c                | 159 +++++++++++++-
 fs/f2fs/gc.h                |   9 +
 fs/f2fs/segment.c           | 413 ++++++++++++++++++++++++++++++------
 fs/f2fs/segment.h           |  33 ++-
 fs/f2fs/super.c             |  69 +++++-
 fs/f2fs/sysfs.c             | 106 +++++++++
 include/trace/events/f2fs.h |  33 +++
 10 files changed, 862 insertions(+), 75 deletions(-)

diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index d13c5c6a9787..3dfc4f60de0c 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -137,3 +137,10 @@ config F2FS_FS_LZORLE
 	default y
 	help
 	  Support LZO-RLE compress algorithm, if unsure, say Y.
+
+config F2FS_GRADING_SSR
+	bool "F2FS grading ssr"
+	depends on F2FS_FS
+	default y
+	help
+	  Use grading SSR to reduce fragmentation and improve sustained performance.
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 1b11a42847c4..1d0f1d47bc60 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1390,7 +1390,7 @@ struct page *f2fs_get_new_data_page(struct inode *inode,
 	return page;
 }
 
-static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
+static int __allocate_data_block(struct dnode_of_data *dn, int seg_type, int contig_level)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
 	struct f2fs_summary sum;
@@ -1417,7 +1417,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
 	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
 	old_blkaddr = dn->data_blkaddr;
 	f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
-					&sum, seg_type, NULL);
+					&sum, seg_type, NULL, contig_level);
 	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
 		invalidate_mapping_pages(META_MAPPING(sbi),
 					old_blkaddr, old_blkaddr);
@@ -1511,6 +1511,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 	struct extent_info ei = {0,0,0};
 	block_t blkaddr;
 	unsigned int start_pgofs;
+	int contig_level = SEQ_NONE;
+
+#ifdef CONFIG_F2FS_GRADING_SSR
+	contig_level = check_io_seq(maxblocks);
+#endif
 
 	if (!maxblocks)
 		return 0;
@@ -1594,7 +1599,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 	/* use out-place-update for driect IO under LFS mode */
 	if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
 							map->m_may_create) {
-		err = __allocate_data_block(&dn, map->m_seg_type);
+		err = __allocate_data_block(&dn, map->m_seg_type, contig_level);
 		if (err)
 			goto sync_out;
 		blkaddr = dn.data_blkaddr;
@@ -1614,8 +1619,8 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 		} else {
 			WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO &&
 				flag != F2FS_GET_BLOCK_DIO);
 			err = __allocate_data_block(&dn,
-						map->m_seg_type);
+						map->m_seg_type, contig_level);
 			if (!err)
 				set_inode_flag(inode, FI_APPEND_WRITE);
 		}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b2d734438aff..e1526151fb5e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -276,11 +276,17 @@ struct discard_entry {
 /* default discard granularity of inner discard thread, unit: block count */
 #define DEFAULT_DISCARD_GRANULARITY		16
+#define DISCARD_GRAN_BL		16
+#define DISCARD_GRAN_BG		512
+#define DISCARD_GRAN_FORCE	1
 
 /* max discard pend list number */
 #define MAX_PLIST_NUM		512
 #define plist_idx(blk_num)	((blk_num) >= MAX_PLIST_NUM ?		\
					(MAX_PLIST_NUM - 1) : ((blk_num) - 1))
+#define FS_FREE_SPACE_PERCENT		20
+#define DEVICE_FREE_SPACE_PERCENT	10
+#define HUNDRED_PERCENT			100
 
 enum {
 	D_PREP,			/* initial */
@@ -319,24 +325,37 @@ struct discard_cmd {
 
 enum {
 	DPOLICY_BG,
+	DPOLICY_BALANCE,
 	DPOLICY_FORCE,
 	DPOLICY_FSTRIM,
 	DPOLICY_UMOUNT,
 	MAX_DPOLICY,
 };
 
+enum {
+	SUB_POLICY_BIG,
+	SUB_POLICY_MID,
+	SUB_POLICY_SMALL,
+	NR_SUB_POLICY,
+};
+
+struct discard_sub_policy {
+	unsigned int max_requests;
+	int interval;
+};
+
 struct discard_policy {
 	int type;			/* type of discard */
 	unsigned int min_interval;	/* used for candidates exist */
 	unsigned int mid_interval;	/* used for device busy */
 	unsigned int max_interval;	/* used for candidates not exist */
-	unsigned int max_requests;	/* # of discards issued per round */
 	unsigned int io_aware_gran;	/* minimum granularity discard not be aware of I/O */
 	bool io_aware;			/* issue discard in idle time */
 	bool sync;			/* submit discard with REQ_SYNC flag */
 	bool ordered;			/* issue discard by lba order */
 	bool timeout;			/* discard timeout for put_super */
 	unsigned int granularity;	/* discard granularity */
+	struct discard_sub_policy sub_policy[NR_SUB_POLICY];
 };
@@ -358,6 +377,7 @@ struct discard_cmd_control {
 	atomic_t discard_cmd_cnt;	/* # of cached cmd count */
 	struct rb_root_cached root;	/* root of discard rb-tree */
 	bool rbtree_check;		/* config for consistence check */
+	int discard_type;		/* discard type */
 };
 
 /* for the list of fsync inodes, used only during recovery */
@@ -1343,6 +1363,20 @@ struct decompress_io_ctx {
 #define MAX_COMPRESS_LOG_SIZE		8
 #define MAX_COMPRESS_WINDOW_SIZE(log_size)	((PAGE_SIZE) << (log_size))
 
+#ifdef CONFIG_F2FS_GRADING_SSR
+struct f2fs_hot_cold_params {
+	unsigned int enable;
+	unsigned int hot_data_lower_limit;
+	unsigned int hot_data_waterline;
+	unsigned int warm_data_lower_limit;
+	unsigned int warm_data_waterline;
+	unsigned int hot_node_lower_limit;
+	unsigned int hot_node_waterline;
+	unsigned int warm_node_lower_limit;
+	unsigned int warm_node_waterline;
+};
+#endif
+
 struct f2fs_sb_info {
 	struct super_block *sb;			/* pointer to VFS super block */
 	struct proc_dir_entry *s_proc;		/* proc entry */
@@ -1502,7 +1536,10 @@ struct f2fs_sb_info {
 	unsigned int io_skip_bggc;		/* skip background gc for in-flight IO */
 	unsigned int other_skip_bggc;		/* skip background gc for other reasons */
 	unsigned int ndirty_inode[NR_INODE_TYPE];	/* # of dirty inodes */
+	struct mutex ba_mutex;
+	struct f2fs_block_alloc_info *ba_info;	/* block alloc statistics */
 #endif
+	atomic_t need_ssr_gc;
 	spinlock_t stat_lock;			/* lock for stat operations */
 
 	/* For app/fs IO statistics */
@@ -1548,6 +1585,11 @@ struct f2fs_sb_info {
 #ifdef CONFIG_F2FS_FS_COMPRESSION
 	struct kmem_cache *page_array_slab;	/* page array entry */
 	unsigned int page_array_slab_size;	/* default page array slab size */
+#endif
+	bool is_frag;				/* urgent gc flag */
+	unsigned long last_urgent_check;	/* last urgent check jiffies */
+#ifdef CONFIG_F2FS_GRADING_SSR
+	struct f2fs_hot_cold_params hot_cold_params;
 #endif
 };
@@ -3050,6 +3092,18 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
 	f2fs_record_iostat(sbi);
 }
 
+static inline block_t fs_free_space_threshold(struct f2fs_sb_info *sbi)
+{
+	return (block_t)(SM_I(sbi)->main_segments * sbi->blocks_per_seg *
+			FS_FREE_SPACE_PERCENT) / HUNDRED_PERCENT;
+}
+
+static inline block_t device_free_space_threshold(struct f2fs_sb_info *sbi)
+{
+	return (block_t)(SM_I(sbi)->main_segments * sbi->blocks_per_seg *
+			DEVICE_FREE_SPACE_PERCENT) / HUNDRED_PERCENT;
+}
+
 #define __is_large_section(sbi)	((sbi)->segs_per_sec > 1)
 
 #define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META)
@@ -3259,6 +3313,12 @@ void f2fs_destroy_node_manager_caches(void);
 /*
  * segment.c
  */
+unsigned long __find_rev_next_bit(const unsigned long *addr,
+			unsigned long size, unsigned long offset);
+unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
+			unsigned long size, unsigned long offset);
+int find_next_free_extent(const unsigned long *addr,
+			unsigned long size, unsigned long *offset);
 bool f2fs_need_SSR(struct f2fs_sb_info *sbi);
 void f2fs_register_inmem_page(struct inode *inode, struct page *page);
 void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure);
@@ -3316,7 +3376,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 			block_t old_blkaddr, block_t *new_blkaddr,
 			struct f2fs_summary *sum, int type,
-			struct f2fs_io_info *fio);
+			struct f2fs_io_info *fio, int contig_level);
 void f2fs_wait_on_page_writeback(struct page *page,
 			enum page_type type, bool ordered, bool locked);
 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr);
@@ -3655,6 +3715,27 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi);
 void __init f2fs_create_root_stats(void);
 void f2fs_destroy_root_stats(void);
 void f2fs_update_sit_info(struct f2fs_sb_info *sbi);
+struct f2fs_block_alloc_info {
+	unsigned long last_node_alloc_cnt, last_data_alloc_cnt;
+	unsigned long curr_node_alloc_cnt, curr_data_alloc_cnt;
+	unsigned long ssr_last_jiffies;
+};
+static inline struct f2fs_block_alloc_info *F2FS_BA_STAT(struct f2fs_sb_info *sbi)
+{
+	return (struct f2fs_block_alloc_info *)sbi->ba_info;
+}
+#define inc_ba_val(sbi, member, val)	do {				\
+	struct f2fs_block_alloc_info *ba = F2FS_BA_STAT(sbi);		\
+	if (ba)								\
+		ba->member += (val);					\
+} while (0)
+#define inc_ba_array_val(sbi, member, idx, val)	do {			\
+	struct f2fs_block_alloc_info *ba = F2FS_BA_STAT(sbi);		\
+	if (ba)								\
+		ba->member[(idx)] += (val);				\
+} while (0)
+#define ba_mutex_lock(mutex)	mutex_lock((mutex))
+#define ba_mutex_unlock(mutex)	mutex_unlock((mutex))
 #else
 #define stat_inc_cp_count(si)				do { } while (0)
 #define stat_inc_bg_cp_count(si)			do { } while (0)
@@ -3698,6 +3779,11 @@ static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
 static inline void __init f2fs_create_root_stats(void) { }
 static inline void f2fs_destroy_root_stats(void) { }
 static inline void f2fs_update_sit_info(struct f2fs_sb_info *sbi) {}
+
+#define inc_ba_val(sbi, member, val)
+#define inc_ba_array_val(sbi, member, idx, val)
+#define ba_mutex_lock(mutex)
+#define ba_mutex_unlock(mutex)
 #endif
 
 extern const struct file_operations f2fs_dir_operations;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index cb3e7808d0e4..69a8e3c27a7b 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -14,6 +14,8 @@
 #include
 #include
 #include
+#include
+#include
 
 #include "f2fs.h"
 #include "node.h"
@@ -26,6 +28,116 @@ static struct kmem_cache *victim_entry_slab;
 
 static unsigned int count_bits(const unsigned long *addr,
 				unsigned int offset, unsigned int len);
 
+#define MIN_WT		1000
+#define DEF_GC_BALANCE_MIN_SLEEP_TIME	10000	/* milliseconds */
+#define DEF_GC_FRAG_MIN_SLEEP_TIME	1000	/* milliseconds */
+#define GC_URGENT_DISABLE_BLKS	(32 << 18)	/* 32G */
+#define GC_URGENT_SPACE		(10 << 18)	/* 10G */
+#define GC_URGENT_INTERVAL	(10 * 60 * 1000)	/* 10 mins */
+#define BLOCK_COUNT_TYPE	10	/* 10 kinds of successive blocks */
+
+static bool __is_frag_urgent(struct f2fs_sb_info *sbi)
+{
+	unsigned int total_segs =
+			le32_to_cpu(sbi->raw_super->segment_count_main);
+	unsigned int i;
+	unsigned int block_count[BLOCK_COUNT_TYPE];
+	unsigned int tot_blocks = 0;
+	u64 total_blocks = le64_to_cpu(sbi->raw_super->block_count);
+	unsigned int valid_blocks = sbi->total_valid_block_count;
+
+	if (total_blocks < GC_URGENT_DISABLE_BLKS)
+		return false;
+
+	if (total_blocks - valid_blocks > GC_URGENT_SPACE)
+		return false;
+
+	memset(block_count, 0, sizeof(block_count));
+	for (i = 0; i < total_segs; i++) {
+		struct seg_entry *se = get_seg_entry(sbi, i);
+		unsigned long start = 0;
+		int blocks, index;
+
+		if (se->valid_blocks == 0) {
+			block_count[BLOCK_COUNT_TYPE - 1] += sbi->blocks_per_seg;
+			continue;
+		}
+		if (se->valid_blocks == sbi->blocks_per_seg)
+			continue;
+
+		while (start < sbi->blocks_per_seg) {
+			blocks = find_next_free_extent((unsigned long *)se->cur_valid_map,
+							sbi->blocks_per_seg,
+							&start);
+			if (unlikely(blocks < 0))
+				break;
+
+			index = ilog2(blocks);
+			if (unlikely(index >= BLOCK_COUNT_TYPE)) {
+				index = BLOCK_COUNT_TYPE - 1;
+				set_sbi_flag(sbi, SBI_NEED_FSCK);
+			}
+
+			block_count[index] += blocks;
+			tot_blocks += blocks;
+		}
+		cond_resched();
+	}
+
+	for (i = 0; i < BLOCK_COUNT_TYPE; i++)
+		f2fs_info(sbi, "block_cnt[%d]: %d\n", i, block_count[i]);
+
+	if ((block_count[0] + block_count[1]) >= (tot_blocks >> 1))
+		return true;
+
+	return false;
+}
+
+/* if invalid blocks in dirty segments is more than 10% of total free space */
+static inline bool is_reclaimable_dirty_blocks_enough(struct f2fs_sb_info *sbi)
+{
+	s64 total_free_blocks;
+	s64 total_reclaimable_blocks;
+
+	total_free_blocks = sbi->user_block_count - written_block_count(sbi);
+	total_reclaimable_blocks = total_free_blocks - free_user_blocks(sbi);
+
+	if (total_reclaimable_blocks <= 0)
+		return false;
+
+	if ((total_reclaimable_blocks * 10) <= total_free_blocks)
+		return false;
+
+	return true;
+}
+
+static bool is_frag_urgent(struct f2fs_sb_info *sbi)
+{
+	unsigned long next_check = sbi->last_urgent_check +
+			msecs_to_jiffies(GC_URGENT_INTERVAL);
+	bool ret;
+
+	if (time_after(jiffies, next_check)) {
+		sbi->last_urgent_check = jiffies;
+		sbi->is_frag = __is_frag_urgent(sbi);
+	}
+	ret = free_segments(sbi) < 3 * overprovision_segments(sbi) &&
+		is_reclaimable_dirty_blocks_enough(sbi) &&
+		sbi->is_frag;
+
+	return ret;
+}
+
+/*
+ * GC tuning ratio [0, 100] in performance mode
+ */
+static inline int gc_perf_ratio(struct f2fs_sb_info *sbi)
+{
+	block_t reclaimable_user_blocks = sbi->user_block_count -
+			written_block_count(sbi);
+	return reclaimable_user_blocks == 0 ? 100 :
+		div_u64(100ULL * free_user_blocks(sbi), reclaimable_user_blocks);
+}
+
 static int gc_thread_func(void *data)
 {
 	struct f2fs_sb_info *sbi = data;
@@ -39,11 +151,33 @@ static int gc_thread_func(void *data)
 	set_freezable();
 	do {
 		bool sync_mode, foreground = false;
+		int ssr_gc_count;
+
+		if (is_frag_urgent(sbi))
+			gc_th->gc_preference = GC_FRAG;
+		else if (div_u64(100ULL * written_block_count(sbi), sbi->user_block_count) > 90)
+			gc_th->gc_preference = GC_LIFETIME;
+		else if (gc_perf_ratio(sbi) < 10 && free_segments(sbi) <
+					3 * overprovision_segments(sbi))
+			gc_th->gc_preference = GC_PERF;
+		else
+			gc_th->gc_preference = GC_BALANCE;
+
+		if (gc_th->gc_preference == GC_PERF)
+			wait_ms = max(DEF_GC_BALANCE_MIN_SLEEP_TIME *
+					gc_perf_ratio(sbi) / 100, MIN_WT);
+		else if (gc_th->gc_preference == GC_BALANCE)
+			gc_th->min_sleep_time = DEF_GC_BALANCE_MIN_SLEEP_TIME;
+		else if (gc_th->gc_preference == GC_FRAG)
+			wait_ms = DEF_GC_FRAG_MIN_SLEEP_TIME;
+		else
+			gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
 
 		wait_event_interruptible_timeout(*wq,
 				kthread_should_stop() || freezing(current) ||
 				waitqueue_active(fggc_wq) ||
-				gc_th->gc_wake,
+				gc_th->gc_wake ||
+				atomic_read(&sbi->need_ssr_gc),
 				msecs_to_jiffies(wait_ms));
 
 		if (test_opt(sbi, GC_MERGE) && waitqueue_active(fggc_wq))
@@ -76,6 +210,17 @@ static int gc_thread_func(void *data)
 			continue;
 		}
 
+		ssr_gc_count = atomic_read(&sbi->need_ssr_gc);
+		if (ssr_gc_count) {
+			down_write(&sbi->gc_lock);
+			f2fs_gc(sbi, true, false, false, NULL_SEGNO);
+			atomic_sub(ssr_gc_count, &sbi->need_ssr_gc);
+			if (!has_not_enough_free_secs(sbi, 0, 0)) {
+				wake_up_all(&gc_th->fggc_wq);
+				goto next;
+			}
+		}
+
 		/*
 		 * [GC triggering condition]
 		 * 0. GC is not conducted currently.
@@ -156,11 +301,12 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
 	}
 
 	gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
-	gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
+	gc_th->min_sleep_time = DEF_GC_BALANCE_MIN_SLEEP_TIME;
 	gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
 	gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;
 
 	gc_th->gc_wake= 0;
+	gc_th->gc_preference = GC_BALANCE;
 
 	sbi->gc_thread = gc_th;
 	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
@@ -304,10 +450,12 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
 static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
 {
 	struct sit_info *sit_i = SIT_I(sbi);
+	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
 	unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
 	unsigned int start = GET_SEG_FROM_SEC(sbi, secno);
 	unsigned long long mtime = 0;
 	unsigned int vblocks;
+	unsigned int max_age;
 	unsigned char age = 0;
 	unsigned char u;
 	unsigned int i;
@@ -327,8 +475,11 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
 		sit_i->min_mtime = mtime;
 	if (mtime > sit_i->max_mtime)
 		sit_i->max_mtime = mtime;
+	/* Reduce the cost weight of age when free blocks less than 10% */
+	max_age = (gc_th && gc_th->gc_preference != GC_LIFETIME &&
+		gc_perf_ratio(sbi) < 10) ? max(10 * gc_perf_ratio(sbi), 1) : 100;
 	if (sit_i->max_mtime != sit_i->min_mtime)
-		age = 100 - div64_u64(100 * (mtime - sit_i->min_mtime),
+		age = max_age - div64_u64(max_age * (mtime - sit_i->min_mtime),
 				sit_i->max_mtime - sit_i->min_mtime);
 
 	return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
@@ -1238,7 +1389,7 @@ static int move_data_block(struct inode *inode, block_t bidx,
 	}
 
 	f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
-					&sum, type, NULL);
+					&sum, type, NULL, SEQ_NONE);
 
 	fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
 				newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 3fe145e8e594..3f0ae64209a0 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -30,6 +30,14 @@
 /* Search max. number of dirty segments to select a victim segment */
 #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
 
+/* GC preferences */
+enum {
+	GC_LIFETIME = 0,
+	GC_BALANCE,
+	GC_PERF,
+	GC_FRAG
+};
+
 struct f2fs_gc_kthread {
 	struct task_struct *f2fs_gc_task;
 	wait_queue_head_t gc_wait_queue_head;
@@ -41,6 +49,7 @@ struct f2fs_gc_kthread {
 	unsigned int no_gc_sleep_time;
 
 	/* for changing gc mode */
+	unsigned int gc_preference;
 	unsigned int gc_wake;
 
 	/* for GC_MERGE mount option */
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index ecbfb63809f9..5f5e1ca05503 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -13,6 +13,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -30,6 +31,24 @@ static struct kmem_cache *discard_cmd_slab;
 static struct kmem_cache *sit_entry_set_slab;
 static struct kmem_cache *inmem_entry_slab;
 
+static struct discard_policy dpolicys[MAX_DPOLICY] = {
+	{DPOLICY_BG, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME,
+		MAX_PLIST_NUM, false, true, false, false, DISCARD_GRAN_BG,
+		{{1, 0}, {0, 0}, {0, 0}}},
+	{DPOLICY_BALANCE, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME,
+		MAX_PLIST_NUM - 1, true, true, false, false, DISCARD_GRAN_BL,
+		{{1, 0}, {2, 50}, {0, 0}}},
+	{DPOLICY_FORCE, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME,
+		MAX_PLIST_NUM - 1, true, true, false, false, DISCARD_GRAN_FORCE,
+		{{1, 0}, {2, 50}, {4, 2000}}},
+	{DPOLICY_FSTRIM, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME,
+		MAX_PLIST_NUM, false, true, false, false, DISCARD_GRAN_FORCE,
+		{{8, 0}, {8, 0}, {8, 0}}},
+	{DPOLICY_UMOUNT, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME,
+		MAX_PLIST_NUM, false, true, false, false, DISCARD_GRAN_BG,
+		{{UINT_MAX, 0}, {0, 0}, {0, 0}}}
+};
+
 static unsigned long __reverse_ulong(unsigned char *str)
 {
 	unsigned long tmp = 0;
@@ -93,7 +112,7 @@ static inline unsigned long __reverse_ffs(unsigned long word)
  * f2fs_set_bit(0, bitmap) => 1000 0000
  * f2fs_set_bit(7, bitmap) => 0000 0001
  */
-static unsigned long __find_rev_next_bit(const unsigned long *addr,
+unsigned long __find_rev_next_bit(const unsigned long *addr,
 			unsigned long size, unsigned long offset)
 {
 	const unsigned long *p = addr + BIT_WORD(offset);
@@ -129,7 +148,7 @@ static unsigned long __find_rev_next_bit(const unsigned long *addr,
 	return result - size + __reverse_ffs(tmp);
 }
 
-static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
+unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
 			unsigned long size, unsigned long offset)
 {
 	const unsigned long *p = addr + BIT_WORD(offset);
@@ -166,6 +185,19 @@ static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
 	return result - size + __reverse_ffz(tmp);
 }
 
+int find_next_free_extent(const unsigned long *addr,
+			unsigned long size, unsigned long *offset)
+{
+	unsigned long pos, pos_zero_bit;
+
+	pos_zero_bit = __find_rev_next_zero_bit(addr, size, *offset);
+	if (pos_zero_bit >= size)
+		return -ENOSPC;
+	pos = __find_rev_next_bit(addr, size, pos_zero_bit);
+	*offset = pos;
+	return (int)(pos - pos_zero_bit);
+}
+
 bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
 {
 	int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
@@ -183,6 +215,74 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
 		SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
 }
 
+#ifdef CONFIG_F2FS_GRADING_SSR
+static bool need_ssr_by_type(struct f2fs_sb_info *sbi, int type, int contig_level)
+{
+	int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
+	int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
+	int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
+	u64 valid_blocks = sbi->total_valid_block_count;
+	u64 total_blocks = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
+	u64 left_space = (total_blocks - valid_blocks) << 2;
+	unsigned int free_segs = free_segments(sbi);
+	unsigned int ovp_segments = overprovision_segments(sbi);
+	unsigned int lower_limit = 0;
+	unsigned int waterline = 0;
+	int dirty_sum = node_secs + 2 * dent_secs + imeta_secs;
+
+	if (sbi->hot_cold_params.enable == GRADING_SSR_OFF)
+		return f2fs_need_SSR(sbi);
+	if (f2fs_lfs_mode(sbi))
+		return false;
+	if (sbi->gc_mode == GC_URGENT_HIGH)
+		return true;
+	if (contig_level == SEQ_256BLKS && type == CURSEG_WARM_DATA &&
+			free_sections(sbi) > dirty_sum + 3 * reserved_sections(sbi) / 2)
+		return false;
+	if (free_sections(sbi) <= (unsigned int)(dirty_sum + 2 * reserved_sections(sbi)))
+		return true;
+	if (contig_level >= SEQ_32BLKS || total_blocks <= SSR_LOWER_LIMIT_BLK)
+		return false;
+
+	left_space -= ovp_segments * KBS_PER_SEGMENT;
+	if (unlikely(left_space == 0))
+		return false;
+
+	switch (type) {
+	case CURSEG_HOT_DATA:
+		lower_limit = sbi->hot_cold_params.hot_data_lower_limit;
+		waterline = sbi->hot_cold_params.hot_data_waterline;
+		break;
+	case CURSEG_WARM_DATA:
+		lower_limit = sbi->hot_cold_params.warm_data_lower_limit;
+		waterline = sbi->hot_cold_params.warm_data_waterline;
+		break;
+	case CURSEG_HOT_NODE:
+		lower_limit = sbi->hot_cold_params.hot_node_lower_limit;
+		waterline = sbi->hot_cold_params.hot_node_waterline;
+		break;
+	case CURSEG_WARM_NODE:
+		lower_limit = sbi->hot_cold_params.warm_node_lower_limit;
+		waterline = sbi->hot_cold_params.warm_node_waterline;
+		break;
+	default:
+		return false;
+	}
+
+	if (left_space > lower_limit)
+		return false;
+	if (div_u64((free_segs - ovp_segments) * 100, (left_space / KBS_PER_SEGMENT))
+			<= waterline) {
+		trace_f2fs_grading_ssr_allocate(
+			(le64_to_cpu(sbi->raw_super->block_count) - sbi->total_valid_block_count),
+			free_segments(sbi), contig_level);
+		return true;
+	} else {
+		return false;
+	}
+}
+#endif
+
 void f2fs_register_inmem_page(struct inode *inode, struct page *page)
 {
 	struct inmem_pages *new;
@@ -487,6 +587,101 @@ int f2fs_commit_inmem_pages(struct inode *inode)
 	return err;
 }
 
+#ifdef CONFIG_F2FS_STAT_FS
+#define DEF_DIRTY_STAT_INTERVAL 15 /* 15 secs */
+static inline void f2fs_balance_decision(struct f2fs_sb_info *sbi,
+		bool *time_after, bool *balance_node, bool *balance_data)
+{
+	long diff_node_blocks, diff_data_blocks;
+	unsigned long last_jiffies;
+	struct timespec64 ts = {DEF_DIRTY_STAT_INTERVAL, 0};
+	unsigned long interval = timespec64_to_jiffies(&ts);
+	struct f2fs_block_alloc_info *ba = F2FS_BA_STAT(sbi);
+
+	ba_mutex_lock(&sbi->ba_mutex);
+	last_jiffies = ba->ssr_last_jiffies;
+	ba_mutex_unlock(&sbi->ba_mutex);
+
+	if (!time_after(jiffies, last_jiffies + interval)) {
+		*time_after = false;
+		return;
+	}
+
+	*time_after = true;
+
+	/* how many blocks are consumed during this interval */
+	ba_mutex_lock(&sbi->ba_mutex);
+
+	diff_node_blocks = (long)(ba->curr_node_alloc_cnt - ba->last_node_alloc_cnt);
+	diff_data_blocks = (long)(ba->curr_data_alloc_cnt - ba->last_data_alloc_cnt);
+
+	ba->last_node_alloc_cnt = ba->curr_node_alloc_cnt;
+	ba->last_data_alloc_cnt = ba->curr_data_alloc_cnt;
+	ba->ssr_last_jiffies = jiffies;
+
+	ba_mutex_unlock(&sbi->ba_mutex);
+
+	if (diff_node_blocks > (long)sbi->blocks_per_seg)
+		*balance_node = true;
+	else
+		*balance_node = false;
+
+	if (diff_data_blocks > (long)sbi->blocks_per_seg)
+		*balance_data = true;
+	else
+		*balance_data = false;
+}
+#else
+static inline void f2fs_balance_decision(struct f2fs_sb_info *sbi,
+		bool *time_after, bool *balance_node, bool *balance_data)
+{
+	*time_after = true;
+	*balance_node = true;
+	*balance_data = true;
+}
+#endif
+
+static bool need_balance_dirty_type(struct f2fs_sb_info *sbi)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	unsigned int dirty_node_segs = 0;
+	unsigned int dirty_data_segs = 0;
+	unsigned int dirty_segs;
+	bool time_after, balance_node, balance_data;
+	unsigned int randnum;
+	int i;
+
+	f2fs_balance_decision(sbi, &time_after, &balance_node, &balance_data);
+
+	if (!time_after)
+		return false;
+	if (!balance_node && !balance_data)
+		return false;
+
+	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
+		dirty_data_segs += dirty_i->nr_dirty[i];
+	for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++)
+		dirty_node_segs += dirty_i->nr_dirty[i];
+	dirty_segs = dirty_data_segs + dirty_node_segs;
+
+	if (!dirty_segs)
+		return false;
+
+	if (dirty_data_segs < reserved_segments(sbi) && balance_data) {
+		get_random_bytes(&randnum, sizeof(unsigned int));
+		if (randnum % 100 > dirty_data_segs * 100 / dirty_segs)
+			return true;
+	}
+
+	if (dirty_node_segs < reserved_segments(sbi) && balance_node) {
+		get_random_bytes(&randnum, sizeof(unsigned int));
+		if (randnum % 100 > dirty_node_segs * 100 / dirty_segs)
+			return true;
+	}
+
+	return false;
+}
+
 /*
  * This function balances dirty node and dentry pages.
  * In addition, it controls garbage collection.
@@ -523,6 +718,19 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
 			down_write(&sbi->gc_lock);
 			f2fs_gc(sbi, false, false, false, NULL_SEGNO);
 		}
+	} else if (f2fs_need_SSR(sbi) && need_balance_dirty_type(sbi)) {
+		if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
+					sbi->gc_thread->f2fs_gc_task) {
+			atomic_inc(&sbi->need_ssr_gc);
+			wake_up(&sbi->gc_thread->gc_wait_queue_head);
+		} else {
+			/*
+			 * if f2fs_gc_task is not available,
+			 * do f2fs_gc in the original task.
+			 */
+			down_write(&sbi->gc_lock);
+			f2fs_gc(sbi, true, false, false, NULL_SEGNO);
+		}
 	}
 }
@@ -1117,43 +1325,41 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
 }
 
 static void __init_discard_policy(struct f2fs_sb_info *sbi,
-				struct discard_policy *dpolicy,
+				struct discard_policy *policy,
 				int discard_type, unsigned int granularity)
 {
-	/* common policy */
-	dpolicy->type = discard_type;
-	dpolicy->sync = true;
-	dpolicy->ordered = false;
-	dpolicy->granularity = granularity;
-
-	dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
-	dpolicy->io_aware_gran = MAX_PLIST_NUM;
-	dpolicy->timeout = false;
+	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
 
 	if (discard_type == DPOLICY_BG) {
-		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
-		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
-		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
-		dpolicy->io_aware = true;
-		dpolicy->sync = false;
-		dpolicy->ordered = true;
-		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
-			dpolicy->granularity = 1;
-			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
-		}
+		*policy = dpolicys[DPOLICY_BG];
+	} else if (discard_type == DPOLICY_BALANCE) {
+		*policy = dpolicys[DPOLICY_BALANCE];
 	} else if (discard_type == DPOLICY_FORCE) {
-		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
-		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
-		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
-		dpolicy->io_aware = false;
+		*policy = dpolicys[DPOLICY_FORCE];
 	} else if (discard_type == DPOLICY_FSTRIM) {
-		dpolicy->io_aware = false;
+		*policy = dpolicys[DPOLICY_FSTRIM];
+		if (policy->granularity != granularity)
+			policy->granularity = granularity;
 	} else if (discard_type == DPOLICY_UMOUNT) {
-		dpolicy->io_aware = false;
-		/* we need to issue all to keep CP_TRIMMED_FLAG */
-		dpolicy->granularity = 1;
-		dpolicy->timeout = true;
+		*policy = dpolicys[DPOLICY_UMOUNT];
 	}
+	dcc->discard_type = discard_type;
+}
+
+static void select_sub_discard_policy(struct discard_sub_policy **spolicy,
+		int index, struct discard_policy *dpolicy)
+{
+	if (dpolicy->type == DPOLICY_FSTRIM) {
+		*spolicy = &dpolicy->sub_policy[SUB_POLICY_BIG];
+		return;
+	}
+
+	if ((index + 1) >= DISCARD_GRAN_BG)
+		*spolicy = &dpolicy->sub_policy[SUB_POLICY_BIG];
+	else if ((index + 1) >= DISCARD_GRAN_BL)
+		*spolicy = &dpolicy->sub_policy[SUB_POLICY_MID];
+	else
+		*spolicy = &dpolicy->sub_policy[SUB_POLICY_SMALL];
 }
 
 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
@@ -1162,6 +1368,7 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
 /* this function is copied from blkdev_issue_discard from block/blk-lib.c */
 static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
 						struct discard_policy *dpolicy,
+						int spolicy_index,
 						struct discard_cmd *dc,
 						unsigned int *issued)
 {
@@ -1173,9 +1380,12 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
 	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
 					&(dcc->fstrim_list) : &(dcc->wait_list);
 	int flag = dpolicy->sync ? REQ_SYNC : 0;
+	struct discard_sub_policy *spolicy = NULL;
 	block_t lstart, start, len, total_len;
 	int err = 0;
 
+	select_sub_discard_policy(&spolicy, spolicy_index, dpolicy);
+
 	if (dc->state != D_PREP)
 		return 0;
@@ -1191,7 +1401,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
 
 	dc->len = 0;
 
-	while (total_len && *issued < dpolicy->max_requests && !err) {
+	while (total_len && *issued < spolicy->max_requests && !err) {
 		struct bio *bio = NULL;
 		unsigned long flags;
 		bool last = true;
@@ -1202,7 +1412,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
 		}
 
 		(*issued)++;
-		if (*issued == dpolicy->max_requests)
+		if (*issued == spolicy->max_requests)
 			last = true;
 
 		dc->len += len;
@@ -1449,7 +1659,8 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
 }
 
 static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
-					struct discard_policy *dpolicy)
+					struct discard_policy *dpolicy,
+					int spolicy_index)
 {
 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
 	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
@@ -1459,8 +1670,11 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
 	unsigned int pos = dcc->next_pos;
 	unsigned int issued = 0;
 	bool io_interrupted = false;
+	struct discard_sub_policy *spolicy = NULL;
 
+	select_sub_discard_policy(&spolicy, spolicy_index, dpolicy);
 	mutex_lock(&dcc->cmd_lock);
+
 	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
 					NULL, pos,
 					(struct rb_entry **)&prev_dc,
@@ -1484,9 +1698,9 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
 		}
 
 		dcc->next_pos = dc->lstart + dc->len;
-		err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
+		err = __submit_discard_cmd(sbi, dpolicy, spolicy_index, dc, &issued);
 
-		if (issued >= dpolicy->max_requests)
+		if (issued >= spolicy->max_requests)
 			break;
 next:
 		node = rb_next(&dc->rb_node);
@@ -1519,11 +1733,19 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
 	struct blk_plug plug;
 	int i, issued;
 	bool io_interrupted = false;
+	struct discard_sub_policy *spolicy = NULL;
 
 	if (dpolicy->timeout)
 		f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);
 
+	/* only done under CONFIG_F2FS_CHECK_FS; it may be time-consuming */
+	if (unlikely(dcc->rbtree_check)) {
+		mutex_lock(&dcc->cmd_lock);
+		f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, &dcc->root, false));
+		mutex_unlock(&dcc->cmd_lock);
+	}
 retry:
+	blk_start_plug(&plug);
 	issued = 0;
 	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
 		if (dpolicy->timeout &&
@@ -1533,8 +1755,13 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
 		if (i + 1 < dpolicy->granularity)
 			break;
 
-		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
-			return __issue_discard_cmd_orderly(sbi, dpolicy);
+		select_sub_discard_policy(&spolicy, i, dpolicy);
+
+		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered) {
+			issued = __issue_discard_cmd_orderly(sbi, dpolicy, i);
+			blk_finish_plug(&plug);
+			return issued;
+		}
 
 		pend_list = &dcc->pend_list[i];
 
@@ -1544,7 +1771,6 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
 		if (unlikely(dcc->rbtree_check))
 			f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
 								&dcc->root, false));
-		blk_start_plug(&plug);
 		list_for_each_entry_safe(dc, tmp, pend_list, list) {
 			f2fs_bug_on(sbi, dc->state != D_PREP);
 
@@ -1555,22 +1781,24 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
 			if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
 						!is_idle(sbi, DISCARD_TIME)) {
 				io_interrupted = true;
-				break;
+				goto skip;
 			}
-
-			__submit_discard_cmd(sbi, dpolicy, dc, &issued);
-
-			if (issued >= dpolicy->max_requests)
+			__submit_discard_cmd(sbi, dpolicy, i, dc, &issued);
+skip:
+			if (issued >= spolicy->max_requests)
 				break;
 		}
-		blk_finish_plug(&plug);
 next:
 		mutex_unlock(&dcc->cmd_lock);
 
-		if (issued >= dpolicy->max_requests || io_interrupted)
+		if (issued >= spolicy->max_requests || io_interrupted)
 			break;
 	}
 
+	blk_finish_plug(&plug);
+	if (spolicy)
+		dpolicy->min_interval = spolicy->interval;
+
 	if (dpolicy->type == DPOLICY_UMOUNT && issued) {
 		__wait_all_discard_cmd(sbi, dpolicy);
 		goto retry;
@@ -1731,8 +1959,7 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
 	struct discard_policy dpolicy;
 	bool dropped;
 
-	__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
-					dcc->discard_granularity);
+	__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT, 0);
 	__issue_discard_cmd(sbi, &dpolicy);
 	dropped = __drop_discard_cmd(sbi);
 
@@ -1743,6 +1970,29 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
 	return dropped;
 }
 
+static int select_discard_type(struct f2fs_sb_info *sbi)
+{
+	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+	block_t user_block_count = sbi->user_block_count;
+	block_t ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
+	block_t fs_available_blocks = user_block_count -
+				valid_user_blocks(sbi) + ovp_count;
+	int discard_type;
+
+	if (fs_available_blocks >= fs_free_space_threshold(sbi) &&
+			fs_available_blocks - dcc->undiscard_blks >=
+			device_free_space_threshold(sbi)) {
+		discard_type = DPOLICY_BG;
+	} else if (fs_available_blocks < fs_free_space_threshold(sbi) &&
+			fs_available_blocks - dcc->undiscard_blks <
+			device_free_space_threshold(sbi)) {
+		discard_type = DPOLICY_FORCE;
+	} else {
+		discard_type = DPOLICY_BALANCE;
+	}
+	return discard_type;
+}
+
 static int issue_discard_thread(void *data)
 {
 	struct f2fs_sb_info *sbi = data;
@@ -1750,13 +2000,13 @@ static int issue_discard_thread(void *data)
 	wait_queue_head_t *q = &dcc->discard_wait_queue;
 	struct discard_policy dpolicy;
 	unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
-	int issued;
+	int issued, discard_type;
 
 	set_freezable();
 
 	do {
-		__init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
-					dcc->discard_granularity);
+		discard_type = select_discard_type(sbi);
+		__init_discard_policy(sbi, &dpolicy, discard_type, 0);
 
 		wait_event_interruptible_timeout(*q,
 				kthread_should_stop() || freezing(current) ||
@@ -1782,7 +2032,7 @@ static int issue_discard_thread(void *data)
 		}
 
 		if (sbi->gc_mode == GC_URGENT_HIGH)
-			__init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
+			__init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 0);
 
 		sb_start_intwrite(sbi->sb);
@@ -2099,7 +2349,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
 	if (!dcc)
 		return -ENOMEM;
 
-	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
+	dcc->discard_granularity = DISCARD_GRAN_BG;
 	INIT_LIST_HEAD(&dcc->entry_list);
 	for (i = 0; i < MAX_PLIST_NUM; i++)
 		INIT_LIST_HEAD(&dcc->pend_list[i]);
@@ -2880,7 +3130,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
  * This function should be returned with success, otherwise BUG
  */
 static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
-						int type, bool force)
+						int type, bool force, int contig_level)
 {
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
 
@@ -2893,8 +3143,12 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
 			is_next_segment_free(sbi, curseg, type) &&
 			likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
 		new_curseg(sbi, type, false);
+#ifdef CONFIG_F2FS_GRADING_SSR
+	else if (need_ssr_by_type(sbi, type, contig_level) && get_ssr_segment(sbi, type, SSR, 0))
+#else
 	else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
+#endif
 		change_curseg(sbi, type, true);
 	else
 		new_curseg(sbi, type, false);
@@ -2952,7 +3206,7 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
 		return;
 alloc:
 	old_segno = curseg->segno;
-	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
+	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true, SEQ_NONE);
 	locate_dirty_segment(sbi, old_segno);
 }
@@ -3014,8 +3268,17 @@ static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
 	struct rb_node **insert_p = NULL, *insert_parent = NULL;
 	struct discard_cmd *dc;
 	struct blk_plug plug;
+	struct discard_sub_policy *spolicy = NULL;
 	int issued;
 	unsigned int trimmed = 0;
 
+	/* fstrim issues 8 discards at a time, without interruption */
+	select_sub_discard_policy(&spolicy, 0, dpolicy);
+
+	if (dcc->rbtree_check) {
+		mutex_lock(&dcc->cmd_lock);
+		f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, &dcc->root, false));
+		mutex_unlock(&dcc->cmd_lock);
+	}
 next:
 	issued = 0;
@@ -3047,9 +3310,9 @@ static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
 			goto skip;
 		}
 
-		err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
+		err = __submit_discard_cmd(sbi, dpolicy, 0, dc, &issued);
 
-		if (issued >= dpolicy->max_requests) {
+		if (issued >= spolicy->max_requests) {
 			start = dc->lstart + dc->len;
 
 			if (err)
@@ -3343,13 +3606,17 @@ static int __get_segment_type(struct f2fs_io_info *fio)
 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 		block_t old_blkaddr, block_t *new_blkaddr,
 		struct f2fs_summary *sum, int type,
-		struct f2fs_io_info *fio)
+		struct f2fs_io_info *fio, int contig_level)
 {
 	struct sit_info *sit_i = SIT_I(sbi);
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
 	unsigned long long old_mtime;
 	bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
 	struct seg_entry *se = NULL;
+#ifdef CONFIG_F2FS_GRADING_SSR
+	struct inode *inode = NULL;
+#endif
+	int contig = SEQ_NONE;
 
 	down_read(&SM_I(sbi)->curseg_lock);
@@ -3378,6 +3645,12 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 	__refresh_next_blkoff(sbi, curseg);
 
 	stat_inc_block_count(sbi, curseg);
+	ba_mutex_lock(&sbi->ba_mutex);
+	if (type >= CURSEG_HOT_DATA && type <= CURSEG_COLD_DATA)
+		inc_ba_val(sbi, curr_data_alloc_cnt, 1);
+	else if (type >= CURSEG_HOT_NODE && type <= CURSEG_COLD_NODE)
+		inc_ba_val(sbi, curr_node_alloc_cnt, 1);
+	ba_mutex_unlock(&sbi->ba_mutex);
 
 	if (from_gc) {
 		old_mtime = get_segment_mtime(sbi, old_blkaddr);
@@ -3396,11 +3669,25 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 		update_sit_entry(sbi, old_blkaddr, -1);
 
 	if (!__has_curseg_space(sbi, curseg)) {
-		if (from_gc)
+		if (from_gc) {
 			get_atssr_segment(sbi, type, se->type,
 						AT_SSR, se->mtime);
-		else
-			sit_i->s_ops->allocate_segment(sbi, type, false);
+		} else {
+#ifdef CONFIG_F2FS_GRADING_SSR
+			if (contig_level != SEQ_NONE) {
+				contig = contig_level;
+				goto allocate_label;
+			}
+
+			if (page && page->mapping && page->mapping != NODE_MAPPING(sbi) &&
+				page->mapping != META_MAPPING(sbi)) {
+				inode = page->mapping->host;
+				contig = check_io_seq(get_dirty_pages(inode));
+			}
+allocate_label:
+#endif
+			sit_i->s_ops->allocate_segment(sbi, type, false, contig);
+		}
 	}
 	/*
	 * segment dirty status should be updated after segment allocation,
@@ -3467,7 +3754,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
 	down_read(&fio->sbi->io_order_lock);
 reallocate:
 	f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
-			&fio->new_blkaddr, sum, type, fio);
+			&fio->new_blkaddr, sum, type, fio, SEQ_NONE);
 	if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
 		invalidate_mapping_pages(META_MAPPING(fio->sbi),
 					fio->old_blkaddr, fio->old_blkaddr);
@@ -4836,7 +5123,7 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
 	f2fs_notice(sbi, "Assign new section to curseg[%d]: "
 			"curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff);
-	allocate_segment_by_default(sbi, type, true);
+	allocate_segment_by_default(sbi, type, true, SEQ_NONE);
 
 	/* check consistency of the zone curseg pointed to */
 	if (check_zone_write_pointer(sbi, zbd, &zone))
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 1bf33fc27b8f..b17624665985 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -130,7 +130,19 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
 	(((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK)
 #define SECTOR_TO_BLOCK(sectors)					\
 	((sectors) >> F2FS_LOG_SECTORS_PER_BLOCK)
+#ifdef CONFIG_F2FS_GRADING_SSR
+#define KBS_PER_SEGMENT		2048
+#define SSR_LOWER_LIMIT_BLK	(16 << 18)	/* 16G */
+#endif
+
+#define SSR_CONTIG_DIRTY_NUMS	32	/* dirty pages for LFS allocation in grading SSR */
+#define SSR_CONTIG_LARGE	256	/* large files */
+
+enum {
+	SEQ_NONE,
+	SEQ_32BLKS,
+	SEQ_256BLKS
+};
 
 /*
  * indicate a block allocation direction: RIGHT and LEFT.
  * RIGHT means allocating new sections towards the end of volume.
@@ -180,6 +192,13 @@ enum {
 	FORCE_FG_GC,
 };
 
+#ifdef CONFIG_F2FS_GRADING_SSR
+enum {
+	GRADING_SSR_OFF = 0,
+	GRADING_SSR_ON
+};
+#endif
+
 /* for a function parameter to select a victim segment */
 struct victim_sel_policy {
 	int alloc_mode;			/* LFS or SSR */
@@ -221,7 +240,7 @@ struct sec_entry {
 };
 
 struct segment_allocation {
-	void (*allocate_segment)(struct f2fs_sb_info *, int, bool);
+	void (*allocate_segment)(struct f2fs_sb_info *, int, bool, int);
 };
 
 #define MAX_SKIP_GC_COUNT			16
@@ -913,3 +932,15 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force)
 	dcc->discard_wake = 1;
 	wake_up_interruptible_all(&dcc->discard_wait_queue);
 }
+
+#ifdef CONFIG_F2FS_GRADING_SSR
+static inline int check_io_seq(int blks)
+{
+	if (blks >= SSR_CONTIG_LARGE)
+		return SEQ_256BLKS;
+	else if (blks >= SSR_CONTIG_DIRTY_NUMS)
+		return SEQ_32BLKS;
+	else
+		return SEQ_NONE;
+}
+#endif
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 36b8d03ded95..be89c9b179c9 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -36,6 +36,19 @@
 #define CREATE_TRACE_POINTS
 #include
 
+#ifdef CONFIG_F2FS_GRADING_SSR
+#define SSR_DEFALT_SPACE_LIMIT	(5 << 20)	/* 5G default space limit */
+#define SSR_DEFALT_WATERLINE	80		/* 80% default waterline */
+#define SSR_HN_SAPCE_LIMIT_128G	(8 << 20)	/* 8G default hot node space limit for 128G devices */
+#define SSR_HN_WATERLINE_128G	80		/* 80% default hot node waterline for 128G devices */
+#define SSR_WN_SAPCE_LIMIT_128G	(5 << 20)	/* 5G default warm node space limit for 128G devices */
+#define SSR_WN_WATERLINE_128G	70		/* 70% default warm node waterline for 128G devices */
+#define SSR_HD_SAPCE_LIMIT_128G	(8 << 20)	/* 8G default hot data space limit for 128G devices */
+#define SSR_HD_WATERLINE_128G	65		/* 65% default hot data waterline for 128G devices */
+#define SSR_WD_SAPCE_LIMIT_128G	(5 << 20)	/* 5G default warm data space limit for 128G devices */
+#define SSR_WD_WATERLINE_128G	60		/* 60% default warm data waterline for 128G devices */
+#endif
+
 static struct kmem_cache *f2fs_inode_cachep;
 
 #ifdef CONFIG_F2FS_FAULT_INJECTION
@@ -1311,6 +1324,10 @@ static void f2fs_put_super(struct super_block *sb)
 		kvfree(sbi->write_io[i]);
 #ifdef CONFIG_UNICODE
 	utf8_unload(sb->s_encoding);
+#endif
+#ifdef CONFIG_F2FS_STAT_FS
+	kfree(sbi->ba_info);
+	sbi->ba_info = NULL;
 #endif
 	kfree(sbi);
 }
@@ -3503,6 +3520,35 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
 		sbi->readdir_ra = 1;
 }
 
+#ifdef CONFIG_F2FS_GRADING_SSR
+static void f2fs_init_grading_ssr(struct f2fs_sb_info *sbi)
+{
+	u32 total_blocks = le64_to_cpu(sbi->raw_super->block_count) >> 18;
+
+	if (total_blocks > 64) {	/* 64G */
+		sbi->hot_cold_params.hot_data_lower_limit = SSR_HD_SAPCE_LIMIT_128G;
+		sbi->hot_cold_params.hot_data_waterline = SSR_HD_WATERLINE_128G;
+		sbi->hot_cold_params.warm_data_lower_limit = SSR_WD_SAPCE_LIMIT_128G;
+		sbi->hot_cold_params.warm_data_waterline = SSR_WD_WATERLINE_128G;
+		sbi->hot_cold_params.hot_node_lower_limit = SSR_HN_SAPCE_LIMIT_128G;
+		sbi->hot_cold_params.hot_node_waterline = SSR_HN_WATERLINE_128G;
+		sbi->hot_cold_params.warm_node_lower_limit = SSR_WN_SAPCE_LIMIT_128G;
+		sbi->hot_cold_params.warm_node_waterline = SSR_WN_WATERLINE_128G;
+		sbi->hot_cold_params.enable = GRADING_SSR_OFF;
+	} else {
+		sbi->hot_cold_params.hot_data_lower_limit = SSR_DEFALT_SPACE_LIMIT;
+		sbi->hot_cold_params.hot_data_waterline = SSR_DEFALT_WATERLINE;
+		sbi->hot_cold_params.warm_data_lower_limit = SSR_DEFALT_SPACE_LIMIT;
+		sbi->hot_cold_params.warm_data_waterline = SSR_DEFALT_WATERLINE;
+		sbi->hot_cold_params.hot_node_lower_limit = SSR_DEFALT_SPACE_LIMIT;
+		sbi->hot_cold_params.hot_node_waterline = SSR_DEFALT_WATERLINE;
+		sbi->hot_cold_params.warm_node_lower_limit = SSR_DEFALT_SPACE_LIMIT;
+		sbi->hot_cold_params.warm_node_waterline = SSR_DEFALT_WATERLINE;
+		sbi->hot_cold_params.enable = GRADING_SSR_OFF;
+	}
+}
+#endif
+
 static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct f2fs_sb_info *sbi;
@@ -3526,6 +3572,16 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	if (!sbi)
 		return -ENOMEM;
 
+#ifdef CONFIG_F2FS_STAT_FS
+	sbi->ba_info = kzalloc(sizeof(struct f2fs_block_alloc_info), GFP_KERNEL);
+	if (!sbi->ba_info) {
+		err = -ENOMEM;
+		goto free_sbi;
+	}
+	sbi->ba_info->ssr_last_jiffies = jiffies;
+	mutex_init(&sbi->ba_mutex);
+#endif
+
 	sbi->sb = sb;
 
 	/* Load the checksum driver */
@@ -3763,6 +3819,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 		le64_to_cpu(seg_i->journal->info.kbytes_written);
 
 	f2fs_build_gc_manager(sbi);
+	atomic_set(&sbi->need_ssr_gc, 0);
 
 	err = f2fs_build_stats(sbi);
 	if (err)
@@ -3795,7 +3852,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 		err = -ENOMEM;
 		goto free_node_inode;
 	}
-
+#ifdef CONFIG_F2FS_GRADING_SSR
+	f2fs_init_grading_ssr(sbi);
+#endif
 	err = f2fs_register_sysfs(sbi);
 	if (err)
 		goto free_root_inode;
@@ -3808,6 +3867,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 			f2fs_err(sbi, "Cannot turn on quotas: error %d", err);
 	}
 #endif
+	/* urgent gc flag init */
+	sbi->is_frag = false;
+	sbi->last_urgent_check = jiffies;
+
 	/* if there are any orphan inodes, free them */
 	err = f2fs_recover_orphan_inodes(sbi);
 	if (err)
@@ -3982,6 +4045,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 free_sb_buf:
 	kfree(raw_super);
 free_sbi:
+#ifdef CONFIG_F2FS_STAT_FS
+	kfree(sbi->ba_info);
+	sbi->ba_info = NULL;
+#endif
 	if (sbi->s_chksum_driver)
 		crypto_free_shash(sbi->s_chksum_driver);
 	kfree(sbi);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index b8850c81068a..92f109294994 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -34,6 +34,9 @@ enum {
 	FAULT_INFO_TYPE,	/* struct f2fs_fault_info */
 #endif
 	RESERVED_BLOCKS,	/* struct f2fs_sb_info */
+#ifdef CONFIG_F2FS_GRADING_SSR
+	F2FS_HOT_COLD_PARAMS,
+#endif
 };
 
 struct f2fs_attr {
@@ -61,6 +64,10 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
 		return (unsigned char *)NM_I(sbi);
 	else if (struct_type == F2FS_SBI || struct_type == RESERVED_BLOCKS)
 		return (unsigned char *)sbi;
+#ifdef CONFIG_F2FS_GRADING_SSR
+	else if (struct_type == F2FS_HOT_COLD_PARAMS)
+		return (unsigned char *)&sbi->hot_cold_params;
+#endif
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 	else if (struct_type == FAULT_INFO_RATE ||
 					struct_type == FAULT_INFO_TYPE)
@@ -538,10 +545,12 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
 F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
 F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle, gc_mode);
+F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_preference, gc_preference);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity);
+F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_type, discard_type);
 F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
@@ -568,6 +577,26 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_period_ms, iostat_period_ms);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list);
+#ifdef CONFIG_F2FS_GRADING_SSR
+F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params,
+		hc_hot_data_lower_limit, hot_data_lower_limit);
+F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params,
+		hc_hot_data_waterline, hot_data_waterline);
+F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params,
+		hc_warm_data_lower_limit, warm_data_lower_limit);
+F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params,
+		hc_warm_data_waterline, warm_data_waterline);
+F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params,
+		hc_hot_node_lower_limit, hot_node_lower_limit);
+F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params,
+		hc_hot_node_waterline, hot_node_waterline);
+F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params,
+		hc_warm_node_lower_limit, warm_node_lower_limit);
+F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params,
+		hc_warm_node_waterline, warm_node_waterline);
+F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params,
+		hc_enable, enable);
+#endif
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
 F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
@@ -626,11 +655,13 @@ static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(gc_max_sleep_time),
 	ATTR_LIST(gc_no_gc_sleep_time),
 	ATTR_LIST(gc_idle),
+	ATTR_LIST(gc_preference),
 	ATTR_LIST(gc_urgent),
 	ATTR_LIST(reclaim_segments),
 	ATTR_LIST(main_blkaddr),
 	ATTR_LIST(max_small_discards),
 	ATTR_LIST(discard_granularity),
+	ATTR_LIST(discard_type),
 	ATTR_LIST(batched_trim_sections),
 	ATTR_LIST(ipu_policy),
 	ATTR_LIST(min_ipu_util),
@@ -677,6 +708,17 @@ static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(moved_blocks_foreground),
 	ATTR_LIST(moved_blocks_background),
 	ATTR_LIST(avg_vblocks),
+#endif
+#ifdef CONFIG_F2FS_GRADING_SSR
+	ATTR_LIST(hc_hot_data_lower_limit),
+	ATTR_LIST(hc_hot_data_waterline),
+	ATTR_LIST(hc_warm_data_lower_limit),
+	ATTR_LIST(hc_warm_data_waterline),
+	ATTR_LIST(hc_hot_node_lower_limit),
+	ATTR_LIST(hc_hot_node_waterline),
+	ATTR_LIST(hc_warm_node_lower_limit),
+	ATTR_LIST(hc_warm_node_waterline),
+	ATTR_LIST(hc_enable),
 #endif
 	NULL,
 };
@@ -908,6 +950,66 @@ static int __maybe_unused victim_bits_seq_show(struct seq_file *seq,
 	return 0;
 }
 
+static int undiscard_info_seq_show(struct seq_file *seq, void *offset)
+{
+	struct super_block *sb = seq->private;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	struct sit_info *sit_i = SIT_I(sbi);
+	unsigned int total_segs =
+			le32_to_cpu(sbi->raw_super->segment_count_main);
+	unsigned int total = 0;
+	unsigned int i, j;
+
+	if (!f2fs_realtime_discard_enable(sbi))
+		goto out;
+
+	for (i = 0; i < total_segs; i++) {
+		struct seg_entry *se = get_seg_entry(sbi, i);
+		unsigned int entries = SIT_VBLOCK_MAP_SIZE /
+				sizeof(unsigned long);
+		unsigned int max_blocks = sbi->blocks_per_seg;
+		unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
+		unsigned long *discard_map = (unsigned long *)se->discard_map;
+		unsigned long *dmap = SIT_I(sbi)->tmp_map;
+		int start = 0, end = -1;
+
+		down_write(&sit_i->sentry_lock);
+		if (se->valid_blocks == max_blocks) {
+			up_write(&sit_i->sentry_lock);
+			continue;
+		}
+
+		if (se->valid_blocks == 0) {
+			mutex_lock(&dirty_i->seglist_lock);
+			if (test_bit((int)i, dirty_i->dirty_segmap[PRE]))
+				total += 512;
+			mutex_unlock(&dirty_i->seglist_lock);
+		} else {
+			for (j = 0; j < entries; j++)
+				dmap[j] = ~ckpt_map[j] & ~discard_map[j];
+			while (1) {
+				start = (int)__find_rev_next_bit(dmap,
+						(unsigned long)max_blocks,
+						(unsigned long)(end + 1));
+				if ((unsigned int)start >= max_blocks)
+					break;
+
+				end = (int)__find_rev_next_zero_bit(dmap,
+						(unsigned long)max_blocks,
+						(unsigned long)(start + 1));
+				total += (unsigned int)(end - start);
+			}
+		}
+
+		up_write(&sit_i->sentry_lock);
+	}
+
+out:
+	seq_printf(seq, "total undiscard:%u K\n", total * 4);
+	return 0;
+}
+
 int __init f2fs_init_sysfs(void)
 {
 	int ret;
@@ -964,6 +1066,9 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
 				iostat_info_seq_show, sb);
 		proc_create_single_data("victim_bits", S_IRUGO, sbi->s_proc,
 				victim_bits_seq_show, sb);
+		proc_create_single_data("undiscard_info", S_IRUGO, sbi->s_proc,
+				undiscard_info_seq_show, sb);
+
 	}
 	return 0;
 }
@@ -975,6 +1080,7 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi)
 		remove_proc_entry("segment_info", sbi->s_proc);
 		remove_proc_entry("segment_bits", sbi->s_proc);
 		remove_proc_entry("victim_bits", sbi->s_proc);
+		remove_proc_entry("undiscard_info", sbi->s_proc);
 		remove_proc_entry(sbi->sb->s_id, f2fs_proc_root);
 	}
 	kobject_del(&sbi->s_kobj);
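Aside (illustrative only, not part of the patch): undiscard_info_seq_show
above counts blocks that are neither valid at the last checkpoint nor
already covered by a queued discard. The userspace sketch below shows the
same dmap = ~ckpt_map & ~discard_map idea over byte-per-block maps; the
kernel helpers (__find_rev_next_bit and friends) instead scan f2fs's
reversed on-disk bitmaps.

/* Count blocks still awaiting discard in one 16-block "segment". */
#include <stdio.h>

#define BLOCKS 16

int main(void)
{
	/* 1 = valid at checkpoint / already queued for discard, per block */
	const unsigned char ckpt_map[BLOCKS]    = {1,1,0,0,1,0,0,0,1,1,1,0,0,0,0,1};
	const unsigned char discard_map[BLOCKS] = {0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0};
	unsigned int total = 0;

	for (int i = 0; i < BLOCKS; i++)
		if (!ckpt_map[i] && !discard_map[i])	/* dmap = ~ckpt & ~discard */
			total++;

	printf("undiscarded blocks: %u (%u KiB with 4KiB blocks)\n",
	       total, total * 4);
	return 0;
}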
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 56b113e3cd6a..b4fe1db78eae 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -1957,6 +1957,39 @@ TRACE_EVENT(f2fs_fiemap,
 		__entry->ret)
 );
 
+#ifdef CONFIG_F2FS_GRADING_SSR
+DECLARE_EVENT_CLASS(f2fs_grading_ssr,
+
+	TP_PROTO(unsigned int left, unsigned int free,
+		unsigned int seq),
+
+	TP_ARGS(left, free, seq),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, left)
+		__field(unsigned int, free)
+		__field(unsigned int, seq)
+	),
+
+	TP_fast_assign(
+		__entry->left = left;
+		__entry->free = free;
+		__entry->seq = seq;
+	),
+
+	TP_printk("ssr: left_space %u free_segments: %u is_seq: %u",
+		__entry->left, __entry->free, __entry->seq)
+);
+
+DEFINE_EVENT(f2fs_grading_ssr, f2fs_grading_ssr_allocate,
+
+	TP_PROTO(unsigned int left, unsigned int free,
+		unsigned int seq),
+
+	TP_ARGS(left, free, seq)
+);
+#endif
+
 #endif /* _TRACE_F2FS_H */
 
 /* This part must be outside protection */
-- 
Gitee
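Usage sketch for the series (illustrative; the device name and mount
point are hypothetical, and the entries only exist with the options and
configs added above):

  # mount -o background_gc=on,gc_merge /dev/sdb1 /mnt/f2fs
  # cat /sys/fs/f2fs/sdb1/gc_preference          current GC preference (patch 2)
  # cat /sys/fs/f2fs/sdb1/discard_type           active discard policy (patch 2)
  # echo 1 > /sys/fs/f2fs/sdb1/hc_enable         turn grading SSR on (CONFIG_F2FS_GRADING_SSR)
  # cat /proc/fs/f2fs/sdb1/undiscard_info        blocks still awaiting discard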