diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 8c0fbdd8ce6fba671b8efc2b74678ef86cf676e1..cb18f7c1bea30ea356c5466a1f86a016e8c3b221 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -110,6 +110,12 @@ background_gc=%s Turn on/off cleaning operations, namely garbage on synchronous garbage collection running in background. Default value for this option is on. So garbage collection is on by default. +gc_merge When background_gc is on, this option can be enabled to + let the background GC thread handle foreground GC requests; + this avoids the sluggishness caused by slow foreground + GC when GC is triggered from a process with limited + I/O and CPU resources. +nogc_merge Disable the GC merge feature. disable_roll_forward Disable the roll-forward recovery routine norecovery Disable the roll-forward recovery routine, mounted read- only (i.e., -o ro,disable_roll_forward) diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index d13c5c6a978769b69eef060d50569c578616ba14..3dfc4f60de0c7f98920cb9c38011e15fe2235a53 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -137,3 +137,10 @@ config F2FS_FS_LZORLE default y help Support LZO-RLE compress algorithm, if unsure, say Y. + +config F2FS_GRADING_SSR + bool "F2FS grading SSR" + depends on F2FS_FS + default y + help + Grade SSR allocation by per-temperature free-space waterlines to sustain performance as free space runs low. If unsure, say Y. diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1b11a42847c48e92b8a3af471c5793c724ffcc66..1d0f1d47bc60ed75fc711b78d79f927135d9dc11 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1390,7 +1390,7 @@ struct page *f2fs_get_new_data_page(struct inode *inode, return page; } -static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) +static int __allocate_data_block(struct dnode_of_data *dn, int seg_type, int contig_level) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_summary sum; @@ -1417,7 +1417,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); old_blkaddr = dn->data_blkaddr; f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr, - &sum, seg_type, NULL); + &sum, seg_type, NULL, contig_level); if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) invalidate_mapping_pages(META_MAPPING(sbi), old_blkaddr, old_blkaddr); @@ -1511,6 +1511,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, struct extent_info ei = {0,0,0}; block_t blkaddr; unsigned int start_pgofs; + int contig_level = SEQ_NONE; + +#ifdef CONFIG_F2FS_GRADING_SSR + contig_level = check_io_seq(maxblocks); +#endif if (!maxblocks) return 0; @@ -1594,7 +1599,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, /* use out-place-update for driect IO under LFS mode */ if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && map->m_may_create) { - err = __allocate_data_block(&dn, map->m_seg_type); + err = __allocate_data_block(&dn, map->m_seg_type, contig_level); if (err) goto sync_out; blkaddr = dn.data_blkaddr; @@ -1614,8 +1619,8 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, } else { WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO && flag != F2FS_GET_BLOCK_DIO); err = __allocate_data_block(&dn, - map->m_seg_type); + map->m_seg_type, contig_level); if (!err) set_inode_flag(inode, FI_APPEND_WRITE); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index
2d7799bd30b107fe39760e79880a9daeaceaa4aa..e1526151fb5e6364674d6e372cbada2856af737c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -99,6 +99,7 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000 #define F2FS_MOUNT_NORECOVERY 0x04000000 #define F2FS_MOUNT_ATGC 0x08000000 +#define F2FS_MOUNT_GC_MERGE 0x20000000 #define F2FS_OPTION(sbi) ((sbi)->mount_opt) #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) @@ -275,11 +276,17 @@ struct discard_entry { /* default discard granularity of inner discard thread, unit: block count */ #define DEFAULT_DISCARD_GRANULARITY 16 +#define DISCARD_GRAN_BL 16 +#define DISCARD_GRAN_BG 512 +#define DISCARD_GRAN_FORCE 1 /* max discard pend list number */ #define MAX_PLIST_NUM 512 #define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \ (MAX_PLIST_NUM - 1) : ((blk_num) - 1)) +#define FS_FREE_SPACE_PERCENT 20 +#define DEVICE_FREE_SPACE_PERCENT 10 +#define HUNDRED_PERCENT 100 enum { D_PREP, /* initial */ @@ -318,24 +325,37 @@ struct discard_cmd { enum { DPOLICY_BG, + DPOLICY_BALANCE, DPOLICY_FORCE, DPOLICY_FSTRIM, DPOLICY_UMOUNT, MAX_DPOLICY, }; +enum { + SUB_POLICY_BIG, + SUB_POLICY_MID, + SUB_POLICY_SMALL, + NR_SUB_POLICY, +}; + +struct discard_sub_policy { + unsigned int max_requests; + int interval; +}; + struct discard_policy { int type; /* type of discard */ unsigned int min_interval; /* used for candidates exist */ unsigned int mid_interval; /* used for device busy */ unsigned int max_interval; /* used for candidates not exist */ - unsigned int max_requests; /* # of discards issued per round */ unsigned int io_aware_gran; /* minimum granularity discard not be aware of I/O */ bool io_aware; /* issue discard in idle time */ bool sync; /* submit discard with REQ_SYNC flag */ bool ordered; /* issue discard by lba order */ bool timeout; /* discard timeout for put_super */ unsigned int granularity; /* discard granularity */ + struct discard_sub_policy sub_policy[NR_SUB_POLICY]; }; struct discard_cmd_control { @@ -357,6 +377,7 @@ struct discard_cmd_control { atomic_t discard_cmd_cnt; /* # of cached cmd count */ struct rb_root_cached root; /* root of discard rb-tree */ bool rbtree_check; /* config for consistence check */ + int discard_type; /* discard type */ }; /* for the list of fsync inodes, used only during recovery */ @@ -1342,6 +1363,20 @@ struct decompress_io_ctx { #define MAX_COMPRESS_LOG_SIZE 8 #define MAX_COMPRESS_WINDOW_SIZE(log_size) ((PAGE_SIZE) << (log_size)) +#ifdef CONFIG_F2FS_GRADING_SSR +struct f2fs_hot_cold_params { + unsigned int enable; + unsigned int hot_data_lower_limit; + unsigned int hot_data_waterline; + unsigned int warm_data_lower_limit; + unsigned int warm_data_waterline; + unsigned int hot_node_lower_limit; + unsigned int hot_node_waterline; + unsigned int warm_node_lower_limit; + unsigned int warm_node_waterline; +}; +#endif + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ @@ -1501,7 +1536,10 @@ struct f2fs_sb_info { unsigned int io_skip_bggc; /* skip background gc for in-flight IO */ unsigned int other_skip_bggc; /* skip background gc for other reasons */ unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */ + struct mutex ba_mutex; + struct f2fs_block_alloc_info *ba_info; /* block alloc statistics */ #endif + atomic_t need_ssr_gc; spinlock_t stat_lock; /* lock for stat operations */ /* For app/fs IO statistics */ @@ -1547,6 +1585,11 @@ struct f2fs_sb_info { #ifdef 
CONFIG_F2FS_FS_COMPRESSION struct kmem_cache *page_array_slab; /* page array entry */ unsigned int page_array_slab_size; /* default page array slab size */ +#endif + bool is_frag; /* urgent gc flag */ + unsigned long last_urgent_check; /* last urgent check jiffies */ +#ifdef CONFIG_F2FS_GRADING_SSR + struct f2fs_hot_cold_params hot_cold_params; #endif }; @@ -3049,6 +3092,18 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, f2fs_record_iostat(sbi); } +static inline block_t fs_free_space_threshold(struct f2fs_sb_info *sbi) +{ + return (block_t)(SM_I(sbi)->main_segments * sbi->blocks_per_seg * + FS_FREE_SPACE_PERCENT) / HUNDRED_PERCENT; +} + +static inline block_t device_free_space_threshold(struct f2fs_sb_info *sbi) +{ + return (block_t)(SM_I(sbi)->main_segments * sbi->blocks_per_seg * + DEVICE_FREE_SPACE_PERCENT) / HUNDRED_PERCENT; +} + #define __is_large_section(sbi) ((sbi)->segs_per_sec > 1) #define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META) @@ -3258,6 +3313,12 @@ void f2fs_destroy_node_manager_caches(void); /* * segment.c */ +unsigned long __find_rev_next_bit(const unsigned long *addr, + unsigned long size, unsigned long offset); +unsigned long __find_rev_next_zero_bit(const unsigned long *addr, + unsigned long size, unsigned long offset); +int find_next_free_extent(const unsigned long *addr, + unsigned long size, unsigned long *offset); bool f2fs_need_SSR(struct f2fs_sb_info *sbi); void f2fs_register_inmem_page(struct inode *inode, struct page *page); void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure); @@ -3315,7 +3376,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, - struct f2fs_io_info *fio); + struct f2fs_io_info *fio, int contig_level); void f2fs_wait_on_page_writeback(struct page *page, enum page_type type, bool ordered, bool locked); void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr); @@ -3654,6 +3715,27 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi); void __init f2fs_create_root_stats(void); void f2fs_destroy_root_stats(void); void f2fs_update_sit_info(struct f2fs_sb_info *sbi); +struct f2fs_block_alloc_info { + unsigned long last_node_alloc_cnt, last_data_alloc_cnt; + unsigned long curr_node_alloc_cnt, curr_data_alloc_cnt; + unsigned long ssr_last_jiffies; +}; +static inline struct f2fs_block_alloc_info *F2FS_BA_STAT(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_block_alloc_info *)sbi->ba_info; +} +#define inc_ba_val(sbi, member, val) do { \ + struct f2fs_block_alloc_info *ba = F2FS_BA_STAT(sbi); \ + if (ba) \ + ba->member += (val); \ +} while (0) +#define inc_ba_array_val(sbi, member, idx, val) do { \ + struct f2fs_block_alloc_info *ba = F2FS_BA_STAT(sbi); \ + if (ba) \ + ba->member[(idx)] += (val); \ +} while (0) +#define ba_mutex_lock(mutex) mutex_lock((mutex)) +#define ba_mutex_unlock(mutex) mutex_unlock((mutex)) #else #define stat_inc_cp_count(si) do { } while (0) #define stat_inc_bg_cp_count(si) do { } while (0) @@ -3697,6 +3779,11 @@ static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } static inline void __init f2fs_create_root_stats(void) { } static inline void f2fs_destroy_root_stats(void) { } static inline void f2fs_update_sit_info(struct f2fs_sb_info *sbi) {} + +#define inc_ba_val(sbi, member, val) +#define inc_ba_array_val(sbi, member, idx, val) +#define ba_mutex_lock(mutex) +#define 
ba_mutex_unlock(mutex) #endif extern const struct file_operations f2fs_dir_operations; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 72f227f6ebad094c1712efae3eeedb0e2d47d7ba..69a8e3c27a7bbd0e66449a63f06eb5b1d289c220 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include "f2fs.h" #include "node.h" @@ -26,24 +28,161 @@ static struct kmem_cache *victim_entry_slab; static unsigned int count_bits(const unsigned long *addr, unsigned int offset, unsigned int len); +#define MIN_WT 1000 /* minimum GC wait time, milliseconds */ +#define DEF_GC_BALANCE_MIN_SLEEP_TIME 10000 /* milliseconds */ +#define DEF_GC_FRAG_MIN_SLEEP_TIME 1000 /* milliseconds */ +#define GC_URGENT_DISABLE_BLKS (32 << 18) /* 32G */ +#define GC_URGENT_SPACE (10 << 18) /* 10G */ +#define GC_URGENT_INTERVAL (10 * 60 * 1000) /* 10 mins */ +#define BLOCK_COUNT_TYPE 10 /* number of histogram buckets for free-extent sizes */ + +static bool __is_frag_urgent(struct f2fs_sb_info *sbi) +{ + unsigned int total_segs = + le32_to_cpu(sbi->raw_super->segment_count_main); + unsigned int i; + unsigned int block_count[BLOCK_COUNT_TYPE]; + unsigned int tot_blocks = 0; + u64 total_blocks = le64_to_cpu(sbi->raw_super->block_count); + unsigned int valid_blocks = sbi->total_valid_block_count; + + if (total_blocks < GC_URGENT_DISABLE_BLKS) + return false; + + if (total_blocks - valid_blocks > GC_URGENT_SPACE) + return false; + + memset(block_count, 0, sizeof(block_count)); + for (i = 0; i < total_segs; i++) { + struct seg_entry *se = get_seg_entry(sbi, i); + unsigned long start = 0; + int blocks, index; + + if (se->valid_blocks == 0) { + block_count[BLOCK_COUNT_TYPE - 1] += sbi->blocks_per_seg; + continue; + } + if (se->valid_blocks == sbi->blocks_per_seg) + continue; + + while (start < sbi->blocks_per_seg) { + blocks = find_next_free_extent((unsigned long *)se->cur_valid_map, + sbi->blocks_per_seg, + &start); + if (unlikely(blocks < 0)) + break; + + index = ilog2(blocks); + if (unlikely(index >= BLOCK_COUNT_TYPE)) { + index = BLOCK_COUNT_TYPE - 1; + set_sbi_flag(sbi, SBI_NEED_FSCK); + } + + block_count[index] += blocks; + tot_blocks += blocks; + } + cond_resched(); + } + for (i = 0; i < BLOCK_COUNT_TYPE; i++) + f2fs_info(sbi, "block_cnt[%d]: %d\n", i, block_count[i]); + + if ((block_count[0] + block_count[1]) >= (tot_blocks >> 1)) + return true; + + return false; +} + +/* true if invalid blocks in dirty segments exceed 10% of total free space */ +static inline bool is_reclaimable_dirty_blocks_enough(struct f2fs_sb_info *sbi) +{ + s64 total_free_blocks; + s64 total_reclaimable_blocks; + + total_free_blocks = sbi->user_block_count - written_block_count(sbi); + total_reclaimable_blocks = total_free_blocks - free_user_blocks(sbi); + + if (total_reclaimable_blocks <= 0) + return false; + + if ((total_reclaimable_blocks * 10) <= total_free_blocks) + return false; + + return true; +} + +static bool is_frag_urgent(struct f2fs_sb_info *sbi) +{ + unsigned long next_check = sbi->last_urgent_check + + msecs_to_jiffies(GC_URGENT_INTERVAL); + bool ret; + + if (time_after(jiffies, next_check)) { + sbi->last_urgent_check = jiffies; + sbi->is_frag = __is_frag_urgent(sbi); + } + ret = free_segments(sbi) < 3 * overprovision_segments(sbi) && + is_reclaimable_dirty_blocks_enough(sbi) && + sbi->is_frag; + + return ret; +} + +/* + * GC tuning ratio [0, 100] in performance mode + */ +static inline int gc_perf_ratio(struct f2fs_sb_info *sbi) +{ + block_t reclaimable_user_blocks = sbi->user_block_count - + written_block_count(sbi); + return reclaimable_user_blocks == 0 ?
100 : + div_u64(100ULL * free_user_blocks(sbi), reclaimable_user_blocks); +} + static int gc_thread_func(void *data) { struct f2fs_sb_info *sbi = data; struct f2fs_gc_kthread *gc_th = sbi->gc_thread; wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head; + wait_queue_head_t *fggc_wq = &sbi->gc_thread->fggc_wq; unsigned int wait_ms; wait_ms = gc_th->min_sleep_time; set_freezable(); do { - bool sync_mode; + bool sync_mode, foreground = false; + int ssr_gc_count; + + if (is_frag_urgent(sbi)) + gc_th->gc_preference = GC_FRAG; + else if (div_u64(100ULL * written_block_count(sbi), sbi->user_block_count) > 90) + gc_th->gc_preference = GC_LIFETIME; + else if (gc_perf_ratio(sbi) < 10 && free_segments(sbi) < + 3 * overprovision_segments(sbi)) + gc_th->gc_preference = GC_PERF; + else + gc_th->gc_preference = GC_BALANCE; + + if (gc_th->gc_preference == GC_PERF) + wait_ms = max(DEF_GC_BALANCE_MIN_SLEEP_TIME * + gc_perf_ratio(sbi) / 100, MIN_WT); + else if (gc_th->gc_preference == GC_BALANCE) + gc_th->min_sleep_time = DEF_GC_BALANCE_MIN_SLEEP_TIME; + else if (gc_th->gc_preference == GC_FRAG) + wait_ms = DEF_GC_FRAG_MIN_SLEEP_TIME; + else + gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; wait_event_interruptible_timeout(*wq, kthread_should_stop() || freezing(current) || - gc_th->gc_wake, + waitqueue_active(fggc_wq) || + gc_th->gc_wake || + atomic_read(&sbi->need_ssr_gc), msecs_to_jiffies(wait_ms)); + if (test_opt(sbi, GC_MERGE) && waitqueue_active(fggc_wq)) + foreground = true; + /* give it a try one time */ if (gc_th->gc_wake) gc_th->gc_wake = 0; @@ -71,6 +210,17 @@ static int gc_thread_func(void *data) continue; } + ssr_gc_count = atomic_read(&sbi->need_ssr_gc); + if (ssr_gc_count) { + down_write(&sbi->gc_lock); + f2fs_gc(sbi, true, false, false, NULL_SEGNO); + atomic_sub(ssr_gc_count, &sbi->need_ssr_gc); + if (!has_not_enough_free_secs(sbi, 0, 0)) { + wake_up_all(&gc_th->fggc_wq); + goto next; + } + } + /* * [GC triggering condition] * 0. GC is not conducted currently. 
@@ -90,7 +240,10 @@ static int gc_thread_func(void *data) goto do_gc; } - if (!down_write_trylock(&sbi->gc_lock)) { + if (foreground) { + down_write(&sbi->gc_lock); + goto do_gc; + } else if (!down_write_trylock(&sbi->gc_lock)) { stat_other_skip_bggc_count(sbi); goto next; } @@ -107,14 +260,22 @@ static int gc_thread_func(void *data) else increase_sleep_time(gc_th, &wait_ms); do_gc: - stat_inc_bggc_count(sbi->stat_info); + if (!foreground) + stat_inc_bggc_count(sbi->stat_info); sync_mode = F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC; + /* foreground GC was triggered via f2fs_balance_fs() */ + if (foreground) + sync_mode = false; + /* if return value is not zero, no victim was selected */ - if (f2fs_gc(sbi, sync_mode, true, false, NULL_SEGNO)) + if (f2fs_gc(sbi, sync_mode, !foreground, false, NULL_SEGNO)) wait_ms = gc_th->no_gc_sleep_time; + if (foreground) + wake_up_all(&gc_th->fggc_wq); + trace_f2fs_background_gc(sbi->sb, wait_ms, prefree_segments(sbi), free_segments(sbi)); @@ -140,14 +301,16 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) } gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; - gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; + gc_th->min_sleep_time = DEF_GC_BALANCE_MIN_SLEEP_TIME; gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; gc_th->gc_wake= 0; + gc_th->gc_preference = GC_BALANCE; sbi->gc_thread = gc_th; init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); + init_waitqueue_head(&sbi->gc_thread->fggc_wq); sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(gc_th->f2fs_gc_task)) { @@ -165,6 +328,7 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi) if (!gc_th) return; kthread_stop(gc_th->f2fs_gc_task); + wake_up_all(&gc_th->fggc_wq); kfree(gc_th); sbi->gc_thread = NULL; } @@ -286,10 +450,12 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); + struct f2fs_gc_kthread *gc_th = sbi->gc_thread; unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); unsigned int start = GET_SEG_FROM_SEC(sbi, secno); unsigned long long mtime = 0; unsigned int vblocks; + unsigned int max_age; unsigned char age = 0; unsigned char u; unsigned int i; @@ -309,8 +475,11 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) sit_i->min_mtime = mtime; if (mtime > sit_i->max_mtime) sit_i->max_mtime = mtime; + /* Reduce the cost weight of age when free blocks are less than 10% */ + max_age = (gc_th && gc_th->gc_preference != GC_LIFETIME && + gc_perf_ratio(sbi) < 10) ? max(10 * gc_perf_ratio(sbi), 1) : 100; if (sit_i->max_mtime != sit_i->min_mtime) - age = 100 - div64_u64(100 * (mtime - sit_i->min_mtime), + age = max_age - div64_u64(max_age * (mtime - sit_i->min_mtime), sit_i->max_mtime - sit_i->min_mtime); return UINT_MAX - ((100 * (100 - u) * age) / (100 + u)); @@ -1220,7 +1389,7 @@ static int move_data_block(struct inode *inode, block_t bidx, } f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, - &sum, type, NULL); + &sum, type, NULL, SEQ_NONE); fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi), newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 0c8dae12dc512899f8850e6a8fec8a4abb207b93..3f0ae64209a08b64f98c474d9aea8c8b1e34b79d 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -30,6 +30,14 @@ /* Search max.
number of dirty segments to select a victim segment */ #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */ +/* GC preferences */ +enum { + GC_LIFETIME = 0, + GC_BALANCE, + GC_PERF, + GC_FRAG +}; + struct f2fs_gc_kthread { struct task_struct *f2fs_gc_task; wait_queue_head_t gc_wait_queue_head; @@ -41,7 +49,14 @@ struct f2fs_gc_kthread { unsigned int no_gc_sleep_time; /* for changing gc mode */ + unsigned int gc_preference; unsigned int gc_wake; + + /* for GC_MERGE mount option */ + wait_queue_head_t fggc_wq; /* + * caller of f2fs_balance_fs() + * will wait on this wait queue. + */ }; struct gc_inode_list { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d04b449978aa8e4c8146527508bb2d6fcd60096b..5f5e1ca05503793e61578f073fd14a344dd3cf9c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -30,6 +31,24 @@ static struct kmem_cache *discard_cmd_slab; static struct kmem_cache *sit_entry_set_slab; static struct kmem_cache *inmem_entry_slab; +static struct discard_policy dpolicys[MAX_DPOLICY] = { + {DPOLICY_BG, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME, + MAX_PLIST_NUM, false, true, false, false, DISCARD_GRAN_BG, + {{1, 0}, {0, 0}, {0, 0}}}, + {DPOLICY_BALANCE, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME, + MAX_PLIST_NUM - 1, true, true, false, false, DISCARD_GRAN_BL, + {{1, 0}, {2, 50}, {0, 0}}}, + {DPOLICY_FORCE, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME, + MAX_PLIST_NUM - 1, true, true, false, false, DISCARD_GRAN_FORCE, + {{1, 0}, {2, 50}, {4, 2000}}}, + {DPOLICY_FSTRIM, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME, + MAX_PLIST_NUM, false, true, false, false, DISCARD_GRAN_FORCE, + {{8, 0}, {8, 0}, {8, 0}}}, + {DPOLICY_UMOUNT, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME, + MAX_PLIST_NUM, false, true, false, false, DISCARD_GRAN_BG, + {{UINT_MAX, 0}, {0, 0}, {0, 0}}} +}; + static unsigned long __reverse_ulong(unsigned char *str) { unsigned long tmp = 0; @@ -93,7 +112,7 @@ static inline unsigned long __reverse_ffs(unsigned long word) * f2fs_set_bit(0, bitmap) => 1000 0000 * f2fs_set_bit(7, bitmap) => 0000 0001 */ -static unsigned long __find_rev_next_bit(const unsigned long *addr, +unsigned long __find_rev_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { const unsigned long *p = addr + BIT_WORD(offset); @@ -129,7 +148,7 @@ static unsigned long __find_rev_next_bit(const unsigned long *addr, return result - size + __reverse_ffs(tmp); } -static unsigned long __find_rev_next_zero_bit(const unsigned long *addr, +unsigned long __find_rev_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { const unsigned long *p = addr + BIT_WORD(offset); @@ -166,6 +185,19 @@ static unsigned long __find_rev_next_zero_bit(const unsigned long *addr, return result - size + __reverse_ffz(tmp); } +int find_next_free_extent(const unsigned long *addr, + unsigned long size, unsigned long *offset) +{ + unsigned long pos, pos_zero_bit; + + pos_zero_bit = __find_rev_next_zero_bit(addr, size, *offset); + if (pos_zero_bit >= size) + return -ENOSPC; + pos = __find_rev_next_bit(addr, size, pos_zero_bit); + *offset = pos; + return (int)(pos - pos_zero_bit); +} + bool f2fs_need_SSR(struct f2fs_sb_info *sbi) { int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); @@ -183,6 +215,74 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi) SM_I(sbi)->min_ssr_sections + reserved_sections(sbi)); } +#ifdef CONFIG_F2FS_GRADING_SSR 
+static bool need_ssr_by_type(struct f2fs_sb_info *sbi, int type, int contig_level) +{ + int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); + int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); + int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA); + u64 valid_blocks = sbi->total_valid_block_count; + u64 total_blocks = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; + u64 left_space = (total_blocks - valid_blocks) << 2; + unsigned int free_segs = free_segments(sbi); + unsigned int ovp_segments = overprovision_segments(sbi); + unsigned int lower_limit = 0; + unsigned int waterline = 0; + int dirty_sum = node_secs + 2 * dent_secs + imeta_secs; + + if (sbi->hot_cold_params.enable == GRADING_SSR_OFF) + return f2fs_need_SSR(sbi); + if (f2fs_lfs_mode(sbi)) + return false; + if (sbi->gc_mode == GC_URGENT_HIGH) + return true; + if (contig_level == SEQ_256BLKS && type == CURSEG_WARM_DATA && + free_sections(sbi) > dirty_sum + 3 * reserved_sections(sbi) / 2) + return false; + if (free_sections(sbi) <= (unsigned int)(dirty_sum + 2 * reserved_sections(sbi))) + return true; + if (contig_level >= SEQ_32BLKS || total_blocks <= SSR_LOWER_LIMIT_BLK) + return false; + + left_space -= ovp_segments * KBS_PER_SEGMENT; + if (unlikely(left_space == 0)) + return false; + + switch (type) { + case CURSEG_HOT_DATA: + lower_limit = sbi->hot_cold_params.hot_data_lower_limit; + waterline = sbi->hot_cold_params.hot_data_waterline; + break; + case CURSEG_WARM_DATA: + lower_limit = sbi->hot_cold_params.warm_data_lower_limit; + waterline = sbi->hot_cold_params.warm_data_waterline; + break; + case CURSEG_HOT_NODE: + lower_limit = sbi->hot_cold_params.hot_node_lower_limit; + waterline = sbi->hot_cold_params.hot_node_waterline; + break; + case CURSEG_WARM_NODE: + lower_limit = sbi->hot_cold_params.warm_node_lower_limit; + waterline = sbi->hot_cold_params.warm_node_waterline; + break; + default: + return false; + } + + if (left_space > lower_limit) + return false; + if (div_u64((free_segs - ovp_segments) * 100, (left_space / KBS_PER_SEGMENT)) + <= waterline) { + trace_f2fs_grading_ssr_allocate( + (le64_to_cpu(sbi->raw_super->block_count) - sbi->total_valid_block_count), + free_segments(sbi), contig_level); + return true; + } else { + return false; + } +} +#endif + void f2fs_register_inmem_page(struct inode *inode, struct page *page) { struct inmem_pages *new; @@ -487,6 +587,101 @@ int f2fs_commit_inmem_pages(struct inode *inode) return err; } +#ifdef CONFIG_F2FS_STAT_FS +#define DEF_DIRTY_STAT_INTERVAL 15 /* 15 secs */ +static inline void f2fs_balance_decision(struct f2fs_sb_info *sbi, + bool *time_after, bool *balance_node, bool *balance_data) +{ + long diff_node_blocks, diff_data_blocks; + unsigned long last_jiffies; + struct timespec64 ts = {DEF_DIRTY_STAT_INTERVAL, 0}; + unsigned long interval = timespec64_to_jiffies(&ts); + struct f2fs_block_alloc_info *ba = F2FS_BA_STAT(sbi); + + ba_mutex_lock(&sbi->ba_mutex); + last_jiffies = ba->ssr_last_jiffies; + ba_mutex_unlock(&sbi->ba_mutex); + + if (!time_after(jiffies, last_jiffies + interval)) { + *time_after = false; + return; + } + + *time_after = true; + + /* how many blocks are consumed during this interval */ + ba_mutex_lock(&sbi->ba_mutex); + + diff_node_blocks = (long)(ba->curr_node_alloc_cnt - ba->last_node_alloc_cnt); + diff_data_blocks = (long)(ba->curr_data_alloc_cnt - ba->last_data_alloc_cnt); + + ba->last_node_alloc_cnt = ba->curr_node_alloc_cnt; + ba->last_data_alloc_cnt = ba->curr_data_alloc_cnt; + ba->ssr_last_jiffies = jiffies; + + 
ba_mutex_unlock(&sbi->ba_mutex); + + if (diff_node_blocks > (long)sbi->blocks_per_seg) + *balance_node = true; + else + *balance_node = false; + + if (diff_data_blocks > (long)sbi->blocks_per_seg) + *balance_data = true; + else + *balance_data = false; +} +#else +static inline void f2fs_balance_decision(struct f2fs_sb_info *sbi, + bool *time_after, bool *balance_node, bool *balance_data) +{ + *time_after = true; + *balance_node = true; + *balance_data = true; +} +#endif + +static bool need_balance_dirty_type(struct f2fs_sb_info *sbi) +{ + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + unsigned int dirty_node_segs = 0; + unsigned int dirty_data_segs = 0; + unsigned int dirty_segs; + bool time_after, balance_node, balance_data; + unsigned int randnum; + int i; + + f2fs_balance_decision(sbi, &time_after, &balance_node, &balance_data); + + if (!time_after) + return false; + if (!balance_node && !balance_data) + return false; + + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) + dirty_data_segs += dirty_i->nr_dirty[i]; + for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) + dirty_node_segs += dirty_i->nr_dirty[i]; + dirty_segs = dirty_data_segs + dirty_node_segs; + + if (!dirty_segs) + return false; + + if (dirty_data_segs < reserved_segments(sbi) && balance_data) { + get_random_bytes(&randnum, sizeof(unsigned int)); + if (randnum % 100 > dirty_data_segs * 100 / dirty_segs) + return true; + } + + if (dirty_node_segs < reserved_segments(sbi) && balance_node) { + get_random_bytes(&randnum, sizeof(unsigned int)); + if (randnum % 100 > dirty_node_segs * 100 / dirty_segs) + return true; + } + + return false; +} + /* * This function balances dirty node and dentry pages. * In addition, it controls garbage collection. @@ -510,8 +705,32 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) * dir/node pages without enough free segments. */ if (has_not_enough_free_secs(sbi, 0, 0)) { - down_write(&sbi->gc_lock); - f2fs_gc(sbi, false, false, false, NULL_SEGNO); + if (test_opt(sbi, GC_MERGE) && sbi->gc_thread && + sbi->gc_thread->f2fs_gc_task) { + DEFINE_WAIT(wait); + + prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait, + TASK_UNINTERRUPTIBLE); + wake_up(&sbi->gc_thread->gc_wait_queue_head); + io_schedule(); + finish_wait(&sbi->gc_thread->fggc_wq, &wait); + } else { + down_write(&sbi->gc_lock); + f2fs_gc(sbi, false, false, false, NULL_SEGNO); + } + } else if (f2fs_need_SSR(sbi) && need_balance_dirty_type(sbi)) { + if (test_opt(sbi, GC_MERGE) && sbi->gc_thread && + sbi->gc_thread->f2fs_gc_task) { + atomic_inc(&sbi->need_ssr_gc); + wake_up(&sbi->gc_thread->gc_wait_queue_head); + } else { + /* + * if f2fs_gc_task is not available, + * do f2fs_gc in the original task. 
+ */ + down_write(&sbi->gc_lock); + f2fs_gc(sbi, true, false, false, NULL_SEGNO); + } } } @@ -1106,43 +1325,41 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi, } static void __init_discard_policy(struct f2fs_sb_info *sbi, - struct discard_policy *dpolicy, + struct discard_policy *policy, int discard_type, unsigned int granularity) { - /* common policy */ - dpolicy->type = discard_type; - dpolicy->sync = true; - dpolicy->ordered = false; - dpolicy->granularity = granularity; - - dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; - dpolicy->io_aware_gran = MAX_PLIST_NUM; - dpolicy->timeout = false; + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; if (discard_type == DPOLICY_BG) { - dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; - dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME; - dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; - dpolicy->io_aware = true; - dpolicy->sync = false; - dpolicy->ordered = true; - if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) { - dpolicy->granularity = 1; - dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME; - } + *policy = dpolicys[DPOLICY_BG]; + } else if (discard_type == DPOLICY_BALANCE) { + *policy = dpolicys[DPOLICY_BALANCE]; } else if (discard_type == DPOLICY_FORCE) { - dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; - dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME; - dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; - dpolicy->io_aware = false; + *policy = dpolicys[DPOLICY_FORCE]; } else if (discard_type == DPOLICY_FSTRIM) { - dpolicy->io_aware = false; + *policy = dpolicys[DPOLICY_FSTRIM]; + if (policy->granularity != granularity) + policy->granularity = granularity; } else if (discard_type == DPOLICY_UMOUNT) { - dpolicy->io_aware = false; - /* we need to issue all to keep CP_TRIMMED_FLAG */ - dpolicy->granularity = 1; - dpolicy->timeout = true; + *policy = dpolicys[DPOLICY_UMOUNT]; + } + dcc->discard_type = discard_type; +} + +static void select_sub_discard_policy(struct discard_sub_policy **spolicy, + int index, struct discard_policy *dpolicy) +{ + if (dpolicy->type == DPOLICY_FSTRIM) { + *spolicy = &dpolicy->sub_policy[SUB_POLICY_BIG]; + return; } + + if ((index + 1) >= DISCARD_GRAN_BG) + *spolicy = &dpolicy->sub_policy[SUB_POLICY_BIG]; + else if ((index + 1) >= DISCARD_GRAN_BL) + *spolicy = &dpolicy->sub_policy[SUB_POLICY_MID]; + else + *spolicy = &dpolicy->sub_policy[SUB_POLICY_SMALL]; } static void __update_discard_tree_range(struct f2fs_sb_info *sbi, @@ -1151,6 +1368,7 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy, + int spolicy_index, struct discard_cmd *dc, unsigned int *issued) { @@ -1162,9 +1380,12 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ? &(dcc->fstrim_list) : &(dcc->wait_list); int flag = dpolicy->sync ? 
REQ_SYNC : 0; + struct discard_sub_policy *spolicy = NULL; block_t lstart, start, len, total_len; int err = 0; + select_sub_discard_policy(&spolicy, spolicy_index, dpolicy); + if (dc->state != D_PREP) return 0; @@ -1180,7 +1401,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, dc->len = 0; - while (total_len && *issued < dpolicy->max_requests && !err) { + while (total_len && *issued < spolicy->max_requests && !err) { struct bio *bio = NULL; unsigned long flags; bool last = true; @@ -1191,7 +1412,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, } (*issued)++; - if (*issued == dpolicy->max_requests) + if (*issued == spolicy->max_requests) last = true; dc->len += len; @@ -1438,7 +1659,8 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, } static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, - struct discard_policy *dpolicy) + struct discard_policy *dpolicy, + int spolicy_index) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_cmd *prev_dc = NULL, *next_dc = NULL; @@ -1448,8 +1670,11 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, unsigned int pos = dcc->next_pos; unsigned int issued = 0; bool io_interrupted = false; + struct discard_sub_policy *spolicy = NULL; + select_sub_discard_policy(&spolicy, spolicy_index, dpolicy); mutex_lock(&dcc->cmd_lock); + dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root, NULL, pos, (struct rb_entry **)&prev_dc, @@ -1473,9 +1698,9 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, } dcc->next_pos = dc->lstart + dc->len; - err = __submit_discard_cmd(sbi, dpolicy, dc, &issued); + err = __submit_discard_cmd(sbi, dpolicy, spolicy_index, dc, &issued); - if (issued >= dpolicy->max_requests) + if (issued >= spolicy->max_requests) break; next: node = rb_next(&dc->rb_node); @@ -1508,11 +1733,19 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, struct blk_plug plug; int i, issued; bool io_interrupted = false; + struct discard_sub_policy *spolicy = NULL; if (dpolicy->timeout) f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT); + /* only do this check when CONFIG_F2FS_CHECK_FS is set, as it may be time-consuming */ + if (unlikely(dcc->rbtree_check)) { + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, &dcc->root, false)); + mutex_unlock(&dcc->cmd_lock); + } retry: + blk_start_plug(&plug); issued = 0; for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { if (dpolicy->timeout && @@ -1522,8 +1755,13 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, if (i + 1 < dpolicy->granularity) break; - if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered) - return __issue_discard_cmd_orderly(sbi, dpolicy); + select_sub_discard_policy(&spolicy, i, dpolicy); + + if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered) { + issued = __issue_discard_cmd_orderly(sbi, dpolicy, i); + blk_finish_plug(&plug); + return issued; + } pend_list = &dcc->pend_list[i]; @@ -1533,7 +1771,6 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, if (unlikely(dcc->rbtree_check)) f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, &dcc->root, false)); - blk_start_plug(&plug); list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); @@ -1544,22 +1781,24 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, if (dpolicy->io_aware && i < dpolicy->io_aware_gran && !is_idle(sbi, DISCARD_TIME)) { io_interrupted = true; - break; + goto skip; } - - __submit_discard_cmd(sbi, dpolicy, dc, &issued); - - if (issued >=
dpolicy->max_requests) + __submit_discard_cmd(sbi, dpolicy, i, dc, &issued); +skip: + if (issued >= spolicy->max_requests) break; } - blk_finish_plug(&plug); next: mutex_unlock(&dcc->cmd_lock); - if (issued >= dpolicy->max_requests || io_interrupted) + if (issued >= spolicy->max_requests || io_interrupted) break; } + blk_finish_plug(&plug); + if (spolicy) + dpolicy->min_interval = spolicy->interval; + if (dpolicy->type == DPOLICY_UMOUNT && issued) { __wait_all_discard_cmd(sbi, dpolicy); goto retry; @@ -1720,8 +1959,7 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi) struct discard_policy dpolicy; bool dropped; - __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT, - dcc->discard_granularity); + __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT, 0); __issue_discard_cmd(sbi, &dpolicy); dropped = __drop_discard_cmd(sbi); @@ -1732,6 +1970,29 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi) return dropped; } +static int select_discard_type(struct f2fs_sb_info *sbi) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + block_t user_block_count = sbi->user_block_count; + block_t ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg; + block_t fs_available_blocks = user_block_count - + valid_user_blocks(sbi) + ovp_count; + int discard_type; + + if (fs_available_blocks >= fs_free_space_threshold(sbi) && + fs_available_blocks - dcc->undiscard_blks >= + device_free_space_threshold(sbi)) { + discard_type = DPOLICY_BG; + } else if (fs_available_blocks < fs_free_space_threshold(sbi) && + fs_available_blocks - dcc->undiscard_blks < + device_free_space_threshold(sbi)) { + discard_type = DPOLICY_FORCE; + } else { + discard_type = DPOLICY_BALANCE; + } + return discard_type; +} + static int issue_discard_thread(void *data) { struct f2fs_sb_info *sbi = data; @@ -1739,13 +2000,13 @@ static int issue_discard_thread(void *data) wait_queue_head_t *q = &dcc->discard_wait_queue; struct discard_policy dpolicy; unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; - int issued; + int issued, discard_type; set_freezable(); do { - __init_discard_policy(sbi, &dpolicy, DPOLICY_BG, - dcc->discard_granularity); + discard_type = select_discard_type(sbi); + __init_discard_policy(sbi, &dpolicy, discard_type, 0); wait_event_interruptible_timeout(*q, kthread_should_stop() || freezing(current) || @@ -1771,7 +2032,7 @@ static int issue_discard_thread(void *data) } if (sbi->gc_mode == GC_URGENT_HIGH) - __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1); + __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 0); sb_start_intwrite(sbi->sb); @@ -2088,7 +2349,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) if (!dcc) return -ENOMEM; - dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY; + dcc->discard_granularity = DISCARD_GRAN_BG; INIT_LIST_HEAD(&dcc->entry_list); for (i = 0; i < MAX_PLIST_NUM; i++) INIT_LIST_HEAD(&dcc->pend_list[i]); @@ -2869,7 +3130,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, * This function should be returned with success, otherwise BUG */ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, - int type, bool force) + int type, bool force, int contig_level) { struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -2882,8 +3143,12 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, is_next_segment_free(sbi, curseg, type) && likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) new_curseg(sbi, type, false); +#ifdef CONFIG_F2FS_GRADING_SSR + else if (need_ssr_by_type(sbi, type, contig_level) && 
get_ssr_segment(sbi, type, SSR, 0)) +#else else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0)) +#endif change_curseg(sbi, type, true); else new_curseg(sbi, type, false); @@ -2941,7 +3206,7 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, return; alloc: old_segno = curseg->segno; - SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true); + SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true, SEQ_NONE); locate_dirty_segment(sbi, old_segno); } @@ -3003,8 +3268,17 @@ static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi, struct rb_node **insert_p = NULL, *insert_parent = NULL; struct discard_cmd *dc; struct blk_plug plug; + struct discard_sub_policy *spolicy = NULL; int issued; unsigned int trimmed = 0; + /* fstrim issues 8 discards per round, without interruption */ + select_sub_discard_policy(&spolicy, 0, dpolicy); + + if (dcc->rbtree_check) { + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, &dcc->root, false)); + mutex_unlock(&dcc->cmd_lock); + } next: issued = 0; @@ -3036,9 +3310,9 @@ static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi, goto skip; } - err = __submit_discard_cmd(sbi, dpolicy, dc, &issued); + err = __submit_discard_cmd(sbi, dpolicy, 0, dc, &issued); - if (issued >= dpolicy->max_requests) { + if (issued >= spolicy->max_requests) { start = dc->lstart + dc->len; if (err) @@ -3332,13 +3606,17 @@ static int __get_segment_type(struct f2fs_io_info *fio) void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, - struct f2fs_io_info *fio) + struct f2fs_io_info *fio, int contig_level) { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned long long old_mtime; bool from_gc = (type == CURSEG_ALL_DATA_ATGC); struct seg_entry *se = NULL; +#ifdef CONFIG_F2FS_GRADING_SSR + struct inode *inode = NULL; +#endif + int contig = SEQ_NONE; down_read(&SM_I(sbi)->curseg_lock); @@ -3367,6 +3645,12 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, __refresh_next_blkoff(sbi, curseg); stat_inc_block_count(sbi, curseg); + ba_mutex_lock(&sbi->ba_mutex); + if (type >= CURSEG_HOT_DATA && type <= CURSEG_COLD_DATA) + inc_ba_val(sbi, curr_data_alloc_cnt, 1); + else if (type >= CURSEG_HOT_NODE && type <= CURSEG_COLD_NODE) + inc_ba_val(sbi, curr_node_alloc_cnt, 1); + ba_mutex_unlock(&sbi->ba_mutex); if (from_gc) { old_mtime = get_segment_mtime(sbi, old_blkaddr); @@ -3385,11 +3669,25 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, update_sit_entry(sbi, old_blkaddr, -1); if (!__has_curseg_space(sbi, curseg)) { - if (from_gc) + if (from_gc) { get_atssr_segment(sbi, type, se->type, AT_SSR, se->mtime); - else - sit_i->s_ops->allocate_segment(sbi, type, false); + } else { +#ifdef CONFIG_F2FS_GRADING_SSR + if (contig_level != SEQ_NONE) { + contig = contig_level; + goto allocate_label; + } + + if (page && page->mapping && page->mapping != NODE_MAPPING(sbi) && + page->mapping != META_MAPPING(sbi)) { + inode = page->mapping->host; + contig = check_io_seq(get_dirty_pages(inode)); + } +allocate_label: +#endif + sit_i->s_ops->allocate_segment(sbi, type, false, contig); + } } /* * segment dirty status should be updated after segment allocation, @@ -3456,7 +3754,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) down_read(&fio->sbi->io_order_lock); reallocate: f2fs_allocate_data_block(fio->sbi,
fio->page, fio->old_blkaddr, - &fio->new_blkaddr, sum, type, fio); + &fio->new_blkaddr, sum, type, fio, SEQ_NONE); if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) invalidate_mapping_pages(META_MAPPING(fio->sbi), fio->old_blkaddr, fio->old_blkaddr); @@ -4825,7 +5123,7 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) f2fs_notice(sbi, "Assign new section to curseg[%d]: " "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff); - allocate_segment_by_default(sbi, type, true); + allocate_segment_by_default(sbi, type, true, SEQ_NONE); /* check consistency of the zone curseg pointed to */ if (check_zone_write_pointer(sbi, zbd, &zone)) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 1bf33fc27b8f83b69630c1266c67e36a45318627..b176246659850f33c55aba1e79ca6c4820a77b7d 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -130,7 +130,19 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, (((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK) #define SECTOR_TO_BLOCK(sectors) \ ((sectors) >> F2FS_LOG_SECTORS_PER_BLOCK) +#ifdef CONFIG_F2FS_GRADING_SSR +#define KBS_PER_SEGMENT 2048 +#define SSR_LOWER_LIMIT_BLK (16<<18) /* 16G */ +#endif + +#define SSR_CONTIG_DIRTY_NUMS 32 /* dirty-page threshold for LFS allocation in grading SSR */ +#define SSR_CONTIG_LARGE 256 /* large files */ +enum { + SEQ_NONE, + SEQ_32BLKS, + SEQ_256BLKS +}; /* * indicate a block allocation direction: RIGHT and LEFT. * RIGHT means allocating new sections towards the end of volume. @@ -180,6 +192,13 @@ enum { FORCE_FG_GC, }; +#ifdef CONFIG_F2FS_GRADING_SSR +enum { + GRADING_SSR_OFF = 0, + GRADING_SSR_ON +}; +#endif + /* for a function parameter to select a victim segment */ struct victim_sel_policy { int alloc_mode; /* LFS or SSR */ @@ -221,7 +240,7 @@ struct sec_entry { }; struct segment_allocation { - void (*allocate_segment)(struct f2fs_sb_info *, int, bool); + void (*allocate_segment)(struct f2fs_sb_info *, int, bool, int); }; #define MAX_SKIP_GC_COUNT 16 @@ -913,3 +932,15 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force) dcc->discard_wake = 1; wake_up_interruptible_all(&dcc->discard_wait_queue); } + +#ifdef CONFIG_F2FS_GRADING_SSR +static inline int check_io_seq(int blks) +{ + if (blks >= SSR_CONTIG_LARGE) + return SEQ_256BLKS; + else if (blks >= SSR_CONTIG_DIRTY_NUMS) + return SEQ_32BLKS; + else + return SEQ_NONE; +} +#endif diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index de543168b3708e331cd468b1810f95c691693aad..be89c9b179c987df3dec382f469bbddd6a44182c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -36,6 +36,19 @@ #define CREATE_TRACE_POINTS #include <trace/events/f2fs.h> +#ifdef CONFIG_F2FS_GRADING_SSR +#define SSR_DEFAULT_SPACE_LIMIT (5<<20) /* 5G default space limit */ +#define SSR_DEFAULT_WATERLINE 80 /* 80% default waterline */ +#define SSR_HN_SPACE_LIMIT_128G (8<<20) /* 8G default hot node space limit for 128G devices */ +#define SSR_HN_WATERLINE_128G 80 /* 80% default hot node waterline for 128G devices */ +#define SSR_WN_SPACE_LIMIT_128G (5<<20) /* 5G default warm node space limit for 128G devices */ +#define SSR_WN_WATERLINE_128G 70 /* 70% default warm node waterline for 128G devices */ +#define SSR_HD_SPACE_LIMIT_128G (8<<20) /* 8G default hot data space limit for 128G devices */ +#define SSR_HD_WATERLINE_128G 65 /* 65% default hot data waterline for 128G devices */ +#define SSR_WD_SPACE_LIMIT_128G (5<<20) /* 5G default warm data space limit for 128G devices */ +#define SSR_WD_WATERLINE_128G 60 /* 60% default warm data waterline for 128G devices */
+#endif + static struct kmem_cache *f2fs_inode_cachep; #ifdef CONFIG_F2FS_FAULT_INJECTION @@ -147,6 +160,8 @@ enum { Opt_compress_log_size, Opt_compress_extension, Opt_atgc, + Opt_gc_merge, + Opt_nogc_merge, Opt_err, }; @@ -215,6 +230,8 @@ static match_table_t f2fs_tokens = { {Opt_compress_log_size, "compress_log_size=%u"}, {Opt_compress_extension, "compress_extension=%s"}, {Opt_atgc, "atgc"}, + {Opt_gc_merge, "gc_merge"}, + {Opt_nogc_merge, "nogc_merge"}, {Opt_err, NULL}, }; @@ -944,6 +961,12 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) case Opt_atgc: set_opt(sbi, ATGC); break; + case Opt_gc_merge: + set_opt(sbi, GC_MERGE); + break; + case Opt_nogc_merge: + clear_opt(sbi, GC_MERGE); + break; default: f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", p); @@ -1301,6 +1324,10 @@ static void f2fs_put_super(struct super_block *sb) kvfree(sbi->write_io[i]); #ifdef CONFIG_UNICODE utf8_unload(sb->s_encoding); +#endif +#ifdef CONFIG_F2FS_STAT_FS + kfree(sbi->ba_info); + sbi->ba_info = NULL; #endif kfree(sbi); } @@ -1536,6 +1563,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) else if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF) seq_printf(seq, ",background_gc=%s", "off"); + if (test_opt(sbi, GC_MERGE)) + seq_puts(seq, ",gc_merge"); + if (test_opt(sbi, DISABLE_ROLL_FORWARD)) seq_puts(seq, ",disable_roll_forward"); if (test_opt(sbi, NORECOVERY)) @@ -1902,7 +1932,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * option. Also sync the filesystem. */ if ((*flags & SB_RDONLY) || - F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF) { + (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF && + !test_opt(sbi, GC_MERGE))) { if (sbi->gc_thread) { f2fs_stop_gc_thread(sbi); need_restart_gc = true; } @@ -3489,6 +3520,35 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) sbi->readdir_ra = 1; } +#ifdef CONFIG_F2FS_GRADING_SSR +static void f2fs_init_grading_ssr(struct f2fs_sb_info *sbi) +{ + u32 total_blocks = le64_to_cpu(sbi->raw_super->block_count) >> 18; + + if (total_blocks > 64) { /* 64G */ + sbi->hot_cold_params.hot_data_lower_limit = SSR_HD_SPACE_LIMIT_128G; + sbi->hot_cold_params.hot_data_waterline = SSR_HD_WATERLINE_128G; + sbi->hot_cold_params.warm_data_lower_limit = SSR_WD_SPACE_LIMIT_128G; + sbi->hot_cold_params.warm_data_waterline = SSR_WD_WATERLINE_128G; + sbi->hot_cold_params.hot_node_lower_limit = SSR_HN_SPACE_LIMIT_128G; + sbi->hot_cold_params.hot_node_waterline = SSR_HN_WATERLINE_128G; + sbi->hot_cold_params.warm_node_lower_limit = SSR_WN_SPACE_LIMIT_128G; + sbi->hot_cold_params.warm_node_waterline = SSR_WN_WATERLINE_128G; + sbi->hot_cold_params.enable = GRADING_SSR_OFF; + } else { + sbi->hot_cold_params.hot_data_lower_limit = SSR_DEFAULT_SPACE_LIMIT; + sbi->hot_cold_params.hot_data_waterline = SSR_DEFAULT_WATERLINE; + sbi->hot_cold_params.warm_data_lower_limit = SSR_DEFAULT_SPACE_LIMIT; + sbi->hot_cold_params.warm_data_waterline = SSR_DEFAULT_WATERLINE; + sbi->hot_cold_params.hot_node_lower_limit = SSR_DEFAULT_SPACE_LIMIT; + sbi->hot_cold_params.hot_node_waterline = SSR_DEFAULT_WATERLINE; + sbi->hot_cold_params.warm_node_lower_limit = SSR_DEFAULT_SPACE_LIMIT; + sbi->hot_cold_params.warm_node_waterline = SSR_DEFAULT_WATERLINE; + sbi->hot_cold_params.enable = GRADING_SSR_OFF; + } +} +#endif + static int f2fs_fill_super(struct super_block *sb, void *data, int silent) { struct f2fs_sb_info *sbi; @@ -3512,6 +3572,16 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if
(!sbi) return -ENOMEM; +#ifdef CONFIG_F2FS_STAT_FS + sbi->ba_info = kzalloc(sizeof(struct f2fs_block_alloc_info), GFP_KERNEL); + if (!sbi->ba_info) { + err = -ENOMEM; + goto free_sbi; + } + sbi->ba_info->ssr_last_jiffies = jiffies; + mutex_init(&sbi->ba_mutex); +#endif + sbi->sb = sb; /* Load the checksum driver */ @@ -3749,6 +3819,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) le64_to_cpu(seg_i->journal->info.kbytes_written); f2fs_build_gc_manager(sbi); + atomic_set(&sbi->need_ssr_gc, 0); err = f2fs_build_stats(sbi); if (err) @@ -3781,7 +3852,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) err = -ENOMEM; goto free_node_inode; } - +#ifdef CONFIG_F2FS_GRADING_SSR + f2fs_init_grading_ssr(sbi); +#endif err = f2fs_register_sysfs(sbi); if (err) goto free_root_inode; @@ -3794,6 +3867,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) f2fs_err(sbi, "Cannot turn on quotas: error %d", err); } #endif + /* urgent gc flag init */ + sbi->is_frag = false; + sbi->last_urgent_check = jiffies; + /* if there are any orphan inodes, free them */ err = f2fs_recover_orphan_inodes(sbi); if (err) @@ -3872,7 +3949,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) * If filesystem is not mounted as read-only then * do start the gc_thread. */ - if (F2FS_OPTION(sbi).bggc_mode != BGGC_MODE_OFF && !f2fs_readonly(sb)) { + if ((F2FS_OPTION(sbi).bggc_mode != BGGC_MODE_OFF || + test_opt(sbi, GC_MERGE)) && !f2fs_readonly(sb)) { /* After POR, we can run background GC thread.*/ err = f2fs_start_gc_thread(sbi); if (err) @@ -3967,6 +4045,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) free_sb_buf: kfree(raw_super); free_sbi: +#ifdef CONFIG_F2FS_STAT_FS + kfree(sbi->ba_info); + sbi->ba_info = NULL; +#endif if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); kfree(sbi); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index b8850c81068a0e861413041c0c78482f35dcd839..92f10929499412fad35ce03799a35d7a63b85426 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -34,6 +34,9 @@ enum { FAULT_INFO_TYPE, /* struct f2fs_fault_info */ #endif RESERVED_BLOCKS, /* struct f2fs_sb_info */ +#ifdef CONFIG_F2FS_GRADING_SSR + F2FS_HOT_COLD_PARAMS, +#endif }; struct f2fs_attr { @@ -61,6 +64,10 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) return (unsigned char *)NM_I(sbi); else if (struct_type == F2FS_SBI || struct_type == RESERVED_BLOCKS) return (unsigned char *)sbi; +#ifdef CONFIG_F2FS_GRADING_SSR + else if (struct_type == F2FS_HOT_COLD_PARAMS) + return (unsigned char *)&sbi->hot_cold_params; +#endif #ifdef CONFIG_F2FS_FAULT_INJECTION else if (struct_type == FAULT_INFO_RATE || struct_type == FAULT_INFO_TYPE) @@ -538,10 +545,12 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle, gc_mode); +F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_preference, gc_preference); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_type, 
discard_type); F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); @@ -568,6 +577,26 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_period_ms, iostat_period_ms); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold); F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list); +#ifdef CONFIG_F2FS_GRADING_SSR +F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params, + hc_hot_data_lower_limit, hot_data_lower_limit); +F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params, + hc_hot_data_waterline, hot_data_waterline); +F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params, + hc_warm_data_lower_limit, warm_data_lower_limit); +F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params, + hc_warm_data_waterline, warm_data_waterline); +F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params, + hc_hot_node_lower_limit, hot_node_lower_limit); +F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params, + hc_hot_node_waterline, hot_node_waterline); +F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params, + hc_warm_node_lower_limit, warm_node_lower_limit); +F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params, + hc_warm_node_waterline, warm_node_waterline); +F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params, + hc_enable, enable); +#endif #ifdef CONFIG_F2FS_FAULT_INJECTION F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); @@ -626,11 +655,13 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_max_sleep_time), ATTR_LIST(gc_no_gc_sleep_time), ATTR_LIST(gc_idle), + ATTR_LIST(gc_preference), ATTR_LIST(gc_urgent), ATTR_LIST(reclaim_segments), ATTR_LIST(main_blkaddr), ATTR_LIST(max_small_discards), ATTR_LIST(discard_granularity), + ATTR_LIST(discard_type), ATTR_LIST(batched_trim_sections), ATTR_LIST(ipu_policy), ATTR_LIST(min_ipu_util), @@ -677,6 +708,17 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(moved_blocks_foreground), ATTR_LIST(moved_blocks_background), ATTR_LIST(avg_vblocks), +#endif +#ifdef CONFIG_F2FS_GRADING_SSR + ATTR_LIST(hc_hot_data_lower_limit), + ATTR_LIST(hc_hot_data_waterline), + ATTR_LIST(hc_warm_data_lower_limit), + ATTR_LIST(hc_warm_data_waterline), + ATTR_LIST(hc_hot_node_lower_limit), + ATTR_LIST(hc_hot_node_waterline), + ATTR_LIST(hc_warm_node_lower_limit), + ATTR_LIST(hc_warm_node_waterline), + ATTR_LIST(hc_enable), #endif NULL, }; @@ -908,6 +950,66 @@ static int __maybe_unused victim_bits_seq_show(struct seq_file *seq, return 0; } +static int undiscard_info_seq_show(struct seq_file *seq, void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + struct sit_info *sit_i = SIT_I(sbi); + unsigned int total_segs = le32_to_cpu(sbi->raw_super->segment_count_main); + unsigned int total = 0; + unsigned int i, j; + + if (!f2fs_realtime_discard_enable(sbi)) + goto out; + + for (i = 0; i < total_segs; i++) { + struct seg_entry *se = get_seg_entry(sbi, i); + unsigned int entries = SIT_VBLOCK_MAP_SIZE / + sizeof(unsigned long); + unsigned int max_blocks = sbi->blocks_per_seg; + unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map; + unsigned long *discard_map = (unsigned long *)se->discard_map; + unsigned long *dmap 
= SIT_I(sbi)->tmp_map; + int start = 0, end = -1; + + down_write(&sit_i->sentry_lock); + if (se->valid_blocks == max_blocks) { + up_write(&sit_i->sentry_lock); + continue; + } + + if (se->valid_blocks == 0) { + mutex_lock(&dirty_i->seglist_lock); + if (test_bit((int)i, dirty_i->dirty_segmap[PRE])) + total += 512; + mutex_unlock(&dirty_i->seglist_lock); + } else { + for (j = 0; j < entries; j++) + dmap[j] = ~ckpt_map[j] & ~discard_map[j]; + while (1) { + start = (int)__find_rev_next_bit(dmap, + (unsigned long)max_blocks, + (unsigned long)(end + 1)); + + if ((unsigned int)start >= max_blocks) + break; + + end = (int)__find_rev_next_zero_bit(dmap, + (unsigned long)max_blocks, + (unsigned long)(start + 1)); + total += (unsigned int)(end - start); + } + } + + up_write(&sit_i->sentry_lock); + } + +out: + seq_printf(seq, "total undiscard:%u K\n", total * 4); + return 0; +} + int __init f2fs_init_sysfs(void) { int ret; @@ -964,6 +1066,9 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) iostat_info_seq_show, sb); proc_create_single_data("victim_bits", S_IRUGO, sbi->s_proc, victim_bits_seq_show, sb); + proc_create_single_data("undiscard_info", S_IRUGO, sbi->s_proc, + undiscard_info_seq_show, sb); + } return 0; } @@ -975,6 +1080,7 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi) remove_proc_entry("segment_info", sbi->s_proc); remove_proc_entry("segment_bits", sbi->s_proc); remove_proc_entry("victim_bits", sbi->s_proc); + remove_proc_entry("undiscard_info", sbi->s_proc); remove_proc_entry(sbi->sb->s_id, f2fs_proc_root); } kobject_del(&sbi->s_kobj); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 56b113e3cd6aa48985c00c3385f6a7cb9afe7b2d..b4fe1db78eaed0fb640cedea35d69519f37850a1 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1957,6 +1957,39 @@ TRACE_EVENT(f2fs_fiemap, __entry->ret) ); +#ifdef CONFIG_F2FS_GRADING_SSR +DECLARE_EVENT_CLASS(f2fs_grading_ssr, + + TP_PROTO(unsigned int left, unsigned int free, + unsigned int seq), + + TP_ARGS(left, free, seq), + + TP_STRUCT__entry( + __field(unsigned int, left) + __field(unsigned int, free) + __field(unsigned int, seq) + ), + + TP_fast_assign( + __entry->left = left; + __entry->free = free; + __entry->seq = seq; + ), + + TP_printk("ssr: left_space %u free_segments: %u is_seq: %u ", + __entry->left, __entry->free, __entry->seq) +); + +DEFINE_EVENT(f2fs_grading_ssr, f2fs_grading_ssr_allocate, + + TP_PROTO(unsigned int left, unsigned int free, + unsigned int seq), + + TP_ARGS(left, free, seq) +); +#endif + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */
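
A note on the gc_merge handoff implemented in gc.c and segment.c above: with the option enabled, f2fs_balance_fs() no longer runs foreground GC itself; it queues on fggc_wq, wakes the GC thread, and sleeps until that thread finishes a GC round and calls wake_up_all(). Below is a minimal userspace sketch of the same handoff, with pthreads standing in for kernel wait queues; the names and the pending flag are illustrative, not part of the patch.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t gc_wait = PTHREAD_COND_INITIALIZER;   /* ~ gc_wait_queue_head */
static pthread_cond_t fggc_done = PTHREAD_COND_INITIALIZER; /* ~ fggc_wq */
static int fggc_pending;   /* ~ waitqueue_active(fggc_wq) */
static bool stop;

static void *gc_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while (!stop) {
		/* sleep until a foreground request arrives */
		while (!fggc_pending && !stop)
			pthread_cond_wait(&gc_wait, &lock);
		if (stop)
			break;
		pthread_mutex_unlock(&lock);
		puts("gc_thread: one GC round on behalf of the caller");
		usleep(1000);                        /* stand-in for f2fs_gc() */
		pthread_mutex_lock(&lock);
		fggc_pending = 0;
		pthread_cond_broadcast(&fggc_done);  /* ~ wake_up_all(&gc_th->fggc_wq) */
	}
	pthread_mutex_unlock(&lock);
	return NULL;
}

/* ~ f2fs_balance_fs() with gc_merge enabled */
static void balance_fs(void)
{
	pthread_mutex_lock(&lock);
	fggc_pending = 1;
	pthread_cond_signal(&gc_wait);               /* ~ wake_up(&gc_wait_queue_head) */
	while (fggc_pending)
		pthread_cond_wait(&fggc_done, &lock);    /* ~ io_schedule() on fggc_wq */
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, gc_thread, NULL);
	balance_fs();
	pthread_mutex_lock(&lock);
	stop = true;
	pthread_cond_signal(&gc_wait);
	pthread_mutex_unlock(&lock);
	pthread_join(t, NULL);
	return 0;
}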
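On the fragmentation check: __is_frag_urgent() above walks each segment's validity bitmap, slices the free space into extents, and buckets extent lengths by ilog2(); if buckets 0 and 1 (extents of 1 to 3 blocks) hold at least half of the counted free blocks, the volume is treated as badly fragmented. A self-contained sketch of the same bookkeeping over a plain byte-per-block map follows; the kernel version uses reverse-order bitmaps and find_next_free_extent(), which this toy deliberately avoids.

#include <stdbool.h>
#include <stdio.h>

#define NR_BUCKETS 10	/* ~ BLOCK_COUNT_TYPE */

/* integer log2 for a positive value */
static int ilog2(unsigned int v)
{
	int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

/* valid[i] != 0 means block i holds live data */
static bool is_frag_urgent(const unsigned char *valid, unsigned int nblocks)
{
	unsigned int bucket[NR_BUCKETS] = {0}, tot = 0, i = 0;

	while (i < nblocks) {
		unsigned int start, len, idx;

		while (i < nblocks && valid[i])		/* skip valid blocks */
			i++;
		start = i;
		while (i < nblocks && !valid[i])	/* measure the free extent */
			i++;
		len = i - start;
		if (!len)
			continue;
		idx = ilog2(len);
		if (idx >= NR_BUCKETS)
			idx = NR_BUCKETS - 1;
		bucket[idx] += len;
		tot += len;
	}
	for (i = 0; i < NR_BUCKETS; i++)
		printf("block_cnt[%u]: %u\n", i, bucket[i]);
	/* half of the free space sits in 1..3-block holes -> fragmented */
	return tot && (bucket[0] + bucket[1]) >= (tot >> 1);
}

int main(void)
{
	/* alternating pattern: every hole is a single block */
	unsigned char map[16] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0};

	printf("urgent: %d\n", is_frag_urgent(map, 16));
	return 0;
}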
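On GC pacing: gc_perf_ratio() above is the share (0 to 100) of reclaimable space that is already free, and in GC_PERF mode the thread scales its sleep time by that ratio, clamped to MIN_WT. The arithmetic in isolation, with constants copied from the patch and made-up block counts:

#include <stdio.h>

#define DEF_GC_BALANCE_MIN_SLEEP_TIME 10000	/* ms */
#define MIN_WT 1000				/* ms */

/* percentage of reclaimable blocks that are already free */
static unsigned int gc_perf_ratio(unsigned long long free_user_blocks,
				  unsigned long long reclaimable_user_blocks)
{
	return reclaimable_user_blocks == 0 ? 100 :
		(unsigned int)(100ULL * free_user_blocks / reclaimable_user_blocks);
}

int main(void)
{
	unsigned int ratio = gc_perf_ratio(5000, 100000);	/* only 5% free: GC hard */
	unsigned int wait_ms = DEF_GC_BALANCE_MIN_SLEEP_TIME * ratio / 100;

	if (wait_ms < MIN_WT)		/* ~ max(..., MIN_WT) in the patch */
		wait_ms = MIN_WT;
	printf("ratio=%u%% -> sleep %ums between GC rounds\n", ratio, wait_ms);
	return 0;
}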
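On the graded-SSR decision: need_ssr_by_type() above falls back to SSR only when the remaining free space drops under a per-temperature lower limit and the ratio of free segments to remaining space falls under a waterline percentage. A stripped-down model of that final comparison, using the patch's units (left space in KB, one 2 MB segment = KBS_PER_SEGMENT KB) and its 128G hot-data defaults; the struct and function names here are illustrative:

#include <stdbool.h>
#include <stdio.h>

#define KBS_PER_SEGMENT 2048	/* 2 MB segment, in KB */

struct waterline_cfg {
	unsigned long long lower_limit_kb;	/* grade SSR only below this much free space */
	unsigned int waterline;			/* percent */
};

/*
 * left_kb: free space left, in KB (overprovision already subtracted);
 * free_segs: free segments minus overprovision segments.
 */
static bool need_graded_ssr(const struct waterline_cfg *cfg,
			    unsigned long long left_kb, unsigned int free_segs)
{
	unsigned long long left_segs;

	if (left_kb > cfg->lower_limit_kb)
		return false;	/* plenty of space: keep LFS allocation */
	left_segs = left_kb / KBS_PER_SEGMENT;
	if (!left_segs)
		return false;
	/* few free segments relative to remaining space -> steal blocks via SSR */
	return free_segs * 100ULL / left_segs <= cfg->waterline;
}

int main(void)
{
	/* hot data on a >64G device: 8G lower limit, 65% waterline */
	struct waterline_cfg hot_data = { 8ULL << 20, 65 };

	printf("6G left, 1000 free segs -> %d\n",
	       need_graded_ssr(&hot_data, 6ULL << 20, 1000));
	printf("6G left, 2500 free segs -> %d\n",
	       need_graded_ssr(&hot_data, 6ULL << 20, 2500));
	return 0;
}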
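On dirty-type balancing: need_balance_dirty_type() above triggers an SSR GC round probabilistically; the smaller a class's (data or node) share of all dirty segments, the more likely it is to be balanced. A compact model of that biased coin flip, with rand() standing in for get_random_bytes():

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/*
 * Trigger balancing with probability roughly (1 - share), where share is
 * this class's fraction of all dirty segments: starved classes win more often.
 */
static bool should_balance(unsigned int dirty_this, unsigned int dirty_total)
{
	unsigned int r = (unsigned int)rand() % 100;

	return r > dirty_this * 100 / dirty_total;
}

int main(void)
{
	unsigned int hits = 0, i;

	srand((unsigned int)time(NULL));
	/* node segments are only 10% of dirty segments -> ~90% trigger rate */
	for (i = 0; i < 1000; i++)
		hits += should_balance(100, 1000);
	printf("triggered %u/1000 times\n", hits);
	return 0;
}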
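On discard policy selection: select_discard_type() above picks the policy from two free-space thresholds, 20% of main-area blocks for the filesystem (FS_FREE_SPACE_PERCENT) and 10% for the device (DEVICE_FREE_SPACE_PERCENT), with undiscarded blocks counting against the device side. The decision in isolation, with made-up segment geometry:

#include <stdio.h>

enum { DPOLICY_BG, DPOLICY_BALANCE, DPOLICY_FORCE };

#define FS_FREE_SPACE_PERCENT		20
#define DEVICE_FREE_SPACE_PERCENT	10

static int select_discard_type(unsigned long long main_blocks,
			       unsigned long long fs_available,
			       unsigned long long undiscard_blks)
{
	unsigned long long fs_thresh = main_blocks * FS_FREE_SPACE_PERCENT / 100;
	unsigned long long dev_thresh = main_blocks * DEVICE_FREE_SPACE_PERCENT / 100;
	unsigned long long dev_available = fs_available - undiscard_blks;

	if (fs_available >= fs_thresh && dev_available >= dev_thresh)
		return DPOLICY_BG;	/* space is plentiful: lazy, I/O-aware discard */
	if (fs_available < fs_thresh && dev_available < dev_thresh)
		return DPOLICY_FORCE;	/* both tight: discard aggressively */
	return DPOLICY_BALANCE;		/* in between */
}

int main(void)
{
	/* 1M main blocks, 150k available, 80k of them not yet discarded */
	printf("policy=%d\n", select_discard_type(1000000, 150000, 80000));
	return 0;
}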