From 0944abb7fb4794928ff8e06e08a9cc3d7b07ebb9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 16 Oct 2023 21:10:50 +0100 Subject: [PATCH 01/17] mpage: convert map_buffer_to_folio() to folio_create_empty_buffers() ANBZ: #21672 commit 4f05f139e3f8938c4c456bd886355c9bbcc3190d upstream. Saves a folio->page->folio conversion. Link: https://lkml.kernel.org/r/20231016201114.1928083-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Pankaj Raghav Cc: Andreas Gruenbacher Cc: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Joseph Qi --- fs/mpage.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/mpage.c b/fs/mpage.c index 242e213ee064..964a6efe594d 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -119,8 +119,7 @@ static void map_buffer_to_folio(struct folio *folio, struct buffer_head *bh, folio_mark_uptodate(folio); return; } - create_empty_buffers(&folio->page, i_blocksize(inode), 0); - head = folio_buffers(folio); + head = folio_create_empty_buffers(folio, i_blocksize(inode), 0); } page_bh = head; -- Gitee From 779a6cee1867f29b4b1274eb9519c40b7684eb7c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 16 Oct 2023 21:10:51 +0100 Subject: [PATCH 02/17] ext4: convert to folio_create_empty_buffers ANBZ: #21672 commit d4059993674b533798a551859042957bb5578332 upstream. Remove an unnecessary folio->page->folio conversion and take advantage of the new return value from folio_create_empty_buffers(). Link: https://lkml.kernel.org/r/20231016201114.1928083-5-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Pankaj Raghav Cc: Andreas Gruenbacher Cc: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Joseph Qi --- fs/ext4/inode.c | 14 +++++--------- fs/ext4/move_extent.c | 11 +++++------ 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3805c0f43610..4721eeaf707c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1046,10 +1046,8 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len, BUG_ON(from > to); head = folio_buffers(folio); - if (!head) { - create_empty_buffers(&folio->page, blocksize, 0); - head = folio_buffers(folio); - } + if (!head) + head = folio_create_empty_buffers(folio, blocksize, 0); bbits = ilog2(blocksize); block = (sector_t)folio->index << (PAGE_SHIFT - bbits); @@ -1179,7 +1177,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, * starting the handle. */ if (!folio_buffers(folio)) - create_empty_buffers(&folio->page, inode->i_sb->s_blocksize, 0); + folio_create_empty_buffers(folio, inode->i_sb->s_blocksize, 0); folio_unlock(folio); @@ -3684,10 +3682,8 @@ static int __ext4_block_zero_page_range(handle_t *handle, iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits); bh = folio_buffers(folio); - if (!bh) { - create_empty_buffers(&folio->page, blocksize, 0); - bh = folio_buffers(folio); - } + if (!bh) + bh = folio_create_empty_buffers(folio, blocksize, 0); /* Find the buffer that contains "offset" */ pos = blocksize; diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 49eff9648314..860420405e8a 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -183,10 +183,8 @@ static int mext_page_mkuptodate(struct folio *folio, size_t from, size_t to) blocksize = i_blocksize(inode); head = folio_buffers(folio); - if (!head) { - create_empty_buffers(&folio->page, blocksize, 0); - head = folio_buffers(folio); - } + if (!head) + head = folio_create_empty_buffers(folio, blocksize, 0); block = (sector_t)folio->index << (PAGE_SHIFT - inode->i_blkbits); for (bh = head, block_start = 0; bh != head || !block_start; @@ -386,9 +384,10 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, } /* Perform all necessary steps similar write_begin()/write_end() * but keeping in mind that i_size will not change */ - if (!folio_buffers(folio[0])) - create_empty_buffers(&folio[0]->page, 1 << orig_inode->i_blkbits, 0); bh = folio_buffers(folio[0]); + if (!bh) + bh = folio_create_empty_buffers(folio[0], + 1 << orig_inode->i_blkbits, 0); for (i = 0; i < data_offset_in_page; i++) bh = bh->b_this_page; for (i = 0; i < block_len_in_page; i++) { -- Gitee From 73431bf593b16f6bfe181bb76b37d76d70950fad Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 7 Nov 2023 21:26:40 +0000 Subject: [PATCH 03/17] mm: add folio_zero_tail() and use it in ext4 ANBZ: #21672 commit a4fc4a0c45f2617c3aa8b693739de264e0c09909 upstream. Patch series "Add folio_zero_tail() and folio_fill_tail()". I'm trying to make it easier for filesystems with tailpacking / stuffing / inline data to use folios. The primary function here is folio_fill_tail(). You give it a pointer to memory where the data currently is, and it takes care of copying it into the folio at that offset. That works for gfs2 & iomap. Then There's Ext4. Rather than gin up some kind of specialist "Here's a two pointers to two blocks of memory" routine, just let it do its current thing, and let it call folio_zero_tail(), which is also called by folio_fill_tail(). Other filesystems can be converted later; these ones seemed like good examples as they're already partly or completely converted to folios. This patch (of 3): Instead of unmapping the folio after copying the data to it, then mapping it again to zero the tail, provide folio_zero_tail() to zero the tail of an already-mapped folio. [akpm@linux-foundation.org: fix kerneldoc argument ordering] Link: https://lkml.kernel.org/r/20231107212643.3490372-1-willy@infradead.org Link: https://lkml.kernel.org/r/20231107212643.3490372-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Andreas Gruenbacher Cc: Darrick J. Wong Cc: Theodore Ts'o Signed-off-by: Andrew Morton Signed-off-by: Joseph Qi --- fs/ext4/inline.c | 3 +-- include/linux/highmem.h | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 3f363276ddd3..44a5f6df59ec 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -502,9 +502,8 @@ static int ext4_read_inline_folio(struct inode *inode, struct folio *folio) BUG_ON(len > PAGE_SIZE); kaddr = kmap_local_folio(folio, 0); ret = ext4_read_inline_data(inode, kaddr, len, &iloc); - flush_dcache_folio(folio); + kaddr = folio_zero_tail(folio, len, kaddr + len); kunmap_local(kaddr); - folio_zero_segment(folio, len, folio_size(folio)); folio_mark_uptodate(folio); brelse(iloc.bh); diff --git a/include/linux/highmem.h b/include/linux/highmem.h index bf4ea9f2f457..279e7cd02573 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -487,6 +487,44 @@ static inline void memcpy_to_folio(struct folio *folio, size_t offset, flush_dcache_folio(folio); } +/** + * folio_zero_tail - Zero the tail of a folio. + * @folio: The folio to zero. + * @offset: The byte offset in the folio to start zeroing at. + * @kaddr: The address the folio is currently mapped to. + * + * If you have already used kmap_local_folio() to map a folio, written + * some data to it and now need to zero the end of the folio (and flush + * the dcache), you can use this function. If you do not have the + * folio kmapped (eg the folio has been partially populated by DMA), + * use folio_zero_range() or folio_zero_segment() instead. + * + * Return: An address which can be passed to kunmap_local(). + */ +static inline __must_check void *folio_zero_tail(struct folio *folio, + size_t offset, void *kaddr) +{ + size_t len = folio_size(folio) - offset; + + if (folio_test_highmem(folio)) { + size_t max = PAGE_SIZE - offset_in_page(offset); + + while (len > max) { + memset(kaddr, 0, max); + kunmap_local(kaddr); + len -= max; + offset += max; + max = PAGE_SIZE; + kaddr = kmap_local_folio(folio, offset); + } + } + + memset(kaddr, 0, len); + flush_dcache_folio(folio); + + return kaddr; +} + /** * memcpy_from_file_folio - Copy some bytes from a file folio. * @to: The destination buffer. -- Gitee From a109ab800c279abe01d560848301934bf311b689 Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Thu, 29 Feb 2024 11:40:14 +0530 Subject: [PATCH 04/17] ext4: Remove PAGE_MASK dependency on mpage_submit_folio ANBZ: #21672 commit 53c17fe55a06cbb405b94d96759611d725d2c47a upstream. This patch simply removes the PAGE_MASK dependency since mpage_submit_folio() is already converted to work with folio. Signed-off-by: Ritesh Harjani (IBM) Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/d6eadb090334ea49ceef4e643b371fabfcea328f.1709182251.git.ritesh.list@gmail.com Signed-off-by: Theodore Ts'o Signed-off-by: Joseph Qi --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4721eeaf707c..abe00f856f78 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1926,7 +1926,7 @@ static int mpage_submit_folio(struct mpage_da_data *mpd, struct folio *folio) len = folio_size(folio); if (folio_pos(folio) + len > size && !ext4_verity_in_progress(mpd->inode)) - len = size & ~PAGE_MASK; + len = size & (len - 1); err = ext4_bio_write_folio(&mpd->io_submit, folio, len); if (!err) mpd->wbc->nr_to_write--; -- Gitee From ad540235191fbb305ba7216dc211f1cb5c0a4f16 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 16 Apr 2024 18:28:54 +0100 Subject: [PATCH 05/17] ext4: convert bd_bitmap_page to bd_bitmap_folio ANBZ: #21672 commit 99b150d84e4939735cfce245e32e3d29312c68ec upstream. There is no need to make this a multi-page folio, so leave all the infrastructure around it in pages. But since we're locking it, playing with its refcount and checking whether it's uptodate, it needs to move to the folio API. Signed-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20240416172900.244637-2-willy@infradead.org Signed-off-by: Theodore Ts'o Signed-off-by: Joseph Qi --- fs/ext4/mballoc.c | 98 ++++++++++++++++++++++++----------------------- fs/ext4/mballoc.h | 2 +- 2 files changed, 52 insertions(+), 48 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 7285fcec8a7e..52bdd60d2512 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1452,9 +1452,10 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb, int block, pnum, poff; int blocks_per_page; struct page *page; + struct folio *folio; e4b->bd_buddy_page = NULL; - e4b->bd_bitmap_page = NULL; + e4b->bd_bitmap_folio = NULL; blocks_per_page = PAGE_SIZE / sb->s_blocksize; /* @@ -1465,12 +1466,13 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb, block = group * 2; pnum = block / blocks_per_page; poff = block % blocks_per_page; - page = find_or_create_page(inode->i_mapping, pnum, gfp); - if (!page) - return -ENOMEM; - BUG_ON(page->mapping != inode->i_mapping); - e4b->bd_bitmap_page = page; - e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); + folio = __filemap_get_folio(inode->i_mapping, pnum, + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp); + if (IS_ERR(folio)) + return PTR_ERR(folio); + BUG_ON(folio->mapping != inode->i_mapping); + e4b->bd_bitmap_folio = folio; + e4b->bd_bitmap = folio_address(folio) + (poff * sb->s_blocksize); if (blocks_per_page >= 2) { /* buddy and bitmap are on the same page */ @@ -1489,9 +1491,9 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb, static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b) { - if (e4b->bd_bitmap_page) { - unlock_page(e4b->bd_bitmap_page); - put_page(e4b->bd_bitmap_page); + if (e4b->bd_bitmap_folio) { + folio_unlock(e4b->bd_bitmap_folio); + folio_put(e4b->bd_bitmap_folio); } if (e4b->bd_buddy_page) { unlock_page(e4b->bd_buddy_page); @@ -1511,6 +1513,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp) struct ext4_group_info *this_grp; struct ext4_buddy e4b; struct page *page; + struct folio *folio; int ret = 0; might_sleep(); @@ -1537,11 +1540,11 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp) goto err; } - page = e4b.bd_bitmap_page; - ret = ext4_mb_init_cache(page, NULL, gfp); + folio = e4b.bd_bitmap_folio; + ret = ext4_mb_init_cache(&folio->page, NULL, gfp); if (ret) goto err; - if (!PageUptodate(page)) { + if (!folio_test_uptodate(folio)) { ret = -EIO; goto err; } @@ -1583,6 +1586,7 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group, int pnum; int poff; struct page *page; + struct folio *folio; int ret; struct ext4_group_info *grp; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -1601,7 +1605,7 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group, e4b->bd_sb = sb; e4b->bd_group = group; e4b->bd_buddy_page = NULL; - e4b->bd_bitmap_page = NULL; + e4b->bd_bitmap_folio = NULL; if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { /* @@ -1622,53 +1626,53 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group, pnum = block / blocks_per_page; poff = block % blocks_per_page; - /* we could use find_or_create_page(), but it locks page - * what we'd like to avoid in fast path ... */ - page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED); - if (page == NULL || !PageUptodate(page)) { - if (page) + /* Avoid locking the folio in the fast path ... */ + folio = __filemap_get_folio(inode->i_mapping, pnum, FGP_ACCESSED, 0); + if (IS_ERR(folio) || !folio_test_uptodate(folio)) { + if (!IS_ERR(folio)) /* - * drop the page reference and try - * to get the page with lock. If we + * drop the folio reference and try + * to get the folio with lock. If we * are not uptodate that implies - * somebody just created the page but - * is yet to initialize the same. So + * somebody just created the folio but + * is yet to initialize it. So * wait for it to initialize. */ - put_page(page); - page = find_or_create_page(inode->i_mapping, pnum, gfp); - if (page) { - if (WARN_RATELIMIT(page->mapping != inode->i_mapping, - "ext4: bitmap's paging->mapping != inode->i_mapping\n")) { + folio_put(folio); + folio = __filemap_get_folio(inode->i_mapping, pnum, + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp); + if (!IS_ERR(folio)) { + if (WARN_RATELIMIT(folio->mapping != inode->i_mapping, + "ext4: bitmap's mapping != inode->i_mapping\n")) { /* should never happen */ - unlock_page(page); + folio_unlock(folio); ret = -EINVAL; goto err; } - if (!PageUptodate(page)) { - ret = ext4_mb_init_cache(page, NULL, gfp); + if (!folio_test_uptodate(folio)) { + ret = ext4_mb_init_cache(&folio->page, NULL, gfp); if (ret) { - unlock_page(page); + folio_unlock(folio); goto err; } - mb_cmp_bitmaps(e4b, page_address(page) + + mb_cmp_bitmaps(e4b, folio_address(folio) + (poff * sb->s_blocksize)); } - unlock_page(page); + folio_unlock(folio); } } - if (page == NULL) { - ret = -ENOMEM; + if (IS_ERR(folio)) { + ret = PTR_ERR(folio); goto err; } - if (!PageUptodate(page)) { + if (!folio_test_uptodate(folio)) { ret = -EIO; goto err; } /* Pages marked accessed already */ - e4b->bd_bitmap_page = page; - e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); + e4b->bd_bitmap_folio = folio; + e4b->bd_bitmap = folio_address(folio) + (poff * sb->s_blocksize); block++; pnum = block / blocks_per_page; @@ -1716,8 +1720,8 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group, err: if (page) put_page(page); - if (e4b->bd_bitmap_page) - put_page(e4b->bd_bitmap_page); + if (e4b->bd_bitmap_folio) + folio_put(e4b->bd_bitmap_folio); e4b->bd_buddy = NULL; e4b->bd_bitmap = NULL; @@ -1732,8 +1736,8 @@ static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, static void ext4_mb_unload_buddy(struct ext4_buddy *e4b) { - if (e4b->bd_bitmap_page) - put_page(e4b->bd_bitmap_page); + if (e4b->bd_bitmap_folio) + folio_put(e4b->bd_bitmap_folio); if (e4b->bd_buddy_page) put_page(e4b->bd_buddy_page); } @@ -2159,7 +2163,7 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac, * double allocate blocks. The reference is dropped * in ext4_mb_release_context */ - ac->ac_bitmap_page = e4b->bd_bitmap_page; + ac->ac_bitmap_page = &e4b->bd_bitmap_folio->page; get_page(ac->ac_bitmap_page); ac->ac_buddy_page = e4b->bd_buddy_page; get_page(ac->ac_buddy_page); @@ -3899,7 +3903,7 @@ static void ext4_free_data_in_buddy(struct super_block *sb, * balance refcounts from ext4_mb_free_metadata() */ put_page(e4b.bd_buddy_page); - put_page(e4b.bd_bitmap_page); + folio_put(e4b.bd_bitmap_folio); } ext4_unlock_group(sb, entry->efd_group); ext4_mb_unload_buddy(&e4b); @@ -6341,7 +6345,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, struct rb_node *parent = NULL, *new_node; BUG_ON(!ext4_handle_valid(handle)); - BUG_ON(e4b->bd_bitmap_page == NULL); + BUG_ON(e4b->bd_bitmap_folio == NULL); BUG_ON(e4b->bd_buddy_page == NULL); new_node = &new_entry->efd_node; @@ -6354,7 +6358,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, * on-disk bitmap and lose not-yet-available * blocks */ get_page(e4b->bd_buddy_page); - get_page(e4b->bd_bitmap_page); + folio_get(e4b->bd_bitmap_folio); } while (*n) { parent = *n; diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index dd16050022f5..2d0aca8dc02e 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -218,7 +218,7 @@ struct ext4_allocation_context { struct ext4_buddy { struct page *bd_buddy_page; void *bd_buddy; - struct page *bd_bitmap_page; + struct folio *bd_bitmap_folio; void *bd_bitmap; struct ext4_group_info *bd_info; struct super_block *bd_sb; -- Gitee From 7638b04c38d05ae8150be3bf772b15626b788110 Mon Sep 17 00:00:00 2001 From: Gou Hao Date: Tue, 24 Oct 2023 11:52:15 +0800 Subject: [PATCH 06/17] ext4: delete redundant calculations in ext4_mb_get_buddy_page_lock() ANBZ: #21672 commit f2fec3e99a32d7c14dbf63c824f8286ebc94b18d upstream. 'blocks_per_page' is always 1 after 'if (blocks_per_page >= 2)', 'pnum' and 'block' are equal in this case. Signed-off-by: Gou Hao Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20231024035215.29474-1-gouhao@uniontech.com Signed-off-by: Theodore Ts'o Signed-off-by: Joseph Qi --- fs/ext4/mballoc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 52bdd60d2512..d37bec7ca24f 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1479,9 +1479,8 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb, return 0; } - block++; - pnum = block / blocks_per_page; - page = find_or_create_page(inode->i_mapping, pnum, gfp); + /* blocks_per_page == 1, hence we need another page for the buddy */ + page = find_or_create_page(inode->i_mapping, block + 1, gfp); if (!page) return -ENOMEM; BUG_ON(page->mapping != inode->i_mapping); -- Gitee From c2fd3051cdfe7ceefdb8530ad023c5cb5346053a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 16 Apr 2024 18:28:55 +0100 Subject: [PATCH 07/17] ext4: convert bd_buddy_page to bd_buddy_folio ANBZ: #21672 commit 5eea586b47f05b5f5518cf8f9dd9283a01a8066d upstream. There is no need to make this a multi-page folio, so leave all the infrastructure around it in pages. But since we're locking it, playing with its refcount and checking whether it's uptodate, it needs to move to the folio API. Signed-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20240416172900.244637-3-willy@infradead.org Signed-off-by: Theodore Ts'o Signed-off-by: Joseph Qi --- fs/ext4/mballoc.c | 91 +++++++++++++++++++++++------------------------ fs/ext4/mballoc.h | 2 +- 2 files changed, 46 insertions(+), 47 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d37bec7ca24f..9c3a0938f686 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1443,7 +1443,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) * Lock the buddy and bitmap pages. This make sure other parallel init_group * on the same buddy page doesn't happen whild holding the buddy page lock. * Return locked buddy and bitmap pages on e4b struct. If buddy and bitmap - * are on the same page e4b->bd_buddy_page is NULL and return value is 0. + * are on the same page e4b->bd_buddy_folio is NULL and return value is 0. */ static int ext4_mb_get_buddy_page_lock(struct super_block *sb, ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp) @@ -1451,10 +1451,9 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb, struct inode *inode = EXT4_SB(sb)->s_buddy_cache; int block, pnum, poff; int blocks_per_page; - struct page *page; struct folio *folio; - e4b->bd_buddy_page = NULL; + e4b->bd_buddy_folio = NULL; e4b->bd_bitmap_folio = NULL; blocks_per_page = PAGE_SIZE / sb->s_blocksize; @@ -1480,11 +1479,12 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb, } /* blocks_per_page == 1, hence we need another page for the buddy */ - page = find_or_create_page(inode->i_mapping, block + 1, gfp); - if (!page) - return -ENOMEM; - BUG_ON(page->mapping != inode->i_mapping); - e4b->bd_buddy_page = page; + folio = __filemap_get_folio(inode->i_mapping, block + 1, + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp); + if (IS_ERR(folio)) + return PTR_ERR(folio); + BUG_ON(folio->mapping != inode->i_mapping); + e4b->bd_buddy_folio = folio; return 0; } @@ -1494,9 +1494,9 @@ static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b) folio_unlock(e4b->bd_bitmap_folio); folio_put(e4b->bd_bitmap_folio); } - if (e4b->bd_buddy_page) { - unlock_page(e4b->bd_buddy_page); - put_page(e4b->bd_buddy_page); + if (e4b->bd_buddy_folio) { + folio_unlock(e4b->bd_buddy_folio); + folio_put(e4b->bd_buddy_folio); } } @@ -1511,7 +1511,6 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp) struct ext4_group_info *this_grp; struct ext4_buddy e4b; - struct page *page; struct folio *folio; int ret = 0; @@ -1548,7 +1547,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp) goto err; } - if (e4b.bd_buddy_page == NULL) { + if (e4b.bd_buddy_folio == NULL) { /* * If both the bitmap and buddy are in * the same page we don't need to force @@ -1558,11 +1557,11 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp) goto err; } /* init buddy cache */ - page = e4b.bd_buddy_page; - ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp); + folio = e4b.bd_buddy_folio; + ret = ext4_mb_init_cache(&folio->page, e4b.bd_bitmap, gfp); if (ret) goto err; - if (!PageUptodate(page)) { + if (!folio_test_uptodate(folio)) { ret = -EIO; goto err; } @@ -1584,7 +1583,6 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group, int block; int pnum; int poff; - struct page *page; struct folio *folio; int ret; struct ext4_group_info *grp; @@ -1603,7 +1601,7 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group, e4b->bd_info = grp; e4b->bd_sb = sb; e4b->bd_group = group; - e4b->bd_buddy_page = NULL; + e4b->bd_buddy_folio = NULL; e4b->bd_bitmap_folio = NULL; if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { @@ -1669,7 +1667,7 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group, goto err; } - /* Pages marked accessed already */ + /* Folios marked accessed already */ e4b->bd_bitmap_folio = folio; e4b->bd_bitmap = folio_address(folio) + (poff * sb->s_blocksize); @@ -1677,48 +1675,49 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group, pnum = block / blocks_per_page; poff = block % blocks_per_page; - page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED); - if (page == NULL || !PageUptodate(page)) { - if (page) - put_page(page); - page = find_or_create_page(inode->i_mapping, pnum, gfp); - if (page) { - if (WARN_RATELIMIT(page->mapping != inode->i_mapping, - "ext4: buddy bitmap's page->mapping != inode->i_mapping\n")) { + folio = __filemap_get_folio(inode->i_mapping, pnum, FGP_ACCESSED, 0); + if (IS_ERR(folio) || !folio_test_uptodate(folio)) { + if (!IS_ERR(folio)) + folio_put(folio); + folio = __filemap_get_folio(inode->i_mapping, pnum, + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp); + if (!IS_ERR(folio)) { + if (WARN_RATELIMIT(folio->mapping != inode->i_mapping, + "ext4: buddy bitmap's mapping != inode->i_mapping\n")) { /* should never happen */ - unlock_page(page); + folio_unlock(folio); ret = -EINVAL; goto err; } - if (!PageUptodate(page)) { - ret = ext4_mb_init_cache(page, e4b->bd_bitmap, + if (!folio_test_uptodate(folio)) { + ret = ext4_mb_init_cache(&folio->page, e4b->bd_bitmap, gfp); if (ret) { - unlock_page(page); + folio_unlock(folio); goto err; } } - unlock_page(page); + folio_unlock(folio); } } - if (page == NULL) { - ret = -ENOMEM; + if (IS_ERR(folio)) { + ret = PTR_ERR(folio); goto err; } - if (!PageUptodate(page)) { + if (!folio_test_uptodate(folio)) { ret = -EIO; goto err; } - /* Pages marked accessed already */ - e4b->bd_buddy_page = page; - e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize); + /* Folios marked accessed already */ + e4b->bd_buddy_folio = folio; + e4b->bd_buddy = folio_address(folio) + (poff * sb->s_blocksize); return 0; err: - if (page) - put_page(page); + if (folio) + folio_put(folio); if (e4b->bd_bitmap_folio) folio_put(e4b->bd_bitmap_folio); @@ -1737,8 +1736,8 @@ static void ext4_mb_unload_buddy(struct ext4_buddy *e4b) { if (e4b->bd_bitmap_folio) folio_put(e4b->bd_bitmap_folio); - if (e4b->bd_buddy_page) - put_page(e4b->bd_buddy_page); + if (e4b->bd_buddy_folio) + folio_put(e4b->bd_buddy_folio); } @@ -2164,7 +2163,7 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac, */ ac->ac_bitmap_page = &e4b->bd_bitmap_folio->page; get_page(ac->ac_bitmap_page); - ac->ac_buddy_page = e4b->bd_buddy_page; + ac->ac_buddy_page = &e4b->bd_buddy_folio->page; get_page(ac->ac_buddy_page); /* store last allocated for subsequent stream allocation */ if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { @@ -3901,7 +3900,7 @@ static void ext4_free_data_in_buddy(struct super_block *sb, /* No more items in the per group rb tree * balance refcounts from ext4_mb_free_metadata() */ - put_page(e4b.bd_buddy_page); + folio_put(e4b.bd_buddy_folio); folio_put(e4b.bd_bitmap_folio); } ext4_unlock_group(sb, entry->efd_group); @@ -6345,7 +6344,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, BUG_ON(!ext4_handle_valid(handle)); BUG_ON(e4b->bd_bitmap_folio == NULL); - BUG_ON(e4b->bd_buddy_page == NULL); + BUG_ON(e4b->bd_buddy_folio == NULL); new_node = &new_entry->efd_node; cluster = new_entry->efd_start_cluster; @@ -6356,7 +6355,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, * otherwise we'll refresh it from * on-disk bitmap and lose not-yet-available * blocks */ - get_page(e4b->bd_buddy_page); + folio_get(e4b->bd_buddy_folio); folio_get(e4b->bd_bitmap_folio); } while (*n) { diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 2d0aca8dc02e..0dd6bc69ab61 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -216,7 +216,7 @@ struct ext4_allocation_context { #define AC_STATUS_BREAK 3 struct ext4_buddy { - struct page *bd_buddy_page; + struct folio *bd_buddy_folio; void *bd_buddy; struct folio *bd_bitmap_folio; void *bd_bitmap; -- Gitee From d9e8f64a8ec5f7e19126a647182b4fd01584c1fb Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 16 Apr 2024 18:28:56 +0100 Subject: [PATCH 08/17] ext4: convert ext4_mb_init_cache() to take a folio ANBZ: #21672 commit e1622a0d558200b0ccdfbf69ee29b9ac1f5c2442 upstream. All callers now have a folio, so convert this function from operating on a page to operating on a folio. The folio is assumed to be a single page. Signe-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20240416172900.244637-4-willy@infradead.org Signed-off-by: Theodore Ts'o Signed-off-by: Joseph Qi --- fs/ext4/mballoc.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 9c3a0938f686..751b02d33f2e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1274,7 +1274,7 @@ static void mb_regenerate_buddy(struct ext4_buddy *e4b) * for this page; do not hold this lock when calling this routine! */ -static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) +static int ext4_mb_init_cache(struct folio *folio, char *incore, gfp_t gfp) { ext4_group_t ngroups; unsigned int blocksize; @@ -1292,13 +1292,13 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) char *bitmap; struct ext4_group_info *grinfo; - inode = page->mapping->host; + inode = folio->mapping->host; sb = inode->i_sb; ngroups = ext4_get_groups_count(sb); blocksize = i_blocksize(inode); blocks_per_page = PAGE_SIZE / blocksize; - mb_debug(sb, "init page %lu\n", page->index); + mb_debug(sb, "init folio %lu\n", folio->index); groups_per_page = blocks_per_page >> 1; if (groups_per_page == 0) @@ -1313,9 +1313,9 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) } else bh = &bhs; - first_group = page->index * blocks_per_page / 2; + first_group = folio->index * blocks_per_page / 2; - /* read all groups the page covers into the cache */ + /* read all groups the folio covers into the cache */ for (i = 0, group = first_group; i < groups_per_page; i++, group++) { if (group >= ngroups) break; @@ -1326,10 +1326,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) /* * If page is uptodate then we came here after online resize * which added some new uninitialized group info structs, so - * we must skip all initialized uptodate buddies on the page, + * we must skip all initialized uptodate buddies on the folio, * which may be currently in use by an allocating task. */ - if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) { + if (folio_test_uptodate(folio) && + !EXT4_MB_GRP_NEED_INIT(grinfo)) { bh[i] = NULL; continue; } @@ -1353,7 +1354,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) err = err2; } - first_block = page->index * blocks_per_page; + first_block = folio->index * blocks_per_page; for (i = 0; i < blocks_per_page; i++) { group = (first_block + i) >> 1; if (group >= ngroups) @@ -1374,7 +1375,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) * above * */ - data = page_address(page) + (i * blocksize); + data = folio_address(folio) + (i * blocksize); bitmap = bh[group - first_group]->b_data; /* @@ -1389,8 +1390,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) if ((first_block + i) & 1) { /* this is block of buddy */ BUG_ON(incore == NULL); - mb_debug(sb, "put buddy for group %u in page %lu/%x\n", - group, page->index, i * blocksize); + mb_debug(sb, "put buddy for group %u in folio %lu/%x\n", + group, folio->index, i * blocksize); trace_ext4_mb_buddy_bitmap_load(sb, group); grinfo->bb_fragments = 0; memset(grinfo->bb_counters, 0, @@ -1408,8 +1409,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) } else { /* this is block of bitmap */ BUG_ON(incore != NULL); - mb_debug(sb, "put bitmap for group %u in page %lu/%x\n", - group, page->index, i * blocksize); + mb_debug(sb, "put bitmap for group %u in folio %lu/%x\n", + group, folio->index, i * blocksize); trace_ext4_mb_bitmap_load(sb, group); /* see comments in ext4_mb_put_pa() */ @@ -1427,7 +1428,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp) incore = data; } } - SetPageUptodate(page); + folio_mark_uptodate(folio); out: if (bh) { @@ -1539,7 +1540,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp) } folio = e4b.bd_bitmap_folio; - ret = ext4_mb_init_cache(&folio->page, NULL, gfp); + ret = ext4_mb_init_cache(folio, NULL, gfp); if (ret) goto err; if (!folio_test_uptodate(folio)) { @@ -1558,7 +1559,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp) } /* init buddy cache */ folio = e4b.bd_buddy_folio; - ret = ext4_mb_init_cache(&folio->page, e4b.bd_bitmap, gfp); + ret = ext4_mb_init_cache(folio, e4b.bd_bitmap, gfp); if (ret) goto err; if (!folio_test_uptodate(folio)) { @@ -1647,7 +1648,7 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group, goto err; } if (!folio_test_uptodate(folio)) { - ret = ext4_mb_init_cache(&folio->page, NULL, gfp); + ret = ext4_mb_init_cache(folio, NULL, gfp); if (ret) { folio_unlock(folio); goto err; @@ -1690,7 +1691,7 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group, goto err; } if (!folio_test_uptodate(folio)) { - ret = ext4_mb_init_cache(&folio->page, e4b->bd_bitmap, + ret = ext4_mb_init_cache(folio, e4b->bd_bitmap, gfp); if (ret) { folio_unlock(folio); -- Gitee From 21c10532ee1fa00e683addfe9b3199d8163d260f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 16 Apr 2024 18:28:57 +0100 Subject: [PATCH 09/17] ext4: convert ac_bitmap_page to ac_bitmap_folio ANBZ: #21672 commit ccedf35b5daa429c0e731eac6fb32b0208302c6b upstream. This just carries around the bd_bitmap_folio so should also be a folio. Signed-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20240416172900.244637-5-willy@infradead.org Signed-off-by: Theodore Ts'o Signed-off-by: Joseph Qi --- fs/ext4/mballoc.c | 8 ++++---- fs/ext4/mballoc.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 751b02d33f2e..9e419cf86c8d 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2162,8 +2162,8 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac, * double allocate blocks. The reference is dropped * in ext4_mb_release_context */ - ac->ac_bitmap_page = &e4b->bd_bitmap_folio->page; - get_page(ac->ac_bitmap_page); + ac->ac_bitmap_folio = e4b->bd_bitmap_folio; + folio_get(ac->ac_bitmap_folio); ac->ac_buddy_page = &e4b->bd_buddy_folio->page; get_page(ac->ac_buddy_page); /* store last allocated for subsequent stream allocation */ @@ -6024,8 +6024,8 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) ext4_mb_put_pa(ac, ac->ac_sb, pa); } - if (ac->ac_bitmap_page) - put_page(ac->ac_bitmap_page); + if (ac->ac_bitmap_folio) + folio_put(ac->ac_bitmap_folio); if (ac->ac_buddy_page) put_page(ac->ac_buddy_page); if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 0dd6bc69ab61..34a70dc9ab94 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -205,7 +205,7 @@ struct ext4_allocation_context { __u8 ac_2order; /* if request is to allocate 2^N blocks and * N > 0, the field stores N, otherwise 0 */ __u8 ac_op; /* operation, for history only */ - struct page *ac_bitmap_page; + struct folio *ac_bitmap_folio; struct page *ac_buddy_page; struct ext4_prealloc_space *ac_pa; struct ext4_locality_group *ac_lg; -- Gitee From 516107e26a857ab38314f7dc0366bc3e831ef464 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 16 Apr 2024 18:28:58 +0100 Subject: [PATCH 10/17] ext4: convert ac_buddy_page to ac_buddy_folio ANBZ: #21672 commit c84f1510fba9fb181e6a1aa7b5fcfc67381b39c9 upstream. This just carries around the bd_buddy_folio so should also be a folio. Signed-off-by: Matthew Wilcox (Oracle) Link: https://lore.kernel.org/r/20240416172900.244637-6-willy@infradead.org Signed-off-by: Theodore Ts'o Signed-off-by: Joseph Qi --- fs/ext4/mballoc.c | 8 ++++---- fs/ext4/mballoc.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 9e419cf86c8d..db9075f397d1 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2164,8 +2164,8 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac, */ ac->ac_bitmap_folio = e4b->bd_bitmap_folio; folio_get(ac->ac_bitmap_folio); - ac->ac_buddy_page = &e4b->bd_buddy_folio->page; - get_page(ac->ac_buddy_page); + ac->ac_buddy_folio = e4b->bd_buddy_folio; + folio_get(ac->ac_buddy_folio); /* store last allocated for subsequent stream allocation */ if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { spin_lock(&sbi->s_md_lock); @@ -6026,8 +6026,8 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) } if (ac->ac_bitmap_folio) folio_put(ac->ac_bitmap_folio); - if (ac->ac_buddy_page) - put_page(ac->ac_buddy_page); + if (ac->ac_buddy_folio) + folio_put(ac->ac_buddy_folio); if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC) mutex_unlock(&ac->ac_lg->lg_mutex); ext4_mb_collect_stats(ac); diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 34a70dc9ab94..bdf14f393694 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -206,7 +206,7 @@ struct ext4_allocation_context { * N > 0, the field stores N, otherwise 0 */ __u8 ac_op; /* operation, for history only */ struct folio *ac_bitmap_folio; - struct page *ac_buddy_page; + struct folio *ac_buddy_folio; struct ext4_prealloc_space *ac_pa; struct ext4_locality_group *ac_lg; }; -- Gitee From 0011d619af75468f9535410253f50f9f9420b704 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 9 Nov 2023 21:06:02 +0000 Subject: [PATCH 11/17] buffer: return bool from grow_dev_folio() ANBZ: #21672 commit 6d840a18773f36baaecc2d2f7fb18ec5862349e6 upstream. Patch series "More buffer_head cleanups", v2. The first patch is a left-over from last cycle. The rest fix "obvious" block size > PAGE_SIZE problems. I haven't tested with a large block size setup (but I have done an ext4 xfstests run). This patch (of 7): Rename grow_dev_page() to grow_dev_folio() and make it return a bool. Document what that bool means; it's more subtle than it first appears. Also rename the 'failed' label to 'unlock' beacuse it's not exactly 'failed'. It just hasn't succeeded. Link: https://lkml.kernel.org/r/20231109210608.2252323-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Hannes Reinecke Cc: Luis Chamberlain Cc: Pankaj Raghav Cc: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Joseph Qi --- fs/buffer.c | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 59c8058f8453..e6534b55c581 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1024,40 +1024,43 @@ static sector_t folio_init_buffers(struct folio *folio, } /* - * Create the page-cache page that contains the requested block. + * Create the page-cache folio that contains the requested block. * * This is used purely for blockdev mappings. + * + * Returns false if we have a 'permanent' failure. Returns true if + * we succeeded, or the caller should retry. */ -static int -grow_dev_page(struct block_device *bdev, sector_t block, - pgoff_t index, int size, int sizebits, gfp_t gfp) +static bool grow_dev_folio(struct block_device *bdev, sector_t block, + pgoff_t index, unsigned size, int sizebits, gfp_t gfp) { struct inode *inode = bdev->bd_inode; struct folio *folio; struct buffer_head *bh; - sector_t end_block; - int ret = 0; + sector_t end_block = 0; folio = __filemap_get_folio(inode->i_mapping, index, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp); if (IS_ERR(folio)) - return PTR_ERR(folio); + return false; bh = folio_buffers(folio); if (bh) { if (bh->b_size == size) { end_block = folio_init_buffers(folio, bdev, (sector_t)index << sizebits, size); - goto done; + goto unlock; } + + /* Caller should retry if this call fails */ + end_block = ~0ULL; if (!try_to_free_buffers(folio)) - goto failed; + goto unlock; } - ret = -ENOMEM; bh = folio_alloc_buffers(folio, size, gfp | __GFP_ACCOUNT); if (!bh) - goto failed; + goto unlock; /* * Link the folio to the buffers and initialise them. Take the @@ -1069,20 +1072,19 @@ grow_dev_page(struct block_device *bdev, sector_t block, end_block = folio_init_buffers(folio, bdev, (sector_t)index << sizebits, size); spin_unlock(&inode->i_mapping->private_lock); -done: - ret = (block < end_block) ? 1 : -ENXIO; -failed: +unlock: folio_unlock(folio); folio_put(folio); - return ret; + return block < end_block; } /* - * Create buffers for the specified block device block's page. If - * that page was dirty, the buffers are set dirty also. + * Create buffers for the specified block device block's folio. If + * that folio was dirty, the buffers are set dirty also. Returns false + * if we've hit a permanent error. */ -static int -grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp) +static bool grow_buffers(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp) { pgoff_t index; int sizebits; @@ -1099,11 +1101,11 @@ grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp) "device %pg\n", __func__, (unsigned long long)block, bdev); - return -EIO; + return false; } - /* Create a page with the proper size buffers.. */ - return grow_dev_page(bdev, block, index, size, sizebits, gfp); + /* Create a folio with the proper size buffers */ + return grow_dev_folio(bdev, block, index, size, sizebits, gfp); } static struct buffer_head * @@ -1124,14 +1126,12 @@ __getblk_slow(struct block_device *bdev, sector_t block, for (;;) { struct buffer_head *bh; - int ret; bh = __find_get_block(bdev, block, size); if (bh) return bh; - ret = grow_buffers(bdev, block, size, gfp); - if (ret < 0) + if (!grow_buffers(bdev, block, size, gfp)) return NULL; } } -- Gitee From a9e4fa5b584a09a3de772805de59f5e541c6049d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 9 Nov 2023 21:06:03 +0000 Subject: [PATCH 12/17] buffer: calculate block number inside folio_init_buffers() ANBZ: #21672 commit 382497ada051a6fc79612aba5e30cdfa26364374 upstream. The calculation of block from index doesn't work for devices with a block size larger than PAGE_SIZE as we end up shifting by a negative number. Instead, calculate the number of the first block from the folio's position in the block device. We no longer need to pass sizebits to grow_dev_folio(). Link: https://lkml.kernel.org/r/20231109210608.2252323-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Pankaj Raghav Cc: Hannes Reinecke Cc: Luis Chamberlain Cc: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Joseph Qi --- fs/buffer.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index e6534b55c581..bfbab978abc2 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -995,11 +995,12 @@ static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size) * Initialise the state of a blockdev folio's buffers. */ static sector_t folio_init_buffers(struct folio *folio, - struct block_device *bdev, sector_t block, int size) + struct block_device *bdev, unsigned size) { struct buffer_head *head = folio_buffers(folio); struct buffer_head *bh = head; bool uptodate = folio_test_uptodate(folio); + sector_t block = div_u64(folio_pos(folio), size); sector_t end_block = blkdev_max_block(bdev, size); do { @@ -1032,7 +1033,7 @@ static sector_t folio_init_buffers(struct folio *folio, * we succeeded, or the caller should retry. */ static bool grow_dev_folio(struct block_device *bdev, sector_t block, - pgoff_t index, unsigned size, int sizebits, gfp_t gfp) + pgoff_t index, unsigned size, gfp_t gfp) { struct inode *inode = bdev->bd_inode; struct folio *folio; @@ -1047,8 +1048,7 @@ static bool grow_dev_folio(struct block_device *bdev, sector_t block, bh = folio_buffers(folio); if (bh) { if (bh->b_size == size) { - end_block = folio_init_buffers(folio, bdev, - (sector_t)index << sizebits, size); + end_block = folio_init_buffers(folio, bdev, size); goto unlock; } @@ -1069,8 +1069,7 @@ static bool grow_dev_folio(struct block_device *bdev, sector_t block, */ spin_lock(&inode->i_mapping->private_lock); link_dev_buffers(folio, bh); - end_block = folio_init_buffers(folio, bdev, - (sector_t)index << sizebits, size); + end_block = folio_init_buffers(folio, bdev, size); spin_unlock(&inode->i_mapping->private_lock); unlock: folio_unlock(folio); @@ -1105,7 +1104,7 @@ static bool grow_buffers(struct block_device *bdev, sector_t block, } /* Create a folio with the proper size buffers */ - return grow_dev_folio(bdev, block, index, size, sizebits, gfp); + return grow_dev_folio(bdev, block, index, size, gfp); } static struct buffer_head * -- Gitee From 645bea54a9b684c220e75cd7967441aca2df4ac6 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 9 Nov 2023 21:06:04 +0000 Subject: [PATCH 13/17] buffer: fix grow_buffers() for block size > PAGE_SIZE ANBZ: #21672 commit 5f3bd90d9b98855c2e811aa3b4823d583b0020df upstream. We must not shift by a negative number so work in terms of a byte offset to avoid the awkward shift left-or-right-depending-on-sign option. This means we need to use check_mul_overflow() to ensure that a large block number does not result in a wrap. Link: https://lkml.kernel.org/r/20231109210608.2252323-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Nathan Chancellor Cc: Hannes Reinecke Cc: Luis Chamberlain Cc: Pankaj Raghav Cc: Ryusuke Konishi [nathan@kernel.org: add cast in grow_buffers() to avoid a multiplication libcall] Link: https://lkml.kernel.org/r/20231128-avoid-muloti4-grow_buffers-v1-1-bc3d0f0ec483@kernel.org Signed-off-by: Andrew Morton Signed-off-by: Joseph Qi --- fs/buffer.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index bfbab978abc2..7bbaa1e1ed2c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1085,26 +1085,21 @@ static bool grow_dev_folio(struct block_device *bdev, sector_t block, static bool grow_buffers(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp) { - pgoff_t index; - int sizebits; - - sizebits = PAGE_SHIFT - __ffs(size); - index = block >> sizebits; + loff_t pos; /* - * Check for a block which wants to lie outside our maximum possible - * pagecache index. (this comparison is done using sector_t types). + * Check for a block which lies outside our maximum possible + * pagecache index. */ - if (unlikely(index != block >> sizebits)) { - printk(KERN_ERR "%s: requested out-of-range block %llu for " - "device %pg\n", + if (check_mul_overflow(block, (sector_t)size, &pos) || pos > MAX_LFS_FILESIZE) { + printk(KERN_ERR "%s: requested out-of-range block %llu for device %pg\n", __func__, (unsigned long long)block, bdev); return false; } /* Create a folio with the proper size buffers */ - return grow_dev_folio(bdev, block, index, size, gfp); + return grow_dev_folio(bdev, block, pos / PAGE_SIZE, size, gfp); } static struct buffer_head * -- Gitee From c9f3bb1d59a6c77d4b2f7d6fa9902eb3b64cfc6c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 9 Nov 2023 21:06:05 +0000 Subject: [PATCH 14/17] buffer: cast block to loff_t before shifting it ANBZ: #21672 commit 808441943f6b817f4836752c6e0d1c07507f375e upstream. While sector_t is always defined as a u64 today, that hasn't always been the case and it might not always be the same size as loff_t in the future. Link: https://lkml.kernel.org/r/20231109210608.2252323-5-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Hannes Reinecke Cc: Luis Chamberlain Cc: Pankaj Raghav Cc: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Joseph Qi --- fs/buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/buffer.c b/fs/buffer.c index 7bbaa1e1ed2c..e08368f2262d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2015,7 +2015,7 @@ static int iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, const struct iomap *iomap) { - loff_t offset = block << inode->i_blkbits; + loff_t offset = (loff_t)block << inode->i_blkbits; bh->b_bdev = iomap->bdev; -- Gitee From cdce7258a230569947a7a8c98b998e01e094b76a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 9 Nov 2023 21:06:06 +0000 Subject: [PATCH 15/17] buffer: fix various functions for block size > PAGE_SIZE ANBZ: #21672 commit 4b04646caed5449ca97b909bbadca0a7a2762159 upstream. If i_blkbits is larger than PAGE_SHIFT, we shift by a negative number, which is undefined. It is safe to shift the block left as a block device must be smaller than MAX_LFS_FILESIZE, which is guaranteed to fit in loff_t. Link: https://lkml.kernel.org/r/20231109210608.2252323-6-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Pankaj Raghav Cc: Hannes Reinecke Cc: Luis Chamberlain Cc: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Joseph Qi --- fs/buffer.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index e08368f2262d..a31a5859bebd 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -199,7 +199,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) int all_mapped = 1; static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1); - index = block >> (PAGE_SHIFT - bd_inode->i_blkbits); + index = ((loff_t)block << bd_inode->i_blkbits) / PAGE_SIZE; folio = __filemap_get_folio(bd_mapping, index, FGP_ACCESSED, 0); if (IS_ERR(folio)) goto out; @@ -1700,13 +1700,13 @@ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len) struct inode *bd_inode = bdev->bd_inode; struct address_space *bd_mapping = bd_inode->i_mapping; struct folio_batch fbatch; - pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits); + pgoff_t index = ((loff_t)block << bd_inode->i_blkbits) / PAGE_SIZE; pgoff_t end; int i, count; struct buffer_head *bh; struct buffer_head *head; - end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits); + end = ((loff_t)(block + len - 1) << bd_inode->i_blkbits) / PAGE_SIZE; folio_batch_init(&fbatch); while (filemap_get_folios(bd_mapping, &index, end, &fbatch)) { count = folio_batch_count(&fbatch); @@ -2669,8 +2669,8 @@ int block_truncate_page(struct address_space *mapping, return 0; length = blocksize - length; - iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits); - + iblock = ((loff_t)index * PAGE_SIZE) >> inode->i_blkbits; + folio = filemap_grab_folio(mapping, index); if (IS_ERR(folio)) return PTR_ERR(folio); -- Gitee From f818fc382a37c5b3db1cb39e9e27b33b19f8e76d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 9 Nov 2023 21:06:07 +0000 Subject: [PATCH 16/17] buffer: handle large folios in __block_write_begin_int() ANBZ: #21672 commit b0619401b8cdafcf32ad352a8e9a225ab0b4b10d upstream. When __block_write_begin_int() was converted to support folios, we did not expect large folios to be passed to it. With the current work to support large block size storage devices, this will no longer be true so change the checks on 'from' and 'to' to be related to the size of the folio instead of PAGE_SIZE. Also remove an assumption that the block size is smaller than PAGE_SIZE. Link: https://lkml.kernel.org/r/20231109210608.2252323-7-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reported-by: Ryusuke Konishi Cc: Hannes Reinecke Cc: Luis Chamberlain Cc: Pankaj Raghav Signed-off-by: Andrew Morton Signed-off-by: Joseph Qi --- fs/buffer.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index a31a5859bebd..d75b0e03721c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2082,27 +2082,24 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len, get_block_t *get_block, const struct iomap *iomap) { - unsigned from = pos & (PAGE_SIZE - 1); - unsigned to = from + len; + size_t from = offset_in_folio(folio, pos); + size_t to = from + len; struct inode *inode = folio->mapping->host; - unsigned block_start, block_end; + size_t block_start, block_end; sector_t block; int err = 0; - unsigned blocksize, bbits; + size_t blocksize; struct buffer_head *bh, *head, *wait[2], **wait_bh=wait; BUG_ON(!folio_test_locked(folio)); - BUG_ON(from > PAGE_SIZE); - BUG_ON(to > PAGE_SIZE); + BUG_ON(to > folio_size(folio)); BUG_ON(from > to); head = folio_create_buffers(folio, inode, 0); blocksize = head->b_size; - bbits = block_size_bits(blocksize); - - block = (sector_t)folio->index << (PAGE_SHIFT - bbits); + block = div_u64(folio_pos(folio), blocksize); - for(bh = head, block_start = 0; bh != head || !block_start; + for (bh = head, block_start = 0; bh != head || !block_start; block++, block_start=block_end, bh = bh->b_this_page) { block_end = block_start + blocksize; if (block_end <= from || block_start >= to) { -- Gitee From 0fe8e38099abff633f2f8a383bbce655661612ec Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 9 Nov 2023 21:06:08 +0000 Subject: [PATCH 17/17] buffer: fix more functions for block size > PAGE_SIZE ANBZ: #21672 commit fa399c3112344fa420944e99cd529d679411ebe6 upstream. Both __block_write_full_folio() and block_read_full_folio() assumed that block size <= PAGE_SIZE. Replace the shift with a divide, which is probably cheaper than first calculating the shift. That lets us remove block_size_bits() as these were the last callers. Link: https://lkml.kernel.org/r/20231109210608.2252323-8-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Hannes Reinecke Cc: Luis Chamberlain Cc: Pankaj Raghav Cc: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Joseph Qi --- fs/buffer.c | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index d75b0e03721c..d9b34cea3c40 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1749,19 +1749,6 @@ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len) } EXPORT_SYMBOL(clean_bdev_aliases); -/* - * Size is a power-of-two in the range 512..PAGE_SIZE, - * and the case we care about most is PAGE_SIZE. - * - * So this *could* possibly be written with those - * constraints in mind (relevant mostly if some - * architecture has a slow bit-scan instruction) - */ -static inline int block_size_bits(unsigned int blocksize) -{ - return ilog2(blocksize); -} - static struct buffer_head *folio_create_buffers(struct folio *folio, struct inode *inode, unsigned int b_state) @@ -1814,7 +1801,7 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio, sector_t block; sector_t last_block; struct buffer_head *bh, *head; - unsigned int blocksize, bbits; + size_t blocksize; int nr_underway = 0; blk_opf_t write_flags = wbc_to_write_flags(wbc); @@ -1833,10 +1820,9 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio, bh = head; blocksize = bh->b_size; - bbits = block_size_bits(blocksize); - block = (sector_t)folio->index << (PAGE_SHIFT - bbits); - last_block = (i_size_read(inode) - 1) >> bbits; + block = div_u64(folio_pos(folio), blocksize); + last_block = div_u64(i_size_read(inode) - 1, blocksize); /* * Get all the dirty buffers mapped to disk addresses and @@ -2364,7 +2350,7 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block) struct inode *inode = folio->mapping->host; sector_t iblock, lblock; struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; - unsigned int blocksize, bbits; + size_t blocksize; int nr, i; int fully_mapped = 1; bool page_error = false; @@ -2378,10 +2364,9 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block) head = folio_create_buffers(folio, inode, 0); blocksize = head->b_size; - bbits = block_size_bits(blocksize); - iblock = (sector_t)folio->index << (PAGE_SHIFT - bbits); - lblock = (limit+blocksize-1) >> bbits; + iblock = div_u64(folio_pos(folio), blocksize); + lblock = div_u64(limit + blocksize - 1, blocksize); bh = head; nr = 0; i = 0; -- Gitee