From e4846e24dc8c6c63435523499cff5860ce3ac7f5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 1 Aug 2024 15:22:22 -0400 Subject: [PATCH 01/11] protect the fetch of ->fd[fd] in do_dup2() from mispredictions stable inclusion from stable-v5.10.224 commit 08775b3d6ed117cf4518754ec7300ee42b6a5368 category: bugfix issue: #IB9RV6 CVE: CVE-2024-42265 Signed-off-by: May27May --------------------------------------- commit 8aa37bde1a7b645816cda8b80df4753ecf172bf1 upstream. both callers have verified that fd is not greater than ->max_fds; however, misprediction might end up with tofree = fdt->fd[fd]; being speculatively executed. That's wrong for the same reasons why it's wrong in close_fd()/file_close_fd_locked(); the same solution applies - array_index_nospec(fd, fdt->max_fds) could differ from fd only in case of speculative execution on mispredicted path. Cc: stable@vger.kernel.org Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman Signed-off-by: May27May --- fs/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/file.c b/fs/file.c index d6bc73960e4a..20cc228fe47a 100644 --- a/fs/file.c +++ b/fs/file.c @@ -1093,6 +1093,7 @@ __releases(&files->file_lock) * tables and this condition does not arise without those. */ fdt = files_fdtable(files); + fd = array_index_nospec(fd, fdt->max_fds); tofree = fdt->fd[fd]; if (!tofree && fd_is_open(fd, fdt)) goto Ebusy; -- Gitee From 0df6c884a9d30ad26197a96f3391bbb1818ebc88 Mon Sep 17 00:00:00 2001 From: lei lu Date: Mon, 3 Jun 2024 17:46:08 +0800 Subject: [PATCH 02/11] xfs: add bounds checking to xlog_recover_process_data mainline inclusion from mainline-v6.11-rc1 commit fb63435b7c7dc112b1ae1baea5486e0a6e27b196 category: bugfix issue: #IB9RV6 CVE: CVE-2024-41014 Signed-off-by: May27May --------------------------------------- There is a lack of verification of the space occupied by fixed members of xlog_op_header in the xlog_recover_process_data. We can create a crafted image to trigger an out of bounds read by following these steps: 1) Mount an image of xfs, and do some file operations to leave records 2) Before umounting, copy the image for subsequent steps to simulate abnormal exit. Because umount will ensure that tail_blk and head_blk are the same, which will result in the inability to enter xlog_recover_process_data 3) Write a tool to parse and modify the copied image in step 2 4) Make the end of the xlog_op_header entries only 1 byte away from xlog_rec_header->h_size 5) xlog_rec_header->h_num_logops++ 6) Modify xlog_rec_header->h_crc Fix: Add a check to make sure there is sufficient space to access fixed members of xlog_op_header. Signed-off-by: lei lu Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Chandan Babu R Signed-off-by: May27May --- fs/xfs/xfs_log_recover.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 79457312cdbb..b264f9bb5984 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2419,7 +2419,10 @@ xlog_recover_process_data( ohead = (struct xlog_op_header *)dp; dp += sizeof(*ohead); - ASSERT(dp <= end); + if (dp > end) { + xfs_warn(log->l_mp, "%s: op header overrun", __func__); + return -EFSCORRUPTED; + } /* errors will abort recovery */ error = xlog_recover_process_ophdr(log, rhash, rhead, ohead, -- Gitee From f8cabbc02b2d3e1242d1659e406f900079747d39 Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Sat, 20 Apr 2024 10:18:55 +0530 Subject: [PATCH 03/11] usb: dwc3: core: Skip setting event buffers for host only controllers stable inclusion from stable-v5.10.225 commit 9e1c4d0d6ae9fedb934c3ff4394944d855db744b category: bugfix issue: #IB9RV6 CVE: CVE-2024-46675 Signed-off-by: May27May --------------------------------------- [ Upstream commit 89d7f962994604a3e3d480832788d06179abefc5 ] On some SoC's like SA8295P where the tertiary controller is host-only capable, GEVTADDRHI/LO, GEVTSIZ, GEVTCOUNT registers are not accessible. Trying to access them leads to a crash. For DRD/Peripheral supported controllers, event buffer setup is done again in gadget_pullup. Skip setup or cleanup of event buffers if controller is host-only capable. Suggested-by: Johan Hovold Signed-off-by: Krishna Kurapati Acked-by: Thinh Nguyen Reviewed-by: Johan Hovold Reviewed-by: Bjorn Andersson Tested-by: Johan Hovold Link: https://lore.kernel.org/r/20240420044901.884098-4-quic_kriskura@quicinc.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: May27May --- drivers/usb/dwc3/core.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 214a8ff2d69c..434874558e5f 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -446,6 +446,13 @@ static void dwc3_free_event_buffers(struct dwc3 *dwc) static int dwc3_alloc_event_buffers(struct dwc3 *dwc, unsigned length) { struct dwc3_event_buffer *evt; + unsigned int hw_mode; + + hw_mode = DWC3_GHWPARAMS0_MODE(dwc->hwparams.hwparams0); + if (hw_mode == DWC3_GHWPARAMS0_MODE_HOST) { + dwc->ev_buf = NULL; + return 0; + } evt = dwc3_alloc_one_event_buffer(dwc, length); if (IS_ERR(evt)) { @@ -467,6 +474,9 @@ int dwc3_event_buffers_setup(struct dwc3 *dwc) { struct dwc3_event_buffer *evt; + if (!dwc->ev_buf) + return 0; + evt = dwc->ev_buf; evt->lpos = 0; dwc3_writel(dwc->regs, DWC3_GEVNTADRLO(0), @@ -484,6 +494,9 @@ void dwc3_event_buffers_cleanup(struct dwc3 *dwc) { struct dwc3_event_buffer *evt; + if (!dwc->ev_buf) + return; + evt = dwc->ev_buf; evt->lpos = 0; -- Gitee From 36ae57ea466379d88bb339ad1c66f6db0b06d0d7 Mon Sep 17 00:00:00 2001 From: Selvarasu Ganesan Date: Thu, 15 Aug 2024 12:18:31 +0530 Subject: [PATCH 04/11] usb: dwc3: core: Prevent USB core invalid event buffer address access stable inclusion from stable-v5.10.225 commit b72da4d89b97da71e056cc4d1429b2bc426a9c2f category: bugfix issue: #IB9RV6 CVE: CVE-2024-46675 Signed-off-by: May27May --------------------------------------- commit 14e497183df28c006603cc67fd3797a537eef7b9 upstream. This commit addresses an issue where the USB core could access an invalid event buffer address during runtime suspend, potentially causing SMMU faults and other memory issues in Exynos platforms. The problem arises from the following sequence. 1. In dwc3_gadget_suspend, there is a chance of a timeout when moving the USB core to the halt state after clearing the run/stop bit by software. 2. In dwc3_core_exit, the event buffer is cleared regardless of the USB core's status, which may lead to an SMMU faults and other memory issues. if the USB core tries to access the event buffer address. To prevent this hardware quirk on Exynos platforms, this commit ensures that the event buffer address is not cleared by software when the USB core is active during runtime suspend by checking its status before clearing the buffer address. Cc: stable Signed-off-by: Selvarasu Ganesan Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20240815064836.1491-1-selvarasu.g@samsung.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: May27May --- drivers/usb/dwc3/core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 434874558e5f..d37c97bb1469 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -493,9 +493,17 @@ int dwc3_event_buffers_setup(struct dwc3 *dwc) void dwc3_event_buffers_cleanup(struct dwc3 *dwc) { struct dwc3_event_buffer *evt; + u32 reg; if (!dwc->ev_buf) return; + /* + * Exynos platforms may not be able to access event buffer if the + * controller failed to halt on dwc3_core_exit(). + */ + reg = dwc3_readl(dwc->regs, DWC3_DSTS); + if (!(reg & DWC3_DSTS_DEVCTRLHLT)) + return; evt = dwc->ev_buf; -- Gitee From d6020889d7290c40c51184dfc5876ad10a2d8c80 Mon Sep 17 00:00:00 2001 From: lei lu Date: Fri, 14 Jun 2024 10:22:53 +0800 Subject: [PATCH 05/11] xfs: don't walk off the end of a directory data block mainline inclusion from mainline-v6.11-rc1 commit 0c7fcdb6d06cdf8b19b57c17605215b06afa864a category: bugfix issue: #IB9RV6 CVE: CVE-2024-41013 Signed-off-by: May27May --------------------------------------- This adds sanity checks for xfs_dir2_data_unused and xfs_dir2_data_entry to make sure don't stray beyond valid memory region. Before patching, the loop simply checks that the start offset of the dup and dep is within the range. So in a crafted image, if last entry is xfs_dir2_data_unused, we can change dup->length to dup->length-1 and leave 1 byte of space. In the next traversal, this space will be considered as dup or dep. We may encounter an out of bound read when accessing the fixed members. In the patch, we make sure that the remaining bytes large enough to hold an unused entry before accessing xfs_dir2_data_unused and xfs_dir2_data_unused is XFS_DIR2_DATA_ALIGN byte aligned. We also make sure that the remaining bytes large enough to hold a dirent with a single-byte name before accessing xfs_dir2_data_entry. Signed-off-by: lei lu Reviewed-by: Darrick J. Wong Signed-off-by: Chandan Babu R Signed-off-by: May27May Conflicts: fs/xfs/libxfs/xfs_dir2_data.c --- fs/xfs/libxfs/xfs_dir2_data.c | 31 ++++++++++++++++++++++++++----- fs/xfs/libxfs/xfs_dir2_priv.h | 7 +++++++ 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index 375b3edb2ad2..7c2dabb10935 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -177,6 +177,14 @@ __xfs_dir3_data_check( while (offset < end) { struct xfs_dir2_data_unused *dup = bp->b_addr + offset; struct xfs_dir2_data_entry *dep = bp->b_addr + offset; + unsigned int reclen; + + /* + * Are the remaining bytes large enough to hold an + * unused entry? + */ + if (offset > end - xfs_dir2_data_unusedsize(1)) + return __this_address; /* * If it's unused, look for the space in the bestfree table. @@ -186,9 +194,13 @@ __xfs_dir3_data_check( if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { xfs_failaddr_t fa; + reclen = xfs_dir2_data_unusedsize( + be16_to_cpu(dup->length)); if (lastfree != 0) return __this_address; - if (offset + be16_to_cpu(dup->length) > end) + if (be16_to_cpu(dup->length) != reclen) + return __this_address; + if (offset + reclen > end) return __this_address; if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) != offset) @@ -206,10 +218,18 @@ __xfs_dir3_data_check( be16_to_cpu(bf[2].length)) return __this_address; } - offset += be16_to_cpu(dup->length); + offset += reclen; lastfree = 1; continue; } + + /* + * This is not an unused entry. Are the remaining bytes + * large enough for a dirent with a single-byte name? + */ + if (offset > end - xfs_dir2_data_entsize(mp, 1)) + return __this_address; + /* * It's a real entry. Validate the fields. * If this is a block directory then make sure it's @@ -218,9 +238,10 @@ __xfs_dir3_data_check( */ if (dep->namelen == 0) return __this_address; - if (xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))) + reclen = xfs_dir2_data_entsize(mp, dep->namelen); + if (offset + reclen > end) return __this_address; - if (offset + xfs_dir2_data_entsize(mp, dep->namelen) > end) + if (xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber))) return __this_address; if (be16_to_cpu(*xfs_dir2_data_entry_tag_p(mp, dep)) != offset) return __this_address; @@ -244,7 +265,7 @@ __xfs_dir3_data_check( if (i >= be32_to_cpu(btp->count)) return __this_address; } - offset += xfs_dir2_data_entsize(mp, dep->namelen); + offset += reclen; } /* * Need to have seen all the entries and all the bestfree slots. diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index 44c6a77cba05..e46063cde07d 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -186,6 +186,13 @@ void xfs_dir2_sf_put_ftype(struct xfs_mount *mp, extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp, struct dir_context *ctx, size_t bufsize); +static inline unsigned int +xfs_dir2_data_unusedsize( + unsigned int len) +{ + return round_up(len, XFS_DIR2_DATA_ALIGN); +} + static inline unsigned int xfs_dir2_data_entsize( struct xfs_mount *mp, -- Gitee From 69a51df12244c004f23011b15d210d43f0a6eceb Mon Sep 17 00:00:00 2001 From: Yongqiang Liu Date: Thu, 7 Mar 2024 13:05:09 +0100 Subject: [PATCH 06/11] ARM: 9359/1: flush: check if the folio is reserved for no-mapping addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from mainline-v6.9-rc1 commit 0c66c6f4e21cb22220cbd8821c5c73fc157d20dc category: bugfix issue: #IB9RV6 CVE: CVE-2024-26947 Signed-off-by: May27May --------------------------------------- Since commit a4d5613c4dc6 ("arm: extend pfn_valid to take into account freed memory map alignment") changes the semantics of pfn_valid() to check presence of the memory map for a PFN. A valid page for an address which is reserved but not mapped by the kernel[1], the system crashed during some uio test with the following memory layout: node 0: [mem 0x00000000c0a00000-0x00000000cc8fffff] node 0: [mem 0x00000000d0000000-0x00000000da1fffff] the uio layout is:0xc0900000, 0x100000 the crash backtrace like: Unable to handle kernel paging request at virtual address bff00000 [...] CPU: 1 PID: 465 Comm: startapp.bin Tainted: G O 5.10.0 #1 Hardware name: Generic DT based system PC is at b15_flush_kern_dcache_area+0x24/0x3c LR is at __sync_icache_dcache+0x6c/0x98 [...] (b15_flush_kern_dcache_area) from (__sync_icache_dcache+0x6c/0x98) (__sync_icache_dcache) from (set_pte_at+0x28/0x54) (set_pte_at) from (remap_pfn_range+0x1a0/0x274) (remap_pfn_range) from (uio_mmap+0x184/0x1b8 [uio]) (uio_mmap [uio]) from (__mmap_region+0x264/0x5f4) (__mmap_region) from (__do_mmap_mm+0x3ec/0x440) (__do_mmap_mm) from (do_mmap+0x50/0x58) (do_mmap) from (vm_mmap_pgoff+0xfc/0x188) (vm_mmap_pgoff) from (ksys_mmap_pgoff+0xac/0xc4) (ksys_mmap_pgoff) from (ret_fast_syscall+0x0/0x5c) Code: e0801001 e2423001 e1c00003 f57ff04f (ee070f3e) ---[ end trace 09cf0734c3805d52 ]--- Kernel panic - not syncing: Fatal exception So check if PG_reserved was set to solve this issue. [1]: https://lore.kernel.org/lkml/Zbtdue57RO0QScJM@linux.ibm.com/ Fixes: a4d5613c4dc6 ("arm: extend pfn_valid to take into account freed memory map alignment") Suggested-by: Mike Rapoport Signed-off-by: Yongqiang Liu Signed-off-by: Russell King (Oracle) Signed-off-by: May27May Conflicts: arch/arm/mm/flush.c --- arch/arm/mm/flush.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 6d89db7895d1..ef848bf0df28 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -280,6 +280,9 @@ void __sync_icache_dcache(pte_t pteval) return; page = pfn_to_page(pfn); + if (PageReserved(page)) + return; + if (cache_is_vipt_aliasing()) mapping = page_mapping_file(page); else -- Gitee From a20b40116360e152ab45d60a2ab588f2f6c8f682 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 17 Jan 2022 16:36:53 +1100 Subject: [PATCH 07/11] SUNRPC: lock against ->sock changing during sysfs read mainline inclusion from mainline-v5.17-rc4 commit b49ea673e119f59c71645e2f65b3ccad857c90ee category: bugfix issue: #IB9RV6 CVE: CVE-2024-48816 Signed-off-by: May27May --------------------------------------- ->sock can be set to NULL asynchronously unless ->recv_mutex is held. So it is important to hold that mutex. Otherwise a sysfs read can trigger an oops. Commit 17f09d3f619a ("SUNRPC: Check if the xprt is connected before handling sysfs reads") appears to attempt to fix this problem, but it only narrows the race window. Fixes: 17f09d3f619a ("SUNRPC: Check if the xprt is connected before handling sysfs reads") Fixes: a8482488a7d6 ("SUNRPC query transport's source port") Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker Signed-off-by: May27May Conflicts: net/sunrpc/sysfs.c --- net/sunrpc/xprtsock.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ae5b5380f0f0..063814aca1e1 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1676,7 +1676,12 @@ static int xs_get_srcport(struct sock_xprt *transport) unsigned short get_srcport(struct rpc_xprt *xprt) { struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt); - return xs_sock_getport(sock->sock); + unsigned short ret = 0; + mutex_lock(&sock->recv_mutex); + if (sock->sock) + ret = xs_sock_getport(sock->sock); + mutex_unlock(&sock->recv_mutex); + return ret; } EXPORT_SYMBOL(get_srcport); -- Gitee From d17ad8d0efeb7f1520d5da18ab03f0586eb59253 Mon Sep 17 00:00:00 2001 From: Hyeong-Jun Kim Date: Tue, 2 Nov 2021 16:10:02 +0900 Subject: [PATCH 08/11] f2fs: invalidate META_MAPPING before IPU/DIO write mainline inclusion from mainline-v5.16-rc1 commit e3b49ea36802053f312013fd4ccb6e59920a9f76 category: bugfix issue: #IB9RV6 CVE: CVE-2024-26869 Signed-off-by: May27May --------------------------------------- Encrypted pages during GC are read and cached in META_MAPPING. However, due to cached pages in META_MAPPING, there is an issue where newly written pages are lost by IPU or DIO writes. Thread A - f2fs_gc() Thread B /* phase 3 */ down_write(i_gc_rwsem) ra_data_block() ---- (a) up_write(i_gc_rwsem) f2fs_direct_IO() : - down_read(i_gc_rwsem) - __blockdev_direct_io() - get_data_block_dio_write() - f2fs_dio_submit_bio() ---- (b) - up_read(i_gc_rwsem) /* phase 4 */ down_write(i_gc_rwsem) move_data_block() ---- (c) up_write(i_gc_rwsem) (a) In phase 3 of f2fs_gc(), up-to-date page is read from storage and cached in META_MAPPING. (b) In thread B, writing new data by IPU or DIO write on same blkaddr as read in (a). cached page in META_MAPPING become out-dated. (c) In phase 4 of f2fs_gc(), out-dated page in META_MAPPING is copied to new blkaddr. In conclusion, the newly written data in (b) is lost. To address this issue, invalidating pages in META_MAPPING before IPU or DIO write. Fixes: 6aa58d8ad20a ("f2fs: readahead encrypted block during GC") Signed-off-by: Hyeong-Jun Kim Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: May27May Conflicts: fs/f2fs/data.c --- fs/f2fs/data.c | 5 ++++- fs/f2fs/segment.c | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index fd8722da9c4f..709323fb5a02 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1719,9 +1719,12 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, sync_out: /* for hardware encryption, but to avoid potential issue in future */ - if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) + if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) { f2fs_wait_on_block_writeback_range(inode, map->m_pblk, map->m_len); + invalidate_mapping_pages(META_MAPPING(sbi), + map->m_pblk, map->m_pblk); + } if (flag == F2FS_GET_BLOCK_PRECACHE) { if (map->m_flags & F2FS_MAP_MAPPED) { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2a54d7b89dd6..73fffb11d42c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3725,6 +3725,9 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) return -EFSCORRUPTED; } + invalidate_mapping_pages(META_MAPPING(sbi), + fio->new_blkaddr, fio->new_blkaddr); + stat_inc_inplace_blocks(fio->sbi); if (fio->bio && !(SM_I(sbi)->ipu_policy & (1 << F2FS_IPU_NOCACHE))) -- Gitee From 057c4097c149860bc85060d5d5d9ced75cdc010b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 6 Jul 2022 14:30:15 +0800 Subject: [PATCH 09/11] f2fs: fix to invalidate META_MAPPING before DIO write mainline inclusion from mainline-v6.0-rc1 commit 67ca06872eb02944b4c6f92cffa9242e92c63109 category: bugfix issue: #IB9RV6 CVE: CVE-2024-26869 Signed-off-by: May27May --------------------------------------- Quoted from commit e3b49ea36802 ("f2fs: invalidate META_MAPPING before IPU/DIO write") " Encrypted pages during GC are read and cached in META_MAPPING. However, due to cached pages in META_MAPPING, there is an issue where newly written pages are lost by IPU or DIO writes. Thread A - f2fs_gc() Thread B /* phase 3 */ down_write(i_gc_rwsem) ra_data_block() ---- (a) up_write(i_gc_rwsem) f2fs_direct_IO() : - down_read(i_gc_rwsem) - __blockdev_direct_io() - get_data_block_dio_write() - f2fs_dio_submit_bio() ---- (b) - up_read(i_gc_rwsem) /* phase 4 */ down_write(i_gc_rwsem) move_data_block() ---- (c) up_write(i_gc_rwsem) (a) In phase 3 of f2fs_gc(), up-to-date page is read from storage and cached in META_MAPPING. (b) In thread B, writing new data by IPU or DIO write on same blkaddr as read in (a). cached page in META_MAPPING become out-dated. (c) In phase 4 of f2fs_gc(), out-dated page in META_MAPPING is copied to new blkaddr. In conclusion, the newly written data in (b) is lost. To address this issue, invalidating pages in META_MAPPING before IPU or DIO write. " In previous commit, we missed to cover extent cache hit case, and passed wrong value for parameter @end of invalidate_mapping_pages(), fix both issues. Fixes: 6aa58d8ad20a ("f2fs: readahead encrypted block during GC") Fixes: e3b49ea36802 ("f2fs: invalidate META_MAPPING before IPU/DIO write") Cc: Hyeong-Jun Kim Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: May27May Conflicts: fs/f2fs/data.c --- fs/f2fs/data.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 709323fb5a02..0f33084e149e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1540,9 +1540,12 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, *map->m_next_extent = pgofs + map->m_len; /* for hardware encryption, but to avoid potential issue in future */ - if (flag == F2FS_GET_BLOCK_DIO) + if (flag == F2FS_GET_BLOCK_DIO) { f2fs_wait_on_block_writeback_range(inode, map->m_pblk, map->m_len); + invalidate_mapping_pages(META_MAPPING(sbi), + map->m_pblk, map->m_pblk + map->m_len - 1); + } goto out; } @@ -1723,7 +1726,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, f2fs_wait_on_block_writeback_range(inode, map->m_pblk, map->m_len); invalidate_mapping_pages(META_MAPPING(sbi), - map->m_pblk, map->m_pblk); + map->m_pblk, map->m_pblk + map->m_len - 1); } if (flag == F2FS_GET_BLOCK_PRECACHE) { -- Gitee From da73466e43df420e92616c3cea8c00690f8d59aa Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 12 Jul 2022 23:26:43 +0800 Subject: [PATCH 10/11] f2fs: invalidate meta pages only for post_read required inode mainline inclusion from mainline-v6.0-rc1 commit 0d5b9d8156396bbe1c982708b38ab9e188c45ec9 category: bugfix issue: #IB9RV6 CVE: CVE-2024-26869 Signed-off-by: May27May --------------------------------------- After commit e3b49ea36802 ("f2fs: invalidate META_MAPPING before IPU/DIO write"), invalidate_mapping_pages() will be called to avoid race condition in between IPU/DIO and readahead for GC. However, readahead flow is only used for post_read required inode, so this patch adds check condition to avoids unnecessary page cache invalidating for non-post_read inode. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: May27May Conflicts: fs/f2fs/data.c --- fs/f2fs/data.c | 11 +++-------- fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 9 ++++++++- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0f33084e149e..8b8981d426cc 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1540,12 +1540,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, *map->m_next_extent = pgofs + map->m_len; /* for hardware encryption, but to avoid potential issue in future */ - if (flag == F2FS_GET_BLOCK_DIO) { + if (flag == F2FS_GET_BLOCK_DIO) f2fs_wait_on_block_writeback_range(inode, map->m_pblk, map->m_len); - invalidate_mapping_pages(META_MAPPING(sbi), - map->m_pblk, map->m_pblk + map->m_len - 1); - } goto out; } @@ -1722,12 +1719,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, sync_out: /* for hardware encryption, but to avoid potential issue in future */ - if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) { + if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) f2fs_wait_on_block_writeback_range(inode, map->m_pblk, map->m_len); - invalidate_mapping_pages(META_MAPPING(sbi), - map->m_pblk, map->m_pblk + map->m_len - 1); - } if (flag == F2FS_GET_BLOCK_PRECACHE) { if (map->m_flags & F2FS_MAP_MAPPED) { @@ -2809,6 +2803,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, .submitted = false, .compr_blocks = compr_blocks, .need_lock = LOCK_RETRY, + .post_read = f2fs_post_read_required(inode), .io_type = io_type, .io_wbc = wbc, .bio = bio, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 702ac7d98333..82a381d62252 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1143,6 +1143,7 @@ struct f2fs_io_info { bool retry; /* need to reallocate block address */ int compr_blocks; /* # of compressed block addresses */ bool encrypted; /* indicate file is encrypted */ + bool post_read; /* require post read */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ struct bio **bio; /* bio for ipu */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 73fffb11d42c..7a7cc9630362 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3725,7 +3725,8 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) return -EFSCORRUPTED; } - invalidate_mapping_pages(META_MAPPING(sbi), + if (fio->post_read) + invalidate_mapping_pages(META_MAPPING(sbi), fio->new_blkaddr, fio->new_blkaddr); stat_inc_inplace_blocks(fio->sbi); @@ -3894,10 +3895,16 @@ void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, block_t len) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); block_t i; + if (!f2fs_post_read_required(inode)) + return; + for (i = 0; i < len; i++) f2fs_wait_on_block_writeback(inode, blkaddr + i); + + invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr + len - 1); } static int read_compacted_summaries(struct f2fs_sb_info *sbi) -- Gitee From aa637fc245b2999d834dfcb0c777660fbfae619e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 8 Mar 2024 09:08:34 +0800 Subject: [PATCH 11/11] f2fs: fix to truncate meta inode pages forcely stable inclusion from stable-v6.6.23 commit c92f2927df860a60ba815d3ee610a944b92a8694 category: bugfix issue: #IB9RV6 CVE: CVE-2024-26869 Signed-off-by: May27May --------------------------------------- [ Upstream commit 9f0c4a46be1fe9b97dbe66d49204c1371e3ece65 ] Below race case can cause data corruption: Thread A GC thread - gc_data_segment - ra_data_block - locked meta_inode page - f2fs_inplace_write_data - invalidate_mapping_pages : fail to invalidate meta_inode page due to lock failure or dirty|writeback status - f2fs_submit_page_bio : write last dirty data to old blkaddr - move_data_block - load old data from meta_inode page - f2fs_submit_page_write : write old data to new blkaddr Because invalidate_mapping_pages() will skip invalidating page which has unclear status including locked, dirty, writeback and so on, so we need to use truncate_inode_pages_range() instead of invalidate_mapping_pages() to make sure meta_inode page will be dropped. Fixes: 6aa58d8ad20a ("f2fs: readahead encrypted block during GC") Fixes: e3b49ea36802 ("f2fs: invalidate META_MAPPING before IPU/DIO write") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: May27May Conflicts: fs/f2fs/f2fs.h --- fs/f2fs/checkpoint.c | 5 +++-- fs/f2fs/f2fs.h | 26 ++++++++++++++++++++++++++ fs/f2fs/segment.c | 5 ++--- include/linux/f2fs_fs.h | 1 + 4 files changed, 32 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 8ca549cc975e..8a94bf3611d3 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1551,8 +1551,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) */ if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi) || f2fs_sb_has_compression(sbi)) - invalidate_mapping_pages(META_MAPPING(sbi), - MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1); + f2fs_bug_on(sbi, + invalidate_inode_pages2_range(META_MAPPING(sbi), + MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1)); f2fs_release_ino_entry(sbi, false); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 82a381d62252..80c79159d7b6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4187,6 +4187,32 @@ static inline bool is_journalled_quota(struct f2fs_sb_info *sbi) return false; } +static inline void f2fs_truncate_meta_inode_pages(struct f2fs_sb_info *sbi, + block_t blkaddr, unsigned int cnt) +{ + bool need_submit = false; + int i = 0; + + do { + struct page *page; + + page = find_get_page(META_MAPPING(sbi), blkaddr + i); + if (page) { + if (PageWriteback(page)) + need_submit = true; + f2fs_put_page(page, 0); + } + } while (++i < cnt && !need_submit); + + if (need_submit) + f2fs_submit_merged_write_cond(sbi, sbi->meta_inode, + NULL, 0, DATA); + + truncate_inode_pages_range(META_MAPPING(sbi), + F2FS_BLK_TO_BYTES((loff_t)blkaddr), + F2FS_BLK_END_BYTES((loff_t)(blkaddr + cnt - 1))); +} + #define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7a7cc9630362..f76cab61c0ad 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3726,8 +3726,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) } if (fio->post_read) - invalidate_mapping_pages(META_MAPPING(sbi), - fio->new_blkaddr, fio->new_blkaddr); + f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1); stat_inc_inplace_blocks(fio->sbi); @@ -3904,7 +3903,7 @@ void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, for (i = 0; i < len; i++) f2fs_wait_on_block_writeback(inode, blkaddr + i); - invalidate_mapping_pages(META_MAPPING(sbi), blkaddr, blkaddr + len - 1); + f2fs_truncate_meta_inode_pages(sbi, blkaddr, len); } static int read_compacted_summaries(struct f2fs_sb_info *sbi) diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index a5dbb57a687f..7fd4bea6be64 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -27,6 +27,7 @@ #define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS) #define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS) +#define F2FS_BLK_END_BYTES(blk) (F2FS_BLK_TO_BYTES(blk + 1) - 1) /* 0, 1(node nid), 2(meta nid) are reserved node id */ #define F2FS_RESERVED_NODE_NUM 3 -- Gitee