From 2fade5c9cc54cbee4098a54415adcffc7706c38a Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:38 +0800 Subject: [PATCH 01/56] ext4: avoid allocating blocks from corrupted group in ext4_mb_try_best_found() stable inclusion from stable-v5.10.211 commit 927794a02169778c9c2e7b25c768ab3ea8c1dc03 category: bugfix issue: NA CVE: CVE-2024-26773 [ Upstream commit 4530b3660d396a646aad91a787b6ab37cf604b53 ] Determine if the group block bitmap is corrupted before using ac_b_ex in ext4_mb_try_best_found() to avoid allocating blocks from a group with a corrupted block bitmap in the following concurrency and making the situation worse. ext4_mb_regular_allocator ext4_lock_group(sb, group) ext4_mb_good_group // check if the group bbitmap is corrupted ext4_mb_complex_scan_group // Scan group gets ac_b_ex but doesn't use it ext4_unlock_group(sb, group) ext4_mark_group_bitmap_corrupted(group) // The block bitmap was corrupted during // the group unlock gap. ext4_mb_try_best_found ext4_lock_group(ac->ac_sb, group) ext4_mb_use_best_found mb_mark_used // Allocating blocks in block bitmap corrupted group Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-7-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- fs/ext4/mballoc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a954ad5944de..033287321c06 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1873,6 +1873,9 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac, return err; ext4_lock_group(ac->ac_sb, group); + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + goto out; + max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex); if (max > 0) { @@ -1880,6 +1883,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac, ext4_mb_use_best_found(ac, e4b); } +out: ext4_unlock_group(ac->ac_sb, group); ext4_mb_unload_buddy(e4b); -- Gitee From b81044e8a0cafbb49631de1c3e5142c7554999bf Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:39 +0800 Subject: [PATCH 02/56] ext4: avoid allocating blocks from corrupted group in ext4_mb_find_by_goal() stable inclusion from stable-v5.10.211 commit ffeb72a80a82aba59a6774b0611f792e0ed3b0b7 category: bugfix issue: NA CVE: CVE-2024-26772 [ Upstream commit 832698373a25950942c04a512daa652c18a9b513 ] Places the logic for checking if the group's block bitmap is corrupt under the protection of the group lock to avoid allocating blocks from the group with a corrupted block bitmap. 
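Both fixes follow the same pattern: a corruption flag that another CPU can set must be re-tested after the group lock is taken, because a check made before (or after dropping) the lock can go stale in the unlock gap. A minimal user-space sketch of the pattern, with a pthread mutex standing in for ext4's group lock (all names here are hypothetical illustrations, not kernel APIs; compile with -pthread):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t group_lock = PTHREAD_MUTEX_INITIALIZER;
static bool bitmap_corrupt;	/* set by a concurrent "mark corrupted" path */

/* Racy: the flag can be set between this check and the locked region. */
static int alloc_unsafe(void)
{
	if (bitmap_corrupt)
		return -1;
	pthread_mutex_lock(&group_lock);
	/* ...allocate from a group that may now be corrupt... */
	pthread_mutex_unlock(&group_lock);
	return 0;
}

/* Fixed: the check is made under the lock that writers also hold while
 * marking the group corrupt, so the answer cannot go stale before the
 * allocation happens. */
static int alloc_safe(void)
{
	int ret = -1;

	pthread_mutex_lock(&group_lock);
	if (bitmap_corrupt)
		goto out;
	/* ...allocate... */
	ret = 0;
out:
	pthread_mutex_unlock(&group_lock);
	return ret;
}

int main(void)
{
	printf("%d %d\n", alloc_unsafe(), alloc_safe());
	return 0;
}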
Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-8-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- fs/ext4/mballoc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 033287321c06..ac6cf2b53108 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1912,12 +1912,10 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, if (err) return err; - if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) { - ext4_mb_unload_buddy(e4b); - return 0; - } - ext4_lock_group(ac->ac_sb, group); + if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) + goto out; + max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, ac->ac_g_ex.fe_len, &ex); ex.fe_logical = 0xDEADFA11; /* debug value */ @@ -1950,6 +1948,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, ac->ac_b_ex = ex; ext4_mb_use_best_found(ac, e4b); } +out: ext4_unlock_group(ac->ac_sb, group); ext4_mb_unload_buddy(e4b); -- Gitee From 23c695bf79b9e96db8c7618337e7db57c9e2fbd4 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Thu, 4 Jan 2024 22:20:33 +0800 Subject: [PATCH 03/56] ext4: fix double-free of blocks due to wrong extents moved_len stable inclusion from stable-v5.10.210 commit d033a555d9a1cf53dbf3301af7199cc4a4c8f537 category: bugfix issue: NA CVE: CVE-2024-26704 commit 55583e899a5357308274601364741a83e78d6ac4 upstream. In ext4_move_extents(), moved_len is only updated when all moves are successfully executed, and only discards orig_inode and donor_inode preallocations when moved_len is not zero. When the loop fails to exit after successfully moving some extents, moved_len is not updated and remains at 0, so it does not discard the preallocations. If the moved extents overlap with the preallocated extents, the overlapped extents are freed twice in ext4_mb_release_inode_pa() and ext4_process_freed_data() (as described in commit 94d7c16cbbbd ("ext4: Fix double-free of blocks with EXT4_IOC_MOVE_EXT")), and bb_free is incremented twice. Hence when trim is executed, a zero-division bug is triggered in mb_update_avg_fragment_size() because bb_free is not zero and bb_fragments is zero. Therefore, update move_len after each extent move to avoid the issue. 
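The key change is moving from a single end-of-loop computation of *moved_len to per-iteration accumulation, so a mid-loop failure still reports the work already done and the cleanup path gated on *moved_len != 0 still runs. A stand-alone sketch of that accounting shape (hypothetical names and stubbed work, not the ext4 code):

#include <stdio.h>
#include <stddef.h>

/* Stub: pretend to move up to 4 units at a time and fail at offset 10. */
static size_t process_chunk(size_t off, size_t len, int *err)
{
	if (off >= 10) {
		*err = -1;
		return 0;
	}
	return len < 4 ? len : 4;
}

static int process_range(size_t start, size_t len, size_t *moved)
{
	size_t off = start, end = start + len;
	int err = 0;

	*moved = 0;
	while (off < end) {
		size_t cur = process_chunk(off, end - off, &err);

		*moved += cur;	/* account progress before the error check */
		if (err || !cur)
			break;
		off += cur;
	}
	return err;
}

int main(void)
{
	size_t moved;
	int err = process_range(0, 32, &moved);

	printf("err=%d moved=%zu\n", err, moved);	/* err=-1 moved=12 */
	return 0;
}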
Reported-by: Wei Chen Reported-by: xingwei lee Closes: https://lore.kernel.org/r/CAO4mrferzqBUnCag8R3m2zf897ts9UEuhjFQGPtODT92rYyR2Q@mail.gmail.com Fixes: fcf6b1b729bc ("ext4: refactor ext4_move_extents code base") CC: # 3.18 Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20240104142040.2835097-2-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- fs/ext4/move_extent.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 64a579734f93..f8dd5d972c33 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -615,6 +615,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, goto out; o_end = o_start + len; + *moved_len = 0; while (o_start < o_end) { struct ext4_extent *ex; ext4_lblk_t cur_blk, next_blk; @@ -670,7 +671,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, */ ext4_double_up_write_data_sem(orig_inode, donor_inode); /* Swap original branches with new branches */ - move_extent_per_page(o_filp, donor_inode, + *moved_len += move_extent_per_page(o_filp, donor_inode, orig_page_index, donor_page_index, offset_in_page, cur_len, unwritten, &ret); @@ -680,9 +681,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, o_start += cur_len; d_start += cur_len; } - *moved_len = o_start - orig_blk; - if (*moved_len > len) - *moved_len = len; out: if (*moved_len) { -- Gitee From 72be3a762d0fdd05cc4d17e0c8ef40f91334ff8d Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Wed, 27 Dec 2023 09:35:23 +0000 Subject: [PATCH 04/56] crypto: scomp - fix req->dst buffer overflow stable inclusion from stable-v5.10.209 commit 4518dc468cdd796757190515a9be7408adc8911e category: bugfix issue: NA CVE: CVE-2023-52612 [ Upstream commit 744e1885922a9943458954cfea917b31064b4131 ] The req->dst buffer size should be checked before copying from the scomp_scratch->dst to avoid req->dst buffer overflow problem. 
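The fix snapshots the destination capacity before the operation, because the callback overwrites req->dlen with the produced length, and rejects the result with -ENOSPC if it exceeds what the caller provided. The same shape in a stand-alone sketch (hypothetical names; the stub "decompressor" is an assumption for illustration):

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Stub: writes into scratch and reports the length it produced. */
static int do_decompress(const char *src, size_t slen,
			 char *scratch, size_t *out_len)
{
	(void)src; (void)slen;
	*out_len = 64;			/* pretend 64 bytes were produced */
	memset(scratch, 'x', *out_len);
	return 0;
}

static int decompress_to_buf(const char *src, size_t slen,
			     char *dst, size_t *dlen)
{
	static char scratch[4096];
	size_t cap = *dlen;		/* snapshot caller capacity first */
	int ret = do_decompress(src, slen, scratch, dlen);

	if (ret)
		return ret;
	if (*dlen > cap)		/* produced more than dst can hold */
		return -ENOSPC;
	memcpy(dst, scratch, *dlen);
	return 0;
}

int main(void)
{
	char dst[16];
	size_t dlen = sizeof(dst);

	printf("ret=%d (expect %d)\n",
	       decompress_to_buf("in", 2, dst, &dlen), -ENOSPC);
	return 0;
}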
Fixes: 1ab53a77b772 ("crypto: acomp - add driver-side scomp interface") Reported-by: syzbot+3eff5e51bf1db122a16e@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/0000000000000b05cd060d6b5511@google.com/ Signed-off-by: Chengming Zhou Reviewed-by: Barry Song Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- crypto/scompress.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/crypto/scompress.c b/crypto/scompress.c index 738f4f8f0f41..4d6366a44400 100644 --- a/crypto/scompress.c +++ b/crypto/scompress.c @@ -124,6 +124,7 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir) struct crypto_scomp *scomp = *tfm_ctx; void **ctx = acomp_request_ctx(req); struct scomp_scratch *scratch; + unsigned int dlen; int ret; if (!req->src || !req->slen || req->slen > SCOMP_SCRATCH_SIZE) @@ -135,6 +136,8 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir) if (!req->dlen || req->dlen > SCOMP_SCRATCH_SIZE) req->dlen = SCOMP_SCRATCH_SIZE; + dlen = req->dlen; + scratch = raw_cpu_ptr(&scomp_scratch); spin_lock(&scratch->lock); @@ -152,6 +155,9 @@ static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir) ret = -ENOMEM; goto out; } + } else if (req->dlen > dlen) { + ret = -ENOSPC; + goto out; } scatterwalk_map_and_copy(scratch->dst, req->dst, 0, req->dlen, 1); -- Gitee From 51fdd1fd9e83e65fb26f2893d1502abd8d0da296 Mon Sep 17 00:00:00 2001 From: Furong Xu <0x1207@gmail.com> Date: Wed, 31 Jan 2024 10:08:28 +0800 Subject: [PATCH 05/56] net: stmmac: xgmac: fix handling of DPP safety error for DMA channels stable inclusion from stable-v5.10.210 commit 2fc45a4631ac7837a5c497cb4f7e2115d950fc37 category: bugfix issue: NA CVE: CVE-2024-26684 [ Upstream commit 46eba193d04f8bd717e525eb4110f3c46c12aec3 ] Commit 56e58d6c8a56 ("net: stmmac: Implement Safety Features in XGMAC core") checks and reports safety errors, but leaves the Data Path Parity Errors for each channel in DMA unhandled at all, lead to a storm of interrupt. Fix it by checking and clearing the DMA_DPP_Interrupt_Status register. Fixes: 56e58d6c8a56 ("net: stmmac: Implement Safety Features in XGMAC core") Signed-off-by: Furong Xu <0x1207@gmail.com> Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/net/ethernet/stmicro/stmmac/common.h | 1 + .../net/ethernet/stmicro/stmmac/dwxgmac2.h | 3 + .../ethernet/stmicro/stmmac/dwxgmac2_core.c | 57 ++++++++++++++++++- 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index af4303523929..0bc345aff1cb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -189,6 +189,7 @@ struct stmmac_safety_stats { unsigned long mac_errors[32]; unsigned long mtl_errors[32]; unsigned long dma_errors[32]; + unsigned long dma_dpp_errors[32]; }; /* Number of fields in Safety Stats */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 6c3b8a950f58..ceb3f2f12089 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -282,6 +282,8 @@ #define XGMAC_RXCEIE BIT(4) #define XGMAC_TXCEIE BIT(0) #define XGMAC_MTL_ECC_INT_STATUS 0x000010cc +#define XGMAC_MTL_DPP_CONTROL 0x000010e0 +#define XGMAC_DDPP_DISABLE BIT(0) #define XGMAC_MTL_TXQ_OPMODE(x) (0x00001100 + (0x80 * (x))) #define XGMAC_TQS GENMASK(25, 16) #define XGMAC_TQS_SHIFT 16 @@ -364,6 +366,7 @@ #define XGMAC_DCEIE BIT(1) #define XGMAC_TCEIE BIT(0) #define XGMAC_DMA_ECC_INT_STATUS 0x0000306c +#define XGMAC_DMA_DPP_INT_STATUS 0x00003074 #define XGMAC_DMA_CH_CONTROL(x) (0x00003100 + (0x80 * (x))) #define XGMAC_SPH BIT(24) #define XGMAC_PBLx8 BIT(16) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index ad4df9bddcf3..ba6ee9f1ecb6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -788,6 +788,43 @@ static const struct dwxgmac3_error_desc dwxgmac3_dma_errors[32]= { { false, "UNKNOWN", "Unknown Error" }, /* 31 */ }; +static const char * const dpp_rx_err = "Read Rx Descriptor Parity checker Error"; +static const char * const dpp_tx_err = "Read Tx Descriptor Parity checker Error"; +static const struct dwxgmac3_error_desc dwxgmac3_dma_dpp_errors[32] = { + { true, "TDPES0", dpp_tx_err }, + { true, "TDPES1", dpp_tx_err }, + { true, "TDPES2", dpp_tx_err }, + { true, "TDPES3", dpp_tx_err }, + { true, "TDPES4", dpp_tx_err }, + { true, "TDPES5", dpp_tx_err }, + { true, "TDPES6", dpp_tx_err }, + { true, "TDPES7", dpp_tx_err }, + { true, "TDPES8", dpp_tx_err }, + { true, "TDPES9", dpp_tx_err }, + { true, "TDPES10", dpp_tx_err }, + { true, "TDPES11", dpp_tx_err }, + { true, "TDPES12", dpp_tx_err }, + { true, "TDPES13", dpp_tx_err }, + { true, "TDPES14", dpp_tx_err }, + { true, "TDPES15", dpp_tx_err }, + { true, "RDPES0", dpp_rx_err }, + { true, "RDPES1", dpp_rx_err }, + { true, "RDPES2", dpp_rx_err }, + { true, "RDPES3", dpp_rx_err }, + { true, "RDPES4", dpp_rx_err }, + { true, "RDPES5", dpp_rx_err }, + { true, "RDPES6", dpp_rx_err }, + { true, "RDPES7", dpp_rx_err }, + { true, "RDPES8", dpp_rx_err }, + { true, "RDPES9", dpp_rx_err }, + { true, "RDPES10", dpp_rx_err }, + { true, "RDPES11", dpp_rx_err }, + { true, "RDPES12", dpp_rx_err }, + { true, "RDPES13", dpp_rx_err }, + { true, "RDPES14", dpp_rx_err }, + { true, "RDPES15", dpp_rx_err }, +}; + static void dwxgmac3_handle_dma_err(struct net_device *ndev, void __iomem *ioaddr, bool correctable, struct stmmac_safety_stats *stats) @@ -799,6 +836,13 @@ static void 
dwxgmac3_handle_dma_err(struct net_device *ndev, dwxgmac3_log_error(ndev, value, correctable, "DMA", dwxgmac3_dma_errors, STAT_OFF(dma_errors), stats); + + value = readl(ioaddr + XGMAC_DMA_DPP_INT_STATUS); + writel(value, ioaddr + XGMAC_DMA_DPP_INT_STATUS); + + dwxgmac3_log_error(ndev, value, false, "DMA_DPP", + dwxgmac3_dma_dpp_errors, + STAT_OFF(dma_dpp_errors), stats); } static int dwxgmac3_safety_feat_config(void __iomem *ioaddr, unsigned int asp) @@ -835,6 +879,12 @@ static int dwxgmac3_safety_feat_config(void __iomem *ioaddr, unsigned int asp) value |= XGMAC_TMOUTEN; /* FSM Timeout Feature */ writel(value, ioaddr + XGMAC_MAC_FSM_CONTROL); + /* 5. Enable Data Path Parity Protection */ + value = readl(ioaddr + XGMAC_MTL_DPP_CONTROL); + /* already enabled by default, explicit enable it again */ + value &= ~XGMAC_DDPP_DISABLE; + writel(value, ioaddr + XGMAC_MTL_DPP_CONTROL); + return 0; } @@ -868,7 +918,11 @@ static int dwxgmac3_safety_feat_irq_status(struct net_device *ndev, ret |= !corr; } - err = dma & (XGMAC_DEUIS | XGMAC_DECIS); + /* DMA_DPP_Interrupt_Status is indicated by MCSIS bit in + * DMA_Safety_Interrupt_Status, so we handle DMA Data Path + * Parity Errors here + */ + err = dma & (XGMAC_DEUIS | XGMAC_DECIS | XGMAC_MCSIS); corr = dma & XGMAC_DECIS; if (err) { dwxgmac3_handle_dma_err(ndev, ioaddr, corr, stats); @@ -884,6 +938,7 @@ static const struct dwxgmac3_error { { dwxgmac3_mac_errors }, { dwxgmac3_mtl_errors }, { dwxgmac3_dma_errors }, + { dwxgmac3_dma_dpp_errors }, }; static int dwxgmac3_safety_feat_dump(struct stmmac_safety_stats *stats, -- Gitee From 7c044299128f6288817fa90ee34110bacfc06fa2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 2 Dec 2023 09:01:54 +0800 Subject: [PATCH 06/56] hwrng: core - Fix page fault dead lock on mmap-ed hwrng stable inclusion from stable-v5.10.210 commit c6a8111aacbfe7a8a70f46cc0de8eed00561693c category: bugfix issue: NA CVE: CVE-2023-52615 commit 78aafb3884f6bc6636efcc1760c891c8500b9922 upstream. There is a dead-lock in the hwrng device read path. This triggers when the user reads from /dev/hwrng into memory also mmap-ed from /dev/hwrng. The resulting page fault triggers a recursive read which then dead-locks. Fix this by using a stack buffer when calling copy_to_user. Reported-by: Edward Adam Davis Reported-by: syzbot+c52ab18308964d248092@syzkaller.appspotmail.com Fixes: 9996508b3353 ("hwrng: core - Replace u32 in driver API with byte array") Cc: Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/char/hw_random/core.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 5749998feaa4..6e2c1ba18012 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -24,10 +24,13 @@ #include #include #include +#include #include #define RNG_MODULE_NAME "hw_random" +#define RNG_BUFFER_SIZE (SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES) + static struct hwrng *current_rng; /* the current rng has been explicitly chosen by user via sysfs */ static int cur_rng_set_by_user; @@ -59,7 +62,7 @@ static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, static size_t rng_buffer_size(void) { - return SMP_CACHE_BYTES < 32 ? 
32 : SMP_CACHE_BYTES; + return RNG_BUFFER_SIZE; } static void add_early_randomness(struct hwrng *rng) @@ -206,6 +209,7 @@ static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, static ssize_t rng_dev_read(struct file *filp, char __user *buf, size_t size, loff_t *offp) { + u8 buffer[RNG_BUFFER_SIZE]; ssize_t ret = 0; int err = 0; int bytes_read, len; @@ -233,34 +237,37 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, if (bytes_read < 0) { err = bytes_read; goto out_unlock_reading; + } else if (bytes_read == 0 && + (filp->f_flags & O_NONBLOCK)) { + err = -EAGAIN; + goto out_unlock_reading; } + data_avail = bytes_read; } - if (!data_avail) { - if (filp->f_flags & O_NONBLOCK) { - err = -EAGAIN; - goto out_unlock_reading; - } - } else { - len = data_avail; + len = data_avail; + if (len) { if (len > size) len = size; data_avail -= len; - if (copy_to_user(buf + ret, rng_buffer + data_avail, - len)) { + memcpy(buffer, rng_buffer + data_avail, len); + } + mutex_unlock(&reading_mutex); + put_rng(rng); + + if (len) { + if (copy_to_user(buf + ret, buffer, len)) { err = -EFAULT; - goto out_unlock_reading; + goto out; } size -= len; ret += len; } - mutex_unlock(&reading_mutex); - put_rng(rng); if (need_resched()) schedule_timeout_interruptible(1); @@ -271,6 +278,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, } } out: + memzero_explicit(buffer, sizeof(buffer)); return ret ? : err; out_unlock_reading: -- Gitee From 02af5e68283146b4ae88cb8c7fb38dde13eb5742 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Thu, 14 Dec 2023 11:08:34 +0800 Subject: [PATCH 07/56] crypto: lib/mpi - Fix unexpected pointer access in mpi_ec_init stable inclusion from stable-v5.10.210 commit 0c3687822259a7628c85cd21a3445cbe3c367165 category: bugfix issue: NA CVE: CVE-2023-52616 [ Upstream commit ba3c5574203034781ac4231acf117da917efcd2a ] When the mpi_ec_ctx structure is initialized, some fields are not cleared, causing a crash when referencing the field when the structure was released. Initially, this issue was ignored because memory for mpi_ec_ctx is allocated with the __GFP_ZERO flag. For example, this error will be triggered when calculating the Za value for SM2 separately. Fixes: d58bb7e55a8a ("lib/mpi: Introduce ec implementation to MPI library") Cc: stable@vger.kernel.org # v6.5 Signed-off-by: Tianjia Zhang Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- lib/mpi/ec.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/mpi/ec.c b/lib/mpi/ec.c index c21470122dfc..941ba0b0067e 100644 --- a/lib/mpi/ec.c +++ b/lib/mpi/ec.c @@ -584,6 +584,9 @@ void mpi_ec_init(struct mpi_ec_ctx *ctx, enum gcry_mpi_ec_models model, ctx->a = mpi_copy(a); ctx->b = mpi_copy(b); + ctx->d = NULL; + ctx->t.two_inv_p = NULL; + ctx->t.p_barrett = use_barrett > 0 ? mpi_barrett_init(ctx->p, 0) : NULL; mpi_ec_get_reset(ctx); -- Gitee From 56f33a7836cc8c662a831df3706a1f91c829b377 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 2 Feb 2024 09:54:04 +0000 Subject: [PATCH 08/56] inet: read sk->sk_family once in inet_recv_error() stable inclusion from stable-v5.10.210 commit 88081ba415224cf413101def4343d660f56d082b category: bugfix issue: NA CVE: CVE-2024-26679 [ Upstream commit eef00a82c568944f113f2de738156ac591bbd5cd ] inet_recv_error() is called without holding the socket lock. IPv6 socket could mutate to IPv4 with IPV6_ADDRFORM socket option and trigger a KCSAN warning. 
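READ_ONCE() forces a single load of sk->sk_family into a local, so a lockless reader cannot observe two different values across the two branches when a concurrent setsockopt(IPV6_ADDRFORM) flips the family. A user-space sketch of the same idiom, with a C11 relaxed atomic load standing in for the kernel macro:

#include <stdatomic.h>
#include <stdio.h>
#include <sys/socket.h>

/* Field mutated by another thread (e.g. an option handler). */
static _Atomic int sk_family = AF_INET6;

static int recv_error(void)
{
	/* One load; both branches then test the same snapshot instead of
	 * re-reading a field that can change between the tests. */
	int family = atomic_load_explicit(&sk_family, memory_order_relaxed);

	if (family == AF_INET)
		return 4;
	if (family == AF_INET6)
		return 6;
	return -1;
}

int main(void)
{
	printf("%d\n", recv_error());
	return 0;
}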
Fixes: f4713a3dfad0 ("net-timestamp: make tcp_recvmsg call ipv6_recv_error for AF_INET6 socks") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- net/ipv4/af_inet.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index dcc1df34b10f..475a19db3713 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1609,10 +1609,12 @@ EXPORT_SYMBOL(inet_current_timestamp); int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { - if (sk->sk_family == AF_INET) + unsigned int family = READ_ONCE(sk->sk_family); + + if (family == AF_INET) return ip_recv_error(sk, msg, len, addr_len); #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) + if (family == AF_INET6) return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len); #endif return -EINVAL; -- Gitee From fce01a0960388da2da3d693ce6ac0b42c4050ece Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 12 Jan 2024 20:26:26 +0800 Subject: [PATCH 09/56] blk-mq: fix IO hang from sbitmap wakeup race stable inclusion from stable-v5.10.210 commit 7610ba1319253225a9ba8a9d28d472fc883b4e2f category: bugfix issue: NA CVE: CVE-2024-26671 [ Upstream commit 5266caaf5660529e3da53004b8b7174cab6374ed ] In blk_mq_mark_tag_wait(), __add_wait_queue() may be re-ordered with the following blk_mq_get_driver_tag() in case of getting driver tag failure. Then in __sbitmap_queue_wake_up(), waitqueue_active() may not observe the added waiter in blk_mq_mark_tag_wait() and wake up nothing, meantime blk_mq_mark_tag_wait() can't get driver tag successfully. This issue can be reproduced by running the following test in loop, and fio hang can be observed in < 30min when running it on my test VM in laptop. modprobe -r scsi_debug modprobe scsi_debug delay=0 dev_size_mb=4096 max_queue=1 host_max_queue=1 submit_queues=4 dev=`ls -d /sys/bus/pseudo/drivers/scsi_debug/adapter*/host*/target*/*/block/* | head -1 | xargs basename` fio --filename=/dev/"$dev" --direct=1 --rw=randrw --bs=4k --iodepth=1 \ --runtime=100 --numjobs=40 --time_based --name=test \ --ioengine=libaio Fix the issue by adding one explicit barrier in blk_mq_mark_tag_wait(), which is just fine in case of running out of tag. Cc: Jan Kara Cc: Kemeng Shi Reported-by: Changhui Zhong Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20240112122626.4181044-1-ming.lei@redhat.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- block/blk-mq.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index ad47eb93b266..11ec9a63e678 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1204,6 +1204,22 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, wait->flags &= ~WQ_FLAG_EXCLUSIVE; __add_wait_queue(wq, wait); + /* + * Add one explicit barrier since blk_mq_get_driver_tag() may + * not imply barrier in case of failure. + * + * Order adding us to wait queue and allocating driver tag. + * + * The pair is the one implied in sbitmap_queue_wake_up() which + * orders clearing sbitmap tag bits and waitqueue_active() in + * __sbitmap_queue_wake_up(), since waitqueue_active() is lockless + * + * Otherwise, re-order of adding wait queue and getting driver tag + * may cause __sbitmap_queue_wake_up() to wake up nothing because + * the waitqueue_active() may not observe us in wait queue. 
+ */ + smp_mb(); + /* * It's possible that a tag was freed in the window between the * allocation failure and adding the hardware queue to the wait -- Gitee From acd9dc6c9543e5ad74dc39e01501b75ed23ff126 Mon Sep 17 00:00:00 2001 From: Weichen Chen Date: Fri, 24 Feb 2023 10:36:32 +0800 Subject: [PATCH 10/56] pstore/ram: Fix crash when setting number of cpus to an odd number stable inclusion from stable-v5.10.210 commit a63e48cd835c34c38ef671d344cc029b1ea5bf10 category: bugfix issue: NA CVE: CVE-2023-52619 [ Upstream commit d49270a04623ce3c0afddbf3e984cb245aa48e9c ] When the number of cpu cores is adjusted to 7 or other odd numbers, the zone size will become an odd number. The address of the zone will become: addr of zone0 = BASE addr of zone1 = BASE + zone_size addr of zone2 = BASE + zone_size*2 ... The address of zone1/3/5/7 will be mapped to non-alignment va. Eventually crashes will occur when accessing these va. So, use ALIGN_DOWN() to make sure the zone size is even to avoid this bug. Signed-off-by: Weichen Chen Reviewed-by: Matthias Brugger Tested-by: "Guilherme G. Piccoli" Link: https://lore.kernel.org/r/20230224023632.6840-1-weichen.chen@mediatek.com Signed-off-by: Kees Cook Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- fs/pstore/ram.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 615ded4d2979..baedc121f8bd 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -542,6 +542,7 @@ static int ramoops_init_przs(const char *name, } zone_sz = mem_sz / *cnt; + zone_sz = ALIGN_DOWN(zone_sz, 2); if (!zone_sz) { dev_err(dev, "%s zone size == 0\n", name); goto fail; -- Gitee From f538ea601578b6cfe9d8d5ae2550e70de490f9ac Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Mon, 23 Oct 2023 09:30:56 +0800 Subject: [PATCH 11/56] ext4: avoid online resizing failures due to oversized flex bg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-v5.10.210 commit cfbbb3199e71b63fc26cee0ebff327c47128a1e8 category: bugfix issue: NA CVE: CVE-2023-52622 [ Upstream commit 5d1935ac02ca5aee364a449a35e2977ea84509b0 ] When we online resize an ext4 filesystem with a oversized flexbg_size, mkfs.ext4 -F -G 67108864 $dev -b 4096 100M mount $dev $dir resize2fs $dev 16G the following WARN_ON is triggered: ================================================================== WARNING: CPU: 0 PID: 427 at mm/page_alloc.c:4402 __alloc_pages+0x411/0x550 Modules linked in: sg(E) CPU: 0 PID: 427 Comm: resize2fs Tainted: G E 6.6.0-rc5+ #314 RIP: 0010:__alloc_pages+0x411/0x550 Call Trace: __kmalloc_large_node+0xa2/0x200 __kmalloc+0x16e/0x290 ext4_resize_fs+0x481/0xd80 __ext4_ioctl+0x1616/0x1d90 ext4_ioctl+0x12/0x20 __x64_sys_ioctl+0xf0/0x150 do_syscall_64+0x3b/0x90 ================================================================== This is because flexbg_size is too large and the size of the new_group_data array to be allocated exceeds MAX_ORDER. Currently, the minimum value of MAX_ORDER is 8, the minimum value of PAGE_SIZE is 4096, the corresponding maximum number of groups that can be allocated is: (PAGE_SIZE << MAX_ORDER) / sizeof(struct ext4_new_group_data) ≈ 21845 And the value that is down-aligned to the power of 2 is 16384. Therefore, this value is defined as MAX_RESIZE_BG, and the number of groups added each time does not exceed this value during resizing, and is added multiple times to complete the online resizing. The difference is that the metadata in a flex_bg may be more dispersed. 
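The bound is the largest physically contiguous allocation the page allocator will satisfy, (PAGE_SIZE << MAX_ORDER) bytes, divided by the per-group entry size and rounded down to a power of two. A quick arithmetic check of the commit's numbers, using its minimum values PAGE_SIZE = 4096 and MAX_ORDER = 8, and a 48-byte entry (the sizeof(struct ext4_new_group_data) value implied by the ~21845 figure, assumed here):

#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096, max_order = 8;
	unsigned long entry_size = 48;	/* assumed struct size */
	unsigned long max_bytes = page_size << max_order;	/* 1 MiB */
	unsigned long max_groups = max_bytes / entry_size;	/* 21845 */
	unsigned long cap = 1;

	while (cap * 2 <= max_groups)	/* round down to a power of two */
		cap *= 2;

	printf("max_groups=%lu cap=%lu\n", max_groups, cap);	/* 21845 16384 */
	return 0;
}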
Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20231023013057.2117948-4-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- fs/ext4/resize.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 7082a7716509..9eef187ede1e 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -227,10 +227,17 @@ struct ext4_new_flex_group_data { in the flex group */ __u16 *bg_flags; /* block group flags of groups in @groups */ + ext4_group_t resize_bg; /* number of allocated + new_group_data */ ext4_group_t count; /* number of groups in @groups */ }; +/* + * Avoiding memory allocation failures due to too many groups added each time. + */ +#define MAX_RESIZE_BG 16384 + /* * alloc_flex_gd() allocates a ext4_new_flex_group_data with size of * @flexbg_size. @@ -245,17 +252,21 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) if (flex_gd == NULL) goto out3; - if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data)) - goto out2; - flex_gd->count = flexbg_size; + if (unlikely(flexbg_size > MAX_RESIZE_BG)) + flex_gd->resize_bg = MAX_RESIZE_BG; + else + flex_gd->resize_bg = flexbg_size; - flex_gd->groups = kmalloc_array(flexbg_size, + flex_gd->groups = kmalloc_array(flex_gd->resize_bg, sizeof(struct ext4_new_group_data), GFP_NOFS); + if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data)) + goto out2; + if (flex_gd->groups == NULL) goto out2; - flex_gd->bg_flags = kmalloc_array(flexbg_size, sizeof(__u16), + flex_gd->bg_flags = kmalloc_array(flex_gd->resize_bg, sizeof(__u16), GFP_NOFS); if (flex_gd->bg_flags == NULL) goto out1; @@ -1571,8 +1582,7 @@ static int ext4_flex_group_add(struct super_block *sb, static int ext4_setup_next_flex_gd(struct super_block *sb, struct ext4_new_flex_group_data *flex_gd, - ext4_fsblk_t n_blocks_count, - unsigned long flexbg_size) + ext4_fsblk_t n_blocks_count) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; @@ -1596,7 +1606,7 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, BUG_ON(last); ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last); - last_group = group | (flexbg_size - 1); + last_group = group | (flex_gd->resize_bg - 1); if (last_group > n_group) last_group = n_group; @@ -2100,8 +2110,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) /* Add flex groups. Note that a regular group is a * flex group with 1 group. */ - while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, - flexbg_size)) { + while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count)) { if (jiffies - last_update_time > HZ * 10) { if (last_update_time) ext4_msg(sb, KERN_INFO, -- Gitee From 9bfad777000af0ab860e5b6c8193126a28db85a8 Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Wed, 2 Dec 2020 14:53:45 -0800 Subject: [PATCH 12/56] net-zerocopy: Refactor frag-is-remappable test. stable inclusion from stable-v5.10.210 commit 14690e419bb37c81dd7a4bf23daa1097773ecf01 category: bugfix issue: NA CVE: NA [ Upstream commit 98917cf0d6eda01e8c3c34d35398d46b247b6fd3 ] Refactor frag-is-remappable test for tcp receive zerocopy. This is part of a patch set that introduces short-circuited hybrid copies for small receive operations, which results in roughly 33% fewer syscalls for small RPC scenarios. 
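The refactor splits the open-coded loop into a predicate (can_map_frag()) plus a scanner (find_next_mappable_frag()) that returns the byte offset to skip. The same shape in a compilable sketch with a hypothetical frag type (not the kernel's skb_frag_t):

#include <stdbool.h>
#include <stdio.h>

struct frag { unsigned int size, off; };

#define PAGE_SZ 4096u

static bool can_map(const struct frag *f)
{
	return f->size == PAGE_SZ && f->off == 0;
}

/* Byte offset of the next mappable frag, or all of 'remaining'
 * if none qualifies within that budget. */
static unsigned int next_mappable(const struct frag *f, unsigned int remaining)
{
	unsigned int offset = 0;

	if (can_map(f))
		return 0;
	while (offset < remaining && !can_map(f)) {
		offset += f->size;
		++f;
	}
	return offset;
}

int main(void)
{
	struct frag frags[] = { {100, 0}, {PAGE_SZ, 12}, {PAGE_SZ, 0} };

	printf("%u\n", next_mappable(frags, 3 * PAGE_SZ));	/* 4196 */
	return 0;
}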
Signed-off-by: Arjun Roy Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Jakub Kicinski Stable-dep-of: 577e4432f3ac ("tcp: add sanity checks to rx zerocopy") Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- net/ipv4/tcp.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a1de705e2ddc..d2f1cb32028e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1777,6 +1777,26 @@ static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb, return frag; } +static bool can_map_frag(const skb_frag_t *frag) +{ + return skb_frag_size(frag) == PAGE_SIZE && !skb_frag_off(frag); +} + +static int find_next_mappable_frag(const skb_frag_t *frag, + int remaining_in_skb) +{ + int offset = 0; + + if (likely(can_map_frag(frag))) + return 0; + + while (offset < remaining_in_skb && !can_map_frag(frag)) { + offset += skb_frag_size(frag); + ++frag; + } + return offset; +} + static int tcp_copy_straggler_data(struct tcp_zerocopy_receive *zc, struct sk_buff *skb, u32 copylen, u32 *offset, u32 *seq) @@ -1902,6 +1922,8 @@ static int tcp_zerocopy_receive(struct sock *sk, ret = 0; curr_addr = address; while (length + PAGE_SIZE <= zc->length) { + int mappable_offset; + if (zc->recv_skip_hint < PAGE_SIZE) { u32 offset_frag; @@ -1929,15 +1951,11 @@ static int tcp_zerocopy_receive(struct sock *sk, if (!frags || offset_frag) break; } - if (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags)) { - int remaining = zc->recv_skip_hint; - while (remaining && (skb_frag_size(frags) != PAGE_SIZE || - skb_frag_off(frags))) { - remaining -= skb_frag_size(frags); - frags++; - } - zc->recv_skip_hint -= remaining; + mappable_offset = find_next_mappable_frag(frags, + zc->recv_skip_hint); + if (mappable_offset) { + zc->recv_skip_hint = mappable_offset; break; } pages[pg_idx] = skb_frag_page(frags); -- Gitee From c2ab9f31270efe2b244030441a274cfd16189781 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 25 Jan 2024 10:33:17 +0000 Subject: [PATCH 13/56] tcp: add sanity checks to rx zerocopy stable inclusion from stable-v5.10.210 commit f48bf9a83b1666d934247cb58a9887d7b3127b6f category: bugfix issue: NA CVE: CVE-2024-26640 [ Upstream commit 577e4432f3ac810049cb7e6b71f4d96ec7c6e894 ] TCP rx zerocopy intent is to map pages initially allocated from NIC drivers, not pages owned by a fs. This patch adds to can_map_frag() these additional checks: - Page must not be a compound one. - page->mapping must be NULL. This fixes the panic reported by ZhangPeng. syzbot was able to loopback packets built with sendfile(), mapping pages owned by an ext4 file to TCP rx zerocopy. 
r3 = socket$inet_tcp(0x2, 0x1, 0x0) mmap(&(0x7f0000ff9000/0x4000)=nil, 0x4000, 0x0, 0x12, r3, 0x0) r4 = socket$inet_tcp(0x2, 0x1, 0x0) bind$inet(r4, &(0x7f0000000000)={0x2, 0x4e24, @multicast1}, 0x10) connect$inet(r4, &(0x7f00000006c0)={0x2, 0x4e24, @empty}, 0x10) r5 = openat$dir(0xffffffffffffff9c, &(0x7f00000000c0)='./file0\x00', 0x181e42, 0x0) fallocate(r5, 0x0, 0x0, 0x85b8) sendfile(r4, r5, 0x0, 0x8ba0) getsockopt$inet_tcp_TCP_ZEROCOPY_RECEIVE(r4, 0x6, 0x23, &(0x7f00000001c0)={&(0x7f0000ffb000/0x3000)=nil, 0x3000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, &(0x7f0000000440)=0x40) r6 = openat$dir(0xffffffffffffff9c, &(0x7f00000000c0)='./file0\x00', 0x181e42, 0x0) Fixes: 93ab6cc69162 ("tcp: implement mmap() for zero copy receive") Link: https://lore.kernel.org/netdev/5106a58e-04da-372a-b836-9d3d0bd2507b@huawei.com/T/ Reported-and-bisected-by: ZhangPeng Signed-off-by: Eric Dumazet Cc: Arjun Roy Cc: Matthew Wilcox Cc: linux-mm@vger.kernel.org Cc: Andrew Morton Cc: linux-fsdevel@vger.kernel.org Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- net/ipv4/tcp.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d2f1cb32028e..01aeb2b56630 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1779,7 +1779,17 @@ static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb, static bool can_map_frag(const skb_frag_t *frag) { - return skb_frag_size(frag) == PAGE_SIZE && !skb_frag_off(frag); + struct page *page; + + if (skb_frag_size(frag) != PAGE_SIZE || skb_frag_off(frag)) + return false; + + page = skb_frag_page(frag); + + if (PageCompound(page) || page->mapping) + return false; + + return true; } static int find_next_mappable_frag(const skb_frag_t *frag, -- Gitee From 86e9b9455add92d53f9934620b316d17049d5f3c Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Mon, 4 Dec 2023 17:24:43 +0800 Subject: [PATCH 14/56] mfd: syscon: Fix null pointer dereference in of_syscon_register() stable inclusion from stable-v5.10.209 commit 927626a2073887ee30ba00633260d4d203f8e875 category: bugfix issue: NA CVE: CVE-2023-52467 [ Upstream commit 41673c66b3d0c09915698fec5c13b24336f18dd1 ] kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. 
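kasprintf() allocates, so it can return NULL; the fix bails out with -ENOMEM before the name is ever dereferenced. The user-space equivalent with asprintf(3), which signals allocation failure with -1 instead of NULL (names hypothetical):

#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int make_name(char **out, unsigned long long start)
{
	/* asprintf() returns -1 (leaving *out undefined) on failure,
	 * so check before the string is used anywhere. */
	if (asprintf(out, "syscon@%llx", start) < 0)
		return -ENOMEM;
	return 0;
}

int main(void)
{
	char *name;

	if (make_name(&name, 0x1c00000ULL) == 0) {
		puts(name);
		free(name);
	}
	return 0;
}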
Fixes: e15d7f2b81d2 ("mfd: syscon: Use a unique name with regmap_config") Signed-off-by: Kunwu Chan Reviewed-by: Arnd Bergmann Link: https://lore.kernel.org/r/20231204092443.2462115-1-chentao@kylinos.cn Signed-off-by: Lee Jones Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/mfd/syscon.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mfd/syscon.c b/drivers/mfd/syscon.c index df5cebb372a5..60f74144a4f8 100644 --- a/drivers/mfd/syscon.c +++ b/drivers/mfd/syscon.c @@ -103,6 +103,10 @@ static struct syscon *of_syscon_register(struct device_node *np, bool check_clk) syscon_config.name = kasprintf(GFP_KERNEL, "%pOFn@%llx", np, (u64)res.start); + if (!syscon_config.name) { + ret = -ENOMEM; + goto err_regmap; + } syscon_config.reg_stride = reg_io_width; syscon_config.val_bits = reg_io_width * 8; syscon_config.max_register = resource_size(&res) - reg_io_width; -- Gitee From ded33913f5f7538937b67a574db7ed8348c064f6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 7 Sep 2023 12:28:20 -0400 Subject: [PATCH 15/56] ring-buffer: Do not attempt to read past "commit" stable inclusion from stable-v5.10.198 commit cee5151c5410e868826b8afecfb356f3799ebea3 category: bugfix issue: NA CVE: CVE-2023-52501 [ Upstream commit 95a404bd60af6c4d9d8db01ad14fe8957ece31ca ] When iterating over the ring buffer while the ring buffer is active, the writer can corrupt the reader. There's barriers to help detect this and handle it, but that code missed the case where the last event was at the very end of the page and has only 4 bytes left. The checks to detect the corruption by the writer to reads needs to see the length of the event. If the length in the first 4 bytes is zero then the length is stored in the second 4 bytes. But if the writer is in the process of updating that code, there's a small window where the length in the first 4 bytes could be zero even though the length is only 4 bytes. That will cause rb_event_length() to read the next 4 bytes which could happen to be off the allocated page. To protect against this, fail immediately if the next event pointer is less than 8 bytes from the end of the commit (last byte of data), as all events must be a minimum of 8 bytes anyway. Link: https://lore.kernel.org/all/20230905141245.26470-1-Tze-nan.Wu@mediatek.com/ Link: https://lore.kernel.org/linux-trace-kernel/20230907122820.0899019c@gandalf.local.home Cc: Masami Hiramatsu Cc: Mark Rutland Reported-by: Tze-nan Wu Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- kernel/trace/ring_buffer.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f08904914166..c7b30c98054c 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2256,6 +2256,11 @@ rb_iter_head_event(struct ring_buffer_iter *iter) */ commit = rb_page_commit(iter_head_page); smp_rmb(); + + /* An event needs to be at least 8 bytes in size */ + if (iter->head > commit - 8) + goto reset; + event = __rb_page_index(iter_head_page, iter->head); length = rb_event_length(event); -- Gitee From 416eaa0e0330afafb7fa472d43ad4129bdca2ccd Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Fri, 1 Dec 2023 17:21:32 +0000 Subject: [PATCH 16/56] binder: fix race between mmput() and do_exit() stable inclusion from stable-v5.10.209 commit 7e7a0d86542b0ea903006d3f42f33c4f7ead6918 category: bugfix issue: NA CVE: CVE-2023-52609 commit 9a9ab0d963621d9d12199df9817e66982582d5a5 upstream. 
Task A calls binder_update_page_range() to allocate and insert pages on a remote address space from Task B. For this, Task A pins the remote mm via mmget_not_zero() first. This can race with Task B do_exit() and the final mmput() refcount decrement will come from Task A. Task A | Task B ------------------+------------------ mmget_not_zero() | | do_exit() | exit_mm() | mmput() mmput() | exit_mmap() | remove_vma() | fput() | In this case, the work of ____fput() from Task B is queued up in Task A as TWA_RESUME. So in theory, Task A returns to userspace and the cleanup work gets executed. However, Task A instead sleep, waiting for a reply from Task B that never comes (it's dead). This means the binder_deferred_release() is blocked until an unrelated binder event forces Task A to go back to userspace. All the associated death notifications will also be delayed until then. In order to fix this use mmput_async() that will schedule the work in the corresponding mm->async_put_work WQ instead of Task A. Fixes: 457b9a6f09f0 ("Staging: android: add binder driver") Reviewed-by: Alice Ryhl Signed-off-by: Carlos Llamas Link: https://lore.kernel.org/r/20231201172212.1813387-4-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/android/binder_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 3c93e6c05c4d..c534b332c8d3 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -271,7 +271,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, } if (mm) { mmap_write_unlock(mm); - mmput(mm); + mmput_async(mm); } return 0; @@ -304,7 +304,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, err_no_vma: if (mm) { mmap_write_unlock(mm); - mmput(mm); + mmput_async(mm); } return vma ? -ENOMEM : -ESRCH; } -- Gitee From 77a936eb17266f548ebd0ea2edfdb7c1d0ae1c94 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Mon, 27 Nov 2023 17:06:18 -0500 Subject: [PATCH 17/56] SUNRPC: Fix a suspicious RCU usage warning stable inclusion from stable-v5.10.210 commit c430e6bb43955c6bf573665fcebf31694925b9f7 category: bugfix issue: NA CVE: CVE-2023-52623 [ Upstream commit 31b62908693c90d4d07db597e685d9f25a120073 ] I received the following warning while running cthon against an ontap server running pNFS: [ 57.202521] ============================= [ 57.202522] WARNING: suspicious RCU usage [ 57.202523] 6.7.0-rc3-g2cc14f52aeb7 #41492 Not tainted [ 57.202525] ----------------------------- [ 57.202525] net/sunrpc/xprtmultipath.c:349 RCU-list traversed in non-reader section!! [ 57.202527] other info that might help us debug this: [ 57.202528] rcu_scheduler_active = 2, debug_locks = 1 [ 57.202529] no locks held by test5/3567. [ 57.202530] stack backtrace: [ 57.202532] CPU: 0 PID: 3567 Comm: test5 Not tainted 6.7.0-rc3-g2cc14f52aeb7 #41492 5b09971b4965c0aceba19f3eea324a4a806e227e [ 57.202534] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS unknown 2/2/2022 [ 57.202536] Call Trace: [ 57.202537] [ 57.202540] dump_stack_lvl+0x77/0xb0 [ 57.202551] lockdep_rcu_suspicious+0x154/0x1a0 [ 57.202556] rpc_xprt_switch_has_addr+0x17c/0x190 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6] [ 57.202596] rpc_clnt_setup_test_and_add_xprt+0x50/0x180 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6] [ 57.202621] ? 
rpc_clnt_add_xprt+0x254/0x300 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6] [ 57.202646] rpc_clnt_add_xprt+0x27a/0x300 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6] [ 57.202671] ? __pfx_rpc_clnt_setup_test_and_add_xprt+0x10/0x10 [sunrpc ebe02571b9a8ceebf7d98e71675af20c19bdb1f6] [ 57.202696] nfs4_pnfs_ds_connect+0x345/0x760 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9] [ 57.202728] ? __pfx_nfs4_test_session_trunk+0x10/0x10 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9] [ 57.202754] nfs4_fl_prepare_ds+0x75/0xc0 [nfs_layout_nfsv41_files e3a4187f18ae8a27b630f9feae6831b584a9360a] [ 57.202760] filelayout_write_pagelist+0x4a/0x200 [nfs_layout_nfsv41_files e3a4187f18ae8a27b630f9feae6831b584a9360a] [ 57.202765] pnfs_generic_pg_writepages+0xbe/0x230 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9] [ 57.202788] __nfs_pageio_add_request+0x3fd/0x520 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902] [ 57.202813] nfs_pageio_add_request+0x18b/0x390 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902] [ 57.202831] nfs_do_writepage+0x116/0x1e0 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902] [ 57.202849] nfs_writepages_callback+0x13/0x30 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902] [ 57.202866] write_cache_pages+0x265/0x450 [ 57.202870] ? __pfx_nfs_writepages_callback+0x10/0x10 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902] [ 57.202891] nfs_writepages+0x141/0x230 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902] [ 57.202913] do_writepages+0xd2/0x230 [ 57.202917] ? filemap_fdatawrite_wbc+0x5c/0x80 [ 57.202921] filemap_fdatawrite_wbc+0x67/0x80 [ 57.202924] filemap_write_and_wait_range+0xd9/0x170 [ 57.202930] nfs_wb_all+0x49/0x180 [nfs 6c976fa593a7c2976f5a0aeb4965514a828e6902] [ 57.202947] nfs4_file_flush+0x72/0xb0 [nfsv4 c716d88496ded0ea6d289bbea684fa996f9b57a9] [ 57.202969] __se_sys_close+0x46/0xd0 [ 57.202972] do_syscall_64+0x68/0x100 [ 57.202975] ? do_syscall_64+0x77/0x100 [ 57.202976] ? do_syscall_64+0x77/0x100 [ 57.202979] entry_SYSCALL_64_after_hwframe+0x6e/0x76 [ 57.202982] RIP: 0033:0x7fe2b12e4a94 [ 57.202985] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 80 3d d5 18 0e 00 00 74 13 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 44 c3 0f 1f 00 48 83 ec 18 89 7c 24 0c e8 c3 [ 57.202987] RSP: 002b:00007ffe857ddb38 EFLAGS: 00000202 ORIG_RAX: 0000000000000003 [ 57.202989] RAX: ffffffffffffffda RBX: 00007ffe857dfd68 RCX: 00007fe2b12e4a94 [ 57.202991] RDX: 0000000000002000 RSI: 00007ffe857ddc40 RDI: 0000000000000003 [ 57.202992] RBP: 00007ffe857dfc50 R08: 7fffffffffffffff R09: 0000000065650f49 [ 57.202993] R10: 00007fe2b11f8300 R11: 0000000000000202 R12: 0000000000000000 [ 57.202994] R13: 00007ffe857dfd80 R14: 00007fe2b1445000 R15: 0000000000000000 [ 57.202999] The problem seems to be that two out of three callers aren't taking the rcu_read_lock() before calling the list_for_each_entry_rcu() function in rpc_xprt_switch_has_addr(). I fix this by having rpc_xprt_switch_has_addr() unconditionaly take the rcu_read_lock(), which is okay to do recursively in the case that the lock has already been taken by a caller. 
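Structurally, the fix is the classic split between a lockless __helper that assumes the caller holds the lock and a public wrapper that takes the lock itself; because rcu_read_lock() nests, callers that already hold it are unharmed. A sketch of the same structure with a recursive pthread mutex standing in for the RCU read lock (hypothetical names; compile with -pthread):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t list_lock;

/* Lockless variant: caller must already hold list_lock. */
static bool __list_has_addr(int addr)
{
	return addr == 42;	/* stand-in for the list walk */
}

/* Public variant: safe to call with or without the lock held, because
 * the mutex is recursive (mirroring rcu_read_lock() nesting). */
static bool list_has_addr(int addr)
{
	bool res;

	pthread_mutex_lock(&list_lock);
	res = __list_has_addr(addr);
	pthread_mutex_unlock(&list_lock);
	return res;
}

int main(void)
{
	pthread_mutexattr_t attr;

	pthread_mutexattr_init(&attr);
	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
	pthread_mutex_init(&list_lock, &attr);

	printf("%d\n", list_has_addr(42));	/* 1 */
	return 0;
}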
Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- net/sunrpc/xprtmultipath.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index 78c075a68c04..a11e80d17830 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -253,8 +253,9 @@ struct rpc_xprt *xprt_iter_current_entry(struct rpc_xprt_iter *xpi) return xprt_switch_find_current_entry(head, xpi->xpi_cursor); } -bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, - const struct sockaddr *sap) +static +bool __rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, + const struct sockaddr *sap) { struct list_head *head; struct rpc_xprt *pos; @@ -273,6 +274,18 @@ bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, return false; } +bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, + const struct sockaddr *sap) +{ + bool res; + + rcu_read_lock(); + res = __rpc_xprt_switch_has_addr(xps, sap); + rcu_read_unlock(); + + return res; +} + static struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head, const struct rpc_xprt *cur) -- Gitee From 7413179993afc333dc2fdf60552a30896089e46f Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Sat, 25 Nov 2023 02:41:58 +0530 Subject: [PATCH 18/56] PM / devfreq: Synchronize devfreq_monitor_[start/stop] stable inclusion from stable-v5.10.210 commit 3399cc7013e761fee9d6eec795e9b31ab0cbe475 category: bugfix issue: NA CVE: CVE-2023-52635 [ Upstream commit aed5ed595960c6d301dcd4ed31aeaa7a8054c0c6 ] There is a chance if a frequent switch of the governor done in a loop result in timer list corruption where timer cancel being done from two place one from cancel_delayed_work_sync() and followed by expire_timers() can be seen from the traces[1]. while true do echo "simple_ondemand" > /sys/class/devfreq/1d84000.ufshc/governor echo "performance" > /sys/class/devfreq/1d84000.ufshc/governor done It looks to be issue with devfreq driver where device_monitor_[start/stop] need to synchronized so that delayed work should get corrupted while it is either being queued or running or being cancelled. Let's use polling flag and devfreq lock to synchronize the queueing the timer instance twice and work data being corrupted. [1] ... .. 
-0 [003] 9436.209662: timer_cancel timer=0xffffff80444f0428 -0 [003] 9436.209664: timer_expire_entry timer=0xffffff80444f0428 now=0x10022da1c function=__typeid__ZTSFvP10timer_listE_global_addr baseclk=0x10022da1c -0 [003] 9436.209718: timer_expire_exit timer=0xffffff80444f0428 kworker/u16:6-14217 [003] 9436.209863: timer_start timer=0xffffff80444f0428 function=__typeid__ZTSFvP10timer_listE_global_addr expires=0x10022da2b now=0x10022da1c flags=182452227 vendor.xxxyyy.ha-1593 [004] 9436.209888: timer_cancel timer=0xffffff80444f0428 vendor.xxxyyy.ha-1593 [004] 9436.216390: timer_init timer=0xffffff80444f0428 vendor.xxxyyy.ha-1593 [004] 9436.216392: timer_start timer=0xffffff80444f0428 function=__typeid__ZTSFvP10timer_listE_global_addr expires=0x10022da2c now=0x10022da1d flags=186646532 vendor.xxxyyy.ha-1593 [005] 9436.220992: timer_cancel timer=0xffffff80444f0428 xxxyyyTraceManag-7795 [004] 9436.261641: timer_cancel timer=0xffffff80444f0428 [2] 9436.261653][ C4] Unable to handle kernel paging request at virtual address dead00000000012a [ 9436.261664][ C4] Mem abort info: [ 9436.261666][ C4] ESR = 0x96000044 [ 9436.261669][ C4] EC = 0x25: DABT (current EL), IL = 32 bits [ 9436.261671][ C4] SET = 0, FnV = 0 [ 9436.261673][ C4] EA = 0, S1PTW = 0 [ 9436.261675][ C4] Data abort info: [ 9436.261677][ C4] ISV = 0, ISS = 0x00000044 [ 9436.261680][ C4] CM = 0, WnR = 1 [ 9436.261682][ C4] [dead00000000012a] address between user and kernel address ranges [ 9436.261685][ C4] Internal error: Oops: 96000044 [#1] PREEMPT SMP [ 9436.261701][ C4] Skip md ftrace buffer dump for: 0x3a982d0 ... [ 9436.262138][ C4] CPU: 4 PID: 7795 Comm: TraceManag Tainted: G S W O 5.10.149-android12-9-o-g17f915d29d0c #1 [ 9436.262141][ C4] Hardware name: Qualcomm Technologies, Inc. (DT) [ 9436.262144][ C4] pstate: 22400085 (nzCv daIf +PAN -UAO +TCO BTYPE=--) [ 9436.262161][ C4] pc : expire_timers+0x9c/0x438 [ 9436.262164][ C4] lr : expire_timers+0x2a4/0x438 [ 9436.262168][ C4] sp : ffffffc010023dd0 [ 9436.262171][ C4] x29: ffffffc010023df0 x28: ffffffd0636fdc18 [ 9436.262178][ C4] x27: ffffffd063569dd0 x26: ffffffd063536008 [ 9436.262182][ C4] x25: 0000000000000001 x24: ffffff88f7c69280 [ 9436.262185][ C4] x23: 00000000000000e0 x22: dead000000000122 [ 9436.262188][ C4] x21: 000000010022da29 x20: ffffff8af72b4e80 [ 9436.262191][ C4] x19: ffffffc010023e50 x18: ffffffc010025038 [ 9436.262195][ C4] x17: 0000000000000240 x16: 0000000000000201 [ 9436.262199][ C4] x15: ffffffffffffffff x14: ffffff889f3c3100 [ 9436.262203][ C4] x13: ffffff889f3c3100 x12: 00000000049f56b8 [ 9436.262207][ C4] x11: 00000000049f56b8 x10: 00000000ffffffff [ 9436.262212][ C4] x9 : ffffffc010023e50 x8 : dead000000000122 [ 9436.262216][ C4] x7 : ffffffffffffffff x6 : ffffffc0100239d8 [ 9436.262220][ C4] x5 : 0000000000000000 x4 : 0000000000000101 [ 9436.262223][ C4] x3 : 0000000000000080 x2 : ffffff889edc155c [ 9436.262227][ C4] x1 : ffffff8001005200 x0 : ffffff80444f0428 [ 9436.262232][ C4] Call trace: [ 9436.262236][ C4] expire_timers+0x9c/0x438 [ 9436.262240][ C4] __run_timers+0x1f0/0x330 [ 9436.262245][ C4] run_timer_softirq+0x28/0x58 [ 9436.262255][ C4] efi_header_end+0x168/0x5ec [ 9436.262265][ C4] __irq_exit_rcu+0x108/0x124 [ 9436.262274][ C4] __handle_domain_irq+0x118/0x1e4 [ 9436.262282][ C4] gic_handle_irq.30369+0x6c/0x2bc [ 9436.262286][ C4] el0_irq_naked+0x60/0x6c Link: https://lore.kernel.org/all/1700860318-4025-1-git-send-email-quic_mojha@quicinc.com/ Reported-by: Joyyoung Huang Acked-by: MyungJoo Ham Signed-off-by: Mukesh Ojha Signed-off-by: 
Chanwoo Choi Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/devfreq/devfreq.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index c6f460550f5e..0e10c2a05252 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -438,10 +438,14 @@ static void devfreq_monitor(struct work_struct *work) if (err) dev_err(&devfreq->dev, "dvfs failed with (%d) error\n", err); + if (devfreq->stop_polling) + goto out; + queue_delayed_work(devfreq_wq, &devfreq->work, msecs_to_jiffies(devfreq->profile->polling_ms)); - mutex_unlock(&devfreq->lock); +out: + mutex_unlock(&devfreq->lock); trace_devfreq_monitor(devfreq); } @@ -459,6 +463,10 @@ void devfreq_monitor_start(struct devfreq *devfreq) if (devfreq->governor->interrupt_driven) return; + mutex_lock(&devfreq->lock); + if (delayed_work_pending(&devfreq->work)) + goto out; + switch (devfreq->profile->timer) { case DEVFREQ_TIMER_DEFERRABLE: INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor); @@ -467,12 +475,16 @@ void devfreq_monitor_start(struct devfreq *devfreq) INIT_DELAYED_WORK(&devfreq->work, devfreq_monitor); break; default: - return; + goto out; } if (devfreq->profile->polling_ms) queue_delayed_work(devfreq_wq, &devfreq->work, msecs_to_jiffies(devfreq->profile->polling_ms)); + +out: + devfreq->stop_polling = false; + mutex_unlock(&devfreq->lock); } EXPORT_SYMBOL(devfreq_monitor_start); @@ -489,6 +501,14 @@ void devfreq_monitor_stop(struct devfreq *devfreq) if (devfreq->governor->interrupt_driven) return; + mutex_lock(&devfreq->lock); + if (devfreq->stop_polling) { + mutex_unlock(&devfreq->lock); + return; + } + + devfreq->stop_polling = true; + mutex_unlock(&devfreq->lock); cancel_delayed_work_sync(&devfreq->work); } EXPORT_SYMBOL(devfreq_monitor_stop); -- Gitee From ad58c8e84798844cf5a93f2d901e6b2c1c1d75bb Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Thu, 25 Jan 2024 17:27:37 +0200 Subject: [PATCH 19/56] xhci: handle isoc Babble and Buffer Overrun events properly stable inclusion from stable-v5.10.213 commit 696e4112e5c1ee61996198f0ebb6ca3fab55166e category: bugfix issue: NA CVE: CVE-2024-26659 [ Upstream commit 7c4650ded49e5b88929ecbbb631efb8b0838e811 ] xHCI 4.9 explicitly forbids assuming that the xHC has released its ownership of a multi-TRB TD when it reports an error on one of the early TRBs. Yet the driver makes such assumption and releases the TD, allowing the remaining TRBs to be freed or overwritten by new TDs. The xHC should also report completion of the final TRB due to its IOC flag being set by us, regardless of prior errors. This event cannot be recognized if the TD has already been freed earlier, resulting in "Transfer event TRB DMA ptr not part of current TD" error message. Fix this by reusing the logic for processing isoc Transaction Errors. This also handles hosts which fail to report the final completion. Fix transfer length reporting on Babble errors. They may be caused by device malfunction, no guarantee that the buffer has been filled. 
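The control flow of the fix: Babble first sets a "recompute the length from the TRBs" flag (the controller's reported count cannot be trusted), then falls through to share the Buffer Overrun handling, and both paths mark the TD as errored mid-TD when the failing TRB is not the last one. That shape in a compilable sketch with hypothetical completion codes (the kernel uses the fallthrough macro; a comment is used here):

#include <stdbool.h>
#include <stdio.h>

enum comp { COMP_OK, COMP_BABBLE, COMP_BUF_OVERRUN };

struct td_status {
	bool sum_trbs_for_length;	/* recompute length from TRBs */
	bool error_mid_td;		/* xHC may still own later TRBs */
};

static void handle_comp(enum comp code, bool is_last_trb,
			struct td_status *st)
{
	switch (code) {
	case COMP_BABBLE:
		st->sum_trbs_for_length = true;
		/* fall through: share the overrun handling */
	case COMP_BUF_OVERRUN:
		if (!is_last_trb)
			st->error_mid_td = true;
		break;
	default:
		break;
	}
}

int main(void)
{
	struct td_status st = { 0 };

	handle_comp(COMP_BABBLE, false, &st);
	printf("sum=%d mid=%d\n", st.sum_trbs_for_length, st.error_mid_td);
	return 0;
}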
Signed-off-by: Michal Pecio Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240125152737.2983959-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/usb/host/xhci-ring.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index b69b8c7e7966..d0d5eec128cc 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2264,9 +2264,13 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, case COMP_BANDWIDTH_OVERRUN_ERROR: frame->status = -ECOMM; break; - case COMP_ISOCH_BUFFER_OVERRUN: case COMP_BABBLE_DETECTED_ERROR: + sum_trbs_for_length = true; + fallthrough; + case COMP_ISOCH_BUFFER_OVERRUN: frame->status = -EOVERFLOW; + if (ep_trb != td->last_trb) + td->error_mid_td = true; break; case COMP_INCOMPATIBLE_DEVICE_ERROR: case COMP_STALL_ERROR: -- Gitee From 1aa0fd1ee85026222f51243cb377f2a0d43905fd Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Thu, 1 Feb 2024 09:38:15 +0100 Subject: [PATCH 20/56] tunnels: fix out of bounds access when building IPv6 PMTU error stable inclusion from stable-v5.10.210 commit e77bf828f1ca1c47fcff58bdc26b60a9d3dfbe1d category: bugfix issue: NA CVE: CVE-2024-26665 [ Upstream commit d75abeec401f8c86b470e7028a13fcdc87e5dd06 ] If the ICMPv6 error is built from a non-linear skb we get the following splat, BUG: KASAN: slab-out-of-bounds in do_csum+0x220/0x240 Read of size 4 at addr ffff88811d402c80 by task netperf/820 CPU: 0 PID: 820 Comm: netperf Not tainted 6.8.0-rc1+ #543 ... kasan_report+0xd8/0x110 do_csum+0x220/0x240 csum_partial+0xc/0x20 skb_tunnel_check_pmtu+0xeb9/0x3280 vxlan_xmit_one+0x14c2/0x4080 vxlan_xmit+0xf61/0x5c00 dev_hard_start_xmit+0xfb/0x510 __dev_queue_xmit+0x7cd/0x32a0 br_dev_queue_push_xmit+0x39d/0x6a0 Use skb_checksum instead of csum_partial who cannot deal with non-linear SKBs. Fixes: 4cb47a8644cc ("tunnels: PMTU discovery support for directly bridged IP packets") Signed-off-by: Antoine Tenart Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- net/ipv4/ip_tunnel_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 4b74c67f13c9..7d6f350585d7 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -332,7 +332,7 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu) }; skb_reset_network_header(skb); - csum = csum_partial(icmp6h, len, 0); + csum = skb_checksum(skb, skb_transport_offset(skb), len, 0); icmp6h->icmp6_cksum = csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, len, IPPROTO_ICMPV6, csum); -- Gitee From 30838452db2de59fbdb6d9c5e9eaa5f63f4f7df8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 15 Feb 2024 15:05:16 -0800 Subject: [PATCH 21/56] arp: Prevent overflow in arp_req_get(). stable inclusion from stable-v5.10.211 commit dbc9b22d0ed319b4e29034ce0a3fe32a3ee2c587 category: bugfix issue: NA CVE: CVE-2024-26733 commit a7d6027790acea24446ddd6632d394096c0f4667 upstream. syzkaller reported an overflown write in arp_req_get(). [0] When ioctl(SIOCGARP) is issued, arp_req_get() looks up an neighbour entry and copies neigh->ha to struct arpreq.arp_ha.sa_data. The arp_ha here is struct sockaddr, not struct sockaddr_storage, so the sa_data buffer is just 14 bytes. 
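The fix caps the copy at the smaller of the device address length and sizeof(sa_data), so a long hardware address cannot spill into the fields that follow. A stand-alone sketch of that bounded copy (user-space struct sockaddr, whose sa_data is 14 bytes; the helper name is hypothetical):

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

#define MAX_ADDR_LEN 32		/* as in the kernel's netdevice.h */

static void fill_arp_ha(struct sockaddr *sa,
			const unsigned char *ha, size_t addr_len)
{
	/* Never copy more than sa_data can hold, even if the device
	 * reports a longer hardware address. */
	size_t n = addr_len < sizeof(sa->sa_data) ? addr_len
						  : sizeof(sa->sa_data);

	memcpy(sa->sa_data, ha, n);
}

int main(void)
{
	unsigned char ha[MAX_ADDR_LEN] = { 0xde, 0xad, 0xbe, 0xef };
	struct sockaddr sa = { 0 };

	fill_arp_ha(&sa, ha, sizeof(ha));	/* copies only 14 bytes */
	printf("first byte: %#x\n", (unsigned char)sa.sa_data[0]);
	return 0;
}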
In the splat below, 2 bytes are overflown to the next int field, arp_flags. We initialise the field just after the memcpy(), so it's not a problem. However, when dev->addr_len is greater than 22 (e.g. MAX_ADDR_LEN), arp_netmask is overwritten, which could be set as htonl(0xFFFFFFFFUL) in arp_ioctl() before calling arp_req_get(). To avoid the overflow, let's limit the max length of memcpy(). Note that commit b5f0de6df6dc ("net: dev: Convert sa_data to flexible array in struct sockaddr") just silenced syzkaller. [0]: memcpy: detected field-spanning write (size 16) of single field "r->arp_ha.sa_data" at net/ipv4/arp.c:1128 (size 14) WARNING: CPU: 0 PID: 144638 at net/ipv4/arp.c:1128 arp_req_get+0x411/0x4a0 net/ipv4/arp.c:1128 Modules linked in: CPU: 0 PID: 144638 Comm: syz-executor.4 Not tainted 6.1.74 #31 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-debian-1.16.0-5 04/01/2014 RIP: 0010:arp_req_get+0x411/0x4a0 net/ipv4/arp.c:1128 Code: fd ff ff e8 41 42 de fb b9 0e 00 00 00 4c 89 fe 48 c7 c2 20 6d ab 87 48 c7 c7 80 6d ab 87 c6 05 25 af 72 04 01 e8 5f 8d ad fb <0f> 0b e9 6c fd ff ff e8 13 42 de fb be 03 00 00 00 4c 89 e7 e8 a6 RSP: 0018:ffffc900050b7998 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff88803a815000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff8641a44a RDI: 0000000000000001 RBP: ffffc900050b7a98 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 203a7970636d656d R12: ffff888039c54000 R13: 1ffff92000a16f37 R14: ffff88803a815084 R15: 0000000000000010 FS: 00007f172bf306c0(0000) GS:ffff88805aa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f172b3569f0 CR3: 0000000057f12005 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: arp_ioctl+0x33f/0x4b0 net/ipv4/arp.c:1261 inet_ioctl+0x314/0x3a0 net/ipv4/af_inet.c:981 sock_do_ioctl+0xdf/0x260 net/socket.c:1204 sock_ioctl+0x3ef/0x650 net/socket.c:1321 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl fs/ioctl.c:856 [inline] __x64_sys_ioctl+0x18e/0x220 fs/ioctl.c:856 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x37/0x90 arch/x86/entry/common.c:81 entry_SYSCALL_64_after_hwframe+0x64/0xce RIP: 0033:0x7f172b262b8d Code: 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f172bf300b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007f172b3abf80 RCX: 00007f172b262b8d RDX: 0000000020000000 RSI: 0000000000008954 RDI: 0000000000000003 RBP: 00007f172b2d3493 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007f172b3abf80 R15: 00007f172bf10000 Reported-by: syzkaller Reported-by: Bjoern Doebel Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20240215230516.31330-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- net/ipv4/arp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 83a47998c4b1..8ae9bd6f91c1 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1104,7 +1104,8 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) 
if (neigh) { if (!(neigh->nud_state & NUD_NOARP)) { read_lock_bh(&neigh->lock); - memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); + memcpy(r->arp_ha.sa_data, neigh->ha, + min(dev->addr_len, (unsigned char)sizeof(r->arp_ha.sa_data_min))); r->arp_flags = arp_state_to_flags(neigh); read_unlock_bh(&neigh->lock); r->arp_ha.sa_family = dev->type; -- Gitee From 63fca7ff5e64d7e1b5c41bea51c63eb1effb7aea Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Thu, 15 Feb 2024 23:27:17 +0300 Subject: [PATCH 22/56] ipv6: sr: fix possible use-after-free and null-ptr-deref stable inclusion from stable-v5.10.211 commit 65c38f23d10ff79feea1e5d50b76dc7af383c1e6 category: bugfix issue: NA CVE: CVE-2024-26735 [ Upstream commit 5559cea2d5aa3018a5f00dd2aca3427ba09b386b ] The pernet operations structure for the subsystem must be registered before registering the generic netlink family. Fixes: 915d7e5e5930 ("ipv6: sr: add code base for control plane support of SR-IPv6") Signed-off-by: Vasiliy Kovalev Link: https://lore.kernel.org/r/20240215202717.29815-1-kovalev@altlinux.org Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- net/ipv6/seg6.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 2278c0234c49..a8439fded12d 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -451,22 +451,24 @@ int __init seg6_init(void) { int err; - err = genl_register_family(&seg6_genl_family); + err = register_pernet_subsys(&ip6_segments_ops); if (err) goto out; - err = register_pernet_subsys(&ip6_segments_ops); + err = genl_register_family(&seg6_genl_family); if (err) - goto out_unregister_genl; + goto out_unregister_pernet; #ifdef CONFIG_IPV6_SEG6_LWTUNNEL err = seg6_iptunnel_init(); if (err) - goto out_unregister_pernet; + goto out_unregister_genl; err = seg6_local_init(); - if (err) - goto out_unregister_pernet; + if (err) { + seg6_iptunnel_exit(); + goto out_unregister_genl; + } #endif #ifdef CONFIG_IPV6_SEG6_HMAC @@ -487,11 +489,11 @@ int __init seg6_init(void) #endif #endif #ifdef CONFIG_IPV6_SEG6_LWTUNNEL -out_unregister_pernet: - unregister_pernet_subsys(&ip6_segments_ops); -#endif out_unregister_genl: genl_unregister_family(&seg6_genl_family); +#endif +out_unregister_pernet: + unregister_pernet_subsys(&ip6_segments_ops); goto out; } -- Gitee From 36a42da5bb1343cb1a77975967931f2bb2f260bd Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Mon, 29 Jan 2024 17:37:38 +0800 Subject: [PATCH 23/56] usb: roles: fix NULL pointer issue when put module's reference stable inclusion from stable-v5.10.211 commit e279bf8e51893e1fe160b3d8126ef2dd00f661e1 category: bugfix issue: NA CVE: CVE-2024-26747 commit 1c9be13846c0b2abc2480602f8ef421360e1ad9e upstream. In the current design, the usb role class driver takes a reference on the usb_role_switch parent's module when the user gets the usb_role_switch device, and puts the reference when the user puts the device. However, the parent device of the usb_role_switch may be removed before the user puts the usb_role_switch. If so, a NULL pointer dereference occurs when the user puts the parent module's reference. This patch saves the module pointer in the usb_role_switch structure; then we no longer need to find the module by iterating over a long chain of device relations.
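Condensed from the hunks below, the idea is to capture the owner module while the parent is known to be alive, and to pin and unpin that cached pointer afterwards:

/* At registration time the parent is guaranteed to be valid. */
sw->module = parent->driver->owner;

/* Gets and puts then use the cached pointer, never the parent,
 * which may already have been removed. */
WARN_ON(!try_module_get(sw->module));
/* ... user holds the switch ... */
module_put(sw->module);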
Fixes: 5c54fcac9a9d ("usb: roles: Take care of driver module reference counting") cc: stable@vger.kernel.org Signed-off-by: Xu Yang Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240129093739.2371530-1-xu.yang_2@nxp.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/usb/roles/class.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c index 5cc20275335d..03e7e908baa2 100644 --- a/drivers/usb/roles/class.c +++ b/drivers/usb/roles/class.c @@ -19,6 +19,7 @@ static struct class *role_class; struct usb_role_switch { struct device dev; struct mutex lock; /* device lock*/ + struct module *module; /* the module this device depends on */ enum usb_role role; /* From descriptor */ @@ -133,7 +134,7 @@ struct usb_role_switch *usb_role_switch_get(struct device *dev) usb_role_switch_match); if (!IS_ERR_OR_NULL(sw)) - WARN_ON(!try_module_get(sw->dev.parent->driver->owner)); + WARN_ON(!try_module_get(sw->module)); return sw; } @@ -155,7 +156,7 @@ struct usb_role_switch *fwnode_usb_role_switch_get(struct fwnode_handle *fwnode) sw = fwnode_connection_find_match(fwnode, "usb-role-switch", NULL, usb_role_switch_match); if (!IS_ERR_OR_NULL(sw)) - WARN_ON(!try_module_get(sw->dev.parent->driver->owner)); + WARN_ON(!try_module_get(sw->module)); return sw; } @@ -170,7 +171,7 @@ EXPORT_SYMBOL_GPL(fwnode_usb_role_switch_get); void usb_role_switch_put(struct usb_role_switch *sw) { if (!IS_ERR_OR_NULL(sw)) { - module_put(sw->dev.parent->driver->owner); + module_put(sw->module); put_device(&sw->dev); } } @@ -187,15 +188,18 @@ struct usb_role_switch * usb_role_switch_find_by_fwnode(const struct fwnode_handle *fwnode) { struct device *dev; + struct usb_role_switch *sw = NULL; if (!fwnode) return NULL; dev = class_find_device_by_fwnode(role_class, fwnode); - if (dev) - WARN_ON(!try_module_get(dev->parent->driver->owner)); + if (dev) { + sw = to_role_switch(dev); + WARN_ON(!try_module_get(sw->module)); + } - return dev ? to_role_switch(dev) : NULL; + return sw; } EXPORT_SYMBOL_GPL(usb_role_switch_find_by_fwnode); @@ -328,6 +332,7 @@ usb_role_switch_register(struct device *parent, sw->set = desc->set; sw->get = desc->get; + sw->module = parent->driver->owner; sw->dev.parent = parent; sw->dev.fwnode = desc->fwnode; sw->dev.class = role_class; -- Gitee From 14750e32b8f45a5de42803c92ca7819cfb6cb164 Mon Sep 17 00:00:00 2001 From: Ying Hsu Date: Thu, 4 Jan 2024 11:56:32 +0000 Subject: [PATCH 24/56] Bluetooth: Avoid potential use-after-free in hci_error_reset stable inclusion from stable-v5.10.212 commit 6dd0a9dfa99f8990a08eb8fdd8e79bee31c7d8e2 category: bugfix issue: NA CVE: CVE-2024-26801 [ Upstream commit 2449007d3f73b2842c9734f45f0aadb522daf592 ] While handling the HCI_EV_HARDWARE_ERROR event, if the underlying BT controller is not responding, the GPIO reset mechanism would free the hci_dev and lead to a use-after-free in hci_error_reset. Here's the call trace observed on a ChromeOS device with Intel AX201: queue_work_on+0x3e/0x6c __hci_cmd_sync_sk+0x2ee/0x4c0 [bluetooth ] ? init_wait_entry+0x31/0x31 __hci_cmd_sync+0x16/0x20 [bluetooth ] hci_error_reset+0x4f/0xa4 [bluetooth ] process_one_work+0x1d8/0x33f worker_thread+0x21b/0x373 kthread+0x13a/0x152 ? pr_cont_work+0x54/0x54 ? kthread_blkcg+0x31/0x31 ret_from_fork+0x1f/0x30 This patch holds the reference count on the hci_dev while processing a HCI_EV_HARDWARE_ERROR event to avoid potential crash. 
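Condensed, the fixed handler pins the device for the whole work item (the full hunk follows):

static void hci_error_reset(struct work_struct *work)
{
	struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset);

	hci_dev_hold(hdev);	/* keep hdev alive across close/open */
	/* ... log the hardware error ... */
	if (!hci_dev_do_close(hdev))
		hci_dev_do_open(hdev);
	hci_dev_put(hdev);	/* may drop the last reference */
}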
Fixes: c7741d16a57c ("Bluetooth: Perform a power cycle when receiving hardware error event") Signed-off-by: Ying Hsu Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- net/bluetooth/hci_core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index bd6f20ef13f3..ffea0e685a34 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2318,6 +2318,7 @@ static void hci_error_reset(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset); + hci_dev_hold(hdev); BT_DBG("%s", hdev->name); if (hdev->hw_error) @@ -2325,10 +2326,10 @@ static void hci_error_reset(struct work_struct *work) else bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code); - if (hci_dev_do_close(hdev)) - return; + if (!hci_dev_do_close(hdev)) + hci_dev_do_open(hdev); - hci_dev_do_open(hdev); + hci_dev_put(hdev); } void hci_uuids_clear(struct hci_dev *hdev) -- Gitee From 64b7632a9047d3dfacb78546fd3d11b97698bb68 Mon Sep 17 00:00:00 2001 From: Ryosuke Yasuoka Date: Wed, 21 Feb 2024 16:40:48 +0900 Subject: [PATCH 25/56] netlink: Fix kernel-infoleak-after-free in __skb_datagram_iter stable inclusion from stable-v5.10.212 commit f19d1f98e60e68b11fc60839105dd02a30ec0d77 category: bugfix issue: NA CVE: CVE-2024-26805 [ Upstream commit 661779e1fcafe1b74b3f3fe8e980c1e207fea1fd ] syzbot reported the following uninit-value access issue [1]: netlink_to_full_skb() creates a new `skb` and puts the `skb->data` passed as a 1st arg of netlink_to_full_skb() onto the new `skb`. The data size is specified as `len` and passed to skb_put_data(). This `len` is based on `skb->end`, which is not the data offset but the buffer offset. The `skb->end` covers both data and tailroom. Since the tailroom is not initialized when the new `skb` is created, KMSAN detects an uninitialized memory area when copying the data. This patch resolves the issue by correcting the len from `skb->end` to `skb->len`, which is the actual data offset.
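As a sketch of the difference (simplified; error handling omitted):

/* skb->len is the amount of real data in the skb, while
 * skb_end_offset(skb) also counts the uninitialized tailroom. */
new = alloc_skb(skb->len, gfp_mask);
skb_put_data(new, skb->data, skb->len);	/* copy only initialized bytes */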
BUG: KMSAN: kernel-infoleak-after-free in instrument_copy_to_user include/linux/instrumented.h:114 [inline] BUG: KMSAN: kernel-infoleak-after-free in copy_to_user_iter lib/iov_iter.c:24 [inline] BUG: KMSAN: kernel-infoleak-after-free in iterate_ubuf include/linux/iov_iter.h:29 [inline] BUG: KMSAN: kernel-infoleak-after-free in iterate_and_advance2 include/linux/iov_iter.h:245 [inline] BUG: KMSAN: kernel-infoleak-after-free in iterate_and_advance include/linux/iov_iter.h:271 [inline] BUG: KMSAN: kernel-infoleak-after-free in _copy_to_iter+0x364/0x2520 lib/iov_iter.c:186 instrument_copy_to_user include/linux/instrumented.h:114 [inline] copy_to_user_iter lib/iov_iter.c:24 [inline] iterate_ubuf include/linux/iov_iter.h:29 [inline] iterate_and_advance2 include/linux/iov_iter.h:245 [inline] iterate_and_advance include/linux/iov_iter.h:271 [inline] _copy_to_iter+0x364/0x2520 lib/iov_iter.c:186 copy_to_iter include/linux/uio.h:197 [inline] simple_copy_to_iter+0x68/0xa0 net/core/datagram.c:532 __skb_datagram_iter+0x123/0xdc0 net/core/datagram.c:420 skb_copy_datagram_iter+0x5c/0x200 net/core/datagram.c:546 skb_copy_datagram_msg include/linux/skbuff.h:3960 [inline] packet_recvmsg+0xd9c/0x2000 net/packet/af_packet.c:3482 sock_recvmsg_nosec net/socket.c:1044 [inline] sock_recvmsg net/socket.c:1066 [inline] sock_read_iter+0x467/0x580 net/socket.c:1136 call_read_iter include/linux/fs.h:2014 [inline] new_sync_read fs/read_write.c:389 [inline] vfs_read+0x8f6/0xe00 fs/read_write.c:470 ksys_read+0x20f/0x4c0 fs/read_write.c:613 __do_sys_read fs/read_write.c:623 [inline] __se_sys_read fs/read_write.c:621 [inline] __x64_sys_read+0x93/0xd0 fs/read_write.c:621 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was stored to memory at: skb_put_data include/linux/skbuff.h:2622 [inline] netlink_to_full_skb net/netlink/af_netlink.c:181 [inline] __netlink_deliver_tap_skb net/netlink/af_netlink.c:298 [inline] __netlink_deliver_tap+0x5be/0xc90 net/netlink/af_netlink.c:325 netlink_deliver_tap net/netlink/af_netlink.c:338 [inline] netlink_deliver_tap_kernel net/netlink/af_netlink.c:347 [inline] netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x10f1/0x1250 net/netlink/af_netlink.c:1368 netlink_sendmsg+0x1238/0x13d0 net/netlink/af_netlink.c:1910 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] ____sys_sendmsg+0x9c2/0xd60 net/socket.c:2584 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2638 __sys_sendmsg net/socket.c:2667 [inline] __do_sys_sendmsg net/socket.c:2676 [inline] __se_sys_sendmsg net/socket.c:2674 [inline] __x64_sys_sendmsg+0x307/0x490 net/socket.c:2674 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Uninit was created at: free_pages_prepare mm/page_alloc.c:1087 [inline] free_unref_page_prepare+0xb0/0xa40 mm/page_alloc.c:2347 free_unref_page_list+0xeb/0x1100 mm/page_alloc.c:2533 release_pages+0x23d3/0x2410 mm/swap.c:1042 free_pages_and_swap_cache+0xd9/0xf0 mm/swap_state.c:316 tlb_batch_pages_flush mm/mmu_gather.c:98 [inline] tlb_flush_mmu_free mm/mmu_gather.c:293 [inline] tlb_flush_mmu+0x6f5/0x980 mm/mmu_gather.c:300 tlb_finish_mmu+0x101/0x260 mm/mmu_gather.c:392 exit_mmap+0x49e/0xd30 mm/mmap.c:3321 __mmput+0x13f/0x530 kernel/fork.c:1349 mmput+0x8a/0xa0 kernel/fork.c:1371 exit_mm+0x1b8/0x360 kernel/exit.c:567 do_exit+0xd57/0x4080 kernel/exit.c:858 
do_group_exit+0x2fd/0x390 kernel/exit.c:1021 __do_sys_exit_group kernel/exit.c:1032 [inline] __se_sys_exit_group kernel/exit.c:1030 [inline] __x64_sys_exit_group+0x3c/0x50 kernel/exit.c:1030 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b Bytes 3852-3903 of 3904 are uninitialized Memory access of size 3904 starts at ffff88812ea1e000 Data copied to user address 0000000020003280 CPU: 1 PID: 5043 Comm: syz-executor297 Not tainted 6.7.0-rc5-syzkaller-00047-g5bd7ef53ffe5 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023 Fixes: 1853c9496460 ("netlink, mmap: transform mmap skb into full skb on taps") Reported-and-tested-by: syzbot+34ad5fab48f7bf510349@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=34ad5fab48f7bf510349 [1] Signed-off-by: Ryosuke Yasuoka Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240221074053.1794118-1-ryasuoka@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- net/netlink/af_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 99c869d8d304..63575b6166a3 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -157,7 +157,7 @@ static inline u32 netlink_group_mask(u32 group) static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb, gfp_t gfp_mask) { - unsigned int len = skb_end_offset(skb); + unsigned int len = skb->len; struct sk_buff *new; new = alloc_skb(len, gfp_mask); -- Gitee From 8c3269184241bbc226b98e867f22c6b9416d45d1 Mon Sep 17 00:00:00 2001 From: Zach O'Keefe Date: Thu, 18 Jan 2024 10:19:53 -0800 Subject: [PATCH 26/56] mm/writeback: fix possible divide-by-zero in wb_dirty_limits(), again stable inclusion from stable-v5.10.210 commit 81e7d2530d458548b90a5c5e76b77ad5e5d1c0df category: bugfix issue: NA CVE: CVE-2024-26720 commit 9319b647902cbd5cc884ac08a8a6d54ce111fc78 upstream. (struct dirty_throttle_control *)->thresh is an unsigned long, but is passed as the u32 divisor argument to div_u64(). On architectures where unsigned long is 64 bits, the argument will be implicitly truncated. Use div64_u64() instead of div_u64() so that the value used in the "is this a safe division" check is the same as the divisor. Also, remove the redundant cast of the numerator to u64, as that should happen implicitly. This would be difficult to exploit in memcg domain, given the ratio-based arithmetic domain_dirty_limits() uses, but is much easier in global writeback domain with a BDI_CAP_STRICTLIMIT backing device, using e.g. vm.dirty_bytes=(1<<32)*PAGE_SIZE so that dtc->thresh == (1<<32). Link: https://lkml.kernel.org/r/20240118181954.1415197-1-zokeefe@google.com Fixes: f6789593d5ce ("mm/page-writeback.c: fix divide by zero in bdi_dirty_limits()") Signed-off-by: Zach O'Keefe Cc: Maxim Patlasov Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- mm/page-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index b72da123f242..e969667e8d62 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1519,7 +1519,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc) */ dtc->wb_thresh = __wb_calc_thresh(dtc); dtc->wb_bg_thresh = dtc->thresh ?
- div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; + div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; /* * In order to avoid the stacked BDI deadlock we need -- Gitee From 9773953de1b07d387301e7be6628137d138ac774 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 4 Jan 2024 19:10:59 +0100 Subject: [PATCH 27/56] wifi: mac80211: fix race condition on enabling fast-xmit stable inclusion from stable-v5.10.211 commit 5ffab99e070b9f8ae0cf60c3c3602b84eee818dd category: bugfix issue: NA CVE: CVE-2024-26779 [ Upstream commit bcbc84af1183c8cf3d1ca9b78540c2185cd85e7f ] fast-xmit must only be enabled after the sta has been uploaded to the driver, otherwise it could end up passing the not-yet-uploaded sta via drv_tx calls to the driver, leading to potential crashes because of uninitialized drv_priv data. Add a missing sta->uploaded check and re-check fast xmit after inserting a sta. Signed-off-by: Felix Fietkau Link: https://msgid.link/20240104181059.84032-1-nbd@nbd.name Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- net/mac80211/sta_info.c | 2 ++ net/mac80211/tx.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 2e84360990f0..44bd03c6b847 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -700,6 +700,8 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_accept_plinks_update(sdata); + ieee80211_check_fast_xmit(sta); + return 0; out_remove: sta_info_hash_del(local, sta); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index bbbcc678c655..ef8bb5dfd110 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2958,7 +2958,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) sdata->vif.type == NL80211_IFTYPE_STATION) goto out; - if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED) || !sta->uploaded) goto out; if (test_sta_flag(sta, WLAN_STA_PS_STA) || -- Gitee From b08a4c37d9fb8093f1e5f41b784e86a7ccaae0f9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 18 Oct 2022 02:56:03 -0700 Subject: [PATCH 28/56] net: dev: Convert sa_data to flexible array in struct sockaddr stable inclusion from stable-v5.10.211 commit f6ce90567ed373b6be96a033fda73cd769f5c748 category: bugfix issue: NA CVE: NA [ Upstream commit b5f0de6df6dce8d641ef58ef7012f3304dffb9a1 ] One of the worst offenders of "fake flexible arrays" is struct sockaddr, as it is the classic example of why GCC and Clang have been traditionally forced to treat all trailing arrays as fake flexible arrays: in the distant misty past, sa_data became too small, and code started just treating it as a flexible array, even though it was fixed-size. The special case by the compiler is specifically that sizeof(sa->sa_data) and FORTIFY_SOURCE (which uses __builtin_object_size(sa->sa_data, 1)) do not agree (14 and -1 respectively), which makes FORTIFY_SOURCE treat it as a flexible array. However, the coming -fstrict-flex-arrays compiler flag will remove these special cases so that FORTIFY_SOURCE can gain coverage over all the trailing arrays in the kernel that are _not_ supposed to be treated as a flexible array. To deal with this change, convert sa_data to a true flexible array. To keep the structure size the same, move sa_data into a union with a newly introduced sa_data_min with the original size. 
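Condensed, the resulting layout (see the include/linux/socket.h hunk below) is:

struct sockaddr {
	sa_family_t sa_family;			/* address family, AF_xxx */
	union {
		char sa_data_min[14];		/* keeps sizeof(struct sockaddr) unchanged */
		DECLARE_FLEX_ARRAY(char, sa_data); /* unknown size for FORTIFY_SOURCE */
	};
};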
The result is that FORTIFY_SOURCE can continue to have no idea how large sa_data may actually be, but anything using sizeof(sa->sa_data) must switch to sizeof(sa->sa_data_min). Cc: Jens Axboe Cc: Pavel Begunkov Cc: David Ahern Cc: Dylan Yudaken Cc: Yajun Deng Cc: Petr Machata Cc: Hangbin Liu Cc: Leon Romanovsky Cc: syzbot Cc: Willem de Bruijn Cc: Pablo Neira Ayuso Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221018095503.never.671-kees@kernel.org Signed-off-by: Jakub Kicinski Stable-dep-of: a7d6027790ac ("arp: Prevent overflow in arp_req_get().") Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- include/linux/socket.h | 5 ++++- net/core/dev.c | 2 +- net/core/dev_ioctl.c | 2 +- net/packet/af_packet.c | 10 +++++----- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index 23a9d1862391..42739d119905 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -31,7 +31,10 @@ typedef __kernel_sa_family_t sa_family_t; struct sockaddr { sa_family_t sa_family; /* address family, AF_xxx */ - char sa_data[14]; /* 14 bytes of protocol address */ + union { + char sa_data_min[14]; /* Minimum 14 bytes of protocol address */ + DECLARE_FLEX_ARRAY(char, sa_data); + }; }; struct linger { diff --git a/net/core/dev.c b/net/core/dev.c index f4aad9b00cc9..83648e6a04de 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8779,7 +8779,7 @@ EXPORT_SYMBOL(dev_set_mac_address_user); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name) { - size_t size = sizeof(sa->sa_data); + size_t size = sizeof(sa->sa_data_min); struct net_device *dev; int ret = 0; diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 993420da2930..60e815a71909 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -245,7 +245,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) if (ifr->ifr_hwaddr.sa_family != dev->type) return -EINVAL; memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, - min(sizeof(ifr->ifr_hwaddr.sa_data), + min(sizeof(ifr->ifr_hwaddr.sa_data_min), (size_t)dev->addr_len)); call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return 0; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c7129616dd53..a2e2996ef25d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3248,7 +3248,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; - char name[sizeof(uaddr->sa_data) + 1]; + char name[sizeof(uaddr->sa_data_min) + 1]; /* * Check legality @@ -3259,8 +3259,8 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, /* uaddr->sa_data comes from the userspace, it's not guaranteed to be * zero-terminated. 
*/ - memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data)); - name[sizeof(uaddr->sa_data)] = 0; + memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data_min)); + name[sizeof(uaddr->sa_data_min)] = 0; return packet_do_bind(sk, name, 0, 0); } @@ -3532,11 +3532,11 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; - memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); + memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data_min)); rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex)); if (dev) - strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); + strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data_min)); rcu_read_unlock(); return sizeof(*uaddr); -- Gitee From 22dc806a840642293d9c8aef5e32348774e85088 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 29 Jan 2021 15:00:28 +0200 Subject: [PATCH 29/56] xhci: remove extra loop in interrupt context stable inclusion from stable-v5.10.213 commit 89ed7ebae4f04d05678108a2141b7ddaea7f9355 category: bugfix issue: NA CVE: NA [ Upstream commit 55f6153d8cc8eff0852d108f80087fdf41dc2169 ] When finishing a TD we walk the endpoint dequeue trb pointer until it matches the last TRB of the TD. TDs can contain over 100 TRBs, meaning we call a function 100 times, do a few comparisons and increase a couple of values for each of these calls, all in interrupt context. This can all be avoided by adding a pointer to the last TRB segment, and a number of TRBs in the TD. So instead of walking through each TRB just set the new dequeue segment, pointer, and number of free TRBs directly. Getting rid of the while loop also reduces the risk of getting stuck in an infinite loop in the interrupt handler. The loop relied on valid matching dequeue and last_trb values to break.
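The change in sketch form (the full hunks follow):

/* Old: O(TD size) walk on every TD completion, in interrupt context. */
while (ep_ring->dequeue != td->last_trb)
	inc_deq(xhci, ep_ring);
inc_deq(xhci, ep_ring);

/* New: O(1) update using the cached last segment and TRB count. */
ep_ring->dequeue = td->last_trb;
ep_ring->deq_seg = td->last_trb_seg;
ep_ring->num_trbs_free += td->num_trbs - 1;
inc_deq(xhci, ep_ring);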
Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20210129130044.206855-12-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 5372c65e1311 ("xhci: process isoc TD properly when there was a transaction error mid TD.") Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/usb/host/xhci-ring.c | 21 ++++++++++++++------- drivers/usb/host/xhci.h | 2 ++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index d0d5eec128cc..ae09ddc558bf 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2102,8 +2102,9 @@ static int finish_td(struct xhci_hcd *xhci, struct xhci_td *td, EP_HARD_RESET); } else { /* Update ring dequeue pointer */ - while (ep_ring->dequeue != td->last_trb) - inc_deq(xhci, ep_ring); + ep_ring->dequeue = td->last_trb; + ep_ring->deq_seg = td->last_trb_seg; + ep_ring->num_trbs_free += td->num_trbs - 1; inc_deq(xhci, ep_ring); } @@ -2328,8 +2329,9 @@ static int skip_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, frame->actual_length = 0; /* Update ring dequeue pointer */ - while (ep->ring->dequeue != td->last_trb) - inc_deq(xhci, ep->ring); + ep->ring->dequeue = td->last_trb; + ep->ring->deq_seg = td->last_trb_seg; + ep->ring->num_trbs_free += td->num_trbs - 1; inc_deq(xhci, ep->ring); return xhci_td_cleanup(xhci, td, ep->ring, status); @@ -3494,7 +3496,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, field |= TRB_IOC; more_trbs_coming = false; td->last_trb = ring->enqueue; - + td->last_trb_seg = ring->enq_seg; if (xhci_urb_suitable_for_idt(urb)) { memcpy(&send_addr, urb->transfer_buffer, trb_buff_len); @@ -3520,7 +3522,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, upper_32_bits(send_addr), length_field, field); - + td->num_trbs++; addr += trb_buff_len; sent_len = trb_buff_len; @@ -3544,8 +3546,10 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, ep_index, urb->stream_id, 1, urb, 1, mem_flags); urb_priv->td[1].last_trb = ring->enqueue; + urb_priv->td[1].last_trb_seg = ring->enq_seg; field = TRB_TYPE(TRB_NORMAL) | ring->cycle_state | TRB_IOC; queue_trb(xhci, ring, 0, 0, 0, TRB_INTR_TARGET(0), field); + urb_priv->td[1].num_trbs++; } check_trb_math(urb, enqd_len); @@ -3596,6 +3600,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags, urb_priv = urb->hcpriv; td = &urb_priv->td[0]; + td->num_trbs = num_trbs; /* * Don't give the first TRB to the hardware (by toggling the cycle bit) @@ -3668,6 +3673,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags, /* Save the DMA address of the last TRB in the TD */ td->last_trb = ep_ring->enqueue; + td->last_trb_seg = ep_ring->enq_seg; /* Queue status TRB - see Table 7 and sections 4.11.2.2 and 6.4.1.2.3 */ /* If the device sent data, the status stage is an OUT transfer */ @@ -3912,7 +3918,7 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags, goto cleanup; } td = &urb_priv->td[i]; - + td->num_trbs = trbs_per_td; /* use SIA as default, if frame id is used overwrite it */ sia_frame_id = TRB_SIA; if (!(urb->transfer_flags & URB_ISO_ASAP) && @@ -3955,6 +3961,7 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags, } else { more_trbs_coming = false; td->last_trb = ep_ring->enqueue; + td->last_trb_seg = ep_ring->enq_seg; field |= TRB_IOC; if (trb_block_event_intr(xhci, num_tds, i)) field |= TRB_BEI; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 
c7749f6e3474..cd14f61f8471 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1550,9 +1550,11 @@ struct xhci_td { struct xhci_segment *start_seg; union xhci_trb *first_trb; union xhci_trb *last_trb; + struct xhci_segment *last_trb_seg; struct xhci_segment *bounce_seg; /* actual_length of the URB has already been set */ bool urb_length_set; + unsigned int num_trbs; }; /* xHCI command default timeout value */ -- Gitee From 127ba91471c171db122bdb94a580bec451577499 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 6 Apr 2021 10:02:08 +0300 Subject: [PATCH 30/56] xhci: prevent double-fetch of transfer and transfer event TRBs stable inclusion from stable-v5.10.213 commit fa5aaf31e5f5aa6e57a29037b5fd6e54369f83b8 category: bugfix issue: NA CVE: NA [ Upstream commit e9fcb07704fcef6fa6d0333fd2b3a62442eaf45b ] The same values are parsed several times from transfer and event TRBs by different functions in the same call path, all while processing one transfer event. As the TRBs are in DMA memory and can be accessed by the xHC host we want to avoid this to prevent double-fetch issues. To resolve this pass the already parsed values to the different functions in the path of parsing a transfer event Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20210406070208.3406266-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Stable-dep-of: 5372c65e1311 ("xhci: process isoc TD properly when there was a transaction error mid TD.") Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/usb/host/xhci-ring.c | 42 ++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index ae09ddc558bf..2a679f9d333b 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2062,16 +2062,13 @@ int xhci_is_vendor_info_code(struct xhci_hcd *xhci, unsigned int trb_comp_code) return 0; } -static int finish_td(struct xhci_hcd *xhci, struct xhci_td *td, - struct xhci_transfer_event *event, struct xhci_virt_ep *ep) +static int finish_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, + struct xhci_ring *ep_ring, struct xhci_td *td, + u32 trb_comp_code) { struct xhci_ep_ctx *ep_ctx; - struct xhci_ring *ep_ring; - u32 trb_comp_code; - ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer)); ep_ctx = xhci_get_ep_ctx(xhci, ep->vdev->out_ctx, ep->ep_index); - trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len)); if (trb_comp_code == COMP_STOPPED_LENGTH_INVALID || trb_comp_code == COMP_STOPPED || @@ -2129,9 +2126,9 @@ static int sum_trb_lengths(struct xhci_hcd *xhci, struct xhci_ring *ring, /* * Process control tds, update urb status and actual_length. */ -static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, - union xhci_trb *ep_trb, struct xhci_transfer_event *event, - struct xhci_virt_ep *ep) +static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, + struct xhci_ring *ep_ring, struct xhci_td *td, + union xhci_trb *ep_trb, struct xhci_transfer_event *event) { struct xhci_ep_ctx *ep_ctx; u32 trb_comp_code; @@ -2219,15 +2216,15 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, td->urb->actual_length = requested; finish_td: - return finish_td(xhci, td, event, ep); + return finish_td(xhci, ep, ep_ring, td, trb_comp_code); } /* * Process isochronous tds, update urb packet status and actual_length. 
*/ -static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, - union xhci_trb *ep_trb, struct xhci_transfer_event *event, - struct xhci_virt_ep *ep) +static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, + struct xhci_ring *ep_ring, struct xhci_td *td, + union xhci_trb *ep_trb, struct xhci_transfer_event *event) { struct urb_priv *urb_priv; int idx; @@ -2308,7 +2305,7 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, td->urb->actual_length += frame->actual_length; - return finish_td(xhci, td, event, ep); + return finish_td(xhci, ep, ep_ring, td, trb_comp_code); } static int skip_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, @@ -2340,17 +2337,15 @@ static int skip_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, /* * Process bulk and interrupt tds, update urb status and actual_length. */ -static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td, - union xhci_trb *ep_trb, struct xhci_transfer_event *event, - struct xhci_virt_ep *ep) +static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, + struct xhci_ring *ep_ring, struct xhci_td *td, + union xhci_trb *ep_trb, struct xhci_transfer_event *event) { struct xhci_slot_ctx *slot_ctx; - struct xhci_ring *ep_ring; u32 trb_comp_code; u32 remaining, requested, ep_trb_len; slot_ctx = xhci_get_slot_ctx(xhci, ep->vdev->out_ctx); - ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer)); trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len)); remaining = EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)); ep_trb_len = TRB_LEN(le32_to_cpu(ep_trb->generic.field[2])); @@ -2410,7 +2405,8 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td, remaining); td->urb->actual_length = 0; } - return finish_td(xhci, td, event, ep); + + return finish_td(xhci, ep, ep_ring, td, trb_comp_code); } /* @@ -2761,11 +2757,11 @@ static int handle_tx_event(struct xhci_hcd *xhci, /* update the urb's actual_length and give back to the core */ if (usb_endpoint_xfer_control(&td->urb->ep->desc)) - process_ctrl_td(xhci, td, ep_trb, event, ep); + process_ctrl_td(xhci, ep, ep_ring, td, ep_trb, event); else if (usb_endpoint_xfer_isoc(&td->urb->ep->desc)) - process_isoc_td(xhci, td, ep_trb, event, ep); + process_isoc_td(xhci, ep, ep_ring, td, ep_trb, event); else - process_bulk_intr_td(xhci, td, ep_trb, event, ep); + process_bulk_intr_td(xhci, ep, ep_ring, td, ep_trb, event); cleanup: handling_skipped_tds = ep->skip && trb_comp_code != COMP_MISSED_SERVICE_ERROR && -- Gitee From 3cff5324bd7cdc05186624f35787bf368871540f Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 25 Jan 2024 17:27:36 +0200 Subject: [PATCH 31/56] xhci: process isoc TD properly when there was a transaction error mid TD. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-v5.10.213 commit fe2322caa07424b31522761c27f8b299e87a37a9 category: bugfix issue: NA CVE: NA [ Upstream commit 5372c65e1311a16351ef03dd096ff576e6477674 ] The last TRB of an isoc TD might not trigger an event if there was an error event for a TRB mid TD. This is seen on a NEC Corporation uPD720200 USB 3.0 Host. After an error mid a multi-TRB TD, the xHC should, according to xhci 4.9.1, generate events for passed TRBs with the IOC flag set if it proceeds to the next TD. This event is either a copy of the original error, or a "success" transfer event.
If that event is missing then the driver and xHC host get out of sync as the driver is still expecting a transfer event for that first TD, while the xHC host is already sending events for the next TD in the list. This leads to "Transfer event TRB DMA ptr not part of current TD" messages. As a solution we tag the isoc TDs that get error events mid TD. If an event doesn't match the first TD, then check if the tag is set, and the event points to the next TD. In that case give back the first TD and process the next TD normally. Make sure TD status and transferred length stay valid in both cases, with and without a final TD completion event. Reported-by: Michał Pecio Closes: https://lore.kernel.org/linux-usb/20240112235205.1259f60c@foxbook/ Tested-by: Michał Pecio Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20240125152737.2983959-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/usb/host/xhci-ring.c | 74 +++++++++++++++++++++++++++++------- drivers/usb/host/xhci.h | 1 + 2 files changed, 61 insertions(+), 14 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 2a679f9d333b..5135253974a9 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2247,6 +2247,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, /* handle completion code */ switch (trb_comp_code) { case COMP_SUCCESS: + /* Don't overwrite status if TD had an error, see xHCI 4.9.1 */ + if (td->error_mid_td) + break; if (remaining) { frame->status = short_framestatus; if (xhci->quirks & XHCI_TRUST_TX_LENGTH) @@ -2276,8 +2279,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, break; case COMP_USB_TRANSACTION_ERROR: frame->status = -EPROTO; + sum_trbs_for_length = true; if (ep_trb != td->last_trb) - return 0; + td->error_mid_td = true; break; case COMP_STOPPED: sum_trbs_for_length = true; @@ -2297,6 +2301,9 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, break; } + if (td->urb_length_set) + goto finish_td; + if (sum_trbs_for_length) frame->actual_length = sum_trb_lengths(xhci, ep->ring, ep_trb) + ep_trb_len - remaining; @@ -2305,6 +2312,14 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, td->urb->actual_length += frame->actual_length; +finish_td: + /* Don't give back TD yet if we encountered an error mid TD */ + if (td->error_mid_td && ep_trb != td->last_trb) { + xhci_dbg(xhci, "Error mid isoc TD, wait for final completion event\n"); + td->urb_length_set = true; + return 0; + } + return finish_td(xhci, ep, ep_ring, td, trb_comp_code); } @@ -2691,17 +2706,51 @@ static int handle_tx_event(struct xhci_hcd *xhci, } if (!ep_seg) { - if (!ep->skip || - !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { - /* Some host controllers give a spurious - * successful event after a short transfer. - * Ignore it. - */ - if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && - ep_ring->last_td_was_short) { - ep_ring->last_td_was_short = false; - goto cleanup; + + if (ep->skip && usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { + skip_isoc_td(xhci, td, ep, status); + goto cleanup; + } + + /* + * Some hosts give a spurious success event after a short + * transfer. Ignore it.
+ */ + if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && + ep_ring->last_td_was_short) { + ep_ring->last_td_was_short = false; + goto cleanup; + } + + /* + * xhci 4.10.2 states isoc endpoints should continue + * processing the next TD if there was an error mid TD. + * So host like NEC don't generate an event for the last + * isoc TRB even if the IOC flag is set. + * xhci 4.9.1 states that if there are errors in mult-TRB + * TDs xHC should generate an error for that TRB, and if xHC + * proceeds to the next TD it should genete an event for + * any TRB with IOC flag on the way. Other host follow this. + * So this event might be for the next TD. + */ + if (td->error_mid_td && + !list_is_last(&td->td_list, &ep_ring->td_list)) { + struct xhci_td *td_next = list_next_entry(td, td_list); + + ep_seg = trb_in_td(xhci, td_next->start_seg, td_next->first_trb, + td_next->last_trb, ep_trb_dma, false); + if (ep_seg) { + /* give back previous TD, start handling new */ + xhci_dbg(xhci, "Missing TD completion event after mid TD error\n"); + ep_ring->dequeue = td->last_trb; + ep_ring->deq_seg = td->last_trb_seg; + inc_deq(xhci, ep_ring); + xhci_td_cleanup(xhci, td, ep_ring, td->status); + td = td_next; } + } + + if (!ep_seg) { /* HC is busted, give up! */ xhci_err(xhci, "ERROR Transfer event TRB DMA ptr not " @@ -2713,9 +2762,6 @@ static int handle_tx_event(struct xhci_hcd *xhci, ep_trb_dma, true); return -ESHUTDOWN; } - - skip_isoc_td(xhci, td, ep, status); - goto cleanup; } if (trb_comp_code == COMP_SHORT_PACKET) ep_ring->last_td_was_short = true; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index cd14f61f8471..fb3d1c4992f5 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1554,6 +1554,7 @@ struct xhci_td { struct xhci_segment *bounce_seg; /* actual_length of the URB has already been set */ bool urb_length_set; + bool error_mid_td; unsigned int num_trbs; }; -- Gitee From 4c5ce13836f42b4e75f108611d81198029e034f0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 26 Jan 2024 22:31:42 +0300 Subject: [PATCH 32/56] stddef: Introduce DECLARE_FLEX_ARRAY() helper stable inclusion from stable-v5.10.210 commit 443b16ee3d9ce0a3ece0e3526a5af883e5b16eaf category: bugfix issue: NA CVE: NA commit 3080ea5553cc909b000d1f1d964a9041962f2c5b upstream. There are many places where kernel code wants to have several different typed trailing flexible arrays. This would normally be done with multiple flexible arrays in a union, but since GCC and Clang don't (on the surface) allow this, there have been many open-coded workarounds, usually involving neighboring 0-element arrays at the end of a structure. For example, instead of something like this: struct thing { ... union { struct type1 foo[]; struct type2 bar[]; }; }; code works around the compiler with: struct thing { ... struct type1 foo[0]; struct type2 bar[]; }; Another case is when a flexible array is wanted as the single member within a struct (which itself is usually in a union). For example, this would be worked around as: union many { ... 
struct { struct type3 baz[0]; }; }; These kinds of work-arounds cause problems with size checks against such zero-element arrays (for example when building with -Warray-bounds and -Wzero-length-bounds, and with the coming FORTIFY_SOURCE improvements), so they must all be converted to "real" flexible arrays, avoiding warnings like this: fs/hpfs/anode.c: In function 'hpfs_add_sector_to_btree': fs/hpfs/anode.c:209:27: warning: array subscript 0 is outside the bounds of an interior zero-length array 'struct bplus_internal_node[0]' [-Wzero-length-bounds] 209 | anode->btree.u.internal[0].down = cpu_to_le32(a); | ~~~~~~~~~~~~~~~~~~~~~~~^~~ In file included from fs/hpfs/hpfs_fn.h:26, from fs/hpfs/anode.c:10: fs/hpfs/hpfs.h:412:32: note: while referencing 'internal' 412 | struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving | ^~~~~~~~ drivers/net/can/usb/etas_es58x/es58x_fd.c: In function 'es58x_fd_tx_can_msg': drivers/net/can/usb/etas_es58x/es58x_fd.c:360:35: warning: array subscript 65535 is outside the bounds of an interior zero-length array 'u8[0]' {aka 'unsigned char[]'} [-Wzero-length-bounds] 360 | tx_can_msg = (typeof(tx_can_msg))&es58x_fd_urb_cmd->raw_msg[msg_len]; | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from drivers/net/can/usb/etas_es58x/es58x_core.h:22, from drivers/net/can/usb/etas_es58x/es58x_fd.c:17: drivers/net/can/usb/etas_es58x/es58x_fd.h:231:6: note: while referencing 'raw_msg' 231 | u8 raw_msg[0]; | ^~~~~~~ However, it _is_ entirely possible to have one or more flexible arrays in a struct or union: it just has to be in another struct. And since it cannot be alone in a struct, such a struct must have at least 1 other named member -- but that member can be zero sized. Wrap all this nonsense into the new DECLARE_FLEX_ARRAY() in support of having flexible arrays in unions (or alone in a struct). As with struct_group(), since this is needed in UAPI headers as well, implement the core there, with a non-UAPI wrapper. Additionally update kernel-doc to understand its existence. https://github.com/KSPP/linux/issues/137 Cc: Arnd Bergmann Cc: "Gustavo A. R. Silva" Signed-off-by: Kees Cook Signed-off-by: Vasiliy Kovalev Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- include/linux/stddef.h | 13 +++++++++++++ include/uapi/linux/stddef.h | 16 ++++++++++++++++ scripts/kernel-doc | 3 ++- 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/include/linux/stddef.h b/include/linux/stddef.h index 938216f8ab7e..31fdbb784c24 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -84,4 +84,17 @@ enum { #define struct_group_tagged(TAG, NAME, MEMBERS...) \ __struct_group(TAG, NAME, /* no attrs */, MEMBERS) +/** + * DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union + * + * @TYPE: The type of each flexible array element + * @NAME: The name of the flexible array member + * + * In order to have a flexible array member in a union or alone in a + * struct, it needs to be wrapped in an anonymous struct with at least 1 + * named member, but that member can be empty. 
+ */ +#define DECLARE_FLEX_ARRAY(TYPE, NAME) \ + __DECLARE_FLEX_ARRAY(TYPE, NAME) + #endif diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index c3725b492263..7837ba4fe728 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -28,4 +28,20 @@ struct { MEMBERS } ATTRS; \ struct TAG { MEMBERS } ATTRS NAME; \ } + +/** + * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union + * + * @TYPE: The type of each flexible array element + * @NAME: The name of the flexible array member + * + * In order to have a flexible array member in a union or alone in a + * struct, it needs to be wrapped in an anonymous struct with at least 1 + * named member, but that member can be empty. + */ +#define __DECLARE_FLEX_ARRAY(TYPE, NAME) \ + struct { \ + struct { } __empty_ ## NAME; \ + TYPE NAME[]; \ + } #endif diff --git a/scripts/kernel-doc b/scripts/kernel-doc index 19af6dd160e6..7a04d4c05326 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1232,7 +1232,8 @@ sub dump_struct($$) { $members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; # replace DECLARE_KFIFO_PTR $members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos; - + # replace DECLARE_FLEX_ARRAY + $members =~ s/(?:__)?DECLARE_FLEX_ARRAY\s*\($args,\s*$args\)/$1 $2\[\]/gos; my $declaration = $members; # Split nested struct/union elements as newer ones -- Gitee From 532cfe12fd15e80a69cca010d180f4fc78380e74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 7 Mar 2024 13:03:37 +0100 Subject: [PATCH 33/56] bpf: Fix stackmap overflow check on 32-bit arches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-v5.10.214 commit 15641007df0f0d35fa28742b25c2a7db9dcd6895 category: bugfix issue: #I9OU4B CVE: CVE-2024-26883 Signed-off-by: wanxiaoqing --------------------------------------- [ Upstream commit 7a4b21250bf79eef26543d35bd390448646c536b ] The stackmap code relies on roundup_pow_of_two() to compute the number of hash buckets, and contains an overflow check by checking if the resulting value is 0. However, on 32-bit arches, the roundup code itself can overflow by doing a 32-bit left-shift of an unsigned long value, which is undefined behaviour, so it is not guaranteed to truncate neatly. This was triggered by syzbot on the DEVMAP_HASH type, which contains the same check, copied from the hashtab code. The commit in the fixes tag actually attempted to fix this, but the fix did not account for the UB, so the fix only works on CPUs where an overflow does result in a neat truncation to zero, which is not guaranteed. Checking the value before rounding does not have this problem. 
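The shape of the fix (the hunk follows):

/* Validate before rounding: on 32-bit arches 1UL << 32 is undefined,
 * so the old "n_buckets == 0" check after the fact was unreliable. */
if (attr->max_entries > 1UL << 31)
	return ERR_PTR(-E2BIG);
n_buckets = roundup_pow_of_two(attr->max_entries);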
Fixes: 6183f4d3a0a2 ("bpf: Check for integer overflow when using roundup_pow_of_two()") Signed-off-by: Toke Høiland-Jørgensen Reviewed-by: Bui Quang Minh Message-ID: <20240307120340.99577-4-toke@redhat.com> Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- kernel/bpf/stackmap.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 0c5bf98d5576..461a7991f819 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -115,11 +115,14 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) } else if (value_size / 8 > sysctl_perf_event_max_stack) return ERR_PTR(-EINVAL); - /* hash table size must be power of 2 */ - n_buckets = roundup_pow_of_two(attr->max_entries); - if (!n_buckets) + /* hash table size must be power of 2; roundup_pow_of_two() can overflow + * into UB on 32-bit arches, so check that first + */ + if (attr->max_entries > 1UL << 31) return ERR_PTR(-E2BIG); + n_buckets = roundup_pow_of_two(attr->max_entries); + cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); err = bpf_map_charge_init(&mem, cost + attr->max_entries * (sizeof(struct stack_map_bucket) + (u64)value_size)); -- Gitee From a5fcfeeaf4c01e6ff3ca3d634add3e828ec792c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 7 Mar 2024 13:03:36 +0100 Subject: [PATCH 34/56] bpf: Fix hashtab overflow check on 32-bit arches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-v5.10.214 commit 64f00b4df0597590b199b62a37a165473bf658a6 category: bugfix issue: #I9OU3Z CVE: CVE-2024-26884 Signed-off-by: wanxiaoqing --------------------------------------- [ Upstream commit 6787d916c2cf9850c97a0a3f73e08c43e7d973b1 ] The hashtab code relies on roundup_pow_of_two() to compute the number of hash buckets, and contains an overflow check by checking if the resulting value is 0. However, on 32-bit arches, the roundup code itself can overflow by doing a 32-bit left-shift of an unsigned long value, which is undefined behaviour, so it is not guaranteed to truncate neatly. This was triggered by syzbot on the DEVMAP_HASH type, which contains the same check, copied from the hashtab code. So apply the same fix to hashtab, by moving the overflow check to before the roundup. 
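For background, a minimal illustration of why the old "result == 0" check is unreliable (illustrative values, assuming a 32-bit unsigned long): roundup_pow_of_two(n) boils down to 1UL << fls_long(n - 1), so for n above 1UL << 31 the shift count equals the type width.

/* Illustrative only, on an ILP32 target: */
unsigned long n = 0x80000001UL;	/* max_entries > 1UL << 31 */
unsigned long r = 1UL << 32;	/* what the macro computes here: a shift
				 * by the full type width is undefined
				 * behaviour, not a guaranteed 0, so a
				 * later "r == 0" test may never fire. */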
Fixes: daaf427c6ab3 ("bpf: fix arraymap NULL deref and missing overflow and zero size checks") Signed-off-by: Toke Høiland-Jørgensen Message-ID: <20240307120340.99577-3-toke@redhat.com> Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- kernel/bpf/hashtab.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 0ce445aadfdf..d05614a2bdc7 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -443,7 +443,13 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) num_possible_cpus()); } - /* hash table size must be power of 2 */ + /* hash table size must be power of 2; roundup_pow_of_two() can overflow + * into UB on 32-bit arches, so check that first + */ + err = -E2BIG; + if (htab->map.max_entries > 1UL << 31) + goto free_htab; + htab->n_buckets = roundup_pow_of_two(htab->map.max_entries); htab->elem_size = sizeof(struct htab_elem) + @@ -453,10 +459,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) else htab->elem_size += round_up(htab->map.value_size, 8); - err = -E2BIG; - /* prevent zero size kmalloc and check for u32 overflow */ - if (htab->n_buckets == 0 || - htab->n_buckets > U32_MAX / sizeof(struct bucket)) + /* check for u32 overflow */ + if (htab->n_buckets > U32_MAX / sizeof(struct bucket)) goto free_htab; cost = (u64) htab->n_buckets * sizeof(struct bucket) + -- Gitee From 45fcfa575fd266370c9883995da37c67abc9d932 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Sat, 30 Mar 2024 19:01:14 +0000 Subject: [PATCH 35/56] binder: check offset alignment in binder_get_object() stable inclusion from stable-v5.10.216 commit 48a1f83ca9c68518b1a783c62e6a8223144fa9fc category: bugfix issue: NA CVE: CVE-2024-26926 Signed-off-by: wanxiaoqing --------------------------------------- commit aaef73821a3b0194a01bd23ca77774f704a04d40 upstream. Commit 6d98eb95b450 ("binder: avoid potential data leakage when copying txn") introduced changes to how binder objects are copied. In doing so, it unintentionally removed an offset alignment check done through calls to binder_alloc_copy_from_buffer() -> check_buffer(). These calls were replaced in binder_get_object() with copy_from_user(), so now an explicit offset alignment check is needed here. This avoids later complications when unwinding the objects gets harder. It is worth noting this check existed prior to commit 7a67a39320df ("binder: add function to copy binder object from buffer"), likely removed due to redundancy at the time. 
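The added validation, condensed from the hunk below:

/* Reject out-of-range and unaligned object offsets before the
 * object header is parsed any further. */
if (offset > buffer->data_size || read_size < sizeof(*hdr) ||
    !IS_ALIGNED(offset, sizeof(u32)))
	return 0;	/* not a valid object location */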
Fixes: 6d98eb95b450 ("binder: avoid potential data leakage when copying txn") Cc: stable@vger.kernel.org Signed-off-by: Carlos Llamas Acked-by: Todd Kjos Link: https://lore.kernel.org/r/20240330190115.1877819-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/android/binder.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index a38959e912ae..76f9d5d51310 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2093,8 +2093,10 @@ static size_t binder_get_object(struct binder_proc *proc, size_t object_size = 0; read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset); - if (offset > buffer->data_size || read_size < sizeof(*hdr)) + if (offset > buffer->data_size || read_size < sizeof(*hdr) || + !IS_ALIGNED(offset, sizeof(u32))) return 0; + if (u) { if (copy_from_user(object, u + offset, read_size)) return 0; -- Gitee From 9c0e7e3add0906f847f62a0f4a38521bf15abfd0 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Thu, 1 Jun 2023 15:44:12 -0700 Subject: [PATCH 36/56] amdgpu: validate offset_in_bo of drm_amdgpu_gem_va MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.188 commit 968e27fd037ec4732068820a9b9836eccc0e0a12 category: bugfix issue: NA CVE: NA Signed-off-by: wanxiaoqing --------------------------------------- [ Upstream commit 9f0bcf49e9895cb005d78b33a5eebfa11711b425 ] This is motivated by OOB access in amdgpu_vm_update_range when offset_in_bo+map_size overflows. v2: keep the validations in amdgpu_vm_bo_map v3: add the validations to amdgpu_vm_bo_map/amdgpu_vm_bo_replace_map rather than to amdgpu_gem_va_ioctl Fixes: 9f7eb5367d00 ("drm/amdgpu: actually use the VM map parameters") Reviewed-by: Christian König Signed-off-by: Chia-I Wu Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 45b1f00c5968..dbffe95ca1b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2229,14 +2229,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, uint64_t eaddr; /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || - size == 0 || size & ~PAGE_MASK) + if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) + return -EINVAL; + if (saddr + size <= saddr || offset + size <= offset) return -EINVAL; /* make sure object fit at this offset */ eaddr = saddr + size - 1; - if (saddr >= eaddr || - (bo && offset + size > amdgpu_bo_size(bo)) || + if ((bo && offset + size > amdgpu_bo_size(bo)) || (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) return -EINVAL; @@ -2295,14 +2295,14 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, int r; /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || - size == 0 || size & ~PAGE_MASK) + if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) + return -EINVAL; + if (saddr + size <= saddr || offset + size <= offset) return -EINVAL; /* make sure object fit at this offset */ eaddr = saddr + size - 1; - if (saddr >= eaddr || - (bo && offset + size > amdgpu_bo_size(bo)) || + if ((bo && offset + size > amdgpu_bo_size(bo)) || (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) 
return -EINVAL; -- Gitee From c7a339fb8beccc2878cc82a784601ce914f505cd Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Thu, 11 Apr 2024 11:11:38 +0800 Subject: [PATCH 37/56] drm/amdgpu: validate the parameters of bo mapping operations more clearly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-v5.10.216 commit 1fd7db5c16028dc07b2ceec190f2e895dddb532d category: bugfix issue: NA CVE: CVE-2024-26922 Signed-off-by: wanxiaoqing --------------------------------------- commit 6fef2d4c00b5b8561ad68dd2b68173f5c6af1e75 upstream. Verify the parameters of amdgpu_vm_bo_(map/replace_map/clearing_mappings) in one common place. Fixes: dc54d3d1744d ("drm/amdgpu: implement AMDGPU_VA_OP_CLEAR v2") Cc: stable@vger.kernel.org Reported-by: Vlad Stolyarov Suggested-by: Christian König Signed-off-by: xinhui pan Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 72 ++++++++++++++++---------- 1 file changed, 46 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index dbffe95ca1b0..c28b64d02c49 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2201,6 +2201,37 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, trace_amdgpu_vm_bo_map(bo_va, mapping); } +/* Validate operation parameters to prevent potential abuse */ +static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev, + struct amdgpu_bo *bo, + uint64_t saddr, + uint64_t offset, + uint64_t size) +{ + uint64_t tmp, lpfn; + + if (saddr & AMDGPU_GPU_PAGE_MASK + || offset & AMDGPU_GPU_PAGE_MASK + || size & AMDGPU_GPU_PAGE_MASK) + return -EINVAL; + + if (check_add_overflow(saddr, size, &tmp) + || check_add_overflow(offset, size, &tmp) + || size == 0 /* which also leads to end < begin */) + return -EINVAL; + + /* make sure object fit at this offset */ + if (bo && offset + size > amdgpu_bo_size(bo)) + return -EINVAL; + + /* Ensure last pfn not exceed max_pfn */ + lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT; + if (lpfn >= adev->vm_manager.max_pfn) + return -EINVAL; + + return 0; +} + /** * amdgpu_vm_bo_map - map bo inside a vm * @@ -2227,21 +2258,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; uint64_t eaddr; + int r; - /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) - return -EINVAL; - if (saddr + size <= saddr || offset + size <= offset) - return -EINVAL; - - /* make sure object fit at this offset */ - eaddr = saddr + size - 1; - if ((bo && offset + size > amdgpu_bo_size(bo)) || - (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) - return -EINVAL; + r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); + if (r) + return r; saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); if (tmp) { @@ -2294,17 +2318,9 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, uint64_t eaddr; int r; - /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) - return -EINVAL; - if (saddr + size <= saddr || offset + size <= offset) - return -EINVAL; - - /* make sure object fit at this offset */ - eaddr = saddr + size - 1; - if ((bo && 
offset + size > amdgpu_bo_size(bo)) || - (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) - return -EINVAL; + r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); + if (r) + return r; /* Allocate all the needed memory */ mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); @@ -2318,7 +2334,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, } saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; mapping->start = saddr; mapping->last = eaddr; @@ -2405,10 +2421,14 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *before, *after, *tmp, *next; LIST_HEAD(removed); uint64_t eaddr; + int r; + + r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size); + if (r) + return r; - eaddr = saddr + size - 1; saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; /* Allocate all the needed memory */ before = kzalloc(sizeof(*before), GFP_KERNEL); -- Gitee From f8eecdc125e90327e155b729283fc6056135b174 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Fri, 26 Jan 2024 09:42:58 +0900 Subject: [PATCH 38/56] tracing/trigger: Fix to return error if failed to alloc snapshot stable inclusion from stable-v5.10.210 commit 56cfbe60710772916a5ba092c99542332b48e870 category: bugfix issue: NA CVE: CVE-2024-26920 Signed-off-by: wanxiaoqing --------------------------------------- commit 0958b33ef5a04ed91f61cef4760ac412080c4e08 upstream. Fix register_snapshot_trigger() to return error code if it failed to allocate a snapshot instead of 0 (success). Unless that, it will register snapshot trigger without an error. Link: https://lore.kernel.org/linux-trace-kernel/170622977792.270660.2789298642759362200.stgit@devnote2 Fixes: 0bbe7f719985 ("tracing: Fix the race between registering 'snapshot' event trigger and triggering 'snapshot' operation") Cc: stable@vger.kernel.org Cc: Vincent Donnefort Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- kernel/trace/trace_events_trigger.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 4bc90965abb2..e4340958da2d 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1140,8 +1140,10 @@ register_snapshot_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, struct trace_event_file *file) { - if (tracing_alloc_snapshot_instance(file->tr) != 0) - return 0; + int ret = tracing_alloc_snapshot_instance(file->tr); + + if (ret < 0) + return ret; return register_trigger(glob, ops, data, file); } -- Gitee From 9b3bbb78a00f5292a8015d0b721686b9afea08ac Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Fri, 19 Jan 2024 07:39:06 -0800 Subject: [PATCH 39/56] do_sys_name_to_handle(): use kzalloc() to fix kernel-infoleak stable inclusion from stable-v5.10.214 commit cde76b3af247f615447bcfecf610bb76c3529126 category: bugfix issue: NA CVE: CVE-2024-26901 Signed-off-by: wanxiaoqing --------------------------------------- [ Upstream commit 3948abaa4e2be938ccdfc289385a27342fb13d43 ] syzbot identified a kernel information leak vulnerability in do_sys_name_to_handle() and issued the following report [1]. 
[1] "BUG: KMSAN: kernel-infoleak in instrument_copy_to_user include/linux/instrumented.h:114 [inline] BUG: KMSAN: kernel-infoleak in _copy_to_user+0xbc/0x100 lib/usercopy.c:40 instrument_copy_to_user include/linux/instrumented.h:114 [inline] _copy_to_user+0xbc/0x100 lib/usercopy.c:40 copy_to_user include/linux/uaccess.h:191 [inline] do_sys_name_to_handle fs/fhandle.c:73 [inline] __do_sys_name_to_handle_at fs/fhandle.c:112 [inline] __se_sys_name_to_handle_at+0x949/0xb10 fs/fhandle.c:94 __x64_sys_name_to_handle_at+0xe4/0x140 fs/fhandle.c:94 ... Uninit was created at: slab_post_alloc_hook+0x129/0xa70 mm/slab.h:768 slab_alloc_node mm/slub.c:3478 [inline] __kmem_cache_alloc_node+0x5c9/0x970 mm/slub.c:3517 __do_kmalloc_node mm/slab_common.c:1006 [inline] __kmalloc+0x121/0x3c0 mm/slab_common.c:1020 kmalloc include/linux/slab.h:604 [inline] do_sys_name_to_handle fs/fhandle.c:39 [inline] __do_sys_name_to_handle_at fs/fhandle.c:112 [inline] __se_sys_name_to_handle_at+0x441/0xb10 fs/fhandle.c:94 __x64_sys_name_to_handle_at+0xe4/0x140 fs/fhandle.c:94 ... Bytes 18-19 of 20 are uninitialized Memory access of size 20 starts at ffff888128a46380 Data copied to user address 0000000020000240" Per Chuck Lever's suggestion, use kzalloc() instead of kmalloc() to solve the problem. Fixes: 990d6c2d7aee ("vfs: Add name to file handle conversion support") Suggested-by: Chuck Lever III Reported-and-tested-by: Signed-off-by: Nikita Zhandarovich Link: https://lore.kernel.org/r/20240119153906.4367-1-n.zhandarovich@fintech.ru Reviewed-by: Jan Kara Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- fs/fhandle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fhandle.c b/fs/fhandle.c index 718defdf1e0e..744ae0dba99c 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -37,7 +37,7 @@ static long do_sys_name_to_handle(struct path *path, if (f_handle.handle_bytes > MAX_HANDLE_SZ) return -EINVAL; - handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes, + handle = kzalloc(sizeof(struct file_handle) + f_handle.handle_bytes, GFP_KERNEL); if (!handle) return -ENOMEM; -- Gitee From ca88bcb481755f042d471344517dd96f17ac4ea2 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 28 Feb 2024 10:49:26 -0500 Subject: [PATCH 40/56] Bluetooth: hci_core: Fix possible buffer overflow stable inclusion from stable-v5.10.214 commit d47e6c1932cee02954ea588c9f09fd5ecefeadfc category: bugfix issue: NA CVE: CVE-2024-26889 Signed-off-by: wanxiaoqing --------------------------------------- [ Upstream commit 81137162bfaa7278785b24c1fd2e9e74f082e8e4 ] struct hci_dev_info has a fixed size name[8] field so in the event that hdev->name is bigger than that strcpy would attempt to write past its size, so this fixes this problem by switching to use strscpy. 
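The behavioural difference between the two copy routines is easy to show in a small userspace sketch. strscpy() is kernel-only, so bounded_copy() below is an invented stand-in that reproduces the relevant semantics (copy at most size - 1 bytes, always NUL-terminate); the 8-byte buffer matches the name[8] field described above.

#include <stdio.h>
#include <string.h>

/* Userspace stand-in for the kernel's strscpy(). */
static void bounded_copy(char *dst, const char *src, size_t size)
{
        size_t len = strnlen(src, size - 1);

        /* strcpy(dst, src) would write strlen(src) + 1 bytes here,
         * overrunning dst whenever the device name is too long. */
        memcpy(dst, src, len);
        dst[len] = '\0';
}

int main(void)
{
        char name[8]; /* same size as struct hci_dev_info's name[8] */

        bounded_copy(name, "hci0-with-a-long-name", sizeof(name));
        printf("%s\n", name); /* prints the truncated "hci0-wi" */
        return 0;
}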
Fixes: dcda165706b9 ("Bluetooth: hci_core: Fix build warnings") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- net/bluetooth/hci_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ffea0e685a34..3f1ac343c650 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2175,7 +2175,7 @@ int hci_get_dev_info(void __user *arg) else flags = hdev->flags; - strcpy(di.name, hdev->name); + strscpy(di.name, hdev->name, sizeof(di.name)); di.bdaddr = hdev->bdaddr; di.type = (hdev->bus & 0x0f) | ((hdev->dev_type & 0x03) << 4); di.flags = flags; -- Gitee From a11fc10154df54a9df78ef165c74decc62bc27fa Mon Sep 17 00:00:00 2001 From: Wang Jianjian Date: Fri, 2 Feb 2024 16:18:52 +0800 Subject: [PATCH 41/56] quota: Fix potential NULL pointer dereference stable inclusion from stable-v5.10.214 commit 61380537aa6dd32d8a723d98b8f1bd1b11d8fee0 category: bugfix issue: NA CVE: CVE-2024-26878 Signed-off-by: wanxiaoqing --------------------------------------- [ Upstream commit d0aa72604fbd80c8aabb46eda00535ed35570f1f ] Below race may cause NULL pointer dereference P1 P2 dquot_free_inode quota_off drop_dquot_ref remove_dquot_ref dquots = i_dquot(inode) dquots = i_dquot(inode) srcu_read_lock dquots[cnt]) != NULL (1) dquots[type] = NULL (2) spin_lock(&dquots[cnt]->dq_dqb_lock) (3) .... If dquot_free_inode(or other routines) checks inode's quota pointers (1) before quota_off sets it to NULL(2) and use it (3) after that, NULL pointer dereference will be triggered. So let's fix it by using a temporary pointer to avoid this issue. Signed-off-by: Wang Jianjian Signed-off-by: Jan Kara Message-Id: <20240202081852.2514092-1-wangjianjian3@huawei.com> Stable-dep-of: 179b8c97ebf6 ("quota: Fix rcu annotations of inode dquot pointers") Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- fs/quota/dquot.c | 98 ++++++++++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 41 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index ad255f8ab5c5..dd1f0607fc0d 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -375,15 +375,17 @@ int dquot_mark_dquot_dirty(struct dquot *dquot) EXPORT_SYMBOL(dquot_mark_dquot_dirty); /* Dirtify all the dquots - this can block when journalling */ -static inline int mark_all_dquot_dirty(struct dquot * const *dquot) +static inline int mark_all_dquot_dirty(struct dquot * const *dquots) { int ret, err, cnt; + struct dquot *dquot; ret = err = 0; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (dquot[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (dquot) /* Even in case of error we have to continue */ - ret = mark_dquot_dirty(dquot[cnt]); + ret = mark_dquot_dirty(dquot); if (!err) err = ret; } @@ -1668,6 +1670,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) struct dquot_warn warn[MAXQUOTAS]; int reserve = flags & DQUOT_SPACE_RESERVE; struct dquot **dquots; + struct dquot *dquot; if (!dquot_active(inode)) { if (reserve) { @@ -1687,27 +1690,26 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; if (reserve) { - ret = dquot_add_space(dquots[cnt], 0, number, flags, - &warn[cnt]); + ret = dquot_add_space(dquot, 0, number, flags, &warn[cnt]); } else { - ret = 
dquot_add_space(dquots[cnt], number, 0, flags, - &warn[cnt]); + ret = dquot_add_space(dquot, number, 0, flags, &warn[cnt]); } if (ret) { /* Back out changes we already did */ for (cnt--; cnt >= 0; cnt--) { - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; - spin_lock(&dquots[cnt]->dq_dqb_lock); + spin_lock(&dquot->dq_dqb_lock); if (reserve) - dquot_free_reserved_space(dquots[cnt], - number); + dquot_free_reserved_space(dquot, number); else - dquot_decr_space(dquots[cnt], number); - spin_unlock(&dquots[cnt]->dq_dqb_lock); + dquot_decr_space(dquot, number); + spin_unlock(&dquot->dq_dqb_lock); } spin_unlock(&inode->i_lock); goto out_flush_warn; @@ -1738,6 +1740,7 @@ int dquot_alloc_inode(struct inode *inode) int cnt, ret = 0, index; struct dquot_warn warn[MAXQUOTAS]; struct dquot * const *dquots; + struct dquot *dquot; if (!dquot_active(inode)) return 0; @@ -1748,17 +1751,19 @@ int dquot_alloc_inode(struct inode *inode) index = srcu_read_lock(&dquot_srcu); spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; - ret = dquot_add_inodes(dquots[cnt], 1, &warn[cnt]); + ret = dquot_add_inodes(dquot, 1, &warn[cnt]); if (ret) { for (cnt--; cnt >= 0; cnt--) { - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; /* Back out changes we already did */ - spin_lock(&dquots[cnt]->dq_dqb_lock); - dquot_decr_inodes(dquots[cnt], 1); - spin_unlock(&dquots[cnt]->dq_dqb_lock); + spin_lock(&dquot->dq_dqb_lock); + dquot_decr_inodes(dquot, 1); + spin_unlock(&dquot->dq_dqb_lock); } goto warn_put_all; } @@ -1780,6 +1785,7 @@ EXPORT_SYMBOL(dquot_alloc_inode); int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) { struct dquot **dquots; + struct dquot *dquot; int cnt, index; if (!dquot_active(inode)) { @@ -1795,9 +1801,8 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) spin_lock(&inode->i_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (dquots[cnt]) { - struct dquot *dquot = dquots[cnt]; - + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (dquot) { spin_lock(&dquot->dq_dqb_lock); if (WARN_ON_ONCE(dquot->dq_dqb.dqb_rsvspace < number)) number = dquot->dq_dqb.dqb_rsvspace; @@ -1822,6 +1827,7 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty); void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) { struct dquot **dquots; + struct dquot *dquot; int cnt, index; if (!dquot_active(inode)) { @@ -1837,9 +1843,8 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number) spin_lock(&inode->i_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (dquots[cnt]) { - struct dquot *dquot = dquots[cnt]; - + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (dquot) { spin_lock(&dquot->dq_dqb_lock); if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number)) number = dquot->dq_dqb.dqb_curspace; @@ -1866,6 +1871,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; struct dquot **dquots; + struct dquot *dquot; int reserve = flags & DQUOT_SPACE_RESERVE, index; if (!dquot_active(inode)) { @@ -1886,17 +1892,18 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) int wtype; warn[cnt].w_type = QUOTA_NL_NOWARN; - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if 
(!dquot) continue; - spin_lock(&dquots[cnt]->dq_dqb_lock); - wtype = info_bdq_free(dquots[cnt], number); + spin_lock(&dquot->dq_dqb_lock); + wtype = info_bdq_free(dquot, number); if (wtype != QUOTA_NL_NOWARN) - prepare_warning(&warn[cnt], dquots[cnt], wtype); + prepare_warning(&warn[cnt], dquot, wtype); if (reserve) - dquot_free_reserved_space(dquots[cnt], number); + dquot_free_reserved_space(dquot, number); else - dquot_decr_space(dquots[cnt], number); - spin_unlock(&dquots[cnt]->dq_dqb_lock); + dquot_decr_space(dquot, number); + spin_unlock(&dquot->dq_dqb_lock); } if (reserve) *inode_reserved_space(inode) -= number; @@ -1921,6 +1928,7 @@ void dquot_free_inode(struct inode *inode) unsigned int cnt; struct dquot_warn warn[MAXQUOTAS]; struct dquot * const *dquots; + struct dquot *dquot; int index; if (!dquot_active(inode)) @@ -1931,16 +1939,16 @@ void dquot_free_inode(struct inode *inode) spin_lock(&inode->i_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { int wtype; - warn[cnt].w_type = QUOTA_NL_NOWARN; - if (!dquots[cnt]) + dquot = srcu_dereference(dquots[cnt], &dquot_srcu); + if (!dquot) continue; - spin_lock(&dquots[cnt]->dq_dqb_lock); - wtype = info_idq_free(dquots[cnt], 1); + spin_lock(&dquot->dq_dqb_lock); + wtype = info_idq_free(dquot, 1); if (wtype != QUOTA_NL_NOWARN) - prepare_warning(&warn[cnt], dquots[cnt], wtype); - dquot_decr_inodes(dquots[cnt], 1); - spin_unlock(&dquots[cnt]->dq_dqb_lock); + prepare_warning(&warn[cnt], dquot, wtype); + dquot_decr_inodes(dquot, 1); + spin_unlock(&dquot->dq_dqb_lock); } spin_unlock(&inode->i_lock); mark_all_dquot_dirty(dquots); @@ -1967,7 +1975,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) qsize_t rsv_space = 0; qsize_t inode_usage = 1; struct dquot *transfer_from[MAXQUOTAS] = {}; - int cnt, ret = 0; + int cnt, index, ret = 0; char is_valid[MAXQUOTAS] = {}; struct dquot_warn warn_to[MAXQUOTAS]; struct dquot_warn warn_from_inodes[MAXQUOTAS]; @@ -2056,8 +2064,16 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) spin_unlock(&inode->i_lock); spin_unlock(&dq_data_lock); + /* + * These arrays are local and we hold dquot references so we don't need + * the srcu protection but still take dquot_srcu to avoid warning in + * mark_all_dquot_dirty(). + */ + index = srcu_read_lock(&dquot_srcu); mark_all_dquot_dirty(transfer_from); mark_all_dquot_dirty(transfer_to); + srcu_read_unlock(&dquot_srcu, index); + flush_warnings(warn_to); flush_warnings(warn_from_inodes); flush_warnings(warn_from_space); -- Gitee From 68bca24a115ed7a3e81b2a5cf9b34f5e2efa24c6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Mar 2024 14:18:16 +0000 Subject: [PATCH 42/56] packet: annotate data-races around ignore_outgoing stable inclusion from stable-v5.10.214 commit 68e84120319d4fc298fcdb14cf0bea6a0f64ffbd category: bugfix issue: NA CVE: CVE-2024-26862 Signed-off-by: wanxiaoqing --------------------------------------- [ Upstream commit 6ebfad33161afacb3e1e59ed1c2feefef70f9f97 ] ignore_outgoing is read locklessly from dev_queue_xmit_nit() and packet_getsockopt() Add appropriate READ_ONCE()/WRITE_ONCE() annotations. 
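The annotation pattern itself can be shown in miniature. READ_ONCE()/WRITE_ONCE() are kernel macros, so this sketch uses C11 relaxed atomics as the closest portable analogue; struct pt_demo and the function names are invented for illustration.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct pt_demo {
        atomic_bool ignore_outgoing; /* stands in for prot_hook.ignore_outgoing */
};

/* packet_setsockopt() path: the plain store becomes WRITE_ONCE(). */
static void pt_set_ignore_outgoing(struct pt_demo *pt, bool val)
{
        atomic_store_explicit(&pt->ignore_outgoing, val, memory_order_relaxed);
}

/* dev_queue_xmit_nit() path: the plain load becomes READ_ONCE(), so the
 * compiler may not tear, fuse, or re-read the racy value. */
static bool pt_should_skip(struct pt_demo *pt)
{
        return atomic_load_explicit(&pt->ignore_outgoing, memory_order_relaxed);
}

int main(void)
{
        struct pt_demo pt = { .ignore_outgoing = false };

        pt_set_ignore_outgoing(&pt, true);
        printf("%d\n", pt_should_skip(&pt)); /* 1 */
        return 0;
}

The point is not added ordering (relaxed/ONCE accesses impose none) but making the lockless access explicit, which documents the intentional data race for KCSAN and blocks compiler transformations that could split or duplicate the access.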
syzbot reported: BUG: KCSAN: data-race in dev_queue_xmit_nit / packet_setsockopt write to 0xffff888107804542 of 1 bytes by task 22618 on cpu 0: packet_setsockopt+0xd83/0xfd0 net/packet/af_packet.c:4003 do_sock_setsockopt net/socket.c:2311 [inline] __sys_setsockopt+0x1d8/0x250 net/socket.c:2334 __do_sys_setsockopt net/socket.c:2343 [inline] __se_sys_setsockopt net/socket.c:2340 [inline] __x64_sys_setsockopt+0x66/0x80 net/socket.c:2340 do_syscall_64+0xd3/0x1d0 entry_SYSCALL_64_after_hwframe+0x6d/0x75 read to 0xffff888107804542 of 1 bytes by task 27 on cpu 1: dev_queue_xmit_nit+0x82/0x620 net/core/dev.c:2248 xmit_one net/core/dev.c:3527 [inline] dev_hard_start_xmit+0xcc/0x3f0 net/core/dev.c:3547 __dev_queue_xmit+0xf24/0x1dd0 net/core/dev.c:4335 dev_queue_xmit include/linux/netdevice.h:3091 [inline] batadv_send_skb_packet+0x264/0x300 net/batman-adv/send.c:108 batadv_send_broadcast_skb+0x24/0x30 net/batman-adv/send.c:127 batadv_iv_ogm_send_to_if net/batman-adv/bat_iv_ogm.c:392 [inline] batadv_iv_ogm_emit net/batman-adv/bat_iv_ogm.c:420 [inline] batadv_iv_send_outstanding_bat_ogm_packet+0x3f0/0x4b0 net/batman-adv/bat_iv_ogm.c:1700 process_one_work kernel/workqueue.c:3254 [inline] process_scheduled_works+0x465/0x990 kernel/workqueue.c:3335 worker_thread+0x526/0x730 kernel/workqueue.c:3416 kthread+0x1d1/0x210 kernel/kthread.c:388 ret_from_fork+0x4b/0x60 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:243 value changed: 0x00 -> 0x01 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 27 Comm: kworker/u8:1 Tainted: G W 6.8.0-syzkaller-08073-g480e035fc4c7 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/29/2024 Workqueue: bat_events batadv_iv_send_outstanding_bat_ogm_packet Fixes: fa788d986a3a ("packet: add sockopt to ignore outgoing packets") Reported-by: syzbot+c669c1136495a2e7c31f@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/CANn89i+Z7MfbkBLOv=p7KZ7=K1rKHO4P1OL5LYDCtBiyqsa9oQ@mail.gmail.com/T/#t Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reviewed-by: Willem de Bruijn Reviewed-by: Jason Xing Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- net/core/dev.c | 2 +- net/packet/af_packet.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 83648e6a04de..7161f29f1352 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2324,7 +2324,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) rcu_read_lock(); again: list_for_each_entry_rcu(ptype, ptype_list, list) { - if (ptype->ignore_outgoing) + if (READ_ONCE(ptype->ignore_outgoing)) continue; /* Never send packets back to the socket diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a2e2996ef25d..e7e3e7a11c8a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3947,7 +3947,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, if (val < 0 || val > 1) return -EINVAL; - po->prot_hook.ignore_outgoing = !!val; + WRITE_ONCE(po->prot_hook.ignore_outgoing, !!val); return 0; } case PACKET_TX_HAS_OFF: @@ -4079,7 +4079,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, 0); break; case PACKET_IGNORE_OUTGOING: - val = po->prot_hook.ignore_outgoing; + val = READ_ONCE(po->prot_hook.ignore_outgoing); break; case PACKET_ROLLOVER_STATS: if (!po->rollover) -- Gitee From 8e47e3ba79ca17eec16d7a559f0b75010414a12e Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 1 Dec 2020 13:58:48 -0800 Subject: [PATCH 43/56] bpf: Eliminate rlimit-based memory accounting for devmap maps mainline inclusion from mainline-v5.11-rc1 commit 844f157f6c0a905d039d2e20212ab3231f2e5eaf category: bugfix issue: NA CVE: NA Signed-off-by: wanxiaoqing --------------------------------------- Do not use rlimit-based memory accounting for devmap maps. It has been replaced with the memcg-based memory accounting. Signed-off-by: Roman Gushchin Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20201201215900.3569844-23-guro@fb.com Signed-off-by: wanxiaoqing --- kernel/bpf/devmap.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 01149821ded9..ca2cade2871b 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -109,8 +109,6 @@ static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab, static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) { u32 valsize = attr->value_size; - u64 cost = 0; - int err; /* check sanity of attributes. 
2 value sizes supported: * 4 bytes: ifindex @@ -135,21 +133,13 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) if (!dtab->n_buckets) /* Overflow check */ return -EINVAL; - cost += (u64) sizeof(struct hlist_head) * dtab->n_buckets; - } else { - cost += (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *); } - /* if map size is larger than memlock limit, reject it */ - err = bpf_map_charge_init(&dtab->map.memory, cost); - if (err) - return -EINVAL; - if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets, dtab->map.numa_node); if (!dtab->dev_index_head) - goto free_charge; + return -ENOMEM; spin_lock_init(&dtab->index_lock); } else { @@ -157,14 +147,10 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) sizeof(struct bpf_dtab_netdev *), dtab->map.numa_node); if (!dtab->netdev_map) - goto free_charge; + return -ENOMEM; } return 0; - -free_charge: - bpf_map_charge_finish(&dtab->map.memory); - return -ENOMEM; } static struct bpf_map *dev_map_alloc(union bpf_attr *attr) -- Gitee From 13e3d83f9d2c4fd14ab48da964181b3903c93988 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 7 Mar 2024 13:03:35 +0100 Subject: [PATCH 44/56] bpf: Fix DEVMAP_HASH overflow check on 32-bit arches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-v5.10.214 commit 225da02acdc97af01b6bc6ce1a3e5362bf01d3fb category: bugfix issue: #I9OU4O CVE: CVE-2024-26885 Signed-off-by: wanxiaoqing --------------------------------------- [ Upstream commit 281d464a34f540de166cee74b723e97ac2515ec3 ] The devmap code allocates a number of hash buckets equal to the next power of two of the max_entries value provided when creating the map. When rounding up to the next power of two, the 32-bit variable storing the number of buckets can overflow, and the code checks for overflow by checking if the truncated 32-bit value is equal to 0. However, on 32-bit arches the rounding up itself can overflow mid-way through, because it ends up doing a left-shift of 32 bits on an unsigned long value. If the size of an unsigned long is four bytes, this is undefined behaviour, so there is no guarantee that we'll end up with a nice and tidy 0-value at the end. Syzbot managed to turn this into a crash on arm32 by creating a DEVMAP_HASH with max_entries > 0x80000000 and then trying to update it. Fix this by moving the overflow check to before the rounding up operation.
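The check-before-round-up ordering can be demonstrated standalone. In the sketch below, compute_n_buckets() is an invented helper, the open-coded loop stands in for the kernel's roundup_pow_of_two(), and the error value is simplified from the kernel's -EINVAL.

#include <stdint.h>
#include <stdio.h>

static int compute_n_buckets(uint32_t max_entries, uint32_t *n_buckets)
{
        uint32_t n = 1;

        if (max_entries > UINT32_C(1) << 31) /* check first, as in the fix */
                return -1;

        /* Safe now: this stand-in for roundup_pow_of_two() shifts at
         * most 31 times, so the 32-bit shift that is UB on a 32-bit
         * unsigned long can no longer happen, and the old
         * "n_buckets == 0" test is no longer needed. */
        while (n < max_entries)
                n <<= 1;
        *n_buckets = n;
        return 0;
}

int main(void)
{
        uint32_t n;

        printf("%d\n", compute_n_buckets(3, &n));          /* 0, n = 4 */
        printf("%d\n", compute_n_buckets(UINT32_MAX, &n)); /* -1: too big */
        return 0;
}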
Fixes: 6f9d451ab1a3 ("xdp: Add devmap_hash map type for looking up devices by hashed index") Link: https://lore.kernel.org/r/000000000000ed666a0611af6818@google.com Reported-and-tested-by: syzbot+8cd36f6b65f3cafd400a@syzkaller.appspotmail.com Signed-off-by: Toke Høiland-Jørgensen Message-ID: <20240307120340.99577-2-toke@redhat.com> Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- kernel/bpf/devmap.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index ca2cade2871b..07b5edb2c70f 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -129,13 +129,14 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) bpf_map_init_from_attr(&dtab->map, attr); if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { - dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries); - - if (!dtab->n_buckets) /* Overflow check */ + /* hash table size must be power of 2; roundup_pow_of_two() can + * overflow into UB on 32-bit arches, so check that first + */ + if (dtab->map.max_entries > 1UL << 31) return -EINVAL; - } - if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { + dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries); + dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets, dtab->map.numa_node); if (!dtab->dev_index_head) -- Gitee From 72c6c931becb4687c7cbbf150fb237851836fe28 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Wed, 17 Aug 2022 18:27:29 +0100 Subject: [PATCH 45/56] firmware: arm_scmi: Harden accesses to the reset domains mainline inclusion from mainline-v6.0-rc7 commit e9076ffbcaed5da6c182b144ef9f6e24554af268 category: bugfix issue: #I9OU3I CVE: CVE-2022-48655 Signed-off-by: wanxiaoqing --------------------------------------- Accessing reset domains descriptors by the index upon the SCMI drivers requests through the SCMI reset operations interface can potentially lead to out-of-bound violations if the SCMI driver misbehave. Add an internal consistency check before any such domains descriptors accesses. Link: https://lore.kernel.org/r/20220817172731.1185305-5-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla Signed-off-by: wanxiaoqing --- drivers/firmware/arm_scmi/reset.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/arm_scmi/reset.c b/drivers/firmware/arm_scmi/reset.c index a981a22cfe89..b8388a3b9c06 100644 --- a/drivers/firmware/arm_scmi/reset.c +++ b/drivers/firmware/arm_scmi/reset.c @@ -149,8 +149,12 @@ static int scmi_domain_reset(const struct scmi_handle *handle, u32 domain, struct scmi_xfer *t; struct scmi_msg_reset_domain_reset *dom; struct scmi_reset_info *pi = handle->reset_priv; - struct reset_dom_info *rdom = pi->dom_info + domain; + struct reset_dom_info *rdom; + if (domain >= pi->num_domains) + return -EINVAL; + + rdom = pi->dom_info + domain; if (rdom->async_reset) flags |= ASYNCHRONOUS_RESET; -- Gitee From 7cbd41b10604a6eb96e36f35398b59694f144804 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Tue, 24 Oct 2023 20:30:15 +0200 Subject: [PATCH 46/56] PM / devfreq: Fix buffer overflow in trans_stat_show stable inclusion from stable-v5.10.216 commit 087de000e4f8c878c81d9dd3725f00a1d292980c category: bugfix issue: NA CVE: CVE-2023-52614 Signed-off-by: wanxiaoqing --------------------------------------- commit 08e23d05fa6dc4fc13da0ccf09defdd4bbc92ff4 upstream. Fix buffer overflow in trans_stat_show(). 
Convert simple snprintf to the more secure scnprintf with size of PAGE_SIZE. Add condition checking if we are exceeding PAGE_SIZE and exit early from loop. Also add at the end a warning that we exceeded PAGE_SIZE and that stats is disabled. Return -EFBIG in the case where we don't have enough space to write the full transition table. Also document in the ABI that this function can return -EFBIG error. Link: https://lore.kernel.org/all/20231024183016.14648-2-ansuelsmth@gmail.com/ Cc: stable@vger.kernel.org Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218041 Fixes: e552bbaf5b98 ("PM / devfreq: Add sysfs node for representing frequency transition information.") Signed-off-by: Christian Marangi Signed-off-by: Chanwoo Choi Signed-off-by: Sasha Levin Signed-off-by: Jan Kiszka Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- Documentation/ABI/testing/sysfs-class-devfreq | 3 + drivers/devfreq/devfreq.c | 59 +++++++++++++------ 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq index b8ebff4b1c4c..4514cf9fc7a1 100644 --- a/Documentation/ABI/testing/sysfs-class-devfreq +++ b/Documentation/ABI/testing/sysfs-class-devfreq @@ -66,6 +66,9 @@ Description: echo 0 > /sys/class/devfreq/.../trans_stat + If the transition table is bigger than PAGE_SIZE, reading + this will return an -EFBIG error. + What: /sys/class/devfreq/.../userspace/set_freq Date: September 2011 Contact: MyungJoo Ham diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 0e10c2a05252..1e3f861b9421 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -1638,7 +1638,7 @@ static ssize_t trans_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { struct devfreq *df = to_devfreq(dev); - ssize_t len; + ssize_t len = 0; int i, j; unsigned int max_state; @@ -1647,7 +1647,7 @@ static ssize_t trans_stat_show(struct device *dev, max_state = df->profile->max_state; if (max_state == 0) - return sprintf(buf, "Not Supported.\n"); + return scnprintf(buf, PAGE_SIZE, "Not Supported.\n"); mutex_lock(&df->lock); if (!df->stop_polling && @@ -1657,33 +1657,54 @@ static ssize_t trans_stat_show(struct device *dev, } mutex_unlock(&df->lock); - len = sprintf(buf, " From : To\n"); - len += sprintf(buf + len, " :"); - for (i = 0; i < max_state; i++) - len += sprintf(buf + len, "%10lu", - df->profile->freq_table[i]); + len += scnprintf(buf + len, PAGE_SIZE - len, " From : To\n"); + len += scnprintf(buf + len, PAGE_SIZE - len, " :"); + for (i = 0; i < max_state; i++) { + if (len >= PAGE_SIZE - 1) + break; + len += scnprintf(buf + len, PAGE_SIZE - len, "%10lu", + df->profile->freq_table[i]); + } + if (len >= PAGE_SIZE - 1) + return PAGE_SIZE - 1; - len += sprintf(buf + len, " time(ms)\n"); + len += scnprintf(buf + len, PAGE_SIZE - len, " time(ms)\n"); for (i = 0; i < max_state; i++) { + if (len >= PAGE_SIZE - 1) + break; if (df->profile->freq_table[i] == df->previous_freq) { - len += sprintf(buf + len, "*"); + len += scnprintf(buf + len, PAGE_SIZE - len, "*"); } else { - len += sprintf(buf + len, " "); + len += scnprintf(buf + len, PAGE_SIZE - len, " "); + } + if (len >= PAGE_SIZE - 1) + break; + + len += scnprintf(buf + len, PAGE_SIZE - len, "%10lu:", + df->profile->freq_table[i]); + for (j = 0; j < max_state; j++) { + if (len >= PAGE_SIZE - 1) + break; + len += scnprintf(buf + len, PAGE_SIZE - len, "%10u", + df->stats.trans_table[(i * max_state) + j]); } - len += sprintf(buf + len, 
"%10lu:", - df->profile->freq_table[i]); - for (j = 0; j < max_state; j++) - len += sprintf(buf + len, "%10u", - df->stats.trans_table[(i * max_state) + j]); + if (len >= PAGE_SIZE - 1) + break; + len += scnprintf(buf + len, PAGE_SIZE - len, "%10llu\n", (u64) + jiffies64_to_msecs(df->stats.time_in_state[i])); + } + + if (len < PAGE_SIZE - 1) + len += scnprintf(buf + len, PAGE_SIZE - len, "Total transition : %u\n", + df->stats.total_trans); - len += sprintf(buf + len, "%10llu\n", (u64) - jiffies64_to_msecs(df->stats.time_in_state[i])); + if (len >= PAGE_SIZE - 1) { + pr_warn_once("devfreq transition table exceeds PAGE_SIZE. Disabling\n"); + return -EFBIG; } - len += sprintf(buf + len, "Total transition : %u\n", - df->stats.total_trans); return len; } -- Gitee From b079772353093c8a7f260b04020098c9c10f26a3 Mon Sep 17 00:00:00 2001 From: Bastien Nocera Date: Wed, 27 Mar 2024 15:24:56 +0100 Subject: [PATCH 47/56] Bluetooth: Fix TOCTOU in HCI debugfs implementation stable inclusion from stable-v5.10.215 commit e47088075477ea4129f9b000309f189e1392f138 category: bugfix issue: NA CVE: CVE-2024-24858 Signed-off-by: wanxiaoqing --------------------------------------- commit 7835fcfd132eb88b87e8eb901f88436f63ab60f7 upstream. struct hci_dev members conn_info_max_age, conn_info_min_age, le_conn_max_interval, le_conn_min_interval, le_adv_max_interval, and le_adv_min_interval can be modified from the HCI core code, as well through debugfs. The debugfs implementation, that's only available to privileged users, will check for boundaries, making sure that the minimum value being set is strictly above the maximum value that already exists, and vice-versa. However, as both minimum and maximum values can be changed concurrently to us modifying them, we need to make sure that the value we check is the value we end up using. For example, with ->conn_info_max_age set to 10, conn_info_min_age_set() gets called from vfs handlers to set conn_info_min_age to 8. In conn_info_min_age_set(), this goes through: if (val == 0 || val > hdev->conn_info_max_age) return -EINVAL; Concurrently, conn_info_max_age_set() gets called to set to set the conn_info_max_age to 7: if (val == 0 || val > hdev->conn_info_max_age) return -EINVAL; That check will also pass because we used the old value (10) for conn_info_max_age. After those checks that both passed, the struct hci_dev access is mutex-locked, disabling concurrent access, but that does not matter because the invalid value checks both passed, and we'll end up with conn_info_min_age = 8 and conn_info_max_age = 7 To fix this problem, we need to lock the structure access before so the check and assignment are not interrupted. This fix was originally devised by the BassCheck[1] team, and considered the problem to be an atomicity one. This isn't the case as there aren't any concerns about the variable changing while we check it, but rather after we check it parallel to another change. This patch fixes CVE-2024-24858 and CVE-2024-24857. 
[1] https://sites.google.com/view/basscheck/ Co-developed-by: Gui-Dong Han <2045gemini@gmail.com> Signed-off-by: Gui-Dong Han <2045gemini@gmail.com> Link: https://lore.kernel.org/linux-bluetooth/20231222161317.6255-1-2045gemini@gmail.com/ Link: https://nvd.nist.gov/vuln/detail/CVE-2024-24858 Link: https://lore.kernel.org/linux-bluetooth/20231222162931.6553-1-2045gemini@gmail.com/ Link: https://lore.kernel.org/linux-bluetooth/20231222162310.6461-1-2045gemini@gmail.com/ Link: https://nvd.nist.gov/vuln/detail/CVE-2024-24857 Fixes: 31ad169148df ("Bluetooth: Add conn info lifetime parameters to debugfs") Fixes: 729a1051da6f ("Bluetooth: Expose default LE advertising interval via debugfs") Fixes: 71c3b60ec6d2 ("Bluetooth: Move BR/EDR debugfs file creation into hci_debugfs.c") Signed-off-by: Bastien Nocera Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- net/bluetooth/hci_debugfs.c | 48 ++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index d4efc4aa55af..131bb56bf2af 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -216,10 +216,12 @@ static int conn_info_min_age_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val > hdev->conn_info_max_age) + hci_dev_lock(hdev); + if (val == 0 || val > hdev->conn_info_max_age) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->conn_info_min_age = val; hci_dev_unlock(hdev); @@ -244,10 +246,12 @@ static int conn_info_max_age_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val < hdev->conn_info_min_age) + hci_dev_lock(hdev); + if (val == 0 || val < hdev->conn_info_min_age) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->conn_info_max_age = val; hci_dev_unlock(hdev); @@ -526,10 +530,12 @@ static int sniff_min_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val % 2 || val > hdev->sniff_max_interval) + hci_dev_lock(hdev); + if (val == 0 || val % 2 || val > hdev->sniff_max_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->sniff_min_interval = val; hci_dev_unlock(hdev); @@ -554,10 +560,12 @@ static int sniff_max_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val == 0 || val % 2 || val < hdev->sniff_min_interval) + hci_dev_lock(hdev); + if (val == 0 || val % 2 || val < hdev->sniff_min_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->sniff_max_interval = val; hci_dev_unlock(hdev); @@ -798,10 +806,12 @@ static int conn_min_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) + hci_dev_lock(hdev); + if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_conn_min_interval = val; hci_dev_unlock(hdev); @@ -826,10 +836,12 @@ static int conn_max_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) + hci_dev_lock(hdev); + if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_conn_max_interval = val; hci_dev_unlock(hdev); @@ -938,10 +950,12 @@ static int adv_min_interval_set(void *data, u64 val) { struct hci_dev *hdev = 
data; - if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) + hci_dev_lock(hdev); + if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_adv_min_interval = val; hci_dev_unlock(hdev); @@ -966,10 +980,12 @@ static int adv_max_interval_set(void *data, u64 val) { struct hci_dev *hdev = data; - if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) + hci_dev_lock(hdev); + if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) { + hci_dev_unlock(hdev); return -EINVAL; + } - hci_dev_lock(hdev); hdev->le_adv_max_interval = val; hci_dev_unlock(hdev); -- Gitee From efdae08c4bc03d527886d1727fb09c10fe0a13a5 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 12 Oct 2022 07:50:36 -0700 Subject: [PATCH 48/56] tcp: Clean up kernel listener's reqsk in inet_twsk_purge() stable inclusion from stable-v5.10.216 commit 74e5e5601df4c4a4f1b7bd1da5761acf4aa8b084 category: bugfix issue: NA CVE: CVE-2024-26865 Signed-off-by: wanxiaoqing --------------------------------------- commit 740ea3c4a0b2e326b23d7cdf05472a0e92aa39bc upstream. Eric Dumazet reported a use-after-free related to the per-netns ehash series. [0] When we create a TCP socket from userspace, the socket always holds a refcnt of the netns. This guarantees that a reqsk timer is always fired before netns dismantle. Each reqsk has a refcnt of its listener, so the listener is not freed before the reqsk, and the net is not freed before the listener as well. OTOH, when in-kernel users create a TCP socket, it might not hold a refcnt of its netns. Thus, a reqsk timer can be fired after the netns dismantle and access freed per-netns ehash. To avoid the use-after-free, we need to clean up TCP_NEW_SYN_RECV sockets in inet_twsk_purge() if the netns uses a per-netns ehash. 
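Conceptually, the patched purge loop has to classify every socket in the ehash. The sketch below compresses that decision into a predicate over invented demo types; the real code additionally takes socket references and restarts the RCU walk, and the kernel's netns liveness test is refcount_read(&sock_net(sk)->count).

#include <stdio.h>

enum demo_state { DEMO_ESTABLISHED, DEMO_TIME_WAIT, DEMO_NEW_SYN_RECV };

struct demo_sock {
        enum demo_state state;
        int net_refcount; /* 0 once the netns is being dismantled */
};

static int should_purge(const struct demo_sock *sk)
{
        /* Full sockets pin their netns and are skipped. */
        if (sk->state != DEMO_TIME_WAIT && sk->state != DEMO_NEW_SYN_RECV)
                return 0;
        /* Reap TIME_WAIT and NEW_SYN_RECV entries of a dead netns before
         * a pending reqsk timer can fire against freed ehash memory. */
        return sk->net_refcount == 0;
}

int main(void)
{
        struct demo_sock reqsk = { DEMO_NEW_SYN_RECV, 0 };
        struct demo_sock estab = { DEMO_ESTABLISHED, 0 };

        printf("%d %d\n", should_purge(&reqsk), should_purge(&estab)); /* 1 0 */
        return 0;
}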
[0]: https://lore.kernel.org/netdev/CANn89iLXMup0dRD_Ov79Xt8N9FM0XdhCHEN05sf3eLwxKweM6w@mail.gmail.com/ BUG: KASAN: use-after-free in tcp_or_dccp_get_hashinfo include/net/inet_hashtables.h:181 [inline] BUG: KASAN: use-after-free in reqsk_queue_unlink+0x320/0x350 net/ipv4/inet_connection_sock.c:913 Read of size 8 at addr ffff88807545bd80 by task syz-executor.2/8301 CPU: 1 PID: 8301 Comm: syz-executor.2 Not tainted 6.0.0-syzkaller-02757-gaf7d23f9d96a #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/22/2022 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:317 [inline] print_report.cold+0x2ba/0x719 mm/kasan/report.c:433 kasan_report+0xb1/0x1e0 mm/kasan/report.c:495 tcp_or_dccp_get_hashinfo include/net/inet_hashtables.h:181 [inline] reqsk_queue_unlink+0x320/0x350 net/ipv4/inet_connection_sock.c:913 inet_csk_reqsk_queue_drop net/ipv4/inet_connection_sock.c:927 [inline] inet_csk_reqsk_queue_drop_and_put net/ipv4/inet_connection_sock.c:939 [inline] reqsk_timer_handler+0x724/0x1160 net/ipv4/inet_connection_sock.c:1053 call_timer_fn+0x1a0/0x6b0 kernel/time/timer.c:1474 expire_timers kernel/time/timer.c:1519 [inline] __run_timers.part.0+0x674/0xa80 kernel/time/timer.c:1790 __run_timers kernel/time/timer.c:1768 [inline] run_timer_softirq+0xb3/0x1d0 kernel/time/timer.c:1803 __do_softirq+0x1d0/0x9c8 kernel/softirq.c:571 invoke_softirq kernel/softirq.c:445 [inline] __irq_exit_rcu+0x123/0x180 kernel/softirq.c:650 irq_exit_rcu+0x5/0x20 kernel/softirq.c:662 sysvec_apic_timer_interrupt+0x93/0xc0 arch/x86/kernel/apic/apic.c:1107 Fixes: d1e5e6408b30 ("tcp: Introduce optional per-netns ehash.") Reported-by: syzbot Reported-by: Eric Dumazet Suggested-by: Eric Dumazet Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20221012145036.74960-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski [shaozhengchao: resolved conflicts in 5.10] Signed-off-by: Zhengchao Shao Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- net/ipv4/inet_timewait_sock.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index a00102d7c7fd..03989b154051 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -268,8 +268,21 @@ void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family) rcu_read_lock(); restart: sk_nulls_for_each_rcu(sk, node, &head->chain) { - if (sk->sk_state != TCP_TIME_WAIT) + if (sk->sk_state != TCP_TIME_WAIT) { + /* A kernel listener socket might not hold refcnt for net, + * so reqsk_timer_handler() could be fired after net is + * freed. Userspace listener and reqsk never exist here. 
+ */ + if (unlikely(sk->sk_state == TCP_NEW_SYN_RECV && + hashinfo->pernet)) { + struct request_sock *req = inet_reqsk(sk); + + inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req); + } + continue; + } + tw = inet_twsk(sk); if ((tw->tw_family != family) || refcount_read(&twsk_net(tw)->count)) -- Gitee From ed83ec43b623541b90fa0c152aaad648e16475c5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Mar 2024 12:01:21 -0800 Subject: [PATCH 49/56] tcp: Fix NEW_SYN_RECV handling in inet_twsk_purge() stable inclusion from stable-v5.10.216 commit ab31bc5022b625457c3e87348865565d2a9bdb4c category: bugfix issue: NA CVE: CVE-2024-26865 Signed-off-by: wanxiaoqing --------------------------------------- commit 1c4e97dd2d3c9a3e84f7e26346aa39bc426d3249 upstream. inet_twsk_purge() uses rcu to find TIME_WAIT and NEW_SYN_RECV objects to purge. These objects use SLAB_TYPESAFE_BY_RCU semantic and need special care. We need to use refcount_inc_not_zero(&sk->sk_refcnt). Reuse the existing correct logic I wrote for TIME_WAIT, because both structures have common locations for sk_state, sk_family, and netns pointer. If after the refcount_inc_not_zero() the object fields no longer match the keys, use sock_gen_put(sk) to release the refcount. Then we can call inet_twsk_deschedule_put() for TIME_WAIT, inet_csk_reqsk_queue_drop_and_put() for NEW_SYN_RECV sockets, with BH disabled. Then we need to restart the loop because we had to drop rcu_read_lock(). Fixes: 740ea3c4a0b2 ("tcp: Clean up kernel listener's reqsk in inet_twsk_purge()") Link: https://lore.kernel.org/netdev/CANn89iLvFuuihCtt9PME2uS1WJATnf5fKjDToa1WzVnRzHnPfg@mail.gmail.com/T/#u Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20240308200122.64357-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski [shaozhengchao: resolved conflicts in 5.10] Signed-off-by: Zhengchao Shao Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- net/ipv4/inet_timewait_sock.c | 41 ++++++++++++++++------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 03989b154051..db2f52a50297 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -254,12 +254,12 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm) } EXPORT_SYMBOL_GPL(__inet_twsk_schedule); +/* Remove all non full sockets (TIME_WAIT and NEW_SYN_RECV) for dead netns */ void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family) { - struct inet_timewait_sock *tw; - struct sock *sk; struct hlist_nulls_node *node; unsigned int slot; + struct sock *sk; for (slot = 0; slot <= hashinfo->ehash_mask; slot++) { struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; @@ -268,38 +268,35 @@ void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family) rcu_read_lock(); restart: sk_nulls_for_each_rcu(sk, node, &head->chain) { - if (sk->sk_state != TCP_TIME_WAIT) { - /* A kernel listener socket might not hold refcnt for net, - * so reqsk_timer_handler() could be fired after net is - * freed. Userspace listener and reqsk never exist here.
- */ - if (unlikely(sk->sk_state == TCP_NEW_SYN_RECV && - hashinfo->pernet)) { - struct request_sock *req = inet_reqsk(sk); - - inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req); - } + int state = inet_sk_state_load(sk); + if ((1 << state) & ~(TCPF_TIME_WAIT | + TCPF_NEW_SYN_RECV)) continue; - } - tw = inet_twsk(sk); - if ((tw->tw_family != family) || - refcount_read(&twsk_net(tw)->count)) + if (sk->sk_family != family || + refcount_read(&sock_net(sk)->count)) continue; - if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt))) + if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) continue; - if (unlikely((tw->tw_family != family) || - refcount_read(&twsk_net(tw)->count))) { - inet_twsk_put(tw); + if (unlikely(sk->sk_family != family || + refcount_read(&sock_net(sk)->count))) { + sock_gen_put(sk); goto restart; } rcu_read_unlock(); local_bh_disable(); - inet_twsk_deschedule_put(tw); + if (state == TCP_TIME_WAIT) { + inet_twsk_deschedule_put(inet_twsk(sk)); + } else { + struct request_sock *req = inet_reqsk(sk); + + inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, + req); + } local_bh_enable(); goto restart_rcu; } -- Gitee From b3dd2d04631e76665a84d6b584f062163484b635 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 27 Dec 2023 21:37:02 +0100 Subject: [PATCH 50/56] async: Split async_schedule_node_domain() stable inclusion from stable-v5.10.210 commit 9ef68b58fd00cbb39f6eeacd9f224b99912dde62 category: bugfix issue: NA CVE: CVE-2023-52498 Signed-off-by: wanxiaoqing --------------------------------------- commit 6aa09a5bccd8e224d917afdb4c278fc66aacde4d upstream. In preparation for subsequent changes, split async_schedule_node_domain() in two pieces so as to allow the bottom part of it to be called from a somewhat different code path. No functional impact. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Stanislaw Gruszka Tested-by: Youngmin Nam Reviewed-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- kernel/async.c | 56 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/kernel/async.c b/kernel/async.c index 1746cd65e271..dfa4ed15cc42 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -145,6 +145,39 @@ static void async_run_entry_fn(struct work_struct *work) wake_up(&async_done); } +static async_cookie_t __async_schedule_node_domain(async_func_t func, + void *data, int node, + struct async_domain *domain, + struct async_entry *entry) +{ + async_cookie_t newcookie; + unsigned long flags; + + INIT_LIST_HEAD(&entry->domain_list); + INIT_LIST_HEAD(&entry->global_list); + INIT_WORK(&entry->work, async_run_entry_fn); + entry->func = func; + entry->data = data; + entry->domain = domain; + + spin_lock_irqsave(&async_lock, flags); + + /* allocate cookie and queue */ + newcookie = entry->cookie = next_cookie++; + + list_add_tail(&entry->domain_list, &domain->pending); + if (domain->registered) + list_add_tail(&entry->global_list, &async_global_pending); + + atomic_inc(&entry_count); + spin_unlock_irqrestore(&async_lock, flags); + + /* schedule for execution */ + queue_work_node(node, system_unbound_wq, &entry->work); + + return newcookie; +} + /** * async_schedule_node_domain - NUMA specific version of async_schedule_domain * @func: function to execute asynchronously @@ -186,29 +219,8 @@ async_cookie_t async_schedule_node_domain(async_func_t func, void *data, func(data, newcookie); return newcookie; } - INIT_LIST_HEAD(&entry->domain_list); - INIT_LIST_HEAD(&entry->global_list); - INIT_WORK(&entry->work, async_run_entry_fn); - entry->func = func; - entry->data = data; - entry->domain = domain; - - spin_lock_irqsave(&async_lock, flags); - /* allocate cookie and queue */ - newcookie = entry->cookie = next_cookie++; - - list_add_tail(&entry->domain_list, &domain->pending); - if (domain->registered) - list_add_tail(&entry->global_list, &async_global_pending); - - atomic_inc(&entry_count); - spin_unlock_irqrestore(&async_lock, flags); - - /* schedule for execution */ - queue_work_node(node, system_unbound_wq, &entry->work); - - return newcookie; + return __async_schedule_node_domain(func, data, node, domain, entry); } EXPORT_SYMBOL_GPL(async_schedule_node_domain); -- Gitee From f23ae962d1dec13976843ac57731b765c075ca2b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 27 Dec 2023 21:38:23 +0100 Subject: [PATCH 51/56] async: Introduce async_schedule_dev_nocall() stable inclusion from stable-v5.10.210 commit ac4dcccbe9106a5cec483d2ffa7f628b95340a07 category: bugfix issue: NA CVE: CVE-2023-52498 Signed-off-by: wanxiaoqing --------------------------------------- commit 7d4b5d7a37bdd63a5a3371b988744b060d5bb86f upstream. In preparation for subsequent changes, introduce a specialized variant of async_schedule_dev() that will not invoke the argument function synchronously when it cannot be scheduled for asynchronous execution. The new function, async_schedule_dev_nocall(), will be used for fixing possible deadlocks in the system-wide power management core code. Signed-off-by: Rafael J. Wysocki Reviewed-by: Stanislaw Gruszka for the series. 
Tested-by: Youngmin Nam Reviewed-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- include/linux/async.h | 2 ++ kernel/async.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/include/linux/async.h b/include/linux/async.h index 0a17cd27f348..d5496a520a38 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -90,6 +90,8 @@ async_schedule_dev(async_func_t func, struct device *dev) return async_schedule_node(func, dev, dev_to_node(dev)); } +bool async_schedule_dev_nocall(async_func_t func, struct device *dev); + /** * async_schedule_dev_domain - A device specific version of async_schedule_domain * @func: function to execute asynchronously diff --git a/kernel/async.c b/kernel/async.c index dfa4ed15cc42..5dba7461fc75 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -243,6 +243,35 @@ async_cookie_t async_schedule_node(async_func_t func, void *data, int node) } EXPORT_SYMBOL_GPL(async_schedule_node); +/** + * async_schedule_dev_nocall - A simplified variant of async_schedule_dev() + * @func: function to execute asynchronously + * @dev: device argument to be passed to function + * + * @dev is used as both the argument for the function and to provide NUMA + * context for where to run the function. + * + * If the asynchronous execution of @func is scheduled successfully, return + * true. Otherwise, do nothing and return false, unlike async_schedule_dev() + * that will run the function synchronously then. + */ +bool async_schedule_dev_nocall(async_func_t func, struct device *dev) +{ + struct async_entry *entry; + + entry = kzalloc(sizeof(struct async_entry), GFP_KERNEL); + + /* Give up if there is no memory or too much work. */ + if (!entry || atomic_read(&entry_count) > MAX_WORK) { + kfree(entry); + return false; + } + + __async_schedule_node_domain(func, dev, dev_to_node(dev), + &async_dfl_domain, entry); + return true; +} + /** * async_synchronize_full - synchronize all asynchronous function calls * -- Gitee From 5bbef2fc9847ae97aadd2032e70e5f84f02ec877 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 26 Jan 2021 15:29:40 -0600 Subject: [PATCH 52/56] PM: sleep: Use dev_printk() when possible stable inclusion from stable-v5.10.210 commit 2245a8498569fac4ad2e391f38862a859924cf3c category: bugfix issue: NA CVE: CVE-2023-52498 Signed-off-by: wanxiaoqing --------------------------------------- [ Upstream commit eb23d91af55bc2369fe3f0aa6997e72eb20e16fe ] Use dev_printk() when possible to make messages more consistent with other device-related messages. Signed-off-by: Bjorn Helgaas Signed-off-by: Rafael J. 
Wysocki Stable-dep-of: 7839d0078e0d ("PM: sleep: Fix possible deadlocks in core system-wide PM code") Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/base/power/main.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 1dbaaddf540e..a4714a025315 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -16,6 +16,7 @@ */ #define pr_fmt(fmt) "PM: " fmt +#define dev_fmt pr_fmt #include #include @@ -449,8 +450,8 @@ static void pm_dev_dbg(struct device *dev, pm_message_t state, const char *info) static void pm_dev_err(struct device *dev, pm_message_t state, const char *info, int error) { - pr_err("Device %s failed to %s%s: error %d\n", - dev_name(dev), pm_verb(state.event), info, error); + dev_err(dev, "failed to %s%s: error %d\n", pm_verb(state.event), info, + error); } static void dpm_show_time(ktime_t starttime, pm_message_t state, int error, @@ -1898,8 +1899,8 @@ int dpm_prepare(pm_message_t state) error = 0; continue; } - pr_info("Device %s not prepared for power transition: code %d\n", - dev_name(dev), error); + dev_info(dev, "not prepared for power transition: code %d\n", + error); put_device(dev); break; } -- Gitee From 1bd526ec774f5aefa6ecb08add5a00d4f339ff86 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Nov 2021 18:26:26 +0100 Subject: [PATCH 53/56] PM: sleep: Avoid calling put_device() under dpm_list_mtx stable inclusion from stable-v5.10.210 commit 57df40f8008931f7b8f2206c6c3975055eb4bcb5 category: bugfix issue: NA CVE: CVE-2023-52498 Signed-off-by: wanxiaoqing --------------------------------------- [ Upstream commit 2aa36604e8243698ff22bd5fef0dd0c6bb07ba92 ] It is generally unsafe to call put_device() with dpm_list_mtx held, because the given device's release routine may carry out an action depending on that lock which then may deadlock, so modify the system-wide suspend and resume of devices to always drop dpm_list_mtx before calling put_device() (and adjust white space somewhat while at it). For instance, this prevents the following splat from showing up in the kernel log after a system resume in certain configurations: [ 3290.969514] ====================================================== [ 3290.969517] WARNING: possible circular locking dependency detected [ 3290.969519] 5.15.0+ #2420 Tainted: G S [ 3290.969523] ------------------------------------------------------ [ 3290.969525] systemd-sleep/4553 is trying to acquire lock: [ 3290.969529] ffff888117ab1138 ((wq_completion)hci0#2){+.+.}-{0:0}, at: flush_workqueue+0x87/0x4a0 [ 3290.969554] but task is already holding lock: [ 3290.969556] ffffffff8280fca8 (dpm_list_mtx){+.+.}-{3:3}, at: dpm_resume+0x12e/0x3e0 [ 3290.969571] which lock already depends on the new lock. 
From 1bd526ec774f5aefa6ecb08add5a00d4f339ff86 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki"
Date: Thu, 4 Nov 2021 18:26:26 +0100
Subject: [PATCH 53/56] PM: sleep: Avoid calling put_device() under
 dpm_list_mtx

stable inclusion
from stable-v5.10.210
commit 57df40f8008931f7b8f2206c6c3975055eb4bcb5
category: bugfix
issue: NA
CVE: CVE-2023-52498

Signed-off-by: wanxiaoqing
---------------------------------------

[ Upstream commit 2aa36604e8243698ff22bd5fef0dd0c6bb07ba92 ]

It is generally unsafe to call put_device() with dpm_list_mtx held,
because the given device's release routine may carry out an action
depending on that lock which then may deadlock, so modify the
system-wide suspend and resume of devices to always drop dpm_list_mtx
before calling put_device() (and adjust white space somewhat while
at it).

For instance, this prevents the following splat from showing up in
the kernel log after a system resume in certain configurations:

[ 3290.969514] ======================================================
[ 3290.969517] WARNING: possible circular locking dependency detected
[ 3290.969519] 5.15.0+ #2420 Tainted: G S
[ 3290.969523] ------------------------------------------------------
[ 3290.969525] systemd-sleep/4553 is trying to acquire lock:
[ 3290.969529] ffff888117ab1138 ((wq_completion)hci0#2){+.+.}-{0:0}, at: flush_workqueue+0x87/0x4a0
[ 3290.969554] but task is already holding lock:
[ 3290.969556] ffffffff8280fca8 (dpm_list_mtx){+.+.}-{3:3}, at: dpm_resume+0x12e/0x3e0
[ 3290.969571] which lock already depends on the new lock.
[ 3290.969573] the existing dependency chain (in reverse order) is:
[ 3290.969575] -> #3 (dpm_list_mtx){+.+.}-{3:3}:
[ 3290.969583] __mutex_lock+0x9d/0xa30
[ 3290.969591] device_pm_add+0x2e/0xe0
[ 3290.969597] device_add+0x4d5/0x8f0
[ 3290.969605] hci_conn_add_sysfs+0x43/0xb0 [bluetooth]
[ 3290.969689] hci_conn_complete_evt.isra.71+0x124/0x750 [bluetooth]
[ 3290.969747] hci_event_packet+0xd6c/0x28a0 [bluetooth]
[ 3290.969798] hci_rx_work+0x213/0x640 [bluetooth]
[ 3290.969842] process_one_work+0x2aa/0x650
[ 3290.969851] worker_thread+0x39/0x400
[ 3290.969859] kthread+0x142/0x170
[ 3290.969865] ret_from_fork+0x22/0x30
[ 3290.969872] -> #2 (&hdev->lock){+.+.}-{3:3}:
[ 3290.969881] __mutex_lock+0x9d/0xa30
[ 3290.969887] hci_event_packet+0xba/0x28a0 [bluetooth]
[ 3290.969935] hci_rx_work+0x213/0x640 [bluetooth]
[ 3290.969978] process_one_work+0x2aa/0x650
[ 3290.969985] worker_thread+0x39/0x400
[ 3290.969993] kthread+0x142/0x170
[ 3290.969999] ret_from_fork+0x22/0x30
[ 3290.970004] -> #1 ((work_completion)(&hdev->rx_work)){+.+.}-{0:0}:
[ 3290.970013] process_one_work+0x27d/0x650
[ 3290.970020] worker_thread+0x39/0x400
[ 3290.970028] kthread+0x142/0x170
[ 3290.970033] ret_from_fork+0x22/0x30
[ 3290.970038] -> #0 ((wq_completion)hci0#2){+.+.}-{0:0}:
[ 3290.970047] __lock_acquire+0x15cb/0x1b50
[ 3290.970054] lock_acquire+0x26c/0x300
[ 3290.970059] flush_workqueue+0xae/0x4a0
[ 3290.970066] drain_workqueue+0xa1/0x130
[ 3290.970073] destroy_workqueue+0x34/0x1f0
[ 3290.970081] hci_release_dev+0x49/0x180 [bluetooth]
[ 3290.970130] bt_host_release+0x1d/0x30 [bluetooth]
[ 3290.970195] device_release+0x33/0x90
[ 3290.970201] kobject_release+0x63/0x160
[ 3290.970211] dpm_resume+0x164/0x3e0
[ 3290.970215] dpm_resume_end+0xd/0x20
[ 3290.970220] suspend_devices_and_enter+0x1a4/0xba0
[ 3290.970229] pm_suspend+0x26b/0x310
[ 3290.970236] state_store+0x42/0x90
[ 3290.970243] kernfs_fop_write_iter+0x135/0x1b0
[ 3290.970251] new_sync_write+0x125/0x1c0
[ 3290.970257] vfs_write+0x360/0x3c0
[ 3290.970263] ksys_write+0xa7/0xe0
[ 3290.970269] do_syscall_64+0x3a/0x80
[ 3290.970276] entry_SYSCALL_64_after_hwframe+0x44/0xae
[ 3290.970284] other info that might help us debug this:
[ 3290.970285] Chain exists of: (wq_completion)hci0#2 --> &hdev->lock --> dpm_list_mtx
[ 3290.970297] Possible unsafe locking scenario:
[ 3290.970299]        CPU0                    CPU1
[ 3290.970300]        ----                    ----
[ 3290.970302]   lock(dpm_list_mtx);
[ 3290.970306]                          lock(&hdev->lock);
[ 3290.970310]                          lock(dpm_list_mtx);
[ 3290.970314]   lock((wq_completion)hci0#2);
[ 3290.970319] *** DEADLOCK ***
[ 3290.970321] 7 locks held by systemd-sleep/4553:
[ 3290.970325] #0: ffff888103bcd448 (sb_writers#4){.+.+}-{0:0}, at: ksys_write+0xa7/0xe0
[ 3290.970341] #1: ffff888115a14488 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x103/0x1b0
[ 3290.970355] #2: ffff888100f719e0 (kn->active#233){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x10c/0x1b0
[ 3290.970369] #3: ffffffff82661048 (autosleep_lock){+.+.}-{3:3}, at: state_store+0x12/0x90
[ 3290.970384] #4: ffffffff82658ac8 (system_transition_mutex){+.+.}-{3:3}, at: pm_suspend+0x9f/0x310
[ 3290.970399] #5: ffffffff827f2a48 (acpi_scan_lock){+.+.}-{3:3}, at: acpi_suspend_begin+0x4c/0x80
[ 3290.970416] #6: ffffffff8280fca8 (dpm_list_mtx){+.+.}-{3:3}, at: dpm_resume+0x12e/0x3e0
[ 3290.970428] stack backtrace:
[ 3290.970431] CPU: 3 PID: 4553 Comm: systemd-sleep Tainted: G S 5.15.0+ #2420
[ 3290.970438] Hardware name: Dell Inc. XPS 13 9380/0RYJWW, BIOS 1.5.0 06/03/2019
[ 3290.970441] Call Trace:
[ 3290.970446] dump_stack_lvl+0x44/0x57
[ 3290.970454] check_noncircular+0x105/0x120
[ 3290.970468] ? __lock_acquire+0x15cb/0x1b50
[ 3290.970474] __lock_acquire+0x15cb/0x1b50
[ 3290.970487] lock_acquire+0x26c/0x300
[ 3290.970493] ? flush_workqueue+0x87/0x4a0
[ 3290.970503] ? __raw_spin_lock_init+0x3b/0x60
[ 3290.970510] ? lockdep_init_map_type+0x58/0x240
[ 3290.970519] flush_workqueue+0xae/0x4a0
[ 3290.970526] ? flush_workqueue+0x87/0x4a0
[ 3290.970544] ? drain_workqueue+0xa1/0x130
[ 3290.970552] drain_workqueue+0xa1/0x130
[ 3290.970561] destroy_workqueue+0x34/0x1f0
[ 3290.970572] hci_release_dev+0x49/0x180 [bluetooth]
[ 3290.970624] bt_host_release+0x1d/0x30 [bluetooth]
[ 3290.970687] device_release+0x33/0x90
[ 3290.970695] kobject_release+0x63/0x160
[ 3290.970705] dpm_resume+0x164/0x3e0
[ 3290.970710] ? dpm_resume_early+0x251/0x3b0
[ 3290.970718] dpm_resume_end+0xd/0x20
[ 3290.970723] suspend_devices_and_enter+0x1a4/0xba0
[ 3290.970737] pm_suspend+0x26b/0x310
[ 3290.970746] state_store+0x42/0x90
[ 3290.970755] kernfs_fop_write_iter+0x135/0x1b0
[ 3290.970764] new_sync_write+0x125/0x1c0
[ 3290.970777] vfs_write+0x360/0x3c0
[ 3290.970785] ksys_write+0xa7/0xe0
[ 3290.970794] do_syscall_64+0x3a/0x80
[ 3290.970803] entry_SYSCALL_64_after_hwframe+0x44/0xae
[ 3290.970811] RIP: 0033:0x7f41b1328164
[ 3290.970819] Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 80 00 00 00 00 8b 05 4a d2 2c 00 48 63 ff 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 f3 c3 66 90 55 53 48 89 d5 48 89 f3 48 83
[ 3290.970824] RSP: 002b:00007ffe6ae21b28 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[ 3290.970831] RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007f41b1328164
[ 3290.970836] RDX: 0000000000000004 RSI: 000055965e651070 RDI: 0000000000000004
[ 3290.970839] RBP: 000055965e651070 R08: 000055965e64f390 R09: 00007f41b1e3d1c0
[ 3290.970843] R10: 000000000000000a R11: 0000000000000246 R12: 0000000000000004
[ 3290.970846] R13: 0000000000000001 R14: 000055965e64f2b0 R15: 0000000000000004

Cc: All applicable
Signed-off-by: Rafael J. Wysocki
Stable-dep-of: 7839d0078e0d ("PM: sleep: Fix possible deadlocks in core system-wide PM code")
Signed-off-by: Sasha Levin
Signed-off-by: wanxiaoqing
---
 drivers/base/power/main.c | 84 ++++++++++++++++++++++++++-------------
 1 file changed, 57 insertions(+), 27 deletions(-)

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index a4714a025315..6c334a65644c 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -714,6 +714,7 @@ static void dpm_noirq_resume_devices(pm_message_t state)
 		dev = to_device(dpm_noirq_list.next);
 		get_device(dev);
 		list_move_tail(&dev->power.entry, &dpm_late_early_list);
+
 		mutex_unlock(&dpm_list_mtx);
 
 		if (!is_async(dev)) {
@@ -728,8 +729,9 @@ static void dpm_noirq_resume_devices(pm_message_t state)
 			}
 		}
 
-		mutex_lock(&dpm_list_mtx);
 		put_device(dev);
+
+		mutex_lock(&dpm_list_mtx);
 	}
 	mutex_unlock(&dpm_list_mtx);
 	async_synchronize_full();
@@ -855,6 +857,7 @@ void dpm_resume_early(pm_message_t state)
 		dev = to_device(dpm_late_early_list.next);
 		get_device(dev);
 		list_move_tail(&dev->power.entry, &dpm_suspended_list);
+
 		mutex_unlock(&dpm_list_mtx);
 
 		if (!is_async(dev)) {
@@ -868,8 +871,10 @@ void dpm_resume_early(pm_message_t state)
 				pm_dev_err(dev, state, " early", error);
 			}
 		}
-		mutex_lock(&dpm_list_mtx);
+
 		put_device(dev);
+
+		mutex_lock(&dpm_list_mtx);
 	}
 	mutex_unlock(&dpm_list_mtx);
 	async_synchronize_full();
@@ -1032,7 +1037,12 @@ void dpm_resume(pm_message_t state)
 		}
 		if (!list_empty(&dev->power.entry))
 			list_move_tail(&dev->power.entry, &dpm_prepared_list);
+
+		mutex_unlock(&dpm_list_mtx);
+
 		put_device(dev);
+
+		mutex_lock(&dpm_list_mtx);
 	}
 	mutex_unlock(&dpm_list_mtx);
 	async_synchronize_full();
@@ -1110,14 +1120,16 @@ void dpm_complete(pm_message_t state)
 		get_device(dev);
 		dev->power.is_prepared = false;
 		list_move(&dev->power.entry, &list);
+
 		mutex_unlock(&dpm_list_mtx);
 
 		trace_device_pm_callback_start(dev, "", state.event);
 		device_complete(dev, state);
 		trace_device_pm_callback_end(dev, 0);
 
-		mutex_lock(&dpm_list_mtx);
 		put_device(dev);
+
+		mutex_lock(&dpm_list_mtx);
 	}
 	list_splice(&list, &dpm_list);
 	mutex_unlock(&dpm_list_mtx);
@@ -1302,17 +1314,21 @@ static int dpm_noirq_suspend_devices(pm_message_t state)
 		error = device_suspend_noirq(dev);
 
 		mutex_lock(&dpm_list_mtx);
+
 		if (error) {
 			pm_dev_err(dev, state, " noirq", error);
 			dpm_save_failed_dev(dev_name(dev));
-			put_device(dev);
-			break;
-		}
-		if (!list_empty(&dev->power.entry))
+		} else if (!list_empty(&dev->power.entry)) {
 			list_move(&dev->power.entry, &dpm_noirq_list);
+		}
+
+		mutex_unlock(&dpm_list_mtx);
+
 		put_device(dev);
 
-		if (async_error)
+		mutex_lock(&dpm_list_mtx);
+
+		if (error || async_error)
 			break;
 	}
 	mutex_unlock(&dpm_list_mtx);
@@ -1479,23 +1495,28 @@ int dpm_suspend_late(pm_message_t state)
 		struct device *dev = to_device(dpm_suspended_list.prev);
 
 		get_device(dev);
+
 		mutex_unlock(&dpm_list_mtx);
 
 		error = device_suspend_late(dev);
 
 		mutex_lock(&dpm_list_mtx);
+
 		if (!list_empty(&dev->power.entry))
 			list_move(&dev->power.entry, &dpm_late_early_list);
 
 		if (error) {
 			pm_dev_err(dev, state, " late", error);
 			dpm_save_failed_dev(dev_name(dev));
-			put_device(dev);
-			break;
 		}
+
+		mutex_unlock(&dpm_list_mtx);
+
 		put_device(dev);
 
-		if (async_error)
+		mutex_lock(&dpm_list_mtx);
+
+		if (error || async_error)
 			break;
 	}
 	mutex_unlock(&dpm_list_mtx);
@@ -1755,21 +1776,27 @@ int dpm_suspend(pm_message_t state)
 		struct device *dev = to_device(dpm_prepared_list.prev);
 
 		get_device(dev);
+
 		mutex_unlock(&dpm_list_mtx);
 
 		error = device_suspend(dev);
 
 		mutex_lock(&dpm_list_mtx);
+
 		if (error) {
 			pm_dev_err(dev, state, "", error);
 			dpm_save_failed_dev(dev_name(dev));
-			put_device(dev);
-			break;
-		}
-		if (!list_empty(&dev->power.entry))
+		} else if (!list_empty(&dev->power.entry)) {
 			list_move(&dev->power.entry, &dpm_suspended_list);
+		}
+
+		mutex_unlock(&dpm_list_mtx);
+
 		put_device(dev);
 
-		if (async_error)
+		mutex_lock(&dpm_list_mtx);
+
+		if (error || async_error)
 			break;
 	}
 	mutex_unlock(&dpm_list_mtx);
@@ -1886,6 +1913,7 @@ int dpm_prepare(pm_message_t state)
 		struct device *dev = to_device(dpm_list.next);
 
 		get_device(dev);
+
 		mutex_unlock(&dpm_list_mtx);
 
 		trace_device_pm_callback_start(dev, "", state.event);
@@ -1893,21 +1921,23 @@ int dpm_prepare(pm_message_t state)
 		trace_device_pm_callback_end(dev, error);
 
 		mutex_lock(&dpm_list_mtx);
-		if (error) {
-			if (error == -EAGAIN) {
-				put_device(dev);
-				error = 0;
-				continue;
-			}
+
+		if (!error) {
+			dev->power.is_prepared = true;
+			if (!list_empty(&dev->power.entry))
+				list_move_tail(&dev->power.entry, &dpm_prepared_list);
+		} else if (error == -EAGAIN) {
+			error = 0;
+		} else {
 			dev_info(dev, "not prepared for power transition: code %d\n",
 				 error);
-			put_device(dev);
-			break;
 		}
-		dev->power.is_prepared = true;
-		if (!list_empty(&dev->power.entry))
-			list_move_tail(&dev->power.entry, &dpm_prepared_list);
+
+		mutex_unlock(&dpm_list_mtx);
+
+		put_device(dev);
+
+		mutex_lock(&dpm_list_mtx);
 	}
 	mutex_unlock(&dpm_list_mtx);
 	trace_suspend_resume(TPS("dpm_prepare"), state.event, false);
-- 
Gitee
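The transformation above applies one pattern many times, so it is worth stating once in isolation: a device reference taken under dpm_list_mtx must be dropped only after that lock is released, because put_device() may invoke the release routine, which in turn may need dpm_list_mtx (in the splat above, via a workqueue flushed from bt_host_release()). Schematically, assuming the names used in this file:

	/* unsafe: release routine may run with dpm_list_mtx held */
	mutex_lock(&dpm_list_mtx);
	put_device(dev);	/* may call device_release() -> deadlock */

	/* safe: drop the final reference outside the lock, then retake
	 * the lock for the next list iteration
	 */
	mutex_unlock(&dpm_list_mtx);
	put_device(dev);	/* release routine runs lock-free */
	mutex_lock(&dpm_list_mtx);

The cost is a pair of extra lock operations per device; the list walk stays correct because each device is moved to its target list before the lock is dropped.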
"", error); dpm_save_failed_dev(dev_name(dev)); - put_device(dev); - break; - } - if (!list_empty(&dev->power.entry)) + } else if (!list_empty(&dev->power.entry)) { list_move(&dev->power.entry, &dpm_suspended_list); + } + + mutex_unlock(&dpm_list_mtx); + put_device(dev); - if (async_error) + + mutex_lock(&dpm_list_mtx); + + if (error || async_error) break; } mutex_unlock(&dpm_list_mtx); @@ -1886,6 +1913,7 @@ int dpm_prepare(pm_message_t state) struct device *dev = to_device(dpm_list.next); get_device(dev); + mutex_unlock(&dpm_list_mtx); trace_device_pm_callback_start(dev, "", state.event); @@ -1893,21 +1921,23 @@ int dpm_prepare(pm_message_t state) trace_device_pm_callback_end(dev, error); mutex_lock(&dpm_list_mtx); - if (error) { - if (error == -EAGAIN) { - put_device(dev); - error = 0; - continue; - } + + if (!error) { + dev->power.is_prepared = true; + if (!list_empty(&dev->power.entry)) + list_move_tail(&dev->power.entry, &dpm_prepared_list); + } else if (error == -EAGAIN) { + error = 0; + } else { dev_info(dev, "not prepared for power transition: code %d\n", error); - put_device(dev); - break; } - dev->power.is_prepared = true; - if (!list_empty(&dev->power.entry)) - list_move_tail(&dev->power.entry, &dpm_prepared_list); + + mutex_unlock(&dpm_list_mtx); + put_device(dev); + + mutex_lock(&dpm_list_mtx); } mutex_unlock(&dpm_list_mtx); trace_suspend_resume(TPS("dpm_prepare"), state.event, false); -- Gitee From ed5a9c7274a889c7bcac108eb7376561870737ab Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 16 Dec 2021 20:30:18 +0100 Subject: [PATCH 54/56] PM: sleep: Fix error handling in dpm_prepare() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-v5.10.210 commit 6db18971f73a8312ec547c32274c398e6ca36e14 category: bugfix issue: NA CVE: CVE-2023-52498 Signed-off-by: wanxiaoqing --------------------------------------- [ Upstream commit 544e737dea5ad1a457f25dbddf68761ff25e028b ] Commit 2aa36604e824 ("PM: sleep: Avoid calling put_device() under dpm_list_mtx") forgot to update the while () loop termination condition to also break the loop if error is nonzero, which causes the loop to become infinite if device_prepare() returns an error for one device. Add the missing !error check. Fixes: 2aa36604e824 ("PM: sleep: Avoid calling put_device() under dpm_list_mtx") Signed-off-by: Rafael J. 
From 24ce00f3d82aba07d9cc6c492ca339c59e832225 Mon Sep 17 00:00:00 2001
From: Li zeming
Date: Sun, 26 Mar 2023 06:19:35 +0800
Subject: [PATCH 55/56] PM: core: Remove unnecessary (void *) conversions

stable inclusion
from stable-v5.10.210
commit 8b604883d4cb1fa8d6b7d5d7db44b96465c9010b
category: bugfix
issue: NA
CVE: CVE-2023-52498

Signed-off-by: wanxiaoqing
---------------------------------------

[ Upstream commit 73d73f5ee7fb0c42ff87091d105bee720a9565f1 ]

Assignments from pointer variables of type (void *) do not require
explicit type casts, so remove such type casts from the code in
drivers/base/power/main.c where applicable.

Signed-off-by: Li zeming
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki
Stable-dep-of: 7839d0078e0d ("PM: sleep: Fix possible deadlocks in core system-wide PM code")
Signed-off-by: Sasha Levin
Signed-off-by: wanxiaoqing
---
 drivers/base/power/main.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 0fba3bff97d1..e6840304c43e 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -683,7 +683,7 @@ static bool dpm_async_fn(struct device *dev, async_func_t func)
 
 static void async_resume_noirq(void *data, async_cookie_t cookie)
 {
-	struct device *dev = (struct device *)data;
+	struct device *dev = data;
 	int error;
 
 	error = device_resume_noirq(dev, pm_transition, true);
@@ -822,7 +822,7 @@ static int device_resume_early(struct device *dev, pm_message_t state, bool async)
 
 static void async_resume_early(void *data, async_cookie_t cookie)
 {
-	struct device *dev = (struct device *)data;
+	struct device *dev = data;
 	int error;
 
 	error = device_resume_early(dev, pm_transition, true);
@@ -986,7 +986,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
 
 static void async_resume(void *data, async_cookie_t cookie)
 {
-	struct device *dev = (struct device *)data;
+	struct device *dev = data;
 	int error;
 
 	error = device_resume(dev, pm_transition, true);
@@ -1275,7 +1275,7 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool async)
 
 static void async_suspend_noirq(void *data, async_cookie_t cookie)
 {
-	struct device *dev = (struct device *)data;
+	struct device *dev = data;
 	int error;
 
 	error = __device_suspend_noirq(dev, pm_transition, true);
@@ -1458,7 +1458,7 @@ static int __device_suspend_late(struct device *dev, pm_message_t state, bool async)
 
 static void async_suspend_late(void *data, async_cookie_t cookie)
 {
-	struct device *dev = (struct device *)data;
+	struct device *dev = data;
 	int error;
 
 	error = __device_suspend_late(dev, pm_transition, true);
@@ -1734,7 +1734,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 
 static void async_suspend(void *data, async_cookie_t cookie)
 {
-	struct device *dev = (struct device *)data;
+	struct device *dev = data;
 	int error;
 
 	error = __device_suspend(dev, pm_transition, true);
-- 
Gitee
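This is a pure C language point: void * converts implicitly to any object pointer type, so the removed casts had no effect. A standalone illustration (plain C, hypothetical type):

	struct example { int id; };

	static void callback(void *data)
	{
		struct example *a = data;			/* idiomatic */
		struct example *b = (struct example *)data;	/* legal but redundant */

		(void)a;
		(void)b;
	}

Beyond brevity, omitting the cast avoids masking a future type mismatch behind an explicit conversion the compiler would otherwise flag.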
From 0248982efbe1c7e612e8b4038060774b5a863735 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki"
Date: Wed, 27 Dec 2023 21:41:06 +0100
Subject: [PATCH 56/56] PM: sleep: Fix possible deadlocks in core system-wide
 PM code

stable inclusion
from stable-v5.10.210
commit f46eb832389f162ad13cb780d0b8cde93641990d
category: bugfix
issue: NA
CVE: CVE-2023-52498

Signed-off-by: wanxiaoqing
---------------------------------------

[ Upstream commit 7839d0078e0d5e6cc2fa0b0dfbee71de74f1e557 ]

It is reported that in low-memory situations the system-wide resume core
code deadlocks, because async_schedule_dev() executes its argument
function synchronously if it cannot allocate memory (and not only in
that case) and that function attempts to acquire a mutex that is
already held.

Executing the argument function synchronously from within
dpm_async_fn() may also be problematic for ordering reasons (it may
cause a consumer device's resume callback to be invoked before a
requisite supplier device's one, for example).

Address this by changing the code in question to use
async_schedule_dev_nocall() for scheduling the asynchronous execution
of device suspend and resume functions and to directly run them
synchronously if async_schedule_dev_nocall() returns false.

Link: https://lore.kernel.org/linux-pm/ZYvjiqX6EsL15moe@perf/
Reported-by: Youngmin Nam
Signed-off-by: Rafael J. Wysocki
Reviewed-by: Stanislaw Gruszka
Tested-by: Youngmin Nam
Reviewed-by: Ulf Hansson
Cc: 5.7+ # 5.7+: 6aa09a5bccd8 async: Split async_schedule_node_domain()
Cc: 5.7+ # 5.7+: 7d4b5d7a37bd async: Introduce async_schedule_dev_nocall()
Cc: 5.7+ # 5.7+
Signed-off-by: Sasha Levin
Signed-off-by: wanxiaoqing
---
 drivers/base/power/main.c | 148 ++++++++++++++++++--------------------
 1 file changed, 68 insertions(+), 80 deletions(-)

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index e6840304c43e..fbc57c4fcdd0 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -583,7 +583,7 @@ bool dev_pm_skip_resume(struct device *dev)
 }
 
 /**
- * device_resume_noirq - Execute a "noirq resume" callback for given device.
+ * __device_resume_noirq - Execute a "noirq resume" callback for given device.
  * @dev: Device to handle.
  * @state: PM transition of the system being carried out.
  * @async: If true, the device is being resumed asynchronously.
@@ -591,7 +591,7 @@ bool dev_pm_skip_resume(struct device *dev)
  * The driver of @dev will not receive interrupts while this function is being
  * executed.
  */
-static int device_resume_noirq(struct device *dev, pm_message_t state, bool async)
+static void __device_resume_noirq(struct device *dev, pm_message_t state, bool async)
 {
 	pm_callback_t callback = NULL;
 	const char *info = NULL;
@@ -659,7 +659,13 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn
 Out:
 	complete_all(&dev->power.completion);
 	TRACE_RESUME(error);
-	return error;
+
+	if (error) {
+		suspend_stats.failed_resume_noirq++;
+		dpm_save_failed_step(SUSPEND_RESUME_NOIRQ);
+		dpm_save_failed_dev(dev_name(dev));
+		pm_dev_err(dev, state, async ? " async noirq" : " noirq", error);
" async noirq" : " noirq", error); + } } static bool is_async(struct device *dev) @@ -672,11 +678,15 @@ static bool dpm_async_fn(struct device *dev, async_func_t func) { reinit_completion(&dev->power.completion); - if (is_async(dev)) { - get_device(dev); - async_schedule_dev(func, dev); + if (!is_async(dev)) + return false; + + get_device(dev); + + if (async_schedule_dev_nocall(func, dev)) return true; - } + + put_device(dev); return false; } @@ -684,15 +694,19 @@ static bool dpm_async_fn(struct device *dev, async_func_t func) static void async_resume_noirq(void *data, async_cookie_t cookie) { struct device *dev = data; - int error; - - error = device_resume_noirq(dev, pm_transition, true); - if (error) - pm_dev_err(dev, pm_transition, " async", error); + __device_resume_noirq(dev, pm_transition, true); put_device(dev); } +static void device_resume_noirq(struct device *dev) +{ + if (dpm_async_fn(dev, async_resume_noirq)) + return; + + __device_resume_noirq(dev, pm_transition, false); +} + static void dpm_noirq_resume_devices(pm_message_t state) { struct device *dev; @@ -702,14 +716,6 @@ static void dpm_noirq_resume_devices(pm_message_t state) mutex_lock(&dpm_list_mtx); pm_transition = state; - /* - * Advanced the async threads upfront, - * in case the starting of async threads is - * delayed by non-async resuming devices. - */ - list_for_each_entry(dev, &dpm_noirq_list, power.entry) - dpm_async_fn(dev, async_resume_noirq); - while (!list_empty(&dpm_noirq_list)) { dev = to_device(dpm_noirq_list.next); get_device(dev); @@ -717,17 +723,7 @@ static void dpm_noirq_resume_devices(pm_message_t state) mutex_unlock(&dpm_list_mtx); - if (!is_async(dev)) { - int error; - - error = device_resume_noirq(dev, state, false); - if (error) { - suspend_stats.failed_resume_noirq++; - dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, state, " noirq", error); - } - } + device_resume_noirq(dev); put_device(dev); @@ -757,14 +753,14 @@ void dpm_resume_noirq(pm_message_t state) } /** - * device_resume_early - Execute an "early resume" callback for given device. + * __device_resume_early - Execute an "early resume" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. * * Runtime PM is disabled for @dev while this function is being executed. */ -static int device_resume_early(struct device *dev, pm_message_t state, bool async) +static void __device_resume_early(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -817,21 +813,31 @@ static int device_resume_early(struct device *dev, pm_message_t state, bool asyn pm_runtime_enable(dev); complete_all(&dev->power.completion); - return error; + + if (error) { + suspend_stats.failed_resume_early++; + dpm_save_failed_step(SUSPEND_RESUME_EARLY); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? 
" async early" : " early", error); + } } static void async_resume_early(void *data, async_cookie_t cookie) { struct device *dev = data; - int error; - - error = device_resume_early(dev, pm_transition, true); - if (error) - pm_dev_err(dev, pm_transition, " async", error); + __device_resume_early(dev, pm_transition, true); put_device(dev); } +static void device_resume_early(struct device *dev) +{ + if (dpm_async_fn(dev, async_resume_early)) + return; + + __device_resume_early(dev, pm_transition, false); +} + /** * dpm_resume_early - Execute "early resume" callbacks for all devices. * @state: PM transition of the system being carried out. @@ -845,14 +851,6 @@ void dpm_resume_early(pm_message_t state) mutex_lock(&dpm_list_mtx); pm_transition = state; - /* - * Advanced the async threads upfront, - * in case the starting of async threads is - * delayed by non-async resuming devices. - */ - list_for_each_entry(dev, &dpm_late_early_list, power.entry) - dpm_async_fn(dev, async_resume_early); - while (!list_empty(&dpm_late_early_list)) { dev = to_device(dpm_late_early_list.next); get_device(dev); @@ -860,17 +858,7 @@ void dpm_resume_early(pm_message_t state) mutex_unlock(&dpm_list_mtx); - if (!is_async(dev)) { - int error; - - error = device_resume_early(dev, state, false); - if (error) { - suspend_stats.failed_resume_early++; - dpm_save_failed_step(SUSPEND_RESUME_EARLY); - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, state, " early", error); - } - } + device_resume_early(dev); put_device(dev); @@ -894,12 +882,12 @@ void dpm_resume_start(pm_message_t state) EXPORT_SYMBOL_GPL(dpm_resume_start); /** - * device_resume - Execute "resume" callbacks for given device. + * __device_resume - Execute "resume" callbacks for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being resumed asynchronously. */ -static int device_resume(struct device *dev, pm_message_t state, bool async) +static void __device_resume(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -981,20 +969,30 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) TRACE_RESUME(error); - return error; + if (error) { + suspend_stats.failed_resume++; + dpm_save_failed_step(SUSPEND_RESUME); + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? " async" : "", error); + } } static void async_resume(void *data, async_cookie_t cookie) { struct device *dev = data; - int error; - error = device_resume(dev, pm_transition, true); - if (error) - pm_dev_err(dev, pm_transition, " async", error); + __device_resume(dev, pm_transition, true); put_device(dev); } +static void device_resume(struct device *dev) +{ + if (dpm_async_fn(dev, async_resume)) + return; + + __device_resume(dev, pm_transition, false); +} + /** * dpm_resume - Execute "resume" callbacks for non-sysdev devices. * @state: PM transition of the system being carried out. 
@@ -1014,27 +1012,17 @@ void dpm_resume(pm_message_t state)
 	pm_transition = state;
 	async_error = 0;
 
-	list_for_each_entry(dev, &dpm_suspended_list, power.entry)
-		dpm_async_fn(dev, async_resume);
-
 	while (!list_empty(&dpm_suspended_list)) {
 		dev = to_device(dpm_suspended_list.next);
+
 		get_device(dev);
 
-		if (!is_async(dev)) {
-			int error;
 
-			mutex_unlock(&dpm_list_mtx);
+		mutex_unlock(&dpm_list_mtx);
+
+		device_resume(dev);
 
-			error = device_resume(dev, state, false);
-			if (error) {
-				suspend_stats.failed_resume++;
-				dpm_save_failed_step(SUSPEND_RESUME);
-				dpm_save_failed_dev(dev_name(dev));
-				pm_dev_err(dev, state, "", error);
-			}
+		mutex_lock(&dpm_list_mtx);
 
-			mutex_lock(&dpm_list_mtx);
-		}
 		if (!list_empty(&dev->power.entry))
 			list_move_tail(&dev->power.entry, &dpm_prepared_list);
-- 
Gitee
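Taken together, the series leaves every suspend/resume phase with the same shape: a __device_*() worker shared by the async callback and the synchronous path, plus a small wrapper that tries dpm_async_fn() first and falls back to a direct synchronous call. The reference-count discipline in the reworked dpm_async_fn() is the subtle part, restated here with the ownership annotated (code as in the diff above, comments added):

	static bool dpm_async_fn(struct device *dev, async_func_t func)
	{
		reinit_completion(&dev->power.completion);

		if (!is_async(dev))
			return false;

		get_device(dev);	/* reference owned by the async callback */

		if (async_schedule_dev_nocall(func, dev))
			return true;	/* func will put_device() when it finishes */

		/*
		 * Not queued and, crucially, not executed inline either:
		 * drop the reference here and let the caller run the
		 * synchronous variant itself, in a context where
		 * dpm_list_mtx is not held.
		 */
		put_device(dev);

		return false;
	}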