From bf0f34ec36ded132e197b0fb0312eaf1d3ac4373 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Sat, 31 Aug 2024 19:48:31 +0800 Subject: [PATCH 01/11] crypto: hisilicon/qm - inject error before stopping queue mainline inclusion from mainline-v6.12-rc1 commit b04f06fc0243600665b3b50253869533b7938468 category: bugfix issue: #IB4DGU CVE: CVE-2024-47730 Signed-off-by: May27May --------------------------------------- [ Upstream commit b04f06fc0243600665b3b50253869533b7938468 ] The master ooo cannot be completely closed when the accelerator core reports memory error. Therefore, the driver needs to inject the qm error to close the master ooo. Currently, the qm error is injected after stopping queue, memory may be released immediately after stopping queue, causing the device to access the released memory. Therefore, error is injected to close master ooo before stopping queue to ensure that the device does not access the released memory. Fixes: 6c6dd5802c2d ("crypto: hisilicon/qm - add controller reset interface") Signed-off-by: Weili Qian Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: May27May --- drivers/crypto/hisilicon/qm.c | 47 ++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 530f23116d7c..19ba226b7f74 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -3376,6 +3376,28 @@ static int qm_set_msi(struct hisi_qm *qm, bool set) return 0; } +static void qm_dev_ecc_mbit_handle(struct hisi_qm *qm) +{ + u32 nfe_enb = 0; + + /* Kunpeng930 hardware automatically close master ooo when NFE occurs */ + if (qm->ver >= QM_HW_V3) + return; + + if (!qm->err_status.is_dev_ecc_mbit && + qm->err_status.is_qm_ecc_mbit && + qm->err_ini->close_axi_master_ooo) { + qm->err_ini->close_axi_master_ooo(qm); + } else if (qm->err_status.is_dev_ecc_mbit && + !qm->err_status.is_qm_ecc_mbit && + !qm->err_ini->close_axi_master_ooo) { + nfe_enb = readl(qm->io_base + QM_RAS_NFE_ENABLE); + writel(nfe_enb & QM_RAS_NFE_MBIT_DISABLE, + qm->io_base + QM_RAS_NFE_ENABLE); + writel(QM_ECC_MBIT, qm->io_base + QM_ABNORMAL_INT_SET); + } +} + static int qm_vf_reset_prepare(struct hisi_qm *qm, enum qm_stop_reason stop_reason) { @@ -3441,6 +3463,8 @@ static int qm_controller_reset_prepare(struct hisi_qm *qm) } } + qm_dev_ecc_mbit_handle(qm); + ret = hisi_qm_stop(qm, QM_SOFT_RESET); if (ret) { pci_err(pdev, "Fails to stop QM!\n"); @@ -3450,27 +3474,6 @@ static int qm_controller_reset_prepare(struct hisi_qm *qm) return 0; } -static void qm_dev_ecc_mbit_handle(struct hisi_qm *qm) -{ - u32 nfe_enb = 0; - - if (!qm->err_status.is_dev_ecc_mbit && - qm->err_status.is_qm_ecc_mbit && - qm->err_ini->close_axi_master_ooo) { - - qm->err_ini->close_axi_master_ooo(qm); - - } else if (qm->err_status.is_dev_ecc_mbit && - !qm->err_status.is_qm_ecc_mbit && - !qm->err_ini->close_axi_master_ooo) { - - nfe_enb = readl(qm->io_base + QM_RAS_NFE_ENABLE); - writel(nfe_enb & QM_RAS_NFE_MBIT_DISABLE, - qm->io_base + QM_RAS_NFE_ENABLE); - writel(QM_ECC_MBIT, qm->io_base + QM_ABNORMAL_INT_SET); - } -} - static int qm_soft_reset(struct hisi_qm *qm) { struct pci_dev *pdev = qm->pdev; @@ -3496,8 +3499,6 @@ static int qm_soft_reset(struct hisi_qm *qm) return ret; } - qm_dev_ecc_mbit_handle(qm); - /* OOO register set and check */ writel(ACC_MASTER_GLOBAL_CTRL_SHUTDOWN, qm->io_base + ACC_MASTER_GLOBAL_CTRL); -- Gitee From dc1c84ad722312b4d9ed765a78bfc8dba960ddd4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 13 Sep 2024 21:17:50 +0200 Subject: [PATCH 02/11] bpf: Zero former ARG_PTR_TO_{LONG,INT} args in case of error mainline inclusion from mainline-v6.12-rc1 commit 4b3786a6c5397dc220b1483d8e2f4867743e966f category: bugfix issue: #IB4DGU CVE: CVE-2024-47728 Signed-off-by: May27May --------------------------------------- For all non-tracing helpers which formerly had ARG_PTR_TO_{LONG,INT} as input arguments, zero the value for the case of an error as otherwise it could leak memory. For tracing, it is not needed given CAP_PERFMON can already read all kernel memory anyway hence bpf_get_func_arg() and bpf_get_func_ret() is skipped in here. Also, the MTU helpers mtu_len pointer value is being written but also read. Technically, the MEM_UNINIT should not be there in order to always force init. Removing MEM_UNINIT needs more verifier rework though: MEM_UNINIT right now implies two things actually: i) write into memory, ii) memory does not have to be initialized. If we lift MEM_UNINIT, it then becomes: i) read into memory, ii) memory must be initialized. This means that for bpf_*_check_mtu() we're readding the issue we're trying to fix, that is, it would then be able to write back into things like .rodata BPF maps. Follow-up work will rework the MEM_UNINIT semantics such that the intent can be better expressed. For now just clear the *mtu_len on error path which can be lifted later again. Fixes: 8a67f2de9b1d ("bpf: expose bpf_strtol and bpf_strtoul to all program types") Fixes: d7a4cb9b6705 ("bpf: Introduce bpf_strtol and bpf_strtoul helpers") Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/e5edd241-59e7-5e39-0ee5-a51e31b6840a@iogearbox.net Link: https://lore.kernel.org/r/20240913191754.13290-5-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov Signed-off-by: May27May --- kernel/bpf/helpers.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 4bb5921a7d21..83fd642f2be7 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -486,6 +486,7 @@ BPF_CALL_4(bpf_strtol, const char *, buf, size_t, buf_len, u64, flags, long long _res; int err; + *res = 0; err = __bpf_strtoll(buf, buf_len, flags, &_res); if (err < 0) return err; @@ -512,6 +513,7 @@ BPF_CALL_4(bpf_strtoul, const char *, buf, size_t, buf_len, u64, flags, bool is_negative; int err; + *res = 0; err = __bpf_strtoull(buf, buf_len, flags, &_res, &is_negative); if (err < 0) return err; -- Gitee From 07405d5f3bf5ec3b960479e9790c5f43c11d24a9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Mar 2024 22:08:54 +0100 Subject: [PATCH 03/11] wifi: mac80211: don't use rate mask for scanning mainline inclusion from mainline-v6.9-rc6 commit ab9177d83c040eba58387914077ebca56f14fae6 category: bugfix issue: #IB4DGU CVE: CVE-2024-47738 Signed-off-by: May27May --------------------------------------- The rate mask is intended for use during operation, and can be set to only have masks for the currently active band. As such, it cannot be used for scanning which can be on other bands as well. Simply ignore the rate masks during scanning to avoid warnings from incorrect settings. Reported-by: syzbot+fdc5123366fb9c3fdc6d@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=fdc5123366fb9c3fdc6d Co-developed-by: Dmitry Antipov Signed-off-by: Dmitry Antipov Tested-by: Dmitry Antipov Link: https://msgid.link/20240326220854.9594cbb418ca.I7f86c0ba1f98cf7e27c2bacf6c2d417200ecea5c@changeid Signed-off-by: Johannes Berg Signed-off-by: May27May --- include/net/mac80211.h | 3 +++ net/mac80211/rate.c | 6 +++++- net/mac80211/scan.c | 1 + net/mac80211/tx.c | 13 +++++++++---- 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 6ff49c13717b..8e5182f15b52 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -856,6 +856,8 @@ enum mac80211_tx_info_flags { * it can be sent out. * @IEEE80211_TX_CTRL_NO_SEQNO: Do not overwrite the sequence number that * has already been assigned to this frame. + * @IEEE80211_TX_CTRL_SCAN_TX: Indicates that this frame is transmitted + * due to scanning, not in normal operation on the interface. * * These flags are used in tx_info->control.flags. */ @@ -868,6 +870,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP = BIT(5), IEEE80211_TX_INTCFL_NEED_TXPROCESSING = BIT(6), IEEE80211_TX_CTRL_NO_SEQNO = BIT(7), + IEEE80211_TX_CTRL_SCAN_TX = BIT(10), }; /* diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 63652c39c8e0..1782444c1226 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -873,6 +873,7 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif, struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_supported_band *sband; + u32 mask = ~0; rate_control_fill_sta_table(sta, info, dest, max_rates); @@ -885,9 +886,12 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif, if (ieee80211_is_data(hdr->frame_control)) rate_control_apply_mask(sdata, sta, sband, dest, max_rates); + if (!(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX)) + mask = sdata->rc_rateidx_mask[info->band]; + if (dest[0].idx < 0) __rate_control_send_low(&sdata->local->hw, sband, sta, info, - sdata->rc_rateidx_mask[info->band]); + mask); if (sta) rate_fixup_ratelist(vif, sband, info, dest, max_rates); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index b241ff8c015a..9ed9646d5aa6 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -642,6 +642,7 @@ static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata, cpu_to_le16(IEEE80211_SN_TO_SEQ(sn)); } IEEE80211_SKB_CB(skb)->flags |= tx_flags; + IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_SCAN_TX; ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band); } } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ef8bb5dfd110..6ed89183b258 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -694,11 +694,16 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.bss_conf = &tx->sdata->vif.bss_conf; txrc.skb = tx->skb; txrc.reported_rate.idx = -1; - txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band]; - if (tx->sdata->rc_has_mcs_mask[info->band]) - txrc.rate_idx_mcs_mask = - tx->sdata->rc_rateidx_mcs_mask[info->band]; + if (unlikely(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX)) { + txrc.rate_idx_mask = ~0; + } else { + txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band]; + + if (tx->sdata->rc_has_mcs_mask[info->band]) + txrc.rate_idx_mcs_mask = + tx->sdata->rc_rateidx_mcs_mask[info->band]; + } txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP || tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT || -- Gitee From a04261aae0f21cdac630d62be2046ef9d49c0481 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Mon, 29 Jul 2024 15:48:16 +0800 Subject: [PATCH 04/11] wifi: mac80211: don't use rate mask for offchannel TX either mainline inclusion from mainline-v6.12-rc1 commit e7a7ef9a0742dbd0818d5b15fba2c5313ace765b category: bugfix issue: #IB4DGU CVE: CVE-2024-47738 Signed-off-by: May27May --------------------------------------- Like the commit ab9177d83c04 ("wifi: mac80211: don't use rate mask for scanning"), ignore incorrect settings to avoid no supported rate warning reported by syzbot. The syzbot did bisect and found cause is commit 9df66d5b9f45 ("cfg80211: fix default HE tx bitrate mask in 2G band"), which however corrects bitmask of HE MCS and recognizes correctly settings of empty legacy rate plus HE MCS rate instead of returning -EINVAL. As suggestions [1], follow the change of SCAN TX to consider this case of offchannel TX as well. [1] https://lore.kernel.org/linux-wireless/6ab2dc9c3afe753ca6fdcdd1421e7a1f47e87b84.camel@sipsolutions.net/T/#m2ac2a6d2be06a37c9c47a3d8a44b4f647ed4f024 Reported-by: syzbot+8dd98a9e98ee28dc484a@syzkaller.appspotmail.com Closes: https://lore.kernel.org/linux-wireless/000000000000fdef8706191a3f7b@google.com/ Fixes: 9df66d5b9f45 ("cfg80211: fix default HE tx bitrate mask in 2G band") Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20240729074816.20323-1-pkshih@realtek.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 ++++--- net/mac80211/offchannel.c | 1 + net/mac80211/rate.c | 2 +- net/mac80211/scan.c | 2 +- net/mac80211/tx.c | 2 +- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8e5182f15b52..0a35826fdeb9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -856,8 +856,9 @@ enum mac80211_tx_info_flags { * it can be sent out. * @IEEE80211_TX_CTRL_NO_SEQNO: Do not overwrite the sequence number that * has already been assigned to this frame. - * @IEEE80211_TX_CTRL_SCAN_TX: Indicates that this frame is transmitted - * due to scanning, not in normal operation on the interface. + * @IEEE80211_TX_CTRL_DONT_USE_RATE_MASK: Don't use rate mask for this frame + * which is transmitted due to scanning or offchannel TX, not in normal + * operation on the interface. * * These flags are used in tx_info->control.flags. */ @@ -870,7 +871,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP = BIT(5), IEEE80211_TX_INTCFL_NEED_TXPROCESSING = BIT(6), IEEE80211_TX_CTRL_NO_SEQNO = BIT(7), - IEEE80211_TX_CTRL_SCAN_TX = BIT(10), + IEEE80211_TX_CTRL_DONT_USE_RATE_MASK = BIT(10), }; /* diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 853c9a369d72..d22b8cd8c7c2 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -902,6 +902,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, } IEEE80211_SKB_CB(skb)->flags = flags; + IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_DONT_USE_RATE_MASK; skb->dev = sdata->dev; diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 1782444c1226..ad6fefd1fac0 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -886,7 +886,7 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif, if (ieee80211_is_data(hdr->frame_control)) rate_control_apply_mask(sdata, sta, sband, dest, max_rates); - if (!(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX)) + if (!(info->control.flags & IEEE80211_TX_CTRL_DONT_USE_RATE_MASK)) mask = sdata->rc_rateidx_mask[info->band]; if (dest[0].idx < 0) diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 9ed9646d5aa6..5197286161a5 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -642,7 +642,7 @@ static void ieee80211_send_scan_probe_req(struct ieee80211_sub_if_data *sdata, cpu_to_le16(IEEE80211_SN_TO_SEQ(sn)); } IEEE80211_SKB_CB(skb)->flags |= tx_flags; - IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_SCAN_TX; + IEEE80211_SKB_CB(skb)->control.flags |= IEEE80211_TX_CTRL_DONT_USE_RATE_MASK; ieee80211_tx_skb_tid_band(sdata, skb, 7, channel->band); } } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 6ed89183b258..cab11a92d4c5 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -695,7 +695,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.skb = tx->skb; txrc.reported_rate.idx = -1; - if (unlikely(info->control.flags & IEEE80211_TX_CTRL_SCAN_TX)) { + if (unlikely(info->control.flags & IEEE80211_TX_CTRL_DONT_USE_RATE_MASK)) { txrc.rate_idx_mask = ~0; } else { txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[info->band]; -- Gitee From 1f3800c687ab05f12bd97946d6a7f4ac74c59a8a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 Sep 2024 08:31:47 +0000 Subject: [PATCH 05/11] ipv6: avoid possible NULL deref in rt6_uncached_list_flush_dev() mainline inclusion from mainline-v6.12-rc1 commit 04ccecfa959d3b9ae7348780d8e379c6486176ac category: bugfix issue: #IB4DGU CVE: CVE-2024-47707 Signed-off-by: May27May --------------------------------------- Blamed commit accidentally removed a check for rt->rt6i_idev being NULL, as spotted by syzbot: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] CPU: 1 UID: 0 PID: 10998 Comm: syz-executor Not tainted 6.11.0-rc6-syzkaller-00208-g625403177711 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 RIP: 0010:rt6_uncached_list_flush_dev net/ipv6/route.c:177 [inline] RIP: 0010:rt6_disable_ip+0x33e/0x7e0 net/ipv6/route.c:4914 Code: 41 80 3c 04 00 74 0a e8 90 d0 9b f7 48 8b 7c 24 08 48 8b 07 48 89 44 24 10 4c 89 f0 48 c1 e8 03 48 b9 00 00 00 00 00 fc ff df <80> 3c 08 00 74 08 4c 89 f7 e8 64 d0 9b f7 48 8b 44 24 18 49 39 06 RSP: 0018:ffffc900047374e0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 1ffff1100fdf8f33 RCX: dffffc0000000000 RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffff88807efc78c0 RBP: ffffc900047375d0 R08: 0000000000000003 R09: fffff520008e6e8c R10: dffffc0000000000 R11: fffff520008e6e8c R12: 1ffff1100fdf8f18 R13: ffff88807efc7998 R14: 0000000000000000 R15: ffff88807efc7930 FS: 0000000000000000(0000) GS:ffff8880b8900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020002a80 CR3: 0000000022f62000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: addrconf_ifdown+0x15d/0x1bd0 net/ipv6/addrconf.c:3856 addrconf_notify+0x3cb/0x1020 notifier_call_chain+0x19f/0x3e0 kernel/notifier.c:93 call_netdevice_notifiers_extack net/core/dev.c:2032 [inline] call_netdevice_notifiers net/core/dev.c:2046 [inline] unregister_netdevice_many_notify+0xd81/0x1c40 net/core/dev.c:11352 unregister_netdevice_many net/core/dev.c:11414 [inline] unregister_netdevice_queue+0x303/0x370 net/core/dev.c:11289 unregister_netdevice include/linux/netdevice.h:3129 [inline] __tun_detach+0x6b9/0x1600 drivers/net/tun.c:685 tun_detach drivers/net/tun.c:701 [inline] tun_chr_close+0x108/0x1b0 drivers/net/tun.c:3510 __fput+0x24a/0x8a0 fs/file_table.c:422 task_work_run+0x24f/0x310 kernel/task_work.c:228 exit_task_work include/linux/task_work.h:40 [inline] do_exit+0xa2f/0x27f0 kernel/exit.c:882 do_group_exit+0x207/0x2c0 kernel/exit.c:1031 __do_sys_exit_group kernel/exit.c:1042 [inline] __se_sys_exit_group kernel/exit.c:1040 [inline] __x64_sys_exit_group+0x3f/0x40 kernel/exit.c:1040 x64_sys_call+0x2634/0x2640 arch/x86/include/generated/asm/syscalls_64.h:232 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f1acc77def9 Code: Unable to access opcode bytes at 0x7f1acc77decf. RSP: 002b:00007ffeb26fa738 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f1acc77def9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000043 RBP: 00007f1acc7dd508 R08: 00007ffeb26f84d7 R09: 0000000000000003 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 R13: 0000000000000003 R14: 00000000ffffffff R15: 00007ffeb26fa8e0 Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:rt6_uncached_list_flush_dev net/ipv6/route.c:177 [inline] RIP: 0010:rt6_disable_ip+0x33e/0x7e0 net/ipv6/route.c:4914 Code: 41 80 3c 04 00 74 0a e8 90 d0 9b f7 48 8b 7c 24 08 48 8b 07 48 89 44 24 10 4c 89 f0 48 c1 e8 03 48 b9 00 00 00 00 00 fc ff df <80> 3c 08 00 74 08 4c 89 f7 e8 64 d0 9b f7 48 8b 44 24 18 49 39 06 RSP: 0018:ffffc900047374e0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 1ffff1100fdf8f33 RCX: dffffc0000000000 RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffff88807efc78c0 RBP: ffffc900047375d0 R08: 0000000000000003 R09: fffff520008e6e8c R10: dffffc0000000000 R11: fffff520008e6e8c R12: 1ffff1100fdf8f18 R13: ffff88807efc7998 R14: 0000000000000000 R15: ffff88807efc7930 FS: 0000000000000000(0000) GS:ffff8880b8900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020002a80 CR3: 0000000022f62000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: e332bc67cf5e ("ipv6: Don't call with rt6_uncached_list_flush_dev") Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Reviewed-by: David Ahern Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/20240913083147.3095442-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: May27May --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 86be64658d76..93e58276ddfa 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -174,7 +174,7 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) struct inet6_dev *rt_idev = rt->rt6i_idev; struct net_device *rt_dev = rt->dst.dev; - if (rt_idev->dev == dev) { + if (rt_idev && rt_idev->dev == dev) { rt->rt6i_idev = in6_dev_get(loopback_dev); in6_dev_put(rt_idev); } -- Gitee From 27e1ee55e862537593026f272ccb6802d8a7a43e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Aug 2024 14:46:39 +0000 Subject: [PATCH 06/11] icmp: change the order of rate limits mainline inclusion from mainline-v6.12-rc1 commit 8c2bd38b95f75f3d2a08c93e35303e26d480d24e category: bugfix issue: #IB4DGU CVE: CVE-2024-47678 Signed-off-by: May27May --------------------------------------- ICMP messages are ratelimited : After the blamed commits, the two rate limiters are applied in this order: 1) host wide ratelimit (icmp_global_allow()) 2) Per destination ratelimit (inetpeer based) In order to avoid side-channels attacks, we need to apply the per destination check first. This patch makes the following change : 1) icmp_global_allow() checks if the host wide limit is reached. But credits are not yet consumed. This is deferred to 3) 2) The per destination limit is checked/updated. This might add a new node in inetpeer tree. 3) icmp_global_consume() consumes tokens if prior operations succeeded. This means that host wide ratelimit is still effective in keeping inetpeer tree small even under DDOS. As a bonus, I removed icmp_global.lock as the fast path can use a lock-free operation. Fixes: c0303efeab73 ("net: reduce cycles spend on ICMP replies that gets rate limited") Fixes: 4cdf507d5452 ("icmp: add a global rate limitation") Reported-by: Keyu Man Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Cc: Jesper Dangaard Brouer Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20240829144641.3880376-2-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: May27May --- include/net/ip.h | 2 + net/ipv4/icmp.c | 103 ++++++++++++++++++++++++++--------------------- net/ipv6/icmp.c | 28 ++++++++----- 3 files changed, 76 insertions(+), 57 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 4b775af57268..902c4a0ddc38 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -751,6 +751,8 @@ static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) } bool icmp_global_allow(void); +void icmp_global_consume(void); + extern int sysctl_icmp_msgs_per_sec; extern int sysctl_icmp_msgs_burst; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index a1aacf5e75a6..7ebbc99d7269 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -229,57 +229,59 @@ int sysctl_icmp_msgs_per_sec __read_mostly = 1000; int sysctl_icmp_msgs_burst __read_mostly = 50; static struct { - spinlock_t lock; - u32 credit; + atomic_t credit; u32 stamp; -} icmp_global = { - .lock = __SPIN_LOCK_UNLOCKED(icmp_global.lock), -}; +} icmp_global; /** * icmp_global_allow - Are we allowed to send one more ICMP message ? * * Uses a token bucket to limit our ICMP messages to ~sysctl_icmp_msgs_per_sec. * Returns false if we reached the limit and can not send another packet. - * Note: called with BH disabled + * Works in tandem with icmp_global_consume(). */ bool icmp_global_allow(void) { - u32 credit, delta, incr = 0, now = (u32)jiffies; - bool rc = false; + u32 delta, now, oldstamp; + int incr, new, old; - /* Check if token bucket is empty and cannot be refilled - * without taking the spinlock. The READ_ONCE() are paired - * with the following WRITE_ONCE() in this same function. + /* Note: many cpus could find this condition true. + * Then later icmp_global_consume() could consume more credits, + * this is an acceptable race. */ - if (!READ_ONCE(icmp_global.credit)) { - delta = min_t(u32, now - READ_ONCE(icmp_global.stamp), HZ); - if (delta < HZ / 50) - return false; - } + if (atomic_read(&icmp_global.credit) > 0) + return true; - spin_lock(&icmp_global.lock); - delta = min_t(u32, now - icmp_global.stamp, HZ); - if (delta >= HZ / 50) { - incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ; - if (incr) - WRITE_ONCE(icmp_global.stamp, now); - } - credit = min_t(u32, icmp_global.credit + incr, - READ_ONCE(sysctl_icmp_msgs_burst)); - if (credit) { - /* We want to use a credit of one in average, but need to randomize - * it for security reasons. - */ - credit = max_t(int, credit - prandom_u32_max(3), 0); - rc = true; + now = jiffies; + oldstamp = READ_ONCE(icmp_global.stamp); + delta = min_t(u32, now - oldstamp, HZ); + if (delta < HZ / 50) + return false; + + incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ; + if (!incr) + return false; + + if (cmpxchg(&icmp_global.stamp, oldstamp, now) == oldstamp) { + old = atomic_read(&icmp_global.credit); + do { + new = min(old + incr, READ_ONCE(sysctl_icmp_msgs_burst)); + } while (!atomic_try_cmpxchg(&icmp_global.credit, &old, new)); } - WRITE_ONCE(icmp_global.credit, credit); - spin_unlock(&icmp_global.lock); - return rc; + return true; } EXPORT_SYMBOL(icmp_global_allow); +void icmp_global_consume(void) +{ + int credits = prandom_u32_max(3); + + /* Note: this might make icmp_global.credit negative. */ + if (credits) + atomic_sub(credits, &icmp_global.credit); +} +EXPORT_SYMBOL(icmp_global_consume); + static bool icmpv4_mask_allow(struct net *net, int type, int code) { if (type > NR_ICMP_TYPES) @@ -296,14 +298,16 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code) return false; } -static bool icmpv4_global_allow(struct net *net, int type, int code) +static bool icmpv4_global_allow(struct net *net, int type, int code, + bool *apply_ratelimit) { if (icmpv4_mask_allow(net, type, code)) return true; - if (icmp_global_allow()) + if (icmp_global_allow()) { + *apply_ratelimit = true; return true; - + } return false; } @@ -312,15 +316,16 @@ static bool icmpv4_global_allow(struct net *net, int type, int code) */ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, - struct flowi4 *fl4, int type, int code) + struct flowi4 *fl4, int type, int code, + bool apply_ratelimit) { struct dst_entry *dst = &rt->dst; struct inet_peer *peer; bool rc = true; int vif; - if (icmpv4_mask_allow(net, type, code)) - goto out; + if (!apply_ratelimit) + return true; /* No rate limit on loopback */ if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) @@ -333,6 +338,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, if (peer) inet_putpeer(peer); out: + if (rc) + icmp_global_consume(); return rc; } @@ -405,6 +412,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct ipcm_cookie ipc; struct rtable *rt = skb_rtable(skb); struct net *net = dev_net(rt->dst.dev); + bool apply_ratelimit = false; struct flowi4 fl4; struct sock *sk; struct inet_sock *inet; @@ -416,11 +424,11 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb)) return; - /* Needed by both icmp_global_allow and icmp_xmit_lock */ + /* Needed by both icmpv4_global_allow and icmp_xmit_lock */ local_bh_disable(); - /* global icmp_msgs_per_sec */ - if (!icmpv4_global_allow(net, type, code)) + /* is global icmp_msgs_per_sec exhausted ? */ + if (!icmpv4_global_allow(net, type, code, &apply_ratelimit)) goto out_bh_enable; sk = icmp_xmit_lock(net); @@ -453,7 +461,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) goto out_unlock; - if (icmpv4_xrlim_allow(net, rt, &fl4, type, code)) + if (icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit)) icmp_push_reply(icmp_param, &fl4, &ipc, &rt); ip_rt_put(rt); out_unlock: @@ -597,6 +605,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, int room; struct icmp_bxm icmp_param; struct rtable *rt = skb_rtable(skb_in); + bool apply_ratelimit = false; struct ipcm_cookie ipc; struct flowi4 fl4; __be32 saddr; @@ -678,7 +687,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, } } - /* Needed by both icmp_global_allow and icmp_xmit_lock */ + /* Needed by both icmpv4_global_allow and icmp_xmit_lock */ local_bh_disable(); /* Check global sysctl_icmp_msgs_per_sec ratelimit, unless @@ -686,7 +695,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, * loopback, then peer ratelimit still work (in icmpv4_xrlim_allow) */ if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) && - !icmpv4_global_allow(net, type, code)) + !icmpv4_global_allow(net, type, code, &apply_ratelimit)) goto out_bh_enable; sk = icmp_xmit_lock(net); @@ -745,7 +754,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, goto out_unlock; /* peer icmp_ratelimit */ - if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code)) + if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit)) goto ende; /* RFC says return as much as we can without exceeding 576 bytes. */ diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index fd1f896115c1..7835aa60acd8 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -182,14 +182,16 @@ static bool icmpv6_mask_allow(struct net *net, int type) return false; } -static bool icmpv6_global_allow(struct net *net, int type) +static bool icmpv6_global_allow(struct net *net, int type, + bool *apply_ratelimit) { if (icmpv6_mask_allow(net, type)) return true; - if (icmp_global_allow()) + if (icmp_global_allow()) { + *apply_ratelimit = true; return true; - + } return false; } @@ -197,13 +199,13 @@ static bool icmpv6_global_allow(struct net *net, int type) * Check the ICMP output rate limit */ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, - struct flowi6 *fl6) + struct flowi6 *fl6, bool apply_ratelimit) { struct net *net = sock_net(sk); struct dst_entry *dst; bool res = false; - if (icmpv6_mask_allow(net, type)) + if (!apply_ratelimit) return true; /* @@ -231,6 +233,8 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, if (peer) inet_putpeer(peer); } + if (res) + icmp_global_consume(); dst_release(dst); return res; } @@ -454,6 +458,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, struct net *net; struct ipv6_pinfo *np; const struct in6_addr *saddr = NULL; + bool apply_ratelimit = false; struct dst_entry *dst; struct icmp6hdr tmp_hdr; struct flowi6 fl6; @@ -535,11 +540,12 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, return; } - /* Needed by both icmp_global_allow and icmpv6_xmit_lock */ + /* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */ local_bh_disable(); /* Check global sysctl_icmp_msgs_per_sec ratelimit */ - if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type)) + if (!(skb->dev->flags & IFF_LOOPBACK) && + !icmpv6_global_allow(net, type, &apply_ratelimit)) goto out_bh_enable; mip6_addr_swap(skb, parm); @@ -577,7 +583,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, np = inet6_sk(sk); - if (!icmpv6_xrlim_allow(sk, type, &fl6)) + if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit)) goto out; tmp_hdr.icmp6_type = type; @@ -718,6 +724,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct ipv6_pinfo *np; const struct in6_addr *saddr = NULL; struct icmp6hdr *icmph = icmp6_hdr(skb); + bool apply_ratelimit = false; struct icmp6hdr tmp_hdr; struct flowi6 fl6; struct icmpv6_msg msg; @@ -775,8 +782,9 @@ static void icmpv6_echo_reply(struct sk_buff *skb) goto out; /* Check the ratelimit */ - if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) || - !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6)) + if ((!(skb->dev->flags & IFF_LOOPBACK) && + !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) || + !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit)) goto out_dst_release; idev = __in6_dev_get(skb->dev); -- Gitee From 82a357afb58af33c122fec5b1fe530006d89686d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 6 Sep 2024 14:27:24 +0800 Subject: [PATCH 07/11] f2fs: get rid of online repaire on corrupted directory mainline inclusion from mainline-v6.12-rc1 commit 884ee6dc85b959bc152f15bca80c30f06069e6c4 category: bugfix issue: #IB4DGU CVE: CVE-2024-47690 Signed-off-by: May27May --------------------------------------- syzbot reports a f2fs bug as below: kernel BUG at fs/f2fs/inode.c:896! RIP: 0010:f2fs_evict_inode+0x1598/0x15c0 fs/f2fs/inode.c:896 Call Trace: evict+0x532/0x950 fs/inode.c:704 dispose_list fs/inode.c:747 [inline] evict_inodes+0x5f9/0x690 fs/inode.c:797 generic_shutdown_super+0x9d/0x2d0 fs/super.c:627 kill_block_super+0x44/0x90 fs/super.c:1696 kill_f2fs_super+0x344/0x690 fs/f2fs/super.c:4898 deactivate_locked_super+0xc4/0x130 fs/super.c:473 cleanup_mnt+0x41f/0x4b0 fs/namespace.c:1373 task_work_run+0x24f/0x310 kernel/task_work.c:228 ptrace_notify+0x2d2/0x380 kernel/signal.c:2402 ptrace_report_syscall include/linux/ptrace.h:415 [inline] ptrace_report_syscall_exit include/linux/ptrace.h:477 [inline] syscall_exit_work+0xc6/0x190 kernel/entry/common.c:173 syscall_exit_to_user_mode_prepare kernel/entry/common.c:200 [inline] __syscall_exit_to_user_mode_work kernel/entry/common.c:205 [inline] syscall_exit_to_user_mode+0x279/0x370 kernel/entry/common.c:218 do_syscall_64+0x100/0x230 arch/x86/entry/common.c:89 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0010:f2fs_evict_inode+0x1598/0x15c0 fs/f2fs/inode.c:896 Online repaire on corrupted directory in f2fs_lookup() can generate dirty data/meta while racing w/ readonly remount, it may leave dirty inode after filesystem becomes readonly, however, checkpoint() will skips flushing dirty inode in a state of readonly mode, result in above panic. Let's get rid of online repaire in f2fs_lookup(), and leave the work to fsck.f2fs. Fixes: 510022a85839 ("f2fs: add F2FS_INLINE_DOTS to recover missing dot dentries") Reported-by: syzbot+ebea2790904673d7c618@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000a7b20f061ff2d56a@google.com Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: May27May --- fs/f2fs/f2fs.h | 11 -------- fs/f2fs/namei.c | 62 ----------------------------------------- include/linux/f2fs_fs.h | 2 +- 3 files changed, 1 insertion(+), 74 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2243c418cac6..e1417a499fb2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -693,7 +693,6 @@ enum { FI_FIRST_BLOCK_WRITTEN, /* indicate #0 data block was written */ FI_DROP_CACHE, /* drop dirty page cache */ FI_DATA_EXIST, /* indicate data exists */ - FI_INLINE_DOTS, /* indicate inline dot dentries */ FI_DO_DEFRAG, /* indicate defragment is running */ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ FI_NO_PREALLOC, /* indicate skipped preallocated blocks */ @@ -2669,7 +2668,6 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, return; fallthrough; case FI_DATA_EXIST: - case FI_INLINE_DOTS: case FI_PIN_FILE: f2fs_mark_inode_dirty_sync(inode, true); } @@ -2786,8 +2784,6 @@ static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) set_bit(FI_INLINE_DENTRY, fi->flags); if (ri->i_inline & F2FS_DATA_EXIST) set_bit(FI_DATA_EXIST, fi->flags); - if (ri->i_inline & F2FS_INLINE_DOTS) - set_bit(FI_INLINE_DOTS, fi->flags); if (ri->i_inline & F2FS_EXTRA_ATTR) set_bit(FI_EXTRA_ATTR, fi->flags); if (ri->i_inline & F2FS_PIN_FILE) @@ -2806,8 +2802,6 @@ static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) ri->i_inline |= F2FS_INLINE_DENTRY; if (is_inode_flag_set(inode, FI_DATA_EXIST)) ri->i_inline |= F2FS_DATA_EXIST; - if (is_inode_flag_set(inode, FI_INLINE_DOTS)) - ri->i_inline |= F2FS_INLINE_DOTS; if (is_inode_flag_set(inode, FI_EXTRA_ATTR)) ri->i_inline |= F2FS_EXTRA_ATTR; if (is_inode_flag_set(inode, FI_PIN_FILE)) @@ -2872,11 +2866,6 @@ static inline int f2fs_exist_data(struct inode *inode) return is_inode_flag_set(inode, FI_DATA_EXIST); } -static inline int f2fs_has_inline_dots(struct inode *inode) -{ - return is_inode_flag_set(inode, FI_INLINE_DOTS); -} - static inline int f2fs_is_mmap_file(struct inode *inode) { return is_inode_flag_set(inode, FI_MMAP_FILE); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 99e4ec48d2a4..4c5a39a5d6be 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -440,56 +440,6 @@ struct dentry *f2fs_get_parent(struct dentry *child) return d_obtain_alias(f2fs_iget(child->d_sb, ino)); } -static int __recover_dot_dentries(struct inode *dir, nid_t pino) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - struct qstr dot = QSTR_INIT(".", 1); - struct qstr dotdot = QSTR_INIT("..", 2); - struct f2fs_dir_entry *de; - struct page *page; - int err = 0; - - if (f2fs_readonly(sbi->sb)) { - f2fs_info(sbi, "skip recovering inline_dots inode (ino:%lu, pino:%u) in readonly mountpoint", - dir->i_ino, pino); - return 0; - } - - err = dquot_initialize(dir); - if (err) - return err; - - f2fs_balance_fs(sbi, true); - - f2fs_lock_op(sbi); - - de = f2fs_find_entry(dir, &dot, &page); - if (de) { - f2fs_put_page(page, 0); - } else if (IS_ERR(page)) { - err = PTR_ERR(page); - goto out; - } else { - err = f2fs_do_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR); - if (err) - goto out; - } - - de = f2fs_find_entry(dir, &dotdot, &page); - if (de) - f2fs_put_page(page, 0); - else if (IS_ERR(page)) - err = PTR_ERR(page); - else - err = f2fs_do_add_link(dir, &dotdot, NULL, pino, S_IFDIR); -out: - if (!err) - clear_inode_flag(dir, FI_INLINE_DOTS); - - f2fs_unlock_op(sbi); - return err; -} - static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { @@ -499,7 +449,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, struct dentry *new; nid_t ino = -1; int err = 0; - unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir)); struct f2fs_filename fname; trace_f2fs_lookup_start(dir, dentry, flags); @@ -535,17 +484,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - if ((dir->i_ino == root_ino) && f2fs_has_inline_dots(dir)) { - err = __recover_dot_dentries(dir, root_ino); - if (err) - goto out_iput; - } - - if (f2fs_has_inline_dots(inode)) { - err = __recover_dot_dentries(inode, dir->i_ino); - if (err) - goto out_iput; - } if (IS_ENCRYPTED(dir) && (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && !fscrypt_has_permitted_context(dir, inode)) { diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index a5dbb57a687f..d2e4ace2fe64 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -226,7 +226,7 @@ struct f2fs_extent { #define F2FS_INLINE_DATA 0x02 /* file inline data flag */ #define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */ #define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */ -#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */ +#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries (obsolete) */ #define F2FS_EXTRA_ATTR 0x20 /* file having extra attribute */ #define F2FS_PIN_FILE 0x40 /* file should not be gced */ -- Gitee From 85a4c3ee107ca8187ea0a7ea3e5ee964a8b094be Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Fri, 19 Jul 2024 19:00:53 +0800 Subject: [PATCH 08/11] bpf: Prevent tail call between progs attached to different hooks mainline inclusion from mainline-v6.12-rc1 commit 28ead3eaabc16ecc907cfb71876da028080f6356 category: bugfix issue: #IB4DGU CVE: CVE-2024-50063 Signed-off-by: May27May --------------------------------------- bpf progs can be attached to kernel functions, and the attached functions can take different parameters or return different return values. If prog attached to one kernel function tail calls prog attached to another kernel function, the ctx access or return value verification could be bypassed. For example, if prog1 is attached to func1 which takes only 1 parameter and prog2 is attached to func2 which takes two parameters. Since verifier assumes the bpf ctx passed to prog2 is constructed based on func2's prototype, verifier allows prog2 to access the second parameter from the bpf ctx passed to it. The problem is that verifier does not prevent prog1 from passing its bpf ctx to prog2 via tail call. In this case, the bpf ctx passed to prog2 is constructed from func1 instead of func2, that is, the assumption for ctx access verification is bypassed. Another example, if BPF LSM prog1 is attached to hook file_alloc_security, and BPF LSM prog2 is attached to hook bpf_lsm_audit_rule_known. Verifier knows the return value rules for these two hooks, e.g. it is legal for bpf_lsm_audit_rule_known to return positive number 1, and it is illegal for file_alloc_security to return positive number. So verifier allows prog2 to return positive number 1, but does not allow prog1 to return positive number. The problem is that verifier does not prevent prog1 from calling prog2 via tail call. In this case, prog2's return value 1 will be used as the return value for prog1's hook file_alloc_security. That is, the return value rule is bypassed. This patch adds restriction for tail call to prevent such bypasses. Signed-off-by: Xu Kuohai Link: https://lore.kernel.org/r/20240719110059.797546-4-xukuohai@huaweicloud.com Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Signed-off-by: May27May --- include/linux/bpf.h | 1 + kernel/bpf/core.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a2bc8f19c855..4fd207e9fa79 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -923,6 +923,7 @@ struct bpf_array_aux { * the same prog type and JITed flag. */ struct { + const struct btf_type *attach_func_proto; spinlock_t lock; enum bpf_prog_type type; bool jited; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 681070bf910b..5bd95cab2f85 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1775,6 +1775,7 @@ bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) { bool ret; + struct bpf_prog_aux *aux = fp->aux; if (fp->kprobe_override) return false; @@ -1787,10 +1788,24 @@ bool bpf_prog_array_compatible(struct bpf_array *array, */ array->aux->owner.type = fp->type; array->aux->owner.jited = fp->jited; + array->aux->owner.attach_func_proto = aux->attach_func_proto; ret = true; } else { ret = array->aux->owner.type == fp->type && array->aux->owner.jited == fp->jited; + if (ret && + array->aux->owner.attach_func_proto != aux->attach_func_proto) { + switch (fp->type) { + case BPF_PROG_TYPE_TRACING: + case BPF_PROG_TYPE_LSM: + case BPF_PROG_TYPE_EXT: + case BPF_PROG_TYPE_STRUCT_OPS: + ret = false; + break; + default: + break; + } + } } spin_unlock(&array->aux->owner.lock); return ret; -- Gitee From 4259f22b77eed60bd462f39f0bad5efd3ac2fc2d Mon Sep 17 00:00:00 2001 From: Xu Kuohai Date: Fri, 19 Jul 2024 19:00:58 +0800 Subject: [PATCH 09/11] selftests/bpf: Add test for lsm tail call mainline inclusion from mainline-v6.12-rc1 commit d463dd9c9aa24b17ccb8ed76bdd7768baf857b48 category: bugfix issue: #IB4DGU CVE: CVE-2024-50063 Signed-off-by: May27May --------------------------------------- Add test for lsm tail call to ensure tail call can only be used between bpf lsm progs attached to the same hook. Signed-off-by: Xu Kuohai Link: https://lore.kernel.org/r/20240719110059.797546-9-xukuohai@huaweicloud.com Signed-off-by: Alexei Starovoitov Signed-off-by: Andrii Nakryiko Signed-off-by: May27May --- .../selftests/bpf/prog_tests/test_lsm.c | 46 ++++++++++++++++++- .../selftests/bpf/progs/lsm_tailcall.c | 34 ++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/progs/lsm_tailcall.c diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c index 6ab29226c99b..18c2e3046da3 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c +++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c @@ -13,6 +13,7 @@ #include #include "lsm.skel.h" +#include "lsm_tailcall.skel.h" char *CMD_ARGS[] = {"true", NULL}; @@ -52,7 +53,7 @@ int exec_cmd(int *monitored_pid) return -EINVAL; } -void test_test_lsm(void) +static void test_lsm_basic(void) { struct lsm *skel = NULL; int err, duration = 0; @@ -93,3 +94,46 @@ void test_test_lsm(void) close_prog: lsm__destroy(skel); } + +static void test_lsm_tailcall(void) +{ + struct lsm_tailcall *skel = NULL; + int map_fd, prog_fd; + int err, key; + + skel = lsm_tailcall__open_and_load(); + if (!ASSERT_OK_PTR(skel, "lsm_tailcall__skel_load")) + goto close_prog; + + map_fd = bpf_map__fd(skel->maps.jmp_table); + if (CHECK_FAIL(map_fd < 0)) + goto close_prog; + + prog_fd = bpf_program__fd(skel->progs.lsm_file_permission_prog); + if (CHECK_FAIL(prog_fd < 0)) + goto close_prog; + + key = 0; + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); + if (CHECK_FAIL(!err)) + goto close_prog; + + prog_fd = bpf_program__fd(skel->progs.lsm_file_alloc_security_prog); + if (CHECK_FAIL(prog_fd < 0)) + goto close_prog; + + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); + if (CHECK_FAIL(err)) + goto close_prog; + +close_prog: + lsm_tailcall__destroy(skel); +} + +void test_test_lsm(void) +{ + if (test__start_subtest("lsm_basic")) + test_lsm_basic(); + if (test__start_subtest("lsm_tailcall")) + test_lsm_tailcall(); +} diff --git a/tools/testing/selftests/bpf/progs/lsm_tailcall.c b/tools/testing/selftests/bpf/progs/lsm_tailcall.c new file mode 100644 index 000000000000..49c075ce2d4c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/lsm_tailcall.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Huawei Technologies Co., Ltd */ + +#include "vmlinux.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +SEC("lsm/file_permission") +int lsm_file_permission_prog(void *ctx) +{ + return 0; +} + +SEC("lsm/file_alloc_security") +int lsm_file_alloc_security_prog(void *ctx) +{ + return 0; +} + +SEC("lsm/file_alloc_security") +int lsm_file_alloc_security_entry(void *ctx) +{ + bpf_tail_call_static(ctx, &jmp_table, 0); + return 0; +} -- Gitee From e4b6d703321cb75fb0511f6ae0508f72c5d21107 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 4 Sep 2024 11:20:47 +0800 Subject: [PATCH 10/11] f2fs: fix to check atomic_file in f2fs ioctl interfaces stable inclusion from stable-v6.1.113 commit 26b07bd2e1f124b0e430c8d250023f7205c549c3 category: bugfix issue: #IB4DGU CVE: CVE-2024-49859 Signed-off-by: May27May --------------------------------------- commit bfe5c02654261bfb8bd9cb174a67f3279ea99e58 upstream. Some f2fs ioctl interfaces like f2fs_ioc_set_pin_file(), f2fs_move_file_range(), and f2fs_defragment_range() missed to check atomic_write status, which may cause potential race issue, fix it. Cc: stable@vger.kernel.org Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman Signed-off-by: May27May --- fs/f2fs/file.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b9109b8ead22..689fa6751845 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2614,6 +2614,11 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, inode_lock(inode); + if (f2fs_is_atomic_file(inode)) { + err = -EINVAL; + goto out; + } + /* writeback all dirty pages in the range */ err = filemap_write_and_wait_range(inode->i_mapping, range->start, range->start + range->len - 1); @@ -2821,6 +2826,11 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, goto out; } + if (f2fs_is_atomic_file(src) || f2fs_is_atomic_file(dst)) { + ret = -EINVAL; + goto out_unlock; + } + ret = -EINVAL; if (pos_in + len > src->i_size || pos_in + len < pos_in) goto out_unlock; @@ -3267,6 +3277,11 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) goto out; } + if (f2fs_is_atomic_file(inode)) { + ret = -EINVAL; + goto out; + } + if (!pin) { clear_inode_flag(inode, FI_PIN_FILE); f2fs_i_gc_failures_write(inode, 0); -- Gitee From e3f14fbaacabc3fe31a5c8a6bd973408b5d0fdf2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Jun 2024 15:17:11 +0800 Subject: [PATCH 11/11] f2fs: fix to wait dio completion mainline inclusion from mainline-v6.12-rc1 commit 96cfeb0389530ae32ade8a48ae3ae1ac3b6c009d category: bugfix issue: #IB4DGU CVE: CVE-2024-47726 Signed-off-by: May27May --------------------------------------- It should wait all existing dio write IOs before block removal, otherwise, previous direct write IO may overwrite data in the block which may be reused by other inode. Cc: stable@vger.kernel.org Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: May27May --- fs/f2fs/file.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 689fa6751845..f6fe52c393d4 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -938,6 +938,13 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) return err; } + /* + * wait for inflight dio, blocks should be removed after + * IO completion. + */ + if (attr->ia_size < old_size) + inode_dio_wait(inode); + down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); down_write(&F2FS_I(inode)->i_mmap_sem); @@ -1748,6 +1755,12 @@ static long f2fs_fallocate(struct file *file, int mode, if (ret) goto out; + /* + * wait for inflight dio, blocks should be removed after IO + * completion. + */ + inode_dio_wait(inode); + if (mode & FALLOC_FL_PUNCH_HOLE) { if (offset >= inode->i_size) goto out; -- Gitee