From 7a153bc983c3fc99ef768ed1fe55d516aaa60364 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Fri, 14 Apr 2023 14:08:35 +0800 Subject: [PATCH 001/101] virtio_net: bugfix overflow inside xdp_linearize_page() stable inclusion from stable-5.10.179 commit b6dd232f6350778a6ba440ea52bdfc4571b62a06 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit 853618d5886bf94812f31228091cd37d308230f7 ] Here we copy the data from the original buf to the new page, but we do not check whether the copy may overflow. As long as the size received (including vnethdr) is greater than 3840 (PAGE_SIZE - VIRTIO_XDP_HEADROOM), the memcpy will overflow. This is entirely possible when the MTU is large, such as 4096. In our test environment, this causes a crash. Since the crash is caused by the overwritten memory, the trace is meaningless, so I do not include it. Fixes: 72979a6c3590 ("virtio_net: xdp, add slowpath case for non contiguous buffers") Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S.
Miller Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/net/virtio_net.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index d53321116136..47c9118cc92a 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -646,8 +646,13 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, int page_off, unsigned int *len) { - struct page *page = alloc_page(GFP_ATOMIC); + int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + struct page *page; + if (page_off + *len + tailroom > PAGE_SIZE) + return NULL; + + page = alloc_page(GFP_ATOMIC); if (!page) return NULL; @@ -655,7 +660,6 @@ static struct page *xdp_linearize_page(struct receive_queue *rq, page_off += *len; while (--*num_buf) { - int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); unsigned int buflen; void *buf; int off; -- Gitee From d56d5da1100f181470341d751a0ffc11aab44000 Mon Sep 17 00:00:00 2001 From: Jonathan Cooper Date: Tue, 28 Jun 2022 14:58:55 +0100 Subject: [PATCH 002/101] sfc: Split STATE_READY in to STATE_NET_DOWN and STATE_NET_UP. stable inclusion from stable-5.10.179 commit 06a72bbf0d9834fde7e12b2039fbc929fc8eb220 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit 813cf9d1e753e1e0a247d3d685212a06141b483e ] This patch splits the READY state in to NET_UP and NET_DOWN. This is to prepare for future work to delay resource allocation until interface up so that we can use resources more efficiently in SRIOV environments, and also to lay the ground work for an extra PROBED state where we don't create a network interface, for VDPA operation. Signed-off-by: Jonathan Cooper Acked-by: Martin Habets Signed-off-by: David S. 
Miller Stable-dep-of: a80bb8e7233b ("sfc: Fix use-after-free due to selftest_work") Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/net/ethernet/sfc/ef100_netdev.c | 6 ++- drivers/net/ethernet/sfc/efx.c | 29 ++++++------- drivers/net/ethernet/sfc/efx_common.c | 10 ++--- drivers/net/ethernet/sfc/efx_common.h | 6 +-- drivers/net/ethernet/sfc/ethtool_common.c | 2 +- drivers/net/ethernet/sfc/net_driver.h | 50 +++++++++++++++++++++-- 6 files changed, 72 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef100_netdev.c b/drivers/net/ethernet/sfc/ef100_netdev.c index 63a44ee763be..b9429e8faba1 100644 --- a/drivers/net/ethernet/sfc/ef100_netdev.c +++ b/drivers/net/ethernet/sfc/ef100_netdev.c @@ -96,6 +96,8 @@ static int ef100_net_stop(struct net_device *net_dev) efx_mcdi_free_vis(efx); efx_remove_interrupts(efx); + efx->state = STATE_NET_DOWN; + return 0; } @@ -172,6 +174,8 @@ static int ef100_net_open(struct net_device *net_dev) efx_link_status_changed(efx); mutex_unlock(&efx->mac_lock); + efx->state = STATE_NET_UP; + return 0; fail: @@ -272,7 +276,7 @@ int ef100_register_netdev(struct efx_nic *efx) /* Always start with carrier off; PHY events will detect the link */ netif_carrier_off(net_dev); - efx->state = STATE_READY; + efx->state = STATE_NET_DOWN; rtnl_unlock(); efx_init_mcdi_logging(efx); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index c069659c9e2d..5f064f185d55 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -105,14 +105,6 @@ static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp); static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs, u32 flags); -#define EFX_ASSERT_RESET_SERIALISED(efx) \ - do { \ - if ((efx->state == STATE_READY) || \ - (efx->state == STATE_RECOVERY) || \ - (efx->state == STATE_DISABLED)) \ - ASSERT_RTNL(); \ - } while (0) - /************************************************************************** * * 
Port handling @@ -377,6 +369,8 @@ static int efx_probe_all(struct efx_nic *efx) if (rc) goto fail5; + efx->state = STATE_NET_DOWN; + return 0; fail5: @@ -543,6 +537,9 @@ int efx_net_open(struct net_device *net_dev) efx_start_all(efx); if (efx->state == STATE_DISABLED || efx->reset_pending) netif_device_detach(efx->net_dev); + else + efx->state = STATE_NET_UP; + efx_selftest_async_start(efx); return 0; } @@ -721,8 +718,6 @@ static int efx_register_netdev(struct efx_nic *efx) * already requested. If so, the NIC is probably hosed so we * abort. */ - efx->state = STATE_READY; - smp_mb(); /* ensure we change state before checking reset_pending */ if (efx->reset_pending) { netif_err(efx, probe, efx->net_dev, "aborting probe due to scheduled reset\n"); @@ -750,6 +745,8 @@ static int efx_register_netdev(struct efx_nic *efx) efx_associate(efx); + efx->state = STATE_NET_DOWN; + rtnl_unlock(); rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type); @@ -851,7 +848,7 @@ static void efx_pci_remove_main(struct efx_nic *efx) /* Flush reset_work. It can no longer be scheduled since we * are not READY. 
*/ - BUG_ON(efx->state == STATE_READY); + WARN_ON(efx_net_active(efx->state)); efx_flush_reset_workqueue(efx); efx_disable_interrupts(efx); @@ -1196,13 +1193,13 @@ static int efx_pm_freeze(struct device *dev) rtnl_lock(); - if (efx->state != STATE_DISABLED) { - efx->state = STATE_UNINIT; - + if (efx_net_active(efx->state)) { efx_device_detach_sync(efx); efx_stop_all(efx); efx_disable_interrupts(efx); + + efx->state = efx_freeze(efx->state); } rtnl_unlock(); @@ -1217,7 +1214,7 @@ static int efx_pm_thaw(struct device *dev) rtnl_lock(); - if (efx->state != STATE_DISABLED) { + if (efx_frozen(efx->state)) { rc = efx_enable_interrupts(efx); if (rc) goto fail; @@ -1230,7 +1227,7 @@ static int efx_pm_thaw(struct device *dev) efx_device_attach_if_not_resetting(efx); - efx->state = STATE_READY; + efx->state = efx_thaw(efx->state); efx->type->resume_wol(efx); } diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c index de797e1ac5a9..1527678b241c 100644 --- a/drivers/net/ethernet/sfc/efx_common.c +++ b/drivers/net/ethernet/sfc/efx_common.c @@ -897,7 +897,7 @@ static void efx_reset_work(struct work_struct *data) * have changed by now. Now that we have the RTNL lock, * it cannot change again. */ - if (efx->state == STATE_READY) + if (efx_net_active(efx->state)) (void)efx_reset(efx, method); rtnl_unlock(); @@ -907,7 +907,7 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) { enum reset_type method; - if (efx->state == STATE_RECOVERY) { + if (efx_recovering(efx->state)) { netif_dbg(efx, drv, efx->net_dev, "recovering: skip scheduling %s reset\n", RESET_TYPE(type)); @@ -942,7 +942,7 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) /* If we're not READY then just leave the flags set as the cue * to abort probing or reschedule the reset later. 
*/ - if (READ_ONCE(efx->state) != STATE_READY) + if (!efx_net_active(READ_ONCE(efx->state))) return; /* efx_process_channel() will no longer read events once a @@ -1214,7 +1214,7 @@ static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev, rtnl_lock(); if (efx->state != STATE_DISABLED) { - efx->state = STATE_RECOVERY; + efx->state = efx_recover(efx->state); efx->reset_pending = 0; efx_device_detach_sync(efx); @@ -1268,7 +1268,7 @@ static void efx_io_resume(struct pci_dev *pdev) netif_err(efx, hw, efx->net_dev, "efx_reset failed after PCI error (%d)\n", rc); } else { - efx->state = STATE_READY; + efx->state = efx_recovered(efx->state); netif_dbg(efx, hw, efx->net_dev, "Done resetting and resuming IO after PCI error.\n"); } diff --git a/drivers/net/ethernet/sfc/efx_common.h b/drivers/net/ethernet/sfc/efx_common.h index 65513fd0cf6c..c72e819da8fd 100644 --- a/drivers/net/ethernet/sfc/efx_common.h +++ b/drivers/net/ethernet/sfc/efx_common.h @@ -45,9 +45,7 @@ int efx_reconfigure_port(struct efx_nic *efx); #define EFX_ASSERT_RESET_SERIALISED(efx) \ do { \ - if ((efx->state == STATE_READY) || \ - (efx->state == STATE_RECOVERY) || \ - (efx->state == STATE_DISABLED)) \ + if (efx->state != STATE_UNINIT) \ ASSERT_RTNL(); \ } while (0) @@ -64,7 +62,7 @@ void efx_port_dummy_op_void(struct efx_nic *efx); static inline int efx_check_disabled(struct efx_nic *efx) { - if (efx->state == STATE_DISABLED || efx->state == STATE_RECOVERY) { + if (efx->state == STATE_DISABLED || efx_recovering(efx->state)) { netif_err(efx, drv, efx->net_dev, "device is disabled due to earlier errors\n"); return -EIO; diff --git a/drivers/net/ethernet/sfc/ethtool_common.c b/drivers/net/ethernet/sfc/ethtool_common.c index bd552c7dffcb..3846b76b8972 100644 --- a/drivers/net/ethernet/sfc/ethtool_common.c +++ b/drivers/net/ethernet/sfc/ethtool_common.c @@ -137,7 +137,7 @@ void efx_ethtool_self_test(struct net_device *net_dev, if (!efx_tests) goto fail; - if (efx->state != STATE_READY) { + if 
(!efx_net_active(efx->state)) { rc = -EBUSY; goto out; } diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 8aecb4bd2c0d..39f97929b3ff 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -627,12 +627,54 @@ enum efx_int_mode { #define EFX_INT_MODE_USE_MSI(x) (((x)->interrupt_mode) <= EFX_INT_MODE_MSI) enum nic_state { - STATE_UNINIT = 0, /* device being probed/removed or is frozen */ - STATE_READY = 1, /* hardware ready and netdev registered */ - STATE_DISABLED = 2, /* device disabled due to hardware errors */ - STATE_RECOVERY = 3, /* device recovering from PCI error */ + STATE_UNINIT = 0, /* device being probed/removed */ + STATE_NET_DOWN, /* hardware probed and netdev registered */ + STATE_NET_UP, /* ready for traffic */ + STATE_DISABLED, /* device disabled due to hardware errors */ + + STATE_RECOVERY = 0x100,/* recovering from PCI error */ + STATE_FROZEN = 0x200, /* frozen by power management */ }; +static inline bool efx_net_active(enum nic_state state) +{ + return state == STATE_NET_DOWN || state == STATE_NET_UP; +} + +static inline bool efx_frozen(enum nic_state state) +{ + return state & STATE_FROZEN; +} + +static inline bool efx_recovering(enum nic_state state) +{ + return state & STATE_RECOVERY; +} + +static inline enum nic_state efx_freeze(enum nic_state state) +{ + WARN_ON(!efx_net_active(state)); + return state | STATE_FROZEN; +} + +static inline enum nic_state efx_thaw(enum nic_state state) +{ + WARN_ON(!efx_frozen(state)); + return state & ~STATE_FROZEN; +} + +static inline enum nic_state efx_recover(enum nic_state state) +{ + WARN_ON(!efx_net_active(state)); + return state | STATE_RECOVERY; +} + +static inline enum nic_state efx_recovered(enum nic_state state) +{ + WARN_ON(!efx_recovering(state)); + return state & ~STATE_RECOVERY; +} + /* Forward declaration */ struct efx_nic; -- Gitee From 415d725424376746236c85b74e8834f01b37c4ce Mon Sep 17 00:00:00 2001 From: 
Ding Hui Date: Fri, 14 Apr 2023 23:23:06 +0800 Subject: [PATCH 003/101] sfc: Fix use-after-free due to selftest_work stable inclusion from stable-5.10.179 commit 6758a51ef36ac78962db2f421a8e4d85fd23853a category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit a80bb8e7233b2ad6ff119646b6e33fb3edcec37b ] There is a use-after-free scenario that is: When the NIC is down, user set mac address or vlan tag to VF, the xxx_set_vf_mac() or xxx_set_vf_vlan() will invoke efx_net_stop() and efx_net_open(), since netif_running() is false, the port will not start and keep port_enabled false, but selftest_work is scheduled in efx_net_open(). If we remove the device before selftest_work run, the efx_stop_port() will not be called since the NIC is down, and then efx is freed, we will soon get a UAF in run_timer_softirq() like this: [ 1178.907941] ================================================================== [ 1178.907948] BUG: KASAN: use-after-free in run_timer_softirq+0xdea/0xe90 [ 1178.907950] Write of size 8 at addr ff11001f449cdc80 by task swapper/47/0 [ 1178.907950] [ 1178.907953] CPU: 47 PID: 0 Comm: swapper/47 Kdump: loaded Tainted: G O --------- -t - 4.18.0 #1 [ 1178.907954] Hardware name: SANGFOR X620G40/WI2HG-208T1061A, BIOS SPYH051032-U01 04/01/2022 [ 1178.907955] Call Trace: [ 1178.907956] [ 1178.907960] dump_stack+0x71/0xab [ 1178.907963] print_address_description+0x6b/0x290 [ 1178.907965] ? run_timer_softirq+0xdea/0xe90 [ 1178.907967] kasan_report+0x14a/0x2b0 [ 1178.907968] run_timer_softirq+0xdea/0xe90 [ 1178.907971] ? init_timer_key+0x170/0x170 [ 1178.907973] ? hrtimer_cancel+0x20/0x20 [ 1178.907976] ? sched_clock+0x5/0x10 [ 1178.907978] ? 
sched_clock_cpu+0x18/0x170 [ 1178.907981] __do_softirq+0x1c8/0x5fa [ 1178.907985] irq_exit+0x213/0x240 [ 1178.907987] smp_apic_timer_interrupt+0xd0/0x330 [ 1178.907989] apic_timer_interrupt+0xf/0x20 [ 1178.907990] [ 1178.907991] RIP: 0010:mwait_idle+0xae/0x370 If the NIC is not actually brought up, there is no need to schedule selftest_work, so let's move invoking efx_selftest_async_start() into efx_start_all(), and it will be canceled when the interface is brought down. Fixes: dd40781e3a4e ("sfc: Run event/IRQ self-test asynchronously when interface is brought up") Fixes: e340be923012 ("sfc: add ndo_set_vf_mac() function for EF10") Debugged-by: Huang Cun Cc: Donglin Peng Suggested-by: Martin Habets Signed-off-by: Ding Hui Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/net/ethernet/sfc/efx.c | 1 - drivers/net/ethernet/sfc/efx_common.c | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 5f064f185d55..7cf52fcdb307 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -540,7 +540,6 @@ int efx_net_open(struct net_device *net_dev) else efx->state = STATE_NET_UP; - efx_selftest_async_start(efx); return 0; } diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c index 1527678b241c..476ef1c97637 100644 --- a/drivers/net/ethernet/sfc/efx_common.c +++ b/drivers/net/ethernet/sfc/efx_common.c @@ -542,6 +542,8 @@ void efx_start_all(struct efx_nic *efx) /* Start the hardware monitor if there is one */ efx_start_monitor(efx); + efx_selftest_async_start(efx); + /* Link state detection is normally event-driven; we have * to poll now because we could have missed a change */ -- Gitee From 68dcdbf9e6ed3d71cb22acb75cd5b63f7d0c89e0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Apr 2023 10:21:36 +0200 Subject: [PATCH 004/101] netfilter: nf_tables: fix ifdef to also consider nf_tables=m
stable inclusion from stable-5.10.179 commit 381110d061926f738d3220feb5580f1f1409cbd7 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit c55c0e91c813589dc55bea6bf9a9fbfaa10ae41d ] nftables can be built as a module, so fix the preprocessor conditional accordingly. Fixes: 478b360a47b7 ("netfilter: nf_tables: fix nf_trace always-on with XT_TRACE=n") Reported-by: Florian Fainelli Reported-by: Jakub Kicinski Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 287999eedef4..a210f1995862 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4277,7 +4277,7 @@ static inline void nf_reset_ct(struct sk_buff *skb) static inline void nf_reset_trace(struct sk_buff *skb) { -#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || IS_ENABLED(CONFIG_NF_TABLES) skb->nf_trace = 0; #endif } @@ -4297,7 +4297,7 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src, dst->_nfct = src->_nfct; nf_conntrack_get(skb_nfct(src)); #endif -#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || IS_ENABLED(CONFIG_NF_TABLES) if (copy) dst->nf_trace = src->nf_trace; #endif -- Gitee From f01bbc4c292111530a2179e3ee386bd9afdb4c99 Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Fri, 24 Mar 2023 18:16:38 +0100 Subject: [PATCH 005/101] i40e: fix accessing vsi->active_filters without holding lock stable inclusion from stable-5.10.179 commit 72df55d3dc96aa5784254e2d86c0802ebe09cf5d category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit 
8485d093b076e59baff424552e8aecfc5bd2d261 ] Fix accessing vsi->active_filters without holding the mac_filter_hash_lock. Move vsi->active_filters = 0 inside critical section and move clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state) after the critical section to ensure the new filters from other threads can be added only after filters cleaning in the critical section is finished. Fixes: 278e7d0b9d68 ("i40e: store MAC/VLAN filters in a hash with the MAC Address as key") Signed-off-by: Aleksandr Loktionov Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 76481ff7074b..3a93d538b2d7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -13458,15 +13458,15 @@ static int i40e_add_vsi(struct i40e_vsi *vsi) vsi->id = ctxt.vsi_number; } - vsi->active_filters = 0; - clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); spin_lock_bh(&vsi->mac_filter_hash_lock); + vsi->active_filters = 0; /* If macvlan filters already exist, force them to get loaded */ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { f->state = I40E_FILTER_NEW; f_count++; } spin_unlock_bh(&vsi->mac_filter_hash_lock); + clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); if (f_count) { vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED; -- Gitee From 39da5db1930b87669d3b1dfea98a858e32a0e1e3 Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Mon, 3 Apr 2023 07:13:18 +0200 Subject: [PATCH 006/101] i40e: fix i40e_setup_misc_vector() error handling stable inclusion from stable-5.10.179 commit 880c09bc2f0b6d0f3f3d995508a1c06177eb7d73 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream 
commit c86c00c6935505929cc9adb29ddb85e48c71f828 ] Add error handling of i40e_setup_misc_vector() in i40e_rebuild(). If interrupt vector setup fails, do not re-open VSIs and do not bring up VFs: we have no interrupts to serve traffic anyway. Fixes: 41c445ff0f48 ("i40e: main driver core") Signed-off-by: Aleksandr Loktionov Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/net/ethernet/intel/i40e/i40e_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 3a93d538b2d7..d23a467d0d20 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -10448,8 +10448,11 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) pf->hw.aq.asq_last_status)); } /* reinit the misc interrupt */ - if (pf->flags & I40E_FLAG_MSIX_ENABLED) + if (pf->flags & I40E_FLAG_MSIX_ENABLED) { ret = i40e_setup_misc_vector(pf); + if (ret) + goto end_unlock; + } /* Add a filter to drop all Flow control frames from any VSI from being * transmitted. By doing so we stop a malicious VF from sending out -- Gitee From 57148233f9fd4dddb027b543e19e1b94ad93270c Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Mon, 17 Apr 2023 05:07:18 -0700 Subject: [PATCH 007/101] mlxfw: fix null-ptr-deref in mlxfw_mfa2_tlv_next() stable inclusion from stable-5.10.179 commit b6b06c5ee333ff9ccaf4c54005263daba5673b2f category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit c0e73276f0fcbbd3d4736ba975d7dc7a48791b0c ] Function mlxfw_mfa2_tlv_multi_get() returns NULL if 'tlv' in question does not pass checks in mlxfw_mfa2_tlv_payload_get(). This behaviour may lead to NULL pointer dereference in 'multi->total_len'.
Fix this issue by testing mlxfw_mfa2_tlv_multi_get()'s return value against NULL. Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: 410ed13cae39 ("Add the mlxfw module for Mellanox firmware flash process") Co-developed-by: Natalia Petrova Signed-off-by: Nikita Zhandarovich Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/20230417120718.52325-1-n.zhandarovich@fintech.ru Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c index 017d68f1e123..972c571b4158 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_tlv_multi.c @@ -31,6 +31,8 @@ mlxfw_mfa2_tlv_next(const struct mlxfw_mfa2_file *mfa2_file, if (tlv->type == MLXFW_MFA2_TLV_MULTI_PART) { multi = mlxfw_mfa2_tlv_multi_get(mfa2_file, tlv); + if (!multi) + return NULL; tlv_len = NLA_ALIGN(tlv_len + be16_to_cpu(multi->total_len)); } -- Gitee From ca94a6314792530e3e0226963fe1b4cc2729202c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 17 Apr 2023 18:52:51 +0200 Subject: [PATCH 008/101] mlxsw: pci: Fix possible crash during initialization stable inclusion from stable-5.10.179 commit b085b5787b4a83755e7bf485d44c00217f04ba44 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit 1f64757ee2bb22a93ec89b4c71707297e8cca0ba ] During initialization the driver issues a reset command via its command interface in order to remove previous configuration from the device. After issuing the reset, the driver waits for 200ms before polling on the "system_status" register using memory-mapped IO until the device reaches a ready state (0x5E). 
The wait is necessary because the reset command only triggers the reset, but the reset itself happens asynchronously. If the driver starts polling too soon, the read of the "system_status" register will never return and the system will crash [1]. The issue was discovered when the device was flashed with a development firmware version where the reset routine took longer to complete. The issue was fixed in the firmware, but it exposed the fact that the current wait time is borderline. Fix by increasing the wait time from 200ms to 400ms. With this patch and the buggy firmware version, the issue did not reproduce in 10 reboots whereas without the patch the issue is reproduced quite consistently. [1] mce: CPUs not responding to MCE broadcast (may include false positives): 0,4 mce: CPUs not responding to MCE broadcast (may include false positives): 0,4 Kernel panic - not syncing: Timeout: Not all CPUs entered broadcast exception handler Shutting down cpus with NMI Kernel Offset: 0x12000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) Fixes: ac004e84164e ("mlxsw: pci: Wait longer before accessing the device after reset") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Petr Machata Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/net/ethernet/mellanox/mlxsw/pci_hw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index a2c1fbd3e0d1..0225c8f1e5ea 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -26,7 +26,7 @@ #define MLXSW_PCI_CIR_TIMEOUT_MSECS 1000 #define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS 900000 -#define MLXSW_PCI_SW_RESET_WAIT_MSECS 200 +#define MLXSW_PCI_SW_RESET_WAIT_MSECS 400 #define MLXSW_PCI_FW_READY 0xA1844 #define MLXSW_PCI_FW_READY_MASK 0xFFFF #define MLXSW_PCI_FW_READY_MAGIC 0x5E -- Gitee From 1975e0f6d4935f402804f3b3e4f2e5da95e2798b Mon Sep 17 00:00:00 2001 From: Sebastian Basierski Date: Mon, 17 Apr 2023 13:53:45 -0700 Subject: [PATCH 009/101] e1000e: Disable TSO on i219-LM card to increase speed stable inclusion from stable-5.10.179 commit 9a8dbfd7f3e5c3066b2a4fb0eab13b852b087e9b category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit 67d47b95119ad589b0a0b16b88b1dd9a04061ced ] While using i219-LM card currently it was only possible to achieve about 60% of maximum speed due to regression introduced in Linux 5.8. This was caused by TSO not being disabled by default despite commit f29801030ac6 ("e1000e: Disable TSO for buffer overrun workaround"). Fix that by disabling TSO during driver probe. 
Fixes: f29801030ac6 ("e1000e: Disable TSO for buffer overrun workaround") Signed-off-by: Sebastian Basierski Signed-off-by: Mateusz Palczewski Tested-by: Naama Meir Signed-off-by: Tony Nguyen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230417205345.1030801-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/net/ethernet/intel/e1000e/netdev.c | 51 +++++++++++----------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index ae0c9aaab48d..b700663a634d 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5294,31 +5294,6 @@ static void e1000_watchdog_task(struct work_struct *work) ew32(TARC(0), tarc0); } - /* disable TSO for pcie and 10/100 speeds, to avoid - * some hardware issues - */ - if (!(adapter->flags & FLAG_TSO_FORCE)) { - switch (adapter->link_speed) { - case SPEED_10: - case SPEED_100: - e_info("10/100 speed: disabling TSO\n"); - netdev->features &= ~NETIF_F_TSO; - netdev->features &= ~NETIF_F_TSO6; - break; - case SPEED_1000: - netdev->features |= NETIF_F_TSO; - netdev->features |= NETIF_F_TSO6; - break; - default: - /* oops */ - break; - } - if (hw->mac.type == e1000_pch_spt) { - netdev->features &= ~NETIF_F_TSO; - netdev->features &= ~NETIF_F_TSO6; - } - } - /* enable transmits in the hardware, need to do this * after setting TARC(0) */ @@ -7477,6 +7452,32 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_RXCSUM | NETIF_F_HW_CSUM); + /* disable TSO for pcie and 10/100 speeds to avoid + * some hardware issues and for i219 to fix transfer + * speed being capped at 60% + */ + if (!(adapter->flags & FLAG_TSO_FORCE)) { + switch (adapter->link_speed) { + case SPEED_10: + case SPEED_100: + e_info("10/100 speed: disabling TSO\n"); + netdev->features &= ~NETIF_F_TSO; + 
netdev->features &= ~NETIF_F_TSO6; + break; + case SPEED_1000: + netdev->features |= NETIF_F_TSO; + netdev->features |= NETIF_F_TSO6; + break; + default: + /* oops */ + break; + } + if (hw->mac.type == e1000_pch_spt) { + netdev->features &= ~NETIF_F_TSO; + netdev->features &= ~NETIF_F_TSO6; + } + } + /* Set user-changeable features (subset of all device features) */ netdev->hw_features = netdev->features; netdev->hw_features |= NETIF_F_RXFCS; -- Gitee From ca176bd24067934a12f9addfdc9844efef273264 Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Mon, 6 Mar 2023 12:25:49 +0000 Subject: [PATCH 010/101] f2fs: Fix f2fs_truncate_partial_nodes ftrace event stable inclusion from stable-5.10.179 commit 37882b203a2d206cfab4fb9e1ade4374d0db0540 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit 0b04d4c0542e8573a837b1d81b94209e48723b25 ] Fix the nid_t field so that its size is correctly reported in the text format embedded in trace.dat files. As it stands, it is reported as being of size 4: field:nid_t nid[3]; offset:24; size:4; signed:0; Instead of 12: field:nid_t nid[3]; offset:24; size:12; signed:0; This also fixes the reported offset of subsequent fields so that they match with the actual struct layout. 
Signed-off-by: Douglas Raillard Reviewed-by: Mukesh Ojha Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- include/trace/events/f2fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 2f863aba6acd..0b042847b9f8 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -513,7 +513,7 @@ TRACE_EVENT(f2fs_truncate_partial_nodes, TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(nid_t, nid[3]) + __array(nid_t, nid, 3) __field(int, depth) __field(int, err) ), -- Gitee From b80f758ef2f8a1aa4ee6b9971a65f80bc8f931ae Mon Sep 17 00:00:00 2001 From: Jonathan Denose Date: Fri, 17 Mar 2023 03:19:51 -0700 Subject: [PATCH 011/101] Input: i8042 - add quirk for Fujitsu Lifebook A574/H stable inclusion from stable-5.10.179 commit 8b44a83a16806f4cc4330562648e10c5ae0eb706 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit f5bad62f9107b701a6def7cac1f5f65862219b83 ] Fujitsu Lifebook A574/H requires the nomux option to properly probe the touchpad, especially when waking from sleep. 
Signed-off-by: Jonathan Denose Reviewed-by: Hans de Goede Link: https://lore.kernel.org/r/20230303152623.45859-1-jdenose@google.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/input/serio/i8042-x86ia64io.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 65c0081838e3..9dcdf21c50bd 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -601,6 +601,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { }, .driver_data = (void *)(SERIO_QUIRK_NOMUX) }, + { + /* Fujitsu Lifebook A574/H */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "FMVA0501PZ"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX) + }, { /* Gigabyte M912 */ .matches = { -- Gitee From 9f7938c2b10661261e640bc9c8fbd683cb8f879f Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Wed, 8 Mar 2023 11:59:33 -0800 Subject: [PATCH 012/101] selftests: sigaltstack: fix -Wuninitialized stable inclusion from stable-5.10.179 commit 01bd481b1b9430bb16cbb05b25b7c87eb2514111 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit 05107edc910135d27fe557267dc45be9630bf3dd ] Building sigaltstack with clang via: $ ARCH=x86 make LLVM=1 -C tools/testing/selftests/sigaltstack/ produces the following warning: warning: variable 'sp' is uninitialized when used here [-Wuninitialized] if (sp < (unsigned long)sstack || ^~ Clang expects these to be declared at global scope; we've fixed this in the kernel proper by using the macro `current_stack_pointer`. 
This is defined in different headers for different target architectures, so just create a new header that defines the arch-specific register names for the stack pointer register, and define it for more targets (at least the ones that support current_stack_pointer/ARCH_HAS_CURRENT_STACK_POINTER). Reported-by: Linux Kernel Functional Testing Link: https://lore.kernel.org/lkml/CA+G9fYsi3OOu7yCsMutpzKDnBMAzJBCPimBp86LhGBa0eCnEpA@mail.gmail.com/ Signed-off-by: Nick Desaulniers Reviewed-by: Kees Cook Tested-by: Linux Kernel Functional Testing Tested-by: Anders Roxell Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing create mode 100644 tools/testing/selftests/sigaltstack/current_stack_pointer.h Signed-off-by: wanxiaoqing --- .../sigaltstack/current_stack_pointer.h | 23 +++++++++++++++++++ tools/testing/selftests/sigaltstack/sas.c | 7 +----- 2 files changed, 24 insertions(+), 6 deletions(-) create mode 100644 tools/testing/selftests/sigaltstack/current_stack_pointer.h diff --git a/tools/testing/selftests/sigaltstack/current_stack_pointer.h b/tools/testing/selftests/sigaltstack/current_stack_pointer.h new file mode 100644 index 000000000000..ea9bdf3a90b1 --- /dev/null +++ b/tools/testing/selftests/sigaltstack/current_stack_pointer.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#if __alpha__ +register unsigned long sp asm("$30"); +#elif __arm__ || __aarch64__ || __csky__ || __m68k__ || __mips__ || __riscv +register unsigned long sp asm("sp"); +#elif __i386__ +register unsigned long sp asm("esp"); +#elif __loongarch64 +register unsigned long sp asm("$sp"); +#elif __ppc__ +register unsigned long sp asm("r1"); +#elif __s390x__ +register unsigned long sp asm("%15"); +#elif __sh__ +register unsigned long sp asm("r15"); +#elif __x86_64__ +register unsigned long sp asm("rsp"); +#elif __XTENSA__ +register unsigned long sp asm("a1"); +#else +#error "implement current_stack_pointer equivalent" +#endif diff --git 
a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c index 8934a3766d20..41646c22384a 100644 --- a/tools/testing/selftests/sigaltstack/sas.c +++ b/tools/testing/selftests/sigaltstack/sas.c @@ -19,6 +19,7 @@ #include #include "../kselftest.h" +#include "current_stack_pointer.h" #ifndef SS_AUTODISARM #define SS_AUTODISARM (1U << 31) @@ -40,12 +41,6 @@ void my_usr1(int sig, siginfo_t *si, void *u) stack_t stk; struct stk_data *p; -#if __s390x__ - register unsigned long sp asm("%15"); -#else - register unsigned long sp asm("sp"); -#endif - if (sp < (unsigned long)sstack || sp >= (unsigned long)sstack + SIGSTKSZ) { ksft_exit_fail_msg("SP is not on sigaltstack\n"); -- Gitee From f0b564be3f25ef96f2f161b4637f005fe69b74e5 Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Fri, 24 Mar 2023 14:52:49 +0100 Subject: [PATCH 013/101] scsi: megaraid_sas: Fix fw_crash_buffer_show() stable inclusion from stable-5.10.179 commit 24ddcc6a635b01a8fa2f320461011e67c0ea5abc category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit 0808ed6ebbc292222ca069d339744870f6d801da ] If crash_dump_buf is not allocated then crash dump can't be available. Replace logical 'and' with 'or'. Signed-off-by: Tomas Henzl Link: https://lore.kernel.org/r/20230324135249.9733-1-thenzl@redhat.com Signed-off-by: Martin K. 
Petersen Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/scsi/megaraid/megaraid_sas_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 718160ca66b3..45fbf12be40b 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -3266,7 +3266,7 @@ fw_crash_buffer_show(struct device *cdev, spin_lock_irqsave(&instance->crashdump_lock, flags); buff_offset = instance->fw_crash_buffer_offset; - if (!instance->crash_dump_buf && + if (!instance->crash_dump_buf || !((instance->fw_crash_state == AVAILABLE) || (instance->fw_crash_state == COPYING))) { dev_err(&instance->pdev->dev, -- Gitee From 23caf05360354b431f268d092670608950ae405d Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 22 Mar 2023 11:22:11 +0900 Subject: [PATCH 014/101] scsi: core: Improve scsi_vpd_inquiry() checks stable inclusion from stable-5.10.179 commit b33f28ea45e9eb0f84e7bcc0ddc0d2d06a9acedb category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit f0aa59a33d2ac2267d260fe21eaf92500df8e7b4 ] Some USB-SATA adapters have broken behavior when an unsupported VPD page is probed: Depending on the VPD page number, a 4-byte header with a valid VPD page number but with a 0 length is returned. Currently, scsi_vpd_inquiry() only checks that the page number is valid to determine if the page is valid, which results in receiving only the 4-byte header for the non-existent page. This error manifests itself very often with page 0xb9 for the Concurrent Positioning Ranges detection done by sd_read_cpr(), resulting in the following error message: sd 0:0:0:0: [sda] Invalid Concurrent Positioning Ranges VPD page Prevent such misleading error message by adding a check in scsi_vpd_inquiry() to verify that the page length is not 0. 
Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20230322022211.116327-1-damien.lemoal@opensource.wdc.com Reviewed-by: Benjamin Block Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/scsi/scsi.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 6ad834d61d4c..d6c25a88cebc 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -317,11 +317,18 @@ static int scsi_vpd_inquiry(struct scsi_device *sdev, unsigned char *buffer, if (result) return -EIO; - /* Sanity check that we got the page back that we asked for */ + /* + * Sanity check that we got the page back that we asked for and that + * the page size is not 0. + */ if (buffer[1] != page) return -EIO; - return get_unaligned_be16(&buffer[2]) + 4; + result = get_unaligned_be16(&buffer[2]); + if (!result) + return -EIO; + + return result + 4; } /** -- Gitee From ffa94d7950c7f6b027b900865956386132c988bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Fern=C3=A1ndez=20Rojas?= Date: Thu, 23 Mar 2023 20:48:41 +0100 Subject: [PATCH 015/101] net: dsa: b53: mmap: add phy ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.179 commit a36246a7480d9590b9b790a6b5b3ac95f6e41833 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit 45977e58ce65ed0459edc9a0466d9dfea09463f5 ] Implement phy_read16() and phy_write16() ops for B53 MMAP to avoid accessing B53_PORT_MII_PAGE registers which hangs the device. This access should be done through the MDIO Mux bus controller. Signed-off-by: Álvaro Fernández Rojas Acked-by: Florian Fainelli Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/net/dsa/b53/b53_mmap.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c index c628d0980c0b..1d52cb3e46d5 100644 --- a/drivers/net/dsa/b53/b53_mmap.c +++ b/drivers/net/dsa/b53/b53_mmap.c @@ -215,6 +215,18 @@ static int b53_mmap_write64(struct b53_device *dev, u8 page, u8 reg, return 0; } +static int b53_mmap_phy_read16(struct b53_device *dev, int addr, int reg, + u16 *value) +{ + return -EIO; +} + +static int b53_mmap_phy_write16(struct b53_device *dev, int addr, int reg, + u16 value) +{ + return -EIO; +} + static const struct b53_io_ops b53_mmap_ops = { .read8 = b53_mmap_read8, .read16 = b53_mmap_read16, @@ -226,6 +238,8 @@ static const struct b53_io_ops b53_mmap_ops = { .write32 = b53_mmap_write32, .write48 = b53_mmap_write48, .write64 = b53_mmap_write64, + .phy_read16 = b53_mmap_phy_read16, + .phy_write16 = b53_mmap_phy_write16, }; static int b53_mmap_probe(struct platform_device *pdev) -- Gitee From 047db5d0283750dd7dac997c46a31a07f44bfaae Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 6 Mar 2023 12:31:30 +0100 Subject: [PATCH 016/101] s390/ptrace: fix PTRACE_GET_LAST_BREAK error handling stable inclusion from stable-5.10.179 commit 8c746b665089f9da50c34fc0369aa7d7f7c5e609 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit f9bbf25e7b2b74b52b2f269216a92657774f239c ] Return -EFAULT if put_user() for the PTRACE_GET_LAST_BREAK request fails, instead of silently ignoring it. 
Reviewed-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- arch/s390/kernel/ptrace.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index a76dd27fb2e8..3009bb527252 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -500,9 +500,7 @@ long arch_ptrace(struct task_struct *child, long request, } return 0; case PTRACE_GET_LAST_BREAK: - put_user(child->thread.last_break, - (unsigned long __user *) data); - return 0; + return put_user(child->thread.last_break, (unsigned long __user *)data); case PTRACE_ENABLE_TE: if (!MACHINE_HAS_TE) return -EIO; @@ -854,9 +852,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, } return 0; case PTRACE_GET_LAST_BREAK: - put_user(child->thread.last_break, - (unsigned int __user *) data); - return 0; + return put_user(child->thread.last_break, (unsigned int __user *)data); } return compat_ptrace_request(child, request, addr, data); } -- Gitee From df7b5a8efaad6bd2bfe7c99366dd9d7634fe38f9 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 20 Mar 2023 15:33:34 +0200 Subject: [PATCH 017/101] nvme-tcp: fix a possible UAF when failing to allocate an io queue stable inclusion from stable-5.10.179 commit 0c9cbfc951c4c4f686df1aee2aceb93935eea729 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit 88eaba80328b31ef81813a1207b4056efd7006a6 ] When we allocate a nvme-tcp queue, we set the data_ready callback before we actually need to use it. This creates the potential that if a stray controller sends us data on the socket before we connect, we can trigger the io_work and start consuming the socket. 
In this case reported: we failed to allocate one of the io queues, and as we start releasing the queues that we already allocated, we get a UAF [1] from the io_work which is running before it should really. Fix this by setting the socket ops callbacks only before we start the queue, so that we can't accidentally schedule the io_work in the initialization phase before the queue started. While we are at it, rename nvme_tcp_restore_sock_calls to pair with nvme_tcp_setup_sock_ops. [1]: [16802.107284] nvme nvme4: starting error recovery [16802.109166] nvme nvme4: Reconnecting in 10 seconds... [16812.173535] nvme nvme4: failed to connect socket: -111 [16812.173745] nvme nvme4: Failed reconnect attempt 1 [16812.173747] nvme nvme4: Reconnecting in 10 seconds... [16822.413555] nvme nvme4: failed to connect socket: -111 [16822.413762] nvme nvme4: Failed reconnect attempt 2 [16822.413765] nvme nvme4: Reconnecting in 10 seconds... [16832.661274] nvme nvme4: creating 32 I/O queues. [16833.919887] BUG: kernel NULL pointer dereference, address: 0000000000000088 [16833.920068] nvme nvme4: Failed reconnect attempt 3 [16833.920094] #PF: supervisor write access in kernel mode [16833.920261] nvme nvme4: Reconnecting in 10 seconds... [16833.920368] #PF: error_code(0x0002) - not-present page [16833.921086] Workqueue: nvme_tcp_wq nvme_tcp_io_work [nvme_tcp] [16833.921191] RIP: 0010:_raw_spin_lock_bh+0x17/0x30 ... [16833.923138] Call Trace: [16833.923271] [16833.923402] lock_sock_nested+0x1e/0x50 [16833.923545] nvme_tcp_try_recv+0x40/0xa0 [nvme_tcp] [16833.923685] nvme_tcp_io_work+0x68/0xa0 [nvme_tcp] [16833.923824] process_one_work+0x1e8/0x390 [16833.923969] worker_thread+0x53/0x3d0 [16833.924104] ? process_one_work+0x390/0x390 [16833.924240] kthread+0x124/0x150 [16833.924376] ? 
set_kthread_struct+0x50/0x50 [16833.924518] ret_from_fork+0x1f/0x30 [16833.924655] Reported-by: Yanjun Zhang Signed-off-by: Sagi Grimberg Tested-by: Yanjun Zhang Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/nvme/host/tcp.c | 46 +++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 22c5116b41be..167bd7ddd0ef 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1535,22 +1535,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, if (ret) goto err_init_connect; - queue->rd_enabled = true; set_bit(NVME_TCP_Q_ALLOCATED, &queue->flags); - nvme_tcp_init_recv_ctx(queue); - - write_lock_bh(&queue->sock->sk->sk_callback_lock); - queue->sock->sk->sk_user_data = queue; - queue->state_change = queue->sock->sk->sk_state_change; - queue->data_ready = queue->sock->sk->sk_data_ready; - queue->write_space = queue->sock->sk->sk_write_space; - queue->sock->sk->sk_data_ready = nvme_tcp_data_ready; - queue->sock->sk->sk_state_change = nvme_tcp_state_change; - queue->sock->sk->sk_write_space = nvme_tcp_write_space; -#ifdef CONFIG_NET_RX_BUSY_POLL - queue->sock->sk->sk_ll_usec = 1; -#endif - write_unlock_bh(&queue->sock->sk->sk_callback_lock); return 0; @@ -1569,7 +1554,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, return ret; } -static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue) +static void nvme_tcp_restore_sock_ops(struct nvme_tcp_queue *queue) { struct socket *sock = queue->sock; @@ -1584,7 +1569,7 @@ static void nvme_tcp_restore_sock_calls(struct nvme_tcp_queue *queue) static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue) { kernel_sock_shutdown(queue->sock, SHUT_RDWR); - nvme_tcp_restore_sock_calls(queue); + nvme_tcp_restore_sock_ops(queue); cancel_work_sync(&queue->io_work); } @@ -1599,21 +1584,42 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, 
int qid) mutex_unlock(&queue->queue_lock); } +static void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue) +{ + write_lock_bh(&queue->sock->sk->sk_callback_lock); + queue->sock->sk->sk_user_data = queue; + queue->state_change = queue->sock->sk->sk_state_change; + queue->data_ready = queue->sock->sk->sk_data_ready; + queue->write_space = queue->sock->sk->sk_write_space; + queue->sock->sk->sk_data_ready = nvme_tcp_data_ready; + queue->sock->sk->sk_state_change = nvme_tcp_state_change; + queue->sock->sk->sk_write_space = nvme_tcp_write_space; +#ifdef CONFIG_NET_RX_BUSY_POLL + queue->sock->sk->sk_ll_usec = 1; +#endif + write_unlock_bh(&queue->sock->sk->sk_callback_lock); +} + static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); + struct nvme_tcp_queue *queue = &ctrl->queues[idx]; int ret; + queue->rd_enabled = true; + nvme_tcp_init_recv_ctx(queue); + nvme_tcp_setup_sock_ops(queue); + if (idx) ret = nvmf_connect_io_queue(nctrl, idx, false); else ret = nvmf_connect_admin_queue(nctrl); if (!ret) { - set_bit(NVME_TCP_Q_LIVE, &ctrl->queues[idx].flags); + set_bit(NVME_TCP_Q_LIVE, &queue->flags); } else { - if (test_bit(NVME_TCP_Q_ALLOCATED, &ctrl->queues[idx].flags)) - __nvme_tcp_stop_queue(&ctrl->queues[idx]); + if (test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags)) + __nvme_tcp_stop_queue(queue); dev_err(nctrl->device, "failed to connect queue: %d ret=%d\n", idx, ret); } -- Gitee From 2e3ef0fd5397d7c886fd322ad7c71f810815d5a2 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 29 Mar 2023 10:02:59 +0200 Subject: [PATCH 018/101] xen/netback: use same error messages for same errors stable inclusion from stable-5.10.179 commit c215c636819ce09c336f8c27f1495e3b958208f3 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit 2eca98e5b24d01c02b46c67be05a5f98cc9789b1 ] Issue the same error message in case an illegal page boundary crossing has been 
detected in both cases where this is tested. Suggested-by: Jan Beulich Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Link: https://lore.kernel.org/r/20230329080259.14823-1-jgross@suse.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/net/xen-netback/netback.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 98e8b461bf30..1c366ddf62bc 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1001,10 +1001,8 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, /* No crossing a page as the payload mustn't fragment. */ if (unlikely((txreq.offset + txreq.size) > XEN_PAGE_SIZE)) { - netdev_err(queue->vif->dev, - "txreq.offset: %u, size: %u, end: %lu\n", - txreq.offset, txreq.size, - (unsigned long)(txreq.offset&~XEN_PAGE_MASK) + txreq.size); + netdev_err(queue->vif->dev, "Cross page boundary, txreq.offset: %u, size: %u\n", + txreq.offset, txreq.size); xenvif_fatal_tx_err(queue->vif); break; } -- Gitee From 29a7398f58df213fd3b51f63f60d4f71196b6a43 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 25 Aug 2021 09:54:47 +0530 Subject: [PATCH 019/101] powerpc/doc: Fix htmldocs errors stable inclusion from stable-5.10.179 commit a4e800a7bd67490722c8466472c7ad03e3754902 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit f50da6edbf1ebf35dd8070847bfab5cb988d472b upstream. Fix make htmldocs related errors with the newly added associativity.rst doc file. 
Reported-by: Stephen Rothwell Tested-by: Stephen Rothwell # build test Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210825042447.106219-1-aneesh.kumar@linux.ibm.com Cc: Salvatore Bonaccorso Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- Documentation/powerpc/associativity.rst | 29 +++++++++++++------------ Documentation/powerpc/index.rst | 1 + 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/Documentation/powerpc/associativity.rst b/Documentation/powerpc/associativity.rst index 07e7dd3d6c87..4d01c7368561 100644 --- a/Documentation/powerpc/associativity.rst +++ b/Documentation/powerpc/associativity.rst @@ -1,6 +1,6 @@ ============================ NUMA resource associativity -============================= +============================ Associativity represents the groupings of the various platform resources into domains of substantially similar mean performance relative to resources outside @@ -20,11 +20,11 @@ A value of 1 indicates the usage of Form 1 associativity. For Form 2 associativi bit 2 of byte 5 in the "ibm,architecture-vec-5" property is used. Form 0 ------ +------ Form 0 associativity supports only two NUMA distances (LOCAL and REMOTE). Form 1 ------ +------ With Form 1 a combination of ibm,associativity-reference-points, and ibm,associativity device tree properties are used to determine the NUMA distance between resource groups/domains. @@ -78,17 +78,18 @@ numa-lookup-index-table. 
For ex: ibm,numa-lookup-index-table = <3 0 8 40>; -ibm,numa-distace-table = <9>, /bits/ 8 < 10 20 80 - 20 10 160 - 80 160 10>; - | 0 8 40 ---|------------ - | -0 | 10 20 80 - | -8 | 20 10 160 - | -40| 80 160 10 +ibm,numa-distace-table = <9>, /bits/ 8 < 10 20 80 20 10 160 80 160 10>; + +:: + + | 0 8 40 + --|------------ + | + 0 | 10 20 80 + | + 8 | 20 10 160 + | + 40| 80 160 10 A possible "ibm,associativity" property for resources in node 0, 8 and 40 diff --git a/Documentation/powerpc/index.rst b/Documentation/powerpc/index.rst index 6ec64b0d5257..4663b72caab8 100644 --- a/Documentation/powerpc/index.rst +++ b/Documentation/powerpc/index.rst @@ -7,6 +7,7 @@ powerpc .. toctree:: :maxdepth: 1 + associativity booting bootwrapper cpu_families -- Gitee From f858ca5b8fa43fde887edc9fe3611f9b26708cfa Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Fri, 9 Apr 2021 10:27:43 -0700 Subject: [PATCH 020/101] xfs: drop submit side trans alloc for append ioends stable inclusion from stable-5.10.179 commit 77ac8f2ad4adcddfba0d539030bbeb53b35f6099 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 7cd3099f4925d7c15887d1940ebd65acd66100f5 upstream. Per-inode ioend completion batching has a log reservation deadlock vector between preallocated append transactions and transactions that are acquired at completion time for other purposes (i.e., unwritten extent conversion or COW fork remaps). For example, if the ioend completion workqueue task executes on a batch of ioends that are sorted such that an append ioend sits at the tail, it's possible for the outstanding append transaction reservation to block allocation of transactions required to process preceding ioends in the list. Append ioend completion is historically the common path for on-disk inode size updates. While file extending writes may have completed sometime earlier, the on-disk inode size is only updated after successful writeback completion. 
These transactions are preallocated serially from writeback context to mitigate concurrency and associated log reservation pressure across completions processed by multi-threaded workqueue tasks. However, now that delalloc blocks unconditionally map to unwritten extents at physical block allocation time, size updates via append ioends are relatively rare. This means that inode size updates most commonly occur as part of the preexisting completion time transaction to convert unwritten extents. As a result, there is no longer a strong need to preallocate size update transactions. Remove the preallocation of inode size update transactions to avoid the ioend completion processing log reservation deadlock. Instead, continue to send all potential size extending ioends to workqueue context for completion and allocate the transaction from that context. This ensures that no outstanding log reservation is owned by the ioend completion worker task when it begins to process ioends. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reported-by: Christian Theune Link: https://lore.kernel.org/linux-xfs/CAOQ4uxjj2UqA0h4Y31NbmpHksMhVrXfXjLG4Tnz3zq_UR-3gSA@mail.gmail.com/ Signed-off-by: Amir Goldstein Acked-by: Darrick J. 
Wong Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- fs/xfs/xfs_aops.c | 45 +++------------------------------------------ 1 file changed, 3 insertions(+), 42 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 953de843d9c3..e341d6531e68 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -39,33 +39,6 @@ static inline bool xfs_ioend_is_append(struct iomap_ioend *ioend) XFS_I(ioend->io_inode)->i_d.di_size; } -STATIC int -xfs_setfilesize_trans_alloc( - struct iomap_ioend *ioend) -{ - struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount; - struct xfs_trans *tp; - int error; - - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp); - if (error) - return error; - - ioend->io_private = tp; - - /* - * We may pass freeze protection with a transaction. So tell lockdep - * we released it. - */ - __sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS); - /* - * We hand off the transaction to the completion thread now, so - * clear the flag here. - */ - xfs_trans_clear_context(tp); - return 0; -} - /* * Update on-disk file size now that data has been written to disk. 
*/ @@ -191,12 +164,10 @@ xfs_end_ioend( error = xfs_reflink_end_cow(ip, offset, size); else if (ioend->io_type == IOMAP_UNWRITTEN) error = xfs_iomap_write_unwritten(ip, offset, size, false); - else - ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_private); + if (!error && xfs_ioend_is_append(ioend)) + error = xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); done: - if (ioend->io_private) - error = xfs_setfilesize_ioend(ioend, error); iomap_finish_ioends(ioend, error); memalloc_nofs_restore(nofs_flag); } @@ -246,7 +217,7 @@ xfs_end_io( static inline bool xfs_ioend_needs_workqueue(struct iomap_ioend *ioend) { - return ioend->io_private || + return xfs_ioend_is_append(ioend) || ioend->io_type == IOMAP_UNWRITTEN || (ioend->io_flags & IOMAP_F_SHARED); } @@ -259,8 +230,6 @@ xfs_end_bio( struct xfs_inode *ip = XFS_I(ioend->io_inode); unsigned long flags; - ASSERT(xfs_ioend_needs_workqueue(ioend)); - spin_lock_irqsave(&ip->i_ioend_lock, flags); if (list_empty(&ip->i_ioend_list)) WARN_ON_ONCE(!queue_work(ip->i_mount->m_unwritten_workqueue, @@ -510,14 +479,6 @@ xfs_prepare_ioend( ioend->io_offset, ioend->io_size); } - /* Reserve log space if we might write beyond the on-disk inode size. */ - if (!status && - ((ioend->io_flags & IOMAP_F_SHARED) || - ioend->io_type != IOMAP_UNWRITTEN) && - xfs_ioend_is_append(ioend) && - !ioend->io_private) - status = xfs_setfilesize_trans_alloc(ioend); - memalloc_nofs_restore(nofs_flag); if (xfs_ioend_needs_workqueue(ioend)) -- Gitee From 917bb2025bf0faf1d8d4da6ef903f6c1082b160d Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Mon, 3 Apr 2023 21:14:55 -0400 Subject: [PATCH 021/101] iio: light: tsl2772: fix reading proximity-diodes from device tree stable inclusion from stable-5.10.179 commit 09daff9c3cd77c4c2e195de82a2082ee956db0e0 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit b1cb00d51e361cf5af93649917d9790e1623647e upstream. 
tsl2772_read_prox_diodes() will correctly parse the properties from device tree to determine which proximity diode(s) to read from, however it didn't actually set this value on the struct tsl2772_settings. Let's go ahead and fix that. Reported-by: Tom Rix Link: https://lore.kernel.org/lkml/20230327120823.1369700-1-trix@redhat.com/ Fixes: 94cd1113aaa0 ("iio: tsl2772: add support for reading proximity led settings from device tree") Signed-off-by: Brian Masney Link: https://lore.kernel.org/r/20230404011455.339454-1-bmasney@redhat.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/iio/light/tsl2772.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/light/tsl2772.c b/drivers/iio/light/tsl2772.c index d79205361dfa..ff33ad371420 100644 --- a/drivers/iio/light/tsl2772.c +++ b/drivers/iio/light/tsl2772.c @@ -606,6 +606,7 @@ static int tsl2772_read_prox_diodes(struct tsl2772_chip *chip) return -EINVAL; } } + chip->settings.prox_diode = prox_diode_mask; return 0; } -- Gitee From 0a29e47435db944cefd12ec5c2bcb53193613af7 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 18 Apr 2023 02:35:13 +0900 Subject: [PATCH 022/101] nilfs2: initialize unused bytes in segment summary blocks stable inclusion from stable-5.10.179 commit 2c90ef37151f9143c1bd18d8f76563f817632dbd category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit ef832747a82dfbc22a3702219cc716f449b24e4a upstream. Syzbot still reports uninit-value in nilfs_add_checksums_on_logs() for KMSAN enabled kernels after applying commit 7397031622e0 ("nilfs2: initialize "struct nilfs_binfo_dat"->bi_pad field"). This is because the unused bytes at the end of each block in segment summaries are not initialized. So this fixes the issue by padding the unused bytes with null bytes. 
Link: https://lkml.kernel.org/r/20230417173513.12598-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Reported-by: syzbot+048585f3f4227bb2b49b@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?extid=048585f3f4227bb2b49b Cc: Alexander Potapenko Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- fs/nilfs2/segment.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 5e835bbf1ffb..fff2cdc69e5e 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -435,6 +435,23 @@ static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci) return 0; } +/** + * nilfs_segctor_zeropad_segsum - zero pad the rest of the segment summary area + * @sci: segment constructor object + * + * nilfs_segctor_zeropad_segsum() zero-fills unallocated space at the end of + * the current segment summary block. + */ +static void nilfs_segctor_zeropad_segsum(struct nilfs_sc_info *sci) +{ + struct nilfs_segsum_pointer *ssp; + + ssp = sci->sc_blk_cnt > 0 ? 
&sci->sc_binfo_ptr : &sci->sc_finfo_ptr; + if (ssp->offset < ssp->bh->b_size) + memset(ssp->bh->b_data + ssp->offset, 0, + ssp->bh->b_size - ssp->offset); +} + static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci) { sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks; @@ -443,6 +460,7 @@ static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci) * The current segment is filled up * (internal code) */ + nilfs_segctor_zeropad_segsum(sci); sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg); return nilfs_segctor_reset_segment_buffer(sci); } @@ -547,6 +565,7 @@ static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci, goto retry; } if (unlikely(required)) { + nilfs_segctor_zeropad_segsum(sci); err = nilfs_segbuf_extend_segsum(segbuf); if (unlikely(err)) goto failed; @@ -1536,6 +1555,7 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci, nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA); sci->sc_stage = prev_stage; } + nilfs_segctor_zeropad_segsum(sci); nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile); return 0; -- Gitee From 1cb38dd7854b7f2a3aaad4f05eb512bf07e6e1d8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 1 Apr 2023 22:03:27 +0200 Subject: [PATCH 023/101] memstick: fix memory leak if card device is never registered stable inclusion from stable-5.10.179 commit e10a6d88ae9b81413da6a6d415c2b705100dd872 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 4b6d621c9d859ff89e68cebf6178652592676013 upstream. When calling dev_set_name() memory is allocated for the name for the struct device. Once that structure device is registered, or attempted to be registerd, with the driver core, the driver core will handle cleaning up that memory when the device is removed from the system. 
Unfortunately for the memstick code, there is an error path that causes the struct device to never be registered, and so the memory allocated in dev_set_name will be leaked. Fix that leak by manually freeing it right before the memory for the device is freed. Cc: Maxim Levitsky Cc: Alex Dubov Cc: Ulf Hansson Cc: "Rafael J. Wysocki" Cc: Hans de Goede Cc: Kay Sievers Cc: linux-mmc@vger.kernel.org Fixes: 0252c3b4f018 ("memstick: struct device - replace bus_id with dev_name(), dev_set_name()") Cc: stable Co-developed-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman Co-developed-by: Mirsad Goran Todorovac Signed-off-by: Mirsad Goran Todorovac Link: https://lore.kernel.org/r/20230401200327.16800-1-gregkh@linuxfoundation.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/memstick/core/memstick.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index 12bc3f5a6cbb..1c7a9dcfed65 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -412,6 +412,7 @@ static struct memstick_dev *memstick_alloc_card(struct memstick_host *host) return card; err_out: host->card = old_card; + kfree_const(card->dev.kobj.name); kfree(card); return NULL; } @@ -470,8 +471,10 @@ static void memstick_check(struct work_struct *work) put_device(&card->dev); host->card = NULL; } - } else + } else { + kfree_const(card->dev.kobj.name); kfree(card); + } } out_power_off: -- Gitee From 8c6a4ef1242971cbc5940c75b4f1d4407ad21434 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 17 Feb 2023 17:21:54 +0100 Subject: [PATCH 024/101] kernel/sys.c: fix and improve control flow in __sys_setres[ug]id() stable inclusion from stable-5.10.179 commit c61928fcca27a5ed5b3dde9930023182015de2ae category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit
659c0ce1cb9efc7f58d380ca4bb2a51ae9e30553 upstream. Linux Security Modules (LSMs) that implement the "capable" hook will usually emit an access denial message to the audit log whenever they "block" the current task from using the given capability based on their security policy. The occurrence of a denial is used as an indication that the given task has attempted an operation that requires the given access permission, so the callers of functions that perform LSM permission checks must take care to avoid calling them too early (before it is decided if the permission is actually needed to perform the requested operation). The __sys_setres[ug]id() functions violate this convention by first calling ns_capable_setid() and only then checking if the operation requires the capability or not. It means that any caller that has the capability granted by DAC (task's capability set) but not by MAC (LSMs) will generate a "denied" audit record, even if is doing an operation for which the capability is not required. Fix this by reordering the checks such that ns_capable_setid() is checked last and -EPERM is returned immediately if it returns false. While there, also do two small optimizations: * move the capability check before prepare_creds() and * bail out early in case of a no-op. Link: https://lkml.kernel.org/r/20230217162154.837549-1-omosnace@redhat.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Ondrej Mosnacek Cc: Eric W. 
Biederman Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- kernel/sys.c | 69 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index ee71621340dc..ad2d6b91ecde 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -635,6 +635,7 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) struct cred *new; int retval; kuid_t kruid, keuid, ksuid; + bool ruid_new, euid_new, suid_new; kruid = make_kuid(ns, ruid); keuid = make_kuid(ns, euid); @@ -649,25 +650,29 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) if ((suid != (uid_t) -1) && !uid_valid(ksuid)) return -EINVAL; + old = current_cred(); + + /* check for no-op */ + if ((ruid == (uid_t) -1 || uid_eq(kruid, old->uid)) && + (euid == (uid_t) -1 || (uid_eq(keuid, old->euid) && + uid_eq(keuid, old->fsuid))) && + (suid == (uid_t) -1 || uid_eq(ksuid, old->suid))) + return 0; + + ruid_new = ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) && + !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid); + euid_new = euid != (uid_t) -1 && !uid_eq(keuid, old->uid) && + !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid); + suid_new = suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) && + !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid); + if ((ruid_new || euid_new || suid_new) && + !ns_capable_setid(old->user_ns, CAP_SETUID)) + return -EPERM; + new = prepare_creds(); if (!new) return -ENOMEM; - old = current_cred(); - - retval = -EPERM; - if (!ns_capable_setid(old->user_ns, CAP_SETUID)) { - if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) && - !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid)) - goto error; - if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) && - !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid)) - goto error; - if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) && - !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid)) - goto error; - } - if (ruid 
!= (uid_t) -1) { new->uid = kruid; if (!uid_eq(kruid, old->uid)) { @@ -727,6 +732,7 @@ long __sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) struct cred *new; int retval; kgid_t krgid, kegid, ksgid; + bool rgid_new, egid_new, sgid_new; krgid = make_kgid(ns, rgid); kegid = make_kgid(ns, egid); @@ -739,23 +745,28 @@ long __sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) if ((sgid != (gid_t) -1) && !gid_valid(ksgid)) return -EINVAL; + old = current_cred(); + + /* check for no-op */ + if ((rgid == (gid_t) -1 || gid_eq(krgid, old->gid)) && + (egid == (gid_t) -1 || (gid_eq(kegid, old->egid) && + gid_eq(kegid, old->fsgid))) && + (sgid == (gid_t) -1 || gid_eq(ksgid, old->sgid))) + return 0; + + rgid_new = rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) && + !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid); + egid_new = egid != (gid_t) -1 && !gid_eq(kegid, old->gid) && + !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid); + sgid_new = sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) && + !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid); + if ((rgid_new || egid_new || sgid_new) && + !ns_capable_setid(old->user_ns, CAP_SETGID)) + return -EPERM; + new = prepare_creds(); if (!new) return -ENOMEM; - old = current_cred(); - - retval = -EPERM; - if (!ns_capable_setid(old->user_ns, CAP_SETGID)) { - if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) && - !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid)) - goto error; - if (egid != (gid_t) -1 && !gid_eq(kegid, old->gid) && - !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid)) - goto error; - if (sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) && - !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid)) - goto error; - } if (rgid != (gid_t) -1) new->gid = krgid; -- Gitee From ddecba2d227b1ce75cd129861baa8cbcd02ecba3 Mon Sep 17 00:00:00 2001 From: Bhavya Kapoor Date: Fri, 17 Mar 2023 14:57:11 +0530 Subject: [PATCH 025/101] mmc: sdhci_am654: Set HIGH_SPEED_ENA for SDR12 and SDR25 stable inclusion from 
stable-5.10.179 commit 651b0bf43d6706496c993312c3e86cb858606eba category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 2265098fd6a6272fde3fd1be5761f2f5895bd99a upstream. Timing Information in Datasheet assumes that HIGH_SPEED_ENA=1 should be set for SDR12 and SDR25 modes. But sdhci_am654 driver clears HIGH_SPEED_ENA register. Thus, Modify sdhci_am654 to not clear HIGH_SPEED_ENA (HOST_CONTROL[2]) bit for SDR12 and SDR25 speed modes. Fixes: e374e87538f4 ("mmc: sdhci_am654: Clear HISPD_ENA in some lower speed modes") Signed-off-by: Bhavya Kapoor Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230317092711.660897-1-b-kapoor@ti.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/mmc/host/sdhci_am654.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c index bf2592774165..8e52905458f9 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ b/drivers/mmc/host/sdhci_am654.c @@ -351,8 +351,6 @@ static void sdhci_am654_write_b(struct sdhci_host *host, u8 val, int reg) */ case MMC_TIMING_SD_HS: case MMC_TIMING_MMC_HS: - case MMC_TIMING_UHS_SDR12: - case MMC_TIMING_UHS_SDR25: val &= ~SDHCI_CTRL_HISPD; } } -- Gitee From 9a2266d90903149b11b9f4984cff4d9d30ffe429 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 5 Apr 2023 11:51:20 -0400 Subject: [PATCH 026/101] mm/khugepaged: check again on anon uffd-wp during isolation stable inclusion from stable-5.10.179 commit 2523d9d7bb5ca7986ee29b5e0fb66d1221bffbc1 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit dd47ac428c3f5f3bcabe845f36be870fe6c20784 upstream. Khugepaged collapse an anonymous thp in two rounds of scans. The 2nd round done in __collapse_huge_page_isolate() after hpage_collapse_scan_pmd(), during which all the locks will be released temporarily. 
It means the pgtable can change during this phase before 2nd round starts. It's logically possible some ptes got wr-protected during this phase, and we can erroneously collapse a thp without noticing some ptes are wr-protected by userfault. e1e267c7928f wanted to avoid it but it only did that for the 1st phase, not the 2nd phase. Since __collapse_huge_page_isolate() happens after a round of small page swapins, we don't need to worry on any !present ptes - if it existed khugepaged will already bail out. So we only need to check present ptes with uffd-wp bit set there. This is something I found only but never had a reproducer, I thought it was one caused a bug in Muhammad's recent pagemap new ioctl work, but it turns out it's not the cause of that but a userspace bug. However this seems to still be a real bug even with a very small race window, still worth to have it fixed and copy stable. Link: https://lkml.kernel.org/r/20230405155120.3608140-1-peterx@redhat.com Fixes: e1e267c7928f ("khugepaged: skip collapse if uffd-wp detected") Signed-off-by: Peter Xu Reviewed-by: David Hildenbrand Reviewed-by: Yang Shi Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: Mike Rapoport Cc: Nadav Amit Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- mm/khugepaged.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index b77186ec70e9..28e18777ec51 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -622,6 +622,10 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, result = SCAN_PTE_NON_PRESENT; goto out; } + if (pte_uffd_wp(pteval)) { + result = SCAN_PTE_UFFD_WP; + goto out; + } page = vm_normal_page(vma, address, pteval); if (unlikely(!page)) { result = SCAN_PAGE_NULL; -- Gitee From c823996f5a8d93afba32bd8397131bcb1e4324d4 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 18 Apr 2023 15:09:34 +0100 Subject: [PATCH 027/101] sched/uclamp: Make task_fits_capacity() use
util_fits_cpu() stable inclusion from stable-5.10.179 commit 5cb1a56ced3080f78da539a7addfa95f25c60fd8 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit b48e16a69792b5dc4a09d6807369d11b2970cc36 upstream. So that the new uclamp rules in regard to migration margin and capacity pressure are taken into account correctly. Fixes: a7008c07a568 ("sched/fair: Make task_fits_capacity() consider uclamp restrictions") Co-developed-by: Vincent Guittot Signed-off-by: Qais Yousef Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220804143609.515789-3-qais.yousef@arm.com (cherry picked from commit b48e16a69792b5dc4a09d6807369d11b2970cc36) Signed-off-by: Qais Yousef (Google) Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- kernel/sched/fair.c | 30 ++++++++++++++++++------------ kernel/sched/sched.h | 9 +++++++++ 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 01038fc4c41e..335e3b0677b3 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4240,10 +4240,12 @@ static inline int util_fits_cpu(unsigned long util, return fits; } -static inline int task_fits_capacity(struct task_struct *p, - unsigned long capacity) +static inline int task_fits_cpu(struct task_struct *p, int cpu) { - return fits_capacity(uclamp_task_util(p), capacity); + unsigned long uclamp_min = uclamp_eff_value(p, UCLAMP_MIN); + unsigned long uclamp_max = uclamp_eff_value(p, UCLAMP_MAX); + unsigned long util = task_util_est(p); + return util_fits_cpu(util, uclamp_min, uclamp_max, cpu); } #ifdef CONFIG_SCHED_RTG @@ -4255,7 +4257,7 @@ bool task_fits_max(struct task_struct *p, int cpu) if (capacity == max_capacity) return true; - return task_fits_capacity(p, capacity); + return task_fits_cpu(p, cpu); } #endif @@ -4281,9 +4283,9 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) task_fits = capacity_orig_of(cpu) >= 
capacity_orig_of(cpumask_first(rtg_target)); else - task_fits = task_fits_capacity(p, capacity_of(cpu_of(rq))); + task_fits = task_fits_cpu(p, cpu_of(rq)); #else - task_fits = task_fits_capacity(p, capacity_of(cpu_of(rq))); + task_fits = task_fits_cpu(p, cpu_of(rq)); #endif if (task_fits) { rq->misfit_task_load = 0; @@ -8182,7 +8184,7 @@ static int detach_tasks(struct lb_env *env) case migrate_misfit: /* This is not a misfit task */ - if (task_fits_capacity(p, capacity_of(env->src_cpu))) + if (task_fits_cpu(p, env->src_cpu)) goto next; env->imbalance = 0; @@ -9154,6 +9156,10 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd, memset(sgs, 0, sizeof(*sgs)); + /* Assume that task can't fit any CPU of the group */ + if (sd->flags & SD_ASYM_CPUCAPACITY) + sgs->group_misfit_task_load = 1; + for_each_cpu(i, sched_group_span(group)) { struct rq *rq = cpu_rq(i); unsigned int local; @@ -9173,12 +9179,12 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd, if (!nr_running && idle_cpu_without(i, p)) sgs->idle_cpus++; - } + /* Check if task fits in the CPU */ + if (sd->flags & SD_ASYM_CPUCAPACITY && + sgs->group_misfit_task_load && + task_fits_cpu(p, i)) + sgs->group_misfit_task_load = 0; - /* Check if task fits in the group */ - if (sd->flags & SD_ASYM_CPUCAPACITY && - !task_fits_capacity(p, group->sgc->max_capacity)) { - sgs->group_misfit_task_load = 1; } sgs->group_capacity = group->sgc->capacity; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 3457a8ac743a..e0d773c43107 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2640,6 +2640,15 @@ static inline bool uclamp_is_used(void) return static_branch_likely(&sched_uclamp_used); } #else /* CONFIG_UCLAMP_TASK */ +static inline unsigned long uclamp_eff_value(struct task_struct *p, + enum uclamp_id clamp_id) +{ + if (clamp_id == UCLAMP_MIN) + return 0; + + return SCHED_CAPACITY_SCALE; +} + static inline unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long 
util, struct task_struct *p) -- Gitee From 2d85f3dcb07ed3239630fe3b90125c305433c2e0 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 18 Apr 2023 15:09:35 +0100 Subject: [PATCH 028/101] sched/uclamp: Fix fits_capacity() check in feec() stable inclusion from stable-5.10.179 commit 8ca2bf63d946bf443fa0b86a2eaefdf952f8cc29 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 244226035a1f9b2b6c326e55ae5188fab4f428cb upstream. As reported by Yun Hsiang [1], if a task has its uclamp_min >= 0.8 * 1024, it'll always pick the previous CPU because fits_capacity() will always return false in this case. The new util_fits_cpu() logic should handle this correctly for us beside more corner cases where similar failures could occur, like when using UCLAMP_MAX. We open code uclamp_rq_util_with() except for the clamp() part, util_fits_cpu() needs the 'raw' values to be passed to it. Also introduce uclamp_rq_{set, get}() shorthand accessors to get uclamp value for the rq. Makes the code more readable and ensures the right rules (use READ_ONCE/WRITE_ONCE) are respected transparently. 
[1] https://lists.linaro.org/pipermail/eas-dev/2020-July/001488.html Fixes: 1d42509e475c ("sched/fair: Make EAS wakeup placement consider uclamp restrictions") Reported-by: Yun Hsiang Signed-off-by: Qais Yousef Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220804143609.515789-4-qais.yousef@arm.com (cherry picked from commit 244226035a1f9b2b6c326e55ae5188fab4f428cb) [Fix trivial conflict in kernel/sched/fair.c due to new automatic variables in master vs 5.10] Signed-off-by: Qais Yousef (Google) Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- kernel/sched/core.c | 10 +++++----- kernel/sched/fair.c | 26 ++++++++++++++++++++++++-- kernel/sched/sched.h | 42 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 68 insertions(+), 10 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ad03e08d82bd..9afa1dd0f7e2 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1031,7 +1031,7 @@ static inline void uclamp_idle_reset(struct rq *rq, enum uclamp_id clamp_id, if (!(rq->uclamp_flags & UCLAMP_FLAG_IDLE)) return; - WRITE_ONCE(rq->uclamp[clamp_id].value, clamp_value); + uclamp_rq_set(rq, clamp_id, clamp_value); } static inline @@ -1209,8 +1209,8 @@ static inline void uclamp_rq_inc_id(struct rq *rq, struct task_struct *p, if (bucket->tasks == 1 || uc_se->value > bucket->value) bucket->value = uc_se->value; - if (uc_se->value > READ_ONCE(uc_rq->value)) - WRITE_ONCE(uc_rq->value, uc_se->value); + if (uc_se->value > uclamp_rq_get(rq, clamp_id)) + uclamp_rq_set(rq, clamp_id, uc_se->value); } /* @@ -1276,7 +1276,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p, if (likely(bucket->tasks)) return; - rq_clamp = READ_ONCE(uc_rq->value); + rq_clamp = uclamp_rq_get(rq, clamp_id); /* * Defensive programming: this should never happen. If it happens, * e.g. due to future modification, warn and fixup the expected value. 
@@ -1284,7 +1284,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p, SCHED_WARN_ON(bucket->value > rq_clamp); if (bucket->value >= rq_clamp) { bkt_clamp = uclamp_rq_max_value(rq, clamp_id, uc_se->value); - WRITE_ONCE(uc_rq->value, bkt_clamp); + uclamp_rq_set(rq, clamp_id, bkt_clamp); } } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 335e3b0677b3..9c94e821722a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6973,6 +6973,8 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd) static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) { unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX; + unsigned long p_util_min = uclamp_is_used() ? uclamp_eff_value(p, UCLAMP_MIN) : 0; + unsigned long p_util_max = uclamp_is_used() ? uclamp_eff_value(p, UCLAMP_MAX) : 1024; struct root_domain *rd = cpu_rq(smp_processor_id())->rd; unsigned long cpu_cap, util, base_energy = 0; int cpu, best_energy_cpu = prev_cpu; @@ -7000,6 +7002,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) for (; pd; pd = pd->next) { unsigned long cur_delta, spare_cap, max_spare_cap = 0; + unsigned long rq_util_min, rq_util_max; + unsigned long util_min, util_max; unsigned long base_energy_pd; int max_spare_cap_cpu = -1; @@ -7023,8 +7027,26 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) * much capacity we can get out of the CPU; this is * aligned with schedutil_cpu_util(). */ - util = uclamp_rq_util_with(cpu_rq(cpu), util, p); - if (!fits_capacity(util, cpu_cap)) + if (uclamp_is_used()) { + if (uclamp_rq_is_idle(cpu_rq(cpu))) { + util_min = p_util_min; + util_max = p_util_max; + } else { + /* + * Open code uclamp_rq_util_with() except for + * the clamp() part. Ie: apply max aggregation + * only. util_fits_cpu() logic requires to + * operate on non clamped util but must use the + * max-aggregated uclamp_{min, max}. 
+ */ + rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN); + rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX); + + util_min = max(rq_util_min, p_util_min); + util_max = max(rq_util_max, p_util_max); + } + } + if (!util_fits_cpu(util, util_min, util_max, cpu)) continue; /* Always use prev_cpu as a candidate. */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e0d773c43107..7f9b048ab96f 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2569,6 +2569,23 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} #ifdef CONFIG_UCLAMP_TASK unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id); +static inline unsigned long uclamp_rq_get(struct rq *rq, + enum uclamp_id clamp_id) +{ + return READ_ONCE(rq->uclamp[clamp_id].value); +} + +static inline void uclamp_rq_set(struct rq *rq, enum uclamp_id clamp_id, + unsigned int value) +{ + WRITE_ONCE(rq->uclamp[clamp_id].value, value); +} + +static inline bool uclamp_rq_is_idle(struct rq *rq) +{ + return rq->uclamp_flags & UCLAMP_FLAG_IDLE; +} + /** * uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values. * @rq: The rq to clamp against. Must not be NULL. @@ -2604,12 +2621,12 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, * Ignore last runnable task's max clamp, as this task will * reset it. Similarly, no need to read the rq's min clamp. 
*/ - if (rq->uclamp_flags & UCLAMP_FLAG_IDLE) + if (uclamp_rq_is_idle(rq)) goto out; } - min_util = max_t(unsigned long, min_util, READ_ONCE(rq->uclamp[UCLAMP_MIN].value)); - max_util = max_t(unsigned long, max_util, READ_ONCE(rq->uclamp[UCLAMP_MAX].value)); + min_util = max_t(unsigned long, min_util, uclamp_rq_get(rq, UCLAMP_MIN)); + max_util = max_t(unsigned long, max_util, uclamp_rq_get(rq, UCLAMP_MAX)); out: /* * Since CPU's {min,max}_util clamps are MAX aggregated considering @@ -2665,6 +2682,25 @@ static inline bool uclamp_is_used(void) { return false; } + +static inline unsigned long uclamp_rq_get(struct rq *rq, + enum uclamp_id clamp_id) +{ + if (clamp_id == UCLAMP_MIN) + return 0; + + return SCHED_CAPACITY_SCALE; +} + +static inline void uclamp_rq_set(struct rq *rq, enum uclamp_id clamp_id, + unsigned int value) +{ +} + +static inline bool uclamp_rq_is_idle(struct rq *rq) +{ + return false; +} #endif /* CONFIG_UCLAMP_TASK */ #ifdef arch_scale_freq_capacity -- Gitee From 71a55fb6d9897486ab657717fdffcd2c17b6e4bb Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 18 Apr 2023 15:09:36 +0100 Subject: [PATCH 029/101] sched/uclamp: Make select_idle_capacity() use util_fits_cpu() stable inclusion from stable-5.10.179 commit 2fd1c194e688484e65c2a428bd343107d2a753c2 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit b759caa1d9f667b94727b2ad12589cbc4ce13a82 upstream. Use the new util_fits_cpu() to ensure migration margin and capacity pressure are taken into account correctly when uclamp is being used otherwise we will fail to consider CPUs as fitting in scenarios where they should. 
Fixes: b4c9c9f15649 ("sched/fair: Prefer prev cpu in asymmetric wakeup path") Signed-off-by: Qais Yousef Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220804143609.515789-5-qais.yousef@arm.com (cherry picked from commit b759caa1d9f667b94727b2ad12589cbc4ce13a82) Signed-off-by: Qais Yousef (Google) Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- kernel/sched/fair.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 9c94e821722a..28d01ba5444b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6515,14 +6515,16 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t static int select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target) { - unsigned long task_util, best_cap = 0; + unsigned long task_util, util_min, util_max, best_cap = 0; int cpu, best_cpu = -1; struct cpumask *cpus; cpus = this_cpu_cpumask_var_ptr(select_idle_mask); cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); - task_util = uclamp_task_util(p); + task_util = task_util_est(p); + util_min = uclamp_eff_value(p, UCLAMP_MIN); + util_max = uclamp_eff_value(p, UCLAMP_MAX); for_each_cpu_wrap(cpu, cpus, target) { unsigned long cpu_cap = capacity_of(cpu); @@ -6532,7 +6534,7 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target) if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu)) continue; - if (fits_capacity(task_util, cpu_cap)) + if (util_fits_cpu(task_util, util_min, util_max, cpu)) return cpu; if (cpu_cap > best_cap) { -- Gitee From 888a7804cada7ba7c74cc27862dcefb154927c7c Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 18 Apr 2023 15:09:37 +0100 Subject: [PATCH 030/101] sched/uclamp: Make asym_fits_capacity() use util_fits_cpu() stable inclusion from stable-5.10.179 commit 07750955e9a241f6c8d5f0ed4eab2b26fb8cd53d category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui 
--------------------------------------- commit a2e7f03ed28fce26c78b985f87913b6ce3accf9d upstream. Use the new util_fits_cpu() to ensure migration margin and capacity pressure are taken into account correctly when uclamp is being used otherwise we will fail to consider CPUs as fitting in scenarios where they should. s/asym_fits_capacity/asym_fits_cpu/ to better reflect what it does now. Fixes: b4c9c9f15649 ("sched/fair: Prefer prev cpu in asymmetric wakeup path") Signed-off-by: Qais Yousef Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220804143609.515789-6-qais.yousef@arm.com (cherry picked from commit a2e7f03ed28fce26c78b985f87913b6ce3accf9d) [Conflict in kernel/sched/fair.c due different name of static key wrapper function and slightly different if condition block in one of the asym_fits_cpu() call sites] Signed-off-by: Qais Yousef (Google) Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- kernel/sched/fair.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 28d01ba5444b..74df00f30f9e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6546,10 +6546,13 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target) return best_cpu; } -static inline bool asym_fits_capacity(unsigned long task_util, int cpu) +static inline bool asym_fits_cpu(unsigned long util, + unsigned long util_min, + unsigned long util_max, + int cpu) { if (static_branch_unlikely(&sched_asym_cpucapacity)) - return fits_capacity(task_util, capacity_of(cpu)); + return util_fits_cpu(util, util_min, util_max, cpu); return true; } @@ -6560,7 +6563,7 @@ static inline bool asym_fits_capacity(unsigned long task_util, int cpu) static int select_idle_sibling(struct task_struct *p, int prev, int target) { struct sched_domain *sd; - unsigned long task_util; + unsigned long task_util, util_min, util_max; int i, recent_used_cpu; /* @@ -6569,11 +6572,13 
@@ static int select_idle_sibling(struct task_struct *p, int prev, int target) */ if (static_branch_unlikely(&sched_asym_cpucapacity)) { sync_entity_load_avg(&p->se); - task_util = uclamp_task_util(p); + task_util = task_util_est(p); + util_min = uclamp_eff_value(p, UCLAMP_MIN); + util_max = uclamp_eff_value(p, UCLAMP_MAX); } if ((available_idle_cpu(target) || sched_idle_cpu(target)) && - !cpu_isolated(target) && asym_fits_capacity(task_util, target)) + !cpu_isolated(target) && asym_fits_cpu(task_util, util_min, util_max, target)) return target; /* @@ -6581,7 +6586,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) */ if (prev != target && cpus_share_cache(prev, target) && ((available_idle_cpu(prev) || sched_idle_cpu(prev)) && - !cpu_isolated(target) && asym_fits_capacity(task_util, prev))) + !cpu_isolated(target) && asym_fits_cpu(task_util, util_min, util_max, prev))) return prev; /* @@ -6596,7 +6601,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) in_task() && prev == smp_processor_id() && this_rq()->nr_running <= 1 && - asym_fits_capacity(task_util, prev)) { + asym_fits_cpu(task_util, util_min, util_max, prev)) { return prev; } @@ -6607,7 +6612,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) cpus_share_cache(recent_used_cpu, target) && (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) && cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) && - asym_fits_capacity(task_util, recent_used_cpu)) { + asym_fits_cpu(task_util, util_min, util_max, recent_used_cpu)) { /* * Replace recent_used_cpu with prev as it is a potential * candidate for the next wake: -- Gitee From 68fa0236d57102d33ead09cb3b099033bfe4fdc7 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 18 Apr 2023 15:09:38 +0100 Subject: [PATCH 031/101] sched/uclamp: Make cpu_overutilized() use util_fits_cpu() stable inclusion from stable-5.10.179 commit 41a880740c9f3a7da983c67a2ae8f70085fd2c00 
category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit c56ab1b3506ba0e7a872509964b100912bde165d upstream. So that it is now uclamp aware. This fixes a major problem of busy tasks capped with UCLAMP_MAX keeping the system in overutilized state which disables EAS and leads to wasting energy in the long run. Without this patch running a busy background activity like JIT compilation on Pixel 6 causes the system to be in overutilized state 74.5% of the time. With this patch this goes down to 9.79%. It also fixes another problem when long running tasks that have their UCLAMP_MIN changed while running such that they need to upmigrate to honour the new UCLAMP_MIN value. The upmigration doesn't get triggered because overutilized state never gets set in this state, hence misfit migration never happens at tick in this case until the task wakes up again. Fixes: af24bde8df202 ("sched/uclamp: Add uclamp support to energy_compute()") Signed-off-by: Qais Yousef Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220804143609.515789-7-qais.yousef@arm.com (cherry picked from commit c56ab1b3506ba0e7a872509964b100912bde165d) [Conflict in kernel/sched/fair.c: use cpu_util() instead of cpu_util_cfs()] Signed-off-by: Qais Yousef (Google) Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- kernel/sched/fair.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 74df00f30f9e..166bc2f176c3 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5734,7 +5734,10 @@ static inline void hrtick_update(struct rq *rq) #ifdef CONFIG_SMP static inline bool cpu_overutilized(int cpu) { - return !fits_capacity(cpu_util(cpu), capacity_of(cpu)); + unsigned long rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN); + unsigned long rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX); + + return !util_fits_cpu(cpu_util(cpu), rq_util_min, 
rq_util_max, cpu); } static inline void update_overutilized_status(struct rq *rq) -- Gitee From c032b16254c7ea81461f89c8916a908fd7760434 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 18 Apr 2023 15:09:39 +0100 Subject: [PATCH 032/101] sched/uclamp: Cater for uclamp in find_energy_efficient_cpu()'s early exit condition stable inclusion from stable-5.10.179 commit b18cbd359d7216a57af9409a177fa58e7db827fb category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit d81304bc6193554014d4372a01debdf65e1e9a4d upstream. If the utilization of the woken up task is 0, we skip the energy calculation because it has no impact. But if the task is boosted (uclamp_min != 0) will have an impact on task placement and frequency selection. Only skip if the util is truly 0 after applying uclamp values. Change uclamp_task_cpu() signature to avoid unnecessary additional calls to uclamp_eff_get(). feec() is the only user now. Fixes: 732cd75b8c920 ("sched/fair: Select an energy-efficient CPU on task wake-up") Signed-off-by: Qais Yousef Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220804143609.515789-8-qais.yousef@arm.com (cherry picked from commit d81304bc6193554014d4372a01debdf65e1e9a4d) Signed-off-by: Qais Yousef (Google) Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- kernel/sched/fair.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 166bc2f176c3..0b4374325afd 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3964,20 +3964,26 @@ static inline unsigned long task_util_est(struct task_struct *p) #ifdef CONFIG_UCLAMP_TASK #ifdef CONFIG_SCHED_RT_CAS -unsigned long uclamp_task_util(struct task_struct *p) +unsigned long uclamp_task_util(struct task_struct *p, + unsigned long uclamp_min, + unsigned long uclamp_max) #else -static inline unsigned long uclamp_task_util(struct task_struct *p) 
+static inline unsigned long uclamp_task_util(struct task_struct *p, + unsigned long uclamp_min, + unsigned long uclamp_max) #endif { - return clamp(task_util_est(p), - uclamp_eff_value(p, UCLAMP_MIN), - uclamp_eff_value(p, UCLAMP_MAX)); + return clamp(task_util_est(p), uclamp_min, uclamp_max); } #else #ifdef CONFIG_SCHED_RT_CAS -unsigned long uclamp_task_util(struct task_struct *p) +unsigned long uclamp_task_util(struct task_struct *p, + unsigned long uclamp_min, + unsigned long uclamp_max) #else -static inline unsigned long uclamp_task_util(struct task_struct *p) +static inline unsigned long uclamp_task_util(struct task_struct *p, + unsigned long uclamp_min, + unsigned long uclamp_max) #endif { return task_util_est(p); @@ -7007,7 +7013,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) goto fail; sync_entity_load_avg(&p->se); - if (!task_util_est(p)) + if (!uclamp_task_util(p, p_util_min, p_util_max)) goto unlock; for (; pd; pd = pd->next) { -- Gitee From 045995d7807c326e5e13e3c0634403be2351122b Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 18 Apr 2023 15:09:40 +0100 Subject: [PATCH 033/101] sched/fair: Detect capacity inversion stable inclusion from stable-5.10.179 commit 30f04dd56dfddfc58cd7fe9b45728ba9c1682a94 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit: 44c7b80bffc3a657a36857098d5d9c49d94e652b upstream. Check each performance domain to see if thermal pressure is causing its capacity to be lower than another performance domain. We assume that each performance domain has CPUs with the same capacities, which is similar to an assumption made in energy_model.c We also assume that thermal pressure impacts all CPUs in a performance domain equally. If there're multiple performance domains with the same capacity_orig, we will trigger a capacity inversion if the domain is under thermal pressure. 
The new cpu_in_capacity_inversion() should help users to know when information about capacity_orig are not reliable and can opt in to use the inverted capacity as the 'actual' capacity_orig. Signed-off-by: Qais Yousef Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220804143609.515789-9-qais.yousef@arm.com (cherry picked from commit 44c7b80bffc3a657a36857098d5d9c49d94e652b) [Trivial conflict in kernel/sched/fair.c and sched.h due to code shuffling] Signed-off-by: Qais Yousef (Google) Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- kernel/sched/fair.c | 63 +++++++++++++++++++++++++++++++++++++++++--- kernel/sched/sched.h | 19 +++++++++++++ 2 files changed, 79 insertions(+), 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0b4374325afd..dcdd89d98e64 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8629,16 +8629,73 @@ static unsigned long scale_rt_capacity(int cpu) static void update_cpu_capacity(struct sched_domain *sd, int cpu) { + unsigned long capacity_orig = arch_scale_cpu_capacity(cpu); unsigned long capacity = scale_rt_capacity(cpu); struct sched_group *sdg = sd->groups; + struct rq *rq = cpu_rq(cpu); - cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(cpu); + rq->cpu_capacity_orig = capacity_orig; if (!capacity) capacity = 1; - cpu_rq(cpu)->cpu_capacity = capacity; - trace_sched_cpu_capacity_tp(cpu_rq(cpu)); + rq->cpu_capacity = capacity; + + /* + * Detect if the performance domain is in capacity inversion state. + * + * Capacity inversion happens when another perf domain with equal or + * lower capacity_orig_of() ends up having higher capacity than this + * domain after subtracting thermal pressure. + * + * We only take into account thermal pressure in this detection as it's + * the only metric that actually results in *real* reduction of + * capacity due to performance points (OPPs) being dropped/become + * unreachable due to thermal throttling. 
+ * + * We assume: + * * That all cpus in a perf domain have the same capacity_orig + * (same uArch). + * * Thermal pressure will impact all cpus in this perf domain + * equally. + */ + if (static_branch_unlikely(&sched_asym_cpucapacity)) { + unsigned long inv_cap = capacity_orig - thermal_load_avg(rq); + struct perf_domain *pd = rcu_dereference(rq->rd->pd); + + rq->cpu_capacity_inverted = 0; + + for (; pd; pd = pd->next) { + struct cpumask *pd_span = perf_domain_span(pd); + unsigned long pd_cap_orig, pd_cap; + + cpu = cpumask_any(pd_span); + pd_cap_orig = arch_scale_cpu_capacity(cpu); + + if (capacity_orig < pd_cap_orig) + continue; + + /* + * handle the case of multiple perf domains have the + * same capacity_orig but one of them is under higher + * thermal pressure. We record it as capacity + * inversion. + */ + if (capacity_orig == pd_cap_orig) { + pd_cap = pd_cap_orig - thermal_load_avg(cpu_rq(cpu)); + + if (pd_cap > inv_cap) { + rq->cpu_capacity_inverted = inv_cap; + break; + } + } else if (pd_cap_orig > inv_cap) { + rq->cpu_capacity_inverted = inv_cap; + break; + } + } + } + + trace_sched_cpu_capacity_tp(rq); sdg->sgc->capacity = capacity; sdg->sgc->min_capacity = capacity; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 7f9b048ab96f..a557a04ba1a9 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1073,6 +1073,7 @@ struct rq { unsigned long cpu_capacity; unsigned long cpu_capacity_orig; + unsigned long cpu_capacity_inverted; struct callback_head *balance_callback; @@ -2721,6 +2722,24 @@ static inline unsigned long capacity_orig_of(int cpu) { return cpu_rq(cpu)->cpu_capacity_orig; } + +/* + * Returns inverted capacity if the CPU is in capacity inversion state. + * 0 otherwise. + * + * Capacity inversion detection only considers thermal impact where actual + * performance points (OPPs) gets dropped. 
+ * + * Capacity inversion state happens when another performance domain that has + * equal or lower capacity_orig_of() becomes effectively larger than the perf + * domain this CPU belongs to due to thermal pressure throttling it hard. + * + * See comment in update_cpu_capacity(). + */ +static inline unsigned long cpu_in_capacity_inversion(int cpu) +{ + return cpu_rq(cpu)->cpu_capacity_inverted; +} #endif /** -- Gitee From 3d06c7b7ba18bfebcf460249ea7454d86ba5619c Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 18 Apr 2023 15:09:41 +0100 Subject: [PATCH 034/101] sched/fair: Consider capacity inversion in util_fits_cpu() stable inclusion from stable-5.10.179 commit 09129798a608eee34c0e53e5cb4974023628944f category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit: aa69c36f31aadc1669bfa8a3de6a47b5e6c98ee8 upstream. We do consider thermal pressure in util_fits_cpu() for uclamp_min only. With the exception of the biggest cores which by definition are the max performance point of the system and all tasks by definition should fit. Even under thermal pressure, the capacity of the biggest CPU is the highest in the system and should still fit every task. Except when it reaches capacity inversion point, then this is no longer true. We can handle this by using the inverted capacity as capacity_orig in util_fits_cpu(). Which not only addresses the problem above, but also ensures uclamp_max now considers the inverted capacity. Force fitting a task when a CPU is in this adverse state will contribute to making the thermal throttling last longer.
Signed-off-by: Qais Yousef Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220804143609.515789-10-qais.yousef@arm.com (cherry picked from commit aa69c36f31aadc1669bfa8a3de6a47b5e6c98ee8) Signed-off-by: Qais Yousef (Google) Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- kernel/sched/fair.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index dcdd89d98e64..808737593b8b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4160,12 +4160,16 @@ static inline int util_fits_cpu(unsigned long util, * For uclamp_max, we can tolerate a drop in performance level as the * goal is to cap the task. So it's okay if it's getting less. * - * In case of capacity inversion, which is not handled yet, we should - * honour the inverted capacity for both uclamp_min and uclamp_max all - * the time. + * In case of capacity inversion we should honour the inverted capacity + * for both uclamp_min and uclamp_max all the time. */ - capacity_orig = capacity_orig_of(cpu); - capacity_orig_thermal = capacity_orig - arch_scale_thermal_pressure(cpu); + capacity_orig = cpu_in_capacity_inversion(cpu); + if (capacity_orig) { + capacity_orig_thermal = capacity_orig; + } else { + capacity_orig = capacity_orig_of(cpu); + capacity_orig_thermal = capacity_orig - arch_scale_thermal_pressure(cpu); + } /* * We want to force a task to fit a cpu as implied by uclamp_max. -- Gitee From 23d1147bfe2fd54b1ef25d22ac1a26e66f72425f Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 18 Apr 2023 15:09:42 +0100 Subject: [PATCH 035/101] sched/uclamp: Fix a uninitialized variable warnings stable inclusion from stable-5.10.179 commit 89ad8a672f57fb119e1743901ed129d4b0a2dc2b category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit e26fd28db82899be71b4b949527373d0a6be1e65 upstream. 
Addresses the following warnings: > config: riscv-randconfig-m031-20221111 > compiler: riscv64-linux-gcc (GCC) 12.1.0 > > smatch warnings: > kernel/sched/fair.c:7263 find_energy_efficient_cpu() error: uninitialized symbol 'util_min'. > kernel/sched/fair.c:7263 find_energy_efficient_cpu() error: uninitialized symbol 'util_max'. Fixes: 244226035a1f ("sched/uclamp: Fix fits_capacity() check in feec()") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Qais Yousef (Google) Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Vincent Guittot Link: https://lore.kernel.org/r/20230112122708.330667-2-qyousef@layalina.io (cherry picked from commit e26fd28db82899be71b4b949527373d0a6be1e65) [Conflict in kernel/sched/fair.c due to new automatic variable in master vs 5.10 and new code around for loop] Signed-off-by: Qais Yousef (Google) Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- kernel/sched/fair.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 808737593b8b..eb6f458f6c14 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7021,9 +7021,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) goto unlock; for (; pd; pd = pd->next) { + unsigned long util_min = p_util_min, util_max = p_util_max; unsigned long cur_delta, spare_cap, max_spare_cap = 0; unsigned long rq_util_min, rq_util_max; - unsigned long util_min, util_max; unsigned long base_energy_pd; int max_spare_cap_cpu = -1; @@ -7032,6 +7032,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) base_energy += base_energy_pd; for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) { + struct rq *rq = cpu_rq(cpu); + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) continue; @@ -7047,24 +7049,19 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) * much capacity we can get out of the CPU; this is * 
aligned with schedutil_cpu_util(). */ - if (uclamp_is_used()) { - if (uclamp_rq_is_idle(cpu_rq(cpu))) { - util_min = p_util_min; - util_max = p_util_max; - } else { - /* - * Open code uclamp_rq_util_with() except for - * the clamp() part. Ie: apply max aggregation - * only. util_fits_cpu() logic requires to - * operate on non clamped util but must use the - * max-aggregated uclamp_{min, max}. - */ - rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN); - rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX); - - util_min = max(rq_util_min, p_util_min); - util_max = max(rq_util_max, p_util_max); - } + if (uclamp_is_used() && !uclamp_rq_is_idle(rq)) { + /* + * Open code uclamp_rq_util_with() except for + * the clamp() part. Ie: apply max aggregation + * only. util_fits_cpu() logic requires to + * operate on non clamped util but must use the + * max-aggregated uclamp_{min, max}. + */ + rq_util_min = uclamp_rq_get(rq, UCLAMP_MIN); + rq_util_max = uclamp_rq_get(rq, UCLAMP_MAX); + + util_min = max(rq_util_min, p_util_min); + util_max = max(rq_util_max, p_util_max); } if (!util_fits_cpu(util, util_min, util_max, cpu)) continue; -- Gitee From 4e399f7ac8866400001c7a9666aebc58d0977753 Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Tue, 18 Apr 2023 15:09:43 +0100 Subject: [PATCH 036/101] sched/fair: Fixes for capacity inversion detection stable inclusion from stable-5.10.179 commit 4735b6f74f25e5e869ef1e199064c4f4cd31aa68 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit da07d2f9c153e457e845d4dcfdd13568d71d18a4 upstream. Traversing the Perf Domains requires rcu_read_lock() to be held and is conditional on sched_energy_enabled(). Ensure right protections applied. Also skip capacity inversion detection for our own pd; which was an error. 
Fixes: 44c7b80bffc3 ("sched/fair: Detect capacity inversion") Reported-by: Dietmar Eggemann Signed-off-by: Qais Yousef (Google) Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Vincent Guittot Link: https://lore.kernel.org/r/20230112122708.330667-3-qyousef@layalina.io (cherry picked from commit da07d2f9c153e457e845d4dcfdd13568d71d18a4) Signed-off-by: Qais Yousef (Google) Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- kernel/sched/fair.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index eb6f458f6c14..e5e72262e2ef 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8660,16 +8660,23 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu) * * Thermal pressure will impact all cpus in this perf domain * equally. */ - if (static_branch_unlikely(&sched_asym_cpucapacity)) { + if (sched_energy_enabled()) { unsigned long inv_cap = capacity_orig - thermal_load_avg(rq); - struct perf_domain *pd = rcu_dereference(rq->rd->pd); + struct perf_domain *pd; + + rcu_read_lock(); + pd = rcu_dereference(rq->rd->pd); rq->cpu_capacity_inverted = 0; for (; pd; pd = pd->next) { struct cpumask *pd_span = perf_domain_span(pd); unsigned long pd_cap_orig, pd_cap; + /* We can't be inverted against our own pd */ + if (cpumask_test_cpu(cpu_of(rq), pd_span)) + continue; + cpu = cpumask_any(pd_span); pd_cap_orig = arch_scale_cpu_capacity(cpu); @@ -8694,6 +8701,8 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu) break; } } + + rcu_read_unlock(); } trace_sched_cpu_capacity_tp(rq); -- Gitee From 149fbddc27f4a166622752e937f5532ee69080e1 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Sat, 8 Apr 2023 21:33:48 +0100 Subject: [PATCH 037/101] MIPS: Define RUNTIME_DISCARD_EXIT in LD script stable inclusion from stable-5.10.179 commit 77748b0a047e7962c66cd9864ddfe87432571a52 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui 
--------------------------------------- commit 6dcbd0a69c84a8ae7a442840a8cf6b1379dc8f16 upstream. MIPS's exit sections are discarded at runtime as well. Fixes link error: `.exit.text' referenced in section `__jump_table' of fs/fuse/inode.o: defined in discarded section `.exit.text' of fs/fuse/inode.o Fixes: 99cb0d917ffa ("arch: fix broken BuildID for arm64 and riscv") Reported-by: "kernelci.org bot" Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- arch/mips/kernel/vmlinux.lds.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 09fa4705ce8e..64afe075df08 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -15,6 +15,8 @@ #define EMITS_PT_NOTE #endif +#define RUNTIME_DISCARD_EXIT + #include #undef mips -- Gitee From e7240a5f0ba13f94f528a5bc5eee74357b47e327 Mon Sep 17 00:00:00 2001 From: Salvatore Bonaccorso Date: Sat, 22 Apr 2023 00:17:42 +0200 Subject: [PATCH 038/101] docs: futex: Fix kernel-doc references after code split-up preparation stable inclusion from stable-5.10.179 commit 37df709706ae71de4dca8fe338625a643543fd9a category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- In upstream commit 77e52ae35463 ("futex: Move to kernel/futex/") the futex code from kernel/futex.c was moved into kernel/futex/core.c in preparation of the split-up of the implementation in various files. Point kernel-doc references to the new files as otherwise the documentation shows errors on build: [...] Error: Cannot open file ./kernel/futex.c Error: Cannot open file ./kernel/futex.c [...] WARNING: kernel-doc './scripts/kernel-doc -rst -enable-lineno -sphinx-version 3.4.3 -internal ./kernel/futex.c' failed with return code 2 There is no direct upstream commit for this change. 
It is made in analogy to commit bc67f1c454fb ("docs: futex: Fix kernel-doc references") applied as consequence of the restructuring of the futex code. Fixes: 77e52ae35463 ("futex: Move to kernel/futex/") Signed-off-by: Salvatore Bonaccorso Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- Documentation/kernel-hacking/locking.rst | 2 +- Documentation/translations/it_IT/kernel-hacking/locking.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/kernel-hacking/locking.rst b/Documentation/kernel-hacking/locking.rst index 6ed806e6061b..a6d89efede79 100644 --- a/Documentation/kernel-hacking/locking.rst +++ b/Documentation/kernel-hacking/locking.rst @@ -1358,7 +1358,7 @@ Mutex API reference Futex API reference =================== -.. kernel-doc:: kernel/futex.c +.. kernel-doc:: kernel/futex/core.c :internal: Further reading diff --git a/Documentation/translations/it_IT/kernel-hacking/locking.rst b/Documentation/translations/it_IT/kernel-hacking/locking.rst index bf1acd6204ef..192ab8e28125 100644 --- a/Documentation/translations/it_IT/kernel-hacking/locking.rst +++ b/Documentation/translations/it_IT/kernel-hacking/locking.rst @@ -1400,7 +1400,7 @@ Riferimento per l'API dei Mutex Riferimento per l'API dei Futex =============================== -.. kernel-doc:: kernel/futex.c +.. kernel-doc:: kernel/futex/core.c :internal: Approfondimenti -- Gitee From d72b5adf80a875d662e8cb4495a5e484bf4650f8 Mon Sep 17 00:00:00 2001 From: Alyssa Ross Date: Sun, 26 Mar 2023 18:21:21 +0000 Subject: [PATCH 039/101] purgatory: fix disabling debug info stable inclusion from stable-5.10.179 commit e220438d1e33170313a295cd797919228c638953 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit d83806c4c0cccc0d6d3c3581a11983a9c186a138 upstream. Since 32ef9e5054ec, -Wa,-gdwarf-2 is no longer used in KBUILD_AFLAGS. 
Instead, it includes -g, the appropriate -gdwarf-* flag, and also the -Wa versions of both of those if building with Clang and GNU as. As a result, debug info was being generated for the purgatory objects, even though the intention was that it not be. Fixes: 32ef9e5054ec ("Makefile.debug: re-enable debug info for .S files") Signed-off-by: Alyssa Ross Cc: stable@vger.kernel.org Acked-by: Nick Desaulniers Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- arch/x86/purgatory/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 95ea17a9d20c..ebaf329a2368 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -64,8 +64,7 @@ CFLAGS_sha256.o += $(PURGATORY_CFLAGS) CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE) CFLAGS_string.o += $(PURGATORY_CFLAGS) -AFLAGS_REMOVE_setup-x86_$(BITS).o += -Wa,-gdwarf-2 -AFLAGS_REMOVE_entry64.o += -Wa,-gdwarf-2 +asflags-remove-y += -g -Wa,-gdwarf-2 $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) -- Gitee From acb9bead147e03ad07759df1d4197011a1b33036 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 11 Nov 2020 17:22:32 +0100 Subject: [PATCH 040/101] virtiofs: clean up error handling in virtio_fs_get_tree() stable inclusion from stable-5.10.179 commit cf08dc79168a80a7cf4dad14a3cc0f2f87561d11 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 833c5a42e28beeefa1f9bd476a63fe8050c1e8ca upstream. Avoid duplicating error cleanup. 
Signed-off-by: Miklos Szeredi Signed-off-by: Yang Bo Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- fs/fuse/virtio_fs.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index b9cfb1165ff4..22d2145ce08d 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -1440,22 +1440,14 @@ static int virtio_fs_get_tree(struct fs_context *fsc) return -EINVAL; } + err = -ENOMEM; fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL); - if (!fc) { - mutex_lock(&virtio_fs_mutex); - virtio_fs_put(fs); - mutex_unlock(&virtio_fs_mutex); - return -ENOMEM; - } + if (!fc) + goto out_err; fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL); - if (!fm) { - mutex_lock(&virtio_fs_mutex); - virtio_fs_put(fs); - mutex_unlock(&virtio_fs_mutex); - kfree(fc); - return -ENOMEM; - } + if (!fm) + goto out_err; fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs); fc->release = fuse_free_conn; @@ -1483,6 +1475,13 @@ static int virtio_fs_get_tree(struct fs_context *fsc) WARN_ON(fsc->root); fsc->root = dget(sb->s_root); return 0; + +out_err: + kfree(fc); + mutex_lock(&virtio_fs_mutex); + virtio_fs_put(fs); + mutex_unlock(&virtio_fs_mutex); + return err; } static const struct fs_context_operations virtio_fs_context_ops = { -- Gitee From b627b5727d2c932868b76f98f462655ba60033a8 Mon Sep 17 00:00:00 2001 From: Connor Kuehl Date: Thu, 18 Mar 2021 08:52:22 -0500 Subject: [PATCH 041/101] virtiofs: split requests that exceed virtqueue size stable inclusion from stable-5.10.179 commit 6d0d67b05f7b42f638aae31af558e72ce7c5836a category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit a7f0d7aab0b4f3f0780b1f77356e2fe7202ac0cb upstream. If an incoming FUSE request can't fit on the virtqueue, the request is placed onto a workqueue so a worker can try to resubmit it later where there will (hopefully) be space for it next time. 
This is fine for requests that aren't larger than a virtqueue's maximum capacity. However, if a request's size exceeds the maximum capacity of the virtqueue (even if the virtqueue is empty), it will be doomed to a life of being placed on the workqueue, removed, discovered it won't fit, and placed on the workqueue yet again. Furthermore, from section 2.6.5.3.1 (Driver Requirements: Indirect Descriptors) of the virtio spec: "A driver MUST NOT create a descriptor chain longer than the Queue Size of the device." To fix this, limit the number of pages FUSE will use for an overall request. This way, each request can realistically fit on the virtqueue when it is decomposed into a scattergather list and avoid violating section 2.6.5.3.1 of the virtio spec. Signed-off-by: Connor Kuehl Reviewed-by: Vivek Goyal Signed-off-by: Miklos Szeredi Signed-off-by: Yang Bo Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- fs/fuse/fuse_i.h | 3 +++ fs/fuse/inode.c | 3 ++- fs/fuse/virtio_fs.c | 19 +++++++++++++++++-- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index b10cddd72355..ceaa6868386e 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -556,6 +556,9 @@ struct fuse_conn { /** Maxmum number of pages that can be used in a single request */ unsigned int max_pages; + /** Constrain ->max_pages to this value during feature negotiation */ + unsigned int max_pages_limit; + /** Input queue */ struct fuse_iqueue iq; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 2ede05df7d06..058bb82dee40 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -710,6 +710,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, fc->pid_ns = get_pid_ns(task_active_pid_ns(current)); fc->user_ns = get_user_ns(user_ns); fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ; + fc->max_pages_limit = FUSE_MAX_MAX_PAGES; INIT_LIST_HEAD(&fc->mounts); list_add(&fm->fc_entry, &fc->mounts); @@ -1056,7 +1057,7 @@ static void 
process_init_reply(struct fuse_mount *fm, struct fuse_args *args, fc->abort_err = 1; if (arg->flags & FUSE_MAX_PAGES) { fc->max_pages = - min_t(unsigned int, FUSE_MAX_MAX_PAGES, + min_t(unsigned int, fc->max_pages_limit, max_t(unsigned int, arg->max_pages, 1)); } if (IS_ENABLED(CONFIG_FUSE_DAX) && diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 22d2145ce08d..6aaaa74438f3 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -18,6 +18,12 @@ #include #include "fuse_i.h" +/* Used to help calculate the FUSE connection's max_pages limit for a request's + * size. Parts of the struct fuse_req are sliced into scattergather lists in + * addition to the pages used, so this can help account for that overhead. + */ +#define FUSE_HEADER_OVERHEAD 4 + /* List of virtio-fs device instances and a lock for the list. Also provides * mutual exclusion in device removal and mounting path */ @@ -1426,9 +1432,10 @@ static int virtio_fs_get_tree(struct fs_context *fsc) { struct virtio_fs *fs; struct super_block *sb; - struct fuse_conn *fc; + struct fuse_conn *fc = NULL; struct fuse_mount *fm; - int err; + unsigned int virtqueue_size; + int err = -EIO; /* This gets a reference on virtio_fs object. This ptr gets installed * in fc->iq->priv. 
Once fuse_conn is going away, it calls ->put() @@ -1440,6 +1447,10 @@ static int virtio_fs_get_tree(struct fs_context *fsc) return -EINVAL; } + virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq); + if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD)) + goto out_err; + err = -ENOMEM; fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL); if (!fc) @@ -1454,6 +1465,10 @@ static int virtio_fs_get_tree(struct fs_context *fsc) fc->delete_stale = true; fc->auto_submounts = true; + /* Tell FUSE to split requests that exceed the virtqueue's size */ + fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit, + virtqueue_size - FUSE_HEADER_OVERHEAD); + fsc->s_fs_info = fm; sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super); fuse_mount_put(fm); -- Gitee From a099ef78c4351e8d35d19c42796976cf7a20f526 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 21 Oct 2021 10:01:38 +0200 Subject: [PATCH 042/101] fuse: check s_root when destroying sb stable inclusion from stable-5.10.179 commit 0078a1667cbf7e674430186837698503f0ef4272 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit d534d31d6a45d71de61db22090b4820afb68fddc upstream. Checking "fm" works because currently sb->s_fs_info is cleared on error paths; however, sb->s_root is what generic_shutdown_super() checks to determine whether the sb was fully initialized or not. This change will allow cleanup of sb setup error paths. 
Signed-off-by: Miklos Szeredi Signed-off-by: Yang Bo Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- fs/fuse/inode.c | 2 +- fs/fuse/virtio_fs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 058bb82dee40..9ea175ff9c8e 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1596,7 +1596,7 @@ static void fuse_kill_sb_blk(struct super_block *sb) struct fuse_mount *fm = get_fuse_mount_super(sb); bool last; - if (fm) { + if (sb->s_root) { last = fuse_mount_remove(fm); if (last) fuse_conn_destroy(fm); diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 6aaaa74438f3..faadc80485e7 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -1399,7 +1399,7 @@ static void virtio_kill_sb(struct super_block *sb) bool last; /* If mount failed, we can still be called without any fc */ - if (fm) { + if (sb->s_root) { last = fuse_mount_remove(fm); if (last) virtio_fs_conn_destroy(fm); -- Gitee From b769c8661264ac8372dd2270e4f2f149adf3880f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Oct 2021 17:03:03 +0200 Subject: [PATCH 043/101] fuse: fix attr version comparison in fuse_read_update_size() stable inclusion from stable-5.10.179 commit 42dfdbd4dcfa021d50af9f6a4c4a09eb086cc29d category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 484ce65715b06aead8c4901f01ca32c5a240bc71 upstream. A READ request returning a short count is taken as indication of EOF, and the cached file size is modified accordingly. Fix the attribute version checking to allow for changes to fc->attr_version on other inodes. 
Signed-off-by: Miklos Szeredi Signed-off-by: Yang Bo Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- fs/fuse/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 504389568dac..94fe2c690676 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -782,7 +782,7 @@ static void fuse_read_update_size(struct inode *inode, loff_t size, struct fuse_inode *fi = get_fuse_inode(inode); spin_lock(&fi->lock); - if (attr_ver == fi->attr_version && size < inode->i_size && + if (attr_ver >= fi->attr_version && size < inode->i_size && !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { fi->attr_version = atomic64_inc_return(&fc->attr_version); i_size_write(inode, size); -- Gitee From e74aed56c6f64998d43cab884a912ad021ba9300 Mon Sep 17 00:00:00 2001 From: Jiachen Zhang Date: Wed, 28 Sep 2022 20:19:34 +0800 Subject: [PATCH 044/101] fuse: always revalidate rename target dentry stable inclusion from stable-5.10.179 commit 03cefde986f1504fb84c92403c2b6c133fc9bdb0 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit ccc031e26afe60d2a5a3d93dabd9c978210825fb upstream. The previous commit df8629af2934 ("fuse: always revalidate if exclusive create") ensures that the dentries are revalidated on O_EXCL creates. This commit complements it by also performing revalidation for rename target dentries. Otherwise, a rename target file that only exists in kernel dentry cache but not in the filesystem will result in EEXIST if RENAME_NOREPLACE flag is used. 
Signed-off-by: Jiachen Zhang Signed-off-by: Zhang Tianci Signed-off-by: Miklos Szeredi Signed-off-by: Yang Bo Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- fs/fuse/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 80a9e50392a0..bdb04bea0da9 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -205,7 +205,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) if (inode && fuse_is_bad(inode)) goto invalid; else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) || - (flags & (LOOKUP_EXCL | LOOKUP_REVAL))) { + (flags & (LOOKUP_EXCL | LOOKUP_REVAL | LOOKUP_RENAME_TARGET))) { struct fuse_entry_out outarg; FUSE_ARGS(args); struct fuse_forget_link *forget; -- Gitee From 7d2d99566f135bdcb090b056f375ac2a72b62ac1 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 22 Apr 2022 15:48:53 +0200 Subject: [PATCH 045/101] fuse: fix deadlock between atomic O_TRUNC and page invalidation stable inclusion from stable-5.10.179 commit 81775ab858b4236c52c5da7e25cec6e49dd91b46 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 2fdbb8dd01556e1501132b5ad3826e8f71e24a8b upstream. fuse_finish_open() will be called with FUSE_NOWRITE set in case of atomic O_TRUNC open(), so commit 76224355db75 ("fuse: truncate pagecache on atomic_o_trunc") replaced invalidate_inode_pages2() by truncate_pagecache() in such a case to avoid the A-A deadlock. However, we found another A-B-B-A deadlock related to the case above, which will cause the xfstests generic/464 testcase hung in our virtio-fs test environment. For example, consider two processes concurrently open one same file, one with O_TRUNC and another without O_TRUNC. 
The deadlock case is described below, if open(O_TRUNC) is already set_nowrite(acquired A), and is trying to lock a page (acquiring B), open() could have held the page lock (acquired B), and waiting on the page writeback (acquiring A). This would lead to deadlocks. open(O_TRUNC) ---------------------------------------------------------------- fuse_open_common inode_lock [C acquire] fuse_set_nowrite [A acquire] fuse_finish_open truncate_pagecache lock_page [B acquire] truncate_inode_page unlock_page [B release] fuse_release_nowrite [A release] inode_unlock [C release] ---------------------------------------------------------------- open() ---------------------------------------------------------------- fuse_open_common fuse_finish_open invalidate_inode_pages2 lock_page [B acquire] fuse_launder_page fuse_wait_on_page_writeback [A acquire & release] unlock_page [B release] ---------------------------------------------------------------- Besides this case, all calls of invalidate_inode_pages2() and invalidate_inode_pages2_range() in fuse code also can deadlock with open(O_TRUNC). Fix by moving the truncate_pagecache() call outside the nowrite protected region. The nowrite protection is only for delayed writeback (writeback_cache) case, where inode lock does not protect against truncation racing with writes on the server. Write syscalls racing with page cache truncation still get the inode lock protection. This patch also changes the order of filemap_invalidate_lock() vs. fuse_set_nowrite() in fuse_open_common(). This new order matches the order found in fuse_file_fallocate() and fuse_do_setattr(). 
Reported-by: Jiachen Zhang Tested-by: Jiachen Zhang Fixes: e4648309b85a ("fuse: truncate pending writes on O_TRUNC") Cc: Signed-off-by: Miklos Szeredi Signed-off-by: Yang Bo Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- fs/fuse/dir.c | 5 +++++ fs/fuse/file.c | 29 +++++++++++++++++------------ 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index bdb04bea0da9..e3b9b7d188e6 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -537,6 +537,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, struct fuse_entry_out outentry; struct fuse_inode *fi; struct fuse_file *ff; + bool trunc = flags & O_TRUNC; /* Userspace expects S_IFREG in create mode */ BUG_ON((mode & S_IFMT) != S_IFREG); @@ -604,6 +605,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, } else { file->private_data = ff; fuse_finish_open(inode, file); + if (fm->fc->atomic_o_trunc && trunc) + truncate_pagecache(inode, 0); + else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) + invalidate_inode_pages2(inode->i_mapping); } return err; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 94fe2c690676..13d97547eaf6 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -206,14 +206,10 @@ void fuse_finish_open(struct inode *inode, struct file *file) fi->attr_version = atomic64_inc_return(&fc->attr_version); i_size_write(inode, 0); spin_unlock(&fi->lock); - truncate_pagecache(inode, 0); fuse_invalidate_attr(inode); if (fc->writeback_cache) file_update_time(file); - } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) { - invalidate_inode_pages2(inode->i_mapping); } - if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache) fuse_link_write_file(file); } @@ -236,30 +232,39 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) if (err) return err; - if (is_wb_truncate || dax_truncate) { + if (is_wb_truncate || dax_truncate) inode_lock(inode); - fuse_set_nowrite(inode); - } if (dax_truncate) { 
down_write(&get_fuse_inode(inode)->i_mmap_sem); err = fuse_dax_break_layouts(inode, 0, 0); if (err) - goto out; + goto out_inode_unlock; } + if (is_wb_truncate || dax_truncate) + fuse_set_nowrite(inode); + err = fuse_do_open(fm, get_node_id(inode), file, isdir); if (!err) fuse_finish_open(inode, file); -out: + if (is_wb_truncate || dax_truncate) + fuse_release_nowrite(inode); + if (!err) { + struct fuse_file *ff = file->private_data; + + if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) + truncate_pagecache(inode, 0); + else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) + invalidate_inode_pages2(inode->i_mapping); + } if (dax_truncate) up_write(&get_fuse_inode(inode)->i_mmap_sem); - if (is_wb_truncate | dax_truncate) { - fuse_release_nowrite(inode); +out_inode_unlock: + if (is_wb_truncate || dax_truncate) inode_unlock(inode); - } return err; } -- Gitee From a2423d8f8470bec2385d86efe0bf6e2a079f193f Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Wed, 19 Apr 2023 06:46:08 +0000 Subject: [PATCH 046/101] Revert "ext4: fix use-after-free in ext4_xattr_set_entry" stable inclusion from stable-5.10.179 commit 9400206d9d5eebc0317da4151364ade32d28944f category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- This reverts commit bb8592efcf8ef2f62947745d3182ea05b5256a15 which is commit 67d7d8ad99beccd9fe92d585b87f1760dc9018e3 upstream. The order in which patches are queued to stable matters. This patch has a logical dependency on commit 310c097c2bdbea253d6ee4e064f3e65580ef93ac upstream, and failing to queue the latter results in a null-ptr-deref reported at the Link below. In order to avoid conflicts on stable, revert the commit just so that we can queue its prerequisite patch first and then queue the same after. 
Link: https://syzkaller.appspot.com/bug?extid=d5ebf56f3b1268136afd Signed-off-by: Tudor Ambarus Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- fs/ext4/xattr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 8dc3495fa2a9..35e95c1707dc 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2196,9 +2196,8 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, struct ext4_inode *raw_inode; int error; - if (!EXT4_INODE_HAS_XATTR_SPACE(inode)) + if (EXT4_I(inode)->i_extra_isize == 0) return 0; - raw_inode = ext4_raw_inode(&is->iloc); header = IHDR(inode, raw_inode); is->s.base = is->s.first = IFIRST(header); @@ -2226,9 +2225,8 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, struct ext4_xattr_search *s = &is->s; int error; - if (!EXT4_INODE_HAS_XATTR_SPACE(inode)) + if (EXT4_I(inode)->i_extra_isize == 0) return -ENOSPC; - error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */); if (error) return error; -- Gitee From fe68020d1a904f305ce056f699d7b4abba8bde95 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Wed, 19 Apr 2023 06:46:09 +0000 Subject: [PATCH 047/101] ext4: remove duplicate definition of ext4_xattr_ibody_inline_set() stable inclusion from stable-5.10.179 commit c75711396c04a77cb614f49d37e7385e984c0cb7 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit 310c097c2bdbea253d6ee4e064f3e65580ef93ac ] ext4_xattr_ibody_inline_set() & ext4_xattr_ibody_set() have the exact same definition. Hence remove ext4_xattr_ibody_inline_set() and all its call references. Convert the callers of it to call ext4_xattr_ibody_set() instead. [ Modified to preserve ext4_xattr_ibody_set() and remove ext4_xattr_ibody_inline_set() instead. 
-- TYT ] Signed-off-by: Ritesh Harjani Link: https://lore.kernel.org/r/fd566b799bbbbe9b668eb5eecde5b5e319e3694f.1622685482.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o Signed-off-by: Tudor Ambarus Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- fs/ext4/inline.c | 11 +++++------ fs/ext4/xattr.c | 26 +------------------------- fs/ext4/xattr.h | 6 +++--- 3 files changed, 9 insertions(+), 34 deletions(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index baf28781b226..4adf36c17e34 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -206,7 +206,7 @@ static int ext4_read_inline_data(struct inode *inode, void *buffer, /* * write the buffer to the inline inode. * If 'create' is set, we don't need to do the extra copy in the xattr - * value since it is already handled by ext4_xattr_ibody_inline_set. + * value since it is already handled by ext4_xattr_ibody_set. * That saves us one memcpy. */ static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc, @@ -288,7 +288,7 @@ static int ext4_create_inline_data(handle_t *handle, BUG_ON(!is.s.not_found); - error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is); + error = ext4_xattr_ibody_set(handle, inode, &i, &is); if (error) { if (error == -ENOSPC) ext4_clear_inode_state(inode, @@ -360,7 +360,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode, i.value = value; i.value_len = len; - error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is); + error = ext4_xattr_ibody_set(handle, inode, &i, &is); if (error) goto out; @@ -433,7 +433,7 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle, if (error) goto out; - error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is); + error = ext4_xattr_ibody_set(handle, inode, &i, &is); if (error) goto out; @@ -1936,8 +1936,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline) i.value = value; i.value_len = i_size > EXT4_MIN_INLINE_DATA_SIZE ? 
i_size - EXT4_MIN_INLINE_DATA_SIZE : 0; - err = ext4_xattr_ibody_inline_set(handle, inode, - &i, &is); + err = ext4_xattr_ibody_set(handle, inode, &i, &is); if (err) goto out_error; } diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 35e95c1707dc..b5fbd9e4132a 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2217,31 +2217,7 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, return 0; } -int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, - struct ext4_xattr_info *i, - struct ext4_xattr_ibody_find *is) -{ - struct ext4_xattr_ibody_header *header; - struct ext4_xattr_search *s = &is->s; - int error; - - if (EXT4_I(inode)->i_extra_isize == 0) - return -ENOSPC; - error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */); - if (error) - return error; - header = IHDR(inode, ext4_raw_inode(&is->iloc)); - if (!IS_LAST_ENTRY(s->first)) { - header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); - ext4_set_inode_state(inode, EXT4_STATE_XATTR); - } else { - header->h_magic = cpu_to_le32(0); - ext4_clear_inode_state(inode, EXT4_STATE_XATTR); - } - return 0; -} - -static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, +int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, struct ext4_xattr_info *i, struct ext4_xattr_ibody_find *is) { diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index b357872ab83b..e5e36bd11f05 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -200,9 +200,9 @@ extern int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, extern int ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, void *buffer, size_t buffer_size); -extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, - struct ext4_xattr_info *i, - struct ext4_xattr_ibody_find *is); +extern int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, + struct ext4_xattr_info *i, + struct ext4_xattr_ibody_find *is); extern struct mb_cache 
*ext4_xattr_create_cache(void); extern void ext4_xattr_destroy_cache(struct mb_cache *); -- Gitee From c0e2bd4424a62f87d83fff86aa545c7ae2b4b639 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 19 Apr 2023 06:46:10 +0000 Subject: [PATCH 048/101] ext4: fix use-after-free in ext4_xattr_set_entry stable inclusion from stable-5.10.179 commit 05cf34a2b6414a1172552d16159b3e17e9da36a3 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit 67d7d8ad99beccd9fe92d585b87f1760dc9018e3 ] Hulk Robot reported a issue: ================================================================== BUG: KASAN: use-after-free in ext4_xattr_set_entry+0x18ab/0x3500 Write of size 4105 at addr ffff8881675ef5f4 by task syz-executor.0/7092 CPU: 1 PID: 7092 Comm: syz-executor.0 Not tainted 4.19.90-dirty #17 Call Trace: [...] memcpy+0x34/0x50 mm/kasan/kasan.c:303 ext4_xattr_set_entry+0x18ab/0x3500 fs/ext4/xattr.c:1747 ext4_xattr_ibody_inline_set+0x86/0x2a0 fs/ext4/xattr.c:2205 ext4_xattr_set_handle+0x940/0x1300 fs/ext4/xattr.c:2386 ext4_xattr_set+0x1da/0x300 fs/ext4/xattr.c:2498 __vfs_setxattr+0x112/0x170 fs/xattr.c:149 __vfs_setxattr_noperm+0x11b/0x2a0 fs/xattr.c:180 __vfs_setxattr_locked+0x17b/0x250 fs/xattr.c:238 vfs_setxattr+0xed/0x270 fs/xattr.c:255 setxattr+0x235/0x330 fs/xattr.c:520 path_setxattr+0x176/0x190 fs/xattr.c:539 __do_sys_lsetxattr fs/xattr.c:561 [inline] __se_sys_lsetxattr fs/xattr.c:557 [inline] __x64_sys_lsetxattr+0xc2/0x160 fs/xattr.c:557 do_syscall_64+0xdf/0x530 arch/x86/entry/common.c:298 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x459fe9 RSP: 002b:00007fa5e54b4c08 EFLAGS: 00000246 ORIG_RAX: 00000000000000bd RAX: ffffffffffffffda RBX: 000000000051bf60 RCX: 0000000000459fe9 RDX: 00000000200003c0 RSI: 0000000020000180 RDI: 0000000020000140 RBP: 000000000051bf60 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000001009 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffc73c93fc0 R14: 
000000000051bf60 R15: 00007fa5e54b4d80 [...] ================================================================== The above issue may happen as follows: ------------------------------------- ext4_xattr_set ext4_xattr_set_handle ext4_xattr_ibody_find >> s->end < s->base >> no EXT4_STATE_XATTR >> xattr_check_inode is not executed ext4_xattr_ibody_set ext4_xattr_set_entry >> size_t min_offs = s->end - s->base >> UAF in memcpy We can easily reproduce this problem with the following commands: mkfs.ext4 -F /dev/sda mount -o debug_want_extra_isize=128 /dev/sda /mnt touch /mnt/file setfattr -n user.cat -v `seq -s z 4096|tr -d '[:digit:]'` /mnt/file In ext4_xattr_ibody_find, we have the following assignment logic: header = IHDR(inode, raw_inode) = raw_inode + EXT4_GOOD_OLD_INODE_SIZE + i_extra_isize is->s.base = IFIRST(header) = header + sizeof(struct ext4_xattr_ibody_header) is->s.end = raw_inode + s_inode_size In ext4_xattr_set_entry min_offs = s->end - s->base = s_inode_size - EXT4_GOOD_OLD_INODE_SIZE - i_extra_isize - sizeof(struct ext4_xattr_ibody_header) last = s->first free = min_offs - ((void *)last - s->base) - sizeof(__u32) = s_inode_size - EXT4_GOOD_OLD_INODE_SIZE - i_extra_isize - sizeof(struct ext4_xattr_ibody_header) - sizeof(__u32) In the calculation formula, all values except s_inode_size and i_extra_isize are fixed values. When i_extra_isize is the maximum value s_inode_size - EXT4_GOOD_OLD_INODE_SIZE, min_offs is -4 and free is -8. The value overflows. As a result, the preceding issue is triggered when memcpy is executed. Therefore, when finding xattr or setting xattr, check whether there is space for storing xattr in the inode to resolve this issue.
Cc: stable@kernel.org Reported-by: Hulk Robot Signed-off-by: Baokun Li Reviewed-by: Ritesh Harjani (IBM) Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220616021358.2504451-3-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Tudor Ambarus Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- fs/ext4/xattr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index b5fbd9e4132a..8af72ac28cee 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2196,8 +2196,9 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, struct ext4_inode *raw_inode; int error; - if (EXT4_I(inode)->i_extra_isize == 0) + if (!EXT4_INODE_HAS_XATTR_SPACE(inode)) return 0; + raw_inode = ext4_raw_inode(&is->iloc); header = IHDR(inode, raw_inode); is->s.base = is->s.first = IFIRST(header); -- Gitee From fa314ed6839b86b0bbe95b3b94214380b38e3cdf Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 6 Oct 2022 11:53:46 -0700 Subject: [PATCH 049/101] udp: Call inet6_destroy_sock() in setsockopt(IPV6_ADDRFORM). stable inclusion from stable-5.10.179 commit 0e7b5e1020aad24e63d1231065af92c701629327 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 21985f43376cee092702d6cb963ff97a9d2ede68 upstream. Commit 4b340ae20d0e ("IPv6: Complete IPV6_DONTFRAG support") forgot to add a change to free inet6_sk(sk)->rxpmtu while converting an IPv6 socket into IPv4 with IPV6_ADDRFORM. After conversion, sk_prot is changed to udp_prot and ->destroy() never cleans it up, resulting in a memory leak. This is due to the discrepancy between inet6_destroy_sock() and IPV6_ADDRFORM, so let's call inet6_destroy_sock() from IPV6_ADDRFORM to remove the difference. However, this is not enough for now because rxpmtu can be changed without lock_sock() after commit 03485f2adcde ("udpv6: Add lockless sendmsg() support"). 
We will fix this case in the following patch. Note we will rename inet6_destroy_sock() to inet6_cleanup_sock() and remove unnecessary inet6_destroy_sock() calls in sk_prot->destroy() in the future. Fixes: 4b340ae20d0e ("IPv6: Complete IPV6_DONTFRAG support") Signed-off-by: Kuniyuki Iwashima Signed-off-by: Jakub Kicinski Signed-off-by: Ziyang Xuan Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- include/net/ipv6.h | 1 + net/ipv6/af_inet6.c | 6 ++++++ net/ipv6/ipv6_sockglue.c | 20 ++++++++------------ 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 6538b11fadd5..fe1ebee6c7be 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1105,6 +1105,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info); void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu); +void inet6_cleanup_sock(struct sock *sk); int inet6_release(struct socket *sock); int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); int inet6_getname(struct socket *sock, struct sockaddr *uaddr, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index f18e6499a853..eb4e9c15113d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -505,6 +505,12 @@ void inet6_destroy_sock(struct sock *sk) } EXPORT_SYMBOL_GPL(inet6_destroy_sock); +void inet6_cleanup_sock(struct sock *sk) +{ + inet6_destroy_sock(sk); +} +EXPORT_SYMBOL_GPL(inet6_cleanup_sock); + /* * This does both peername and sockname. 
*/ diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index ce4e0da4ab9b..72391b5321af 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -429,9 +429,6 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optlen < sizeof(int)) goto e_inval; if (val == PF_INET) { - struct ipv6_txoptions *opt; - struct sk_buff *pktopt; - if (sk->sk_type == SOCK_RAW) break; @@ -462,7 +459,6 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; } - fl6_free_socklist(sk); __ipv6_sock_mc_close(sk); __ipv6_sock_ac_close(sk); @@ -500,14 +496,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sk->sk_socket->ops = &inet_dgram_ops; sk->sk_family = PF_INET; } - opt = xchg((__force struct ipv6_txoptions **)&np->opt, - NULL); - if (opt) { - atomic_sub(opt->tot_len, &sk->sk_omem_alloc); - txopt_put(opt); - } - pktopt = xchg(&np->pktoptions, NULL); - kfree_skb(pktopt); + + /* Disable all options not to allocate memory anymore, + * but there is still a race. See the lockless path + * in udpv6_sendmsg() and ipv6_local_rxpmtu(). + */ + np->rxopt.all = 0; + + inet6_cleanup_sock(sk); /* * ... and add it to the refcnt debug socks count -- Gitee From 6851ea44ed984927955e057f056ae6c841bdcb18 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 6 Oct 2022 11:53:47 -0700 Subject: [PATCH 050/101] tcp/udp: Call inet6_destroy_sock() in IPv6 sk->sk_destruct(). stable inclusion from stable-5.10.179 commit b1f06ab985efd3456c8bbf96bdc52c53a125b4b3 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit d38afeec26ed4739c640bf286c270559aab2ba5f upstream. Originally, inet6_sk(sk)->XXX were changed under lock_sock(), so we were able to clean them up by calling inet6_destroy_sock() during the IPv6 -> IPv4 conversion by IPV6_ADDRFORM. 
However, commit 03485f2adcde ("udpv6: Add lockless sendmsg() support") added a lockless memory allocation path, which could cause a memory leak: setsockopt(IPV6_ADDRFORM) sendmsg() +-----------------------+ +-------+ - do_ipv6_setsockopt(sk, ...) - udpv6_sendmsg(sk, ...) - sockopt_lock_sock(sk) ^._ called via udpv6_prot - lock_sock(sk) before WRITE_ONCE() - WRITE_ONCE(sk->sk_prot, &tcp_prot) - inet6_destroy_sock() - if (!corkreq) - sockopt_release_sock(sk) - ip6_make_skb(sk, ...) - release_sock(sk) ^._ lockless fast path for the non-corking case - __ip6_append_data(sk, ...) - ipv6_local_rxpmtu(sk, ...) - xchg(&np->rxpmtu, skb) ^._ rxpmtu is never freed. - goto out_no_dst; - lock_sock(sk) For now, rxpmtu is only the case, but not to miss the future change and a similar bug fixed in commit e27326009a3d ("net: ping6: Fix memleak in ipv6_renew_options()."), let's set a new function to IPv6 sk->sk_destruct() and call inet6_cleanup_sock() there. Since the conversion does not change sk->sk_destruct(), we can guarantee that we can clean up IPv6 resources finally. We can now remove all inet6_destroy_sock() calls from IPv6 protocol specific ->destroy() functions, but such changes are invasive to backport. So they can be posted as a follow-up later for net-next. 
Fixes: 03485f2adcde ("udpv6: Add lockless sendmsg() support") Signed-off-by: Kuniyuki Iwashima Signed-off-by: Jakub Kicinski Signed-off-by: Ziyang Xuan Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- include/net/ipv6.h | 1 + include/net/udp.h | 2 +- include/net/udplite.h | 8 -------- net/ipv4/udp.c | 9 ++++++--- net/ipv4/udplite.c | 8 ++++++++ net/ipv6/af_inet6.c | 8 +++++++- net/ipv6/udp.c | 15 ++++++++++++++- net/ipv6/udp_impl.h | 1 + net/ipv6/udplite.c | 9 ++++++++- 9 files changed, 46 insertions(+), 15 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index fe1ebee6c7be..4c8f97a6da5a 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1106,6 +1106,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info); void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu); void inet6_cleanup_sock(struct sock *sk); +void inet6_sock_destruct(struct sock *sk); int inet6_release(struct socket *sock); int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); int inet6_getname(struct socket *sock, struct sockaddr *uaddr, diff --git a/include/net/udp.h b/include/net/udp.h index 388e68c7bca0..e2550a4547a7 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -268,7 +268,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if, } /* net/ipv4/udp.c */ -void udp_destruct_sock(struct sock *sk); +void udp_destruct_common(struct sock *sk); void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); void udp_skb_destructor(struct sock *sk, struct sk_buff *skb); diff --git a/include/net/udplite.h b/include/net/udplite.h index 9185e45b997f..c59ba86668af 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -24,14 +24,6 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset, return copy_from_iter_full(to, len, &msg->msg_iter) ? 
0 : -EFAULT; } -/* Designate sk as UDP-Lite socket */ -static inline int udplite_sk_init(struct sock *sk) -{ - udp_init_sock(sk); - udp_sk(sk)->pcflag = UDPLITE_BIT; - return 0; -} - /* * Checksumming routines */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b093daaa3deb..f0db66e415bd 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1582,7 +1582,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb); -void udp_destruct_sock(struct sock *sk) +void udp_destruct_common(struct sock *sk) { /* reclaim completely the forward allocated memory */ struct udp_sock *up = udp_sk(sk); @@ -1595,10 +1595,14 @@ void udp_destruct_sock(struct sock *sk) kfree_skb(skb); } udp_rmem_release(sk, total, 0, true); +} +EXPORT_SYMBOL_GPL(udp_destruct_common); +static void udp_destruct_sock(struct sock *sk) +{ + udp_destruct_common(sk); inet_sock_destruct(sk); } -EXPORT_SYMBOL_GPL(udp_destruct_sock); int udp_init_sock(struct sock *sk) { @@ -1606,7 +1610,6 @@ int udp_init_sock(struct sock *sk) sk->sk_destruct = udp_destruct_sock; return 0; } -EXPORT_SYMBOL_GPL(udp_init_sock); void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) { diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index bd8773b49e72..cfb36655a5fd 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -17,6 +17,14 @@ struct udp_table udplite_table __read_mostly; EXPORT_SYMBOL(udplite_table); +/* Designate sk as UDP-Lite socket */ +static int udplite_sk_init(struct sock *sk) +{ + udp_init_sock(sk); + udp_sk(sk)->pcflag = UDPLITE_BIT; + return 0; +} + static int udplite_rcv(struct sk_buff *skb) { return __udp4_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index eb4e9c15113d..adce1c199b70 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -107,6 +107,12 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) return (struct ipv6_pinfo *)(((u8 *)sk) + 
offset); } +void inet6_sock_destruct(struct sock *sk) +{ + inet6_cleanup_sock(sk); + inet_sock_destruct(sk); +} + static int inet6_create(struct net *net, struct socket *sock, int protocol, int kern) { @@ -199,7 +205,7 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, inet->hdrincl = 1; } - sk->sk_destruct = inet_sock_destruct; + sk->sk_destruct = inet6_sock_destruct; sk->sk_family = PF_INET6; sk->sk_protocol = protocol; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 20cc08210c70..bd3140f69b82 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -54,6 +54,19 @@ #include #include "udp_impl.h" +static void udpv6_destruct_sock(struct sock *sk) +{ + udp_destruct_common(sk); + inet6_sock_destruct(sk); +} + +int udpv6_init_sock(struct sock *sk) +{ + skb_queue_head_init(&udp_sk(sk)->reader_queue); + sk->sk_destruct = udpv6_destruct_sock; + return 0; +} + static u32 udp6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, @@ -1702,7 +1715,7 @@ struct proto udpv6_prot = { .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, - .init = udp_init_sock, + .init = udpv6_init_sock, .destroy = udpv6_destroy_sock, .setsockopt = udpv6_setsockopt, .getsockopt = udpv6_getsockopt, diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index b2fcc46c1630..e49776819441 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -12,6 +12,7 @@ int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int); int __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int, __be32, struct udp_table *); +int udpv6_init_sock(struct sock *sk); int udp_v6_get_port(struct sock *sk, unsigned short snum); void udp_v6_rehash(struct sock *sk); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index fbb700d3f437..b6482e04dad0 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -12,6 +12,13 @@ #include #include "udp_impl.h" +static int udplitev6_sk_init(struct sock *sk) +{ + udpv6_init_sock(sk); 
+ udp_sk(sk)->pcflag = UDPLITE_BIT; + return 0; +} + static int udplitev6_rcv(struct sk_buff *skb) { return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); @@ -38,7 +45,7 @@ struct proto udplitev6_prot = { .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, - .init = udplite_sk_init, + .init = udplitev6_sk_init, .destroy = udpv6_destroy_sock, .setsockopt = udpv6_setsockopt, .getsockopt = udpv6_getsockopt, -- Gitee From 43675e0adc5d3162035f1687fc44e576d0d65ddd Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 19 Oct 2022 15:35:59 -0700 Subject: [PATCH 051/101] inet6: Remove inet6_destroy_sock() in sk->sk_prot->destroy(). stable inclusion from stable-5.10.179 commit 04d393c4bbf5b906329fd40449c1c660a3affa37 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit b5fc29233d28be7a3322848ebe73ac327559cdb9 upstream. After commit d38afeec26ed ("tcp/udp: Call inet6_destroy_sock() in IPv6 sk->sk_destruct()."), we call inet6_destroy_sock() in sk->sk_destruct() by setting inet6_sock_destruct() to it to make sure we do not leak inet6-specific resources. Now we can remove unnecessary inet6_destroy_sock() calls in sk->sk_prot->destroy(). DCCP and SCTP have their own sk->sk_destruct() function, so we change them separately in the following patches. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Matthieu Baerts Signed-off-by: David S. 
Miller Signed-off-by: Ziyang Xuan Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- net/ipv6/ping.c | 6 ------ net/ipv6/raw.c | 2 -- net/ipv6/tcp_ipv6.c | 8 +------- net/ipv6/udp.c | 2 -- net/l2tp/l2tp_ip6.c | 2 -- net/mptcp/protocol.c | 7 ------- 6 files changed, 1 insertion(+), 26 deletions(-) diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 135e3a060caa..6ac88fe24a8e 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -22,11 +22,6 @@ #include #include -static void ping_v6_destroy(struct sock *sk) -{ - inet6_destroy_sock(sk); -} - /* Compatibility glue so we can support IPv6 when it's compiled as a module */ static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) @@ -171,7 +166,6 @@ struct proto pingv6_prot = { .owner = THIS_MODULE, .init = ping_init_sock, .close = ping_close, - .destroy = ping_v6_destroy, .connect = ip6_datagram_connect_v6_only, .disconnect = __udp_disconnect, .setsockopt = ipv6_setsockopt, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 110254f44a46..69f0f9c05d02 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1211,8 +1211,6 @@ static void raw6_destroy(struct sock *sk) lock_sock(sk); ip6_flush_pending_frames(sk); release_sock(sk); - - inet6_destroy_sock(sk); } static int rawv6_init_sk(struct sock *sk) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3f721eda2dac..48e55e93deed 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1938,12 +1938,6 @@ static int tcp_v6_init_sock(struct sock *sk) return 0; } -static void tcp_v6_destroy_sock(struct sock *sk) -{ - tcp_v4_destroy_sock(sk); - inet6_destroy_sock(sk); -} - #ifdef CONFIG_PROC_FS /* Proc filesystem TCPv6 sock list dumping. 
*/ static void get_openreq6(struct seq_file *seq, @@ -2136,7 +2130,7 @@ struct proto tcpv6_prot = { .accept = inet_csk_accept, .ioctl = tcp_ioctl, .init = tcp_v6_init_sock, - .destroy = tcp_v6_destroy_sock, + .destroy = tcp_v4_destroy_sock, .shutdown = tcp_shutdown, .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index bd3140f69b82..19c0721399d9 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1630,8 +1630,6 @@ void udpv6_destroy_sock(struct sock *sk) udp_encap_disable(); } } - - inet6_destroy_sock(sk); } /* diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index d54dbd01d86f..382124d6f764 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -255,8 +255,6 @@ static void l2tp_ip6_destroy_sock(struct sock *sk) if (tunnel) l2tp_tunnel_delete(tunnel); - - inet6_destroy_sock(sk); } static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e61c85873ea2..72d944e6a641 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2863,12 +2863,6 @@ static const struct proto_ops mptcp_v6_stream_ops = { static struct proto mptcp_v6_prot; -static void mptcp_v6_destroy(struct sock *sk) -{ - mptcp_destroy(sk); - inet6_destroy_sock(sk); -} - static struct inet_protosw mptcp_v6_protosw = { .type = SOCK_STREAM, .protocol = IPPROTO_MPTCP, @@ -2884,7 +2878,6 @@ int __init mptcp_proto_v6_init(void) mptcp_v6_prot = mptcp_prot; strcpy(mptcp_v6_prot.name, "MPTCPv6"); mptcp_v6_prot.slab = NULL; - mptcp_v6_prot.destroy = mptcp_v6_destroy; mptcp_v6_prot.obj_size = sizeof(struct mptcp6_sock); err = proto_register(&mptcp_v6_prot, 1); -- Gitee From 66bd3e534bbf11235d0b6a3653d3d9d006dd8a1f Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 19 Oct 2022 15:36:00 -0700 Subject: [PATCH 052/101] dccp: Call inet6_destroy_sock() via sk->sk_destruct(). 
stable inclusion from stable-5.10.179 commit 7da54ddc04e6cef3c016b87dbaae574abc73fa94 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 1651951ebea54970e0bda60c638fc2eee7a6218f upstream. After commit d38afeec26ed ("tcp/udp: Call inet6_destroy_sock() in IPv6 sk->sk_destruct()."), we call inet6_destroy_sock() in sk->sk_destruct() by setting inet6_sock_destruct() to it to make sure we do not leak inet6-specific resources. DCCP sets its own sk->sk_destruct() in the dccp_init_sock(), and DCCPv6 socket shares it by calling the same init function via dccp_v6_init_sock(). To call inet6_sock_destruct() from DCCPv6 sk->sk_destruct(), we export it and set dccp_v6_sk_destruct() in the init function. Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Ziyang Xuan Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- net/dccp/dccp.h | 1 + net/dccp/ipv6.c | 15 ++++++++------- net/dccp/proto.c | 8 +++++++- net/ipv6/af_inet6.c | 1 + 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 5183e627468d..0218eb169891 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -283,6 +283,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, const struct dccp_hdr *dh, const unsigned int len); +void dccp_destruct_common(struct sock *sk); int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized); void dccp_destroy_sock(struct sock *sk); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index c563f9b325d0..64e91783860d 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -992,6 +992,12 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = { .sockaddr_len = sizeof(struct sockaddr_in6), }; +static void dccp_v6_sk_destruct(struct sock *sk) +{ + dccp_destruct_common(sk); + inet6_sock_destruct(sk); +} + /* NOTE: A lot of things set to zero explicitly by 
call to * sk_alloc() so need not be done here. */ @@ -1004,17 +1010,12 @@ static int dccp_v6_init_sock(struct sock *sk) if (unlikely(!dccp_v6_ctl_sock_initialized)) dccp_v6_ctl_sock_initialized = 1; inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops; + sk->sk_destruct = dccp_v6_sk_destruct; } return err; } -static void dccp_v6_destroy_sock(struct sock *sk) -{ - dccp_destroy_sock(sk); - inet6_destroy_sock(sk); -} - static struct timewait_sock_ops dccp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct dccp6_timewait_sock), }; @@ -1037,7 +1038,7 @@ static struct proto dccp_v6_prot = { .accept = inet_csk_accept, .get_port = inet_csk_get_port, .shutdown = dccp_shutdown, - .destroy = dccp_v6_destroy_sock, + .destroy = dccp_destroy_sock, .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp6_sock), diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 65e81e0199b0..e946211758c0 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -171,12 +171,18 @@ const char *dccp_packet_name(const int type) EXPORT_SYMBOL_GPL(dccp_packet_name); -static void dccp_sk_destruct(struct sock *sk) +void dccp_destruct_common(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); dp->dccps_hc_tx_ccid = NULL; +} +EXPORT_SYMBOL_GPL(dccp_destruct_common); + +static void dccp_sk_destruct(struct sock *sk) +{ + dccp_destruct_common(sk); inet_sock_destruct(sk); } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index adce1c199b70..0979a4b15f26 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -112,6 +112,7 @@ void inet6_sock_destruct(struct sock *sk) inet6_cleanup_sock(sk); inet_sock_destruct(sk); } +EXPORT_SYMBOL_GPL(inet6_sock_destruct); static int inet6_create(struct net *net, struct socket *sock, int protocol, int kern) -- Gitee From c2393edcbcae50ce6552a3f51b0a5021531dd093 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 19 Oct 2022 15:36:01 -0700 Subject: [PATCH 053/101] sctp: 
Call inet6_destroy_sock() via sk->sk_destruct(). stable inclusion from stable-5.10.179 commit f69112de70a0c876a42532daab9962e36c2f3fed category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 6431b0f6ff1633ae598667e4cdd93830074a03e8 upstream. After commit d38afeec26ed ("tcp/udp: Call inet6_destroy_sock() in IPv6 sk->sk_destruct()."), we call inet6_destroy_sock() in sk->sk_destruct() by setting inet6_sock_destruct() to it to make sure we do not leak inet6-specific resources. SCTP sets its own sk->sk_destruct() in the sctp_init_sock(), and SCTPv6 socket reuses it as the init function. To call inet6_sock_destruct() from SCTPv6 sk->sk_destruct(), we set sctp_v6_destruct_sock() in a new init function. Signed-off-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Ziyang Xuan Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- net/sctp/socket.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3a68d65f7d15..35d3eee26ea5 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4995,13 +4995,17 @@ static void sctp_destroy_sock(struct sock *sk) } /* Triggered when there are no references on the socket anymore */ -static void sctp_destruct_sock(struct sock *sk) +static void sctp_destruct_common(struct sock *sk) { struct sctp_sock *sp = sctp_sk(sk); /* Free up the HMAC transform. 
*/ crypto_free_shash(sp->hmac); +} +static void sctp_destruct_sock(struct sock *sk) +{ + sctp_destruct_common(sk); inet_sock_destruct(sk); } @@ -9195,7 +9199,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, sctp_sk(newsk)->reuse = sp->reuse; newsk->sk_shutdown = sk->sk_shutdown; - newsk->sk_destruct = sctp_destruct_sock; + newsk->sk_destruct = sk->sk_destruct; newsk->sk_family = sk->sk_family; newsk->sk_protocol = IPPROTO_SCTP; newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; @@ -9427,11 +9431,20 @@ struct proto sctp_prot = { #if IS_ENABLED(CONFIG_IPV6) -#include -static void sctp_v6_destroy_sock(struct sock *sk) +static void sctp_v6_destruct_sock(struct sock *sk) +{ + sctp_destruct_common(sk); + inet6_sock_destruct(sk); +} + +static int sctp_v6_init_sock(struct sock *sk) { - sctp_destroy_sock(sk); - inet6_destroy_sock(sk); + int ret = sctp_init_sock(sk); + + if (!ret) + sk->sk_destruct = sctp_v6_destruct_sock; + + return ret; } struct proto sctpv6_prot = { @@ -9441,8 +9454,8 @@ struct proto sctpv6_prot = { .disconnect = sctp_disconnect, .accept = sctp_accept, .ioctl = sctp_ioctl, - .init = sctp_init_sock, - .destroy = sctp_v6_destroy_sock, + .init = sctp_v6_init_sock, + .destroy = sctp_destroy_sock, .shutdown = sctp_shutdown, .setsockopt = sctp_setsockopt, .getsockopt = sctp_getsockopt, -- Gitee From 5da0ae69fd0763a3f531ef5370faa807c1cbfe66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 22 Mar 2023 22:45:44 +0100 Subject: [PATCH 054/101] pwm: meson: Explicitly set .polarity in .get_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.179 commit b28079807de2144ca88d4f2a5cf039c58e48654d category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 8caa81eb950cb2e9d2d6959b37d853162d197f57 upstream. The driver only supports normal polarity. 
Complete the implementation of .get_state() by setting .polarity accordingly. This fixes a regression that was possible since commit c73a3107624d ("pwm: Handle .get_state() failures") which stopped to zero-initialize the state passed to the .get_state() callback. This was reported at https://forum.odroid.com/viewtopic.php?f=177&t=46360 . While this was an unintended side effect, the real issue is the driver's callback not setting the polarity. There is a complicating fact, that the .apply() callback fakes support for inversed polarity. This is not (and cannot) be matched by .get_state(). As fixing this isn't easy, only point it out in a comment to prevent authors of other drivers from copying that approach. Fixes: c375bcbaabdb ("pwm: meson: Read the full hardware state in meson_pwm_get_state()") Reported-by: Munehisa Kamata Acked-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20230310191405.2606296-1-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding Signed-off-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/pwm/pwm-meson.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index bd0d7336b898..237bb8e06593 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -168,6 +168,12 @@ static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm, duty = state->duty_cycle; period = state->period; + /* + * Note this is wrong. The result is an output wave that isn't really + * inverted and so is wrongly identified by .get_state as normal. + * Fixing this needs some care however as some machines might rely on + * this. 
+ */ if (state->polarity == PWM_POLARITY_INVERSED) duty = period - duty; @@ -366,6 +372,7 @@ static void meson_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, state->period = 0; state->duty_cycle = 0; } + state->polarity = PWM_POLARITY_NORMAL; } static const struct pwm_ops meson_pwm_ops = { -- Gitee From 6ec39099d8af17d3b2567071d3f387ad769bed08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 24 Apr 2023 07:31:04 +0200 Subject: [PATCH 055/101] pwm: iqs620a: Explicitly set .polarity in .get_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.179 commit 1c3a1211370dcaf509bac72ff258c813cc42a6eb category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit b20b097128d9145fadcea1cbb45c4d186cb57466 ] The driver only supports normal polarity. Complete the implementation of .get_state() by setting .polarity accordingly. 
Fixes: 6f0841a8197b ("pwm: Add support for Azoteq IQS620A PWM generator") Reviewed-by: Jeff LaBundy Link: https://lore.kernel.org/r/20230228135508.1798428-4-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding Signed-off-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/pwm/pwm-iqs620a.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pwm/pwm-iqs620a.c b/drivers/pwm/pwm-iqs620a.c index 3e967a12458c..a2aef006cb71 100644 --- a/drivers/pwm/pwm-iqs620a.c +++ b/drivers/pwm/pwm-iqs620a.c @@ -132,6 +132,7 @@ static void iqs620_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, mutex_unlock(&iqs620_pwm->lock); state->period = IQS620_PWM_PERIOD_NS; + state->polarity = PWM_POLARITY_NORMAL; } static int iqs620_pwm_notifier(struct notifier_block *notifier, -- Gitee From 400e4c57b1f57db7735b0d4a579a16baf20f0c49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 24 Apr 2023 07:31:05 +0200 Subject: [PATCH 056/101] pwm: hibvt: Explicitly set .polarity in .get_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.179 commit 155b2586de3f171d46c8a1955990d4676487501e category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit 6f57937980142715e927697a6ffd2050f38ed6f6 ] The driver supports both polarities. Complete the implementation of .get_state() by setting .polarity according to the configured hardware state.
Fixes: d09f00810850 ("pwm: Add PWM driver for HiSilicon BVT SOCs") Link: https://lore.kernel.org/r/20230228135508.1798428-2-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Thierry Reding Signed-off-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/pwm/pwm-hibvt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pwm/pwm-hibvt.c b/drivers/pwm/pwm-hibvt.c index ad205fdad372..286e9b119ee5 100644 --- a/drivers/pwm/pwm-hibvt.c +++ b/drivers/pwm/pwm-hibvt.c @@ -146,6 +146,7 @@ static void hibvt_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, value = readl(base + PWM_CTRL_ADDR(pwm->hwpwm)); state->enabled = (PWM_ENABLE_MASK & value); + state->polarity = (PWM_POLARITY_MASK & value) ? PWM_POLARITY_INVERSED : PWM_POLARITY_NORMAL; } static int hibvt_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, -- Gitee From 8cb4efb5736759833168bcbcc473b51f15a58d61 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 29 Mar 2023 07:35:32 +0300 Subject: [PATCH 057/101] iio: adc: at91-sama5d2_adc: fix an error code in at91_adc_allocate_trigger() stable inclusion from stable-5.10.179 commit 42604b4ad5f3289596455c786c04c67a1b5740d9 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 73a428b37b9b538f8f8fe61caa45e7f243bab87c upstream. The at91_adc_allocate_trigger() function is supposed to return error pointers. Returning a NULL will cause an Oops. 
Fixes: 5e1a1da0f8c9 ("iio: adc: at91-sama5d2_adc: add hw trigger and buffer support") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/5d728f9d-31d1-410d-a0b3-df6a63a2c8ba@kili.mountain Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/iio/adc/at91-sama5d2_adc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index 250b78ee1625..b806c1ab9b61 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -1002,7 +1002,7 @@ static struct iio_trigger *at91_adc_allocate_trigger(struct iio_dev *indio, trig = devm_iio_trigger_alloc(&indio->dev, "%s-dev%d-%s", indio->name, indio->id, trigger_name); if (!trig) - return NULL; + return ERR_PTR(-ENOMEM); trig->dev.parent = indio->dev.parent; iio_trigger_set_drvdata(trig, indio); -- Gitee From b422d7fa24cc2b951600ef9182fcccc5ccd5b8e0 Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Mon, 17 Apr 2023 06:32:42 -0700 Subject: [PATCH 058/101] ASoC: fsl_asrc_dma: fix potential null-ptr-deref stable inclusion from stable-5.10.179 commit 4ab5f8f9d026bd6e66827c5993179bab42fc8f51 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 86a24e99c97234f87d9f70b528a691150e145197 upstream. dma_request_slave_channel() may return NULL which will lead to NULL pointer dereference error in 'tmp_chan->private'. Correct this behaviour by, first, switching from deprecated function dma_request_slave_channel() to dma_request_chan(). Secondly, enable sanity check for the resulting value of dma_request_chan(). Also, fix description that follows the enacted changes and that concerns the use of dma_request_slave_channel().
Fixes: 706e2c881158 ("ASoC: fsl_asrc_dma: Reuse the dma channel if available in Back-End") Co-developed-by: Natalia Petrova Signed-off-by: Nikita Zhandarovich Acked-by: Shengjiu Wang Link: https://lore.kernel.org/r/20230417133242.53339-1-n.zhandarovich@fintech.ru Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- sound/soc/fsl/fsl_asrc_dma.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sound/soc/fsl/fsl_asrc_dma.c b/sound/soc/fsl/fsl_asrc_dma.c index 29f91cdecbc3..9b2a986ce415 100644 --- a/sound/soc/fsl/fsl_asrc_dma.c +++ b/sound/soc/fsl/fsl_asrc_dma.c @@ -207,14 +207,19 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component, be_chan = soc_component_to_pcm(component_be)->chan[substream->stream]; tmp_chan = be_chan; } - if (!tmp_chan) - tmp_chan = dma_request_slave_channel(dev_be, tx ? "tx" : "rx"); + if (!tmp_chan) { + tmp_chan = dma_request_chan(dev_be, tx ? "tx" : "rx"); + if (IS_ERR(tmp_chan)) { + dev_err(dev, "failed to request DMA channel for Back-End\n"); + return -EINVAL; + } + } /* * An EDMA DEV_TO_DEV channel is fixed and bound with DMA event of each * peripheral, unlike SDMA channel that is allocated dynamically. So no * need to configure dma_request and dma_request2, but get dma_chan of - * Back-End device directly via dma_request_slave_channel. + * Back-End device directly via dma_request_chan. */ if (!asrc->use_edma) { /* Get DMA request of Back-End */ -- Gitee From 73fbdf663c221c96a9b32a02d04c3979ff736427 Mon Sep 17 00:00:00 2001 From: Ekaterina Orlova Date: Fri, 21 Apr 2023 15:35:39 +0100 Subject: [PATCH 059/101] ASN.1: Fix check for strdup() success stable inclusion from stable-5.10.179 commit 0367bf3f4a4eb6691e18194beb5792843aecced9 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 5a43001c01691dcbd396541e6faa2c0077378f48 upstream. 
It seems there is a misprint in the check of strdup() return code that can lead to NULL pointer dereference. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 4520c6a49af8 ("X.509: Add simple ASN.1 grammar compiler") Signed-off-by: Ekaterina Orlova Cc: David Woodhouse Cc: James Bottomley Cc: Jarkko Sakkinen Cc: keyrings@vger.kernel.org Cc: linux-kbuild@vger.kernel.org Link: https://lore.kernel.org/r/20230315172130.140-1-vorobushek.ok@gmail.com/ Signed-off-by: David Howells Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- scripts/asn1_compiler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/asn1_compiler.c b/scripts/asn1_compiler.c index adabd4145264..985fb81cae79 100644 --- a/scripts/asn1_compiler.c +++ b/scripts/asn1_compiler.c @@ -625,7 +625,7 @@ int main(int argc, char **argv) p = strrchr(argv[1], '/'); p = p ? p + 1 : argv[1]; grammar_name = strdup(p); - if (!p) { + if (!grammar_name) { perror(NULL); exit(1); } -- Gitee From 0929f1a7ec629d5b07cca61bfd8a7f65ad622dd2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 26 Apr 2023 11:27:43 +0200 Subject: [PATCH 060/101] Linux 5.10.179 stable inclusion from stable-5.10.179^0 commit f1b32fda06d2cfb8eea9680b0ba7a8b0d5b81eeb category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- Link: https://lore.kernel.org/r/20230424131127.653885914@linuxfoundation.org Tested-by: Salvatore Bonaccorso Tested-by: Guenter Roeck Tested-by: Jon Hunter Tested-by: Linux Kernel Functional Testing Tested-by: Chris Paterson (CIP) Tested-by: Florian Fainelli Tested-by: Shuah Khan Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1ccd4af54a45..1a4592e1d1f4 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 
-SUBLEVEL = 178 +SUBLEVEL = 179 EXTRAVERSION = NAME = Dare mighty things -- Gitee From 6fe1dc8d823c41fb39cc01fa0f6050d044b77ee6 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 23 Aug 2022 08:45:32 -0700 Subject: [PATCH 061/101] seccomp: Move copy_seccomp() to no failure path. stable inclusion from stable-5.10.180 commit d4a895e924b486f2a38463114509e1088ef4d7f5 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit a1140cb215fa13dcec06d12ba0c3ee105633b7c4 upstream. Our syzbot instance reported memory leaks in do_seccomp() [0], similar to the report [1]. It shows that we miss freeing struct seccomp_filter and some objects included in it. We can reproduce the issue with the program below [2] which calls one seccomp() and two clone() syscalls. The first clone()d child exits earlier than its parent and sends a signal to kill it during the second clone(), more precisely before the fatal_signal_pending() test in copy_process(). When the parent receives the signal, it has to destroy the embryonic process and return -EINTR to user space. In the failure path, we have to call seccomp_filter_release() to decrement the filter's refcount. Initially, we called it in free_task() called from the failure path, but the commit 3a15fb6ed92c ("seccomp: release filter after task is fully dead") moved it to release_task() to notify user space as early as possible that the filter is no longer used. To keep the change and current seccomp refcount semantics, let's move copy_seccomp() just after the signal check and add a WARN_ON_ONCE() in free_task() for future debugging. [0]: unreferenced object 0xffff8880063add00 (size 256): comm "repro_seccomp", pid 230, jiffies 4294687090 (age 9.914s) hex dump (first 32 bytes): 01 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 ................ ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ................ 
backtrace: do_seccomp (./include/linux/slab.h:600 ./include/linux/slab.h:733 kernel/seccomp.c:666 kernel/seccomp.c:708 kernel/seccomp.c:1871 kernel/seccomp.c:1991) do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) unreferenced object 0xffffc90000035000 (size 4096): comm "repro_seccomp", pid 230, jiffies 4294687090 (age 9.915s) hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 00 00 00 05 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: __vmalloc_node_range (mm/vmalloc.c:3226) __vmalloc_node (mm/vmalloc.c:3261 (discriminator 4)) bpf_prog_alloc_no_stats (kernel/bpf/core.c:91) bpf_prog_alloc (kernel/bpf/core.c:129) bpf_prog_create_from_user (net/core/filter.c:1414) do_seccomp (kernel/seccomp.c:671 kernel/seccomp.c:708 kernel/seccomp.c:1871 kernel/seccomp.c:1991) do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) unreferenced object 0xffff888003fa1000 (size 1024): comm "repro_seccomp", pid 230, jiffies 4294687090 (age 9.915s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: bpf_prog_alloc_no_stats (./include/linux/slab.h:600 ./include/linux/slab.h:733 kernel/bpf/core.c:95) bpf_prog_alloc (kernel/bpf/core.c:129) bpf_prog_create_from_user (net/core/filter.c:1414) do_seccomp (kernel/seccomp.c:671 kernel/seccomp.c:708 kernel/seccomp.c:1871 kernel/seccomp.c:1991) do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) unreferenced object 0xffff888006360240 (size 16): comm "repro_seccomp", pid 230, jiffies 4294687090 (age 9.915s) hex dump (first 16 bytes): 01 00 37 00 76 65 72 6c e0 83 01 06 80 88 ff ff ..7.verl........ 
backtrace: bpf_prog_store_orig_filter (net/core/filter.c:1137) bpf_prog_create_from_user (net/core/filter.c:1428) do_seccomp (kernel/seccomp.c:671 kernel/seccomp.c:708 kernel/seccomp.c:1871 kernel/seccomp.c:1991) do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) unreferenced object 0xffff8880060183e0 (size 8): comm "repro_seccomp", pid 230, jiffies 4294687090 (age 9.915s) hex dump (first 8 bytes): 06 00 00 00 00 00 ff 7f ........ backtrace: kmemdup (mm/util.c:129) bpf_prog_store_orig_filter (net/core/filter.c:1144) bpf_prog_create_from_user (net/core/filter.c:1428) do_seccomp (kernel/seccomp.c:671 kernel/seccomp.c:708 kernel/seccomp.c:1871 kernel/seccomp.c:1991) do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) [1]: https://syzkaller.appspot.com/bug?id=2809bb0ac77ad9aa3f4afe42d6a610aba594a987 [2]: #define _GNU_SOURCE #include #include #include #include #include #include void main(void) { struct sock_filter filter[] = { BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), }; struct sock_fprog fprog = { .len = sizeof(filter) / sizeof(filter[0]), .filter = filter, }; long i, pid; syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, 0, &fprog); for (i = 0; i < 2; i++) { pid = syscall(__NR_clone, CLONE_NEWNET | SIGKILL, NULL, NULL, 0); if (pid == 0) return; } } Fixes: 3a15fb6ed92c ("seccomp: release filter after task is fully dead") Reported-by: syzbot+ab17848fe269b573eb71@syzkaller.appspotmail.com Reported-by: Ayushman Dutta Suggested-by: Kees Cook Signed-off-by: Kuniyuki Iwashima Reviewed-by: Christian Brauner (Microsoft) Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220823154532.82913-1-kuniyu@amazon.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- kernel/fork.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/kernel/fork.c b/kernel/fork.c index 
dd0375e4644b..82a9b0473cd4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -449,6 +449,9 @@ void put_task_stack(struct task_struct *tsk) void free_task(struct task_struct *tsk) { +#ifdef CONFIG_SECCOMP + WARN_ON_ONCE(tsk->seccomp.filter); +#endif scs_release(tsk); #ifndef CONFIG_THREAD_INFO_IN_TASK @@ -2271,12 +2274,6 @@ static __latent_entropy struct task_struct *copy_process( spin_lock(&current->sighand->siglock); - /* - * Copy seccomp details explicitly here, in case they were changed - * before holding sighand lock. - */ - copy_seccomp(p); - rseq_fork(p, clone_flags); /* Don't start children in a dying pid namespace */ @@ -2291,6 +2288,14 @@ static __latent_entropy struct task_struct *copy_process( goto bad_fork_cancel_cgroup; } + /* No more failure paths after this point. */ + + /* + * Copy seccomp details explicitly here, in case they were changed + * before holding sighand lock. + */ + copy_seccomp(p); + init_task_pid_links(p); if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); -- Gitee From dd7594479c061eb653262b5ceb6f8b952a093c51 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Sun, 12 Mar 2023 19:15:49 -0400 Subject: [PATCH 062/101] counter: 104-quad-8: Fix race condition between FLAG and CNTR reads stable inclusion from stable-5.10.180 commit 0f29d0e8fc77db0cbf08df3aafb6206b66714277 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 4aa3b75c74603c3374877d5fd18ad9cc3a9a62ed upstream. The Counter (CNTR) register is 24 bits wide, but we can have an effective 25-bit count value by setting bit 24 to the XOR of the Borrow flag and Carry flag. The flags can be read from the FLAG register, but a race condition exists: the Borrow flag and Carry flag are instantaneous and could change by the time the count value is read from the CNTR register.
Since the race condition could result in an incorrect 25-bit count value, remove support for 25-bit count values from this driver; hard-coded maximum count values are replaced by a LS7267_CNTR_MAX define for consistency and clarity. Fixes: 28e5d3bb0325 ("iio: 104-quad-8: Add IIO support for the ACCES 104-QUAD-8") Cc: # 6.1.x Cc: # 6.2.x Link: https://lore.kernel.org/r/20230312231554.134858-1-william.gray@linaro.org/ Signed-off-by: William Breathitt Gray Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/counter/104-quad-8.c | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/drivers/counter/104-quad-8.c b/drivers/counter/104-quad-8.c index 21bb2bb767a1..89c9cb850a34 100644 --- a/drivers/counter/104-quad-8.c +++ b/drivers/counter/104-quad-8.c @@ -62,10 +62,6 @@ struct quad8_iio { #define QUAD8_REG_CHAN_OP 0x11 #define QUAD8_REG_INDEX_INPUT_LEVELS 0x16 #define QUAD8_DIFF_ENCODER_CABLE_STATUS 0x17 -/* Borrow Toggle flip-flop */ -#define QUAD8_FLAG_BT BIT(0) -/* Carry Toggle flip-flop */ -#define QUAD8_FLAG_CT BIT(1) /* Error flag */ #define QUAD8_FLAG_E BIT(4) /* Up/Down flag */ @@ -104,9 +100,6 @@ static int quad8_read_raw(struct iio_dev *indio_dev, { struct quad8_iio *const priv = iio_priv(indio_dev); const int base_offset = priv->base + 2 * chan->channel; - unsigned int flags; - unsigned int borrow; - unsigned int carry; int i; switch (mask) { @@ -117,12 +110,7 @@ static int quad8_read_raw(struct iio_dev *indio_dev, return IIO_VAL_INT; } - flags = inb(base_offset + 1); - borrow = flags & QUAD8_FLAG_BT; - carry = !!(flags & QUAD8_FLAG_CT); - - /* Borrow XOR Carry effectively doubles count range */ - *val = (borrow ^ carry) << 24; + *val = 0; mutex_lock(&priv->lock); @@ -643,17 +631,9 @@ static int quad8_count_read(struct counter_device *counter, { struct quad8_iio *const priv = counter->priv; const int base_offset = priv->base + 2 * count->id; - unsigned int flags; - unsigned int borrow; - unsigned 
int carry; int i; - flags = inb(base_offset + 1); - borrow = flags & QUAD8_FLAG_BT; - carry = !!(flags & QUAD8_FLAG_CT); - - /* Borrow XOR Carry effectively doubles count range */ - *val = (unsigned long)(borrow ^ carry) << 24; + *val = 0; mutex_lock(&priv->lock); @@ -1198,8 +1178,8 @@ static ssize_t quad8_count_ceiling_read(struct counter_device *counter, mutex_unlock(&priv->lock); - /* By default 0x1FFFFFF (25 bits unsigned) is maximum count */ - return sprintf(buf, "33554431\n"); + /* By default 0xFFFFFF (24 bits unsigned) is maximum count */ + return sprintf(buf, "16777215\n"); } static ssize_t quad8_count_ceiling_write(struct counter_device *counter, -- Gitee From 23c8e50cf2410a1ec63300accfcd40f5f3a6834d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 19 Apr 2023 13:16:13 +0300 Subject: [PATCH 063/101] KVM: arm64: Fix buffer overflow in kvm_arm_set_fw_reg() stable inclusion from stable-5.10.180 commit 1dd95b2109de223d98956b54cf8990bb226c285e category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit a25bc8486f9c01c1af6b6c5657234b2eee2c39d6 upstream. The KVM_REG_SIZE() comes from the ioctl and it can be a power of two between 0-32768 but if it is more than sizeof(long) this will corrupt memory. 
Fixes: 99adb567632b ("KVM: arm/arm64: Add save/restore support for firmware workaround state") Signed-off-by: Dan Carpenter Reviewed-by: Steven Price Reviewed-by: Eric Auger Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/4efbab8c-640f-43b2-8ac6-6d68e08280fe@kili.mountain Signed-off-by: Oliver Upton [will: kvm_arm_set_fw_reg() lives in psci.c not hypercalls.c] Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- arch/arm64/kvm/psci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 20ba5136ac3d..32bb26be8a9b 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -499,6 +499,8 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) u64 val; int wa_level; + if (KVM_REG_SIZE(reg->id) != sizeof(val)) + return -ENOENT; if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id))) return -EFAULT; -- Gitee From cc2580b81fc11659ef5830d0d4026d8d95de2f07 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 4 Apr 2023 21:40:36 +0200 Subject: [PATCH 064/101] drm/fb-helper: set x/yres_virtual in drm_fb_helper_check_var stable inclusion from stable-5.10.180 commit dc110b20f4ce752c232ad710a428dd97cf138338 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 1935f0deb6116dd785ea64d8035eab0ff441255b upstream. Drivers are supposed to fix this up if needed if they don't outright reject it. Uncovered by 6c11df58fd1a ("fbmem: Check virtual screen sizes in fb_set_var()"). 
Reported-by: syzbot+20dcf81733d43ddff661@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=c5faf983bfa4a607de530cd3bb008888bf06cefc Cc: stable@vger.kernel.org # v5.4+ Cc: Daniel Vetter Cc: Javier Martinez Canillas Cc: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20230404194038.472803-1-daniel.vetter@ffwll.ch Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/gpu/drm/drm_fb_helper.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index ac5d61e65124..04f2ec2254e9 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1299,6 +1299,9 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, return -EINVAL; } + var->xres_virtual = fb->width; + var->yres_virtual = fb->height; + /* * Workaround for SDL 1.2, which is known to be setting all pixel format * fields values to zero in some cases. We treat this situation as a -- Gitee From 67c97ea5447c5081296933c7baa066f8610dce07 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 1 May 2022 21:31:43 +0200 Subject: [PATCH 065/101] x86/fpu: Prevent FPU state corruption stable inclusion from stable-5.10.180 commit c0e921422359f6796fc0ccf50a66739b919344dd category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 59f5ede3bc0f00eb856425f636dab0c10feb06d8 upstream. The FPU usage related to task FPU management is either protected by disabling interrupts (switch_to, return to user) or via fpregs_lock() which is a wrapper around local_bh_disable(). When kernel code wants to use the FPU then it has to check whether it is possible by calling irq_fpu_usable(). But the condition in irq_fpu_usable() is wrong. 
It allows FPU to be used when: !in_interrupt() || interrupted_user_mode() || interrupted_kernel_fpu_idle() The latter is checking whether some other context already uses FPU in the kernel, but if that's not the case then it allows FPU to be used unconditionally even if the calling context interrupted a fpregs_lock() critical region. If that happens then the FPU state of the interrupted context becomes corrupted. Allow in kernel FPU usage only when no other context has in kernel FPU usage and either the calling context is not hard interrupt context or the hard interrupt did not interrupt a local bottomhalf disabled region. It's hard to find a proper Fixes tag as the condition was broken in one way or the other for a very long time and the eager/lazy FPU changes caused a lot of churn. Picked something remotely connected from the history. This survived undetected for quite some time as FPU usage in interrupt context is rare, but the recent changes to the random code unearthed it at least on a kernel which had FPU debugging enabled. There is probably a higher rate of silent corruption as not all issues can be detected by the FPU debugging code. This will be addressed in a subsequent change. 
Fixes: 5d2bd7009f30 ("x86, fpu: decouple non-lazy/eager fpu restore from xsave") Reported-by: Filipe Manana Signed-off-by: Thomas Gleixner Tested-by: Filipe Manana Reviewed-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220501193102.588689270@linutronix.de Signed-off-by: Can Sun Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- arch/x86/kernel/fpu/core.c | 67 +++++++++++++++----------------------- 1 file changed, 26 insertions(+), 41 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 571220ac8bea..835b948095cd 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -25,17 +25,7 @@ */ union fpregs_state init_fpstate __read_mostly; -/* - * Track whether the kernel is using the FPU state - * currently. - * - * This flag is used: - * - * - by IRQ context code to potentially use the FPU - * if it's unused. - * - * - to debug kernel_fpu_begin()/end() correctness - */ +/* Track in-kernel FPU usage */ static DEFINE_PER_CPU(bool, in_kernel_fpu); /* @@ -43,42 +33,37 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu); */ DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); -static bool kernel_fpu_disabled(void) -{ - return this_cpu_read(in_kernel_fpu); -} - -static bool interrupted_kernel_fpu_idle(void) -{ - return !kernel_fpu_disabled(); -} - -/* - * Were we in user mode (or vm86 mode) when we were - * interrupted? - * - * Doing kernel_fpu_begin/end() is ok if we are running - * in an interrupt context from user mode - we'll just - * save the FPU state as required. - */ -static bool interrupted_user_mode(void) -{ - struct pt_regs *regs = get_irq_regs(); - return regs && user_mode(regs); -} - /* * Can we use the FPU in kernel mode with the * whole "kernel_fpu_begin/end()" sequence? - * - * It's always ok in process context (ie "not interrupt") - * but it is sometimes ok even from an irq. 
*/ bool irq_fpu_usable(void) { - return !in_interrupt() || - interrupted_user_mode() || - interrupted_kernel_fpu_idle(); + if (WARN_ON_ONCE(in_nmi())) + return false; + + /* In kernel FPU usage already active? */ + if (this_cpu_read(in_kernel_fpu)) + return false; + + /* + * When not in NMI or hard interrupt context, FPU can be used in: + * + * - Task context except from within fpregs_lock()'ed critical + * regions. + * + * - Soft interrupt processing context which cannot happen + * while in a fpregs_lock()'ed critical region. + */ + if (!in_irq()) + return true; + + /* + * In hard interrupt context it's safe when soft interrupts + * are enabled, which means the interrupt did not hit in + * a fpregs_lock()'ed critical region. + */ + return !softirq_count(); } EXPORT_SYMBOL(irq_fpu_usable); -- Gitee From 63cae44e34a2ad37d7c016c01e07dd8861154423 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= Date: Mon, 17 Apr 2023 18:20:03 +0300 Subject: [PATCH 066/101] USB: serial: option: add UNISOC vendor and TOZED LT70C product MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.180 commit f964a00386ca0b1013357657922c00c1b5e66b07 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit a095edfc15f0832e046ae23964e249ef5c95af87 upstream. Add UNISOC vendor ID and TOZED LT70-C modem which is based from UNISOC SL8563. The modem supports the NCM mode. Interface 0 is used for running the AT commands. Interface 12 is the ADB interface. T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 6 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1782 ProdID=4055 Rev=04.04 S: Manufacturer=Unisoc Phone S: Product=Unisoc Phone S: SerialNumber= C: #Ifs=14 Cfg#= 1 Atr=c0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0d Prot=00 Driver=cdc_ncm E: Ad=82(I) Atr=03(Int.) 
MxPS= 16 Ivl=32ms I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=01 Driver=cdc_ncm E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#=10 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#=11 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=08(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8c(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#=12 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=09(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8d(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#=13 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=0a(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8e(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0d Prot=00 Driver=cdc_ncm E: Ad=84(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 3 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=01 Driver=cdc_ncm E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0d Prot=00 Driver=cdc_ncm E: Ad=86(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 5 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=01 Driver=cdc_ncm E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 6 Alt= 0 #EPs= 1 Cls=02(commc) Sub=0d Prot=00 Driver=cdc_ncm E: Ad=88(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 7 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=01 Driver=cdc_ncm E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 8 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 9 Alt= 0 #EPs= 2 Cls=ff(vend.) 
Sub=00 Prot=00 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=8a(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Arınç ÜNAL Link: https://lore.kernel.org/r/20230417152003.243248-1-arinc.unal@arinc9.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/usb/serial/option.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index da8b7bd39703..5b474efeab6a 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -595,6 +595,11 @@ static void option_instat_callback(struct urb *urb); #define SIERRA_VENDOR_ID 0x1199 #define SIERRA_PRODUCT_EM9191 0x90d3 +/* UNISOC (Spreadtrum) products */ +#define UNISOC_VENDOR_ID 0x1782 +/* TOZED LT70-C based on UNISOC SL8563 uses UNISOC's vendor ID */ +#define TOZED_PRODUCT_LT70C 0x4055 + /* Device flags */ /* Highest interface number which can be used with NCTRL() and RSVD() */ @@ -2225,6 +2230,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(OPPO_VENDOR_ID, OPPO_PRODUCT_R11, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); -- Gitee From f4401d3567230109e48f9d15395dcf0b5cac78db Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 12 Apr 2023 15:58:42 -0700 Subject: [PATCH 067/101] driver core: Don't require dynamic_debug for initcall_debug probe timing stable inclusion from stable-5.10.180 commit 8aa079c2fdfc1bbf2eb8499e065ab509a2b3eea3 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit e2f06aa885081e1391916367f53bad984714b4db upstream. 
Don't require the use of dynamic debug (or modification of the kernel to add a #define DEBUG to the top of this file) to get the printk message about driver probe timing. This printk is only emitted when initcall_debug is enabled on the kernel commandline, and it isn't immediately obvious that you have to do something else to debug boot timing issues related to driver probe. Add a comment too so it doesn't get converted back to pr_debug(). Fixes: eb7fbc9fb118 ("driver core: Add missing '\n' in log messages") Cc: stable Cc: Christophe JAILLET Cc: Brian Norris Reviewed-by: Brian Norris Acked-by: Randy Dunlap Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20230412225842.3196599-1-swboyd@chromium.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/base/dd.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 497e3d4255c4..503c01d4015d 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -677,7 +677,12 @@ static int really_probe_debug(struct device *dev, struct device_driver *drv) calltime = ktime_get(); ret = really_probe(dev, drv); rettime = ktime_get(); - pr_debug("probe of %s returned %d after %lld usecs\n", + /* + * Don't change this to pr_debug() because that requires + * CONFIG_DYNAMIC_DEBUG and we want a simple 'initcall_debug' on the + * kernel commandline to print this all the time at the debug level. 
+ */ + printk(KERN_DEBUG "probe of %s returned %d after %lld usecs\n", dev_name(dev), ret, ktime_us_delta(rettime, calltime)); return ret; } -- Gitee From b43c592516c4f751fa320f8893a8e1aa77dff96d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrik=20Dahlstr=C3=B6m?= Date: Mon, 13 Mar 2023 21:50:29 +0100 Subject: [PATCH 068/101] iio: adc: palmas_gpadc: fix NULL dereference on rmmod MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.180 commit f5e96af71eab491a19fb3926ab42d592118ca52c category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit 49f76c499d38bf67803438eee88c8300d0f6ce09 ] Calling dev_to_iio_dev() on a platform device pointer is undefined and will make adc NULL. Signed-off-by: Patrik Dahlström Link: https://lore.kernel.org/r/20230313205029.1881745-1-risca@dalakolonin.se Signed-off-by: Jonathan Cameron Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- drivers/iio/adc/palmas_gpadc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/palmas_gpadc.c b/drivers/iio/adc/palmas_gpadc.c index f4756671cddb..6ed0d151ad21 100644 --- a/drivers/iio/adc/palmas_gpadc.c +++ b/drivers/iio/adc/palmas_gpadc.c @@ -628,7 +628,7 @@ static int palmas_gpadc_probe(struct platform_device *pdev) static int palmas_gpadc_remove(struct platform_device *pdev) { - struct iio_dev *indio_dev = dev_to_iio_dev(&pdev->dev); + struct iio_dev *indio_dev = dev_get_drvdata(&pdev->dev); struct palmas_gpadc *adc = iio_priv(indio_dev); if (adc->wakeup1_enable || adc->wakeup2_enable) -- Gitee From 72db4aa2052735b9db4129663ad5ecf233660693 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 22 Mar 2023 15:53:32 +0100 Subject: [PATCH 069/101] ASoC: Intel: bytcr_rt5640: Add quirk for the Acer Iconia One 7 B1-750 stable inclusion from stable-5.10.180 commit 925cbb725367b228039c62d35f4c75937463bb44 category: bugfix issue: #I8RGKL CVE: 
NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit e38c5e80c3d293a883c6f1d553f2146ec0bda35e ] The Acer Iconia One 7 B1-750 tablet mostly works fine with the defaults for a Bay Trail CR tablet. Except for the internal mic, instead of an analog mic on IN3 a digital mic on DMIC1 is used. Add a quirk with these settings for this tablet. Acked-by: Pierre-Louis Bossart Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20230322145332.131525-1-hdegoede@redhat.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- sound/soc/intel/boards/bytcr_rt5640.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 8a99cb6dfcd6..9a5ab96f917d 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -393,6 +393,18 @@ static int byt_rt5640_aif1_hw_params(struct snd_pcm_substream *substream, /* Please keep this list alphabetically sorted */ static const struct dmi_system_id byt_rt5640_quirk_table[] = { + { /* Acer Iconia One 7 B1-750 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Insyde"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "VESPA2"), + }, + .driver_data = (void *)(BYT_RT5640_DMIC1_MAP | + BYT_RT5640_JD_SRC_JD1_IN4P | + BYT_RT5640_OVCD_TH_1500UA | + BYT_RT5640_OVCD_SF_0P75 | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, { /* Acer Iconia Tab 8 W1-810 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Acer"), -- Gitee From 84d4b43bf1c2fccfa1c00cd8c0e0be00d9abfeb4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 9 Jan 2023 15:11:52 +0200 Subject: [PATCH 070/101] asm-generic/io.h: suppress endianness warnings for readq() and writeq() stable inclusion from stable-5.10.180 commit 69fdbb334d6e6d33e27b0adc3234607af6135861 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit 
d564fa1ff19e893e2971d66e5c8f49dc1cdc8ffc ] Commit c1d55d50139b ("asm-generic/io.h: Fix sparse warnings on big-endian architectures") missed fixing the 64-bit accessors. Arnd explains in the attached link why the casts are necessary, even if __raw_readq() and __raw_writeq() do not take endian-specific types. Link: https://lore.kernel.org/lkml/9105d6fc-880b-4734-857d-e3d30b87ccf6@app.fastmail.com/ Suggested-by: Arnd Bergmann Signed-off-by: Vladimir Oltean Reviewed-by: Jonathan Cameron Signed-off-by: Arnd Bergmann Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- include/asm-generic/io.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 9ea83d80eb6f..dcbd41048b4e 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -190,7 +190,7 @@ static inline u64 readq(const volatile void __iomem *addr) u64 val; __io_br(); - val = __le64_to_cpu(__raw_readq(addr)); + val = __le64_to_cpu((__le64 __force)__raw_readq(addr)); __io_ar(val); return val; } @@ -233,7 +233,7 @@ static inline void writel(u32 value, volatile void __iomem *addr) static inline void writeq(u64 value, volatile void __iomem *addr) { __io_bw(); - __raw_writeq(__cpu_to_le64(value), addr); + __raw_writeq((u64 __force)__cpu_to_le64(value), addr); __io_aw(); } #endif -- Gitee From 0e2bae3b3e860619532e6b62eb9a6a240dfaba71 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Tue, 13 Dec 2022 15:52:08 -0700 Subject: [PATCH 071/101] wireguard: timers: cast enum limits members to int in prints stable inclusion from stable-5.10.180 commit 5434c7019d2308a474ddb8accbb7279f1a327036 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 2d4ee16d969c97996e80e4c9cb6de0acaff22c9f upstream. Since gcc13, each member of an enum has the same type as the enum. And that is inherited from its members. 
Provided "REKEY_AFTER_MESSAGES = 1ULL << 60", the named type is unsigned long. This generates warnings with gcc-13: error: format '%d' expects argument of type 'int', but argument 6 has type 'long unsigned int' Cast those particular enum members to int when printing them. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=36113 Cc: Martin Liska Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Jason A. Donenfeld Link: https://lore.kernel.org/all/20221213225208.3343692-2-Jason@zx2c4.com/ Signed-off-by: Jakub Kicinski Cc: Chris Clayton Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/net/wireguard/timers.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireguard/timers.c b/drivers/net/wireguard/timers.c index d54d32ac9bc4..91f5d6d2d4e2 100644 --- a/drivers/net/wireguard/timers.c +++ b/drivers/net/wireguard/timers.c @@ -46,7 +46,7 @@ static void wg_expired_retransmit_handshake(struct timer_list *timer) if (peer->timer_handshake_attempts > MAX_TIMER_HANDSHAKES) { pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d attempts, giving up\n", peer->device->dev->name, peer->internal_id, - &peer->endpoint.addr, MAX_TIMER_HANDSHAKES + 2); + &peer->endpoint.addr, (int)MAX_TIMER_HANDSHAKES + 2); del_timer(&peer->timer_send_keepalive); /* We drop all packets without a keypair and don't try again, @@ -64,7 +64,7 @@ static void wg_expired_retransmit_handshake(struct timer_list *timer) ++peer->timer_handshake_attempts; pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d seconds, retrying (try %d)\n", peer->device->dev->name, peer->internal_id, - &peer->endpoint.addr, REKEY_TIMEOUT, + &peer->endpoint.addr, (int)REKEY_TIMEOUT, peer->timer_handshake_attempts + 1); /* We clear the endpoint address src address, in case this is @@ -94,7 +94,7 @@ static void wg_expired_new_handshake(struct timer_list *timer) pr_debug("%s: Retrying handshake with peer %llu (%pISpfsc) because we stopped 
hearing back after %d seconds\n", peer->device->dev->name, peer->internal_id, - &peer->endpoint.addr, KEEPALIVE_TIMEOUT + REKEY_TIMEOUT); + &peer->endpoint.addr, (int)(KEEPALIVE_TIMEOUT + REKEY_TIMEOUT)); /* We clear the endpoint address src address, in case this is the cause * of trouble. */ @@ -126,7 +126,7 @@ static void wg_queued_expired_zero_key_material(struct work_struct *work) pr_debug("%s: Zeroing out all keys for peer %llu (%pISpfsc), since we haven't received a new one in %d seconds\n", peer->device->dev->name, peer->internal_id, - &peer->endpoint.addr, REJECT_AFTER_TIME * 3); + &peer->endpoint.addr, (int)REJECT_AFTER_TIME * 3); wg_noise_handshake_clear(&peer->handshake); wg_noise_keypairs_clear(&peer->keypairs); wg_peer_put(peer); -- Gitee From dc45634a555a18cd95536cf3374e9da86764a279 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 11 Apr 2023 08:21:02 +0200 Subject: [PATCH 072/101] PCI: pciehp: Fix AB-BA deadlock between reset_lock and device_lock stable inclusion from stable-5.10.180 commit 2f31633da843eb0457f6212eba435ac538c28f58 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit f5eff5591b8f9c5effd25c92c758a127765f74c1 upstream. In 2013, commits 2e35afaefe64 ("PCI: pciehp: Add reset_slot() method") 608c388122c7 ("PCI: Add slot reset option to pci_dev_reset()") amended PCIe hotplug to mask Presence Detect Changed events during a Secondary Bus Reset. The reset thus no longer causes gratuitous slot bringdown and bringup. However the commits neglected to serialize reset with code paths reading slot registers. For instance, a slot bringup due to an earlier hotplug event may see the Presence Detect State bit cleared during a concurrent Secondary Bus Reset. In 2018, commit 5b3f7b7d062b ("PCI: pciehp: Avoid slot access during reset") retrofitted the missing locking. It introduced a reset_lock which serializes a Secondary Bus Reset with other parts of pciehp. 
Unfortunately the locking turns out to be overzealous: reset_lock is held for the entire enumeration and de-enumeration of hotplugged devices, including driver binding and unbinding. Driver binding and unbinding acquires device_lock while the reset_lock of the ancestral hotplug port is held. A concurrent Secondary Bus Reset acquires the ancestral reset_lock while already holding the device_lock. The asymmetric locking order in the two code paths can lead to AB-BA deadlocks. Michael Haeuptle reports such deadlocks on simultaneous hot-removal and vfio release (the latter implies a Secondary Bus Reset): pciehp_ist() # down_read(reset_lock) pciehp_handle_presence_or_link_change() pciehp_disable_slot() __pciehp_disable_slot() remove_board() pciehp_unconfigure_device() pci_stop_and_remove_bus_device() pci_stop_bus_device() pci_stop_dev() device_release_driver() device_release_driver_internal() __device_driver_lock() # device_lock() SYS_munmap() vfio_device_fops_release() vfio_device_group_close() vfio_device_close() vfio_device_last_close() vfio_pci_core_close_device() vfio_pci_core_disable() # device_lock() __pci_reset_function_locked() pci_reset_bus_function() pci_dev_reset_slot_function() pci_reset_hotplug_slot() pciehp_reset_slot() # down_write(reset_lock) Ian May reports the same deadlock on simultaneous hot-removal and an AER-induced Secondary Bus Reset: aer_recover_work_func() pcie_do_recovery() aer_root_reset() pci_bus_error_reset() pci_slot_reset() pci_slot_lock() # device_lock() pci_reset_hotplug_slot() pciehp_reset_slot() # down_write(reset_lock) Fix by releasing the reset_lock during driver binding and unbinding, thereby splitting and shrinking the critical section. Driver binding and unbinding is protected by the device_lock() and thus serialized with a Secondary Bus Reset. There's no need to additionally protect it with the reset_lock. 
However, pciehp does not bind and unbind devices directly, but rather invokes PCI core functions which also perform certain enumeration and de-enumeration steps. The reset_lock's purpose is to protect slot registers, not enumeration and de-enumeration of hotplugged devices. That would arguably be the job of the PCI core, not the PCIe hotplug driver. After all, an AER-induced Secondary Bus Reset may as well happen during boot-time enumeration of the PCI hierarchy and there's no locking to prevent that either. Exempting *de-enumeration* from the reset_lock is relatively harmless: A concurrent Secondary Bus Reset may foil config space accesses such as PME interrupt disablement. But if the device is physically gone, those accesses are pointless anyway. If the device is physically present and only logically removed through an Attention Button press or the sysfs "power" attribute, PME interrupts as well as DMA cannot come through because pciehp_unconfigure_device() disables INTx and Bus Master bits. That's still protected by the reset_lock in the present commit. Exempting *enumeration* from the reset_lock also has limited impact: The exempted call to pci_bus_add_device() may perform device accesses through pcibios_bus_add_device() and pci_fixup_device() which are now no longer protected from a concurrent Secondary Bus Reset. Otherwise there should be no impact. In essence, the present commit seeks to fix the AB-BA deadlocks while still retaining a best-effort reset protection for enumeration and de-enumeration of hotplugged devices -- until a general solution is implemented in the PCI core. 
Link: https://lore.kernel.org/linux-pci/CS1PR8401MB0728FC6FDAB8A35C22BD90EC95F10@CS1PR8401MB0728.NAMPRD84.PROD.OUTLOOK.COM Link: https://lore.kernel.org/linux-pci/20200615143250.438252-1-ian.may@canonical.com Link: https://lore.kernel.org/linux-pci/ce878dab-c0c4-5bd0-a725-9805a075682d@amd.com Link: https://lore.kernel.org/linux-pci/ed831249-384a-6d35-0831-70af191e9bce@huawei.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=215590 Fixes: 5b3f7b7d062b ("PCI: pciehp: Avoid slot access during reset") Link: https://lore.kernel.org/r/fef2b2e9edf245c049a8c5b94743c0f74ff5008a.1681191902.git.lukas@wunner.de Reported-by: Michael Haeuptle Reported-by: Ian May Reported-by: Andrey Grodzovsky Reported-by: Rahul Kumar Reported-by: Jialin Zhang Tested-by: Anatoli Antonovitch Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v4.19+ Cc: Dan Stein Cc: Ashok Raj Cc: Alex Michon Cc: Xiongfeng Wang Cc: Alex Williamson Cc: Mika Westerberg Cc: Sathyanarayanan Kuppuswamy Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/pci/hotplug/pciehp_pci.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c index d17f3bf36f70..ad12515a4a12 100644 --- a/drivers/pci/hotplug/pciehp_pci.c +++ b/drivers/pci/hotplug/pciehp_pci.c @@ -63,7 +63,14 @@ int pciehp_configure_device(struct controller *ctrl) pci_assign_unassigned_bridge_resources(bridge); pcie_bus_configure_settings(parent); + + /* + * Release reset_lock during driver binding + * to avoid AB-BA deadlock with device_lock. 
+ */ + up_read(&ctrl->reset_lock); pci_bus_add_devices(parent); + down_read_nested(&ctrl->reset_lock, ctrl->depth); out: pci_unlock_rescan_remove(); @@ -104,7 +111,15 @@ void pciehp_unconfigure_device(struct controller *ctrl, bool presence) list_for_each_entry_safe_reverse(dev, temp, &parent->devices, bus_list) { pci_dev_get(dev); + + /* + * Release reset_lock during driver unbinding + * to avoid AB-BA deadlock with device_lock. + */ + up_read(&ctrl->reset_lock); pci_stop_and_remove_bus_device(dev); + down_read_nested(&ctrl->reset_lock, ctrl->depth); + /* * Ensure that no new Requests will be generated from * the device. -- Gitee From 9e6cefc56423e2992da9aa46a5542c7dc133139a Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 16 Mar 2023 13:40:59 +0530 Subject: [PATCH 073/101] PCI: qcom: Fix the incorrect register usage in v2.7.0 config stable inclusion from stable-5.10.180 commit b978269ddad4ee2d5d6788eeb0e7e344a7d480e1 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 2542e16c392508800f1d9037feee881a9c444951 upstream. Qcom PCIe IP version v2.7.0 and its derivatives don't contain the PCIE20_PARF_AXI_MSTR_WR_ADDR_HALT register. Instead, they have the new PCIE20_PARF_AXI_MSTR_WR_ADDR_HALT_V2 register. So fix the incorrect register usage which is modifying a different register. Also in this IP version, this register change doesn't depend on MSI being enabled. So remove that check also. 
Link: https://lore.kernel.org/r/20230316081117.14288-2-manivannan.sadhasivam@linaro.org Fixes: ed8cc3b1fc84 ("PCI: qcom: Add support for SDM845 PCIe controller") Signed-off-by: Manivannan Sadhasivam Signed-off-by: Lorenzo Pieralisi Cc: # 5.6+ Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/pci/controller/dwc/pcie-qcom.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 5fbd80908a99..c68e14271c02 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -1210,11 +1210,9 @@ static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie) val |= BIT(4); writel(val, pcie->parf + PCIE20_PARF_MHI_CLOCK_RESET_CTRL); - if (IS_ENABLED(CONFIG_PCI_MSI)) { - val = readl(pcie->parf + PCIE20_PARF_AXI_MSTR_WR_ADDR_HALT); - val |= BIT(31); - writel(val, pcie->parf + PCIE20_PARF_AXI_MSTR_WR_ADDR_HALT); - } + val = readl(pcie->parf + PCIE20_PARF_AXI_MSTR_WR_ADDR_HALT_V2); + val |= BIT(31); + writel(val, pcie->parf + PCIE20_PARF_AXI_MSTR_WR_ADDR_HALT_V2); return 0; err_disable_clocks: -- Gitee From 151ec8a6c86e30676ffac09a43d9795810cc8028 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 4 Apr 2023 09:25:14 +0200 Subject: [PATCH 074/101] USB: dwc3: fix runtime pm imbalance on probe errors stable inclusion from stable-5.10.180 commit 27dc207c386eb6719e9cb695f7afb15e52b07d28 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 9a8ad10c9f2e0925ff26308ec6756b93fc2f4977 upstream. Make sure not to suspend the device when probe fails to avoid disabling clocks and phys multiple times. 
Fixes: 328082376aea ("usb: dwc3: fix runtime PM in error path") Cc: stable@vger.kernel.org # 4.8 Cc: Roger Quadros Acked-by: Thinh Nguyen Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20230404072524.19014-2-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/usb/dwc3/core.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index d73f624ed42a..fc84677635a3 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1567,13 +1567,11 @@ static int dwc3_probe(struct platform_device *pdev) spin_lock_init(&dwc->lock); mutex_init(&dwc->mutex); + pm_runtime_get_noresume(dev); pm_runtime_set_active(dev); pm_runtime_use_autosuspend(dev); pm_runtime_set_autosuspend_delay(dev, DWC3_DEFAULT_AUTOSUSPEND_DELAY); pm_runtime_enable(dev); - ret = pm_runtime_get_sync(dev); - if (ret < 0) - goto err1; pm_runtime_forbid(dev); @@ -1633,12 +1631,10 @@ static int dwc3_probe(struct platform_device *pdev) dwc3_free_event_buffers(dwc); err2: - pm_runtime_allow(&pdev->dev); - -err1: - pm_runtime_put_sync(&pdev->dev); - pm_runtime_disable(&pdev->dev); - + pm_runtime_allow(dev); + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + pm_runtime_put_noidle(dev); disable_clks: clk_bulk_disable_unprepare(dwc->num_clks, dwc->clks); assert_reset: -- Gitee From c8bb6b8043a1cc6dded471e4f13d5049ac64288b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 4 Apr 2023 09:25:15 +0200 Subject: [PATCH 075/101] USB: dwc3: fix runtime pm imbalance on unbind stable inclusion from stable-5.10.180 commit a71cb92ec4315b7b61983bae35ec00be62901b25 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 44d257e9012ee8040e41d224d0e5bfb5ef5427ea upstream. Make sure to balance the runtime PM usage count on driver unbind by adding back the pm_runtime_allow() call that had been erroneously removed. 
Fixes: 266d0493900a ("usb: dwc3: core: don't trigger runtime pm when remove driver") Cc: stable@vger.kernel.org # 5.9 Cc: Li Jun Acked-by: Thinh Nguyen Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20230404072524.19014-3-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/usb/dwc3/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index fc84677635a3..5709b959b1d9 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1655,6 +1655,7 @@ static int dwc3_remove(struct platform_device *pdev) dwc3_core_exit(dwc); dwc3_ulpi_exit(dwc); + pm_runtime_allow(&pdev->dev); pm_runtime_disable(&pdev->dev); pm_runtime_put_noidle(&pdev->dev); pm_runtime_set_suspended(&pdev->dev); -- Gitee From 7a5a89097f8c2fb215c88049b0c5a556587c7db4 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 13 Apr 2023 16:39:58 -0500 Subject: [PATCH 076/101] hwmon: (k10temp) Check range scale when CUR_TEMP register is read-write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.180 commit b009006887e32bf62ff5c0da73b9bafd18dd996b category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 0c072385348e3ac5229145644055d3e2afb5b3db upstream. Spec says, when CUR_TEMP_TJ_SEL == 3 and CUR_TEMP_RANGE_SEL == 0, it should use RangeUnadjusted is 0, which is (CurTmp*0.125 -49) C. The CUR_TEMP register is read-write when CUR_TEMP_TJ_SEL == 3 (bit 17-16). Add the check to detect it. Sensors command's output before the patch. $sensors k10temp-pci-00c3 Adapter: PCI adapter Tctl: +76.6°C <- Wrong value Tccd1: +26.5°C Tccd2: +27.5°C Tccd3: +27.2°C Tccd4: +27.5°C Tccd5: +26.0°C Tccd6: +26.2°C Tccd7: +25.0°C Tccd8: +26.5°C Sensors command's output after the patch. 
$sensors k10temp-pci-00c3 Adapter: PCI adapter Tctl: +28.8°C <- corrected value Tccd1: +27.5°C Tccd2: +28.5°C Tccd3: +28.5°C Tccd4: +28.5°C Tccd5: +27.0°C Tccd6: +27.5°C Tccd7: +27.0°C Tccd8: +27.5°C Signed-off-by: Babu Moger Fixes: 1b59788979ac ("hwmon: (k10temp) Add temperature offset for Ryzen 2700X") Link: https://lore.kernel.org/r/20230413213958.847634-1-babu.moger@amd.com Cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/hwmon/k10temp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index 3bc2551577a3..74a67089ad07 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -74,6 +74,7 @@ static DEFINE_MUTEX(nb_smu_ind_mutex); #define ZEN_CUR_TEMP_SHIFT 21 #define ZEN_CUR_TEMP_RANGE_SEL_MASK BIT(19) +#define ZEN_CUR_TEMP_TJ_SEL_MASK GENMASK(17, 16) #define ZEN_SVI_BASE 0x0005A000 @@ -173,7 +174,8 @@ static long get_raw_temp(struct k10temp_data *data) data->read_tempreg(data->pdev, &regval); temp = (regval >> ZEN_CUR_TEMP_SHIFT) * 125; - if (regval & data->temp_adjust_mask) + if ((regval & data->temp_adjust_mask) || + (regval & ZEN_CUR_TEMP_TJ_SEL_MASK) == ZEN_CUR_TEMP_TJ_SEL_MASK) temp -= 49000; return temp; } -- Gitee From 8dc5efa48e5f1bcfbd9f62d5f5a8d79957bea9ca Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Wed, 19 Apr 2023 11:36:55 +1200 Subject: [PATCH 077/101] hwmon: (adt7475) Use device_property APIs when configuring polarity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.180 commit aed39acf7ed6beb8f671d2b23709a863aed565e8 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 2a8e41ad337508fc5d598c0f9288890214f8e318 upstream. 
On DT unaware platforms of_property_read_u32_array() returns -ENOSYS which wasn't handled by the code treating adi,pwm-active-state as optional. Update the code to use device_property_read_u32_array() which deals gracefully with DT unaware platforms. Fixes: 86da28eed4fb ("hwmon: (adt7475) Add support for inverting pwm output") Reported-by: Mariusz Białończyk Link: https://lore.kernel.org/linux-hwmon/52e26a67-9131-2dc0-40cb-db5c07370027@alliedtelesis.co.nz/T/#mdd0505801e0a4e72340de009a47c0fca4f771ed3 Signed-off-by: Chris Packham Link: https://lore.kernel.org/r/20230418233656.869055-2-chris.packham@alliedtelesis.co.nz Cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/hwmon/adt7475.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c index 6b84822e7d93..22e314725def 100644 --- a/drivers/hwmon/adt7475.c +++ b/drivers/hwmon/adt7475.c @@ -1515,9 +1515,9 @@ static int adt7475_set_pwm_polarity(struct i2c_client *client) int ret, i; u8 val; - ret = of_property_read_u32_array(client->dev.of_node, - "adi,pwm-active-state", states, - ARRAY_SIZE(states)); + ret = device_property_read_u32_array(&client->dev, + "adi,pwm-active-state", states, + ARRAY_SIZE(states)); if (ret) return ret; -- Gitee From eaa2871441975c3c564d402f1900da965bb2499e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 17 Apr 2023 15:37:55 +0200 Subject: [PATCH 078/101] posix-cpu-timers: Implement the missing timer_wait_running callback stable inclusion from stable-5.10.180 commit 7c5811b95c573d0e080f9fd8d708fc2a706cf784 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit f7abf14f0001a5a47539d9f60bbdca649e43536b upstream. For some unknown reason the introduction of the timer_wait_running callback missed to fixup posix CPU timers, which went unnoticed for almost four years. 
Marco reported recently that the WARN_ON() in timer_wait_running() triggers with a posix CPU timer test case. Posix CPU timers have two execution models for expiring timers depending on CONFIG_POSIX_CPU_TIMERS_TASK_WORK: 1) If not enabled, the expiry happens in hard interrupt context so spin waiting on the remote CPU is reasonably time bound. Implement an empty stub function for that case. 2) If enabled, the expiry happens in task work before returning to user space or guest mode. The expired timers are marked as firing and moved from the timer queue to a local list head with sighand lock held. Once the timers are moved, sighand lock is dropped and the expiry happens in fully preemptible context. That means the expiring task can be scheduled out, migrated, interrupted etc. So spin waiting on it is more than suboptimal. The timer wheel has a timer_wait_running() mechanism for RT, which uses a per CPU timer-base expiry lock which is held by the expiry code and the task waiting for the timer function to complete blocks on that lock. This does not work in the same way for posix CPU timers as there is no timer base and expiry for process wide timers can run on any task belonging to that process, but the concept of waiting on an expiry lock can be used too in a slightly different way: - Add a mutex to struct posix_cputimers_work. This struct is per task and used to schedule the expiry task work from the timer interrupt. - Add a task_struct pointer to struct cpu_timer which is used to store the task which runs the expiry. That's filled in when the task moves the expired timers to the local expiry list. That's not affecting the size of the k_itimer union as there are bigger union members already. - Let the task take the expiry mutex around the expiry function - Let the waiter acquire a task reference with rcu_read_lock() held and block on the expiry mutex This avoids spin-waiting on a task which might not even be on a CPU and works nicely for RT too.
Fixes: ec8f954a40da ("posix-timers: Use a callback for cancel synchronization on PREEMPT_RT") Reported-by: Marco Elver Signed-off-by: Thomas Gleixner Tested-by: Marco Elver Tested-by: Sebastian Andrzej Siewior Reviewed-by: Frederic Weisbecker Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/87zg764ojw.ffs@tglx Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- include/linux/posix-timers.h | 17 ++++--- kernel/time/posix-cpu-timers.c | 81 ++++++++++++++++++++++++++++------ kernel/time/posix-timers.c | 4 ++ 3 files changed, 82 insertions(+), 20 deletions(-) diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 913aa60228b1..8e284161b65e 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -63,16 +64,18 @@ static inline int clockid_to_fd(const clockid_t clk) * cpu_timer - Posix CPU timer representation for k_itimer * @node: timerqueue node to queue in the task/sig * @head: timerqueue head on which this timer is queued - * @task: Pointer to target task + * @pid: Pointer to target task PID * @elist: List head for the expiry list * @firing: Timer is currently firing + * @handling: Pointer to the task which handles expiry */ struct cpu_timer { - struct timerqueue_node node; - struct timerqueue_head *head; - struct pid *pid; - struct list_head elist; - int firing; + struct timerqueue_node node; + struct timerqueue_head *head; + struct pid *pid; + struct list_head elist; + int firing; + struct task_struct __rcu *handling; }; static inline bool cpu_timer_enqueue(struct timerqueue_head *head, @@ -129,10 +132,12 @@ struct posix_cputimers { /** * posix_cputimers_work - Container for task work based posix CPU timer expiry * @work: The task work to be scheduled + * @mutex: Mutex held around expiry in context of this task work * @scheduled: @work has been scheduled already, no further processing */ struct posix_cputimers_work { struct 
callback_head work; + struct mutex mutex; unsigned int scheduled; }; diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 5d76edd0ad9c..bede1e608d95 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -782,6 +782,8 @@ static u64 collect_timerqueue(struct timerqueue_head *head, return expires; ctmr->firing = 1; + /* See posix_cpu_timer_wait_running() */ + rcu_assign_pointer(ctmr->handling, current); cpu_timer_dequeue(ctmr); list_add_tail(&ctmr->elist, firing); } @@ -1097,7 +1099,49 @@ static void handle_posix_cpu_timers(struct task_struct *tsk); #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK static void posix_cpu_timers_work(struct callback_head *work) { + struct posix_cputimers_work *cw = container_of(work, typeof(*cw), work); + + mutex_lock(&cw->mutex); handle_posix_cpu_timers(current); + mutex_unlock(&cw->mutex); +} + +/* + * Invoked from the posix-timer core when a cancel operation failed because + * the timer is marked firing. The caller holds rcu_read_lock(), which + * protects the timer and the task which is expiring it from being freed. + */ +static void posix_cpu_timer_wait_running(struct k_itimer *timr) +{ + struct task_struct *tsk = rcu_dereference(timr->it.cpu.handling); + + /* Has the handling task completed expiry already? */ + if (!tsk) + return; + + /* Ensure that the task cannot go away */ + get_task_struct(tsk); + /* Now drop the RCU protection so the mutex can be locked */ + rcu_read_unlock(); + /* Wait on the expiry mutex */ + mutex_lock(&tsk->posix_cputimers_work.mutex); + /* Release it immediately again. */ + mutex_unlock(&tsk->posix_cputimers_work.mutex); + /* Drop the task reference. 
*/ + put_task_struct(tsk); + /* Relock RCU so the callsite is balanced */ + rcu_read_lock(); +} + +static void posix_cpu_timer_wait_running_nsleep(struct k_itimer *timr) +{ + /* Ensure that timr->it.cpu.handling task cannot go away */ + rcu_read_lock(); + spin_unlock_irq(&timr->it_lock); + posix_cpu_timer_wait_running(timr); + rcu_read_unlock(); + /* @timr is on stack and is valid */ + spin_lock_irq(&timr->it_lock); } /* @@ -1113,6 +1157,7 @@ void clear_posix_cputimers_work(struct task_struct *p) sizeof(p->posix_cputimers_work.work)); init_task_work(&p->posix_cputimers_work.work, posix_cpu_timers_work); + mutex_init(&p->posix_cputimers_work.mutex); p->posix_cputimers_work.scheduled = false; } @@ -1191,6 +1236,18 @@ static inline void __run_posix_cpu_timers(struct task_struct *tsk) lockdep_posixtimer_exit(); } +static void posix_cpu_timer_wait_running(struct k_itimer *timr) +{ + cpu_relax(); +} + +static void posix_cpu_timer_wait_running_nsleep(struct k_itimer *timr) +{ + spin_unlock_irq(&timr->it_lock); + cpu_relax(); + spin_lock_irq(&timr->it_lock); +} + static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk) { return false; @@ -1299,6 +1356,8 @@ static void handle_posix_cpu_timers(struct task_struct *tsk) */ if (likely(cpu_firing >= 0)) cpu_timer_fire(timer); + /* See posix_cpu_timer_wait_running() */ + rcu_assign_pointer(timer->it.cpu.handling, NULL); spin_unlock(&timer->it_lock); } } @@ -1434,23 +1493,16 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, expires = cpu_timer_getexpires(&timer.it.cpu); error = posix_cpu_timer_set(&timer, 0, &zero_it, &it); if (!error) { - /* - * Timer is now unarmed, deletion can not fail. - */ + /* Timer is now unarmed, deletion can not fail. 
*/ posix_cpu_timer_del(&timer); + } else { + while (error == TIMER_RETRY) { + posix_cpu_timer_wait_running_nsleep(&timer); + error = posix_cpu_timer_del(&timer); + } } - spin_unlock_irq(&timer.it_lock); - while (error == TIMER_RETRY) { - /* - * We need to handle case when timer was or is in the - * middle of firing. In other cases we already freed - * resources. - */ - spin_lock_irq(&timer.it_lock); - error = posix_cpu_timer_del(&timer); - spin_unlock_irq(&timer.it_lock); - } + spin_unlock_irq(&timer.it_lock); if ((it.it_value.tv_sec | it.it_value.tv_nsec) == 0) { /* @@ -1560,6 +1612,7 @@ const struct k_clock clock_posix_cpu = { .timer_del = posix_cpu_timer_del, .timer_get = posix_cpu_timer_get, .timer_rearm = posix_cpu_timer_rearm, + .timer_wait_running = posix_cpu_timer_wait_running, }; const struct k_clock clock_process = { diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 5bbfb9b298cb..7a18690e5110 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -846,6 +846,10 @@ static struct k_itimer *timer_wait_running(struct k_itimer *timer, rcu_read_lock(); unlock_timer(timer, *flags); + /* + * kc->timer_wait_running() might drop RCU lock. So @timer + * cannot be touched anymore after the function returns! + */ if (!WARN_ON_ONCE(!kc->timer_wait_running)) kc->timer_wait_running(timer); -- Gitee From 50e80931235d34c4596fa157e6444601b1315fc3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 14 Jul 2021 13:06:38 -0300 Subject: [PATCH 079/101] perf sched: Cast PTHREAD_STACK_MIN to int as it may turn into sysconf(__SC_THREAD_STACK_MIN_VALUE) stable inclusion from stable-5.10.180 commit 68494eb75f1fd1366a969a8374fd72cfb9dacadb category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit d08c84e01afa7a7eee6badab25d5420fa847f783 upstream. 
In fedora rawhide the PTHREAD_STACK_MIN define may end up expanded to a sysconf() call, and that will return 'long int', breaking the build: 45 fedora:rawhide : FAIL gcc version 11.1.1 20210623 (Red Hat 11.1.1-6) (GCC) builtin-sched.c: In function 'create_tasks': /git/perf-5.14.0-rc1/tools/include/linux/kernel.h:43:24: error: comparison of distinct pointer types lacks a cast [-Werror] 43 | (void) (&_max1 == &_max2); \ | ^~ builtin-sched.c:673:34: note: in expansion of macro 'max' 673 | (size_t) max(16 * 1024, PTHREAD_STACK_MIN)); | ^~~ cc1: all warnings being treated as errors $ grep __sysconf /usr/include/*/*.h /usr/include/bits/pthread_stack_min-dynamic.h:extern long int __sysconf (int __name) __THROW; /usr/include/bits/pthread_stack_min-dynamic.h:# define PTHREAD_STACK_MIN __sysconf (__SC_THREAD_STACK_MIN_VALUE) /usr/include/bits/time.h:extern long int __sysconf (int); /usr/include/bits/time.h:# define CLK_TCK ((__clock_t) __sysconf (2)) /* 2 is _SC_CLK_TCK */ $ So cast it to int to cope with that. 
Signed-off-by: Arnaldo Carvalho de Melo Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- tools/perf/builtin-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index d3b5f5faf8c1..02e5774cabb6 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -670,7 +670,7 @@ static void create_tasks(struct perf_sched *sched) err = pthread_attr_init(&attr); BUG_ON(err); err = pthread_attr_setstacksize(&attr, - (size_t) max(16 * 1024, PTHREAD_STACK_MIN)); + (size_t) max(16 * 1024, (int)PTHREAD_STACK_MIN)); BUG_ON(err); err = pthread_mutex_lock(&sched->start_work_mutex); BUG_ON(err); -- Gitee From 006c29f957d6c6c0d569451e6319130cca2d40ba Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 3 May 2023 21:09:39 -0700 Subject: [PATCH 080/101] blk-mq: release crypto keyslot before reporting I/O complete stable inclusion from stable-5.10.180 commit 874bdf43b4a7dc5463c31508f62b3e42eb237b08 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 9cd1e566676bbcb8a126acd921e4e194e6339603 upstream. Once all I/O using a blk_crypto_key has completed, filesystems can call blk_crypto_evict_key(). However, the block layer currently doesn't call blk_crypto_put_keyslot() until the request is being freed, which happens after upper layers have been told (via bio_endio()) the I/O has completed. This causes a race condition where blk_crypto_evict_key() can see 'slot_refs != 0' without there being an actual bug. This makes __blk_crypto_evict_key() hit the 'WARN_ON_ONCE(atomic_read(&slot->slot_refs) != 0)' and return without doing anything, eventually causing a use-after-free in blk_crypto_reprogram_all_keys(). (This is a very rare bug and has only been seen when per-file keys are being used with fscrypt.) 
There are two options to fix this: either release the keyslot before bio_endio() is called on the request's last bio, or make __blk_crypto_evict_key() ignore slot_refs. Let's go with the first solution, since it preserves the ability to report bugs (via WARN_ON_ONCE) where a key is evicted while still in-use. Fixes: a892c8d52c02 ("block: Inline encryption support for blk-mq") Cc: stable@vger.kernel.org Reviewed-by: Nathan Huckleberry Reviewed-by: Christoph Hellwig Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20230315183907.53675-2-ebiggers@kernel.org Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- block/blk-core.c | 7 +++++++ block/blk-crypto-internal.h | 25 +++++++++++++++++++++---- block/blk-crypto.c | 24 ++++++++++++------------ block/blk-merge.c | 2 ++ block/blk-mq.c | 2 +- 5 files changed, 43 insertions(+), 17 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 593ba10475cb..3c819bda92a5 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1435,6 +1435,13 @@ bool blk_update_request(struct request *req, blk_status_t error, req->q->integrity.profile->complete_fn(req, nr_bytes); #endif + /* + * Upper layers may call blk_crypto_evict_key() anytime after the last + * bio_endio(). Therefore, the keyslot must be released before that. 
+ */ + if (blk_crypto_rq_has_keyslot(req) && nr_bytes >= blk_rq_bytes(req)) + __blk_crypto_rq_put_keyslot(req); + if (unlikely(error && !blk_rq_is_passthrough(req) && !(req->rq_flags & RQF_QUIET))) print_req_error(req, error, __func__); diff --git a/block/blk-crypto-internal.h b/block/blk-crypto-internal.h index 0d36aae538d7..8e0834557620 100644 --- a/block/blk-crypto-internal.h +++ b/block/blk-crypto-internal.h @@ -60,6 +60,11 @@ static inline bool blk_crypto_rq_is_encrypted(struct request *rq) return rq->crypt_ctx; } +static inline bool blk_crypto_rq_has_keyslot(struct request *rq) +{ + return rq->crypt_keyslot; +} + #else /* CONFIG_BLK_INLINE_ENCRYPTION */ static inline bool bio_crypt_rq_ctx_compatible(struct request *rq, @@ -93,6 +98,11 @@ static inline bool blk_crypto_rq_is_encrypted(struct request *rq) return false; } +static inline bool blk_crypto_rq_has_keyslot(struct request *rq) +{ + return false; +} + #endif /* CONFIG_BLK_INLINE_ENCRYPTION */ void __bio_crypt_advance(struct bio *bio, unsigned int bytes); @@ -127,14 +137,21 @@ static inline bool blk_crypto_bio_prep(struct bio **bio_ptr) return true; } -blk_status_t __blk_crypto_init_request(struct request *rq); -static inline blk_status_t blk_crypto_init_request(struct request *rq) +blk_status_t __blk_crypto_rq_get_keyslot(struct request *rq); +static inline blk_status_t blk_crypto_rq_get_keyslot(struct request *rq) { if (blk_crypto_rq_is_encrypted(rq)) - return __blk_crypto_init_request(rq); + return __blk_crypto_rq_get_keyslot(rq); return BLK_STS_OK; } +void __blk_crypto_rq_put_keyslot(struct request *rq); +static inline void blk_crypto_rq_put_keyslot(struct request *rq) +{ + if (blk_crypto_rq_has_keyslot(rq)) + __blk_crypto_rq_put_keyslot(rq); +} + void __blk_crypto_free_request(struct request *rq); static inline void blk_crypto_free_request(struct request *rq) { @@ -173,7 +190,7 @@ static inline blk_status_t blk_crypto_insert_cloned_request(struct request *rq) { if (blk_crypto_rq_is_encrypted(rq)) - 
return blk_crypto_init_request(rq); + return blk_crypto_rq_get_keyslot(rq); return BLK_STS_OK; } diff --git a/block/blk-crypto.c b/block/blk-crypto.c index 5ffa9aab49de..0506adfd9ca6 100644 --- a/block/blk-crypto.c +++ b/block/blk-crypto.c @@ -216,26 +216,26 @@ static bool bio_crypt_check_alignment(struct bio *bio) return true; } -blk_status_t __blk_crypto_init_request(struct request *rq) +blk_status_t __blk_crypto_rq_get_keyslot(struct request *rq) { return blk_ksm_get_slot_for_key(rq->q->ksm, rq->crypt_ctx->bc_key, &rq->crypt_keyslot); } -/** - * __blk_crypto_free_request - Uninitialize the crypto fields of a request. - * - * @rq: The request whose crypto fields to uninitialize. - * - * Completely uninitializes the crypto fields of a request. If a keyslot has - * been programmed into some inline encryption hardware, that keyslot is - * released. The rq->crypt_ctx is also freed. - */ -void __blk_crypto_free_request(struct request *rq) +void __blk_crypto_rq_put_keyslot(struct request *rq) { blk_ksm_put_slot(rq->crypt_keyslot); + rq->crypt_keyslot = NULL; +} + +void __blk_crypto_free_request(struct request *rq) +{ + /* The keyslot, if one was needed, should have been released earlier. 
*/ + if (WARN_ON_ONCE(rq->crypt_keyslot)) + __blk_crypto_rq_put_keyslot(rq); + mempool_free(rq->crypt_ctx, bio_crypt_ctx_pool); - blk_crypto_rq_set_defaults(rq); + rq->crypt_ctx = NULL; } /** diff --git a/block/blk-merge.c b/block/blk-merge.c index 0e0d36621d75..ff3ce1a3ee2a 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -801,6 +801,8 @@ static struct request *attempt_merge(struct request_queue *q, if (!blk_discard_mergable(req)) elv_merge_requests(q, req, next); + blk_crypto_rq_put_keyslot(next); + /* * 'next' is going away, so update stats accordingly */ diff --git a/block/blk-mq.c b/block/blk-mq.c index 02ca17c575c1..ad47eb93b266 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2209,7 +2209,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio) blk_mq_bio_to_request(rq, bio, nr_segs); - ret = blk_crypto_init_request(rq); + ret = blk_crypto_rq_get_keyslot(rq); if (ret != BLK_STS_OK) { bio->bi_status = ret; bio_endio(bio); -- Gitee From c7b47725b20cae1dc4e348cf3a79e07ebfb62208 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 3 May 2023 21:09:40 -0700 Subject: [PATCH 081/101] blk-crypto: make blk_crypto_evict_key() return void stable inclusion from stable-5.10.180 commit 5072008bef2386237dab112dd0770bd82b7a46c1 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 70493a63ba04f754f7a7dd53a4fcc82700181490 upstream. blk_crypto_evict_key() is only called in contexts such as inode eviction where failure is not an option. So there is nothing the caller can do with errors except log them. (dm-table.c does "use" the error code, but only to pass on to upper layers, so it doesn't really count.) Just make blk_crypto_evict_key() return void and log errors itself. 
Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230315183907.53675-2-ebiggers@kernel.org Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- block/blk-crypto.c | 22 ++++++++++------------ include/linux/blk-crypto.h | 4 ++-- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/block/blk-crypto.c b/block/blk-crypto.c index 0506adfd9ca6..d8c48ee44ba6 100644 --- a/block/blk-crypto.c +++ b/block/blk-crypto.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "blk-crypto-internal.h" @@ -393,19 +394,16 @@ int blk_crypto_start_using_key(const struct blk_crypto_key *key, * Upper layers (filesystems) must call this function to ensure that a key is * evicted from any hardware that it might have been programmed into. The key * must not be in use by any in-flight IO when this function is called. - * - * Return: 0 on success or if key is not present in the q's ksm, -err on error. */ -int blk_crypto_evict_key(struct request_queue *q, - const struct blk_crypto_key *key) +void blk_crypto_evict_key(struct request_queue *q, + const struct blk_crypto_key *key) { - if (blk_ksm_crypto_cfg_supported(q->ksm, &key->crypto_cfg)) - return blk_ksm_evict_key(q->ksm, key); + int err; - /* - * If the request queue's associated inline encryption hardware didn't - * have support for the key, then the key might have been programmed - * into the fallback keyslot manager, so try to evict from there. 
- */ - return blk_crypto_fallback_evict_key(key); + if (blk_ksm_crypto_cfg_supported(q->ksm, &key->crypto_cfg)) + err = blk_ksm_evict_key(q->ksm, key); + else + err = blk_crypto_fallback_evict_key(key); + if (err) + pr_warn_ratelimited("error %d evicting key\n", err); } diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h index 69b24fe92cbf..5e96bad54804 100644 --- a/include/linux/blk-crypto.h +++ b/include/linux/blk-crypto.h @@ -97,8 +97,8 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, int blk_crypto_start_using_key(const struct blk_crypto_key *key, struct request_queue *q); -int blk_crypto_evict_key(struct request_queue *q, - const struct blk_crypto_key *key); +void blk_crypto_evict_key(struct request_queue *q, + const struct blk_crypto_key *key); bool blk_crypto_config_supported(struct request_queue *q, const struct blk_crypto_config *cfg); -- Gitee From 9648c6b786403ef078351496e5966dd62c7a87c8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 3 May 2023 21:09:41 -0700 Subject: [PATCH 082/101] blk-crypto: make blk_crypto_evict_key() more robust stable inclusion from stable-5.10.180 commit 701a8220762ff90615dc91d3543f789391b63298 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 5c7cb94452901a93e90c2230632e2c12a681bc92 upstream. If blk_crypto_evict_key() sees that the key is still in-use (due to a bug) or that ->keyslot_evict failed, it currently just returns while leaving the key linked into the keyslot management structures. However, blk_crypto_evict_key() is only called in contexts such as inode eviction where failure is not an option. So actually the caller proceeds with freeing the blk_crypto_key regardless of the return value of blk_crypto_evict_key(). These two assumptions don't match, and the result is that there can be a use-after-free in blk_crypto_reprogram_all_keys() after one of these errors occurs. 
(Note, these errors *shouldn't* happen; we're just talking about what happens if they do anyway.) Fix this by making blk_crypto_evict_key() unlink the key from the keyslot management structures even on failure. Also improve some comments. Fixes: 1b2628397058 ("block: Keyslot Manager for Inline Encryption") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230315183907.53675-2-ebiggers@kernel.org Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- block/blk-crypto.c | 29 +++++++++++++++++++-------- block/keyslot-manager.c | 43 ++++++++++++++++++++--------------------- 2 files changed, 42 insertions(+), 30 deletions(-) diff --git a/block/blk-crypto.c b/block/blk-crypto.c index d8c48ee44ba6..87ec55d4354f 100644 --- a/block/blk-crypto.c +++ b/block/blk-crypto.c @@ -385,15 +385,20 @@ int blk_crypto_start_using_key(const struct blk_crypto_key *key, } /** - * blk_crypto_evict_key() - Evict a key from any inline encryption hardware - * it may have been programmed into - * @q: The request queue who's associated inline encryption hardware this key - * might have been programmed into - * @key: The key to evict + * blk_crypto_evict_key() - Evict a blk_crypto_key from a request_queue + * @q: a request_queue on which I/O using the key may have been done + * @key: the key to evict * - * Upper layers (filesystems) must call this function to ensure that a key is - * evicted from any hardware that it might have been programmed into. The key - * must not be in use by any in-flight IO when this function is called. + * For a given request_queue, this function removes the given blk_crypto_key + * from the keyslot management structures and evicts it from any underlying + * hardware keyslot(s) or blk-crypto-fallback keyslot it may have been + * programmed into. + * + * Upper layers must call this before freeing the blk_crypto_key. 
It must be + * called for every request_queue the key may have been used on. The key must + * no longer be in use by any I/O when this function is called. + * + * Context: May sleep. */ void blk_crypto_evict_key(struct request_queue *q, const struct blk_crypto_key *key) @@ -404,6 +409,14 @@ void blk_crypto_evict_key(struct request_queue *q, err = blk_ksm_evict_key(q->ksm, key); else err = blk_crypto_fallback_evict_key(key); + /* + * An error can only occur here if the key failed to be evicted from a + * keyslot (due to a hardware or driver issue) or is allegedly still in + * use by I/O (due to a kernel bug). Even in these cases, the key is + * still unlinked from the keyslot management structures, and the caller + * is allowed and expected to free it right away. There's nothing + * callers can do to handle errors, so just log them and return void. + */ if (err) pr_warn_ratelimited("error %d evicting key\n", err); } diff --git a/block/keyslot-manager.c b/block/keyslot-manager.c index 86f8195d8039..17a1f1ba44ef 100644 --- a/block/keyslot-manager.c +++ b/block/keyslot-manager.c @@ -305,44 +305,43 @@ bool blk_ksm_crypto_cfg_supported(struct blk_keyslot_manager *ksm, return true; } -/** - * blk_ksm_evict_key() - Evict a key from the lower layer device. - * @ksm: The keyslot manager to evict from - * @key: The key to evict - * - * Find the keyslot that the specified key was programmed into, and evict that - * slot from the lower layer device. The slot must not be in use by any - * in-flight IO when this function is called. - * - * Context: Process context. Takes and releases ksm->lock. - * Return: 0 on success or if there's no keyslot with the specified key, -EBUSY - * if the keyslot is still in use, or another -errno value on other - * error. +/* + * This is an internal function that evicts a key from an inline encryption + * device that can be either a real device or the blk-crypto-fallback "device". 
+ * It is used only by blk_crypto_evict_key(); see that function for details. */ int blk_ksm_evict_key(struct blk_keyslot_manager *ksm, const struct blk_crypto_key *key) { struct blk_ksm_keyslot *slot; - int err = 0; + int err; blk_ksm_hw_enter(ksm); slot = blk_ksm_find_keyslot(ksm, key); - if (!slot) - goto out_unlock; + if (!slot) { + /* + * Not an error, since a key not in use by I/O is not guaranteed + * to be in a keyslot. There can be more keys than keyslots. + */ + err = 0; + goto out; + } if (WARN_ON_ONCE(atomic_read(&slot->slot_refs) != 0)) { + /* BUG: key is still in use by I/O */ err = -EBUSY; - goto out_unlock; + goto out_remove; } err = ksm->ksm_ll_ops.keyslot_evict(ksm, key, blk_ksm_get_slot_idx(slot)); - if (err) - goto out_unlock; - +out_remove: + /* + * Callers free the key even on error, so unlink the key from the hash + * table and clear slot->key even on error. + */ hlist_del(&slot->hash_node); slot->key = NULL; - err = 0; -out_unlock: +out: blk_ksm_hw_exit(ksm); return err; } -- Gitee From ae3173c17e058d1cd229234d924561539003996f Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Thu, 23 Dec 2021 12:21:37 -0800 Subject: [PATCH 083/101] ext4: use ext4_journal_start/stop for fast commit transactions stable inclusion from stable-5.10.180 commit c8714ddf3ccf94a50574ba5db2e2128eb6e77b75 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 2729cfdcfa1cc49bef5a90d046fa4a187fdfcc69 upstream. This patch drops all calls to ext4_fc_start_update() and ext4_fc_stop_update(). To ensure that there are no ongoing journal updates during fast commit, we also make jbd2_fc_begin_commit() lock journal for updates. This way we don't have to maintain two different transaction start stop APIs for fast commit and full commit. This patch doesn't remove the functions altogether since in future we want to have inode level locking for fast commits. 
Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20211223202140.2061101-2-harshads@google.com Signed-off-by: Theodore Ts'o Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- fs/ext4/acl.c | 2 -- fs/ext4/extents.c | 2 -- fs/ext4/file.c | 4 ---- fs/ext4/inode.c | 7 +------ fs/ext4/ioctl.c | 8 +------- fs/jbd2/journal.c | 2 ++ 6 files changed, 4 insertions(+), 21 deletions(-) diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 68aaed48315f..76f634d185f1 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -242,7 +242,6 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type) handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits); if (IS_ERR(handle)) return PTR_ERR(handle); - ext4_fc_start_update(inode); if ((type == ACL_TYPE_ACCESS) && acl) { error = posix_acl_update_mode(inode, &mode, &acl); @@ -260,7 +259,6 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type) } out_stop: ext4_journal_stop(handle); - ext4_fc_stop_update(inode); if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; return error; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index a6302e6b9528..7cf33851eb69 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4706,7 +4706,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) FALLOC_FL_INSERT_RANGE)) return -EOPNOTSUPP; - ext4_fc_start_update(inode); inode_lock(inode); ret = ext4_convert_inline_data(inode); inode_unlock(inode); @@ -4776,7 +4775,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) inode_unlock(inode); trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); exit: - ext4_fc_stop_update(inode); return ret; } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 904d0bd91160..fcf3887d1880 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -260,7 +260,6 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb, if (iocb->ki_flags & IOCB_NOWAIT) return 
-EOPNOTSUPP; - ext4_fc_start_update(inode); inode_lock(inode); ret = ext4_write_checks(iocb, from); if (ret <= 0) @@ -272,7 +271,6 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb, out: inode_unlock(inode); - ext4_fc_stop_update(inode); if (likely(ret > 0)) { iocb->ki_pos += ret; ret = generic_write_sync(iocb, ret); @@ -559,9 +557,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out; } - ext4_fc_start_update(inode); ret = ext4_orphan_add(handle, inode); - ext4_fc_stop_update(inode); if (ret) { ext4_journal_stop(handle); goto out; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index be439ef94ebf..8af9ee25def8 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5413,7 +5413,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (error) return error; } - ext4_fc_start_update(inode); + if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) || (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) { handle_t *handle; @@ -5437,7 +5437,6 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (error) { ext4_journal_stop(handle); - ext4_fc_stop_update(inode); return error; } /* Update corresponding info in inode so that everything is in @@ -5449,7 +5448,6 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) error = ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); if (unlikely(error)) { - ext4_fc_stop_update(inode); return error; } } @@ -5464,12 +5462,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); if (attr->ia_size > sbi->s_bitmap_maxbytes) { - ext4_fc_stop_update(inode); return -EFBIG; } } if (!S_ISREG(inode->i_mode)) { - ext4_fc_stop_update(inode); return -EINVAL; } @@ -5595,7 +5591,6 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) ext4_std_error(inode->i_sb, error); if (!error) error = rc; - ext4_fc_stop_update(inode); return error; } diff --git a/fs/ext4/ioctl.c 
b/fs/ext4/ioctl.c index 53bdc67a815f..1171618f6549 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1322,13 +1322,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - long ret; - - ext4_fc_start_update(file_inode(filp)); - ret = __ext4_ioctl(filp, cmd, arg); - ext4_fc_stop_update(file_inode(filp)); - - return ret; + return __ext4_ioctl(filp, cmd, arg); } #ifdef CONFIG_COMPAT diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 4a9f6efcd071..0139c8d31bb2 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -757,6 +757,7 @@ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid) } journal->j_flags |= JBD2_FAST_COMMIT_ONGOING; write_unlock(&journal->j_state_lock); + jbd2_journal_lock_updates(journal); return 0; } @@ -768,6 +769,7 @@ EXPORT_SYMBOL(jbd2_fc_begin_commit); */ static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback) { + jbd2_journal_unlock_updates(journal); if (journal->j_fc_cleanup_callback) journal->j_fc_cleanup_callback(journal, 0); write_lock(&journal->j_state_lock); -- Gitee From 4fe132bdf9c6e7d77c0f834e743ab8eb60203269 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nuno=20S=C3=A1?= Date: Mon, 27 Mar 2023 16:54:14 +0200 Subject: [PATCH 084/101] staging: iio: resolver: ads1210: fix config mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.180 commit a863ac03fae0c07e9eba8129fc4b8dcabe0ad670 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 16313403d873ff17a587818b61f84c8cb4971cef upstream. As stated in the device datasheet [1], bits a0 and a1 have to be set to 1 for the configuration mode. 
[1]: https://www.analog.com/media/en/technical-documentation/data-sheets/ad2s1210.pdf Fixes: b19e9ad5e2cb9 ("staging:iio:resolver:ad2s1210 general driver cleanup") Cc: stable Signed-off-by: Nuno Sá Link: https://lore.kernel.org/r/20230327145414.1505537-1-nuno.sa@analog.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/staging/iio/resolver/ad2s1210.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/resolver/ad2s1210.c b/drivers/staging/iio/resolver/ad2s1210.c index 74adb82f37c3..a19cfb2998c9 100644 --- a/drivers/staging/iio/resolver/ad2s1210.c +++ b/drivers/staging/iio/resolver/ad2s1210.c @@ -101,7 +101,7 @@ struct ad2s1210_state { static const int ad2s1210_mode_vals[4][2] = { [MOD_POS] = { 0, 0 }, [MOD_VEL] = { 0, 1 }, - [MOD_CONFIG] = { 1, 0 }, + [MOD_CONFIG] = { 1, 1 }, }; static inline void ad2s1210_set_mode(enum ad2s1210_mode mode, -- Gitee From d89f8520bbae21c0d15aa78360be844dd847bfbc Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 5 Apr 2023 11:03:42 +0200 Subject: [PATCH 085/101] xhci: fix debugfs register accesses while suspended stable inclusion from stable-5.10.180 commit 29b89908fdd94127e9b6e2f29ecceae82a1314bc category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 735baf1b23458f71a8b15cb924af22c9ff9cd125 upstream. Wire up the debugfs regset device pointer so that the controller is resumed before accessing registers to avoid crashing or locking up if it happens to be runtime suspended. 
Fixes: 02b6fdc2a153 ("usb: xhci: Add debugfs interface for xHCI driver") Cc: stable@vger.kernel.org # 4.15: 30332eeefec8: debugfs: regset32: Add Runtime PM support Cc: stable@vger.kernel.org # 4.15 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20230405090342.7363-1-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/usb/host/xhci-debugfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c index dc832ddf7033..bd40caeeb21c 100644 --- a/drivers/usb/host/xhci-debugfs.c +++ b/drivers/usb/host/xhci-debugfs.c @@ -133,6 +133,7 @@ static void xhci_debugfs_regset(struct xhci_hcd *xhci, u32 base, regset->regs = regs; regset->nregs = nregs; regset->base = hcd->regs + base; + regset->dev = hcd->self.controller; debugfs_create_regset32((const char *)rgs->name, 0444, parent, regset); } -- Gitee From 8510bd1de1f911e2ca8ec83ffb1a838c4916ba24 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Tue, 24 Jan 2023 17:31:26 +0000 Subject: [PATCH 086/101] tick/nohz: Fix cpu_is_hotpluggable() by checking with nohz subsystem stable inclusion from stable-5.10.180 commit 2884595932ea07481590b2c10e20fcb729eaaad0 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 58d7668242647e661a20efe065519abd6454287e upstream. For CONFIG_NO_HZ_FULL systems, the tick_do_timer_cpu cannot be offlined. However, cpu_is_hotpluggable() still returns true for those CPUs. This causes torture tests that do offlining to end up trying to offline this CPU causing test failures. Such failure happens on all architectures. Fix the repeated error messages thrown by this (even if the hotplug errors are harmless) by asking the opinion of the nohz subsystem on whether the CPU can be hotplugged. [ Apply Frederic Weisbecker feedback on refactoring tick_nohz_cpu_down(). 
] For drivers/base/ portion: Acked-by: Greg Kroah-Hartman Acked-by: Frederic Weisbecker Cc: Frederic Weisbecker Cc: "Paul E. McKenney" Cc: Zhouyi Zhou Cc: Will Deacon Cc: Marc Zyngier Cc: rcu Cc: stable@vger.kernel.org Fixes: 2987557f52b9 ("driver-core/cpu: Expose hotpluggability to the rest of the kernel") Signed-off-by: Paul E. McKenney Signed-off-by: Joel Fernandes (Google) Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/base/cpu.c | 3 ++- include/linux/tick.h | 2 ++ kernel/time/tick-sched.c | 11 ++++++++--- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index cf922b88bc90..298daad1f86d 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -527,7 +527,8 @@ static const struct attribute_group *cpu_root_attr_groups[] = { bool cpu_is_hotpluggable(unsigned cpu) { struct device *dev = get_cpu_device(cpu); - return dev && container_of(dev, struct cpu, dev)->hotpluggable; + return dev && container_of(dev, struct cpu, dev)->hotpluggable + && tick_nohz_cpu_hotpluggable(cpu); } EXPORT_SYMBOL_GPL(cpu_is_hotpluggable); diff --git a/include/linux/tick.h b/include/linux/tick.h index 7340613c7eff..a90a8f7759a2 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -211,6 +211,7 @@ extern void tick_nohz_dep_set_signal(struct signal_struct *signal, enum tick_dep_bits bit); extern void tick_nohz_dep_clear_signal(struct signal_struct *signal, enum tick_dep_bits bit); +extern bool tick_nohz_cpu_hotpluggable(unsigned int cpu); /* * The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases @@ -275,6 +276,7 @@ static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } static inline void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) { } static inline void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { } +static inline bool tick_nohz_cpu_hotpluggable(unsigned int cpu) { return true; } static inline void tick_dep_set(enum tick_dep_bits bit) { 
} static inline void tick_dep_clear(enum tick_dep_bits bit) { } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 92fb738813f3..e4e0d032126b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -426,7 +426,7 @@ void __init tick_nohz_full_setup(cpumask_var_t cpumask) tick_nohz_full_running = true; } -static int tick_nohz_cpu_down(unsigned int cpu) +bool tick_nohz_cpu_hotpluggable(unsigned int cpu) { /* * The tick_do_timer_cpu CPU handles housekeeping duty (unbound @@ -434,8 +434,13 @@ static int tick_nohz_cpu_down(unsigned int cpu) * CPUs. It must remain online when nohz full is enabled. */ if (tick_nohz_full_running && tick_do_timer_cpu == cpu) - return -EBUSY; - return 0; + return false; + return true; +} + +static int tick_nohz_cpu_down(unsigned int cpu) +{ + return tick_nohz_cpu_hotpluggable(cpu) ? 0 : -EBUSY; } void __init tick_nohz_init(void) -- Gitee From 6a1bbc78a98b38af896ab363a08e6059dc928025 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Tue, 11 Apr 2023 12:14:26 +0100 Subject: [PATCH 087/101] MIPS: fw: Allow firmware to pass a empty env stable inclusion from stable-5.10.180 commit 47e61cadc7a5f3dffd42d2d6fda81be163f1ab82 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit ee1809ed7bc456a72dc8410b475b73021a3a68d5 upstream. fw_getenv will use env entry to determine style of env, however it is legal for firmware to just pass a empty list. Check if first entry exist before running strchr to avoid null pointer dereference. 
Cc: stable@vger.kernel.org Link: https://github.com/clbr/n64bootloader/issues/5 Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- arch/mips/fw/lib/cmdline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/fw/lib/cmdline.c b/arch/mips/fw/lib/cmdline.c index f24cbb4a39b5..892765b742bb 100644 --- a/arch/mips/fw/lib/cmdline.c +++ b/arch/mips/fw/lib/cmdline.c @@ -53,7 +53,7 @@ char *fw_getenv(char *envname) { char *result = NULL; - if (_fw_envp != NULL) { + if (_fw_envp != NULL && fw_envp(0) != NULL) { /* * Return a pointer to the given environment variable. * YAMON uses "name", "value" pairs, while U-Boot uses -- Gitee From a7aa5cf37688e7b55eafa336d1d9e630cb7731bc Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 4 Apr 2023 12:09:14 +0000 Subject: [PATCH 088/101] ipmi:ssif: Add send_retries increment stable inclusion from stable-5.10.180 commit 6d5993d5696339b2f4fbfbe0f9f940388f3cf636 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 6ce7995a43febe693d4894033c6e29314970646a upstream. A recent change removed an increment of send_retries, re-add it. 
Fixes: 95767ed78a18 ipmi:ssif: resend_msg() cannot fail Reported-by: Pavel Machek Cc: stable@vger.kernel.org Signed-off-by: Corey Minyard Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/char/ipmi/ipmi_ssif.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 20dc2452815c..3005e95df3be 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -564,8 +564,10 @@ static void retry_timeout(struct timer_list *t) if (waiting) start_get(ssif_info); - if (resend) + if (resend) { start_resend(ssif_info); + ssif_inc_stat(ssif_info, send_retries); + } } static void watch_timeout(struct timer_list *t) -- Gitee From c08e24fd4d95e1973b77e167e8c7bd1fa9208e65 Mon Sep 17 00:00:00 2001 From: Zhang Yuchen Date: Wed, 12 Apr 2023 15:49:07 +0800 Subject: [PATCH 089/101] ipmi: fix SSIF not responding under certain cond. stable inclusion from stable-5.10.180 commit 1b633da2fecf954440cd71d786368a407256a2ee category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 6d2555cde2918409b0331560e66f84a0ad4849c6 upstream. The ipmi communication is not restored after a specific version of BMC is upgraded on our server. The ipmi driver does not respond after printing the following log: ipmi_ssif: Invalid response getting flags: 1c 1 I found that after entering this branch, ssif_info->ssif_state always holds SSIF_GETTING_FLAGS and never return to IDLE. As a result, the driver cannot be loaded, because the driver status is checked during the unload process and must be IDLE in shutdown_ssif(): while (ssif_info->ssif_state != SSIF_IDLE) schedule_timeout(1); The process trigger this problem is: 1. One msg timeout and next msg start send, and call ssif_set_need_watch(). 2. ssif_set_need_watch()->watch_timeout()->start_flag_fetch() change ssif_state to SSIF_GETTING_FLAGS. 3. 
In msg_done_handler() ssif_state == SSIF_GETTING_FLAGS, if an error message is received, the second branch does not modify the ssif_state. 4. All retry actions need IS_SSIF_IDLE() == True, including the retry actions in watch_timeout() and msg_done_handler(). Sending msg does not work either. SSIF_IDLE is also checked in start_next_msg(). 5. The only thing that can be triggered in the SSIF driver is watch_timeout(); after destroy_user(), this timer will stop too. So, if this branch is entered, the ssif_state will remain SSIF_GETTING_FLAGS: no msg can be sent, no timer is started, and the driver can't be unloaded. We did a comparative test before and after adding this patch, and the fix is effective. Fixes: 259307074bfc ("ipmi: Add SMBus interface driver (SSIF)") Cc: stable@vger.kernel.org Signed-off-by: Zhang Yuchen Message-Id: <20230412074907.80046-1-zhangyuchen.lcr@bytedance.com> Signed-off-by: Corey Minyard Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/char/ipmi/ipmi_ssif.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 3005e95df3be..a3745fa643f3 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -794,9 +794,9 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, } else if (data[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2 || data[1] != IPMI_GET_MSG_FLAGS_CMD) { /* - * Don't abort here, maybe it was a queued - * response to a previous command. + * Recv error response, give up.
*/ + ssif_info->ssif_state = SSIF_IDLE; ipmi_ssif_unlock_cond(ssif_info, flags); dev_warn(&ssif_info->client->dev, "Invalid response getting flags: %x %x\n", -- Gitee From 05bb78fdfb226f070fca66473e4b539220d798db Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Mar 2023 14:49:50 -0800 Subject: [PATCH 090/101] kheaders: Use array declaration instead of char stable inclusion from stable-5.10.180 commit fcd2da2e6bf2640a31a2a5b118b50dc3635c707b category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit b69edab47f1da8edd8e7bfdf8c70f51a2a5d89fb upstream. Under CONFIG_FORTIFY_SOURCE, memcpy() will check the size of destination and source buffers. Defining kernel_headers_data as "char" would trip this check. Since these addresses are treated as byte arrays, define them as arrays (as done everywhere else). This was seen with: $ cat /sys/kernel/kheaders.tar.xz >> /dev/null detected buffer overflow in memcpy kernel BUG at lib/string_helpers.c:1027! ... RIP: 0010:fortify_panic+0xf/0x20 [...] Call Trace: ikheaders_read+0x45/0x50 [kheaders] kernfs_fop_read_iter+0x1a4/0x2f0 ... 
Reported-by: Jakub Kicinski Link: https://lore.kernel.org/bpf/20230302112130.6e402a98@kernel.org/ Acked-by: Joel Fernandes (Google) Reviewed-by: Alexander Lobakin Tested-by: Jakub Kicinski Fixes: 43d8ce9d65a5 ("Provide in-kernel headers to make extending kernel easier") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20230302224946.never.243-kees@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- kernel/kheaders.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/kheaders.c b/kernel/kheaders.c index 8f69772af77b..42163c9e94e5 100644 --- a/kernel/kheaders.c +++ b/kernel/kheaders.c @@ -26,15 +26,15 @@ asm ( " .popsection \n" ); -extern char kernel_headers_data; -extern char kernel_headers_data_end; +extern char kernel_headers_data[]; +extern char kernel_headers_data_end[]; static ssize_t ikheaders_read(struct file *file, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len) { - memcpy(buf, &kernel_headers_data + off, len); + memcpy(buf, &kernel_headers_data[off], len); return len; } @@ -48,8 +48,8 @@ static struct bin_attribute kheaders_attr __ro_after_init = { static int __init ikheaders_init(void) { - kheaders_attr.size = (&kernel_headers_data_end - - &kernel_headers_data); + kheaders_attr.size = (kernel_headers_data_end - + kernel_headers_data); return sysfs_create_bin_file(kernel_kobj, &kheaders_attr); } -- Gitee From db4b585c2d9df41af7e39178ef0211af4c4fb44b Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 9 Apr 2023 17:15:52 +0200 Subject: [PATCH 091/101] pwm: meson: Fix axg ao mux parents stable inclusion from stable-5.10.180 commit c1cabb10e07287019527132a74baa684df77af8b category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit eb411c0cf59ae6344b34bc6f0d298a22b300627e upstream. 
This fix is basically the same as 9bce02ef0dfa ("pwm: meson: Fix the G12A AO clock parents order"). Vendor driver referenced there has xtal as first parent also for axg ao. In addition fix the name of the aoclk81 clock. Apparently name aoclk81 as used by the vendor driver was changed when mainlining the axg clock driver. Fixes: bccaa3f917c9 ("pwm: meson: Add clock source configuration for Meson-AXG") Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Reviewed-by: Martin Blumenstingl Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/pwm/pwm-meson.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index 237bb8e06593..1699d311e860 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -424,7 +424,7 @@ static const struct meson_pwm_data pwm_axg_ee_data = { }; static const char * const pwm_axg_ao_parent_names[] = { - "aoclk81", "xtal", "fclk_div4", "fclk_div5" + "xtal", "axg_ao_clk81", "fclk_div4", "fclk_div5" }; static const struct meson_pwm_data pwm_axg_ao_data = { -- Gitee From a885df021f47835dfde57777a958d6fedd4c1e3e Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 11 Apr 2023 07:34:11 +0200 Subject: [PATCH 092/101] pwm: meson: Fix g12a ao clk81 name stable inclusion from stable-5.10.180 commit dda1372c8d838e389c1a9fc805bdead96fa4d52b category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 9e4fa80ab7ef9eb4f7b1ea9fc31e0eb040e85e25 upstream. Fix the name of the aoclk81 clock. Apparently name aoclk81 as used by the vendor driver was changed when mainlining the g12a clock driver. 
Fixes: f41efceb46e6 ("pwm: meson: Add clock source configuration for Meson G12A") Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Reviewed-by: Martin Blumenstingl Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/pwm/pwm-meson.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index 1699d311e860..0283163ddbe8 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -433,7 +433,7 @@ static const struct meson_pwm_data pwm_axg_ao_data = { }; static const char * const pwm_g12a_ao_ab_parent_names[] = { - "xtal", "aoclk81", "fclk_div4", "fclk_div5" + "xtal", "g12a_ao_clk81", "fclk_div4", "fclk_div5" }; static const struct meson_pwm_data pwm_g12a_ao_ab_data = { @@ -442,7 +442,7 @@ static const struct meson_pwm_data pwm_g12a_ao_ab_data = { }; static const char * const pwm_g12a_ao_cd_parent_names[] = { - "xtal", "aoclk81", + "xtal", "g12a_ao_clk81", }; static const struct meson_pwm_data pwm_g12a_ao_cd_data = { -- Gitee From 787de8c08e6063f0b625d7ed67aa30691f028fc3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 27 Apr 2023 17:59:20 +0200 Subject: [PATCH 093/101] ring-buffer: Sync IRQ works before buffer destruction stable inclusion from stable-5.10.180 commit 1c99f65d6af2a454bfd5207b4f6a97c8474a1191 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 675751bb20634f981498c7d66161584080cc061e upstream. 
If something was written to the buffer just before destruction, it may be possible (maybe not in a real system, but it did happen in ARCH=um with time-travel) to destroy the ringbuffer before the IRQ work ran, leading this KASAN report (or a crash without KASAN): BUG: KASAN: slab-use-after-free in irq_work_run_list+0x11a/0x13a Read of size 8 at addr 000000006d640a48 by task swapper/0 CPU: 0 PID: 0 Comm: swapper Tainted: G W O 6.3.0-rc1 #7 Stack: 60c4f20f 0c203d48 41b58ab3 60f224fc 600477fa 60f35687 60c4f20f 601273dd 00000008 6101eb00 6101eab0 615be548 Call Trace: [<60047a58>] show_stack+0x25e/0x282 [<60c609e0>] dump_stack_lvl+0x96/0xfd [<60c50d4c>] print_report+0x1a7/0x5a8 [<603078d3>] kasan_report+0xc1/0xe9 [<60308950>] __asan_report_load8_noabort+0x1b/0x1d [<60232844>] irq_work_run_list+0x11a/0x13a [<602328b4>] irq_work_tick+0x24/0x34 [<6017f9dc>] update_process_times+0x162/0x196 [<6019f335>] tick_sched_handle+0x1a4/0x1c3 [<6019fd9e>] tick_sched_timer+0x79/0x10c [<601812b9>] __hrtimer_run_queues.constprop.0+0x425/0x695 [<60182913>] hrtimer_interrupt+0x16c/0x2c4 [<600486a3>] um_timer+0x164/0x183 [...] Allocated by task 411: save_stack_trace+0x99/0xb5 stack_trace_save+0x81/0x9b kasan_save_stack+0x2d/0x54 kasan_set_track+0x34/0x3e kasan_save_alloc_info+0x25/0x28 ____kasan_kmalloc+0x8b/0x97 __kasan_kmalloc+0x10/0x12 __kmalloc+0xb2/0xe8 load_elf_phdrs+0xee/0x182 [...] The buggy address belongs to the object at 000000006d640800 which belongs to the cache kmalloc-1k of size 1024 The buggy address is located 584 bytes inside of freed 1024-byte region [000000006d640800, 000000006d640c00) Add the appropriate irq_work_sync() so the work finishes before the buffers are destroyed. Prior to the commit in the Fixes tag below, there was only a single global IRQ work, so this issue didn't exist. 
Link: https://lore.kernel.org/linux-trace-kernel/20230427175920.a76159263122.I8295e405c44362a86c995e9c2c37e3e03810aa56@changeid Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Fixes: 15693458c4bc ("tracing/ring-buffer: Move poll wake ups into ring buffer code") Signed-off-by: Johannes Berg Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- kernel/trace/ring_buffer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 21b07c7c6ee5..1fe6b29366f1 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1644,6 +1644,8 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) struct list_head *head = cpu_buffer->pages; struct buffer_page *bpage, *tmp; + irq_work_sync(&cpu_buffer->irq_work.work); + free_buffer_page(cpu_buffer->reader_page); if (head) { @@ -1750,6 +1752,8 @@ ring_buffer_free(struct trace_buffer *buffer) cpuhp_state_remove_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node); + irq_work_sync(&buffer->irq_work.work); + for_each_buffer_cpu(buffer, cpu) rb_free_cpu_buffer(buffer->buffers[cpu]); -- Gitee From 9a8f9cace1b42b1dc30d0f186c8b7b8d7ed31f0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Mon, 13 Mar 2023 10:17:24 +0100 Subject: [PATCH 094/101] crypto: api - Demote BUG_ON() in crypto_unregister_alg() to a WARN_ON() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.180 commit ff86deaba1fa15b3035ffe6f0059c2493535124e category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit a543ada7db729514ddd3ba4efa45f4c7b802ad85 upstream. The crypto_unregister_alg() function expects callers to ensure that any algorithm that is unregistered has a refcnt of exactly 1, and issues a BUG_ON() if this is not the case. 
However, there are in fact drivers that will call crypto_unregister_alg() without ensuring that the refcnt has been lowered first, most notably on system shutdown. This causes the BUG_ON() to trigger, which prevents a clean shutdown and hangs the system. To avoid such hangs on shutdown, demote the BUG_ON() in crypto_unregister_alg() to a WARN_ON() with early return. Cc stable because this problem was observed on a 6.2 kernel, cf the link below. Link: https://lore.kernel.org/r/87r0tyq8ph.fsf@toke.dk Cc: stable@vger.kernel.org Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- crypto/algapi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crypto/algapi.c b/crypto/algapi.c index 9de27daa98b4..42dca17dc2d9 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -456,7 +456,9 @@ void crypto_unregister_alg(struct crypto_alg *alg) if (WARN(ret, "Algorithm %s is not registered", alg->cra_driver_name)) return; - BUG_ON(refcount_read(&alg->cra_refcnt) != 1); + if (WARN_ON(refcount_read(&alg->cra_refcnt) != 1)) + return; + if (alg->cra_destroy) alg->cra_destroy(alg); -- Gitee From f3fdcd2cea28be9d2918cc3b1ab8bfec5c99f1da Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Tue, 28 Feb 2023 18:28:58 +0000 Subject: [PATCH 095/101] crypto: safexcel - Cleanup ring IRQ workqueues on load failure stable inclusion from stable-5.10.180 commit 0a89d4a075524cf1f865cfdbb9cf38ab8e3e5409 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit ca25c00ccbc5f942c63897ed23584cfc66e8ec81 upstream. A failure loading the safexcel driver results in the following warning on boot, because the IRQ affinity has not been correctly cleaned up. Ensure we clean up the affinity and workqueues on a failure to load the driver. 
crypto-safexcel: probe of f2800000.crypto failed with error -2 ------------[ cut here ]------------ WARNING: CPU: 1 PID: 232 at kernel/irq/manage.c:1913 free_irq+0x300/0x340 Modules linked in: hwmon mdio_i2c crypto_safexcel(+) md5 sha256_generic libsha256 authenc libdes omap_rng rng_core nft_masq nft_nat nft_chain_nat nf_nat nft_ct nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables libcrc32c nfnetlink fuse autofs4 CPU: 1 PID: 232 Comm: systemd-udevd Tainted: G W 6.1.6-00002-g9d4898824677 #3 Hardware name: MikroTik RB5009 (DT) pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : free_irq+0x300/0x340 lr : free_irq+0x2e0/0x340 sp : ffff800008fa3890 x29: ffff800008fa3890 x28: 0000000000000000 x27: 0000000000000000 x26: ffff8000008e6dc0 x25: ffff000009034cac x24: ffff000009034d50 x23: 0000000000000000 x22: 000000000000004a x21: ffff0000093e0d80 x20: ffff000009034c00 x19: ffff00000615fc00 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 000075f5c1584c5e x14: 0000000000000017 x13: 0000000000000000 x12: 0000000000000040 x11: ffff000000579b60 x10: ffff000000579b62 x9 : ffff800008bbe370 x8 : ffff000000579dd0 x7 : 0000000000000000 x6 : ffff000000579e18 x5 : ffff000000579da8 x4 : ffff800008ca0000 x3 : ffff800008ca0188 x2 : 0000000013033204 x1 : ffff000009034c00 x0 : ffff8000087eadf0 Call trace: free_irq+0x300/0x340 devm_irq_release+0x14/0x20 devres_release_all+0xa0/0x100 device_unbind_cleanup+0x14/0x60 really_probe+0x198/0x2d4 __driver_probe_device+0x74/0xdc driver_probe_device+0x3c/0x110 __driver_attach+0x8c/0x190 bus_for_each_dev+0x6c/0xc0 driver_attach+0x20/0x30 bus_add_driver+0x148/0x1fc driver_register+0x74/0x120 __platform_driver_register+0x24/0x30 safexcel_init+0x48/0x1000 [crypto_safexcel] do_one_initcall+0x4c/0x1b0 do_init_module+0x44/0x1cc load_module+0x1724/0x1be4 __do_sys_finit_module+0xbc/0x110 __arm64_sys_finit_module+0x1c/0x24 invoke_syscall+0x44/0x110 el0_svc_common.constprop.0+0xc0/0xe0 do_el0_svc+0x20/0x80 
el0_svc+0x14/0x4c el0t_64_sync_handler+0xb0/0xb4 el0t_64_sync+0x148/0x14c ---[ end trace 0000000000000000 ]--- Fixes: 1b44c5a60c13 ("inside-secure - add SafeXcel EIP197 crypto engine driver") Signed-off-by: Jonathan McDowell Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- drivers/crypto/inside-secure/safexcel.c | 37 ++++++++++++++++++------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c index fbcf52e46d17..7de9b9d20de0 100644 --- a/drivers/crypto/inside-secure/safexcel.c +++ b/drivers/crypto/inside-secure/safexcel.c @@ -1634,19 +1634,23 @@ static int safexcel_probe_generic(void *pdev, &priv->ring[i].rdr); if (ret) { dev_err(dev, "Failed to initialize rings\n"); - return ret; + goto err_cleanup_rings; } priv->ring[i].rdr_req = devm_kcalloc(dev, EIP197_DEFAULT_RING_SIZE, sizeof(*priv->ring[i].rdr_req), GFP_KERNEL); - if (!priv->ring[i].rdr_req) - return -ENOMEM; + if (!priv->ring[i].rdr_req) { + ret = -ENOMEM; + goto err_cleanup_rings; + } ring_irq = devm_kzalloc(dev, sizeof(*ring_irq), GFP_KERNEL); - if (!ring_irq) - return -ENOMEM; + if (!ring_irq) { + ret = -ENOMEM; + goto err_cleanup_rings; + } ring_irq->priv = priv; ring_irq->ring = i; @@ -1660,7 +1664,8 @@ static int safexcel_probe_generic(void *pdev, ring_irq); if (irq < 0) { dev_err(dev, "Failed to get IRQ ID for ring %d\n", i); - return irq; + ret = irq; + goto err_cleanup_rings; } priv->ring[i].irq = irq; @@ -1672,8 +1677,10 @@ static int safexcel_probe_generic(void *pdev, snprintf(wq_name, 9, "wq_ring%d", i); priv->ring[i].workqueue = create_singlethread_workqueue(wq_name); - if (!priv->ring[i].workqueue) - return -ENOMEM; + if (!priv->ring[i].workqueue) { + ret = -ENOMEM; + goto err_cleanup_rings; + } priv->ring[i].requests = 0; priv->ring[i].busy = false; @@ -1690,16 +1697,26 @@ static int safexcel_probe_generic(void *pdev, ret = 
safexcel_hw_init(priv); if (ret) { dev_err(dev, "HW init failed (%d)\n", ret); - return ret; + goto err_cleanup_rings; } ret = safexcel_register_algorithms(priv); if (ret) { dev_err(dev, "Failed to register algorithms (%d)\n", ret); - return ret; + goto err_cleanup_rings; } return 0; + +err_cleanup_rings: + for (i = 0; i < priv->config.rings; i++) { + if (priv->ring[i].irq) + irq_set_affinity_hint(priv->ring[i].irq, NULL); + if (priv->ring[i].workqueue) + destroy_workqueue(priv->ring[i].workqueue); + } + + return ret; } static void safexcel_hw_reset_rings(struct safexcel_crypto_priv *priv) -- Gitee From 640ee177831bab2a7f872035ecb9d735b3992119 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Fri, 6 Jan 2023 15:09:34 +0800 Subject: [PATCH 096/101] rcu: Avoid stack overflow due to __rcu_irq_enter_check_tick() being kprobe-ed stable inclusion from stable-5.10.180 commit eb18bc5a8678f431c500e6da1b8b5f34478d5bc1 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 7a29fb4a4771124bc61de397dbfc1554dbbcc19c upstream. Registering a kprobe on __rcu_irq_enter_check_tick() can cause kernel stack overflow as shown below. This issue can be reproduced by enabling CONFIG_NO_HZ_FULL and booting the kernel with argument "nohz_full=", and then giving the following commands at the shell prompt: # cd /sys/kernel/tracing/ # echo 'p:mp1 __rcu_irq_enter_check_tick' >> kprobe_events # echo 1 > events/kprobes/enable This commit therefore adds __rcu_irq_enter_check_tick() to the kprobes blacklist using NOKPROBE_SYMBOL(). Insufficient stack space to handle exception! 
ESR: 0x00000000f2000004 -- BRK (AArch64) FAR: 0x0000ffffccf3e510 Task stack: [0xffff80000ad30000..0xffff80000ad38000] IRQ stack: [0xffff800008050000..0xffff800008058000] Overflow stack: [0xffff089c36f9f310..0xffff089c36fa0310] CPU: 5 PID: 190 Comm: bash Not tainted 6.2.0-rc2-00320-g1f5abbd77e2c #19 Hardware name: linux,dummy-virt (DT) pstate: 400003c5 (nZcv DAIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : __rcu_irq_enter_check_tick+0x0/0x1b8 lr : ct_nmi_enter+0x11c/0x138 sp : ffff80000ad30080 x29: ffff80000ad30080 x28: ffff089c82e20000 x27: 0000000000000000 x26: 0000000000000000 x25: ffff089c02a8d100 x24: 0000000000000000 x23: 00000000400003c5 x22: 0000ffffccf3e510 x21: ffff089c36fae148 x20: ffff80000ad30120 x19: ffffa8da8fcce148 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: ffffa8da8e44ea6c x14: ffffa8da8e44e968 x13: ffffa8da8e03136c x12: 1fffe113804d6809 x11: ffff6113804d6809 x10: 0000000000000a60 x9 : dfff800000000000 x8 : ffff089c026b404f x7 : 00009eec7fb297f7 x6 : 0000000000000001 x5 : ffff80000ad30120 x4 : dfff800000000000 x3 : ffffa8da8e3016f4 x2 : 0000000000000003 x1 : 0000000000000000 x0 : 0000000000000000 Kernel panic - not syncing: kernel stack overflow CPU: 5 PID: 190 Comm: bash Not tainted 6.2.0-rc2-00320-g1f5abbd77e2c #19 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0xf8/0x108 show_stack+0x20/0x30 dump_stack_lvl+0x68/0x84 dump_stack+0x1c/0x38 panic+0x214/0x404 add_taint+0x0/0xf8 panic_bad_stack+0x144/0x160 handle_bad_stack+0x38/0x58 __bad_stack+0x78/0x7c __rcu_irq_enter_check_tick+0x0/0x1b8 arm64_enter_el1_dbg.isra.0+0x14/0x20 el1_dbg+0x2c/0x90 el1h_64_sync_handler+0xcc/0xe8 el1h_64_sync+0x64/0x68 __rcu_irq_enter_check_tick+0x0/0x1b8 arm64_enter_el1_dbg.isra.0+0x14/0x20 el1_dbg+0x2c/0x90 el1h_64_sync_handler+0xcc/0xe8 el1h_64_sync+0x64/0x68 __rcu_irq_enter_check_tick+0x0/0x1b8 arm64_enter_el1_dbg.isra.0+0x14/0x20 el1_dbg+0x2c/0x90 el1h_64_sync_handler+0xcc/0xe8 el1h_64_sync+0x64/0x68 
__rcu_irq_enter_check_tick+0x0/0x1b8 [...] el1_dbg+0x2c/0x90 el1h_64_sync_handler+0xcc/0xe8 el1h_64_sync+0x64/0x68 __rcu_irq_enter_check_tick+0x0/0x1b8 arm64_enter_el1_dbg.isra.0+0x14/0x20 el1_dbg+0x2c/0x90 el1h_64_sync_handler+0xcc/0xe8 el1h_64_sync+0x64/0x68 __rcu_irq_enter_check_tick+0x0/0x1b8 arm64_enter_el1_dbg.isra.0+0x14/0x20 el1_dbg+0x2c/0x90 el1h_64_sync_handler+0xcc/0xe8 el1h_64_sync+0x64/0x68 __rcu_irq_enter_check_tick+0x0/0x1b8 el1_interrupt+0x28/0x60 el1h_64_irq_handler+0x18/0x28 el1h_64_irq+0x64/0x68 __ftrace_set_clr_event_nolock+0x98/0x198 __ftrace_set_clr_event+0x58/0x80 system_enable_write+0x144/0x178 vfs_write+0x174/0x738 ksys_write+0xd0/0x188 __arm64_sys_write+0x4c/0x60 invoke_syscall+0x64/0x180 el0_svc_common.constprop.0+0x84/0x160 do_el0_svc+0x48/0xe8 el0_svc+0x34/0xd0 el0t_64_sync_handler+0xb8/0xc0 el0t_64_sync+0x190/0x194 SMP: stopping secondary CPUs Kernel Offset: 0x28da86000000 from 0xffff800008000000 PHYS_OFFSET: 0xfffff76600000000 CPU features: 0x00000,01a00100,0000421b Memory Limit: none Acked-by: Joel Fernandes (Google) Link: https://lore.kernel.org/all/20221119040049.795065-1-zhengyejian1@huawei.com/ Fixes: aaf2bc50df1f ("rcu: Abstract out rcu_irq_enter_check_tick() from rcu_nmi_enter()") Signed-off-by: Zheng Yejian Cc: stable@vger.kernel.org Signed-off-by: Paul E. 
McKenney Signed-off-by: Joel Fernandes (Google) Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- kernel/rcu/tree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 9cce4e13af41..30e1d7fedb5f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -964,6 +964,7 @@ void __rcu_irq_enter_check_tick(void) } raw_spin_unlock_rcu_node(rdp->mynode); } +NOKPROBE_SYMBOL(__rcu_irq_enter_check_tick); #endif /* CONFIG_NO_HZ_FULL */ /** -- Gitee From 59233132e4187692eeba15f20b00c8b59349c07a Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Fri, 31 Mar 2023 14:32:18 +0200 Subject: [PATCH 097/101] reiserfs: Add security prefix to xattr name in reiserfs_security_write() stable inclusion from stable-5.10.180 commit 680c419d0d8a67a3a8601174547aae6a4e26e5f9 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit d82dcd9e21b77d338dc4875f3d4111f0db314a7c upstream. Reiserfs sets a security xattr at inode creation time in two stages: first, it calls reiserfs_security_init() to obtain the xattr from active LSMs; then, it calls reiserfs_security_write() to actually write that xattr. Unfortunately, it seems there is a wrong expectation that LSMs provide the full xattr name in the form 'security.<suffix>'. However, LSMs always provided just the suffix, causing reiserfs to not write the xattr at all (if the suffix is shorter than the prefix), or to write an xattr with the wrong name. Add a temporary buffer in reiserfs_security_write(), and write to it the full xattr name, before passing it to reiserfs_xattr_set_handle(). Also replace the name length check with a check that the full xattr name is not larger than XATTR_NAME_MAX.
Cc: stable@vger.kernel.org # v2.6.x Fixes: 57fe60df6241 ("reiserfs: add atomic addition of selinux attributes during inode creation") Signed-off-by: Roberto Sassu Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- fs/reiserfs/xattr_security.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c index 59d87f9f72fb..159af6c26f4b 100644 --- a/fs/reiserfs/xattr_security.c +++ b/fs/reiserfs/xattr_security.c @@ -81,11 +81,15 @@ int reiserfs_security_write(struct reiserfs_transaction_handle *th, struct inode *inode, struct reiserfs_security_handle *sec) { + char xattr_name[XATTR_NAME_MAX + 1] = XATTR_SECURITY_PREFIX; int error; - if (strlen(sec->name) < sizeof(XATTR_SECURITY_PREFIX)) + + if (XATTR_SECURITY_PREFIX_LEN + strlen(sec->name) > XATTR_NAME_MAX) return -EINVAL; - error = reiserfs_xattr_set_handle(th, inode, sec->name, sec->value, + strlcat(xattr_name, sec->name, sizeof(xattr_name)); + + error = reiserfs_xattr_set_handle(th, inode, xattr_name, sec->value, sec->length, XATTR_CREATE); if (error == -ENODATA || error == -EOPNOTSUPP) error = 0; -- Gitee From 0a57e73ab9e9c2c00abfc2fd3836c7e03be4ced9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 Apr 2023 17:23:59 -0700 Subject: [PATCH 098/101] KVM: nVMX: Emulate NOPs in L2, and PAUSE if it's not intercepted stable inclusion from stable-5.10.180 commit e28df70df0070ab3db25e0e71c47cb465676a7e2 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- commit 4984563823f0034d3533854c1b50e729f5191089 upstream. Extend VMX's nested intercept logic for emulated instructions to handle "pause" interception, in quotes because KVM's emulator doesn't filter out NOPs when checking for nested intercepts. Failure to allow emulation of NOPs results in KVM injecting a #UD into L2 on any NOP that collides with the emulator's definition of PAUSE, i.e. 
on all single-byte NOPs. For PAUSE itself, honor L1's PAUSE-exiting control, but ignore PLE to avoid unnecessarily injecting a #UD into L2. Per the SDM, the first execution of PAUSE after VM-Entry is treated as the beginning of a new loop, i.e. will never trigger a PLE VM-Exit, and so L1 can't expect any given execution of PAUSE to deterministically exit. ... the processor considers this execution to be the first execution of PAUSE in a loop. (It also does so for the first execution of PAUSE at CPL 0 after VM entry.) All that said, the PLE side of things is currently a moot point, as KVM doesn't expose PLE to L1. Note, vmx_check_intercept() is still wildly broken when L1 wants to intercept an instruction, as KVM injects a #UD instead of synthesizing a nested VM-Exit. That issue extends far beyond NOP/PAUSE and needs far more effort to fix, i.e. is a problem for the future. Fixes: 07721feee46b ("KVM: nVMX: Don't emulate instructions in guest mode") Cc: Mathias Krause Cc: stable@vger.kernel.org Reviewed-by: Paolo Bonzini Link: https://lore.kernel.org/r/20230405002359.418138-1-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing --- arch/x86/kvm/vmx/vmx.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 16943e923902..9aedc7b06da7 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7537,6 +7537,21 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */ break; + case x86_intercept_pause: + /* + * PAUSE is a single-byte NOP with a REPE prefix, i.e. collides + * with vanilla NOPs in the emulator. Apply the interception + * check only to actual PAUSE instructions. Don't check + * PAUSE-loop-exiting, software can't expect a given PAUSE to + * exit, i.e. KVM is within its rights to allow L2 to execute + * the PAUSE. 
+ */ + if ((info->rep_prefix != REPE_PREFIX) || + !nested_cpu_has2(vmcs12, CPU_BASED_PAUSE_EXITING)) + return X86EMUL_CONTINUE; + + break; + /* TODO: check more intercepts... */ default: break; -- Gitee From e56d9d1777dad1ab558288b645b76eee8656cec8 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Mon, 10 Apr 2023 21:08:26 +0800 Subject: [PATCH 099/101] writeback, cgroup: fix null-ptr-deref write in bdi_split_work_to_wbs stable inclusion from stable-5.10.180 commit 2b00b2a0e6425095602a0ba05e36a5b9afe86817 category: bugfix issue: #I8RGKL CVE: NA Signed-off-by: yaowenrui --------------------------------------- [ Upstream commit 1ba1199ec5747f475538c0d25a32804e5ba1dfde ] KASAN report null-ptr-deref: ================================================================== BUG: KASAN: null-ptr-deref in bdi_split_work_to_wbs+0x5c5/0x7b0 Write of size 8 at addr 0000000000000000 by task sync/943 CPU: 5 PID: 943 Comm: sync Tainted: 6.3.0-rc5-next-20230406-dirty #461 Call Trace: dump_stack_lvl+0x7f/0xc0 print_report+0x2ba/0x340 kasan_report+0xc4/0x120 kasan_check_range+0x1b7/0x2e0 __kasan_check_write+0x24/0x40 bdi_split_work_to_wbs+0x5c5/0x7b0 sync_inodes_sb+0x195/0x630 sync_inodes_one_sb+0x3a/0x50 iterate_supers+0x106/0x1b0 ksys_sync+0x98/0x160 [...] 
================================================================== The race that causes the above issue is as follows: cpu1 cpu2 -------------------------|------------------------- inode_switch_wbs INIT_WORK(&isw->work, inode_switch_wbs_work_fn) queue_rcu_work(isw_wq, &isw->work) // queue_work async inode_switch_wbs_work_fn wb_put_many(old_wb, nr_switched) percpu_ref_put_many ref->data->release(ref) cgwb_release queue_work(cgwb_release_wq, &wb->release_work) // queue_work async &wb->release_work cgwb_release_workfn ksys_sync iterate_supers sync_inodes_one_sb sync_inodes_sb bdi_split_work_to_wbs kmalloc(sizeof(*work), GFP_ATOMIC) // alloc memory failed percpu_ref_exit ref->data = NULL kfree(data) wb_get(wb) percpu_ref_get(&wb->refcnt) percpu_ref_get_many(ref, 1) atomic_long_add(nr, &ref->data->count) atomic64_add(i, v) // trigger null-ptr-deref bdi_split_work_to_wbs() traverses &bdi->wb_list to split work into all wbs. If the allocation of new work fails, the on-stack fallback will be used and the reference count of the current wb is increased afterwards. If cgroup writeback membership switches occur before getting the reference count and the current wb is released as old_wd, then calling wb_get() or wb_put() will trigger the null pointer dereference above. This issue was introduced in v4.3-rc7 (see fix tag1). Both sync_inodes_sb() and __writeback_inodes_sb_nr() calls to bdi_split_work_to_wbs() can trigger this issue. For scenarios called via sync_inodes_sb(), originally commit 7fc5854f8c6e ("writeback: synchronize sync(2) against cgroup writeback membership switches") reduced the possibility of the issue by adding wb_switch_rwsem, but in v5.14-rc1 (see fix tag2) removed the "inode_io_list_del_locked(inode, old_wb)" from inode_switch_wbs_work_fn() so that wb->state contains WB_has_dirty_io, thus old_wb is not skipped when traversing wbs in bdi_split_work_to_wbs(), and the issue becomes easily reproducible again. 
To solve this problem, percpu_ref_exit() is called under RCU protection to avoid race between cgwb_release_workfn() and bdi_split_work_to_wbs(). Moreover, replace wb_get() with wb_tryget() in bdi_split_work_to_wbs(), and skip the current wb if wb_tryget() fails because the wb has already been shutdown. Link: https://lkml.kernel.org/r/20230410130826.1492525-1-libaokun1@huawei.com Fixes: b817525a4a80 ("writeback: bdi_writeback iteration must not skip dying ones") Signed-off-by: Baokun Li Reviewed-by: Jan Kara Acked-by: Tejun Heo Cc: Alexander Viro Cc: Andreas Dilger Cc: Christian Brauner Cc: Dennis Zhou Cc: Hou Tao Cc: yangerkun Cc: Zhang Yi Cc: Jens Axboe Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing --- fs/fs-writeback.c | 17 ++++++++++------- mm/backing-dev.c | 11 ++++++++++- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 2011199476ea..20e29e35884a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -884,6 +884,16 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi, continue; } + /* + * If wb_tryget fails, the wb has been shutdown, skip it. + * + * Pin @wb so that it stays on @bdi->wb_list. This allows + * continuing iteration from @wb after dropping and + * regrabbing rcu read lock. + */ + if (!wb_tryget(wb)) + continue; + /* alloc failed, execute synchronously using on-stack fallback */ work = &fallback_work; *work = *base_work; @@ -892,13 +902,6 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi, work->done = &fallback_work_done; wb_queue_work(wb, work); - - /* - * Pin @wb so that it stays on @bdi->wb_list. This allows - * continuing iteration from @wb after dropping and - * regrabbing rcu read lock. 
- */ - wb_get(wb); last_wb = wb; rcu_read_unlock(); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index ca770a783a9f..b28f629c3527 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -378,6 +378,15 @@ static void wb_exit(struct bdi_writeback *wb) static DEFINE_SPINLOCK(cgwb_lock); static struct workqueue_struct *cgwb_release_wq; +static void cgwb_free_rcu(struct rcu_head *rcu_head) +{ + struct bdi_writeback *wb = container_of(rcu_head, + struct bdi_writeback, rcu); + + percpu_ref_exit(&wb->refcnt); + kfree(wb); +} + static void cgwb_release_workfn(struct work_struct *work) { struct bdi_writeback *wb = container_of(work, struct bdi_writeback, @@ -397,7 +406,7 @@ static void cgwb_release_workfn(struct work_struct *work) fprop_local_destroy_percpu(&wb->memcg_completions); percpu_ref_exit(&wb->refcnt); wb_exit(wb); - kfree_rcu(wb, rcu); + call_rcu(&wb->rcu, cgwb_free_rcu); } static void cgwb_release(struct percpu_ref *refcnt) -- Gitee From e598982dc06a333906b38332728ea99043c84154 Mon Sep 17 00:00:00 2001 From: wanxiaoqing Date: Tue, 26 Dec 2023 19:27:18 +0800 Subject: [PATCH 100/101] Kernel Version UPdate: v5.10.178=>v5.10.179 Signed-off-by: wanxiaoqing --- README.OpenSource | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.OpenSource b/README.OpenSource index 5fb9dd500cc3..71a2928bd5f5 100644 --- a/README.OpenSource +++ b/README.OpenSource @@ -3,7 +3,7 @@ "Name": "linux-5.10", "License": "GPL-2.0+", "License File": "COPYING", - "Version Number": "5.10.178", + "Version Number": "5.10.179", "Owner": "liuyu82@huawei.com", "Upstream URL": "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/log/?h=linux-5.10.y", "Description": "linux kernel 5.10" -- Gitee From 5a7537d76f973d1eb72962e8781dcfd9d0dfea0a Mon Sep 17 00:00:00 2001 From: wanxiaoqing Date: Wed, 27 Dec 2023 09:40:39 +0800 Subject: [PATCH 101/101] test Signed-off-by: wanxiaoqing --- kernel/sched/sched.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git 
a/kernel/sched/sched.h b/kernel/sched/sched.h index a557a04ba1a9..e17b13878749 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -88,7 +88,9 @@ struct rq; struct cpuidle_state; #ifdef CONFIG_SCHED_RT_CAS -extern unsigned long uclamp_task_util(struct task_struct *p); +extern unsigned long uclamp_task_util(struct task_struct *p, + unsigned long uclamp_min, + unsigned long uclamp_max); #endif #ifdef CONFIG_SCHED_WALT -- Gitee