From 13adaaf6afdd26acd7d6fdece0db1b337ffb22b9 Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Fri, 24 Dec 2021 14:47:16 +0800 Subject: [PATCH 001/100] ASoC: mediatek: mt8173: fix device_node leak stable inclusion from stable-5.10.94 commit f28672eef4a9f401b617ddd9d394aaa35d634c61 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- [ Upstream commit 493433785df0075afc0c106ab65f10a605d0b35d ] Fixes the device_node leak. Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20211224064719.2031210-2-tzungbi@google.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: wenfei --- sound/soc/mediatek/mt8173/mt8173-max98090.c | 3 +++ sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c | 2 ++ sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c | 2 ++ sound/soc/mediatek/mt8173/mt8173-rt5650.c | 2 ++ 4 files changed, 9 insertions(+) diff --git a/sound/soc/mediatek/mt8173/mt8173-max98090.c b/sound/soc/mediatek/mt8173/mt8173-max98090.c index fc94314bfc02..3bdd4931316c 100644 --- a/sound/soc/mediatek/mt8173/mt8173-max98090.c +++ b/sound/soc/mediatek/mt8173/mt8173-max98090.c @@ -180,6 +180,9 @@ static int mt8173_max98090_dev_probe(struct platform_device *pdev) if (ret) dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n", __func__, ret); + + of_node_put(codec_node); + of_node_put(platform_node); return ret; } diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c index 0f28dc2217c0..390da5bf727e 100644 --- a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c +++ b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c @@ -218,6 +218,8 @@ static int mt8173_rt5650_rt5514_dev_probe(struct platform_device *pdev) if (ret) dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n", __func__, ret); + + of_node_put(platform_node); return ret; } diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c index 077c6ee06780..c8e4e85e1057 100644 --- a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c +++ b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c @@ -285,6 +285,8 @@ static int mt8173_rt5650_rt5676_dev_probe(struct platform_device *pdev) if (ret) dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n", __func__, ret); + + of_node_put(platform_node); return ret; } diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650.c b/sound/soc/mediatek/mt8173/mt8173-rt5650.c index c28ebf891cb0..e168d31f4445 100644 --- a/sound/soc/mediatek/mt8173/mt8173-rt5650.c +++ b/sound/soc/mediatek/mt8173/mt8173-rt5650.c @@ -323,6 +323,8 @@ static int mt8173_rt5650_dev_probe(struct platform_device *pdev) if (ret) dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n", __func__, ret); + + of_node_put(platform_node); return ret; } -- Gitee From 8d2899f26ef4266a7a46c8d77c53d2cae17eb08e Mon Sep 17 00:00:00 2001 From: Tzung-Bi Shih Date: Fri, 24 Dec 2021 14:47:17 +0800 Subject: [PATCH 002/100] ASoC: mediatek: mt8183: fix device_node leak stable inclusion from stable-5.10.94 commit ff08cf1e34a17c1205630a9682640b784932970b category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- [ Upstream commit cb006006fe6221f092fadaffd3f219288304c9ad ] Fixes the device_node leak. Signed-off-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20211224064719.2031210-3-tzungbi@google.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: wenfei --- sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c | 6 +++++- sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c | 7 ++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c index 20d31b69a5c0..9cc0f26b08fb 100644 --- a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c +++ b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c @@ -787,7 +787,11 @@ static int mt8183_da7219_max98357_dev_probe(struct platform_device *pdev) return ret; } - return devm_snd_soc_register_card(&pdev->dev, card); + ret = devm_snd_soc_register_card(&pdev->dev, card); + + of_node_put(platform_node); + of_node_put(hdmi_codec); + return ret; } #ifdef CONFIG_OF diff --git a/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c b/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c index 79ba2f2d8452..14ce8b93597f 100644 --- a/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c +++ b/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c @@ -720,7 +720,12 @@ mt8183_mt6358_ts3a227_max98357_dev_probe(struct platform_device *pdev) __func__, ret); } - return devm_snd_soc_register_card(&pdev->dev, card); + ret = devm_snd_soc_register_card(&pdev->dev, card); + + of_node_put(platform_node); + of_node_put(ec_codec); + of_node_put(hdmi_codec); + return ret; } #ifdef CONFIG_OF -- Gitee From e6df85ef05b5b4d4e6a045ac50820a9e74ff0669 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Fri, 24 Dec 2021 08:21:03 +0000 Subject: [PATCH 003/100] phy: mediatek: Fix missing check in mtk_mipi_tx_probe stable inclusion from stable-5.10.94 commit 082ff9e12b4a1c40142a1710f0534e65c48f1d51 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- [ Upstream commit 399c91c3f30531593e5ff6ca7b53f47092128669 ] The of_device_get_match_data() function may return NULL. Add check to prevent potential null dereference. Signed-off-by: Miaoqian Lin Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20211224082103.7658-1-linmq006@gmail.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: wenfei --- drivers/gpu/drm/mediatek/mtk_mipi_tx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_mipi_tx.c b/drivers/gpu/drm/mediatek/mtk_mipi_tx.c index 8cee2591e728..ccc742dc78bd 100644 --- a/drivers/gpu/drm/mediatek/mtk_mipi_tx.c +++ b/drivers/gpu/drm/mediatek/mtk_mipi_tx.c @@ -147,6 +147,8 @@ static int mtk_mipi_tx_probe(struct platform_device *pdev) return -ENOMEM; mipi_tx->driver_data = of_device_get_match_data(dev); + if (!mipi_tx->driver_data) + return -ENODEV; mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); mipi_tx->regs = devm_ioremap_resource(dev, mem); -- Gitee From 2cd2a0ff630f251c90c1d84247590f512910f44b Mon Sep 17 00:00:00 2001 From: Arnaud Pouliquen Date: Mon, 6 Dec 2021 20:07:58 +0100 Subject: [PATCH 004/100] rpmsg: core: Clean up resources on announce_create failure. stable inclusion from stable-5.10.94 commit 43e94431c313ca2270584a106da36b02d65978c8 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 8066c615cb69b7da8a94f59379847b037b3a5e46 upstream. During the rpmsg_dev_probe, if rpdev->ops->announce_create returns an error, the rpmsg device and default endpoint should be freed before exiting the function. Fixes: 5e619b48677c ("rpmsg: Split rpmsg core and virtio backend") Suggested-by: Bjorn Andersson Signed-off-by: Arnaud Pouliquen Reviewed-by: Bjorn Andersson Cc: stable Link: https://lore.kernel.org/r/20211206190758.10004-1-arnaud.pouliquen@foss.st.com Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/rpmsg/rpmsg_core.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c index 91de940896e3..028ca5961bc2 100644 --- a/drivers/rpmsg/rpmsg_core.c +++ b/drivers/rpmsg/rpmsg_core.c @@ -473,13 +473,25 @@ static int rpmsg_dev_probe(struct device *dev) err = rpdrv->probe(rpdev); if (err) { dev_err(dev, "%s: failed: %d\n", __func__, err); - if (ept) - rpmsg_destroy_ept(ept); - goto out; + goto destroy_ept; } - if (ept && rpdev->ops->announce_create) + if (ept && rpdev->ops->announce_create) { err = rpdev->ops->announce_create(rpdev); + if (err) { + dev_err(dev, "failed to announce creation\n"); + goto remove_rpdev; + } + } + + return 0; + +remove_rpdev: + if (rpdrv->remove) + rpdrv->remove(rpdev); +destroy_ept: + if (ept) + rpmsg_destroy_ept(ept); out: return err; } -- Gitee From 64910abfbc7e844e4892687c7ce4f7a5a302ba3b Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 17 Dec 2021 10:03:30 +0100 Subject: [PATCH 005/100] crypto: omap-aes - Fix broken pm_runtime_and_get() usage stable inclusion from stable-5.10.94 commit 2031e0246e459cea7a01b342b5e4b87de8fc090c category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit c2aec59be093bd44627bc4f6bc67e4614a93a7b6 upstream. This fix is basically the same as 3d6b661330a7 ("crypto: stm32 - Revert broken pm_runtime_resume_and_get changes"), just for the omap driver. If the return value isn't used, then pm_runtime_get_sync() has to be used for ensuring that the usage count is balanced. Fixes: 1f34cc4a8da3 ("crypto: omap-aes - Fix PM reference leak on omap-aes.c") Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/crypto/omap-aes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 9b968ac4ee7b..a196bb8b1701 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -1302,7 +1302,7 @@ static int omap_aes_suspend(struct device *dev) static int omap_aes_resume(struct device *dev) { - pm_runtime_resume_and_get(dev); + pm_runtime_get_sync(dev); return 0; } #endif -- Gitee From 72f2d437dc6216d4af1b5c9fbb96d0a734ffff78 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 20 Dec 2021 20:50:22 +0100 Subject: [PATCH 006/100] crypto: stm32/crc32 - Fix kernel BUG triggered in probe() stable inclusion from stable-5.10.94 commit 9e6ff2d5725b70b6f9430597a072cbbc0b36223f category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 29009604ad4e3ef784fd9b9fef6f23610ddf633d upstream. The include/linux/crypto.h struct crypto_alg field cra_driver_name description states "Unique name of the transformation provider. " ... " this contains the name of the chip or provider and the name of the transformation algorithm." In case of the stm32-crc driver, field cra_driver_name is identical for all registered transformation providers and set to the name of the driver itself, which is incorrect. This patch fixes it by assigning a unique cra_driver_name to each registered transformation provider. The kernel crash is triggered when the driver calls crypto_register_shashes() which calls crypto_register_shash(), which calls crypto_register_alg(), which calls __crypto_register_alg(), which returns -EEXIST, which is propagated back through this call chain. Upon -EEXIST from crypto_register_shash(), the crypto_register_shashes() starts unregistering the providers back, and calls crypto_unregister_shash(), which calls crypto_unregister_alg(), and this is where the BUG() triggers due to incorrect cra_refcnt. Fixes: b51dbe90912a ("crypto: stm32 - Support for STM32 CRC32 crypto module") Signed-off-by: Marek Vasut Cc: # 4.12+ Cc: Alexandre Torgue Cc: Fabien Dessenne Cc: Herbert Xu Cc: Lionel Debieve Cc: Nicolas Toromanoff Cc: linux-arm-kernel@lists.infradead.org Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-crypto@vger.kernel.org Acked-by: Nicolas Toromanoff Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/crypto/stm32/stm32-crc32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c index 75867c0b0017..be1bf39a317d 100644 --- a/drivers/crypto/stm32/stm32-crc32.c +++ b/drivers/crypto/stm32/stm32-crc32.c @@ -279,7 +279,7 @@ static struct shash_alg algs[] = { .digestsize = CHKSUM_DIGEST_SIZE, .base = { .cra_name = "crc32", - .cra_driver_name = DRIVER_NAME, + .cra_driver_name = "stm32-crc32-crc32", .cra_priority = 200, .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, @@ -301,7 +301,7 @@ static struct shash_alg algs[] = { .digestsize = CHKSUM_DIGEST_SIZE, .base = { .cra_name = "crc32c", - .cra_driver_name = DRIVER_NAME, + .cra_driver_name = "stm32-crc32-crc32c", .cra_priority = 200, .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, -- Gitee From 099c339cb052f7984c7f38bd17abef3b3f0090d6 Mon Sep 17 00:00:00 2001 From: Meng Li Date: Mon, 1 Nov 2021 11:13:53 +0800 Subject: [PATCH 007/100] crypto: caam - replace this_cpu_ptr with raw_cpu_ptr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.94 commit 4f0762ac32b57bc52fc8357641962b669c42269b category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit efd21e10fc3bf4c6da122470a5ae89ec4ed8d180 upstream. When enable the kernel debug config, there is below calltrace detected: BUG: using smp_processor_id() in preemptible [00000000] code: cryptomgr_test/339 caller is debug_smp_processor_id+0x20/0x30 CPU: 9 PID: 339 Comm: cryptomgr_test Not tainted 5.10.63-yocto-standard #1 Hardware name: NXP Layerscape LX2160ARDB (DT) Call trace: dump_backtrace+0x0/0x1a0 show_stack+0x24/0x30 dump_stack+0xf0/0x13c check_preemption_disabled+0x100/0x110 debug_smp_processor_id+0x20/0x30 dpaa2_caam_enqueue+0x10c/0x25c ...... cryptomgr_test+0x38/0x60 kthread+0x158/0x164 ret_from_fork+0x10/0x38 According to the comment in commit ac5d15b4519f("crypto: caam/qi2 - use affine DPIOs "), because preemption is no longer disabled while trying to enqueue an FQID, it might be possible to run the enqueue on a different CPU(due to migration, when in process context), however this wouldn't be a functionality issue. But there will be above calltrace when enable kernel debug config. So, replace this_cpu_ptr with raw_cpu_ptr to avoid above call trace. Fixes: ac5d15b4519f ("crypto: caam/qi2 - use affine DPIOs") Cc: stable@vger.kernel.org Signed-off-by: Meng Li Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/crypto/caam/caamalg_qi2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c index a780e627838a..5a40c7d10cc9 100644 --- a/drivers/crypto/caam/caamalg_qi2.c +++ b/drivers/crypto/caam/caamalg_qi2.c @@ -5467,7 +5467,7 @@ int dpaa2_caam_enqueue(struct device *dev, struct caam_request *req) dpaa2_fd_set_len(&fd, dpaa2_fl_get_len(&req->fd_flt[1])); dpaa2_fd_set_flc(&fd, req->flc_dma); - ppriv = this_cpu_ptr(priv->ppriv); + ppriv = raw_cpu_ptr(priv->ppriv); for (i = 0; i < (priv->dpseci_attr.num_tx_queues << 1); i++) { err = dpaa2_io_service_enqueue_fq(ppriv->dpio, ppriv->req_fqid, &fd); -- Gitee From 2cb48e4a96a47edd0c0d888eb471e71d08b359b9 Mon Sep 17 00:00:00 2001 From: Petr Cvachoucek Date: Mon, 30 Aug 2021 21:20:37 +0200 Subject: [PATCH 008/100] ubifs: Error path in ubifs_remount_rw() seems to wrongly free write buffers stable inclusion from stable-5.10.94 commit 76006d33f1c8cabe28e5f3adfad0f38f273195ad category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 3fea4d9d160186617ff40490ae01f4f4f36b28ff upstream. it seems freeing the write buffers in the error path of the ubifs_remount_rw() is wrong. It leads later to a kernel oops like this: [10016.431274] UBIFS (ubi0:0): start fixing up free space [10090.810042] UBIFS (ubi0:0): free space fixup complete [10090.814623] UBIFS error (ubi0:0 pid 512): ubifs_remount_fs: cannot spawn "ubifs_bgt0_0", error -4 [10101.915108] UBIFS (ubi0:0): background thread "ubifs_bgt0_0" started, PID 517 [10105.275498] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000030 [10105.284352] Mem abort info: [10105.287160] ESR = 0x96000006 [10105.290252] EC = 0x25: DABT (current EL), IL = 32 bits [10105.295592] SET = 0, FnV = 0 [10105.298652] EA = 0, S1PTW = 0 [10105.301848] Data abort info: [10105.304723] ISV = 0, ISS = 0x00000006 [10105.308573] CM = 0, WnR = 0 [10105.311564] user pgtable: 4k pages, 48-bit VAs, pgdp=00000000f03d1000 [10105.318034] [0000000000000030] pgd=00000000f6cee003, pud=00000000f4884003, pmd=0000000000000000 [10105.326783] Internal error: Oops: 96000006 [#1] PREEMPT SMP [10105.332355] Modules linked in: ath10k_pci ath10k_core ath mac80211 libarc4 cfg80211 nvme nvme_core cryptodev(O) [10105.342468] CPU: 3 PID: 518 Comm: touch Tainted: G O 5.4.3 #1 [10105.349517] Hardware name: HYPEX CPU (DT) [10105.353525] pstate: 40000005 (nZcv daif -PAN -UAO) [10105.358324] pc : atomic64_try_cmpxchg_acquire.constprop.22+0x8/0x34 [10105.364596] lr : mutex_lock+0x1c/0x34 [10105.368253] sp : ffff000075633aa0 [10105.371563] x29: ffff000075633aa0 x28: 0000000000000001 [10105.376874] x27: ffff000076fa80c8 x26: 0000000000000004 [10105.382185] x25: 0000000000000030 x24: 0000000000000000 [10105.387495] x23: 0000000000000000 x22: 0000000000000038 [10105.392807] x21: 000000000000000c x20: ffff000076fa80c8 [10105.398119] x19: ffff000076fa8000 x18: 0000000000000000 [10105.403429] x17: 0000000000000000 x16: 0000000000000000 [10105.408741] x15: 0000000000000000 x14: fefefefefefefeff [10105.414052] x13: 0000000000000000 x12: 0000000000000fe0 [10105.419364] x11: 0000000000000fe0 x10: ffff000076709020 [10105.424675] x9 : 0000000000000000 x8 : 00000000000000a0 [10105.429986] x7 : ffff000076fa80f4 x6 : 0000000000000030 [10105.435297] x5 : 0000000000000000 x4 : 0000000000000000 [10105.440609] x3 : 0000000000000000 x2 : ffff00006f276040 [10105.445920] x1 : ffff000075633ab8 x0 : 0000000000000030 [10105.451232] Call trace: [10105.453676] atomic64_try_cmpxchg_acquire.constprop.22+0x8/0x34 [10105.459600] ubifs_garbage_collect+0xb4/0x334 [10105.463956] ubifs_budget_space+0x398/0x458 [10105.468139] ubifs_create+0x50/0x180 [10105.471712] path_openat+0x6a0/0x9b0 [10105.475284] do_filp_open+0x34/0x7c [10105.478771] do_sys_open+0x78/0xe4 [10105.482170] __arm64_sys_openat+0x1c/0x24 [10105.486180] el0_svc_handler+0x84/0xc8 [10105.489928] el0_svc+0x8/0xc [10105.492808] Code: 52800013 17fffffb d2800003 f9800011 (c85ffc05) [10105.498903] ---[ end trace 46b721d93267a586 ]--- To reproduce the problem: 1. Filesystem initially mounted read-only, free space fixup flag set. 2. mount -o remount,rw 3. it takes some time (free space fixup running) ... try to terminate running mount by CTRL-C ... does not respond, only after free space fixup is complete ... then "ubifs_remount_fs: cannot spawn "ubifs_bgt0_0", error -4" 4. mount -o remount,rw ... now finished instantly (fixup already done). 5. Create file or just unmount the filesystem and we get the oops. Cc: Fixes: b50b9f408502 ("UBIFS: do not free write-buffers when in R/O mode") Signed-off-by: Petr Cvachoucek Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- fs/ubifs/super.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 98cbd9cac246..2dd79fd123ff 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1853,7 +1853,6 @@ static int ubifs_remount_rw(struct ubifs_info *c) kthread_stop(c->bgt); c->bgt = NULL; } - free_wbufs(c); kfree(c->write_reserve_buf); c->write_reserve_buf = NULL; vfree(c->ileb_buf); -- Gitee From 0491f274e619a81496be978090a1e48f5697bfa1 Mon Sep 17 00:00:00 2001 From: Patrick Williams Date: Thu, 23 Dec 2021 09:49:31 -0600 Subject: [PATCH 009/100] tpm: fix NPE on probe for missing device stable inclusion from stable-5.10.94 commit 05026c4e94c9bc723013ea0f7a93fbae2bb0b257 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 84cc69589700b90a4c8d27b481a51fce8cca6051 upstream. When using the tpm_tis-spi driver on a system missing the physical TPM, a null pointer exception was observed. [ 0.938677] Unable to handle kernel NULL pointer dereference at virtual address 00000004 [ 0.939020] pgd = 10c753cb [ 0.939237] [00000004] *pgd=00000000 [ 0.939808] Internal error: Oops: 5 [#1] SMP ARM [ 0.940157] CPU: 0 PID: 48 Comm: kworker/u4:1 Not tainted 5.15.10-dd1e40c #1 [ 0.940364] Hardware name: Generic DT based system [ 0.940601] Workqueue: events_unbound async_run_entry_fn [ 0.941048] PC is at tpm_tis_remove+0x28/0xb4 [ 0.941196] LR is at tpm_tis_core_init+0x170/0x6ac This is due to an attempt in 'tpm_tis_remove' to use the drvdata, which was not initialized in 'tpm_tis_core_init' prior to the first error. Move the initialization of drvdata earlier so 'tpm_tis_remove' has access to it. Signed-off-by: Patrick Williams Fixes: 79ca6f74dae0 ("tpm: fix Atmel TPM crash caused by too frequent queries") Cc: stable@vger.kernel.org Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/char/tpm/tpm_tis_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index 36d1ad8f479d..dc56b976d816 100644 --- a/drivers/char/tpm/tpm_tis_core.c +++ b/drivers/char/tpm/tpm_tis_core.c @@ -950,6 +950,8 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq, priv->timeout_max = TPM_TIMEOUT_USECS_MAX; priv->phy_ops = phy_ops; + dev_set_drvdata(&chip->dev, priv); + rc = tpm_tis_read32(priv, TPM_DID_VID(0), &vendor); if (rc < 0) return rc; @@ -962,8 +964,6 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq, priv->timeout_max = TIS_TIMEOUT_MAX_ATML; } - dev_set_drvdata(&chip->dev, priv); - if (is_bsw()) { priv->ilb_base_addr = ioremap(INTEL_LEGACY_BLK_BASE_ADDR, ILB_REMAP_SIZE); -- Gitee From 2f9f5791f36972bace4070fa3124669deb1426f3 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Wed, 22 Dec 2021 13:48:12 +0900 Subject: [PATCH 010/100] spi: uniphier: Fix a bug that doesn't point to private data correctly stable inclusion from stable-5.10.94 commit 67b078d996f723fe835edc2997a6fc4f31a16d02 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 80bb73a9fbcde4ecc55e12f10c73fabbe68a24d1 upstream. In uniphier_spi_remove(), there is a wrong code to get private data from the platform device, so the driver can't be removed properly. The driver should get spi_master from the platform device and retrieve the private data from it. Cc: Fixes: 5ba155a4d4cc ("spi: add SPI controller driver for UniPhier SoC") Signed-off-by: Kunihiko Hayashi Link: https://lore.kernel.org/r/1640148492-32178-1-git-send-email-hayashi.kunihiko@socionext.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/spi/spi-uniphier.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi-uniphier.c b/drivers/spi/spi-uniphier.c index 6a9ef8ee3cc9..e5c234aecf67 100644 --- a/drivers/spi/spi-uniphier.c +++ b/drivers/spi/spi-uniphier.c @@ -767,12 +767,13 @@ static int uniphier_spi_probe(struct platform_device *pdev) static int uniphier_spi_remove(struct platform_device *pdev) { - struct uniphier_spi_priv *priv = platform_get_drvdata(pdev); + struct spi_master *master = platform_get_drvdata(pdev); + struct uniphier_spi_priv *priv = spi_master_get_devdata(master); - if (priv->master->dma_tx) - dma_release_channel(priv->master->dma_tx); - if (priv->master->dma_rx) - dma_release_channel(priv->master->dma_rx); + if (master->dma_tx) + dma_release_channel(master->dma_tx); + if (master->dma_rx) + dma_release_channel(master->dma_rx); clk_disable_unprepare(priv->clk); -- Gitee From 97e8136ade1a5e5513146f574719da401c17ba47 Mon Sep 17 00:00:00 2001 From: Oleksandr Andrushchenko Date: Fri, 10 Dec 2021 11:28:17 +0200 Subject: [PATCH 011/100] xen/gntdev: fix unmap notification order stable inclusion from stable-5.10.94 commit 9fbaddd783fdc7c074ac9af5aef9baa45596b8b3 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit ce2f46f3531a03781181b7f4bd1ff9f8c5086e7e upstream. While working with Xen's libxenvchan library I have faced an issue with unmap notifications sent in wrong order if both UNMAP_NOTIFY_SEND_EVENT and UNMAP_NOTIFY_CLEAR_BYTE were requested: first we send an event channel notification and then clear the notification byte which renders in the below inconsistency (cli_live is the byte which was requested to be cleared on unmap): [ 444.514243] gntdev_put_map UNMAP_NOTIFY_SEND_EVENT map->notify.event 6 libxenvchan_is_open cli_live 1 [ 444.515239] __unmap_grant_pages UNMAP_NOTIFY_CLEAR_BYTE at 14 Thus it is not possible to reliably implement the checks like - wait for the notification (UNMAP_NOTIFY_SEND_EVENT) - check the variable (UNMAP_NOTIFY_CLEAR_BYTE) because it is possible that the variable gets checked before it is cleared by the kernel. To fix that we need to re-order the notifications, so the variable is first gets cleared and then the event channel notification is sent. With this fix I can see the correct order of execution: [ 54.522611] __unmap_grant_pages UNMAP_NOTIFY_CLEAR_BYTE at 14 [ 54.537966] gntdev_put_map UNMAP_NOTIFY_SEND_EVENT map->notify.event 6 libxenvchan_is_open cli_live 0 Cc: stable@vger.kernel.org Signed-off-by: Oleksandr Andrushchenko Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20211210092817.580718-1-andr2000@gmail.com Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/xen/gntdev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index b9651f797676..54778aadf618 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -240,13 +240,13 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map) if (!refcount_dec_and_test(&map->users)) return; + if (map->pages && !use_ptemod) + unmap_grant_pages(map, 0, map->count); + if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { notify_remote_via_evtchn(map->notify.event); evtchn_put(map->notify.event); } - - if (map->pages && !use_ptemod) - unmap_grant_pages(map, 0, map->count); gntdev_free_map(map); } -- Gitee From e20d2d3877464ea19e87799ec7e4a48525242085 Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Mon, 22 Nov 2021 17:05:31 +0800 Subject: [PATCH 012/100] fuse: Pass correct lend value to filemap_write_and_wait_range() stable inclusion from stable-5.10.94 commit 9668cf9e4af0a0989c5f8f975213a58e74568a17 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit e388164ea385f04666c4633f5dc4f951fca71890 upstream. The acceptable maximum value of lend parameter in filemap_write_and_wait_range() is LLONG_MAX rather than -1. And there is also some logic depending on LLONG_MAX check in write_cache_pages(). So let's pass LLONG_MAX to filemap_write_and_wait_range() in fuse_writeback_range() instead. Fixes: 59bda8ecee2f ("fuse: flush extending writes") Signed-off-by: Xie Yongji Cc: # v5.15 Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- fs/fuse/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 05def4d20161..d1bc96ee6eb3 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -3252,7 +3252,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) static int fuse_writeback_range(struct inode *inode, loff_t start, loff_t end) { - int err = filemap_write_and_wait_range(inode->i_mapping, start, -1); + int err = filemap_write_and_wait_range(inode->i_mapping, start, LLONG_MAX); if (!err) fuse_sync_writes(inode); -- Gitee From 0dda1e356a8d3466485b558a1baaa7d683b0673d Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 18 Dec 2021 10:58:56 +0100 Subject: [PATCH 013/100] serial: Fix incorrect rs485 polarity on uart open stable inclusion from stable-5.10.94 commit 13518f058fdee42d7b624e22add590fa1e5badbf category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit d3b3404df318504ec084213ab1065b73f49b0f1d upstream. Commit a6845e1e1b78 ("serial: core: Consider rs485 settings to drive RTS") sought to deassert RTS when opening an rs485-enabled uart port. That way, the transceiver does not occupy the bus until it transmits data. Unfortunately, the commit mixed up the logic and *asserted* RTS instead of *deasserting* it: The commit amended uart_port_dtr_rts(), which raises DTR and RTS when opening an rs232 port. "Raising" actually means lowering the signal that's coming out of the uart, because an rs232 transceiver not only changes a signal's voltage level, it also *inverts* the signal. See the simplified schematic in the MAX232 datasheet for an example: https://www.ti.com/lit/ds/symlink/max232.pdf So, to raise RTS on an rs232 port, TIOCM_RTS is *set* in port->mctrl and that results in the signal being driven low. In contrast to rs232, the signal level for rs485 Transmit Enable is the identity, not the inversion: If the transceiver expects a "high" RTS signal for Transmit Enable, the signal coming out of the uart must also be high, so TIOCM_RTS must be *cleared* in port->mctrl. The commit did the exact opposite, but it's easy to see why given the confusing semantics of rs232 and rs485. Fix it. Fixes: a6845e1e1b78 ("serial: core: Consider rs485 settings to drive RTS") Cc: stable@vger.kernel.org # v4.14+ Cc: Rafael Gago Castano Cc: Jan Kiszka Cc: Su Bao Cheng Signed-off-by: Lukas Wunner Link: https://lore.kernel.org/r/9395767847833f2f3193c49cde38501eeb3b5669.1639821059.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/tty/serial/serial_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 55108db5b64b..be0d9922e320 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -162,7 +162,7 @@ static void uart_port_dtr_rts(struct uart_port *uport, int raise) int RTS_after_send = !!(uport->rs485.flags & SER_RS485_RTS_AFTER_SEND); if (raise) { - if (rs485_on && !RTS_after_send) { + if (rs485_on && RTS_after_send) { uart_set_mctrl(uport, TIOCM_DTR); uart_clear_mctrl(uport, TIOCM_RTS); } else { @@ -171,7 +171,7 @@ static void uart_port_dtr_rts(struct uart_port *uport, int raise) } else { unsigned int clear = TIOCM_DTR; - clear |= (!rs485_on || !RTS_after_send) ? TIOCM_RTS : 0; + clear |= (!rs485_on || RTS_after_send) ? TIOCM_RTS : 0; uart_clear_mctrl(uport, clear); } } -- Gitee From 8b189a423e1de97c2e4ae76d63bf33e8ac82dfed Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 15 Nov 2021 19:46:04 +0300 Subject: [PATCH 014/100] cputime, cpuacct: Include guest time in user time in cpuacct.stat stable inclusion from stable-5.10.94 commit b72075e395b33761c574c4afbf6bb62fcab3ce6a category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 9731698ecb9c851f353ce2496292ff9fcea39dff upstream. cpuacct.stat in no-root cgroups shows user time without guest time included int it. This doesn't match with user time shown in root cpuacct.stat and /proc//stat. This also affects cgroup2's cpu.stat in the same way. Make account_guest_time() to add user time to cgroup's cpustat to fix this. Fixes: ef12fefabf94 ("cpuacct: add per-cgroup utime/stime statistics") Signed-off-by: Andrey Ryabinin Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Daniel Jordan Acked-by: Tejun Heo Cc: Link: https://lore.kernel.org/r/20211115164607.23784-1-arbn@yandex-team.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- kernel/sched/cputime.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index cf87d3fff5dd..40c657b1653a 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -162,10 +162,10 @@ void account_guest_time(struct task_struct *p, u64 cputime) /* Add guest time to cpustat. */ if (task_nice(p) > 0) { - cpustat[CPUTIME_NICE] += cputime; + task_group_account_field(p, CPUTIME_NICE, cputime); cpustat[CPUTIME_GUEST_NICE] += cputime; } else { - cpustat[CPUTIME_USER] += cputime; + task_group_account_field(p, CPUTIME_USER, cputime); cpustat[CPUTIME_GUEST] += cputime; } } -- Gitee From a80d9af8d0a031f3f3217e21938e9b8a7efc3499 Mon Sep 17 00:00:00 2001 From: Xiangyang Zhang Date: Fri, 7 Jan 2022 23:02:42 +0800 Subject: [PATCH 015/100] tracing/kprobes: 'nmissed' not showed correctly for kretprobe stable inclusion from stable-5.10.94 commit c524f4cfb3e59d412dc6b532481bbe4190f9e5a6 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit dfea08a2116fe327f79d8f4d4b2cf6e0c88be11f upstream. The 'nmissed' column of the 'kprobe_profile' file for kretprobe is not showed correctly, kretprobe can be skipped by two reasons, shortage of kretprobe_instance which is counted by tk->rp.nmissed, and kprobe itself is missed by some reason, so to show the sum. Link: https://lkml.kernel.org/r/20220107150242.5019-1-xyz.sun.ok@gmail.com Cc: stable@vger.kernel.org Fixes: 4a846b443b4e ("tracing/kprobes: Cleanup kprobe tracer code") Acked-by: Masami Hiramatsu Signed-off-by: Xiangyang Zhang Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- kernel/trace/trace_kprobe.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 552dbc9d5226..d8a9fc794126 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1183,15 +1183,18 @@ static int probes_profile_seq_show(struct seq_file *m, void *v) { struct dyn_event *ev = v; struct trace_kprobe *tk; + unsigned long nmissed; if (!is_trace_kprobe(ev)) return 0; tk = to_trace_kprobe(ev); + nmissed = trace_kprobe_is_return(tk) ? + tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed; seq_printf(m, " %-44s %15lu %15lu\n", trace_probe_name(&tk->tp), trace_kprobe_nhit(tk), - tk->rp.kp.nmissed); + nmissed); return 0; } -- Gitee From 750d0107b9379d8d4fc01504da9b4b48be094ad8 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Fri, 10 Dec 2021 09:06:21 +0200 Subject: [PATCH 016/100] iwlwifi: mvm: Increase the scan timeout guard to 30 seconds stable inclusion from stable-5.10.94 commit 798754ba48b7a8f6dc42883b0122859a281c9644 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit ced50f1133af12f7521bb777fcf4046ca908fb77 upstream. With the introduction of 6GHz channels the scan guard timeout should be adjusted to account for the following extreme case: - All 6GHz channels are scanned passively: 58 channels. - The scan is fragmented with the following parameters: 3 fragments, 95 TUs suspend time, 44 TUs maximal out of channel time. The above would result with scan time of more than 24 seconds. Thus, set the timeout to 30 seconds. Cc: stable@vger.kernel.org Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20211210090244.3c851b93aef5.I346fa2e1d79220a6770496e773c6f87a2ad9e6c4@changeid Signed-off-by: Luca Coelho Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index a5d90e028833..46255d2c555b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -2157,7 +2157,7 @@ static int iwl_mvm_check_running_scans(struct iwl_mvm *mvm, int type) return -EIO; } -#define SCAN_TIMEOUT 20000 +#define SCAN_TIMEOUT 30000 void iwl_mvm_scan_timeout_wk(struct work_struct *work) { -- Gitee From 3f3975564e4e77678367e18ebfedb68f1ce2fda5 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Thu, 4 Nov 2021 07:14:44 +0100 Subject: [PATCH 017/100] s390/mm: fix 2KB pgtable release race stable inclusion from stable-5.10.94 commit ecb71f7bd584cd56713b139b6429a8ddaef30f7b category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit c2c224932fd0ee6854d6ebfc8d059c2bcad86606 upstream. There is a race on concurrent 2KB-pgtables release paths when both upper and lower halves of the containing parent page are freed, one via page_table_free_rcu() + __tlb_remove_table(), and the other via page_table_free(). The race might lead to a corruption as result of remove of list item in page_table_free() concurrently with __free_page() in __tlb_remove_table(). Let's assume first the lower and next the upper 2KB-pgtables are freed from a page. Since both halves of the page are allocated the tracking byte (bits 24-31 of the page _refcount) has value of 0x03 initially: CPU0 CPU1 ---- ---- page_table_free_rcu() // lower half { // _refcount[31..24] == 0x03 ... atomic_xor_bits(&page->_refcount, 0x11U << (0 + 24)); // _refcount[31..24] <= 0x12 ... table = table | (1U << 0); tlb_remove_table(tlb, table); } ... __tlb_remove_table() { // _refcount[31..24] == 0x12 mask = _table & 3; // mask <= 0x01 ... page_table_free() // upper half { // _refcount[31..24] == 0x12 ... atomic_xor_bits( &page->_refcount, 1U << (1 + 24)); // _refcount[31..24] <= 0x10 // mask <= 0x10 ... atomic_xor_bits(&page->_refcount, mask << (4 + 24)); // _refcount[31..24] <= 0x00 // mask <= 0x00 ... if (mask != 0) // == false break; fallthrough; ... if (mask & 3) // == false ... else __free_page(page); list_del(&page->lru); ^^^^^^^^^^^^^^^^^^ RACE! ^^^^^^^^^^^^^^^^^^^^^ } ... } The problem is page_table_free() releases the page as result of lower nibble unset and __tlb_remove_table() observing zero too early. With this update page_table_free() will use the similar logic as page_table_free_rcu() + __tlb_remove_table(), and mark the fragment as pending for removal in the upper nibble until after the list_del(). In other words, the parent page is considered as unreferenced and safe to release only when the lower nibble is cleared already and unsetting a bit in upper nibble results in that nibble turned zero. Cc: stable@vger.kernel.org Suggested-by: Vlastimil Babka Reviewed-by: Gerald Schaefer Signed-off-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- arch/s390/mm/pgalloc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 11d2c8395e2a..6d99b1be0082 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -253,13 +253,15 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) /* Free 2K page table fragment of a 4K page */ bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); spin_lock_bh(&mm->context.lock); - mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24)); + mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24)); mask >>= 24; if (mask & 3) list_add(&page->lru, &mm->context.pgtable_list); else list_del(&page->lru); spin_unlock_bh(&mm->context.lock); + mask = atomic_xor_bits(&page->_refcount, 0x10U << (bit + 24)); + mask >>= 24; if (mask != 0) return; } else { -- Gitee From a1dc77a84be4c3838b1c3a27f0cdfecf036fbcd8 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 1 Dec 2021 14:59:29 +0200 Subject: [PATCH 018/100] device property: Fix fwnode_graph_devcon_match() fwnode leak stable inclusion from stable-5.10.94 commit 6c1e3d8b1bff4c861c9ebe2d37a5b296fc7ee207 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 4a7f4110f79163fd53ea65438041994ed615e3af upstream. For each endpoint it encounters, fwnode_graph_devcon_match() checks whether the endpoint's remote port parent device is available. If it is not, it ignores the endpoint but does not put the reference to the remote endpoint port parent fwnode. For available devices the fwnode handle reference is put as expected. Put the reference for unavailable devices now. Fixes: 637e9e52b185 ("device connection: Find device connections also from device graphs") Cc: 5.1+ # 5.1+ Signed-off-by: Sakari Ailus Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/base/property.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/base/property.c b/drivers/base/property.c index 4c43d30145c6..cf88a5554d9c 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -1195,8 +1195,10 @@ fwnode_graph_devcon_match(struct fwnode_handle *fwnode, const char *con_id, fwnode_graph_for_each_endpoint(fwnode, ep) { node = fwnode_graph_get_remote_port_parent(ep); - if (!fwnode_device_is_available(node)) + if (!fwnode_device_is_available(node)) { + fwnode_handle_put(node); continue; + } ret = match(node, con_id, data); fwnode_handle_put(node); -- Gitee From 7155ed34a025f8f6318e568687a7fc0ce648f9c4 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 17 Dec 2021 11:59:28 +0100 Subject: [PATCH 019/100] drm/etnaviv: limit submit sizes stable inclusion from stable-5.10.94 commit 605583fcccb51c73362f5c2b326693da692e4492 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 6dfa2fab8ddd46faa771a102672176bee7a065de upstream. Currently we allow rediculous amounts of kernel memory being allocated via the etnaviv GEM_SUBMIT ioctl, which is a pretty easy DoS vector. Put some reasonable limits in to fix this. The commandstream size is limited to 64KB, which was already a soft limit on older kernels after which the kernel only took submits on a best effort base, so there is no userspace that tries to submit commandstreams larger than this. Even if the whole commandstream is a single incrementing address load, the size limit also limits the number of potential relocs and referenced buffers to slightly under 64K, so use the same limit for those arguments. The performance monitoring infrastructure currently supports less than 50 performance counter signals, so limiting them to 128 on a single submit seems like a reasonably future-proof number for now. This number can be bumped if needed without breaking the interface. Cc: stable@vger.kernel.org Reported-by: Dan Carpenter Signed-off-by: Lucas Stach Reviewed-by: Christian Gmeiner Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index 5f24cc52c287..ed2c50011d44 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -469,6 +469,12 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, return -EINVAL; } + if (args->stream_size > SZ_64K || args->nr_relocs > SZ_64K || + args->nr_bos > SZ_64K || args->nr_pmrs > 128) { + DRM_ERROR("submit arguments out of size limits\n"); + return -EINVAL; + } + /* * Copy the command submission and bo array to kernel space in * one go, and do this outside of any locks. -- Gitee From 71d429e0495d0b39863b5c102739023b64e27b13 Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 7 Mar 2021 12:48:53 -0500 Subject: [PATCH 020/100] drm/nouveau/kms/nv04: use vzalloc for nv04_display stable inclusion from stable-5.10.94 commit 8cbbf4a6f1acdf0f392694ebc719b584a40400f6 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit bd6e07e72f37f34535bec7eebc807e5fcfe37b43 upstream. The struct is giant, and triggers an order-7 allocation (512K). There is no reason for this to be kmalloc-type memory, so switch to vmalloc. This should help loading nouveau on low-memory and/or long-running systems. Reported-by: Nathan E. Egge Signed-off-by: Ilia Mirkin Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs Reviewed-by: Karol Herbst Signed-off-by: Karol Herbst Link: https://gitlab.freedesktop.org/drm/nouveau/-/merge_requests/10 Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/gpu/drm/nouveau/dispnv04/disp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c index 7739f46470d3..99fee4d8cd31 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c @@ -205,7 +205,7 @@ nv04_display_destroy(struct drm_device *dev) nvif_notify_dtor(&disp->flip); nouveau_display(dev)->priv = NULL; - kfree(disp); + vfree(disp); nvif_object_unmap(&drm->client.device.object); } @@ -223,7 +223,7 @@ nv04_display_create(struct drm_device *dev) struct nv04_display *disp; int i, ret; - disp = kzalloc(sizeof(*disp), GFP_KERNEL); + disp = vzalloc(sizeof(*disp)); if (!disp) return -ENOMEM; -- Gitee From 10927f088ba2942e1c56f75071e15b86c21a4b20 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 3 Nov 2021 13:52:00 -0700 Subject: [PATCH 021/100] drm/bridge: analogix_dp: Make PSR-exit block less stable inclusion from stable-5.10.94 commit 9b78ee2341d4496688a157ae3fc901cc6c3c4385 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit c4c6ef229593366ab593d4d424addc7025b54a76 upstream. Prior to commit 6c836d965bad ("drm/rockchip: Use the helpers for PSR"), "PSR exit" used non-blocking analogix_dp_send_psr_spd(). The refactor started using the blocking variant, for a variety of reasons -- quoting Sean Paul's potentially-faulty memory: """ - To avoid racing a subsequent PSR entry (if exit takes a long time) - To avoid racing disable/modeset - We're not displaying new content while exiting PSR anyways, so there is minimal utility in allowing frames to be submitted - We're lying to userspace telling them frames are on the screen when we're just dropping them on the floor """ However, I'm finding that this blocking transition is causing upwards of 60+ ms of unneeded latency on PSR-exit, to the point that initial cursor movements when leaving PSR are unbearably jumpy. It turns out that we need to meet in the middle somewhere: Sean is right that we were "lying to userspace" with a non-blocking PSR-exit, but the new blocking behavior is also waiting too long: According to the eDP specification, the sink device must support PSR entry transitions from both state 4 (ACTIVE_RESYNC) and state 0 (INACTIVE). It also states that in ACTIVE_RESYNC, "the Sink device must display the incoming active frames from the Source device with no visible glitches and/or artifacts." Thus, for our purposes, we only need to wait for ACTIVE_RESYNC before moving on; we are ready to display video, and subsequent PSR-entry is safe. Tested on a Samsung Chromebook Plus (i.e., Rockchip RK3399 Gru Kevin), where this saves about 60ms of latency, for PSR-exit that used to take about 80ms. Fixes: 6c836d965bad ("drm/rockchip: Use the helpers for PSR") Cc: Cc: Zain Wang Cc: Tomasz Figa Cc: Heiko Stuebner Cc: Sean Paul Signed-off-by: Brian Norris Reviewed-by: Sean Paul Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20211103135112.v3.1.I67612ea073c3306c71b46a87be894f79707082df@changeid Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c index 914c569ab8c1..cab3f5c4e2fc 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c @@ -1086,11 +1086,21 @@ int analogix_dp_send_psr_spd(struct analogix_dp_device *dp, if (!blocking) return 0; + /* + * db[1]!=0: entering PSR, wait for fully active remote frame buffer. + * db[1]==0: exiting PSR, wait for either + * (a) ACTIVE_RESYNC - the sink "must display the + * incoming active frames from the Source device with no visible + * glitches and/or artifacts", even though timings may still be + * re-synchronizing; or + * (b) INACTIVE - the transition is fully complete. + */ ret = readx_poll_timeout(analogix_dp_get_psr_status, dp, psr_status, psr_status >= 0 && ((vsc->db[1] && psr_status == DP_PSR_SINK_ACTIVE_RFB) || - (!vsc->db[1] && psr_status == DP_PSR_SINK_INACTIVE)), 1500, - DP_TIMEOUT_PSR_LOOP_MS * 1000); + (!vsc->db[1] && (psr_status == DP_PSR_SINK_ACTIVE_RESYNC || + psr_status == DP_PSR_SINK_INACTIVE))), + 1500, DP_TIMEOUT_PSR_LOOP_MS * 1000); if (ret) { dev_warn(dp->dev, "Failed to apply PSR %d\n", ret); return ret; -- Gitee From 133d207d68bf86e5a14c47f321455cdca5fe0d8d Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Wed, 22 Dec 2021 16:01:31 +0000 Subject: [PATCH 022/100] parisc: Fix lpa and lpa_user defines stable inclusion from stable-5.10.94 commit eb44b1386af5751e0faec3a06e65078f146f81a7 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit db19c6f1a2a353cc8dec35b4789733a3cf6e2838 upstream. While working on the rewrite to the light-weight syscall and futex code, I experimented with using a hash index based on the user physical address of atomic variable. This exposed two problems with the lpa and lpa_user defines. Because of the copy instruction, the pa argument needs to be an early clobber argument. This prevents gcc from allocating the va and pa arguments to the same register. Secondly, the lpa instruction can cause a page fault so we need to catch exceptions. Signed-off-by: John David Anglin Fixes: 116d753308cf ("parisc: Use lpa instruction to load physical addresses in driver code") Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v5.2+ Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- arch/parisc/include/asm/special_insns.h | 44 ++++++++++++++----------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h index a303ae9a77f4..16ee41e77174 100644 --- a/arch/parisc/include/asm/special_insns.h +++ b/arch/parisc/include/asm/special_insns.h @@ -2,28 +2,32 @@ #ifndef __PARISC_SPECIAL_INSNS_H #define __PARISC_SPECIAL_INSNS_H -#define lpa(va) ({ \ - unsigned long pa; \ - __asm__ __volatile__( \ - "copy %%r0,%0\n\t" \ - "lpa %%r0(%1),%0" \ - : "=r" (pa) \ - : "r" (va) \ - : "memory" \ - ); \ - pa; \ +#define lpa(va) ({ \ + unsigned long pa; \ + __asm__ __volatile__( \ + "copy %%r0,%0\n" \ + "8:\tlpa %%r0(%1),%0\n" \ + "9:\n" \ + ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \ + : "=&r" (pa) \ + : "r" (va) \ + : "memory" \ + ); \ + pa; \ }) -#define lpa_user(va) ({ \ - unsigned long pa; \ - __asm__ __volatile__( \ - "copy %%r0,%0\n\t" \ - "lpa %%r0(%%sr3,%1),%0" \ - : "=r" (pa) \ - : "r" (va) \ - : "memory" \ - ); \ - pa; \ +#define lpa_user(va) ({ \ + unsigned long pa; \ + __asm__ __volatile__( \ + "copy %%r0,%0\n" \ + "8:\tlpa %%r0(%%sr3,%1),%0\n" \ + "9:\n" \ + ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \ + : "=&r" (pa) \ + : "r" (va) \ + : "memory" \ + ); \ + pa; \ }) #define mfctl(reg) ({ \ -- Gitee From 317a345165d9d66ce5603403e0afc79a863f09e9 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 16 Dec 2021 20:33:42 +1000 Subject: [PATCH 023/100] powerpc/64s/radix: Fix huge vmap false positive stable inclusion from stable-5.10.94 commit e09f47e77b6e3148c02d88ef91e9ab16a005cc95 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 467ba14e1660b52a2f9338b484704c461bd23019 upstream. pmd_huge() is defined to false when HUGETLB_PAGE is not configured, but the vmap code still installs huge PMDs. This leads to false bad PMD errors when vunmapping because it is not seen as a huge PTE, and the bad PMD check catches it. The end result may not be much more serious than some bad pmd warning messages, because the pmd_none_or_clear_bad() does what we wanted and clears the huge PTE anyway. Fix this by checking pmd_is_leaf(), which checks for a PTE regardless of config options. The whole huge/large/leaf stuff is a tangled mess but that's kernel-wide and not something we can improve much in arch/powerpc code. pmd_page(), pud_page(), etc., called by vmalloc_to_page() on huge vmaps can similarly trigger a false VM_BUG_ON when CONFIG_HUGETLB_PAGE=n, so those checks are adjusted. The checks were added by commit d6eacedd1f0e ("powerpc/book3s: Use config independent helpers for page table walk"), while implementing a similar fix for other page table walking functions. Fixes: d909f9109c30 ("powerpc/64s/radix: Enable HAVE_ARCH_HUGE_VMAP") Cc: stable@vger.kernel.org # v5.3+ Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211216103342.609192-1-npiggin@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- arch/powerpc/mm/book3s64/radix_pgtable.c | 4 ++-- arch/powerpc/mm/pgtable_64.c | 14 +++++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 1d5eec847b88..295959487b76 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1152,7 +1152,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) int pud_clear_huge(pud_t *pud) { - if (pud_huge(*pud)) { + if (pud_is_leaf(*pud)) { pud_clear(pud); return 1; } @@ -1199,7 +1199,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) int pmd_clear_huge(pmd_t *pmd) { - if (pmd_huge(*pmd)) { + if (pmd_is_leaf(*pmd)) { pmd_clear(pmd); return 1; } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index cc6e2f94517f..aefc2bfdf104 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -102,7 +102,8 @@ EXPORT_SYMBOL(__pte_frag_size_shift); struct page *p4d_page(p4d_t p4d) { if (p4d_is_leaf(p4d)) { - VM_WARN_ON(!p4d_huge(p4d)); + if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) + VM_WARN_ON(!p4d_huge(p4d)); return pte_page(p4d_pte(p4d)); } return virt_to_page(p4d_page_vaddr(p4d)); @@ -112,7 +113,8 @@ struct page *p4d_page(p4d_t p4d) struct page *pud_page(pud_t pud) { if (pud_is_leaf(pud)) { - VM_WARN_ON(!pud_huge(pud)); + if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) + VM_WARN_ON(!pud_huge(pud)); return pte_page(pud_pte(pud)); } return virt_to_page(pud_page_vaddr(pud)); @@ -125,7 +127,13 @@ struct page *pud_page(pud_t pud) struct page *pmd_page(pmd_t pmd) { if (pmd_is_leaf(pmd)) { - VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd))); + /* + * vmalloc_to_page may be called on any vmap address (not only + * vmalloc), and it uses pmd_page() etc., when huge vmap is + * enabled so these checks can't be used. + */ + if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) + VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd))); return pte_page(pmd_pte(pmd)); } return virt_to_page(pmd_page_vaddr(pmd)); -- Gitee From 618bdc807059d5a0c35210ccc6dd9d71a0a64d25 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 29 Nov 2021 11:36:37 -0600 Subject: [PATCH 024/100] PCI: xgene: Fix IB window setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.94 commit 6cbe8f8deb6286a33937b9b95204c6f77b232796 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit c7a75d07827a1f33d566e18e6098379cc2a0c2b2 upstream. Commit 6dce5aa59e0b ("PCI: xgene: Use inbound resources for setup") broke PCI support on XGene. The cause is the IB resources are now sorted in address order instead of being in DT dma-ranges order. The result is which inbound registers are used for each region are swapped. I don't know the details about this h/w, but it appears that IB region 0 registers can't handle a size greater than 4GB. In any case, limiting the size for region 0 is enough to get back to the original assignment of dma-ranges to regions. Link: https://lore.kernel.org/all/CA+enf=v9rY_xnZML01oEgKLmvY1NGBUUhnSJaETmXtDtXfaczA@mail.gmail.com/ Link: https://lore.kernel.org/r/20211129173637.303201-1-robh@kernel.org Fixes: 6dce5aa59e0b ("PCI: xgene: Use inbound resources for setup") Reported-by: Stéphane Graber Tested-by: Stéphane Graber Signed-off-by: Rob Herring Signed-off-by: Lorenzo Pieralisi Reviewed-by: Krzysztof Wilczyński Cc: stable@vger.kernel.org # v5.5+ Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/pci/controller/pci-xgene.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/pci-xgene.c b/drivers/pci/controller/pci-xgene.c index c33b385ac918..b651b6f44469 100644 --- a/drivers/pci/controller/pci-xgene.c +++ b/drivers/pci/controller/pci-xgene.c @@ -467,7 +467,7 @@ static int xgene_pcie_select_ib_reg(u8 *ib_reg_mask, u64 size) return 1; } - if ((size > SZ_1K) && (size < SZ_1T) && !(*ib_reg_mask & (1 << 0))) { + if ((size > SZ_1K) && (size < SZ_4G) && !(*ib_reg_mask & (1 << 0))) { *ib_reg_mask |= (1 << 0); return 0; } -- Gitee From 663f36ca1f94594d3cb448c7de3127b4ba3f010f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 17 Dec 2021 15:17:09 +0100 Subject: [PATCH 025/100] PCI: pciehp: Use down_read/write_nested(reset_lock) to fix lockdep errors stable inclusion from stable-5.10.94 commit def2825b09ece8c8b6e2514c8f785073bf3a864e category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 085a9f43433f30cbe8a1ade62d9d7827c3217f4d upstream. Use down_read_nested() and down_write_nested() when taking the ctrl->reset_lock rw-sem, passing the number of PCIe hotplug controllers in the path to the PCI root bus as lock subclass parameter. This fixes the following false-positive lockdep report when unplugging a Lenovo X1C8 from a Lenovo 2nd gen TB3 dock: pcieport 0000:06:01.0: pciehp: Slot(1): Link Down pcieport 0000:06:01.0: pciehp: Slot(1): Card not present ============================================ WARNING: possible recursive locking detected 5.16.0-rc2+ #621 Not tainted -------------------------------------------- irq/124-pciehp/86 is trying to acquire lock: ffff8e5ac4299ef8 (&ctrl->reset_lock){.+.+}-{3:3}, at: pciehp_check_presence+0x23/0x80 but task is already holding lock: ffff8e5ac4298af8 (&ctrl->reset_lock){.+.+}-{3:3}, at: pciehp_ist+0xf3/0x180 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&ctrl->reset_lock); lock(&ctrl->reset_lock); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by irq/124-pciehp/86: #0: ffff8e5ac4298af8 (&ctrl->reset_lock){.+.+}-{3:3}, at: pciehp_ist+0xf3/0x180 #1: ffffffffa3b024e8 (pci_rescan_remove_lock){+.+.}-{3:3}, at: pciehp_unconfigure_device+0x31/0x110 #2: ffff8e5ac1ee2248 (&dev->mutex){....}-{3:3}, at: device_release_driver+0x1c/0x40 stack backtrace: CPU: 4 PID: 86 Comm: irq/124-pciehp Not tainted 5.16.0-rc2+ #621 Hardware name: LENOVO 20U90SIT19/20U90SIT19, BIOS N2WET30W (1.20 ) 08/26/2021 Call Trace: dump_stack_lvl+0x59/0x73 __lock_acquire.cold+0xc5/0x2c6 lock_acquire+0xb5/0x2b0 down_read+0x3e/0x50 pciehp_check_presence+0x23/0x80 pciehp_runtime_resume+0x5c/0xa0 device_for_each_child+0x45/0x70 pcie_port_device_runtime_resume+0x20/0x30 pci_pm_runtime_resume+0xa7/0xc0 __rpm_callback+0x41/0x110 rpm_callback+0x59/0x70 rpm_resume+0x512/0x7b0 __pm_runtime_resume+0x4a/0x90 __device_release_driver+0x28/0x240 device_release_driver+0x26/0x40 pci_stop_bus_device+0x68/0x90 pci_stop_bus_device+0x2c/0x90 pci_stop_and_remove_bus_device+0xe/0x20 pciehp_unconfigure_device+0x6c/0x110 pciehp_disable_slot+0x5b/0xe0 pciehp_handle_presence_or_link_change+0xc3/0x2f0 pciehp_ist+0x179/0x180 This lockdep warning is triggered because with Thunderbolt, hotplug ports are nested. When removing multiple devices in a daisy-chain, each hotplug port's reset_lock may be acquired recursively. It's never the same lock, so the lockdep splat is a false positive. Because locks at the same hierarchy level are never acquired recursively, a per-level lockdep class is sufficient to fix the lockdep warning. The choice to use one lockdep subclass per pcie-hotplug controller in the path to the root-bus was made to conserve class keys because their number is limited and the complexity grows quadratically with number of keys according to Documentation/locking/lockdep-design.rst. Link: https://lore.kernel.org/linux-pci/20190402021933.GA2966@mit.edu/ Link: https://lore.kernel.org/linux-pci/de684a28-9038-8fc6-27ca-3f6f2f6400d7@redhat.com/ Link: https://lore.kernel.org/r/20211217141709.379663-1-hdegoede@redhat.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=208855 Reported-by: "Theodore Ts'o" Signed-off-by: Hans de Goede Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/pci/hotplug/pciehp.h | 3 +++ drivers/pci/hotplug/pciehp_core.c | 2 +- drivers/pci/hotplug/pciehp_hpc.c | 21 ++++++++++++++++++--- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 4fd200d8b0a9..f1f789fe0637 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -72,6 +72,8 @@ extern int pciehp_poll_time; * @reset_lock: prevents access to the Data Link Layer Link Active bit in the * Link Status register and to the Presence Detect State bit in the Slot * Status register during a slot reset which may cause them to flap + * @depth: Number of additional hotplug ports in the path to the root bus, + * used as lock subclass for @reset_lock * @ist_running: flag to keep user request waiting while IRQ thread is running * @request_result: result of last user request submitted to the IRQ thread * @requester: wait queue to wake up on completion of user request, @@ -103,6 +105,7 @@ struct controller { struct hotplug_slot hotplug_slot; /* hotplug core interface */ struct rw_semaphore reset_lock; + unsigned int depth; unsigned int ist_running; int request_result; wait_queue_head_t requester; diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index ad3393930ecb..e7fe4b42f039 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -166,7 +166,7 @@ static void pciehp_check_presence(struct controller *ctrl) { int occupied; - down_read(&ctrl->reset_lock); + down_read_nested(&ctrl->reset_lock, ctrl->depth); mutex_lock(&ctrl->state_lock); occupied = pciehp_card_present_or_link_active(ctrl); diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 9d06939736c0..90da17c6da66 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -583,7 +583,7 @@ static void pciehp_ignore_dpc_link_change(struct controller *ctrl, * the corresponding link change may have been ignored above. * Synthesize it to ensure that it is acted on. */ - down_read(&ctrl->reset_lock); + down_read_nested(&ctrl->reset_lock, ctrl->depth); if (!pciehp_check_link_active(ctrl)) pciehp_request(ctrl, PCI_EXP_SLTSTA_DLLSC); up_read(&ctrl->reset_lock); @@ -746,7 +746,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) * Disable requests have higher priority than Presence Detect Changed * or Data Link Layer State Changed events. */ - down_read(&ctrl->reset_lock); + down_read_nested(&ctrl->reset_lock, ctrl->depth); if (events & DISABLE_SLOT) pciehp_handle_disable_request(ctrl); else if (events & (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC)) @@ -880,7 +880,7 @@ int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, int probe) if (probe) return 0; - down_write(&ctrl->reset_lock); + down_write_nested(&ctrl->reset_lock, ctrl->depth); if (!ATTN_BUTTN(ctrl)) { ctrl_mask |= PCI_EXP_SLTCTL_PDCE; @@ -936,6 +936,20 @@ static inline void dbg_ctrl(struct controller *ctrl) #define FLAG(x, y) (((x) & (y)) ? '+' : '-') +static inline int pcie_hotplug_depth(struct pci_dev *dev) +{ + struct pci_bus *bus = dev->bus; + int depth = 0; + + while (bus->parent) { + bus = bus->parent; + if (bus->self && bus->self->is_hotplug_bridge) + depth++; + } + + return depth; +} + struct controller *pcie_init(struct pcie_device *dev) { struct controller *ctrl; @@ -949,6 +963,7 @@ struct controller *pcie_init(struct pcie_device *dev) return NULL; ctrl->pcie = dev; + ctrl->depth = pcie_hotplug_depth(dev->port); pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP, &slot_cap); if (pdev->hotplug_user_indicators) -- Gitee From 0c33069e7bb8d0904f6ed6d36caf594c4c26812a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Wed, 24 Nov 2021 16:59:39 +0100 Subject: [PATCH 026/100] PCI: pci-bridge-emul: Make expansion ROM Base Address register read-only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.94 commit 2a0d437d8a76474452cb91d2fc729020c2371ad3 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 1c1a3b4d3e86b997a313ffb297c1129540882859 upstream. If expansion ROM is unsupported (which is the case of pci-bridge-emul.c driver) then ROM Base Address register must be implemented as read-only register that return 0 when read, same as for unused Base Address registers. Link: https://lore.kernel.org/r/20211124155944.1290-2-pali@kernel.org Fixes: 23a5fba4d941 ("PCI: Introduce PCI bridge emulated config space common logic") Signed-off-by: Pali Rohár Signed-off-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/pci/pci-bridge-emul.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/pci/pci-bridge-emul.c b/drivers/pci/pci-bridge-emul.c index db97cddfc85e..5de8b8dde209 100644 --- a/drivers/pci/pci-bridge-emul.c +++ b/drivers/pci/pci-bridge-emul.c @@ -139,8 +139,13 @@ struct pci_bridge_reg_behavior pci_regs_behavior[PCI_STD_HEADER_SIZEOF / 4] = { .ro = GENMASK(7, 0), }, + /* + * If expansion ROM is unsupported then ROM Base Address register must + * be implemented as read-only register that return 0 when read, same + * as for unused Base Address registers. + */ [PCI_ROM_ADDRESS1 / 4] = { - .rw = GENMASK(31, 11) | BIT(0), + .ro = ~0, }, /* -- Gitee From 8b77385319e30b91b291bd3b5ef6d185abf8b082 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Wed, 24 Nov 2021 16:59:40 +0100 Subject: [PATCH 027/100] PCI: pci-bridge-emul: Properly mark reserved PCIe bits in PCI config space MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.94 commit 0f2ae6691e73c25798a38aa6204b2763d5bb982f category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 7b067ac63a5730d2fae18399fed7e45f23d36912 upstream. Some bits in PCI config space are reserved when device is PCIe. Properly define behavior of PCI registers for PCIe emulated bridge and ensure that it would not be possible change these reserved bits. Link: https://lore.kernel.org/r/20211124155944.1290-3-pali@kernel.org Fixes: 23a5fba4d941 ("PCI: Introduce PCI bridge emulated config space common logic") Signed-off-by: Pali Rohár Signed-off-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/pci/pci-bridge-emul.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/pci/pci-bridge-emul.c b/drivers/pci/pci-bridge-emul.c index 5de8b8dde209..0cbb4e3ca827 100644 --- a/drivers/pci/pci-bridge-emul.c +++ b/drivers/pci/pci-bridge-emul.c @@ -295,6 +295,27 @@ int pci_bridge_emul_init(struct pci_bridge_emul *bridge, kfree(bridge->pci_regs_behavior); return -ENOMEM; } + /* These bits are applicable only for PCI and reserved on PCIe */ + bridge->pci_regs_behavior[PCI_CACHE_LINE_SIZE / 4].ro &= + ~GENMASK(15, 8); + bridge->pci_regs_behavior[PCI_COMMAND / 4].ro &= + ~((PCI_COMMAND_SPECIAL | PCI_COMMAND_INVALIDATE | + PCI_COMMAND_VGA_PALETTE | PCI_COMMAND_WAIT | + PCI_COMMAND_FAST_BACK) | + (PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK | + PCI_STATUS_DEVSEL_MASK) << 16); + bridge->pci_regs_behavior[PCI_PRIMARY_BUS / 4].ro &= + ~GENMASK(31, 24); + bridge->pci_regs_behavior[PCI_IO_BASE / 4].ro &= + ~((PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK | + PCI_STATUS_DEVSEL_MASK) << 16); + bridge->pci_regs_behavior[PCI_INTERRUPT_LINE / 4].rw &= + ~((PCI_BRIDGE_CTL_MASTER_ABORT | + BIT(8) | BIT(9) | BIT(11)) << 16); + bridge->pci_regs_behavior[PCI_INTERRUPT_LINE / 4].ro &= + ~((PCI_BRIDGE_CTL_FAST_BACK) << 16); + bridge->pci_regs_behavior[PCI_INTERRUPT_LINE / 4].w1c &= + ~(BIT(10) << 16); } if (flags & PCI_BRIDGE_EMUL_NO_PREFETCHABLE_BAR) { -- Gitee From fccfd680aa74402817795a6d01b6697e9320ac22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Wed, 24 Nov 2021 16:59:42 +0100 Subject: [PATCH 028/100] PCI: pci-bridge-emul: Fix definitions of reserved bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.94 commit af1d0acdaca70c62861964288dcf1dc1b9108b5c category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 12998087d9f48b66965b97412069c7826502cd7e upstream. Some bits in PCI_EXP registers are reserved for non-root ports. Driver pci-bridge-emul.c implements PCIe Root Port device therefore it should not allow setting reserved bits of registers. Properly define non-reserved bits for all PCI_EXP registers. Link: https://lore.kernel.org/r/20211124155944.1290-5-pali@kernel.org Fixes: 23a5fba4d941 ("PCI: Introduce PCI bridge emulated config space common logic") Signed-off-by: Pali Rohár Signed-off-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/pci/pci-bridge-emul.c | 36 ++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/pci/pci-bridge-emul.c b/drivers/pci/pci-bridge-emul.c index 0cbb4e3ca827..2c7e04fb2685 100644 --- a/drivers/pci/pci-bridge-emul.c +++ b/drivers/pci/pci-bridge-emul.c @@ -176,41 +176,55 @@ struct pci_bridge_reg_behavior pcie_cap_regs_behavior[PCI_CAP_PCIE_SIZEOF / 4] = [PCI_CAP_LIST_ID / 4] = { /* * Capability ID, Next Capability Pointer and - * Capabilities register are all read-only. + * bits [14:0] of Capabilities register are all read-only. + * Bit 15 of Capabilities register is reserved. */ - .ro = ~0, + .ro = GENMASK(30, 0), }, [PCI_EXP_DEVCAP / 4] = { - .ro = ~0, + /* + * Bits [31:29] and [17:16] are reserved. + * Bits [27:18] are reserved for non-upstream ports. + * Bits 28 and [14:6] are reserved for non-endpoint devices. + * Other bits are read-only. + */ + .ro = BIT(15) | GENMASK(5, 0), }, [PCI_EXP_DEVCTL / 4] = { - /* Device control register is RW */ - .rw = GENMASK(15, 0), + /* + * Device control register is RW, except bit 15 which is + * reserved for non-endpoints or non-PCIe-to-PCI/X bridges. + */ + .rw = GENMASK(14, 0), /* * Device status register has bits 6 and [3:0] W1C, [5:4] RO, - * the rest is reserved + * the rest is reserved. Also bit 6 is reserved for non-upstream + * ports. */ - .w1c = (BIT(6) | GENMASK(3, 0)) << 16, + .w1c = GENMASK(3, 0) << 16, .ro = GENMASK(5, 4) << 16, }, [PCI_EXP_LNKCAP / 4] = { - /* All bits are RO, except bit 23 which is reserved */ - .ro = lower_32_bits(~BIT(23)), + /* + * All bits are RO, except bit 23 which is reserved and + * bit 18 which is reserved for non-upstream ports. + */ + .ro = lower_32_bits(~(BIT(23) | PCI_EXP_LNKCAP_CLKPM)), }, [PCI_EXP_LNKCTL / 4] = { /* * Link control has bits [15:14], [11:3] and [1:0] RW, the - * rest is reserved. + * rest is reserved. Bit 8 is reserved for non-upstream ports. * * Link status has bits [13:0] RO, and bits [15:14] * W1C. */ - .rw = GENMASK(15, 14) | GENMASK(11, 3) | GENMASK(1, 0), + .rw = GENMASK(15, 14) | GENMASK(11, 9) | GENMASK(7, 3) | GENMASK(1, 0), .ro = GENMASK(13, 0) << 16, .w1c = GENMASK(15, 14) << 16, }, -- Gitee From e3b79eed2f90a99d2b6cc01d23134d0a18ddceab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Wed, 24 Nov 2021 16:59:43 +0100 Subject: [PATCH 029/100] PCI: pci-bridge-emul: Correctly set PCIe capabilities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.94 commit 7aeeb9fe9ca0b9e084c3ac4c1361b188c31fa9f6 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 1f1050c5e1fefb34ac90a506b43e9da803b5f8f7 upstream. Older mvebu hardware provides PCIe Capability structure only in version 1. New mvebu and aardvark hardware provides it in version 2. So do not force version to 2 in pci_bridge_emul_init() and rather allow drivers to set correct version. Drivers need to set version in pcie_conf.cap field without overwriting PCI_CAP_LIST_ID register. Both drivers (mvebu and aardvark) do not provide slot support yet, so do not set PCI_EXP_FLAGS_SLOT flag. Link: https://lore.kernel.org/r/20211124155944.1290-6-pali@kernel.org Fixes: 23a5fba4d941 ("PCI: Introduce PCI bridge emulated config space common logic") Signed-off-by: Pali Rohár Signed-off-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/pci/controller/pci-aardvark.c | 4 +++- drivers/pci/controller/pci-mvebu.c | 8 ++++++++ drivers/pci/pci-bridge-emul.c | 5 +---- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c index 0f6a6685ab5b..f30144c8c0bd 100644 --- a/drivers/pci/controller/pci-aardvark.c +++ b/drivers/pci/controller/pci-aardvark.c @@ -879,7 +879,6 @@ advk_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge, return PCI_BRIDGE_EMUL_HANDLED; } - case PCI_CAP_LIST_ID: case PCI_EXP_DEVCAP: case PCI_EXP_DEVCTL: *value = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + reg); @@ -960,6 +959,9 @@ static int advk_sw_pci_bridge_init(struct advk_pcie *pcie) /* Support interrupt A for MSI feature */ bridge->conf.intpin = PCIE_CORE_INT_A_ASSERT_ENABLE; + /* Aardvark HW provides PCIe Capability structure in version 2 */ + bridge->pcie_conf.cap = cpu_to_le16(2); + /* Indicates supports for Completion Retry Status */ bridge->pcie_conf.rootcap = cpu_to_le16(PCI_EXP_RTCAP_CRSVIS); diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c index ed13e81cd691..2dc6890dbcaa 100644 --- a/drivers/pci/controller/pci-mvebu.c +++ b/drivers/pci/controller/pci-mvebu.c @@ -573,6 +573,8 @@ static struct pci_bridge_emul_ops mvebu_pci_bridge_emul_ops = { static void mvebu_pci_bridge_emul_init(struct mvebu_pcie_port *port) { struct pci_bridge_emul *bridge = &port->bridge; + u32 pcie_cap = mvebu_readl(port, PCIE_CAP_PCIEXP); + u8 pcie_cap_ver = ((pcie_cap >> 16) & PCI_EXP_FLAGS_VERS); bridge->conf.vendor = PCI_VENDOR_ID_MARVELL; bridge->conf.device = mvebu_readl(port, PCIE_DEV_ID_OFF) >> 16; @@ -585,6 +587,12 @@ static void mvebu_pci_bridge_emul_init(struct mvebu_pcie_port *port) bridge->conf.iolimit = PCI_IO_RANGE_TYPE_32; } + /* + * Older mvebu hardware provides PCIe Capability structure only in + * version 1. New hardware provides it in version 2. + */ + bridge->pcie_conf.cap = cpu_to_le16(pcie_cap_ver); + bridge->has_pcie = true; bridge->data = port; bridge->ops = &mvebu_pci_bridge_emul_ops; diff --git a/drivers/pci/pci-bridge-emul.c b/drivers/pci/pci-bridge-emul.c index 2c7e04fb2685..867159562d07 100644 --- a/drivers/pci/pci-bridge-emul.c +++ b/drivers/pci/pci-bridge-emul.c @@ -297,10 +297,7 @@ int pci_bridge_emul_init(struct pci_bridge_emul *bridge, if (bridge->has_pcie) { bridge->conf.capabilities_pointer = PCI_CAP_PCIE_START; bridge->pcie_conf.cap_id = PCI_CAP_ID_EXP; - /* Set PCIe v2, root port, slot support */ - bridge->pcie_conf.cap = - cpu_to_le16(PCI_EXP_TYPE_ROOT_PORT << 4 | 2 | - PCI_EXP_FLAGS_SLOT); + bridge->pcie_conf.cap |= cpu_to_le16(PCI_EXP_TYPE_ROOT_PORT << 4); bridge->pcie_cap_regs_behavior = kmemdup(pcie_cap_regs_behavior, sizeof(pcie_cap_regs_behavior), -- Gitee From d2e7f418f2eecec5eebd327cb53608c3a73f3d4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Wed, 24 Nov 2021 16:59:44 +0100 Subject: [PATCH 030/100] PCI: pci-bridge-emul: Set PCI_STATUS_CAP_LIST for PCIe device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.94 commit 84166c1177f39b852f92dde675d290e697a04b8b category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 3be9d243b21724d49b65043d4520d688b6040b36 upstream. Since all PCI Express device Functions are required to implement the PCI Express Capability structure, Capabilities List bit in PCI Status Register must be hardwired to 1b. Capabilities Pointer register (which is already set by pci-bride-emul.c driver) is valid only when Capabilities List is set to 1b. Link: https://lore.kernel.org/r/20211124155944.1290-7-pali@kernel.org Fixes: 23a5fba4d941 ("PCI: Introduce PCI bridge emulated config space common logic") Signed-off-by: Pali Rohár Signed-off-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/pci/pci-bridge-emul.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/pci-bridge-emul.c b/drivers/pci/pci-bridge-emul.c index 867159562d07..37504c2cce9b 100644 --- a/drivers/pci/pci-bridge-emul.c +++ b/drivers/pci/pci-bridge-emul.c @@ -296,6 +296,7 @@ int pci_bridge_emul_init(struct pci_bridge_emul *bridge, if (bridge->has_pcie) { bridge->conf.capabilities_pointer = PCI_CAP_PCIE_START; + bridge->conf.status |= cpu_to_le16(PCI_STATUS_CAP_LIST); bridge->pcie_conf.cap_id = PCI_CAP_ID_EXP; bridge->pcie_conf.cap |= cpu_to_le16(PCI_EXP_TYPE_ROOT_PORT << 4); bridge->pcie_cap_regs_behavior = -- Gitee From 717944886346d770ce7413f14b7d87f56c3c8780 Mon Sep 17 00:00:00 2001 From: Ghalem Boudour Date: Fri, 19 Nov 2021 18:20:16 +0100 Subject: [PATCH 031/100] xfrm: fix policy lookup for ipv6 gre packets stable inclusion from stable-5.10.94 commit 56f974d583fcf7c8bb8b62d5164315de25268079 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit bcf141b2eb551b3477b24997ebc09c65f117a803 upstream. On egress side, xfrm lookup is called from __gre6_xmit() with the fl6_gre_key field not initialized leading to policies selectors check failure. Consequently, gre packets are sent without encryption. On ingress side, INET6_PROTO_NOPOLICY was set, thus packets were not checked against xfrm policies. Like for egress side, fl6_gre_key should be correctly set, this is now done in decode_session6(). Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Cc: stable@vger.kernel.org Signed-off-by: Ghalem Boudour Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- net/ipv6/ip6_gre.c | 5 ++++- net/xfrm/xfrm_policy.c | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 09fa49bbf617..9a0263f25232 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -755,6 +755,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, fl6->daddr = key->u.ipv6.dst; fl6->flowlabel = key->label; fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL); + fl6->fl6_gre_key = tunnel_id_to_key32(key->tun_id); dsfield = key->tos; flags = key->tun_flags & @@ -990,6 +991,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + fl6.fl6_gre_key = tunnel_id_to_key32(key->tun_id); dsfield = key->tos; if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) @@ -1098,6 +1100,7 @@ static void ip6gre_tnl_link_config_common(struct ip6_tnl *t) fl6->flowi6_oif = p->link; fl6->flowlabel = 0; fl6->flowi6_proto = IPPROTO_GRE; + fl6->fl6_gre_key = t->parms.o_key; if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; @@ -1543,7 +1546,7 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev) static struct inet6_protocol ip6gre_protocol __read_mostly = { .handler = gre_rcv, .err_handler = ip6gre_err, - .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, + .flags = INET6_PROTO_FINAL, }; static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 51238f957903..8fa258fbdcfc 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -33,6 +33,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_IPV6_MIP6) #include #endif @@ -3458,6 +3459,26 @@ decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse) } fl6->flowi6_proto = nexthdr; return; + case IPPROTO_GRE: + if (!onlyproto && + (nh + offset + 12 < skb->data || + pskb_may_pull(skb, nh + offset + 12 - skb->data))) { + struct gre_base_hdr *gre_hdr; + __be32 *gre_key; + + nh = skb_network_header(skb); + gre_hdr = (struct gre_base_hdr *)(nh + offset); + gre_key = (__be32 *)(gre_hdr + 1); + + if (gre_hdr->flags & GRE_KEY) { + if (gre_hdr->flags & GRE_CSUM) + gre_key++; + fl6->fl6_gre_key = *gre_key; + } + } + fl6->flowi6_proto = nexthdr; + return; + #if IS_ENABLED(CONFIG_IPV6_MIP6) case IPPROTO_MH: offset += ipv6_optlen(exthdr); -- Gitee From af0dd0297694f5bbc8e4e89cd188ca486e98aef0 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 27 Oct 2021 18:30:25 +0100 Subject: [PATCH 032/100] btrfs: fix deadlock between quota enable and other quota operations stable inclusion from stable-5.10.94 commit 09e0ef287e93c65b6a78d2dca34ad5a6c78ef93b category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 232796df8c1437c41d308d161007f0715bac0a54 upstream. When enabling quotas, we attempt to commit a transaction while holding the mutex fs_info->qgroup_ioctl_lock. This can result on a deadlock with other quota operations such as: - qgroup creation and deletion, ioctl BTRFS_IOC_QGROUP_CREATE; - adding and removing qgroup relations, ioctl BTRFS_IOC_QGROUP_ASSIGN. This is because these operations join a transaction and after that they attempt to lock the mutex fs_info->qgroup_ioctl_lock. Acquiring that mutex after joining or starting a transaction is a pattern followed everywhere in qgroups, so the quota enablement operation is the one at fault here, and should not commit a transaction while holding that mutex. Fix this by making the transaction commit while not holding the mutex. We are safe from two concurrent tasks trying to enable quotas because we are serialized by the rw semaphore fs_info->subvol_sem at btrfs_ioctl_quota_ctl(), which is the only call site for enabling quotas. When this deadlock happens, it produces a trace like the following: INFO: task syz-executor:25604 blocked for more than 143 seconds. Not tainted 5.15.0-rc6 #4 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:syz-executor state:D stack:24800 pid:25604 ppid: 24873 flags:0x00004004 Call Trace: context_switch kernel/sched/core.c:4940 [inline] __schedule+0xcd9/0x2530 kernel/sched/core.c:6287 schedule+0xd3/0x270 kernel/sched/core.c:6366 btrfs_commit_transaction+0x994/0x2e90 fs/btrfs/transaction.c:2201 btrfs_quota_enable+0x95c/0x1790 fs/btrfs/qgroup.c:1120 btrfs_ioctl_quota_ctl fs/btrfs/ioctl.c:4229 [inline] btrfs_ioctl+0x637e/0x7b70 fs/btrfs/ioctl.c:5010 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:874 [inline] __se_sys_ioctl fs/ioctl.c:860 [inline] __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f86920b2c4d RSP: 002b:00007f868f61ac58 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007f86921d90a0 RCX: 00007f86920b2c4d RDX: 0000000020005e40 RSI: 00000000c0109428 RDI: 0000000000000008 RBP: 00007f869212bd80 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f86921d90a0 R13: 00007fff6d233e4f R14: 00007fff6d233ff0 R15: 00007f868f61adc0 INFO: task syz-executor:25628 blocked for more than 143 seconds. Not tainted 5.15.0-rc6 #4 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:syz-executor state:D stack:29080 pid:25628 ppid: 24873 flags:0x00004004 Call Trace: context_switch kernel/sched/core.c:4940 [inline] __schedule+0xcd9/0x2530 kernel/sched/core.c:6287 schedule+0xd3/0x270 kernel/sched/core.c:6366 schedule_preempt_disabled+0xf/0x20 kernel/sched/core.c:6425 __mutex_lock_common kernel/locking/mutex.c:669 [inline] __mutex_lock+0xc96/0x1680 kernel/locking/mutex.c:729 btrfs_remove_qgroup+0xb7/0x7d0 fs/btrfs/qgroup.c:1548 btrfs_ioctl_qgroup_create fs/btrfs/ioctl.c:4333 [inline] btrfs_ioctl+0x683c/0x7b70 fs/btrfs/ioctl.c:5014 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:874 [inline] __se_sys_ioctl fs/ioctl.c:860 [inline] __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Reported-by: Hao Sun Link: https://lore.kernel.org/linux-btrfs/CACkBjsZQF19bQ1C6=yetF3BvL10OSORpFUcWXTP6HErshDB4dQ@mail.gmail.com/ Fixes: 340f1aa27f36 ("btrfs: qgroups: Move transaction management inside btrfs_quota_enable/disable") CC: stable@vger.kernel.org # 4.19 Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- fs/btrfs/qgroup.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 4bac32a274ce..f65aa4ed5ca1 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -941,6 +941,14 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info) int ret = 0; int slot; + /* + * We need to have subvol_sem write locked, to prevent races between + * concurrent tasks trying to enable quotas, because we will unlock + * and relock qgroup_ioctl_lock before setting fs_info->quota_root + * and before setting BTRFS_FS_QUOTA_ENABLED. + */ + lockdep_assert_held_write(&fs_info->subvol_sem); + mutex_lock(&fs_info->qgroup_ioctl_lock); if (fs_info->quota_root) goto out; @@ -1118,8 +1126,19 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info) goto out_free_path; } + mutex_unlock(&fs_info->qgroup_ioctl_lock); + /* + * Commit the transaction while not holding qgroup_ioctl_lock, to avoid + * a deadlock with tasks concurrently doing other qgroup operations, such + * adding/removing qgroups or adding/deleting qgroup relations for example, + * because all qgroup operations first start or join a transaction and then + * lock the qgroup_ioctl_lock mutex. + * We are safe from a concurrent task trying to enable quotas, by calling + * this function, since we are serialized by fs_info->subvol_sem. + */ ret = btrfs_commit_transaction(trans); trans = NULL; + mutex_lock(&fs_info->qgroup_ioctl_lock); if (ret) goto out_free_path; -- Gitee From 997836d2d55d8210ddd4c70d3c73c45d192c8866 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 24 Nov 2021 14:14:24 -0500 Subject: [PATCH 033/100] btrfs: check the root node for uptodate before returning it stable inclusion from stable-5.10.94 commit e7764bccae77d3620113576ed18abd5233ba07a6 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 120de408e4b97504a2d9b5ca534b383de2c73d49 upstream. Now that we clear the extent buffer uptodate if we fail to write it out we need to check to see if our root node is uptodate before we search down it. Otherwise we could return stale data (or potentially corrupt data that was caught by the write verification step) and think that the path is OK to search down. CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Nikolay Borisov Signed-off-by: Josef Bacik Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- fs/btrfs/ctree.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 519cf145f9bd..5addd1e36a8e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2589,12 +2589,9 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root, { struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *b; - int root_lock; + int root_lock = 0; int level = 0; - /* We try very hard to do read locks on the root */ - root_lock = BTRFS_READ_LOCK; - if (p->search_commit_root) { /* * The commit roots are read only so we always do read locks, @@ -2632,6 +2629,9 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root, goto out; } + /* We try very hard to do read locks on the root */ + root_lock = BTRFS_READ_LOCK; + /* * If the level is set to maximum, we can skip trying to get the read * lock. @@ -2658,6 +2658,17 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root, level = btrfs_header_level(b); out: + /* + * The root may have failed to write out at some point, and thus is no + * longer valid, return an error in this case. + */ + if (!extent_buffer_uptodate(b)) { + if (root_lock) + btrfs_tree_unlock_rw(b, root_lock); + free_extent_buffer(b); + return ERR_PTR(-EIO); + } + p->nodes[level] = b; if (!p->skip_locking) p->locks[level] = root_lock; -- Gitee From 313fecf6573b2c107088c1a8eeff0f41083ed893 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 16 Dec 2021 15:00:32 +0000 Subject: [PATCH 034/100] btrfs: respect the max size in the header when activating swap file stable inclusion from stable-5.10.94 commit f8c3ec2e21b9cd9dbb7b91de982d6016010c5605 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit c2f822635df873c510bda6fb7fd1b10b7c31be2d upstream. If we extended the size of a swapfile after its header was created (by the mkswap utility) and then try to activate it, we will map the entire file when activating the swap file, instead of limiting to the max size defined in the swap file's header. Currently test case generic/643 from fstests fails because we do not respect that size limit defined in the swap file's header. So fix this by not mapping file ranges beyond the max size defined in the swap header. This is the same type of bug that iomap used to have, and was fixed in commit 36ca7943ac18ae ("mm/swap: consider max pages in iomap_swapfile_add_extent"). Fixes: ed46ff3d423780 ("Btrfs: support swap files") CC: stable@vger.kernel.org # 5.4+ Reviewed-and-tested-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- fs/btrfs/inode.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ff3f0638cdb9..1d9262a35473 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10094,9 +10094,19 @@ static int btrfs_add_swap_extent(struct swap_info_struct *sis, struct btrfs_swap_info *bsi) { unsigned long nr_pages; + unsigned long max_pages; u64 first_ppage, first_ppage_reported, next_ppage; int ret; + /* + * Our swapfile may have had its size extended after the swap header was + * written. In that case activating the swapfile should not go beyond + * the max size set in the swap header. + */ + if (bsi->nr_pages >= sis->max) + return 0; + + max_pages = sis->max - bsi->nr_pages; first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT; next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len, PAGE_SIZE) >> PAGE_SHIFT; @@ -10104,6 +10114,7 @@ static int btrfs_add_swap_extent(struct swap_info_struct *sis, if (first_ppage >= next_ppage) return 0; nr_pages = next_ppage - first_ppage; + nr_pages = min(nr_pages, max_pages); first_ppage_reported = first_ppage; if (bsi->start == 0) -- Gitee From b153b188f191c3e7745034f7892f4d6781a2162b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 7 Oct 2021 17:53:36 +0200 Subject: [PATCH 035/100] ext4: make sure to reset inode lockdep class when quota enabling fails stable inclusion from stable-5.10.94 commit 762e4c33e9e5ecdcfedb1752e38c2aac2921df2e category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 4013d47a5307fdb5c13370b5392498b00fedd274 upstream. When we succeed in enabling some quota type but fail to enable another one with quota feature, we correctly disable all enabled quota types. However we forget to reset i_data_sem lockdep class. When the inode gets freed and reused, it will inherit this lockdep class (i_data_sem is initialized only when a slab is created) and thus eventually lockdep barfs about possible deadlocks. Reported-and-tested-by: syzbot+3b6f9218b1301ddda3e2@syzkaller.appspotmail.com Signed-off-by: Jan Kara Cc: stable@kernel.org Link: https://lore.kernel.org/r/20211007155336.12493-3-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- fs/ext4/super.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 478b1516b7d4..d6b3169d9853 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -6459,8 +6459,19 @@ static int ext4_enable_quotas(struct super_block *sb) "Failed to enable quota tracking " "(type=%d, err=%d). Please run " "e2fsck to fix.", type, err); - for (type--; type >= 0; type--) + for (type--; type >= 0; type--) { + struct inode *inode; + + inode = sb_dqopt(sb)->files[type]; + if (inode) + inode = igrab(inode); dquot_quota_off(sb, type); + if (inode) { + lockdep_set_quota_inode(inode, + I_DATA_SEM_NORMAL); + iput(inode); + } + } return err; } -- Gitee From 04f2549dad9eb289d984f6ba387bf3137d8fba01 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 7 Oct 2021 17:53:35 +0200 Subject: [PATCH 036/100] ext4: make sure quota gets properly shutdown on error stable inclusion from stable-5.10.94 commit 115b762b48ab83de2898b8c1a38e3799446a97af category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 15fc69bbbbbc8c72e5f6cc4e1be0f51283c5448e upstream. When we hit an error when enabling quotas and setting inode flags, we do not properly shutdown quota subsystem despite returning error from Q_QUOTAON quotactl. This can lead to some odd situations like kernel using quota file while it is still writeable for userspace. Make sure we properly cleanup the quota subsystem in case of error. Signed-off-by: Jan Kara Cc: stable@kernel.org Link: https://lore.kernel.org/r/20211007155336.12493-2-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- fs/ext4/super.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d6b3169d9853..6e55b9ac5a5f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -6373,10 +6373,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA); err = dquot_quota_on(sb, type, format_id, path); - if (err) { - lockdep_set_quota_inode(path->dentry->d_inode, - I_DATA_SEM_NORMAL); - } else { + if (!err) { struct inode *inode = d_inode(path->dentry); handle_t *handle; @@ -6396,7 +6393,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, ext4_journal_stop(handle); unlock_inode: inode_unlock(inode); + if (err) + dquot_quota_off(sb, type); } + if (err) + lockdep_set_quota_inode(path->dentry->d_inode, + I_DATA_SEM_NORMAL); return err; } -- Gitee From 71fb6573cae6f396507eb4244713c0888b95ea69 Mon Sep 17 00:00:00 2001 From: Chunguang Xu Date: Tue, 23 Nov 2021 09:17:57 +0800 Subject: [PATCH 037/100] ext4: fix a possible ABBA deadlock due to busy PA stable inclusion from stable-5.10.94 commit f9ed0ea0a9fc59de71b230ff02f59a51fd174ca7 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 8c80fb312d7abf8bcd66cca1d843a80318a2c522 upstream. We found on older kernel (3.10) that in the scenario of insufficient disk space, system may trigger an ABBA deadlock problem, it seems that this problem still exists in latest kernel, try to fix it here. The main process triggered by this problem is that task A occupies the PA and waits for the jbd2 transaction finish, the jbd2 transaction waits for the completion of task B's IO (plug_list), but task B waits for the release of PA by task A to finish discard, which indirectly forms an ABBA deadlock. The related calltrace is as follows: Task A vfs_write ext4_mb_new_blocks() ext4_mb_mark_diskspace_used() JBD2 jbd2_journal_get_write_access() -> jbd2_journal_commit_transaction() ->schedule() filemap_fdatawait() | | | Task B | | do_unlinkat() | | ext4_evict_inode() | | jbd2_journal_begin_ordered_truncate() | | filemap_fdatawrite_range() | | ext4_mb_new_blocks() | -ext4_mb_discard_group_preallocations() <----- Here, try to cancel ext4_mb_discard_group_preallocations() internal retry due to PA busy, and do a limited number of retries inside ext4_mb_discard_preallocations(), which can circumvent the above problems, but also has some advantages: 1. Since the PA is in a busy state, if other groups have free PAs, keeping the current PA may help to reduce fragmentation. 2. Continue to traverse forward instead of waiting for the current group PA to be released. In most scenarios, the PA discard time can be reduced. However, in the case of smaller free space, if only a few groups have space, then due to multiple traversals of the group, it may increase CPU overhead. But in contrast, I feel that the overall benefit is better than the cost. Signed-off-by: Chunguang Xu Reported-by: kernel test robot Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/1637630277-23496-1-git-send-email-brookxu.cn@gmail.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- fs/ext4/mballoc.c | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 60aef7fdd61d..e40f87d07783 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4234,7 +4234,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, */ static noinline_for_stack int ext4_mb_discard_group_preallocations(struct super_block *sb, - ext4_group_t group, int needed) + ext4_group_t group, int *busy) { struct ext4_group_info *grp = ext4_get_group_info(sb, group); struct buffer_head *bitmap_bh = NULL; @@ -4242,8 +4242,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, struct list_head list; struct ext4_buddy e4b; int err; - int busy = 0; - int free, free_total = 0; + int free = 0; mb_debug(sb, "discard preallocation for group %u\n", group); if (list_empty(&grp->bb_prealloc_list)) @@ -4266,19 +4265,14 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, goto out_dbg; } - if (needed == 0) - needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1; - INIT_LIST_HEAD(&list); -repeat: - free = 0; ext4_lock_group(sb, group); list_for_each_entry_safe(pa, tmp, &grp->bb_prealloc_list, pa_group_list) { spin_lock(&pa->pa_lock); if (atomic_read(&pa->pa_count)) { spin_unlock(&pa->pa_lock); - busy = 1; + *busy = 1; continue; } if (pa->pa_deleted) { @@ -4318,22 +4312,13 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); } - free_total += free; - - /* if we still need more blocks and some PAs were used, try again */ - if (free_total < needed && busy) { - ext4_unlock_group(sb, group); - cond_resched(); - busy = 0; - goto repeat; - } ext4_unlock_group(sb, group); ext4_mb_unload_buddy(&e4b); put_bh(bitmap_bh); out_dbg: mb_debug(sb, "discarded (%d) blocks preallocated for group %u bb_free (%d)\n", - free_total, group, grp->bb_free); - return free_total; + free, group, grp->bb_free); + return free; } /* @@ -4875,13 +4860,24 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) { ext4_group_t i, ngroups = ext4_get_groups_count(sb); int ret; - int freed = 0; + int freed = 0, busy = 0; + int retry = 0; trace_ext4_mb_discard_preallocations(sb, needed); + + if (needed == 0) + needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1; + repeat: for (i = 0; i < ngroups && needed > 0; i++) { - ret = ext4_mb_discard_group_preallocations(sb, i, needed); + ret = ext4_mb_discard_group_preallocations(sb, i, &busy); freed += ret; needed -= ret; + cond_resched(); + } + + if (needed > 0 && busy && ++retry < 3) { + busy = 0; + goto repeat; } return freed; -- Gitee From b6d90c1d855fb71981a8bd130ce4bfb71b42ef0c Mon Sep 17 00:00:00 2001 From: Harshad Shirwadkar Date: Wed, 1 Dec 2021 08:34:21 -0800 Subject: [PATCH 038/100] ext4: initialize err_blk before calling __ext4_get_inode_loc stable inclusion from stable-5.10.94 commit 720508dd118d04035875823f44bcd27388ff39b2 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit c27c29c6af4f3f4ce925a2111c256733c5a5b430 upstream. It is not guaranteed that __ext4_get_inode_loc will definitely set err_blk pointer when it returns EIO. To avoid using uninitialized variables, let's first set err_blk to 0. Reported-by: Dan Carpenter Signed-off-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20211201163421.2631661-1-harshads@google.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- fs/ext4/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ef102cca42dc..7522f1db798d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4544,7 +4544,7 @@ static int __ext4_get_inode_loc(struct super_block *sb, unsigned long ino, static int __ext4_get_inode_loc_noinmem(struct inode *inode, struct ext4_iloc *iloc) { - ext4_fsblk_t err_blk; + ext4_fsblk_t err_blk = 0; int ret; ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, NULL, iloc, @@ -4559,7 +4559,7 @@ static int __ext4_get_inode_loc_noinmem(struct inode *inode, int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) { - ext4_fsblk_t err_blk; + ext4_fsblk_t err_blk = 0; int ret; ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, inode, iloc, -- Gitee From 9e49c50961b429ca35028c9786c98b7c1f2879db Mon Sep 17 00:00:00 2001 From: Xin Yin Date: Tue, 21 Dec 2021 10:28:39 +0800 Subject: [PATCH 039/100] ext4: fix fast commit may miss tracking range for FALLOC_FL_ZERO_RANGE stable inclusion from stable-5.10.94 commit e4221629d5e1479db400d8a4cbf865c65a457630 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 5e4d0eba1ccaf19f93222abdeda5a368be141785 upstream. when call falloc with FALLOC_FL_ZERO_RANGE, to set an range to unwritten, which has been already initialized. If the range is align to blocksize, fast commit will not track range for this change. Also track range for unwritten range in ext4_map_blocks(). Signed-off-by: Xin Yin Reviewed-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20211221022839.374606-1-yinxin.x@bytedance.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- fs/ext4/extents.c | 2 -- fs/ext4/inode.c | 7 ++++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 398df993ccd8..6a99b6ed7031 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4650,8 +4650,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, ret = ext4_mark_inode_dirty(handle, inode); if (unlikely(ret)) goto out_handle; - ext4_fc_track_range(handle, inode, offset >> inode->i_sb->s_blocksize_bits, - (offset + len - 1) >> inode->i_sb->s_blocksize_bits); /* Zero out partial block at the edges of the range */ ret = ext4_zero_partial_blocks(handle, inode, offset, len); if (ret >= 0) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7522f1db798d..9cf1c76514be 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -741,10 +741,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, if (ret) return ret; } - ext4_fc_track_range(handle, inode, map->m_lblk, - map->m_lblk + map->m_len - 1); } - + if (retval > 0 && (map->m_flags & EXT4_MAP_UNWRITTEN || + map->m_flags & EXT4_MAP_MAPPED)) + ext4_fc_track_range(handle, inode, map->m_lblk, + map->m_lblk + map->m_len - 1); if (retval < 0) ext_debug(inode, "failed with err %d\n", retval); return retval; -- Gitee From 9b06d15cfe4859ecc116497e875c2cdedd61b2e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Henriques?= Date: Tue, 14 Dec 2021 17:50:58 +0000 Subject: [PATCH 040/100] ext4: set csum seed in tmp inode while migrating to extents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.94 commit da364ab35892f69785266a93bd174f647db1f670 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit e81c9302a6c3c008f5c30beb73b38adb0170ff2d upstream. When migrating to extents, the temporary inode will have it's own checksum seed. This means that, when swapping the inodes data, the inode checksums will be incorrect. This can be fixed by recalculating the extents checksums again. Or simply by copying the seed into the temporary inode. Link: https://bugzilla.kernel.org/show_bug.cgi?id=213357 Reported-by: Jeroen van Wolffelaar Signed-off-by: Luís Henriques Link: https://lore.kernel.org/r/20211214175058.19511-1-lhenriques@suse.de Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- fs/ext4/migrate.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index c5e3fc998211..69cc3f8e03bd 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -459,6 +459,17 @@ int ext4_ext_migrate(struct inode *inode) ext4_journal_stop(handle); goto out_unlock; } + /* + * Use the correct seed for checksum (i.e. the seed from 'inode'). This + * is so that the metadata blocks will have the correct checksum after + * the migration. + * + * Note however that, if a crash occurs during the migration process, + * the recovery process is broken because the tmp_inode checksums will + * be wrong and the orphans cleanup will fail. + */ + ei = EXT4_I(inode); + EXT4_I(tmp_inode)->i_csum_seed = ei->i_csum_seed; i_size_write(tmp_inode, i_size_read(inode)); /* * Set the i_nlink to zero so it will be deleted later @@ -502,7 +513,6 @@ int ext4_ext_migrate(struct inode *inode) goto out_tmp_inode; } - ei = EXT4_I(inode); i_data = ei->i_data; memset(&lb, 0, sizeof(lb)); -- Gitee From 1d56861a8f503736d102b715b0c281df0ee1b996 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Thu, 23 Dec 2021 09:55:06 +0800 Subject: [PATCH 041/100] ext4: Fix BUG_ON in ext4_bread when write quota data stable inclusion from stable-5.10.94 commit 53998b3f6dcde1d6553e27db1957ac0545a6e19f category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 380a0091cab482489e9b19e07f2a166ad2b76d5c upstream. We got issue as follows when run syzkaller: [ 167.936972] EXT4-fs error (device loop0): __ext4_remount:6314: comm rep: Abort forced by user [ 167.938306] EXT4-fs (loop0): Remounting filesystem read-only [ 167.981637] Assertion failure in ext4_getblk() at fs/ext4/inode.c:847: '(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) || handle != NULL || create == 0' [ 167.983601] ------------[ cut here ]------------ [ 167.984245] kernel BUG at fs/ext4/inode.c:847! [ 167.984882] invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI [ 167.985624] CPU: 7 PID: 2290 Comm: rep Tainted: G B 5.16.0-rc5-next-20211217+ #123 [ 167.986823] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014 [ 167.988590] RIP: 0010:ext4_getblk+0x17e/0x504 [ 167.989189] Code: c6 01 74 28 49 c7 c0 a0 a3 5c 9b b9 4f 03 00 00 48 c7 c2 80 9c 5c 9b 48 c7 c6 40 b6 5c 9b 48 c7 c7 20 a4 5c 9b e8 77 e3 fd ff <0f> 0b 8b 04 244 [ 167.991679] RSP: 0018:ffff8881736f7398 EFLAGS: 00010282 [ 167.992385] RAX: 0000000000000094 RBX: 1ffff1102e6dee75 RCX: 0000000000000000 [ 167.993337] RDX: 0000000000000001 RSI: ffffffff9b6e29e0 RDI: ffffed102e6dee66 [ 167.994292] RBP: ffff88816a076210 R08: 0000000000000094 R09: ffffed107363fa09 [ 167.995252] R10: ffff88839b1fd047 R11: ffffed107363fa08 R12: ffff88816a0761e8 [ 167.996205] R13: 0000000000000000 R14: 0000000000000021 R15: 0000000000000001 [ 167.997158] FS: 00007f6a1428c740(0000) GS:ffff88839b000000(0000) knlGS:0000000000000000 [ 167.998238] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 167.999025] CR2: 00007f6a140716c8 CR3: 0000000133216000 CR4: 00000000000006e0 [ 167.999987] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 168.000944] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 168.001899] Call Trace: [ 168.002235] [ 168.007167] ext4_bread+0xd/0x53 [ 168.007612] ext4_quota_write+0x20c/0x5c0 [ 168.010457] write_blk+0x100/0x220 [ 168.010944] remove_free_dqentry+0x1c6/0x440 [ 168.011525] free_dqentry.isra.0+0x565/0x830 [ 168.012133] remove_tree+0x318/0x6d0 [ 168.014744] remove_tree+0x1eb/0x6d0 [ 168.017346] remove_tree+0x1eb/0x6d0 [ 168.019969] remove_tree+0x1eb/0x6d0 [ 168.022128] qtree_release_dquot+0x291/0x340 [ 168.023297] v2_release_dquot+0xce/0x120 [ 168.023847] dquot_release+0x197/0x3e0 [ 168.024358] ext4_release_dquot+0x22a/0x2d0 [ 168.024932] dqput.part.0+0x1c9/0x900 [ 168.025430] __dquot_drop+0x120/0x190 [ 168.025942] ext4_clear_inode+0x86/0x220 [ 168.026472] ext4_evict_inode+0x9e8/0xa22 [ 168.028200] evict+0x29e/0x4f0 [ 168.028625] dispose_list+0x102/0x1f0 [ 168.029148] evict_inodes+0x2c1/0x3e0 [ 168.030188] generic_shutdown_super+0xa4/0x3b0 [ 168.030817] kill_block_super+0x95/0xd0 [ 168.031360] deactivate_locked_super+0x85/0xd0 [ 168.031977] cleanup_mnt+0x2bc/0x480 [ 168.033062] task_work_run+0xd1/0x170 [ 168.033565] do_exit+0xa4f/0x2b50 [ 168.037155] do_group_exit+0xef/0x2d0 [ 168.037666] __x64_sys_exit_group+0x3a/0x50 [ 168.038237] do_syscall_64+0x3b/0x90 [ 168.038751] entry_SYSCALL_64_after_hwframe+0x44/0xae In order to reproduce this problem, the following conditions need to be met: 1. Ext4 filesystem with no journal; 2. Filesystem image with incorrect quota data; 3. Abort filesystem forced by user; 4. umount filesystem; As in ext4_quota_write: ... if (EXT4_SB(sb)->s_journal && !handle) { ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" " cancelled because transaction is not started", (unsigned long long)off, (unsigned long long)len); return -EIO; } ... We only check handle if NULL when filesystem has journal. There is need check handle if NULL even when filesystem has no journal. Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20211223015506.297766-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6e55b9ac5a5f..2e7a8cfe572a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -6577,7 +6577,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, struct buffer_head *bh; handle_t *handle = journal_current_handle(); - if (EXT4_SB(sb)->s_journal && !handle) { + if (!handle) { ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" " cancelled because transaction is not started", (unsigned long long)off, (unsigned long long)len); -- Gitee From fe5358a811a8c0bc52b3a2b6c584bd92e041152b Mon Sep 17 00:00:00 2001 From: Xin Yin Date: Thu, 23 Dec 2021 11:23:36 +0800 Subject: [PATCH 042/100] ext4: use ext4_ext_remove_space() for fast commit replay delete range stable inclusion from stable-5.10.94 commit 04b562730677630d905a9e47c5e4826888745ebc category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 0b5b5a62b945a141e64011b2f90ee7e46f14be98 upstream. For now ,we use ext4_punch_hole() during fast commit replay delete range procedure. But it will be affected by inode->i_size, which may not correct during fast commit replay procedure. The following test will failed. -create & write foo (len 1000K) -falloc FALLOC_FL_ZERO_RANGE foo (range 400K - 600K) -create & fsync bar -falloc FALLOC_FL_PUNCH_HOLE foo (range 300K-500K) -fsync foo -crash before a full commit After the fast_commit reply procedure, the range 400K-500K will not be removed. Because in this case, when calling ext4_punch_hole() the inode->i_size is 0, and it just retruns with doing nothing. Change to use ext4_ext_remove_space() instead of ext4_punch_hole() to remove blocks of inode directly. Signed-off-by: Xin Yin Reviewed-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20211223032337.5198-2-yinxin.x@bytedance.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- fs/ext4/fast_commit.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 08ca690f928b..02cb1a1e6111 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -1764,11 +1764,14 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, } } - ret = ext4_punch_hole(inode, - le32_to_cpu(lrange.fc_lblk) << sb->s_blocksize_bits, - le32_to_cpu(lrange.fc_len) << sb->s_blocksize_bits); - if (ret) - jbd_debug(1, "ext4_punch_hole returned %d", ret); + down_write(&EXT4_I(inode)->i_data_sem); + ret = ext4_ext_remove_space(inode, lrange.fc_lblk, + lrange.fc_lblk + lrange.fc_len - 1); + up_write(&EXT4_I(inode)->i_data_sem); + if (ret) { + iput(inode); + return 0; + } ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >> sb->s_blocksize_bits); ext4_mark_inode_dirty(NULL, inode); -- Gitee From 04ff86c4c7dadf9a42f0ddd0a4d674c4d72502c0 Mon Sep 17 00:00:00 2001 From: Xin Yin Date: Thu, 23 Dec 2021 11:23:37 +0800 Subject: [PATCH 043/100] ext4: fast commit may miss tracking unwritten range during ftruncate stable inclusion from stable-5.10.94 commit f26b24b4c115f9c8fe8defd2c158420d30b7af0f category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 9725958bb75cdfa10f2ec11526fdb23e7485e8e4 upstream. If use FALLOC_FL_KEEP_SIZE to alloc unwritten range at bottom, the inode->i_size will not include the unwritten range. When call ftruncate with fast commit enabled, it will miss to track the unwritten range. Change to trace the full range during ftruncate. Signed-off-by: Xin Yin Reviewed-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20211223032337.5198-3-yinxin.x@bytedance.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- fs/ext4/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9cf1c76514be..4cfee4cb2350 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5445,8 +5445,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) ext4_fc_track_range(handle, inode, (attr->ia_size > 0 ? attr->ia_size - 1 : 0) >> inode->i_sb->s_blocksize_bits, - (oldsize > 0 ? oldsize - 1 : 0) >> - inode->i_sb->s_blocksize_bits); + EXT_MAX_BLOCKS - 1); else ext4_fc_track_range( handle, inode, -- Gitee From cb8bc57b124df261b7673ef736b9e58e68b76338 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 23 Dec 2021 17:44:36 +0100 Subject: [PATCH 044/100] ext4: destroy ext4_fc_dentry_cachep kmemcache on module removal stable inclusion from stable-5.10.94 commit d60e9daba29e44e0f277333e46fff90c74509398 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit ab047d516dea72f011c15c04a929851e4d053109 upstream. The kmemcache for ext4_fc_dentry_cachep remains registered after module removal. Destroy ext4_fc_dentry_cachep kmemcache on module removal. Fixes: aa75f4d3daaeb ("ext4: main fast-commit commit path") Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Lukas Czerner Reviewed-by: Harshad Shirwadkar Link: https://lore.kernel.org/r/20211110134640.lyku5vklvdndw6uk@linutronix.de Link: https://lore.kernel.org/r/YbiK3JetFFl08bd7@linutronix.de Link: https://lore.kernel.org/r/20211223164436.2628390-1-bigeasy@linutronix.de Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- fs/ext4/ext4.h | 1 + fs/ext4/fast_commit.c | 5 +++++ fs/ext4/super.c | 2 ++ 3 files changed, 8 insertions(+) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index bb3adca53d93..c801d37e5001 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2799,6 +2799,7 @@ bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t block); void ext4_fc_replay_cleanup(struct super_block *sb); int ext4_fc_commit(journal_t *journal, tid_t commit_tid); int __init ext4_fc_init_dentry_cache(void); +void ext4_fc_destroy_dentry_cache(void); /* mballoc.c */ extern const struct seq_operations ext4_mb_seq_groups_ops; diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 02cb1a1e6111..f483abcd5213 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -2169,3 +2169,8 @@ int __init ext4_fc_init_dentry_cache(void) return 0; } + +void ext4_fc_destroy_dentry_cache(void) +{ + kmem_cache_destroy(ext4_fc_dentry_cachep); +} diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2e7a8cfe572a..56bd2ea08090 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -6761,6 +6761,7 @@ static int __init ext4_init_fs(void) out: unregister_as_ext2(); unregister_as_ext3(); + ext4_fc_destroy_dentry_cache(); out05: destroy_inodecache(); out1: @@ -6787,6 +6788,7 @@ static void __exit ext4_exit_fs(void) unregister_as_ext2(); unregister_as_ext3(); unregister_filesystem(&ext4_fs_type); + ext4_fc_destroy_dentry_cache(); destroy_inodecache(); ext4_exit_mballoc(); ext4_exit_sysfs(); -- Gitee From 92117f491353dc15984c52ebe34f9a4fb102a6b8 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Fri, 24 Dec 2021 18:03:41 +0800 Subject: [PATCH 045/100] ext4: fix null-ptr-deref in '__ext4_journal_ensure_credits' stable inclusion from stable-5.10.94 commit 679fb065326be0bff3d7463cfc975cddeeae9d68 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 298b5c521746d69c07beb2757292fb5ccc1b0f85 upstream. We got issue as follows when run syzkaller test: [ 1901.130043] EXT4-fs error (device vda): ext4_remount:5624: comm syz-executor.5: Abort forced by user [ 1901.130901] Aborting journal on device vda-8. [ 1901.131437] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-executor.16: Detected aborted journal [ 1901.131566] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-executor.11: Detected aborted journal [ 1901.132586] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-executor.18: Detected aborted journal [ 1901.132751] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-executor.9: Detected aborted journal [ 1901.136149] EXT4-fs error (device vda) in ext4_reserve_inode_write:6035: Journal has aborted [ 1901.136837] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-fuzzer: Detected aborted journal [ 1901.136915] ================================================================== [ 1901.138175] BUG: KASAN: null-ptr-deref in __ext4_journal_ensure_credits+0x74/0x140 [ext4] [ 1901.138343] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-executor.13: Detected aborted journal [ 1901.138398] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-executor.1: Detected aborted journal [ 1901.138808] Read of size 8 at addr 0000000000000000 by task syz-executor.17/968 [ 1901.138817] [ 1901.138852] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-executor.30: Detected aborted journal [ 1901.144779] CPU: 1 PID: 968 Comm: syz-executor.17 Not tainted 4.19.90-vhulk2111.1.0.h893.eulerosv2r10.aarch64+ #1 [ 1901.146479] Hardware name: linux,dummy-virt (DT) [ 1901.147317] Call trace: [ 1901.147552] dump_backtrace+0x0/0x2d8 [ 1901.147898] show_stack+0x28/0x38 [ 1901.148215] dump_stack+0xec/0x15c [ 1901.148746] kasan_report+0x108/0x338 [ 1901.149207] __asan_load8+0x58/0xb0 [ 1901.149753] __ext4_journal_ensure_credits+0x74/0x140 [ext4] [ 1901.150579] ext4_xattr_delete_inode+0xe4/0x700 [ext4] [ 1901.151316] ext4_evict_inode+0x524/0xba8 [ext4] [ 1901.151985] evict+0x1a4/0x378 [ 1901.152353] iput+0x310/0x428 [ 1901.152733] do_unlinkat+0x260/0x428 [ 1901.153056] __arm64_sys_unlinkat+0x6c/0xc0 [ 1901.153455] el0_svc_common+0xc8/0x320 [ 1901.153799] el0_svc_handler+0xf8/0x160 [ 1901.154265] el0_svc+0x10/0x218 [ 1901.154682] ================================================================== This issue may happens like this: Process1 Process2 ext4_evict_inode ext4_journal_start ext4_truncate ext4_ind_truncate ext4_free_branches ext4_ind_truncate_ensure_credits ext4_journal_ensure_credits_fn ext4_journal_restart handle->h_transaction = NULL; mount -o remount,abort /mnt -> trigger JBD abort start_this_handle -> will return failed ext4_xattr_delete_inode ext4_journal_ensure_credits ext4_journal_ensure_credits_fn __ext4_journal_ensure_credits jbd2_handle_buffer_credits journal = handle->h_transaction->t_journal; ->null-ptr-deref Now, indirect truncate process didn't handle error. To solve this issue maybe simply add check handle is abort in '__ext4_journal_ensure_credits' is enough, and i also think this is necessary. Cc: stable@kernel.org Signed-off-by: Ye Bin Link: https://lore.kernel.org/r/20211224100341.3299128-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- fs/ext4/ext4_jbd2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index b96ecba91899..b53e1d0b13fc 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -162,6 +162,8 @@ int __ext4_journal_ensure_credits(handle_t *handle, int check_cred, { if (!ext4_handle_valid(handle)) return 0; + if (is_handle_aborted(handle)) + return -EROFS; if (jbd2_handle_buffer_credits(handle) >= check_cred && handle->h_revoke_credits >= revoke_cred) return 0; -- Gitee From 2d65c5e0ea97c766cfd09862ad11b523a4f60f80 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 5 Jan 2022 23:59:56 -0500 Subject: [PATCH 046/100] ext4: don't use the orphan list when migrating an inode stable inclusion from stable-5.10.94 commit 0ca7ec6db20c66b91277c231780e9f05b42a8163 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 6eeaf88fd586f05aaf1d48cb3a139d2a5c6eb055 upstream. We probably want to remove the indirect block to extents migration feature after a deprecation window, but until then, let's fix a potential data loss problem caused by the fact that we put the tmp_inode on the orphan list. In the unlikely case where we crash and do a journal recovery, the data blocks belonging to the inode being migrated are also represented in the tmp_inode on the orphan list --- and so its data blocks will get marked unallocated, and available for reuse. Instead, stop putting the tmp_inode on the oprhan list. So in the case where we crash while migrating the inode, we'll leak an inode, which is not a disaster. It will be easily fixed the next time we run fsck, and it's better than potentially having blocks getting claimed by two different files, and losing data as a result. Signed-off-by: Theodore Ts'o Reviewed-by: Lukas Czerner Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- fs/ext4/migrate.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 69cc3f8e03bd..49912814f3d8 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -437,12 +437,12 @@ int ext4_ext_migrate(struct inode *inode) percpu_down_write(&sbi->s_writepages_rwsem); /* - * Worst case we can touch the allocation bitmaps, a bgd - * block, and a block to link in the orphan list. We do need - * need to worry about credits for modifying the quota inode. + * Worst case we can touch the allocation bitmaps and a block + * group descriptor block. We do need need to worry about + * credits for modifying the quota inode. */ handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, - 4 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb)); + 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb)); if (IS_ERR(handle)) { retval = PTR_ERR(handle); @@ -463,10 +463,6 @@ int ext4_ext_migrate(struct inode *inode) * Use the correct seed for checksum (i.e. the seed from 'inode'). This * is so that the metadata blocks will have the correct checksum after * the migration. - * - * Note however that, if a crash occurs during the migration process, - * the recovery process is broken because the tmp_inode checksums will - * be wrong and the orphans cleanup will fail. */ ei = EXT4_I(inode); EXT4_I(tmp_inode)->i_csum_seed = ei->i_csum_seed; @@ -478,7 +474,6 @@ int ext4_ext_migrate(struct inode *inode) clear_nlink(tmp_inode); ext4_ext_tree_init(handle, tmp_inode); - ext4_orphan_add(handle, tmp_inode); ext4_journal_stop(handle); /* @@ -503,12 +498,6 @@ int ext4_ext_migrate(struct inode *inode) handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); if (IS_ERR(handle)) { - /* - * It is impossible to update on-disk structures without - * a handle, so just rollback in-core changes and live other - * work to orphan_list_cleanup() - */ - ext4_orphan_del(NULL, tmp_inode); retval = PTR_ERR(handle); goto out_tmp_inode; } -- Gitee From 042564302301f920e7b009f6a8772293aa21c835 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 17 Jan 2022 10:31:26 +0100 Subject: [PATCH 047/100] drm/radeon: fix error handling in radeon_driver_open_kms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.94 commit 7bb99c7e13f8417f81247b2acea8ceaae0c5afb3 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 4722f463896cc0ef1a6f1c3cb2e171e949831249 upstream. The return value was never initialized so the cleanup code executed when it isn't even necessary. Just add proper error handling. Fixes: ab50cb9df889 ("drm/radeon/radeon_kms: Fix a NULL pointer dereference in radeon_driver_open_kms()") Signed-off-by: Christian König Tested-by: Jan Stancek Tested-by: Borislav Petkov Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/gpu/drm/radeon/radeon_kms.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 204634b23928..32070e94f6c4 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -652,18 +652,18 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL); if (unlikely(!fpriv)) { r = -ENOMEM; - goto out_suspend; + goto err_suspend; } if (rdev->accel_working) { vm = &fpriv->vm; r = radeon_vm_init(rdev, vm); if (r) - goto out_fpriv; + goto err_fpriv; r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); if (r) - goto out_vm_fini; + goto err_vm_fini; /* map the ib pool buffer read only into * virtual address space */ @@ -671,7 +671,7 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) rdev->ring_tmp_bo.bo); if (!vm->ib_bo_va) { r = -ENOMEM; - goto out_vm_fini; + goto err_vm_fini; } r = radeon_vm_bo_set_addr(rdev, vm->ib_bo_va, @@ -679,19 +679,21 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_SNOOPED); if (r) - goto out_vm_fini; + goto err_vm_fini; } file_priv->driver_priv = fpriv; } - if (!r) - goto out_suspend; + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; -out_vm_fini: +err_vm_fini: radeon_vm_fini(rdev, vm); -out_fpriv: +err_fpriv: kfree(fpriv); -out_suspend: + +err_suspend: pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return r; -- Gitee From 5939e5ece8e8b0ec9f8ea866fe1393096fd1b5ae Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Thu, 30 Dec 2021 18:31:53 +0200 Subject: [PATCH 048/100] of: base: Improve argument length mismatch error stable inclusion from stable-5.10.94 commit 12224c0d19f34edaca246b99fd79b076b0abadaa category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 5d05b811b5acb92fc581a7b328b36646c86f5ab9 upstream. The cells_name field of of_phandle_iterator might be NULL. Use the phandle name instead. With this change instead of: OF: /soc/pinctrl@1000000: (null) = 3 found 2 We get: OF: /soc/pinctrl@1000000: phandle pinctrl@1000000 needs 3, found 2 Which is a more helpful messages making DT debugging easier. In this particular example the phandle name looks like duplicate of the same node name. But note that the first node is the parent node (it->parent), while the second is the phandle target (it->node). They happen to be the same in the case that triggered this improvement. See commit 72cb4c48a46a ("arm64: dts: qcom: ipq6018: Fix gpio-ranges property"). Signed-off-by: Baruch Siach Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/f6a68e0088a552ea9dfd4d8e3b5b586d92594738.1640881913.git.baruch@tkos.co.il Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/of/base.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index 60cb9b44d4ec..a44a0e7ba251 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1328,9 +1328,14 @@ int of_phandle_iterator_next(struct of_phandle_iterator *it) * property data length */ if (it->cur + count > it->list_end) { - pr_err("%pOF: %s = %d found %td\n", - it->parent, it->cells_name, - count, it->list_end - it->cur); + if (it->cells_name) + pr_err("%pOF: %s = %d found %td\n", + it->parent, it->cells_name, + count, it->list_end - it->cur); + else + pr_err("%pOF: phandle %s needs %d, found %td\n", + it->parent, of_node_full_name(it->node), + count, it->list_end - it->cur); goto err; } } -- Gitee From 3574c1368120d1497c42cb2e11c9967caab2eccc Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 18 Jun 2018 23:55:40 +0100 Subject: [PATCH 049/100] firmware: Update Kconfig help text for Google firmware stable inclusion from stable-5.10.94 commit f62bf6ee4fa3198385e0658a9f070f98335a8408 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit d185a3466f0cd5af8f1c5c782c53bc0e6f2e7136 upstream. The help text for GOOGLE_FIRMWARE states that it should only be enabled when building a kernel for Google's own servers. However, many of the drivers dependent on it are also useful on Chromebooks or on any platform using coreboot. Update the help text to reflect this double duty. Fixes: d384d6f43d1e ("firmware: google memconsole: Add coreboot support") Reviewed-by: Julius Werner Signed-off-by: Ben Hutchings Link: https://lore.kernel.org/r/20180618225540.GD14131@decadent.org.uk Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/firmware/google/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/google/Kconfig b/drivers/firmware/google/Kconfig index 97968aece54f..931544c9f63d 100644 --- a/drivers/firmware/google/Kconfig +++ b/drivers/firmware/google/Kconfig @@ -3,9 +3,9 @@ menuconfig GOOGLE_FIRMWARE bool "Google Firmware Drivers" default n help - These firmware drivers are used by Google's servers. They are - only useful if you are working directly on one of their - proprietary servers. If in doubt, say "N". + These firmware drivers are used by Google servers, + Chromebooks and other devices using coreboot firmware. + If in doubt, say "N". if GOOGLE_FIRMWARE -- Gitee From ab12c4e698dd8b6ac898733fb95b2fec2ea66087 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 23 Apr 2021 11:26:56 +0200 Subject: [PATCH 050/100] can: mcp251xfd: mcp251xfd_tef_obj_read(): fix typo in error message stable inclusion from stable-5.10.94 commit 0baa3729d2eb20ae159a863e473fcfca6390aba9 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 99e7cc3b3f85d9a583ab83f386315c59443509ae upstream. This patch fixes a typo in the error message in mcp251xfd_tef_obj_read(), if trying to read too many objects. Link: https://lore.kernel.org/all/20220105154300.1258636-3-mkl@pengutronix.de Fixes: 55e5b97f003e ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN") Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index e0b322ab0362..abe00a085f6f 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1288,7 +1288,7 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv, len > tx_ring->obj_num || offset + len > tx_ring->obj_num)) { netdev_err(priv->ndev, - "Trying to read to many TEF objects (max=%d, offset=%d, len=%d).\n", + "Trying to read too many TEF objects (max=%d, offset=%d, len=%d).\n", tx_ring->obj_num, offset, len); return -ERANGE; } -- Gitee From 412baca26dce57e69bdf48070450ade2ee3713c1 Mon Sep 17 00:00:00 2001 From: Suresh Udipi Date: Fri, 13 Aug 2021 17:07:56 +0200 Subject: [PATCH 051/100] media: rcar-csi2: Optimize the selection PHTW register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.94 commit 81ac08a800b010d40c4bc8112089473c1b950cbd category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 549cc89cd09a85aaa16dc07ef3db811d5cf9bcb1 upstream. PHTW register is selected based on default bit rate from Table[1]. for the bit rates less than or equal to 250. Currently first value of default bit rate which is greater than or equal to the caculated mbps is selected. This selection can be further improved by selecting the default bit rate which is nearest to the calculated value. [1] specs r19uh0105ej0200-r-car-3rd-generation.pdf [Table 25.12] Fixes: 769afd212b16 ("media: rcar-csi2: add Renesas R-Car MIPI CSI-2 receiver driver") Signed-off-by: Suresh Udipi Signed-off-by: Michael Rodin Reviewed-by: Niklas Söderlund Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/media/platform/rcar-vin/rcar-csi2.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/rcar-vin/rcar-csi2.c b/drivers/media/platform/rcar-vin/rcar-csi2.c index 6fb8efcb4044..5e8e48a721a0 100644 --- a/drivers/media/platform/rcar-vin/rcar-csi2.c +++ b/drivers/media/platform/rcar-vin/rcar-csi2.c @@ -976,10 +976,17 @@ static int rcsi2_phtw_write_mbps(struct rcar_csi2 *priv, unsigned int mbps, const struct rcsi2_mbps_reg *values, u16 code) { const struct rcsi2_mbps_reg *value; + const struct rcsi2_mbps_reg *prev_value = NULL; - for (value = values; value->mbps; value++) + for (value = values; value->mbps; value++) { if (value->mbps >= mbps) break; + prev_value = value; + } + + if (prev_value && + ((mbps - prev_value->mbps) <= (value->mbps - mbps))) + value = prev_value; if (!value->mbps) { dev_err(priv->dev, "Unsupported PHY speed (%u Mbps)", mbps); -- Gitee From eaa9c2fe5f1c18490bbd5d5979218844f1c8baf4 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 19 Aug 2021 15:59:30 +0200 Subject: [PATCH 052/100] drm/vc4: hdmi: Make sure the device is powered with CEC stable inclusion from stable-5.10.94 commit 55b10b88ac8654fc2f31518aa349a2e643b37f18 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 20b0dfa86bef0e80b41b0e5ac38b92f23b6f27f9 upstream. Similarly to what we encountered with the detect hook with DRM, nothing actually prevents any of the CEC callback from being run while the HDMI output is disabled. However, this is an issue since any register access to the controller when it's powered down will result in a silent hang. Let's make sure we run the runtime_pm hooks when the CEC adapter is opened and closed by the userspace to avoid that issue. Fixes: 15b4511a4af6 ("drm/vc4: add HDMI CEC support") Reviewed-by: Dave Stevenson Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20210819135931.895976-6-maxime@cerno.tech Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/gpu/drm/vc4/vc4_hdmi.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 5d5c4e9a8621..9392de2679a1 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -1399,8 +1399,14 @@ static int vc4_hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable) struct vc4_hdmi *vc4_hdmi = cec_get_drvdata(adap); /* clock period in microseconds */ const u32 usecs = 1000000 / CEC_CLOCK_FREQ; - u32 val = HDMI_READ(HDMI_CEC_CNTRL_5); + u32 val; + int ret; + + ret = pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev); + if (ret) + return ret; + val = HDMI_READ(HDMI_CEC_CNTRL_5); val &= ~(VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET | VC4_HDMI_CEC_CNT_TO_4700_US_MASK | VC4_HDMI_CEC_CNT_TO_4500_US_MASK); @@ -1525,6 +1531,8 @@ static int vc4_hdmi_cec_init(struct vc4_hdmi *vc4_hdmi) if (ret < 0) goto err_delete_cec_adap; + pm_runtime_put(&vc4_hdmi->pdev->dev); + return 0; err_delete_cec_adap: -- Gitee From e33f9d1558064a3d014578409655884b6cb6c1ac Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 7 Nov 2021 18:19:23 +0000 Subject: [PATCH 053/100] media: correct MEDIA_TEST_SUPPORT help text stable inclusion from stable-5.10.94 commit f6736bd81db48abd9455e007f1f3ba7ec593fd48 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 09f4d1513267d0ab712f5d29e7bd136535748709 upstream. Fix grammar/wording in the help text for MEDIA_TEST_SUPPORT. Fixes: 4b32216adb01 ("media: split test drivers from platform directory") Signed-off-by: Randy Dunlap Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/media/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/media/Kconfig b/drivers/media/Kconfig index a6d073f2e036..d157af63be41 100644 --- a/drivers/media/Kconfig +++ b/drivers/media/Kconfig @@ -142,10 +142,10 @@ config MEDIA_TEST_SUPPORT prompt "Test drivers" if MEDIA_SUPPORT_FILTER default y if !MEDIA_SUPPORT_FILTER help - Those drivers should not be used on production Kernels, but - can be useful on debug ones. It enables several dummy drivers - that simulate a real hardware. Very useful to test userspace - applications and to validate if the subsystem core is doesn't + These drivers should not be used on production kernels, but + can be useful on debug ones. This option enables several dummy drivers + that simulate real hardware. Very useful to test userspace + applications and to validate if the subsystem core doesn't have regressions. Say Y if you want to use some virtual test driver. -- Gitee From 94ebf996f552ff892d53f46764a1faa36f386384 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 18 Nov 2021 10:09:52 +0000 Subject: [PATCH 054/100] Documentation: dmaengine: Correctly describe dmatest with channel unset stable inclusion from stable-5.10.94 commit d1e85fcd73b5c94dc5032a63ecb3df4cbceda82d category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit c61d7b2ef141abf81140756b45860a2306f395a2 upstream. Currently the documentation states that channels must be configured before running the dmatest. This has not been true since commit 6b41030fdc79 ("dmaengine: dmatest: Restore default for channel"). Fix accordingly. Fixes: 6b41030fdc79 ("dmaengine: dmatest: Restore default for channel") Signed-off-by: Daniel Thompson Link: https://lore.kernel.org/r/20211118100952.27268-3-daniel.thompson@linaro.org Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- Documentation/driver-api/dmaengine/dmatest.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/driver-api/dmaengine/dmatest.rst b/Documentation/driver-api/dmaengine/dmatest.rst index ee268d445d38..d2e1d8b58e7d 100644 --- a/Documentation/driver-api/dmaengine/dmatest.rst +++ b/Documentation/driver-api/dmaengine/dmatest.rst @@ -143,13 +143,14 @@ Part 5 - Handling channel allocation Allocating Channels ------------------- -Channels are required to be configured prior to starting the test run. -Attempting to run the test without configuring the channels will fail. +Channels do not need to be configured prior to starting a test run. Attempting +to run the test without configuring the channels will result in testing any +channels that are available. Example:: % echo 1 > /sys/module/dmatest/parameters/run - dmatest: Could not start test, no channels configured + dmatest: No channels configured, continue with any Channels are registered using the "channel" parameter. Channels can be requested by their name, once requested, the channel is registered and a pending thread is added to the test list. -- Gitee From 809cabeaecde929cef793c6dfb674785b38b40f9 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 1 Dec 2021 14:59:31 +0200 Subject: [PATCH 055/100] Documentation: ACPI: Fix data node reference documentation stable inclusion from stable-5.10.94 commit abecf9d748369d5d1f53ae868f96cae88f9875ef category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit a11174952205d082f1658fab4314f0caf706e0a8 upstream. The data node reference documentation was missing a package that must contain the property values, instead property name and multiple values being present in a single package. This is not aligned with the _DSD spec. Fix it by adding the package for the values. Also add the missing "reg" properties to two numbered nodes. Fixes: b10134a3643d ("ACPI: property: Document hierarchical data extension references") Signed-off-by: Sakari Ailus Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- .../firmware-guide/acpi/dsd/data-node-references.rst | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Documentation/firmware-guide/acpi/dsd/data-node-references.rst b/Documentation/firmware-guide/acpi/dsd/data-node-references.rst index 9b17dc77d18c..da0e46496fc4 100644 --- a/Documentation/firmware-guide/acpi/dsd/data-node-references.rst +++ b/Documentation/firmware-guide/acpi/dsd/data-node-references.rst @@ -5,7 +5,7 @@ Referencing hierarchical data nodes =================================== -:Copyright: |copy| 2018 Intel Corporation +:Copyright: |copy| 2018, 2021 Intel Corporation :Author: Sakari Ailus ACPI in general allows referring to device objects in the tree only. @@ -52,12 +52,14 @@ the ANOD object which is also the final target node of the reference. Name (NOD0, Package() { ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), Package () { + Package () { "reg", 0 }, Package () { "random-property", 3 }, } }) Name (NOD1, Package() { ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"), Package () { + Package () { "reg", 1 }, Package () { "anothernode", "ANOD" }, } }) @@ -74,7 +76,11 @@ the ANOD object which is also the final target node of the reference. Name (_DSD, Package () { ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), Package () { - Package () { "reference", ^DEV0, "node@1", "anothernode" }, + Package () { + "reference", Package () { + ^DEV0, "node@1", "anothernode" + } + }, } }) } -- Gitee From ba3d7a4b6e7fc0ea7c91bd6eb3ac4db20a942103 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Thu, 30 Dec 2021 18:19:40 +0100 Subject: [PATCH 056/100] Documentation: refer to config RANDOMIZE_BASE for kernel address-space randomization stable inclusion from stable-5.10.94 commit 5d38cbf66dd7df04f06be86d812c7e5861cd3a69 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 82ca67321f55a8d1da6ac3ed611da3c32818bb37 upstream. The config RANDOMIZE_SLAB does not exist, the authors probably intended to refer to the config RANDOMIZE_BASE, which provides kernel address-space randomization. They probably just confused SLAB with BASE (these two four-letter words coincidentally share three common letters), as they also point out the config SLAB_FREELIST_RANDOM as further randomization within the same sentence. Fix the reference of the config for kernel address-space randomization to the config that provides that. Fixes: 6e88559470f5 ("Documentation: Add section about CPU vulnerabilities for Spectre") Signed-off-by: Lukas Bulwahn Link: https://lore.kernel.org/r/20211230171940.27558-1-lukas.bulwahn@gmail.com Signed-off-by: Jonathan Corbet Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- Documentation/admin-guide/hw-vuln/spectre.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index a4cb79d0dd57..7e061ed449aa 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -491,7 +491,7 @@ Spectre variant 2 before invoking any firmware code to prevent Spectre variant 2 exploits using the firmware. - Using kernel address space randomization (CONFIG_RANDOMIZE_SLAB=y + Using kernel address space randomization (CONFIG_RANDOMIZE_BASE=y and CONFIG_SLAB_FREELIST_RANDOM=y in the kernel configuration) makes attacks on the kernel generally more difficult. -- Gitee From fa35860210f61c8a651a7c8b7917932fb9b855ee Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 18 Jan 2022 19:39:05 -0800 Subject: [PATCH 057/100] Documentation: fix firewire.rst ABI file path error stable inclusion from stable-5.10.94 commit 55698d11c8dacbfc9750803cf8fa398fb9a12af5 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit b0ac702f3329cdc8a06dcaac73183d4b5a2b942d upstream. Adjust the path of the ABI files for firewire.rst to prevent a documentation build error. Prevents this problem: Sphinx parallel build error: docutils.utils.SystemMessage: Documentation/driver-api/firewire.rst:22: (SEVERE/4) Problems with "include" directive path: InputError: [Errno 2] No such file or directory: '../Documentation/driver-api/ABI/stable/firewire-cdev'. Fixes: 2f4830ef96d2 ("FireWire: add driver-api Introduction section") Signed-off-by: Randy Dunlap Tested-by: Akira Yokosawa Link: https://lore.kernel.org/r/20220119033905.4779-1-rdunlap@infradead.org Signed-off-by: Jonathan Corbet Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- Documentation/driver-api/firewire.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/driver-api/firewire.rst b/Documentation/driver-api/firewire.rst index 94a2d7f01d99..d3cfa73cbb2b 100644 --- a/Documentation/driver-api/firewire.rst +++ b/Documentation/driver-api/firewire.rst @@ -19,7 +19,7 @@ of kernel interfaces is available via exported symbols in `firewire-core` module Firewire char device data structures ==================================== -.. include:: /ABI/stable/firewire-cdev +.. include:: ../ABI/stable/firewire-cdev :literal: .. kernel-doc:: include/uapi/linux/firewire-cdev.h @@ -28,7 +28,7 @@ Firewire char device data structures Firewire device probing and sysfs interfaces ============================================ -.. include:: /ABI/stable/sysfs-bus-firewire +.. include:: ../ABI/stable/sysfs-bus-firewire :literal: .. kernel-doc:: drivers/firewire/core-device.c -- Gitee From fc169044ed88926965907e81aad25d3fcc424d8a Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 1 Nov 2021 15:14:41 -0700 Subject: [PATCH 058/100] Bluetooth: hci_sync: Fix not setting adv set duration stable inclusion from stable-5.10.94 commit b30240911da4a19560daa51c1e1d328f9903dde7 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit f16a491c65d9eb19398b25aefc10c2d3313d17b3 upstream. 10bbffa3e88e attempted to fix the use of rotation duration as advertising duration but it didn't change the if condition which still uses the duration instead of the timeout. Fixes: 10bbffa3e88e ("Bluetooth: Fix using advertising instance duration as timeout") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- net/bluetooth/hci_request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 1a94ed2f8a4f..d965b7c66bd6 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -2118,7 +2118,7 @@ int __hci_req_enable_ext_advertising(struct hci_request *req, u8 instance) /* Set duration per instance since controller is responsible for * scheduling it. */ - if (adv_instance && adv_instance->duration) { + if (adv_instance && adv_instance->timeout) { u16 duration = adv_instance->timeout * MSEC_PER_SEC; /* Time = N * 10 ms */ -- Gitee From 6f1e629064a92c86c3ad67cfd4fd193bf17fd8b1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 29 Nov 2021 11:46:00 -0800 Subject: [PATCH 059/100] scsi: core: Show SCMD_LAST in text form stable inclusion from stable-5.10.94 commit 4fe77b7cd27229486b67c83df71019fe89d3951e category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 3369046e54ca8f82e0cb17740643da2d80d3cfa8 upstream. The SCSI debugfs code supports showing information about pending commands, including translating SCSI command flags from numeric into text format. Also convert the SCMD_LAST flag from numeric into text form. Link: https://lore.kernel.org/r/20211129194609.3466071-4-bvanassche@acm.org Fixes: 8930a6c20791 ("scsi: core: add support for request batching") Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/scsi/scsi_debugfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_debugfs.c b/drivers/scsi/scsi_debugfs.c index c19ea7ab54cb..d9a18124cfc9 100644 --- a/drivers/scsi/scsi_debugfs.c +++ b/drivers/scsi/scsi_debugfs.c @@ -10,6 +10,7 @@ static const char *const scsi_cmd_flags[] = { SCSI_CMD_FLAG_NAME(TAGGED), SCSI_CMD_FLAG_NAME(UNCHECKED_ISA_DMA), SCSI_CMD_FLAG_NAME(INITIALIZED), + SCSI_CMD_FLAG_NAME(LAST), }; #undef SCSI_CMD_FLAG_NAME -- Gitee From d3d1e76d2b8aabb92bd63e63cb3aa47ce5f50f9d Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Tue, 14 Dec 2021 13:42:43 +0900 Subject: [PATCH 060/100] dmaengine: uniphier-xdmac: Fix type of address variables stable inclusion from stable-5.10.94 commit 57cd8597c3ef8ba8d00aa9f39f506f45d949d29b category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 105a8c525675bb7d4d64871f9b2edf39460de881 upstream. The variables src_addr and dst_addr handle DMA addresses, so these should be declared as dma_addr_t. Fixes: 667b9251440b ("dmaengine: uniphier-xdmac: Add UniPhier external DMA controller driver") Signed-off-by: Kunihiko Hayashi Link: https://lore.kernel.org/r/1639456963-10232-1-git-send-email-hayashi.kunihiko@socionext.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/dma/uniphier-xdmac.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/dma/uniphier-xdmac.c b/drivers/dma/uniphier-xdmac.c index d6b8a202474f..290836b7e1be 100644 --- a/drivers/dma/uniphier-xdmac.c +++ b/drivers/dma/uniphier-xdmac.c @@ -131,8 +131,9 @@ uniphier_xdmac_next_desc(struct uniphier_xdmac_chan *xc) static void uniphier_xdmac_chan_start(struct uniphier_xdmac_chan *xc, struct uniphier_xdmac_desc *xd) { - u32 src_mode, src_addr, src_width; - u32 dst_mode, dst_addr, dst_width; + u32 src_mode, src_width; + u32 dst_mode, dst_width; + dma_addr_t src_addr, dst_addr; u32 val, its, tnum; enum dma_slave_buswidth buswidth; -- Gitee From 790c88adf78f881d400212912306c058405cfc2d Mon Sep 17 00:00:00 2001 From: Yixing Liu Date: Mon, 6 Dec 2021 21:36:52 +0800 Subject: [PATCH 061/100] RDMA/hns: Modify the mapping attribute of doorbell to device stable inclusion from stable-5.10.94 commit 885860717c29decb32bc8d2074bd7be066eaab87 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 39d5534b1302189c809e90641ffae8cbdc42a8fc upstream. It is more general for ARM device drivers to use the device attribute to map PCI BAR spaces. Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") Link: https://lore.kernel.org/r/20211206133652.27476-1-liangwenpeng@huawei.com Signed-off-by: Yixing Liu Signed-off-by: Wenpeng Liang Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/infiniband/hw/hns/hns_roce_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index c0249e4874a9..1e8b3e4ef1b1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -359,7 +359,7 @@ static int hns_roce_mmap(struct ib_ucontext *context, return rdma_user_mmap_io(context, vma, to_hr_ucontext(context)->uar.pfn, PAGE_SIZE, - pgprot_noncached(vma->vm_page_prot), + pgprot_device(vma->vm_page_prot), NULL); /* vm_pgoff: 1 -- TPTR */ -- Gitee From 36935aeb5372c5d66faa0e37074a5faee88d7772 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sat, 18 Dec 2021 19:23:20 +0800 Subject: [PATCH 062/100] RDMA/rxe: Fix a typo in opcode name stable inclusion from stable-5.10.94 commit 16ad0aa917c905982a6bdddd5f67a824f153501e category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 8d1cfb884e881efd69a3be4ef10772c71cb22216 upstream. There is a redundant ']' in the name of opcode IB_OPCODE_RC_SEND_MIDDLE, so just fix it. Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20211218112320.3558770-1-cgxu519@mykernel.net Signed-off-by: Chengguang Xu Acked-by: Zhu Yanjun Reviewed-by: Bob Pearson Signed-off-by: Jason Gunthorpe Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/infiniband/sw/rxe/rxe_opcode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c index 0cb4b01fd910..66ffb516bdaf 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.c +++ b/drivers/infiniband/sw/rxe/rxe_opcode.c @@ -110,7 +110,7 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { } }, [IB_OPCODE_RC_SEND_MIDDLE] = { - .name = "IB_OPCODE_RC_SEND_MIDDLE]", + .name = "IB_OPCODE_RC_SEND_MIDDLE", .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK | RXE_MIDDLE_MASK, .length = RXE_BTH_BYTES, -- Gitee From 28199767d31a7f81792925c6bed013dec2a5aa63 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Mon, 20 Dec 2021 17:58:27 +0100 Subject: [PATCH 063/100] dmaengine: stm32-mdma: fix STM32_MDMA_CTBR_TSEL_MASK stable inclusion from stable-5.10.94 commit 78cf5f63a3a98151ad7fa645103de0fa17c7317c category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit e7f110889a87307fb0fed408a5dee1707796ca04 upstream. This patch fixes STM32_MDMA_CTBR_TSEL_MASK, which is [5:0], not [7:0]. Fixes: a4ffb13c8946 ("dmaengine: Add STM32 MDMA driver") Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20211220165827.1238097-1-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/dma/stm32-mdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index 9d473923712a..fe36738f2dd7 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -184,7 +184,7 @@ #define STM32_MDMA_CTBR(x) (0x68 + 0x40 * (x)) #define STM32_MDMA_CTBR_DBUS BIT(17) #define STM32_MDMA_CTBR_SBUS BIT(16) -#define STM32_MDMA_CTBR_TSEL_MASK GENMASK(7, 0) +#define STM32_MDMA_CTBR_TSEL_MASK GENMASK(5, 0) #define STM32_MDMA_CTBR_TSEL(n) STM32_MDMA_SET(n, \ STM32_MDMA_CTBR_TSEL_MASK) -- Gitee From ef479b24d32c210b909eb47acb641d3e149e3c10 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Sun, 5 Dec 2021 11:20:59 +0200 Subject: [PATCH 064/100] Revert "net/mlx5: Add retry mechanism to the command entry index allocation" stable inclusion from stable-5.10.94 commit 4cb7aba1e0868585289c4ebf9151ec5c7e7606c8 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 4f6626b0e140867fd6d5a2e9d4ceaef97f10f46a upstream. This reverts commit 410bd754cd73c4a2ac3856d9a03d7b08f9c906bf. The reverted commit had added a retry mechanism to the command entry index allocation. The previous patch ensures that there is a free command entry index once the command work handler holds the command semaphore. Thus the retry mechanism is not needed. Fixes: 410bd754cd73 ("net/mlx5: Add retry mechanism to the command entry index allocation") Signed-off-by: Moshe Shemesh Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 21 +------------------ 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 20e3f8cd074a..6af0dd847169 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -887,25 +887,6 @@ static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode) return cmd->allowed_opcode == opcode; } -static int cmd_alloc_index_retry(struct mlx5_cmd *cmd) -{ - unsigned long alloc_end = jiffies + msecs_to_jiffies(1000); - int idx; - -retry: - idx = cmd_alloc_index(cmd); - if (idx < 0 && time_before(jiffies, alloc_end)) { - /* Index allocation can fail on heavy load of commands. This is a temporary - * situation as the current command already holds the semaphore, meaning that - * another command completion is being handled and it is expected to release - * the entry index soon. - */ - cpu_relax(); - goto retry; - } - return idx; -} - bool mlx5_cmd_is_down(struct mlx5_core_dev *dev) { return pci_channel_offline(dev->pdev) || @@ -930,7 +911,7 @@ static void cmd_work_handler(struct work_struct *work) sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); if (!ent->page_queue) { - alloc_ret = cmd_alloc_index_retry(cmd); + alloc_ret = cmd_alloc_index(cmd); if (alloc_ret < 0) { mlx5_core_err_rl(dev, "failed to allocate command entry\n"); if (ent->callback) { -- Gitee From a321da840d00fb8f30ab3b76bfdeb4eca76d0b3f Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 7 Dec 2021 12:02:28 +0100 Subject: [PATCH 065/100] powerpc/cell: Fix clang -Wimplicit-fallthrough warning stable inclusion from stable-5.10.94 commit 19aaef65194854e8e880084f52645fb77b54188d category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit e89257e28e844f5d1d39081bb901d9f1183a7705 upstream. Clang warns: arch/powerpc/platforms/cell/pervasive.c:81:2: error: unannotated fall-through between switch labels case SRR1_WAKEEE: ^ arch/powerpc/platforms/cell/pervasive.c:81:2: note: insert 'break;' to avoid fall-through case SRR1_WAKEEE: ^ break; 1 error generated. Clang is more pedantic than GCC, which does not warn when failing through to a case that is just break or return. Clang's version is more in line with the kernel's own stance in deprecated.rst. Add athe missing break to silence the warning. Fixes: 6e83985b0f6e ("powerpc/cbe: Do not process external or decremeter interrupts from sreset") Reported-by: Naresh Kamboju Signed-off-by: Anders Roxell Reviewed-by: Nathan Chancellor Reviewed-by: Arnd Bergmann Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211207110228.698956-1-anders.roxell@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- arch/powerpc/platforms/cell/pervasive.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c index 9068edef71f7..59999902e4a6 100644 --- a/arch/powerpc/platforms/cell/pervasive.c +++ b/arch/powerpc/platforms/cell/pervasive.c @@ -77,6 +77,7 @@ static int cbe_system_reset_exception(struct pt_regs *regs) switch (regs->msr & SRR1_WAKEMASK) { case SRR1_WAKEDEC: set_dec(1); + break; case SRR1_WAKEEE: /* * Handle these when interrupts get re-enabled and we take -- Gitee From fed105c5d050f396dc60eccb9f8f1a98977429a6 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Tue, 18 Jan 2022 22:50:52 +0100 Subject: [PATCH 066/100] powerpc/fsl/dts: Enable WA for erratum A-009885 on fman3l MDIO buses stable inclusion from stable-5.10.94 commit 5e59f885353e3b65dbb26c3bbb08cd8e4fa59043 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 0d375d610fa96524e2ee2b46830a46a7bfa92a9f upstream. This block is used in (at least) T1024 and T1040, including their variants like T1023 etc. Fixes: d55ad2967d89 ("powerpc/mpc85xx: Create dts components for the FSL QorIQ DPAA FMan") Signed-off-by: Tobias Waldekranz Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi index c90702b04a53..48e5cd61599c 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi @@ -79,6 +79,7 @@ mdio0: mdio@fc000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xfc000 0x1000>; + fsl,erratum-a009885; }; xmdio0: mdio@fd000 { @@ -86,6 +87,7 @@ xmdio0: mdio@fd000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xfd000 0x1000>; + fsl,erratum-a009885; }; }; -- Gitee From 990224a13466dfae4ab2fe0aea12d19acef414fe Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Mon, 29 Nov 2021 09:26:59 +0800 Subject: [PATCH 067/100] block: Fix fsync always failed if once failed stable inclusion from stable-5.10.94 commit 2bcab471a26fce0410c3d30d28af642db373ef8c category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 8a7518931baa8ea023700987f3db31cb0a80610b upstream. We do test with inject error fault base on v4.19, after test some time we found sync /dev/sda always failed. [root@localhost] sync /dev/sda sync: error syncing '/dev/sda': Input/output error scsi log as follows: [19069.812296] sd 0:0:0:0: [sda] tag#64 Send: scmd 0x00000000d03a0b6b [19069.812302] sd 0:0:0:0: [sda] tag#64 CDB: Synchronize Cache(10) 35 00 00 00 00 00 00 00 00 00 [19069.812533] sd 0:0:0:0: [sda] tag#64 Done: SUCCESS Result: hostbyte=DID_OK driverbyte=DRIVER_OK [19069.812536] sd 0:0:0:0: [sda] tag#64 CDB: Synchronize Cache(10) 35 00 00 00 00 00 00 00 00 00 [19069.812539] sd 0:0:0:0: [sda] tag#64 scsi host busy 1 failed 0 [19069.812542] sd 0:0:0:0: Notifying upper driver of completion (result 0) [19069.812546] sd 0:0:0:0: [sda] tag#64 sd_done: completed 0 of 0 bytes [19069.812549] sd 0:0:0:0: [sda] tag#64 0 sectors total, 0 bytes done. [19069.812564] print_req_error: I/O error, dev sda, sector 0 ftrace log as follows: rep-306069 [007] .... 19654.923315: block_bio_queue: 8,0 FWS 0 + 0 [rep] rep-306069 [007] .... 19654.923333: block_getrq: 8,0 FWS 0 + 0 [rep] kworker/7:1H-250 [007] .... 19654.923352: block_rq_issue: 8,0 FF 0 () 0 + 0 [kworker/7:1H] -0 [007] ..s. 19654.923562: block_rq_complete: 8,0 FF () 18446744073709551615 + 0 [0] -0 [007] d.s. 19654.923576: block_rq_complete: 8,0 WS () 0 + 0 [-5] As 8d6996630c03 introduce 'fq->rq_status', this data only update when 'flush_rq' reference count isn't zero. If flush request once failed and record error code in 'fq->rq_status'. If there is no chance to update 'fq->rq_status',then do fsync will always failed. To address this issue reset 'fq->rq_status' after return error code to upper layer. Fixes: 8d6996630c03("block: fix null pointer dereference in blk_mq_rq_timed_out()") Signed-off-by: Ye Bin Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20211129012659.1553733-1-yebin10@huawei.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- block/blk-flush.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block/blk-flush.c b/block/blk-flush.c index b38839de9408..82919829bc4d 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -235,8 +235,10 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) * avoiding use-after-free. */ WRITE_ONCE(flush_rq->state, MQ_RQ_IDLE); - if (fq->rq_status != BLK_STS_OK) + if (fq->rq_status != BLK_STS_OK) { error = fq->rq_status; + fq->rq_status = BLK_STS_OK; + } if (!q->elevator) { flush_rq->tag = BLK_MQ_NO_TAG; -- Gitee From 06f8cbd774d110c51dbbf46e2c2af664e2ee1a0f Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 10 Nov 2021 11:46:28 +0000 Subject: [PATCH 068/100] bpftool: Remove inclusion of utilities.mak from Makefiles stable inclusion from stable-5.10.94 commit 38ee417f59c893e770a6118072f5d52fe9082901 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 48f5aef4c458c19ab337eed8c95a6486cc014aa3 upstream. Bpftool's Makefile, and the Makefile for its documentation, both include scripts/utilities.mak, but they use none of the items defined in this file. Remove the includes. Fixes: 71bb428fe2c1 ("tools: bpf: add bpftool") Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20211110114632.24537-3-quentin@isovalent.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- tools/bpf/bpftool/Documentation/Makefile | 1 - tools/bpf/bpftool/Makefile | 1 - 2 files changed, 2 deletions(-) diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index f33cb02de95c..3601b1d1974c 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only include ../../../scripts/Makefile.include -include ../../../scripts/utilities.mak INSTALL ?= install RM ?= rm -f diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index f60e6ad3a1df..1896ef69b449 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only include ../../scripts/Makefile.include -include ../../scripts/utilities.mak ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(CURDIR))) -- Gitee From 7adb9e1777999da05e81ab0b4c55497736ba155e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 7 Jan 2022 23:11:13 +0100 Subject: [PATCH 069/100] xdp: check prog type before updating BPF link MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.94 commit 58fa3e900255d61684ea5dfce0302b4b59d39c24 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 382778edc8262b7535f00523e9eb22edba1b9816 upstream. The bpf_xdp_link_update() function didn't check the program type before updating the program, which made it possible to install any program type as an XDP program, which is obviously not good. Syzbot managed to trigger this by swapping in an LWT program on the XDP hook which would crash in a helper call. Fix this by adding a check and bailing out if the types don't match. Fixes: 026a4c28e1db ("bpf, xdp: Implement LINK_UPDATE for BPF XDP link") Reported-by: syzbot+983941aa85af6ded1fd9@syzkaller.appspotmail.com Acked-by: Andrii Nakryiko Signed-off-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20220107221115.326171-1-toke@redhat.com Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- net/core/dev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index a8f3dedafed8..6b2cd690644c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9339,6 +9339,12 @@ static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog, goto out_unlock; } old_prog = link->prog; + if (old_prog->type != new_prog->type || + old_prog->expected_attach_type != new_prog->expected_attach_type) { + err = -EINVAL; + goto out_unlock; + } + if (old_prog == new_prog) { /* no-op, don't disturb drivers */ bpf_prog_put(new_prog); -- Gitee From 5649c8f5869f28507d88d5467d3c0b824ec6f337 Mon Sep 17 00:00:00 2001 From: German Gomez Date: Tue, 18 Jan 2022 14:40:54 +0000 Subject: [PATCH 070/100] perf evsel: Override attr->sample_period for non-libpfm4 events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.94 commit 10e99ae9b5da7e6a5304db3a926f6a460ee850d2 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 3606c0e1a1050d397ad759a62607e419fd8b0ccb upstream. A previous patch preventing "attr->sample_period" values from being overridden in pfm events changed a related behaviour in arm-spe. Before said patch: perf record -c 10000 -e arm_spe_0// -- sleep 1 Would yield an SPE event with period=10000. After the patch, the period in "-c 10000" was being ignored because the arm-spe code initializes sample_period to a non-zero value. This patch restores the previous behaviour for non-libpfm4 events. Fixes: ae5dcc8abe31 (“perf record: Prevent override of attr->sample_period for libpfm4 events”) Reported-by: Chase Conklin Signed-off-by: German Gomez Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Song Liu Cc: Stephane Eranian Cc: Yonghong Song Cc: bpf@vger.kernel.org Cc: netdev@vger.kernel.org Link: http://lore.kernel.org/lkml/20220118144054.2541-1-german.gomez@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- tools/perf/util/evsel.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1cad6051d8b0..1a1cbd16d76d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1014,6 +1014,17 @@ struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evs return found_term; } +static void evsel__set_default_freq_period(struct record_opts *opts, + struct perf_event_attr *attr) +{ + if (opts->freq) { + attr->freq = 1; + attr->sample_freq = opts->freq; + } else { + attr->sample_period = opts->default_interval; + } +} + /* * The enable_on_exec/disabled value strategy: * @@ -1080,14 +1091,12 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, * We default some events to have a default interval. But keep * it a weak assumption overridable by the user. */ - if (!attr->sample_period) { - if (opts->freq) { - attr->freq = 1; - attr->sample_freq = opts->freq; - } else { - attr->sample_period = opts->default_interval; - } - } + if ((evsel->is_libpfm_event && !attr->sample_period) || + (!evsel->is_libpfm_event && (!attr->sample_period || + opts->user_freq != UINT_MAX || + opts->user_interval != ULLONG_MAX))) + evsel__set_default_freq_period(opts, attr); + /* * If attr->freq was set (here or earlier), ask for period * to be sampled. -- Gitee From ab448ce09c2e58c64c03b25c4f2ce042d5f72e7a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Jan 2022 01:02:20 -0800 Subject: [PATCH 071/100] ipv4: update fib_info_cnt under spinlock protection stable inclusion from stable-5.10.94 commit 86f0587f74320412e19db0bc341ef418b2d1d114 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 0a6e6b3c7db6c34e3d149f09cd714972f8753e3f upstream. In the past, free_fib_info() was supposed to be called under RTNL protection. This eventually was no longer the case. Instead of enforcing RTNL it seems we simply can move fib_info_cnt changes to occur when fib_info_lock is held. v2: David Laight suggested to update fib_info_cnt only when an entry is added/deleted to/from the hash table, as fib_info_cnt is used to make sure hash table size is optimal. BUG: KCSAN: data-race in fib_create_info / free_fib_info write to 0xffffffff86e243a0 of 4 bytes by task 26429 on cpu 0: fib_create_info+0xe78/0x3440 net/ipv4/fib_semantics.c:1428 fib_table_insert+0x148/0x10c0 net/ipv4/fib_trie.c:1224 fib_magic+0x195/0x1e0 net/ipv4/fib_frontend.c:1087 fib_add_ifaddr+0xd0/0x2e0 net/ipv4/fib_frontend.c:1109 fib_netdev_event+0x178/0x510 net/ipv4/fib_frontend.c:1466 notifier_call_chain kernel/notifier.c:83 [inline] raw_notifier_call_chain+0x53/0xb0 kernel/notifier.c:391 __dev_notify_flags+0x1d3/0x3b0 dev_change_flags+0xa2/0xc0 net/core/dev.c:8872 do_setlink+0x810/0x2410 net/core/rtnetlink.c:2719 rtnl_group_changelink net/core/rtnetlink.c:3242 [inline] __rtnl_newlink net/core/rtnetlink.c:3396 [inline] rtnl_newlink+0xb10/0x13b0 net/core/rtnetlink.c:3506 rtnetlink_rcv_msg+0x745/0x7e0 net/core/rtnetlink.c:5571 netlink_rcv_skb+0x14e/0x250 net/netlink/af_netlink.c:2496 rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:5589 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x5fc/0x6c0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x726/0x840 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg net/socket.c:724 [inline] ____sys_sendmsg+0x39a/0x510 net/socket.c:2409 ___sys_sendmsg net/socket.c:2463 [inline] __sys_sendmsg+0x195/0x230 net/socket.c:2492 __do_sys_sendmsg net/socket.c:2501 [inline] __se_sys_sendmsg net/socket.c:2499 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2499 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffffffff86e243a0 of 4 bytes by task 31505 on cpu 1: free_fib_info+0x35/0x80 net/ipv4/fib_semantics.c:252 fib_info_put include/net/ip_fib.h:575 [inline] nsim_fib4_rt_destroy drivers/net/netdevsim/fib.c:294 [inline] nsim_fib4_rt_replace drivers/net/netdevsim/fib.c:403 [inline] nsim_fib4_rt_insert drivers/net/netdevsim/fib.c:431 [inline] nsim_fib4_event drivers/net/netdevsim/fib.c:461 [inline] nsim_fib_event drivers/net/netdevsim/fib.c:881 [inline] nsim_fib_event_work+0x15ca/0x2cf0 drivers/net/netdevsim/fib.c:1477 process_one_work+0x3fc/0x980 kernel/workqueue.c:2298 process_scheduled_works kernel/workqueue.c:2361 [inline] worker_thread+0x7df/0xa70 kernel/workqueue.c:2447 kthread+0x2c7/0x2e0 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 value changed: 0x00000d2d -> 0x00000d2e Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 31505 Comm: kworker/1:21 Not tainted 5.16.0-rc6-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: events nsim_fib_event_work Fixes: 48bb9eb47b27 ("netdevsim: fib: Add dummy implementation for FIB offload") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: David Laight Cc: Ido Schimmel Cc: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- net/ipv4/fib_semantics.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ab6a8f35d369..a13265e87da4 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -251,7 +251,6 @@ void free_fib_info(struct fib_info *fi) pr_warn("Freeing alive fib_info %p\n", fi); return; } - fib_info_cnt--; call_rcu(&fi->rcu, free_fib_info_rcu); } @@ -262,6 +261,10 @@ void fib_release_info(struct fib_info *fi) spin_lock_bh(&fib_info_lock); if (fi && --fi->fib_treeref == 0) { hlist_del(&fi->fib_hash); + + /* Paired with READ_ONCE() in fib_create_info(). */ + WRITE_ONCE(fib_info_cnt, fib_info_cnt - 1); + if (fi->fib_prefsrc) hlist_del(&fi->fib_lhash); if (fi->nh) { @@ -1431,7 +1434,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg, #endif err = -ENOBUFS; - if (fib_info_cnt >= fib_info_hash_size) { + + /* Paired with WRITE_ONCE() in fib_release_info() */ + if (READ_ONCE(fib_info_cnt) >= fib_info_hash_size) { unsigned int new_size = fib_info_hash_size << 1; struct hlist_head *new_info_hash; struct hlist_head *new_laddrhash; @@ -1463,7 +1468,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg, return ERR_PTR(err); } - fib_info_cnt++; fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; fi->fib_scope = cfg->fc_scope; @@ -1590,6 +1594,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg, fi->fib_treeref++; refcount_set(&fi->fib_clntref, 1); spin_lock_bh(&fib_info_lock); + fib_info_cnt++; hlist_add_head(&fi->fib_hash, &fib_info_hash[fib_info_hashfn(fi)]); if (fi->fib_prefsrc) { -- Gitee From 27470d11094bc3bed478bbb3116d19a153648946 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Jan 2022 02:04:12 -0800 Subject: [PATCH 072/100] ipv4: avoid quadratic behavior in netns dismantle stable inclusion from stable-5.10.94 commit 734f4b0f831e6d5fdad819a8832fb562b6cb8291 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit d07418afea8f1d9896aaf9dc5ae47ac4f45b220c upstream. net/ipv4/fib_semantics.c uses an hash table of 256 slots, keyed by device ifindexes: fib_info_devhash[DEVINDEX_HASHSIZE] Problem is that with network namespaces, devices tend to use the same ifindex. lo device for instance has a fixed ifindex of one, for all network namespaces. This means that hosts with thousands of netns spend a lot of time looking at some hash buckets with thousands of elements, notably at netns dismantle. Simply add a per netns perturbation (net_hash_mix()) to spread elements more uniformely. Also change fib_devindex_hashfn() to use more entropy. Fixes: aa79e66eee5d ("net: Make ifindex generation per-net namespace") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- net/ipv4/fib_semantics.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index a13265e87da4..838a876c168c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -321,11 +322,15 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi) static inline unsigned int fib_devindex_hashfn(unsigned int val) { - unsigned int mask = DEVINDEX_HASHSIZE - 1; + return hash_32(val, DEVINDEX_HASHBITS); +} + +static struct hlist_head * +fib_info_devhash_bucket(const struct net_device *dev) +{ + u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex; - return (val ^ - (val >> DEVINDEX_HASHBITS) ^ - (val >> (DEVINDEX_HASHBITS * 2))) & mask; + return &fib_info_devhash[fib_devindex_hashfn(val)]; } static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope, @@ -435,12 +440,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) { struct hlist_head *head; struct fib_nh *nh; - unsigned int hash; spin_lock(&fib_info_lock); - hash = fib_devindex_hashfn(dev->ifindex); - head = &fib_info_devhash[hash]; + head = fib_info_devhash_bucket(dev); + hlist_for_each_entry(nh, head, nh_hash) { if (nh->fib_nh_dev == dev && nh->fib_nh_gw4 == gw && @@ -1608,12 +1612,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg, } else { change_nexthops(fi) { struct hlist_head *head; - unsigned int hash; if (!nexthop_nh->fib_nh_dev) continue; - hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex); - head = &fib_info_devhash[hash]; + head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev); hlist_add_head(&nexthop_nh->nh_hash, head); } endfor_nexthops(fi) } @@ -1963,8 +1965,7 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig) void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) { - unsigned int hash = fib_devindex_hashfn(dev->ifindex); - struct hlist_head *head = &fib_info_devhash[hash]; + struct hlist_head *head = fib_info_devhash_bucket(dev); struct fib_nh *nh; hlist_for_each_entry(nh, head, nh_hash) { @@ -1983,12 +1984,11 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) */ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) { - int ret = 0; - int scope = RT_SCOPE_NOWHERE; + struct hlist_head *head = fib_info_devhash_bucket(dev); struct fib_info *prev_fi = NULL; - unsigned int hash = fib_devindex_hashfn(dev->ifindex); - struct hlist_head *head = &fib_info_devhash[hash]; + int scope = RT_SCOPE_NOWHERE; struct fib_nh *nh; + int ret = 0; if (force) scope = -1; @@ -2133,7 +2133,6 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res) int fib_sync_up(struct net_device *dev, unsigned char nh_flags) { struct fib_info *prev_fi; - unsigned int hash; struct hlist_head *head; struct fib_nh *nh; int ret; @@ -2149,8 +2148,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags) } prev_fi = NULL; - hash = fib_devindex_hashfn(dev->ifindex); - head = &fib_info_devhash[hash]; + head = fib_info_devhash_bucket(dev); ret = 0; hlist_for_each_entry(nh, head, nh_hash) { -- Gitee From 565181c2e55315e0b78fa271b5a62c5aac2fd35a Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Tue, 18 Jan 2022 22:50:50 +0100 Subject: [PATCH 073/100] net/fsl: xgmac_mdio: Add workaround for erratum A-009885 stable inclusion from stable-5.10.94 commit 38c798384b903e0c38f7978c1fbf942e36213b8d category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 6198c722019774d38018457a8bfb9ba3ed8c931e upstream. Once an MDIO read transaction is initiated, we must read back the data register within 16 MDC cycles after the transaction completes. Outside of this window, reads may return corrupt data. Therefore, disable local interrupts in the critical section, to maximize the probability that we can satisfy this requirement. Fixes: d55ad2967d89 ("powerpc/mpc85xx: Create dts components for the FSL QorIQ DPAA FMan") Signed-off-by: Tobias Waldekranz Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/net/ethernet/freescale/xgmac_mdio.c | 25 ++++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c index bfa2826c5545..252a06a13a47 100644 --- a/drivers/net/ethernet/freescale/xgmac_mdio.c +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c @@ -49,6 +49,7 @@ struct tgec_mdio_controller { struct mdio_fsl_priv { struct tgec_mdio_controller __iomem *mdio_base; bool is_little_endian; + bool has_a009885; bool has_a011043; }; @@ -184,10 +185,10 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) { struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv; struct tgec_mdio_controller __iomem *regs = priv->mdio_base; + unsigned long flags; uint16_t dev_addr; uint32_t mdio_stat; uint32_t mdio_ctl; - uint16_t value; int ret; bool endian = priv->is_little_endian; @@ -219,12 +220,18 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) return ret; } + if (priv->has_a009885) + /* Once the operation completes, i.e. MDIO_STAT_BSY clears, we + * must read back the data register within 16 MDC cycles. + */ + local_irq_save(flags); + /* Initiate the read */ xgmac_write32(mdio_ctl | MDIO_CTL_READ, ®s->mdio_ctl, endian); ret = xgmac_wait_until_done(&bus->dev, regs, endian); if (ret) - return ret; + goto irq_restore; /* Return all Fs if nothing was there */ if ((xgmac_read32(®s->mdio_stat, endian) & MDIO_STAT_RD_ER) && @@ -232,13 +239,17 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) dev_dbg(&bus->dev, "Error while reading PHY%d reg at %d.%hhu\n", phy_id, dev_addr, regnum); - return 0xffff; + ret = 0xffff; + } else { + ret = xgmac_read32(®s->mdio_data, endian) & 0xffff; + dev_dbg(&bus->dev, "read %04x\n", ret); } - value = xgmac_read32(®s->mdio_data, endian) & 0xffff; - dev_dbg(&bus->dev, "read %04x\n", value); +irq_restore: + if (priv->has_a009885) + local_irq_restore(flags); - return value; + return ret; } static int xgmac_mdio_probe(struct platform_device *pdev) @@ -282,6 +293,8 @@ static int xgmac_mdio_probe(struct platform_device *pdev) priv->is_little_endian = device_property_read_bool(&pdev->dev, "little-endian"); + priv->has_a009885 = device_property_read_bool(&pdev->dev, + "fsl,erratum-a009885"); priv->has_a011043 = device_property_read_bool(&pdev->dev, "fsl,erratum-a011043"); -- Gitee From 7857c834a274c04132ac44137a5c06d3ae25cc68 Mon Sep 17 00:00:00 2001 From: Tobias Waldekranz Date: Tue, 18 Jan 2022 22:50:53 +0100 Subject: [PATCH 074/100] net/fsl: xgmac_mdio: Fix incorrect iounmap when removing module stable inclusion from stable-5.10.94 commit d806eb5f4e231c7c470b7e9d13277d9e69c80a67 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 3f7c239c7844d2044ed399399d97a5f1c6008e1b upstream. As reported by sparse: In the remove path, the driver would attempt to unmap its own priv pointer - instead of the io memory that it mapped in probe. Fixes: 9f35a7342cff ("net/fsl: introduce Freescale 10G MDIO driver") Signed-off-by: Tobias Waldekranz Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/net/ethernet/freescale/xgmac_mdio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c index 252a06a13a47..b7984a772e12 100644 --- a/drivers/net/ethernet/freescale/xgmac_mdio.c +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c @@ -320,9 +320,10 @@ static int xgmac_mdio_probe(struct platform_device *pdev) static int xgmac_mdio_remove(struct platform_device *pdev) { struct mii_bus *bus = platform_get_drvdata(pdev); + struct mdio_fsl_priv *priv = bus->priv; mdiobus_unregister(bus); - iounmap(bus->priv); + iounmap(priv->mdio_base); mdiobus_free(bus); return 0; -- Gitee From 8a820e06d32fbcca7e25cde8cb754982930d1bfb Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 20 Jan 2022 12:18:12 +0000 Subject: [PATCH 075/100] parisc: pdc_stable: Fix memory leak in pdcs_register_pathentries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-5.10.94 commit e1840365ed4f831f51dfd7b3af5e39045f3d62b7 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit d24846a4246b6e61ecbd036880a4adf61681d241 upstream. kobject_init_and_add() takes reference even when it fails. According to the doc of kobject_init_and_add(): If this function returns an error, kobject_put() must be called to properly clean up the memory associated with the object. Fix memory leak by calling kobject_put(). Fixes: 73f368cf679b ("Kobject: change drivers/parisc/pdc_stable.c to use kobject_init_and_add") Signed-off-by: Miaoqian Lin Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/parisc/pdc_stable.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c index e090978518f1..4760f82def6e 100644 --- a/drivers/parisc/pdc_stable.c +++ b/drivers/parisc/pdc_stable.c @@ -979,8 +979,10 @@ pdcs_register_pathentries(void) entry->kobj.kset = paths_kset; err = kobject_init_and_add(&entry->kobj, &ktype_pdcspath, NULL, "%s", entry->name); - if (err) + if (err) { + kobject_put(&entry->kobj); return err; + } /* kobject is now registered */ write_lock(&entry->rw_lock); -- Gitee From dd74186c80e9ef617e4ef810edc3806b3f537b96 Mon Sep 17 00:00:00 2001 From: Hyeong-Jun Kim Date: Fri, 10 Dec 2021 13:30:12 +0900 Subject: [PATCH 076/100] f2fs: compress: fix potential deadlock of compress file stable inclusion from stable-5.10.94 commit 39ad0581176dd62d4dbb0ec528d2d2fb54f11069 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 7377e853967ba45bf409e3b5536624d2cbc99f21 upstream. There is a potential deadlock between writeback process and a process performing write_begin() or write_cache_pages() while trying to write same compress file, but not compressable, as below: [Process A] - doing checkpoint [Process B] [Process C] f2fs_write_cache_pages() - lock_page() [all pages in cluster, 0-31] - f2fs_write_multi_pages() - f2fs_write_raw_pages() - f2fs_write_single_data_page() - f2fs_do_write_data_page() - return -EAGAIN [f2fs_trylock_op() failed] - unlock_page(page) [e.g., page 0] - generic_perform_write() - f2fs_write_begin() - f2fs_prepare_compress_overwrite() - prepare_compress_overwrite() - lock_page() [e.g., page 0] - lock_page() [e.g., page 1] - lock_page(page) [e.g., page 0] Since there is no compress process, it is no longer necessary to hold locks on every pages in cluster within f2fs_write_raw_pages(). This patch changes f2fs_write_raw_pages() to release all locks first and then perform write same as the non-compress file in f2fs_write_cache_pages(). Fixes: 4c8ff7095bef ("f2fs: support data compression") Signed-off-by: Hyeong-Jun Kim Signed-off-by: Sungjong Seo Signed-off-by: Youngjin Gil Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- fs/f2fs/compress.c | 50 ++++++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 30987ea011f1..ec542e8c46cc 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1362,25 +1362,38 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc, enum iostat_type io_type) { struct address_space *mapping = cc->inode->i_mapping; - int _submitted, compr_blocks, ret; - int i = -1, err = 0; + int _submitted, compr_blocks, ret, i; compr_blocks = f2fs_compressed_blocks(cc); - if (compr_blocks < 0) { - err = compr_blocks; - goto out_err; + + for (i = 0; i < cc->cluster_size; i++) { + if (!cc->rpages[i]) + continue; + + redirty_page_for_writepage(wbc, cc->rpages[i]); + unlock_page(cc->rpages[i]); } + if (compr_blocks < 0) + return compr_blocks; + for (i = 0; i < cc->cluster_size; i++) { if (!cc->rpages[i]) continue; retry_write: + lock_page(cc->rpages[i]); + if (cc->rpages[i]->mapping != mapping) { +continue_unlock: unlock_page(cc->rpages[i]); continue; } - BUG_ON(!PageLocked(cc->rpages[i])); + if (!PageDirty(cc->rpages[i])) + goto continue_unlock; + + if (!clear_page_dirty_for_io(cc->rpages[i])) + goto continue_unlock; ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted, NULL, NULL, wbc, io_type, @@ -1395,26 +1408,15 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc, * avoid deadlock caused by cluster update race * from foreground operation. */ - if (IS_NOQUOTA(cc->inode)) { - err = 0; - goto out_err; - } + if (IS_NOQUOTA(cc->inode)) + return 0; ret = 0; cond_resched(); congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); - lock_page(cc->rpages[i]); - - if (!PageDirty(cc->rpages[i])) { - unlock_page(cc->rpages[i]); - continue; - } - - clear_page_dirty_for_io(cc->rpages[i]); goto retry_write; } - err = ret; - goto out_err; + return ret; } *submitted += _submitted; @@ -1423,14 +1425,6 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc, f2fs_balance_fs(F2FS_M_SB(mapping), true); return 0; -out_err: - for (++i; i < cc->cluster_size; i++) { - if (!cc->rpages[i]) - continue; - redirty_page_for_writepage(wbc, cc->rpages[i]); - unlock_page(cc->rpages[i]); - } - return err; } int f2fs_write_multi_pages(struct compress_ctx *cc, -- Gitee From c1e0ad85b9759b576ac9289f89d6f089d00bacef Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 11 Dec 2021 21:27:36 +0800 Subject: [PATCH 077/100] f2fs: fix to reserve space for IO align feature stable inclusion from stable-5.10.94 commit a49e402f23096d21362b25d7224206a45288072a category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 300a842937fbcfb5a189cea9ba15374fdb0b5c6b upstream. https://bugzilla.kernel.org/show_bug.cgi?id=204137 With below script, we will hit panic during new segment allocation: DISK=bingo.img MOUNT_DIR=/mnt/f2fs dd if=/dev/zero of=$DISK bs=1M count=105 mkfs.f2fe -a 1 -o 19 -t 1 -z 1 -f -q $DISK mount -t f2fs $DISK $MOUNT_DIR -o "noinline_dentry,flush_merge,noextent_cache,mode=lfs,io_bits=7,fsync_mode=strict" for (( i = 0; i < 4096; i++ )); do name=`head /dev/urandom | tr -dc A-Za-z0-9 | head -c 10` mkdir $MOUNT_DIR/$name done umount $MOUNT_DIR rm $DISK Signed-off-by: wenfei --- fs/f2fs/f2fs.h | 11 +++++++++++ fs/f2fs/segment.h | 3 ++- fs/f2fs/super.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/sysfs.c | 4 +++- 4 files changed, 60 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 407673f7564d..48782861cf62 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -976,6 +976,7 @@ struct f2fs_sm_info { unsigned int segment_count; /* total # of segments */ unsigned int main_segments; /* # of segments in main area */ unsigned int reserved_segments; /* # of reserved segments */ + unsigned int additional_reserved_segments;/* reserved segs for IO align feature */ unsigned int ovp_segments; /* # of overprovision segments */ /* a threshold to reclaim prefree segments */ @@ -2023,6 +2024,11 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, if (!__allow_reserved_blocks(sbi, inode, true)) avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; + + if (F2FS_IO_ALIGNED(sbi)) + avail_user_block_count -= sbi->blocks_per_seg * + SM_I(sbi)->additional_reserved_segments; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { if (avail_user_block_count > sbi->unusable_block_count) avail_user_block_count -= sbi->unusable_block_count; @@ -2268,6 +2274,11 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, if (!__allow_reserved_blocks(sbi, inode, false)) valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks; + + if (F2FS_IO_ALIGNED(sbi)) + valid_block_count += sbi->blocks_per_seg * + SM_I(sbi)->additional_reserved_segments; + user_block_count = sbi->user_block_count; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) user_block_count -= sbi->unusable_block_count; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index fa18a6b6fc4c..366671ca89aa 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -557,7 +557,8 @@ static inline unsigned int free_segments(struct f2fs_sb_info *sbi) static inline unsigned int reserved_segments(struct f2fs_sb_info *sbi) { - return SM_I(sbi)->reserved_segments; + return SM_I(sbi)->reserved_segments + + SM_I(sbi)->additional_reserved_segments; } static inline unsigned int free_sections(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 71e33b65430e..b124193e7631 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -306,6 +306,46 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).s_resgid)); } +static inline int adjust_reserved_segment(struct f2fs_sb_info *sbi) +{ + unsigned int sec_blks = sbi->blocks_per_seg * sbi->segs_per_sec; + unsigned int avg_vblocks; + unsigned int wanted_reserved_segments; + block_t avail_user_block_count; + + if (!F2FS_IO_ALIGNED(sbi)) + return 0; + + /* average valid block count in section in worst case */ + avg_vblocks = sec_blks / F2FS_IO_SIZE(sbi); + + /* + * we need enough free space when migrating one section in worst case + */ + wanted_reserved_segments = (F2FS_IO_SIZE(sbi) / avg_vblocks) * + reserved_segments(sbi); + wanted_reserved_segments -= reserved_segments(sbi); + + avail_user_block_count = sbi->user_block_count - + sbi->current_reserved_blocks - + F2FS_OPTION(sbi).root_reserved_blocks; + + if (wanted_reserved_segments * sbi->blocks_per_seg > + avail_user_block_count) { + f2fs_err(sbi, "IO align feature can't grab additional reserved segment: %u, available segments: %u", + wanted_reserved_segments, + avail_user_block_count >> sbi->log_blocks_per_seg); + return -ENOSPC; + } + + SM_I(sbi)->additional_reserved_segments = wanted_reserved_segments; + + f2fs_info(sbi, "IO align feature needs additional reserved segment: %u", + wanted_reserved_segments); + + return 0; +} + static inline void adjust_unusable_cap_perc(struct f2fs_sb_info *sbi) { if (!F2FS_OPTION(sbi).unusable_cap_perc) @@ -3792,6 +3832,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_nm; } + err = adjust_reserved_segment(sbi); + if (err) + goto free_nm; + /* For write statistics */ if (sb->s_bdev->bd_part) sbi->sectors_written_start = diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index c90280c3168f..e67e19ecc214 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -337,7 +337,9 @@ static ssize_t __sbi_store(struct f2fs_attr *a, if (a->struct_type == RESERVED_BLOCKS) { spin_lock(&sbi->stat_lock); if (t > (unsigned long)(sbi->user_block_count - - F2FS_OPTION(sbi).root_reserved_blocks)) { + F2FS_OPTION(sbi).root_reserved_blocks - + sbi->blocks_per_seg * + SM_I(sbi)->additional_reserved_segments)) { spin_unlock(&sbi->stat_lock); return -EINVAL; } -- Gitee From df55744b83a93a51c5b78dd5a521064fa2fee6ab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 14 Jan 2022 08:43:28 -0800 Subject: [PATCH 078/100] af_unix: annote lockless accesses to unix_tot_inflight & gc_in_progress stable inclusion from stable-5.10.94 commit 38221afa03af6d40d19f69607b21e3ccf9118d3a category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 9d6d7f1cb67cdee15f1a0e85aacfb924e0e02435 upstream. wait_for_unix_gc() reads unix_tot_inflight & gc_in_progress without synchronization. Adds READ_ONCE()/WRITE_ONCE() and their associated comments to better document the intent. BUG: KCSAN: data-race in unix_inflight / wait_for_unix_gc write to 0xffffffff86e2b7c0 of 4 bytes by task 9380 on cpu 0: unix_inflight+0x1e8/0x260 net/unix/scm.c:63 unix_attach_fds+0x10c/0x1e0 net/unix/scm.c:121 unix_scm_to_skb net/unix/af_unix.c:1674 [inline] unix_dgram_sendmsg+0x679/0x16b0 net/unix/af_unix.c:1817 unix_seqpacket_sendmsg+0xcc/0x110 net/unix/af_unix.c:2258 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg net/socket.c:724 [inline] ____sys_sendmsg+0x39a/0x510 net/socket.c:2409 ___sys_sendmsg net/socket.c:2463 [inline] __sys_sendmmsg+0x267/0x4c0 net/socket.c:2549 __do_sys_sendmmsg net/socket.c:2578 [inline] __se_sys_sendmmsg net/socket.c:2575 [inline] __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2575 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffffffff86e2b7c0 of 4 bytes by task 9375 on cpu 1: wait_for_unix_gc+0x24/0x160 net/unix/garbage.c:196 unix_dgram_sendmsg+0x8e/0x16b0 net/unix/af_unix.c:1772 unix_seqpacket_sendmsg+0xcc/0x110 net/unix/af_unix.c:2258 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg net/socket.c:724 [inline] ____sys_sendmsg+0x39a/0x510 net/socket.c:2409 ___sys_sendmsg net/socket.c:2463 [inline] __sys_sendmmsg+0x267/0x4c0 net/socket.c:2549 __do_sys_sendmmsg net/socket.c:2578 [inline] __se_sys_sendmmsg net/socket.c:2575 [inline] __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2575 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x00000002 -> 0x00000004 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 9375 Comm: syz-executor.1 Not tainted 5.16.0-rc7-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 9915672d4127 ("af_unix: limit unix_tot_inflight") Signed-off-by: Eric Dumazet Reported-by: syzbot Link: https://lore.kernel.org/r/20220114164328.2038499-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- net/unix/garbage.c | 14 +++++++++++--- net/unix/scm.c | 6 ++++-- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 12e2ddaf887f..d45d5366115a 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -192,8 +192,11 @@ void wait_for_unix_gc(void) { /* If number of inflight sockets is insane, * force a garbage collect right now. + * Paired with the WRITE_ONCE() in unix_inflight(), + * unix_notinflight() and gc_in_progress(). */ - if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress) + if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC && + !READ_ONCE(gc_in_progress)) unix_gc(); wait_event(unix_gc_wait, gc_in_progress == false); } @@ -213,7 +216,9 @@ void unix_gc(void) if (gc_in_progress) goto out; - gc_in_progress = true; + /* Paired with READ_ONCE() in wait_for_unix_gc(). */ + WRITE_ONCE(gc_in_progress, true); + /* First, select candidates for garbage collection. Only * in-flight sockets are considered, and from those only ones * which don't have any external reference. @@ -299,7 +304,10 @@ void unix_gc(void) /* All candidates should have been detached by now. */ BUG_ON(!list_empty(&gc_candidates)); - gc_in_progress = false; + + /* Paired with READ_ONCE() in wait_for_unix_gc(). */ + WRITE_ONCE(gc_in_progress, false); + wake_up(&unix_gc_wait); out: diff --git a/net/unix/scm.c b/net/unix/scm.c index 052ae709ce28..aa27a02478dc 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -60,7 +60,8 @@ void unix_inflight(struct user_struct *user, struct file *fp) } else { BUG_ON(list_empty(&u->link)); } - unix_tot_inflight++; + /* Paired with READ_ONCE() in wait_for_unix_gc() */ + WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1); } user->unix_inflight++; spin_unlock(&unix_gc_lock); @@ -80,7 +81,8 @@ void unix_notinflight(struct user_struct *user, struct file *fp) if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); - unix_tot_inflight--; + /* Paired with READ_ONCE() in wait_for_unix_gc() */ + WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1); } user->unix_inflight--; spin_unlock(&unix_gc_lock); -- Gitee From 6431dc565b64338e0f71aeaa19622edcad272712 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 9 Dec 2021 17:34:05 -0800 Subject: [PATCH 079/100] clk: Emit a stern warning with writable debugfs enabled stable inclusion from stable-5.10.94 commit fe40f7aef387c90021bc415d258f22dee19f4909 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 489a71964f9d74e697a12cd0ace20ed829eb1f93 upstream. We don't want vendors to be enabling this part of the clk code and shipping it to customers. Exposing the ability to change clk frequencies and parents via debugfs is potentially damaging to the system if folks don't know what they're doing. Emit a strong warning so that the message is clear: don't enable this outside of development systems. Fixes: 37215da5553e ("clk: Add support for setting clk_rate via debugfs") Cc: Geert Uytterhoeven Link: https://lore.kernel.org/r/20211210014237.2130300-1-sboyd@kernel.org Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/clk/clk.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 515ef39c4610..b8a0e3d23698 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -3314,6 +3314,24 @@ static int __init clk_debug_init(void) { struct clk_core *core; +#ifdef CLOCK_ALLOW_WRITE_DEBUGFS + pr_warn("\n"); + pr_warn("********************************************************************\n"); + pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_warn("** **\n"); + pr_warn("** WRITEABLE clk DebugFS SUPPORT HAS BEEN ENABLED IN THIS KERNEL **\n"); + pr_warn("** **\n"); + pr_warn("** This means that this kernel is built to expose clk operations **\n"); + pr_warn("** such as parent or rate setting, enabling, disabling, etc. **\n"); + pr_warn("** to userspace, which may compromise security on your system. **\n"); + pr_warn("** **\n"); + pr_warn("** If you see this message and you are not debugging the **\n"); + pr_warn("** kernel, report this immediately to your vendor! **\n"); + pr_warn("** **\n"); + pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_warn("********************************************************************\n"); +#endif + rootdir = debugfs_create_dir("clk", NULL); debugfs_create_file("clk_summary", 0444, rootdir, &all_lists, -- Gitee From 0aac52d56352ff29ab001be9025a55db9806b78a Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Wed, 12 Jan 2022 14:38:16 -0600 Subject: [PATCH 080/100] clk: si5341: Fix clock HW provider cleanup stable inclusion from stable-5.10.94 commit 51b52cf354850e7560758740d072815485932139 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 49a8f2bc8d88702783c7e163ec84374e9a022f71 upstream. The call to of_clk_add_hw_provider was not undone on remove or on probe failure, which could cause an oops on a subsequent attempt to retrieve clocks for the removed device. Switch to the devm version of the function to avoid this issue. Fixes: 3044a860fd09 ("clk: Add Si5341/Si5340 driver") Signed-off-by: Robert Hancock Link: https://lore.kernel.org/r/20220112203816.1784610-1-robert.hancock@calian.com Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/clk/clk-si5341.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk-si5341.c b/drivers/clk/clk-si5341.c index eb22f4fdbc6b..772b48ad0cd7 100644 --- a/drivers/clk/clk-si5341.c +++ b/drivers/clk/clk-si5341.c @@ -1576,7 +1576,7 @@ static int si5341_probe(struct i2c_client *client, clk_prepare(data->clk[i].hw.clk); } - err = of_clk_add_hw_provider(client->dev.of_node, of_clk_si5341_get, + err = devm_of_clk_add_hw_provider(&client->dev, of_clk_si5341_get, data); if (err) { dev_err(&client->dev, "unable to add clk provider\n"); -- Gitee From 2f1a2fad490e72865b14efd5bb270511f1a04598 Mon Sep 17 00:00:00 2001 From: Wen Gu Date: Sun, 16 Jan 2022 15:43:42 +0800 Subject: [PATCH 081/100] net/smc: Fix hung_task when removing SMC-R devices stable inclusion from stable-5.10.94 commit a66b9bccf73259345f5e9bbc21531abf38f76f3c category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 56d99e81ecbc997a5f984684d0eeb583992b2072 upstream. A hung_task is observed when removing SMC-R devices. Suppose that a link group has two active links(lnk_A, lnk_B) associated with two different SMC-R devices(dev_A, dev_B). When dev_A is removed, the link group will be removed from smc_lgr_list and added into lgr_linkdown_list. lnk_A will be cleared and smcibdev(A)->lnk_cnt will reach to zero. However, when dev_B is removed then, the link group can't be found in smc_lgr_list and lnk_B won't be cleared, making smcibdev->lnk_cnt never reaches zero, which causes a hung_task. This patch fixes this issue by restoring the implementation of smc_smcr_terminate_all() to what it was before commit 349d43127dac ("net/smc: fix kernel panic caused by race of smc_sock"). The original implementation also satisfies the intention that make sure QP destroy earlier than CQ destroy because we will always wait for smcibdev->lnk_cnt reaches zero, which guarantees QP has been destroyed. Fixes: 349d43127dac ("net/smc: fix kernel panic caused by race of smc_sock") Signed-off-by: Wen Gu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- net/smc/smc_core.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 2a22dc85951e..4eb9ef9c2800 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1002,16 +1002,11 @@ void smc_smcd_terminate_all(struct smcd_dev *smcd) /* Called when an SMCR device is removed or the smc module is unloaded. * If smcibdev is given, all SMCR link groups using this device are terminated. * If smcibdev is NULL, all SMCR link groups are terminated. - * - * We must wait here for QPs been destroyed before we destroy the CQs, - * or we won't received any CQEs and cdc_pend_tx_wr cannot reach 0 thus - * smc_sock cannot be released. */ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) { struct smc_link_group *lgr, *lg; LIST_HEAD(lgr_free_list); - LIST_HEAD(lgr_linkdown_list); int i; spin_lock_bh(&smc_lgr_list.lock); @@ -1023,7 +1018,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) { for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { if (lgr->lnk[i].smcibdev == smcibdev) - list_move_tail(&lgr->list, &lgr_linkdown_list); + smcr_link_down_cond_sched(&lgr->lnk[i]); } } } @@ -1035,16 +1030,6 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) __smc_lgr_terminate(lgr, false); } - list_for_each_entry_safe(lgr, lg, &lgr_linkdown_list, list) { - for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (lgr->lnk[i].smcibdev == smcibdev) { - mutex_lock(&lgr->llc_conf_mutex); - smcr_link_down_cond(&lgr->lnk[i]); - mutex_unlock(&lgr->llc_conf_mutex); - } - } - } - if (smcibdev) { if (atomic_read(&smcibdev->lnk_cnt)) wait_event(smcibdev->lnks_deleted, -- Gitee From 50893a1c2d7fe4fc33eb30002bda97aae2131748 Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Tue, 18 Jan 2022 15:41:24 -0600 Subject: [PATCH 082/100] net: axienet: increase reset timeout stable inclusion from stable-5.10.94 commit 34942a228aec23ce65fbf4969f091f0f31803880 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 2e5644b1bab2ccea9cfc7a9520af95b94eb0dbf1 upstream. The previous timeout of 1ms was too short to handle some cases where the core is reset just after the input clocks were started, which will be introduced in an upcoming patch. Increase the timeout to 50ms. Also simplify the reset timeout checking to use read_poll_timeout. Fixes: 8a3b7a252dca9 ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") Signed-off-by: Robert Hancock Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- .../net/ethernet/xilinx/xilinx_axienet_main.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 69c79cc24e6e..137c3d33ea68 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -496,7 +496,8 @@ static void axienet_setoptions(struct net_device *ndev, u32 options) static int __axienet_device_reset(struct axienet_local *lp) { - u32 timeout; + u32 value; + int ret; /* Reset Axi DMA. This would reset Axi Ethernet core as well. The reset * process of Axi DMA takes a while to complete as all pending @@ -506,15 +507,13 @@ static int __axienet_device_reset(struct axienet_local *lp) * they both reset the entire DMA core, so only one needs to be used. */ axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, XAXIDMA_CR_RESET_MASK); - timeout = DELAY_OF_ONE_MILLISEC; - while (axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET) & - XAXIDMA_CR_RESET_MASK) { - udelay(1); - if (--timeout == 0) { - netdev_err(lp->ndev, "%s: DMA reset timeout!\n", - __func__); - return -ETIMEDOUT; - } + ret = read_poll_timeout(axienet_dma_in32, value, + !(value & XAXIDMA_CR_RESET_MASK), + DELAY_OF_ONE_MILLISEC, 50000, false, lp, + XAXIDMA_TX_CR_OFFSET); + if (ret) { + dev_err(lp->dev, "%s: DMA reset timeout!\n", __func__); + return ret; } return 0; -- Gitee From 90e22e142dafe233d70454729848bef2f4221580 Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Tue, 18 Jan 2022 15:41:25 -0600 Subject: [PATCH 083/100] net: axienet: Wait for PhyRstCmplt after core reset stable inclusion from stable-5.10.94 commit 46c0ccaff28516ccedc2afaa3ce1d0c4284022e0 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit b400c2f4f4c53c86594dd57098970d97d488bfde upstream. When resetting the device, wait for the PhyRstCmplt bit to be set in the interrupt status register before continuing initialization, to ensure that the core is actually ready. When using an external PHY, this also ensures we do not start trying to access the PHY while it is still in reset. The PHY reset is initiated by the core reset which is triggered just above, but remains asserted for 5ms after the core is reset according to the documentation. The MgtRdy bit could also be waited for, but unfortunately when using 7-series devices, the bit does not appear to work as documented (it seems to behave as some sort of link state indication and not just an indication the transceiver is ready) so it can't really be relied on for this purpose. Fixes: 8a3b7a252dca9 ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") Signed-off-by: Robert Hancock Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 137c3d33ea68..811f15eace9f 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -516,6 +516,16 @@ static int __axienet_device_reset(struct axienet_local *lp) return ret; } + /* Wait for PhyRstCmplt bit to be set, indicating the PHY reset has finished */ + ret = read_poll_timeout(axienet_ior, value, + value & XAE_INT_PHYRSTCMPLT_MASK, + DELAY_OF_ONE_MILLISEC, 50000, false, lp, + XAE_IS_OFFSET); + if (ret) { + dev_err(lp->dev, "%s: timeout waiting for PhyRstCmplt\n", __func__); + return ret; + } + return 0; } -- Gitee From 3ac7c74eef8b8e107ebc5989560c636c8d31ca83 Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Tue, 18 Jan 2022 15:41:26 -0600 Subject: [PATCH 084/100] net: axienet: reset core on initialization prior to MDIO access stable inclusion from stable-5.10.94 commit bcc5d57e6091ef2346eabcd6e2169353a8770ec8 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 04cc2da39698efd7eb2e30c112538922d26f848e upstream. In some cases where the Xilinx Ethernet core was used in 1000Base-X or SGMII modes, which use the internal PCS/PMA PHY, and the MGT transceiver clock source for the PCS was not running at the time the FPGA logic was loaded, the core would come up in a state where the PCS could not be found on the MDIO bus. To fix this, the Ethernet core (including the PCS) should be reset after enabling the clocks, prior to attempting to access the PCS using of_mdio_find_device. Fixes: 1a02556086fc (net: axienet: Properly handle PCS/PMA PHY for 1000BaseX mode) Signed-off-by: Robert Hancock Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 811f15eace9f..7b94784a1465 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -2024,6 +2024,11 @@ static int axienet_probe(struct platform_device *pdev) lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD; lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD; + /* Reset core now that clocks are enabled, prior to accessing MDIO */ + ret = __axienet_device_reset(lp); + if (ret) + goto cleanup_clk; + lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); if (lp->phy_node) { ret = axienet_mdio_setup(lp); -- Gitee From c99fc3dd3d5ff5942b1b462757a58668a3159329 Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Tue, 18 Jan 2022 15:41:27 -0600 Subject: [PATCH 085/100] net: axienet: add missing memory barriers stable inclusion from stable-5.10.94 commit 2f548489d64debf093f2299e38a8d0fc655f87b9 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 95978df6fa328df619c15312e65ece469c2be2d2 upstream. This driver was missing some required memory barriers: Use dma_rmb to ensure we see all updates to the descriptor after we see that an entry has been completed. Use wmb and rmb to avoid stale descriptor status between the TX path and TX complete IRQ path. Fixes: 8a3b7a252dca9 ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") Signed-off-by: Robert Hancock Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 7b94784a1465..478d80882f01 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -632,6 +632,8 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd, if (nr_bds == -1 && !(status & XAXIDMA_BD_STS_COMPLETE_MASK)) break; + /* Ensure we see complete descriptor update */ + dma_rmb(); phys = desc_get_phys_addr(lp, cur_p); dma_unmap_single(ndev->dev.parent, phys, (cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK), @@ -645,8 +647,10 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd, cur_p->app1 = 0; cur_p->app2 = 0; cur_p->app4 = 0; - cur_p->status = 0; cur_p->skb = NULL; + /* ensure our transmit path and device don't prematurely see status cleared */ + wmb(); + cur_p->status = 0; if (sizep) *sizep += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK; @@ -704,6 +708,9 @@ static inline int axienet_check_tx_bd_space(struct axienet_local *lp, int num_frag) { struct axidma_bd *cur_p; + + /* Ensure we see all descriptor updates from device or TX IRQ path */ + rmb(); cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num]; if (cur_p->status & XAXIDMA_BD_STS_ALL_MASK) return NETDEV_TX_BUSY; @@ -843,6 +850,8 @@ static void axienet_recv(struct net_device *ndev) tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci; + /* Ensure we see complete descriptor update */ + dma_rmb(); phys = desc_get_phys_addr(lp, cur_p); dma_unmap_single(ndev->dev.parent, phys, lp->max_frm_size, DMA_FROM_DEVICE); -- Gitee From e3ba7050a55f987eb8d13721eb5efd382f025a26 Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Tue, 18 Jan 2022 15:41:28 -0600 Subject: [PATCH 086/100] net: axienet: limit minimum TX ring size stable inclusion from stable-5.10.94 commit 7a3d3d7f6d7b3db6cabeb9d43ef207c3eda1b8a5 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 70f5817deddbc6ef3faa35841cab83c280cc653a upstream. The driver will not work properly if the TX ring size is set to below MAX_SKB_FRAGS + 1 since it needs to hold at least one full maximally fragmented packet in the TX ring. Limit setting the ring size to below this value. Fixes: 8b09ca823ffb4 ("net: axienet: Make RX/TX ring sizes configurable") Signed-off-by: Robert Hancock Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 478d80882f01..acfd7d545fef 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -43,6 +43,7 @@ /* Descriptors defines for Tx and Rx DMA */ #define TX_BD_NUM_DEFAULT 64 #define RX_BD_NUM_DEFAULT 1024 +#define TX_BD_NUM_MIN (MAX_SKB_FRAGS + 1) #define TX_BD_NUM_MAX 4096 #define RX_BD_NUM_MAX 4096 @@ -1373,7 +1374,8 @@ static int axienet_ethtools_set_ringparam(struct net_device *ndev, if (ering->rx_pending > RX_BD_NUM_MAX || ering->rx_mini_pending || ering->rx_jumbo_pending || - ering->rx_pending > TX_BD_NUM_MAX) + ering->tx_pending < TX_BD_NUM_MIN || + ering->tx_pending > TX_BD_NUM_MAX) return -EINVAL; if (netif_running(ndev)) -- Gitee From 0a05d304cd9b79656fe4a61b8c3c6b2d7a2f1329 Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Tue, 18 Jan 2022 15:41:29 -0600 Subject: [PATCH 087/100] net: axienet: Fix TX ring slot available check stable inclusion from stable-5.10.94 commit 6301f3566aef4e8e8ad6019e43ddb95ad6f65fc6 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 996defd7f8b5dafc1d480b7585c7c62437f80c3c upstream. The check for whether a TX ring slot was available was incorrect, since a slot which had been loaded with transmit data but the device had not started transmitting would be treated as available, potentially causing non-transmitted slots to be overwritten. The control field in the descriptor should be checked, rather than the status field (which may only be updated when the device completes the entry). Fixes: 8a3b7a252dca9 ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") Signed-off-by: Robert Hancock Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index acfd7d545fef..94e39750932c 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -643,7 +643,6 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd, if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK)) dev_consume_skb_irq(cur_p->skb); - cur_p->cntrl = 0; cur_p->app0 = 0; cur_p->app1 = 0; cur_p->app2 = 0; @@ -651,6 +650,7 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd, cur_p->skb = NULL; /* ensure our transmit path and device don't prematurely see status cleared */ wmb(); + cur_p->cntrl = 0; cur_p->status = 0; if (sizep) @@ -713,7 +713,7 @@ static inline int axienet_check_tx_bd_space(struct axienet_local *lp, /* Ensure we see all descriptor updates from device or TX IRQ path */ rmb(); cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num]; - if (cur_p->status & XAXIDMA_BD_STS_ALL_MASK) + if (cur_p->cntrl) return NETDEV_TX_BUSY; return 0; } -- Gitee From 8824d81f4fed49478578418a7817f52cdc1a541c Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Tue, 18 Jan 2022 15:41:30 -0600 Subject: [PATCH 088/100] net: axienet: fix number of TX ring slots for available check stable inclusion from stable-5.10.94 commit 41831d496772ce095735af9cbff3a17c5d082258 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit aba57a823d2985a2cc8c74a2535f3a88e68d9424 upstream. The check for the number of available TX ring slots was off by 1 since a slot is required for the skb header as well as each fragment. This could result in overwriting a TX ring slot that was still in use. Fixes: 8a3b7a252dca9 ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") Signed-off-by: Robert Hancock Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 94e39750932c..638f2fee8074 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -747,7 +747,7 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) num_frag = skb_shinfo(skb)->nr_frags; cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; - if (axienet_check_tx_bd_space(lp, num_frag)) { + if (axienet_check_tx_bd_space(lp, num_frag + 1)) { if (netif_queue_stopped(ndev)) return NETDEV_TX_BUSY; @@ -757,7 +757,7 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) smp_mb(); /* Space might have just been freed - check again */ - if (axienet_check_tx_bd_space(lp, num_frag)) + if (axienet_check_tx_bd_space(lp, num_frag + 1)) return NETDEV_TX_BUSY; netif_wake_queue(ndev); -- Gitee From 40a84870f60eaad8d3363bdce01640a1bc4a3fde Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Tue, 18 Jan 2022 15:41:31 -0600 Subject: [PATCH 089/100] net: axienet: fix for TX busy handling stable inclusion from stable-5.10.94 commit 557829d42d1f5cf77e743b0b8286940d0b1e4428 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit bb193e3db8b86a63f26889c99e14fd30c9ebd72a upstream. Network driver documentation indicates we should be avoiding returning NETDEV_TX_BUSY from ndo_start_xmit in normal cases, since it requires the packets to be requeued. Instead the queue should be stopped after a packet is added to the TX ring when there may not be enough room for an additional one. Also, when TX ring entries are completed, we should only wake the queue if we know there is room for another full maximally fragmented packet. Print a warning if there is insufficient space at the start of start_xmit, since this should no longer happen. Combined with increasing the default TX ring size (in a subsequent patch), this appears to recover the TX performance lost by previous changes to actually manage the TX ring state properly. Fixes: 8a3b7a252dca9 ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") Signed-off-by: Robert Hancock Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- .../net/ethernet/xilinx/xilinx_axienet_main.c | 86 ++++++++++--------- 1 file changed, 47 insertions(+), 39 deletions(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 638f2fee8074..2dd8126eee87 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -660,6 +660,32 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd, return i; } +/** + * axienet_check_tx_bd_space - Checks if a BD/group of BDs are currently busy + * @lp: Pointer to the axienet_local structure + * @num_frag: The number of BDs to check for + * + * Return: 0, on success + * NETDEV_TX_BUSY, if any of the descriptors are not free + * + * This function is invoked before BDs are allocated and transmission starts. + * This function returns 0 if a BD or group of BDs can be allocated for + * transmission. If the BD or any of the BDs are not free the function + * returns a busy status. This is invoked from axienet_start_xmit. + */ +static inline int axienet_check_tx_bd_space(struct axienet_local *lp, + int num_frag) +{ + struct axidma_bd *cur_p; + + /* Ensure we see all descriptor updates from device or TX IRQ path */ + rmb(); + cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num]; + if (cur_p->cntrl) + return NETDEV_TX_BUSY; + return 0; +} + /** * axienet_start_xmit_done - Invoked once a transmit is completed by the * Axi DMA Tx channel. @@ -689,33 +715,8 @@ static void axienet_start_xmit_done(struct net_device *ndev) /* Matches barrier in axienet_start_xmit */ smp_mb(); - netif_wake_queue(ndev); -} - -/** - * axienet_check_tx_bd_space - Checks if a BD/group of BDs are currently busy - * @lp: Pointer to the axienet_local structure - * @num_frag: The number of BDs to check for - * - * Return: 0, on success - * NETDEV_TX_BUSY, if any of the descriptors are not free - * - * This function is invoked before BDs are allocated and transmission starts. - * This function returns 0 if a BD or group of BDs can be allocated for - * transmission. If the BD or any of the BDs are not free the function - * returns a busy status. This is invoked from axienet_start_xmit. - */ -static inline int axienet_check_tx_bd_space(struct axienet_local *lp, - int num_frag) -{ - struct axidma_bd *cur_p; - - /* Ensure we see all descriptor updates from device or TX IRQ path */ - rmb(); - cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num]; - if (cur_p->cntrl) - return NETDEV_TX_BUSY; - return 0; + if (!axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) + netif_wake_queue(ndev); } /** @@ -748,19 +749,14 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; if (axienet_check_tx_bd_space(lp, num_frag + 1)) { - if (netif_queue_stopped(ndev)) - return NETDEV_TX_BUSY; - + /* Should not happen as last start_xmit call should have + * checked for sufficient space and queue should only be + * woken when sufficient space is available. + */ netif_stop_queue(ndev); - - /* Matches barrier in axienet_start_xmit_done */ - smp_mb(); - - /* Space might have just been freed - check again */ - if (axienet_check_tx_bd_space(lp, num_frag + 1)) - return NETDEV_TX_BUSY; - - netif_wake_queue(ndev); + if (net_ratelimit()) + netdev_warn(ndev, "TX ring unexpectedly full\n"); + return NETDEV_TX_BUSY; } if (skb->ip_summed == CHECKSUM_PARTIAL) { @@ -821,6 +817,18 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) if (++lp->tx_bd_tail >= lp->tx_bd_num) lp->tx_bd_tail = 0; + /* Stop queue if next transmit may not have space */ + if (axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) { + netif_stop_queue(ndev); + + /* Matches barrier in axienet_start_xmit_done */ + smp_mb(); + + /* Space might have just been freed - check again */ + if (!axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) + netif_wake_queue(ndev); + } + return NETDEV_TX_OK; } -- Gitee From 972138b37e799c12bf897ee248677a3a7d463595 Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Tue, 18 Jan 2022 15:41:32 -0600 Subject: [PATCH 090/100] net: axienet: increase default TX ring size to 128 stable inclusion from stable-5.10.94 commit d7544cf6939c1f978933992db483be4a6351494b category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 2d19c3fd80178160dd505ccd7fed1643831227a5 upstream. With previous changes to make the driver handle the TX ring size more correctly, the default TX ring size of 64 appears to significantly bottleneck TX performance to around 600 Mbps on a 1 Gbps link on ZynqMP. Increasing this to 128 seems to bring performance up to near line rate and shouldn't cause excess bufferbloat (this driver doesn't yet support modern byte-based queue management). Fixes: 8a3b7a252dca9 ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver") Signed-off-by: Robert Hancock Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 2dd8126eee87..0baf85122f5a 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -41,7 +41,7 @@ #include "xilinx_axienet.h" /* Descriptors defines for Tx and Rx DMA */ -#define TX_BD_NUM_DEFAULT 64 +#define TX_BD_NUM_DEFAULT 128 #define RX_BD_NUM_DEFAULT 1024 #define TX_BD_NUM_MIN (MAX_SKB_FRAGS + 1) #define TX_BD_NUM_MAX 4096 -- Gitee From 5447a4dafbb24b58b7e17945d997773b7afa66e3 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 7 Jan 2022 12:09:36 -0800 Subject: [PATCH 091/100] HID: vivaldi: fix handling devices not using numbered reports stable inclusion from stable-5.10.94 commit 8b8ff4c793ee3d412569033c8ef0bc5d98a87262 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 3fe6acd4dc922237b30e55473c9349c6ce0690f3 upstream. Unfortunately details of USB HID transport bled into HID core and handling of numbered/unnumbered reports is quite a mess, with hid_report_len() calculating the length according to USB rules, and hid_hw_raw_request() adding report ID to the buffer for both numbered and unnumbered reports. Untangling it all requres a lot of changes in HID, so for now let's handle this in the driver. [jkosina@suse.cz: microoptimize field->report->id to report->id] Fixes: 14c9c014babe ("HID: add vivaldi HID driver") Signed-off-by: Dmitry Torokhov Tested-by: Stephen Boyd # CoachZ Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/hid/hid-vivaldi.c | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/drivers/hid/hid-vivaldi.c b/drivers/hid/hid-vivaldi.c index 72957a9f7117..576518e704ee 100644 --- a/drivers/hid/hid-vivaldi.c +++ b/drivers/hid/hid-vivaldi.c @@ -74,10 +74,11 @@ static void vivaldi_feature_mapping(struct hid_device *hdev, struct hid_usage *usage) { struct vivaldi_data *drvdata = hid_get_drvdata(hdev); + struct hid_report *report = field->report; int fn_key; int ret; u32 report_len; - u8 *buf; + u8 *report_data, *buf; if (field->logical != HID_USAGE_FN_ROW_PHYSMAP || (usage->hid & HID_USAGE_PAGE) != HID_UP_ORDINAL) @@ -89,12 +90,24 @@ static void vivaldi_feature_mapping(struct hid_device *hdev, if (fn_key > drvdata->max_function_row_key) drvdata->max_function_row_key = fn_key; - buf = hid_alloc_report_buf(field->report, GFP_KERNEL); - if (!buf) + report_data = buf = hid_alloc_report_buf(report, GFP_KERNEL); + if (!report_data) return; - report_len = hid_report_len(field->report); - ret = hid_hw_raw_request(hdev, field->report->id, buf, + report_len = hid_report_len(report); + if (!report->id) { + /* + * hid_hw_raw_request() will stuff report ID (which will be 0) + * into the first byte of the buffer even for unnumbered + * reports, so we need to account for this to avoid getting + * -EOVERFLOW in return. + * Note that hid_alloc_report_buf() adds 7 bytes to the size + * so we can safely say that we have space for an extra byte. + */ + report_len++; + } + + ret = hid_hw_raw_request(hdev, report->id, report_data, report_len, HID_FEATURE_REPORT, HID_REQ_GET_REPORT); if (ret < 0) { @@ -103,7 +116,16 @@ static void vivaldi_feature_mapping(struct hid_device *hdev, goto out; } - ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, buf, + if (!report->id) { + /* + * Undo the damage from hid_hw_raw_request() for unnumbered + * reports. + */ + report_data++; + report_len--; + } + + ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, report_data, report_len, 0); if (ret) { dev_warn(&hdev->dev, "failed to report feature %d\n", -- Gitee From db1efb48035d52eca794598b94a52386603264f3 Mon Sep 17 00:00:00 2001 From: Laurence de Bruxelles Date: Sat, 1 Jan 2022 15:41:49 +0000 Subject: [PATCH 092/100] rtc: pxa: fix null pointer dereference stable inclusion from stable-5.10.94 commit c736ec01a2fc2f751c52ac6025a14a0e1308460b category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 34127b3632b21e5c391756e724b1198eb9917981 upstream. With the latest stable kernel versions the rtc on the PXA based Zaurus does not work, when booting I see the following kernel messages: pxa-rtc pxa-rtc: failed to find rtc clock source pxa-rtc pxa-rtc: Unable to init SA1100 RTC sub-device pxa-rtc: probe of pxa-rtc failed with error -2 hctosys: unable to open rtc device (rtc0) I think this is because commit f2997775b111 ("rtc: sa1100: fix possible race condition") moved the allocation of the rtc_device struct out of sa1100_rtc_init and into sa1100_rtc_probe. This means that pxa_rtc_probe also needs to do allocation for the rtc_device struct, otherwise sa1100_rtc_init will try to dereference a null pointer. This patch adds that allocation by copying how sa1100_rtc_probe in drivers/rtc/rtc-sa1100.c does it; after the IRQs are set up a managed rtc_device is allocated. I've tested this patch with `qemu-system-arm -machine akita` and with a real Zaurus SL-C1000 applied to 4.19, 5.4, and 5.10. Signed-off-by: Laurence de Bruxelles Fixes: f2997775b111 ("rtc: sa1100: fix possible race condition") Signed-off-by: Alexandre Belloni Link: https://lore.kernel.org/r/20220101154149.12026-1-lfdebrux@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/rtc/rtc-pxa.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c index d2f1d8f754bf..cf8119b6d320 100644 --- a/drivers/rtc/rtc-pxa.c +++ b/drivers/rtc/rtc-pxa.c @@ -330,6 +330,10 @@ static int __init pxa_rtc_probe(struct platform_device *pdev) if (sa1100_rtc->irq_alarm < 0) return -ENXIO; + sa1100_rtc->rtc = devm_rtc_allocate_device(&pdev->dev); + if (IS_ERR(sa1100_rtc->rtc)) + return PTR_ERR(sa1100_rtc->rtc); + pxa_rtc->base = devm_ioremap(dev, pxa_rtc->ress->start, resource_size(pxa_rtc->ress)); if (!pxa_rtc->base) { -- Gitee From 6923956e7c6b9193a0a4e756e12ae5c70c72ab71 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 30 Dec 2021 16:20:24 +0200 Subject: [PATCH 093/100] vdpa/mlx5: Fix wrong configuration of virtio_version_1_0 stable inclusion from stable-5.10.94 commit 0c4ebcb00d8871ed4b9916072d1d177c33bd43e3 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 97143b70aa847f2b0a1f959dde126b76ff7b5376 upstream. Remove overriding of virtio_version_1_0 which forced the virtqueue object to version 1. Fixes: 1a86b377aa21 ("vdpa/mlx5: Add VDPA driver for supported mlx5 devices") Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20211230142024.142979-1-elic@nvidia.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Parav Pandit Acked-by: Jason Wang Reviewed-by: Si-Wei Liu Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index fbdc9468818d..65d6f8fd81e7 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -812,8 +812,6 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id); MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size); MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn); - if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type)) - MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1); err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); if (err) -- Gitee From 7750257260a3a6cbfb3747e97beaea0b402b80b1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 6 Jan 2022 07:57:46 -0500 Subject: [PATCH 094/100] virtio_ring: mark ring unused on error stable inclusion from stable-5.10.94 commit 56daa21414e9cf66131de3458ba389a1f3a9c690 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 1861ba626ae9b98136f3e504208cdef6b29cd3ec upstream. A recently added error path does not mark ring unused when exiting on OOM, which will lead to BUG on the next entry in debug builds. TODO: refactor code so we have START_USE and END_USE in the same function. Fixes: fc6d70f40b3d ("virtio_ring: check desc == NULL when using indirect with packed") Cc: "Xuan Zhuo" Cc: Jiasheng Jiang Reviewed-by: Xuan Zhuo Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/virtio/virtio_ring.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index cce75d3b3ba0..3cc2a4ee7152 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1124,8 +1124,10 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, if (virtqueue_use_indirect(_vq, total_sg)) { err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, in_sgs, data, gfp); - if (err != -ENOMEM) + if (err != -ENOMEM) { + END_USE(vq); return err; + } /* fall back on direct */ } -- Gitee From 712b05e15df168bbb9dae4de88fb4279e5fc7480 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 3 Jan 2022 11:32:36 -0600 Subject: [PATCH 095/100] taskstats: Cleanup the use of task->exit_code stable inclusion from stable-5.10.94 commit 69e7e979ed668656551ca141dc235a756da32eb0 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 1b5a42d9c85f0e731f01c8d1129001fd8531a8a0 upstream. In the function bacct_add_task the code reading task->exit_code was introduced in commit f3cef7a99469 ("[PATCH] csa: basic accounting over taskstats"), and it is not entirely clear what the taskstats interface is trying to return as only returning the exit_code of the first task in a process doesn't make a lot of sense. As best as I can figure the intent is to return task->exit_code after a task exits. The field is returned with per task fields, so the exit_code of the entire process is not wanted. Only the value of the first task is returned so this is not a useful way to get the per task ptrace stop code. The ordinary case of returning this value is returning after a task exits, which also precludes use for getting a ptrace value. It is common to for the first task of a process to also be the last task of a process so this field may have done something reasonable by accident in testing. Make ac_exitcode a reliable per task value by always returning it for every exited task. Setting ac_exitcode in a sensible mannter makes it possible to continue to provide this value going forward. Cc: Balbir Singh Fixes: f3cef7a99469 ("[PATCH] csa: basic accounting over taskstats") Link: https://lkml.kernel.org/r/20220103213312.9144-5-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- kernel/tsacct.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 257ffb993ea2..fd2f7a052fdd 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -38,11 +38,10 @@ void bacct_add_tsk(struct user_namespace *user_ns, stats->ac_btime = clamp_t(time64_t, btime, 0, U32_MAX); stats->ac_btime64 = btime; - if (thread_group_leader(tsk)) { + if (tsk->flags & PF_EXITING) stats->ac_exitcode = tsk->exit_code; - if (tsk->flags & PF_FORKNOEXEC) - stats->ac_flag |= AFORK; - } + if (thread_group_leader(tsk) && (tsk->flags & PF_FORKNOEXEC)) + stats->ac_flag |= AFORK; if (tsk->flags & PF_SUPERPRIV) stats->ac_flag |= ASU; if (tsk->flags & PF_DUMPCORE) -- Gitee From 7e963e9d2f431926dd73f9feb9600eb49bd5970f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Jan 2022 01:22:29 -0800 Subject: [PATCH 096/100] inet: frags: annotate races around fqdir->dead and fqdir->high_thresh stable inclusion from stable-5.10.94 commit edc09548ffc52433ed25ef4a829e664e353f615d category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 91341fa0003befd097e190ec2a4bf63ad957c49a upstream. Both fields can be read/written without synchronization, add proper accessors and documentation. Fixes: d5dd88794a13 ("inet: fix various use-after-free in defrags units") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- include/net/inet_frag.h | 11 +++++++++-- include/net/ipv6_frag.h | 3 ++- net/ipv4/inet_fragment.c | 8 +++++--- net/ipv4/ip_fragment.c | 3 ++- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index bac79e817776..4cbd413e71a3 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -116,8 +116,15 @@ int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net); static inline void fqdir_pre_exit(struct fqdir *fqdir) { - fqdir->high_thresh = 0; /* prevent creation of new frags */ - fqdir->dead = true; + /* Prevent creation of new frags. + * Pairs with READ_ONCE() in inet_frag_find(). + */ + WRITE_ONCE(fqdir->high_thresh, 0); + + /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire() + * and ip6frag_expire_frag_queue(). + */ + WRITE_ONCE(fqdir->dead, true); } void fqdir_exit(struct fqdir *fqdir); diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 851029ecff13..0a4779175a52 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -67,7 +67,8 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) struct sk_buff *head; rcu_read_lock(); - if (fq->q.fqdir->dead) + /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */ + if (READ_ONCE(fq->q.fqdir->dead)) goto out_rcu_unlock; spin_lock(&fq->q.lock); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 10d31733297d..e0e8a65d561e 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -204,9 +204,9 @@ void inet_frag_kill(struct inet_frag_queue *fq) /* The RCU read lock provides a memory barrier * guaranteeing that if fqdir->dead is false then * the hash table destruction will not start until - * after we unlock. Paired with inet_frags_exit_net(). + * after we unlock. Paired with fqdir_pre_exit(). */ - if (!fqdir->dead) { + if (!READ_ONCE(fqdir->dead)) { rhashtable_remove_fast(&fqdir->rhashtable, &fq->node, fqdir->f->rhash_params); refcount_dec(&fq->refcnt); @@ -321,9 +321,11 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir, /* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key) { + /* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */ + long high_thresh = READ_ONCE(fqdir->high_thresh); struct inet_frag_queue *fq = NULL, *prev; - if (!fqdir->high_thresh || frag_mem_limit(fqdir) > fqdir->high_thresh) + if (!high_thresh || frag_mem_limit(fqdir) > high_thresh) return NULL; rcu_read_lock(); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index cfeb8890f94e..fad803d2d711 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -144,7 +144,8 @@ static void ip_expire(struct timer_list *t) rcu_read_lock(); - if (qp->q.fqdir->dead) + /* Paired with WRITE_ONCE() in fqdir_pre_exit(). */ + if (READ_ONCE(qp->q.fqdir->dead)) goto out_rcu_unlock; spin_lock(&qp->q.lock); -- Gitee From d99c68df9aba2fa0dc4eb05d90fbb4477a440c79 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 18 Jan 2022 03:43:40 -0800 Subject: [PATCH 097/100] netns: add schedule point in ops_exit_list() stable inclusion from stable-5.10.94 commit 2b1415c60b2e4aff93f790a7ae374dd6ef20d3a5 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 2836615aa22de55b8fca5e32fe1b27a67cda625e upstream. When under stress, cleanup_net() can have to dismantle netns in big numbers. ops_exit_list() currently calls many helpers [1] that have no schedule point, and we can end up with soft lockups, particularly on hosts with many cpus. Even for moderate amount of netns processed by cleanup_net() this patch avoids latency spikes. [1] Some of these helpers like fib_sync_up() and fib_sync_down_dev() are very slow because net/ipv4/fib_semantics.c uses host-wide hash tables, and ifindex is used as the only input of two hash functions. ifindexes tend to be the same for all netns (lo.ifindex==1 per instance) This will be fixed in a separate patch. Fixes: 72ad937abd0a ("net: Add support for batching network namespace cleanups") Signed-off-by: Eric Dumazet Cc: Eric W. Biederman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- net/core/net_namespace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index ac852db83de9..cbff7d94b993 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -183,8 +183,10 @@ static void ops_exit_list(const struct pernet_operations *ops, { struct net *net; if (ops->exit) { - list_for_each_entry(net, net_exit_list, exit_list) + list_for_each_entry(net, net_exit_list, exit_list) { ops->exit(net); + cond_resched(); + } } if (ops->exit_batch) ops->exit_batch(net_exit_list); -- Gitee From fe6d02de4c5df41b8b16f86a89a05b60823dde90 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 10 Jan 2022 14:43:06 +0100 Subject: [PATCH 098/100] xfrm: Don't accidentally set RTO_ONLINK in decode_session4() stable inclusion from stable-5.10.94 commit 7f2ca96bd26644517ab03357bcf2982cf79c2ff7 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit 23e7b1bfed61e301853b5e35472820d919498278 upstream. Similar to commit 94e2238969e8 ("xfrm4: strip ECN bits from tos field"), clear the ECN bits from iph->tos when setting ->flowi4_tos. This ensures that the last bit of ->flowi4_tos is cleared, so ip_route_output_key_hash() isn't going to restrict the scope of the route lookup. Use ~INET_ECN_MASK instead of IPTOS_RT_MASK, because we have no reason to clear the high order bits. Found by code inspection, compile tested only. Fixes: 4da3089f2b58 ("[IPSEC]: Use TOS when doing tunnel lookups") Signed-off-by: Guillaume Nault Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- net/xfrm/xfrm_policy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8fa258fbdcfc..e12de9b989f5 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -3297,7 +3298,7 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) fl4->flowi4_proto = iph->protocol; fl4->daddr = reverse ? iph->saddr : iph->daddr; fl4->saddr = reverse ? iph->daddr : iph->saddr; - fl4->flowi4_tos = iph->tos; + fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK; if (!ip_is_fragment(iph)) { switch (iph->protocol) { -- Gitee From 0462a2582fcfa5b3b3162e5d4987168e57d981d2 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 10 Jan 2022 14:43:09 +0100 Subject: [PATCH 099/100] gre: Don't accidentally set RTO_ONLINK in gre_fill_metadata_dst() stable inclusion from stable-5.10.94 commit cd5c24d2230f418159d46766b2705a6058cf8650 category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit f7716b318568b22fbf0e3be99279a979e217cf71 upstream. Mask the ECN bits before initialising ->flowi4_tos. The tunnel key may have the last ECN bit set, which will interfere with the route lookup process as ip_route_output_key_hash() interpretes this bit specially (to restrict the route scope). Found by code inspection, compile tested only. Fixes: 962924fa2b7a ("ip_gre: Refactor collect metatdata mode tunnel xmit to ip_md_tunnel_xmit") Signed-off-by: Guillaume Nault Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- net/ipv4/ip_gre.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index a9cc05043fa4..e4504dd510c6 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -599,8 +599,9 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) key = &info->key; ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src, - tunnel_id_to_key32(key->tun_id), key->tos, 0, - skb->mark, skb_get_hash(skb)); + tunnel_id_to_key32(key->tun_id), + key->tos & ~INET_ECN_MASK, 0, skb->mark, + skb_get_hash(skb)); rt = ip_route_output_key(dev_net(dev), &fl4); if (IS_ERR(rt)) return PTR_ERR(rt); -- Gitee From cfe21f6bfd06ef76941e4ad441e7c43abb294eb1 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 10 Jan 2022 14:43:11 +0100 Subject: [PATCH 100/100] libcxgb: Don't accidentally set RTO_ONLINK in cxgb_find_route() stable inclusion from stable-5.10.94 commit 7b9d40e9f60d3fdc2a130b943223308c338fc49d category: bugfix issue: #I4UU4A CVE: NA Signed-off-by: Wenfei --------------------------------------- commit a915deaa9abe4fb3a440312c954253a6a733608e upstream. Mask the ECN bits before calling ip_route_output_ports(). The tos variable might be passed directly from an IPv4 header, so it may have the last ECN bit set. This interferes with the route lookup process as ip_route_output_key_hash() interpretes this bit specially (to restrict the route scope). Found by code inspection, compile tested only. Fixes: 804c2f3e36ef ("libcxgb,iw_cxgb4,cxgbit: add cxgb_find_route()") Signed-off-by: Guillaume Nault Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: wenfei --- drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c index d04a6c163445..da8d10475a08 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c @@ -32,6 +32,7 @@ #include #include +#include #include #include @@ -99,7 +100,7 @@ cxgb_find_route(struct cxgb4_lld_info *lldi, rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, peer_port, local_port, IPPROTO_TCP, - tos, 0); + tos & ~INET_ECN_MASK, 0); if (IS_ERR(rt)) return NULL; n = dst_neigh_lookup(&rt->dst, &peer_ip); -- Gitee