From 13adaaf6afdd26acd7d6fdece0db1b337ffb22b9 Mon Sep 17 00:00:00 2001
From: Tzung-Bi Shih <tzungbi@google.com>
Date: Fri, 24 Dec 2021 14:47:16 +0800
Subject: [PATCH 001/100] ASoC: mediatek: mt8173: fix device_node leak

stable inclusion
from stable-5.10.94
commit f28672eef4a9f401b617ddd9d394aaa35d634c61
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

[ Upstream commit 493433785df0075afc0c106ab65f10a605d0b35d ]

Fixes the device_node leak.

Signed-off-by: Tzung-Bi Shih <tzungbi@google.com>
Link: https://lore.kernel.org/r/20211224064719.2031210-2-tzungbi@google.com
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 sound/soc/mediatek/mt8173/mt8173-max98090.c      | 3 +++
 sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c | 2 ++
 sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c | 2 ++
 sound/soc/mediatek/mt8173/mt8173-rt5650.c        | 2 ++
 4 files changed, 9 insertions(+)

diff --git a/sound/soc/mediatek/mt8173/mt8173-max98090.c b/sound/soc/mediatek/mt8173/mt8173-max98090.c
index fc94314bfc02..3bdd4931316c 100644
--- a/sound/soc/mediatek/mt8173/mt8173-max98090.c
+++ b/sound/soc/mediatek/mt8173/mt8173-max98090.c
@@ -180,6 +180,9 @@ static int mt8173_max98090_dev_probe(struct platform_device *pdev)
 	if (ret)
 		dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n",
 			__func__, ret);
+
+	of_node_put(codec_node);
+	of_node_put(platform_node);
 	return ret;
 }
 
diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c
index 0f28dc2217c0..390da5bf727e 100644
--- a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c
+++ b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c
@@ -218,6 +218,8 @@ static int mt8173_rt5650_rt5514_dev_probe(struct platform_device *pdev)
 	if (ret)
 		dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n",
 			__func__, ret);
+
+	of_node_put(platform_node);
 	return ret;
 }
 
diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c
index 077c6ee06780..c8e4e85e1057 100644
--- a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c
+++ b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c
@@ -285,6 +285,8 @@ static int mt8173_rt5650_rt5676_dev_probe(struct platform_device *pdev)
 	if (ret)
 		dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n",
 			__func__, ret);
+
+	of_node_put(platform_node);
 	return ret;
 }
 
diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650.c b/sound/soc/mediatek/mt8173/mt8173-rt5650.c
index c28ebf891cb0..e168d31f4445 100644
--- a/sound/soc/mediatek/mt8173/mt8173-rt5650.c
+++ b/sound/soc/mediatek/mt8173/mt8173-rt5650.c
@@ -323,6 +323,8 @@ static int mt8173_rt5650_dev_probe(struct platform_device *pdev)
 	if (ret)
 		dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n",
 			__func__, ret);
+
+	of_node_put(platform_node);
 	return ret;
 }
 
-- 
Gitee


From 8d2899f26ef4266a7a46c8d77c53d2cae17eb08e Mon Sep 17 00:00:00 2001
From: Tzung-Bi Shih <tzungbi@google.com>
Date: Fri, 24 Dec 2021 14:47:17 +0800
Subject: [PATCH 002/100] ASoC: mediatek: mt8183: fix device_node leak

stable inclusion
from stable-5.10.94
commit ff08cf1e34a17c1205630a9682640b784932970b
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

[ Upstream commit cb006006fe6221f092fadaffd3f219288304c9ad ]

Fixes the device_node leak.

Signed-off-by: Tzung-Bi Shih <tzungbi@google.com>
Link: https://lore.kernel.org/r/20211224064719.2031210-3-tzungbi@google.com
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c         | 6 +++++-
 sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c | 7 ++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c
index 20d31b69a5c0..9cc0f26b08fb 100644
--- a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c
+++ b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c
@@ -787,7 +787,11 @@ static int mt8183_da7219_max98357_dev_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	return devm_snd_soc_register_card(&pdev->dev, card);
+	ret = devm_snd_soc_register_card(&pdev->dev, card);
+
+	of_node_put(platform_node);
+	of_node_put(hdmi_codec);
+	return ret;
 }
 
 #ifdef CONFIG_OF
diff --git a/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c b/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c
index 79ba2f2d8452..14ce8b93597f 100644
--- a/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c
+++ b/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c
@@ -720,7 +720,12 @@ mt8183_mt6358_ts3a227_max98357_dev_probe(struct platform_device *pdev)
 				 __func__, ret);
 	}
 
-	return devm_snd_soc_register_card(&pdev->dev, card);
+	ret = devm_snd_soc_register_card(&pdev->dev, card);
+
+	of_node_put(platform_node);
+	of_node_put(ec_codec);
+	of_node_put(hdmi_codec);
+	return ret;
 }
 
 #ifdef CONFIG_OF
-- 
Gitee


From e6df85ef05b5b4d4e6a045ac50820a9e74ff0669 Mon Sep 17 00:00:00 2001
From: Miaoqian Lin <linmq006@gmail.com>
Date: Fri, 24 Dec 2021 08:21:03 +0000
Subject: [PATCH 003/100] phy: mediatek: Fix missing check in mtk_mipi_tx_probe

stable inclusion
from stable-5.10.94
commit 082ff9e12b4a1c40142a1710f0534e65c48f1d51
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

[ Upstream commit 399c91c3f30531593e5ff6ca7b53f47092128669 ]

The of_device_get_match_data() function may return NULL.
Add check to prevent potential null dereference.

Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Link: https://lore.kernel.org/r/20211224082103.7658-1-linmq006@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/gpu/drm/mediatek/mtk_mipi_tx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/mediatek/mtk_mipi_tx.c b/drivers/gpu/drm/mediatek/mtk_mipi_tx.c
index 8cee2591e728..ccc742dc78bd 100644
--- a/drivers/gpu/drm/mediatek/mtk_mipi_tx.c
+++ b/drivers/gpu/drm/mediatek/mtk_mipi_tx.c
@@ -147,6 +147,8 @@ static int mtk_mipi_tx_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	mipi_tx->driver_data = of_device_get_match_data(dev);
+	if (!mipi_tx->driver_data)
+		return -ENODEV;
 
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	mipi_tx->regs = devm_ioremap_resource(dev, mem);
-- 
Gitee


From 2cd2a0ff630f251c90c1d84247590f512910f44b Mon Sep 17 00:00:00 2001
From: Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
Date: Mon, 6 Dec 2021 20:07:58 +0100
Subject: [PATCH 004/100] rpmsg: core: Clean up resources on announce_create
 failure.

stable inclusion
from stable-5.10.94
commit 43e94431c313ca2270584a106da36b02d65978c8
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 8066c615cb69b7da8a94f59379847b037b3a5e46 upstream.

During the rpmsg_dev_probe, if rpdev->ops->announce_create returns an
error, the rpmsg device and default endpoint should be freed before
exiting the function.

Fixes: 5e619b48677c ("rpmsg: Split rpmsg core and virtio backend")
Suggested-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20211206190758.10004-1-arnaud.pouliquen@foss.st.com
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/rpmsg/rpmsg_core.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c
index 91de940896e3..028ca5961bc2 100644
--- a/drivers/rpmsg/rpmsg_core.c
+++ b/drivers/rpmsg/rpmsg_core.c
@@ -473,13 +473,25 @@ static int rpmsg_dev_probe(struct device *dev)
 	err = rpdrv->probe(rpdev);
 	if (err) {
 		dev_err(dev, "%s: failed: %d\n", __func__, err);
-		if (ept)
-			rpmsg_destroy_ept(ept);
-		goto out;
+		goto destroy_ept;
 	}
 
-	if (ept && rpdev->ops->announce_create)
+	if (ept && rpdev->ops->announce_create) {
 		err = rpdev->ops->announce_create(rpdev);
+		if (err) {
+			dev_err(dev, "failed to announce creation\n");
+			goto remove_rpdev;
+		}
+	}
+
+	return 0;
+
+remove_rpdev:
+	if (rpdrv->remove)
+		rpdrv->remove(rpdev);
+destroy_ept:
+	if (ept)
+		rpmsg_destroy_ept(ept);
 out:
 	return err;
 }
-- 
Gitee


From 64910abfbc7e844e4892687c7ce4f7a5a302ba3b Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 17 Dec 2021 10:03:30 +0100
Subject: [PATCH 005/100] crypto: omap-aes - Fix broken pm_runtime_and_get()
 usage

stable inclusion
from stable-5.10.94
commit 2031e0246e459cea7a01b342b5e4b87de8fc090c
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit c2aec59be093bd44627bc4f6bc67e4614a93a7b6 upstream.

This fix is basically the same as 3d6b661330a7 ("crypto: stm32 -
Revert broken pm_runtime_resume_and_get changes"), just for the omap
driver. If the return value isn't used, then pm_runtime_get_sync()
has to be used for ensuring that the usage count is balanced.

Fixes: 1f34cc4a8da3 ("crypto: omap-aes - Fix PM reference leak on omap-aes.c")
Cc: stable@vger.kernel.org
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/crypto/omap-aes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index 9b968ac4ee7b..a196bb8b1701 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -1302,7 +1302,7 @@ static int omap_aes_suspend(struct device *dev)
 
 static int omap_aes_resume(struct device *dev)
 {
-	pm_runtime_resume_and_get(dev);
+	pm_runtime_get_sync(dev);
 	return 0;
 }
 #endif
-- 
Gitee


From 72f2d437dc6216d4af1b5c9fbb96d0a734ffff78 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Mon, 20 Dec 2021 20:50:22 +0100
Subject: [PATCH 006/100] crypto: stm32/crc32 - Fix kernel BUG triggered in
 probe()

stable inclusion
from stable-5.10.94
commit 9e6ff2d5725b70b6f9430597a072cbbc0b36223f
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 29009604ad4e3ef784fd9b9fef6f23610ddf633d upstream.

The include/linux/crypto.h struct crypto_alg field cra_driver_name description
states "Unique name of the transformation provider. " ... " this contains the
name of the chip or provider and the name of the transformation algorithm."

In case of the stm32-crc driver, field cra_driver_name is identical for all
registered transformation providers and set to the name of the driver itself,
which is incorrect. This patch fixes it by assigning a unique cra_driver_name
to each registered transformation provider.

The kernel crash is triggered when the driver calls crypto_register_shashes()
which calls crypto_register_shash(), which calls crypto_register_alg(), which
calls __crypto_register_alg(), which returns -EEXIST, which is propagated
back through this call chain. Upon -EEXIST from crypto_register_shash(), the
crypto_register_shashes() starts unregistering the providers back, and calls
crypto_unregister_shash(), which calls crypto_unregister_alg(), and this is
where the BUG() triggers due to incorrect cra_refcnt.

Fixes: b51dbe90912a ("crypto: stm32 - Support for STM32 CRC32 crypto module")
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: <stable@vger.kernel.org> # 4.12+
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Cc: Fabien Dessenne <fabien.dessenne@st.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Lionel Debieve <lionel.debieve@st.com>
Cc: Nicolas Toromanoff <nicolas.toromanoff@st.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-stm32@st-md-mailman.stormreply.com
To: linux-crypto@vger.kernel.org
Acked-by: Nicolas Toromanoff <nicolas.toromanoff@foss.st.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/crypto/stm32/stm32-crc32.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c
index 75867c0b0017..be1bf39a317d 100644
--- a/drivers/crypto/stm32/stm32-crc32.c
+++ b/drivers/crypto/stm32/stm32-crc32.c
@@ -279,7 +279,7 @@ static struct shash_alg algs[] = {
 		.digestsize     = CHKSUM_DIGEST_SIZE,
 		.base           = {
 			.cra_name               = "crc32",
-			.cra_driver_name        = DRIVER_NAME,
+			.cra_driver_name        = "stm32-crc32-crc32",
 			.cra_priority           = 200,
 			.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
 			.cra_blocksize          = CHKSUM_BLOCK_SIZE,
@@ -301,7 +301,7 @@ static struct shash_alg algs[] = {
 		.digestsize     = CHKSUM_DIGEST_SIZE,
 		.base           = {
 			.cra_name               = "crc32c",
-			.cra_driver_name        = DRIVER_NAME,
+			.cra_driver_name        = "stm32-crc32-crc32c",
 			.cra_priority           = 200,
 			.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
 			.cra_blocksize          = CHKSUM_BLOCK_SIZE,
-- 
Gitee


From 099c339cb052f7984c7f38bd17abef3b3f0090d6 Mon Sep 17 00:00:00 2001
From: Meng Li <Meng.Li@windriver.com>
Date: Mon, 1 Nov 2021 11:13:53 +0800
Subject: [PATCH 007/100] crypto: caam - replace this_cpu_ptr with raw_cpu_ptr
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

stable inclusion
from stable-5.10.94
commit 4f0762ac32b57bc52fc8357641962b669c42269b
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit efd21e10fc3bf4c6da122470a5ae89ec4ed8d180 upstream.

When enable the kernel debug config, there is below calltrace detected:
BUG: using smp_processor_id() in preemptible [00000000] code: cryptomgr_test/339
caller is debug_smp_processor_id+0x20/0x30
CPU: 9 PID: 339 Comm: cryptomgr_test Not tainted 5.10.63-yocto-standard #1
Hardware name: NXP Layerscape LX2160ARDB (DT)
Call trace:
 dump_backtrace+0x0/0x1a0
 show_stack+0x24/0x30
 dump_stack+0xf0/0x13c
 check_preemption_disabled+0x100/0x110
 debug_smp_processor_id+0x20/0x30
 dpaa2_caam_enqueue+0x10c/0x25c
 ......
 cryptomgr_test+0x38/0x60
 kthread+0x158/0x164
 ret_from_fork+0x10/0x38
According to the comment in commit ac5d15b4519f("crypto: caam/qi2
 - use affine DPIOs "), because preemption is no longer disabled
while trying to enqueue an FQID, it might be possible to run the
enqueue on a different CPU(due to migration, when in process context),
however this wouldn't be a functionality issue. But there will be
above calltrace when enable kernel debug config. So, replace this_cpu_ptr
with raw_cpu_ptr to avoid above call trace.

Fixes: ac5d15b4519f ("crypto: caam/qi2 - use affine DPIOs")
Cc: stable@vger.kernel.org
Signed-off-by: Meng Li <Meng.Li@windriver.com>
Reviewed-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/crypto/caam/caamalg_qi2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c
index a780e627838a..5a40c7d10cc9 100644
--- a/drivers/crypto/caam/caamalg_qi2.c
+++ b/drivers/crypto/caam/caamalg_qi2.c
@@ -5467,7 +5467,7 @@ int dpaa2_caam_enqueue(struct device *dev, struct caam_request *req)
 	dpaa2_fd_set_len(&fd, dpaa2_fl_get_len(&req->fd_flt[1]));
 	dpaa2_fd_set_flc(&fd, req->flc_dma);
 
-	ppriv = this_cpu_ptr(priv->ppriv);
+	ppriv = raw_cpu_ptr(priv->ppriv);
 	for (i = 0; i < (priv->dpseci_attr.num_tx_queues << 1); i++) {
 		err = dpaa2_io_service_enqueue_fq(ppriv->dpio, ppriv->req_fqid,
 						  &fd);
-- 
Gitee


From 2cb48e4a96a47edd0c0d888eb471e71d08b359b9 Mon Sep 17 00:00:00 2001
From: Petr Cvachoucek <cvachoucek@gmail.com>
Date: Mon, 30 Aug 2021 21:20:37 +0200
Subject: [PATCH 008/100] ubifs: Error path in ubifs_remount_rw() seems to
 wrongly free write buffers

stable inclusion
from stable-5.10.94
commit 76006d33f1c8cabe28e5f3adfad0f38f273195ad
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 3fea4d9d160186617ff40490ae01f4f4f36b28ff upstream.

it seems freeing the write buffers in the error path of the
ubifs_remount_rw() is wrong. It leads later to a kernel oops like this:

[10016.431274] UBIFS (ubi0:0): start fixing up free space
[10090.810042] UBIFS (ubi0:0): free space fixup complete
[10090.814623] UBIFS error (ubi0:0 pid 512): ubifs_remount_fs: cannot
spawn "ubifs_bgt0_0", error -4
[10101.915108] UBIFS (ubi0:0): background thread "ubifs_bgt0_0" started,
PID 517
[10105.275498] Unable to handle kernel NULL pointer dereference at
virtual address 0000000000000030
[10105.284352] Mem abort info:
[10105.287160]   ESR = 0x96000006
[10105.290252]   EC = 0x25: DABT (current EL), IL = 32 bits
[10105.295592]   SET = 0, FnV = 0
[10105.298652]   EA = 0, S1PTW = 0
[10105.301848] Data abort info:
[10105.304723]   ISV = 0, ISS = 0x00000006
[10105.308573]   CM = 0, WnR = 0
[10105.311564] user pgtable: 4k pages, 48-bit VAs, pgdp=00000000f03d1000
[10105.318034] [0000000000000030] pgd=00000000f6cee003,
pud=00000000f4884003, pmd=0000000000000000
[10105.326783] Internal error: Oops: 96000006 [#1] PREEMPT SMP
[10105.332355] Modules linked in: ath10k_pci ath10k_core ath mac80211
libarc4 cfg80211 nvme nvme_core cryptodev(O)
[10105.342468] CPU: 3 PID: 518 Comm: touch Tainted: G           O
5.4.3 #1
[10105.349517] Hardware name: HYPEX CPU (DT)
[10105.353525] pstate: 40000005 (nZcv daif -PAN -UAO)
[10105.358324] pc : atomic64_try_cmpxchg_acquire.constprop.22+0x8/0x34
[10105.364596] lr : mutex_lock+0x1c/0x34
[10105.368253] sp : ffff000075633aa0
[10105.371563] x29: ffff000075633aa0 x28: 0000000000000001
[10105.376874] x27: ffff000076fa80c8 x26: 0000000000000004
[10105.382185] x25: 0000000000000030 x24: 0000000000000000
[10105.387495] x23: 0000000000000000 x22: 0000000000000038
[10105.392807] x21: 000000000000000c x20: ffff000076fa80c8
[10105.398119] x19: ffff000076fa8000 x18: 0000000000000000
[10105.403429] x17: 0000000000000000 x16: 0000000000000000
[10105.408741] x15: 0000000000000000 x14: fefefefefefefeff
[10105.414052] x13: 0000000000000000 x12: 0000000000000fe0
[10105.419364] x11: 0000000000000fe0 x10: ffff000076709020
[10105.424675] x9 : 0000000000000000 x8 : 00000000000000a0
[10105.429986] x7 : ffff000076fa80f4 x6 : 0000000000000030
[10105.435297] x5 : 0000000000000000 x4 : 0000000000000000
[10105.440609] x3 : 0000000000000000 x2 : ffff00006f276040
[10105.445920] x1 : ffff000075633ab8 x0 : 0000000000000030
[10105.451232] Call trace:
[10105.453676]  atomic64_try_cmpxchg_acquire.constprop.22+0x8/0x34
[10105.459600]  ubifs_garbage_collect+0xb4/0x334
[10105.463956]  ubifs_budget_space+0x398/0x458
[10105.468139]  ubifs_create+0x50/0x180
[10105.471712]  path_openat+0x6a0/0x9b0
[10105.475284]  do_filp_open+0x34/0x7c
[10105.478771]  do_sys_open+0x78/0xe4
[10105.482170]  __arm64_sys_openat+0x1c/0x24
[10105.486180]  el0_svc_handler+0x84/0xc8
[10105.489928]  el0_svc+0x8/0xc
[10105.492808] Code: 52800013 17fffffb d2800003 f9800011 (c85ffc05)
[10105.498903] ---[ end trace 46b721d93267a586 ]---

To reproduce the problem:

1. Filesystem initially mounted read-only, free space fixup flag set.

2. mount -o remount,rw <mountpoint>

3. it takes some time (free space fixup running)
    ... try to terminate running mount by CTRL-C
    ... does not respond, only after free space fixup is complete
    ... then "ubifs_remount_fs: cannot spawn "ubifs_bgt0_0", error -4"

4. mount -o remount,rw <mountpoint>
    ... now finished instantly (fixup already done).

5. Create file or just unmount the filesystem and we get the oops.

Cc: <stable@vger.kernel.org>
Fixes: b50b9f408502 ("UBIFS: do not free write-buffers when in R/O mode")
Signed-off-by: Petr Cvachoucek <cvachoucek@gmail.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 fs/ubifs/super.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 98cbd9cac246..2dd79fd123ff 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1853,7 +1853,6 @@ static int ubifs_remount_rw(struct ubifs_info *c)
 		kthread_stop(c->bgt);
 		c->bgt = NULL;
 	}
-	free_wbufs(c);
 	kfree(c->write_reserve_buf);
 	c->write_reserve_buf = NULL;
 	vfree(c->ileb_buf);
-- 
Gitee


From 0491f274e619a81496be978090a1e48f5697bfa1 Mon Sep 17 00:00:00 2001
From: Patrick Williams <patrick@stwcx.xyz>
Date: Thu, 23 Dec 2021 09:49:31 -0600
Subject: [PATCH 009/100] tpm: fix NPE on probe for missing device

stable inclusion
from stable-5.10.94
commit 05026c4e94c9bc723013ea0f7a93fbae2bb0b257
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 84cc69589700b90a4c8d27b481a51fce8cca6051 upstream.

When using the tpm_tis-spi driver on a system missing the physical TPM,
a null pointer exception was observed.

    [    0.938677] Unable to handle kernel NULL pointer dereference at virtual address 00000004
    [    0.939020] pgd = 10c753cb
    [    0.939237] [00000004] *pgd=00000000
    [    0.939808] Internal error: Oops: 5 [#1] SMP ARM
    [    0.940157] CPU: 0 PID: 48 Comm: kworker/u4:1 Not tainted 5.15.10-dd1e40c #1
    [    0.940364] Hardware name: Generic DT based system
    [    0.940601] Workqueue: events_unbound async_run_entry_fn
    [    0.941048] PC is at tpm_tis_remove+0x28/0xb4
    [    0.941196] LR is at tpm_tis_core_init+0x170/0x6ac

This is due to an attempt in 'tpm_tis_remove' to use the drvdata, which
was not initialized in 'tpm_tis_core_init' prior to the first error.

Move the initialization of drvdata earlier so 'tpm_tis_remove' has
access to it.

Signed-off-by: Patrick Williams <patrick@stwcx.xyz>
Fixes: 79ca6f74dae0 ("tpm: fix Atmel TPM crash caused by too frequent queries")
Cc: stable@vger.kernel.org
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/char/tpm/tpm_tis_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index 36d1ad8f479d..dc56b976d816 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -950,6 +950,8 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
 	priv->timeout_max = TPM_TIMEOUT_USECS_MAX;
 	priv->phy_ops = phy_ops;
 
+	dev_set_drvdata(&chip->dev, priv);
+
 	rc = tpm_tis_read32(priv, TPM_DID_VID(0), &vendor);
 	if (rc < 0)
 		return rc;
@@ -962,8 +964,6 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
 		priv->timeout_max = TIS_TIMEOUT_MAX_ATML;
 	}
 
-	dev_set_drvdata(&chip->dev, priv);
-
 	if (is_bsw()) {
 		priv->ilb_base_addr = ioremap(INTEL_LEGACY_BLK_BASE_ADDR,
 					ILB_REMAP_SIZE);
-- 
Gitee


From 2f9f5791f36972bace4070fa3124669deb1426f3 Mon Sep 17 00:00:00 2001
From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Date: Wed, 22 Dec 2021 13:48:12 +0900
Subject: [PATCH 010/100] spi: uniphier: Fix a bug that doesn't point to
 private data correctly

stable inclusion
from stable-5.10.94
commit 67b078d996f723fe835edc2997a6fc4f31a16d02
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 80bb73a9fbcde4ecc55e12f10c73fabbe68a24d1 upstream.

In uniphier_spi_remove(), there is a wrong code to get private data from
the platform device, so the driver can't be removed properly.

The driver should get spi_master from the platform device and retrieve
the private data from it.

Cc: <stable@vger.kernel.org>
Fixes: 5ba155a4d4cc ("spi: add SPI controller driver for UniPhier SoC")
Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Link: https://lore.kernel.org/r/1640148492-32178-1-git-send-email-hayashi.kunihiko@socionext.com
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/spi/spi-uniphier.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/spi/spi-uniphier.c b/drivers/spi/spi-uniphier.c
index 6a9ef8ee3cc9..e5c234aecf67 100644
--- a/drivers/spi/spi-uniphier.c
+++ b/drivers/spi/spi-uniphier.c
@@ -767,12 +767,13 @@ static int uniphier_spi_probe(struct platform_device *pdev)
 
 static int uniphier_spi_remove(struct platform_device *pdev)
 {
-	struct uniphier_spi_priv *priv = platform_get_drvdata(pdev);
+	struct spi_master *master = platform_get_drvdata(pdev);
+	struct uniphier_spi_priv *priv = spi_master_get_devdata(master);
 
-	if (priv->master->dma_tx)
-		dma_release_channel(priv->master->dma_tx);
-	if (priv->master->dma_rx)
-		dma_release_channel(priv->master->dma_rx);
+	if (master->dma_tx)
+		dma_release_channel(master->dma_tx);
+	if (master->dma_rx)
+		dma_release_channel(master->dma_rx);
 
 	clk_disable_unprepare(priv->clk);
 
-- 
Gitee


From 97e8136ade1a5e5513146f574719da401c17ba47 Mon Sep 17 00:00:00 2001
From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Date: Fri, 10 Dec 2021 11:28:17 +0200
Subject: [PATCH 011/100] xen/gntdev: fix unmap notification order

stable inclusion
from stable-5.10.94
commit 9fbaddd783fdc7c074ac9af5aef9baa45596b8b3
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit ce2f46f3531a03781181b7f4bd1ff9f8c5086e7e upstream.

While working with Xen's libxenvchan library I have faced an issue with
unmap notifications sent in wrong order if both UNMAP_NOTIFY_SEND_EVENT
and UNMAP_NOTIFY_CLEAR_BYTE were requested: first we send an event channel
notification and then clear the notification byte which renders in the below
inconsistency (cli_live is the byte which was requested to be cleared on unmap):

[  444.514243] gntdev_put_map UNMAP_NOTIFY_SEND_EVENT map->notify.event 6
libxenvchan_is_open cli_live 1
[  444.515239] __unmap_grant_pages UNMAP_NOTIFY_CLEAR_BYTE at 14

Thus it is not possible to reliably implement the checks like
- wait for the notification (UNMAP_NOTIFY_SEND_EVENT)
- check the variable (UNMAP_NOTIFY_CLEAR_BYTE)
because it is possible that the variable gets checked before it is cleared
by the kernel.

To fix that we need to re-order the notifications, so the variable is first
gets cleared and then the event channel notification is sent.
With this fix I can see the correct order of execution:

[   54.522611] __unmap_grant_pages UNMAP_NOTIFY_CLEAR_BYTE at 14
[   54.537966] gntdev_put_map UNMAP_NOTIFY_SEND_EVENT map->notify.event 6
libxenvchan_is_open cli_live 0

Cc: stable@vger.kernel.org
Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Link: https://lore.kernel.org/r/20211210092817.580718-1-andr2000@gmail.com
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/xen/gntdev.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index b9651f797676..54778aadf618 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -240,13 +240,13 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
 	if (!refcount_dec_and_test(&map->users))
 		return;
 
+	if (map->pages && !use_ptemod)
+		unmap_grant_pages(map, 0, map->count);
+
 	if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
 		notify_remote_via_evtchn(map->notify.event);
 		evtchn_put(map->notify.event);
 	}
-
-	if (map->pages && !use_ptemod)
-		unmap_grant_pages(map, 0, map->count);
 	gntdev_free_map(map);
 }
 
-- 
Gitee


From e20d2d3877464ea19e87799ec7e4a48525242085 Mon Sep 17 00:00:00 2001
From: Xie Yongji <xieyongji@bytedance.com>
Date: Mon, 22 Nov 2021 17:05:31 +0800
Subject: [PATCH 012/100] fuse: Pass correct lend value to
 filemap_write_and_wait_range()

stable inclusion
from stable-5.10.94
commit 9668cf9e4af0a0989c5f8f975213a58e74568a17
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit e388164ea385f04666c4633f5dc4f951fca71890 upstream.

The acceptable maximum value of lend parameter in
filemap_write_and_wait_range() is LLONG_MAX rather than -1. And there is
also some logic depending on LLONG_MAX check in write_cache_pages(). So
let's pass LLONG_MAX to filemap_write_and_wait_range() in
fuse_writeback_range() instead.

Fixes: 59bda8ecee2f ("fuse: flush extending writes")
Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
Cc: <stable@vger.kernel.org> # v5.15
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 fs/fuse/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 05def4d20161..d1bc96ee6eb3 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -3252,7 +3252,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 
 static int fuse_writeback_range(struct inode *inode, loff_t start, loff_t end)
 {
-	int err = filemap_write_and_wait_range(inode->i_mapping, start, -1);
+	int err = filemap_write_and_wait_range(inode->i_mapping, start, LLONG_MAX);
 
 	if (!err)
 		fuse_sync_writes(inode);
-- 
Gitee


From 0dda1e356a8d3466485b558a1baaa7d683b0673d Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Sat, 18 Dec 2021 10:58:56 +0100
Subject: [PATCH 013/100] serial: Fix incorrect rs485 polarity on uart open

stable inclusion
from stable-5.10.94
commit 13518f058fdee42d7b624e22add590fa1e5badbf
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit d3b3404df318504ec084213ab1065b73f49b0f1d upstream.

Commit a6845e1e1b78 ("serial: core: Consider rs485 settings to drive
RTS") sought to deassert RTS when opening an rs485-enabled uart port.
That way, the transceiver does not occupy the bus until it transmits
data.

Unfortunately, the commit mixed up the logic and *asserted* RTS instead
of *deasserting* it:

The commit amended uart_port_dtr_rts(), which raises DTR and RTS when
opening an rs232 port.  "Raising" actually means lowering the signal
that's coming out of the uart, because an rs232 transceiver not only
changes a signal's voltage level, it also *inverts* the signal.  See
the simplified schematic in the MAX232 datasheet for an example:
https://www.ti.com/lit/ds/symlink/max232.pdf

So, to raise RTS on an rs232 port, TIOCM_RTS is *set* in port->mctrl
and that results in the signal being driven low.

In contrast to rs232, the signal level for rs485 Transmit Enable is the
identity, not the inversion:  If the transceiver expects a "high" RTS
signal for Transmit Enable, the signal coming out of the uart must also
be high, so TIOCM_RTS must be *cleared* in port->mctrl.

The commit did the exact opposite, but it's easy to see why given the
confusing semantics of rs232 and rs485.  Fix it.

Fixes: a6845e1e1b78 ("serial: core: Consider rs485 settings to drive RTS")
Cc: stable@vger.kernel.org # v4.14+
Cc: Rafael Gago Castano <rgc@hms.se>
Cc: Jan Kiszka <jan.kiszka@siemens.com>
Cc: Su Bao Cheng <baocheng.su@siemens.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Link: https://lore.kernel.org/r/9395767847833f2f3193c49cde38501eeb3b5669.1639821059.git.lukas@wunner.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/tty/serial/serial_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 55108db5b64b..be0d9922e320 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -162,7 +162,7 @@ static void uart_port_dtr_rts(struct uart_port *uport, int raise)
 	int RTS_after_send = !!(uport->rs485.flags & SER_RS485_RTS_AFTER_SEND);
 
 	if (raise) {
-		if (rs485_on && !RTS_after_send) {
+		if (rs485_on && RTS_after_send) {
 			uart_set_mctrl(uport, TIOCM_DTR);
 			uart_clear_mctrl(uport, TIOCM_RTS);
 		} else {
@@ -171,7 +171,7 @@ static void uart_port_dtr_rts(struct uart_port *uport, int raise)
 	} else {
 		unsigned int clear = TIOCM_DTR;
 
-		clear |= (!rs485_on || !RTS_after_send) ? TIOCM_RTS : 0;
+		clear |= (!rs485_on || RTS_after_send) ? TIOCM_RTS : 0;
 		uart_clear_mctrl(uport, clear);
 	}
 }
-- 
Gitee


From 8b189a423e1de97c2e4ae76d63bf33e8ac82dfed Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <arbn@yandex-team.com>
Date: Mon, 15 Nov 2021 19:46:04 +0300
Subject: [PATCH 014/100] cputime, cpuacct: Include guest time in user time in
 cpuacct.stat

stable inclusion
from stable-5.10.94
commit b72075e395b33761c574c4afbf6bb62fcab3ce6a
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 9731698ecb9c851f353ce2496292ff9fcea39dff upstream.

cpuacct.stat in no-root cgroups shows user time without guest time
included int it. This doesn't match with user time shown in root
cpuacct.stat and /proc/<pid>/stat. This also affects cgroup2's cpu.stat
in the same way.

Make account_guest_time() to add user time to cgroup's cpustat to
fix this.

Fixes: ef12fefabf94 ("cpuacct: add per-cgroup utime/stime statistics")
Signed-off-by: Andrey Ryabinin <arbn@yandex-team.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20211115164607.23784-1-arbn@yandex-team.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 kernel/sched/cputime.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index cf87d3fff5dd..40c657b1653a 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -162,10 +162,10 @@ void account_guest_time(struct task_struct *p, u64 cputime)
 
 	/* Add guest time to cpustat. */
 	if (task_nice(p) > 0) {
-		cpustat[CPUTIME_NICE] += cputime;
+		task_group_account_field(p, CPUTIME_NICE, cputime);
 		cpustat[CPUTIME_GUEST_NICE] += cputime;
 	} else {
-		cpustat[CPUTIME_USER] += cputime;
+		task_group_account_field(p, CPUTIME_USER, cputime);
 		cpustat[CPUTIME_GUEST] += cputime;
 	}
 }
-- 
Gitee


From a80d9af8d0a031f3f3217e21938e9b8a7efc3499 Mon Sep 17 00:00:00 2001
From: Xiangyang Zhang <xyz.sun.ok@gmail.com>
Date: Fri, 7 Jan 2022 23:02:42 +0800
Subject: [PATCH 015/100] tracing/kprobes: 'nmissed' not showed correctly for
 kretprobe

stable inclusion
from stable-5.10.94
commit c524f4cfb3e59d412dc6b532481bbe4190f9e5a6
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit dfea08a2116fe327f79d8f4d4b2cf6e0c88be11f upstream.

The 'nmissed' column of the 'kprobe_profile' file for kretprobe is
not showed correctly, kretprobe can be skipped by two reasons,
shortage of kretprobe_instance which is counted by tk->rp.nmissed,
and kprobe itself is missed by some reason, so to show the sum.

Link: https://lkml.kernel.org/r/20220107150242.5019-1-xyz.sun.ok@gmail.com

Cc: stable@vger.kernel.org
Fixes: 4a846b443b4e ("tracing/kprobes: Cleanup kprobe tracer code")
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Xiangyang Zhang <xyz.sun.ok@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 kernel/trace/trace_kprobe.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 552dbc9d5226..d8a9fc794126 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1183,15 +1183,18 @@ static int probes_profile_seq_show(struct seq_file *m, void *v)
 {
 	struct dyn_event *ev = v;
 	struct trace_kprobe *tk;
+	unsigned long nmissed;
 
 	if (!is_trace_kprobe(ev))
 		return 0;
 
 	tk = to_trace_kprobe(ev);
+	nmissed = trace_kprobe_is_return(tk) ?
+		tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed;
 	seq_printf(m, "  %-44s %15lu %15lu\n",
 		   trace_probe_name(&tk->tp),
 		   trace_kprobe_nhit(tk),
-		   tk->rp.kp.nmissed);
+		   nmissed);
 
 	return 0;
 }
-- 
Gitee


From 750d0107b9379d8d4fc01504da9b4b48be094ad8 Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Fri, 10 Dec 2021 09:06:21 +0200
Subject: [PATCH 016/100] iwlwifi: mvm: Increase the scan timeout guard to 30
 seconds

stable inclusion
from stable-5.10.94
commit 798754ba48b7a8f6dc42883b0122859a281c9644
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit ced50f1133af12f7521bb777fcf4046ca908fb77 upstream.

With the introduction of 6GHz channels the scan guard timeout should
be adjusted to account for the following extreme case:

- All 6GHz channels are scanned passively: 58 channels.
- The scan is fragmented with the following parameters: 3 fragments,
  95 TUs suspend time, 44 TUs maximal out of channel time.

The above would result with scan time of more than 24 seconds. Thus,
set the timeout to 30 seconds.

Cc: stable@vger.kernel.org
Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20211210090244.3c851b93aef5.I346fa2e1d79220a6770496e773c6f87a2ad9e6c4@changeid
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index a5d90e028833..46255d2c555b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -2157,7 +2157,7 @@ static int iwl_mvm_check_running_scans(struct iwl_mvm *mvm, int type)
 	return -EIO;
 }
 
-#define SCAN_TIMEOUT 20000
+#define SCAN_TIMEOUT 30000
 
 void iwl_mvm_scan_timeout_wk(struct work_struct *work)
 {
-- 
Gitee


From 3f3975564e4e77678367e18ebfedb68f1ce2fda5 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: Thu, 4 Nov 2021 07:14:44 +0100
Subject: [PATCH 017/100] s390/mm: fix 2KB pgtable release race

stable inclusion
from stable-5.10.94
commit ecb71f7bd584cd56713b139b6429a8ddaef30f7b
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit c2c224932fd0ee6854d6ebfc8d059c2bcad86606 upstream.

There is a race on concurrent 2KB-pgtables release paths when
both upper and lower halves of the containing parent page are
freed, one via page_table_free_rcu() + __tlb_remove_table(),
and the other via page_table_free(). The race might lead to a
corruption as result of remove of list item in page_table_free()
concurrently with __free_page() in __tlb_remove_table().

Let's assume first the lower and next the upper 2KB-pgtables are
freed from a page. Since both halves of the page are allocated
the tracking byte (bits 24-31 of the page _refcount) has value
of 0x03 initially:

CPU0                            CPU1
----                            ----

page_table_free_rcu() // lower half
{
        // _refcount[31..24] == 0x03
        ...
        atomic_xor_bits(&page->_refcount,
                        0x11U << (0 + 24));
        // _refcount[31..24] <= 0x12
        ...
        table = table | (1U << 0);
        tlb_remove_table(tlb, table);
}
...
__tlb_remove_table()
{
        // _refcount[31..24] == 0x12
        mask = _table & 3;
        // mask <= 0x01
        ...

                                page_table_free() // upper half
                                {
                                        // _refcount[31..24] == 0x12
                                        ...
                                        atomic_xor_bits(
                                                &page->_refcount,
                                                1U << (1 + 24));
                                        // _refcount[31..24] <= 0x10
                                        // mask <= 0x10
                                        ...
        atomic_xor_bits(&page->_refcount,
                        mask << (4 + 24));
        // _refcount[31..24] <= 0x00
        // mask <= 0x00
        ...
        if (mask != 0) // == false
                break;
        fallthrough;
        ...
                                        if (mask & 3) // == false
                                                ...
                                        else
        __free_page(page);                      list_del(&page->lru);
        ^^^^^^^^^^^^^^^^^^      RACE!           ^^^^^^^^^^^^^^^^^^^^^
}                                       ...
                                }

The problem is page_table_free() releases the page as result of
lower nibble unset and __tlb_remove_table() observing zero too
early. With this update page_table_free() will use the similar
logic as page_table_free_rcu() + __tlb_remove_table(), and mark
the fragment as pending for removal in the upper nibble until
after the list_del().

In other words, the parent page is considered as unreferenced and
safe to release only when the lower nibble is cleared already and
unsetting a bit in upper nibble results in that nibble turned zero.

Cc: stable@vger.kernel.org
Suggested-by: Vlastimil Babka <vbabka@suse.com>
Reviewed-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 arch/s390/mm/pgalloc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 11d2c8395e2a..6d99b1be0082 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -253,13 +253,15 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 		/* Free 2K page table fragment of a 4K page */
 		bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
 		spin_lock_bh(&mm->context.lock);
-		mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24));
+		mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
 		mask >>= 24;
 		if (mask & 3)
 			list_add(&page->lru, &mm->context.pgtable_list);
 		else
 			list_del(&page->lru);
 		spin_unlock_bh(&mm->context.lock);
+		mask = atomic_xor_bits(&page->_refcount, 0x10U << (bit + 24));
+		mask >>= 24;
 		if (mask != 0)
 			return;
 	} else {
-- 
Gitee


From a1dc77a84be4c3838b1c3a27f0cdfecf036fbcd8 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Wed, 1 Dec 2021 14:59:29 +0200
Subject: [PATCH 018/100] device property: Fix fwnode_graph_devcon_match()
 fwnode leak

stable inclusion
from stable-5.10.94
commit 6c1e3d8b1bff4c861c9ebe2d37a5b296fc7ee207
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 4a7f4110f79163fd53ea65438041994ed615e3af upstream.

For each endpoint it encounters, fwnode_graph_devcon_match() checks
whether the endpoint's remote port parent device is available. If it is
not, it ignores the endpoint but does not put the reference to the remote
endpoint port parent fwnode. For available devices the fwnode handle
reference is put as expected.

Put the reference for unavailable devices now.

Fixes: 637e9e52b185 ("device connection: Find device connections also from device graphs")
Cc: 5.1+ <stable@vger.kernel.org> # 5.1+
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/base/property.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/base/property.c b/drivers/base/property.c
index 4c43d30145c6..cf88a5554d9c 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -1195,8 +1195,10 @@ fwnode_graph_devcon_match(struct fwnode_handle *fwnode, const char *con_id,
 
 	fwnode_graph_for_each_endpoint(fwnode, ep) {
 		node = fwnode_graph_get_remote_port_parent(ep);
-		if (!fwnode_device_is_available(node))
+		if (!fwnode_device_is_available(node)) {
+			fwnode_handle_put(node);
 			continue;
+		}
 
 		ret = match(node, con_id, data);
 		fwnode_handle_put(node);
-- 
Gitee


From 7155ed34a025f8f6318e568687a7fc0ce648f9c4 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Fri, 17 Dec 2021 11:59:28 +0100
Subject: [PATCH 019/100] drm/etnaviv: limit submit sizes

stable inclusion
from stable-5.10.94
commit 605583fcccb51c73362f5c2b326693da692e4492
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 6dfa2fab8ddd46faa771a102672176bee7a065de upstream.

Currently we allow rediculous amounts of kernel memory being allocated
via the etnaviv GEM_SUBMIT ioctl, which is a pretty easy DoS vector. Put
some reasonable limits in to fix this.

The commandstream size is limited to 64KB, which was already a soft limit
on older kernels after which the kernel only took submits on a best effort
base, so there is no userspace that tries to submit commandstreams larger
than this. Even if the whole commandstream is a single incrementing address
load, the size limit also limits the number of potential relocs and
referenced buffers to slightly under 64K, so use the same limit for those
arguments. The performance monitoring infrastructure currently supports
less than 50 performance counter signals, so limiting them to 128 on a
single submit seems like a reasonably future-proof number for now. This
number can be bumped if needed without breaking the interface.

Cc: stable@vger.kernel.org
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Reviewed-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index 5f24cc52c287..ed2c50011d44 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -469,6 +469,12 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
+	if (args->stream_size > SZ_64K || args->nr_relocs > SZ_64K ||
+	    args->nr_bos > SZ_64K || args->nr_pmrs > 128) {
+		DRM_ERROR("submit arguments out of size limits\n");
+		return -EINVAL;
+	}
+
 	/*
 	 * Copy the command submission and bo array to kernel space in
 	 * one go, and do this outside of any locks.
-- 
Gitee


From 71d429e0495d0b39863b5c102739023b64e27b13 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sun, 7 Mar 2021 12:48:53 -0500
Subject: [PATCH 020/100] drm/nouveau/kms/nv04: use vzalloc for nv04_display

stable inclusion
from stable-5.10.94
commit 8cbbf4a6f1acdf0f392694ebc719b584a40400f6
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit bd6e07e72f37f34535bec7eebc807e5fcfe37b43 upstream.

The struct is giant, and triggers an order-7 allocation (512K). There is
no reason for this to be kmalloc-type memory, so switch to vmalloc. This
should help loading nouveau on low-memory and/or long-running systems.

Reported-by: Nathan E. Egge <unlord@xiph.org>
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: stable@vger.kernel.org
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Reviewed-by: Karol Herbst <kherbst@redhat.com>
Signed-off-by: Karol Herbst <kherbst@redhat.com>
Link: https://gitlab.freedesktop.org/drm/nouveau/-/merge_requests/10
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/gpu/drm/nouveau/dispnv04/disp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c
index 7739f46470d3..99fee4d8cd31 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c
@@ -205,7 +205,7 @@ nv04_display_destroy(struct drm_device *dev)
 	nvif_notify_dtor(&disp->flip);
 
 	nouveau_display(dev)->priv = NULL;
-	kfree(disp);
+	vfree(disp);
 
 	nvif_object_unmap(&drm->client.device.object);
 }
@@ -223,7 +223,7 @@ nv04_display_create(struct drm_device *dev)
 	struct nv04_display *disp;
 	int i, ret;
 
-	disp = kzalloc(sizeof(*disp), GFP_KERNEL);
+	disp = vzalloc(sizeof(*disp));
 	if (!disp)
 		return -ENOMEM;
 
-- 
Gitee


From 10927f088ba2942e1c56f75071e15b86c21a4b20 Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@chromium.org>
Date: Wed, 3 Nov 2021 13:52:00 -0700
Subject: [PATCH 021/100] drm/bridge: analogix_dp: Make PSR-exit block less

stable inclusion
from stable-5.10.94
commit 9b78ee2341d4496688a157ae3fc901cc6c3c4385
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit c4c6ef229593366ab593d4d424addc7025b54a76 upstream.

Prior to commit 6c836d965bad ("drm/rockchip: Use the helpers for PSR"),
"PSR exit" used non-blocking analogix_dp_send_psr_spd(). The refactor
started using the blocking variant, for a variety of reasons -- quoting
Sean Paul's potentially-faulty memory:

"""
 - To avoid racing a subsequent PSR entry (if exit takes a long time)
 - To avoid racing disable/modeset
 - We're not displaying new content while exiting PSR anyways, so there
   is minimal utility in allowing frames to be submitted
 - We're lying to userspace telling them frames are on the screen when
   we're just dropping them on the floor
"""

However, I'm finding that this blocking transition is causing upwards of
60+ ms of unneeded latency on PSR-exit, to the point that initial cursor
movements when leaving PSR are unbearably jumpy.

It turns out that we need to meet in the middle somewhere: Sean is right
that we were "lying to userspace" with a non-blocking PSR-exit, but the
new blocking behavior is also waiting too long:

According to the eDP specification, the sink device must support PSR
entry transitions from both state 4 (ACTIVE_RESYNC) and state 0
(INACTIVE). It also states that in ACTIVE_RESYNC, "the Sink device must
display the incoming active frames from the Source device with no
visible glitches and/or artifacts."

Thus, for our purposes, we only need to wait for ACTIVE_RESYNC before
moving on; we are ready to display video, and subsequent PSR-entry is
safe.

Tested on a Samsung Chromebook Plus (i.e., Rockchip RK3399 Gru Kevin),
where this saves about 60ms of latency, for PSR-exit that used to
take about 80ms.

Fixes: 6c836d965bad ("drm/rockchip: Use the helpers for PSR")
Cc: <stable@vger.kernel.org>
Cc: Zain Wang <wzz@rock-chips.com>
Cc: Tomasz Figa <tfiga@chromium.org>
Cc: Heiko Stuebner <heiko@sntech.de>
Cc: Sean Paul <seanpaul@chromium.org>
Signed-off-by: Brian Norris <briannorris@chromium.org>
Reviewed-by: Sean Paul <seanpaul@chromium.org>
Signed-off-by: Robert Foss <robert.foss@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20211103135112.v3.1.I67612ea073c3306c71b46a87be894f79707082df@changeid
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c
index 914c569ab8c1..cab3f5c4e2fc 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c
+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c
@@ -1086,11 +1086,21 @@ int analogix_dp_send_psr_spd(struct analogix_dp_device *dp,
 	if (!blocking)
 		return 0;
 
+	/*
+	 * db[1]!=0: entering PSR, wait for fully active remote frame buffer.
+	 * db[1]==0: exiting PSR, wait for either
+	 *  (a) ACTIVE_RESYNC - the sink "must display the
+	 *      incoming active frames from the Source device with no visible
+	 *      glitches and/or artifacts", even though timings may still be
+	 *      re-synchronizing; or
+	 *  (b) INACTIVE - the transition is fully complete.
+	 */
 	ret = readx_poll_timeout(analogix_dp_get_psr_status, dp, psr_status,
 		psr_status >= 0 &&
 		((vsc->db[1] && psr_status == DP_PSR_SINK_ACTIVE_RFB) ||
-		(!vsc->db[1] && psr_status == DP_PSR_SINK_INACTIVE)), 1500,
-		DP_TIMEOUT_PSR_LOOP_MS * 1000);
+		(!vsc->db[1] && (psr_status == DP_PSR_SINK_ACTIVE_RESYNC ||
+				 psr_status == DP_PSR_SINK_INACTIVE))),
+		1500, DP_TIMEOUT_PSR_LOOP_MS * 1000);
 	if (ret) {
 		dev_warn(dp->dev, "Failed to apply PSR %d\n", ret);
 		return ret;
-- 
Gitee


From 133d207d68bf86e5a14c47f321455cdca5fe0d8d Mon Sep 17 00:00:00 2001
From: John David Anglin <dave.anglin@bell.net>
Date: Wed, 22 Dec 2021 16:01:31 +0000
Subject: [PATCH 022/100] parisc: Fix lpa and lpa_user defines

stable inclusion
from stable-5.10.94
commit eb44b1386af5751e0faec3a06e65078f146f81a7
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit db19c6f1a2a353cc8dec35b4789733a3cf6e2838 upstream.

While working on the rewrite to the light-weight syscall and futex code, I
experimented with using a hash index based on the user physical address of
atomic variable. This exposed two problems with the lpa and lpa_user defines.

Because of the copy instruction, the pa argument needs to be an early clobber
argument. This prevents gcc from allocating the va and pa arguments to the same
register.

Secondly, the lpa instruction can cause a page fault so we need to catch
exceptions.

Signed-off-by: John David Anglin <dave.anglin@bell.net>
Fixes: 116d753308cf ("parisc: Use lpa instruction to load physical addresses in driver code")
Signed-off-by: Helge Deller <deller@gmx.de>
Cc: stable@vger.kernel.org # v5.2+
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 arch/parisc/include/asm/special_insns.h | 44 ++++++++++++++-----------
 1 file changed, 24 insertions(+), 20 deletions(-)

diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h
index a303ae9a77f4..16ee41e77174 100644
--- a/arch/parisc/include/asm/special_insns.h
+++ b/arch/parisc/include/asm/special_insns.h
@@ -2,28 +2,32 @@
 #ifndef __PARISC_SPECIAL_INSNS_H
 #define __PARISC_SPECIAL_INSNS_H
 
-#define lpa(va)	({			\
-	unsigned long pa;		\
-	__asm__ __volatile__(		\
-		"copy %%r0,%0\n\t"	\
-		"lpa %%r0(%1),%0"	\
-		: "=r" (pa)		\
-		: "r" (va)		\
-		: "memory"		\
-	);				\
-	pa;				\
+#define lpa(va)	({					\
+	unsigned long pa;				\
+	__asm__ __volatile__(				\
+		"copy %%r0,%0\n"			\
+		"8:\tlpa %%r0(%1),%0\n"			\
+		"9:\n"					\
+		ASM_EXCEPTIONTABLE_ENTRY(8b, 9b)	\
+		: "=&r" (pa)				\
+		: "r" (va)				\
+		: "memory"				\
+	);						\
+	pa;						\
 })
 
-#define lpa_user(va)	({		\
-	unsigned long pa;		\
-	__asm__ __volatile__(		\
-		"copy %%r0,%0\n\t"	\
-		"lpa %%r0(%%sr3,%1),%0"	\
-		: "=r" (pa)		\
-		: "r" (va)		\
-		: "memory"		\
-	);				\
-	pa;				\
+#define lpa_user(va)	({				\
+	unsigned long pa;				\
+	__asm__ __volatile__(				\
+		"copy %%r0,%0\n"			\
+		"8:\tlpa %%r0(%%sr3,%1),%0\n"		\
+		"9:\n"					\
+		ASM_EXCEPTIONTABLE_ENTRY(8b, 9b)	\
+		: "=&r" (pa)				\
+		: "r" (va)				\
+		: "memory"				\
+	);						\
+	pa;						\
 })
 
 #define mfctl(reg)	({		\
-- 
Gitee


From 317a345165d9d66ce5603403e0afc79a863f09e9 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 16 Dec 2021 20:33:42 +1000
Subject: [PATCH 023/100] powerpc/64s/radix: Fix huge vmap false positive

stable inclusion
from stable-5.10.94
commit e09f47e77b6e3148c02d88ef91e9ab16a005cc95
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 467ba14e1660b52a2f9338b484704c461bd23019 upstream.

pmd_huge() is defined to false when HUGETLB_PAGE is not configured, but
the vmap code still installs huge PMDs. This leads to false bad PMD
errors when vunmapping because it is not seen as a huge PTE, and the bad
PMD check catches it. The end result may not be much more serious than
some bad pmd warning messages, because the pmd_none_or_clear_bad() does
what we wanted and clears the huge PTE anyway.

Fix this by checking pmd_is_leaf(), which checks for a PTE regardless of
config options. The whole huge/large/leaf stuff is a tangled mess but
that's kernel-wide and not something we can improve much in arch/powerpc
code.

pmd_page(), pud_page(), etc., called by vmalloc_to_page() on huge vmaps
can similarly trigger a false VM_BUG_ON when CONFIG_HUGETLB_PAGE=n, so
those checks are adjusted. The checks were added by commit d6eacedd1f0e
("powerpc/book3s: Use config independent helpers for page table walk"),
while implementing a similar fix for other page table walking functions.

Fixes: d909f9109c30 ("powerpc/64s/radix: Enable HAVE_ARCH_HUGE_VMAP")
Cc: stable@vger.kernel.org # v5.3+
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20211216103342.609192-1-npiggin@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 arch/powerpc/mm/book3s64/radix_pgtable.c |  4 ++--
 arch/powerpc/mm/pgtable_64.c             | 14 +++++++++++---
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 1d5eec847b88..295959487b76 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -1152,7 +1152,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
 
 int pud_clear_huge(pud_t *pud)
 {
-	if (pud_huge(*pud)) {
+	if (pud_is_leaf(*pud)) {
 		pud_clear(pud);
 		return 1;
 	}
@@ -1199,7 +1199,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
 
 int pmd_clear_huge(pmd_t *pmd)
 {
-	if (pmd_huge(*pmd)) {
+	if (pmd_is_leaf(*pmd)) {
 		pmd_clear(pmd);
 		return 1;
 	}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index cc6e2f94517f..aefc2bfdf104 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -102,7 +102,8 @@ EXPORT_SYMBOL(__pte_frag_size_shift);
 struct page *p4d_page(p4d_t p4d)
 {
 	if (p4d_is_leaf(p4d)) {
-		VM_WARN_ON(!p4d_huge(p4d));
+		if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
+			VM_WARN_ON(!p4d_huge(p4d));
 		return pte_page(p4d_pte(p4d));
 	}
 	return virt_to_page(p4d_page_vaddr(p4d));
@@ -112,7 +113,8 @@ struct page *p4d_page(p4d_t p4d)
 struct page *pud_page(pud_t pud)
 {
 	if (pud_is_leaf(pud)) {
-		VM_WARN_ON(!pud_huge(pud));
+		if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
+			VM_WARN_ON(!pud_huge(pud));
 		return pte_page(pud_pte(pud));
 	}
 	return virt_to_page(pud_page_vaddr(pud));
@@ -125,7 +127,13 @@ struct page *pud_page(pud_t pud)
 struct page *pmd_page(pmd_t pmd)
 {
 	if (pmd_is_leaf(pmd)) {
-		VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd)));
+		/*
+		 * vmalloc_to_page may be called on any vmap address (not only
+		 * vmalloc), and it uses pmd_page() etc., when huge vmap is
+		 * enabled so these checks can't be used.
+		 */
+		if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
+			VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd)));
 		return pte_page(pmd_pte(pmd));
 	}
 	return virt_to_page(pmd_page_vaddr(pmd));
-- 
Gitee


From 618bdc807059d5a0c35210ccc6dd9d71a0a64d25 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 29 Nov 2021 11:36:37 -0600
Subject: [PATCH 024/100] PCI: xgene: Fix IB window setup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

stable inclusion
from stable-5.10.94
commit 6cbe8f8deb6286a33937b9b95204c6f77b232796
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit c7a75d07827a1f33d566e18e6098379cc2a0c2b2 upstream.

Commit 6dce5aa59e0b ("PCI: xgene: Use inbound resources for setup")
broke PCI support on XGene. The cause is the IB resources are now sorted
in address order instead of being in DT dma-ranges order. The result is
which inbound registers are used for each region are swapped. I don't
know the details about this h/w, but it appears that IB region 0
registers can't handle a size greater than 4GB. In any case, limiting
the size for region 0 is enough to get back to the original assignment
of dma-ranges to regions.

Link: https://lore.kernel.org/all/CA+enf=v9rY_xnZML01oEgKLmvY1NGBUUhnSJaETmXtDtXfaczA@mail.gmail.com/
Link: https://lore.kernel.org/r/20211129173637.303201-1-robh@kernel.org
Fixes: 6dce5aa59e0b ("PCI: xgene: Use inbound resources for setup")
Reported-by: Stéphane Graber <stgraber@ubuntu.com>
Tested-by: Stéphane Graber <stgraber@ubuntu.com>
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Krzysztof Wilczyński <kw@linux.com>
Cc: stable@vger.kernel.org # v5.5+
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/pci/controller/pci-xgene.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/controller/pci-xgene.c b/drivers/pci/controller/pci-xgene.c
index c33b385ac918..b651b6f44469 100644
--- a/drivers/pci/controller/pci-xgene.c
+++ b/drivers/pci/controller/pci-xgene.c
@@ -467,7 +467,7 @@ static int xgene_pcie_select_ib_reg(u8 *ib_reg_mask, u64 size)
 		return 1;
 	}
 
-	if ((size > SZ_1K) && (size < SZ_1T) && !(*ib_reg_mask & (1 << 0))) {
+	if ((size > SZ_1K) && (size < SZ_4G) && !(*ib_reg_mask & (1 << 0))) {
 		*ib_reg_mask |= (1 << 0);
 		return 0;
 	}
-- 
Gitee


From 663f36ca1f94594d3cb448c7de3127b4ba3f010f Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 17 Dec 2021 15:17:09 +0100
Subject: [PATCH 025/100] PCI: pciehp: Use down_read/write_nested(reset_lock)
 to fix lockdep errors

stable inclusion
from stable-5.10.94
commit def2825b09ece8c8b6e2514c8f785073bf3a864e
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 085a9f43433f30cbe8a1ade62d9d7827c3217f4d upstream.

Use down_read_nested() and down_write_nested() when taking the
ctrl->reset_lock rw-sem, passing the number of PCIe hotplug controllers in
the path to the PCI root bus as lock subclass parameter.

This fixes the following false-positive lockdep report when unplugging a
Lenovo X1C8 from a Lenovo 2nd gen TB3 dock:

  pcieport 0000:06:01.0: pciehp: Slot(1): Link Down
  pcieport 0000:06:01.0: pciehp: Slot(1): Card not present
  ============================================
  WARNING: possible recursive locking detected
  5.16.0-rc2+ #621 Not tainted
  --------------------------------------------
  irq/124-pciehp/86 is trying to acquire lock:
  ffff8e5ac4299ef8 (&ctrl->reset_lock){.+.+}-{3:3}, at: pciehp_check_presence+0x23/0x80

  but task is already holding lock:
  ffff8e5ac4298af8 (&ctrl->reset_lock){.+.+}-{3:3}, at: pciehp_ist+0xf3/0x180

   other info that might help us debug this:
   Possible unsafe locking scenario:

         CPU0
         ----
    lock(&ctrl->reset_lock);
    lock(&ctrl->reset_lock);

   *** DEADLOCK ***

   May be due to missing lock nesting notation

  3 locks held by irq/124-pciehp/86:
   #0: ffff8e5ac4298af8 (&ctrl->reset_lock){.+.+}-{3:3}, at: pciehp_ist+0xf3/0x180
   #1: ffffffffa3b024e8 (pci_rescan_remove_lock){+.+.}-{3:3}, at: pciehp_unconfigure_device+0x31/0x110
   #2: ffff8e5ac1ee2248 (&dev->mutex){....}-{3:3}, at: device_release_driver+0x1c/0x40

  stack backtrace:
  CPU: 4 PID: 86 Comm: irq/124-pciehp Not tainted 5.16.0-rc2+ #621
  Hardware name: LENOVO 20U90SIT19/20U90SIT19, BIOS N2WET30W (1.20 ) 08/26/2021
  Call Trace:
   <TASK>
   dump_stack_lvl+0x59/0x73
   __lock_acquire.cold+0xc5/0x2c6
   lock_acquire+0xb5/0x2b0
   down_read+0x3e/0x50
   pciehp_check_presence+0x23/0x80
   pciehp_runtime_resume+0x5c/0xa0
   device_for_each_child+0x45/0x70
   pcie_port_device_runtime_resume+0x20/0x30
   pci_pm_runtime_resume+0xa7/0xc0
   __rpm_callback+0x41/0x110
   rpm_callback+0x59/0x70
   rpm_resume+0x512/0x7b0
   __pm_runtime_resume+0x4a/0x90
   __device_release_driver+0x28/0x240
   device_release_driver+0x26/0x40
   pci_stop_bus_device+0x68/0x90
   pci_stop_bus_device+0x2c/0x90
   pci_stop_and_remove_bus_device+0xe/0x20
   pciehp_unconfigure_device+0x6c/0x110
   pciehp_disable_slot+0x5b/0xe0
   pciehp_handle_presence_or_link_change+0xc3/0x2f0
   pciehp_ist+0x179/0x180

This lockdep warning is triggered because with Thunderbolt, hotplug ports
are nested. When removing multiple devices in a daisy-chain, each hotplug
port's reset_lock may be acquired recursively. It's never the same lock, so
the lockdep splat is a false positive.

Because locks at the same hierarchy level are never acquired recursively, a
per-level lockdep class is sufficient to fix the lockdep warning.

The choice to use one lockdep subclass per pcie-hotplug controller in the
path to the root-bus was made to conserve class keys because their number
is limited and the complexity grows quadratically with number of keys
according to Documentation/locking/lockdep-design.rst.

Link: https://lore.kernel.org/linux-pci/20190402021933.GA2966@mit.edu/
Link: https://lore.kernel.org/linux-pci/de684a28-9038-8fc6-27ca-3f6f2f6400d7@redhat.com/
Link: https://lore.kernel.org/r/20211217141709.379663-1-hdegoede@redhat.com
Link: https://bugzilla.kernel.org/show_bug.cgi?id=208855
Reported-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Lukas Wunner <lukas@wunner.de>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/pci/hotplug/pciehp.h      |  3 +++
 drivers/pci/hotplug/pciehp_core.c |  2 +-
 drivers/pci/hotplug/pciehp_hpc.c  | 21 ++++++++++++++++++---
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 4fd200d8b0a9..f1f789fe0637 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -72,6 +72,8 @@ extern int pciehp_poll_time;
  * @reset_lock: prevents access to the Data Link Layer Link Active bit in the
  *	Link Status register and to the Presence Detect State bit in the Slot
  *	Status register during a slot reset which may cause them to flap
+ * @depth: Number of additional hotplug ports in the path to the root bus,
+ *	used as lock subclass for @reset_lock
  * @ist_running: flag to keep user request waiting while IRQ thread is running
  * @request_result: result of last user request submitted to the IRQ thread
  * @requester: wait queue to wake up on completion of user request,
@@ -103,6 +105,7 @@ struct controller {
 
 	struct hotplug_slot hotplug_slot;	/* hotplug core interface */
 	struct rw_semaphore reset_lock;
+	unsigned int depth;
 	unsigned int ist_running;
 	int request_result;
 	wait_queue_head_t requester;
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index ad3393930ecb..e7fe4b42f039 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -166,7 +166,7 @@ static void pciehp_check_presence(struct controller *ctrl)
 {
 	int occupied;
 
-	down_read(&ctrl->reset_lock);
+	down_read_nested(&ctrl->reset_lock, ctrl->depth);
 	mutex_lock(&ctrl->state_lock);
 
 	occupied = pciehp_card_present_or_link_active(ctrl);
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 9d06939736c0..90da17c6da66 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -583,7 +583,7 @@ static void pciehp_ignore_dpc_link_change(struct controller *ctrl,
 	 * the corresponding link change may have been ignored above.
 	 * Synthesize it to ensure that it is acted on.
 	 */
-	down_read(&ctrl->reset_lock);
+	down_read_nested(&ctrl->reset_lock, ctrl->depth);
 	if (!pciehp_check_link_active(ctrl))
 		pciehp_request(ctrl, PCI_EXP_SLTSTA_DLLSC);
 	up_read(&ctrl->reset_lock);
@@ -746,7 +746,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id)
 	 * Disable requests have higher priority than Presence Detect Changed
 	 * or Data Link Layer State Changed events.
 	 */
-	down_read(&ctrl->reset_lock);
+	down_read_nested(&ctrl->reset_lock, ctrl->depth);
 	if (events & DISABLE_SLOT)
 		pciehp_handle_disable_request(ctrl);
 	else if (events & (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC))
@@ -880,7 +880,7 @@ int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, int probe)
 	if (probe)
 		return 0;
 
-	down_write(&ctrl->reset_lock);
+	down_write_nested(&ctrl->reset_lock, ctrl->depth);
 
 	if (!ATTN_BUTTN(ctrl)) {
 		ctrl_mask |= PCI_EXP_SLTCTL_PDCE;
@@ -936,6 +936,20 @@ static inline void dbg_ctrl(struct controller *ctrl)
 
 #define FLAG(x, y)	(((x) & (y)) ? '+' : '-')
 
+static inline int pcie_hotplug_depth(struct pci_dev *dev)
+{
+	struct pci_bus *bus = dev->bus;
+	int depth = 0;
+
+	while (bus->parent) {
+		bus = bus->parent;
+		if (bus->self && bus->self->is_hotplug_bridge)
+			depth++;
+	}
+
+	return depth;
+}
+
 struct controller *pcie_init(struct pcie_device *dev)
 {
 	struct controller *ctrl;
@@ -949,6 +963,7 @@ struct controller *pcie_init(struct pcie_device *dev)
 		return NULL;
 
 	ctrl->pcie = dev;
+	ctrl->depth = pcie_hotplug_depth(dev->port);
 	pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP, &slot_cap);
 
 	if (pdev->hotplug_user_indicators)
-- 
Gitee


From 0c33069e7bb8d0904f6ed6d36caf594c4c26812a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Wed, 24 Nov 2021 16:59:39 +0100
Subject: [PATCH 026/100] PCI: pci-bridge-emul: Make expansion ROM Base Address
 register read-only
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

stable inclusion
from stable-5.10.94
commit 2a0d437d8a76474452cb91d2fc729020c2371ad3
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 1c1a3b4d3e86b997a313ffb297c1129540882859 upstream.

If expansion ROM is unsupported (which is the case of pci-bridge-emul.c
driver) then ROM Base Address register must be implemented as read-only
register that return 0 when read, same as for unused Base Address
registers.

Link: https://lore.kernel.org/r/20211124155944.1290-2-pali@kernel.org
Fixes: 23a5fba4d941 ("PCI: Introduce PCI bridge emulated config space common logic")
Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/pci/pci-bridge-emul.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/pci-bridge-emul.c b/drivers/pci/pci-bridge-emul.c
index db97cddfc85e..5de8b8dde209 100644
--- a/drivers/pci/pci-bridge-emul.c
+++ b/drivers/pci/pci-bridge-emul.c
@@ -139,8 +139,13 @@ struct pci_bridge_reg_behavior pci_regs_behavior[PCI_STD_HEADER_SIZEOF / 4] = {
 		.ro = GENMASK(7, 0),
 	},
 
+	/*
+	 * If expansion ROM is unsupported then ROM Base Address register must
+	 * be implemented as read-only register that return 0 when read, same
+	 * as for unused Base Address registers.
+	 */
 	[PCI_ROM_ADDRESS1 / 4] = {
-		.rw = GENMASK(31, 11) | BIT(0),
+		.ro = ~0,
 	},
 
 	/*
-- 
Gitee


From 8b77385319e30b91b291bd3b5ef6d185abf8b082 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Wed, 24 Nov 2021 16:59:40 +0100
Subject: [PATCH 027/100] PCI: pci-bridge-emul: Properly mark reserved PCIe
 bits in PCI config space
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

stable inclusion
from stable-5.10.94
commit 0f2ae6691e73c25798a38aa6204b2763d5bb982f
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 7b067ac63a5730d2fae18399fed7e45f23d36912 upstream.

Some bits in PCI config space are reserved when device is PCIe. Properly
define behavior of PCI registers for PCIe emulated bridge and ensure that
it would not be possible change these reserved bits.

Link: https://lore.kernel.org/r/20211124155944.1290-3-pali@kernel.org
Fixes: 23a5fba4d941 ("PCI: Introduce PCI bridge emulated config space common logic")
Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/pci/pci-bridge-emul.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/pci/pci-bridge-emul.c b/drivers/pci/pci-bridge-emul.c
index 5de8b8dde209..0cbb4e3ca827 100644
--- a/drivers/pci/pci-bridge-emul.c
+++ b/drivers/pci/pci-bridge-emul.c
@@ -295,6 +295,27 @@ int pci_bridge_emul_init(struct pci_bridge_emul *bridge,
 			kfree(bridge->pci_regs_behavior);
 			return -ENOMEM;
 		}
+		/* These bits are applicable only for PCI and reserved on PCIe */
+		bridge->pci_regs_behavior[PCI_CACHE_LINE_SIZE / 4].ro &=
+			~GENMASK(15, 8);
+		bridge->pci_regs_behavior[PCI_COMMAND / 4].ro &=
+			~((PCI_COMMAND_SPECIAL | PCI_COMMAND_INVALIDATE |
+			   PCI_COMMAND_VGA_PALETTE | PCI_COMMAND_WAIT |
+			   PCI_COMMAND_FAST_BACK) |
+			  (PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK |
+			   PCI_STATUS_DEVSEL_MASK) << 16);
+		bridge->pci_regs_behavior[PCI_PRIMARY_BUS / 4].ro &=
+			~GENMASK(31, 24);
+		bridge->pci_regs_behavior[PCI_IO_BASE / 4].ro &=
+			~((PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK |
+			   PCI_STATUS_DEVSEL_MASK) << 16);
+		bridge->pci_regs_behavior[PCI_INTERRUPT_LINE / 4].rw &=
+			~((PCI_BRIDGE_CTL_MASTER_ABORT |
+			   BIT(8) | BIT(9) | BIT(11)) << 16);
+		bridge->pci_regs_behavior[PCI_INTERRUPT_LINE / 4].ro &=
+			~((PCI_BRIDGE_CTL_FAST_BACK) << 16);
+		bridge->pci_regs_behavior[PCI_INTERRUPT_LINE / 4].w1c &=
+			~(BIT(10) << 16);
 	}
 
 	if (flags & PCI_BRIDGE_EMUL_NO_PREFETCHABLE_BAR) {
-- 
Gitee


From fccfd680aa74402817795a6d01b6697e9320ac22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Wed, 24 Nov 2021 16:59:42 +0100
Subject: [PATCH 028/100] PCI: pci-bridge-emul: Fix definitions of reserved
 bits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

stable inclusion
from stable-5.10.94
commit af1d0acdaca70c62861964288dcf1dc1b9108b5c
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 12998087d9f48b66965b97412069c7826502cd7e upstream.

Some bits in PCI_EXP registers are reserved for non-root ports. Driver
pci-bridge-emul.c implements PCIe Root Port device therefore it should not
allow setting reserved bits of registers.

Properly define non-reserved bits for all PCI_EXP registers.

Link: https://lore.kernel.org/r/20211124155944.1290-5-pali@kernel.org
Fixes: 23a5fba4d941 ("PCI: Introduce PCI bridge emulated config space common logic")
Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/pci/pci-bridge-emul.c | 36 ++++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/drivers/pci/pci-bridge-emul.c b/drivers/pci/pci-bridge-emul.c
index 0cbb4e3ca827..2c7e04fb2685 100644
--- a/drivers/pci/pci-bridge-emul.c
+++ b/drivers/pci/pci-bridge-emul.c
@@ -176,41 +176,55 @@ struct pci_bridge_reg_behavior pcie_cap_regs_behavior[PCI_CAP_PCIE_SIZEOF / 4] =
 	[PCI_CAP_LIST_ID / 4] = {
 		/*
 		 * Capability ID, Next Capability Pointer and
-		 * Capabilities register are all read-only.
+		 * bits [14:0] of Capabilities register are all read-only.
+		 * Bit 15 of Capabilities register is reserved.
 		 */
-		.ro = ~0,
+		.ro = GENMASK(30, 0),
 	},
 
 	[PCI_EXP_DEVCAP / 4] = {
-		.ro = ~0,
+		/*
+		 * Bits [31:29] and [17:16] are reserved.
+		 * Bits [27:18] are reserved for non-upstream ports.
+		 * Bits 28 and [14:6] are reserved for non-endpoint devices.
+		 * Other bits are read-only.
+		 */
+		.ro = BIT(15) | GENMASK(5, 0),
 	},
 
 	[PCI_EXP_DEVCTL / 4] = {
-		/* Device control register is RW */
-		.rw = GENMASK(15, 0),
+		/*
+		 * Device control register is RW, except bit 15 which is
+		 * reserved for non-endpoints or non-PCIe-to-PCI/X bridges.
+		 */
+		.rw = GENMASK(14, 0),
 
 		/*
 		 * Device status register has bits 6 and [3:0] W1C, [5:4] RO,
-		 * the rest is reserved
+		 * the rest is reserved. Also bit 6 is reserved for non-upstream
+		 * ports.
 		 */
-		.w1c = (BIT(6) | GENMASK(3, 0)) << 16,
+		.w1c = GENMASK(3, 0) << 16,
 		.ro = GENMASK(5, 4) << 16,
 	},
 
 	[PCI_EXP_LNKCAP / 4] = {
-		/* All bits are RO, except bit 23 which is reserved */
-		.ro = lower_32_bits(~BIT(23)),
+		/*
+		 * All bits are RO, except bit 23 which is reserved and
+		 * bit 18 which is reserved for non-upstream ports.
+		 */
+		.ro = lower_32_bits(~(BIT(23) | PCI_EXP_LNKCAP_CLKPM)),
 	},
 
 	[PCI_EXP_LNKCTL / 4] = {
 		/*
 		 * Link control has bits [15:14], [11:3] and [1:0] RW, the
-		 * rest is reserved.
+		 * rest is reserved. Bit 8 is reserved for non-upstream ports.
 		 *
 		 * Link status has bits [13:0] RO, and bits [15:14]
 		 * W1C.
 		 */
-		.rw = GENMASK(15, 14) | GENMASK(11, 3) | GENMASK(1, 0),
+		.rw = GENMASK(15, 14) | GENMASK(11, 9) | GENMASK(7, 3) | GENMASK(1, 0),
 		.ro = GENMASK(13, 0) << 16,
 		.w1c = GENMASK(15, 14) << 16,
 	},
-- 
Gitee


From e3b79eed2f90a99d2b6cc01d23134d0a18ddceab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Wed, 24 Nov 2021 16:59:43 +0100
Subject: [PATCH 029/100] PCI: pci-bridge-emul: Correctly set PCIe capabilities
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

stable inclusion
from stable-5.10.94
commit 7aeeb9fe9ca0b9e084c3ac4c1361b188c31fa9f6
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 1f1050c5e1fefb34ac90a506b43e9da803b5f8f7 upstream.

Older mvebu hardware provides PCIe Capability structure only in version 1.
New mvebu and aardvark hardware provides it in version 2. So do not force
version to 2 in pci_bridge_emul_init() and rather allow drivers to set
correct version. Drivers need to set version in pcie_conf.cap field without
overwriting PCI_CAP_LIST_ID register. Both drivers (mvebu and aardvark) do
not provide slot support yet, so do not set PCI_EXP_FLAGS_SLOT flag.

Link: https://lore.kernel.org/r/20211124155944.1290-6-pali@kernel.org
Fixes: 23a5fba4d941 ("PCI: Introduce PCI bridge emulated config space common logic")
Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/pci/controller/pci-aardvark.c | 4 +++-
 drivers/pci/controller/pci-mvebu.c    | 8 ++++++++
 drivers/pci/pci-bridge-emul.c         | 5 +----
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
index 0f6a6685ab5b..f30144c8c0bd 100644
--- a/drivers/pci/controller/pci-aardvark.c
+++ b/drivers/pci/controller/pci-aardvark.c
@@ -879,7 +879,6 @@ advk_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge,
 		return PCI_BRIDGE_EMUL_HANDLED;
 	}
 
-	case PCI_CAP_LIST_ID:
 	case PCI_EXP_DEVCAP:
 	case PCI_EXP_DEVCTL:
 		*value = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + reg);
@@ -960,6 +959,9 @@ static int advk_sw_pci_bridge_init(struct advk_pcie *pcie)
 	/* Support interrupt A for MSI feature */
 	bridge->conf.intpin = PCIE_CORE_INT_A_ASSERT_ENABLE;
 
+	/* Aardvark HW provides PCIe Capability structure in version 2 */
+	bridge->pcie_conf.cap = cpu_to_le16(2);
+
 	/* Indicates supports for Completion Retry Status */
 	bridge->pcie_conf.rootcap = cpu_to_le16(PCI_EXP_RTCAP_CRSVIS);
 
diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c
index ed13e81cd691..2dc6890dbcaa 100644
--- a/drivers/pci/controller/pci-mvebu.c
+++ b/drivers/pci/controller/pci-mvebu.c
@@ -573,6 +573,8 @@ static struct pci_bridge_emul_ops mvebu_pci_bridge_emul_ops = {
 static void mvebu_pci_bridge_emul_init(struct mvebu_pcie_port *port)
 {
 	struct pci_bridge_emul *bridge = &port->bridge;
+	u32 pcie_cap = mvebu_readl(port, PCIE_CAP_PCIEXP);
+	u8 pcie_cap_ver = ((pcie_cap >> 16) & PCI_EXP_FLAGS_VERS);
 
 	bridge->conf.vendor = PCI_VENDOR_ID_MARVELL;
 	bridge->conf.device = mvebu_readl(port, PCIE_DEV_ID_OFF) >> 16;
@@ -585,6 +587,12 @@ static void mvebu_pci_bridge_emul_init(struct mvebu_pcie_port *port)
 		bridge->conf.iolimit = PCI_IO_RANGE_TYPE_32;
 	}
 
+	/*
+	 * Older mvebu hardware provides PCIe Capability structure only in
+	 * version 1. New hardware provides it in version 2.
+	 */
+	bridge->pcie_conf.cap = cpu_to_le16(pcie_cap_ver);
+
 	bridge->has_pcie = true;
 	bridge->data = port;
 	bridge->ops = &mvebu_pci_bridge_emul_ops;
diff --git a/drivers/pci/pci-bridge-emul.c b/drivers/pci/pci-bridge-emul.c
index 2c7e04fb2685..867159562d07 100644
--- a/drivers/pci/pci-bridge-emul.c
+++ b/drivers/pci/pci-bridge-emul.c
@@ -297,10 +297,7 @@ int pci_bridge_emul_init(struct pci_bridge_emul *bridge,
 	if (bridge->has_pcie) {
 		bridge->conf.capabilities_pointer = PCI_CAP_PCIE_START;
 		bridge->pcie_conf.cap_id = PCI_CAP_ID_EXP;
-		/* Set PCIe v2, root port, slot support */
-		bridge->pcie_conf.cap =
-			cpu_to_le16(PCI_EXP_TYPE_ROOT_PORT << 4 | 2 |
-				    PCI_EXP_FLAGS_SLOT);
+		bridge->pcie_conf.cap |= cpu_to_le16(PCI_EXP_TYPE_ROOT_PORT << 4);
 		bridge->pcie_cap_regs_behavior =
 			kmemdup(pcie_cap_regs_behavior,
 				sizeof(pcie_cap_regs_behavior),
-- 
Gitee


From d2e7f418f2eecec5eebd327cb53608c3a73f3d4e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Wed, 24 Nov 2021 16:59:44 +0100
Subject: [PATCH 030/100] PCI: pci-bridge-emul: Set PCI_STATUS_CAP_LIST for
 PCIe device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

stable inclusion
from stable-5.10.94
commit 84166c1177f39b852f92dde675d290e697a04b8b
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 3be9d243b21724d49b65043d4520d688b6040b36 upstream.

Since all PCI Express device Functions are required to implement the PCI
Express Capability structure, Capabilities List bit in PCI Status Register
must be hardwired to 1b. Capabilities Pointer register (which is already
set by pci-bride-emul.c driver) is valid only when Capabilities List is set
to 1b.

Link: https://lore.kernel.org/r/20211124155944.1290-7-pali@kernel.org
Fixes: 23a5fba4d941 ("PCI: Introduce PCI bridge emulated config space common logic")
Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/pci/pci-bridge-emul.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/pci/pci-bridge-emul.c b/drivers/pci/pci-bridge-emul.c
index 867159562d07..37504c2cce9b 100644
--- a/drivers/pci/pci-bridge-emul.c
+++ b/drivers/pci/pci-bridge-emul.c
@@ -296,6 +296,7 @@ int pci_bridge_emul_init(struct pci_bridge_emul *bridge,
 
 	if (bridge->has_pcie) {
 		bridge->conf.capabilities_pointer = PCI_CAP_PCIE_START;
+		bridge->conf.status |= cpu_to_le16(PCI_STATUS_CAP_LIST);
 		bridge->pcie_conf.cap_id = PCI_CAP_ID_EXP;
 		bridge->pcie_conf.cap |= cpu_to_le16(PCI_EXP_TYPE_ROOT_PORT << 4);
 		bridge->pcie_cap_regs_behavior =
-- 
Gitee


From 717944886346d770ce7413f14b7d87f56c3c8780 Mon Sep 17 00:00:00 2001
From: Ghalem Boudour <ghalem.boudour@6wind.com>
Date: Fri, 19 Nov 2021 18:20:16 +0100
Subject: [PATCH 031/100] xfrm: fix policy lookup for ipv6 gre packets

stable inclusion
from stable-5.10.94
commit 56f974d583fcf7c8bb8b62d5164315de25268079
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit bcf141b2eb551b3477b24997ebc09c65f117a803 upstream.

On egress side, xfrm lookup is called from __gre6_xmit() with the
fl6_gre_key field not initialized leading to policies selectors check
failure. Consequently, gre packets are sent without encryption.

On ingress side, INET6_PROTO_NOPOLICY was set, thus packets were not
checked against xfrm policies. Like for egress side, fl6_gre_key should be
correctly set, this is now done in decode_session6().

Fixes: c12b395a4664 ("gre: Support GRE over IPv6")
Cc: stable@vger.kernel.org
Signed-off-by: Ghalem Boudour <ghalem.boudour@6wind.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 net/ipv6/ip6_gre.c     |  5 ++++-
 net/xfrm/xfrm_policy.c | 21 +++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 09fa49bbf617..9a0263f25232 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -755,6 +755,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 		fl6->daddr = key->u.ipv6.dst;
 		fl6->flowlabel = key->label;
 		fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+		fl6->fl6_gre_key = tunnel_id_to_key32(key->tun_id);
 
 		dsfield = key->tos;
 		flags = key->tun_flags &
@@ -990,6 +991,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
 		fl6.daddr = key->u.ipv6.dst;
 		fl6.flowlabel = key->label;
 		fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+		fl6.fl6_gre_key = tunnel_id_to_key32(key->tun_id);
 
 		dsfield = key->tos;
 		if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
@@ -1098,6 +1100,7 @@ static void ip6gre_tnl_link_config_common(struct ip6_tnl *t)
 	fl6->flowi6_oif = p->link;
 	fl6->flowlabel = 0;
 	fl6->flowi6_proto = IPPROTO_GRE;
+	fl6->fl6_gre_key = t->parms.o_key;
 
 	if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
 		fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
@@ -1543,7 +1546,7 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev)
 static struct inet6_protocol ip6gre_protocol __read_mostly = {
 	.handler     = gre_rcv,
 	.err_handler = ip6gre_err,
-	.flags       = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+	.flags       = INET6_PROTO_FINAL,
 };
 
 static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 51238f957903..8fa258fbdcfc 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -33,6 +33,7 @@
 #include <net/flow.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
+#include <net/gre.h>
 #if IS_ENABLED(CONFIG_IPV6_MIP6)
 #include <net/mip6.h>
 #endif
@@ -3458,6 +3459,26 @@ decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse)
 			}
 			fl6->flowi6_proto = nexthdr;
 			return;
+		case IPPROTO_GRE:
+			if (!onlyproto &&
+			    (nh + offset + 12 < skb->data ||
+			     pskb_may_pull(skb, nh + offset + 12 - skb->data))) {
+				struct gre_base_hdr *gre_hdr;
+				__be32 *gre_key;
+
+				nh = skb_network_header(skb);
+				gre_hdr = (struct gre_base_hdr *)(nh + offset);
+				gre_key = (__be32 *)(gre_hdr + 1);
+
+				if (gre_hdr->flags & GRE_KEY) {
+					if (gre_hdr->flags & GRE_CSUM)
+						gre_key++;
+					fl6->fl6_gre_key = *gre_key;
+				}
+			}
+			fl6->flowi6_proto = nexthdr;
+			return;
+
 #if IS_ENABLED(CONFIG_IPV6_MIP6)
 		case IPPROTO_MH:
 			offset += ipv6_optlen(exthdr);
-- 
Gitee


From af0dd0297694f5bbc8e4e89cd188ca486e98aef0 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 27 Oct 2021 18:30:25 +0100
Subject: [PATCH 032/100] btrfs: fix deadlock between quota enable and other
 quota operations

stable inclusion
from stable-5.10.94
commit 09e0ef287e93c65b6a78d2dca34ad5a6c78ef93b
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 232796df8c1437c41d308d161007f0715bac0a54 upstream.

When enabling quotas, we attempt to commit a transaction while holding the
mutex fs_info->qgroup_ioctl_lock. This can result on a deadlock with other
quota operations such as:

- qgroup creation and deletion, ioctl BTRFS_IOC_QGROUP_CREATE;

- adding and removing qgroup relations, ioctl BTRFS_IOC_QGROUP_ASSIGN.

This is because these operations join a transaction and after that they
attempt to lock the mutex fs_info->qgroup_ioctl_lock. Acquiring that mutex
after joining or starting a transaction is a pattern followed everywhere
in qgroups, so the quota enablement operation is the one at fault here,
and should not commit a transaction while holding that mutex.

Fix this by making the transaction commit while not holding the mutex.
We are safe from two concurrent tasks trying to enable quotas because
we are serialized by the rw semaphore fs_info->subvol_sem at
btrfs_ioctl_quota_ctl(), which is the only call site for enabling
quotas.

When this deadlock happens, it produces a trace like the following:

  INFO: task syz-executor:25604 blocked for more than 143 seconds.
  Not tainted 5.15.0-rc6 #4
  "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  task:syz-executor state:D stack:24800 pid:25604 ppid: 24873 flags:0x00004004
  Call Trace:
  context_switch kernel/sched/core.c:4940 [inline]
  __schedule+0xcd9/0x2530 kernel/sched/core.c:6287
  schedule+0xd3/0x270 kernel/sched/core.c:6366
  btrfs_commit_transaction+0x994/0x2e90 fs/btrfs/transaction.c:2201
  btrfs_quota_enable+0x95c/0x1790 fs/btrfs/qgroup.c:1120
  btrfs_ioctl_quota_ctl fs/btrfs/ioctl.c:4229 [inline]
  btrfs_ioctl+0x637e/0x7b70 fs/btrfs/ioctl.c:5010
  vfs_ioctl fs/ioctl.c:51 [inline]
  __do_sys_ioctl fs/ioctl.c:874 [inline]
  __se_sys_ioctl fs/ioctl.c:860 [inline]
  __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860
  do_syscall_x64 arch/x86/entry/common.c:50 [inline]
  do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
  entry_SYSCALL_64_after_hwframe+0x44/0xae
  RIP: 0033:0x7f86920b2c4d
  RSP: 002b:00007f868f61ac58 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
  RAX: ffffffffffffffda RBX: 00007f86921d90a0 RCX: 00007f86920b2c4d
  RDX: 0000000020005e40 RSI: 00000000c0109428 RDI: 0000000000000008
  RBP: 00007f869212bd80 R08: 0000000000000000 R09: 0000000000000000
  R10: 0000000000000000 R11: 0000000000000246 R12: 00007f86921d90a0
  R13: 00007fff6d233e4f R14: 00007fff6d233ff0 R15: 00007f868f61adc0
  INFO: task syz-executor:25628 blocked for more than 143 seconds.
  Not tainted 5.15.0-rc6 #4
  "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  task:syz-executor state:D stack:29080 pid:25628 ppid: 24873 flags:0x00004004
  Call Trace:
  context_switch kernel/sched/core.c:4940 [inline]
  __schedule+0xcd9/0x2530 kernel/sched/core.c:6287
  schedule+0xd3/0x270 kernel/sched/core.c:6366
  schedule_preempt_disabled+0xf/0x20 kernel/sched/core.c:6425
  __mutex_lock_common kernel/locking/mutex.c:669 [inline]
  __mutex_lock+0xc96/0x1680 kernel/locking/mutex.c:729
  btrfs_remove_qgroup+0xb7/0x7d0 fs/btrfs/qgroup.c:1548
  btrfs_ioctl_qgroup_create fs/btrfs/ioctl.c:4333 [inline]
  btrfs_ioctl+0x683c/0x7b70 fs/btrfs/ioctl.c:5014
  vfs_ioctl fs/ioctl.c:51 [inline]
  __do_sys_ioctl fs/ioctl.c:874 [inline]
  __se_sys_ioctl fs/ioctl.c:860 [inline]
  __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:860
  do_syscall_x64 arch/x86/entry/common.c:50 [inline]
  do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
  entry_SYSCALL_64_after_hwframe+0x44/0xae

Reported-by: Hao Sun <sunhao.th@gmail.com>
Link: https://lore.kernel.org/linux-btrfs/CACkBjsZQF19bQ1C6=yetF3BvL10OSORpFUcWXTP6HErshDB4dQ@mail.gmail.com/
Fixes: 340f1aa27f36 ("btrfs: qgroups: Move transaction management inside btrfs_quota_enable/disable")
CC: stable@vger.kernel.org # 4.19
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 fs/btrfs/qgroup.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 4bac32a274ce..f65aa4ed5ca1 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -941,6 +941,14 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
 	int ret = 0;
 	int slot;
 
+	/*
+	 * We need to have subvol_sem write locked, to prevent races between
+	 * concurrent tasks trying to enable quotas, because we will unlock
+	 * and relock qgroup_ioctl_lock before setting fs_info->quota_root
+	 * and before setting BTRFS_FS_QUOTA_ENABLED.
+	 */
+	lockdep_assert_held_write(&fs_info->subvol_sem);
+
 	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	if (fs_info->quota_root)
 		goto out;
@@ -1118,8 +1126,19 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
 		goto out_free_path;
 	}
 
+	mutex_unlock(&fs_info->qgroup_ioctl_lock);
+	/*
+	 * Commit the transaction while not holding qgroup_ioctl_lock, to avoid
+	 * a deadlock with tasks concurrently doing other qgroup operations, such
+	 * adding/removing qgroups or adding/deleting qgroup relations for example,
+	 * because all qgroup operations first start or join a transaction and then
+	 * lock the qgroup_ioctl_lock mutex.
+	 * We are safe from a concurrent task trying to enable quotas, by calling
+	 * this function, since we are serialized by fs_info->subvol_sem.
+	 */
 	ret = btrfs_commit_transaction(trans);
 	trans = NULL;
+	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	if (ret)
 		goto out_free_path;
 
-- 
Gitee


From 997836d2d55d8210ddd4c70d3c73c45d192c8866 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 24 Nov 2021 14:14:24 -0500
Subject: [PATCH 033/100] btrfs: check the root node for uptodate before
 returning it

stable inclusion
from stable-5.10.94
commit e7764bccae77d3620113576ed18abd5233ba07a6
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 120de408e4b97504a2d9b5ca534b383de2c73d49 upstream.

Now that we clear the extent buffer uptodate if we fail to write it out
we need to check to see if our root node is uptodate before we search
down it.  Otherwise we could return stale data (or potentially corrupt
data that was caught by the write verification step) and think that the
path is OK to search down.

CC: stable@vger.kernel.org # 5.4+
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 fs/btrfs/ctree.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 519cf145f9bd..5addd1e36a8e 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2589,12 +2589,9 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct extent_buffer *b;
-	int root_lock;
+	int root_lock = 0;
 	int level = 0;
 
-	/* We try very hard to do read locks on the root */
-	root_lock = BTRFS_READ_LOCK;
-
 	if (p->search_commit_root) {
 		/*
 		 * The commit roots are read only so we always do read locks,
@@ -2632,6 +2629,9 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
 		goto out;
 	}
 
+	/* We try very hard to do read locks on the root */
+	root_lock = BTRFS_READ_LOCK;
+
 	/*
 	 * If the level is set to maximum, we can skip trying to get the read
 	 * lock.
@@ -2658,6 +2658,17 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
 	level = btrfs_header_level(b);
 
 out:
+	/*
+	 * The root may have failed to write out at some point, and thus is no
+	 * longer valid, return an error in this case.
+	 */
+	if (!extent_buffer_uptodate(b)) {
+		if (root_lock)
+			btrfs_tree_unlock_rw(b, root_lock);
+		free_extent_buffer(b);
+		return ERR_PTR(-EIO);
+	}
+
 	p->nodes[level] = b;
 	if (!p->skip_locking)
 		p->locks[level] = root_lock;
-- 
Gitee


From 313fecf6573b2c107088c1a8eeff0f41083ed893 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 16 Dec 2021 15:00:32 +0000
Subject: [PATCH 034/100] btrfs: respect the max size in the header when
 activating swap file

stable inclusion
from stable-5.10.94
commit f8c3ec2e21b9cd9dbb7b91de982d6016010c5605
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit c2f822635df873c510bda6fb7fd1b10b7c31be2d upstream.

If we extended the size of a swapfile after its header was created (by the
mkswap utility) and then try to activate it, we will map the entire file
when activating the swap file, instead of limiting to the max size defined
in the swap file's header.

Currently test case generic/643 from fstests fails because we do not
respect that size limit defined in the swap file's header.

So fix this by not mapping file ranges beyond the max size defined in the
swap header.

This is the same type of bug that iomap used to have, and was fixed in
commit 36ca7943ac18ae ("mm/swap: consider max pages in
iomap_swapfile_add_extent").

Fixes: ed46ff3d423780 ("Btrfs: support swap files")
CC: stable@vger.kernel.org # 5.4+
Reviewed-and-tested-by: Josef Bacik <josef@toxicpanda.com
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 fs/btrfs/inode.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ff3f0638cdb9..1d9262a35473 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -10094,9 +10094,19 @@ static int btrfs_add_swap_extent(struct swap_info_struct *sis,
 				 struct btrfs_swap_info *bsi)
 {
 	unsigned long nr_pages;
+	unsigned long max_pages;
 	u64 first_ppage, first_ppage_reported, next_ppage;
 	int ret;
 
+	/*
+	 * Our swapfile may have had its size extended after the swap header was
+	 * written. In that case activating the swapfile should not go beyond
+	 * the max size set in the swap header.
+	 */
+	if (bsi->nr_pages >= sis->max)
+		return 0;
+
+	max_pages = sis->max - bsi->nr_pages;
 	first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
 	next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
 				PAGE_SIZE) >> PAGE_SHIFT;
@@ -10104,6 +10114,7 @@ static int btrfs_add_swap_extent(struct swap_info_struct *sis,
 	if (first_ppage >= next_ppage)
 		return 0;
 	nr_pages = next_ppage - first_ppage;
+	nr_pages = min(nr_pages, max_pages);
 
 	first_ppage_reported = first_ppage;
 	if (bsi->start == 0)
-- 
Gitee


From b153b188f191c3e7745034f7892f4d6781a2162b Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 7 Oct 2021 17:53:36 +0200
Subject: [PATCH 035/100] ext4: make sure to reset inode lockdep class when
 quota enabling fails

stable inclusion
from stable-5.10.94
commit 762e4c33e9e5ecdcfedb1752e38c2aac2921df2e
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 4013d47a5307fdb5c13370b5392498b00fedd274 upstream.

When we succeed in enabling some quota type but fail to enable another
one with quota feature, we correctly disable all enabled quota types.
However we forget to reset i_data_sem lockdep class. When the inode gets
freed and reused, it will inherit this lockdep class (i_data_sem is
initialized only when a slab is created) and thus eventually lockdep
barfs about possible deadlocks.

Reported-and-tested-by: syzbot+3b6f9218b1301ddda3e2@syzkaller.appspotmail.com
Signed-off-by: Jan Kara <jack@suse.cz>
Cc: stable@kernel.org
Link: https://lore.kernel.org/r/20211007155336.12493-3-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 fs/ext4/super.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 478b1516b7d4..d6b3169d9853 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -6459,8 +6459,19 @@ static int ext4_enable_quotas(struct super_block *sb)
 					"Failed to enable quota tracking "
 					"(type=%d, err=%d). Please run "
 					"e2fsck to fix.", type, err);
-				for (type--; type >= 0; type--)
+				for (type--; type >= 0; type--) {
+					struct inode *inode;
+
+					inode = sb_dqopt(sb)->files[type];
+					if (inode)
+						inode = igrab(inode);
 					dquot_quota_off(sb, type);
+					if (inode) {
+						lockdep_set_quota_inode(inode,
+							I_DATA_SEM_NORMAL);
+						iput(inode);
+					}
+				}
 
 				return err;
 			}
-- 
Gitee


From 04f2549dad9eb289d984f6ba387bf3137d8fba01 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 7 Oct 2021 17:53:35 +0200
Subject: [PATCH 036/100] ext4: make sure quota gets properly shutdown on error

stable inclusion
from stable-5.10.94
commit 115b762b48ab83de2898b8c1a38e3799446a97af
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 15fc69bbbbbc8c72e5f6cc4e1be0f51283c5448e upstream.

When we hit an error when enabling quotas and setting inode flags, we do
not properly shutdown quota subsystem despite returning error from
Q_QUOTAON quotactl. This can lead to some odd situations like kernel
using quota file while it is still writeable for userspace. Make sure we
properly cleanup the quota subsystem in case of error.

Signed-off-by: Jan Kara <jack@suse.cz>
Cc: stable@kernel.org
Link: https://lore.kernel.org/r/20211007155336.12493-2-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 fs/ext4/super.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d6b3169d9853..6e55b9ac5a5f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -6373,10 +6373,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 
 	lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
 	err = dquot_quota_on(sb, type, format_id, path);
-	if (err) {
-		lockdep_set_quota_inode(path->dentry->d_inode,
-					     I_DATA_SEM_NORMAL);
-	} else {
+	if (!err) {
 		struct inode *inode = d_inode(path->dentry);
 		handle_t *handle;
 
@@ -6396,7 +6393,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 		ext4_journal_stop(handle);
 	unlock_inode:
 		inode_unlock(inode);
+		if (err)
+			dquot_quota_off(sb, type);
 	}
+	if (err)
+		lockdep_set_quota_inode(path->dentry->d_inode,
+					     I_DATA_SEM_NORMAL);
 	return err;
 }
 
-- 
Gitee


From 71fb6573cae6f396507eb4244713c0888b95ea69 Mon Sep 17 00:00:00 2001
From: Chunguang Xu <brookxu@tencent.com>
Date: Tue, 23 Nov 2021 09:17:57 +0800
Subject: [PATCH 037/100] ext4: fix a possible ABBA deadlock due to busy PA

stable inclusion
from stable-5.10.94
commit f9ed0ea0a9fc59de71b230ff02f59a51fd174ca7
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 8c80fb312d7abf8bcd66cca1d843a80318a2c522 upstream.

We found on older kernel (3.10) that in the scenario of insufficient
disk space, system may trigger an ABBA deadlock problem, it seems that
this problem still exists in latest kernel, try to fix it here. The
main process triggered by this problem is that task A occupies the PA
and waits for the jbd2 transaction finish, the jbd2 transaction waits
for the completion of task B's IO (plug_list), but task B waits for
the release of PA by task A to finish discard, which indirectly forms
an ABBA deadlock. The related calltrace is as follows:

    Task A
    vfs_write
    ext4_mb_new_blocks()
    ext4_mb_mark_diskspace_used()       JBD2
    jbd2_journal_get_write_access()  -> jbd2_journal_commit_transaction()
  ->schedule()                          filemap_fdatawait()
 |                                              |
 | Task B                                       |
 | do_unlinkat()                                |
 | ext4_evict_inode()                           |
 | jbd2_journal_begin_ordered_truncate()        |
 | filemap_fdatawrite_range()                   |
 | ext4_mb_new_blocks()                         |
  -ext4_mb_discard_group_preallocations() <-----

Here, try to cancel ext4_mb_discard_group_preallocations() internal
retry due to PA busy, and do a limited number of retries inside
ext4_mb_discard_preallocations(), which can circumvent the above
problems, but also has some advantages:

1. Since the PA is in a busy state, if other groups have free PAs,
   keeping the current PA may help to reduce fragmentation.
2. Continue to traverse forward instead of waiting for the current
   group PA to be released. In most scenarios, the PA discard time
   can be reduced.

However, in the case of smaller free space, if only a few groups have
space, then due to multiple traversals of the group, it may increase
CPU overhead. But in contrast, I feel that the overall benefit is
better than the cost.

Signed-off-by: Chunguang Xu <brookxu@tencent.com>
Reported-by: kernel test robot <lkp@intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/1637630277-23496-1-git-send-email-brookxu.cn@gmail.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 fs/ext4/mballoc.c | 40 ++++++++++++++++++----------------------
 1 file changed, 18 insertions(+), 22 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 60aef7fdd61d..e40f87d07783 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4234,7 +4234,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
  */
 static noinline_for_stack int
 ext4_mb_discard_group_preallocations(struct super_block *sb,
-					ext4_group_t group, int needed)
+				     ext4_group_t group, int *busy)
 {
 	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
 	struct buffer_head *bitmap_bh = NULL;
@@ -4242,8 +4242,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
 	struct list_head list;
 	struct ext4_buddy e4b;
 	int err;
-	int busy = 0;
-	int free, free_total = 0;
+	int free = 0;
 
 	mb_debug(sb, "discard preallocation for group %u\n", group);
 	if (list_empty(&grp->bb_prealloc_list))
@@ -4266,19 +4265,14 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
 		goto out_dbg;
 	}
 
-	if (needed == 0)
-		needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
-
 	INIT_LIST_HEAD(&list);
-repeat:
-	free = 0;
 	ext4_lock_group(sb, group);
 	list_for_each_entry_safe(pa, tmp,
 				&grp->bb_prealloc_list, pa_group_list) {
 		spin_lock(&pa->pa_lock);
 		if (atomic_read(&pa->pa_count)) {
 			spin_unlock(&pa->pa_lock);
-			busy = 1;
+			*busy = 1;
 			continue;
 		}
 		if (pa->pa_deleted) {
@@ -4318,22 +4312,13 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
 		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
 	}
 
-	free_total += free;
-
-	/* if we still need more blocks and some PAs were used, try again */
-	if (free_total < needed && busy) {
-		ext4_unlock_group(sb, group);
-		cond_resched();
-		busy = 0;
-		goto repeat;
-	}
 	ext4_unlock_group(sb, group);
 	ext4_mb_unload_buddy(&e4b);
 	put_bh(bitmap_bh);
 out_dbg:
 	mb_debug(sb, "discarded (%d) blocks preallocated for group %u bb_free (%d)\n",
-		 free_total, group, grp->bb_free);
-	return free_total;
+		 free, group, grp->bb_free);
+	return free;
 }
 
 /*
@@ -4875,13 +4860,24 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
 {
 	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
 	int ret;
-	int freed = 0;
+	int freed = 0, busy = 0;
+	int retry = 0;
 
 	trace_ext4_mb_discard_preallocations(sb, needed);
+
+	if (needed == 0)
+		needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1;
+ repeat:
 	for (i = 0; i < ngroups && needed > 0; i++) {
-		ret = ext4_mb_discard_group_preallocations(sb, i, needed);
+		ret = ext4_mb_discard_group_preallocations(sb, i, &busy);
 		freed += ret;
 		needed -= ret;
+		cond_resched();
+	}
+
+	if (needed > 0 && busy && ++retry < 3) {
+		busy = 0;
+		goto repeat;
 	}
 
 	return freed;
-- 
Gitee


From b6d90c1d855fb71981a8bd130ce4bfb71b42ef0c Mon Sep 17 00:00:00 2001
From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Date: Wed, 1 Dec 2021 08:34:21 -0800
Subject: [PATCH 038/100] ext4: initialize err_blk before calling
 __ext4_get_inode_loc

stable inclusion
from stable-5.10.94
commit 720508dd118d04035875823f44bcd27388ff39b2
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit c27c29c6af4f3f4ce925a2111c256733c5a5b430 upstream.

It is not guaranteed that __ext4_get_inode_loc will definitely set
err_blk pointer when it returns EIO. To avoid using uninitialized
variables, let's first set err_blk to 0.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Link: https://lore.kernel.org/r/20211201163421.2631661-1-harshads@google.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 fs/ext4/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ef102cca42dc..7522f1db798d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4544,7 +4544,7 @@ static int __ext4_get_inode_loc(struct super_block *sb, unsigned long ino,
 static int __ext4_get_inode_loc_noinmem(struct inode *inode,
 					struct ext4_iloc *iloc)
 {
-	ext4_fsblk_t err_blk;
+	ext4_fsblk_t err_blk = 0;
 	int ret;
 
 	ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, NULL, iloc,
@@ -4559,7 +4559,7 @@ static int __ext4_get_inode_loc_noinmem(struct inode *inode,
 
 int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
 {
-	ext4_fsblk_t err_blk;
+	ext4_fsblk_t err_blk = 0;
 	int ret;
 
 	ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, inode, iloc,
-- 
Gitee


From 9e49c50961b429ca35028c9786c98b7c1f2879db Mon Sep 17 00:00:00 2001
From: Xin Yin <yinxin.x@bytedance.com>
Date: Tue, 21 Dec 2021 10:28:39 +0800
Subject: [PATCH 039/100] ext4: fix fast commit may miss tracking range for
 FALLOC_FL_ZERO_RANGE

stable inclusion
from stable-5.10.94
commit e4221629d5e1479db400d8a4cbf865c65a457630
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 5e4d0eba1ccaf19f93222abdeda5a368be141785 upstream.

when call falloc with FALLOC_FL_ZERO_RANGE, to set an range to unwritten,
which has been already initialized. If the range is align to blocksize,
fast commit will not track range for this change.

Also track range for unwritten range in ext4_map_blocks().

Signed-off-by: Xin Yin <yinxin.x@bytedance.com>
Reviewed-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Link: https://lore.kernel.org/r/20211221022839.374606-1-yinxin.x@bytedance.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 fs/ext4/extents.c | 2 --
 fs/ext4/inode.c   | 7 ++++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 398df993ccd8..6a99b6ed7031 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4650,8 +4650,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 	ret = ext4_mark_inode_dirty(handle, inode);
 	if (unlikely(ret))
 		goto out_handle;
-	ext4_fc_track_range(handle, inode, offset >> inode->i_sb->s_blocksize_bits,
-			(offset + len - 1) >> inode->i_sb->s_blocksize_bits);
 	/* Zero out partial block at the edges of the range */
 	ret = ext4_zero_partial_blocks(handle, inode, offset, len);
 	if (ret >= 0)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7522f1db798d..9cf1c76514be 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -741,10 +741,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 			if (ret)
 				return ret;
 		}
-		ext4_fc_track_range(handle, inode, map->m_lblk,
-			    map->m_lblk + map->m_len - 1);
 	}
-
+	if (retval > 0 && (map->m_flags & EXT4_MAP_UNWRITTEN ||
+				map->m_flags & EXT4_MAP_MAPPED))
+		ext4_fc_track_range(handle, inode, map->m_lblk,
+					map->m_lblk + map->m_len - 1);
 	if (retval < 0)
 		ext_debug(inode, "failed with err %d\n", retval);
 	return retval;
-- 
Gitee


From 9b06d15cfe4859ecc116497e875c2cdedd61b2e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lu=C3=ADs=20Henriques?= <lhenriques@suse.de>
Date: Tue, 14 Dec 2021 17:50:58 +0000
Subject: [PATCH 040/100] ext4: set csum seed in tmp inode while migrating to
 extents
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

stable inclusion
from stable-5.10.94
commit da364ab35892f69785266a93bd174f647db1f670
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit e81c9302a6c3c008f5c30beb73b38adb0170ff2d upstream.

When migrating to extents, the temporary inode will have it's own checksum
seed.  This means that, when swapping the inodes data, the inode checksums
will be incorrect.

This can be fixed by recalculating the extents checksums again.  Or simply
by copying the seed into the temporary inode.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=213357
Reported-by: Jeroen van Wolffelaar <jeroen@wolffelaar.nl>
Signed-off-by: Luís Henriques <lhenriques@suse.de>
Link: https://lore.kernel.org/r/20211214175058.19511-1-lhenriques@suse.de
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 fs/ext4/migrate.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index c5e3fc998211..69cc3f8e03bd 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -459,6 +459,17 @@ int ext4_ext_migrate(struct inode *inode)
 		ext4_journal_stop(handle);
 		goto out_unlock;
 	}
+	/*
+	 * Use the correct seed for checksum (i.e. the seed from 'inode').  This
+	 * is so that the metadata blocks will have the correct checksum after
+	 * the migration.
+	 *
+	 * Note however that, if a crash occurs during the migration process,
+	 * the recovery process is broken because the tmp_inode checksums will
+	 * be wrong and the orphans cleanup will fail.
+	 */
+	ei = EXT4_I(inode);
+	EXT4_I(tmp_inode)->i_csum_seed = ei->i_csum_seed;
 	i_size_write(tmp_inode, i_size_read(inode));
 	/*
 	 * Set the i_nlink to zero so it will be deleted later
@@ -502,7 +513,6 @@ int ext4_ext_migrate(struct inode *inode)
 		goto out_tmp_inode;
 	}
 
-	ei = EXT4_I(inode);
 	i_data = ei->i_data;
 	memset(&lb, 0, sizeof(lb));
 
-- 
Gitee


From 1d56861a8f503736d102b715b0c281df0ee1b996 Mon Sep 17 00:00:00 2001
From: Ye Bin <yebin10@huawei.com>
Date: Thu, 23 Dec 2021 09:55:06 +0800
Subject: [PATCH 041/100] ext4: Fix BUG_ON in ext4_bread when write quota data

stable inclusion
from stable-5.10.94
commit 53998b3f6dcde1d6553e27db1957ac0545a6e19f
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 380a0091cab482489e9b19e07f2a166ad2b76d5c upstream.

We got issue as follows when run syzkaller:
[  167.936972] EXT4-fs error (device loop0): __ext4_remount:6314: comm rep: Abort forced by user
[  167.938306] EXT4-fs (loop0): Remounting filesystem read-only
[  167.981637] Assertion failure in ext4_getblk() at fs/ext4/inode.c:847: '(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) || handle != NULL || create == 0'
[  167.983601] ------------[ cut here ]------------
[  167.984245] kernel BUG at fs/ext4/inode.c:847!
[  167.984882] invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI
[  167.985624] CPU: 7 PID: 2290 Comm: rep Tainted: G    B             5.16.0-rc5-next-20211217+ #123
[  167.986823] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014
[  167.988590] RIP: 0010:ext4_getblk+0x17e/0x504
[  167.989189] Code: c6 01 74 28 49 c7 c0 a0 a3 5c 9b b9 4f 03 00 00 48 c7 c2 80 9c 5c 9b 48 c7 c6 40 b6 5c 9b 48 c7 c7 20 a4 5c 9b e8 77 e3 fd ff <0f> 0b 8b 04 244
[  167.991679] RSP: 0018:ffff8881736f7398 EFLAGS: 00010282
[  167.992385] RAX: 0000000000000094 RBX: 1ffff1102e6dee75 RCX: 0000000000000000
[  167.993337] RDX: 0000000000000001 RSI: ffffffff9b6e29e0 RDI: ffffed102e6dee66
[  167.994292] RBP: ffff88816a076210 R08: 0000000000000094 R09: ffffed107363fa09
[  167.995252] R10: ffff88839b1fd047 R11: ffffed107363fa08 R12: ffff88816a0761e8
[  167.996205] R13: 0000000000000000 R14: 0000000000000021 R15: 0000000000000001
[  167.997158] FS:  00007f6a1428c740(0000) GS:ffff88839b000000(0000) knlGS:0000000000000000
[  167.998238] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  167.999025] CR2: 00007f6a140716c8 CR3: 0000000133216000 CR4: 00000000000006e0
[  167.999987] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  168.000944] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  168.001899] Call Trace:
[  168.002235]  <TASK>
[  168.007167]  ext4_bread+0xd/0x53
[  168.007612]  ext4_quota_write+0x20c/0x5c0
[  168.010457]  write_blk+0x100/0x220
[  168.010944]  remove_free_dqentry+0x1c6/0x440
[  168.011525]  free_dqentry.isra.0+0x565/0x830
[  168.012133]  remove_tree+0x318/0x6d0
[  168.014744]  remove_tree+0x1eb/0x6d0
[  168.017346]  remove_tree+0x1eb/0x6d0
[  168.019969]  remove_tree+0x1eb/0x6d0
[  168.022128]  qtree_release_dquot+0x291/0x340
[  168.023297]  v2_release_dquot+0xce/0x120
[  168.023847]  dquot_release+0x197/0x3e0
[  168.024358]  ext4_release_dquot+0x22a/0x2d0
[  168.024932]  dqput.part.0+0x1c9/0x900
[  168.025430]  __dquot_drop+0x120/0x190
[  168.025942]  ext4_clear_inode+0x86/0x220
[  168.026472]  ext4_evict_inode+0x9e8/0xa22
[  168.028200]  evict+0x29e/0x4f0
[  168.028625]  dispose_list+0x102/0x1f0
[  168.029148]  evict_inodes+0x2c1/0x3e0
[  168.030188]  generic_shutdown_super+0xa4/0x3b0
[  168.030817]  kill_block_super+0x95/0xd0
[  168.031360]  deactivate_locked_super+0x85/0xd0
[  168.031977]  cleanup_mnt+0x2bc/0x480
[  168.033062]  task_work_run+0xd1/0x170
[  168.033565]  do_exit+0xa4f/0x2b50
[  168.037155]  do_group_exit+0xef/0x2d0
[  168.037666]  __x64_sys_exit_group+0x3a/0x50
[  168.038237]  do_syscall_64+0x3b/0x90
[  168.038751]  entry_SYSCALL_64_after_hwframe+0x44/0xae

In order to reproduce this problem, the following conditions need to be met:
1. Ext4 filesystem with no journal;
2. Filesystem image with incorrect quota data;
3. Abort filesystem forced by user;
4. umount filesystem;

As in ext4_quota_write:
...
         if (EXT4_SB(sb)->s_journal && !handle) {
                 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
                         " cancelled because transaction is not started",
                         (unsigned long long)off, (unsigned long long)len);
                 return -EIO;
         }
...
We only check handle if NULL when filesystem has journal. There is need
check handle if NULL even when filesystem has no journal.

Signed-off-by: Ye Bin <yebin10@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20211223015506.297766-1-yebin10@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 fs/ext4/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6e55b9ac5a5f..2e7a8cfe572a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -6577,7 +6577,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 	struct buffer_head *bh;
 	handle_t *handle = journal_current_handle();
 
-	if (EXT4_SB(sb)->s_journal && !handle) {
+	if (!handle) {
 		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
 			" cancelled because transaction is not started",
 			(unsigned long long)off, (unsigned long long)len);
-- 
Gitee


From fe5358a811a8c0bc52b3a2b6c584bd92e041152b Mon Sep 17 00:00:00 2001
From: Xin Yin <yinxin.x@bytedance.com>
Date: Thu, 23 Dec 2021 11:23:36 +0800
Subject: [PATCH 042/100] ext4: use ext4_ext_remove_space() for fast commit
 replay delete range

stable inclusion
from stable-5.10.94
commit 04b562730677630d905a9e47c5e4826888745ebc
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 0b5b5a62b945a141e64011b2f90ee7e46f14be98 upstream.

For now ,we use ext4_punch_hole() during fast commit replay delete range
procedure. But it will be affected by inode->i_size, which may not
correct during fast commit replay procedure. The following test will
failed.

-create & write foo (len 1000K)
-falloc FALLOC_FL_ZERO_RANGE foo (range 400K - 600K)
-create & fsync bar
-falloc FALLOC_FL_PUNCH_HOLE foo (range 300K-500K)
-fsync foo
-crash before a full commit

After the fast_commit reply procedure, the range 400K-500K will not be
removed. Because in this case, when calling ext4_punch_hole() the
inode->i_size is 0, and it just retruns with doing nothing.

Change to use ext4_ext_remove_space() instead of ext4_punch_hole()
to remove blocks of inode directly.

Signed-off-by: Xin Yin <yinxin.x@bytedance.com>
Reviewed-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Link: https://lore.kernel.org/r/20211223032337.5198-2-yinxin.x@bytedance.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 fs/ext4/fast_commit.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 08ca690f928b..02cb1a1e6111 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -1764,11 +1764,14 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
 		}
 	}
 
-	ret = ext4_punch_hole(inode,
-		le32_to_cpu(lrange.fc_lblk) << sb->s_blocksize_bits,
-		le32_to_cpu(lrange.fc_len) <<  sb->s_blocksize_bits);
-	if (ret)
-		jbd_debug(1, "ext4_punch_hole returned %d", ret);
+	down_write(&EXT4_I(inode)->i_data_sem);
+	ret = ext4_ext_remove_space(inode, lrange.fc_lblk,
+				lrange.fc_lblk + lrange.fc_len - 1);
+	up_write(&EXT4_I(inode)->i_data_sem);
+	if (ret) {
+		iput(inode);
+		return 0;
+	}
 	ext4_ext_replay_shrink_inode(inode,
 		i_size_read(inode) >> sb->s_blocksize_bits);
 	ext4_mark_inode_dirty(NULL, inode);
-- 
Gitee


From 04ff86c4c7dadf9a42f0ddd0a4d674c4d72502c0 Mon Sep 17 00:00:00 2001
From: Xin Yin <yinxin.x@bytedance.com>
Date: Thu, 23 Dec 2021 11:23:37 +0800
Subject: [PATCH 043/100] ext4: fast commit may miss tracking unwritten range
 during ftruncate

stable inclusion
from stable-5.10.94
commit f26b24b4c115f9c8fe8defd2c158420d30b7af0f
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 9725958bb75cdfa10f2ec11526fdb23e7485e8e4 upstream.

If use FALLOC_FL_KEEP_SIZE to alloc unwritten range at bottom, the
inode->i_size will not include the unwritten range. When call
ftruncate with fast commit enabled, it will miss to track the
unwritten range.

Change to trace the full range during ftruncate.

Signed-off-by: Xin Yin <yinxin.x@bytedance.com>
Reviewed-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Link: https://lore.kernel.org/r/20211223032337.5198-3-yinxin.x@bytedance.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 fs/ext4/inode.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9cf1c76514be..4cfee4cb2350 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5445,8 +5445,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 				ext4_fc_track_range(handle, inode,
 					(attr->ia_size > 0 ? attr->ia_size - 1 : 0) >>
 					inode->i_sb->s_blocksize_bits,
-					(oldsize > 0 ? oldsize - 1 : 0) >>
-					inode->i_sb->s_blocksize_bits);
+					EXT_MAX_BLOCKS - 1);
 			else
 				ext4_fc_track_range(
 					handle, inode,
-- 
Gitee


From cb8bc57b124df261b7673ef736b9e58e68b76338 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 23 Dec 2021 17:44:36 +0100
Subject: [PATCH 044/100] ext4: destroy ext4_fc_dentry_cachep kmemcache on
 module removal

stable inclusion
from stable-5.10.94
commit d60e9daba29e44e0f277333e46fff90c74509398
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit ab047d516dea72f011c15c04a929851e4d053109 upstream.

The kmemcache for ext4_fc_dentry_cachep remains registered after module
removal.

Destroy ext4_fc_dentry_cachep kmemcache on module removal.

Fixes: aa75f4d3daaeb ("ext4: main fast-commit commit path")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Lukas Czerner <lczerner@redhat.com>
Reviewed-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Link: https://lore.kernel.org/r/20211110134640.lyku5vklvdndw6uk@linutronix.de
Link: https://lore.kernel.org/r/YbiK3JetFFl08bd7@linutronix.de
Link: https://lore.kernel.org/r/20211223164436.2628390-1-bigeasy@linutronix.de
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 fs/ext4/ext4.h        | 1 +
 fs/ext4/fast_commit.c | 5 +++++
 fs/ext4/super.c       | 2 ++
 3 files changed, 8 insertions(+)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index bb3adca53d93..c801d37e5001 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2799,6 +2799,7 @@ bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t block);
 void ext4_fc_replay_cleanup(struct super_block *sb);
 int ext4_fc_commit(journal_t *journal, tid_t commit_tid);
 int __init ext4_fc_init_dentry_cache(void);
+void ext4_fc_destroy_dentry_cache(void);
 
 /* mballoc.c */
 extern const struct seq_operations ext4_mb_seq_groups_ops;
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 02cb1a1e6111..f483abcd5213 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -2169,3 +2169,8 @@ int __init ext4_fc_init_dentry_cache(void)
 
 	return 0;
 }
+
+void ext4_fc_destroy_dentry_cache(void)
+{
+	kmem_cache_destroy(ext4_fc_dentry_cachep);
+}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2e7a8cfe572a..56bd2ea08090 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -6761,6 +6761,7 @@ static int __init ext4_init_fs(void)
 out:
 	unregister_as_ext2();
 	unregister_as_ext3();
+	ext4_fc_destroy_dentry_cache();
 out05:
 	destroy_inodecache();
 out1:
@@ -6787,6 +6788,7 @@ static void __exit ext4_exit_fs(void)
 	unregister_as_ext2();
 	unregister_as_ext3();
 	unregister_filesystem(&ext4_fs_type);
+	ext4_fc_destroy_dentry_cache();
 	destroy_inodecache();
 	ext4_exit_mballoc();
 	ext4_exit_sysfs();
-- 
Gitee


From 92117f491353dc15984c52ebe34f9a4fb102a6b8 Mon Sep 17 00:00:00 2001
From: Ye Bin <yebin10@huawei.com>
Date: Fri, 24 Dec 2021 18:03:41 +0800
Subject: [PATCH 045/100] ext4: fix null-ptr-deref in
 '__ext4_journal_ensure_credits'

stable inclusion
from stable-5.10.94
commit 679fb065326be0bff3d7463cfc975cddeeae9d68
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 298b5c521746d69c07beb2757292fb5ccc1b0f85 upstream.

We got issue as follows when run syzkaller test:
[ 1901.130043] EXT4-fs error (device vda): ext4_remount:5624: comm syz-executor.5: Abort forced by user
[ 1901.130901] Aborting journal on device vda-8.
[ 1901.131437] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-executor.16: Detected aborted journal
[ 1901.131566] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-executor.11: Detected aborted journal
[ 1901.132586] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-executor.18: Detected aborted journal
[ 1901.132751] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-executor.9: Detected aborted journal
[ 1901.136149] EXT4-fs error (device vda) in ext4_reserve_inode_write:6035: Journal has aborted
[ 1901.136837] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-fuzzer: Detected aborted journal
[ 1901.136915] ==================================================================
[ 1901.138175] BUG: KASAN: null-ptr-deref in __ext4_journal_ensure_credits+0x74/0x140 [ext4]
[ 1901.138343] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-executor.13: Detected aborted journal
[ 1901.138398] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-executor.1: Detected aborted journal
[ 1901.138808] Read of size 8 at addr 0000000000000000 by task syz-executor.17/968
[ 1901.138817]
[ 1901.138852] EXT4-fs error (device vda): ext4_journal_check_start:61: comm syz-executor.30: Detected aborted journal
[ 1901.144779] CPU: 1 PID: 968 Comm: syz-executor.17 Not tainted 4.19.90-vhulk2111.1.0.h893.eulerosv2r10.aarch64+ #1
[ 1901.146479] Hardware name: linux,dummy-virt (DT)
[ 1901.147317] Call trace:
[ 1901.147552]  dump_backtrace+0x0/0x2d8
[ 1901.147898]  show_stack+0x28/0x38
[ 1901.148215]  dump_stack+0xec/0x15c
[ 1901.148746]  kasan_report+0x108/0x338
[ 1901.149207]  __asan_load8+0x58/0xb0
[ 1901.149753]  __ext4_journal_ensure_credits+0x74/0x140 [ext4]
[ 1901.150579]  ext4_xattr_delete_inode+0xe4/0x700 [ext4]
[ 1901.151316]  ext4_evict_inode+0x524/0xba8 [ext4]
[ 1901.151985]  evict+0x1a4/0x378
[ 1901.152353]  iput+0x310/0x428
[ 1901.152733]  do_unlinkat+0x260/0x428
[ 1901.153056]  __arm64_sys_unlinkat+0x6c/0xc0
[ 1901.153455]  el0_svc_common+0xc8/0x320
[ 1901.153799]  el0_svc_handler+0xf8/0x160
[ 1901.154265]  el0_svc+0x10/0x218
[ 1901.154682] ==================================================================

This issue may happens like this:
        Process1                               Process2
ext4_evict_inode
  ext4_journal_start
   ext4_truncate
     ext4_ind_truncate
       ext4_free_branches
         ext4_ind_truncate_ensure_credits
           ext4_journal_ensure_credits_fn
             ext4_journal_restart
               handle->h_transaction = NULL;
                                           mount -o remount,abort  /mnt
                                           -> trigger JBD abort
               start_this_handle -> will return failed
  ext4_xattr_delete_inode
    ext4_journal_ensure_credits
      ext4_journal_ensure_credits_fn
        __ext4_journal_ensure_credits
          jbd2_handle_buffer_credits
            journal = handle->h_transaction->t_journal; ->null-ptr-deref

Now, indirect truncate process didn't handle error. To solve this issue
maybe simply add check handle is abort in '__ext4_journal_ensure_credits'
is enough, and i also think this is necessary.

Cc: stable@kernel.org
Signed-off-by: Ye Bin <yebin10@huawei.com>
Link: https://lore.kernel.org/r/20211224100341.3299128-1-yebin10@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 fs/ext4/ext4_jbd2.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index b96ecba91899..b53e1d0b13fc 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -162,6 +162,8 @@ int __ext4_journal_ensure_credits(handle_t *handle, int check_cred,
 {
 	if (!ext4_handle_valid(handle))
 		return 0;
+	if (is_handle_aborted(handle))
+		return -EROFS;
 	if (jbd2_handle_buffer_credits(handle) >= check_cred &&
 	    handle->h_revoke_credits >= revoke_cred)
 		return 0;
-- 
Gitee


From 2d65c5e0ea97c766cfd09862ad11b523a4f60f80 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 5 Jan 2022 23:59:56 -0500
Subject: [PATCH 046/100] ext4: don't use the orphan list when migrating an
 inode

stable inclusion
from stable-5.10.94
commit 0ca7ec6db20c66b91277c231780e9f05b42a8163
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 6eeaf88fd586f05aaf1d48cb3a139d2a5c6eb055 upstream.

We probably want to remove the indirect block to extents migration
feature after a deprecation window, but until then, let's fix a
potential data loss problem caused by the fact that we put the
tmp_inode on the orphan list.  In the unlikely case where we crash and
do a journal recovery, the data blocks belonging to the inode being
migrated are also represented in the tmp_inode on the orphan list ---
and so its data blocks will get marked unallocated, and available for
reuse.

Instead, stop putting the tmp_inode on the oprhan list.  So in the
case where we crash while migrating the inode, we'll leak an inode,
which is not a disaster.  It will be easily fixed the next time we run
fsck, and it's better than potentially having blocks getting claimed
by two different files, and losing data as a result.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Lukas Czerner <lczerner@redhat.com>
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 fs/ext4/migrate.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 69cc3f8e03bd..49912814f3d8 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -437,12 +437,12 @@ int ext4_ext_migrate(struct inode *inode)
 	percpu_down_write(&sbi->s_writepages_rwsem);
 
 	/*
-	 * Worst case we can touch the allocation bitmaps, a bgd
-	 * block, and a block to link in the orphan list.  We do need
-	 * need to worry about credits for modifying the quota inode.
+	 * Worst case we can touch the allocation bitmaps and a block
+	 * group descriptor block.  We do need need to worry about
+	 * credits for modifying the quota inode.
 	 */
 	handle = ext4_journal_start(inode, EXT4_HT_MIGRATE,
-		4 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb));
+		3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb));
 
 	if (IS_ERR(handle)) {
 		retval = PTR_ERR(handle);
@@ -463,10 +463,6 @@ int ext4_ext_migrate(struct inode *inode)
 	 * Use the correct seed for checksum (i.e. the seed from 'inode').  This
 	 * is so that the metadata blocks will have the correct checksum after
 	 * the migration.
-	 *
-	 * Note however that, if a crash occurs during the migration process,
-	 * the recovery process is broken because the tmp_inode checksums will
-	 * be wrong and the orphans cleanup will fail.
 	 */
 	ei = EXT4_I(inode);
 	EXT4_I(tmp_inode)->i_csum_seed = ei->i_csum_seed;
@@ -478,7 +474,6 @@ int ext4_ext_migrate(struct inode *inode)
 	clear_nlink(tmp_inode);
 
 	ext4_ext_tree_init(handle, tmp_inode);
-	ext4_orphan_add(handle, tmp_inode);
 	ext4_journal_stop(handle);
 
 	/*
@@ -503,12 +498,6 @@ int ext4_ext_migrate(struct inode *inode)
 
 	handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
 	if (IS_ERR(handle)) {
-		/*
-		 * It is impossible to update on-disk structures without
-		 * a handle, so just rollback in-core changes and live other
-		 * work to orphan_list_cleanup()
-		 */
-		ext4_orphan_del(NULL, tmp_inode);
 		retval = PTR_ERR(handle);
 		goto out_tmp_inode;
 	}
-- 
Gitee


From 042564302301f920e7b009f6a8772293aa21c835 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Mon, 17 Jan 2022 10:31:26 +0100
Subject: [PATCH 047/100] drm/radeon: fix error handling in
 radeon_driver_open_kms
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

stable inclusion
from stable-5.10.94
commit 7bb99c7e13f8417f81247b2acea8ceaae0c5afb3
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 4722f463896cc0ef1a6f1c3cb2e171e949831249 upstream.

The return value was never initialized so the cleanup code executed when
it isn't even necessary.

Just add proper error handling.

Fixes: ab50cb9df889 ("drm/radeon/radeon_kms: Fix a NULL pointer dereference in radeon_driver_open_kms()")
Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Jan Stancek <jstancek@redhat.com>
Tested-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/gpu/drm/radeon/radeon_kms.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 204634b23928..32070e94f6c4 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -652,18 +652,18 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 		fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
 		if (unlikely(!fpriv)) {
 			r = -ENOMEM;
-			goto out_suspend;
+			goto err_suspend;
 		}
 
 		if (rdev->accel_working) {
 			vm = &fpriv->vm;
 			r = radeon_vm_init(rdev, vm);
 			if (r)
-				goto out_fpriv;
+				goto err_fpriv;
 
 			r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
 			if (r)
-				goto out_vm_fini;
+				goto err_vm_fini;
 
 			/* map the ib pool buffer read only into
 			 * virtual address space */
@@ -671,7 +671,7 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 							rdev->ring_tmp_bo.bo);
 			if (!vm->ib_bo_va) {
 				r = -ENOMEM;
-				goto out_vm_fini;
+				goto err_vm_fini;
 			}
 
 			r = radeon_vm_bo_set_addr(rdev, vm->ib_bo_va,
@@ -679,19 +679,21 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 						  RADEON_VM_PAGE_READABLE |
 						  RADEON_VM_PAGE_SNOOPED);
 			if (r)
-				goto out_vm_fini;
+				goto err_vm_fini;
 		}
 		file_priv->driver_priv = fpriv;
 	}
 
-	if (!r)
-		goto out_suspend;
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+	return 0;
 
-out_vm_fini:
+err_vm_fini:
 	radeon_vm_fini(rdev, vm);
-out_fpriv:
+err_fpriv:
 	kfree(fpriv);
-out_suspend:
+
+err_suspend:
 	pm_runtime_mark_last_busy(dev->dev);
 	pm_runtime_put_autosuspend(dev->dev);
 	return r;
-- 
Gitee


From 5939e5ece8e8b0ec9f8ea866fe1393096fd1b5ae Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Thu, 30 Dec 2021 18:31:53 +0200
Subject: [PATCH 048/100] of: base: Improve argument length mismatch error

stable inclusion
from stable-5.10.94
commit 12224c0d19f34edaca246b99fd79b076b0abadaa
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 5d05b811b5acb92fc581a7b328b36646c86f5ab9 upstream.

The cells_name field of of_phandle_iterator might be NULL. Use the
phandle name instead. With this change instead of:

  OF: /soc/pinctrl@1000000: (null) = 3 found 2

We get:

  OF: /soc/pinctrl@1000000: phandle pinctrl@1000000 needs 3, found 2

Which is a more helpful messages making DT debugging easier.

In this particular example the phandle name looks like duplicate of the
same node name. But note that the first node is the parent node
(it->parent), while the second is the phandle target (it->node). They
happen to be the same in the case that triggered this improvement. See
commit 72cb4c48a46a ("arm64: dts: qcom: ipq6018: Fix gpio-ranges
property").

Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/f6a68e0088a552ea9dfd4d8e3b5b586d92594738.1640881913.git.baruch@tkos.co.il
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/of/base.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/of/base.c b/drivers/of/base.c
index 60cb9b44d4ec..a44a0e7ba251 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1328,9 +1328,14 @@ int of_phandle_iterator_next(struct of_phandle_iterator *it)
 		 * property data length
 		 */
 		if (it->cur + count > it->list_end) {
-			pr_err("%pOF: %s = %d found %td\n",
-			       it->parent, it->cells_name,
-			       count, it->list_end - it->cur);
+			if (it->cells_name)
+				pr_err("%pOF: %s = %d found %td\n",
+					it->parent, it->cells_name,
+					count, it->list_end - it->cur);
+			else
+				pr_err("%pOF: phandle %s needs %d, found %td\n",
+					it->parent, of_node_full_name(it->node),
+					count, it->list_end - it->cur);
 			goto err;
 		}
 	}
-- 
Gitee


From 3574c1368120d1497c42cb2e11c9967caab2eccc Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Mon, 18 Jun 2018 23:55:40 +0100
Subject: [PATCH 049/100] firmware: Update Kconfig help text for Google
 firmware

stable inclusion
from stable-5.10.94
commit f62bf6ee4fa3198385e0658a9f070f98335a8408
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit d185a3466f0cd5af8f1c5c782c53bc0e6f2e7136 upstream.

The help text for GOOGLE_FIRMWARE states that it should only be
enabled when building a kernel for Google's own servers.  However,
many of the drivers dependent on it are also useful on Chromebooks or
on any platform using coreboot.

Update the help text to reflect this double duty.

Fixes: d384d6f43d1e ("firmware: google memconsole: Add coreboot support")
Reviewed-by: Julius Werner <jwerner@chromium.org>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Link: https://lore.kernel.org/r/20180618225540.GD14131@decadent.org.uk
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/firmware/google/Kconfig | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/firmware/google/Kconfig b/drivers/firmware/google/Kconfig
index 97968aece54f..931544c9f63d 100644
--- a/drivers/firmware/google/Kconfig
+++ b/drivers/firmware/google/Kconfig
@@ -3,9 +3,9 @@ menuconfig GOOGLE_FIRMWARE
 	bool "Google Firmware Drivers"
 	default n
 	help
-	  These firmware drivers are used by Google's servers.  They are
-	  only useful if you are working directly on one of their
-	  proprietary servers.  If in doubt, say "N".
+	  These firmware drivers are used by Google servers,
+	  Chromebooks and other devices using coreboot firmware.
+	  If in doubt, say "N".
 
 if GOOGLE_FIRMWARE
 
-- 
Gitee


From ab12c4e698dd8b6ac898733fb95b2fec2ea66087 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Fri, 23 Apr 2021 11:26:56 +0200
Subject: [PATCH 050/100] can: mcp251xfd: mcp251xfd_tef_obj_read(): fix typo in
 error message

stable inclusion
from stable-5.10.94
commit 0baa3729d2eb20ae159a863e473fcfca6390aba9
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 99e7cc3b3f85d9a583ab83f386315c59443509ae upstream.

This patch fixes a typo in the error message in
mcp251xfd_tef_obj_read(), if trying to read too many objects.

Link: https://lore.kernel.org/all/20220105154300.1258636-3-mkl@pengutronix.de
Fixes: 55e5b97f003e ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN")
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index e0b322ab0362..abe00a085f6f 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -1288,7 +1288,7 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv,
 	     len > tx_ring->obj_num ||
 	     offset + len > tx_ring->obj_num)) {
 		netdev_err(priv->ndev,
-			   "Trying to read to many TEF objects (max=%d, offset=%d, len=%d).\n",
+			   "Trying to read too many TEF objects (max=%d, offset=%d, len=%d).\n",
 			   tx_ring->obj_num, offset, len);
 		return -ERANGE;
 	}
-- 
Gitee


From 412baca26dce57e69bdf48070450ade2ee3713c1 Mon Sep 17 00:00:00 2001
From: Suresh Udipi <sudipi@jp.adit-jv.com>
Date: Fri, 13 Aug 2021 17:07:56 +0200
Subject: [PATCH 051/100] media: rcar-csi2: Optimize the selection PHTW
 register
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

stable inclusion
from stable-5.10.94
commit 81ac08a800b010d40c4bc8112089473c1b950cbd
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 549cc89cd09a85aaa16dc07ef3db811d5cf9bcb1 upstream.

PHTW register is selected based on default bit rate from Table[1].
for the bit rates less than or equal to 250. Currently first
value of default bit rate which is greater than or equal to
the caculated mbps is selected. This selection can be further
improved by selecting the default bit rate which is nearest to
the calculated value.

[1] specs r19uh0105ej0200-r-car-3rd-generation.pdf [Table 25.12]

Fixes: 769afd212b16 ("media: rcar-csi2: add Renesas R-Car MIPI CSI-2 receiver driver")
Signed-off-by: Suresh Udipi <sudipi@jp.adit-jv.com>
Signed-off-by: Michael Rodin <mrodin@de.adit-jv.com>
Reviewed-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/media/platform/rcar-vin/rcar-csi2.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/media/platform/rcar-vin/rcar-csi2.c b/drivers/media/platform/rcar-vin/rcar-csi2.c
index 6fb8efcb4044..5e8e48a721a0 100644
--- a/drivers/media/platform/rcar-vin/rcar-csi2.c
+++ b/drivers/media/platform/rcar-vin/rcar-csi2.c
@@ -976,10 +976,17 @@ static int rcsi2_phtw_write_mbps(struct rcar_csi2 *priv, unsigned int mbps,
 				 const struct rcsi2_mbps_reg *values, u16 code)
 {
 	const struct rcsi2_mbps_reg *value;
+	const struct rcsi2_mbps_reg *prev_value = NULL;
 
-	for (value = values; value->mbps; value++)
+	for (value = values; value->mbps; value++) {
 		if (value->mbps >= mbps)
 			break;
+		prev_value = value;
+	}
+
+	if (prev_value &&
+	    ((mbps - prev_value->mbps) <= (value->mbps - mbps)))
+		value = prev_value;
 
 	if (!value->mbps) {
 		dev_err(priv->dev, "Unsupported PHY speed (%u Mbps)", mbps);
-- 
Gitee


From eaa9c2fe5f1c18490bbd5d5979218844f1c8baf4 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime@cerno.tech>
Date: Thu, 19 Aug 2021 15:59:30 +0200
Subject: [PATCH 052/100] drm/vc4: hdmi: Make sure the device is powered with
 CEC

stable inclusion
from stable-5.10.94
commit 55b10b88ac8654fc2f31518aa349a2e643b37f18
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 20b0dfa86bef0e80b41b0e5ac38b92f23b6f27f9 upstream.

Similarly to what we encountered with the detect hook with DRM, nothing
actually prevents any of the CEC callback from being run while the HDMI
output is disabled.

However, this is an issue since any register access to the controller
when it's powered down will result in a silent hang.

Let's make sure we run the runtime_pm hooks when the CEC adapter is
opened and closed by the userspace to avoid that issue.

Fixes: 15b4511a4af6 ("drm/vc4: add HDMI CEC support")
Reviewed-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://patchwork.freedesktop.org/patch/msgid/20210819135931.895976-6-maxime@cerno.tech
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/gpu/drm/vc4/vc4_hdmi.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 5d5c4e9a8621..9392de2679a1 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -1399,8 +1399,14 @@ static int vc4_hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable)
 	struct vc4_hdmi *vc4_hdmi = cec_get_drvdata(adap);
 	/* clock period in microseconds */
 	const u32 usecs = 1000000 / CEC_CLOCK_FREQ;
-	u32 val = HDMI_READ(HDMI_CEC_CNTRL_5);
+	u32 val;
+	int ret;
+
+	ret = pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev);
+	if (ret)
+		return ret;
 
+	val = HDMI_READ(HDMI_CEC_CNTRL_5);
 	val &= ~(VC4_HDMI_CEC_TX_SW_RESET | VC4_HDMI_CEC_RX_SW_RESET |
 		 VC4_HDMI_CEC_CNT_TO_4700_US_MASK |
 		 VC4_HDMI_CEC_CNT_TO_4500_US_MASK);
@@ -1525,6 +1531,8 @@ static int vc4_hdmi_cec_init(struct vc4_hdmi *vc4_hdmi)
 	if (ret < 0)
 		goto err_delete_cec_adap;
 
+	pm_runtime_put(&vc4_hdmi->pdev->dev);
+
 	return 0;
 
 err_delete_cec_adap:
-- 
Gitee


From e33f9d1558064a3d014578409655884b6cb6c1ac Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 7 Nov 2021 18:19:23 +0000
Subject: [PATCH 053/100] media: correct MEDIA_TEST_SUPPORT help text

stable inclusion
from stable-5.10.94
commit f6736bd81db48abd9455e007f1f3ba7ec593fd48
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 09f4d1513267d0ab712f5d29e7bd136535748709 upstream.

Fix grammar/wording in the help text for MEDIA_TEST_SUPPORT.

Fixes: 4b32216adb01 ("media: split test drivers from platform directory")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/media/Kconfig | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/media/Kconfig b/drivers/media/Kconfig
index a6d073f2e036..d157af63be41 100644
--- a/drivers/media/Kconfig
+++ b/drivers/media/Kconfig
@@ -142,10 +142,10 @@ config MEDIA_TEST_SUPPORT
 	prompt "Test drivers" if MEDIA_SUPPORT_FILTER
 	default y if !MEDIA_SUPPORT_FILTER
 	help
-	  Those drivers should not be used on production Kernels, but
-	  can be useful on debug ones. It enables several dummy drivers
-	  that simulate a real hardware. Very useful to test userspace
-	  applications and to validate if the subsystem core is doesn't
+	  These drivers should not be used on production kernels, but
+	  can be useful on debug ones. This option enables several dummy drivers
+	  that simulate real hardware. Very useful to test userspace
+	  applications and to validate if the subsystem core doesn't
 	  have regressions.
 
 	  Say Y if you want to use some virtual test driver.
-- 
Gitee


From 94ebf996f552ff892d53f46764a1faa36f386384 Mon Sep 17 00:00:00 2001
From: Daniel Thompson <daniel.thompson@linaro.org>
Date: Thu, 18 Nov 2021 10:09:52 +0000
Subject: [PATCH 054/100] Documentation: dmaengine: Correctly describe dmatest
 with channel unset

stable inclusion
from stable-5.10.94
commit d1e85fcd73b5c94dc5032a63ecb3df4cbceda82d
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit c61d7b2ef141abf81140756b45860a2306f395a2 upstream.

Currently the documentation states that channels must be configured before
running the dmatest. This has not been true since commit 6b41030fdc79
("dmaengine: dmatest: Restore default for channel"). Fix accordingly.

Fixes: 6b41030fdc79 ("dmaengine: dmatest: Restore default for channel")
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Link: https://lore.kernel.org/r/20211118100952.27268-3-daniel.thompson@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 Documentation/driver-api/dmaengine/dmatest.rst | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Documentation/driver-api/dmaengine/dmatest.rst b/Documentation/driver-api/dmaengine/dmatest.rst
index ee268d445d38..d2e1d8b58e7d 100644
--- a/Documentation/driver-api/dmaengine/dmatest.rst
+++ b/Documentation/driver-api/dmaengine/dmatest.rst
@@ -143,13 +143,14 @@ Part 5 - Handling channel allocation
 Allocating Channels
 -------------------
 
-Channels are required to be configured prior to starting the test run.
-Attempting to run the test without configuring the channels will fail.
+Channels do not need to be configured prior to starting a test run. Attempting
+to run the test without configuring the channels will result in testing any
+channels that are available.
 
 Example::
 
     % echo 1 > /sys/module/dmatest/parameters/run
-    dmatest: Could not start test, no channels configured
+    dmatest: No channels configured, continue with any
 
 Channels are registered using the "channel" parameter. Channels can be requested by their
 name, once requested, the channel is registered and a pending thread is added to the test list.
-- 
Gitee


From 809cabeaecde929cef793c6dfb674785b38b40f9 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Wed, 1 Dec 2021 14:59:31 +0200
Subject: [PATCH 055/100] Documentation: ACPI: Fix data node reference
 documentation

stable inclusion
from stable-5.10.94
commit abecf9d748369d5d1f53ae868f96cae88f9875ef
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit a11174952205d082f1658fab4314f0caf706e0a8 upstream.

The data node reference documentation was missing a package that must
contain the property values, instead property name and multiple values
being present in a single package. This is not aligned with the _DSD
spec.

Fix it by adding the package for the values.

Also add the missing "reg" properties to two numbered nodes.

Fixes: b10134a3643d ("ACPI: property: Document hierarchical data extension references")
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 .../firmware-guide/acpi/dsd/data-node-references.rst   | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/Documentation/firmware-guide/acpi/dsd/data-node-references.rst b/Documentation/firmware-guide/acpi/dsd/data-node-references.rst
index 9b17dc77d18c..da0e46496fc4 100644
--- a/Documentation/firmware-guide/acpi/dsd/data-node-references.rst
+++ b/Documentation/firmware-guide/acpi/dsd/data-node-references.rst
@@ -5,7 +5,7 @@
 Referencing hierarchical data nodes
 ===================================
 
-:Copyright: |copy| 2018 Intel Corporation
+:Copyright: |copy| 2018, 2021 Intel Corporation
 :Author: Sakari Ailus <sakari.ailus@linux.intel.com>
 
 ACPI in general allows referring to device objects in the tree only.
@@ -52,12 +52,14 @@ the ANOD object which is also the final target node of the reference.
 	    Name (NOD0, Package() {
 		ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
 		Package () {
+		    Package () { "reg", 0 },
 		    Package () { "random-property", 3 },
 		}
 	    })
 	    Name (NOD1, Package() {
 		ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
 		Package () {
+		    Package () { "reg", 1 },
 		    Package () { "anothernode", "ANOD" },
 		}
 	    })
@@ -74,7 +76,11 @@ the ANOD object which is also the final target node of the reference.
 	    Name (_DSD, Package () {
 		ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
 		Package () {
-		    Package () { "reference", ^DEV0, "node@1", "anothernode" },
+		    Package () {
+			"reference", Package () {
+			    ^DEV0, "node@1", "anothernode"
+			}
+		    },
 		}
 	    })
 	}
-- 
Gitee


From ba3d7a4b6e7fc0ea7c91bd6eb3ac4db20a942103 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Thu, 30 Dec 2021 18:19:40 +0100
Subject: [PATCH 056/100] Documentation: refer to config RANDOMIZE_BASE for
 kernel address-space randomization

stable inclusion
from stable-5.10.94
commit 5d38cbf66dd7df04f06be86d812c7e5861cd3a69
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 82ca67321f55a8d1da6ac3ed611da3c32818bb37 upstream.

The config RANDOMIZE_SLAB does not exist, the authors probably intended to
refer to the config RANDOMIZE_BASE, which provides kernel address-space
randomization. They probably just confused SLAB with BASE (these two
four-letter words coincidentally share three common letters), as they also
point out the config SLAB_FREELIST_RANDOM as further randomization within
the same sentence.

Fix the reference of the config for kernel address-space randomization to
the config that provides that.

Fixes: 6e88559470f5 ("Documentation: Add section about CPU vulnerabilities for Spectre")
Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Link: https://lore.kernel.org/r/20211230171940.27558-1-lukas.bulwahn@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 Documentation/admin-guide/hw-vuln/spectre.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst
index a4cb79d0dd57..7e061ed449aa 100644
--- a/Documentation/admin-guide/hw-vuln/spectre.rst
+++ b/Documentation/admin-guide/hw-vuln/spectre.rst
@@ -491,7 +491,7 @@ Spectre variant 2
    before invoking any firmware code to prevent Spectre variant 2 exploits
    using the firmware.
 
-   Using kernel address space randomization (CONFIG_RANDOMIZE_SLAB=y
+   Using kernel address space randomization (CONFIG_RANDOMIZE_BASE=y
    and CONFIG_SLAB_FREELIST_RANDOM=y in the kernel configuration) makes
    attacks on the kernel generally more difficult.
 
-- 
Gitee


From fa35860210f61c8a651a7c8b7917932fb9b855ee Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 18 Jan 2022 19:39:05 -0800
Subject: [PATCH 057/100] Documentation: fix firewire.rst ABI file path error

stable inclusion
from stable-5.10.94
commit 55698d11c8dacbfc9750803cf8fa398fb9a12af5
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit b0ac702f3329cdc8a06dcaac73183d4b5a2b942d upstream.

Adjust the path of the ABI files for firewire.rst to prevent a
documentation build error. Prevents this problem:

Sphinx parallel build error:
docutils.utils.SystemMessage: Documentation/driver-api/firewire.rst:22: (SEVERE/4) Problems with "include" directive path:
InputError: [Errno 2] No such file or directory: '../Documentation/driver-api/ABI/stable/firewire-cdev'.

Fixes: 2f4830ef96d2 ("FireWire: add driver-api Introduction section")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Akira Yokosawa <akiyks@gmail.com>
Link: https://lore.kernel.org/r/20220119033905.4779-1-rdunlap@infradead.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 Documentation/driver-api/firewire.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/driver-api/firewire.rst b/Documentation/driver-api/firewire.rst
index 94a2d7f01d99..d3cfa73cbb2b 100644
--- a/Documentation/driver-api/firewire.rst
+++ b/Documentation/driver-api/firewire.rst
@@ -19,7 +19,7 @@ of kernel interfaces is available via exported symbols in `firewire-core` module
 Firewire char device data structures
 ====================================
 
-.. include:: /ABI/stable/firewire-cdev
+.. include:: ../ABI/stable/firewire-cdev
     :literal:
 
 .. kernel-doc:: include/uapi/linux/firewire-cdev.h
@@ -28,7 +28,7 @@ Firewire char device data structures
 Firewire device probing and sysfs interfaces
 ============================================
 
-.. include:: /ABI/stable/sysfs-bus-firewire
+.. include:: ../ABI/stable/sysfs-bus-firewire
     :literal:
 
 .. kernel-doc:: drivers/firewire/core-device.c
-- 
Gitee


From fc169044ed88926965907e81aad25d3fcc424d8a Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Mon, 1 Nov 2021 15:14:41 -0700
Subject: [PATCH 058/100] Bluetooth: hci_sync: Fix not setting adv set duration

stable inclusion
from stable-5.10.94
commit b30240911da4a19560daa51c1e1d328f9903dde7
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit f16a491c65d9eb19398b25aefc10c2d3313d17b3 upstream.

10bbffa3e88e attempted to fix the use of rotation duration as
advertising duration but it didn't change the if condition which still
uses the duration instead of the timeout.

Fixes: 10bbffa3e88e ("Bluetooth: Fix using advertising instance duration as timeout")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 net/bluetooth/hci_request.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 1a94ed2f8a4f..d965b7c66bd6 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -2118,7 +2118,7 @@ int __hci_req_enable_ext_advertising(struct hci_request *req, u8 instance)
 	/* Set duration per instance since controller is responsible for
 	 * scheduling it.
 	 */
-	if (adv_instance && adv_instance->duration) {
+	if (adv_instance && adv_instance->timeout) {
 		u16 duration = adv_instance->timeout * MSEC_PER_SEC;
 
 		/* Time = N * 10 ms */
-- 
Gitee


From 6f1e629064a92c86c3ad67cfd4fd193bf17fd8b1 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Mon, 29 Nov 2021 11:46:00 -0800
Subject: [PATCH 059/100] scsi: core: Show SCMD_LAST in text form

stable inclusion
from stable-5.10.94
commit 4fe77b7cd27229486b67c83df71019fe89d3951e
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 3369046e54ca8f82e0cb17740643da2d80d3cfa8 upstream.

The SCSI debugfs code supports showing information about pending commands,
including translating SCSI command flags from numeric into text format.
Also convert the SCMD_LAST flag from numeric into text form.

Link: https://lore.kernel.org/r/20211129194609.3466071-4-bvanassche@acm.org
Fixes: 8930a6c20791 ("scsi: core: add support for request batching")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/scsi/scsi_debugfs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/scsi_debugfs.c b/drivers/scsi/scsi_debugfs.c
index c19ea7ab54cb..d9a18124cfc9 100644
--- a/drivers/scsi/scsi_debugfs.c
+++ b/drivers/scsi/scsi_debugfs.c
@@ -10,6 +10,7 @@ static const char *const scsi_cmd_flags[] = {
 	SCSI_CMD_FLAG_NAME(TAGGED),
 	SCSI_CMD_FLAG_NAME(UNCHECKED_ISA_DMA),
 	SCSI_CMD_FLAG_NAME(INITIALIZED),
+	SCSI_CMD_FLAG_NAME(LAST),
 };
 #undef SCSI_CMD_FLAG_NAME
 
-- 
Gitee


From d3d1e76d2b8aabb92bd63e63cb3aa47ce5f50f9d Mon Sep 17 00:00:00 2001
From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Date: Tue, 14 Dec 2021 13:42:43 +0900
Subject: [PATCH 060/100] dmaengine: uniphier-xdmac: Fix type of address
 variables

stable inclusion
from stable-5.10.94
commit 57cd8597c3ef8ba8d00aa9f39f506f45d949d29b
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 105a8c525675bb7d4d64871f9b2edf39460de881 upstream.

The variables src_addr and dst_addr handle DMA addresses, so these should
be declared as dma_addr_t.

Fixes: 667b9251440b ("dmaengine: uniphier-xdmac: Add UniPhier external DMA controller driver")
Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Link: https://lore.kernel.org/r/1639456963-10232-1-git-send-email-hayashi.kunihiko@socionext.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/dma/uniphier-xdmac.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/uniphier-xdmac.c b/drivers/dma/uniphier-xdmac.c
index d6b8a202474f..290836b7e1be 100644
--- a/drivers/dma/uniphier-xdmac.c
+++ b/drivers/dma/uniphier-xdmac.c
@@ -131,8 +131,9 @@ uniphier_xdmac_next_desc(struct uniphier_xdmac_chan *xc)
 static void uniphier_xdmac_chan_start(struct uniphier_xdmac_chan *xc,
 				      struct uniphier_xdmac_desc *xd)
 {
-	u32 src_mode, src_addr, src_width;
-	u32 dst_mode, dst_addr, dst_width;
+	u32 src_mode, src_width;
+	u32 dst_mode, dst_width;
+	dma_addr_t src_addr, dst_addr;
 	u32 val, its, tnum;
 	enum dma_slave_buswidth buswidth;
 
-- 
Gitee


From 790c88adf78f881d400212912306c058405cfc2d Mon Sep 17 00:00:00 2001
From: Yixing Liu <liuyixing1@huawei.com>
Date: Mon, 6 Dec 2021 21:36:52 +0800
Subject: [PATCH 061/100] RDMA/hns: Modify the mapping attribute of doorbell to
 device

stable inclusion
from stable-5.10.94
commit 885860717c29decb32bc8d2074bd7be066eaab87
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 39d5534b1302189c809e90641ffae8cbdc42a8fc upstream.

It is more general for ARM device drivers to use the device attribute to
map PCI BAR spaces.

Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver")
Link: https://lore.kernel.org/r/20211206133652.27476-1-liangwenpeng@huawei.com
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index c0249e4874a9..1e8b3e4ef1b1 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -359,7 +359,7 @@ static int hns_roce_mmap(struct ib_ucontext *context,
 		return rdma_user_mmap_io(context, vma,
 					 to_hr_ucontext(context)->uar.pfn,
 					 PAGE_SIZE,
-					 pgprot_noncached(vma->vm_page_prot),
+					 pgprot_device(vma->vm_page_prot),
 					 NULL);
 
 	/* vm_pgoff: 1 -- TPTR */
-- 
Gitee


From 36935aeb5372c5d66faa0e37074a5faee88d7772 Mon Sep 17 00:00:00 2001
From: Chengguang Xu <cgxu519@mykernel.net>
Date: Sat, 18 Dec 2021 19:23:20 +0800
Subject: [PATCH 062/100] RDMA/rxe: Fix a typo in opcode name

stable inclusion
from stable-5.10.94
commit 16ad0aa917c905982a6bdddd5f67a824f153501e
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 8d1cfb884e881efd69a3be4ef10772c71cb22216 upstream.

There is a redundant ']' in the name of opcode IB_OPCODE_RC_SEND_MIDDLE,
so just fix it.

Fixes: 8700e3e7c485 ("Soft RoCE driver")
Link: https://lore.kernel.org/r/20211218112320.3558770-1-cgxu519@mykernel.net
Signed-off-by: Chengguang Xu <cgxu519@mykernel.net>
Acked-by: Zhu Yanjun <zyjzyj2000@gmail.com>
Reviewed-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/infiniband/sw/rxe/rxe_opcode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c
index 0cb4b01fd910..66ffb516bdaf 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.c
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.c
@@ -110,7 +110,7 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
 		}
 	},
 	[IB_OPCODE_RC_SEND_MIDDLE]		= {
-		.name	= "IB_OPCODE_RC_SEND_MIDDLE]",
+		.name	= "IB_OPCODE_RC_SEND_MIDDLE",
 		.mask	= RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK
 				| RXE_MIDDLE_MASK,
 		.length = RXE_BTH_BYTES,
-- 
Gitee


From 28199767d31a7f81792925c6bed013dec2a5aa63 Mon Sep 17 00:00:00 2001
From: Amelie Delaunay <amelie.delaunay@foss.st.com>
Date: Mon, 20 Dec 2021 17:58:27 +0100
Subject: [PATCH 063/100] dmaengine: stm32-mdma: fix STM32_MDMA_CTBR_TSEL_MASK

stable inclusion
from stable-5.10.94
commit 78cf5f63a3a98151ad7fa645103de0fa17c7317c
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit e7f110889a87307fb0fed408a5dee1707796ca04 upstream.

This patch fixes STM32_MDMA_CTBR_TSEL_MASK, which is [5:0], not [7:0].

Fixes: a4ffb13c8946 ("dmaengine: Add STM32 MDMA driver")
Signed-off-by: Amelie Delaunay <amelie.delaunay@foss.st.com>
Link: https://lore.kernel.org/r/20211220165827.1238097-1-amelie.delaunay@foss.st.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/dma/stm32-mdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c
index 9d473923712a..fe36738f2dd7 100644
--- a/drivers/dma/stm32-mdma.c
+++ b/drivers/dma/stm32-mdma.c
@@ -184,7 +184,7 @@
 #define STM32_MDMA_CTBR(x)		(0x68 + 0x40 * (x))
 #define STM32_MDMA_CTBR_DBUS		BIT(17)
 #define STM32_MDMA_CTBR_SBUS		BIT(16)
-#define STM32_MDMA_CTBR_TSEL_MASK	GENMASK(7, 0)
+#define STM32_MDMA_CTBR_TSEL_MASK	GENMASK(5, 0)
 #define STM32_MDMA_CTBR_TSEL(n)		STM32_MDMA_SET(n, \
 						      STM32_MDMA_CTBR_TSEL_MASK)
 
-- 
Gitee


From ef479b24d32c210b909eb47acb641d3e149e3c10 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@nvidia.com>
Date: Sun, 5 Dec 2021 11:20:59 +0200
Subject: [PATCH 064/100] Revert "net/mlx5: Add retry mechanism to the command
 entry index allocation"

stable inclusion
from stable-5.10.94
commit 4cb7aba1e0868585289c4ebf9151ec5c7e7606c8
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 4f6626b0e140867fd6d5a2e9d4ceaef97f10f46a upstream.

This reverts commit 410bd754cd73c4a2ac3856d9a03d7b08f9c906bf.

The reverted commit had added a retry mechanism to the command entry
index allocation. The previous patch ensures that there is a free
command entry index once the command work handler holds the command
semaphore. Thus the retry mechanism is not needed.

Fixes: 410bd754cd73 ("net/mlx5: Add retry mechanism to the command entry index allocation")
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Eran Ben Elisha <eranbe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 21 +------------------
 1 file changed, 1 insertion(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 20e3f8cd074a..6af0dd847169 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -887,25 +887,6 @@ static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode)
 	return cmd->allowed_opcode == opcode;
 }
 
-static int cmd_alloc_index_retry(struct mlx5_cmd *cmd)
-{
-	unsigned long alloc_end = jiffies + msecs_to_jiffies(1000);
-	int idx;
-
-retry:
-	idx = cmd_alloc_index(cmd);
-	if (idx < 0 && time_before(jiffies, alloc_end)) {
-		/* Index allocation can fail on heavy load of commands. This is a temporary
-		 * situation as the current command already holds the semaphore, meaning that
-		 * another command completion is being handled and it is expected to release
-		 * the entry index soon.
-		 */
-		cpu_relax();
-		goto retry;
-	}
-	return idx;
-}
-
 bool mlx5_cmd_is_down(struct mlx5_core_dev *dev)
 {
 	return pci_channel_offline(dev->pdev) ||
@@ -930,7 +911,7 @@ static void cmd_work_handler(struct work_struct *work)
 	sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
 	down(sem);
 	if (!ent->page_queue) {
-		alloc_ret = cmd_alloc_index_retry(cmd);
+		alloc_ret = cmd_alloc_index(cmd);
 		if (alloc_ret < 0) {
 			mlx5_core_err_rl(dev, "failed to allocate command entry\n");
 			if (ent->callback) {
-- 
Gitee


From a321da840d00fb8f30ab3b76bfdeb4eca76d0b3f Mon Sep 17 00:00:00 2001
From: Anders Roxell <anders.roxell@linaro.org>
Date: Tue, 7 Dec 2021 12:02:28 +0100
Subject: [PATCH 065/100] powerpc/cell: Fix clang -Wimplicit-fallthrough
 warning

stable inclusion
from stable-5.10.94
commit 19aaef65194854e8e880084f52645fb77b54188d
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit e89257e28e844f5d1d39081bb901d9f1183a7705 upstream.

Clang warns:

arch/powerpc/platforms/cell/pervasive.c:81:2: error: unannotated fall-through between switch labels
        case SRR1_WAKEEE:
        ^
arch/powerpc/platforms/cell/pervasive.c:81:2: note: insert 'break;' to avoid fall-through
        case SRR1_WAKEEE:
        ^
        break;
1 error generated.

Clang is more pedantic than GCC, which does not warn when failing
through to a case that is just break or return. Clang's version is more
in line with the kernel's own stance in deprecated.rst. Add athe missing
break to silence the warning.

Fixes: 6e83985b0f6e ("powerpc/cbe: Do not process external or decremeter interrupts from sreset")
Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20211207110228.698956-1-anders.roxell@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 arch/powerpc/platforms/cell/pervasive.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c
index 9068edef71f7..59999902e4a6 100644
--- a/arch/powerpc/platforms/cell/pervasive.c
+++ b/arch/powerpc/platforms/cell/pervasive.c
@@ -77,6 +77,7 @@ static int cbe_system_reset_exception(struct pt_regs *regs)
 	switch (regs->msr & SRR1_WAKEMASK) {
 	case SRR1_WAKEDEC:
 		set_dec(1);
+		break;
 	case SRR1_WAKEEE:
 		/*
 		 * Handle these when interrupts get re-enabled and we take
-- 
Gitee


From fed105c5d050f396dc60eccb9f8f1a98977429a6 Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Tue, 18 Jan 2022 22:50:52 +0100
Subject: [PATCH 066/100] powerpc/fsl/dts: Enable WA for erratum A-009885 on
 fman3l MDIO buses

stable inclusion
from stable-5.10.94
commit 5e59f885353e3b65dbb26c3bbb08cd8e4fa59043
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 0d375d610fa96524e2ee2b46830a46a7bfa92a9f upstream.

This block is used in (at least) T1024 and T1040, including their
variants like T1023 etc.

Fixes: d55ad2967d89 ("powerpc/mpc85xx: Create dts components for the FSL QorIQ DPAA FMan")
Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi
index c90702b04a53..48e5cd61599c 100644
--- a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi
@@ -79,6 +79,7 @@ mdio0: mdio@fc000 {
 		#size-cells = <0>;
 		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
 		reg = <0xfc000 0x1000>;
+		fsl,erratum-a009885;
 	};
 
 	xmdio0: mdio@fd000 {
@@ -86,6 +87,7 @@ xmdio0: mdio@fd000 {
 		#size-cells = <0>;
 		compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
 		reg = <0xfd000 0x1000>;
+		fsl,erratum-a009885;
 	};
 };
 
-- 
Gitee


From 990224a13466dfae4ab2fe0aea12d19acef414fe Mon Sep 17 00:00:00 2001
From: Ye Bin <yebin10@huawei.com>
Date: Mon, 29 Nov 2021 09:26:59 +0800
Subject: [PATCH 067/100] block: Fix fsync always failed if once failed

stable inclusion
from stable-5.10.94
commit 2bcab471a26fce0410c3d30d28af642db373ef8c
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 8a7518931baa8ea023700987f3db31cb0a80610b upstream.

We do test with inject error fault base on v4.19, after test some time we found
sync /dev/sda always failed.
[root@localhost] sync /dev/sda
sync: error syncing '/dev/sda': Input/output error

scsi log as follows:
[19069.812296] sd 0:0:0:0: [sda] tag#64 Send: scmd 0x00000000d03a0b6b
[19069.812302] sd 0:0:0:0: [sda] tag#64 CDB: Synchronize Cache(10) 35 00 00 00 00 00 00 00 00 00
[19069.812533] sd 0:0:0:0: [sda] tag#64 Done: SUCCESS Result: hostbyte=DID_OK driverbyte=DRIVER_OK
[19069.812536] sd 0:0:0:0: [sda] tag#64 CDB: Synchronize Cache(10) 35 00 00 00 00 00 00 00 00 00
[19069.812539] sd 0:0:0:0: [sda] tag#64 scsi host busy 1 failed 0
[19069.812542] sd 0:0:0:0: Notifying upper driver of completion (result 0)
[19069.812546] sd 0:0:0:0: [sda] tag#64 sd_done: completed 0 of 0 bytes
[19069.812549] sd 0:0:0:0: [sda] tag#64 0 sectors total, 0 bytes done.
[19069.812564] print_req_error: I/O error, dev sda, sector 0

ftrace log as follows:
 rep-306069 [007] .... 19654.923315: block_bio_queue: 8,0 FWS 0 + 0 [rep]
 rep-306069 [007] .... 19654.923333: block_getrq: 8,0 FWS 0 + 0 [rep]
 kworker/7:1H-250   [007] .... 19654.923352: block_rq_issue: 8,0 FF 0 () 0 + 0 [kworker/7:1H]
 <idle>-0     [007] ..s. 19654.923562: block_rq_complete: 8,0 FF () 18446744073709551615 + 0 [0]
 <idle>-0     [007] d.s. 19654.923576: block_rq_complete: 8,0 WS () 0 + 0 [-5]

As 8d6996630c03 introduce 'fq->rq_status', this data only update when 'flush_rq'
reference count isn't zero. If flush request once failed and record error code
in 'fq->rq_status'. If there is no chance to update 'fq->rq_status',then do fsync
will always failed.
To address this issue reset 'fq->rq_status' after return error code to upper layer.

Fixes: 8d6996630c03("block: fix null pointer dereference in blk_mq_rq_timed_out()")
Signed-off-by: Ye Bin <yebin10@huawei.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20211129012659.1553733-1-yebin10@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 block/blk-flush.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/block/blk-flush.c b/block/blk-flush.c
index b38839de9408..82919829bc4d 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -235,8 +235,10 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
 	 * avoiding use-after-free.
 	 */
 	WRITE_ONCE(flush_rq->state, MQ_RQ_IDLE);
-	if (fq->rq_status != BLK_STS_OK)
+	if (fq->rq_status != BLK_STS_OK) {
 		error = fq->rq_status;
+		fq->rq_status = BLK_STS_OK;
+	}
 
 	if (!q->elevator) {
 		flush_rq->tag = BLK_MQ_NO_TAG;
-- 
Gitee


From 06f8cbd774d110c51dbbf46e2c2af664e2ee1a0f Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Wed, 10 Nov 2021 11:46:28 +0000
Subject: [PATCH 068/100] bpftool: Remove inclusion of utilities.mak from
 Makefiles

stable inclusion
from stable-5.10.94
commit 38ee417f59c893e770a6118072f5d52fe9082901
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 48f5aef4c458c19ab337eed8c95a6486cc014aa3 upstream.

Bpftool's Makefile, and the Makefile for its documentation, both include
scripts/utilities.mak, but they use none of the items defined in this
file. Remove the includes.

Fixes: 71bb428fe2c1 ("tools: bpf: add bpftool")
Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20211110114632.24537-3-quentin@isovalent.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 tools/bpf/bpftool/Documentation/Makefile | 1 -
 tools/bpf/bpftool/Makefile               | 1 -
 2 files changed, 2 deletions(-)

diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
index f33cb02de95c..3601b1d1974c 100644
--- a/tools/bpf/bpftool/Documentation/Makefile
+++ b/tools/bpf/bpftool/Documentation/Makefile
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
 include ../../../scripts/Makefile.include
-include ../../../scripts/utilities.mak
 
 INSTALL ?= install
 RM ?= rm -f
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index f60e6ad3a1df..1896ef69b449 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
 include ../../scripts/Makefile.include
-include ../../scripts/utilities.mak
 
 ifeq ($(srctree),)
 srctree := $(patsubst %/,%,$(dir $(CURDIR)))
-- 
Gitee


From 7adb9e1777999da05e81ab0b4c55497736ba155e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Fri, 7 Jan 2022 23:11:13 +0100
Subject: [PATCH 069/100] xdp: check prog type before updating BPF link
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

stable inclusion
from stable-5.10.94
commit 58fa3e900255d61684ea5dfce0302b4b59d39c24
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 382778edc8262b7535f00523e9eb22edba1b9816 upstream.

The bpf_xdp_link_update() function didn't check the program type before
updating the program, which made it possible to install any program type as
an XDP program, which is obviously not good. Syzbot managed to trigger this
by swapping in an LWT program on the XDP hook which would crash in a helper
call.

Fix this by adding a check and bailing out if the types don't match.

Fixes: 026a4c28e1db ("bpf, xdp: Implement LINK_UPDATE for BPF XDP link")
Reported-by: syzbot+983941aa85af6ded1fd9@syzkaller.appspotmail.com
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/r/20220107221115.326171-1-toke@redhat.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 net/core/dev.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/core/dev.c b/net/core/dev.c
index a8f3dedafed8..6b2cd690644c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -9339,6 +9339,12 @@ static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
 		goto out_unlock;
 	}
 	old_prog = link->prog;
+	if (old_prog->type != new_prog->type ||
+	    old_prog->expected_attach_type != new_prog->expected_attach_type) {
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
 	if (old_prog == new_prog) {
 		/* no-op, don't disturb drivers */
 		bpf_prog_put(new_prog);
-- 
Gitee


From 5649c8f5869f28507d88d5467d3c0b824ec6f337 Mon Sep 17 00:00:00 2001
From: German Gomez <german.gomez@arm.com>
Date: Tue, 18 Jan 2022 14:40:54 +0000
Subject: [PATCH 070/100] perf evsel: Override attr->sample_period for
 non-libpfm4 events
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

stable inclusion
from stable-5.10.94
commit 10e99ae9b5da7e6a5304db3a926f6a460ee850d2
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 3606c0e1a1050d397ad759a62607e419fd8b0ccb upstream.

A previous patch preventing "attr->sample_period" values from being
overridden in pfm events changed a related behaviour in arm-spe.

Before said patch:

  perf record -c 10000 -e arm_spe_0// -- sleep 1

Would yield an SPE event with period=10000. After the patch, the period
in "-c 10000" was being ignored because the arm-spe code initializes
sample_period to a non-zero value.

This patch restores the previous behaviour for non-libpfm4 events.

Fixes: ae5dcc8abe31 (“perf record: Prevent override of attr->sample_period for libpfm4 events”)
Reported-by: Chase Conklin <chase.conklin@arm.com>
Signed-off-by: German Gomez <german.gomez@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Yonghong Song <yhs@fb.com>
Cc: bpf@vger.kernel.org
Cc: netdev@vger.kernel.org
Link: http://lore.kernel.org/lkml/20220118144054.2541-1-german.gomez@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 tools/perf/util/evsel.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1cad6051d8b0..1a1cbd16d76d 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1014,6 +1014,17 @@ struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evs
 	return found_term;
 }
 
+static void evsel__set_default_freq_period(struct record_opts *opts,
+					   struct perf_event_attr *attr)
+{
+	if (opts->freq) {
+		attr->freq = 1;
+		attr->sample_freq = opts->freq;
+	} else {
+		attr->sample_period = opts->default_interval;
+	}
+}
+
 /*
  * The enable_on_exec/disabled value strategy:
  *
@@ -1080,14 +1091,12 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
 	 * We default some events to have a default interval. But keep
 	 * it a weak assumption overridable by the user.
 	 */
-	if (!attr->sample_period) {
-		if (opts->freq) {
-			attr->freq		= 1;
-			attr->sample_freq	= opts->freq;
-		} else {
-			attr->sample_period = opts->default_interval;
-		}
-	}
+	if ((evsel->is_libpfm_event && !attr->sample_period) ||
+	    (!evsel->is_libpfm_event && (!attr->sample_period ||
+					 opts->user_freq != UINT_MAX ||
+					 opts->user_interval != ULLONG_MAX)))
+		evsel__set_default_freq_period(opts, attr);
+
 	/*
 	 * If attr->freq was set (here or earlier), ask for period
 	 * to be sampled.
-- 
Gitee


From ab448ce09c2e58c64c03b25c4f2ce042d5f72e7a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 16 Jan 2022 01:02:20 -0800
Subject: [PATCH 071/100] ipv4: update fib_info_cnt under spinlock protection

stable inclusion
from stable-5.10.94
commit 86f0587f74320412e19db0bc341ef418b2d1d114
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 0a6e6b3c7db6c34e3d149f09cd714972f8753e3f upstream.

In the past, free_fib_info() was supposed to be called
under RTNL protection.

This eventually was no longer the case.

Instead of enforcing RTNL it seems we simply can
move fib_info_cnt changes to occur when fib_info_lock
is held.

v2: David Laight suggested to update fib_info_cnt
only when an entry is added/deleted to/from the hash table,
as fib_info_cnt is used to make sure hash table size
is optimal.

BUG: KCSAN: data-race in fib_create_info / free_fib_info

write to 0xffffffff86e243a0 of 4 bytes by task 26429 on cpu 0:
 fib_create_info+0xe78/0x3440 net/ipv4/fib_semantics.c:1428
 fib_table_insert+0x148/0x10c0 net/ipv4/fib_trie.c:1224
 fib_magic+0x195/0x1e0 net/ipv4/fib_frontend.c:1087
 fib_add_ifaddr+0xd0/0x2e0 net/ipv4/fib_frontend.c:1109
 fib_netdev_event+0x178/0x510 net/ipv4/fib_frontend.c:1466
 notifier_call_chain kernel/notifier.c:83 [inline]
 raw_notifier_call_chain+0x53/0xb0 kernel/notifier.c:391
 __dev_notify_flags+0x1d3/0x3b0
 dev_change_flags+0xa2/0xc0 net/core/dev.c:8872
 do_setlink+0x810/0x2410 net/core/rtnetlink.c:2719
 rtnl_group_changelink net/core/rtnetlink.c:3242 [inline]
 __rtnl_newlink net/core/rtnetlink.c:3396 [inline]
 rtnl_newlink+0xb10/0x13b0 net/core/rtnetlink.c:3506
 rtnetlink_rcv_msg+0x745/0x7e0 net/core/rtnetlink.c:5571
 netlink_rcv_skb+0x14e/0x250 net/netlink/af_netlink.c:2496
 rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:5589
 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline]
 netlink_unicast+0x5fc/0x6c0 net/netlink/af_netlink.c:1345
 netlink_sendmsg+0x726/0x840 net/netlink/af_netlink.c:1921
 sock_sendmsg_nosec net/socket.c:704 [inline]
 sock_sendmsg net/socket.c:724 [inline]
 ____sys_sendmsg+0x39a/0x510 net/socket.c:2409
 ___sys_sendmsg net/socket.c:2463 [inline]
 __sys_sendmsg+0x195/0x230 net/socket.c:2492
 __do_sys_sendmsg net/socket.c:2501 [inline]
 __se_sys_sendmsg net/socket.c:2499 [inline]
 __x64_sys_sendmsg+0x42/0x50 net/socket.c:2499
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae

read to 0xffffffff86e243a0 of 4 bytes by task 31505 on cpu 1:
 free_fib_info+0x35/0x80 net/ipv4/fib_semantics.c:252
 fib_info_put include/net/ip_fib.h:575 [inline]
 nsim_fib4_rt_destroy drivers/net/netdevsim/fib.c:294 [inline]
 nsim_fib4_rt_replace drivers/net/netdevsim/fib.c:403 [inline]
 nsim_fib4_rt_insert drivers/net/netdevsim/fib.c:431 [inline]
 nsim_fib4_event drivers/net/netdevsim/fib.c:461 [inline]
 nsim_fib_event drivers/net/netdevsim/fib.c:881 [inline]
 nsim_fib_event_work+0x15ca/0x2cf0 drivers/net/netdevsim/fib.c:1477
 process_one_work+0x3fc/0x980 kernel/workqueue.c:2298
 process_scheduled_works kernel/workqueue.c:2361 [inline]
 worker_thread+0x7df/0xa70 kernel/workqueue.c:2447
 kthread+0x2c7/0x2e0 kernel/kthread.c:327
 ret_from_fork+0x1f/0x30

value changed: 0x00000d2d -> 0x00000d2e

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 31505 Comm: kworker/1:21 Not tainted 5.16.0-rc6-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Workqueue: events nsim_fib_event_work

Fixes: 48bb9eb47b27 ("netdevsim: fib: Add dummy implementation for FIB offload")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: David Laight <David.Laight@ACULAB.COM>
Cc: Ido Schimmel <idosch@mellanox.com>
Cc: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 net/ipv4/fib_semantics.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index ab6a8f35d369..a13265e87da4 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -251,7 +251,6 @@ void free_fib_info(struct fib_info *fi)
 		pr_warn("Freeing alive fib_info %p\n", fi);
 		return;
 	}
-	fib_info_cnt--;
 
 	call_rcu(&fi->rcu, free_fib_info_rcu);
 }
@@ -262,6 +261,10 @@ void fib_release_info(struct fib_info *fi)
 	spin_lock_bh(&fib_info_lock);
 	if (fi && --fi->fib_treeref == 0) {
 		hlist_del(&fi->fib_hash);
+
+		/* Paired with READ_ONCE() in fib_create_info(). */
+		WRITE_ONCE(fib_info_cnt, fib_info_cnt - 1);
+
 		if (fi->fib_prefsrc)
 			hlist_del(&fi->fib_lhash);
 		if (fi->nh) {
@@ -1431,7 +1434,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 #endif
 
 	err = -ENOBUFS;
-	if (fib_info_cnt >= fib_info_hash_size) {
+
+	/* Paired with WRITE_ONCE() in fib_release_info() */
+	if (READ_ONCE(fib_info_cnt) >= fib_info_hash_size) {
 		unsigned int new_size = fib_info_hash_size << 1;
 		struct hlist_head *new_info_hash;
 		struct hlist_head *new_laddrhash;
@@ -1463,7 +1468,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 		return ERR_PTR(err);
 	}
 
-	fib_info_cnt++;
 	fi->fib_net = net;
 	fi->fib_protocol = cfg->fc_protocol;
 	fi->fib_scope = cfg->fc_scope;
@@ -1590,6 +1594,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 	fi->fib_treeref++;
 	refcount_set(&fi->fib_clntref, 1);
 	spin_lock_bh(&fib_info_lock);
+	fib_info_cnt++;
 	hlist_add_head(&fi->fib_hash,
 		       &fib_info_hash[fib_info_hashfn(fi)]);
 	if (fi->fib_prefsrc) {
-- 
Gitee


From 27470d11094bc3bed478bbb3116d19a153648946 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 19 Jan 2022 02:04:12 -0800
Subject: [PATCH 072/100] ipv4: avoid quadratic behavior in netns dismantle

stable inclusion
from stable-5.10.94
commit 734f4b0f831e6d5fdad819a8832fb562b6cb8291
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit d07418afea8f1d9896aaf9dc5ae47ac4f45b220c upstream.

net/ipv4/fib_semantics.c uses an hash table of 256 slots,
keyed by device ifindexes: fib_info_devhash[DEVINDEX_HASHSIZE]

Problem is that with network namespaces, devices tend
to use the same ifindex.

lo device for instance has a fixed ifindex of one,
for all network namespaces.

This means that hosts with thousands of netns spend
a lot of time looking at some hash buckets with thousands
of elements, notably at netns dismantle.

Simply add a per netns perturbation (net_hash_mix())
to spread elements more uniformely.

Also change fib_devindex_hashfn() to use more entropy.

Fixes: aa79e66eee5d ("net: Make ifindex generation per-net namespace")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 net/ipv4/fib_semantics.c | 36 +++++++++++++++++-------------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index a13265e87da4..838a876c168c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -29,6 +29,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/netlink.h>
+#include <linux/hash.h>
 
 #include <net/arp.h>
 #include <net/ip.h>
@@ -321,11 +322,15 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
 
 static inline unsigned int fib_devindex_hashfn(unsigned int val)
 {
-	unsigned int mask = DEVINDEX_HASHSIZE - 1;
+	return hash_32(val, DEVINDEX_HASHBITS);
+}
+
+static struct hlist_head *
+fib_info_devhash_bucket(const struct net_device *dev)
+{
+	u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex;
 
-	return (val ^
-		(val >> DEVINDEX_HASHBITS) ^
-		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
+	return &fib_info_devhash[fib_devindex_hashfn(val)];
 }
 
 static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
@@ -435,12 +440,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
 {
 	struct hlist_head *head;
 	struct fib_nh *nh;
-	unsigned int hash;
 
 	spin_lock(&fib_info_lock);
 
-	hash = fib_devindex_hashfn(dev->ifindex);
-	head = &fib_info_devhash[hash];
+	head = fib_info_devhash_bucket(dev);
+
 	hlist_for_each_entry(nh, head, nh_hash) {
 		if (nh->fib_nh_dev == dev &&
 		    nh->fib_nh_gw4 == gw &&
@@ -1608,12 +1612,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 	} else {
 		change_nexthops(fi) {
 			struct hlist_head *head;
-			unsigned int hash;
 
 			if (!nexthop_nh->fib_nh_dev)
 				continue;
-			hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex);
-			head = &fib_info_devhash[hash];
+			head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev);
 			hlist_add_head(&nexthop_nh->nh_hash, head);
 		} endfor_nexthops(fi)
 	}
@@ -1963,8 +1965,7 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
 
 void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
 {
-	unsigned int hash = fib_devindex_hashfn(dev->ifindex);
-	struct hlist_head *head = &fib_info_devhash[hash];
+	struct hlist_head *head = fib_info_devhash_bucket(dev);
 	struct fib_nh *nh;
 
 	hlist_for_each_entry(nh, head, nh_hash) {
@@ -1983,12 +1984,11 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
  */
 int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
 {
-	int ret = 0;
-	int scope = RT_SCOPE_NOWHERE;
+	struct hlist_head *head = fib_info_devhash_bucket(dev);
 	struct fib_info *prev_fi = NULL;
-	unsigned int hash = fib_devindex_hashfn(dev->ifindex);
-	struct hlist_head *head = &fib_info_devhash[hash];
+	int scope = RT_SCOPE_NOWHERE;
 	struct fib_nh *nh;
+	int ret = 0;
 
 	if (force)
 		scope = -1;
@@ -2133,7 +2133,6 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
 int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
 {
 	struct fib_info *prev_fi;
-	unsigned int hash;
 	struct hlist_head *head;
 	struct fib_nh *nh;
 	int ret;
@@ -2149,8 +2148,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
 	}
 
 	prev_fi = NULL;
-	hash = fib_devindex_hashfn(dev->ifindex);
-	head = &fib_info_devhash[hash];
+	head = fib_info_devhash_bucket(dev);
 	ret = 0;
 
 	hlist_for_each_entry(nh, head, nh_hash) {
-- 
Gitee


From 565181c2e55315e0b78fa271b5a62c5aac2fd35a Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Tue, 18 Jan 2022 22:50:50 +0100
Subject: [PATCH 073/100] net/fsl: xgmac_mdio: Add workaround for erratum
 A-009885

stable inclusion
from stable-5.10.94
commit 38c798384b903e0c38f7978c1fbf942e36213b8d
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 6198c722019774d38018457a8bfb9ba3ed8c931e upstream.

Once an MDIO read transaction is initiated, we must read back the data
register within 16 MDC cycles after the transaction completes. Outside
of this window, reads may return corrupt data.

Therefore, disable local interrupts in the critical section, to
maximize the probability that we can satisfy this requirement.

Fixes: d55ad2967d89 ("powerpc/mpc85xx: Create dts components for the FSL QorIQ DPAA FMan")
Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/net/ethernet/freescale/xgmac_mdio.c | 25 ++++++++++++++++-----
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c
index bfa2826c5545..252a06a13a47 100644
--- a/drivers/net/ethernet/freescale/xgmac_mdio.c
+++ b/drivers/net/ethernet/freescale/xgmac_mdio.c
@@ -49,6 +49,7 @@ struct tgec_mdio_controller {
 struct mdio_fsl_priv {
 	struct	tgec_mdio_controller __iomem *mdio_base;
 	bool	is_little_endian;
+	bool	has_a009885;
 	bool	has_a011043;
 };
 
@@ -184,10 +185,10 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
 {
 	struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv;
 	struct tgec_mdio_controller __iomem *regs = priv->mdio_base;
+	unsigned long flags;
 	uint16_t dev_addr;
 	uint32_t mdio_stat;
 	uint32_t mdio_ctl;
-	uint16_t value;
 	int ret;
 	bool endian = priv->is_little_endian;
 
@@ -219,12 +220,18 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
 			return ret;
 	}
 
+	if (priv->has_a009885)
+		/* Once the operation completes, i.e. MDIO_STAT_BSY clears, we
+		 * must read back the data register within 16 MDC cycles.
+		 */
+		local_irq_save(flags);
+
 	/* Initiate the read */
 	xgmac_write32(mdio_ctl | MDIO_CTL_READ, &regs->mdio_ctl, endian);
 
 	ret = xgmac_wait_until_done(&bus->dev, regs, endian);
 	if (ret)
-		return ret;
+		goto irq_restore;
 
 	/* Return all Fs if nothing was there */
 	if ((xgmac_read32(&regs->mdio_stat, endian) & MDIO_STAT_RD_ER) &&
@@ -232,13 +239,17 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
 		dev_dbg(&bus->dev,
 			"Error while reading PHY%d reg at %d.%hhu\n",
 			phy_id, dev_addr, regnum);
-		return 0xffff;
+		ret = 0xffff;
+	} else {
+		ret = xgmac_read32(&regs->mdio_data, endian) & 0xffff;
+		dev_dbg(&bus->dev, "read %04x\n", ret);
 	}
 
-	value = xgmac_read32(&regs->mdio_data, endian) & 0xffff;
-	dev_dbg(&bus->dev, "read %04x\n", value);
+irq_restore:
+	if (priv->has_a009885)
+		local_irq_restore(flags);
 
-	return value;
+	return ret;
 }
 
 static int xgmac_mdio_probe(struct platform_device *pdev)
@@ -282,6 +293,8 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
 	priv->is_little_endian = device_property_read_bool(&pdev->dev,
 							   "little-endian");
 
+	priv->has_a009885 = device_property_read_bool(&pdev->dev,
+						      "fsl,erratum-a009885");
 	priv->has_a011043 = device_property_read_bool(&pdev->dev,
 						      "fsl,erratum-a011043");
 
-- 
Gitee


From 7857c834a274c04132ac44137a5c06d3ae25cc68 Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Tue, 18 Jan 2022 22:50:53 +0100
Subject: [PATCH 074/100] net/fsl: xgmac_mdio: Fix incorrect iounmap when
 removing module

stable inclusion
from stable-5.10.94
commit d806eb5f4e231c7c470b7e9d13277d9e69c80a67
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 3f7c239c7844d2044ed399399d97a5f1c6008e1b upstream.

As reported by sparse: In the remove path, the driver would attempt to
unmap its own priv pointer - instead of the io memory that it mapped
in probe.

Fixes: 9f35a7342cff ("net/fsl: introduce Freescale 10G MDIO driver")
Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/net/ethernet/freescale/xgmac_mdio.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c
index 252a06a13a47..b7984a772e12 100644
--- a/drivers/net/ethernet/freescale/xgmac_mdio.c
+++ b/drivers/net/ethernet/freescale/xgmac_mdio.c
@@ -320,9 +320,10 @@ static int xgmac_mdio_probe(struct platform_device *pdev)
 static int xgmac_mdio_remove(struct platform_device *pdev)
 {
 	struct mii_bus *bus = platform_get_drvdata(pdev);
+	struct mdio_fsl_priv *priv = bus->priv;
 
 	mdiobus_unregister(bus);
-	iounmap(bus->priv);
+	iounmap(priv->mdio_base);
 	mdiobus_free(bus);
 
 	return 0;
-- 
Gitee


From 8a820e06d32fbcca7e25cde8cb754982930d1bfb Mon Sep 17 00:00:00 2001
From: Miaoqian Lin <linmq006@gmail.com>
Date: Thu, 20 Jan 2022 12:18:12 +0000
Subject: [PATCH 075/100] parisc: pdc_stable: Fix memory leak in
 pdcs_register_pathentries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

stable inclusion
from stable-5.10.94
commit e1840365ed4f831f51dfd7b3af5e39045f3d62b7
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit d24846a4246b6e61ecbd036880a4adf61681d241 upstream.

kobject_init_and_add() takes reference even when it fails.
According to the doc of kobject_init_and_add()：

   If this function returns an error, kobject_put() must be called to
   properly clean up the memory associated with the object.

Fix memory leak by calling kobject_put().

Fixes: 73f368cf679b ("Kobject: change drivers/parisc/pdc_stable.c to use kobject_init_and_add")
Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/parisc/pdc_stable.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c
index e090978518f1..4760f82def6e 100644
--- a/drivers/parisc/pdc_stable.c
+++ b/drivers/parisc/pdc_stable.c
@@ -979,8 +979,10 @@ pdcs_register_pathentries(void)
 		entry->kobj.kset = paths_kset;
 		err = kobject_init_and_add(&entry->kobj, &ktype_pdcspath, NULL,
 					   "%s", entry->name);
-		if (err)
+		if (err) {
+			kobject_put(&entry->kobj);
 			return err;
+		}
 
 		/* kobject is now registered */
 		write_lock(&entry->rw_lock);
-- 
Gitee


From dd74186c80e9ef617e4ef810edc3806b3f537b96 Mon Sep 17 00:00:00 2001
From: Hyeong-Jun Kim <hj514.kim@samsung.com>
Date: Fri, 10 Dec 2021 13:30:12 +0900
Subject: [PATCH 076/100] f2fs: compress: fix potential deadlock of compress
 file

stable inclusion
from stable-5.10.94
commit 39ad0581176dd62d4dbb0ec528d2d2fb54f11069
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 7377e853967ba45bf409e3b5536624d2cbc99f21 upstream.

There is a potential deadlock between writeback process and a process
performing write_begin() or write_cache_pages() while trying to write
same compress file, but not compressable, as below:

[Process A] - doing checkpoint
[Process B]                     [Process C]
f2fs_write_cache_pages()
- lock_page() [all pages in cluster, 0-31]
- f2fs_write_multi_pages()
 - f2fs_write_raw_pages()
  - f2fs_write_single_data_page()
   - f2fs_do_write_data_page()
     - return -EAGAIN [f2fs_trylock_op() failed]
   - unlock_page(page) [e.g., page 0]
                                - generic_perform_write()
                                 - f2fs_write_begin()
                                  - f2fs_prepare_compress_overwrite()
                                   - prepare_compress_overwrite()
                                    - lock_page() [e.g., page 0]
                                    - lock_page() [e.g., page 1]
   - lock_page(page) [e.g., page 0]

Since there is no compress process, it is no longer necessary to hold
locks on every pages in cluster within f2fs_write_raw_pages().

This patch changes f2fs_write_raw_pages() to release all locks first
and then perform write same as the non-compress file in
f2fs_write_cache_pages().

Fixes: 4c8ff7095bef ("f2fs: support data compression")
Signed-off-by: Hyeong-Jun Kim <hj514.kim@samsung.com>
Signed-off-by: Sungjong Seo <sj1557.seo@samsung.com>
Signed-off-by: Youngjin Gil <youngjin.gil@samsung.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 fs/f2fs/compress.c | 50 ++++++++++++++++++++--------------------------
 1 file changed, 22 insertions(+), 28 deletions(-)

diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 30987ea011f1..ec542e8c46cc 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1362,25 +1362,38 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
 					enum iostat_type io_type)
 {
 	struct address_space *mapping = cc->inode->i_mapping;
-	int _submitted, compr_blocks, ret;
-	int i = -1, err = 0;
+	int _submitted, compr_blocks, ret, i;
 
 	compr_blocks = f2fs_compressed_blocks(cc);
-	if (compr_blocks < 0) {
-		err = compr_blocks;
-		goto out_err;
+
+	for (i = 0; i < cc->cluster_size; i++) {
+		if (!cc->rpages[i])
+			continue;
+
+		redirty_page_for_writepage(wbc, cc->rpages[i]);
+		unlock_page(cc->rpages[i]);
 	}
 
+	if (compr_blocks < 0)
+		return compr_blocks;
+
 	for (i = 0; i < cc->cluster_size; i++) {
 		if (!cc->rpages[i])
 			continue;
 retry_write:
+		lock_page(cc->rpages[i]);
+
 		if (cc->rpages[i]->mapping != mapping) {
+continue_unlock:
 			unlock_page(cc->rpages[i]);
 			continue;
 		}
 
-		BUG_ON(!PageLocked(cc->rpages[i]));
+		if (!PageDirty(cc->rpages[i]))
+			goto continue_unlock;
+
+		if (!clear_page_dirty_for_io(cc->rpages[i]))
+			goto continue_unlock;
 
 		ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted,
 						NULL, NULL, wbc, io_type,
@@ -1395,26 +1408,15 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
 				 * avoid deadlock caused by cluster update race
 				 * from foreground operation.
 				 */
-				if (IS_NOQUOTA(cc->inode)) {
-					err = 0;
-					goto out_err;
-				}
+				if (IS_NOQUOTA(cc->inode))
+					return 0;
 				ret = 0;
 				cond_resched();
 				congestion_wait(BLK_RW_ASYNC,
 						DEFAULT_IO_TIMEOUT);
-				lock_page(cc->rpages[i]);
-
-				if (!PageDirty(cc->rpages[i])) {
-					unlock_page(cc->rpages[i]);
-					continue;
-				}
-
-				clear_page_dirty_for_io(cc->rpages[i]);
 				goto retry_write;
 			}
-			err = ret;
-			goto out_err;
+			return ret;
 		}
 
 		*submitted += _submitted;
@@ -1423,14 +1425,6 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc,
 	f2fs_balance_fs(F2FS_M_SB(mapping), true);
 
 	return 0;
-out_err:
-	for (++i; i < cc->cluster_size; i++) {
-		if (!cc->rpages[i])
-			continue;
-		redirty_page_for_writepage(wbc, cc->rpages[i]);
-		unlock_page(cc->rpages[i]);
-	}
-	return err;
 }
 
 int f2fs_write_multi_pages(struct compress_ctx *cc,
-- 
Gitee


From c1e0ad85b9759b576ac9289f89d6f089d00bacef Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Sat, 11 Dec 2021 21:27:36 +0800
Subject: [PATCH 077/100] f2fs: fix to reserve space for IO align feature

stable inclusion
from stable-5.10.94
commit a49e402f23096d21362b25d7224206a45288072a
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 300a842937fbcfb5a189cea9ba15374fdb0b5c6b upstream.

https://bugzilla.kernel.org/show_bug.cgi?id=204137

With below script, we will hit panic during new segment allocation:

DISK=bingo.img
MOUNT_DIR=/mnt/f2fs

dd if=/dev/zero of=$DISK bs=1M count=105
mkfs.f2fe -a 1 -o 19 -t 1 -z 1 -f -q $DISK

mount -t f2fs $DISK $MOUNT_DIR -o "noinline_dentry,flush_merge,noextent_cache,mode=lfs,io_bits=7,fsync_mode=strict"

for (( i = 0; i < 4096; i++ )); do
        name=`head /dev/urandom | tr -dc A-Za-z0-9 | head -c 10`
        mkdir $MOUNT_DIR/$name
done

umount $MOUNT_DIR
rm $DISK

Signed-off-by: wenfei <wenfei9@huawei.com>
---
 fs/f2fs/f2fs.h    | 11 +++++++++++
 fs/f2fs/segment.h |  3 ++-
 fs/f2fs/super.c   | 44 ++++++++++++++++++++++++++++++++++++++++++++
 fs/f2fs/sysfs.c   |  4 +++-
 4 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 407673f7564d..48782861cf62 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -976,6 +976,7 @@ struct f2fs_sm_info {
 	unsigned int segment_count;	/* total # of segments */
 	unsigned int main_segments;	/* # of segments in main area */
 	unsigned int reserved_segments;	/* # of reserved segments */
+	unsigned int additional_reserved_segments;/* reserved segs for IO align feature */
 	unsigned int ovp_segments;	/* # of overprovision segments */
 
 	/* a threshold to reclaim prefree segments */
@@ -2023,6 +2024,11 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
 
 	if (!__allow_reserved_blocks(sbi, inode, true))
 		avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
+
+	if (F2FS_IO_ALIGNED(sbi))
+		avail_user_block_count -= sbi->blocks_per_seg *
+				SM_I(sbi)->additional_reserved_segments;
+
 	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
 		if (avail_user_block_count > sbi->unusable_block_count)
 			avail_user_block_count -= sbi->unusable_block_count;
@@ -2268,6 +2274,11 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
 
 	if (!__allow_reserved_blocks(sbi, inode, false))
 		valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+
+	if (F2FS_IO_ALIGNED(sbi))
+		valid_block_count += sbi->blocks_per_seg *
+				SM_I(sbi)->additional_reserved_segments;
+
 	user_block_count = sbi->user_block_count;
 	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
 		user_block_count -= sbi->unusable_block_count;
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index fa18a6b6fc4c..366671ca89aa 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -557,7 +557,8 @@ static inline unsigned int free_segments(struct f2fs_sb_info *sbi)
 
 static inline unsigned int reserved_segments(struct f2fs_sb_info *sbi)
 {
-	return SM_I(sbi)->reserved_segments;
+	return SM_I(sbi)->reserved_segments +
+			SM_I(sbi)->additional_reserved_segments;
 }
 
 static inline unsigned int free_sections(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 71e33b65430e..b124193e7631 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -306,6 +306,46 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
 					   F2FS_OPTION(sbi).s_resgid));
 }
 
+static inline int adjust_reserved_segment(struct f2fs_sb_info *sbi)
+{
+	unsigned int sec_blks = sbi->blocks_per_seg * sbi->segs_per_sec;
+	unsigned int avg_vblocks;
+	unsigned int wanted_reserved_segments;
+	block_t avail_user_block_count;
+
+	if (!F2FS_IO_ALIGNED(sbi))
+		return 0;
+
+	/* average valid block count in section in worst case */
+	avg_vblocks = sec_blks / F2FS_IO_SIZE(sbi);
+
+	/*
+	 * we need enough free space when migrating one section in worst case
+	 */
+	wanted_reserved_segments = (F2FS_IO_SIZE(sbi) / avg_vblocks) *
+						reserved_segments(sbi);
+	wanted_reserved_segments -= reserved_segments(sbi);
+
+	avail_user_block_count = sbi->user_block_count -
+				sbi->current_reserved_blocks -
+				F2FS_OPTION(sbi).root_reserved_blocks;
+
+	if (wanted_reserved_segments * sbi->blocks_per_seg >
+					avail_user_block_count) {
+		f2fs_err(sbi, "IO align feature can't grab additional reserved segment: %u, available segments: %u",
+			wanted_reserved_segments,
+			avail_user_block_count >> sbi->log_blocks_per_seg);
+		return -ENOSPC;
+	}
+
+	SM_I(sbi)->additional_reserved_segments = wanted_reserved_segments;
+
+	f2fs_info(sbi, "IO align feature needs additional reserved segment: %u",
+			 wanted_reserved_segments);
+
+	return 0;
+}
+
 static inline void adjust_unusable_cap_perc(struct f2fs_sb_info *sbi)
 {
 	if (!F2FS_OPTION(sbi).unusable_cap_perc)
@@ -3792,6 +3832,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 		goto free_nm;
 	}
 
+	err = adjust_reserved_segment(sbi);
+	if (err)
+		goto free_nm;
+
 	/* For write statistics */
 	if (sb->s_bdev->bd_part)
 		sbi->sectors_written_start =
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index c90280c3168f..e67e19ecc214 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -337,7 +337,9 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
 	if (a->struct_type == RESERVED_BLOCKS) {
 		spin_lock(&sbi->stat_lock);
 		if (t > (unsigned long)(sbi->user_block_count -
-				F2FS_OPTION(sbi).root_reserved_blocks)) {
+				F2FS_OPTION(sbi).root_reserved_blocks -
+				sbi->blocks_per_seg *
+				SM_I(sbi)->additional_reserved_segments)) {
 			spin_unlock(&sbi->stat_lock);
 			return -EINVAL;
 		}
-- 
Gitee


From df55744b83a93a51c5b78dd5a521064fa2fee6ab Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 14 Jan 2022 08:43:28 -0800
Subject: [PATCH 078/100] af_unix: annote lockless accesses to
 unix_tot_inflight & gc_in_progress

stable inclusion
from stable-5.10.94
commit 38221afa03af6d40d19f69607b21e3ccf9118d3a
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 9d6d7f1cb67cdee15f1a0e85aacfb924e0e02435 upstream.

wait_for_unix_gc() reads unix_tot_inflight & gc_in_progress
without synchronization.

Adds READ_ONCE()/WRITE_ONCE() and their associated comments
to better document the intent.

BUG: KCSAN: data-race in unix_inflight / wait_for_unix_gc

write to 0xffffffff86e2b7c0 of 4 bytes by task 9380 on cpu 0:
 unix_inflight+0x1e8/0x260 net/unix/scm.c:63
 unix_attach_fds+0x10c/0x1e0 net/unix/scm.c:121
 unix_scm_to_skb net/unix/af_unix.c:1674 [inline]
 unix_dgram_sendmsg+0x679/0x16b0 net/unix/af_unix.c:1817
 unix_seqpacket_sendmsg+0xcc/0x110 net/unix/af_unix.c:2258
 sock_sendmsg_nosec net/socket.c:704 [inline]
 sock_sendmsg net/socket.c:724 [inline]
 ____sys_sendmsg+0x39a/0x510 net/socket.c:2409
 ___sys_sendmsg net/socket.c:2463 [inline]
 __sys_sendmmsg+0x267/0x4c0 net/socket.c:2549
 __do_sys_sendmmsg net/socket.c:2578 [inline]
 __se_sys_sendmmsg net/socket.c:2575 [inline]
 __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2575
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae

read to 0xffffffff86e2b7c0 of 4 bytes by task 9375 on cpu 1:
 wait_for_unix_gc+0x24/0x160 net/unix/garbage.c:196
 unix_dgram_sendmsg+0x8e/0x16b0 net/unix/af_unix.c:1772
 unix_seqpacket_sendmsg+0xcc/0x110 net/unix/af_unix.c:2258
 sock_sendmsg_nosec net/socket.c:704 [inline]
 sock_sendmsg net/socket.c:724 [inline]
 ____sys_sendmsg+0x39a/0x510 net/socket.c:2409
 ___sys_sendmsg net/socket.c:2463 [inline]
 __sys_sendmmsg+0x267/0x4c0 net/socket.c:2549
 __do_sys_sendmmsg net/socket.c:2578 [inline]
 __se_sys_sendmmsg net/socket.c:2575 [inline]
 __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2575
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae

value changed: 0x00000002 -> 0x00000004

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 9375 Comm: syz-executor.1 Not tainted 5.16.0-rc7-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011

Fixes: 9915672d4127 ("af_unix: limit unix_tot_inflight")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Link: https://lore.kernel.org/r/20220114164328.2038499-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 net/unix/garbage.c | 14 +++++++++++---
 net/unix/scm.c     |  6 ++++--
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 12e2ddaf887f..d45d5366115a 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -192,8 +192,11 @@ void wait_for_unix_gc(void)
 {
 	/* If number of inflight sockets is insane,
 	 * force a garbage collect right now.
+	 * Paired with the WRITE_ONCE() in unix_inflight(),
+	 * unix_notinflight() and gc_in_progress().
 	 */
-	if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress)
+	if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
+	    !READ_ONCE(gc_in_progress))
 		unix_gc();
 	wait_event(unix_gc_wait, gc_in_progress == false);
 }
@@ -213,7 +216,9 @@ void unix_gc(void)
 	if (gc_in_progress)
 		goto out;
 
-	gc_in_progress = true;
+	/* Paired with READ_ONCE() in wait_for_unix_gc(). */
+	WRITE_ONCE(gc_in_progress, true);
+
 	/* First, select candidates for garbage collection.  Only
 	 * in-flight sockets are considered, and from those only ones
 	 * which don't have any external reference.
@@ -299,7 +304,10 @@ void unix_gc(void)
 
 	/* All candidates should have been detached by now. */
 	BUG_ON(!list_empty(&gc_candidates));
-	gc_in_progress = false;
+
+	/* Paired with READ_ONCE() in wait_for_unix_gc(). */
+	WRITE_ONCE(gc_in_progress, false);
+
 	wake_up(&unix_gc_wait);
 
  out:
diff --git a/net/unix/scm.c b/net/unix/scm.c
index 052ae709ce28..aa27a02478dc 100644
--- a/net/unix/scm.c
+++ b/net/unix/scm.c
@@ -60,7 +60,8 @@ void unix_inflight(struct user_struct *user, struct file *fp)
 		} else {
 			BUG_ON(list_empty(&u->link));
 		}
-		unix_tot_inflight++;
+		/* Paired with READ_ONCE() in wait_for_unix_gc() */
+		WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
 	}
 	user->unix_inflight++;
 	spin_unlock(&unix_gc_lock);
@@ -80,7 +81,8 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
 
 		if (atomic_long_dec_and_test(&u->inflight))
 			list_del_init(&u->link);
-		unix_tot_inflight--;
+		/* Paired with READ_ONCE() in wait_for_unix_gc() */
+		WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
 	}
 	user->unix_inflight--;
 	spin_unlock(&unix_gc_lock);
-- 
Gitee


From 6431dc565b64338e0f71aeaa19622edcad272712 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@kernel.org>
Date: Thu, 9 Dec 2021 17:34:05 -0800
Subject: [PATCH 079/100] clk: Emit a stern warning with writable debugfs
 enabled

stable inclusion
from stable-5.10.94
commit fe40f7aef387c90021bc415d258f22dee19f4909
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 489a71964f9d74e697a12cd0ace20ed829eb1f93 upstream.

We don't want vendors to be enabling this part of the clk code and
shipping it to customers. Exposing the ability to change clk frequencies
and parents via debugfs is potentially damaging to the system if folks
don't know what they're doing. Emit a strong warning so that the message
is clear: don't enable this outside of development systems.

Fixes: 37215da5553e ("clk: Add support for setting clk_rate via debugfs")
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20211210014237.2130300-1-sboyd@kernel.org
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/clk/clk.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 515ef39c4610..b8a0e3d23698 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -3314,6 +3314,24 @@ static int __init clk_debug_init(void)
 {
 	struct clk_core *core;
 
+#ifdef CLOCK_ALLOW_WRITE_DEBUGFS
+	pr_warn("\n");
+	pr_warn("********************************************************************\n");
+	pr_warn("**     NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE           **\n");
+	pr_warn("**                                                                **\n");
+	pr_warn("**  WRITEABLE clk DebugFS SUPPORT HAS BEEN ENABLED IN THIS KERNEL **\n");
+	pr_warn("**                                                                **\n");
+	pr_warn("** This means that this kernel is built to expose clk operations  **\n");
+	pr_warn("** such as parent or rate setting, enabling, disabling, etc.      **\n");
+	pr_warn("** to userspace, which may compromise security on your system.    **\n");
+	pr_warn("**                                                                **\n");
+	pr_warn("** If you see this message and you are not debugging the          **\n");
+	pr_warn("** kernel, report this immediately to your vendor!                **\n");
+	pr_warn("**                                                                **\n");
+	pr_warn("**     NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE           **\n");
+	pr_warn("********************************************************************\n");
+#endif
+
 	rootdir = debugfs_create_dir("clk", NULL);
 
 	debugfs_create_file("clk_summary", 0444, rootdir, &all_lists,
-- 
Gitee


From 0aac52d56352ff29ab001be9025a55db9806b78a Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock@calian.com>
Date: Wed, 12 Jan 2022 14:38:16 -0600
Subject: [PATCH 080/100] clk: si5341: Fix clock HW provider cleanup

stable inclusion
from stable-5.10.94
commit 51b52cf354850e7560758740d072815485932139
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 49a8f2bc8d88702783c7e163ec84374e9a022f71 upstream.

The call to of_clk_add_hw_provider was not undone on remove or on probe
failure, which could cause an oops on a subsequent attempt to retrieve
clocks for the removed device. Switch to the devm version of the
function to avoid this issue.

Fixes: 3044a860fd09 ("clk: Add Si5341/Si5340 driver")
Signed-off-by: Robert Hancock <robert.hancock@calian.com>
Link: https://lore.kernel.org/r/20220112203816.1784610-1-robert.hancock@calian.com
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/clk/clk-si5341.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/clk-si5341.c b/drivers/clk/clk-si5341.c
index eb22f4fdbc6b..772b48ad0cd7 100644
--- a/drivers/clk/clk-si5341.c
+++ b/drivers/clk/clk-si5341.c
@@ -1576,7 +1576,7 @@ static int si5341_probe(struct i2c_client *client,
 			clk_prepare(data->clk[i].hw.clk);
 	}
 
-	err = of_clk_add_hw_provider(client->dev.of_node, of_clk_si5341_get,
+	err = devm_of_clk_add_hw_provider(&client->dev, of_clk_si5341_get,
 			data);
 	if (err) {
 		dev_err(&client->dev, "unable to add clk provider\n");
-- 
Gitee


From 2f1a2fad490e72865b14efd5bb270511f1a04598 Mon Sep 17 00:00:00 2001
From: Wen Gu <guwen@linux.alibaba.com>
Date: Sun, 16 Jan 2022 15:43:42 +0800
Subject: [PATCH 081/100] net/smc: Fix hung_task when removing SMC-R devices

stable inclusion
from stable-5.10.94
commit a66b9bccf73259345f5e9bbc21531abf38f76f3c
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 56d99e81ecbc997a5f984684d0eeb583992b2072 upstream.

A hung_task is observed when removing SMC-R devices. Suppose that
a link group has two active links(lnk_A, lnk_B) associated with two
different SMC-R devices(dev_A, dev_B). When dev_A is removed, the
link group will be removed from smc_lgr_list and added into
lgr_linkdown_list. lnk_A will be cleared and smcibdev(A)->lnk_cnt
will reach to zero. However, when dev_B is removed then, the link
group can't be found in smc_lgr_list and lnk_B won't be cleared,
making smcibdev->lnk_cnt never reaches zero, which causes a hung_task.

This patch fixes this issue by restoring the implementation of
smc_smcr_terminate_all() to what it was before commit 349d43127dac
("net/smc: fix kernel panic caused by race of smc_sock"). The original
implementation also satisfies the intention that make sure QP destroy
earlier than CQ destroy because we will always wait for smcibdev->lnk_cnt
reaches zero, which guarantees QP has been destroyed.

Fixes: 349d43127dac ("net/smc: fix kernel panic caused by race of smc_sock")
Signed-off-by: Wen Gu <guwen@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 net/smc/smc_core.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 2a22dc85951e..4eb9ef9c2800 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1002,16 +1002,11 @@ void smc_smcd_terminate_all(struct smcd_dev *smcd)
 /* Called when an SMCR device is removed or the smc module is unloaded.
  * If smcibdev is given, all SMCR link groups using this device are terminated.
  * If smcibdev is NULL, all SMCR link groups are terminated.
- *
- * We must wait here for QPs been destroyed before we destroy the CQs,
- * or we won't received any CQEs and cdc_pend_tx_wr cannot reach 0 thus
- * smc_sock cannot be released.
  */
 void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
 {
 	struct smc_link_group *lgr, *lg;
 	LIST_HEAD(lgr_free_list);
-	LIST_HEAD(lgr_linkdown_list);
 	int i;
 
 	spin_lock_bh(&smc_lgr_list.lock);
@@ -1023,7 +1018,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
 		list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
 			for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
 				if (lgr->lnk[i].smcibdev == smcibdev)
-					list_move_tail(&lgr->list, &lgr_linkdown_list);
+					smcr_link_down_cond_sched(&lgr->lnk[i]);
 			}
 		}
 	}
@@ -1035,16 +1030,6 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
 		__smc_lgr_terminate(lgr, false);
 	}
 
-	list_for_each_entry_safe(lgr, lg, &lgr_linkdown_list, list) {
-		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
-			if (lgr->lnk[i].smcibdev == smcibdev) {
-				mutex_lock(&lgr->llc_conf_mutex);
-				smcr_link_down_cond(&lgr->lnk[i]);
-				mutex_unlock(&lgr->llc_conf_mutex);
-			}
-		}
-	}
-
 	if (smcibdev) {
 		if (atomic_read(&smcibdev->lnk_cnt))
 			wait_event(smcibdev->lnks_deleted,
-- 
Gitee


From 50893a1c2d7fe4fc33eb30002bda97aae2131748 Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock@calian.com>
Date: Tue, 18 Jan 2022 15:41:24 -0600
Subject: [PATCH 082/100] net: axienet: increase reset timeout

stable inclusion
from stable-5.10.94
commit 34942a228aec23ce65fbf4969f091f0f31803880
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 2e5644b1bab2ccea9cfc7a9520af95b94eb0dbf1 upstream.

The previous timeout of 1ms was too short to handle some cases where the
core is reset just after the input clocks were started, which will
be introduced in an upcoming patch. Increase the timeout to 50ms. Also
simplify the reset timeout checking to use read_poll_timeout.

Fixes: 8a3b7a252dca9 ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver")
Signed-off-by: Robert Hancock <robert.hancock@calian.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 .../net/ethernet/xilinx/xilinx_axienet_main.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 69c79cc24e6e..137c3d33ea68 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -496,7 +496,8 @@ static void axienet_setoptions(struct net_device *ndev, u32 options)
 
 static int __axienet_device_reset(struct axienet_local *lp)
 {
-	u32 timeout;
+	u32 value;
+	int ret;
 
 	/* Reset Axi DMA. This would reset Axi Ethernet core as well. The reset
 	 * process of Axi DMA takes a while to complete as all pending
@@ -506,15 +507,13 @@ static int __axienet_device_reset(struct axienet_local *lp)
 	 * they both reset the entire DMA core, so only one needs to be used.
 	 */
 	axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, XAXIDMA_CR_RESET_MASK);
-	timeout = DELAY_OF_ONE_MILLISEC;
-	while (axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET) &
-				XAXIDMA_CR_RESET_MASK) {
-		udelay(1);
-		if (--timeout == 0) {
-			netdev_err(lp->ndev, "%s: DMA reset timeout!\n",
-				   __func__);
-			return -ETIMEDOUT;
-		}
+	ret = read_poll_timeout(axienet_dma_in32, value,
+				!(value & XAXIDMA_CR_RESET_MASK),
+				DELAY_OF_ONE_MILLISEC, 50000, false, lp,
+				XAXIDMA_TX_CR_OFFSET);
+	if (ret) {
+		dev_err(lp->dev, "%s: DMA reset timeout!\n", __func__);
+		return ret;
 	}
 
 	return 0;
-- 
Gitee


From 90e22e142dafe233d70454729848bef2f4221580 Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock@calian.com>
Date: Tue, 18 Jan 2022 15:41:25 -0600
Subject: [PATCH 083/100] net: axienet: Wait for PhyRstCmplt after core reset

stable inclusion
from stable-5.10.94
commit 46c0ccaff28516ccedc2afaa3ce1d0c4284022e0
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit b400c2f4f4c53c86594dd57098970d97d488bfde upstream.

When resetting the device, wait for the PhyRstCmplt bit to be set
in the interrupt status register before continuing initialization, to
ensure that the core is actually ready. When using an external PHY, this
also ensures we do not start trying to access the PHY while it is still
in reset. The PHY reset is initiated by the core reset which is
triggered just above, but remains asserted for 5ms after the core is
reset according to the documentation.

The MgtRdy bit could also be waited for, but unfortunately when using
7-series devices, the bit does not appear to work as documented (it
seems to behave as some sort of link state indication and not just an
indication the transceiver is ready) so it can't really be relied on for
this purpose.

Fixes: 8a3b7a252dca9 ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver")
Signed-off-by: Robert Hancock <robert.hancock@calian.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 137c3d33ea68..811f15eace9f 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -516,6 +516,16 @@ static int __axienet_device_reset(struct axienet_local *lp)
 		return ret;
 	}
 
+	/* Wait for PhyRstCmplt bit to be set, indicating the PHY reset has finished */
+	ret = read_poll_timeout(axienet_ior, value,
+				value & XAE_INT_PHYRSTCMPLT_MASK,
+				DELAY_OF_ONE_MILLISEC, 50000, false, lp,
+				XAE_IS_OFFSET);
+	if (ret) {
+		dev_err(lp->dev, "%s: timeout waiting for PhyRstCmplt\n", __func__);
+		return ret;
+	}
+
 	return 0;
 }
 
-- 
Gitee


From 3ac7c74eef8b8e107ebc5989560c636c8d31ca83 Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock@calian.com>
Date: Tue, 18 Jan 2022 15:41:26 -0600
Subject: [PATCH 084/100] net: axienet: reset core on initialization prior to
 MDIO access

stable inclusion
from stable-5.10.94
commit bcc5d57e6091ef2346eabcd6e2169353a8770ec8
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 04cc2da39698efd7eb2e30c112538922d26f848e upstream.

In some cases where the Xilinx Ethernet core was used in 1000Base-X or
SGMII modes, which use the internal PCS/PMA PHY, and the MGT
transceiver clock source for the PCS was not running at the time the
FPGA logic was loaded, the core would come up in a state where the
PCS could not be found on the MDIO bus. To fix this, the Ethernet core
(including the PCS) should be reset after enabling the clocks, prior to
attempting to access the PCS using of_mdio_find_device.

Fixes: 1a02556086fc (net: axienet: Properly handle PCS/PMA PHY for 1000BaseX mode)
Signed-off-by: Robert Hancock <robert.hancock@calian.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 811f15eace9f..7b94784a1465 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -2024,6 +2024,11 @@ static int axienet_probe(struct platform_device *pdev)
 	lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD;
 	lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD;
 
+	/* Reset core now that clocks are enabled, prior to accessing MDIO */
+	ret = __axienet_device_reset(lp);
+	if (ret)
+		goto cleanup_clk;
+
 	lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
 	if (lp->phy_node) {
 		ret = axienet_mdio_setup(lp);
-- 
Gitee


From c99fc3dd3d5ff5942b1b462757a58668a3159329 Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock@calian.com>
Date: Tue, 18 Jan 2022 15:41:27 -0600
Subject: [PATCH 085/100] net: axienet: add missing memory barriers

stable inclusion
from stable-5.10.94
commit 2f548489d64debf093f2299e38a8d0fc655f87b9
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 95978df6fa328df619c15312e65ece469c2be2d2 upstream.

This driver was missing some required memory barriers:

Use dma_rmb to ensure we see all updates to the descriptor after we see
that an entry has been completed.

Use wmb and rmb to avoid stale descriptor status between the TX path and
TX complete IRQ path.

Fixes: 8a3b7a252dca9 ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver")
Signed-off-by: Robert Hancock <robert.hancock@calian.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 7b94784a1465..478d80882f01 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -632,6 +632,8 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd,
 		if (nr_bds == -1 && !(status & XAXIDMA_BD_STS_COMPLETE_MASK))
 			break;
 
+		/* Ensure we see complete descriptor update */
+		dma_rmb();
 		phys = desc_get_phys_addr(lp, cur_p);
 		dma_unmap_single(ndev->dev.parent, phys,
 				 (cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK),
@@ -645,8 +647,10 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd,
 		cur_p->app1 = 0;
 		cur_p->app2 = 0;
 		cur_p->app4 = 0;
-		cur_p->status = 0;
 		cur_p->skb = NULL;
+		/* ensure our transmit path and device don't prematurely see status cleared */
+		wmb();
+		cur_p->status = 0;
 
 		if (sizep)
 			*sizep += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK;
@@ -704,6 +708,9 @@ static inline int axienet_check_tx_bd_space(struct axienet_local *lp,
 					    int num_frag)
 {
 	struct axidma_bd *cur_p;
+
+	/* Ensure we see all descriptor updates from device or TX IRQ path */
+	rmb();
 	cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num];
 	if (cur_p->status & XAXIDMA_BD_STS_ALL_MASK)
 		return NETDEV_TX_BUSY;
@@ -843,6 +850,8 @@ static void axienet_recv(struct net_device *ndev)
 
 		tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci;
 
+		/* Ensure we see complete descriptor update */
+		dma_rmb();
 		phys = desc_get_phys_addr(lp, cur_p);
 		dma_unmap_single(ndev->dev.parent, phys, lp->max_frm_size,
 				 DMA_FROM_DEVICE);
-- 
Gitee


From e3ba7050a55f987eb8d13721eb5efd382f025a26 Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock@calian.com>
Date: Tue, 18 Jan 2022 15:41:28 -0600
Subject: [PATCH 086/100] net: axienet: limit minimum TX ring size

stable inclusion
from stable-5.10.94
commit 7a3d3d7f6d7b3db6cabeb9d43ef207c3eda1b8a5
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 70f5817deddbc6ef3faa35841cab83c280cc653a upstream.

The driver will not work properly if the TX ring size is set to below
MAX_SKB_FRAGS + 1 since it needs to hold at least one full maximally
fragmented packet in the TX ring. Limit setting the ring size to below
this value.

Fixes: 8b09ca823ffb4 ("net: axienet: Make RX/TX ring sizes configurable")
Signed-off-by: Robert Hancock <robert.hancock@calian.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 478d80882f01..acfd7d545fef 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -43,6 +43,7 @@
 /* Descriptors defines for Tx and Rx DMA */
 #define TX_BD_NUM_DEFAULT		64
 #define RX_BD_NUM_DEFAULT		1024
+#define TX_BD_NUM_MIN			(MAX_SKB_FRAGS + 1)
 #define TX_BD_NUM_MAX			4096
 #define RX_BD_NUM_MAX			4096
 
@@ -1373,7 +1374,8 @@ static int axienet_ethtools_set_ringparam(struct net_device *ndev,
 	if (ering->rx_pending > RX_BD_NUM_MAX ||
 	    ering->rx_mini_pending ||
 	    ering->rx_jumbo_pending ||
-	    ering->rx_pending > TX_BD_NUM_MAX)
+	    ering->tx_pending < TX_BD_NUM_MIN ||
+	    ering->tx_pending > TX_BD_NUM_MAX)
 		return -EINVAL;
 
 	if (netif_running(ndev))
-- 
Gitee


From 0a05d304cd9b79656fe4a61b8c3c6b2d7a2f1329 Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock@calian.com>
Date: Tue, 18 Jan 2022 15:41:29 -0600
Subject: [PATCH 087/100] net: axienet: Fix TX ring slot available check

stable inclusion
from stable-5.10.94
commit 6301f3566aef4e8e8ad6019e43ddb95ad6f65fc6
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 996defd7f8b5dafc1d480b7585c7c62437f80c3c upstream.

The check for whether a TX ring slot was available was incorrect,
since a slot which had been loaded with transmit data but the device had
not started transmitting would be treated as available, potentially
causing non-transmitted slots to be overwritten. The control field in
the descriptor should be checked, rather than the status field (which may
only be updated when the device completes the entry).

Fixes: 8a3b7a252dca9 ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver")
Signed-off-by: Robert Hancock <robert.hancock@calian.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index acfd7d545fef..94e39750932c 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -643,7 +643,6 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd,
 		if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK))
 			dev_consume_skb_irq(cur_p->skb);
 
-		cur_p->cntrl = 0;
 		cur_p->app0 = 0;
 		cur_p->app1 = 0;
 		cur_p->app2 = 0;
@@ -651,6 +650,7 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd,
 		cur_p->skb = NULL;
 		/* ensure our transmit path and device don't prematurely see status cleared */
 		wmb();
+		cur_p->cntrl = 0;
 		cur_p->status = 0;
 
 		if (sizep)
@@ -713,7 +713,7 @@ static inline int axienet_check_tx_bd_space(struct axienet_local *lp,
 	/* Ensure we see all descriptor updates from device or TX IRQ path */
 	rmb();
 	cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num];
-	if (cur_p->status & XAXIDMA_BD_STS_ALL_MASK)
+	if (cur_p->cntrl)
 		return NETDEV_TX_BUSY;
 	return 0;
 }
-- 
Gitee


From 8824d81f4fed49478578418a7817f52cdc1a541c Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock@calian.com>
Date: Tue, 18 Jan 2022 15:41:30 -0600
Subject: [PATCH 088/100] net: axienet: fix number of TX ring slots for
 available check

stable inclusion
from stable-5.10.94
commit 41831d496772ce095735af9cbff3a17c5d082258
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit aba57a823d2985a2cc8c74a2535f3a88e68d9424 upstream.

The check for the number of available TX ring slots was off by 1 since a
slot is required for the skb header as well as each fragment. This could
result in overwriting a TX ring slot that was still in use.

Fixes: 8a3b7a252dca9 ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver")
Signed-off-by: Robert Hancock <robert.hancock@calian.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 94e39750932c..638f2fee8074 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -747,7 +747,7 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	num_frag = skb_shinfo(skb)->nr_frags;
 	cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
 
-	if (axienet_check_tx_bd_space(lp, num_frag)) {
+	if (axienet_check_tx_bd_space(lp, num_frag + 1)) {
 		if (netif_queue_stopped(ndev))
 			return NETDEV_TX_BUSY;
 
@@ -757,7 +757,7 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 		smp_mb();
 
 		/* Space might have just been freed - check again */
-		if (axienet_check_tx_bd_space(lp, num_frag))
+		if (axienet_check_tx_bd_space(lp, num_frag + 1))
 			return NETDEV_TX_BUSY;
 
 		netif_wake_queue(ndev);
-- 
Gitee


From 40a84870f60eaad8d3363bdce01640a1bc4a3fde Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock@calian.com>
Date: Tue, 18 Jan 2022 15:41:31 -0600
Subject: [PATCH 089/100] net: axienet: fix for TX busy handling

stable inclusion
from stable-5.10.94
commit 557829d42d1f5cf77e743b0b8286940d0b1e4428
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit bb193e3db8b86a63f26889c99e14fd30c9ebd72a upstream.

Network driver documentation indicates we should be avoiding returning
NETDEV_TX_BUSY from ndo_start_xmit in normal cases, since it requires
the packets to be requeued. Instead the queue should be stopped after
a packet is added to the TX ring when there may not be enough room for an
additional one. Also, when TX ring entries are completed, we should only
wake the queue if we know there is room for another full maximally
fragmented packet.

Print a warning if there is insufficient space at the start of start_xmit,
since this should no longer happen.

Combined with increasing the default TX ring size (in a subsequent
patch), this appears to recover the TX performance lost by previous changes
to actually manage the TX ring state properly.

Fixes: 8a3b7a252dca9 ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver")
Signed-off-by: Robert Hancock <robert.hancock@calian.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 .../net/ethernet/xilinx/xilinx_axienet_main.c | 86 ++++++++++---------
 1 file changed, 47 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 638f2fee8074..2dd8126eee87 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -660,6 +660,32 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd,
 	return i;
 }
 
+/**
+ * axienet_check_tx_bd_space - Checks if a BD/group of BDs are currently busy
+ * @lp:		Pointer to the axienet_local structure
+ * @num_frag:	The number of BDs to check for
+ *
+ * Return: 0, on success
+ *	    NETDEV_TX_BUSY, if any of the descriptors are not free
+ *
+ * This function is invoked before BDs are allocated and transmission starts.
+ * This function returns 0 if a BD or group of BDs can be allocated for
+ * transmission. If the BD or any of the BDs are not free the function
+ * returns a busy status. This is invoked from axienet_start_xmit.
+ */
+static inline int axienet_check_tx_bd_space(struct axienet_local *lp,
+					    int num_frag)
+{
+	struct axidma_bd *cur_p;
+
+	/* Ensure we see all descriptor updates from device or TX IRQ path */
+	rmb();
+	cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num];
+	if (cur_p->cntrl)
+		return NETDEV_TX_BUSY;
+	return 0;
+}
+
 /**
  * axienet_start_xmit_done - Invoked once a transmit is completed by the
  * Axi DMA Tx channel.
@@ -689,33 +715,8 @@ static void axienet_start_xmit_done(struct net_device *ndev)
 	/* Matches barrier in axienet_start_xmit */
 	smp_mb();
 
-	netif_wake_queue(ndev);
-}
-
-/**
- * axienet_check_tx_bd_space - Checks if a BD/group of BDs are currently busy
- * @lp:		Pointer to the axienet_local structure
- * @num_frag:	The number of BDs to check for
- *
- * Return: 0, on success
- *	    NETDEV_TX_BUSY, if any of the descriptors are not free
- *
- * This function is invoked before BDs are allocated and transmission starts.
- * This function returns 0 if a BD or group of BDs can be allocated for
- * transmission. If the BD or any of the BDs are not free the function
- * returns a busy status. This is invoked from axienet_start_xmit.
- */
-static inline int axienet_check_tx_bd_space(struct axienet_local *lp,
-					    int num_frag)
-{
-	struct axidma_bd *cur_p;
-
-	/* Ensure we see all descriptor updates from device or TX IRQ path */
-	rmb();
-	cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num];
-	if (cur_p->cntrl)
-		return NETDEV_TX_BUSY;
-	return 0;
+	if (!axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1))
+		netif_wake_queue(ndev);
 }
 
 /**
@@ -748,19 +749,14 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
 
 	if (axienet_check_tx_bd_space(lp, num_frag + 1)) {
-		if (netif_queue_stopped(ndev))
-			return NETDEV_TX_BUSY;
-
+		/* Should not happen as last start_xmit call should have
+		 * checked for sufficient space and queue should only be
+		 * woken when sufficient space is available.
+		 */
 		netif_stop_queue(ndev);
-
-		/* Matches barrier in axienet_start_xmit_done */
-		smp_mb();
-
-		/* Space might have just been freed - check again */
-		if (axienet_check_tx_bd_space(lp, num_frag + 1))
-			return NETDEV_TX_BUSY;
-
-		netif_wake_queue(ndev);
+		if (net_ratelimit())
+			netdev_warn(ndev, "TX ring unexpectedly full\n");
+		return NETDEV_TX_BUSY;
 	}
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -821,6 +817,18 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	if (++lp->tx_bd_tail >= lp->tx_bd_num)
 		lp->tx_bd_tail = 0;
 
+	/* Stop queue if next transmit may not have space */
+	if (axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) {
+		netif_stop_queue(ndev);
+
+		/* Matches barrier in axienet_start_xmit_done */
+		smp_mb();
+
+		/* Space might have just been freed - check again */
+		if (!axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1))
+			netif_wake_queue(ndev);
+	}
+
 	return NETDEV_TX_OK;
 }
 
-- 
Gitee


From 972138b37e799c12bf897ee248677a3a7d463595 Mon Sep 17 00:00:00 2001
From: Robert Hancock <robert.hancock@calian.com>
Date: Tue, 18 Jan 2022 15:41:32 -0600
Subject: [PATCH 090/100] net: axienet: increase default TX ring size to 128

stable inclusion
from stable-5.10.94
commit d7544cf6939c1f978933992db483be4a6351494b
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 2d19c3fd80178160dd505ccd7fed1643831227a5 upstream.

With previous changes to make the driver handle the TX ring size more
correctly, the default TX ring size of 64 appears to significantly
bottleneck TX performance to around 600 Mbps on a 1 Gbps link on ZynqMP.
Increasing this to 128 seems to bring performance up to near line rate and
shouldn't cause excess bufferbloat (this driver doesn't yet support modern
byte-based queue management).

Fixes: 8a3b7a252dca9 ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver")
Signed-off-by: Robert Hancock <robert.hancock@calian.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 2dd8126eee87..0baf85122f5a 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -41,7 +41,7 @@
 #include "xilinx_axienet.h"
 
 /* Descriptors defines for Tx and Rx DMA */
-#define TX_BD_NUM_DEFAULT		64
+#define TX_BD_NUM_DEFAULT		128
 #define RX_BD_NUM_DEFAULT		1024
 #define TX_BD_NUM_MIN			(MAX_SKB_FRAGS + 1)
 #define TX_BD_NUM_MAX			4096
-- 
Gitee


From 5447a4dafbb24b58b7e17945d997773b7afa66e3 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 7 Jan 2022 12:09:36 -0800
Subject: [PATCH 091/100] HID: vivaldi: fix handling devices not using numbered
 reports

stable inclusion
from stable-5.10.94
commit 8b8ff4c793ee3d412569033c8ef0bc5d98a87262
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 3fe6acd4dc922237b30e55473c9349c6ce0690f3 upstream.

Unfortunately details of USB HID transport bled into HID core and
handling of numbered/unnumbered reports is quite a mess, with
hid_report_len() calculating the length according to USB rules,
and hid_hw_raw_request() adding report ID to the buffer for both
numbered and unnumbered reports.

Untangling it all requres a lot of changes in HID, so for now let's
handle this in the driver.

[jkosina@suse.cz: microoptimize field->report->id to report->id]
Fixes: 14c9c014babe ("HID: add vivaldi HID driver")
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Tested-by: Stephen Boyd <swboyd@chromium.org> # CoachZ
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/hid/hid-vivaldi.c | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/drivers/hid/hid-vivaldi.c b/drivers/hid/hid-vivaldi.c
index 72957a9f7117..576518e704ee 100644
--- a/drivers/hid/hid-vivaldi.c
+++ b/drivers/hid/hid-vivaldi.c
@@ -74,10 +74,11 @@ static void vivaldi_feature_mapping(struct hid_device *hdev,
 				    struct hid_usage *usage)
 {
 	struct vivaldi_data *drvdata = hid_get_drvdata(hdev);
+	struct hid_report *report = field->report;
 	int fn_key;
 	int ret;
 	u32 report_len;
-	u8 *buf;
+	u8 *report_data, *buf;
 
 	if (field->logical != HID_USAGE_FN_ROW_PHYSMAP ||
 	    (usage->hid & HID_USAGE_PAGE) != HID_UP_ORDINAL)
@@ -89,12 +90,24 @@ static void vivaldi_feature_mapping(struct hid_device *hdev,
 	if (fn_key > drvdata->max_function_row_key)
 		drvdata->max_function_row_key = fn_key;
 
-	buf = hid_alloc_report_buf(field->report, GFP_KERNEL);
-	if (!buf)
+	report_data = buf = hid_alloc_report_buf(report, GFP_KERNEL);
+	if (!report_data)
 		return;
 
-	report_len = hid_report_len(field->report);
-	ret = hid_hw_raw_request(hdev, field->report->id, buf,
+	report_len = hid_report_len(report);
+	if (!report->id) {
+		/*
+		 * hid_hw_raw_request() will stuff report ID (which will be 0)
+		 * into the first byte of the buffer even for unnumbered
+		 * reports, so we need to account for this to avoid getting
+		 * -EOVERFLOW in return.
+		 * Note that hid_alloc_report_buf() adds 7 bytes to the size
+		 * so we can safely say that we have space for an extra byte.
+		 */
+		report_len++;
+	}
+
+	ret = hid_hw_raw_request(hdev, report->id, report_data,
 				 report_len, HID_FEATURE_REPORT,
 				 HID_REQ_GET_REPORT);
 	if (ret < 0) {
@@ -103,7 +116,16 @@ static void vivaldi_feature_mapping(struct hid_device *hdev,
 		goto out;
 	}
 
-	ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, buf,
+	if (!report->id) {
+		/*
+		 * Undo the damage from hid_hw_raw_request() for unnumbered
+		 * reports.
+		 */
+		report_data++;
+		report_len--;
+	}
+
+	ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, report_data,
 				   report_len, 0);
 	if (ret) {
 		dev_warn(&hdev->dev, "failed to report feature %d\n",
-- 
Gitee


From db1efb48035d52eca794598b94a52386603264f3 Mon Sep 17 00:00:00 2001
From: Laurence de Bruxelles <lfdebrux@gmail.com>
Date: Sat, 1 Jan 2022 15:41:49 +0000
Subject: [PATCH 092/100] rtc: pxa: fix null pointer dereference

stable inclusion
from stable-5.10.94
commit c736ec01a2fc2f751c52ac6025a14a0e1308460b
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 34127b3632b21e5c391756e724b1198eb9917981 upstream.

With the latest stable kernel versions the rtc on the PXA based
Zaurus does not work, when booting I see the following kernel messages:

pxa-rtc pxa-rtc: failed to find rtc clock source
pxa-rtc pxa-rtc: Unable to init SA1100 RTC sub-device
pxa-rtc: probe of pxa-rtc failed with error -2
hctosys: unable to open rtc device (rtc0)

I think this is because commit f2997775b111 ("rtc: sa1100: fix possible
race condition") moved the allocation of the rtc_device struct out of
sa1100_rtc_init and into sa1100_rtc_probe. This means that pxa_rtc_probe
also needs to do allocation for the rtc_device struct, otherwise
sa1100_rtc_init will try to dereference a null pointer. This patch adds
that allocation by copying how sa1100_rtc_probe in
drivers/rtc/rtc-sa1100.c does it; after the IRQs are set up a managed
rtc_device is allocated.

I've tested this patch with `qemu-system-arm -machine akita` and with a
real Zaurus SL-C1000 applied to 4.19, 5.4, and 5.10.

Signed-off-by: Laurence de Bruxelles <lfdebrux@gmail.com>
Fixes: f2997775b111 ("rtc: sa1100: fix possible race condition")
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Link: https://lore.kernel.org/r/20220101154149.12026-1-lfdebrux@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/rtc/rtc-pxa.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c
index d2f1d8f754bf..cf8119b6d320 100644
--- a/drivers/rtc/rtc-pxa.c
+++ b/drivers/rtc/rtc-pxa.c
@@ -330,6 +330,10 @@ static int __init pxa_rtc_probe(struct platform_device *pdev)
 	if (sa1100_rtc->irq_alarm < 0)
 		return -ENXIO;
 
+	sa1100_rtc->rtc = devm_rtc_allocate_device(&pdev->dev);
+	if (IS_ERR(sa1100_rtc->rtc))
+		return PTR_ERR(sa1100_rtc->rtc);
+
 	pxa_rtc->base = devm_ioremap(dev, pxa_rtc->ress->start,
 				resource_size(pxa_rtc->ress));
 	if (!pxa_rtc->base) {
-- 
Gitee


From 6923956e7c6b9193a0a4e756e12ae5c70c72ab71 Mon Sep 17 00:00:00 2001
From: Eli Cohen <elic@nvidia.com>
Date: Thu, 30 Dec 2021 16:20:24 +0200
Subject: [PATCH 093/100] vdpa/mlx5: Fix wrong configuration of
 virtio_version_1_0

stable inclusion
from stable-5.10.94
commit 0c4ebcb00d8871ed4b9916072d1d177c33bd43e3
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 97143b70aa847f2b0a1f959dde126b76ff7b5376 upstream.

Remove overriding of virtio_version_1_0 which forced the virtqueue
object to version 1.

Fixes: 1a86b377aa21 ("vdpa/mlx5: Add VDPA driver for supported mlx5 devices")
Signed-off-by: Eli Cohen <elic@nvidia.com>
Link: https://lore.kernel.org/r/20211230142024.142979-1-elic@nvidia.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Si-Wei Liu <si-wei.liu@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index fbdc9468818d..65d6f8fd81e7 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -812,8 +812,6 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
 	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
 	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
 	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
-	if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type))
-		MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1);
 
 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
 	if (err)
-- 
Gitee


From 7750257260a3a6cbfb3747e97beaea0b402b80b1 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 6 Jan 2022 07:57:46 -0500
Subject: [PATCH 094/100] virtio_ring: mark ring unused on error

stable inclusion
from stable-5.10.94
commit 56daa21414e9cf66131de3458ba389a1f3a9c690
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 1861ba626ae9b98136f3e504208cdef6b29cd3ec upstream.

A recently added error path does not mark ring unused when exiting on
OOM, which will lead to BUG on the next entry in debug builds.

TODO: refactor code so we have START_USE and END_USE in the same function.

Fixes: fc6d70f40b3d ("virtio_ring: check desc == NULL when using indirect with packed")
Cc: "Xuan Zhuo" <xuanzhuo@linux.alibaba.com>
Cc: Jiasheng Jiang <jiasheng@iscas.ac.cn>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/virtio/virtio_ring.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index cce75d3b3ba0..3cc2a4ee7152 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -1124,8 +1124,10 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 	if (virtqueue_use_indirect(_vq, total_sg)) {
 		err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
 						    in_sgs, data, gfp);
-		if (err != -ENOMEM)
+		if (err != -ENOMEM) {
+			END_USE(vq);
 			return err;
+		}
 
 		/* fall back on direct */
 	}
-- 
Gitee


From 712b05e15df168bbb9dae4de88fb4279e5fc7480 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 3 Jan 2022 11:32:36 -0600
Subject: [PATCH 095/100] taskstats: Cleanup the use of task->exit_code

stable inclusion
from stable-5.10.94
commit 69e7e979ed668656551ca141dc235a756da32eb0
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 1b5a42d9c85f0e731f01c8d1129001fd8531a8a0 upstream.

In the function bacct_add_task the code reading task->exit_code was
introduced in commit f3cef7a99469 ("[PATCH] csa: basic accounting over
taskstats"), and it is not entirely clear what the taskstats interface
is trying to return as only returning the exit_code of the first task
in a process doesn't make a lot of sense.

As best as I can figure the intent is to return task->exit_code after
a task exits.  The field is returned with per task fields, so the
exit_code of the entire process is not wanted.  Only the value of the
first task is returned so this is not a useful way to get the per task
ptrace stop code.  The ordinary case of returning this value is
returning after a task exits, which also precludes use for getting
a ptrace value.

It is common to for the first task of a process to also be the last
task of a process so this field may have done something reasonable by
accident in testing.

Make ac_exitcode a reliable per task value by always returning it for
every exited task.

Setting ac_exitcode in a sensible mannter makes it possible to continue
to provide this value going forward.

Cc: Balbir Singh <bsingharora@gmail.com>
Fixes: f3cef7a99469 ("[PATCH] csa: basic accounting over taskstats")
Link: https://lkml.kernel.org/r/20220103213312.9144-5-ebiederm@xmission.com
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 kernel/tsacct.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 257ffb993ea2..fd2f7a052fdd 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -38,11 +38,10 @@ void bacct_add_tsk(struct user_namespace *user_ns,
 	stats->ac_btime = clamp_t(time64_t, btime, 0, U32_MAX);
 	stats->ac_btime64 = btime;
 
-	if (thread_group_leader(tsk)) {
+	if (tsk->flags & PF_EXITING)
 		stats->ac_exitcode = tsk->exit_code;
-		if (tsk->flags & PF_FORKNOEXEC)
-			stats->ac_flag |= AFORK;
-	}
+	if (thread_group_leader(tsk) && (tsk->flags & PF_FORKNOEXEC))
+		stats->ac_flag |= AFORK;
 	if (tsk->flags & PF_SUPERPRIV)
 		stats->ac_flag |= ASU;
 	if (tsk->flags & PF_DUMPCORE)
-- 
Gitee


From 7e963e9d2f431926dd73f9feb9600eb49bd5970f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 13 Jan 2022 01:22:29 -0800
Subject: [PATCH 096/100] inet: frags: annotate races around fqdir->dead and
 fqdir->high_thresh

stable inclusion
from stable-5.10.94
commit edc09548ffc52433ed25ef4a829e664e353f615d
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 91341fa0003befd097e190ec2a4bf63ad957c49a upstream.

Both fields can be read/written without synchronization,
add proper accessors and documentation.

Fixes: d5dd88794a13 ("inet: fix various use-after-free in defrags units")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 include/net/inet_frag.h  | 11 +++++++++--
 include/net/ipv6_frag.h  |  3 ++-
 net/ipv4/inet_fragment.c |  8 +++++---
 net/ipv4/ip_fragment.c   |  3 ++-
 4 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index bac79e817776..4cbd413e71a3 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -116,8 +116,15 @@ int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net);
 
 static inline void fqdir_pre_exit(struct fqdir *fqdir)
 {
-	fqdir->high_thresh = 0; /* prevent creation of new frags */
-	fqdir->dead = true;
+	/* Prevent creation of new frags.
+	 * Pairs with READ_ONCE() in inet_frag_find().
+	 */
+	WRITE_ONCE(fqdir->high_thresh, 0);
+
+	/* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire()
+	 * and ip6frag_expire_frag_queue().
+	 */
+	WRITE_ONCE(fqdir->dead, true);
 }
 void fqdir_exit(struct fqdir *fqdir);
 
diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
index 851029ecff13..0a4779175a52 100644
--- a/include/net/ipv6_frag.h
+++ b/include/net/ipv6_frag.h
@@ -67,7 +67,8 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
 	struct sk_buff *head;
 
 	rcu_read_lock();
-	if (fq->q.fqdir->dead)
+	/* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */
+	if (READ_ONCE(fq->q.fqdir->dead))
 		goto out_rcu_unlock;
 	spin_lock(&fq->q.lock);
 
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 10d31733297d..e0e8a65d561e 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -204,9 +204,9 @@ void inet_frag_kill(struct inet_frag_queue *fq)
 		/* The RCU read lock provides a memory barrier
 		 * guaranteeing that if fqdir->dead is false then
 		 * the hash table destruction will not start until
-		 * after we unlock.  Paired with inet_frags_exit_net().
+		 * after we unlock.  Paired with fqdir_pre_exit().
 		 */
-		if (!fqdir->dead) {
+		if (!READ_ONCE(fqdir->dead)) {
 			rhashtable_remove_fast(&fqdir->rhashtable, &fq->node,
 					       fqdir->f->rhash_params);
 			refcount_dec(&fq->refcnt);
@@ -321,9 +321,11 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
 /* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
 struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
 {
+	/* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */
+	long high_thresh = READ_ONCE(fqdir->high_thresh);
 	struct inet_frag_queue *fq = NULL, *prev;
 
-	if (!fqdir->high_thresh || frag_mem_limit(fqdir) > fqdir->high_thresh)
+	if (!high_thresh || frag_mem_limit(fqdir) > high_thresh)
 		return NULL;
 
 	rcu_read_lock();
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index cfeb8890f94e..fad803d2d711 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -144,7 +144,8 @@ static void ip_expire(struct timer_list *t)
 
 	rcu_read_lock();
 
-	if (qp->q.fqdir->dead)
+	/* Paired with WRITE_ONCE() in fqdir_pre_exit(). */
+	if (READ_ONCE(qp->q.fqdir->dead))
 		goto out_rcu_unlock;
 
 	spin_lock(&qp->q.lock);
-- 
Gitee


From d99c68df9aba2fa0dc4eb05d90fbb4477a440c79 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 18 Jan 2022 03:43:40 -0800
Subject: [PATCH 097/100] netns: add schedule point in ops_exit_list()

stable inclusion
from stable-5.10.94
commit 2b1415c60b2e4aff93f790a7ae374dd6ef20d3a5
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 2836615aa22de55b8fca5e32fe1b27a67cda625e upstream.

When under stress, cleanup_net() can have to dismantle
netns in big numbers. ops_exit_list() currently calls
many helpers [1] that have no schedule point, and we can
end up with soft lockups, particularly on hosts
with many cpus.

Even for moderate amount of netns processed by cleanup_net()
this patch avoids latency spikes.

[1] Some of these helpers like fib_sync_up() and fib_sync_down_dev()
are very slow because net/ipv4/fib_semantics.c uses host-wide hash tables,
and ifindex is used as the only input of two hash functions.
    ifindexes tend to be the same for all netns (lo.ifindex==1 per instance)
    This will be fixed in a separate patch.

Fixes: 72ad937abd0a ("net: Add support for batching network namespace cleanups")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 net/core/net_namespace.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index ac852db83de9..cbff7d94b993 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -183,8 +183,10 @@ static void ops_exit_list(const struct pernet_operations *ops,
 {
 	struct net *net;
 	if (ops->exit) {
-		list_for_each_entry(net, net_exit_list, exit_list)
+		list_for_each_entry(net, net_exit_list, exit_list) {
 			ops->exit(net);
+			cond_resched();
+		}
 	}
 	if (ops->exit_batch)
 		ops->exit_batch(net_exit_list);
-- 
Gitee


From fe6d02de4c5df41b8b16f86a89a05b60823dde90 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Mon, 10 Jan 2022 14:43:06 +0100
Subject: [PATCH 098/100] xfrm: Don't accidentally set RTO_ONLINK in
 decode_session4()

stable inclusion
from stable-5.10.94
commit 7f2ca96bd26644517ab03357bcf2982cf79c2ff7
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit 23e7b1bfed61e301853b5e35472820d919498278 upstream.

Similar to commit 94e2238969e8 ("xfrm4: strip ECN bits from tos field"),
clear the ECN bits from iph->tos when setting ->flowi4_tos.
This ensures that the last bit of ->flowi4_tos is cleared, so
ip_route_output_key_hash() isn't going to restrict the scope of the
route lookup.

Use ~INET_ECN_MASK instead of IPTOS_RT_MASK, because we have no reason
to clear the high order bits.

Found by code inspection, compile tested only.

Fixes: 4da3089f2b58 ("[IPSEC]: Use TOS when doing tunnel lookups")
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 net/xfrm/xfrm_policy.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 8fa258fbdcfc..e12de9b989f5 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -31,6 +31,7 @@
 #include <linux/if_tunnel.h>
 #include <net/dst.h>
 #include <net/flow.h>
+#include <net/inet_ecn.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
 #include <net/gre.h>
@@ -3297,7 +3298,7 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse)
 	fl4->flowi4_proto = iph->protocol;
 	fl4->daddr = reverse ? iph->saddr : iph->daddr;
 	fl4->saddr = reverse ? iph->daddr : iph->saddr;
-	fl4->flowi4_tos = iph->tos;
+	fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK;
 
 	if (!ip_is_fragment(iph)) {
 		switch (iph->protocol) {
-- 
Gitee


From 0462a2582fcfa5b3b3162e5d4987168e57d981d2 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Mon, 10 Jan 2022 14:43:09 +0100
Subject: [PATCH 099/100] gre: Don't accidentally set RTO_ONLINK in
 gre_fill_metadata_dst()

stable inclusion
from stable-5.10.94
commit cd5c24d2230f418159d46766b2705a6058cf8650
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit f7716b318568b22fbf0e3be99279a979e217cf71 upstream.

Mask the ECN bits before initialising ->flowi4_tos. The tunnel key may
have the last ECN bit set, which will interfere with the route lookup
process as ip_route_output_key_hash() interpretes this bit specially
(to restrict the route scope).

Found by code inspection, compile tested only.

Fixes: 962924fa2b7a ("ip_gre: Refactor collect metatdata mode tunnel xmit to ip_md_tunnel_xmit")
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 net/ipv4/ip_gre.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a9cc05043fa4..e4504dd510c6 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -599,8 +599,9 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 
 	key = &info->key;
 	ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
-			    tunnel_id_to_key32(key->tun_id), key->tos, 0,
-			    skb->mark, skb_get_hash(skb));
+			    tunnel_id_to_key32(key->tun_id),
+			    key->tos & ~INET_ECN_MASK, 0, skb->mark,
+			    skb_get_hash(skb));
 	rt = ip_route_output_key(dev_net(dev), &fl4);
 	if (IS_ERR(rt))
 		return PTR_ERR(rt);
-- 
Gitee


From cfe21f6bfd06ef76941e4ad441e7c43abb294eb1 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Mon, 10 Jan 2022 14:43:11 +0100
Subject: [PATCH 100/100] libcxgb: Don't accidentally set RTO_ONLINK in
 cxgb_find_route()

stable inclusion
from stable-5.10.94
commit 7b9d40e9f60d3fdc2a130b943223308c338fc49d
category: bugfix
issue: #I4UU4A
CVE: NA

Signed-off-by: Wenfei <wenfei9@huawei.com>
---------------------------------------

commit a915deaa9abe4fb3a440312c954253a6a733608e upstream.

Mask the ECN bits before calling ip_route_output_ports(). The tos
variable might be passed directly from an IPv4 header, so it may have
the last ECN bit set. This interferes with the route lookup process as
ip_route_output_key_hash() interpretes this bit specially (to restrict
the route scope).

Found by code inspection, compile tested only.

Fixes: 804c2f3e36ef ("libcxgb,iw_cxgb4,cxgbit: add cxgb_find_route()")
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: wenfei <wenfei9@huawei.com>
---
 drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c
index d04a6c163445..da8d10475a08 100644
--- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c
+++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c
@@ -32,6 +32,7 @@
 
 #include <linux/tcp.h>
 #include <linux/ipv6.h>
+#include <net/inet_ecn.h>
 #include <net/route.h>
 #include <net/ip6_route.h>
 
@@ -99,7 +100,7 @@ cxgb_find_route(struct cxgb4_lld_info *lldi,
 
 	rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
 				   peer_port, local_port, IPPROTO_TCP,
-				   tos, 0);
+				   tos & ~INET_ECN_MASK, 0);
 	if (IS_ERR(rt))
 		return NULL;
 	n = dst_neigh_lookup(&rt->dst, &peer_ip);
-- 
Gitee