From 7edd4e6c65a5e6ed282a52131c64bf0ec1765aa3 Mon Sep 17 00:00:00 2001 From: Zheng Zengkai Date: Wed, 18 Dec 2024 17:40:25 +0800 Subject: [PATCH] performance test for kabi preserve 202412191140 Signed-off-by: Zheng Zengkai --- ...ude-msi-modify-kabi-size-of-msi_desc.patch | 45 ++++ ...ss-of-superblock-s-initialized-flags.patch | 40 ++++ ...e-CONFIG_CMA-by-default-in-openeuler.patch | 61 ++++++ ...ect-CONFIG_CMA-if-CONFIG_HYGON_CSV-y.patch | 35 +++ ...r-free-of-nreq-in-reqsk_timer_handle.patch | 60 ++++++ ...erve-padding-for-uapi-struct-bpf_lin.patch | 63 ++++++ ...tra-KABI-entry-for-struct-iopf_group.patch | 38 ++++ ...e-kabi-KABI-reservation-for-seq_file.patch | 45 ++++ ...tatx-kabi-KABI-reservation-for-kstat.patch | 38 ++++ ...-fine-grained-control-of-folio-sizes.patch | 200 ++++++++++++++++++ ...cgroup-fix-uaf-when-proc_cpuset_show.patch | 68 ++++++ ...ations-on-the-cgroup-root_list-RCU-s.patch | 145 +++++++++++++ ..._head-up-near-the-top-of-cgroup_root.patch | 84 ++++++++ ...uset-Prevent-UAF-in-proc_cpuset_show.patch | 110 ++++++++++ 0021-cgroup-add-more-reserve-kabi.patch | 90 ++++++++ 0022-14223.patch | 80 +++++++ 0023-14224.patch | 85 ++++++++ 0024-14225.patch | 154 ++++++++++++++ kernel.spec | 44 +++- 19 files changed, 1483 insertions(+), 2 deletions(-) create mode 100644 0005-include-msi-modify-kabi-size-of-msi_desc.patch create mode 100644 0007-nfs-fix-the-loss-of-superblock-s-initialized-flags.patch create mode 100644 0008-x86-config-Enable-CONFIG_CMA-by-default-in-openeuler.patch create mode 100644 0009-x86-Kconfig-Select-CONFIG_CMA-if-CONFIG_HYGON_CSV-y.patch create mode 100644 0010-tcp-Fix-use-after-free-of-nreq-in-reqsk_timer_handle.patch create mode 100644 0012-bpf-Add-kabi-reserve-padding-for-uapi-struct-bpf_lin.patch create mode 100644 0013-iommu-Reserve-extra-KABI-entry-for-struct-iopf_group.patch create mode 100644 0014-seq_file-kabi-KABI-reservation-for-seq_file.patch create mode 100644 0015-statx-kabi-KABI-reservation-for-kstat.patch create 
mode 100644 0016-fs-Allow-fine-grained-control-of-folio-sizes.patch create mode 100644 0017-Revert-cgroup-fix-uaf-when-proc_cpuset_show.patch create mode 100644 0018-cgroup-Make-operations-on-the-cgroup-root_list-RCU-s.patch create mode 100644 0019-cgroup-Move-rcu_head-up-near-the-top-of-cgroup_root.patch create mode 100644 0020-cgroup-cpuset-Prevent-UAF-in-proc_cpuset_show.patch create mode 100644 0021-cgroup-add-more-reserve-kabi.patch create mode 100644 0022-14223.patch create mode 100644 0023-14224.patch create mode 100644 0024-14225.patch diff --git a/0005-include-msi-modify-kabi-size-of-msi_desc.patch b/0005-include-msi-modify-kabi-size-of-msi_desc.patch new file mode 100644 index 00000000..79c77ab1 --- /dev/null +++ b/0005-include-msi-modify-kabi-size-of-msi_desc.patch @@ -0,0 +1,45 @@ +From 723d41836db7669ab658d3e07c62fcbe17d7d7f4 Mon Sep 17 00:00:00 2001 +From: zhengjunlong +Date: Fri, 11 Oct 2024 17:08:35 +0800 +Subject: [PATCH 01/17] include/msi: modify kabi size of msi_desc + +hulk inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IAW8JF + +---------------------------------------------------- + +Change the size of the pre-embedded memory for msi_desc to 40 bytes. 
+ +Signed-off-by: Zheng Junlong +--- + include/linux/msi.h | 11 ++++------- + 1 file changed, 4 insertions(+), 7 deletions(-) + +diff --git a/include/linux/msi.h b/include/linux/msi.h +index 7354ffb14856..5fd8a6caae98 100644 +--- a/include/linux/msi.h ++++ b/include/linux/msi.h +@@ -205,15 +205,12 @@ struct msi_desc { + union { + struct pci_msi_desc pci; + struct msi_desc_data data; +- KABI_RESERVE(1) +- KABI_RESERVE(2) +- KABI_RESERVE(3) +- KABI_RESERVE(4) ++ KABI_EXTEND_WITH_SIZE(KABI_RESERVE(1), 5) + }; ++ KABI_RESERVE(2) ++ KABI_RESERVE(3) ++ KABI_RESERVE(4) + KABI_RESERVE(5) +- KABI_RESERVE(6) +- KABI_RESERVE(7) +- KABI_RESERVE(8) + }; + + /* +-- +2.25.1 + diff --git a/0007-nfs-fix-the-loss-of-superblock-s-initialized-flags.patch b/0007-nfs-fix-the-loss-of-superblock-s-initialized-flags.patch new file mode 100644 index 00000000..1d3c32fe --- /dev/null +++ b/0007-nfs-fix-the-loss-of-superblock-s-initialized-flags.patch @@ -0,0 +1,40 @@ +From e68e6e3cf90ec8fb7893057c768d55e83855aaa0 Mon Sep 17 00:00:00 2001 +From: Li Lingfeng +Date: Mon, 16 Dec 2024 20:15:25 +0800 +Subject: [PATCH 03/17] nfs: fix the loss of superblock's initialized flags + +hulk inclusion +category: bugfix +bugzilla: https://gitee.com/openeuler/kernel/issues/IB42W1 + +-------------------------------- + +Commit 573573887e0b ("nfs: pass flags to second superblock") directly +assigns fc->sb_flags to dentry->d_sb->s_flags, which will cause the loss +of the initialized flags in dentry->d_sb->s_flags. + +Fix it by just passing SB_RDONLY from fc->sb_flags to +dentry->d_sb->s_flags. 
+ +Fixes: 573573887e0b ("nfs: pass flags to second superblock") +Signed-off-by: Li Lingfeng +--- + fs/nfs/nfs4super.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c +index bb13894ad152..e87f878178f3 100644 +--- a/fs/nfs/nfs4super.c ++++ b/fs/nfs/nfs4super.c +@@ -209,7 +209,7 @@ static int do_nfs4_mount(struct nfs_server *server, + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + +- dentry->d_sb->s_flags = fc->sb_flags; ++ dentry->d_sb->s_flags |= (fc->sb_flags & SB_RDONLY); + fc->root = dentry; + return 0; + } +-- +2.25.1 + diff --git a/0008-x86-config-Enable-CONFIG_CMA-by-default-in-openeuler.patch b/0008-x86-config-Enable-CONFIG_CMA-by-default-in-openeuler.patch new file mode 100644 index 00000000..f9c3ab22 --- /dev/null +++ b/0008-x86-config-Enable-CONFIG_CMA-by-default-in-openeuler.patch @@ -0,0 +1,61 @@ +From 844a44e5a21be8062fd0c120a75e9ecf97427ae8 Mon Sep 17 00:00:00 2001 +From: hanliyang +Date: Mon, 16 Dec 2024 20:44:36 +0800 +Subject: [PATCH 04/17] x86/config: Enable CONFIG_CMA by default in + openeuler_defconfig + +hygon inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBBNJI +CVE: NA + +--------------------------- + +Enabling CONFIG_CMA will change kabi. + +Enabling CONFIG_CMA will also enable CONFIG_DMA_CMA. 
+ +Signed-off-by: hanliyang +--- + arch/x86/configs/openeuler_defconfig | 18 +++++++++++++++++- + 1 file changed, 17 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig +index 8e8542796a13..adfaef0cb10c 100644 +--- a/arch/x86/configs/openeuler_defconfig ++++ b/arch/x86/configs/openeuler_defconfig +@@ -1158,7 +1158,11 @@ CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y + CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y + CONFIG_USE_PERCPU_NUMA_NODE_ID=y + CONFIG_HAVE_SETUP_PER_CPU_AREA=y +-# CONFIG_CMA is not set ++CONFIG_CMA=y ++# CONFIG_CMA_DEBUG is not set ++# CONFIG_CMA_DEBUGFS is not set ++# CONFIG_CMA_SYSFS is not set ++CONFIG_CMA_AREAS=19 + CONFIG_MEM_SOFT_DIRTY=y + CONFIG_GENERIC_EARLY_IOREMAP=y + CONFIG_DEFERRED_STRUCT_PAGE_INIT=y +@@ -9018,6 +9022,18 @@ CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED=y + CONFIG_SWIOTLB=y + # CONFIG_SWIOTLB_DYNAMIC is not set + CONFIG_DMA_COHERENT_POOL=y ++CONFIG_DMA_CMA=y ++# CONFIG_DMA_NUMA_CMA is not set ++ ++# ++# Default contiguous memory area size: ++# ++CONFIG_CMA_SIZE_MBYTES=0 ++CONFIG_CMA_SIZE_SEL_MBYTES=y ++# CONFIG_CMA_SIZE_SEL_PERCENTAGE is not set ++# CONFIG_CMA_SIZE_SEL_MIN is not set ++# CONFIG_CMA_SIZE_SEL_MAX is not set ++CONFIG_CMA_ALIGNMENT=8 + # CONFIG_DMA_API_DEBUG is not set + # CONFIG_DMA_MAP_BENCHMARK is not set + CONFIG_SGL_ALLOC=y +-- +2.25.1 + diff --git a/0009-x86-Kconfig-Select-CONFIG_CMA-if-CONFIG_HYGON_CSV-y.patch b/0009-x86-Kconfig-Select-CONFIG_CMA-if-CONFIG_HYGON_CSV-y.patch new file mode 100644 index 00000000..79f223e8 --- /dev/null +++ b/0009-x86-Kconfig-Select-CONFIG_CMA-if-CONFIG_HYGON_CSV-y.patch @@ -0,0 +1,35 @@ +From f0e6b8ca2a5b0bc1347906ff6b80422c4c9878b2 Mon Sep 17 00:00:00 2001 +From: hanliyang +Date: Mon, 16 Dec 2024 20:52:08 +0800 +Subject: [PATCH 05/17] x86/Kconfig: Select CONFIG_CMA if CONFIG_HYGON_CSV=y + +hygon inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBBNJI +CVE: NA + 
+--------------------------- + +The Hygon CSV3 uses CMA to manage CSV3 guest's private memory. If the +CONFIG_HYGON_CSV is enabled, then enable CONFIG_CMA automatically. + +Signed-off-by: hanliyang +--- + arch/x86/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index fcd0c3b2065d..a6bbe6029121 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -2075,6 +2075,7 @@ config HYGON_CSV + bool "Hygon secure virtualization CSV support" + default y + depends on CPU_SUP_HYGON && AMD_MEM_ENCRYPT ++ select CONFIG_CMA + help + Hygon CSV integrates secure processor, memory encryption and + memory isolation to provide the ability to protect guest's private +-- +2.25.1 + diff --git a/0010-tcp-Fix-use-after-free-of-nreq-in-reqsk_timer_handle.patch b/0010-tcp-Fix-use-after-free-of-nreq-in-reqsk_timer_handle.patch new file mode 100644 index 00000000..a07a0a57 --- /dev/null +++ b/0010-tcp-Fix-use-after-free-of-nreq-in-reqsk_timer_handle.patch @@ -0,0 +1,60 @@ +From 44c5a161852ac117a94ed7748784aecaab552b47 Mon Sep 17 00:00:00 2001 +From: Kuniyuki Iwashima +Date: Tue, 17 Dec 2024 16:33:23 +0800 +Subject: [PATCH 06/17] tcp: Fix use-after-free of nreq in + reqsk_timer_handler(). + +stable inclusion +from stable-v6.6.64 +commit 65ed89cad1f57034c256b016e89e8c0a4ec7c65b +category: bugfix +bugzilla: https://gitee.com/openeuler/kernel/issues/IBA6RL +CVE: NA + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=65ed89cad1f57034c256b016e89e8c0a4ec7c65b + +------------------------------------------------- + +[ Upstream commit c31e72d021db2714df03df6c42855a1db592716c ] + +The cited commit replaced inet_csk_reqsk_queue_drop_and_put() with +__inet_csk_reqsk_queue_drop() and reqsk_put() in reqsk_timer_handler(). + +Then, oreq should be passed to reqsk_put() instead of req; otherwise +use-after-free of nreq could happen when reqsk is migrated but the +retry attempt failed (e.g. due to timeout). 
+ +Let's pass oreq to reqsk_put(). + +Fixes: e8c526f2bdf1 ("tcp/dccp: Don't use timer_pending() in reqsk_queue_unlink().") +Reported-by: Liu Jian +Closes: https://lore.kernel.org/netdev/1284490f-9525-42ee-b7b8-ccadf6606f6d@huawei.com/ +Signed-off-by: Kuniyuki Iwashima +Reviewed-by: Vadim Fedorenko +Reviewed-by: Liu Jian +Reviewed-by: Eric Dumazet +Reviewed-by: Martin KaFai Lau +Link: https://patch.msgid.link/20241123174236.62438-1-kuniyu@amazon.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +Signed-off-by: Liu Jian +--- + net/ipv4/inet_connection_sock.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c +index ca8cc0988b61..bd032ac2376e 100644 +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -1124,7 +1124,7 @@ static void reqsk_timer_handler(struct timer_list *t) + + drop: + __inet_csk_reqsk_queue_drop(sk_listener, oreq, true); +- reqsk_put(req); ++ reqsk_put(oreq); + } + + static bool reqsk_queue_hash_req(struct request_sock *req, +-- +2.25.1 + diff --git a/0012-bpf-Add-kabi-reserve-padding-for-uapi-struct-bpf_lin.patch b/0012-bpf-Add-kabi-reserve-padding-for-uapi-struct-bpf_lin.patch new file mode 100644 index 00000000..9a958456 --- /dev/null +++ b/0012-bpf-Add-kabi-reserve-padding-for-uapi-struct-bpf_lin.patch @@ -0,0 +1,63 @@ +From c189729809e4c7a6298126a76db608da2b571240 Mon Sep 17 00:00:00 2001 +From: Pu Lehui +Date: Wed, 18 Dec 2024 06:24:00 +0000 +Subject: [PATCH 08/17] bpf: Add kabi reserve padding for uapi struct + bpf_link_info + +hulk inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBC248 + +-------------------------------- + +Add kabi reserve padding for uapi struct bpf_link_info + +Signed-off-by: Pu Lehui +--- + include/uapi/linux/bpf.h | 9 +++++++++ + tools/include/uapi/linux/bpf.h | 9 +++++++++ + 2 files changed, 18 insertions(+) + +diff --git a/include/uapi/linux/bpf.h 
b/include/uapi/linux/bpf.h +index 482647774bf5..a660cb68c853 100644 +--- a/include/uapi/linux/bpf.h ++++ b/include/uapi/linux/bpf.h +@@ -6573,6 +6573,15 @@ struct bpf_link_info { + __u64 config; + __u32 type; + } event; /* BPF_PERF_EVENT_EVENT */ ++ struct { ++ __u64:64; ++ __u32:32; ++ __u32:32; ++ __u64:64; ++ __u64:64; ++ __u64:64; ++ __u64:64; ++ } kabi_reserve; + }; + } perf_event; + struct { +diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h +index c112c6f7c766..9b302242be6c 100644 +--- a/tools/include/uapi/linux/bpf.h ++++ b/tools/include/uapi/linux/bpf.h +@@ -6576,6 +6576,15 @@ struct bpf_link_info { + __u64 config; + __u32 type; + } event; /* BPF_PERF_EVENT_EVENT */ ++ struct { ++ __u64:64; ++ __u32:32; ++ __u32:32; ++ __u64:64; ++ __u64:64; ++ __u64:64; ++ __u64:64; ++ } kabi_reserve; + }; + } perf_event; + struct { +-- +2.25.1 + diff --git a/0013-iommu-Reserve-extra-KABI-entry-for-struct-iopf_group.patch b/0013-iommu-Reserve-extra-KABI-entry-for-struct-iopf_group.patch new file mode 100644 index 00000000..43e830cb --- /dev/null +++ b/0013-iommu-Reserve-extra-KABI-entry-for-struct-iopf_group.patch @@ -0,0 +1,38 @@ +From bbfb8fd7b1297acf7769a814f3fbf919afd391dc Mon Sep 17 00:00:00 2001 +From: Zhang Zekun +Date: Wed, 18 Dec 2024 14:43:35 +0800 +Subject: [PATCH 09/17] iommu: Reserve extra KABI entry for struct iopf_group + +hulk inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBBRHP + +--------------------------------------------------------------- + +The list_head entry in iopf_group has been moved to iopf_group_extend +for KABI compatibility and the lack of KABI reserve entry. Reserve extra +kabi entry for future usage. 
+ +Signed-off-by: Zhang Zekun +--- + include/linux/iommu.h | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/include/linux/iommu.h b/include/linux/iommu.h +index bb463cb96a44..83ec4bf9809e 100644 +--- a/include/linux/iommu.h ++++ b/include/linux/iommu.h +@@ -155,6 +155,10 @@ struct iopf_group { + KABI_USE(2, u32 cookie) + KABI_RESERVE(3) + KABI_RESERVE(4) ++ KABI_RESERVE(5) ++ KABI_RESERVE(6) ++ KABI_RESERVE(7) ++ KABI_RESERVE(8) + }; + + struct iopf_group_extend { +-- +2.25.1 + diff --git a/0014-seq_file-kabi-KABI-reservation-for-seq_file.patch b/0014-seq_file-kabi-KABI-reservation-for-seq_file.patch new file mode 100644 index 00000000..371e3afe --- /dev/null +++ b/0014-seq_file-kabi-KABI-reservation-for-seq_file.patch @@ -0,0 +1,45 @@ +From 1cb26ea1471efb775f2aa141863e82efead07d61 Mon Sep 17 00:00:00 2001 +From: Baokun Li +Date: Wed, 18 Dec 2024 15:21:56 +0800 +Subject: [PATCH 10/17] seq_file: kabi: KABI reservation for seq_file + +hulk inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBC34X + +---------------------------------------------------------------------- + + structure size reserves reserved + seq_file 120 1 128 + seq_operations 32 1 40 + +Signed-off-by: Baokun Li +--- + include/linux/seq_file.h | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h +index 234bcdb1fba4..cf4a2258df85 100644 +--- a/include/linux/seq_file.h ++++ b/include/linux/seq_file.h +@@ -27,6 +27,8 @@ struct seq_file { + int poll_event; + const struct file *file; + void *private; ++ ++ KABI_RESERVE(1) + }; + + struct seq_operations { +@@ -34,6 +36,8 @@ struct seq_operations { + void (*stop) (struct seq_file *m, void *v); + void * (*next) (struct seq_file *m, void *v, loff_t *pos); + int (*show) (struct seq_file *m, void *v); ++ ++ KABI_RESERVE(1) + }; + + #define SEQ_SKIP 1 +-- +2.25.1 + diff --git a/0015-statx-kabi-KABI-reservation-for-kstat.patch 
b/0015-statx-kabi-KABI-reservation-for-kstat.patch new file mode 100644 index 00000000..12b7151d --- /dev/null +++ b/0015-statx-kabi-KABI-reservation-for-kstat.patch @@ -0,0 +1,38 @@ +From ed5b59b6c40d2563994c1f7b5a1321affb490d45 Mon Sep 17 00:00:00 2001 +From: Baokun Li +Date: Wed, 18 Dec 2024 15:23:01 +0800 +Subject: [PATCH 11/17] statx: kabi: KABI reservation for kstat + +hulk inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBC24E + +---------------------------------------------------------------------- + + structure size reserves reserved mainline + kstat 160 4 192 184 + +Signed-off-by: Baokun Li +--- + include/linux/stat.h | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/include/linux/stat.h b/include/linux/stat.h +index 52150570d37a..d342e89b7aaa 100644 +--- a/include/linux/stat.h ++++ b/include/linux/stat.h +@@ -53,6 +53,11 @@ struct kstat { + u32 dio_mem_align; + u32 dio_offset_align; + u64 change_cookie; ++ ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++ KABI_RESERVE(3) ++ KABI_RESERVE(4) + }; + + /* These definitions are internal to the kernel for now. Mainly used by nfsd. 
*/ +-- +2.25.1 + diff --git a/0016-fs-Allow-fine-grained-control-of-folio-sizes.patch b/0016-fs-Allow-fine-grained-control-of-folio-sizes.patch new file mode 100644 index 00000000..ca2556d0 --- /dev/null +++ b/0016-fs-Allow-fine-grained-control-of-folio-sizes.patch @@ -0,0 +1,200 @@ +From 30f7b1506ec798949e6ce99c023780b0306845c9 Mon Sep 17 00:00:00 2001 +From: "Matthew Wilcox (Oracle)" +Date: Wed, 18 Dec 2024 15:31:44 +0800 +Subject: [PATCH 12/17] fs: Allow fine-grained control of folio sizes + +mainline inclusion +from mainline-v6.10-rc2 +commit 84429b675bcfd2a518ae167ee4661cdf7539aa7d +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBC20Q + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=84429b675bcfd2a518ae167ee4661cdf7539aa7d + +-------------------------------- + +We need filesystems to be able to communicate acceptable folio sizes +to the pagecache for a variety of uses (e.g. large block sizes). +Support a range of folio sizes between order-0 and order-31. + +Signed-off-by: Matthew Wilcox (Oracle) +Co-developed-by: Pankaj Raghav +Signed-off-by: Pankaj Raghav +Link: https://lore.kernel.org/r/20240822135018.1931258-2-kernel@pankajraghav.com +Tested-by: David Howells +Reviewed-by: Hannes Reinecke +Reviewed-by: Darrick J. 
Wong +Reviewed-by: Daniel Gomez +Signed-off-by: Christian Brauner +Conflicts: + include/linux/pagemap.h + mm/filemap.c +[Conflicts due to not merged 83ee0e20fd9f ("filemap: support disable large +folios on active inode")] +Signed-off-by: Long Li +--- + include/linux/pagemap.h | 90 +++++++++++++++++++++++++++++++++++------ + mm/readahead.c | 4 +- + 2 files changed, 79 insertions(+), 15 deletions(-) + +diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h +index 429627abfef4..e44e377661f2 100644 +--- a/include/linux/pagemap.h ++++ b/include/linux/pagemap.h +@@ -203,12 +203,21 @@ enum mapping_flags { + AS_EXITING = 4, /* final truncate in progress */ + /* writeback related tags are not used */ + AS_NO_WRITEBACK_TAGS = 5, +- AS_LARGE_FOLIO_SUPPORT = 6, +- AS_RELEASE_ALWAYS, /* Call ->release_folio(), even if no private data */ +- AS_STABLE_WRITES, /* must wait for writeback before modifying ++ AS_RELEASE_ALWAYS = 6, /* Call ->release_folio(), even if no private data */ ++ AS_STABLE_WRITES = 7, /* must wait for writeback before modifying + folio contents */ ++ AS_INACCESSIBLE = 8, /* Do not attempt direct R/W access to the mapping */ ++ /* Bits 16-25 are used for FOLIO_ORDER */ ++ AS_FOLIO_ORDER_BITS = 5, ++ AS_FOLIO_ORDER_MIN = 16, ++ AS_FOLIO_ORDER_MAX = AS_FOLIO_ORDER_MIN + AS_FOLIO_ORDER_BITS, + }; + ++#define AS_FOLIO_ORDER_BITS_MASK ((1u << AS_FOLIO_ORDER_BITS) - 1) ++#define AS_FOLIO_ORDER_MIN_MASK (AS_FOLIO_ORDER_BITS_MASK << AS_FOLIO_ORDER_MIN) ++#define AS_FOLIO_ORDER_MAX_MASK (AS_FOLIO_ORDER_BITS_MASK << AS_FOLIO_ORDER_MAX) ++#define AS_FOLIO_ORDER_MASK (AS_FOLIO_ORDER_MIN_MASK | AS_FOLIO_ORDER_MAX_MASK) ++ + /** + * mapping_set_error - record a writeback error in the address_space + * @mapping: the mapping in which an error should be set +@@ -348,9 +357,51 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) + #define MAX_XAS_ORDER (XA_CHUNK_SHIFT * 2 - 1) + #define MAX_PAGECACHE_ORDER min(MAX_XAS_ORDER, 
PREFERRED_MAX_PAGECACHE_ORDER) + ++/* ++ * mapping_set_folio_order_range() - Set the orders supported by a file. ++ * @mapping: The address space of the file. ++ * @min: Minimum folio order (between 0-MAX_PAGECACHE_ORDER inclusive). ++ * @max: Maximum folio order (between @min-MAX_PAGECACHE_ORDER inclusive). ++ * ++ * The filesystem should call this function in its inode constructor to ++ * indicate which base size (min) and maximum size (max) of folio the VFS ++ * can use to cache the contents of the file. This should only be used ++ * if the filesystem needs special handling of folio sizes (ie there is ++ * something the core cannot know). ++ * Do not tune it based on, eg, i_size. ++ * ++ * Context: This should not be called while the inode is active as it ++ * is non-atomic. ++ */ ++static inline void mapping_set_folio_order_range(struct address_space *mapping, ++ unsigned int min, ++ unsigned int max) ++{ ++ if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) ++ return; ++ ++ if (min > MAX_PAGECACHE_ORDER) ++ min = MAX_PAGECACHE_ORDER; ++ ++ if (max > MAX_PAGECACHE_ORDER) ++ max = MAX_PAGECACHE_ORDER; ++ ++ if (max < min) ++ max = min; ++ ++ mapping->flags = (mapping->flags & ~AS_FOLIO_ORDER_MASK) | ++ (min << AS_FOLIO_ORDER_MIN) | (max << AS_FOLIO_ORDER_MAX); ++} ++ ++static inline void mapping_set_folio_min_order(struct address_space *mapping, ++ unsigned int min) ++{ ++ mapping_set_folio_order_range(mapping, min, MAX_PAGECACHE_ORDER); ++} ++ + /** + * mapping_set_large_folios() - Indicate the file supports large folios. +- * @mapping: The file. ++ * @mapping: The address space of the file. 
+ * + * The filesystem should call this function in its inode constructor to + * indicate that the VFS can use large folios to cache the contents of +@@ -361,7 +412,23 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) + */ + static inline void mapping_set_large_folios(struct address_space *mapping) + { +- __set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); ++ mapping_set_folio_order_range(mapping, 0, MAX_PAGECACHE_ORDER); ++} ++ ++static inline unsigned int ++mapping_max_folio_order(const struct address_space *mapping) ++{ ++ if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) ++ return 0; ++ return (mapping->flags & AS_FOLIO_ORDER_MAX_MASK) >> AS_FOLIO_ORDER_MAX; ++} ++ ++static inline unsigned int ++mapping_min_folio_order(const struct address_space *mapping) ++{ ++ if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) ++ return 0; ++ return (mapping->flags & AS_FOLIO_ORDER_MIN_MASK) >> AS_FOLIO_ORDER_MIN; + } + + /** +@@ -375,7 +442,7 @@ static inline void mapping_set_large_folios(struct address_space *mapping) + static inline void mapping_clear_large_folios(struct address_space *mapping) + { + WARN_ON_ONCE(!rwsem_is_locked(&mapping->invalidate_lock)); +- __clear_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); ++ mapping_set_folio_order_range(mapping, 0, 0); + } + + /* +@@ -384,20 +451,17 @@ static inline void mapping_clear_large_folios(struct address_space *mapping) + */ + static inline bool mapping_large_folio_support(struct address_space *mapping) + { +- /* AS_LARGE_FOLIO_SUPPORT is only reasonable for pagecache folios */ ++ /* AS_FOLIO_ORDER is only reasonable for pagecache folios */ + VM_WARN_ONCE((unsigned long)mapping & PAGE_MAPPING_ANON, + "Anonymous mapping always supports large folio"); + +- return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && +- test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); ++ return mapping_max_folio_order(mapping) > 0; + } + + /* Return the maximum folio size for this pagecache mapping, in bytes. 
 */ +-static inline size_t mapping_max_folio_size(struct address_space *mapping) ++static inline size_t mapping_max_folio_size(const struct address_space *mapping) + { +- if (mapping_large_folio_support(mapping)) +- return PAGE_SIZE << MAX_PAGECACHE_ORDER; +- return PAGE_SIZE; ++ return PAGE_SIZE << mapping_max_folio_order(mapping); + } + + static inline int filemap_nr_thps(struct address_space *mapping) +diff --git a/mm/readahead.c b/mm/readahead.c +index 438f142a3e74..c13c130efcca 100644 +--- a/mm/readahead.c ++++ b/mm/readahead.c +@@ -513,10 +513,10 @@ void page_cache_ra_order(struct readahead_control *ractl, + + limit = min(limit, index + ra->size - 1); + +- if (new_order < MAX_PAGECACHE_ORDER) ++ if (new_order < mapping_max_folio_order(mapping)) + new_order += 2; + +- new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order); ++ new_order = min(mapping_max_folio_order(mapping), new_order); + new_order = min_t(unsigned int, new_order, ilog2(ra->size)); + + /* See comment in page_cache_ra_unbounded() */ +-- +2.25.1 + diff --git a/0017-Revert-cgroup-fix-uaf-when-proc_cpuset_show.patch b/0017-Revert-cgroup-fix-uaf-when-proc_cpuset_show.patch new file mode 100644 index 00000000..ebe3ba02 --- /dev/null +++ b/0017-Revert-cgroup-fix-uaf-when-proc_cpuset_show.patch @@ -0,0 +1,68 @@ +From 8c8766f9500b9ffdb907d23269aa888d0632e68c Mon Sep 17 00:00:00 2001 +From: Chen Ridong +Date: Wed, 18 Dec 2024 08:10:59 +0000 +Subject: [PATCH 13/17] Revert "cgroup: fix uaf when proc_cpuset_show" + +hulk inclusion +category: bugfix +bugzilla: https://gitee.com/openeuler/kernel/issues/IA9YQ9 + +-------------------------------- + +To stay consistent with mainline, revert this commit and backport the +LTS patch instead. +This reverts commit 24c448de81d48ad08925dda9869bcf535a3258b8. 
+ +Fixes: 24c448de81d4 ("cgroup: fix uaf when proc_cpuset_show") +Signed-off-by: Chen Ridong +--- + kernel/cgroup/cpuset.c | 24 ------------------------ + 1 file changed, 24 deletions(-) + +diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c +index 2c9e50f09fc1..140dfb5ad3fc 100644 +--- a/kernel/cgroup/cpuset.c ++++ b/kernel/cgroup/cpuset.c +@@ -5185,7 +5185,6 @@ int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, + char *buf; + struct cgroup_subsys_state *css; + int retval; +- struct cgroup *root_cgroup = NULL; + + retval = -ENOMEM; + buf = kmalloc(PATH_MAX, GFP_KERNEL); +@@ -5193,32 +5192,9 @@ int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, + goto out; + + css = task_get_css(tsk, cpuset_cgrp_id); +- rcu_read_lock(); +- /* +- * When the cpuset subsystem is mounted on the legacy hierarchy, +- * the top_cpuset.css->cgroup does not hold a reference count of +- * cgroup_root.cgroup. This makes accessing css->cgroup very +- * dangerous because when the cpuset subsystem is remounted to the +- * default hierarchy, the cgroup_root.cgroup that css->cgroup points +- * to will be released, leading to a UAF issue. To avoid this problem, +- * get the reference count of top_cpuset.css->cgroup first. +- * +- * This is ugly!! 
+- */ +- if (css == &top_cpuset.css) { +- root_cgroup = css->cgroup; +- if (!css_tryget_online(&root_cgroup->self)) { +- rcu_read_unlock(); +- retval = -EBUSY; +- goto out_free; +- } +- } +- rcu_read_unlock(); + retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX, + current->nsproxy->cgroup_ns); + css_put(css); +- if (root_cgroup) +- css_put(&root_cgroup->self); + if (retval >= PATH_MAX) + retval = -ENAMETOOLONG; + if (retval < 0) +-- +2.25.1 + diff --git a/0018-cgroup-Make-operations-on-the-cgroup-root_list-RCU-s.patch b/0018-cgroup-Make-operations-on-the-cgroup-root_list-RCU-s.patch new file mode 100644 index 00000000..0c54088d --- /dev/null +++ b/0018-cgroup-Make-operations-on-the-cgroup-root_list-RCU-s.patch @@ -0,0 +1,145 @@ +From 7b6abe1742cbfedea405f03fcf7fc88cacb2a205 Mon Sep 17 00:00:00 2001 +From: Yafang Shao +Date: Wed, 18 Dec 2024 08:11:00 +0000 +Subject: [PATCH 14/17] cgroup: Make operations on the cgroup root_list RCU + safe +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +stable inclusion +from stable-v6.6.47 +commit dd9542ae7c7ca82ed2d7c185754ba9026361f6bc +category: bugfix +bugzilla: https://gitee.com/openeuler/kernel/issues/IAP55A + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=dd9542ae7c7ca82ed2d7c185754ba9026361f6bc + +-------------------------------- + +commit d23b5c577715892c87533b13923306acc6243f93 upstream. + +At present, when we perform operations on the cgroup root_list, we must +hold the cgroup_mutex, which is a relatively heavyweight lock. In reality, +we can make operations on this list RCU-safe, eliminating the need to hold +the cgroup_mutex during traversal. Modifications to the list only occur in +the cgroup root setup and destroy paths, which should be infrequent in a +production environment. In contrast, traversal may occur frequently. +Therefore, making it RCU-safe would be beneficial. 
+ +Signed-off-by: Yafang Shao +Signed-off-by: Tejun Heo +To: Michal Koutný +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Chen Ridong +--- + include/linux/cgroup-defs.h | 1 + + kernel/cgroup/cgroup-internal.h | 3 ++- + kernel/cgroup/cgroup.c | 23 ++++++++++++++++------- + 3 files changed, 19 insertions(+), 8 deletions(-) + +diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h +index 6e3227a688de..05ece896af7d 100644 +--- a/include/linux/cgroup-defs.h ++++ b/include/linux/cgroup-defs.h +@@ -591,6 +591,7 @@ struct cgroup_root { + + /* A list running through the active hierarchies */ + struct list_head root_list; ++ struct rcu_head rcu; + + /* Hierarchy-specific flags */ + unsigned int flags; +diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h +index 96a9bd2c26f0..f5fb12890645 100644 +--- a/kernel/cgroup/cgroup-internal.h ++++ b/kernel/cgroup/cgroup-internal.h +@@ -170,7 +170,8 @@ extern struct list_head cgroup_roots; + + /* iterate across the hierarchies */ + #define for_each_root(root) \ +- list_for_each_entry((root), &cgroup_roots, root_list) ++ list_for_each_entry_rcu((root), &cgroup_roots, root_list, \ ++ lockdep_is_held(&cgroup_mutex)) + + /** + * for_each_subsys - iterate all enabled cgroup subsystems +diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c +index 52fe6ba2fefd..c26a9b3a3576 100644 +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -1315,7 +1315,7 @@ static void cgroup_exit_root_id(struct cgroup_root *root) + + void cgroup_free_root(struct cgroup_root *root) + { +- kfree(root); ++ kfree_rcu(root, rcu); + } + + static void cgroup_destroy_root(struct cgroup_root *root) +@@ -1348,7 +1348,7 @@ static void cgroup_destroy_root(struct cgroup_root *root) + spin_unlock_irq(&css_set_lock); + + if (!list_empty(&root->root_list)) { +- list_del(&root->root_list); ++ list_del_rcu(&root->root_list); + cgroup_root_count--; + } + +@@ -1388,7 +1388,15 @@ static inline struct cgroup 
*__cset_cgroup_from_root(struct css_set *cset, + } + } + +- BUG_ON(!res_cgroup); ++ /* ++ * If cgroup_mutex is not held, the cgrp_cset_link will be freed ++ * before we remove the cgroup root from the root_list. Consequently, ++ * when accessing a cgroup root, the cset_link may have already been ++ * freed, resulting in a NULL res_cgroup. However, by holding the ++ * cgroup_mutex, we ensure that res_cgroup can't be NULL. ++ * If we don't hold cgroup_mutex in the caller, we must do the NULL ++ * check. ++ */ + return res_cgroup; + } + +@@ -1447,7 +1455,6 @@ static struct cgroup *current_cgns_cgroup_dfl(void) + static struct cgroup *cset_cgroup_from_root(struct css_set *cset, + struct cgroup_root *root) + { +- lockdep_assert_held(&cgroup_mutex); + lockdep_assert_held(&css_set_lock); + + return __cset_cgroup_from_root(cset, root); +@@ -1455,7 +1462,9 @@ static struct cgroup *cset_cgroup_from_root(struct css_set *cset, + + /* + * Return the cgroup for "task" from the given hierarchy. Must be +- * called with cgroup_mutex and css_set_lock held. ++ * called with css_set_lock held to prevent task's groups from being modified. ++ * Must be called with either cgroup_mutex or rcu read lock to prevent the ++ * cgroup root from being destroyed. + */ + struct cgroup *task_cgroup_from_root(struct task_struct *task, + struct cgroup_root *root) +@@ -2030,7 +2039,7 @@ void init_cgroup_root(struct cgroup_fs_context *ctx) + struct cgroup_root *root = ctx->root; + struct cgroup *cgrp = &root->cgrp; + +- INIT_LIST_HEAD(&root->root_list); ++ INIT_LIST_HEAD_RCU(&root->root_list); + atomic_set(&root->nr_cgrps, 1); + cgrp->root = root; + init_cgroup_housekeeping(cgrp); +@@ -2114,7 +2123,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) + * care of subsystems' refcounts, which are explicitly dropped in + * the failure exit path. 
+ */ +- list_add(&root->root_list, &cgroup_roots); ++ list_add_rcu(&root->root_list, &cgroup_roots); + cgroup_root_count++; + + /* +-- +2.25.1 + diff --git a/0019-cgroup-Move-rcu_head-up-near-the-top-of-cgroup_root.patch b/0019-cgroup-Move-rcu_head-up-near-the-top-of-cgroup_root.patch new file mode 100644 index 00000000..45d78024 --- /dev/null +++ b/0019-cgroup-Move-rcu_head-up-near-the-top-of-cgroup_root.patch @@ -0,0 +1,84 @@ +From 4363688e9b49bde3cce7b2ea1882f3d44d1f5289 Mon Sep 17 00:00:00 2001 +From: Waiman Long +Date: Wed, 18 Dec 2024 08:11:01 +0000 +Subject: [PATCH 15/17] cgroup: Move rcu_head up near the top of cgroup_root +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +stable inclusion +from stable-v6.6.47 +commit f3c60ab676bb62e01d004d5b1cf2963a296c8e6a +category: bugfix +bugzilla: https://gitee.com/openeuler/kernel/issues/IAP55A + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=f3c60ab676bb62e01d004d5b1cf2963a296c8e6a + +-------------------------------- + +commit a7fb0423c201ba12815877a0b5a68a6a1710b23a upstream. + +Commit 331654dc5f40 ("cgroup: Make operations on the cgroup root_list RCU +safe") adds a new rcu_head to the cgroup_root structure and kvfree_rcu() +for freeing the cgroup_root. + +The current implementation of kvfree_rcu(), however, has the limitation +that the offset of the rcu_head structure within the larger data +structure must be less than 4096 or the compilation will fail. See the +macro definition of __is_kvfree_rcu_offset() in include/linux/rcupdate.h +for more information. + +By putting rcu_head below the large cgroup structure, any change to the +cgroup structure that makes it larger run the risk of causing build +failure under certain configurations. Commit 77070eeb8821 ("cgroup: +Avoid false cacheline sharing of read mostly rstat_cpu") happens to be +the last straw that breaks it. 
Fix this problem by moving the rcu_head +structure up before the cgroup structure. + +Fixes: 331654dc5f40 ("cgroup: Make operations on the cgroup root_list RCU safe") +Reported-by: Stephen Rothwell +Closes: https://lore.kernel.org/lkml/20231207143806.114e0a74@canb.auug.org.au/ +Signed-off-by: Waiman Long +Acked-by: Yafang Shao +Reviewed-by: Yosry Ahmed +Reviewed-by: Michal Koutný +Signed-off-by: Tejun Heo +Signed-off-by: Greg Kroah-Hartman + +Conflicts: + include/linux/cgroup-defs.h +[Context is mismatched for wait_queue_head_t wait was merged] +Signed-off-by: Chen Ridong +--- + include/linux/cgroup-defs.h | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h +index 05ece896af7d..8eb518ce87a1 100644 +--- a/include/linux/cgroup-defs.h ++++ b/include/linux/cgroup-defs.h +@@ -573,6 +573,10 @@ struct cgroup_root { + /* Unique id for this hierarchy. */ + int hierarchy_id; + ++ /* A list running through the active hierarchies */ ++ struct list_head root_list; ++ struct rcu_head rcu; /* Must be near the top */ ++ + /* + * The root cgroup. The containing cgroup_root will be destroyed on its + * release. 
cgrp->ancestors[0] will be used overflowing into the +@@ -589,10 +593,6 @@ struct cgroup_root { + /* Wait while cgroups are being destroyed */ + wait_queue_head_t wait; + +- /* A list running through the active hierarchies */ +- struct list_head root_list; +- struct rcu_head rcu; +- + /* Hierarchy-specific flags */ + unsigned int flags; + +-- +2.25.1 + diff --git a/0020-cgroup-cpuset-Prevent-UAF-in-proc_cpuset_show.patch b/0020-cgroup-cpuset-Prevent-UAF-in-proc_cpuset_show.patch new file mode 100644 index 00000000..c528ff32 --- /dev/null +++ b/0020-cgroup-cpuset-Prevent-UAF-in-proc_cpuset_show.patch @@ -0,0 +1,110 @@ +From 724b6581cd8b49962e3add6e8795423f2c1390f8 Mon Sep 17 00:00:00 2001 +From: Chen Ridong +Date: Wed, 18 Dec 2024 08:11:02 +0000 +Subject: [PATCH 16/17] cgroup/cpuset: Prevent UAF in proc_cpuset_show() + +stable inclusion +from stable-v6.6.44 +commit 96226fbed566f3f686f53a489a29846f2d538080 +category: bugfix +bugzilla: https://gitee.com/openeuler/kernel/issues/IAP55A + +Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=96226fbed566f3f686f53a489a29846f2d538080 + +-------------------------------- + +[ Upstream commit 1be59c97c83ccd67a519d8a49486b3a8a73ca28a ] + +A UAF can happen when /proc/cpuset is read as reported in [1]. + +This can be reproduced by the following methods: +1.add an mdelay(1000) before acquiring the cgroup_lock in the + cgroup_path_ns function. +2.$cat /proc/<pid>/cpuset repeatedly. +3.$mount -t cgroup -o cpuset cpuset /sys/fs/cgroup/cpuset/ +$umount /sys/fs/cgroup/cpuset/ repeatedly. 
+ +The race that causes this bug can be shown as below: + +(umount) | (cat /proc/<pid>/cpuset) +css_release | proc_cpuset_show +css_release_work_fn | css = task_get_css(tsk, cpuset_cgrp_id); +css_free_rwork_fn | cgroup_path_ns(css->cgroup, ...); +cgroup_destroy_root | mutex_lock(&cgroup_mutex); +rebind_subsystems | +cgroup_free_root | + | // cgrp was freed, UAF + | cgroup_path_ns_locked(cgrp,..); + +When the cpuset is initialized, the root node top_cpuset.css.cgrp +will point to &cgrp_dfl_root.cgrp. In cgroup v1, the mount operation will +allocate cgroup_root, and top_cpuset.css.cgrp will point to the allocated +&cgroup_root.cgrp. When the umount operation is executed, +top_cpuset.css.cgrp will be rebound to &cgrp_dfl_root.cgrp. + +The problem is that when rebinding to cgrp_dfl_root, there are cases +where the cgroup_root allocated by setting up the root for cgroup v1 +is cached. This could lead to a Use-After-Free (UAF) if it is +subsequently freed. The descendant cgroups of cgroup v1 can only be +freed after the css is released. However, the css of the root will never +be released, yet the cgroup_root should be freed when it is unmounted. +This means that obtaining a reference to the css of the root does +not guarantee that css.cgrp->root will not be freed. + +Fix this problem by using rcu_read_lock in proc_cpuset_show(). +As cgroup_root is kfree_rcu after commit 331654dc5f40 +("cgroup: Make operations on the cgroup root_list RCU safe"), +css->cgroup won't be freed during the critical section. +To call cgroup_path_ns_locked, css_set_lock is needed, so it is safe to +replace task_get_css with task_css. 
+ +[1] https://syzkaller.appspot.com/bug?extid=9b1ff7be974a403aa4cd + +Fixes: a79a908fd2b0 ("cgroup: introduce cgroup namespaces") +Signed-off-by: Chen Ridong +Signed-off-by: Tejun Heo +Signed-off-by: Sasha Levin + +Conflicts: + kernel/cgroup/cpuset.c +[commit 5715456af3e0 ("kernfs: Convert kernfs_path_from_node_locked() +from strlcpy() to strscpy()") was not merged] +Signed-off-by: Chen Ridong +--- + kernel/cgroup/cpuset.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c +index 140dfb5ad3fc..f3cf9b1268e0 100644 +--- a/kernel/cgroup/cpuset.c ++++ b/kernel/cgroup/cpuset.c +@@ -21,6 +21,7 @@ + * License. See the file COPYING in the main directory of the Linux + * distribution for more details. + */ ++#include "cgroup-internal.h" + + #include + #include +@@ -5191,10 +5192,14 @@ int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, + if (!buf) + goto out; + +- css = task_get_css(tsk, cpuset_cgrp_id); +- retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX, +- current->nsproxy->cgroup_ns); +- css_put(css); ++ rcu_read_lock(); ++ spin_lock_irq(&css_set_lock); ++ css = task_css(tsk, cpuset_cgrp_id); ++ retval = cgroup_path_ns_locked(css->cgroup, buf, PATH_MAX, ++ current->nsproxy->cgroup_ns); ++ spin_unlock_irq(&css_set_lock); ++ rcu_read_unlock(); ++ + if (retval >= PATH_MAX) + retval = -ENAMETOOLONG; + if (retval < 0) +-- +2.25.1 + diff --git a/0021-cgroup-add-more-reserve-kabi.patch b/0021-cgroup-add-more-reserve-kabi.patch new file mode 100644 index 00000000..5c0ed080 --- /dev/null +++ b/0021-cgroup-add-more-reserve-kabi.patch @@ -0,0 +1,90 @@ +From d68991f87f738657074d93a1ae8ccf865f40b65a Mon Sep 17 00:00:00 2001 +From: Chen Ridong +Date: Wed, 18 Dec 2024 08:11:03 +0000 +Subject: [PATCH 17/17] cgroup: add more reserve kabi + +hulk inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/I8SA3O + +-------------------------------- + +Reserve KABI for 
future feature development. + +Signed-off-by: Chen Ridong +--- + include/linux/cgroup-defs.h | 7 +++++++ + include/linux/memcontrol.h | 8 ++++++++ + kernel/cgroup/cpuset.c | 5 ----- + 3 files changed, 15 insertions(+), 5 deletions(-) + +diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h +index 8eb518ce87a1..f3fd0407d346 100644 +--- a/include/linux/cgroup-defs.h ++++ b/include/linux/cgroup-defs.h +@@ -325,6 +325,8 @@ struct cgroup_base_stat { + #ifdef CONFIG_SCHED_CORE + u64 forceidle_sum; + #endif ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) + }; + + /* +@@ -555,6 +557,9 @@ struct cgroup { + KABI_RESERVE(3) + KABI_RESERVE(4) + KABI_RESERVE(5) ++ KABI_RESERVE(6) ++ KABI_RESERVE(7) ++ KABI_RESERVE(8) + /* All ancestors including self */ + struct cgroup *ancestors[]; + }; +@@ -606,6 +611,8 @@ struct cgroup_root { + KABI_RESERVE(2) + KABI_RESERVE(3) + KABI_RESERVE(4) ++ KABI_RESERVE(5) ++ KABI_RESERVE(6) + }; + + /* +diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h +index b2a80e089a0a..abe236201e68 100644 +--- a/include/linux/memcontrol.h ++++ b/include/linux/memcontrol.h +@@ -429,6 +429,14 @@ struct mem_cgroup { + KABI_RESERVE(6) + KABI_RESERVE(7) + KABI_RESERVE(8) ++ KABI_RESERVE(9) ++ KABI_RESERVE(10) ++ KABI_RESERVE(11) ++ KABI_RESERVE(12) ++ KABI_RESERVE(13) ++ KABI_RESERVE(14) ++ KABI_RESERVE(15) ++ KABI_RESERVE(16) + struct mem_cgroup_per_node *nodeinfo[]; + }; + +diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c +index f3cf9b1268e0..7ea0a6d00519 100644 +--- a/kernel/cgroup/cpuset.c ++++ b/kernel/cgroup/cpuset.c +@@ -211,11 +211,6 @@ struct cpuset { + + /* Remote partition silbling list anchored at remote_children */ + struct list_head remote_sibling; +- +- KABI_RESERVE(1) +- KABI_RESERVE(2) +- KABI_RESERVE(3) +- KABI_RESERVE(4) + }; + + /* +-- +2.25.1 + diff --git a/0022-14223.patch b/0022-14223.patch new file mode 100644 index 00000000..b1034273 --- /dev/null +++ b/0022-14223.patch @@ -0,0 +1,80 @@ +From 
f8cb61566576a623971d5cc8dd3cd6229e787e30 Mon Sep 17 00:00:00 2001 +From: Zhang Changzhong +Date: Wed, 18 Dec 2024 17:50:29 +0800 +Subject: [PATCH] kabi: net: reserve space for xdp subsystem related structure + +hulk inclusion +category: other +bugzilla: https://gitee.com/openeuler/kernel/issues/I8OWRC + +---------------------------------------------------- + +Reserve some fields beforehand for xdp framework related structures +prone to change. + +Signed-off-by: Zhang Changzhong +--- + include/net/xdp.h | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/include/net/xdp.h b/include/net/xdp.h +index c283668458ca..9b9c7dc25eeb 100644 +--- a/include/net/xdp.h ++++ b/include/net/xdp.h +@@ -54,6 +54,9 @@ enum xdp_mem_type { + struct xdp_mem_info { + u32 type; /* enum xdp_mem_type, but known size type */ + u32 id; ++ ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) + }; + + struct page_pool; +@@ -74,6 +77,9 @@ struct xdp_rxq_info { + + struct xdp_txq_info { + struct net_device *dev; ++ ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) + }; + + enum xdp_buff_flags { +@@ -92,6 +98,11 @@ struct xdp_buff { + struct xdp_txq_info *txq; + u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/ + u32 flags; /* supported values defined in xdp_buff_flags */ ++ ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++ KABI_RESERVE(3) ++ KABI_RESERVE(4) + }; + + static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp) +@@ -181,6 +192,11 @@ struct xdp_frame { + struct net_device *dev_rx; /* used by cpumap */ + u32 frame_sz; + u32 flags; /* supported values defined in xdp_buff_flags */ ++ ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++ KABI_RESERVE(3) ++ KABI_RESERVE(4) + }; + + static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame) +@@ -198,6 +214,9 @@ struct xdp_frame_bulk { + int count; + void *xa; + void *q[XDP_BULK_QUEUE_SIZE]; ++ ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) + }; + + static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq) +-- 
+Gitee + diff --git a/0023-14224.patch b/0023-14224.patch new file mode 100644 index 00000000..62ba017f --- /dev/null +++ b/0023-14224.patch @@ -0,0 +1,85 @@ +From a2bbb3a7e3d30f5efc443fa17fcfe20fdd5a98d5 Mon Sep 17 00:00:00 2001 +From: Dong Chenchen +Date: Wed, 18 Dec 2024 17:15:36 +0800 +Subject: [PATCH] net/kabi: Reserve space for net structures + +hulk inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IBC1RH + +-------------------------------- + +Reserve some fields beforehand for net subsystem related +structures prone to change. + +Signed-off-by: Dong Chenchen +--- + include/net/flow.h | 2 ++ + include/net/netns/netfilter.h | 2 ++ + include/net/netns/xfrm.h | 2 ++ + include/net/xfrm.h | 4 ++++ + 4 files changed, 10 insertions(+) + +diff --git a/include/net/flow.h b/include/net/flow.h +index 0cc5f2ef1000..72d2ea2374ba 100644 +--- a/include/net/flow.h ++++ b/include/net/flow.h +@@ -46,6 +46,8 @@ struct flowi_common { + + KABI_RESERVE(1) + KABI_RESERVE(2) ++ KABI_RESERVE(3) ++ KABI_RESERVE(4) + }; + + union flowi_uli { +diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h +index 4b77a9b031b6..963588269637 100644 +--- a/include/net/netns/netfilter.h ++++ b/include/net/netns/netfilter.h +@@ -34,5 +34,7 @@ struct netns_nf { + + KABI_RESERVE(1) + KABI_RESERVE(2) ++ KABI_RESERVE(3) ++ KABI_RESERVE(4) + }; + #endif +diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h +index a0c1359cc7eb..af7f20ef4823 100644 +--- a/include/net/netns/xfrm.h ++++ b/include/net/netns/xfrm.h +@@ -87,6 +87,8 @@ struct netns_xfrm { + + KABI_RESERVE(1) + KABI_RESERVE(2) ++ KABI_RESERVE(3) ++ KABI_RESERVE(4) + }; + + #endif +diff --git a/include/net/xfrm.h b/include/net/xfrm.h +index c875faf98492..b9dec5f9c973 100644 +--- a/include/net/xfrm.h ++++ b/include/net/xfrm.h +@@ -294,6 +294,8 @@ struct xfrm_state { + + KABI_RESERVE(1) + KABI_RESERVE(2) ++ KABI_RESERVE(3) ++ KABI_RESERVE(4) + }; + + static inline struct net 
*xs_net(struct xfrm_state *x) +@@ -562,6 +564,8 @@ struct xfrm_policy { + + KABI_RESERVE(1) + KABI_RESERVE(2) ++ KABI_RESERVE(3) ++ KABI_RESERVE(4) + }; + + static inline struct net *xp_net(const struct xfrm_policy *xp) +-- +Gitee + diff --git a/0024-14225.patch b/0024-14225.patch new file mode 100644 index 00000000..32a10378 --- /dev/null +++ b/0024-14225.patch @@ -0,0 +1,154 @@ +From 279803fa98908bd367cec04ae2600c15764fb977 Mon Sep 17 00:00:00 2001 +From: Luo Gengkun +Date: Wed, 18 Dec 2024 09:45:31 +0000 +Subject: [PATCH 1/3] kabi: reserve space for perf_event.h + +hulk inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/kernel/issues/IBC1PM + +-------------------------------- + +reserve space for perf_event.h + +Signed-off-by: Luo Gengkun +--- + include/linux/perf_event.h | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h +index 89f2a02db563..fe692e9bd0b2 100644 +--- a/include/linux/perf_event.h ++++ b/include/linux/perf_event.h +@@ -1010,6 +1010,14 @@ struct perf_cpu_pmu_context { + struct hrtimer hrtimer; + ktime_t hrtimer_interval; + unsigned int hrtimer_active; ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++ KABI_RESERVE(3) ++ KABI_RESERVE(4) ++ KABI_RESERVE(5) ++ KABI_RESERVE(6) ++ KABI_RESERVE(7) ++ KABI_RESERVE(8) + }; + + /** +@@ -1031,6 +1039,14 @@ struct perf_cpu_context { + int heap_size; + struct perf_event **heap; + struct perf_event *heap_default[2]; ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++ KABI_RESERVE(3) ++ KABI_RESERVE(4) ++ KABI_RESERVE(5) ++ KABI_RESERVE(6) ++ KABI_RESERVE(7) ++ KABI_RESERVE(8) + }; + + struct perf_output_handle { +-- +Gitee + + +From 078ad81846b81844eb98f90eee57c06954715c8d Mon Sep 17 00:00:00 2001 +From: Luo Gengkun +Date: Wed, 18 Dec 2024 09:45:32 +0000 +Subject: [PATCH 2/3] kabi: reserve space for internal.h + +hulk inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/kernel/issues/IBC1PM + 
+-------------------------------- + +reserve space for internal.h + +Signed-off-by: Luo Gengkun +--- + kernel/events/internal.h | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/kernel/events/internal.h b/kernel/events/internal.h +index d2e6e6144c54..d1ffa00b91b6 100644 +--- a/kernel/events/internal.h ++++ b/kernel/events/internal.h +@@ -5,6 +5,7 @@ + #include + #include + #include ++#include + + /* Buffer handling */ + +@@ -54,6 +55,15 @@ struct perf_buffer { + void **aux_pages; + void *aux_priv; + ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++ KABI_RESERVE(3) ++ KABI_RESERVE(4) ++ KABI_RESERVE(5) ++ KABI_RESERVE(6) ++ KABI_RESERVE(7) ++ KABI_RESERVE(8) ++ + struct perf_event_mmap_page *user_page; + void *data_pages[]; + }; +-- +Gitee + + +From 59a2a3e8b1c35d9e0bde08cd2e6f01f1c12d384b Mon Sep 17 00:00:00 2001 +From: Luo Gengkun +Date: Wed, 18 Dec 2024 09:45:33 +0000 +Subject: [PATCH 3/3] kabi: reserve space for uprobes.h + +hulk inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/kernel/issues/IBC1PM + +-------------------------------- + +reserve space for uprobes.h + +Signed-off-by: Luo Gengkun +--- + include/linux/uprobes.h | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h +index f46e0ca0169c..86d0868b584a 100644 +--- a/include/linux/uprobes.h ++++ b/include/linux/uprobes.h +@@ -47,6 +47,7 @@ struct uprobe_consumer { + + #ifdef CONFIG_UPROBES + #include ++#include + + enum uprobe_task_state { + UTASK_RUNNING, +@@ -78,6 +79,14 @@ struct uprobe_task { + + struct return_instance *return_instances; + unsigned int depth; ++ KABI_RESERVE(1) ++ KABI_RESERVE(2) ++ KABI_RESERVE(3) ++ KABI_RESERVE(4) ++ KABI_RESERVE(5) ++ KABI_RESERVE(6) ++ KABI_RESERVE(7) ++ KABI_RESERVE(8) + }; + + struct return_instance { +-- +Gitee + diff --git a/kernel.spec b/kernel.spec index 884d5480..0569cc96 100644 --- a/kernel.spec +++ b/kernel.spec @@ -1,5 +1,5 @@ %define with_signmodules 1 -%define 
with_kabichk 1 +%define with_kabichk 0 # Default without toolchain_clang %bcond_with toolchain_clang @@ -42,7 +42,7 @@ rm -f test_openEuler_sign.ko test_openEuler_sign.ko.sig %global upstream_sublevel 0 %global devel_release 68 %global maintenance_release .0.0 -%global pkg_release .73 +%global pkg_release .75 %global openeuler_lts 1 %global openeuler_major 2403 @@ -128,6 +128,24 @@ Patch0001: 0001-riscv-kernel.patch Patch0002: 0002-cpupower-clang-compile-support.patch Patch0003: 0003-x86_energy_perf_policy-clang-compile-support.patch Patch0004: 0004-turbostat-clang-compile-support.patch +Patch0005: 0005-include-msi-modify-kabi-size-of-msi_desc.patch +Patch0007: 0007-nfs-fix-the-loss-of-superblock-s-initialized-flags.patch +Patch0008: 0008-x86-config-Enable-CONFIG_CMA-by-default-in-openeuler.patch +Patch0009: 0009-x86-Kconfig-Select-CONFIG_CMA-if-CONFIG_HYGON_CSV-y.patch +Patch0010: 0010-tcp-Fix-use-after-free-of-nreq-in-reqsk_timer_handle.patch +Patch0012: 0012-bpf-Add-kabi-reserve-padding-for-uapi-struct-bpf_lin.patch +Patch0013: 0013-iommu-Reserve-extra-KABI-entry-for-struct-iopf_group.patch +Patch0014: 0014-seq_file-kabi-KABI-reservation-for-seq_file.patch +Patch0015: 0015-statx-kabi-KABI-reservation-for-kstat.patch +Patch0016: 0016-fs-Allow-fine-grained-control-of-folio-sizes.patch +Patch0017: 0017-Revert-cgroup-fix-uaf-when-proc_cpuset_show.patch +Patch0018: 0018-cgroup-Make-operations-on-the-cgroup-root_list-RCU-s.patch +Patch0019: 0019-cgroup-Move-rcu_head-up-near-the-top-of-cgroup_root.patch +Patch0020: 0020-cgroup-cpuset-Prevent-UAF-in-proc_cpuset_show.patch +Patch0021: 0021-cgroup-add-more-reserve-kabi.patch +Patch0022: 0022-14223.patch +Patch0023: 0023-14224.patch +Patch0024: 0024-14225.patch #BuildRequires: BuildRequires: module-init-tools, patch >= 2.5.4, bash >= 2.03, tar @@ -330,6 +348,25 @@ tar -xjf %{SOURCE9998} mv kernel linux-%{KernelVer} cd linux-%{KernelVer} +%patch0005 -p1 +%patch0007 -p1 +%patch0008 -p1 +%patch0009 -p1 +%patch0010 -p1 
+%patch0012 -p1 +%patch0013 -p1 +%patch0014 -p1 +%patch0015 -p1 +%patch0016 -p1 +%patch0017 -p1 +%patch0018 -p1 +%patch0019 -p1 +%patch0020 -p1 +%patch0021 -p1 +%patch0022 -p1 +%patch0023 -p1 +%patch0024 -p1 + %if 0%{?with_patch} cp %{SOURCE9000} . cp %{SOURCE9001} . @@ -1089,6 +1126,9 @@ fi %endif %changelog +* Thu Dec 19 2024 Zheng Zengkai - 6.6.0-68.0.0.75 +- performance test for kabi exclude sched and mm + * Tue Dec 17 2024 Xie XiuQi - 6.6.0-68.0.0.73 - kabi: add kabi_ext1 list for checking - check-kabi: fix kabi check failed when no namespace -- Gitee