From 4b431016071ab0e96d26748fac2215f5efd7b2ed Mon Sep 17 00:00:00 2001 From: chenmudan Date: Fri, 6 May 2022 20:10:12 +0800 Subject: [PATCH 01/26] add ccache for kernel Signed-off-by: chenmudan Change-Id: Idf56129c01fc186cf47ca5fbce469e418be9f288 --- Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 756479b101f8..dce02fbb756e 100644 --- a/Makefile +++ b/Makefile @@ -426,8 +426,9 @@ KBUILD_HOSTLDLIBS := $(HOST_LFS_LIBS) $(HOSTLDLIBS) # Make variables (CC, etc...) CPP = $(CC) -E +CCACHE = ccache ifneq ($(LLVM),) -CC = clang +CC = $(CCACHE) clang LD = ld.lld AR = llvm-ar NM = llvm-nm @@ -436,7 +437,7 @@ OBJDUMP = llvm-objdump READELF = llvm-readelf STRIP = llvm-strip else -CC = $(CROSS_COMPILE)gcc +CC = $(CCACHE) $(CROSS_COMPILE)gcc LD = $(CROSS_COMPILE)ld AR = $(CROSS_COMPILE)ar NM = $(CROSS_COMPILE)nm -- Gitee From ccb5dc83a45f1c5792de43ff6a98c092e09aa7e4 Mon Sep 17 00:00:00 2001 From: lk_hauwei Date: Fri, 13 May 2022 11:13:34 +0800 Subject: [PATCH 02/26] Add ashmem process info ohos inclusion category: bugfix issue: #I57716 CVE: NA -------------------------------- Add ashmem process info Signed-off-by: lk_hauwei --- drivers/staging/android/ashmem.c | 36 ++++++++++++- drivers/staging/android/ashmem.h | 5 ++ include/linux/memcheck.h | 17 ++++++ mm/Kconfig | 9 ++++ mm/Makefile | 1 + mm/memtrace_ashmem.c | 93 ++++++++++++++++++++++++++++++++ 6 files changed, 160 insertions(+), 1 deletion(-) create mode 100644 include/linux/memcheck.h create mode 100644 mm/memtrace_ashmem.c diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 10b4be1f3e78..764e9d9c0830 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "ashmem.h" #define ASHMEM_NAME_PREFIX "dev/ashmem/" @@ -104,6 +105,16 @@ static struct kmem_cache *ashmem_range_cachep __read_mostly; */ static struct lock_class_key backing_shmem_inode_class; +void ashmem_mutex_lock(void) +{ + mutex_lock(&ashmem_mutex); +} + +void ashmem_mutex_unlock(void) +{ + mutex_unlock(&ashmem_mutex); +} + static inline unsigned long range_size(struct ashmem_range *range) { return range->pgend - range->pgstart + 1; @@ -913,12 +924,35 @@ static const struct file_operations ashmem_fops = { #endif }; +int is_ashmem_file(struct file *file) +{ + return file->f_op == &ashmem_fops; +} + static struct miscdevice ashmem_misc = { .minor = MISC_DYNAMIC_MINOR, .name = "ashmem", .fops = &ashmem_fops, }; +size_t get_ashmem_size_by_file(struct file *f) +{ + struct ashmem_area *asma = f->private_data; + + if (asma) + return asma->size; + return 0; +} + +char *get_ashmem_name_by_file(struct file *f) +{ + struct ashmem_area *asma = f->private_data; + + if (asma) + return asma->name; + return NULL; +} + static int __init ashmem_init(void) { int ret = -ENOMEM; @@ -950,7 +984,7 @@ static int __init ashmem_init(void) pr_err("failed to register shrinker!\n"); goto out_demisc; } - + init_ashmem_process_info(); pr_info("initialized\n"); return 0; diff --git a/drivers/staging/android/ashmem.h b/drivers/staging/android/ashmem.h index 1a478173cd21..601441d87f1f 100644 --- a/drivers/staging/android/ashmem.h +++ b/drivers/staging/android/ashmem.h @@ -21,4 +21,9 @@ #define COMPAT_ASHMEM_SET_PROT_MASK _IOW(__ASHMEMIOC, 5, unsigned int) #endif +int is_ashmem_file(struct file *file); +size_t get_ashmem_size_by_file(struct file *f); +char *get_ashmem_name_by_file(struct file *f); +void ashmem_mutex_lock(void); +void ashmem_mutex_unlock(void); #endif /* _LINUX_ASHMEM_H */ diff --git a/include/linux/memcheck.h b/include/linux/memcheck.h new file mode 100644 index 000000000000..7f5fe3a854db --- /dev/null +++ b/include/linux/memcheck.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/linux/memcheck.h + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. + */ +#ifndef _MEMCHECK_H +#define _MEMCHECK_H + +#ifdef CONFIG_MEMTRACE_ASHMEM +void init_ashmem_process_info(void); +#else +static inline void init_ashmem_process_info(void) {} +#endif + +#endif /* _MEMCHECK_H */ + diff --git a/mm/Kconfig b/mm/Kconfig index 68aaed4cdc9f..c35f9f8a0857 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -938,5 +938,14 @@ config LMKD_DBG default n help print processes info when lmk happen per several seconds +# +# Show the process ashmem for debug +# +config MEMTRACE_ASHMEM + bool "Ashmem Process Info Show" + depends on ASHMEM + default n + help + Enable the Ashmem Process Info Show endmenu diff --git a/mm/Makefile b/mm/Makefile index 6b1897637368..4c0ca82c4a64 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -66,6 +66,7 @@ ifdef CONFIG_MMU obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o endif +obj-$(CONFIG_MEMTRACE_ASHMEM) += memtrace_ashmem.o obj-$(CONFIG_LMKD_DBG) += lmkd_dbg_trigger.o obj-$(CONFIG_LOWMEM) += lowmem_dbg.o obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o swap_slots.o diff --git a/mm/memtrace_ashmem.c b/mm/memtrace_ashmem.c new file mode 100644 index 000000000000..9d61f32d4f78 --- /dev/null +++ b/mm/memtrace_ashmem.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * mm/memtrace_ashmem.c + * + * Copyright (c) 2022 Huawei Technologies Co., Ltd. + */ +#include +#include +#include +#include +#include +#include +#include +#include "../drivers/staging/android/ashmem.h" + +static int ashmem_debug_process_info_open(struct inode *inode, + struct file *file); + +struct ashmem_debug_process_info_args { + struct seq_file *seq; + struct task_struct *tsk; +}; + +static const struct proc_ops debug_process_ashmem_info_fops = { + .proc_open = ashmem_debug_process_info_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, +}; + +static int ashmem_debug_process_info_cb(const void *data, + struct file *f, unsigned int fd) +{ + const struct ashmem_debug_process_info_args *args = data; + struct task_struct *tsk = args->tsk; + + if (!is_ashmem_file(f)) + return 0; + seq_printf(args->seq, + "%s %u %u %s %zu\n", + tsk->comm, tsk->pid, fd, + get_ashmem_name_by_file(f), + get_ashmem_size_by_file(f)); + return 0; +} + +static int ashmem_debug_process_info_show(struct seq_file *s, void *d) +{ + struct task_struct *tsk = NULL; + struct ashmem_debug_process_info_args cb_args; + + seq_puts(s, "Process ashmem detail info:\n"); + seq_puts(s, "----------------------------------------------------\n"); + seq_printf(s, "%s %s %s %s %s\n", + "Process name", "Process ID", + "fd", "ashmem_name", "size"); + + ashmem_mutex_lock(); + rcu_read_lock(); + for_each_process(tsk) { + if (tsk->flags & PF_KTHREAD) + continue; + cb_args.seq = s; + cb_args.tsk = tsk; + + task_lock(tsk); + iterate_fd(tsk->files, 0, + ashmem_debug_process_info_cb, (void *)&cb_args); + task_unlock(tsk); + } + rcu_read_unlock(); + ashmem_mutex_unlock(); + seq_puts(s, "----------------------------------------------------\n"); + return 0; +} + +static int ashmem_debug_process_info_open(struct inode *inode, + struct file *file) +{ + return single_open(file, ashmem_debug_process_info_show, + inode->i_private); +} + +void init_ashmem_process_info(void) +{ + struct proc_dir_entry *entry = NULL; + + entry = proc_create_data("ashmem_process_info", 0444, + NULL, &debug_process_ashmem_info_fops, NULL); + if (!entry) + pr_err("Failed to create ashmem debug info\n"); +} + -- Gitee From f0bedc8aa9ca862a00b9430d3707f1acc88fa012 Mon Sep 17 00:00:00 2001 From: CY Fan Date: Wed, 18 May 2022 17:00:37 +0800 Subject: [PATCH 03/26] reclaim_acct: fix the compiler warnings ohos inclusion category: bugfix issue: #I583PI CVE: NA ----------------- Signed-off-by: CY Fan --- mm/internal.h | 8 -------- mm/reclaim_acct.c | 11 +++++++++-- mm/reclaimacct_show.c | 7 +++++++ 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index 4ab71a59e6b0..708a18c63bff 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -789,14 +789,6 @@ struct reclaim_acct { unsigned int reclaim_type; }; -static const char *stub_name[NR_RA_STUBS] = { - "direct_reclaim", - "drain_all_pages", - "shrink_file_list", - "shrink_anon_list", - "shrink_slab", -}; - bool reclaimacct_initialize_show_data(void); void reclaimacct_destroy_show_data(void); diff --git a/mm/reclaim_acct.c b/mm/reclaim_acct.c index 5f1a06fc8337..e9fffff3a092 100644 --- a/mm/reclaim_acct.c +++ b/mm/reclaim_acct.c @@ -10,6 +10,15 @@ #include "internal.h" + +const char *stub_name[NR_RA_STUBS] = { + "direct_reclaim", + "drain_all_pages", + "shrink_file_list", + "shrink_anon_list", + "shrink_slab", +}; + /* Once initialized, the variable should never be changed */ static bool reclaimacct_is_off = true; static int reclaimacct_disable = 1; @@ -124,8 +133,6 @@ void reclaimacct_end(enum reclaim_type type) /* Reclaim accounting module initialize */ static int reclaimacct_init_handle(void *p) { - int i; - if (!reclaimacct_initialize_show_data()) goto alloc_show_failed; diff --git a/mm/reclaimacct_show.c b/mm/reclaimacct_show.c index c6b5f83a3028..db2354d7473c 100644 --- a/mm/reclaimacct_show.c +++ b/mm/reclaimacct_show.c @@ -95,6 +95,13 @@ static int reclaimacct_proc_show(struct seq_file *m, void *v) { int i, j; struct reclaimacct_show show; + const char *stub_name[NR_RA_STUBS] = { + "direct_reclaim", + "drain_all_pages", + "shrink_file_list", + "shrink_anon_list", + "shrink_slab", + }; if (!ra_show) return 0; -- Gitee From 23a09cc8d94ff071e513308839740788157637de Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Jan 2022 14:06:07 -0800 Subject: [PATCH 04/26] mm: move anon_vma declarations to linux/mm_inline.h mainline inclusion from mainline-v5.17-rc1 commit 17fca131cee21724ee953a17c185c14e9533af5b issue: #I58DRB CVE: NA Signed-off-by: Yuanzheng Song -------------------------------- The patch to add anonymous vma names causes a build failure in some configurations: include/linux/mm_types.h: In function 'is_same_vma_anon_name': include/linux/mm_types.h:924:37: error: implicit declaration of function 'strcmp' [-Werror=implicit-function-declaration] 924 | return name && vma_name && !strcmp(name, vma_name); | ^~~~~~ include/linux/mm_types.h:22:1: note: 'strcmp' is defined in header ''; did you forget to '#include '? This should not really be part of linux/mm_types.h in the first place, as that header is meant to only contain structure defintions and need a minimum set of indirect includes itself. While the header clearly includes more than it should at this point, let's not make it worse by including string.h as well, which would pull in the expensive (compile-speed wise) fortify-string logic. Move the new functions into a separate header that only needs to be included in a couple of locations. Link: https://lkml.kernel.org/r/20211207125710.2503446-1-arnd@kernel.org Fixes: "mm: add a field to store names for private anonymous memory" Signed-off-by: Arnd Bergmann Cc: Al Viro Cc: Colin Cross Cc: Eric Biederman Cc: Kees Cook Cc: Matthew Wilcox (Oracle) Cc: Peter Xu Cc: Peter Zijlstra (Intel) Cc: Stephen Rothwell Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Yu Zhao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 1 + fs/userfaultfd.c | 1 + include/linux/mm_inline.h | 50 +++++++++++++++++++++++++++++++++++++++ include/linux/mm_types.h | 48 ------------------------------------- kernel/fork.c | 1 + mm/madvise.c | 1 + mm/mmap.c | 1 + 7 files changed, 55 insertions(+), 48 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 0e22a47e8a6b..40e44c264ee9 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include #include diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 4a864090ea8b..38af2fb47ae5 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 8fc71e9d7bb0..589e828f8fa8 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -4,6 +4,7 @@ #include #include +#include /** * page_is_file_lru - should the page be on a file LRU or anon LRU? @@ -125,4 +126,53 @@ static __always_inline enum lru_list page_lru(struct page *page) } return lru; } + +#ifdef CONFIG_ANON_VMA_NAME +/* + * mmap_lock should be read-locked when calling vma_anon_name() and while using + * the returned pointer. + */ +extern const char *vma_anon_name(struct vm_area_struct *vma); + +/* + * mmap_lock should be read-locked for orig_vma->vm_mm. + * mmap_lock should be write-locked for new_vma->vm_mm or new_vma should be + * isolated. + */ +extern void dup_vma_anon_name(struct vm_area_struct *orig_vma, + struct vm_area_struct *new_vma); + +/* + * mmap_lock should be write-locked or vma should have been isolated under + * write-locked mmap_lock protection. + */ +extern void free_vma_anon_name(struct vm_area_struct *vma); + +/* mmap_lock should be read-locked */ +static inline bool is_same_vma_anon_name(struct vm_area_struct *vma, + const char *name) +{ + const char *vma_name = vma_anon_name(vma); + + /* either both NULL, or pointers to same string */ + if (vma_name == name) + return true; + + return name && vma_name && !strcmp(name, vma_name); +} +#else /* CONFIG_ANON_VMA_NAME */ +static inline const char *vma_anon_name(struct vm_area_struct *vma) +{ + return NULL; +} +static inline void dup_vma_anon_name(struct vm_area_struct *orig_vma, + struct vm_area_struct *new_vma) {} +static inline void free_vma_anon_name(struct vm_area_struct *vma) {} +static inline bool is_same_vma_anon_name(struct vm_area_struct *vma, + const char *name) +{ + return true; +} +#endif /* CONFIG_ANON_VMA_NAME */ + #endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index f2a7fc951350..49747e3c1ac9 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -823,52 +823,4 @@ typedef struct { unsigned long val; } swp_entry_t; -#ifdef CONFIG_ANON_VMA_NAME -/* - * mmap_lock should be read-locked when calling vma_anon_name() and while using - * the returned pointer. - */ -extern const char *vma_anon_name(struct vm_area_struct *vma); - -/* - * mmap_lock should be read-locked for orig_vma->vm_mm. - * mmap_lock should be write-locked for new_vma->vm_mm or new_vma should be - * isolated. - */ -extern void dup_vma_anon_name(struct vm_area_struct *orig_vma, - struct vm_area_struct *new_vma); - -/* - * mmap_lock should be write-locked or vma should have been isolated under - * write-locked mmap_lock protection. - */ -extern void free_vma_anon_name(struct vm_area_struct *vma); - -/* mmap_lock should be read-locked */ -static inline bool is_same_vma_anon_name(struct vm_area_struct *vma, - const char *name) -{ - const char *vma_name = vma_anon_name(vma); - - /* either both NULL, or pointers to same string */ - if (vma_name == name) - return true; - - return name && vma_name && !strcmp(name, vma_name); -} -#else /* CONFIG_ANON_VMA_NAME */ -static inline const char *vma_anon_name(struct vm_area_struct *vma) -{ - return NULL; -} -static inline void dup_vma_anon_name(struct vm_area_struct *orig_vma, - struct vm_area_struct *new_vma) {} -static inline void free_vma_anon_name(struct vm_area_struct *vma) {} -static inline bool is_same_vma_anon_name(struct vm_area_struct *vma, - const char *name) -{ - return true; -} -#endif /* CONFIG_ANON_VMA_NAME */ - #endif /* _LINUX_MM_TYPES_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 7b3ce57416cd..c2897f0c29bf 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/madvise.c b/mm/madvise.c index 23b48a0049cb..6dea30a7d356 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/mmap.c b/mm/mmap.c index 1f13b3069cb1..e348bcc5064f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include -- Gitee From 7a8e6fc515bdbe9eb550d9be499730b9855c6e8a Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 4 Mar 2022 20:28:51 -0800 Subject: [PATCH 05/26] mm: refactor vm_area_struct::anon_vma_name usage code mainline inclusion from mainline-v5.17-rc7 commit 5c26f6ac9416b63d093e29c30e79b3297e425472 issue: #I58DRB CVE: NA Signed-off-by: Yuanzheng Song -------------------------------- Avoid mixing strings and their anon_vma_name referenced pointers by using struct anon_vma_name whenever possible. This simplifies the code and allows easier sharing of anon_vma_name structures when they represent the same name. [surenb@google.com: fix comment] Link: https://lkml.kernel.org/r/20220223153613.835563-1-surenb@google.com Link: https://lkml.kernel.org/r/20220224231834.1481408-1-surenb@google.com Signed-off-by: Suren Baghdasaryan Suggested-by: Matthew Wilcox Suggested-by: Michal Hocko Acked-by: Michal Hocko Cc: Colin Cross Cc: Sumit Semwal Cc: Dave Hansen Cc: Kees Cook Cc: "Kirill A. Shutemov" Cc: Vlastimil Babka Cc: Johannes Weiner Cc: "Eric W. Biederman" Cc: Christian Brauner Cc: Alexey Gladkov Cc: Sasha Levin Cc: Chris Hyser Cc: Davidlohr Bueso Cc: Peter Collingbourne Cc: Xiaofeng Cao Cc: David Hildenbrand Cc: Cyrill Gorcunov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 6 +-- fs/userfaultfd.c | 6 +-- include/linux/mm.h | 7 ++-- include/linux/mm_inline.h | 87 ++++++++++++++++++++++++++------------- include/linux/mm_types.h | 5 ++- kernel/fork.c | 4 +- kernel/sys.c | 19 +++++---- mm/madvise.c | 87 +++++++++++++-------------------------- mm/mempolicy.c | 2 +- mm/mlock.c | 2 +- mm/mmap.c | 12 +++--- mm/mprotect.c | 2 +- 12 files changed, 125 insertions(+), 114 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 40e44c264ee9..46407d4e004d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -309,7 +309,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma) name = arch_vma_name(vma); if (!name) { - const char *anon_name; + struct anon_vma_name *anon_name; if (!mm) { name = "[vdso]"; @@ -327,10 +327,10 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma) goto done; } - anon_name = vma_anon_name(vma); + anon_name = anon_vma_name(vma); if (anon_name) { seq_pad(m, ' '); - seq_printf(m, "[anon:%s]", anon_name); + seq_printf(m, "[anon:%s]", anon_name->name); } } diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 38af2fb47ae5..4229881e1601 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -870,7 +870,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) new_flags, vma->anon_vma, vma->vm_file, vma->vm_pgoff, vma_policy(vma), - NULL_VM_UFFD_CTX, vma_anon_name(vma)); + NULL_VM_UFFD_CTX, anon_vma_name(vma)); if (prev) vma = prev; else @@ -1412,7 +1412,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, vma->anon_vma, vma->vm_file, vma->vm_pgoff, vma_policy(vma), ((struct vm_userfaultfd_ctx){ ctx }), - vma_anon_name(vma)); + anon_vma_name(vma)); if (prev) { vma = prev; goto next; @@ -1582,7 +1582,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, prev = vma_merge(mm, prev, start, vma_end, new_flags, vma->anon_vma, vma->vm_file, vma->vm_pgoff, vma_policy(vma), - NULL_VM_UFFD_CTX, vma_anon_name(vma)); + NULL_VM_UFFD_CTX, anon_vma_name(vma)); if (prev) { vma = prev; goto next; diff --git a/include/linux/mm.h b/include/linux/mm.h index c9b37ce2db0b..01d012727a27 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2534,7 +2534,7 @@ static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start, extern struct vm_area_struct *vma_merge(struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, - struct mempolicy *, struct vm_userfaultfd_ctx, const char *); + struct mempolicy *, struct vm_userfaultfd_ctx, struct anon_vma_name *); extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); extern int __split_vma(struct mm_struct *, struct vm_area_struct *, unsigned long addr, int new_below); @@ -3215,11 +3215,12 @@ static inline int seal_check_future_write(int seals, struct vm_area_struct *vma) #ifdef CONFIG_ANON_VMA_NAME int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, - unsigned long len_in, const char *name); + unsigned long len_in, + struct anon_vma_name *anon_name); #else static inline int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, - unsigned long len_in, const char *name) { + unsigned long len_in, struct anon_vma_name *anon_name) { return 0; } #endif diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 589e828f8fa8..faba1953a2f1 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -129,50 +129,81 @@ static __always_inline enum lru_list page_lru(struct page *page) #ifdef CONFIG_ANON_VMA_NAME /* - * mmap_lock should be read-locked when calling vma_anon_name() and while using - * the returned pointer. + * mmap_lock should be read-locked when calling anon_vma_name(). Caller should + * either keep holding the lock while using the returned pointer or it should + * raise anon_vma_name refcount before releasing the lock. */ -extern const char *vma_anon_name(struct vm_area_struct *vma); +extern struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma); +extern struct anon_vma_name *anon_vma_name_alloc(const char *name); +extern void anon_vma_name_free(struct kref *kref); -/* - * mmap_lock should be read-locked for orig_vma->vm_mm. - * mmap_lock should be write-locked for new_vma->vm_mm or new_vma should be - * isolated. - */ -extern void dup_vma_anon_name(struct vm_area_struct *orig_vma, - struct vm_area_struct *new_vma); +/* mmap_lock should be read-locked */ +static inline void anon_vma_name_get(struct anon_vma_name *anon_name) +{ + if (anon_name) + kref_get(&anon_name->kref); +} -/* - * mmap_lock should be write-locked or vma should have been isolated under - * write-locked mmap_lock protection. - */ -extern void free_vma_anon_name(struct vm_area_struct *vma); +static inline void anon_vma_name_put(struct anon_vma_name *anon_name) +{ + if (anon_name) + kref_put(&anon_name->kref, anon_vma_name_free); +} -/* mmap_lock should be read-locked */ -static inline bool is_same_vma_anon_name(struct vm_area_struct *vma, - const char *name) +static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma, + struct vm_area_struct *new_vma) { - const char *vma_name = vma_anon_name(vma); + struct anon_vma_name *anon_name = anon_vma_name(orig_vma); + + if (anon_name) { + anon_vma_name_get(anon_name); + new_vma->anon_name = anon_name; + } +} - /* either both NULL, or pointers to same string */ - if (vma_name == name) +static inline void free_anon_vma_name(struct vm_area_struct *vma) +{ + /* + * Not using anon_vma_name because it generates a warning if mmap_lock + * is not held, which might be the case here. + */ + if (!vma->vm_file) + anon_vma_name_put(vma->anon_name); +} + +static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, + struct anon_vma_name *anon_name2) +{ + if (anon_name1 == anon_name2) return true; - return name && vma_name && !strcmp(name, vma_name); + return anon_name1 && anon_name2 && + !strcmp(anon_name1->name, anon_name2->name); } + #else /* CONFIG_ANON_VMA_NAME */ -static inline const char *vma_anon_name(struct vm_area_struct *vma) +static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) +{ + return NULL; +} + +static inline struct anon_vma_name *anon_vma_name_alloc(const char *name) { return NULL; } -static inline void dup_vma_anon_name(struct vm_area_struct *orig_vma, - struct vm_area_struct *new_vma) {} -static inline void free_vma_anon_name(struct vm_area_struct *vma) {} -static inline bool is_same_vma_anon_name(struct vm_area_struct *vma, - const char *name) + +static inline void anon_vma_name_get(struct anon_vma_name *anon_name) {} +static inline void anon_vma_name_put(struct anon_vma_name *anon_name) {} +static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma, + struct vm_area_struct *new_vma) {} +static inline void free_anon_vma_name(struct vm_area_struct *vma) {} + +static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, + struct anon_vma_name *anon_name2) { return true; } + #endif /* CONFIG_ANON_VMA_NAME */ #endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 49747e3c1ac9..c0aedba56912 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -355,7 +355,10 @@ struct vm_area_struct { struct rb_node rb; unsigned long rb_subtree_last; } shared; - /* Serialized by mmap_sem. */ + /* + * Serialized by mmap_sem. Never use directly because it is + * valid only when vm_file is NULL. Use anon_vma_name instead. + */ struct anon_vma_name *anon_name; }; diff --git a/kernel/fork.c b/kernel/fork.c index c2897f0c29bf..298c44dc5e77 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -369,14 +369,14 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) *new = data_race(*orig); INIT_LIST_HEAD(&new->anon_vma_chain); new->vm_next = new->vm_prev = NULL; - dup_vma_anon_name(orig, new); + dup_anon_vma_name(orig, new); } return new; } void vm_area_free(struct vm_area_struct *vma) { - free_vma_anon_name(vma); + free_anon_vma_name(vma); kmem_cache_free(vm_area_cachep, vma); } diff --git a/kernel/sys.c b/kernel/sys.c index ab78ae833268..c63de71889bf 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -2290,15 +2291,16 @@ static int prctl_set_vma(unsigned long opt, unsigned long addr, { struct mm_struct *mm = current->mm; const char __user *uname; - char *name, *pch; + struct anon_vma_name *anon_name = NULL; int error; switch (opt) { case PR_SET_VMA_ANON_NAME: uname = (const char __user *)arg; if (uname) { - name = strndup_user(uname, ANON_VMA_NAME_MAX_LEN); + char *name, *pch; + name = strndup_user(uname, ANON_VMA_NAME_MAX_LEN); if (IS_ERR(name)) return PTR_ERR(name); @@ -2308,15 +2310,18 @@ static int prctl_set_vma(unsigned long opt, unsigned long addr, return -EINVAL; } } - } else { - /* Reset the name */ - name = NULL; + /* anon_vma has its own copy */ + anon_name = anon_vma_name_alloc(name); + kfree(name); + if (!anon_name) + return -ENOMEM; + } mmap_write_lock(mm); - error = madvise_set_anon_name(mm, addr, size, name); + error = madvise_set_anon_name(mm, addr, size, anon_name); mmap_write_unlock(mm); - kfree(name); + anon_vma_name_put(anon_name); break; default: error = -EINVAL; diff --git a/mm/madvise.c b/mm/madvise.c index 6dea30a7d356..a37f3e0d7913 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -63,7 +63,7 @@ static int madvise_need_mmap_write(int behavior) } #ifdef CONFIG_ANON_VMA_NAME -static struct anon_vma_name *anon_vma_name_alloc(const char *name) +struct anon_vma_name *anon_vma_name_alloc(const char *name) { struct anon_vma_name *anon_name; size_t count; @@ -79,78 +79,49 @@ static struct anon_vma_name *anon_vma_name_alloc(const char *name) return anon_name; } -static void vma_anon_name_free(struct kref *kref) +void anon_vma_name_free(struct kref *kref) { struct anon_vma_name *anon_name = container_of(kref, struct anon_vma_name, kref); kfree(anon_name); } -static inline bool has_vma_anon_name(struct vm_area_struct *vma) +struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) { - return !vma->vm_file && vma->anon_name; -} - -const char *vma_anon_name(struct vm_area_struct *vma) -{ - if (!has_vma_anon_name(vma)) - return NULL; - mmap_assert_locked(vma->vm_mm); - return vma->anon_name->name; -} - -void dup_vma_anon_name(struct vm_area_struct *orig_vma, - struct vm_area_struct *new_vma) -{ - if (!has_vma_anon_name(orig_vma)) - return; - - kref_get(&orig_vma->anon_name->kref); - new_vma->anon_name = orig_vma->anon_name; -} - -void free_vma_anon_name(struct vm_area_struct *vma) -{ - struct anon_vma_name *anon_name; - - if (!has_vma_anon_name(vma)) - return; + if (vma->vm_file) + return NULL; - anon_name = vma->anon_name; - vma->anon_name = NULL; - kref_put(&anon_name->kref, vma_anon_name_free); + return vma->anon_name; } /* mmap_lock should be write-locked */ -static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name) +static int replace_anon_vma_name(struct vm_area_struct *vma, + struct anon_vma_name *anon_name) { - const char *anon_name; + struct anon_vma_name *orig_name = anon_vma_name(vma); - if (!name) { - free_vma_anon_name(vma); + if (!anon_name) { + vma->anon_name = NULL; + anon_vma_name_put(orig_name); return 0; } - anon_name = vma_anon_name(vma); - if (anon_name) { - /* Same name, nothing to do here */ - if (!strcmp(name, anon_name)) - return 0; + if (anon_vma_name_eq(orig_name, anon_name)) + return 0; - free_vma_anon_name(vma); - } - vma->anon_name = anon_vma_name_alloc(name); - if (!vma->anon_name) - return -ENOMEM; + anon_vma_name_get(anon_name); + vma->anon_name = anon_name; + anon_vma_name_put(orig_name); return 0; } #else /* CONFIG_ANON_VMA_NAME */ -static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name) +static int replace_anon_vma_name(struct vm_area_struct *vma, + struct anon_vma_name *anon_name) { - if (name) + if (anon_name) return -EINVAL; return 0; @@ -163,13 +134,13 @@ static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name) static int madvise_update_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, unsigned long new_flags, - const char *name) + struct anon_vma_name *anon_name) { struct mm_struct *mm = vma->vm_mm; int error; pgoff_t pgoff; - if (new_flags == vma->vm_flags && is_same_vma_anon_name(vma, name)) { + if (new_flags == vma->vm_flags && anon_vma_name_eq(anon_vma_name(vma), anon_name)) { *prev = vma; return 0; } @@ -177,7 +148,7 @@ static int madvise_update_vma(struct vm_area_struct *vma, pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); *prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, name); + vma->vm_userfaultfd_ctx, anon_name); if (*prev) { vma = *prev; goto success; @@ -207,7 +178,7 @@ static int madvise_update_vma(struct vm_area_struct *vma, */ vma->vm_flags = new_flags; if (!vma->vm_file) { - error = replace_vma_anon_name(vma, name); + error = replace_anon_vma_name(vma, anon_name); if (error) return error; } @@ -978,7 +949,7 @@ static int madvise_vma_behavior(struct vm_area_struct *vma, } error = madvise_update_vma(vma, prev, start, end, new_flags, - vma_anon_name(vma)); + anon_vma_name(vma)); out: /* @@ -1164,7 +1135,7 @@ int madvise_walk_vmas(struct mm_struct *mm, unsigned long start, static int madvise_vma_anon_name(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, - unsigned long name) + unsigned long anon_name) { int error; @@ -1173,7 +1144,7 @@ static int madvise_vma_anon_name(struct vm_area_struct *vma, return -EBADF; error = madvise_update_vma(vma, prev, start, end, vma->vm_flags, - (const char *)name); + (struct anon_vma_name *)anon_name); /* * madvise() returns EAGAIN if kernel resources, such as @@ -1185,7 +1156,7 @@ static int madvise_vma_anon_name(struct vm_area_struct *vma, } int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, - unsigned long len_in, const char *name) + unsigned long len_in, struct anon_vma_name *anon_name) { unsigned long end; unsigned long len; @@ -1205,7 +1176,7 @@ int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, if (end == start) return 0; - return madvise_walk_vmas(mm, start, end, (unsigned long)name, + return madvise_walk_vmas(mm, start, end, (unsigned long)anon_name, madvise_vma_anon_name); } #endif /* CONFIG_ANON_VMA_NAME */ diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 35e323d605e7..46efcd21dd61 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -830,7 +830,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags, vma->anon_vma, vma->vm_file, pgoff, new_pol, vma->vm_userfaultfd_ctx, - vma_anon_name(vma)); + anon_vma_name(vma)); if (prev) { vma = prev; next = vma->vm_next; diff --git a/mm/mlock.c b/mm/mlock.c index 08b4f44efbd5..7080803ef2d5 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -540,7 +540,7 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, vma_anon_name(vma)); + vma->vm_userfaultfd_ctx, anon_vma_name(vma)); if (*prev) { vma = *prev; goto success; diff --git a/mm/mmap.c b/mm/mmap.c index e348bcc5064f..a5e0958acd05 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1030,7 +1030,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start, static inline int is_mergeable_vma(struct vm_area_struct *vma, struct file *file, unsigned long vm_flags, struct vm_userfaultfd_ctx vm_userfaultfd_ctx, - const char *anon_name) + struct anon_vma_name *anon_name) { /* * VM_SOFTDIRTY should not prevent from VMA merging, if we @@ -1048,7 +1048,7 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma, return 0; if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx)) return 0; - if (!is_same_vma_anon_name(vma, anon_name)) + if (!anon_vma_name_eq(anon_vma_name(vma), anon_name)) return 0; return 1; } @@ -1083,7 +1083,7 @@ can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags, struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff, struct vm_userfaultfd_ctx vm_userfaultfd_ctx, - const char *anon_name) + struct anon_vma_name *anon_name) { if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) && is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) { @@ -1105,7 +1105,7 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags, struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff, struct vm_userfaultfd_ctx vm_userfaultfd_ctx, - const char *anon_name) + struct anon_vma_name *anon_name) { if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) && is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) { @@ -1166,7 +1166,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, struct anon_vma *anon_vma, struct file *file, pgoff_t pgoff, struct mempolicy *policy, struct vm_userfaultfd_ctx vm_userfaultfd_ctx, - const char *anon_name) + struct anon_vma_name *anon_name) { pgoff_t pglen = (end - addr) >> PAGE_SHIFT; struct vm_area_struct *area, *next; @@ -3306,7 +3306,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, return NULL; /* should never get here */ new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, vma_anon_name(vma)); + vma->vm_userfaultfd_ctx, anon_vma_name(vma)); if (new_vma) { /* * Source vma may have been merged into new_vma diff --git a/mm/mprotect.c b/mm/mprotect.c index 8f7dda857ef3..53b6b1b8fb67 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -454,7 +454,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); *pprev = vma_merge(mm, *pprev, start, end, newflags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, vma_anon_name(vma)); + vma->vm_userfaultfd_ctx, anon_vma_name(vma)); if (*pprev) { vma = *pprev; VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY); -- Gitee From cde06060faa1b264b12b3a8ca861f62ae6d8c10c Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 4 Mar 2022 20:28:55 -0800 Subject: [PATCH 06/26] mm: prevent vm_area_struct::anon_name refcount saturation mainline inclusion from mainline-v5.17-rc7 commit 96403e11283def1d1c465c8279514c9a504d8630 issue: #I58DRB CVE: NA Signed-off-by: Yuanzheng Song -------------------------------- A deep process chain with many vmas could grow really high. With default sysctl_max_map_count (64k) and default pid_max (32k) the max number of vmas in the system is 2147450880 and the refcounter has headroom of 1073774592 before it reaches REFCOUNT_SATURATED (3221225472). Therefore it's unlikely that an anonymous name refcounter will overflow with these defaults. Currently the max for pid_max is PID_MAX_LIMIT (4194304) and for sysctl_max_map_count it's INT_MAX (2147483647). In this configuration anon_vma_name refcount overflow becomes theoretically possible (that still require heavy sharing of that anon_vma_name between processes). kref refcounting interface used in anon_vma_name structure will detect a counter overflow when it reaches REFCOUNT_SATURATED value but will only generate a warning and freeze the ref counter. This would lead to the refcounted object never being freed. A determined attacker could leak memory like that but it would be rather expensive and inefficient way to do so. To ensure anon_vma_name refcount does not overflow, stop anon_vma_name sharing when the refcount reaches REFCOUNT_MAX (2147483647), which still leaves INT_MAX/2 (1073741823) values before the counter reaches REFCOUNT_SATURATED. This should provide enough headroom for raising the refcounts temporarily. Link: https://lkml.kernel.org/r/20220223153613.835563-2-surenb@google.com Signed-off-by: Suren Baghdasaryan Suggested-by: Michal Hocko Acked-by: Michal Hocko Cc: Alexey Gladkov Cc: Chris Hyser Cc: Christian Brauner Cc: Colin Cross Cc: Cyrill Gorcunov Cc: Dave Hansen Cc: David Hildenbrand Cc: Davidlohr Bueso Cc: "Eric W. Biederman" Cc: Johannes Weiner Cc: Kees Cook Cc: "Kirill A. Shutemov" Cc: Matthew Wilcox Cc: Peter Collingbourne Cc: Sasha Levin Cc: Sumit Semwal Cc: Vlastimil Babka Cc: Xiaofeng Cao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_inline.h | 18 ++++++++++++++---- mm/madvise.c | 3 +-- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index faba1953a2f1..8939659fc32f 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -150,15 +150,25 @@ static inline void anon_vma_name_put(struct anon_vma_name *anon_name) kref_put(&anon_name->kref, anon_vma_name_free); } +static inline +struct anon_vma_name *anon_vma_name_reuse(struct anon_vma_name *anon_name) +{ + /* Prevent anon_name refcount saturation early on */ + if (kref_read(&anon_name->kref) < REFCOUNT_MAX) { + anon_vma_name_get(anon_name); + return anon_name; + + } + return anon_vma_name_alloc(anon_name->name); +} + static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma, struct vm_area_struct *new_vma) { struct anon_vma_name *anon_name = anon_vma_name(orig_vma); - if (anon_name) { - anon_vma_name_get(anon_name); - new_vma->anon_name = anon_name; - } + if (anon_name) + new_vma->anon_name = anon_vma_name_reuse(anon_name); } static inline void free_anon_vma_name(struct vm_area_struct *vma) diff --git a/mm/madvise.c b/mm/madvise.c index a37f3e0d7913..447bd740497c 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -111,8 +111,7 @@ static int replace_anon_vma_name(struct vm_area_struct *vma, if (anon_vma_name_eq(orig_name, anon_name)) return 0; - anon_vma_name_get(anon_name); - vma->anon_name = anon_name; + vma->anon_name = anon_vma_name_reuse(anon_name); anon_vma_name_put(orig_name); return 0; -- Gitee From af5519dd87c5b4c87bc6dccba63db6fdce34dde2 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 4 Mar 2022 20:28:58 -0800 Subject: [PATCH 07/26] mm: fix use-after-free when anon vma name is used after vma is freed mainline inclusion from mainline-v5.17-rc7 commit 942341dcc5748d9c1fc7009a359fc1916bfe0ef0 issue: #I58DRB CVE: NA Signed-off-by: Yuanzheng Song -------------------------------- When adjacent vmas are being merged it can result in the vma that was originally passed to madvise_update_vma being destroyed. In the current implementation, the name parameter passed to madvise_update_vma points directly to vma->anon_name and it is used after the call to vma_merge. In the cases when vma_merge merges the original vma and destroys it, this might result in UAF. For that the original vma would have to hold the anon_vma_name with the last reference. The following vma would need to contain a different anon_vma_name object with the same string. Such scenario is shown below: madvise_vma_behavior(vma) madvise_update_vma(vma, ..., anon_name == vma->anon_name) vma_merge(vma) __vma_adjust(vma) <-- merges vma with adjacent one vm_area_free(vma) <-- frees the original vma replace_vma_anon_name(anon_name) <-- UAF of vma->anon_name Fix this by raising the name refcount and stabilizing it. Link: https://lkml.kernel.org/r/20220224231834.1481408-3-surenb@google.com Link: https://lkml.kernel.org/r/20220223153613.835563-3-surenb@google.com Fixes: 9a10064f5625 ("mm: add a field to store names for private anonymous memory") Signed-off-by: Suren Baghdasaryan Reported-by: syzbot+aa7b3d4b35f9dc46a366@syzkaller.appspotmail.com Acked-by: Michal Hocko Cc: Alexey Gladkov Cc: Chris Hyser Cc: Christian Brauner Cc: Colin Cross Cc: Cyrill Gorcunov Cc: Dave Hansen Cc: David Hildenbrand Cc: Davidlohr Bueso Cc: "Eric W. Biederman" Cc: Johannes Weiner Cc: Kees Cook Cc: "Kirill A. Shutemov" Cc: Matthew Wilcox Cc: Michal Hocko Cc: Peter Collingbourne Cc: Sasha Levin Cc: Sumit Semwal Cc: Vlastimil Babka Cc: Xiaofeng Cao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/madvise.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mm/madvise.c b/mm/madvise.c index 447bd740497c..b23f6155a453 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -129,6 +129,8 @@ static int replace_anon_vma_name(struct vm_area_struct *vma, /* * Update the vm_flags on region of a vma, splitting it or merging it as * necessary. Must be called with mmap_sem held for writing; + * Caller should ensure anon_name stability by raising its refcount even when + * anon_name belongs to a valid vma because this function might free that vma. */ static int madvise_update_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, @@ -885,6 +887,7 @@ static int madvise_vma_behavior(struct vm_area_struct *vma, unsigned long behavior) { int error; + struct anon_vma_name *anon_name; unsigned long new_flags = vma->vm_flags; switch (behavior) { @@ -947,8 +950,11 @@ static int madvise_vma_behavior(struct vm_area_struct *vma, break; } + anon_name = anon_vma_name(vma); + anon_vma_name_get(anon_name); error = madvise_update_vma(vma, prev, start, end, new_flags, - anon_vma_name(vma)); + anon_name); + anon_vma_name_put(anon_name); out: /* -- Gitee From 25adc1b548e33d2ac3e92ade74bf5268f3a5acab Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Mon, 8 Nov 2021 18:37:21 +0800 Subject: [PATCH 08/26] hamradio: defer ax25 kfree after unregister_netdev stable inclusion from stable-5.10.95 commit 450121075a6a6f1d50f97225d3396315309d61a1 category: bugfix issue: I57ESG CVE: CVE-2022-1195 Signed-off-by: gaochao --------------------------------------- hamradio: defer ax25 kfree after unregister_netdev commit 3e0588c291d6ce225f2b891753ca41d45ba42469 upstream. There is a possible race condition (use-after-free) like below (USE) | (FREE) ax25_sendmsg | ax25_queue_xmit | dev_queue_xmit | __dev_queue_xmit | __dev_xmit_skb | sch_direct_xmit | ... xmit_one | netdev_start_xmit | tty_ldisc_kill __netdev_start_xmit | mkiss_close ax_xmit | kfree ax_encaps | | Even though there are two synchronization primitives before the kfree: 1. wait_for_completion(&ax->dead). This can prevent the race with routines from mkiss_ioctl. However, it cannot stop the routine coming from upper layer, i.e., the ax25_sendmsg. 2. netif_stop_queue(ax->dev). It seems that this line of code aims to halt the transmit queue but it fails to stop the routine that already being xmit. This patch reorder the kfree after the unregister_netdev to avoid the possible UAF as the unregister_netdev() is well synchronized and won't return if there is a running routine. Signed-off-by: Lin Ma Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/hamradio/mkiss.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 920e9f888cc3..7ca013155354 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -792,13 +792,14 @@ static void mkiss_close(struct tty_struct *tty) */ netif_stop_queue(ax->dev); - /* Free all AX25 frame buffers. */ - kfree(ax->rbuff); - kfree(ax->xbuff); - ax->tty = NULL; unregister_netdev(ax->dev); + + /* Free all AX25 frame buffers after unreg. */ + kfree(ax->rbuff); + kfree(ax->xbuff); + free_netdev(ax->dev); } -- Gitee From 3d959df43a57f8b569ff8a3455e3d20443755d4b Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Fri, 17 Dec 2021 10:13:56 +0800 Subject: [PATCH 09/26] hamradio: improve the incomplete fix to avoid NPD stable inclusion from stable-5.10.95 commit 7dd52af1eb5798f590d9d9e1c56ed8f5744ee0ca category: bugfix issue: I57ESG CVE: CVE-2022-1195 Signed-off-by: gaochao --------------------------------------- hamradio: improve the incomplete fix to avoid NPD commit b2f37aead1b82a770c48b5d583f35ec22aabb61e upstream. The previous commit 3e0588c291d6 ("hamradio: defer ax25 kfree after unregister_netdev") reorder the kfree operations and unregister_netdev operation to prevent UAF. This commit improves the previous one by also deferring the nullify of the ax->tty pointer. Otherwise, a NULL pointer dereference bug occurs. Partial of the stack trace is shown below. BUG: kernel NULL pointer dereference, address: 0000000000000538 RIP: 0010:ax_xmit+0x1f9/0x400 ... Call Trace: dev_hard_start_xmit+0xec/0x320 sch_direct_xmit+0xea/0x240 __qdisc_run+0x166/0x5c0 __dev_queue_xmit+0x2c7/0xaf0 ax25_std_establish_data_link+0x59/0x60 ax25_connect+0x3a0/0x500 ? security_socket_connect+0x2b/0x40 __sys_connect+0x96/0xc0 ? __hrtimer_init+0xc0/0xc0 ? common_nsleep+0x2e/0x50 ? switch_fpu_return+0x139/0x1a0 __x64_sys_connect+0x11/0x20 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The crash point is shown as below static void ax_encaps(...) { ... set_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags); // ax->tty = NULL! ... } By placing the nullify action after the unregister_netdev, the ax->tty pointer won't be assigned as NULL net_device framework layer is well synchronized. Signed-off-by: Lin Ma Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/hamradio/mkiss.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 7ca013155354..63502a85a975 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -792,14 +792,14 @@ static void mkiss_close(struct tty_struct *tty) */ netif_stop_queue(ax->dev); - ax->tty = NULL; - unregister_netdev(ax->dev); /* Free all AX25 frame buffers after unreg. */ kfree(ax->rbuff); kfree(ax->xbuff); + ax->tty = NULL; + free_netdev(ax->dev); } -- Gitee From 5577defbc77971308dbe099e5c15ac640c1bc1b8 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Thu, 17 Feb 2022 09:43:03 +0800 Subject: [PATCH 10/26] drivers: hamradio: 6pack: fix UAF bug caused by mod_timer() stable inclusion from stable-5.10.110 commit f67a1400788f550d201c71aeaf56706afe57f0da category: bugfix issue: I57ESG CVE: CVE-2022-1195 Signed-off-by: gaochao --------------------------------------- drivers: hamradio: 6pack: fix UAF bug caused by mod_timer() commit efe4186e6a1b54bf38b9e05450d43b0da1fd7739 upstream. When a 6pack device is detaching, the sixpack_close() will act to cleanup necessary resources. Although del_timer_sync() in sixpack_close() won't return if there is an active timer, one could use mod_timer() in sp_xmit_on_air() to wake up timer again by calling userspace syscall such as ax25_sendmsg(), ax25_connect() and ax25_ioctl(). This unexpected waked handler, sp_xmit_on_air(), realizes nothing about the undergoing cleanup and may still call pty_write() to use driver layer resources that have already been released. One of the possible race conditions is shown below: (USE) | (FREE) ax25_sendmsg() | ax25_queue_xmit() | ... | sp_xmit() | sp_encaps() | sixpack_close() sp_xmit_on_air() | del_timer_sync(&sp->tx_t) mod_timer(&sp->tx_t,...) | ... | unregister_netdev() | ... (wait a while) | tty_release() | tty_release_struct() | release_tty() sp_xmit_on_air() | tty_kref_put(tty_struct) //FREE pty_write(tty_struct) //USE | ... The corresponding fail log is shown below: =============================================================== BUG: KASAN: use-after-free in __run_timers.part.0+0x170/0x470 Write of size 8 at addr ffff88800a652ab8 by task swapper/2/0 ... Call Trace: ... queue_work_on+0x3f/0x50 pty_write+0xcd/0xe0pty_write+0xcd/0xe0 sp_xmit_on_air+0xb2/0x1f0 call_timer_fn+0x28/0x150 __run_timers.part.0+0x3c2/0x470 run_timer_softirq+0x3b/0x80 __do_softirq+0xf1/0x380 ... This patch reorders the del_timer_sync() after the unregister_netdev() to avoid UAF bugs. Because the unregister_netdev() is well synchronized, it flushs out any pending queues, waits the refcount of net_device decreases to zero and removes net_device from kernel. There is not any running routines after executing unregister_netdev(). Therefore, we could not arouse timer from userspace again. Signed-off-by: Duoming Zhou Reviewed-by: Lin Ma Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/hamradio/6pack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index bd0beb16d68a..02d6f3ad9aca 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -674,14 +674,14 @@ static void sixpack_close(struct tty_struct *tty) */ netif_stop_queue(sp->dev); + unregister_netdev(sp->dev); + del_timer_sync(&sp->tx_t); del_timer_sync(&sp->resync_t); /* Free all 6pack frame buffers. */ kfree(sp->rbuff); kfree(sp->xbuff); - - unregister_netdev(sp->dev); } /* Perform I/O control on an active 6pack channel. */ -- Gitee From 61849fb28a1703c8504a28cd9daf5e3d8e5f649a Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Mon, 8 Nov 2021 18:37:59 +0800 Subject: [PATCH 11/26] hamradio: defer 6pack kfree after unregister_netdev stable inclusion from stable-5.10.112 commit 80a4df14643f78b14f1e8e2c7f9ca3da41b01654 category: bugfix issue: I57ESG CVE: CVE-2022-1195 Signed-off-by: gaochao --------------------------------------- hamradio: defer 6pack kfree after unregister_netdev commit 0b9111922b1f399aba6ed1e1b8f2079c3da1aed8 upstream. There is a possible race condition (use-after-free) like below (USE) | (FREE) dev_queue_xmit | __dev_queue_xmit | __dev_xmit_skb | sch_direct_xmit | ... xmit_one | netdev_start_xmit | tty_ldisc_kill __netdev_start_xmit | 6pack_close sp_xmit | kfree sp_encaps | | According to the patch "defer ax25 kfree after unregister_netdev", this patch reorder the kfree after the unregister_netdev to avoid the possible UAF as the unregister_netdev() is well synchronized and won't return if there is a running routine. Signed-off-by: Lin Ma Signed-off-by: David S. Miller Signed-off-by: Xu Jia Signed-off-by: Greg Kroah-Hartman --- drivers/net/hamradio/6pack.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 02d6f3ad9aca..82507a688efe 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -679,9 +679,11 @@ static void sixpack_close(struct tty_struct *tty) del_timer_sync(&sp->tx_t); del_timer_sync(&sp->resync_t); - /* Free all 6pack frame buffers. */ + /* Free all 6pack frame buffers after unreg. */ kfree(sp->rbuff); kfree(sp->xbuff); + + free_netdev(sp->dev); } /* Perform I/O control on an active 6pack channel. */ -- Gitee From 25e2c34a6438865510517f58c95e69ffac541a49 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Thu, 11 Nov 2021 22:14:02 +0800 Subject: [PATCH 12/26] hamradio: remove needs_free_netdev to avoid UAF stable inclusion from stable-5.10.112 commit cfa98ffc42f16a432b77e438e2fefcdb942eeb04 category: bugfix issue: I57ESG CVE: CVE-2022-1195 Signed-off-by: gaochao --------------------------------------- hamradio: remove needs_free_netdev to avoid UAF commit 81b1d548d00bcd028303c4f3150fa753b9b8aa71 upstream. The former patch "defer 6pack kfree after unregister_netdev" reorders the kfree of two buffer after the unregister_netdev to prevent the race condition. It also adds free_netdev() function in sixpack_close(), which is a direct copy from the similar code in mkiss_close(). However, in sixpack driver, the flag needs_free_netdev is set to true in sp_setup(), hence the unregister_netdev() will free the netdev automatically. Therefore, as the sp is netdev_priv, use-after-free occurs. This patch removes the needs_free_netdev = true and just let the free_netdev to finish this deallocation task. Fixes: 0b9111922b1f ("hamradio: defer 6pack kfree after unregister_netdev") Signed-off-by: Lin Ma Link: https://lore.kernel.org/r/20211111141402.7551-1-linma@zju.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Xu Jia Signed-off-by: Greg Kroah-Hartman --- drivers/net/hamradio/6pack.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 82507a688efe..83dc1c2c3b84 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -311,7 +311,6 @@ static void sp_setup(struct net_device *dev) { /* Finish setting up the DEVICE info. */ dev->netdev_ops = &sp_netdev_ops; - dev->needs_free_netdev = true; dev->mtu = SIXP_MTU; dev->hard_header_len = AX25_MAX_HEADER_LEN; dev->header_ops = &ax25_header_ops; -- Gitee From 17e02d049a8fe96adad3551c902ce6a1388ae6ee Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 19 Oct 2021 13:27:10 +0100 Subject: [PATCH 13/26] drm/i915: Flush TLBs before releasing backing store stable inclusion from stable-5.10.95 commit 6a6acf927895c38bdd9f3cd76b8dbfc25ac03e88 category: bugfix issue: I57ESG CVE: CVE-2022-0330 Signed-off-by: gaochao --------------------------------------- drm/i915: Flush TLBs before releasing backing store commit 7938d61591d33394a21bdd7797a245b65428f44c upstream. We need to flush TLBs before releasing backing store otherwise userspace is able to encounter stale entries if a) it is not declaring access to certain buffers and b) it races with the backing store release from a such undeclared execution already executing on the GPU in parallel. The approach taken is to mark any buffer objects which were ever bound to the GPU and to trigger a serialized TLB flush when their backing store is released. Alternatively the flushing could be done on VMA unbind, at which point we would be able to ascertain whether there is potential a parallel GPU execution (which could race), but essentially it boils down to paying the cost of TLB flushes potentially needlessly at VMA unbind time (when the backing store is not known to be going away so not needed for safety), versus potentially needlessly at backing store relase time (since we at that point cannot tell whether there is anything executing on the GPU which uses that object). Thereforce simplicity of implementation has been chosen for now with scope to benchmark and refine later as required. Signed-off-by: Tvrtko Ursulin Reported-by: Sushma Venkatesh Reddy Reviewed-by: Daniel Vetter Acked-by: Dave Airlie Cc: Daniel Vetter Cc: Jon Bloomfield Cc: Joonas Lahtinen Cc: Jani Nikula Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/i915/gem/i915_gem_object_types.h | 1 + drivers/gpu/drm/i915/gem/i915_gem_pages.c | 10 ++ drivers/gpu/drm/i915/gt/intel_gt.c | 102 ++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_gt.h | 2 + drivers/gpu/drm/i915/gt/intel_gt_types.h | 2 + drivers/gpu/drm/i915/i915_reg.h | 11 ++ drivers/gpu/drm/i915/i915_vma.c | 3 + drivers/gpu/drm/i915/intel_uncore.c | 26 ++++- drivers/gpu/drm/i915/intel_uncore.h | 2 + 9 files changed, 155 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index d6711caa7f39..dbc88fc7136b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -159,6 +159,7 @@ struct drm_i915_gem_object { #define I915_BO_ALLOC_VOLATILE BIT(1) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | I915_BO_ALLOC_VOLATILE) #define I915_BO_READONLY BIT(2) +#define I915_BO_WAS_BOUND_BIT 3 /* * Is the object to be mapped as read-only to the GPU diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index f60ca6dc911f..27d24cb38c0d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -10,6 +10,8 @@ #include "i915_gem_lmem.h" #include "i915_gem_mman.h" +#include "gt/intel_gt.h" + void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct sg_table *pages, unsigned int sg_page_sizes) @@ -186,6 +188,14 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) __i915_gem_object_reset_page_iter(obj); obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; + if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + intel_wakeref_t wakeref; + + with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref) + intel_gt_invalidate_tlbs(&i915->gt); + } + return pages; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 39b428c5049c..6615eb5147e2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -26,6 +26,8 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) spin_lock_init(>->irq_lock); + mutex_init(>->tlb_invalidate_lock); + INIT_LIST_HEAD(>->closed_vma); spin_lock_init(>->closed_lock); @@ -661,3 +663,103 @@ void intel_gt_info_print(const struct intel_gt_info *info, intel_sseu_dump(&info->sseu, p); } + +struct reg_and_bit { + i915_reg_t reg; + u32 bit; +}; + +static struct reg_and_bit +get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, + const i915_reg_t *regs, const unsigned int num) +{ + const unsigned int class = engine->class; + struct reg_and_bit rb = { }; + + if (drm_WARN_ON_ONCE(&engine->i915->drm, + class >= num || !regs[class].reg)) + return rb; + + rb.reg = regs[class]; + if (gen8 && class == VIDEO_DECODE_CLASS) + rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */ + else + rb.bit = engine->instance; + + rb.bit = BIT(rb.bit); + + return rb; +} + +void intel_gt_invalidate_tlbs(struct intel_gt *gt) +{ + static const i915_reg_t gen8_regs[] = { + [RENDER_CLASS] = GEN8_RTCR, + [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */ + [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR, + [COPY_ENGINE_CLASS] = GEN8_BTCR, + }; + static const i915_reg_t gen12_regs[] = { + [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR, + [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR, + [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR, + [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR, + }; + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct intel_engine_cs *engine; + enum intel_engine_id id; + const i915_reg_t *regs; + unsigned int num = 0; + + if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) + return; + + if (INTEL_GEN(i915) == 12) { + regs = gen12_regs; + num = ARRAY_SIZE(gen12_regs); + } else if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) <= 11) { + regs = gen8_regs; + num = ARRAY_SIZE(gen8_regs); + } else if (INTEL_GEN(i915) < 8) { + return; + } + + if (drm_WARN_ONCE(&i915->drm, !num, + "Platform does not implement TLB invalidation!")) + return; + + GEM_TRACE("\n"); + + assert_rpm_wakelock_held(&i915->runtime_pm); + + mutex_lock(>->tlb_invalidate_lock); + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + for_each_engine(engine, gt, id) { + /* + * HW architecture suggest typical invalidation time at 40us, + * with pessimistic cases up to 100us and a recommendation to + * cap at 1ms. We go a bit higher just in case. + */ + const unsigned int timeout_us = 100; + const unsigned int timeout_ms = 4; + struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + intel_uncore_write_fw(uncore, rb.reg, rb.bit); + if (__intel_wait_for_register_fw(uncore, + rb.reg, rb.bit, 0, + timeout_us, timeout_ms, + NULL)) + drm_err_ratelimited(>->i915->drm, + "%s TLB invalidation did not complete in %ums!\n", + engine->name, timeout_ms); + } + + intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL); + mutex_unlock(>->tlb_invalidate_lock); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 9157c7411f60..d9a1168172ae 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -77,4 +77,6 @@ static inline bool intel_gt_is_wedged(const struct intel_gt *gt) void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p); +void intel_gt_invalidate_tlbs(struct intel_gt *gt); + #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 6d39a4a11bf3..78c061614d8b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -36,6 +36,8 @@ struct intel_gt { struct intel_uc uc; + struct mutex tlb_invalidate_lock; + struct intel_gt_timelines { spinlock_t lock; /* protects active_list */ struct list_head active_list; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ce8c91c5fdd3..12488996a7f4 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2639,6 +2639,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1 << 28) #define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1 << 24) +#define GEN8_RTCR _MMIO(0x4260) +#define GEN8_M1TCR _MMIO(0x4264) +#define GEN8_M2TCR _MMIO(0x4268) +#define GEN8_BTCR _MMIO(0x426c) +#define GEN8_VTCR _MMIO(0x4270) + #if 0 #define PRB0_TAIL _MMIO(0x2030) #define PRB0_HEAD _MMIO(0x2034) @@ -2728,6 +2734,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define FAULT_VA_HIGH_BITS (0xf << 0) #define FAULT_GTT_SEL (1 << 4) +#define GEN12_GFX_TLB_INV_CR _MMIO(0xced8) +#define GEN12_VD_TLB_INV_CR _MMIO(0xcedc) +#define GEN12_VE_TLB_INV_CR _MMIO(0xcee0) +#define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4) + #define GEN12_AUX_ERR_DBG _MMIO(0x43f4) #define FPGA_DBG _MMIO(0x42300) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index caa9b041616b..50a86fd89d00 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -439,6 +439,9 @@ int i915_vma_bind(struct i915_vma *vma, vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags); } + if (vma->obj) + set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags); + atomic_or(bind_flags, &vma->flags); return 0; } diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 97ded2a59cf4..01849840ac56 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -694,7 +694,8 @@ void intel_uncore_forcewake_get__locked(struct intel_uncore *uncore, } static void __intel_uncore_forcewake_put(struct intel_uncore *uncore, - enum forcewake_domains fw_domains) + enum forcewake_domains fw_domains, + bool delayed) { struct intel_uncore_forcewake_domain *domain; unsigned int tmp; @@ -709,7 +710,11 @@ static void __intel_uncore_forcewake_put(struct intel_uncore *uncore, continue; } - uncore->funcs.force_wake_put(uncore, domain->mask); + if (delayed && + !(domain->uncore->fw_domains_timer & domain->mask)) + fw_domain_arm_timer(domain); + else + uncore->funcs.force_wake_put(uncore, domain->mask); } } @@ -730,7 +735,20 @@ void intel_uncore_forcewake_put(struct intel_uncore *uncore, return; spin_lock_irqsave(&uncore->lock, irqflags); - __intel_uncore_forcewake_put(uncore, fw_domains); + __intel_uncore_forcewake_put(uncore, fw_domains, false); + spin_unlock_irqrestore(&uncore->lock, irqflags); +} + +void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore, + enum forcewake_domains fw_domains) +{ + unsigned long irqflags; + + if (!uncore->funcs.force_wake_put) + return; + + spin_lock_irqsave(&uncore->lock, irqflags); + __intel_uncore_forcewake_put(uncore, fw_domains, true); spin_unlock_irqrestore(&uncore->lock, irqflags); } @@ -772,7 +790,7 @@ void intel_uncore_forcewake_put__locked(struct intel_uncore *uncore, if (!uncore->funcs.force_wake_put) return; - __intel_uncore_forcewake_put(uncore, fw_domains); + __intel_uncore_forcewake_put(uncore, fw_domains, false); } void assert_forcewakes_inactive(struct intel_uncore *uncore) diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index c4b22d9d0b45..034f04e0de8b 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -211,6 +211,8 @@ void intel_uncore_forcewake_get(struct intel_uncore *uncore, enum forcewake_domains domains); void intel_uncore_forcewake_put(struct intel_uncore *uncore, enum forcewake_domains domains); +void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore, + enum forcewake_domains domains); void intel_uncore_forcewake_flush(struct intel_uncore *uncore, enum forcewake_domains fw_domains); -- Gitee From b6e4500fd049adf9bde1233a8061052aa2c02b9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20L=C3=B6hle?= Date: Fri, 4 Feb 2022 15:11:37 +0000 Subject: [PATCH 14/26] mmc: block: fix read single on recovery logic stable inclusion from stable-5.10.102 commit ab2b4e65a130d67478bd5b35ca9004b2075805fa category: bugfix issue: I57ESG CVE: CVE-2022-20008 Signed-off-by: gaochao --------------------------------------- mmc: block: fix read single on recovery logic commit 54309fde1a352ad2674ebba004a79f7d20b9f037 upstream. On reads with MMC_READ_MULTIPLE_BLOCK that fail, the recovery handler will use MMC_READ_SINGLE_BLOCK for each of the blocks, up to MMC_READ_SINGLE_RETRIES times each. The logic for this is fixed to never report unsuccessful reads as success to the block layer. On command error with retries remaining, blk_update_request was called with whatever value error was set last to. In case it was last set to BLK_STS_OK (default), the read will be reported as success, even though there was no data read from the device. This could happen on a CRC mismatch for the response, a card rejecting the command (e.g. again due to a CRC mismatch). In case it was last set to BLK_STS_IOERR, the error is reported correctly, but no retries will be attempted. Fixes: 81196976ed946c ("mmc: block: Add blk-mq support") Cc: stable@vger.kernel.org Signed-off-by: Christian Loehle Reviewed-by: Adrian Hunter Link: https://lore.kernel.org/r/bc706a6ab08c4fe2834ba0c05a804672@hyperstone.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/block.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 94caee49da99..99b981a05b6c 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1642,31 +1642,31 @@ static void mmc_blk_read_single(struct mmc_queue *mq, struct request *req) struct mmc_card *card = mq->card; struct mmc_host *host = card->host; blk_status_t error = BLK_STS_OK; - int retries = 0; do { u32 status; int err; + int retries = 0; - mmc_blk_rw_rq_prep(mqrq, card, 1, mq); + while (retries++ <= MMC_READ_SINGLE_RETRIES) { + mmc_blk_rw_rq_prep(mqrq, card, 1, mq); - mmc_wait_for_req(host, mrq); + mmc_wait_for_req(host, mrq); - err = mmc_send_status(card, &status); - if (err) - goto error_exit; - - if (!mmc_host_is_spi(host) && - !mmc_ready_for_data(status)) { - err = mmc_blk_fix_state(card, req); + err = mmc_send_status(card, &status); if (err) goto error_exit; - } - if (mrq->cmd->error && retries++ < MMC_READ_SINGLE_RETRIES) - continue; + if (!mmc_host_is_spi(host) && + !mmc_ready_for_data(status)) { + err = mmc_blk_fix_state(card, req); + if (err) + goto error_exit; + } - retries = 0; + if (!mrq->cmd->error) + break; + } if (mrq->cmd->error || mrq->data->error || -- Gitee From b2a3dc7f7577822d428486820b91b360f7d140b9 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Sat, 19 Mar 2022 02:08:37 +0100 Subject: [PATCH 15/26] ptrace: Check PTRACE_O_SUSPEND_SECCOMP permission on PTRACE_SEIZE stable inclusion from stable-5.10.110 commit 5a41a3033a9344d7683340e3d83f5435ffb06501 category: bugfix issue: I57ESG CVE: CVE-2022-30594 Signed-off-by: gaochao --------------------------------------- ptrace: Check PTRACE_O_SUSPEND_SECCOMP permission on PTRACE_SEIZE commit ee1fee900537b5d9560e9f937402de5ddc8412f3 upstream. Setting PTRACE_O_SUSPEND_SECCOMP is supposed to be a highly privileged operation because it allows the tracee to completely bypass all seccomp filters on kernels with CONFIG_CHECKPOINT_RESTORE=y. It is only supposed to be settable by a process with global CAP_SYS_ADMIN, and only if that process is not subject to any seccomp filters at all. However, while these permission checks were done on the PTRACE_SETOPTIONS path, they were missing on the PTRACE_SEIZE path, which also sets user-specified ptrace flags. Move the permissions checks out into a helper function and let both ptrace_attach() and ptrace_setoptions() call it. Cc: stable@kernel.org Fixes: 13c4a90119d2 ("seccomp: add ptrace options for suspend/resume") Signed-off-by: Jann Horn Link: https://lkml.kernel.org/r/20220319010838.1386861-1-jannh@google.com Signed-off-by: Eric W. Biederman Signed-off-by: Greg Kroah-Hartman --- kernel/ptrace.c | 47 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index eb4d04cb3aaf..d99f73f83bf5 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -370,6 +370,26 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode) return !err; } +static int check_ptrace_options(unsigned long data) +{ + if (data & ~(unsigned long)PTRACE_O_MASK) + return -EINVAL; + + if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) { + if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) || + !IS_ENABLED(CONFIG_SECCOMP)) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (seccomp_mode(¤t->seccomp) != SECCOMP_MODE_DISABLED || + current->ptrace & PT_SUSPEND_SECCOMP) + return -EPERM; + } + return 0; +} + static int ptrace_attach(struct task_struct *task, long request, unsigned long addr, unsigned long flags) @@ -381,8 +401,16 @@ static int ptrace_attach(struct task_struct *task, long request, if (seize) { if (addr != 0) goto out; + /* + * This duplicates the check in check_ptrace_options() because + * ptrace_attach() and ptrace_setoptions() have historically + * used different error codes for unknown ptrace options. + */ if (flags & ~(unsigned long)PTRACE_O_MASK) goto out; + retval = check_ptrace_options(flags); + if (retval) + return retval; flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT); } else { flags = PT_PTRACED; @@ -655,22 +683,11 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds static int ptrace_setoptions(struct task_struct *child, unsigned long data) { unsigned flags; + int ret; - if (data & ~(unsigned long)PTRACE_O_MASK) - return -EINVAL; - - if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) { - if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) || - !IS_ENABLED(CONFIG_SECCOMP)) - return -EINVAL; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (seccomp_mode(¤t->seccomp) != SECCOMP_MODE_DISABLED || - current->ptrace & PT_SUSPEND_SECCOMP) - return -EPERM; - } + ret = check_ptrace_options(data); + if (ret) + return ret; /* Avoid intermediate state when all opts are cleared */ flags = child->ptrace; -- Gitee From 102861bf025c20718afda229bf3130fe685569f4 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Sat, 26 Mar 2022 18:43:46 +0800 Subject: [PATCH 16/26] net/x25: Fix null-ptr-deref caused by x25_disconnect stable inclusion from stable-5.10.110 commit 5c94b6205e87411dbe9dc1ca088eb36b8837fb47 category: bugfix issue: I57ESG CVE: CVE-2022-1516 Signed-off-by: gaochao --------------------------------------- net/x25: Fix null-ptr-deref caused by x25_disconnect [ Upstream commit 7781607938c8371d4c2b243527430241c62e39c2 ] When the link layer is terminating, x25->neighbour will be set to NULL in x25_disconnect(). As a result, it could cause null-ptr-deref bugs in x25_sendmsg(),x25_recvmsg() and x25_connect(). One of the bugs is shown below. (Thread 1) | (Thread 2) x25_link_terminated() | x25_recvmsg() x25_kill_by_neigh() | ... x25_disconnect() | lock_sock(sk) ... | ... x25->neighbour = NULL //(1) | ... | x25->neighbour->extended //(2) The code sets NULL to x25->neighbour in position (1) and dereferences x25->neighbour in position (2), which could cause null-ptr-deref bug. This patch adds lock_sock() in x25_kill_by_neigh() in order to synchronize with x25_sendmsg(), x25_recvmsg() and x25_connect(). What`s more, the sock held by lock_sock() is not NULL, because it is extracted from x25_list and uses x25_list_lock to synchronize. Fixes: 4becb7ee5b3d ("net/x25: Fix x25_neigh refcnt leak when x25 disconnect") Signed-off-by: Duoming Zhou Reviewed-by: Lin Ma Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/x25/af_x25.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 03ed170b8125..d231d4620c38 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1775,10 +1775,15 @@ void x25_kill_by_neigh(struct x25_neigh *nb) write_lock_bh(&x25_list_lock); - sk_for_each(s, &x25_list) - if (x25_sk(s)->neighbour == nb) + sk_for_each(s, &x25_list) { + if (x25_sk(s)->neighbour == nb) { + write_unlock_bh(&x25_list_lock); + lock_sock(s); x25_disconnect(s, ENETUNREACH, 0, 0); - + release_sock(s); + write_lock_bh(&x25_list_lock); + } + } write_unlock_bh(&x25_list_lock); /* Remove any related forwards */ -- Gitee From 74b296f63ff3efe895e1787d989c54be6651fd7b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 26 Oct 2021 14:01:46 +0200 Subject: [PATCH 17/26] x86,bugs: Unconditionally allow spectre_v2=retpoline,amd stable inclusion from stable-5.10.105 commit 206cfe2dac3ed79bcd1c759f05400593a5f55488 category: bugfix issue: I57ESG CVE: CVE-2022-0001 Signed-off-by: gaochao --------------------------------------- x86,bugs: Unconditionally allow spectre_v2=retpoline,amd commit f8a66d608a3e471e1202778c2a36cbdc96bae73b upstream. Currently Linux prevents usage of retpoline,amd on !AMD hardware, this is unfriendly and gets in the way of testing. Remove this restriction. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Borislav Petkov Acked-by: Josh Poimboeuf Tested-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20211026120310.487348118@infradead.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d41b70fe4918..d4e7a8755834 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -845,13 +845,6 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return SPECTRE_V2_CMD_AUTO; } - if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD && - boot_cpu_data.x86_vendor != X86_VENDOR_HYGON && - boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { - pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); - return SPECTRE_V2_CMD_AUTO; - } - spec_v2_print_cond(mitigation_options[i].option, mitigation_options[i].secure); return cmd; -- Gitee From 14990027610204dc9e3b235a1bb6f1dda6efb7e5 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Wed, 16 Feb 2022 20:57:00 +0100 Subject: [PATCH 18/26] x86/speculation: Rename RETPOLINE_AMD to RETPOLINE_LFENCE stable inclusion from stable-5.10.105 commit f38774bb6e231d647d40ceeb8ddf9082eabde667 category: bugfix issue: I57ESG CVE: CVE-2022-0001 Signed-off-by: gaochao --------------------------------------- x86/speculation: Rename RETPOLINE_AMD to RETPOLINE_LFENCE commit d45476d9832409371537013ebdd8dc1a7781f97a upstream. The RETPOLINE_AMD name is unfortunate since it isn't necessarily AMD only, in fact Hygon also uses it. Furthermore it will likely be sufficient for some Intel processors. Therefore rename the thing to RETPOLINE_LFENCE to better describe what it is. Add the spectre_v2=retpoline,lfence option as an alias to spectre_v2=retpoline,amd to preserve existing setups. However, the output of /sys/devices/system/cpu/vulnerabilities/spectre_v2 will be changed. [ bp: Fix typos, massage. ] Co-developed-by: Josh Poimboeuf Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner [fllinden@amazon.com: backported to 5.10] Signed-off-by: Frank van der Linden Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/include/asm/nospec-branch.h | 12 +++++----- arch/x86/kernel/cpu/bugs.c | 29 +++++++++++++++--------- tools/arch/x86/include/asm/cpufeatures.h | 2 +- 4 files changed, 26 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index dad350d42ecf..3b407f46f1a0 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -204,7 +204,7 @@ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index cb9ad6b73973..fb6899cb22b0 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -82,7 +82,7 @@ #ifdef CONFIG_RETPOLINE ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE #else jmp *%\reg #endif @@ -92,7 +92,7 @@ #ifdef CONFIG_RETPOLINE ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \ __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_LFENCE #else call *%\reg #endif @@ -134,7 +134,7 @@ "lfence;\n" \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ - X86_FEATURE_RETPOLINE_AMD) + X86_FEATURE_RETPOLINE_LFENCE) # define THUNK_TARGET(addr) [thunk_target] "r" (addr) @@ -164,7 +164,7 @@ "lfence;\n" \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ - X86_FEATURE_RETPOLINE_AMD) + X86_FEATURE_RETPOLINE_LFENCE) # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #endif @@ -176,8 +176,8 @@ /* The Spectre V2 mitigation variants */ enum spectre_v2_mitigation { SPECTRE_V2_NONE, - SPECTRE_V2_RETPOLINE_GENERIC, - SPECTRE_V2_RETPOLINE_AMD, + SPECTRE_V2_RETPOLINE, + SPECTRE_V2_LFENCE, SPECTRE_V2_IBRS_ENHANCED, }; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d4e7a8755834..b0b50240cb05 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -627,7 +627,7 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_FORCE, SPECTRE_V2_CMD_RETPOLINE, SPECTRE_V2_CMD_RETPOLINE_GENERIC, - SPECTRE_V2_CMD_RETPOLINE_AMD, + SPECTRE_V2_CMD_RETPOLINE_LFENCE, }; enum spectre_v2_user_cmd { @@ -787,8 +787,8 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", - [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", - [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", + [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines", + [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE", [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", }; @@ -800,7 +800,8 @@ static const struct { { "off", SPECTRE_V2_CMD_NONE, false }, { "on", SPECTRE_V2_CMD_FORCE, true }, { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, - { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false }, + { "retpoline,lfence", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false }, { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, { "auto", SPECTRE_V2_CMD_AUTO, false }, }; @@ -838,13 +839,19 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) } if ((cmd == SPECTRE_V2_CMD_RETPOLINE || - cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || + cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE || cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && !IS_ENABLED(CONFIG_RETPOLINE)) { pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } + if ((cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE) && + !boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { + pr_err("%s selected, but CPU doesn't have a serializing LFENCE. Switching to AUTO select\n", mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + spec_v2_print_cond(mitigation_options[i].option, mitigation_options[i].secure); return cmd; @@ -879,9 +886,9 @@ static void __init spectre_v2_select_mitigation(void) if (IS_ENABLED(CONFIG_RETPOLINE)) goto retpoline_auto; break; - case SPECTRE_V2_CMD_RETPOLINE_AMD: + case SPECTRE_V2_CMD_RETPOLINE_LFENCE: if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_amd; + goto retpoline_lfence; break; case SPECTRE_V2_CMD_RETPOLINE_GENERIC: if (IS_ENABLED(CONFIG_RETPOLINE)) @@ -898,17 +905,17 @@ static void __init spectre_v2_select_mitigation(void) retpoline_auto: if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { - retpoline_amd: + retpoline_lfence: if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); goto retpoline_generic; } - mode = SPECTRE_V2_RETPOLINE_AMD; - setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD); + mode = SPECTRE_V2_LFENCE; + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE); setup_force_cpu_cap(X86_FEATURE_RETPOLINE); } else { retpoline_generic: - mode = SPECTRE_V2_RETPOLINE_GENERIC; + mode = SPECTRE_V2_RETPOLINE; setup_force_cpu_cap(X86_FEATURE_RETPOLINE); } diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index dad350d42ecf..b58730cc12e8 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -204,7 +204,7 @@ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCEs for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ -- Gitee From 408976ca439742a0e798b118e7e763c712a9aafc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Feb 2022 20:57:01 +0100 Subject: [PATCH 19/26] x86/speculation: Add eIBRS + Retpoline options stable inclusion from stable-5.10.105 commit a6a119d647ad1f73067d3cffb43104df3f920bcc category: bugfix issue: I57ESG CVE: CVE-2022-0001 Signed-off-by: gaochao --------------------------------------- x86/speculation: Add eIBRS + Retpoline options commit 1e19da8522c81bf46b335f84137165741e0d82b7 upstream. Thanks to the chaps at VUsec it is now clear that eIBRS is not sufficient, therefore allow enabling of retpolines along with eIBRS. Add spectre_v2=eibrs, spectre_v2=eibrs,lfence and spectre_v2=eibrs,retpoline options to explicitly pick your preferred means of mitigation. Since there's new mitigations there's also user visible changes in /sys/devices/system/cpu/vulnerabilities/spectre_v2 to reflect these new mitigations. [ bp: Massage commit message, trim error messages, do more precise eIBRS mode checking. ] Co-developed-by: Josh Poimboeuf Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Patrick Colp Reviewed-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/nospec-branch.h | 4 +- arch/x86/kernel/cpu/bugs.c | 133 +++++++++++++++++++-------- 2 files changed, 99 insertions(+), 38 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index fb6899cb22b0..4d0f5386e637 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -178,7 +178,9 @@ enum spectre_v2_mitigation { SPECTRE_V2_NONE, SPECTRE_V2_RETPOLINE, SPECTRE_V2_LFENCE, - SPECTRE_V2_IBRS_ENHANCED, + SPECTRE_V2_EIBRS, + SPECTRE_V2_EIBRS_RETPOLINE, + SPECTRE_V2_EIBRS_LFENCE, }; /* The indirect branch speculation control variants */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b0b50240cb05..3a5c6724ea06 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -628,6 +628,9 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_RETPOLINE, SPECTRE_V2_CMD_RETPOLINE_GENERIC, SPECTRE_V2_CMD_RETPOLINE_LFENCE, + SPECTRE_V2_CMD_EIBRS, + SPECTRE_V2_CMD_EIBRS_RETPOLINE, + SPECTRE_V2_CMD_EIBRS_LFENCE, }; enum spectre_v2_user_cmd { @@ -700,6 +703,13 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) return SPECTRE_V2_USER_CMD_AUTO; } +static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) +{ + return (mode == SPECTRE_V2_EIBRS || + mode == SPECTRE_V2_EIBRS_RETPOLINE || + mode == SPECTRE_V2_EIBRS_LFENCE); +} + static void __init spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) { @@ -767,7 +777,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) */ if (!boot_cpu_has(X86_FEATURE_STIBP) || !smt_possible || - spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + spectre_v2_in_eibrs_mode(spectre_v2_enabled)) return; /* @@ -789,7 +799,9 @@ static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines", [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE", - [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", + [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS", + [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE", + [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines", }; static const struct { @@ -803,6 +815,9 @@ static const struct { { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false }, { "retpoline,lfence", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false }, { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, + { "eibrs", SPECTRE_V2_CMD_EIBRS, false }, + { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false }, + { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false }, { "auto", SPECTRE_V2_CMD_AUTO, false }, }; @@ -840,15 +855,29 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) if ((cmd == SPECTRE_V2_CMD_RETPOLINE || cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE || - cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && + cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC || + cmd == SPECTRE_V2_CMD_EIBRS_LFENCE || + cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) && !IS_ENABLED(CONFIG_RETPOLINE)) { - pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option); + pr_err("%s selected but not compiled in. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if ((cmd == SPECTRE_V2_CMD_EIBRS || + cmd == SPECTRE_V2_CMD_EIBRS_LFENCE || + cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) && + !boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) { + pr_err("%s selected but CPU doesn't have eIBRS. Switching to AUTO select\n", + mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } - if ((cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE) && + if ((cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE || + cmd == SPECTRE_V2_CMD_EIBRS_LFENCE) && !boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { - pr_err("%s selected, but CPU doesn't have a serializing LFENCE. Switching to AUTO select\n", mitigation_options[i].option); + pr_err("%s selected, but CPU doesn't have a serializing LFENCE. Switching to AUTO select\n", + mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } @@ -857,6 +886,25 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return cmd; } +static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) +{ + if (!IS_ENABLED(CONFIG_RETPOLINE)) { + pr_err("Kernel not compiled with retpoline; no mitigation available!"); + return SPECTRE_V2_NONE; + } + + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { + if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { + pr_err("LFENCE not serializing, switching to generic retpoline\n"); + return SPECTRE_V2_RETPOLINE; + } + return SPECTRE_V2_LFENCE; + } + + return SPECTRE_V2_RETPOLINE; +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -877,49 +925,60 @@ static void __init spectre_v2_select_mitigation(void) case SPECTRE_V2_CMD_FORCE: case SPECTRE_V2_CMD_AUTO: if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) { - mode = SPECTRE_V2_IBRS_ENHANCED; - /* Force it so VMEXIT will restore correctly */ - x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - goto specv2_set_mode; + mode = SPECTRE_V2_EIBRS; + break; } - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_auto; + + mode = spectre_v2_select_retpoline(); break; + case SPECTRE_V2_CMD_RETPOLINE_LFENCE: - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_lfence; + mode = SPECTRE_V2_LFENCE; break; + case SPECTRE_V2_CMD_RETPOLINE_GENERIC: - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_generic; + mode = SPECTRE_V2_RETPOLINE; break; + case SPECTRE_V2_CMD_RETPOLINE: - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_auto; + mode = spectre_v2_select_retpoline(); + break; + + case SPECTRE_V2_CMD_EIBRS: + mode = SPECTRE_V2_EIBRS; + break; + + case SPECTRE_V2_CMD_EIBRS_LFENCE: + mode = SPECTRE_V2_EIBRS_LFENCE; + break; + + case SPECTRE_V2_CMD_EIBRS_RETPOLINE: + mode = SPECTRE_V2_EIBRS_RETPOLINE; break; } - pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!"); - return; -retpoline_auto: - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { - retpoline_lfence: - if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { - pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); - goto retpoline_generic; - } - mode = SPECTRE_V2_LFENCE; + if (spectre_v2_in_eibrs_mode(mode)) { + /* Force it so VMEXIT will restore correctly */ + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + } + + switch (mode) { + case SPECTRE_V2_NONE: + case SPECTRE_V2_EIBRS: + break; + + case SPECTRE_V2_LFENCE: + case SPECTRE_V2_EIBRS_LFENCE: setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE); + fallthrough; + + case SPECTRE_V2_RETPOLINE: + case SPECTRE_V2_EIBRS_RETPOLINE: setup_force_cpu_cap(X86_FEATURE_RETPOLINE); - } else { - retpoline_generic: - mode = SPECTRE_V2_RETPOLINE; - setup_force_cpu_cap(X86_FEATURE_RETPOLINE); + break; } -specv2_set_mode: spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); @@ -945,7 +1004,7 @@ static void __init spectre_v2_select_mitigation(void) * the CPU supports Enhanced IBRS, kernel might un-intentionally not * enable IBRS around firmware calls. */ - if (boot_cpu_has(X86_FEATURE_IBRS) && mode != SPECTRE_V2_IBRS_ENHANCED) { + if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) { setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } @@ -1621,7 +1680,7 @@ static ssize_t tsx_async_abort_show_state(char *buf) static char *stibp_state(void) { - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) return ""; switch (spectre_v2_user_stibp) { -- Gitee From 666dfd14c7b4bba108ca42b7285db0713d9ef060 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Feb 2022 20:57:02 +0100 Subject: [PATCH 20/26] Documentation/hw-vuln: Update spectre doc stable inclusion from stable-5.10.105 commit 071e8b69d7808d96f388d7c5ed606e75fd3d518d category: bugfix issue: I57ESG CVE: CVE-2022-0001 Signed-off-by: gaochao --------------------------------------- Documentation/hw-vuln: Update spectre doc commit 5ad3eb1132453b9795ce5fd4572b1c18b292cca9 upstream. Update the doc with the new fun. [ bp: Massage commit message. ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner [fllinden@amazon.com: backported to 5.10] Signed-off-by: Frank van der Linden Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/spectre.rst | 42 +++++++++++++------ .../admin-guide/kernel-parameters.txt | 8 +++- 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index e05e581af5cf..4776c112fe7d 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -131,6 +131,19 @@ steer its indirect branch speculations to gadget code, and measure the speculative execution's side effects left in level 1 cache to infer the victim's data. +Yet another variant 2 attack vector is for the attacker to poison the +Branch History Buffer (BHB) to speculatively steer an indirect branch +to a specific Branch Target Buffer (BTB) entry, even if the entry isn't +associated with the source address of the indirect branch. Specifically, +the BHB might be shared across privilege levels even in the presence of +Enhanced IBRS. + +Currently the only known real-world BHB attack vector is via +unprivileged eBPF. Therefore, it's highly recommended to not enable +unprivileged eBPF, especially when eIBRS is used (without retpolines). +For a full mitigation against BHB attacks, it's recommended to use +retpolines (or eIBRS combined with retpolines). + Attack scenarios ---------------- @@ -364,13 +377,15 @@ The possible values in this file are: - Kernel status: - ==================================== ================================= - 'Not affected' The processor is not vulnerable - 'Vulnerable' Vulnerable, no mitigation - 'Mitigation: Full generic retpoline' Software-focused mitigation - 'Mitigation: Full AMD retpoline' AMD-specific software mitigation - 'Mitigation: Enhanced IBRS' Hardware-focused mitigation - ==================================== ================================= + ======================================== ================================= + 'Not affected' The processor is not vulnerable + 'Mitigation: None' Vulnerable, no mitigation + 'Mitigation: Retpolines' Use Retpoline thunks + 'Mitigation: LFENCE' Use LFENCE instructions + 'Mitigation: Enhanced IBRS' Hardware-focused mitigation + 'Mitigation: Enhanced IBRS + Retpolines' Hardware-focused + Retpolines + 'Mitigation: Enhanced IBRS + LFENCE' Hardware-focused + LFENCE + ======================================== ================================= - Firmware status: Show if Indirect Branch Restricted Speculation (IBRS) is used to protect against Spectre variant 2 attacks when calling firmware (x86 only). @@ -584,12 +599,13 @@ kernel command line. Specific mitigations can also be selected manually: - retpoline - replace indirect branches - retpoline,generic - google's original retpoline - retpoline,amd - AMD-specific minimal thunk + retpoline auto pick between generic,lfence + retpoline,generic Retpolines + retpoline,lfence LFENCE; indirect branch + retpoline,amd alias for retpoline,lfence + eibrs enhanced IBRS + eibrs,retpoline enhanced IBRS + Retpolines + eibrs,lfence enhanced IBRS + LFENCE Not specifying this option is equivalent to spectre_v2=auto. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 4c0a0ca7542e..86efe5fbb979 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4952,8 +4952,12 @@ Specific mitigations can also be selected manually: retpoline - replace indirect branches - retpoline,generic - google's original retpoline - retpoline,amd - AMD-specific minimal thunk + retpoline,generic - Retpolines + retpoline,lfence - LFENCE; indirect branch + retpoline,amd - alias for retpoline,lfence + eibrs - enhanced IBRS + eibrs,retpoline - enhanced IBRS + Retpolines + eibrs,lfence - enhanced IBRS + LFENCE Not specifying this option is equivalent to spectre_v2=auto. -- Gitee From 6451b666168cb38b885a95bcc37b242d224f6eb0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 11 May 2021 22:35:17 +0200 Subject: [PATCH 21/26] bpf: Add kconfig knob for disabling unpriv bpf by default stable inclusion from stable-5.10.90 commit 8c15bfb36a442d63aec74a3379cb7a197f3e5f99 category: bugfix issue: I57ESG CVE: CVE-2022-0001 Signed-off-by: gaochao --------------------------------------- bpf: Add kconfig knob for disabling unpriv bpf by default commit 08389d888287c3823f80b0216766b71e17f0aba5 upstream. Add a kconfig knob which allows for unprivileged bpf to be disabled by default. If set, the knob sets /proc/sys/kernel/unprivileged_bpf_disabled to value of 2. This still allows a transition of 2 -> {0,1} through an admin. Similarly, this also still keeps 1 -> {1} behavior intact, so that once set to permanently disabled, it cannot be undone aside from a reboot. We've also added extra2 with max of 2 for the procfs handler, so that an admin still has a chance to toggle between 0 <-> 2. Either way, as an additional alternative, applications can make use of CAP_BPF that we added a while ago. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/74ec548079189e4e4dffaeb42b8987bb3c852eee.1620765074.git.daniel@iogearbox.net Cc: Salvatore Bonaccorso Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/sysctl/kernel.rst | 17 +++++++++--- init/Kconfig | 10 +++++++ kernel/bpf/syscall.c | 3 ++- kernel/sysctl.c | 29 +++++++++++++++++---- 4 files changed, 50 insertions(+), 9 deletions(-) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index d4b32cc32bb7..7d5e8a67c775 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1457,11 +1457,22 @@ unprivileged_bpf_disabled ========================= Writing 1 to this entry will disable unprivileged calls to ``bpf()``; -once disabled, calling ``bpf()`` without ``CAP_SYS_ADMIN`` will return -``-EPERM``. +once disabled, calling ``bpf()`` without ``CAP_SYS_ADMIN`` or ``CAP_BPF`` +will return ``-EPERM``. Once set to 1, this can't be cleared from the +running kernel anymore. -Once set, this can't be cleared. +Writing 2 to this entry will also disable unprivileged calls to ``bpf()``, +however, an admin can still change this setting later on, if needed, by +writing 0 or 1 to this entry. +If ``BPF_UNPRIV_DEFAULT_OFF`` is enabled in the kernel config, then this +entry will default to 2 instead of 0. + += ============================================================= +0 Unprivileged calls to ``bpf()`` are enabled +1 Unprivileged calls to ``bpf()`` are disabled without recovery +2 Unprivileged calls to ``bpf()`` are disabled += ============================================================= watchdog ======== diff --git a/init/Kconfig b/init/Kconfig index ded631516e22..2e5b9288081e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1779,6 +1779,16 @@ config BPF_JIT_DEFAULT_ON def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON depends on HAVE_EBPF_JIT && BPF_JIT +config BPF_UNPRIV_DEFAULT_OFF + bool "Disable unprivileged BPF by default" + depends on BPF_SYSCALL + help + Disables unprivileged BPF by default by setting the corresponding + /proc/sys/kernel/unprivileged_bpf_disabled knob to 2. An admin can + still reenable it by setting it to 0 later on, or permanently + disable it by setting it to 1 (from which no other transition to + 0 is possible anymore). + source "kernel/bpf/preload/Kconfig" config USERFAULTFD diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index bb9a9cb1f321..209e6567cdab 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -50,7 +50,8 @@ static DEFINE_SPINLOCK(map_idr_lock); static DEFINE_IDR(link_idr); static DEFINE_SPINLOCK(link_idr_lock); -int sysctl_unprivileged_bpf_disabled __read_mostly; +int sysctl_unprivileged_bpf_disabled __read_mostly = + IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0; static const struct bpf_map_ops * const bpf_map_types[] = { #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index e34d6937594c..a8d5607dc823 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -233,7 +233,27 @@ static int bpf_stats_handler(struct ctl_table *table, int write, mutex_unlock(&bpf_stats_enabled_mutex); return ret; } -#endif + +static int bpf_unpriv_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int ret, unpriv_enable = *(int *)table->data; + bool locked_state = unpriv_enable == 1; + struct ctl_table tmp = *table; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + tmp.data = &unpriv_enable; + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + if (write && !ret) { + if (locked_state && unpriv_enable != 1) + return -EPERM; + *(int *)table->data = unpriv_enable; + } + return ret; +} +#endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */ /* * /proc/sys support @@ -2678,10 +2698,9 @@ static struct ctl_table kern_table[] = { .data = &sysctl_unprivileged_bpf_disabled, .maxlen = sizeof(sysctl_unprivileged_bpf_disabled), .mode = 0644, - /* only handle a transition from default "0" to "1" */ - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ONE, - .extra2 = SYSCTL_ONE, + .proc_handler = bpf_unpriv_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = &two, }, { .procname = "bpf_stats_enabled", -- Gitee From 1c17e683604f2964ad58c9f4294350737c0d5350 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 18 Feb 2022 11:49:08 -0800 Subject: [PATCH 22/26] x86/speculation: Include unprivileged eBPF status in Spectre v2 mitigation reporting stable inclusion from stable-5.10.105 commit afc2d635b5e18e2b33116d8e121ee149882e33eb category: bugfix issue: I57ESG CVE: CVE-2022-0001 Signed-off-by: gaochao --------------------------------------- x86/speculation: Include unprivileged eBPF status in Spectre v2 mitigation reporting commit 44a3918c8245ab10c6c9719dd12e7a8d291980d8 upstream. With unprivileged eBPF enabled, eIBRS (without retpoline) is vulnerable to Spectre v2 BHB-based attacks. When both are enabled, print a warning message and report it in the 'spectre_v2' sysfs vulnerabilities file. Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner [fllinden@amazon.com: backported to 5.10] Signed-off-by: Frank van der Linden Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 35 +++++++++++++++++++++++++++++------ include/linux/bpf.h | 12 ++++++++++++ kernel/sysctl.c | 7 +++++++ 3 files changed, 48 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3a5c6724ea06..e97ab5ca9358 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -613,6 +614,16 @@ static inline const char *spectre_v2_module_string(void) static inline const char *spectre_v2_module_string(void) { return ""; } #endif +#define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n" + +#ifdef CONFIG_BPF_SYSCALL +void unpriv_ebpf_notify(int new_state) +{ + if (spectre_v2_enabled == SPECTRE_V2_EIBRS && !new_state) + pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); +} +#endif + static inline bool match_option(const char *arg, int arglen, const char *opt) { int len = strlen(opt); @@ -957,6 +968,9 @@ static void __init spectre_v2_select_mitigation(void) break; } + if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) + pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); + if (spectre_v2_in_eibrs_mode(mode)) { /* Force it so VMEXIT will restore correctly */ x86_spec_ctrl_base |= SPEC_CTRL_IBRS; @@ -1710,6 +1724,20 @@ static char *ibpb_state(void) return ""; } +static ssize_t spectre_v2_show_state(char *buf) +{ + if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) + return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n"); + + return sprintf(buf, "%s%s%s%s%s%s\n", + spectre_v2_strings[spectre_v2_enabled], + ibpb_state(), + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + stibp_state(), + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", + spectre_v2_module_string()); +} + static ssize_t srbds_show_state(char *buf) { return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); @@ -1735,12 +1763,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]); case X86_BUG_SPECTRE_V2: - return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - ibpb_state(), - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - stibp_state(), - boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", - spectre_v2_module_string()); + return spectre_v2_show_state(buf); case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 17502f263035..3a383a485190 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1564,6 +1564,12 @@ struct bpf_prog *bpf_prog_by_id(u32 id); struct bpf_link *bpf_link_by_id(u32 id); const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); + +static inline bool unprivileged_ebpf_enabled(void) +{ + return !sysctl_unprivileged_bpf_disabled; +} + #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -1758,6 +1764,12 @@ bpf_base_func_proto(enum bpf_func_id func_id) { return NULL; } + +static inline bool unprivileged_ebpf_enabled(void) +{ + return false; +} + #endif /* CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a8d5607dc823..a5be63fcb4ca 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -234,6 +234,10 @@ static int bpf_stats_handler(struct ctl_table *table, int write, return ret; } +void __weak unpriv_ebpf_notify(int new_state) +{ +} + static int bpf_unpriv_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -251,6 +255,9 @@ static int bpf_unpriv_handler(struct ctl_table *table, int write, return -EPERM; *(int *)table->data = unpriv_enable; } + + unpriv_ebpf_notify(unpriv_enable); + return ret; } #endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */ -- Gitee From 958e5f1327533ca2c37fa0c648f4fe32520fed8d Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Mon, 28 Feb 2022 11:23:15 -0600 Subject: [PATCH 23/26] x86/speculation: Use generic retpoline by default on AMD stable inclusion from stable-5.10.105 commit 2fdf67a1d215574c31b1a716f80fa0fdccd401d7 category: bugfix issue: I57ESG CVE: CVE-2022-0001 Signed-off-by: gaochao --------------------------------------- x86/speculation: Use generic retpoline by default on AMD commit 244d00b5dd4755f8df892c86cab35fb2cfd4f14b upstream. AMD retpoline may be susceptible to speculation. The speculation execution window for an incorrect indirect branch prediction using LFENCE/JMP sequence may potentially be large enough to allow exploitation using Spectre V2. By default, don't use retpoline,lfence on AMD. Instead, use the generic retpoline. Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e97ab5ca9358..935a4812a442 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -904,15 +904,6 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) return SPECTRE_V2_NONE; } - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { - if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { - pr_err("LFENCE not serializing, switching to generic retpoline\n"); - return SPECTRE_V2_RETPOLINE; - } - return SPECTRE_V2_LFENCE; - } - return SPECTRE_V2_RETPOLINE; } -- Gitee From 2807b88716adb43721e26359975979062b29766e Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Mon, 28 Feb 2022 11:23:16 -0600 Subject: [PATCH 24/26] x86/speculation: Update link to AMD speculation whitepaper stable inclusion from stable-5.10.105 commit e335384560d1e106b609e8febd7e0427075a8938 category: bugfix issue: I57ESG CVE: CVE-2022-0001 Signed-off-by: gaochao --------------------------------------- x86/speculation: Update link to AMD speculation whitepaper commit e9b6013a7ce31535b04b02ba99babefe8a8599fa upstream. Update the link to the "Software Techniques for Managing Speculation on AMD Processors" whitepaper. Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/hw-vuln/spectre.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index 4776c112fe7d..3a6e1fcad3c0 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -60,8 +60,8 @@ privileged data touched during the speculative execution. Spectre variant 1 attacks take advantage of speculative execution of conditional branches, while Spectre variant 2 attacks use speculative execution of indirect branches to leak privileged memory. -See :ref:`[1] ` :ref:`[5] ` :ref:`[7] ` -:ref:`[10] ` :ref:`[11] `. +See :ref:`[1] ` :ref:`[5] ` :ref:`[6] ` +:ref:`[7] ` :ref:`[10] ` :ref:`[11] `. Spectre variant 1 (Bounds Check Bypass) --------------------------------------- @@ -746,7 +746,7 @@ AMD white papers: .. _spec_ref6: -[6] `Software techniques for managing speculation on AMD processors `_. +[6] `Software techniques for managing speculation on AMD processors `_. ARM white papers: -- Gitee From 3b279f204025dc8c739fd9ee2cee42ba36ee7393 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 25 Feb 2022 14:31:49 -0800 Subject: [PATCH 25/26] x86/speculation: Warn about Spectre v2 LFENCE mitigation stable inclusion from stable-5.10.105 commit cc9e3e55bde71b2fac1494f503d5ffc560c7fb8d category: bugfix issue: I57ESG CVE: CVE-2022-0001 Signed-off-by: gaochao --------------------------------------- x86/speculation: Warn about Spectre v2 LFENCE mitigation commit eafd987d4a82c7bb5aa12f0e3b4f8f3dea93e678 upstream. With: f8a66d608a3e ("x86,bugs: Unconditionally allow spectre_v2=retpoline,amd") it became possible to enable the LFENCE "retpoline" on Intel. However, Intel doesn't recommend it, as it has some weaknesses compared to retpoline. Now AMD doesn't recommend it either. It can still be left available as a cmdline option. It's faster than retpoline but is weaker in certain scenarios -- particularly SMT, but even non-SMT may be vulnerable in some cases. So just unconditionally warn if the user requests it on the cmdline. [ bp: Massage commit message. ] Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 935a4812a442..bade6afeb169 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -614,6 +614,7 @@ static inline const char *spectre_v2_module_string(void) static inline const char *spectre_v2_module_string(void) { return ""; } #endif +#define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n" #define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n" #ifdef CONFIG_BPF_SYSCALL @@ -935,6 +936,7 @@ static void __init spectre_v2_select_mitigation(void) break; case SPECTRE_V2_CMD_RETPOLINE_LFENCE: + pr_err(SPECTRE_V2_LFENCE_MSG); mode = SPECTRE_V2_LFENCE; break; @@ -1717,6 +1719,9 @@ static char *ibpb_state(void) static ssize_t spectre_v2_show_state(char *buf) { + if (spectre_v2_enabled == SPECTRE_V2_LFENCE) + return sprintf(buf, "Vulnerable: LFENCE\n"); + if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n"); -- Gitee From e8c32db42ec349c415831b57b7ad9ec9cc9fa560 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 25 Feb 2022 14:32:28 -0800 Subject: [PATCH 26/26] x86/speculation: Warn about eIBRS + LFENCE + Unprivileged eBPF + SMT stable inclusion from stable-5.10.105 commit d04937ae94903087279e4a016b7741cdee59d521 category: bugfix issue: I57ESG CVE: CVE-2022-0001 Signed-off-by: gaochao --------------------------------------- x86/speculation: Warn about eIBRS + LFENCE + Unprivileged eBPF + SMT commit 0de05d056afdb00eca8c7bbb0c79a3438daf700c upstream. The commit 44a3918c8245 ("x86/speculation: Include unprivileged eBPF status in Spectre v2 mitigation reporting") added a warning for the "eIBRS + unprivileged eBPF" combination, which has been shown to be vulnerable against Spectre v2 BHB-based attacks. However, there's no warning about the "eIBRS + LFENCE retpoline + unprivileged eBPF" combo. The LFENCE adds more protection by shortening the speculation window after a mispredicted branch. That makes an attack significantly more difficult, even with unprivileged eBPF. So at least for now the logic doesn't warn about that combination. But if you then add SMT into the mix, the SMT attack angle weakens the effectiveness of the LFENCE considerably. So extend the "eIBRS + unprivileged eBPF" warning to also include the "eIBRS + LFENCE + unprivileged eBPF + SMT" case. [ bp: Massage commit message. ] Suggested-by: Alyssa Milburn Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bade6afeb169..78b9514a3844 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -616,12 +616,27 @@ static inline const char *spectre_v2_module_string(void) { return ""; } #define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n" #define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n" +#define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n" #ifdef CONFIG_BPF_SYSCALL void unpriv_ebpf_notify(int new_state) { - if (spectre_v2_enabled == SPECTRE_V2_EIBRS && !new_state) + if (new_state) + return; + + /* Unprivileged eBPF is enabled */ + + switch (spectre_v2_enabled) { + case SPECTRE_V2_EIBRS: pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); + break; + case SPECTRE_V2_EIBRS_LFENCE: + if (sched_smt_active()) + pr_err(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG); + break; + default: + break; + } } #endif @@ -1081,6 +1096,10 @@ void cpu_bugs_smt_update(void) { mutex_lock(&spec_ctrl_mutex); + if (sched_smt_active() && unprivileged_ebpf_enabled() && + spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) + pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG); + switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: break; @@ -1723,7 +1742,11 @@ static ssize_t spectre_v2_show_state(char *buf) return sprintf(buf, "Vulnerable: LFENCE\n"); if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) - return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n"); + return sprintf(buf, "Vulnerable: eIBRS with unprivileged eBPF\n"); + + if (sched_smt_active() && unprivileged_ebpf_enabled() && + spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) + return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], -- Gitee