From b2cb0dd3e743086e48ccba236ce20f9fd8bde585 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 27 Oct 2022 14:54:41 -0700 Subject: [PATCH 1/7] x86/mm: Randomize per-cpu entry area mainline inclusion from mainline-v6.2-rc1 commit 97e3d26b5e5f371b3ee223d94dd123e6c442ba80 category: bugfix issue: CVE: CVE-2023-0597 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=97e3d26b5e5f371b3ee223d94dd123e6c442ba80 Signed-off-by: gaochao --------------------------------------- Seth found that the CPU-entry-area; the piece of per-cpu data that is mapped into the userspace page-tables for kPTI is not subject to any randomization -- irrespective of kASLR settings. On x86_64 a whole P4D (512 GB) of virtual address space is reserved for this structure, which is plenty large enough to randomize things a little. As such, use a straight forward randomization scheme that avoids duplicates to spread the existing CPUs over the available space. [ bp: Fix le build. ] Reported-by: Seth Jenkins Reviewed-by: Kees Cook Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Dave Hansen Signed-off-by: Borislav Petkov Confilict: arch/x86/mm/cpu_entry_area.c Use get_random_u32() instead of prandom_u32_max() in init_cea_offsets(). With CONFIG_RANDOMIZE_BASE=y, KASLR use prandom_seed_state() init prandom seed before init_cea_offsets(). But when CONFIG_RANDOMIZE_BASE=n, prandom seed init after init_cea_offsets() cause cea is always 0. The patch d4150779e60f("random32: use real rng for non-deterministic randomness") use get_random_u32() instead of prandom_u32() in prandom_u32_max() that make prandom_u32_max() don't need to wait prandom seed init(). But the patch has many pre-patches that have not been merged, So,we adopt the current solution as a workaround. directly use get_random_u32() in init_cea_offsets() to simplify code. Signed-off-by: Ke Liu --- arch/x86/include/asm/cpu_entry_area.h | 4 --- arch/x86/include/asm/pgtable_areas.h | 8 ++++- arch/x86/kernel/hw_breakpoint.c | 2 +- arch/x86/mm/cpu_entry_area.c | 51 ++++++++++++++++++++++++--- 4 files changed, 55 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 3d52b094850a..669e5d4df4fe 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -124,10 +124,6 @@ struct cpu_entry_area { }; #define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) -#define CPU_ENTRY_AREA_ARRAY_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS) - -/* Total size includes the readonly IDT mapping page as well: */ -#define CPU_ENTRY_AREA_TOTAL_SIZE (CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE) DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks); diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h index d34cce1b995c..4f056fb88174 100644 --- a/arch/x86/include/asm/pgtable_areas.h +++ b/arch/x86/include/asm/pgtable_areas.h @@ -11,6 +11,12 @@ #define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT) -#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE) +#ifdef CONFIG_X86_32 +#define CPU_ENTRY_AREA_MAP_SIZE (CPU_ENTRY_AREA_PER_CPU + \ + (CPU_ENTRY_AREA_SIZE * NR_CPUS) - \ + CPU_ENTRY_AREA_BASE) +#else +#define CPU_ENTRY_AREA_MAP_SIZE P4D_SIZE +#endif #endif /* _ASM_X86_PGTABLE_AREAS_H */ diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 668a4a6533d9..bbb0f737aab1 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -266,7 +266,7 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end) /* CPU entry erea is always used for CPU entry */ if (within_area(addr, end, CPU_ENTRY_AREA_BASE, - CPU_ENTRY_AREA_TOTAL_SIZE)) + CPU_ENTRY_AREA_MAP_SIZE)) return true; /* diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index f5e1e60c9095..697fb1d1ef72 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -15,16 +16,57 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage) #ifdef CONFIG_X86_64 static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks); DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks); -#endif -#ifdef CONFIG_X86_32 +static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, _cea_offset); + +static __always_inline unsigned int cea_offset(unsigned int cpu) +{ + return per_cpu(_cea_offset, cpu); +} + +static __init void init_cea_offsets(void) +{ + unsigned int max_cea; + unsigned int i, j; + + max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE; + + /* O(sodding terrible) */ + for_each_possible_cpu(i) { + unsigned int cea; + +again: + /* + * Directly use get_random_u32() instead of prandom_u32_max + * to avoid seed can't be generated when CONFIG_RANDOMIZE_BASE=n. + */ + cea = (u32)(((u64) get_random_u32() * max_cea) >> 32); + + for_each_possible_cpu(j) { + if (cea_offset(j) == cea) + goto again; + + if (i == j) + break; + } + + per_cpu(_cea_offset, i) = cea; + } +} +#else /* !X86_64 */ DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack); + +static __always_inline unsigned int cea_offset(unsigned int cpu) +{ + return cpu; +} +static inline void init_cea_offsets(void) { } #endif /* Is called from entry code, so must be noinstr */ noinstr struct cpu_entry_area *get_cpu_entry_area(int cpu) { - unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE; + unsigned long va = CPU_ENTRY_AREA_PER_CPU + cea_offset(cpu) * CPU_ENTRY_AREA_SIZE; BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0); return (struct cpu_entry_area *) va; @@ -198,7 +240,6 @@ static __init void setup_cpu_entry_area_ptes(void) /* The +1 is for the readonly IDT: */ BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE); - BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE); BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK); start = CPU_ENTRY_AREA_BASE; @@ -214,6 +255,8 @@ void __init setup_cpu_entry_areas(void) { unsigned int cpu; + init_cea_offsets(); + setup_cpu_entry_area_ptes(); for_each_possible_cpu(cpu) -- Gitee From 3fbc823c940eeefa3eab25d2bd539e97c6405519 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 28 Oct 2022 00:31:04 +0300 Subject: [PATCH 2/7] x86/kasan: Map shadow for percpu pages on demand mainline inclusion from mainline-v6.2-rc1 commit 3f148f3318140035e87decc1214795ff0755757b category: bugfix issue: CVE: CVE-2023-0597 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=3f148f3318140035e87decc1214795ff0755757b Signed-off-by: gaochao --------------------------------------- KASAN maps shadow for the entire CPU-entry-area: [CPU_ENTRY_AREA_BASE, CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE] This will explode once the per-cpu entry areas are randomized since it will increase CPU_ENTRY_AREA_MAP_SIZE to 512 GB and KASAN fails to allocate shadow for such big area. Fix this by allocating KASAN shadow only for really used cpu entry area addresses mapped by cea_map_percpu_pages() Thanks to the 0day folks for finding and reporting this to be an issue. [ dhansen: tweak changelog since this will get committed before peterz's actual cpu-entry-area randomization ] Signed-off-by: Andrey Ryabinin Signed-off-by: Dave Hansen Tested-by: Yujie Liu Cc: kernel test robot Link: https://lore.kernel.org/r/202210241508.2e203c3d-yujie.liu@intel.com Signed-off-by: gaochao --- arch/x86/include/asm/kasan.h | 3 +++ arch/x86/mm/cpu_entry_area.c | 8 +++++++- arch/x86/mm/kasan_init_64.c | 15 ++++++++++++--- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h index 13e70da38bed..de75306b932e 100644 --- a/arch/x86/include/asm/kasan.h +++ b/arch/x86/include/asm/kasan.h @@ -28,9 +28,12 @@ #ifdef CONFIG_KASAN void __init kasan_early_init(void); void __init kasan_init(void); +void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid); #else static inline void kasan_early_init(void) { } static inline void kasan_init(void) { } +static inline void kasan_populate_shadow_for_vaddr(void *va, size_t size, + int nid) { } #endif #endif diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 697fb1d1ef72..046807c59f75 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -10,6 +10,7 @@ #include #include #include +#include static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage); @@ -95,8 +96,13 @@ void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags) static void __init cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) { + phys_addr_t pa = per_cpu_ptr_to_phys(ptr); + + kasan_populate_shadow_for_vaddr(cea_vaddr, pages * PAGE_SIZE, + early_pfn_to_nid(PFN_DOWN(pa))); + for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE) - cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); + cea_set_pte(cea_vaddr, pa, prot); } static void __init percpu_setup_debug_store(unsigned int cpu) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 1a50434c8a4d..18d574af30ef 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -318,6 +318,18 @@ void __init kasan_early_init(void) kasan_map_early_shadow(init_top_pgt); } +void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid) +{ + unsigned long shadow_start, shadow_end; + + shadow_start = (unsigned long)kasan_mem_to_shadow(va); + shadow_start = round_down(shadow_start, PAGE_SIZE); + shadow_end = (unsigned long)kasan_mem_to_shadow(va + size); + shadow_end = round_up(shadow_end, PAGE_SIZE); + + kasan_populate_shadow(shadow_start, shadow_end, nid); +} + void __init kasan_init(void) { int i; @@ -395,9 +407,6 @@ void __init kasan_init(void) kasan_mem_to_shadow((void *)VMALLOC_END + 1), shadow_cpu_entry_begin); - kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin, - (unsigned long)shadow_cpu_entry_end, 0); - kasan_populate_early_shadow(shadow_cpu_entry_end, kasan_mem_to_shadow((void *)__START_KERNEL_map)); -- Gitee From 0cf37c9da5b68255343a9211669f6dfe836f8b65 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Nov 2022 20:35:00 +0000 Subject: [PATCH 3/7] x86/mm: Recompute physical address for every page of per-CPU CEA mapping mainline inclusion from mainline-v6.2-rc1 commit 80d72a8f76e8f3f0b5a70b8c7022578e17bde8e7 category: bugfix issue: CVE: CVE-2023-0597 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=80d72a8f76e8f3f0b5a70b8c7022578e17bde8e7 Signed-off-by: gaochao --------------------------------------- Recompute the physical address for each per-CPU page in the CPU entry area, a recent commit inadvertantly modified cea_map_percpu_pages() such that every PTE is mapped to the physical address of the first page. Fixes: 9fd429c28073 ("x86/kasan: Map shadow for percpu pages on demand") Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andrey Ryabinin Link: https://lkml.kernel.org/r/20221110203504.1985010-2-seanjc@google.com Signed-off-by: gaochao --- arch/x86/mm/cpu_entry_area.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 046807c59f75..83249f33b0d8 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -102,7 +102,7 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) early_pfn_to_nid(PFN_DOWN(pa))); for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE) - cea_set_pte(cea_vaddr, pa, prot); + cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); } static void __init percpu_setup_debug_store(unsigned int cpu) -- Gitee From a4eb538e3e9d87e7d888fc7430d96a79f903c11c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Nov 2022 20:35:01 +0000 Subject: [PATCH 4/7] x86/mm: Populate KASAN shadow for entire per-CPU range of CPU entry area mainline inclusion from mainline-v6.2-rc1 commit 97650148a15e0b30099d6175ffe278b9f55ec66a category: bugfix issue: CVE: CVE-2023-0597 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=97650148a15e0b30099d6175ffe278b9f55ec66a Signed-off-by: gaochao --------------------------------------- Populate a KASAN shadow for the entire possible per-CPU range of the CPU entry area instead of requiring that each individual chunk map a shadow. Mapping shadows individually is error prone, e.g. the per-CPU GDT mapping was left behind, which can lead to not-present page faults during KASAN validation if the kernel performs a software lookup into the GDT. The DS buffer is also likely affected. The motivation for mapping the per-CPU areas on-demand was to avoid mapping the entire 512GiB range that's reserved for the CPU entry area, shaving a few bytes by not creating shadows for potentially unused memory was not a goal. The bug is most easily reproduced by doing a sigreturn with a garbage CS in the sigcontext, e.g. int main(void) { struct sigcontext regs; syscall(__NR_mmap, 0x1ffff000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul); syscall(__NR_mmap, 0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul); syscall(__NR_mmap, 0x21000000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul); memset(®s, 0, sizeof(regs)); regs.cs = 0x1d0; syscall(__NR_rt_sigreturn); return 0; } to coerce the kernel into doing a GDT lookup to compute CS.base when reading the instruction bytes on the subsequent #GP to determine whether or not the #GP is something the kernel should handle, e.g. to fixup UMIP violations or to emulate CLI/STI for IOPL=3 applications. BUG: unable to handle page fault for address: fffffbc8379ace00 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 16c03a067 P4D 16c03a067 PUD 15b990067 PMD 15b98f067 PTE 0 Oops: 0000 [#1] PREEMPT SMP KASAN CPU: 3 PID: 851 Comm: r2 Not tainted 6.1.0-rc3-next-20221103+ #432 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:kasan_check_range+0xdf/0x190 Call Trace: get_desc+0xb0/0x1d0 insn_get_seg_base+0x104/0x270 insn_fetch_from_user+0x66/0x80 fixup_umip_exception+0xb1/0x530 exc_general_protection+0x181/0x210 asm_exc_general_protection+0x22/0x30 RIP: 0003:0x0 Code: Unable to access opcode bytes at 0xffffffffffffffd6. RSP: 0003:0000000000000000 EFLAGS: 00000202 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00000000000001d0 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Fixes: 9fd429c28073 ("x86/kasan: Map shadow for percpu pages on demand") Reported-by: syzbot+ffb4f000dc2872c93f62@syzkaller.appspotmail.com Suggested-by: Andrey Ryabinin Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andrey Ryabinin Link: https://lkml.kernel.org/r/20221110203504.1985010-3-seanjc@google.com Signed-off-by: gaochao --- arch/x86/mm/cpu_entry_area.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 83249f33b0d8..cd653d981560 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -96,11 +96,6 @@ void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags) static void __init cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot) { - phys_addr_t pa = per_cpu_ptr_to_phys(ptr); - - kasan_populate_shadow_for_vaddr(cea_vaddr, pages * PAGE_SIZE, - early_pfn_to_nid(PFN_DOWN(pa))); - for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE) cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot); } @@ -193,6 +188,9 @@ static void __init setup_cpu_entry_area(unsigned int cpu) pgprot_t tss_prot = PAGE_KERNEL; #endif + kasan_populate_shadow_for_vaddr(cea, CPU_ENTRY_AREA_SIZE, + early_cpu_to_node(cpu)); + cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot); cea_map_percpu_pages(&cea->entry_stack_page, -- Gitee From 5c4e710272980da1514c8bf5a85973ac8431cc41 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Nov 2022 20:35:02 +0000 Subject: [PATCH 5/7] x86/kasan: Rename local CPU_ENTRY_AREA variables to shorten names mainline inclusion from mainline-v6.2-rc1 commit 7077d2ccb94dafd00b29cc2d601c9f6891648f5b category: bugfix issue: CVE: CVE-2023-0597 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=7077d2ccb94dafd00b29cc2d601c9f6891648f5b Signed-off-by: gaochao --------------------------------------- Rename the CPU entry area variables in kasan_init() to shorten their names, a future fix will reference the beginning of the per-CPU portion of the CPU entry area, and shadow_cpu_entry_per_cpu_begin is a bit much. No functional change intended. Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andrey Ryabinin Link: https://lkml.kernel.org/r/20221110203504.1985010-4-seanjc@google.com Signed-off-by: gaochao --- arch/x86/mm/kasan_init_64.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 18d574af30ef..75b0eb482487 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -333,7 +333,7 @@ void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid) void __init kasan_init(void) { int i; - void *shadow_cpu_entry_begin, *shadow_cpu_entry_end; + void *shadow_cea_begin, *shadow_cea_end; memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); @@ -374,16 +374,16 @@ void __init kasan_init(void) map_range(&pfn_mapped[i]); } - shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE; - shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin); - shadow_cpu_entry_begin = (void *)round_down( - (unsigned long)shadow_cpu_entry_begin, PAGE_SIZE); + shadow_cea_begin = (void *)CPU_ENTRY_AREA_BASE; + shadow_cea_begin = kasan_mem_to_shadow(shadow_cea_begin); + shadow_cea_begin = (void *)round_down( + (unsigned long)shadow_cea_begin, PAGE_SIZE); - shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE + + shadow_cea_end = (void *)(CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE); - shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end); - shadow_cpu_entry_end = (void *)round_up( - (unsigned long)shadow_cpu_entry_end, PAGE_SIZE); + shadow_cea_end = kasan_mem_to_shadow(shadow_cea_end); + shadow_cea_end = (void *)round_up( + (unsigned long)shadow_cea_end, PAGE_SIZE); kasan_populate_early_shadow( kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), @@ -405,9 +405,9 @@ void __init kasan_init(void) kasan_populate_early_shadow( kasan_mem_to_shadow((void *)VMALLOC_END + 1), - shadow_cpu_entry_begin); + shadow_cea_begin); - kasan_populate_early_shadow(shadow_cpu_entry_end, + kasan_populate_early_shadow(shadow_cea_end, kasan_mem_to_shadow((void *)__START_KERNEL_map)); kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), -- Gitee From 251a0300734a60a0440c1e38b77603b15fcee0b1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Nov 2022 20:35:03 +0000 Subject: [PATCH 6/7] x86/kasan: Add helpers to align shadow addresses up and down mainline inclusion from mainline-v6.2-rc1 commit bde258d97409f2a45243cb393a55ea9ecfc7aba5 category: bugfix issue: CVE: CVE-2023-0597 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=bde258d97409f2a45243cb393a55ea9ecfc7aba5 Signed-off-by: gaochao --------------------------------------- Add helpers to dedup code for aligning shadow address up/down to page boundaries when translating an address to its shadow. No functional change intended. Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andrey Ryabinin Link: https://lkml.kernel.org/r/20221110203504.1985010-5-seanjc@google.com Signed-off-by: gaochao --- arch/x86/mm/kasan_init_64.c | 40 ++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 75b0eb482487..c1d99556c024 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -318,22 +318,33 @@ void __init kasan_early_init(void) kasan_map_early_shadow(init_top_pgt); } +static unsigned long kasan_mem_to_shadow_align_down(unsigned long va) +{ + unsigned long shadow = (unsigned long)kasan_mem_to_shadow((void *)va); + + return round_down(shadow, PAGE_SIZE); +} + +static unsigned long kasan_mem_to_shadow_align_up(unsigned long va) +{ + unsigned long shadow = (unsigned long)kasan_mem_to_shadow((void *)va); + + return round_up(shadow, PAGE_SIZE); +} + void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid) { unsigned long shadow_start, shadow_end; - shadow_start = (unsigned long)kasan_mem_to_shadow(va); - shadow_start = round_down(shadow_start, PAGE_SIZE); - shadow_end = (unsigned long)kasan_mem_to_shadow(va + size); - shadow_end = round_up(shadow_end, PAGE_SIZE); - + shadow_start = kasan_mem_to_shadow_align_down((unsigned long)va); + shadow_end = kasan_mem_to_shadow_align_up((unsigned long)va + size); kasan_populate_shadow(shadow_start, shadow_end, nid); } void __init kasan_init(void) { + unsigned long shadow_cea_begin, shadow_cea_end; int i; - void *shadow_cea_begin, *shadow_cea_end; memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); @@ -374,16 +385,9 @@ void __init kasan_init(void) map_range(&pfn_mapped[i]); } - shadow_cea_begin = (void *)CPU_ENTRY_AREA_BASE; - shadow_cea_begin = kasan_mem_to_shadow(shadow_cea_begin); - shadow_cea_begin = (void *)round_down( - (unsigned long)shadow_cea_begin, PAGE_SIZE); - - shadow_cea_end = (void *)(CPU_ENTRY_AREA_BASE + - CPU_ENTRY_AREA_MAP_SIZE); - shadow_cea_end = kasan_mem_to_shadow(shadow_cea_end); - shadow_cea_end = (void *)round_up( - (unsigned long)shadow_cea_end, PAGE_SIZE); + shadow_cea_begin = kasan_mem_to_shadow_align_down(CPU_ENTRY_AREA_BASE); + shadow_cea_end = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_BASE + + CPU_ENTRY_AREA_MAP_SIZE); kasan_populate_early_shadow( kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), @@ -405,9 +409,9 @@ void __init kasan_init(void) kasan_populate_early_shadow( kasan_mem_to_shadow((void *)VMALLOC_END + 1), - shadow_cea_begin); + (void *)shadow_cea_begin); - kasan_populate_early_shadow(shadow_cea_end, + kasan_populate_early_shadow((void *)shadow_cea_end, kasan_mem_to_shadow((void *)__START_KERNEL_map)); kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext), -- Gitee From 2bf812ae4cdb29bdbea3382718f87434c53ae716 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 10 Nov 2022 20:35:04 +0000 Subject: [PATCH 7/7] x86/kasan: Populate shadow for shared chunk of the CPU entry area mainline inclusion from mainline-v6.2-rc1 commit 1cfaac2400c73378e78182a706be0f3ac8b93cd7 category: bugfix issue: CVE: CVE-2023-0597 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1cfaac2400c73378e78182a706be0f3ac8b93cd7 Signed-off-by: gaochao --------------------------------------- Popuplate the shadow for the shared portion of the CPU entry area, i.e. the read-only IDT mapping, during KASAN initialization. A recent change modified KASAN to map the per-CPU areas on-demand, but forgot to keep a shadow for the common area that is shared amongst all CPUs. Map the common area in KASAN init instead of letting idt_map_in_cea() do the dirty work so that it Just Works in the unlikely event more shared data is shoved into the CPU entry area. The bug manifests as a not-present #PF when software attempts to lookup an IDT entry, e.g. when KVM is handling IRQs on Intel CPUs (KVM performs direct CALL to the IRQ handler to avoid the overhead of INTn): BUG: unable to handle page fault for address: fffffbc0000001d8 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 16c03a067 P4D 16c03a067 PUD 0 Oops: 0000 [#1] PREEMPT SMP KASAN CPU: 5 PID: 901 Comm: repro Tainted: G W 6.1.0-rc3+ #410 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:kasan_check_range+0xdf/0x190 vmx_handle_exit_irqoff+0x152/0x290 [kvm_intel] vcpu_run+0x1d89/0x2bd0 [kvm] kvm_arch_vcpu_ioctl_run+0x3ce/0xa70 [kvm] kvm_vcpu_ioctl+0x349/0x900 [kvm] __x64_sys_ioctl+0xb8/0xf0 do_syscall_64+0x2b/0x50 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fixes: 9fd429c28073 ("x86/kasan: Map shadow for percpu pages on demand") Reported-by: syzbot+8cdd16fd5a6c0565e227@syzkaller.appspotmail.com Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221110203504.1985010-6-seanjc@google.com Signed-off-by: gaochao --- arch/x86/mm/kasan_init_64.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index c1d99556c024..b4b187960dd0 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -343,7 +343,7 @@ void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid) void __init kasan_init(void) { - unsigned long shadow_cea_begin, shadow_cea_end; + unsigned long shadow_cea_begin, shadow_cea_per_cpu_begin, shadow_cea_end; int i; memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); @@ -386,6 +386,7 @@ void __init kasan_init(void) } shadow_cea_begin = kasan_mem_to_shadow_align_down(CPU_ENTRY_AREA_BASE); + shadow_cea_per_cpu_begin = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_PER_CPU); shadow_cea_end = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE); @@ -411,6 +412,15 @@ void __init kasan_init(void) kasan_mem_to_shadow((void *)VMALLOC_END + 1), (void *)shadow_cea_begin); + /* + * Populate the shadow for the shared portion of the CPU entry area. + * Shadows for the per-CPU areas are mapped on-demand, as each CPU's + * area is randomly placed somewhere in the 512GiB range and mapping + * the entire 512GiB range is prohibitively expensive. + */ + kasan_populate_shadow(shadow_cea_begin, + shadow_cea_per_cpu_begin, 0); + kasan_populate_early_shadow((void *)shadow_cea_end, kasan_mem_to_shadow((void *)__START_KERNEL_map)); -- Gitee