diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6b77da9629f39cdf062bd9904de386e59fffed72..80b75e147c487f7176a6b1fc67666e91aa01d133 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1549,6 +1549,52 @@ config AMD_MEM_ENCRYPT Say yes to enable support for the encryption of system memory. This requires an AMD processor that supports Secure Memory Encryption (SME). + +config IEE + bool "Isolated Execution Environment Framework(IEE)" + depends on X86_64 + help + Support for Isolated Execution Environment Framework. Foundation of HAOC. + Could isolate kernel critical data and enforce that all write accesses are made and + verified in IEE APIs. + +config IEE_PTRP + bool "Pointer Protection for IEE(IEE_PTRP)" + depends on IEE + help + Provide IEE metadata for each process called task_token to allow + verifying pointers inside task_struct, like struct cred* that determines + the capabilities of a process. + Could be an enhancement of other sub-modules of HAOC. + +config IEE_SIP + bool "Sensitive Instruction Protection for IEE(IEE_SIP)" + depends on IEE + help + Protects critical instructions that may break the isolation of IEE, + such as writing system control registers. These instructions would + be executed inside IEE. + +config PTP + bool "Page Table Protection (PTP)" + depends on IEE + help + Enable Page Table Protection feature for IEE. + +config PTP_RESERVE_ORDER + depends on PTP + int "maximum allowable 2^PTP_RESERVE_ORDER pages for one level page table" + range 9 15 + default 12 + +config CREDP + bool "Struct cred protection(CREDP)" + depends on IEE + help + Protects kernel struct cred. All modifications of cred must be made and + verified by IEE APIs, and critical dereferences of cred would be monitored + by IEE as well. + If unsure, say N. 
# Common NUMA Features config NUMA diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index 8ecb4d40e20ddc6736a4d4421dc676d5aa969605..6dd8049b145dd0713dc7effed7b9546a63adb46f 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -23,7 +23,11 @@ /* Use the static base for this part of the boot process */ #undef __PAGE_OFFSET #define __PAGE_OFFSET __PAGE_OFFSET_BASE +#ifdef CONFIG_PTP +#include "../../kernel/haoc/ptp/ident_map.c" +#else #include "../../mm/ident_map.c" +#endif #define _SETUP #include /* For COMMAND_LINE_SIZE */ @@ -101,9 +105,15 @@ void kernel_add_identity_map(unsigned long start, unsigned long end) return; /* Build the mapping. */ + #ifdef CONFIG_PTP + ret = ptp_kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt, start, end); + if (ret) + error("Error: kernel_ident_mapping_init_for_iee() failed\n"); + #else ret = kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt, start, end); if (ret) error("Error: kernel_ident_mapping_init() failed\n"); + #endif } /* Locates and clears a region for a new top level page table. */ @@ -180,7 +190,14 @@ void initialize_identity_maps(void *rmode) sev_prep_identity_maps(top_level_pgt); /* Load the new page-table. 
*/ + #ifdef CONFIG_IEE_SIP + if(haoc_enabled) + iee_write_cr3_early(top_level_pgt); + else + write_cr3(top_level_pgt); + #else write_cr3(top_level_pgt); + #endif /* * Now that the required page table mappings are established and a @@ -208,7 +225,14 @@ static pte_t *split_large_pmd(struct x86_mapping_info *info, /* Populate the PTEs */ for (i = 0; i < PTRS_PER_PMD; i++) { + #ifdef CONFIG_PTP + if(haoc_enabled) + ptp_set_pte_pre_init(&pte[i], __pte(address | page_flags)); + else + set_pte(&pte[i], __pte(address | page_flags)); + #else set_pte(&pte[i], __pte(address | page_flags)); + #endif address += PAGE_SIZE; } @@ -222,9 +246,23 @@ static pte_t *split_large_pmd(struct x86_mapping_info *info, * of a TLB multihit. */ pmd = __pmd((unsigned long)pte | info->kernpg_flag); + #ifdef CONFIG_PTP + if (haoc_enabled) + ptp_set_pmd_pre_init(pmdp, pmd); + else + set_pmd(pmdp, pmd); + #else set_pmd(pmdp, pmd); + #endif /* Flush TLB to establish the new PMD */ + #ifdef CONFIG_IEE_SIP + if(haoc_enabled) + iee_write_cr3_early(top_level_pgt); + else + write_cr3(top_level_pgt); + #else write_cr3(top_level_pgt); + #endif return pte + pte_index(__address); } @@ -314,7 +352,14 @@ static int set_clr_page_flags(struct x86_mapping_info *info, pte = *ptep; pte = pte_set_flags(pte, set); pte = pte_clear_flags(pte, clr); + #ifdef CONFIG_PTP + if (haoc_enabled) + ptp_set_pte_pre_init(ptep, pte); + else + set_pte(ptep, pte); + #else set_pte(ptep, pte); + #endif /* * If the encryption attribute is being set, then change the page state to @@ -325,7 +370,14 @@ static int set_clr_page_flags(struct x86_mapping_info *info, snp_set_page_private(__pa(address & PAGE_MASK)); /* Flush TLB after changing encryption attribute */ + #ifdef CONFIG_IEE_SIP + if(haoc_enabled) + iee_write_cr3_early(top_level_pgt); + else + write_cr3(top_level_pgt); + #else write_cr3(top_level_pgt); + #endif return 0; } diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 
15354673d3aa7fbb749d1524ee6b9817dd41087e..c160cee8f071995f664bffcae96ed406a294e273 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -197,7 +197,14 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable) * Move the top level page table out of trampoline memory. */ memcpy(pgtable, trampoline_32bit, PAGE_SIZE); + #ifdef CONFIG_IEE_SIP + if(haoc_enabled) + iee_write_cr3_early((unsigned long)pgtable); + else + native_write_cr3((unsigned long)pgtable); + #else native_write_cr3((unsigned long)pgtable); + #endif /* Restore trampoline memory */ memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE); diff --git a/arch/x86/configs/tencent.config b/arch/x86/configs/tencent.config index 20236c14190e6d878152880a5df60bdc906e88ea..ca9987c839a5d45b08eae5c5433528d8377102ce 100644 --- a/arch/x86/configs/tencent.config +++ b/arch/x86/configs/tencent.config @@ -85,6 +85,12 @@ CONFIG_PERF_EVENTS_AMD_POWER=y CONFIG_X86_MSR=y CONFIG_X86_CPUID=y CONFIG_AMD_MEM_ENCRYPT=y +CONFIG_IEE=y +CONFIG_IEE_PTRP=y +CONFIG_IEE_SIP=y +CONFIG_CREDP=y +CONFIG_PTP=y +CONFIG_PTP_RESERVE_ORDER=12 CONFIG_NUMA=y CONFIG_NUMA_EMU=y CONFIG_ARCH_MEMORY_PROBE=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 409e9182bd29b441d261fa8e1f9de1d1d2e3379d..8ccbbb74a264ba13605a7596d5741380bd412305 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -33,6 +33,7 @@ CONFIG_PARAVIRT=y CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y CONFIG_X86_MSR=y CONFIG_X86_CPUID=y +CONFIG_IEE=y CONFIG_NUMA=y CONFIG_X86_CHECK_BIOS_CORRUPTION=y # CONFIG_MTRR_SANITIZER is not set diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index ab97b22ac04a263ab78782a9d00f2237c98e0f20..4d3dd0eec916b42e6b28b1447404cce9b50e7db7 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -13,6 +13,11 @@ #include #include +#ifdef CONFIG_IEE_SIP +#include +extern 
bool haoc_enabled; +#endif + static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *info) { desc->limit0 = info->limit & 0x0ffff; @@ -210,9 +215,23 @@ static inline void native_load_gdt(const struct desc_ptr *dtr) asm volatile("lgdt %0"::"m" (*dtr)); } +#ifdef CONFIG_IEE_SIP +static __always_inline void iee_load_idt_early(const struct desc_ptr *dtr) +{ + asm volatile("lidt %0"::"m" (*dtr)); +} +#endif + static __always_inline void native_load_idt(const struct desc_ptr *dtr) { + #ifdef CONFIG_IEE_SIP + if(haoc_enabled) + iee_load_idt((void *)dtr); + else + asm volatile("lidt %0"::"m" (*dtr)); + #else asm volatile("lidt %0"::"m" (*dtr)); + #endif } static inline void native_store_gdt(struct desc_ptr *dtr) diff --git a/arch/x86/include/asm/haoc/haoc-def.h b/arch/x86/include/asm/haoc/haoc-def.h new file mode 100644 index 0000000000000000000000000000000000000000..667b0e9bdb6ff65cc0c06b26871cf1e6ad920fae --- /dev/null +++ b/arch/x86/include/asm/haoc/haoc-def.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * HAOC feature support + * + * Copyright (C) 2025 ZGCLAB + * Authors: Shu Hang + * Hu Bing + */ + +#ifndef _LINUX_HAOC_DEF_H +#define _LINUX_HAOC_DEF_H + +enum { + IEE_OP_MEMCPY, + IEE_OP_MEMSET, + IEE_OP_SET_FREEPTR, + IEE_OP_TEST_CLEAR_BIT, +#ifdef CONFIG_IEE_PTRP + IEE_OP_SET_TOKEN_PGD, + IEE_OP_INVALIDATE_TOKEN, + IEE_OP_VALIDATE_TOKEN, +#endif +#ifdef CONFIG_PTP +#ifdef CONFIG_IEE_PTRP + IEE_OP_UNSET_TOKEN, + IEE_OP_SET_TOKEN, +#endif + IEE_OP_SET_PTE, + IEE_OP_SET_PMD, + IEE_OP_SET_PUD, + IEE_OP_SET_P4D, + IEE_OP_SET_PGD, + IEE_OP_SET_PTE_TEXT_POKE, +#endif +#ifdef CONFIG_CREDP + IEE_OP_COPY_CRED, + IEE_OP_SET_CRED_UID, + IEE_OP_SET_CRED_GID, + IEE_OP_SET_CRED_SUID, + IEE_OP_SET_CRED_SGID, + IEE_OP_SET_CRED_EUID, + IEE_OP_SET_CRED_EGID, + IEE_OP_SET_CRED_FSUID, + IEE_OP_SET_CRED_FSGID, + IEE_OP_SET_CRED_USER, + IEE_OP_SET_CRED_USER_NS, + IEE_OP_SET_CRED_GROUP_INFO, + IEE_OP_SET_CRED_SECUREBITS, + IEE_OP_SET_CRED_CAP_INHER, + 
IEE_OP_SET_CRED_CAP_PERM, + IEE_OP_SET_CRED_CAP_EFFECT, + IEE_OP_SET_CRED_CAP_BSET, + IEE_OP_SET_CRED_CAP_AMBIENT, + IEE_OP_SET_CRED_JIT_KEYRING, + IEE_OP_SET_CRED_SESS_KEYRING, + IEE_OP_SET_CRED_PROC_KEYRING, + IEE_OP_SET_CRED_THREAD_KEYRING, + IEE_OP_SET_CRED_REQ_KEYRING, + IEE_OP_SET_CRED_NON_RCU, + IEE_OP_SET_CRED_ATSET_USAGE, + IEE_OP_SET_CRED_ATOP_USAGE, + IEE_OP_SET_CRED_SECURITY, + IEE_OP_SET_CRED_RCU, + IEE_OP_SET_CRED_UCOUNTS, +#endif + IEE_FLAG_END +}; + +#ifdef CONFIG_IEE_SIP +#define IEE_SIP_TEST 0 +#define IEE_WRITE_CR0 1 +#define IEE_WRITE_CR3 2 +#define IEE_WRITE_CR4 3 +#define IEE_LOAD_IDT 4 +#endif + +#ifdef CONFIG_CREDP +#define AT_ADD 1 +#define AT_INC_NOT_ZERO 2 +#define AT_SUB_AND_TEST 3 +#endif +#endif diff --git a/arch/x86/include/asm/haoc/haoc.h b/arch/x86/include/asm/haoc/haoc.h new file mode 100644 index 0000000000000000000000000000000000000000..56579cdb8fde6afcd50c4cdc211a30dcb34457c1 --- /dev/null +++ b/arch/x86/include/asm/haoc/haoc.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * HAOC feature support + * + * Copyright (C) 2025 ZGCLAB + * Authors: Shu Hang + * Hu Bing + */ + +#ifndef _LINUX_HAOC_H +#define _LINUX_HAOC_H + +#include +#include + +void _iee_memcpy(unsigned long __unused,void *dst,void *src, size_t n); +void _iee_memset(unsigned long __unused,void *ptr, int data, size_t n); +void _iee_set_freeptr(unsigned long __unused,void **pptr,void *ptr); +unsigned long _iee_test_and_clear_bit(unsigned long __unused, + long nr, unsigned long *addr); +#ifdef CONFIG_IEE_PTRP +void _iee_set_token_pgd(unsigned long __unused, struct task_struct *tsk, + pgd_t *pgd); +void _iee_invalidate_token(unsigned long __unused, struct task_struct *tsk); +void _iee_validate_token(unsigned long __unused, struct task_struct *tsk); + +#ifdef CONFIG_PTP +#ifdef CONFIG_IEE_PTRP +void _iee_unset_token(unsigned long __unused, pte_t *token_ptep, + pte_t *token_page_ptep, unsigned long token, unsigned int order); +void _iee_set_token(unsigned 
long __unused, pte_t *token_ptep, + pte_t *token_page_ptep, unsigned long token_page, unsigned int order); +#endif +void _iee_set_pte(unsigned long __unused, pte_t *ptep, pte_t pte); +void _iee_set_pmd(unsigned long __unused, pmd_t *pmdp, pmd_t pmd); +void _iee_set_pud(unsigned long __unused, pud_t *pudp, pud_t pud); +void _iee_set_p4d(unsigned long __unused, p4d_t *p4dp, p4d_t p4d); +void _iee_set_pgd(unsigned long __unused, pgd_t *pgdp, pgd_t pgd); +void _iee_set_pte_text_poke(unsigned long __unused, pte_t *ptep, pte_t pte); +#endif +#endif + +#ifdef CONFIG_CREDP +#include + +void _iee_copy_cred(unsigned long __unused, struct cred *old, struct cred *new); +void _iee_set_cred_uid(unsigned long __unused, struct cred *cred, kuid_t uid); +void _iee_set_cred_gid(unsigned long __unused, struct cred *cred, kgid_t gid); +void _iee_set_cred_suid(unsigned long __unused, struct cred *cred, kuid_t suid); +void _iee_set_cred_sgid(unsigned long __unused, struct cred *cred, kgid_t sgid); +void _iee_set_cred_euid(unsigned long __unused, struct cred *cred, kuid_t euid); +void _iee_set_cred_egid(unsigned long __unused, struct cred *cred, kgid_t egid); +void _iee_set_cred_fsuid(unsigned long __unused, struct cred *cred, kuid_t fsuid); +void _iee_set_cred_fsgid(unsigned long __unused, struct cred *cred, kgid_t fsgid); +void _iee_set_cred_user(unsigned long __unused, struct cred *cred, struct user_struct *user); +void _iee_set_cred_user_ns(unsigned long __unused, + struct cred *cred, struct user_namespace *user_ns); +void _iee_set_cred_group_info(unsigned long __unused, + struct cred *cred, struct group_info *group_info); +void _iee_set_cred_securebits(unsigned long __unused, + struct cred *cred, unsigned int securebits); +void _iee_set_cred_cap_inheritable(unsigned long __unused, + struct cred *cred, kernel_cap_t cap_inheritable); +void _iee_set_cred_cap_permitted(unsigned long __unused, + struct cred *cred, kernel_cap_t cap_permitted); +void _iee_set_cred_cap_effective(unsigned 
long __unused, + struct cred *cred, kernel_cap_t cap_effective); +void _iee_set_cred_cap_bset(unsigned long __unused, struct cred *cred, kernel_cap_t cap_bset); +void _iee_set_cred_cap_ambient(unsigned long __unused, struct cred *cred, kernel_cap_t cap_ambient); +void _iee_set_cred_jit_keyring(unsigned long __unused, + struct cred *cred, unsigned char jit_keyring); +void _iee_set_cred_session_keyring(unsigned long __unused, + struct cred *cred, struct key *session_keyring); +void _iee_set_cred_process_keyring(unsigned long __unused, + struct cred *cred, struct key *process_keyring); +void _iee_set_cred_thread_keyring(unsigned long __unused, + struct cred *cred, struct key *thread_keyring); +void _iee_set_cred_request_key_auth(unsigned long __unused, + struct cred *cred, struct key *request_key_auth); +void _iee_set_cred_non_rcu(unsigned long __unused, struct cred *cred, int non_rcu); +void _iee_set_cred_atomic_set_usage(unsigned long __unused, struct cred *cred, int i); +unsigned long _iee_set_cred_atomic_op_usage(unsigned long __unused, + struct cred *cred, int flag, int nr); +void _iee_set_cred_security(unsigned long __unused, struct cred *cred,void *security); +void _iee_set_cred_rcu(unsigned long __unused, struct cred *cred, struct rcu_head *rcu); +void _iee_set_cred_ucounts(unsigned long __unused, struct cred *cred, struct ucounts *ucounts); +#endif +#endif diff --git a/arch/x86/include/asm/haoc/iee-access.h b/arch/x86/include/asm/haoc/iee-access.h new file mode 100644 index 0000000000000000000000000000000000000000..de0168e821a9965cd1e256813c167eac2152ebe9 --- /dev/null +++ b/arch/x86/include/asm/haoc/iee-access.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * HAOC feature support + * + * Copyright (C) 2025 ZGCLAB + * Authors: Shu Hang + * Hu Bing + */ + +#ifndef _LINUX_IEE_ACCESS_H +#define _LINUX_IEE_ACCESS_H + +#include +#include + +extern unsigned long long iee_rw_gate(int flag, ...); + +static inline void iee_memcpy(void *dst, const void 
*src, size_t n) +{ + if (haoc_enabled) + iee_rw_gate(IEE_OP_MEMCPY, dst, src, n); + else + memcpy(dst, src, n); +} + +static inline void iee_memset(void *ptr, int data, size_t n) +{ + if (haoc_enabled) + iee_rw_gate(IEE_OP_MEMSET, ptr, data, n); + else + memset(ptr, data, n); +} + +static inline void iee_set_freeptr(void **pptr, void *ptr) +{ + if (haoc_enabled) + iee_rw_gate(IEE_OP_SET_FREEPTR, pptr, ptr); + else + *pptr = ptr; +} + +static inline unsigned long iee_test_and_clear_bit(long nr, unsigned long *addr) +{ + if (haoc_enabled) + return iee_rw_gate(IEE_OP_TEST_CLEAR_BIT, nr, addr); + else + return test_and_clear_bit(nr, addr); +} + +#endif diff --git a/arch/x86/include/asm/haoc/iee-cred.h b/arch/x86/include/asm/haoc/iee-cred.h new file mode 100644 index 0000000000000000000000000000000000000000..1d463e85821d7103d8b398b674103f1e4258e723 --- /dev/null +++ b/arch/x86/include/asm/haoc/iee-cred.h @@ -0,0 +1,285 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _LINUX_IEE_CRED_H +#define _LINUX_IEE_CRED_H + +#include +#include + +extern unsigned long long iee_rw_gate(int flag, ...); + +static inline void __maybe_unused iee_copy_cred(const struct cred *old, struct cred *new) +{ + if(!haoc_enabled) + { + memcpy(new, old, sizeof(struct cred)); + return; + } + iee_rw_gate(IEE_OP_COPY_CRED, old, new); +} + +static inline void __maybe_unused iee_set_cred_uid(struct cred *cred, kuid_t uid) +{ + if(!haoc_enabled) + { + cred->uid = uid; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_UID, cred, uid); +} + +static inline void __maybe_unused iee_set_cred_gid(struct cred *cred, kgid_t gid) +{ + if(!haoc_enabled) + { + cred->gid = gid; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_GID, cred, gid); +} + +static inline void __maybe_unused iee_set_cred_suid(struct cred *cred, kuid_t suid) +{ + if(!haoc_enabled) + { + cred->suid = suid; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_SUID, cred, suid); +} + +static inline void __maybe_unused iee_set_cred_sgid(struct cred *cred, 
kgid_t sgid) +{ + if(!haoc_enabled) + { + cred->sgid = sgid; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_SGID, cred, sgid); +} + +static inline void __maybe_unused iee_set_cred_euid(struct cred *cred, kuid_t euid) +{ + if(!haoc_enabled) + { + cred->euid = euid; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_EUID, cred, euid); +} + +static inline void __maybe_unused iee_set_cred_egid(struct cred *cred, kgid_t egid) +{ + if(!haoc_enabled) + { + cred->egid = egid; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_EGID, cred, egid); +} + +static inline void __maybe_unused iee_set_cred_fsuid(struct cred *cred, kuid_t fsuid) +{ + if(!haoc_enabled) + { + cred->fsuid = fsuid; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_FSUID, cred, fsuid); +} + +static inline void __maybe_unused iee_set_cred_fsgid(struct cred *cred, kgid_t fsgid) +{ + if(!haoc_enabled) + { + cred->fsgid = fsgid; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_FSGID, cred, fsgid); +} + +static inline void __maybe_unused iee_set_cred_user(struct cred *cred, struct user_struct *user) +{ + if(!haoc_enabled) + { + cred->user = user; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_USER, cred, user); +} + +static inline void __maybe_unused iee_set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) +{ + if(!haoc_enabled) + { + cred->user_ns = user_ns; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_USER_NS, cred, user_ns); +} + +static inline void __maybe_unused iee_set_cred_ucounts(struct cred *cred, struct ucounts *ucounts) +{ + if(!haoc_enabled) + { + cred->ucounts = ucounts; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_UCOUNTS, cred, ucounts); +} + +static inline void __maybe_unused iee_set_cred_group_info(struct cred *cred, struct group_info *group_info) +{ + if(!haoc_enabled) + { + cred->group_info = group_info; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_GROUP_INFO, cred, group_info); +} + +static inline void __maybe_unused iee_set_cred_securebits(struct cred *cred, + unsigned int securebits) +{ + if(!haoc_enabled) 
+ { + cred->securebits = securebits; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_SECUREBITS, cred, securebits); +} + +static inline void __maybe_unused iee_set_cred_cap_inheritable(struct cred *cred, + kernel_cap_t cap_inheritable) +{ + if(!haoc_enabled) + { + cred->cap_inheritable = cap_inheritable; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_CAP_INHER, cred, cap_inheritable); +} + +static inline void __maybe_unused iee_set_cred_cap_permitted(struct cred *cred, kernel_cap_t cap_permitted) +{ + if(!haoc_enabled) + { + cred->cap_permitted = cap_permitted; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_CAP_PERM, cred, cap_permitted); +} + +static inline void __maybe_unused iee_set_cred_cap_effective(struct cred *cred, kernel_cap_t cap_effective) +{ + if(!haoc_enabled) + { + cred->cap_effective = cap_effective; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_CAP_EFFECT, cred, cap_effective); +} + +static inline void __maybe_unused iee_set_cred_cap_bset(struct cred *cred, kernel_cap_t cap_bset) +{ + if(!haoc_enabled) + { + cred->cap_bset = cap_bset; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_CAP_BSET, cred, cap_bset); +} + +static inline void __maybe_unused iee_set_cred_cap_ambient(struct cred *cred, kernel_cap_t cap_ambient) +{ + if(!haoc_enabled) + { + cred->cap_ambient = cap_ambient; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_CAP_AMBIENT, cred, cap_ambient); +} + +static inline void __maybe_unused iee_set_cred_atomic_set_usage(struct cred *cred, int i) +{ + if(!haoc_enabled) + { + atomic_long_set(&cred->usage, i); + return; + } + iee_rw_gate(IEE_OP_SET_CRED_ATSET_USAGE, cred, i); +} + +static inline void __maybe_unused iee_set_cred_rcu(struct cred *cred, struct rcu_head *rcu) +{ + iee_rw_gate(IEE_OP_SET_CRED_RCU, cred, rcu); +} + +#ifdef CONFIG_KEYS +static inline void __maybe_unused iee_set_cred_jit_keyring(struct cred *cred, unsigned char jit_keyring) +{ + if(!haoc_enabled) + { + cred->jit_keyring = jit_keyring; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_JIT_KEYRING, 
cred, jit_keyring); +} + +static inline void __maybe_unused iee_set_cred_session_keyring(struct cred *cred, + struct key *session_keyring) +{ + if(!haoc_enabled) + { + cred->session_keyring = session_keyring; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_SESS_KEYRING, cred, session_keyring); +} + +static inline void __maybe_unused iee_set_cred_process_keyring(struct cred *cred, + struct key *process_keyring) +{ + if(!haoc_enabled) + { + cred->process_keyring = process_keyring; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_PROC_KEYRING, cred, process_keyring); +} + +static inline void __maybe_unused iee_set_cred_thread_keyring(struct cred *cred, + struct key *thread_keyring) +{ + if(!haoc_enabled) + { + cred->thread_keyring = thread_keyring; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_THREAD_KEYRING, cred, thread_keyring); +} + +static inline void __maybe_unused iee_set_cred_request_key_auth(struct cred *cred, + struct key *request_key_auth) +{ + if(!haoc_enabled) + { + cred->request_key_auth = request_key_auth; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_REQ_KEYRING, cred, request_key_auth); +} +#endif + +#ifdef CONFIG_SECURITY +static inline void __maybe_unused iee_set_cred_security(struct cred *cred, void *security) +{ + if(!haoc_enabled) + { + cred->security = security; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_SECURITY, cred, security); +} +#endif + +#endif \ No newline at end of file diff --git a/arch/x86/include/asm/haoc/iee-func.h b/arch/x86/include/asm/haoc/iee-func.h new file mode 100644 index 0000000000000000000000000000000000000000..15bb4a22cf9405e8a582f7611cd96d71bb2f2235 --- /dev/null +++ b/arch/x86/include/asm/haoc/iee-func.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * HAOC feature support + * + * Copyright (C) 2025 ZGCLAB + * Authors: Shu Hang + * Hu Bing + */ + +#ifndef _LINUX_IEE_FUNC_H +#define _LINUX_IEE_FUNC_H + +#define HUGE_PMD_ORDER 9 + +#include +#include +extern void set_iee_page(unsigned long addr, unsigned int order); 
+extern void unset_iee_page(unsigned long addr, unsigned int order); +extern bool iee_free_slab_data(struct kmem_cache *s, struct slab *slab, unsigned int order); +extern unsigned int iee_calculate_order(struct kmem_cache *s, unsigned int order); + +extern void iee_free_slab(struct kmem_cache *s, struct slab *slab, + void (*do_free_slab)(struct work_struct *work)); +extern void iee_free_cred_slab(struct work_struct *work); +#endif /* _LINUX_IEE_FUNC_H */ diff --git a/arch/x86/include/asm/haoc/iee-si.h b/arch/x86/include/asm/haoc/iee-si.h new file mode 100644 index 0000000000000000000000000000000000000000..3baf1813f19d3f30a2ad4c90a96a0e0a7f2ee603 --- /dev/null +++ b/arch/x86/include/asm/haoc/iee-si.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_IEE_SI_H +#define _LINUX_IEE_SI_H +#include +#include + +#define __iee_si_code __section(".iee.si_text") +#define __iee_si_data __section(".iee.si_data") + +extern unsigned long cr4_pinned_mask; +extern struct static_key_false cr_pinning; +extern unsigned long cr4_pinned_bits; + +extern unsigned long __iee_si_text_start[]; +extern unsigned long __iee_si_text_end[]; +extern unsigned long __iee_si_data_start[]; +extern unsigned long __iee_si_data_end[]; +extern void iee_sip_init(void); + +extern void iee_rwx_gate(int flag, ...); + +static inline void iee_sip_test(void) +{ + iee_rwx_gate(IEE_SIP_TEST); +} +static inline void iee_write_cr0(unsigned long val) +{ + iee_rwx_gate(IEE_WRITE_CR0, val); +} +static inline void iee_write_cr3(unsigned long val) +{ + iee_rwx_gate(IEE_WRITE_CR3, val); +} +static inline void iee_write_cr4(unsigned long val) +{ + iee_rwx_gate(IEE_WRITE_CR4, val); +} +static inline void iee_load_idt(void *ptr) +{ + iee_rwx_gate(IEE_LOAD_IDT, ptr); +} + +#endif \ No newline at end of file diff --git a/arch/x86/include/asm/haoc/iee-token.h b/arch/x86/include/asm/haoc/iee-token.h new file mode 100644 index 
0000000000000000000000000000000000000000..a96638f3260c05c8a3a29263c5bef60cf042d1af --- /dev/null +++ b/arch/x86/include/asm/haoc/iee-token.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_IEE_TOKEN_H +#define _LINUX_IEE_TOKEN_H +#include +#include +#include +#include + +extern unsigned long long iee_rw_gate(int flag, ...); + +extern struct kmem_cache *task_struct_cachep; + +extern void iee_set_token_page_valid(unsigned long token, unsigned long new, + unsigned int order); +extern void iee_set_token_page_invalid(unsigned long token_addr, + unsigned long __unused, + unsigned int order); +extern struct slab *iee_alloc_task_token_slab(struct kmem_cache *s, + struct slab *slab, + unsigned int order); + +struct task_token { + pgd_t *pgd; + bool valid; +}; + +static inline void iee_verify_token_pgd(struct task_struct *tsk) +{ + struct task_token *token; + + if (tsk == &init_task) + return; + + token = (struct task_token *)__addr_to_iee(tsk); + if (token->pgd != tsk->mm->pgd) + panic("IEE Pgd Error: tsk_pgd: 0x%lx, token_pgd: 0x%lx", + (unsigned long)tsk->mm->pgd, (unsigned long)token->pgd); +} + +static inline void iee_set_token_pgd(struct task_struct *tsk, pgd_t *pgd) +{ + iee_rw_gate(IEE_OP_SET_TOKEN_PGD, tsk, pgd); +} + +static inline void iee_invalidate_token(struct task_struct *tsk) +{ + iee_rw_gate(IEE_OP_INVALIDATE_TOKEN, tsk); +} + +static inline void iee_validate_token(struct task_struct *tsk) +{ + iee_rw_gate(IEE_OP_VALIDATE_TOKEN, tsk); +} + +#endif \ No newline at end of file diff --git a/arch/x86/include/asm/haoc/iee.h b/arch/x86/include/asm/haoc/iee.h new file mode 100644 index 0000000000000000000000000000000000000000..09b939a4b0c6cd443bb4aa9b13e4700fa9a7c913 --- /dev/null +++ b/arch/x86/include/asm/haoc/iee.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * HAOC feature support + * + * Copyright (C) 2025 ZGCLAB + * Authors: Shu Hang + * Hu Bing + */ + +#ifndef _LINUX_IEE_H +#define _LINUX_IEE_H + +#include + +extern 
unsigned long IEE_OFFSET; +#define __iee_pa(x) (__pa(x - IEE_OFFSET)) +#define __phys_to_iee(x) ((void *)(__va(x) + IEE_OFFSET)) +#define __page_to_phys(x) (page_to_pfn(x) << PAGE_SHIFT) +#define __page_to_iee(x) ((unsigned long)(__phys_to_iee(__page_to_phys(x)))) +#define __slab_to_iee(x) (__page_to_iee(folio_page(slab_folio(x), 0))) +#define __addr_to_iee(x) (__phys_to_iee(__pa(x))) + +#define IEE_DATA_ORDER (PMD_SHIFT - PAGE_SHIFT) +#define IEE_STACK_ORDER 0 +struct iee_stack { + void *stack; +}; + +DECLARE_PER_CPU(struct iee_stack, iee_stacks); + +extern void *alloc_low_pages(unsigned int num); +extern void iee_init(void); +extern bool haoc_enabled; +extern bool iee_init_done; +#endif diff --git a/arch/x86/include/asm/haoc/ptp.h b/arch/x86/include/asm/haoc/ptp.h new file mode 100644 index 0000000000000000000000000000000000000000..e435d7fd608ce85c5bfda67da08a85605f878078 --- /dev/null +++ b/arch/x86/include/asm/haoc/ptp.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PTP_H +#define _LINUX_PTP_H + +#include +#include + +extern unsigned long long ptp_rw_gate(int flag, ...); + +static inline void ptp_set_pte(pte_t *ptep, pte_t pte) +{ + compiletime_assert_rwonce_type(*ptep); + ptp_rw_gate(IEE_OP_SET_PTE, ptep, pte); +} + +static inline void ptp_set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + compiletime_assert_rwonce_type(*pmdp); + ptp_rw_gate(IEE_OP_SET_PMD, pmdp, pmd); +} + +static inline void ptp_set_pud(pud_t *pudp, pud_t pud) +{ + compiletime_assert_rwonce_type(*pudp); + ptp_rw_gate(IEE_OP_SET_PUD, pudp, pud); +} + +static inline void ptp_set_p4d(p4d_t *p4dp, p4d_t p4d) +{ + compiletime_assert_rwonce_type(*p4dp); + ptp_rw_gate(IEE_OP_SET_P4D, p4dp, p4d); +} + +static inline void ptp_set_pgd(pgd_t *pgdp, pgd_t pgd) +{ + compiletime_assert_rwonce_type(*pgdp); + ptp_rw_gate(IEE_OP_SET_PGD, pgdp, pgd); +} + +static inline void ptp_set_pte_text_poke(pte_t *ptep, pte_t pte) +{ + compiletime_assert_rwonce_type(*ptep); + 
ptp_rw_gate(IEE_OP_SET_PTE_TEXT_POKE, ptep, pte); +} + +extern pgprotval_t ptp_xchg(pgprotval_t *pgprotp, pgprotval_t pgprotval); +extern pgprotval_t ptp_try_cmpxchg(pgprotval_t *pgprotp, + pgprotval_t old_pgprot, pgprotval_t new_pgprotval); +extern void ptp_mark_all_pgtable_ro(void); +extern struct pg_cache pgd_cache; + +#include +struct iee_disable_t { + /* Writable but considered safe to expose */ + unsigned long disabled_cnt; +}; + +DECLARE_PER_CPU(struct iee_disable_t, iee_disables); +extern void ptp_iee_disable_init(void); +extern void ptp_disable_iee(unsigned long *reg); +extern void ptp_enable_iee(unsigned long reg); +extern void ptp_context_enable_iee(int *disabled_cnt, unsigned long *reg); +extern void ptp_context_restore_iee(int disabled_cnt, unsigned long reg); +#endif diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index c7ec5bb88334eab119ccf78002be2e7679291113..f9cc311b237fc9fb1a12ad043137b5082cdac331 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -9,6 +9,9 @@ #define __HAVE_ARCH_PTE_ALLOC_ONE #define __HAVE_ARCH_PGD_FREE #include +#ifdef CONFIG_PTP +#include +#endif static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; } @@ -153,7 +156,14 @@ static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr) if (mm == &init_mm) gfp &= ~__GFP_ACCOUNT; + #ifdef CONFIG_PTP + if(haoc_enabled) + return (p4d_t *)ptp_pg_alloc(&pg_cache, GFP_KERNEL | __GFP_ZERO); + else + return (p4d_t *)get_zeroed_page(gfp); + #else return (p4d_t *)get_zeroed_page(gfp); + #endif } static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) @@ -162,7 +172,14 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) return; BUG_ON((unsigned long)p4d & (PAGE_SIZE-1)); + #ifdef CONFIG_PTP + if(haoc_enabled) + ptp_pg_free(&pg_cache, p4d); + else + free_page((unsigned long)p4d); + #else free_page((unsigned long)p4d); + #endif } extern void ___p4d_free_tlb(struct mmu_gather *tlb, 
p4d_t *p4d); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index bd42a1630a9cc7603ab8330c09c85be048c867f1..a713f170b60ce3d8fc5cdd7092fd18717e0629a2 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -23,6 +23,10 @@ #include #include #include +#ifdef CONFIG_PTP +#include +#include +#endif extern pgd_t early_top_pgt[PTRS_PER_PGD]; bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd); @@ -1306,9 +1310,22 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, pte_t old_pte, new_pte; old_pte = READ_ONCE(*ptep); + #ifdef CONFIG_PTP + if(haoc_enabled) { + do { + new_pte = pte_wrprotect(old_pte); + } while (!ptp_try_cmpxchg((long *)ptep, pte_val(old_pte), pte_val(new_pte))); + } + else { + do { + new_pte = pte_wrprotect(old_pte); + } while (!try_cmpxchg((long *)&ptep->pte, (long *)&old_pte, *(long *)&new_pte)); + } + #else do { new_pte = pte_wrprotect(old_pte); } while (!try_cmpxchg((long *)&ptep->pte, (long *)&old_pte, *(long *)&new_pte)); + #endif } #define flush_tlb_fix_spurious_fault(vma, address, ptep) do { } while (0) @@ -1368,9 +1385,21 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, pmd_t old_pmd, new_pmd; old_pmd = READ_ONCE(*pmdp); + #ifdef CONFIG_PTP + if(haoc_enabled){ + do { + new_pmd = pmd_wrprotect(old_pmd); + } while (!ptp_try_cmpxchg((long *)pmdp, pmd_val(old_pmd), pmd_val(new_pmd))); + } else { + do { + new_pmd = pmd_wrprotect(old_pmd); + } while (!try_cmpxchg((long *)pmdp, (long *)&old_pmd, *(long *)&new_pmd)); + } + #else do { new_pmd = pmd_wrprotect(old_pmd); } while (!try_cmpxchg((long *)pmdp, (long *)&old_pmd, *(long *)&new_pmd)); + #endif } #ifndef pmdp_establish @@ -1380,10 +1409,24 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, { page_table_check_pmd_set(vma->vm_mm, pmdp, pmd); if (IS_ENABLED(CONFIG_SMP)) { + #ifdef CONFIG_PTP + if(haoc_enabled) + return native_make_pmd(ptp_xchg((pgprotval_t *)pmdp, pmd_val(pmd))); + else + 
return xchg(pmdp, pmd); + #else return xchg(pmdp, pmd); + #endif } else { pmd_t old = *pmdp; + #ifdef CONFIG_PTP + if(haoc_enabled) + set_pmd(pmdp, pmd); + else + WRITE_ONCE(*pmdp, pmd); + #else WRITE_ONCE(*pmdp, pmd); + #endif return old; } } @@ -1471,13 +1514,29 @@ static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp) */ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) { + #ifdef CONFIG_PTP + if(haoc_enabled) + iee_memcpy(dst, src, count * sizeof(pgd_t)); + else + memcpy(dst, src, count * sizeof(pgd_t)); + #else memcpy(dst, src, count * sizeof(pgd_t)); + #endif #ifdef CONFIG_PAGE_TABLE_ISOLATION if (!static_cpu_has(X86_FEATURE_PTI)) return; /* Clone the user space pgd as well */ + #ifdef CONFIG_PTP + if(haoc_enabled) + iee_memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src), + count * sizeof(pgd_t)); + else + memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src), + count * sizeof(pgd_t)); + #else memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src), count * sizeof(pgd_t)); + #endif #endif } diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index a629b1b9f65a61cad916d6d9f8ef3853b5deadf8..84add6150bbc4ae0c7523cf6ce774a0f533e5a83 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -15,6 +15,9 @@ #include #include #include +#ifdef CONFIG_PTP +#include +#endif extern p4d_t level4_kernel_pgt[512]; extern p4d_t level4_ident_pgt[512]; @@ -64,7 +67,14 @@ void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte); static inline void native_set_pte(pte_t *ptep, pte_t pte) { + #ifdef CONFIG_PTP + if(haoc_enabled) + ptp_set_pte(ptep, pte); + else + WRITE_ONCE(*ptep, pte); + #else WRITE_ONCE(*ptep, pte); + #endif } static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, @@ -80,7 +90,14 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) { + #ifdef CONFIG_PTP + 
if(haoc_enabled) + ptp_set_pmd(pmdp, pmd); + else + WRITE_ONCE(*pmdp, pmd); + #else WRITE_ONCE(*pmdp, pmd); + #endif } static inline void native_pmd_clear(pmd_t *pmd) @@ -91,7 +108,14 @@ static inline void native_pmd_clear(pmd_t *pmd) static inline pte_t native_ptep_get_and_clear(pte_t *xp) { #ifdef CONFIG_SMP + #ifdef CONFIG_PTP + if (haoc_enabled) + return native_make_pte(ptp_xchg((pgprotval_t *)xp, 0)); + else + return native_make_pte(xchg(&xp->pte, 0)); + #else return native_make_pte(xchg(&xp->pte, 0)); + #endif #else /* native_local_ptep_get_and_clear, but duplicated because of cyclic dependency */ @@ -104,7 +128,14 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp) static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) { #ifdef CONFIG_SMP + #ifdef CONFIG_PTP + if (haoc_enabled) + return native_make_pmd(ptp_xchg((pgprotval_t *)xp, 0)); + else + return native_make_pmd(xchg(&xp->pmd, 0)); + #else return native_make_pmd(xchg(&xp->pmd, 0)); + #endif #else /* native_local_pmdp_get_and_clear, but duplicated because of cyclic dependency */ @@ -116,7 +147,14 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) static inline void native_set_pud(pud_t *pudp, pud_t pud) { + #ifdef CONFIG_PTP + if (haoc_enabled) + ptp_set_pud(pudp, pud); + else + WRITE_ONCE(*pudp, pud); + #else WRITE_ONCE(*pudp, pud); + #endif } static inline void native_pud_clear(pud_t *pud) @@ -127,7 +165,14 @@ static inline void native_pud_clear(pud_t *pud) static inline pud_t native_pudp_get_and_clear(pud_t *xp) { #ifdef CONFIG_SMP + #ifdef CONFIG_PTP + if (haoc_enabled) + return native_make_pud(ptp_xchg((pgprotval_t *)xp, 0)); + else + return native_make_pud(xchg(&xp->pud, 0)); + #else return native_make_pud(xchg(&xp->pud, 0)); + #endif #else /* native_local_pudp_get_and_clear, * but duplicated because of cyclic dependency @@ -144,13 +189,27 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) pgd_t pgd; if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) { 
+ #ifdef CONFIG_PTP + if (haoc_enabled) + ptp_set_p4d(p4dp, p4d); + else + WRITE_ONCE(*p4dp, p4d); + #else WRITE_ONCE(*p4dp, p4d); + #endif return; } pgd = native_make_pgd(native_p4d_val(p4d)); pgd = pti_set_user_pgtbl((pgd_t *)p4dp, pgd); + #ifdef CONFIG_PTP + if (haoc_enabled) + ptp_set_p4d(p4dp, native_make_p4d(native_pgd_val(pgd))); + else + WRITE_ONCE(*p4dp, native_make_p4d(native_pgd_val(pgd))); + #else WRITE_ONCE(*p4dp, native_make_p4d(native_pgd_val(pgd))); + #endif } static inline void native_p4d_clear(p4d_t *p4d) @@ -160,7 +219,14 @@ static inline void native_p4d_clear(p4d_t *p4d) static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { + #ifdef CONFIG_PTP + if (haoc_enabled) + ptp_set_pgd(pgdp, pti_set_user_pgtbl(pgdp, pgd)); + else + WRITE_ONCE(*pgdp, pti_set_user_pgtbl(pgdp, pgd)); + #else WRITE_ONCE(*pgdp, pti_set_user_pgtbl(pgdp, pgd)); + #endif } static inline void native_pgd_clear(pgd_t *pgd) diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 1c5513b04f0387b5972c3d1e3ded7f4b26598dc1..de9508e4d912441e41e9a8cbcc076f9b513d2835 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -9,6 +9,10 @@ #include #include #include +#ifdef CONFIG_IEE_SIP +#include +extern bool haoc_enabled; +#endif /* * The compiler should not reorder volatile asm statements with respect to each @@ -49,9 +53,23 @@ static inline unsigned long __native_read_cr3(void) return val; } +#ifdef CONFIG_IEE_SIP +static inline void iee_write_cr3_early(unsigned long val) +{ + asm volatile("mov %0,%%cr3" : : "r" (val) : "memory"); +} +#endif + static inline void native_write_cr3(unsigned long val) { + #ifdef CONFIG_IEE_SIP + if(haoc_enabled) + iee_write_cr3(val); + else + asm volatile("mov %0,%%cr3": : "r" (val) : "memory"); + #else asm volatile("mov %0,%%cr3": : "r" (val) : "memory"); + #endif } static inline unsigned long native_read_cr4(void) diff --git a/arch/x86/include/asm/tlb.h 
b/arch/x86/include/asm/tlb.h index 4d3c9d00d6b6b2e08617e068f449ac8564f203a6..a9db313c344d1874290e713281f662ec36cda769 100644 --- a/arch/x86/include/asm/tlb.h +++ b/arch/x86/include/asm/tlb.h @@ -34,6 +34,14 @@ static inline void __tlb_remove_table(void *table) free_page_and_swap_cache(table); } +#ifdef CONFIG_PTP +#include +static inline void __ptp_tlb_remove_table(void *table) +{ + ptp_pg_free(&pg_cache, page_to_virt((struct page *)table)); +} +#endif + static inline void invlpg(unsigned long addr) { asm volatile("invlpg (%0)" ::"r" (addr) : "memory"); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f4717d0da6c75a02d7ee6a4847b8f22acebac58d..84a58b289edd2955fb79a2e76586a71b9297394b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -158,3 +158,4 @@ ifeq ($(CONFIG_X86_64),y) endif obj-$(CONFIG_HYGON_CSV) += csv.o +obj-$(CONFIG_IEE) += haoc/ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 40299ba271efaea97818ee36a58dc8ef97f2b798..aa6654fa212804f785d7c5326ab73b62a7c6b2f8 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1995,6 +1995,25 @@ static void text_poke_memset(void *dst, const void *src, size_t len) typedef void text_poke_f(void *dst, const void *src, size_t len); +#ifdef CONFIG_PTP +static inline void set_ptes_text_poke(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte, unsigned int nr) +{ + page_table_check_ptes_set(mm, ptep, pte, nr); + + arch_enter_lazy_mmu_mode(); + for (;;) { + compiletime_assert_rwonce_type(*ptep); + ptp_set_pte_text_poke(ptep, pte); + if (--nr == 0) + break; + ptep++; + pte = pte_next_pfn(pte); + } + arch_leave_lazy_mmu_mode(); +} +#endif + static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t len) { bool cross_page_boundary = offset_in_page(addr) + len > PAGE_SIZE; @@ -2046,11 +2065,25 @@ static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t l 
local_irq_save(flags); pte = mk_pte(pages[0], pgprot); + #ifdef CONFIG_PTP + if(haoc_enabled) + set_ptes_text_poke(poking_mm, poking_addr, ptep, pte, 1); + else + set_pte_at(poking_mm, poking_addr, ptep, pte); + #else set_pte_at(poking_mm, poking_addr, ptep, pte); + #endif if (cross_page_boundary) { pte = mk_pte(pages[1], pgprot); + #ifdef CONFIG_PTP + if(haoc_enabled) + set_ptes_text_poke(poking_mm, poking_addr + PAGE_SIZE, ptep + 1, pte, 1); + else + set_pte_at(poking_mm, poking_addr + PAGE_SIZE, ptep + 1, pte); + #else set_pte_at(poking_mm, poking_addr + PAGE_SIZE, ptep + 1, pte); + #endif } /* diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 50383bc46dd77d24af067eaaea80959adb177d91..797baa89bf1189329960bb606f23343ec8e74de6 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -25,6 +25,9 @@ #include #endif +#ifdef CONFIG_IEE +#include +#endif #ifdef CONFIG_X86_32 # include "asm-offsets_32.c" #else @@ -127,4 +130,11 @@ static void __used common(void) OFFSET(ARIA_CTX_rounds, aria_ctx, rounds); #endif +#ifdef CONFIG_IEE + /* Offset for fields in iee_stack */ + OFFSET(IEE_STACK, iee_stack, stack); +#endif +#ifdef CONFIG_PTP + OFFSET(IEE_DISABLE, iee_disable_t, disabled_cnt); +#endif } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e2dc837c22c3df3cdaf176381b47a3a38214080d..6df9e923ab71dada4d2885b89c45381907f00eef 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -66,6 +66,12 @@ #include #include #include +#ifdef CONFIG_IEE +#include +#endif +#ifdef CONFIG_IEE_SIP +#include +#endif #include "cpu.h" @@ -404,14 +410,42 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c) } /* These bits should not change their value after CPU init is finished. 
*/ +#ifdef CONFIG_IEE_SIP +unsigned long cr4_pinned_mask = + X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | + X86_CR4_FSGSBASE | X86_CR4_CET; +DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); +unsigned long cr4_pinned_bits __ro_after_init; +#else static const unsigned long cr4_pinned_mask = X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE | X86_CR4_CET; static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); static unsigned long cr4_pinned_bits __ro_after_init; +#endif void native_write_cr0(unsigned long val) { + #ifdef CONFIG_IEE_SIP + if(haoc_enabled) + iee_write_cr0(val); + else{ + unsigned long bits_missing = 0; + + set_register: + asm volatile("mov %0,%%cr0": "+r" (val) : : "memory"); + + if (static_branch_likely(&cr_pinning)) { + if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) { + bits_missing = X86_CR0_WP; + val |= bits_missing; + goto set_register; + } + /* Warn after we've set the missing bits. */ + WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n"); + } + } + #else unsigned long bits_missing = 0; set_register: @@ -426,11 +460,33 @@ void native_write_cr0(unsigned long val) /* Warn after we've set the missing bits. */ WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n"); } + #endif } EXPORT_SYMBOL(native_write_cr0); void __no_profile native_write_cr4(unsigned long val) { + #ifdef CONFIG_IEE_SIP + if(haoc_enabled) + iee_write_cr4(val); + else{ + unsigned long bits_changed = 0; + + set_register: + asm volatile("mov %0,%%cr4": "+r" (val) : : "memory"); + + if (static_branch_likely(&cr_pinning)) { + if (unlikely((val & cr4_pinned_mask) != cr4_pinned_bits)) { + bits_changed = (val & cr4_pinned_mask) ^ cr4_pinned_bits; + val = (val & ~cr4_pinned_mask) | cr4_pinned_bits; + goto set_register; + } + /* Warn after we've corrected the changed bits. 
*/ + WARN_ONCE(bits_changed, "pinned CR4 bits changed: 0x%lx!?\n", + bits_changed); + } + } + #else unsigned long bits_changed = 0; set_register: @@ -446,6 +502,7 @@ void __no_profile native_write_cr4(unsigned long val) WARN_ONCE(bits_changed, "pinned CR4 bits changed: 0x%lx!?\n", bits_changed); } + #endif } #if IS_MODULE(CONFIG_LKDTM) EXPORT_SYMBOL_GPL(native_write_cr4); @@ -596,6 +653,20 @@ static __always_inline void setup_cet(struct cpuinfo_x86 *c) if (!IS_ENABLED(CONFIG_X86_CET)) return; +#ifdef CONFIG_IEE + if (haoc_enabled) { + /* + * NOTE: IEE relies on CR0.WP (Write Protection). + * According to Intel SDM Vol.3(Section 2.5): + * This flag must be set before software can set CR4.CET, + * and it cannot be cleared as long as CR4.CET = 1. + * Therefore, IEE does not enable CET during kernel boot. + */ + pr_info("CET disabled because of the contradiction with IEE"); + return; + } +#endif + kernel_ibt = HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT); user_shstk = cpu_feature_enabled(X86_FEATURE_SHSTK) && IS_ENABLED(CONFIG_X86_USER_SHADOW_STACK); @@ -2553,4 +2624,8 @@ void __init arch_cpu_finalize_init(void) * hypercalls work when the SWIOTLB bounce buffers are decrypted. 
*/ mem_encrypt_init(); + #ifdef CONFIG_IEE_SIP + if(haoc_enabled) + iee_sip_init(); + #endif } diff --git a/arch/x86/kernel/haoc/Makefile b/arch/x86/kernel/haoc/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..3db03b5e33104cb37e7ead7d94e53dcecc10d5ec --- /dev/null +++ b/arch/x86/kernel/haoc/Makefile @@ -0,0 +1,4 @@ +obj-y += haoc.o +obj-y += iee/ +obj-$(CONFIG_PTP) += ptp/ +obj-$(CONFIG_CREDP) += credp/ \ No newline at end of file diff --git a/arch/x86/kernel/haoc/credp/Makefile b/arch/x86/kernel/haoc/credp/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..cc494acaa7a16a8d6e33c0cdf22aacb0e5a83f47 --- /dev/null +++ b/arch/x86/kernel/haoc/credp/Makefile @@ -0,0 +1,3 @@ +obj-y += credp.o + +ccflags-y += -I$(srctree)/mm \ No newline at end of file diff --git a/arch/x86/kernel/haoc/credp/credp.c b/arch/x86/kernel/haoc/credp/credp.c new file mode 100644 index 0000000000000000000000000000000000000000..7ad23a6973db4601a32d05da49589481f3d14585 --- /dev/null +++ b/arch/x86/kernel/haoc/credp/credp.c @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +#include +#include +#include +#include "slab.h" + +extern struct cred init_cred; + +void _iee_set_cred_rcu(unsigned long __unused, struct cred *cred, struct rcu_head *rcu) +{ + *((struct rcu_head **)(&(cred->rcu.func))) = rcu; +} + +void _iee_set_cred_security(unsigned long __unused, struct cred *cred, void *security) +{ + cred->security = security; +} + +unsigned long _iee_set_cred_atomic_op_usage(unsigned long __unused, + struct cred *cred, int flag, int nr) +{ + switch (flag) { + case AT_ADD: { + atomic_long_add(nr, &cred->usage); + return 0; + } + case AT_INC_NOT_ZERO: { + return atomic_long_inc_not_zero(&cred->usage); + } + case AT_SUB_AND_TEST: { + return atomic_long_sub_and_test(nr, &cred->usage); + } + } + return 0; +} + +void _iee_set_cred_atomic_set_usage(unsigned long __unused, struct cred *cred, int i) +{ + 
atomic_long_set(&cred->usage, i); +} + +void _iee_set_cred_non_rcu(unsigned long __unused, struct cred *cred, int non_rcu) +{ + cred->non_rcu = non_rcu; +} + +void _iee_set_cred_session_keyring(unsigned long __unused, struct cred *cred, + struct key *session_keyring) +{ + cred->session_keyring = session_keyring; +} + +void _iee_set_cred_process_keyring(unsigned long __unused, struct cred *cred, + struct key *process_keyring) +{ + cred->process_keyring = process_keyring; +} + +void _iee_set_cred_thread_keyring(unsigned long __unused, struct cred *cred, + struct key *thread_keyring) +{ + cred->thread_keyring = thread_keyring; +} + +void _iee_set_cred_request_key_auth(unsigned long __unused, struct cred *cred, + struct key *request_key_auth) +{ + cred->request_key_auth = request_key_auth; +} + +void _iee_set_cred_jit_keyring(unsigned long __unused, struct cred *cred, unsigned char jit_keyring) +{ + cred->jit_keyring = jit_keyring; +} + +void _iee_set_cred_cap_inheritable(unsigned long __unused, struct cred *cred, + kernel_cap_t cap_inheritable) +{ + cred->cap_inheritable = cap_inheritable; +} + +void _iee_set_cred_cap_permitted(unsigned long __unused, struct cred *cred, + kernel_cap_t cap_permitted) +{ + cred->cap_permitted = cap_permitted; +} + +void _iee_set_cred_cap_effective(unsigned long __unused, struct cred *cred, + kernel_cap_t cap_effective) +{ + cred->cap_effective = cap_effective; +} + +void _iee_set_cred_cap_bset(unsigned long __unused, struct cred *cred, kernel_cap_t cap_bset) +{ + cred->cap_bset = cap_bset; +} + +void _iee_set_cred_cap_ambient(unsigned long __unused, struct cred *cred, kernel_cap_t cap_ambient) +{ + cred->cap_ambient = cap_ambient; +} + +void _iee_set_cred_securebits(unsigned long __unused, struct cred *cred, + unsigned int securebits) +{ + cred->securebits = securebits; +} + +void _iee_set_cred_group_info(unsigned long __unused, struct cred *cred, + struct group_info *group_info) +{ + cred->group_info = group_info; +} + +void 
_iee_set_cred_ucounts(unsigned long __unused, struct cred *cred, struct ucounts *ucounts) +{ + cred->ucounts = ucounts; +} + +void _iee_set_cred_user_ns(unsigned long __unused, struct cred *cred, + struct user_namespace *user_ns) +{ + cred->user_ns = user_ns; +} + +void _iee_set_cred_user(unsigned long __unused, struct cred *cred, struct user_struct *user) +{ + cred->user = user; +} + +void _iee_set_cred_fsgid(unsigned long __unused, struct cred *cred, kgid_t fsgid) +{ + cred->fsgid = fsgid; +} + +void _iee_set_cred_fsuid(unsigned long __unused, struct cred *cred, kuid_t fsuid) +{ + cred->fsuid = fsuid; +} + +void _iee_set_cred_egid(unsigned long __unused, struct cred *cred, kgid_t egid) +{ + cred->egid = egid; +} + +void _iee_set_cred_euid(unsigned long __unused, struct cred *cred, kuid_t euid) +{ + cred->euid = euid; +} + +void _iee_set_cred_sgid(unsigned long __unused, struct cred *cred, kgid_t sgid) +{ + cred->sgid = sgid; +} + +void _iee_set_cred_suid(unsigned long __unused, struct cred *cred, kuid_t suid) +{ + cred->suid = suid; +} + +void _iee_copy_cred(unsigned long __unused, struct cred *old, struct cred *new) +{ + if (new == &init_cred) + panic("copy_cred for init_cred: %lx\n", (unsigned long)new); + struct rcu_head *rcu = (struct rcu_head *)(new->rcu.func); + + memcpy(new, old, sizeof(struct cred)); + *(struct rcu_head **)(&(new->rcu.func)) = rcu; + *(struct rcu_head *)(new->rcu.func) = *(struct rcu_head *)(old->rcu.func); +} + +void _iee_set_cred_gid(unsigned long __unused, struct cred *cred, kgid_t gid) +{ + cred->gid = gid; +} + +void _iee_set_cred_uid(unsigned long __unused, struct cred *cred, kuid_t uid) +{ + cred->uid = uid; +} + +struct iee_free_slab_work { + struct work_struct work; + struct kmem_cache *s; + struct slab *slab; +}; + +void iee_free_cred_slab(struct work_struct *work) +{ + struct iee_free_slab_work *iee_free_slab_work = + container_of(work, struct iee_free_slab_work, work); + struct slab *slab = iee_free_slab_work->slab; + struct 
folio *folio = slab_folio(slab); + int order = folio_order(folio); + + unset_iee_page((unsigned long)page_address(folio_page(slab_folio(slab), 0)), order); + __free_pages(&folio->page, order); + kfree(iee_free_slab_work); +} diff --git a/arch/x86/kernel/haoc/haoc.c b/arch/x86/kernel/haoc/haoc.c new file mode 100644 index 0000000000000000000000000000000000000000..2eb5ca0b9e9f7233d1eb09d493d82c2d5dcc4d2a --- /dev/null +++ b/arch/x86/kernel/haoc/haoc.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * HAOC feature support + * + * Copyright (C) 2025 ZGCLAB + * Authors: Shu Hang + * Hu Bing + */ + +#include + +typedef void (*iee_func)(void); +iee_func iee_funcs[] = { + (iee_func)_iee_memcpy, + (iee_func)_iee_memset, + (iee_func)_iee_set_freeptr, + (iee_func)_iee_test_and_clear_bit, +#ifdef CONFIG_IEE_PTRP + (iee_func)_iee_set_token_pgd, + (iee_func)_iee_invalidate_token, + (iee_func)_iee_validate_token, +#endif +#ifdef CONFIG_PTP +#ifdef CONFIG_IEE_PTRP + (iee_func)_iee_unset_token, + (iee_func)_iee_set_token, +#endif + (iee_func)_iee_set_pte, + (iee_func)_iee_set_pmd, + (iee_func)_iee_set_pud, + (iee_func)_iee_set_p4d, + (iee_func)_iee_set_pgd, + (iee_func)_iee_set_pte_text_poke, +#endif +#ifdef CONFIG_CREDP + (iee_func)_iee_copy_cred, + (iee_func)_iee_set_cred_uid, + (iee_func)_iee_set_cred_gid, + (iee_func)_iee_set_cred_suid, + (iee_func)_iee_set_cred_sgid, + (iee_func)_iee_set_cred_euid, + (iee_func)_iee_set_cred_egid, + (iee_func)_iee_set_cred_fsuid, + (iee_func)_iee_set_cred_fsgid, + (iee_func)_iee_set_cred_user, + (iee_func)_iee_set_cred_user_ns, + (iee_func)_iee_set_cred_group_info, + (iee_func)_iee_set_cred_securebits, + (iee_func)_iee_set_cred_cap_inheritable, + (iee_func)_iee_set_cred_cap_permitted, + (iee_func)_iee_set_cred_cap_effective, + (iee_func)_iee_set_cred_cap_bset, + (iee_func)_iee_set_cred_cap_ambient, + (iee_func)_iee_set_cred_jit_keyring, + (iee_func)_iee_set_cred_session_keyring, + (iee_func)_iee_set_cred_process_keyring, + 
(iee_func)_iee_set_cred_thread_keyring, + (iee_func)_iee_set_cred_request_key_auth, + (iee_func)_iee_set_cred_non_rcu, + (iee_func)_iee_set_cred_atomic_set_usage, + (iee_func)_iee_set_cred_atomic_op_usage, + (iee_func)_iee_set_cred_security, + (iee_func)_iee_set_cred_rcu, + (iee_func)_iee_set_cred_ucounts, +#endif + NULL +}; diff --git a/arch/x86/kernel/haoc/iee/Makefile b/arch/x86/kernel/haoc/iee/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..6c13071f54d3434e5f215232431098a87f375a89 --- /dev/null +++ b/arch/x86/kernel/haoc/iee/Makefile @@ -0,0 +1,4 @@ +obj-y += iee-gate.o iee-init.o iee.o iee-func.o +obj-$(CONFIG_IEE_SIP) += iee-si.o +obj-$(CONFIG_IEE_PTRP) += iee-token.o +ccflags-y += -I$(srctree)/mm diff --git a/arch/x86/kernel/haoc/iee/iee-func.c b/arch/x86/kernel/haoc/iee/iee-func.c new file mode 100644 index 0000000000000000000000000000000000000000..0054c1b3d6169b972e751853fc2f68ec59d37f08 --- /dev/null +++ b/arch/x86/kernel/haoc/iee/iee-func.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * HAOC feature support + * + * Copyright (C) 2025 ZGCLAB + * Authors: Shu Hang + * Hu Bing + */ + +#include +#include +#include "slab.h" +#ifdef CONFIG_IEE_PTRP +#include +#endif +extern bool haoc_enabled; + +void set_iee_page(unsigned long addr, unsigned int order) +{ + set_memory_ro(addr, 1 << order); +} + +void unset_iee_page(unsigned long addr, unsigned int order) +{ + set_memory_rw(addr, 1 << order); +} + +struct iee_free_slab_work { + struct work_struct work; + struct kmem_cache *s; + struct slab *slab; +}; + +void iee_free_slab(struct kmem_cache *s, struct slab *slab, + void (*do_free_slab)(struct work_struct *work)) +{ + if(haoc_enabled) + return; + struct iee_free_slab_work *iee_free_slab_work = + kmalloc(sizeof(struct iee_free_slab_work), GFP_ATOMIC); + + iee_free_slab_work->s = s; + iee_free_slab_work->slab = slab; + INIT_WORK(&iee_free_slab_work->work, do_free_slab); + schedule_work(&iee_free_slab_work->work); +} + 
+#ifdef CONFIG_IEE_PTRP +static void iee_free_task_struct_slab(struct work_struct *work) +{ + struct iee_free_slab_work *iee_free_slab_work = + container_of(work, struct iee_free_slab_work, work); + struct slab *slab = iee_free_slab_work->slab; + struct folio *folio = slab_folio(slab); + unsigned int order = folio_order(folio); + unsigned long token = __slab_to_iee(slab); + // Free token. + iee_set_token_page_invalid(token, 0, order); + __free_pages(&folio->page, order); + kfree(iee_free_slab_work); +} +#endif + +bool iee_free_slab_data(struct kmem_cache *s, struct slab *slab, + unsigned int order) +{ +#ifdef CONFIG_IEE_PTRP + if (s == task_struct_cachep) { + iee_free_slab(s, slab, iee_free_task_struct_slab); + return true; + } +#endif + return false; +} + +unsigned int iee_calculate_order(struct kmem_cache *s, unsigned int order) +{ + if(!haoc_enabled) + { + return order; + } +#ifdef CONFIG_IEE_PTRP + if (strcmp(s->name, "task_struct") == 0) + return IEE_DATA_ORDER; +#endif +#ifdef CONFIG_CREDP + if (strcmp(s->name, "cred_jar") == 0) + order = IEE_DATA_ORDER; +#endif + return order; +} \ No newline at end of file diff --git a/arch/x86/kernel/haoc/iee/iee-gate.S b/arch/x86/kernel/haoc/iee/iee-gate.S new file mode 100644 index 0000000000000000000000000000000000000000..35f8e7f8a620546597be92122e88f1986deb496b --- /dev/null +++ b/arch/x86/kernel/haoc/iee/iee-gate.S @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * HAOC feature support + * + * Copyright (C) 2025 ZGCLAB + * Authors: Shu Hang + * Hu Bing + */ + +#include +#include +#include +#include +#include + +#define X86_CR4_SMEP_SMAP (X86_CR4_SMEP | X86_CR4_SMAP) + +/* + * scratch_reg would be changed, + * caller should dertimine if scratch_reg should be saved and restored. 
+ */
+.macro DISABLE_WP scratch_reg:req
+	/* Clear CR0.WP so CPL0 writes ignore page-table read-only bits. */
+	movq	%cr0, %\scratch_reg
+	andq	$(~X86_CR0_WP), %\scratch_reg
+	movq	%\scratch_reg, %cr0
+.endm
+
+.macro ENABLE_WP scratch_reg:req
+	/*
+	 * Set CR0.WP again.  The loop retries until the WP bit is observed
+	 * set in the scratch register after the OR.
+	 */
+	movq	%cr0, %\scratch_reg
+1:
+	orq	$X86_CR0_WP, %\scratch_reg
+	movq	%\scratch_reg, %cr0
+	testq	$X86_CR0_WP, %\scratch_reg
+	je	1b
+.endm
+
+/*
+ * IEE memory access gate.
+ * Kernel calls the gate to modify IEE-protected memory.
+ */
+
+SYM_FUNC_START(iee_rw_gate)
+	/* Save RFLAGS (interrupt flag), then disable interrupts. */
+	pushfq
+	cli
+
+	pushq	%r12
+
+#ifdef CONFIG_PTP
+	/* If IEE is per-cpu disabled (WP already clear), call directly. */
+	movq	PER_CPU_VAR(iee_disables) + IEE_DISABLE, %r12
+	testq	%r12, %r12
+	jz	rw_wp_enabled
+
+	leaq	iee_funcs(%rip), %rax
+	call	*(%rax, %rdi, 8)
+
+	popq	%r12
+	popfq
+	jmp	__x86_return_thunk
+
+rw_wp_enabled:
+#endif
+	DISABLE_WP r12
+
+	/* switch to iee stack */
+	movq	%rsp, %r12
+	movq	PER_CPU_VAR(iee_stacks) + IEE_STACK, %rsp
+
+	/* call iee func selected by index in %rdi */
+	leaq	iee_funcs(%rip), %rax
+	call	*(%rax, %rdi, 8)
+
+	/* switch back to kernel stack */
+	movq	%r12, %rsp
+
+	ENABLE_WP r12
+
+	popq	%r12
+
+	/* restore interrupt flag */
+	popfq
+
+	jmp	__x86_return_thunk	/* ret */
+SYM_FUNC_END(iee_rw_gate)
+EXPORT_SYMBOL(iee_rw_gate)
+
+#ifdef CONFIG_IEE_SIP
+SYM_FUNC_START(iee_rwx_gate)
+	pushq	%r12
+
+	/* Save RFLAGS (interrupt flag), then disable interrupts. */
+	pushfq
+	cli
+
+#ifdef CONFIG_PTP
+	movq	PER_CPU_VAR(iee_disables) + IEE_DISABLE, %r12
+	testq	%r12, %r12
+	jz	rwx_wp_enabled
+
+	/* Clear SMEP so the U-mapped .iee.text handler can run at CPL0. */
+	movq	%cr4, %rax	/* rax -> cr4 */
+	andq	$(~X86_CR4_SMEP), %rax
+	movq	%rax, %cr4
+
+	call	_iee_si_handler
+
+	/* Restore SMEP/SMAP; retry until both bits are observed set. */
+	movq	%cr4, %rax	/* rax -> cr4 */
+3:	orq	$X86_CR4_SMEP_SMAP, %rax
+	movq	%rax, %cr4
+	andq	$X86_CR4_SMEP_SMAP, %rax
+	cmpq	$X86_CR4_SMEP_SMAP, %rax
+	jnz	3b	/* fix: was "jnz 3" = jump to absolute address 3 */
+
+	popfq
+	popq	%r12
+	jmp	__x86_return_thunk	/* ret */
+
+rwx_wp_enabled:
+#endif
+	/* set SMEP=0 to enable supervisor-mode exec user-mode insn */
+	movq	%cr4, %rax	/* rax -> cr4 */
+	andq	$(~X86_CR4_SMEP), %rax
+	movq	%rax, %cr4
+
+	DISABLE_WP r12
+
+	movq	%rsp, %r12
+	/* If iee hasn't been initialized, skip stack switch. */
+	cmpb	$0, iee_init_done(%rip)
+	jz	2f
+
+	/* switch to iee stack */
+	movq	PER_CPU_VAR(iee_stacks) + IEE_STACK, %rsp
+
+2:	call	_iee_si_handler
+	/* switch to kernel stack (no-op if the switch was skipped) */
+	movq	%r12, %rsp
+
+	ENABLE_WP r12
+
+	/* set SMEP=1 again; retry until SMEP and SMAP are observed set */
+	movq	%cr4, %rax	/* rax -> cr4 */
+1:	orq	$X86_CR4_SMEP_SMAP, %rax
+	movq	%rax, %cr4
+	andq	$X86_CR4_SMEP_SMAP, %rax
+	cmpq	$X86_CR4_SMEP_SMAP, %rax
+	jnz	1b	/* fix: was "jnz 1" = jump to absolute address 1 */
+
+	/* restore interrupt flag */
+	popfq
+
+	popq	%r12
+	jmp	__x86_return_thunk	/* ret */
+SYM_FUNC_END(iee_rwx_gate)
+#endif
\ No newline at end of file
diff --git a/arch/x86/kernel/haoc/iee/iee-init.c b/arch/x86/kernel/haoc/iee/iee-init.c
new file mode 100644
index 0000000000000000000000000000000000000000..7ac4704769f4d075d3b7623722684b193b54d177
--- /dev/null
+++ b/arch/x86/kernel/haoc/iee/iee-init.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * HAOC feature support
+ *
+ * Copyright (C) 2025 ZGCLAB
+ * Authors: Shu Hang
+ * Hu Bing
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#ifdef CONFIG_IEE_SIP
+#include
+#endif
+
+/* IEE_OFFSET = pgtable_l5_enabled() ?
0x40000000000000 : 0x200000000000; */ +unsigned long IEE_OFFSET = 0x200000000000; +#ifdef CONFIG_IEE_SIP +bool iee_init_done __iee_si_data; +#else +bool iee_init_done; +#endif +DEFINE_PER_CPU(struct iee_stack, iee_stacks); + +static void __init _iee_mapping_populate_pud(pud_t *pud, unsigned long addr, unsigned long end) +{ + void *p; + pmd_t *pmd; + unsigned long pmd_next; + phys_addr_t phys; + pgprot_t pgprot_shadow_pmd; + + addr = ALIGN_DOWN(addr, PMD_SIZE); + phys = __iee_pa(addr); + pgprot_shadow_pmd = __pgprot(pgprot_val(PAGE_KERNEL_LARGE) & (~__RW) & (~___D)); + + if (pud_none(*pud)) { + p = alloc_low_pages(1); + pud_populate(&init_mm, pud, p); + } + + pmd = pmd_offset(pud, addr); + do { + pmd_next = pmd_addr_end(addr, end); + set_pmd(pmd, __pmd(phys | pgprot_val(pgprot_shadow_pmd))); + phys += pmd_next - addr; + } while (pmd++, addr = pmd_next, addr != end); +} + +static void __init _iee_mapping_populate_p4d(p4d_t *p4d, unsigned long addr, unsigned long end) +{ + void *p; + pud_t *pud; + unsigned long pud_next; + + if (p4d_none(*p4d)) { + p = alloc_low_pages(1); + p4d_populate(&init_mm, p4d, p); + } + + pud = pud_offset(p4d, addr); + do { + pud_next = pud_addr_end(addr, end); + pr_info("IEE: iee_populate_pud(%#010lx, %#010lx)\n", + addr, pud_next); + _iee_mapping_populate_pud(pud, addr, pud_next); + } while (pud++, addr = pud_next, addr != end); +} + +static void __init _iee_mapping_populate_pgd(pgd_t *pgd, unsigned long addr, unsigned long end) +{ + void *p; + p4d_t *p4d; + unsigned long p4d_next; + + if (pgd_none(*pgd)) { + p = alloc_low_pages(1); + pgd_populate(&init_mm, pgd, p); + } + + p4d = p4d_offset(pgd, addr); + do { + p4d_next = p4d_addr_end(addr, end); + pr_info("IEE: iee_populate_p4d(%#010lx, %#010lx)\n", + addr, p4d_next); + _iee_mapping_populate_p4d(p4d, addr, p4d_next); + } while (p4d++, addr = p4d_next, addr != end); +} + +static void __init _iee_init_mapping(phys_addr_t start_paddr, phys_addr_t end_paddr) +{ + unsigned long addr = (unsigned 
long)__phys_to_iee(start_paddr); + unsigned long end = (unsigned long)__phys_to_iee(end_paddr); + unsigned long pgd_next; + + pgd_t *pgd = pgd_offset_k(addr); + + spin_lock(&pgd_lock); + do { + pgd_next = pgd_addr_end(addr, end); + pr_info("IEE: iee_populate_pgd(%#010lx, %#010lx)\n", + addr, pgd_next); + _iee_mapping_populate_pgd(pgd, addr, pgd_next); + } while (pgd++, addr = pgd_next, addr != end); + spin_unlock(&pgd_lock); +} + +static void __init _iee_mapping_init(void) +{ + struct memblock_region *r; + unsigned long start_pfn, end_pfn; + phys_addr_t start_paddr, end_paddr; + unsigned long nr_pages = 0; + + for_each_mem_region(r) { + start_pfn = memblock_region_memory_base_pfn(r); + end_pfn = memblock_region_memory_end_pfn(r); + + start_paddr = PFN_PHYS(start_pfn); + end_paddr = PFN_PHYS(end_pfn); + + nr_pages += end_pfn - start_pfn; + + pr_info("IEE: mapping iee mapping [mem %#010lx-%#010lx]\n", + (unsigned long)start_paddr, (unsigned long)end_paddr); + + _iee_init_mapping(start_paddr, end_paddr); + } + pr_info("IEE: IEE shadow mapping init done"); +} + +static void __init _iee_stack_init(void) +{ + int cpu; + struct iee_stack *iee_stack; + void *stack_base; + struct page *page; + + for_each_possible_cpu(cpu) { + stack_base = (void *)page_address(alloc_pages(GFP_KERNEL, IEE_STACK_ORDER)); + iee_stack = per_cpu_ptr(&iee_stacks, cpu); + page = alloc_pages(GFP_KERNEL, IEE_STACK_ORDER); + iee_stack->stack = (void *)page_address(page) + PAGE_SIZE * (1 << IEE_STACK_ORDER); + pr_info("IEE: cpu %d, iee_stack 0x%lx", cpu, (unsigned long)iee_stack->stack); + set_memory_ro((unsigned long)stack_base, (1 << IEE_STACK_ORDER)); + } +} + +static void __init _iee_offset_init(void) +{ + if (pgtable_l5_enabled()) + IEE_OFFSET = 0x40000000000000; +} + +void __init iee_init(void) +{ + _iee_offset_init(); + _iee_mapping_init(); + _iee_stack_init(); +} + +bool __ro_after_init haoc_enabled; +EXPORT_SYMBOL(haoc_enabled); +#ifdef CONFIG_IEE_SIP +extern unsigned long cr4_pinned_mask; 
+#endif +static int __init parse_haoc_enabled(char *str) +{ + int ret = kstrtobool(str, &haoc_enabled); + #ifdef CONFIG_IEE_SIP + if(haoc_enabled) + { + cr4_pinned_mask = + X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | + X86_CR4_FSGSBASE | X86_CR4_CET; + } + #endif + return ret; +} +early_param("haoc", parse_haoc_enabled); diff --git a/arch/x86/kernel/haoc/iee/iee-si.c b/arch/x86/kernel/haoc/iee/iee-si.c new file mode 100644 index 0000000000000000000000000000000000000000..214c7617cfed0784d6fc8dcb8a935d1ed3013890 --- /dev/null +++ b/arch/x86/kernel/haoc/iee/iee-si.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include + +unsigned long __iee_si_code notrace _iee_si_handler(int flag, ...) +{ + va_list pArgs; + unsigned long val; + + va_start(pArgs, flag); + switch (flag) { + case IEE_SIP_TEST: + break; + case IEE_WRITE_CR0: { + val = va_arg(pArgs, u64); + unsigned long bits_missing = 0; + set_register_cr0: + asm volatile("mov %0,%%cr0" : "+r"(val) : : "memory"); + if (static_branch_likely(&cr_pinning)) { + if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) { + bits_missing = X86_CR0_WP; + val |= bits_missing; + goto set_register_cr0; + } + /* Warn after we've set the missing bits. 
*/ + WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n"); + } + break; + } + case IEE_WRITE_CR3: { + val = va_arg(pArgs, u64); + asm volatile("mov %0,%%cr3" : : "r"(val) : "memory"); + break; + } + case IEE_WRITE_CR4: { + val = va_arg(pArgs, u64); + val &= ~(X86_CR4_SMEP); + asm volatile("mov %0,%%cr4" : "+r" (val) : : "memory"); + break; + } + case IEE_LOAD_IDT: { + const struct desc_ptr *new_val; + + new_val = va_arg(pArgs, const struct desc_ptr*); + asm volatile("lidt %0"::"m" (*new_val)); + break; + } + } + va_end(pArgs); + return 0; +} + +static void __init _iee_set_kernel_upage(unsigned long addr) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, addr); + pgd_t pgd = READ_ONCE(*pgdp); + + pgd = __pgd((pgd_val(pgd) | _USR) & ~___G); + set_pgd(pgdp, pgd); + + p4d_t *p4dp = p4d_offset(pgdp, addr); + p4d_t p4d = READ_ONCE(*p4dp); + + p4d = __p4d((p4d_val(p4d) | _USR) & ~___G); + set_p4d(p4dp, p4d); + + pud_t *pudp = pud_offset(p4dp, addr); + pud_t pud = READ_ONCE(*pudp); + + pud = __pud((pud_val(pud) | _USR) & ~___G); + set_pud(pudp, pud); + + pmd_t *pmdp = pmd_offset(pudp, addr); + pmd_t pmd = READ_ONCE(*pmdp); + + pmd = __pmd((pmd_val(pmd) | _USR) & ~___G); + set_pmd(pmdp, pmd); + + pte_t *ptep = pte_offset_kernel(pmdp, addr); + pte_t pte = READ_ONCE(*ptep); + + pte = __pte((pte_val(pte) | _USR) & ~___G); + set_pte(ptep, pte); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); +} + +void __init iee_sip_init(void) +{ + unsigned long addr, start, end; + int num_pages; + /* Map .iee.text as U RWX pages */ + start = (unsigned long)__iee_si_text_start; + end = (unsigned long)__iee_si_text_end; + pr_info("IEE: mapping .iee.text:[0x%lx, 0x%lx] as U pages...", start, end); + addr = start; + for ( ; addr < end; addr += PAGE_SIZE) { + set_memory_4k(addr, 1); + _iee_set_kernel_upage(addr); + set_memory_4k((unsigned long)__va(__pa(addr)), 1); + _iee_set_kernel_upage((unsigned long)__va(__pa(addr))); + } + iee_init_done = true; + /* Map .iee.data 
as RO pages */ + start = (unsigned long)__iee_si_data_start; + end = (unsigned long)__iee_si_data_end; + num_pages = (end - start) / PAGE_SIZE; + set_memory_ro(start, num_pages); + /* All initialization is done. Do some simple tests. */ + pr_info("IEE: testing iee_exec_entry si_test..."); + iee_sip_test(); +} \ No newline at end of file diff --git a/arch/x86/kernel/haoc/iee/iee-token.c b/arch/x86/kernel/haoc/iee/iee-token.c new file mode 100644 index 0000000000000000000000000000000000000000..c3490ae6d8caba97a737433dd49932b4dd77cd03 --- /dev/null +++ b/arch/x86/kernel/haoc/iee/iee-token.c @@ -0,0 +1,222 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include "slab.h" + +void iee_set_token_page_valid(unsigned long token, unsigned long token_page, + unsigned int order) +{ + set_memory_4k(token, 1 << order); + set_memory_4k(token_page, 1 << order); + + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, token); + p4d_t *p4dp = p4d_offset(pgdp, token); + pud_t *pudp = pud_offset(p4dp, token); + pmd_t *token_pmdp = pmd_offset(pudp, token); + pte_t *token_ptep = pte_offset_kernel(token_pmdp, token); + + if (!token_page) + panic("Token of task_struct was unset.\n"); + + pgdp = pgd_offset_pgd(pgdir, token_page); + p4dp = p4d_offset(pgdp, token_page); + pudp = pud_offset(p4dp, token_page); + pmd_t *token_page_pmdp = pmd_offset(pudp, token_page); + pte_t *token_page_ptep = pte_offset_kernel(token_page_pmdp, token_page); + + #ifdef CONFIG_PTP + if(haoc_enabled) + iee_rw_gate(IEE_OP_SET_TOKEN, token_ptep, token_page_ptep, token_page, order); + else{ + for (int i = 0; i < (0x1 << order); i++) { + pte_t pte = READ_ONCE(*token_ptep); + + pte = __pte((pte_val(pte) & ~PTE_PFN_MASK) | + (__phys_to_pfn(__pa(token_page + i * PAGE_SIZE)) + << PAGE_SHIFT)); + WRITE_ONCE(*token_ptep, pte); + pte = READ_ONCE(*token_page_ptep); + pte = __pte((pte_val(pte) & ~__RW) & ~___D); + WRITE_ONCE(*token_page_ptep, pte); + token_ptep++; + token_page_ptep++; + } + } + 
#else + for (int i = 0; i < (0x1 << order); i++) { + pte_t pte = READ_ONCE(*token_ptep); + + pte = __pte((pte_val(pte) & ~PTE_PFN_MASK) | + (__phys_to_pfn(__pa(token_page + i * PAGE_SIZE)) + << PAGE_SHIFT)); + WRITE_ONCE(*token_ptep, pte); + pte = READ_ONCE(*token_page_ptep); + pte = __pte((pte_val(pte) & ~__RW) & ~___D); + WRITE_ONCE(*token_page_ptep, pte); + token_ptep++; + token_page_ptep++; + } + #endif + + flush_tlb_kernel_range(token, token + (PAGE_SIZE * (1 << order))); + flush_tlb_kernel_range(token_page, + token_page + (PAGE_SIZE * (1 << order))); +} + +void iee_set_token_page_invalid(unsigned long token, unsigned long __unused, + unsigned int order) +{ + pgd_t *pgdir = swapper_pg_dir; + pgd_t *pgdp = pgd_offset_pgd(pgdir, token); + p4d_t *p4dp = p4d_offset(pgdp, token); + pud_t *pudp = pud_offset(p4dp, token); + pmd_t *token_pmdp = pmd_offset(pudp, token); + pte_t *token_ptep = pte_offset_kernel(token_pmdp, token); + unsigned long token_page = + (unsigned long)page_address(pte_page(*token_ptep)); + + if (!token_page) + panic("Token of task_struct was unset.\n"); + + pgdp = pgd_offset_pgd(pgdir, token_page); + p4dp = p4d_offset(pgdp, token_page); + pudp = pud_offset(p4dp, token_page); + pmd_t *token_page_pmdp = pmd_offset(pudp, token_page); + pte_t *token_page_ptep = pte_offset_kernel(token_page_pmdp, token_page); + + #ifdef CONFIG_PTP + if(haoc_enabled) + iee_rw_gate(IEE_OP_UNSET_TOKEN, token_ptep, token_page_ptep, token, order); + else{ + for (int i = 0; i < (0x1 << order); i++) { + pte_t pte = READ_ONCE(*token_ptep); + + pte = __pte((pte_val(pte) & ~PTE_PFN_MASK) | + (__phys_to_pfn(__iee_pa(token + i * PAGE_SIZE)) + << PAGE_SHIFT)); + WRITE_ONCE(*token_ptep, pte); + pte = READ_ONCE(*token_page_ptep); + pte = __pte(pte_val(pte) | ___D | __RW); + WRITE_ONCE(*token_page_ptep, pte); + token_ptep++; + token_page_ptep++; + } + } + #else + for (int i = 0; i < (0x1 << order); i++) { + pte_t pte = READ_ONCE(*token_ptep); + + pte = __pte((pte_val(pte) & 
~PTE_PFN_MASK) | + (__phys_to_pfn(__iee_pa(token + i * PAGE_SIZE)) + << PAGE_SHIFT)); + WRITE_ONCE(*token_ptep, pte); + pte = READ_ONCE(*token_page_ptep); + pte = __pte(pte_val(pte) | ___D | __RW); + WRITE_ONCE(*token_page_ptep, pte); + token_ptep++; + token_page_ptep++; + } + #endif + free_pages(token_page, order); + flush_tlb_kernel_range(token, token + (PAGE_SIZE * (1 << order))); + flush_tlb_kernel_range(token_page, + token_page + (PAGE_SIZE * (1 << order))); +} + +struct slab *iee_alloc_task_token_slab(struct kmem_cache *s, struct slab *slab, + unsigned int order) +{ + if (!slab || s != task_struct_cachep) + return slab; + + struct folio *folio = slab_folio(slab); + unsigned long token_addr = __slab_to_iee(slab); + unsigned long alloc_token = + __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + + /* Allocation of task_struct and token pages must be done at the same time. */ + if (!alloc_token) { + /* Failed on allocation of token page. Free the allocated ones, + * return and try smaller order. + */ + __slab_clear_pfmemalloc(slab); + folio->mapping = NULL; + /* Make the mapping reset visible before clearing the flag */ + smp_wmb(); + __folio_clear_slab(folio); + __free_pages((struct page *)folio, order); + return NULL; + } + + /* Map allocated token pages to token addresses. 
*/ + iee_set_token_page_valid(token_addr, alloc_token, order); + return slab; +} + +void _iee_set_token_pgd(unsigned long __unused, struct task_struct *tsk, + pgd_t *pgd) +{ + struct task_token *token = (struct task_token *)__addr_to_iee(tsk); + + token->pgd = pgd; +} + +void _iee_invalidate_token(unsigned long __unused, struct task_struct *tsk) +{ + struct task_token *token = (struct task_token *)__addr_to_iee(tsk); + + token->pgd = NULL; + token->valid = false; +} + +void _iee_validate_token(unsigned long __unused, struct task_struct *tsk) +{ + struct task_token *token = (struct task_token *)__addr_to_iee(tsk); + + if (token->valid) + pr_err("IEE: validate token for multiple times."); + token->valid = true; +} + +#ifdef CONFIG_PTP +void _iee_unset_token(unsigned long __unused, pte_t *token_ptep, + pte_t *token_page_ptep, unsigned long token, unsigned int order) +{ + token_ptep = (pte_t *)__addr_to_iee(token_ptep); + token_page_ptep = (pte_t *)__addr_to_iee(token_page_ptep); + + for (int i = 0; i < (0x1 << order); i++) { + pte_t pte = READ_ONCE(*token_ptep); + + pte = __pte((pte_val(pte) & ~PTE_PFN_MASK) | + (__phys_to_pfn(__iee_pa(token + i * PAGE_SIZE)) << PAGE_SHIFT)); + WRITE_ONCE(*token_ptep, pte); + pte = READ_ONCE(*token_page_ptep); + pte = __pte(pte_val(pte) | ___D | __RW); + WRITE_ONCE(*token_page_ptep, pte); + token_ptep++; + token_page_ptep++; + } +} + +void _iee_set_token(unsigned long __unused, pte_t *token_ptep, + pte_t *token_page_ptep, unsigned long token_page, unsigned int order) +{ + token_ptep = (pte_t *)__addr_to_iee(token_ptep); + token_page_ptep = (pte_t *)__addr_to_iee(token_page_ptep); + + for (int i = 0; i < (0x1 << order); i++) { + pte_t pte = READ_ONCE(*token_ptep); + + pte = __pte(((pte_val(pte) & ~PTE_PFN_MASK)) | + (__phys_to_pfn(__pa(token_page + i * PAGE_SIZE)) << PAGE_SHIFT)); + WRITE_ONCE(*token_ptep, pte); + pte = READ_ONCE(*token_page_ptep); + pte = __pte((pte_val(pte) & ~__RW) & ~___D); + WRITE_ONCE(*token_page_ptep, pte); + 
token_ptep++; + token_page_ptep++; + } +} +#endif \ No newline at end of file diff --git a/arch/x86/kernel/haoc/iee/iee.c b/arch/x86/kernel/haoc/iee/iee.c new file mode 100644 index 0000000000000000000000000000000000000000..35c54c3352ac35b8e040d0c22584e359227245d0 --- /dev/null +++ b/arch/x86/kernel/haoc/iee/iee.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * HAOC feature support + * + * Copyright (C) 2025 ZGCLAB + * Authors: Shu Hang + * Hu Bing + */ + +#include + +void _iee_memcpy(unsigned long __unused, void *dst, void *src, size_t n) +{ + memcpy(dst, src, n); +} + +void _iee_memset(unsigned long __unused, void *ptr, int data, size_t n) +{ + memset(ptr, data, n); +} + +void _iee_set_freeptr(unsigned long __unused, void **pptr, void *ptr) +{ + *pptr = ptr; +} + +unsigned long _iee_test_and_clear_bit(unsigned long __unused, long nr, unsigned long *addr) +{ + kcsan_mb(); + instrument_atomic_read_write(addr + BIT_WORD(nr), sizeof(long)); + return arch_test_and_clear_bit(nr, addr); +} diff --git a/arch/x86/kernel/haoc/ptp/Makefile b/arch/x86/kernel/haoc/ptp/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..ab30a58e0e7398069f2802fc11c5c774aeda21d8 --- /dev/null +++ b/arch/x86/kernel/haoc/ptp/Makefile @@ -0,0 +1 @@ +obj-y += ptp.o ptp-gate.o diff --git a/arch/x86/kernel/haoc/ptp/ident_map.c b/arch/x86/kernel/haoc/ptp/ident_map.c new file mode 100644 index 0000000000000000000000000000000000000000..bd2e3e6c82b67046332544ce017db3c796561c9b --- /dev/null +++ b/arch/x86/kernel/haoc/ptp/ident_map.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Helper routines for building identity mapping page tables. This is + * included by both the compressed kernel and the regular kernel. 
+ */ + +static inline void ptp_set_pte_pre_init(pte_t *ptep, pte_t pte) +{ + WRITE_ONCE(*ptep, pte); +} + +static inline void ptp_set_pmd_pre_init(pmd_t *pmdp, pmd_t pmd) +{ + WRITE_ONCE(*pmdp, pmd); +} + +static inline void ptp_set_pud_pre_init(pud_t *pudp, pud_t pud) +{ + WRITE_ONCE(*pudp, pud); +} + +static inline void ptp_set_p4d_pre_init(p4d_t *p4dp, p4d_t p4d) +{ + pgd_t pgd; + + if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) { + WRITE_ONCE(*p4dp, p4d); + return; + } + + pgd = native_make_pgd(native_p4d_val(p4d)); + pgd = pti_set_user_pgtbl((pgd_t *)p4dp, pgd); + WRITE_ONCE(*p4dp, native_make_p4d(native_pgd_val(pgd))); +} + +static inline void ptp_set_pgd_pre_init(pgd_t *pgdp, pgd_t pgd) +{ + WRITE_ONCE(*pgdp, pti_set_user_pgtbl(pgdp, pgd)); +} + +static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page, + unsigned long addr, unsigned long end) +{ + addr &= PMD_MASK; + for (; addr < end; addr += PMD_SIZE) { + pmd_t *pmd = pmd_page + pmd_index(addr); + + if (pmd_present(*pmd)) + continue; + + ptp_set_pmd_pre_init(pmd, __pmd((addr - info->offset) | info->page_flag)); + } +} + +static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, + unsigned long addr, unsigned long end) +{ + unsigned long next; + + for (; addr < end; addr = next) { + pud_t *pud = pud_page + pud_index(addr); + pmd_t *pmd; + + next = (addr & PUD_MASK) + PUD_SIZE; + if (next > end) + next = end; + + if (info->direct_gbpages) { + pud_t pudval; + + if (pud_present(*pud)) + continue; + + addr &= PUD_MASK; + pudval = __pud((addr - info->offset) | info->page_flag); + ptp_set_pud_pre_init(pud, pudval); + continue; + } + + if (pud_present(*pud)) { + pmd = pmd_offset(pud, 0); + ident_pmd_init(info, pmd, addr, next); + continue; + } + pmd = (pmd_t *)info->alloc_pgt_page(info->context); + if (!pmd) + return -ENOMEM; + ident_pmd_init(info, pmd, addr, next); + ptp_set_pud_pre_init(pud, __pud(__pa(pmd) | info->kernpg_flag)); + } + + return 0; +} + 
+static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page, + unsigned long addr, unsigned long end) +{ + unsigned long next; + int result; + + for (; addr < end; addr = next) { + p4d_t *p4d = p4d_page + p4d_index(addr); + pud_t *pud; + + next = (addr & P4D_MASK) + P4D_SIZE; + if (next > end) + next = end; + + if (p4d_present(*p4d)) { + pud = pud_offset(p4d, 0); + result = ident_pud_init(info, pud, addr, next); + if (result) + return result; + + continue; + } + pud = (pud_t *)info->alloc_pgt_page(info->context); + if (!pud) + return -ENOMEM; + + result = ident_pud_init(info, pud, addr, next); + if (result) + return result; + + ptp_set_p4d_pre_init(p4d, __p4d(__pa(pud) | info->kernpg_flag)); + } + + return 0; +} + +int ptp_kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, + unsigned long pstart, unsigned long pend) +{ + unsigned long addr = pstart + info->offset; + unsigned long end = pend + info->offset; + unsigned long next; + int result; + + /* Set the default pagetable flags if not supplied */ + if (!info->kernpg_flag) + info->kernpg_flag = _KERNPG_TABLE; + + /* Filter out unsupported __PAGE_KERNEL_* bits: */ + info->kernpg_flag &= __default_kernel_pte_mask; + + for (; addr < end; addr = next) { + pgd_t *pgd = pgd_page + pgd_index(addr); + p4d_t *p4d; + + next = (addr & PGDIR_MASK) + PGDIR_SIZE; + if (next > end) + next = end; + + if (pgd_present(*pgd)) { + p4d = p4d_offset(pgd, 0); + result = ident_p4d_init(info, p4d, addr, next); + if (result) + return result; + continue; + } + + p4d = (p4d_t *)info->alloc_pgt_page(info->context); + if (!p4d) + return -ENOMEM; + result = ident_p4d_init(info, p4d, addr, next); + if (result) + return result; + if (pgtable_l5_enabled()) { + ptp_set_pgd_pre_init(pgd, __pgd(__pa(p4d) | info->kernpg_flag)); + } else { + /* + * With p4d folded, pgd is equal to p4d. + * The pgd entry has to point to the pud page table in this case. 
+ */ + pud_t *pud = pud_offset(p4d, 0); + + ptp_set_pgd_pre_init(pgd, __pgd(__pa(pud) | info->kernpg_flag)); + } + } + + return 0; +} diff --git a/arch/x86/kernel/haoc/ptp/ptp-gate.S b/arch/x86/kernel/haoc/ptp/ptp-gate.S new file mode 100644 index 0000000000000000000000000000000000000000..4272574620d6e7acfe436ecb25a12af9dab58680 --- /dev/null +++ b/arch/x86/kernel/haoc/ptp/ptp-gate.S @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include +#include +#include + +/* scratch_reg would be changed, +caller should dertimine if scratch_reg should be saved and restored */ +.macro DISABLE_WP scratch_reg:req + /* Disable write protection*/ + movq %cr0, %\scratch_reg + andq $(~X86_CR0_WP), %\scratch_reg + movq %\scratch_reg, %cr0 +.endm + +.macro ENABLE_WP scratch_reg:req + /* Enable write protection */ + movq %cr0, %\scratch_reg +1: + orq $X86_CR0_WP, %\scratch_reg + movq %\scratch_reg, %cr0 + testq $X86_CR0_WP, %\scratch_reg + je 1b +.endm + +SYM_FUNC_START(ptp_xchg) + /* save RFLAGS, close irq */ + pushfq + cli + + pushq %r12 + +#ifdef CONFIG_PTP + movq PER_CPU_VAR(iee_disables) + IEE_DISABLE, %r12 + testq %r12, %r12 + jz xchg_wp_enabled + + xchg %rsi, (%rdi) + movq %rsi, %rax + + popq %r12 + popfq + jmp __x86_return_thunk + +xchg_wp_enabled: +#endif + DISABLE_WP r12 + + xchg %rsi, (%rdi) + movq %rsi, %rax + + ENABLE_WP r12 + + popq %r12 + + /* restore RFLAGS*/ + popfq + jmp __x86_return_thunk /* ret */ +SYM_FUNC_END(ptp_xchg) + +SYM_FUNC_START(ptp_try_cmpxchg) + /* save RFLAGS, close irq */ + pushfq + cli + + pushq %r12 + +#ifdef CONFIG_PTP + movq PER_CPU_VAR(iee_disables) + IEE_DISABLE, %r12 + testq %r12, %r12 + jz cmpxchg_wp_enabled + + movq %rsi, %rax + lock cmpxchgq %rdx, (%rdi) + + popq %r12 + popfq + jmp __x86_return_thunk + +cmpxchg_wp_enabled: +#endif + DISABLE_WP r12 + + movq %rsi, %rax + lock cmpxchgq %rdx, (%rdi) + + ENABLE_WP r12 + + popq %r12 + /* restore RFLAGS*/ + popfq + jmp __x86_return_thunk /* ret */ 
+SYM_FUNC_END(ptp_try_cmpxchg) + +SYM_FUNC_START(ptp_rw_gate) + /* save Interrupt flag */ + pushfq + /* close irq*/ + cli + + pushq %r12 + +#ifdef CONFIG_PTP + movq PER_CPU_VAR(iee_disables) + IEE_DISABLE, %r12 + testq %r12, %r12 + jz ptprw_wp_enabled + + leaq iee_funcs(%rip), %rax + call *(%rax, %rdi, 8) + + popq %r12 + popfq + jmp __x86_return_thunk + +ptprw_wp_enabled: +#endif + DISABLE_WP r12 + + /* call iee func */ + leaq iee_funcs(%rip), %rax + call *(%rax, %rdi, 8) + + ENABLE_WP r12 + + popq %r12 + + /* restore irq*/ + popfq + + jmp __x86_return_thunk /* ret */ +SYM_FUNC_END(ptp_rw_gate) +EXPORT_SYMBOL(ptp_rw_gate) diff --git a/arch/x86/kernel/haoc/ptp/ptp.c b/arch/x86/kernel/haoc/ptp/ptp.c new file mode 100644 index 0000000000000000000000000000000000000000..673dfcec2d545ef5ed2a3fdc083e5c19866ecdd7 --- /dev/null +++ b/arch/x86/kernel/haoc/ptp/ptp.c @@ -0,0 +1,358 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include + +#define __pte_to_phys(pte) (pte_pfn(pte) << PAGE_SHIFT) +#define __pmd_to_phys(pmd) (__pte_to_phys(__pte(pmd_val(pmd)))) +#define __pud_to_phys(pud) (__pte_to_phys(__pte(pud_val(pud)))) +#define __p4d_to_phys(p4d) (__pte_to_phys(__pte(p4d_val(p4d)))) +#define __pgd_to_phys(pgd) (__pte_to_phys(__pte(pgd_val(pgd)))) + +static void __init _ptp_set_pte_table_ro(pmd_t *pmdp, unsigned long addr, unsigned long end) +{ + pmd_t pmd = READ_ONCE(*pmdp); + unsigned long logical_addr = (unsigned long)__va(__pmd_to_phys(pmd)); + + set_iee_page(logical_addr, 0); +} + +static void __init _ptp_set_pmd_table_ro(pud_t *pudp, unsigned long addr, unsigned long end) +{ + unsigned long next; + pud_t pud = READ_ONCE(*pudp); + pmd_t *pmdp; + pmd_t pmd; + unsigned long logical_addr = (unsigned long)__va(__pud_to_phys(pud)); + + set_iee_page(logical_addr, 0); + pmdp = pmd_offset(pudp, addr); + do { + next = pmd_addr_end(addr, end); + pmd = READ_ONCE(*pmdp); + if (pmd_val(pmd) & _PSE) + continue; + else + 
_ptp_set_pte_table_ro(pmdp, addr, next); + } while (pmdp++, addr = next, addr != end); +} + +static void __init _ptp_set_pud_table_ro(p4d_t *p4dp, unsigned long addr, unsigned long end) +{ + unsigned long next; + p4d_t p4d = READ_ONCE(*p4dp); + pud_t *pudp; + pud_t pud; + unsigned long logical_addr = (unsigned long)__va(__p4d_to_phys(p4d)); + + set_iee_page(logical_addr, 0); + pudp = pud_offset(p4dp, addr); + do { + next = pud_addr_end(addr, end); + pud = READ_ONCE(*pudp); + if (pud_val(pud) & _PSE) { + // _PSE = 1 means a page, not a table + continue; + } else { + _ptp_set_pmd_table_ro(pudp, addr, next); + } + } while (pudp++, addr = next, addr != end); +} + +static void __init _ptp_set_p4d_table_ro(pgd_t *pgdp, unsigned long addr, unsigned long end) +{ + unsigned long next; + pgd_t pgd = READ_ONCE(*pgdp); + p4d_t *p4dp; + p4d_t p4d; + unsigned long logical_addr = (unsigned long)__va(__pgd_to_phys(pgd)); + + set_iee_page(logical_addr, 0); + p4dp = p4d_offset(pgdp, addr); + do { + next = p4d_addr_end(addr, end); + p4d = READ_ONCE(*p4dp); + /* No 512 GiB huge pages yet */ + _ptp_set_pud_table_ro(p4dp, addr, next); + } while (p4dp++, addr = next, addr != end); +} + +static void __init _ptp_mark_iee_pgtable_for_one_region_ro(pgd_t *pgdir, + unsigned long va_start, unsigned long va_end) +{ + unsigned long addr, end, next; + pgd_t *pgdp = pgd_offset_pgd(pgdir, va_start); + + addr = va_start & PAGE_MASK; + end = PAGE_ALIGN(va_end); + + do { + next = pgd_addr_end(addr, end); + _ptp_set_p4d_table_ro(pgdp, addr, next); + } while (pgdp++, addr = next, addr != end); +} + +void __init ptp_mark_all_pgtable_ro(void) +{ + unsigned long logical_addr; + phys_addr_t start, end; + u64 i; + pgd_t *pgdp; + + // handing 1-level page table swapper_pg_dir + pgdp = swapper_pg_dir; + set_iee_page((unsigned long)swapper_pg_dir, 0); + logical_addr = (unsigned long)__va(__pa_symbol(swapper_pg_dir)); + set_iee_page(logical_addr, 0); + + // handling 2/3/4/5-level page table for kernel + 
_ptp_mark_iee_pgtable_for_one_region_ro(pgdp, + (unsigned long)_text, (unsigned long)_etext); + _ptp_mark_iee_pgtable_for_one_region_ro(pgdp, + (unsigned long)__start_rodata, (unsigned long)__end_rodata); + _ptp_mark_iee_pgtable_for_one_region_ro(pgdp, + (unsigned long)_sdata, (unsigned long)_edata); + _ptp_mark_iee_pgtable_for_one_region_ro(pgdp, + (unsigned long)__bss_start, (unsigned long)__bss_stop); + + // handling 2/3/4/5-level statically allocated page table + #ifdef CONFIG_X86_5LEVEL + set_iee_page((unsigned long)level4_kernel_pgt, 0); + logical_addr = (unsigned long)__va(__pa_symbol(level4_kernel_pgt)); + set_iee_page(logical_addr, 0); + #endif + + set_iee_page((unsigned long)level3_kernel_pgt, 0); + logical_addr = (unsigned long)__va(__pa_symbol(level3_kernel_pgt)); + set_iee_page(logical_addr, 0); + + set_iee_page((unsigned long)level2_kernel_pgt, 0); + logical_addr = (unsigned long)__va(__pa_symbol(level2_kernel_pgt)); + set_iee_page(logical_addr, 0); + + for (int i = 0; i < FIXMAP_PMD_NUM; i++) { + set_iee_page((unsigned long)level2_fixmap_pgt, 0); + logical_addr = (unsigned long)__va(__pa_symbol(level2_fixmap_pgt)); + set_iee_page(logical_addr, 0); + } + + for (int i = 0; i < FIXMAP_PMD_NUM; i++) { + set_iee_page((unsigned long)level1_fixmap_pgt, 0); + logical_addr = (unsigned long)__va(__pa_symbol(level2_fixmap_pgt)); + set_iee_page(logical_addr, 0); + } + + // handling 2/3/4-level page table for logical mem and iee + for_each_mem_range(i, &start, &end) { + if (start >= end) + break; + /* + * The linear map must allow allocation tags reading/writing + * if MTE is present. Otherwise, it has the same attributes as + * PAGE_KERNEL. 
+		 */
+		_ptp_mark_iee_pgtable_for_one_region_ro(pgdp,
+			(unsigned long)__va(start), (unsigned long)__va(end));
+		_ptp_mark_iee_pgtable_for_one_region_ro(pgdp,
+			(unsigned long)__phys_to_iee(start), (unsigned long)__phys_to_iee(end));
+	}
+	pr_info("PTP: mark all kernel page tables ro done");
+}
+
+/*
+ * Panic if a writable mapping [addr_start, addr_end) overlaps the protected
+ * range [start, end).
+ *
+ * Fix: the original test, (addr_start inside range) || (addr_end inside
+ * range), missed a mapping that fully covers the protected range
+ * (addr_start < start && addr_end >= end), e.g. a large page spanning the
+ * entire section, silently bypassing the panic.  Use the canonical
+ * half-open interval-overlap test instead.
+ */
+static inline void check_addr_range_ro(unsigned long addr_start, unsigned long addr_end,
+		unsigned long start, unsigned long end, pte_t pte, const char *msg)
+{
+	if (unlikely(addr_start < end && addr_end > start))
+		if (unlikely(pte_write(pte)))
+			panic("IEE Error: Are you trying to write %s: [0x%lx, 0x%lx]?",
+				msg, addr_start, addr_end);
+}
+
+/* Reject writable aliases of kernel text, rodata and the IEE SIP sections. */
+static inline void check_dep_and_ro(unsigned long image_addr, unsigned long size, pte_t pte)
+{
+	/* Before mark_rodata_ro() these sections are legitimately writable. */
+	if (unlikely(!kernel_set_to_readonly))
+		return;
+
+	check_addr_range_ro(image_addr, image_addr + size,
+		(unsigned long)_stext, (unsigned long)_etext, pte, "text");
+	check_addr_range_ro(image_addr, image_addr + size,
+		(unsigned long)__start_rodata, (unsigned long)__end_rodata, pte, "rodata");
+	check_addr_range_ro(image_addr, image_addr + size,
+		(unsigned long)__iee_si_text_start, (unsigned long)__iee_si_text_end, pte, "iee_si_text");
+	check_addr_range_ro(image_addr, image_addr + size,
+		(unsigned long)__iee_si_data_start, (unsigned long)__iee_si_data_end, pte, "iee_si_data");
+}
+
+/* IEE-side pte setter: validate the mapping before committing it. */
+void _iee_set_pte(unsigned long __unused, pte_t *ptep, pte_t pte)
+{
+	/* A non-present entry grants no access; write it through as-is. */
+	if (!(pte_val(pte) & _PAGE_PRESENT)) {
+		WRITE_ONCE(*ptep, pte);
+		return;
+	}
+
+	phys_addr_t phys_addr = __pte_to_phys(pte);
+	unsigned long image_addr = phys_addr + __START_KERNEL_map - phys_base;
+
+	check_dep_and_ro(image_addr, PAGE_SIZE, pte);
+	WRITE_ONCE(*ptep, pte);
+}
+
+/* IEE-side pmd setter: validate leaf (large-page) mappings before commit. */
+void _iee_set_pmd(unsigned long __unused, pmd_t *pmdp, pmd_t pmd)
+{
+	phys_addr_t phys_addr;
+	unsigned long image_addr;
+
+	if (!(pmd_val(pmd) & _PAGE_PRESENT)) {
+		WRITE_ONCE(*pmdp, pmd);
+		return;
+	}
+	if (pmd_leaf(pmd)) {
+		phys_addr = __pmd_to_phys(pmd);
+ image_addr = phys_addr + __START_KERNEL_map - phys_base; + check_dep_and_ro(image_addr, PMD_SIZE, __pte(pmd_val(pmd))); + } + + WRITE_ONCE(*pmdp, pmd); +} + +void _iee_set_pud(unsigned long __unused, pud_t *pudp, pud_t pud) +{ + phys_addr_t phys_addr; + unsigned long image_addr; + + if (!(pud_val(pud) & _PAGE_PRESENT)) { + WRITE_ONCE(*pudp, pud); + return; + } + if (pud_leaf(pud)) { + phys_addr = __pud_to_phys(pud); + image_addr = phys_addr + __START_KERNEL_map - phys_base; + check_dep_and_ro(image_addr, PUD_SIZE, __pte(pud_val(pud))); + } + + WRITE_ONCE(*pudp, pud); +} + +void _iee_set_p4d(unsigned long __unused, p4d_t *p4dp, p4d_t p4d) +{ + WRITE_ONCE(*p4dp, p4d); +} + +void _iee_set_pgd(unsigned long __unused, pgd_t *pgdp, pgd_t pgd) +{ + WRITE_ONCE(*pgdp, pgd); +} + +static inline void check_text_poke_ro(unsigned long image_addr, unsigned long size, pte_t pte) +{ + if (unlikely(!kernel_set_to_readonly)) + return; + + check_addr_range_ro(image_addr, image_addr + size, +(unsigned long)__start_rodata, (unsigned long)__end_rodata, pte, "rodata"); + check_addr_range_ro(image_addr, image_addr + size, +(unsigned long)__iee_si_data_start, (unsigned long)__iee_si_data_end, pte, "iee_si_data"); +} + +void _iee_set_pte_text_poke(unsigned long __unused, pte_t *ptep, pte_t pte) +{ + if (!(pte_val(pte) & _PAGE_PRESENT)) { + WRITE_ONCE(*ptep, pte); + return; + } + + phys_addr_t phys_addr = __pte_to_phys(pte); + unsigned long image_addr = phys_addr + __START_KERNEL_map - phys_base; + + check_text_poke_ro(image_addr, PAGE_SIZE, pte); + WRITE_ONCE(*ptep, pte); +} + +DEFINE_PER_CPU(struct iee_disable_t, iee_disables); + +void __init ptp_iee_disable_init(void) +{ + int cpu; + struct iee_disable_t *iee_disable; + + for_each_possible_cpu(cpu) { + iee_disable = per_cpu_ptr(&iee_disables, cpu); + iee_disable->disabled_cnt = 0; + } +} + +void ptp_disable_iee(unsigned long *reg) +{ + unsigned long irq_flags; + int cpu; + struct iee_disable_t *iee_disable; + + 
local_irq_save(irq_flags); + cpu = get_cpu(); + iee_disable = per_cpu_ptr(&iee_disables, cpu); + if (iee_disable->disabled_cnt == 0) { + *reg = read_cr0(); + asm volatile("mov %0, %%cr0" : : "r"(*reg & ~X86_CR0_WP)); + } + iee_disable->disabled_cnt++; + put_cpu(); + local_irq_restore(irq_flags); +} + +void ptp_enable_iee(unsigned long reg) +{ + unsigned long irq_flags; + int cpu; + struct iee_disable_t *iee_disable; + + local_irq_save(irq_flags); + cpu = get_cpu(); + iee_disable = per_cpu_ptr(&iee_disables, cpu); + iee_disable->disabled_cnt--; + if (iee_disable->disabled_cnt == 0) + asm volatile("mov %0, %%cr0" : : "r"(reg | X86_CR0_WP)); + put_cpu(); + local_irq_restore(irq_flags); +} + +void ptp_context_enable_iee(int *disabled_cnt, unsigned long *reg) +{ + unsigned long irq_flags; + int cpu; + struct iee_disable_t *iee_disable; + + local_irq_save(irq_flags); + cpu = get_cpu(); + iee_disable = per_cpu_ptr(&iee_disables, cpu); + *disabled_cnt = iee_disable->disabled_cnt; + if (*disabled_cnt > 0) { + *reg = read_cr0(); + iee_disable->disabled_cnt = 0; + asm volatile("mov %0, %%cr0" : : "r"(*reg | X86_CR0_WP)); + } + put_cpu(); + local_irq_restore(irq_flags); +} + +void ptp_context_restore_iee(int disabled_cnt, unsigned long reg) +{ + unsigned long irq_flags; + int cpu; + struct iee_disable_t *iee_disable; + + local_irq_save(irq_flags); + if (disabled_cnt > 0) { + cpu = get_cpu(); + iee_disable = per_cpu_ptr(&iee_disables, cpu); + asm volatile("mov %0, %%cr0" : : "r"(reg & ~X86_CR0_WP)); + iee_disable->disabled_cnt = disabled_cnt; + put_cpu(); + } + local_irq_restore(irq_flags); +} diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 1defe865de67ec8fafa3149817b4da57050a74b6..f78dcb4f11fa4b67c18b87a2066b6b683f731397 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -720,7 +720,14 @@ static void startup_64_load_idt(unsigned long physbase) } desc->address = (unsigned long)idt; + #ifdef CONFIG_IEE_SIP + if(haoc_enabled) + 
iee_load_idt_early(desc); + else + native_load_idt(desc); + #else native_load_idt(desc); + #endif } /* This is used when running on kernel addresses */ diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 234851fe0ef8e7e499f4a33bff9e245fc04d092f..52141e3825cdc04fdb086894169bc1054be5dfec 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -81,7 +81,14 @@ void __init native_pv_lock_init(void) static void native_tlb_remove_table(struct mmu_gather *tlb, void *table) { + #ifdef CONFIG_PTP + if (haoc_enabled) + ptp_tlb_remove_page(tlb, table); + else + tlb_remove_page(tlb, table); + #else tlb_remove_page(tlb, table); + #endif } unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr, diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 93dc119c8e2e89aa17ac94926a0c010ffd4f67f3..3f556e286d094e7bf922f3977ff7df756450868c 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1011,6 +1011,27 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); +#ifdef CONFIG_IEE_SIP + /* + * Perform a one-time check for IEE_SIP prerequisites. This must be done + * early in setup_arch() before any code might rely on these features. + * At this point, CPU features (from early_cpu_init) and kernel command + * line (from parse_early_param) are both available. + */ + if (haoc_enabled) { + bool smep_ok = cpu_feature_enabled(X86_FEATURE_SMEP); + bool smap_ok = cpu_feature_enabled(X86_FEATURE_SMAP); + if (smep_ok && smap_ok) { + pr_info("IEE_SIP: Feature is active. CPU supports SMEP and SMAP.\n"); + } else { + // Fail-fast: The user wants the feature, but the hardware + // doesn't support it. This is a fatal configuration error. 
+ panic("IEE_SIP: FATAL: Feature enabled via 'haoc=on' but hardware is missing support (SMEP:%d, SMAP:%d).\n", + smep_ok, smap_ok); + } + } +#endif + if (efi_enabled(EFI_BOOT)) efi_memblock_x86_reserve_range(); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 0929d7fe7e2740c5c3a78d54e955d0aee99d0a52..03050eca6bc20e547a283e8486016e52611e3f36 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -99,6 +99,28 @@ jiffies = jiffies_64; #endif +#ifdef CONFIG_IEE_SIP +#define IEE_SI_TEXT \ +. = ALIGN(PAGE_SIZE); \ +__iee_si_text_start = .; \ +*(.iee.si_text) \ +. = ALIGN(PAGE_SIZE); \ +__iee_si_text_end = .; + +#define IEE_SI_DATA \ + . = ALIGN(PAGE_SIZE); \ + __iee_si_data_start = .; \ +*(.iee.si_data) \ +. = ALIGN(PAGE_SIZE); \ +__iee_si_data_end = .; +#endif +#ifdef CONFIG_CREDP +#define CRED_DATA \ + . = ALIGN(PAGE_SIZE); \ + *(.iee.cred) \ + . = ALIGN(PAGE_SIZE); +#endif + PHDRS { text PT_LOAD FLAGS(5); /* R_E */ data PT_LOAD FLAGS(6); /* RW_ */ @@ -131,6 +153,9 @@ SECTIONS SCHED_TEXT LOCK_TEXT KPROBES_TEXT +#ifdef CONFIG_IEE_SIP + IEE_SI_TEXT +#endif SOFTIRQENTRY_TEXT #ifdef CONFIG_RETPOLINE *(.text..__x86.indirect_thunk) @@ -181,6 +206,12 @@ SECTIONS CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) DATA_DATA +#ifdef CONFIG_IEE_SIP + IEE_SI_DATA +#endif +#ifdef CONFIG_CREDP + CRED_DATA +#endif CONSTRUCTORS /* rarely changed data like cpu maps */ diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3d74656164e6c2c4f5bcbf7a9201697c5287b7e9..6e06a35de9ee708175bde15defea3a79c6c7a950 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -56,6 +56,9 @@ #include #include +#ifdef CONFIG_IEE +#include +#endif #include "mm_internal.h" #include "ident_map.c" @@ -1369,6 +1372,16 @@ static void __init preallocate_vmalloc_pages(void) void __init mem_init(void) { +#ifdef CONFIG_PTP +if (haoc_enabled){ + ptp_pg_cache_init(&pgd_cache, PGD_ALLOCATION_ORDER, 1, "pgd_cache"); + #ifdef CONFIG_X86_5LEVEL + 
ptp_pg_cache_init(&pg_cache, 0, 4, "pg_cache"); + #else + ptp_pg_cache_init(&pg_cache, 0, 3, "pg_cache"); + #endif +} +#endif pci_iommu_alloc(); /* clear_bss() already clear the empty_zero_page */ @@ -1390,6 +1403,24 @@ void __init mem_init(void) if (get_gate_vma(&init_mm)) kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER); + #ifdef CONFIG_IEE + /* + * Split the linear mapping region of the kernel address space into two equally-sized parts. + * The lower region retains the original linear mapping. + * The upper region becomes the IEE linear mapping area. + * Note that the IEE mapping region is mapped with read-only permissions. + */ + if (haoc_enabled) + iee_init(); + #endif + #ifdef CONFIG_PTP + if(haoc_enabled){ + ptp_set_iee_reserved(&pg_cache); + ptp_set_iee_reserved(&pgd_cache); + ptp_mark_all_pgtable_ro(); + ptp_iee_disable_init(); + } + #endif preallocate_vmalloc_pages(); } diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 98aa303ad0546fa5e57d6ace903b875afe5f4b3e..6f9fbc2f9c23d2bf2b0d46287ab00e69011dd5cb 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -6,6 +6,10 @@ #include #include #include +#ifdef CONFIG_PTP +#include +#include +#endif #ifdef CONFIG_DYNAMIC_PHYSICAL_MASK phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1; @@ -415,15 +419,34 @@ static inline void _pgd_free(pgd_t *pgd) } #else +#ifdef CONFIG_PTP +struct pg_cache pgd_cache; +#endif + static inline pgd_t *_pgd_alloc(void) { + #ifdef CONFIG_PTP + if (haoc_enabled) + return (pgd_t *)ptp_pg_alloc(&pgd_cache, GFP_PGTABLE_USER); + else + return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER, + PGD_ALLOCATION_ORDER); + #else return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER, PGD_ALLOCATION_ORDER); + #endif } static inline void _pgd_free(pgd_t *pgd) { + #ifdef CONFIG_PTP + if (haoc_enabled) + ptp_pg_free(&pgd_cache, pgd); + else + free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER); + #else free_pages((unsigned 
long)pgd, PGD_ALLOCATION_ORDER); + #endif } #endif /* CONFIG_X86_PAE */ @@ -557,8 +580,17 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma, int ret = 0; if (pte_young(*ptep)) + #ifdef CONFIG_PTP + if (haoc_enabled) + ret = iee_test_and_clear_bit(_PAGE_BIT_ACCESSED, + (unsigned long *) &ptep->pte); + else + ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, + (unsigned long *) &ptep->pte); + #else ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, (unsigned long *) &ptep->pte); + #endif return ret; } @@ -570,8 +602,17 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma, int ret = 0; if (pmd_young(*pmdp)) + #ifdef CONFIG_PTP + if (haoc_enabled) + ret = iee_test_and_clear_bit(_PAGE_BIT_ACCESSED, + (unsigned long *)pmdp); + else ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, (unsigned long *)pmdp); + #else + ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, + (unsigned long *)pmdp); + #endif return ret; } @@ -584,8 +625,17 @@ int pudp_test_and_clear_young(struct vm_area_struct *vma, int ret = 0; if (pud_young(*pudp)) + #ifdef CONFIG_PTP + if (haoc_enabled) + ret = iee_test_and_clear_bit(_PAGE_BIT_ACCESSED, + (unsigned long *)pudp); + else + ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, + (unsigned long *)pudp); + #else ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, (unsigned long *)pudp); + #endif return ret; } @@ -835,14 +885,28 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr) for (i = 0; i < PTRS_PER_PMD; i++) { if (!pmd_none(pmd_sv[i])) { pte = (pte_t *)pmd_page_vaddr(pmd_sv[i]); + #ifdef CONFIG_PTP + if (haoc_enabled) + ptp_pg_free(&pg_cache, pte); + else + free_page((unsigned long)pte); + #else free_page((unsigned long)pte); + #endif } } free_page((unsigned long)pmd_sv); pagetable_pmd_dtor(virt_to_ptdesc(pmd)); + #ifdef CONFIG_PTP + if (haoc_enabled) + ptp_pg_free(&pg_cache, pmd); + else + free_page((unsigned long)pmd); + #else free_page((unsigned long)pmd); + #endif return 1; } @@ -865,7 +929,14 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) /* 
INVLPG to clear all paging-structure caches */ flush_tlb_kernel_range(addr, addr + PAGE_SIZE-1); + #ifdef CONFIG_PTP + if (haoc_enabled) + ptp_pg_free(&pg_cache, pte); + else + free_page((unsigned long)pte); + #else free_page((unsigned long)pte); + #endif return 1; } diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 982fa921ac3777eac0ce0b80d2ef51342678c58a..53675348803d10f1350fd38e25ebb030b32b0577 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -139,7 +139,14 @@ pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd) * The user page tables get the full PGD, accessible from * userspace: */ + #ifdef CONFIG_PTP + if (haoc_enabled) + ptp_set_pgd(kernel_to_user_pgdp(pgdp), pgd); + else + kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd; + #else kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd; + #endif /* * If this is normal user memory, make it NX in the kernel @@ -292,7 +299,14 @@ static void __init pti_setup_vsyscall(void) if (WARN_ON(!target_pte)) return; + #ifdef CONFIG_PTP + if (haoc_enabled) + set_pte(target_pte, *pte); + else + *target_pte = *pte; + #else *target_pte = *pte; + #endif set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir)); } #else @@ -370,14 +384,28 @@ pti_clone_pgtable(unsigned long start, unsigned long end, * code that only set this bit when supported. */ if (boot_cpu_has(X86_FEATURE_PGE)) + #ifdef CONFIG_PTP + if (haoc_enabled) + set_pmd(pmd, pmd_set_flags(*pmd, _PAGE_GLOBAL)); + else + *pmd = pmd_set_flags(*pmd, _PAGE_GLOBAL); + #else *pmd = pmd_set_flags(*pmd, _PAGE_GLOBAL); + #endif /* * Copy the PMD. 
That is, the kernelmode and usermode * tables will share the last-level page tables of this * address range */ + #ifdef CONFIG_PTP + if (haoc_enabled) + set_pmd(target_pmd, *pmd); + else + *target_pmd = *pmd; + #else *target_pmd = *pmd; + #endif addr = round_up(addr + 1, PMD_SIZE); @@ -401,10 +429,24 @@ pti_clone_pgtable(unsigned long start, unsigned long end, /* Set GLOBAL bit in both PTEs */ if (boot_cpu_has(X86_FEATURE_PGE)) + #ifdef CONFIG_PTP + if (haoc_enabled) + set_pte(pte, pte_set_flags(*pte, _PAGE_GLOBAL)); + else + *pte = pte_set_flags(*pte, _PAGE_GLOBAL); + #else *pte = pte_set_flags(*pte, _PAGE_GLOBAL); + #endif /* Clone the PTE */ + #ifdef CONFIG_PTP + if (haoc_enabled) + set_pte(target_pte, *pte); + else + *target_pte = *pte; + #else *target_pte = *pte; + #endif addr = round_up(addr + 1, PAGE_SIZE); @@ -430,7 +472,14 @@ static void __init pti_clone_p4d(unsigned long addr) kernel_pgd = pgd_offset_k(addr); kernel_p4d = p4d_offset(kernel_pgd, addr); + #ifdef CONFIG_PTP + if (haoc_enabled) + set_p4d(user_p4d, *kernel_p4d); + else + *user_p4d = *kernel_p4d; + #else *user_p4d = *kernel_p4d; + #endif } /* @@ -461,7 +510,14 @@ static void __init pti_clone_user_shared(void) if (WARN_ON(!target_pte)) return; + #ifdef CONFIG_PTP + if (haoc_enabled) + set_pte(target_pte, pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL)); + else + *target_pte = pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL); + #else *target_pte = pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL); + #endif } } diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 91d31ac422d6cde7ab5434bcd02176cbcd6b07ff..2bed0652805fa906692a2bcfc35d916ed511045d 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -48,6 +48,10 @@ #include #include #include +#ifdef CONFIG_PTP +#include +#include +#endif /* * We allocate runtime services regions top-down, starting from -4G, i.e. 
@@ -73,7 +77,14 @@ int __init efi_alloc_page_tables(void) gfp_t gfp_mask; gfp_mask = GFP_KERNEL | __GFP_ZERO; + #ifdef CONFIG_PTP + if (haoc_enabled) + efi_pgd = ptp_pg_alloc(&pgd_cache, gfp_mask); + else + efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); + #else efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); + #endif if (!efi_pgd) goto fail; @@ -116,7 +127,14 @@ void efi_sync_low_kernel_mappings(void) pgd_k = pgd_offset_k(PAGE_OFFSET); num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET); + #ifdef CONFIG_PTP + if (haoc_enabled) + iee_memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries); + else + memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries); + #else memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries); + #endif pgd_efi = efi_pgd + pgd_index(EFI_VA_END); pgd_k = pgd_offset_k(EFI_VA_END); @@ -124,7 +142,14 @@ void efi_sync_low_kernel_mappings(void) p4d_k = p4d_offset(pgd_k, 0); num_entries = p4d_index(EFI_VA_END); + #ifdef CONFIG_PTP + if (haoc_enabled) + iee_memcpy(p4d_efi, p4d_k, sizeof(p4d_t) * num_entries); + else + memcpy(p4d_efi, p4d_k, sizeof(p4d_t) * num_entries); + #else memcpy(p4d_efi, p4d_k, sizeof(p4d_t) * num_entries); + #endif /* * We share all the PUD entries apart from those that map the @@ -139,13 +164,27 @@ void efi_sync_low_kernel_mappings(void) pud_k = pud_offset(p4d_k, 0); num_entries = pud_index(EFI_VA_END); + #ifdef CONFIG_PTP + if (haoc_enabled) + iee_memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); + else + memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); + #else memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); + #endif pud_efi = pud_offset(p4d_efi, EFI_VA_START); pud_k = pud_offset(p4d_k, EFI_VA_START); num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START); + #ifdef CONFIG_PTP + if (haoc_enabled) + iee_memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); + else + memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); + #else memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); + 
#endif } /* diff --git a/fs/coredump.c b/fs/coredump.c index d3a4f5dc2e362a3b65498df7afbb289c0f4eb84e..8d9d9ede316adcee27c3f967e834424ffb32aa80 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -54,6 +54,10 @@ #include +#ifdef CONFIG_CREDP +#include +#endif + static bool dump_vma_snapshot(struct coredump_params *cprm); static void free_vma_snapshot(struct coredump_params *cprm); @@ -633,7 +637,11 @@ void do_coredump(const kernel_siginfo_t *siginfo) */ if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { /* Setuid core dump mode */ + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(cred, GLOBAL_ROOT_UID); + #else cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ + #endif need_suid_safe = true; } diff --git a/fs/exec.c b/fs/exec.c index 588ddf9b5d56d4b26a5d6dab6adea173d378fd20..2359a53e319f4d4922131b0ac639fe8dd2f8d8c2 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -77,6 +77,16 @@ #include #include +#ifdef CONFIG_IEE_PTRP +#include +#endif +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) +#include +#endif +#ifdef CONFIG_CREDP +#include +#endif + static int bprm_creds_from_file(struct linux_binprm *bprm); int suid_dumpable = 0; @@ -735,7 +745,14 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) free_pgd_range(&tlb, old_start, old_end, new_end, next ? 
next->vm_start : USER_PGTABLES_CEILING); } + #if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + if (haoc_enabled) + ptp_tlb_finish_mmu(&tlb); + else + tlb_finish_mmu(&tlb); + #else tlb_finish_mmu(&tlb); + #endif vma_prev(&vmi); /* Shrink the vma to just the new range */ @@ -1025,6 +1042,10 @@ static int exec_mmap(struct mm_struct *mm) if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) local_irq_enable(); activate_mm(active_mm, mm); +#ifdef CONFIG_IEE_PTRP + if(haoc_enabled) + iee_set_token_pgd(tsk, mm->pgd); +#endif if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) local_irq_enable(); lru_gen_add_mm(mm); @@ -1672,12 +1693,20 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file) if (mode & S_ISUID) { bprm->per_clear |= PER_CLEAR_ON_SETID; + #ifdef CONFIG_CREDP + iee_set_cred_euid(bprm->cred, vfsuid_into_kuid(vfsuid)); + #else bprm->cred->euid = vfsuid_into_kuid(vfsuid); + #endif } if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { bprm->per_clear |= PER_CLEAR_ON_SETID; + #ifdef CONFIG_CREDP + iee_set_cred_egid(bprm->cred, vfsgid_into_kgid(vfsgid)); + #else bprm->cred->egid = vfsgid_into_kgid(vfsgid); + #endif } } @@ -1928,10 +1957,18 @@ static int do_execveat_common(int fd, struct filename *filename, { struct linux_binprm *bprm; int retval; +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + unsigned long reg; +#endif if (IS_ERR(filename)) return PTR_ERR(filename); +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) +if (haoc_enabled) + ptp_disable_iee(®); +#endif + /* * We move the actual failure in case of RLIMIT_NPROC excess from * set*uid() to execve() because too many poorly written programs @@ -2007,6 +2044,10 @@ static int do_execveat_common(int fd, struct filename *filename, out_ret: putname(filename); +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) +if (haoc_enabled) + ptp_enable_iee(reg); +#endif return retval; } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 
42c73c647a27fe2ec93018d8b216aa6478c3ce37..a2c12656f5eaa7285aa64d1bcb144b90dcc17e19 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -15,6 +15,10 @@ #include +#ifdef CONFIG_CREDP +#include +#endif + #include "flexfilelayout.h" #include "../nfs4session.h" #include "../nfs4idmap.h" @@ -502,8 +506,13 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, rc = -ENOMEM; if (!kcred) goto out_err_free; + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(kcred, uid); + iee_set_cred_fsgid(kcred, gid); + #else kcred->fsuid = uid; kcred->fsgid = gid; + #endif cred = RCU_INITIALIZER(kcred); if (lgr->range.iomode == IOMODE_READ) diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 25a7c771cfd89f3e6d494f26a78212d3d619c135..820a7c45d633198f6cc1a96bd3ce66da3bda1a5b 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -48,6 +48,10 @@ #include #include +#ifdef CONFIG_CREDP +#include +#endif + #include "internal.h" #include "netns.h" #include "nfs4idmap.h" @@ -226,8 +230,13 @@ int nfs_idmap_init(void) goto failed_reg_legacy; set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); + #ifdef CONFIG_CREDP + iee_set_cred_thread_keyring(cred, keyring); + iee_set_cred_jit_keyring(cred, KEY_REQKEY_DEFL_THREAD_KEYRING); + #else cred->thread_keyring = keyring; cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; + #endif id_resolver_cache = cred; return 0; diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index e6beaaf4f1700b0ac78d05128dc23fd74c902dcb..79250fcc12889bef82098b878d19ec3356d3d9c1 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -2,6 +2,9 @@ /* Copyright (C) 1995, 1996 Olaf Kirch */ #include +#ifdef CONFIG_CREDP +#include +#endif #include "nfsd.h" #include "auth.h" @@ -32,22 +35,40 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) if (!new) return -ENOMEM; + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new, rqstp->rq_cred.cr_uid); + iee_set_cred_fsgid(new, rqstp->rq_cred.cr_gid); + #else new->fsuid = rqstp->rq_cred.cr_uid; 
new->fsgid = rqstp->rq_cred.cr_gid; + #endif rqgi = rqstp->rq_cred.cr_group_info; if (flags & NFSEXP_ALLSQUASH) { + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new, exp->ex_anon_uid); + iee_set_cred_fsgid(new, exp->ex_anon_gid); + #else new->fsuid = exp->ex_anon_uid; new->fsgid = exp->ex_anon_gid; + #endif gi = groups_alloc(0); if (!gi) goto oom; } else if (flags & NFSEXP_ROOTSQUASH) { if (uid_eq(new->fsuid, GLOBAL_ROOT_UID)) + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new, exp->ex_anon_uid); + #else new->fsuid = exp->ex_anon_uid; + #endif if (gid_eq(new->fsgid, GLOBAL_ROOT_GID)) + #ifdef CONFIG_CREDP + iee_set_cred_fsgid(new, exp->ex_anon_gid); + #else new->fsgid = exp->ex_anon_gid; + #endif gi = groups_alloc(rqgi->ngroups); if (!gi) @@ -67,18 +88,35 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) } if (uid_eq(new->fsuid, INVALID_UID)) + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new, exp->ex_anon_uid); + #else new->fsuid = exp->ex_anon_uid; + #endif if (gid_eq(new->fsgid, INVALID_GID)) + #ifdef CONFIG_CREDP + iee_set_cred_fsgid(new, exp->ex_anon_gid); + #else new->fsgid = exp->ex_anon_gid; + #endif set_groups(new, gi); put_group_info(gi); if (!uid_eq(new->fsuid, GLOBAL_ROOT_UID)) + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new, cap_drop_nfsd_set(new->cap_effective)); + #else new->cap_effective = cap_drop_nfsd_set(new->cap_effective); + #endif else + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new, cap_raise_nfsd_set(new->cap_effective, + new->cap_permitted)); + #else new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); + #endif put_cred(override_creds(new)); put_cred(new); return 0; diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index d47173d98eef7105ab86673525f9154ce84981da..eb977c4d55ccc2cc08d6da7784616c2289a68bd0 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -35,6 +35,9 @@ #include #include #include +#ifdef CONFIG_CREDP +#include +#endif #include "nfsd.h" #include 
"state.h" #include "netns.h" @@ -946,8 +949,13 @@ static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct r if (!kcred) return NULL; + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(kcred, ses->se_cb_sec.uid); + iee_set_cred_fsgid(kcred, ses->se_cb_sec.gid); + #else kcred->fsuid = ses->se_cb_sec.uid; kcred->fsgid = ses->se_cb_sec.gid; + #endif return kcred; } } diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 892fecce18b8029226981bc07c90800e6f39e3ee..65d446239ed52f322b3d9529897289886307d6e8 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -44,6 +44,10 @@ #include #include +#ifdef CONFIG_CREDP +#include +#endif + #include "nfsd.h" #include "state.h" #include "vfs.h" @@ -78,8 +82,13 @@ nfs4_save_creds(const struct cred **original_creds) if (!new) return -ENOMEM; + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new, GLOBAL_ROOT_UID); + iee_set_cred_fsgid(new, GLOBAL_ROOT_GID); + #else new->fsuid = GLOBAL_ROOT_UID; new->fsgid = GLOBAL_ROOT_GID; + #endif *original_creds = override_creds(new); put_cred(new); return 0; diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 283c1a60c846032141b0d368d1584431de50c9b7..b7be5e4298cbc99175b1f60fcae4345ab35007ea 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -11,6 +11,9 @@ #include #include +#ifdef CONFIG_CREDP +#include +#endif #include "nfsd.h" #include "vfs.h" #include "auth.h" @@ -223,9 +226,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) error = nfserrno(-ENOMEM); goto out; } + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new, cap_raise_nfsd_set(new->cap_effective, + new->cap_permitted)); + #else new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); + #endif put_cred(override_creds(new)); put_cred(new); } else { diff --git a/fs/open.c b/fs/open.c index 4679db501d432d770909b91ca16cc1778af76c1d..6e5bc83b9548e18ef2e6671ed7b528bdcc58a129 100644 --- a/fs/open.c +++ b/fs/open.c @@ -34,6 +34,9 @@ #include #include 
#include +#ifdef CONFIG_CREDP +#include +#endif #include "internal.h" @@ -414,17 +417,35 @@ static const struct cred *access_override_creds(void) * routine. */ + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(override_cred, override_cred->uid); + iee_set_cred_fsgid(override_cred, override_cred->gid); + #else override_cred->fsuid = override_cred->uid; override_cred->fsgid = override_cred->gid; + #endif if (!issecure(SECURE_NO_SETUID_FIXUP)) { /* Clear the capabilities if we switch to a non-root user */ kuid_t root_uid = make_kuid(override_cred->user_ns, 0); if (!uid_eq(override_cred->uid, root_uid)) + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = override_cred->cap_effective; + + cap_clear(tmp_cap); + iee_set_cred_cap_effective(override_cred, tmp_cap); + } while (0); + #else cap_clear(override_cred->cap_effective); + #endif else + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(override_cred, override_cred->cap_permitted); + #else override_cred->cap_effective = override_cred->cap_permitted; + #endif } /* @@ -444,7 +465,11 @@ static const struct cred *access_override_creds(void) * expecting RCU freeing. But normal thread-synchronous * cred accesses will keep things non-RCY. */ + #ifdef CONFIG_CREDP + iee_set_cred_non_rcu(override_cred, 1); + #else override_cred->non_rcu = 1; + #endif old_cred = override_creds(override_cred); diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index d1e0488ad0d8cd362615d02011a0759b37b87445..90e07084a3dfae5c8103946ddf78e16bd4278907 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -14,6 +14,9 @@ #include #include #include +#ifdef CONFIG_CREDP +#include +#endif #include "overlayfs.h" static unsigned short ovl_redirect_max = 256; @@ -586,8 +589,13 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, * create a new inode, so just use the ovl mounter's * fs{u,g}id. 
*/ + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(override_cred, inode->i_uid); + iee_set_cred_fsgid(override_cred, inode->i_gid); + #else override_cred->fsuid = inode->i_uid; override_cred->fsgid = inode->i_gid; + #endif err = security_dentry_create_files_as(dentry, attr->mode, &dentry->d_name, old_cred, override_cred); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index b6feb8892872949d66c56c4a973636415d741298..0b3972806621f630637411476e4364833e788131 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -18,6 +18,9 @@ #include #include #include +#ifdef CONFIG_CREDP +#include +#endif #include "overlayfs.h" #include "params.h" @@ -1467,7 +1470,16 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_export_op = &ovl_export_fid_operations; /* Never override disk quota limits or use reserved space */ + #ifdef CONFIG_CREDP + { + kernel_cap_t tmp = cred->cap_effective; + + cap_lower(tmp, CAP_SYS_RESOURCE); + iee_set_cred_cap_effective(cred, tmp); + } + #else cap_lower(cred->cap_effective, CAP_SYS_RESOURCE); + #endif sb->s_magic = OVERLAYFS_SUPER_MAGIC; sb->s_xattr = ovl_xattr_handlers(ofs); diff --git a/fs/smb/client/cifs_spnego.c b/fs/smb/client/cifs_spnego.c index bc1c1e9b288addb8b01b9192c3d8af3cbb1f53ed..2c5ce8b8c31bca2d5ffbebf92abd7fb150c73154 100644 --- a/fs/smb/client/cifs_spnego.c +++ b/fs/smb/client/cifs_spnego.c @@ -14,6 +14,9 @@ #include #include #include +#ifdef CONFIG_CREDP +#include +#endif #include "cifsglob.h" #include "cifs_spnego.h" #include "cifs_debug.h" @@ -230,8 +233,13 @@ init_cifs_spnego(void) * the results it looks up */ set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); + #ifdef CONFIG_CREDP + iee_set_cred_thread_keyring(cred, keyring); + iee_set_cred_jit_keyring(cred, KEY_REQKEY_DEFL_THREAD_KEYRING); + #else cred->thread_keyring = keyring; cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; + #endif spnego_cred = cred; cifs_dbg(FYI, "cifs spnego keyring: %d\n", key_serial(keyring)); diff --git 
a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index 7bd29e827c8f1bb95e8412187a40d822af2f371e..5fbe87f425239274ca94aba3bff895d8fa9ee09b 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -17,6 +17,9 @@ #include #include #include +#ifdef CONFIG_CREDP +#include +#endif #include "cifspdu.h" #include "cifsglob.h" #include "cifsacl.h" @@ -491,8 +494,13 @@ init_cifs_idmap(void) /* instruct request_key() to use this special keyring as a cache for * the results it looks up */ set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); + #ifdef CONFIG_CREDP + iee_set_cred_thread_keyring(cred, keyring); + iee_set_cred_jit_keyring(cred, KEY_REQKEY_DEFL_THREAD_KEYRING); + #else cred->thread_keyring = keyring; cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; + #endif root_cred = cred; cifs_dbg(FYI, "cifs idmap keyring: %d\n", key_serial(keyring)); diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 91a9c362e57b70782f8f270760f7c48718bad536..ee46be08abe317528c966e8c8544a168d28e714a 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -207,6 +207,9 @@ struct mmu_table_batch { ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *)) extern void tlb_remove_table(struct mmu_gather *tlb, void *table); +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) +extern void ptp_tlb_remove_table(struct mmu_gather *tlb, void *table); +#endif #else /* !CONFIG_MMU_GATHER_HAVE_TABLE_FREE */ @@ -357,6 +360,9 @@ struct mmu_gather { }; void tlb_flush_mmu(struct mmu_gather *tlb); +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) +void ptp_tlb_flush_mmu(struct mmu_gather *tlb); +#endif static inline void __tlb_adjust_range(struct mmu_gather *tlb, unsigned long address, @@ -472,14 +478,36 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb, tlb_flush_mmu(tlb); } +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) +static inline void ptp_tlb_remove_page_size(struct mmu_gather *tlb, + struct page *page, int page_size) +{ + if 
(__tlb_remove_page_size(tlb, page, false, page_size)) + ptp_tlb_flush_mmu(tlb); +} +#endif + static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) { return tlb_remove_page_size(tlb, page, PAGE_SIZE); } +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) +static inline void ptp_tlb_remove_page(struct mmu_gather *tlb, struct page *page) +{ + return ptp_tlb_remove_page_size(tlb, page, PAGE_SIZE); +} +#endif static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) { + #if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + if (haoc_enabled) + ptp_tlb_remove_table(tlb, pt); + else + tlb_remove_table(tlb, pt); + #else tlb_remove_table(tlb, pt); + #endif } /* Like tlb_remove_ptdesc, but for page-like page directories. */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 7884a8e1ce87c946aaf49c55337e320c8a37fd02..9e62b911eb390de8983f8bc24878b94559f4508c 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -1101,6 +1101,14 @@ * They will fit only a subset of the architectures */ +#ifdef CONFIG_CREDP +#define CRED_DATA \ +. = ALIGN(PAGE_SIZE); \ +*(.iee.cred) \ +. = ALIGN(PAGE_SIZE); +#else +#define CRED_DATA +#endif /* * Writeable data. @@ -1118,6 +1126,7 @@ . 
= ALIGN(PAGE_SIZE); \ .data : AT(ADDR(.data) - LOAD_OFFSET) { \ INIT_TASK_DATA(inittask) \ + CRED_DATA \ NOSAVE_DATA \ PAGE_ALIGNED_DATA(pagealigned) \ CACHELINE_ALIGNED_DATA(cacheline) \ diff --git a/include/linux/cred.h b/include/linux/cred.h index de8d8cb71ad1c0c6f7c3868e874606a0a738cfdb..8e7617cf628750879143709747489b48c0d37f52 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -16,6 +16,10 @@ #include #include #include +#ifdef CONFIG_CREDP +#include +extern unsigned long long iee_rw_gate(int flag, ...); +#endif struct cred; struct inode; @@ -182,6 +186,27 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) cred->cap_inheritable)); } +#ifdef CONFIG_CREDP +static void __maybe_unused iee_set_cred_non_rcu(struct cred *cred, int non_rcu) +{ + if(!haoc_enabled) + { + cred->non_rcu = 0; + return; + } + iee_rw_gate(IEE_OP_SET_CRED_NON_RCU, cred, non_rcu); + *(int *)(&(((struct rcu_head *)(cred->rcu.func))->next)) = non_rcu; +} + +static bool __maybe_unused iee_set_cred_atomic_op_usage(struct cred *cred, int flag, int nr) +{ + bool ret; + + ret = iee_rw_gate(IEE_OP_SET_CRED_ATOP_USAGE, cred, flag, nr); + return ret; +} +#endif + /** * get_new_cred - Get a reference on a new set of credentials * @cred: The new credentials to reference @@ -191,7 +216,14 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) */ static inline struct cred *get_new_cred(struct cred *cred) { + #ifdef CONFIG_CREDP + if(haoc_enabled) + iee_set_cred_atomic_op_usage(cred, AT_ADD, 1); + else + atomic_long_inc(&cred->usage); + #else atomic_long_inc(&cred->usage); + #endif return cred; } @@ -213,7 +245,11 @@ static inline const struct cred *get_cred(const struct cred *cred) struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return cred; + #ifdef CONFIG_CREDP + iee_set_cred_non_rcu(nonconst_cred, 0); + #else nonconst_cred->non_rcu = 0; + #endif return get_new_cred(nonconst_cred); } @@ -222,9 +258,25 @@ static inline const struct cred 
*get_cred_rcu(const struct cred *cred) struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return NULL; + #ifdef CONFIG_CREDP + if(haoc_enabled) + { + if (!iee_set_cred_atomic_op_usage(nonconst_cred, AT_INC_NOT_ZERO, 0)) + return NULL; + } + else{ + if (!atomic_long_inc_not_zero(&nonconst_cred->usage)) + return NULL; + } + #else if (!atomic_long_inc_not_zero(&nonconst_cred->usage)) return NULL; + #endif + #ifdef CONFIG_CREDP + iee_set_cred_non_rcu(nonconst_cred, 0); + #else nonconst_cred->non_rcu = 0; + #endif return cred; } @@ -244,8 +296,20 @@ static inline void put_cred(const struct cred *_cred) struct cred *cred = (struct cred *) _cred; if (cred) { + #ifdef CONFIG_CREDP + if(haoc_enabled) + { + if (iee_set_cred_atomic_op_usage(cred, AT_SUB_AND_TEST, 1)) + __put_cred(cred); + } + else{ + if (atomic_long_dec_and_test(&(cred)->usage)) + __put_cred(cred); + } + #else if (atomic_long_dec_and_test(&(cred)->usage)) __put_cred(cred); + #endif } } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 1e6c0837ed2ecec46be100180530aeed218f19a3..fd81f8d7b2873fec63e16b30b4fb054322991fa4 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1250,6 +1250,9 @@ struct mmu_gather; extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm); extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm); extern void tlb_finish_mmu(struct mmu_gather *tlb); +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) +extern void ptp_tlb_finish_mmu(struct mmu_gather *tlb); +#endif struct vm_fault; diff --git a/include/linux/ptp-cache.h b/include/linux/ptp-cache.h new file mode 100644 index 0000000000000000000000000000000000000000..57d3c232b0c24c32a49d8e05e110f9ae9699b050 --- /dev/null +++ b/include/linux/ptp-cache.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PTP_CACHE_H +#define _LINUX_PTP_CACHE_H +#include +union freelist_aba_t { + struct { + void **freelist; + unsigned long counter; + 
}; + u128 full; +}; + +struct pg_cache { + union { + struct { + void **freelist; + unsigned long tid; + }; + union freelist_aba_t freelist_tid; + }; + unsigned long reserve_order; + unsigned long reserve_start_addr; + unsigned long reserve_end_addr; + unsigned long object_order; + int levels; + const char *name; + atomic_t count; + atomic_t fail_count; +}; + +extern struct pg_cache pg_cache; +extern void ptp_pg_cache_init(struct pg_cache *cache, unsigned long object_order, + int levels, const char *name); +extern void ptp_set_iee_reserved(struct pg_cache *cache); +extern void *ptp_pg_alloc(struct pg_cache *cache, gfp_t gfp); +extern void ptp_pg_free(struct pg_cache *cache, void *object); +#endif diff --git a/kernel/cred.c b/kernel/cred.c index 64404d51c052785829a00428ea212e6a94603367..14bfbbfbd80742185dd34e881e3674b351d5aeba 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -19,6 +19,10 @@ #include #include #include +#ifdef CONFIG_CREDP +#include +#include +#endif #if 0 #define kdebug(FMT, ...) 
\ @@ -33,7 +37,12 @@ do { \ } while (0) #endif +#ifdef CONFIG_CREDP +struct kmem_cache *cred_jar; +static struct kmem_cache *rcu_jar; +#else static struct kmem_cache *cred_jar; +#endif /* init to 2 - one for init_task, one to ensure it is never freed */ static struct group_info init_groups = { .usage = ATOMIC_INIT(2) }; @@ -41,6 +50,32 @@ static struct group_info init_groups = { .usage = ATOMIC_INIT(2) }; /* * The initial credentials for the initial task */ +#ifdef CONFIG_CREDP +struct cred init_cred __section(".iee.cred") = { + .usage = ATOMIC_INIT(4), +#ifdef CONFIG_DEBUG_CREDENTIALS + .subscribers = ATOMIC_INIT(2), + .magic = CRED_MAGIC, +#endif + .uid = GLOBAL_ROOT_UID, + .gid = GLOBAL_ROOT_GID, + .suid = GLOBAL_ROOT_UID, + .sgid = GLOBAL_ROOT_GID, + .euid = GLOBAL_ROOT_UID, + .egid = GLOBAL_ROOT_GID, + .fsuid = GLOBAL_ROOT_UID, + .fsgid = GLOBAL_ROOT_GID, + .securebits = SECUREBITS_DEFAULT, + .cap_inheritable = CAP_EMPTY_SET, + .cap_permitted = CAP_FULL_SET, + .cap_effective = CAP_FULL_SET, + .cap_bset = CAP_FULL_SET, + .user = INIT_USER, + .user_ns = &init_user_ns, + .group_info = &init_groups, + .ucounts = &init_ucounts, +}; +#else struct cred init_cred = { .usage = ATOMIC_INIT(4), .uid = GLOBAL_ROOT_UID, @@ -61,13 +96,22 @@ struct cred init_cred = { .group_info = &init_groups, .ucounts = &init_ucounts, }; +#endif /* * The RCU callback to actually dispose of a set of credentials */ static void put_cred_rcu(struct rcu_head *rcu) { + #ifdef CONFIG_CREDP + struct cred *cred = NULL; + if(haoc_enabled) + cred = *(struct cred **)(rcu + 1); + else + cred = container_of(rcu, struct cred, rcu); + #else struct cred *cred = container_of(rcu, struct cred, rcu); + #endif kdebug("put_cred_rcu(%p)", cred); @@ -86,6 +130,12 @@ static void put_cred_rcu(struct rcu_head *rcu) if (cred->ucounts) put_ucounts(cred->ucounts); put_user_ns(cred->user_ns); + #ifdef CONFIG_CREDP + if(haoc_enabled) + { + kmem_cache_free(rcu_jar, (struct rcu_head *)(cred->rcu.func)); + } + #endif 
kmem_cache_free(cred_jar, cred); } @@ -104,10 +154,26 @@ void __put_cred(struct cred *cred) BUG_ON(cred == current->cred); BUG_ON(cred == current->real_cred); + #ifdef CONFIG_CREDP + if(haoc_enabled) + { + if (*(int *)(&(((struct rcu_head *)(cred->rcu.func))->next))) + put_cred_rcu((struct rcu_head *)(cred->rcu.func)); + else + call_rcu((struct rcu_head *)(cred->rcu.func), put_cred_rcu); + } + else{ + if (cred->non_rcu) + put_cred_rcu(&cred->rcu); + else + call_rcu(&cred->rcu, put_cred_rcu); + } + #else if (cred->non_rcu) put_cred_rcu(&cred->rcu); else call_rcu(&cred->rcu, put_cred_rcu); + #endif } EXPORT_SYMBOL(__put_cred); @@ -173,7 +239,21 @@ struct cred *cred_alloc_blank(void) if (!new) return NULL; + #ifdef CONFIG_CREDP + if(haoc_enabled) + { + iee_set_cred_rcu(new, kmem_cache_zalloc(rcu_jar, GFP_KERNEL)); + *(struct cred **)(((struct rcu_head *)(new->rcu.func)) + 1) = new; + } + iee_set_cred_atomic_set_usage(new, 1); + #else atomic_long_set(&new->usage, 1); + + #ifdef CONFIG_DEBUG_CREDENTIALS + new->magic = CRED_MAGIC; + #endif + #endif + if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0) goto error; @@ -208,13 +288,27 @@ struct cred *prepare_creds(void) if (!new) return NULL; + #ifdef CONFIG_CREDP + if(haoc_enabled) + { + iee_set_cred_rcu(new, kmem_cache_alloc(rcu_jar, GFP_KERNEL)); + *(struct cred **)(((struct rcu_head *)(new->rcu.func)) + 1) = new; + } + #endif + kdebug("prepare_creds() alloc %p", new); old = task->cred; + #ifdef CONFIG_CREDP + iee_copy_cred(old, new); + iee_set_cred_non_rcu(new, 0); + iee_set_cred_atomic_set_usage(new, 1); + #else memcpy(new, old, sizeof(struct cred)); new->non_rcu = 0; atomic_long_set(&new->usage, 1); + #endif get_group_info(new->group_info); get_uid(new->user); get_user_ns(new->user_ns); @@ -227,10 +321,18 @@ struct cred *prepare_creds(void) #endif #ifdef CONFIG_SECURITY + #ifdef CONFIG_CREDP + iee_set_cred_security(new, NULL); + #else new->security = NULL; + #endif #endif + #ifdef CONFIG_CREDP + 
iee_set_cred_ucounts(new, get_ucounts(new->ucounts)); + #else new->ucounts = get_ucounts(new->ucounts); + #endif if (!new->ucounts) goto error; @@ -260,15 +362,30 @@ struct cred *prepare_exec_creds(void) #ifdef CONFIG_KEYS /* newly exec'd tasks don't get a thread keyring */ key_put(new->thread_keyring); + #ifdef CONFIG_CREDP + iee_set_cred_thread_keyring(new, NULL); + #else new->thread_keyring = NULL; + #endif /* inherit the session keyring; new process keyring */ key_put(new->process_keyring); + #ifdef CONFIG_CREDP + iee_set_cred_process_keyring(new, NULL); + #else new->process_keyring = NULL; + #endif #endif + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new, new->euid); + iee_set_cred_suid(new, new->euid); + iee_set_cred_fsgid(new, new->egid); + iee_set_cred_sgid(new, new->egid); + #else new->suid = new->fsuid = new->euid; new->sgid = new->fsgid = new->egid; + #endif return new; } @@ -323,7 +440,11 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) * had one */ if (new->thread_keyring) { key_put(new->thread_keyring); + #ifdef CONFIG_CREDP + iee_set_cred_thread_keyring(new, NULL); + #else new->thread_keyring = NULL; + #endif if (clone_flags & CLONE_THREAD) install_thread_keyring_to_cred(new); } @@ -333,7 +454,11 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) */ if (!(clone_flags & CLONE_THREAD)) { key_put(new->process_keyring); + #ifdef CONFIG_CREDP + iee_set_cred_process_keyring(new, NULL); + #else new->process_keyring = NULL; + #endif } #endif @@ -591,7 +716,11 @@ int set_cred_ucounts(struct cred *new) if (!(new_ucounts = alloc_ucounts(new->user_ns, new->uid))) return -EAGAIN; + #ifdef CONFIG_CREDP + iee_set_cred_ucounts(new, new_ucounts); + #else new->ucounts = new_ucounts; + #endif put_ucounts(old_ucounts); return 0; @@ -603,8 +732,24 @@ int set_cred_ucounts(struct cred *new) void __init cred_init(void) { /* allocate a slab in which we can store credentials */ + #ifdef CONFIG_CREDP + if (haoc_enabled){ + cred_jar = 
kmem_cache_create("cred_jar", sizeof(struct cred), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); + + rcu_jar = kmem_cache_create("rcu_jar", sizeof(struct rcu_head) + sizeof(struct cred *), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); + // Map init_cred + iee_set_cred_rcu(&init_cred, kmem_cache_zalloc(rcu_jar, GFP_KERNEL)); + *(struct cred **)(((struct rcu_head *)(init_cred.rcu.func)) + 1) = &init_cred; + pr_info("HAOC: CONFIG_CREDP enabled."); + } else + cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); + #else cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); + #endif } /** @@ -635,29 +780,59 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) if (!new) return NULL; + #ifdef CONFIG_CREDP + if(haoc_enabled) + { + iee_set_cred_rcu(new, kmem_cache_alloc(rcu_jar, GFP_KERNEL)); + *(struct cred **)(((struct rcu_head *)(new->rcu.func)) + 1) = new; + } + #endif + kdebug("prepare_kernel_cred() alloc %p", new); old = get_task_cred(daemon); + #ifdef CONFIG_CREDP + iee_copy_cred(old, new); + iee_set_cred_non_rcu(new, 0); + iee_set_cred_atomic_set_usage(new, 1); + #else *new = *old; new->non_rcu = 0; atomic_long_set(&new->usage, 1); + #endif get_uid(new->user); get_user_ns(new->user_ns); get_group_info(new->group_info); #ifdef CONFIG_KEYS + #ifdef CONFIG_CREDP + iee_set_cred_session_keyring(new, NULL); + iee_set_cred_process_keyring(new, NULL); + iee_set_cred_thread_keyring(new, NULL); + iee_set_cred_request_key_auth(new, NULL); + iee_set_cred_jit_keyring(new, KEY_REQKEY_DEFL_THREAD_KEYRING); + #else new->session_keyring = NULL; new->process_keyring = NULL; new->thread_keyring = NULL; new->request_key_auth = NULL; new->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; + #endif #endif #ifdef CONFIG_SECURITY + #ifdef CONFIG_CREDP + iee_set_cred_security(new, NULL); + #else new->security = NULL; + #endif #endif + #ifdef 
CONFIG_CREDP + iee_set_cred_ucounts(new, get_ucounts(new->ucounts)); + #else new->ucounts = get_ucounts(new->ucounts); + #endif if (!new->ucounts) goto error; @@ -724,8 +899,13 @@ int set_create_files_as(struct cred *new, struct inode *inode) { if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid)) return -EINVAL; + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new, inode->i_uid); + iee_set_cred_fsgid(new, inode->i_gid); + #else new->fsuid = inode->i_uid; new->fsgid = inode->i_gid; + #endif return security_kernel_create_files_as(new, inode); } EXPORT_SYMBOL(set_create_files_as); diff --git a/kernel/exit.c b/kernel/exit.c index 2a4c8b44baf34a7b266290f7660aeede459d78e0..1d767f67752cd257a550a5c47822b9c08a1daae8 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -79,6 +79,9 @@ #include #include #include +#ifdef CONFIG_IEE_PTRP +#include +#endif extern int sysctl_vm_memory_qos; unsigned long sysctl_async_mem_free_pages = ULONG_MAX; @@ -571,6 +574,10 @@ static void exit_mm(void) smp_mb__after_spinlock(); local_irq_disable(); current->mm = NULL; +#ifdef CONFIG_IEE_PTRP + if(haoc_enabled) + iee_set_token_pgd(current, NULL); +#endif membarrier_update_current_mm(NULL); enter_lazy_tlb(mm, current); local_irq_enable(); diff --git a/kernel/fork.c b/kernel/fork.c index b23a8625f01441e1ba5952827f3f5f5bbab9a35e..3a031f44e7de88394e0025f6db18640d6465b31b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -114,6 +114,12 @@ #include #include +#ifdef CONFIG_IEE_PTRP +#include +#endif +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) +#include +#endif /* * Minimum number of threads to boot the kernel @@ -181,7 +187,11 @@ void __weak arch_release_task_struct(struct task_struct *tsk) } #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR +#ifdef CONFIG_IEE_PTRP +struct kmem_cache *task_struct_cachep; +#else static struct kmem_cache *task_struct_cachep; +#endif static inline struct task_struct *alloc_task_struct_node(int node) { @@ -639,6 +649,10 @@ void free_task(struct task_struct *tsk) if (tsk->flags 
& PF_KTHREAD) free_kthread_struct(tsk); bpf_task_storage_free(tsk); +#ifdef CONFIG_IEE_PTRP + if(haoc_enabled) + iee_invalidate_token(tsk); +#endif free_task_struct(tsk); } EXPORT_SYMBOL(free_task); @@ -1739,6 +1753,10 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) #endif tsk->mm = NULL; +#ifdef CONFIG_IEE_PTRP + if(haoc_enabled) + iee_set_token_pgd(tsk, NULL); +#endif tsk->active_mm = NULL; /* @@ -1760,6 +1778,10 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) } tsk->mm = mm; +#ifdef CONFIG_IEE_PTRP + if(haoc_enabled) + iee_set_token_pgd(tsk, mm->pgd); +#endif tsk->active_mm = mm; sched_mm_cid_fork(tsk); return 0; @@ -2347,6 +2369,10 @@ __latent_entropy struct task_struct *copy_process( p = dup_task_struct(current, node); if (!p) goto fork_out; +#ifdef CONFIG_IEE_PTRP + if(haoc_enabled) + iee_validate_token(p); +#endif p->flags &= ~PF_KTHREAD; if (args->kthread) p->flags |= PF_KTHREAD; @@ -2902,6 +2928,9 @@ pid_t kernel_clone(struct kernel_clone_args *args) struct task_struct *p; int trace = 0; pid_t nr; +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + unsigned long reg; +#endif /* * For legacy clone() calls, CLONE_PIDFD uses the parent_tid argument @@ -2935,7 +2964,15 @@ pid_t kernel_clone(struct kernel_clone_args *args) trace = 0; } + #if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + if (haoc_enabled) + ptp_disable_iee(®); + #endif p = copy_process(NULL, trace, NUMA_NO_NODE, args); + #if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + if (haoc_enabled) + ptp_enable_iee(reg); + #endif add_latent_entropy(); if (IS_ERR(p)) diff --git a/kernel/groups.c b/kernel/groups.c index 9aaed2a310730f1f1b918ca727628e559d5d4eb3..86715829d5a0addca317e4ba842eb387401f50a1 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -11,6 +11,9 @@ #include #include #include +#ifdef CONFIG_CREDP +#include +#endif struct group_info *groups_alloc(int gidsetsize) { @@ -119,7 +122,11 @@ void set_groups(struct cred *new, struct 
group_info *group_info) { put_group_info(new->group_info); get_group_info(group_info); + #ifdef CONFIG_CREDP + iee_set_cred_group_info(new, group_info); + #else new->group_info = group_info; + #endif } EXPORT_SYMBOL(set_groups); diff --git a/kernel/kthread.c b/kernel/kthread.c index 980e6b325b7dc71a2ba5885d0420cdb4a6b7417b..0caf7249eda5632994609fd8da02ca4fd180c958 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -29,6 +29,9 @@ #include #include #include +#ifdef CONFIG_IEE_PTRP +#include +#endif static DEFINE_SPINLOCK(kthread_create_lock); @@ -1457,6 +1460,10 @@ void kthread_use_mm(struct mm_struct *mm) tsk->active_mm = mm; tsk->mm = mm; membarrier_update_current_mm(mm); +#ifdef CONFIG_IEE_PTRP + if(haoc_enabled) + iee_set_token_pgd(tsk, mm->pgd); +#endif switch_mm_irqs_off(active_mm, mm, tsk); local_irq_enable(); task_unlock(tsk); @@ -1501,6 +1508,10 @@ void kthread_unuse_mm(struct mm_struct *mm) local_irq_disable(); tsk->mm = NULL; membarrier_update_current_mm(NULL); +#ifdef CONFIG_IEE_PTRP + if(haoc_enabled) + iee_set_token_pgd(tsk, NULL); +#endif mmgrab_lazy_tlb(mm); /* active_mm is still 'mm' */ enter_lazy_tlb(mm, tsk); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index eb1a3472a0de74a782e3bd54556e253bc855b04d..c12bb776c38b23aeeb2148c8b3bfc4646cc3c989 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -104,6 +104,12 @@ #include "../workqueue_internal.h" #include "../../io_uring/io-wq.h" #include "../smpboot.h" +#ifdef CONFIG_IEE_PTRP +#include +#endif +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) +#include +#endif EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu); EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask); @@ -5564,6 +5570,10 @@ context_switch(struct rq *rq, struct task_struct *prev, * case 'prev->active_mm == next->mm' through * finish_task_switch()'s mmdrop(). 
*/ +#ifdef CONFIG_IEE_PTRP + if(haoc_enabled) + iee_verify_token_pgd(next); +#endif switch_mm_irqs_off(prev->active_mm, next->mm, next); lru_gen_use_mm(next->mm); @@ -5579,9 +5589,20 @@ context_switch(struct rq *rq, struct task_struct *prev, prepare_lock_switch(rq, next, rf); + #if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + int disabled_cnt = 0; + unsigned long reg = 0; + if (haoc_enabled){ + ptp_context_enable_iee(&disabled_cnt, ®); + } + #endif /* Here we just switch the register state and the stack. */ switch_to(prev, next, prev); barrier(); + #if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + if (haoc_enabled) + ptp_context_restore_iee(disabled_cnt, reg); + #endif return finish_task_switch(prev); } diff --git a/kernel/sys.c b/kernel/sys.c index 47cb10a16b009d1a59687af2af763e82fdcf685b..1f3ef34ec1903410b43362f09184aba7cc433706 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -74,6 +74,9 @@ #include #include #include +#ifdef CONFIG_CREDP +#include +#endif #include "uid16.h" @@ -395,7 +398,11 @@ long __sys_setregid(gid_t rgid, gid_t egid) if (gid_eq(old->gid, krgid) || gid_eq(old->egid, krgid) || ns_capable_setid(old->user_ns, CAP_SETGID)) + #ifdef CONFIG_CREDP + iee_set_cred_gid(new, krgid); + #else new->gid = krgid; + #endif else goto error; } @@ -404,15 +411,27 @@ long __sys_setregid(gid_t rgid, gid_t egid) gid_eq(old->egid, kegid) || gid_eq(old->sgid, kegid) || ns_capable_setid(old->user_ns, CAP_SETGID)) + #ifdef CONFIG_CREDP + iee_set_cred_egid(new, kegid); + #else new->egid = kegid; + #endif else goto error; } if (rgid != (gid_t) -1 || (egid != (gid_t) -1 && !gid_eq(kegid, old->gid))) + #ifdef CONFIG_CREDP + iee_set_cred_sgid(new, new->egid); + #else new->sgid = new->egid; + #endif + #ifdef CONFIG_CREDP + iee_set_cred_fsgid(new, new->egid); + #else new->fsgid = new->egid; + #endif retval = security_task_fix_setgid(new, old, LSM_SETID_RE); if (retval < 0) @@ -454,9 +473,25 @@ long __sys_setgid(gid_t gid) retval = -EPERM; if 
(ns_capable_setid(old->user_ns, CAP_SETGID)) + #ifdef CONFIG_CREDP + { + iee_set_cred_fsgid(new, kgid); + iee_set_cred_sgid(new, kgid); + iee_set_cred_egid(new, kgid); + iee_set_cred_gid(new, kgid); + } + #else new->gid = new->egid = new->sgid = new->fsgid = kgid; + #endif else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid)) + #ifdef CONFIG_CREDP + { + iee_set_cred_fsgid(new, kgid); + iee_set_cred_egid(new, kgid); + } + #else new->egid = new->fsgid = kgid; + #endif else goto error; @@ -488,7 +523,11 @@ static int set_user(struct cred *new) return -EAGAIN; free_uid(new->user); + #ifdef CONFIG_CREDP + iee_set_cred_user(new, new_user); + #else new->user = new_user; + #endif return 0; } @@ -549,7 +588,11 @@ long __sys_setreuid(uid_t ruid, uid_t euid) retval = -EPERM; if (ruid != (uid_t) -1) { + #ifdef CONFIG_CREDP + iee_set_cred_uid(new, kruid); + #else new->uid = kruid; + #endif if (!uid_eq(old->uid, kruid) && !uid_eq(old->euid, kruid) && !ns_capable_setid(old->user_ns, CAP_SETUID)) @@ -557,7 +600,11 @@ long __sys_setreuid(uid_t ruid, uid_t euid) } if (euid != (uid_t) -1) { + #ifdef CONFIG_CREDP + iee_set_cred_euid(new, keuid); + #else new->euid = keuid; + #endif if (!uid_eq(old->uid, keuid) && !uid_eq(old->euid, keuid) && !uid_eq(old->suid, keuid) && @@ -572,8 +619,16 @@ long __sys_setreuid(uid_t ruid, uid_t euid) } if (ruid != (uid_t) -1 || (euid != (uid_t) -1 && !uid_eq(keuid, old->uid))) + #ifdef CONFIG_CREDP + iee_set_cred_suid(new, new->euid); + #else new->suid = new->euid; + #endif + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new, new->euid); + #else new->fsuid = new->euid; + #endif retval = security_task_fix_setuid(new, old, LSM_SETID_RE); if (retval < 0) @@ -626,7 +681,12 @@ long __sys_setuid(uid_t uid) retval = -EPERM; if (ns_capable_setid(old->user_ns, CAP_SETUID)) { + #ifdef CONFIG_CREDP + iee_set_cred_uid(new, kuid); + iee_set_cred_suid(new, kuid); + #else new->suid = new->uid = kuid; + #endif if (!uid_eq(kuid, old->uid)) { retval = set_user(new); if 
(retval < 0) @@ -636,7 +696,12 @@ long __sys_setuid(uid_t uid) goto error; } + #ifdef CONFIG_CREDP + iee_set_cred_euid(new, kuid); + iee_set_cred_fsuid(new, kuid); + #else new->fsuid = new->euid = kuid; + #endif retval = security_task_fix_setuid(new, old, LSM_SETID_ID); if (retval < 0) @@ -710,7 +775,11 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) return -ENOMEM; if (ruid != (uid_t) -1) { + #ifdef CONFIG_CREDP + iee_set_cred_uid(new, kruid); + #else new->uid = kruid; + #endif if (!uid_eq(kruid, old->uid)) { retval = set_user(new); if (retval < 0) @@ -718,10 +787,22 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) } } if (euid != (uid_t) -1) + #ifdef CONFIG_CREDP + iee_set_cred_euid(new, keuid); + #else new->euid = keuid; + #endif if (suid != (uid_t) -1) + #ifdef CONFIG_CREDP + iee_set_cred_suid(new, ksuid); + #else new->suid = ksuid; + #endif + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new, new->euid); + #else new->fsuid = new->euid; + #endif retval = security_task_fix_setuid(new, old, LSM_SETID_RES); if (retval < 0) @@ -810,12 +891,28 @@ long __sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) return -ENOMEM; if (rgid != (gid_t) -1) + #ifdef CONFIG_CREDP + iee_set_cred_gid(new, krgid); + #else new->gid = krgid; + #endif if (egid != (gid_t) -1) + #ifdef CONFIG_CREDP + iee_set_cred_egid(new, kegid); + #else new->egid = kegid; + #endif if (sgid != (gid_t) -1) + #ifdef CONFIG_CREDP + iee_set_cred_sgid(new, ksgid); + #else new->sgid = ksgid; + #endif + #ifdef CONFIG_CREDP + iee_set_cred_fsgid(new, new->egid); + #else new->fsgid = new->egid; + #endif retval = security_task_fix_setgid(new, old, LSM_SETID_RES); if (retval < 0) @@ -882,7 +979,11 @@ long __sys_setfsuid(uid_t uid) uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) || ns_capable_setid(old->user_ns, CAP_SETUID)) { if (!uid_eq(kuid, old->fsuid)) { + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new, kuid); + #else new->fsuid = kuid; + #endif if (security_task_fix_setuid(new, old, 
LSM_SETID_FS) == 0) goto change_okay; } @@ -926,7 +1027,11 @@ long __sys_setfsgid(gid_t gid) gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) || ns_capable_setid(old->user_ns, CAP_SETGID)) { if (!gid_eq(kgid, old->fsgid)) { + #ifdef CONFIG_CREDP + iee_set_cred_fsgid(new, kgid); + #else new->fsgid = kgid; + #endif if (security_task_fix_setgid(new,old,LSM_SETID_FS) == 0) goto change_okay; } diff --git a/kernel/umh.c b/kernel/umh.c index 1b13c5d34624878edf4a4807d0c8b857d41b97fb..3202d889635e99e5960c92e202c6f92eb95d5b16 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -31,6 +31,9 @@ #include #include +#ifdef CONFIG_CREDP +#include +#endif static kernel_cap_t usermodehelper_bset = CAP_FULL_SET; static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET; @@ -91,9 +94,15 @@ static int call_usermodehelper_exec_async(void *data) goto out; spin_lock(&umh_sysctl_lock); + #ifdef CONFIG_CREDP + iee_set_cred_cap_bset(new, cap_intersect(usermodehelper_bset, new->cap_bset)); + iee_set_cred_cap_inheritable(new, cap_intersect(usermodehelper_inheritable, + new->cap_inheritable)); + #else new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset); new->cap_inheritable = cap_intersect(usermodehelper_inheritable, new->cap_inheritable); + #endif spin_unlock(&umh_sysctl_lock); if (sub_info->init) { diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 1d8e47bed3f118ae071b4c675a6812001c78e238..fdbc4d1d1b1931d3c8548788ac5b9514bb7d7eed 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -21,6 +21,9 @@ #include #include #include +#ifdef CONFIG_CREDP +#include +#endif static struct kmem_cache *user_ns_cachep __read_mostly; static DEFINE_MUTEX(userns_state_mutex); @@ -45,6 +48,19 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) /* Start with the same capabilities as init but useless for doing * anything as the capabilities are bound to the new user namespace. 
*/ + #ifdef CONFIG_CREDP + iee_set_cred_securebits(cred, SECUREBITS_DEFAULT); + iee_set_cred_cap_inheritable(cred, CAP_EMPTY_SET); + iee_set_cred_cap_permitted(cred, CAP_FULL_SET); + iee_set_cred_cap_effective(cred, CAP_FULL_SET); + iee_set_cred_cap_ambient(cred, CAP_EMPTY_SET); + iee_set_cred_cap_bset(cred, CAP_FULL_SET); +#ifdef CONFIG_KEYS + key_put(cred->request_key_auth); + iee_set_cred_request_key_auth(cred, NULL); +#endif + iee_set_cred_user_ns(cred, user_ns); + #else cred->securebits = SECUREBITS_DEFAULT; cred->cap_inheritable = CAP_EMPTY_SET; cred->cap_permitted = CAP_FULL_SET; @@ -57,6 +73,7 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) #endif /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ cred->user_ns = user_ns; + #endif } static unsigned long enforced_nproc_rlimit(void) diff --git a/mm/Makefile b/mm/Makefile index 3ddb4127a2044a6870fa41267ef9adc9da0da2e1..f186dc610b0c04c0c0be2f147dc593d9a2731c6a 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -139,4 +139,5 @@ obj-$(CONFIG_IO_MAPPING) += io-mapping.o obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o obj-$(CONFIG_GENERIC_IOREMAP) += ioremap.o obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o -obj-y += unevictable.o \ No newline at end of file +obj-y += unevictable.o +obj-$(CONFIG_IEE) += haoc/ diff --git a/mm/haoc/Makefile b/mm/haoc/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..ad37f99f562d23fe3e880a170a52ca184df27085 --- /dev/null +++ b/mm/haoc/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_PTP) += ptp-pg_cache.o diff --git a/mm/haoc/ptp-pg_cache.c b/mm/haoc/ptp-pg_cache.c new file mode 100644 index 0000000000000000000000000000000000000000..3e0b10c98151e9226592e9b37094aec21f901efd --- /dev/null +++ b/mm/haoc/ptp-pg_cache.c @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct pg_cache pg_cache; + +static 
inline void *__get_freepointer(void *object) +{ + return *((void **)object); +}; + +static inline void __set_freepointer(void *object, void *next_object) +{ + *(void **)object = next_object; +}; + +static inline void __iee_set_freepointer(void *object, void *next_object) +{ + iee_set_freeptr(object, next_object); +}; + +static inline bool __update_freelist(struct pg_cache *cache, void *freelist_old, void *freelist_new, + unsigned long tid) +{ + union freelist_aba_t old = { .freelist = freelist_old, .counter = tid }; + union freelist_aba_t new = { .freelist = freelist_new, .counter = tid + 1 }; + + return try_cmpxchg128(&(cache->freelist_tid.full), &old.full, new.full); +} + +#ifdef CONFIG_ARM64 +static void __ptp_set_iee_pages(unsigned long start_addr, unsigned long end_addr, + struct pg_cache *cache) +{ + unsigned long addr; + + if (start_addr != ALIGN(start_addr, PMD_SIZE)) + panic("IEE: %s pool not aligned.", cache->name); + + addr = start_addr; + while (addr < end_addr) { + set_iee_page(addr, PMD_ORDER, IEE_PGTABLE); + addr += PMD_SIZE; + } + flush_tlb_kernel_range(start_addr, end_addr); +} +#endif + +void __init ptp_pg_cache_init(struct pg_cache *cache, unsigned long object_order, + int levels, const char *name) +{ + unsigned long addr; + unsigned long addr_next; + unsigned long reserve_order; + unsigned long reserve_pages; + unsigned long start_addr; + unsigned long end_addr; + unsigned long object_size; + struct page *page; + + object_size = (1 << object_order) * PAGE_SIZE; + reserve_order = CONFIG_PTP_RESERVE_ORDER; + while (1) { + reserve_pages = (1 << reserve_order) * levels; + start_addr = (unsigned long)memblock_alloc(reserve_pages * PAGE_SIZE, + reserve_pages * PAGE_SIZE); + if (start_addr) + break; + reserve_order--; + /* Allocate pages in pmd blocks to reduce the mapping cost. 
*/ + if (reserve_order < PMD_ORDER) + panic("IEE: fail to reserve pages for %s", name); + } + end_addr = start_addr + reserve_pages * PAGE_SIZE; + pr_err("IEE: reserve %ld pages for %s, range[0x%lx, 0x%lx]", + reserve_pages, name, start_addr, end_addr); + + addr = start_addr; + addr_next = addr + object_size; + while (addr < end_addr) { + page = virt_to_page(addr); + set_page_count(page, 1); + __set_freepointer((void *)addr, (void *)addr_next); + addr += object_size; + addr_next += object_size; + } + __set_freepointer((void *)(addr - object_size), NULL); + + cache->object_order = object_order; + cache->reserve_order = reserve_order; + cache->reserve_start_addr = start_addr; + cache->reserve_end_addr = end_addr; + cache->levels = levels; + cache->freelist = (void *)start_addr; + cache->tid = 0; + cache->levels = levels; + cache->name = name; + +#ifdef CONFIG_ARM64 + /* IEE for ARM64 needs to access these data by IEE addresses. */ + __ptp_set_iee_pages(start_addr, end_addr, cache); +#endif +#ifdef DEBUG + atomic_set(&cache->count, reserve_pages); + atomic_set(&cache->fail_count, 0); + pr_info("IEE: %s ready. object size 0x%lx, count %d.", + name, object_size, atomic_read(&cache->count)); +#endif +} + +void __init ptp_set_iee_reserved(struct pg_cache *cache) +{ + #ifdef CONFIG_X86_64 + unsigned long addr = cache->reserve_start_addr; + + for (int i = 0; i < cache->levels; i++) { + set_iee_page(addr,cache->reserve_order); + addr += (1 << cache->reserve_order) * PAGE_SIZE; + } + #endif +} + +#ifdef CONFIG_ARM64 +/* Expand the cache pool with PMD_SIZE for each time. 
 */ +static int __ref ptp_pg_cache_expand(struct pg_cache *cache, gfp_t gfp) +{ + unsigned long addr; + unsigned long addr_next; + unsigned long start_addr; + unsigned long end_addr; + unsigned long object_size; + unsigned long tid; + void **freelist; + struct page *page; + + if (slab_is_available()) + start_addr = __get_free_pages(gfp, PMD_ORDER); + else + start_addr = (unsigned long)memblock_alloc(PMD_SIZE, PMD_SIZE); + + if (!start_addr) + return 0; + + addr = start_addr; + object_size = (1 << cache->object_order) * PAGE_SIZE; + addr_next = addr + object_size; + end_addr = start_addr + PMD_SIZE; + while (addr < end_addr) { + page = virt_to_page(addr); + set_page_count(page, 1); + __set_freepointer((void *)addr, (void *)addr_next); + addr += object_size; + addr_next += object_size; + } + __set_freepointer((void *)(addr - object_size), NULL); + + /* IEE for ARM64 needs to access these data by IEE addresses. */ + __ptp_set_iee_pages(start_addr, end_addr, cache); + +#ifdef DEBUG + atomic_add(1 << PMD_ORDER, &cache->count); + pr_info("IEE: %s expand to count %d. Curr failed: %d", cache->name, + atomic_read(&cache->count), atomic_read(&cache->fail_count)); +#endif + +/* Fill the newly allocated pages into the cache freelist. */ +redo: + tid = READ_ONCE(cache->tid); + barrier(); + freelist = READ_ONCE(cache->freelist); + __iee_set_freepointer((void *)(end_addr - object_size), freelist); + if (unlikely(!__update_freelist(cache, freelist, (void *)start_addr, tid))) + goto redo; + + return 1; +} +#endif + +void *ptp_pg_alloc(struct pg_cache *cache, gfp_t gfp) +{ + unsigned long tid; + void *object; + void *next_object; +redo: + tid = READ_ONCE(cache->tid); + barrier(); + object = READ_ONCE(cache->freelist); + if (unlikely(!object)) { + // slow path alloc + #ifdef CONFIG_ARM64 + if (ptp_pg_cache_expand(cache, gfp) || READ_ONCE(cache->freelist)) + goto redo; + + /* If the expansion failed, alloc a single object without RO protection to + * avoid block splitting. 
+ */ + object = (void *)__get_free_pages(gfp, cache->object_order); + set_iee_address_valid((unsigned long)object, cache->object_order); + iee_set_bitmap_type((unsigned long)object, cache->object_order, IEE_PGTABLE); + #ifdef DEBUG + WARN_ONCE(1, "IEE: Failed on %s expansion.", cache->name); + atomic_add(1 << cache->object_order, &cache->fail_count); + #endif + #else + object = (void *)__get_free_pages(gfp, cache->object_order); + set_iee_page((unsigned long)object, cache->object_order); + #endif + } else { + // fast path alloc + next_object = __get_freepointer(object); + if (unlikely(!__update_freelist(cache, object, next_object, tid))) + goto redo; + prefetchw(next_object); + if (gfp & __GFP_ZERO) + __iee_set_freepointer(object, NULL); + } + return object; +}; + +void ptp_pg_free(struct pg_cache *cache, void *object) +{ + unsigned long tid; + void **freelist; + + #ifdef CONFIG_X86_64 + if (unlikely((unsigned long)object < cache->reserve_start_addr + || (unsigned long)object >= cache->reserve_end_addr)) { + // slow path free + unset_iee_page((unsigned long)object, cache->object_order); + free_pages((unsigned long)object, cache->object_order); + return; + } + #endif + + // fast path free +redo: + tid = READ_ONCE(cache->tid); + barrier(); + freelist = READ_ONCE(cache->freelist); + __iee_set_freepointer(object, freelist); + if (unlikely(!__update_freelist(cache, freelist, object, tid))) + goto redo; +} diff --git a/mm/memory.c b/mm/memory.c index e7b67d1cc480edbfab1b8563249bfa2679283616..186d6286550d933e12b7c78c4e52a192acd90e55 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -86,6 +86,9 @@ #include #include #include +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) +#include +#endif #ifdef CONFIG_CGROUP_SLI #include #endif @@ -5936,6 +5939,9 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, /* If the fault handler drops the mmap_lock, vma may be freed */ struct mm_struct *mm = vma->vm_mm; vm_fault_t ret; +#if defined(CONFIG_PTP) && 
defined(CONFIG_X86_64) + unsigned long reg; +#endif __set_current_state(TASK_RUNNING); @@ -5962,7 +5968,20 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, if (unlikely(is_vm_hugetlb_page(vma))) ret = hugetlb_fault(vma->vm_mm, vma, address, flags); else + #if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + { + if (haoc_enabled) + { + ptp_disable_iee(®); + ret = __handle_mm_fault(vma, address, flags); + ptp_enable_iee(reg); + } + else + ret = __handle_mm_fault(vma, address, flags); + } + #else ret = __handle_mm_fault(vma, address, flags); + #endif lru_gen_exit_fault(); diff --git a/mm/mmap.c b/mm/mmap.c index 0ee19f3b151d9420709faf1996bd36800f6f0014..0b9e104a4884ec33f58d1c46e055722f0861502a 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -53,6 +53,9 @@ #include #include #include +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) +#include +#endif #define CREATE_TRACE_POINTS #include @@ -2374,10 +2377,21 @@ static void unmap_region(struct mm_struct *mm, struct ma_state *mas, update_hiwater_rss(mm); unmap_vmas(&tlb, mas, vma, start, end, tree_end, mm_wr_locked); mas_set(mas, mt_start); + #if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + if (haoc_enabled) + tlb_flush_mmu(&tlb); + #endif free_pgtables(&tlb, mas, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, next ? next->vm_start : USER_PGTABLES_CEILING, mm_wr_locked); + #if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + if (haoc_enabled) + ptp_tlb_finish_mmu(&tlb); + else + tlb_finish_mmu(&tlb); + #else tlb_finish_mmu(&tlb); + #endif } /* @@ -2592,6 +2606,9 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, MA_STATE(mas_detach, &mt_detach, 0, 0); mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK); mt_on_stack(mt_detach); +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + unsigned long reg; +#endif /* * If we need to split any vma, do it now to save pain later. 
@@ -2707,8 +2724,16 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, * were isolated before we downgraded mmap_lock. */ mas_set(&mas_detach, 1); + #if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + if (haoc_enabled) + ptp_disable_iee(®); + #endif unmap_region(mm, &mas_detach, vma, prev, next, start, end, count, !unlock); + #if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + if (haoc_enabled) + ptp_enable_iee(reg); + #endif /* Statistics and freeing VMAs */ mas_set(&mas_detach, 0); remove_mt(mm, &mas_detach); @@ -3408,6 +3433,9 @@ void exit_mmap(struct mm_struct *mm) unsigned long nr_accounted = 0; MA_STATE(mas, &mm->mm_mt, 0, 0); int count = 0; +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + unsigned long reg; +#endif /* mm's last user has gone, and its about to be pulled down */ mmu_notifier_release(mm); @@ -3427,7 +3455,15 @@ void exit_mmap(struct mm_struct *mm) tlb_gather_mmu_fullmm(&tlb, mm); /* update_hiwater_rss(mm) here? but nobody should be looking */ /* Use ULONG_MAX here to ensure all VMAs in the mm are unmapped */ + #if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + if (haoc_enabled) + ptp_disable_iee(®); + #endif unmap_vmas(&tlb, &mas, vma, 0, ULONG_MAX, ULONG_MAX, false); + #if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + if (haoc_enabled) + ptp_enable_iee(reg); + #endif mmap_read_unlock(mm); /* @@ -3438,9 +3474,20 @@ void exit_mmap(struct mm_struct *mm) mmap_write_lock(mm); mt_clear_in_rcu(&mm->mm_mt); mas_set(&mas, vma->vm_end); + #if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + if (haoc_enabled) + tlb_flush_mmu(&tlb); + #endif free_pgtables(&tlb, &mas, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING, true); + #if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + if (haoc_enabled) + ptp_tlb_finish_mmu(&tlb); + else + tlb_finish_mmu(&tlb); + #else tlb_finish_mmu(&tlb); + #endif /* * Walk the list again, actually closing and freeing it, with preemption diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 
99b3e9408aa0fb8961c980fe7cf18162fde1d427..715e9f8fc5dca76a942d1fb049f10a4d1f3bb7ac 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -141,6 +141,55 @@ static void __tlb_batch_free_encoded_pages(struct mmu_gather_batch *batch) } } +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) +static void __ptp_tlb_batch_free_encoded_pages(struct mmu_gather_batch *batch) +{ + struct encoded_page **pages = batch->encoded_pages; + unsigned int nr, nr_pages; + + while (batch->nr) { + if (!page_poisoning_enabled_static() && !want_init_on_free()) { + nr = min(MAX_NR_FOLIOS_PER_FREE, batch->nr); + + /* + * Make sure we cover page + nr_pages, and don't leave + * nr_pages behind when capping the number of entries. + */ + if (unlikely(encoded_page_flags(pages[nr - 1]) & + ENCODED_PAGE_BIT_NR_PAGES_NEXT)) + nr++; + } else { + /* + * With page poisoning and init_on_free, the time it + * takes to free memory grows proportionally with the + * actual memory size. Therefore, limit based on the + * actual memory size and not the number of involved + * folios. 
+ */ + for (nr = 0, nr_pages = 0; + nr < batch->nr && nr_pages < MAX_NR_FOLIOS_PER_FREE; + nr++) { + if (unlikely(encoded_page_flags(pages[nr]) & + ENCODED_PAGE_BIT_NR_PAGES_NEXT)) + nr_pages += encoded_nr_pages(pages[++nr]); + else + nr_pages++; + } + } + + for (int i = 0; i < nr; i++) { + struct page *page = encoded_page_ptr(pages[i]); + + __ptp_tlb_remove_table((void *)page); + } + pages += nr; + batch->nr -= nr; + + cond_resched(); + } +} +#endif + static void tlb_batch_pages_flush(struct mmu_gather *tlb) { struct mmu_gather_batch *batch; @@ -150,6 +199,17 @@ static void tlb_batch_pages_flush(struct mmu_gather *tlb) tlb->active = &tlb->local; } +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) +static void ptp_tlb_batch_pages_flush(struct mmu_gather *tlb) +{ + struct mmu_gather_batch *batch; + + for (batch = &tlb->local; batch && batch->nr; batch = batch->next) + __ptp_tlb_batch_free_encoded_pages(batch); + tlb->active = &tlb->local; +} +#endif + static void tlb_batch_list_free(struct mmu_gather *tlb) { struct mmu_gather_batch *batch, *next; @@ -347,6 +407,90 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) tlb_table_flush(tlb); } +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) +static void __ptp_tlb_remove_table_free(struct mmu_table_batch *batch) +{ + int i; + + for (i = 0; i < batch->nr; i++) + __ptp_tlb_remove_table(batch->tables[i]); + + free_page((unsigned long)batch); +} + +#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE +struct ptp_remove_table_work { + struct work_struct work; + struct mmu_table_batch *batch; +}; + +static void ptp_remove_table(struct work_struct *work) +{ + struct ptp_remove_table_work *ptp_remove_table_work = + container_of(work, struct ptp_remove_table_work, work); + + __ptp_tlb_remove_table_free(ptp_remove_table_work->batch); + kfree(ptp_remove_table_work); +} + +static void ptp_tlb_remove_table_rcu(struct rcu_head *head) +{ + struct ptp_remove_table_work *ptp_remove_table_work = + kmalloc(sizeof(struct 
ptp_remove_table_work), GFP_ATOMIC); + + ptp_remove_table_work->batch = container_of(head, struct mmu_table_batch, rcu); + INIT_WORK(&ptp_remove_table_work->work, ptp_remove_table); + schedule_work(&ptp_remove_table_work->work); +} + +static void ptp_tlb_remove_table_free(struct mmu_table_batch *batch) +{ + call_rcu(&batch->rcu, ptp_tlb_remove_table_rcu); +} +#else +static void ptp_tlb_remove_table_free(struct mmu_table_batch *batch) +{ + __ptp_tlb_remove_table_free(batch); +} +#endif + +static void ptp_tlb_remove_table_one(void *table) +{ + tlb_remove_table_sync_one(); + __ptp_tlb_remove_table(table); +} + +static void ptp_tlb_table_flush(struct mmu_gather *tlb) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch) { + tlb_table_invalidate(tlb); + ptp_tlb_remove_table_free(*batch); + *batch = NULL; + } +} + +void ptp_tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch == NULL) { + *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN); + if (*batch == NULL) { + tlb_table_invalidate(tlb); + ptp_tlb_remove_table_one(table); + return; + } + (*batch)->nr = 0; + } + + (*batch)->tables[(*batch)->nr++] = table; + if ((*batch)->nr == MAX_TABLE_BATCH) + ptp_tlb_table_flush(tlb); +} +#endif + static inline void tlb_table_init(struct mmu_gather *tlb) { tlb->batch = NULL; @@ -469,3 +613,55 @@ void tlb_finish_mmu(struct mmu_gather *tlb) #endif dec_tlb_flush_pending(tlb->mm); } + +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) +static void ptp_tlb_flush_mmu_free(struct mmu_gather *tlb) +{ + ptp_tlb_table_flush(tlb); +#ifndef CONFIG_MMU_GATHER_NO_GATHER + ptp_tlb_batch_pages_flush(tlb); +#endif +} + +void ptp_tlb_flush_mmu(struct mmu_gather *tlb) +{ + tlb_flush_mmu_tlbonly(tlb); + ptp_tlb_flush_mmu_free(tlb); +} + +void ptp_tlb_finish_mmu(struct mmu_gather *tlb) +{ + /* + * If there are parallel threads are doing PTE changes on same range + * under non-exclusive lock (e.g., 
mmap_lock read-side) but defer TLB + * flush by batching, one thread may end up seeing inconsistent PTEs + * and result in having stale TLB entries. So flush TLB forcefully + * if we detect parallel PTE batching threads. + * + * However, some syscalls, e.g. munmap(), may free page tables, this + * needs force flush everything in the given range. Otherwise this + * may result in having stale TLB entries for some architectures, + * e.g. aarch64, that could specify flush what level TLB. + */ + if (mm_tlb_flush_nested(tlb->mm)) { + /* + * The aarch64 yields better performance with fullmm by + * avoiding multiple CPUs spamming TLBI messages at the + * same time. + * + * On x86 non-fullmm doesn't yield significant difference + * against fullmm. + */ + tlb->fullmm = 1; + __tlb_reset_range(tlb); + tlb->freed_tables = 1; + } + + ptp_tlb_flush_mmu(tlb); + +#ifndef CONFIG_MMU_GATHER_NO_GATHER + tlb_batch_list_free(tlb); +#endif + dec_tlb_flush_pending(tlb->mm); +} +#endif diff --git a/mm/mprotect.c b/mm/mprotect.c index 0fd7083a76662e5672571a43c887823fe4120e5d..e7508d4d3375f1ec7142cbb1e4eb20eb9ce5ca61 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -36,6 +36,9 @@ #include #include #include +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) +#include +#endif #include "internal.h" @@ -585,6 +588,9 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb, unsigned int mm_cp_flags = 0; unsigned long charged = 0; int error; +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + unsigned long reg; +#endif if (newflags == oldflags) { *pprev = vma; @@ -645,7 +651,15 @@ mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb, mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE; vma_set_page_prot(vma); + #if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + if (haoc_enabled) + ptp_disable_iee(®); + #endif change_protection(tlb, vma, start, end, mm_cp_flags); + #if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + if (haoc_enabled) + ptp_enable_iee(reg); + #endif /* * Private 
VM_LOCKED VMA becoming writable: trigger COW to avoid major diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index e8721990b9cbb34ec73fe24a5b2eba04081eb3a1..8bfc587b1091007b1a89577c6e8ba51da1d3764f 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -238,6 +238,22 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, } #endif +#if defined(CONFIG_PTP) && defined(CONFIG_X86_64) +struct ptp_pte_free_now_work { + struct work_struct work; + struct page *page; +}; + +static void ptp_pte_free_now(struct work_struct *work) +{ + struct ptp_pte_free_now_work *ptp_work = + container_of(work, struct ptp_pte_free_now_work, work); + + pte_free(NULL, ptp_work->page); + kfree(ptp_work); +} +#endif + /* arch define pte_free_defer in asm/pgalloc.h for its own implementation */ #ifndef pte_free_defer static void pte_free_now(struct rcu_head *head) @@ -245,7 +261,20 @@ static void pte_free_now(struct rcu_head *head) struct page *page; page = container_of(head, struct page, rcu_head); + #if defined(CONFIG_PTP) && defined(CONFIG_X86_64) + if (haoc_enabled){ + struct ptp_pte_free_now_work *ptp_work = + kmalloc(sizeof(struct ptp_pte_free_now_work), GFP_ATOMIC); + + ptp_work->page = page; + INIT_WORK(&ptp_work->work, ptp_pte_free_now); + schedule_work(&ptp_work->work); + } + else + pte_free(NULL /* mm not passed and not used */, (pgtable_t)page); + #else pte_free(NULL /* mm not passed and not used */, (pgtable_t)page); + #endif } void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable) diff --git a/mm/slab.h b/mm/slab.h index 62df6eeeb5ead7d70ca28451577eba773751e420..49e803d16b5ddaac369c1a5d78951cd14ba9ca43 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -25,6 +25,9 @@ typedef u64 freelist_full_t; #if defined(system_has_freelist_aba) && !defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) #undef system_has_freelist_aba #endif +#ifdef CONFIG_CREDP +extern struct kmem_cache *cred_jar; +#endif /* * Freelist pointer and counter to cmpxchg together, avoids the 
typical ABA @@ -721,7 +724,9 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, return s; } - +#ifdef CONFIG_CREDP +#include +#endif static inline void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg, gfp_t flags, size_t size, void **p, bool init, @@ -766,7 +771,16 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, for (i = 0; i < size; i++) { p[i] = kasan_slab_alloc(s, p[i], flags, kasan_init); if (p[i] && init && (!kasan_init || !kasan_has_integrated_init())) + { + #ifdef CONFIG_CREDP + if (haoc_enabled && s == cred_jar) + iee_memset(p[i], 0, zero_size); + else + memset(p[i], 0, zero_size); + #else memset(p[i], 0, zero_size); + #endif + } kmemleak_alloc_recursive(p[i], s->object_size, 1, s->flags, flags); kmsan_slab_alloc(s, p[i], flags); diff --git a/mm/slub.c b/mm/slub.c index 6c7b35de3ae6e449b290323e3c2bca8d4319a10a..25d26c3b4ea711b05e64ae5684d45d866646310d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -44,6 +44,15 @@ #include #include +#ifdef CONFIG_IEE +#include +#endif +#ifdef CONFIG_IEE_PTRP +#include +#endif +#if defined(CONFIG_CREDP) +#include +#endif #include "internal.h" @@ -165,6 +174,17 @@ * options set. This moves slab handling out of * the fast path and disables lockless freelists. 
*/ +#ifdef CONFIG_IEE +void __weak iee_allocate_slab_data(struct kmem_cache *s, struct slab *slab, unsigned int order) {} +bool __weak iee_free_slab_data(struct kmem_cache *s, struct slab *slab, unsigned int order) +{ + return false; +} +unsigned int __weak iee_calculate_order(struct kmem_cache *s, unsigned int order) +{ + return order; +} +#endif /* * We could simply use migrate_disable()/enable() but as long as it's a @@ -449,6 +469,13 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) #endif freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr); + #ifdef CONFIG_CREDP + if (haoc_enabled && s == cred_jar) { + iee_set_freeptr((void **)freeptr_addr, + (void *)(freelist_ptr_encode(s, fp, freeptr_addr).v)); + return; + } + #endif WRITE_ONCE(*(freeptr_t *)freeptr_addr, freelist_ptr_encode(s, fp, freeptr_addr)); } @@ -837,6 +864,29 @@ static void set_track_update(struct kmem_cache *s, void *object, { struct track *p = get_track(s, object, alloc); +#ifdef CONFIG_CREDP + struct track tmp; + + if (haoc_enabled && s == cred_jar) { + tmp = *p; + #ifdef CONFIG_STACKDEPOT + tmp.handle = handle; + #endif + tmp.addr = addr; + tmp.cpu = smp_processor_id(); + tmp.pid = current->pid; + tmp.when = jiffies; + iee_memcpy(p, &tmp, sizeof(struct track)); + } else { + #ifdef CONFIG_STACKDEPOT + p->handle = handle; + #endif + p->addr = addr; + p->cpu = smp_processor_id(); + p->pid = current->pid; + p->when = jiffies; + } + #else #ifdef CONFIG_STACKDEPOT p->handle = handle; #endif @@ -844,6 +894,7 @@ static void set_track_update(struct kmem_cache *s, void *object, p->cpu = smp_processor_id(); p->pid = current->pid; p->when = jiffies; + #endif } static __always_inline void set_track(struct kmem_cache *s, void *object, @@ -862,7 +913,14 @@ static void init_tracking(struct kmem_cache *s, void *object) return; p = get_track(s, object, TRACK_ALLOC); + #ifdef CONFIG_CREDP + if (haoc_enabled && s == cred_jar) + iee_memset(p, 0, 2*sizeof(struct 
track)); + else + memset(p, 0, 2*sizeof(struct track)); + #else memset(p, 0, 2*sizeof(struct track)); + #endif } static void print_track(const char *s, struct track *t, unsigned long pr_time) @@ -1033,7 +1091,14 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) unsigned int poison_size = s->object_size; if (s->flags & SLAB_RED_ZONE) { + #ifdef CONFIG_CREDP + if (haoc_enabled && s == cred_jar) + iee_memset(p - s->red_left_pad, val, s->red_left_pad); + else + memset(p - s->red_left_pad, val, s->red_left_pad); + #else memset(p - s->red_left_pad, val, s->red_left_pad); + #endif if (slub_debug_orig_size(s) && val == SLUB_RED_ACTIVE) { /* @@ -1046,12 +1111,31 @@ static void init_object(struct kmem_cache *s, void *object, u8 val) } if (s->flags & __OBJECT_POISON) { + #ifdef CONFIG_CREDP + if (haoc_enabled && s == cred_jar) { + iee_memset(p, POISON_FREE, poison_size - 1); + iee_memset(&p[poison_size - 1], POISON_END, 1); + } else { + memset(p, POISON_FREE, poison_size - 1); + p[poison_size - 1] = POISON_END; + } + #else memset(p, POISON_FREE, poison_size - 1); + #endif p[poison_size - 1] = POISON_END; } if (s->flags & SLAB_RED_ZONE) + #ifdef CONFIG_CREDP + { + if (haoc_enabled && s == cred_jar) + iee_memset(p + poison_size, val, s->inuse - poison_size); + else + memset(p + poison_size, val, s->inuse - poison_size); + } + #else memset(p + poison_size, val, s->inuse - poison_size); + #endif } static void restore_bytes(struct kmem_cache *s, char *message, u8 data, @@ -1426,7 +1510,14 @@ void setup_slab_debug(struct kmem_cache *s, struct slab *slab, void *addr) return; metadata_access_enable(); + #ifdef CONFIG_CREDP + if (haoc_enabled && s == cred_jar) + iee_memset(kasan_reset_tag(addr), POISON_INUSE, slab_size(slab)); + else + memset(kasan_reset_tag(addr), POISON_INUSE, slab_size(slab)); + #else memset(kasan_reset_tag(addr), POISON_INUSE, slab_size(slab)); + #endif metadata_access_disable(); } @@ -2036,6 +2127,10 @@ static struct slab 
*allocate_slab(struct kmem_cache *s, gfp_t flags, int node) alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_RECLAIM; slab = alloc_slab_page(alloc_gfp, node, oo); +#ifdef CONFIG_IEE_PTRP + if(haoc_enabled) + slab = iee_alloc_task_token_slab(s, slab, oo_order(oo)); +#endif if (unlikely(!slab)) { oo = s->min; alloc_gfp = flags; @@ -2044,6 +2139,10 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) * Try a lower order alloc if possible */ slab = alloc_slab_page(alloc_gfp, node, oo); +#ifdef CONFIG_IEE_PTRP + if(haoc_enabled) + slab = iee_alloc_task_token_slab(s, slab, oo_order(oo)); +#endif if (unlikely(!slab)) return NULL; stat(s, ORDER_FALLBACK); @@ -2053,6 +2152,15 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) slab->inuse = 0; slab->frozen = 0; +#ifdef CONFIG_IEE + if(haoc_enabled) + iee_allocate_slab_data(s, slab, oo_order(oo)); +#endif +#ifdef CONFIG_CREDP + if (haoc_enabled && s == cred_jar) + set_iee_page((unsigned long)page_address(folio_page(slab_folio(slab), 0)), + oo_order(oo)); +#endif account_slab(slab, oo_order(oo), s, flags); slab->slab_cache = s; @@ -2105,6 +2213,23 @@ static void __free_slab(struct kmem_cache *s, struct slab *slab) __folio_clear_slab(folio); mm_account_reclaimed_pages(pages); unaccount_slab(slab, order, s); +#ifdef CONFIG_IEE + if(haoc_enabled) + { + if (iee_free_slab_data(s, slab, order)) + return; + } +#endif +#ifdef CONFIG_CREDP + if (haoc_enabled && s == cred_jar) { + #ifdef CONFIG_X86_64 + iee_free_slab(s, slab, iee_free_cred_slab); + return; + #else + unset_iee_page((unsigned long)page_address(folio_page(folio, 0)), order); + #endif + } +#endif __free_pages(&folio->page, order); } @@ -3488,10 +3613,17 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list if (!s) return NULL; + #ifdef CONFIG_CREDP + if(haoc_enabled) + goto slab_alloc; + #endif object = kfence_alloc(s, orig_size, gfpflags); if (unlikely(object)) goto out; +#ifdef 
CONFIG_CREDP +slab_alloc: +#endif object = __slab_alloc_node(s, gfpflags, node, addr, orig_size); maybe_wipe_obj_freeptr(s, object); @@ -3978,6 +4110,11 @@ static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, local_lock_irqsave(&s->cpu_slab->lock, irqflags); for (i = 0; i < size; i++) { + #ifdef CONFIG_CREDP + /* Skip kfence_alloc for iee kmem caches. */ + if(haoc_enabled) + goto slab_alloc; + #endif void *object = kfence_alloc(s, s->object_size, flags); if (unlikely(object)) { @@ -3985,6 +4122,9 @@ static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, continue; } +#ifdef CONFIG_CREDP +slab_alloc: +#endif object = c->freelist; if (unlikely(!object)) { /* @@ -4504,6 +4644,9 @@ static int calculate_sizes(struct kmem_cache *s) s->size = size; s->reciprocal_size = reciprocal_value(size); order = calculate_order(size); + #ifdef CONFIG_IEE + order = iee_calculate_order(s, order); + #endif if ((int)order < 0) return 0; diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index c42ddd85ff1f9c3613b015c576b0ef458075d28e..a22c036b6050d8d868abf735c4e8d1d9a81112b0 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -32,6 +32,9 @@ #include #include #include +#ifdef CONFIG_CREDP +#include +#endif #include "internal.h" MODULE_DESCRIPTION("DNS Resolver"); @@ -365,8 +368,13 @@ static int __init init_dns_resolver(void) /* instruct request_key() to use this special keyring as a cache for * the results it looks up */ set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags); + #ifdef CONFIG_CREDP + iee_set_cred_thread_keyring(cred, keyring); + iee_set_cred_jit_keyring(cred, KEY_REQKEY_DEFL_THREAD_KEYRING); + #else cred->thread_keyring = keyring; cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; + #endif dns_resolver_cache = cred; kdebug("DNS resolver keyring: %d\n", key_serial(keyring)); diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 
04534ea537c8fd15082a2c5b23b02472f9175c3b..49e51e297cc110bd25f7094d6059d706a033f38a 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -38,9 +38,13 @@ static const struct rpc_authops __rcu *auth_flavors[RPC_AUTH_MAXFLAVOR] = { static LIST_HEAD(cred_unused); static unsigned long number_cred_unused; +#ifdef CONFIG_CREDP +static struct cred *machine_cred; +#else static struct cred machine_cred = { .usage = ATOMIC_INIT(1), }; +#endif /* * Return the machine_cred pointer to be used whenever @@ -48,7 +52,11 @@ static struct cred machine_cred = { */ const struct cred *rpc_machine_cred(void) { + #ifdef CONFIG_CREDP + return machine_cred; + #else return &machine_cred; + #endif } EXPORT_SYMBOL_GPL(rpc_machine_cred); @@ -659,15 +667,27 @@ rpcauth_bindcred(struct rpc_task *task, const struct cred *cred, int flags) if (task->tk_op_cred) /* Task must use exactly this rpc_cred */ new = get_rpccred(task->tk_op_cred); + #ifdef CONFIG_CREDP + else if (cred && cred != machine_cred) + #else else if (cred != NULL && cred != &machine_cred) + #endif new = auth->au_ops->lookup_cred(auth, &acred, lookupflags); + #ifdef CONFIG_CREDP + else if (cred == machine_cred) + #else else if (cred == &machine_cred) + #endif new = rpcauth_bind_machine_cred(task, lookupflags); /* If machine cred couldn't be bound, try a root cred */ if (new) ; + #ifdef CONFIG_CREDP + else if (cred == machine_cred) + #else else if (cred == &machine_cred) + #endif new = rpcauth_bind_root_cred(task, lookupflags); else if (flags & RPC_TASK_NULLCREDS) new = authnull_ops.lookup_cred(NULL, NULL, 0); @@ -869,6 +889,14 @@ int __init rpcauth_init_module(void) { int err; + #ifdef CONFIG_CREDP + machine_cred = prepare_creds(); + if (!machine_cred) { + pr_err("RPCAUTH: Failed to allocate machine_cred\n"); + return -ENOMEM; + } + #endif + err = rpc_init_authunix(); if (err < 0) goto out1; @@ -887,11 +915,20 @@ int __init rpcauth_init_module(void) out2: rpc_destroy_authunix(); out1: + #ifdef CONFIG_CREDP + if (machine_cred) { + 
abort_creds(machine_cred); + machine_cred = NULL; + } + #endif return err; } void rpcauth_remove_module(void) { + #ifdef CONFIG_CREDP + abort_creds(machine_cred); + #endif rpc_destroy_authunix(); shrinker_free(rpc_cred_shrinker); } diff --git a/security/commoncap.c b/security/commoncap.c index bc05211041979c120831742d25f2e6133435166c..5a26b0d1ee9fa7c9a2d19767da15ad26afb972a9 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -25,6 +25,9 @@ #include #include #include +#ifdef CONFIG_CREDP +#include +#endif /* * If a non-root user executes a setuid-root binary in @@ -265,7 +268,14 @@ int cap_capset(struct cred *new, /* verify the _new_Effective_ is a subset of the _new_Permitted_ */ if (!cap_issubset(*effective, *permitted)) return -EPERM; - + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new, *effective); + iee_set_cred_cap_inheritable(new, *inheritable); + iee_set_cred_cap_permitted(new, *permitted); + + iee_set_cred_cap_ambient(new, cap_intersect(new->cap_ambient, + cap_intersect(*permitted, *inheritable))); + #else new->cap_effective = *effective; new->cap_inheritable = *inheritable; new->cap_permitted = *permitted; @@ -277,6 +287,7 @@ int cap_capset(struct cred *new, new->cap_ambient = cap_intersect(new->cap_ambient, cap_intersect(*permitted, *inheritable)); + #endif if (WARN_ON(!cap_ambient_invariant_ok(new))) return -EINVAL; return 0; @@ -601,9 +612,17 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, * pP' = (X & fP) | (pI & fI) * The addition of pA' is handled later. 
*/ + #ifdef CONFIG_CREDP + kernel_cap_t temp = new->cap_permitted; + + temp.val = (new->cap_bset.val & caps->permitted.val) | + (new->cap_inheritable.val & caps->inheritable.val); + iee_set_cred_cap_permitted(new, temp); + #else new->cap_permitted.val = (new->cap_bset.val & caps->permitted.val) | (new->cap_inheritable.val & caps->inheritable.val); + #endif if (caps->permitted.val & ~new->cap_permitted.val) /* insufficient to execute correctly */ @@ -726,7 +745,16 @@ static int get_file_caps(struct linux_binprm *bprm, struct file *file, int rc = 0; struct cpu_vfs_cap_data vcaps; + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = bprm->cred->cap_permitted; + + cap_clear(tmp_cap); + iee_set_cred_cap_permitted(bprm->cred, tmp_cap); + } while (0); + #else cap_clear(bprm->cred->cap_permitted); + #endif if (!file_caps_enabled) return 0; @@ -757,7 +785,16 @@ static int get_file_caps(struct linux_binprm *bprm, struct file *file, out: if (rc) + #ifdef CONFIG_CREDP + { + kernel_cap_t tmp_cap = bprm->cred->cap_permitted; + + cap_clear(tmp_cap); + iee_set_cred_cap_permitted(bprm->cred, tmp_cap); + } + #else cap_clear(bprm->cred->cap_permitted); + #endif return rc; } @@ -809,8 +846,13 @@ static void handle_privileged_root(struct linux_binprm *bprm, bool has_fcap, */ if (__is_eff(root_uid, new) || __is_real(root_uid, new)) { /* pP' = (cap_bset & ~0) | (pI & ~0) */ + #ifdef CONFIG_CREDP + iee_set_cred_cap_permitted(new, cap_combine(old->cap_bset, + old->cap_inheritable)); + #else new->cap_permitted = cap_combine(old->cap_bset, old->cap_inheritable); + #endif } /* * If only the real uid is 0, we do not set the effective bit. 
@@ -919,34 +961,73 @@ int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file) /* downgrade; they get no more than they had, and maybe less */ if (!ns_capable(new->user_ns, CAP_SETUID) || (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) { + #ifdef CONFIG_CREDP + iee_set_cred_euid(new, new->uid); + iee_set_cred_egid(new, new->gid); + #else new->euid = new->uid; new->egid = new->gid; + #endif } + #ifdef CONFIG_CREDP + iee_set_cred_cap_permitted(new, cap_intersect(new->cap_permitted, + old->cap_permitted)); + #else new->cap_permitted = cap_intersect(new->cap_permitted, old->cap_permitted); + #endif } + #ifdef CONFIG_CREDP + iee_set_cred_fsuid(new, new->euid); + iee_set_cred_suid(new, new->euid); + iee_set_cred_fsgid(new, new->egid); + iee_set_cred_sgid(new, new->egid); + #else new->suid = new->fsuid = new->euid; new->sgid = new->fsgid = new->egid; + #endif /* File caps or setid cancels ambient. */ if (has_fcap || is_setid) + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = new->cap_ambient; + + cap_clear(tmp_cap); + iee_set_cred_cap_ambient(new, tmp_cap); + } while (0); + #else cap_clear(new->cap_ambient); + #endif /* * Now that we've computed pA', update pP' to give: * pP' = (X & fP) | (pI & fI) | pA' */ + #ifdef CONFIG_CREDP + iee_set_cred_cap_permitted(new, + cap_combine(new->cap_permitted, new->cap_ambient)); + #else new->cap_permitted = cap_combine(new->cap_permitted, new->cap_ambient); + #endif /* * Set pE' = (fE ? pP' : pA'). Because pA' is zero if fE is set, * this is the same as pE' = (fE ? pP' : 0) | pA'. 
*/ if (effective) + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new, new->cap_permitted); + #else new->cap_effective = new->cap_permitted; + #endif else + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new, new->cap_ambient); + #else new->cap_effective = new->cap_ambient; + #endif if (WARN_ON(!cap_ambient_invariant_ok(new))) return -EPERM; @@ -957,7 +1038,12 @@ int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file) return ret; } + #ifdef CONFIG_CREDP + iee_set_cred_securebits(new, + new->securebits & ~issecure_mask(SECURE_KEEP_CAPS)); + #else new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); + #endif if (WARN_ON(!cap_ambient_invariant_ok(new))) return -EPERM; @@ -1092,8 +1178,17 @@ static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old) !uid_eq(new->euid, root_uid) && !uid_eq(new->suid, root_uid))) { if (!issecure(SECURE_KEEP_CAPS)) { + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = new->cap_permitted; + + cap_clear(tmp_cap); + iee_set_cred_cap_permitted(new, tmp_cap); + } while (0); + #else cap_clear(new->cap_permitted); cap_clear(new->cap_effective); + #endif } /* @@ -1101,12 +1196,34 @@ static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old) * by exec to drop capabilities. We should make sure that * this remains the case. 
*/ + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = new->cap_ambient; + + cap_clear(tmp_cap); + iee_set_cred_cap_ambient(new, tmp_cap); + } while (0); + #else cap_clear(new->cap_ambient); + #endif } if (uid_eq(old->euid, root_uid) && !uid_eq(new->euid, root_uid)) + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = new->cap_effective; + + cap_clear(tmp_cap); + iee_set_cred_cap_effective(new, tmp_cap); + } while (0); + #else cap_clear(new->cap_effective); + #endif if (!uid_eq(old->euid, root_uid) && uid_eq(new->euid, root_uid)) + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new, new->cap_permitted); + #else new->cap_effective = new->cap_permitted; + #endif } /** @@ -1142,13 +1259,23 @@ int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags) if (!issecure(SECURE_NO_SETUID_FIXUP)) { kuid_t root_uid = make_kuid(old->user_ns, 0); if (uid_eq(old->fsuid, root_uid) && !uid_eq(new->fsuid, root_uid)) + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new, + cap_drop_fs_set(new->cap_effective)); + #else new->cap_effective = cap_drop_fs_set(new->cap_effective); + #endif if (!uid_eq(old->fsuid, root_uid) && uid_eq(new->fsuid, root_uid)) + #ifdef CONFIG_CREDP + iee_set_cred_cap_effective(new, cap_raise_fs_set(new->cap_effective, + new->cap_permitted)); + #else new->cap_effective = cap_raise_fs_set(new->cap_effective, new->cap_permitted); + #endif } break; @@ -1243,7 +1370,16 @@ static int cap_prctl_drop(unsigned long cap) new = prepare_creds(); if (!new) return -ENOMEM; + #ifdef CONFIG_CREDP + { + kernel_cap_t tmp = new->cap_bset; + + cap_lower(tmp, cap); + iee_set_cred_cap_bset(new, tmp); + } + #else cap_lower(new->cap_bset, cap); + #endif return commit_creds(new); } @@ -1319,7 +1455,11 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, new = prepare_creds(); if (!new) return -ENOMEM; + #ifdef CONFIG_CREDP + iee_set_cred_securebits(new, arg2); + #else new->securebits = arg2; + #endif return commit_creds(new); case 
PR_GET_SECUREBITS: @@ -1338,9 +1478,19 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, if (!new) return -ENOMEM; if (arg2) + #ifdef CONFIG_CREDP + iee_set_cred_securebits(new, + new->securebits | issecure_mask(SECURE_KEEP_CAPS)); + #else new->securebits |= issecure_mask(SECURE_KEEP_CAPS); + #endif else + #ifdef CONFIG_CREDP + iee_set_cred_securebits(new, + new->securebits & ~issecure_mask(SECURE_KEEP_CAPS)); + #else new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); + #endif return commit_creds(new); case PR_CAP_AMBIENT: @@ -1351,7 +1501,16 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, new = prepare_creds(); if (!new) return -ENOMEM; + #ifdef CONFIG_CREDP + do { + kernel_cap_t tmp_cap = new->cap_ambient; + + cap_clear(tmp_cap); + iee_set_cred_cap_ambient(new, tmp_cap); + } while (0); + #else cap_clear(new->cap_ambient); + #endif return commit_creds(new); } @@ -1375,9 +1534,27 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, if (!new) return -ENOMEM; if (arg2 == PR_CAP_AMBIENT_RAISE) + #ifdef CONFIG_CREDP + { + kernel_cap_t tmp = new->cap_ambient; + + cap_raise(tmp, arg3); + iee_set_cred_cap_ambient(new, tmp); + } + #else cap_raise(new->cap_ambient, arg3); + #endif else + #ifdef CONFIG_CREDP + { + kernel_cap_t tmp = new->cap_ambient; + + cap_lower(tmp, arg3); + iee_set_cred_cap_ambient(new, tmp); + } + #else cap_lower(new->cap_ambient, arg3); + #endif return commit_creds(new); } diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index aa1dc43b16ddf3bca42b6b9da1741e1aa3941caa..60856f2f4220c99b95a9be90cc9435f4e697ba77 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -22,6 +22,9 @@ #include #include #include +#ifdef CONFIG_CREDP +#include +#endif #include "internal.h" #define KEY_MAX_DESC_SIZE 4096 @@ -1155,7 +1158,11 @@ static int keyctl_change_reqkey_auth(struct key *key) return -ENOMEM; key_put(new->request_key_auth); + #ifdef CONFIG_CREDP + 
iee_set_cred_request_key_auth(new, key_get(key)); + #else new->request_key_auth = key_get(key); + #endif return commit_creds(new); } @@ -1432,7 +1439,11 @@ long keyctl_set_reqkey_keyring(int reqkey_defl) } set: + #ifdef CONFIG_CREDP + iee_set_cred_jit_keyring(new, reqkey_defl); + #else new->jit_keyring = reqkey_defl; + #endif commit_creds(new); return old_setting; error: @@ -1644,9 +1655,20 @@ long keyctl_session_to_parent(void) cred = cred_alloc_blank(); if (!cred) goto error_keyring; + #ifdef CONFIG_CREDP + if(haoc_enabled) + newwork = (struct rcu_head *)(cred->rcu.func); + else + newwork = &cred->rcu; + #else newwork = &cred->rcu; + #endif + #ifdef CONFIG_CREDP + iee_set_cred_session_keyring(cred, key_ref_to_ptr(keyring_r)); + #else cred->session_keyring = key_ref_to_ptr(keyring_r); + #endif keyring_r = NULL; init_task_work(newwork, key_change_session_keyring); @@ -1704,8 +1726,16 @@ long keyctl_session_to_parent(void) unlock: write_unlock_irq(&tasklist_lock); rcu_read_unlock(); - if (oldwork) + if (oldwork){ + #ifdef CONFIG_CREDP + if(haoc_enabled) + put_cred(*(struct cred **)(oldwork + 1)); + else + put_cred(container_of(oldwork, struct cred, rcu)); + #else put_cred(container_of(oldwork, struct cred, rcu)); + #endif + } if (newwork) put_cred(cred); return ret; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index b5d5333ab3300e86862515082946552fcf4e6e1d..aa56394380fd7f689baa149e26986f17ac549bdc 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -17,6 +17,9 @@ #include #include #include +#ifdef CONFIG_CREDP +#include +#endif #include "internal.h" /* Session keyring create vs join semaphore */ @@ -232,7 +235,11 @@ int install_thread_keyring_to_cred(struct cred *new) if (IS_ERR(keyring)) return PTR_ERR(keyring); + #ifdef CONFIG_CREDP + iee_set_cred_thread_keyring(new, keyring); + #else new->thread_keyring = keyring; + #endif return 0; } @@ -279,7 +286,11 @@ int install_process_keyring_to_cred(struct cred *new) 
if (IS_ERR(keyring)) return PTR_ERR(keyring); + #ifdef CONFIG_CREDP + iee_set_cred_process_keyring(new, keyring); + #else new->process_keyring = keyring; + #endif return 0; } @@ -338,7 +349,11 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) /* install the keyring */ old = cred->session_keyring; + #ifdef CONFIG_CREDP + iee_set_cred_session_keyring(cred, keyring); + #else cred->session_keyring = keyring; + #endif if (old) key_put(old); @@ -911,7 +926,15 @@ long join_session_keyring(const char *name) void key_change_session_keyring(struct callback_head *twork) { const struct cred *old = current_cred(); + #ifdef CONFIG_CREDP + struct cred *new =NULL; + if(haoc_enabled) + new = *(struct cred **)(twork + 1); + else + new = container_of(twork, struct cred, rcu); + #else struct cred *new = container_of(twork, struct cred, rcu); + #endif if (unlikely(current->flags & PF_EXITING)) { put_cred(new); @@ -925,6 +948,31 @@ void key_change_session_keyring(struct callback_head *twork) return; } + #ifdef CONFIG_CREDP + iee_set_cred_uid(new, old->uid); + iee_set_cred_euid(new, old->euid); + iee_set_cred_suid(new, old->suid); + iee_set_cred_fsuid(new, old->fsuid); + iee_set_cred_gid(new, old->gid); + iee_set_cred_egid(new, old->egid); + iee_set_cred_sgid(new, old->sgid); + iee_set_cred_fsgid(new, old->fsgid); + iee_set_cred_user(new, get_uid(old->user)); + iee_set_cred_ucounts(new, old->ucounts); + iee_set_cred_user_ns(new, get_user_ns(old->user_ns)); + iee_set_cred_group_info(new, get_group_info(old->group_info)); + + iee_set_cred_securebits(new, old->securebits); + iee_set_cred_cap_inheritable(new, old->cap_inheritable); + iee_set_cred_cap_permitted(new, old->cap_permitted); + iee_set_cred_cap_effective(new, old->cap_effective); + iee_set_cred_cap_ambient(new, old->cap_ambient); + iee_set_cred_cap_bset(new, old->cap_bset); + + iee_set_cred_jit_keyring(new, old->jit_keyring); + iee_set_cred_thread_keyring(new, key_get(old->thread_keyring)); + 
iee_set_cred_process_keyring(new, key_get(old->process_keyring)); + #else new-> uid = old-> uid; new-> euid = old-> euid; new-> suid = old-> suid; @@ -948,6 +996,7 @@ void key_change_session_keyring(struct callback_head *twork) new->jit_keyring = old->jit_keyring; new->thread_keyring = key_get(old->thread_keyring); new->process_keyring = key_get(old->process_keyring); + #endif security_transfer_creds(new, old); diff --git a/security/security.c b/security/security.c index b6144833c7a8ea4e2221183e3bb9a56d54b02eb4..23206976e2f5ad718c743ed5015e55664d938451 100644 --- a/security/security.c +++ b/security/security.c @@ -30,6 +30,9 @@ #include #include #include +#ifdef CONFIG_CREDP +#include +#endif /* How many LSMs were built into the kernel? */ #define LSM_COUNT (__end_lsm_info - __start_lsm_info) @@ -570,11 +573,19 @@ EXPORT_SYMBOL(unregister_blocking_lsm_notifier); static int lsm_cred_alloc(struct cred *cred, gfp_t gfp) { if (blob_sizes.lbs_cred == 0) { + #ifdef CONFIG_CREDP + iee_set_cred_security(cred, NULL); + #else cred->security = NULL; + #endif return 0; } + #ifdef CONFIG_CREDP + iee_set_cred_security(cred, kzalloc(blob_sizes.lbs_cred, gfp)); + #else cred->security = kzalloc(blob_sizes.lbs_cred, gfp); + #endif if (cred->security == NULL) return -ENOMEM; return 0; @@ -2950,7 +2961,11 @@ void security_cred_free(struct cred *cred) call_void_hook(cred_free, cred); kfree(cred->security); + #ifdef CONFIG_CREDP + iee_set_cred_security(cred, NULL); + #else cred->security = NULL; + #endif } /**