diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a1eab2b7d5b3c1bca364ce2802f0c6753499c62b..2ad130a4f107ec6b8e62ea69b4323d041c056908 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -170,6 +170,8 @@ config ARM64 select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS \ if $(cc-option,-fpatchable-function-entry=2) + select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS \ + if DYNAMIC_FTRACE_WITH_REGS select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 4a42de35a8984663868a003541551c3090024920..e6b71f720419aa9bcb26d65fde7beab155f60ece 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -128,7 +128,11 @@ CHECKFLAGS += -D__aarch64__ ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_REGS),y) KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY + ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS),y) + CC_FLAGS_FTRACE := -fpatchable-function-entry=7,5 + else CC_FLAGS_FTRACE := -fpatchable-function-entry=2 + endif endif # Default value diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index 0b4d5a6990aa0e7c840336179a00e16b5974a5e0..b77a2bb37b5efd827cb5430d8e15a43934a45e1d 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -45,27 +45,28 @@ extern void _mcount(unsigned long); extern void *return_address(unsigned int); struct dyn_arch_ftrace { - /* No extra data needed for arm64 */ +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + unsigned long func; /* start address of function */ +#endif }; extern unsigned long ftrace_graph_call; extern void return_to_handler(void); -static inline unsigned long ftrace_call_adjust(unsigned long addr) +unsigned long ftrace_call_adjust(unsigned long addr); + +#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, + unsigned long addr) { /* - * Adjust addr to point at the BL in the callsite. 
- * See ftrace_init_nop() for the callsite sequence. - */ - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) - return addr + AARCH64_INSN_SIZE; - /* - * addr is the address of the mcount call instruction. - * recordmcount does the necessary offset calculation. + * Place custom trampoline address in regs->orig_x0 to let ftrace + * trampoline jump to it. */ - return addr; + regs->orig_x0 = addr; } +#endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS struct dyn_ftrace; @@ -103,6 +104,14 @@ static inline bool arch_syscall_match_sym_name(const char *sym, */ return !strcmp(sym + 8, name); } + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + +#define ftrace_dummy_tramp ftrace_dummy_tramp +extern void ftrace_dummy_tramp(void); + +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + #endif /* ifndef __ASSEMBLY__ */ #endif /* __ASM_FTRACE_H */ diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index c0973345e6e1d582ff3261f3e2d61a6e6da4706a..f837a55e7abd4f0e74a8b2b0dca78d0903a62c6e 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -200,6 +200,8 @@ enum aarch64_insn_size_type { enum aarch64_insn_ldst_type { AARCH64_INSN_LDST_LOAD_REG_OFFSET, AARCH64_INSN_LDST_STORE_REG_OFFSET, + AARCH64_INSN_LDST_LOAD_IMM_OFFSET, + AARCH64_INSN_LDST_STORE_IMM_OFFSET, AARCH64_INSN_LDST_LOAD_PAIR_PRE_INDEX, AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX, AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX, @@ -296,8 +298,10 @@ __AARCH64_INSN_FUNCS(adrp, 0x9F000000, 0x90000000) __AARCH64_INSN_FUNCS(prfm, 0x3FC00000, 0x39800000) __AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000) __AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800) +__AARCH64_INSN_FUNCS(str_imm, 0x3FC00000, 0x39000000) __AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0x38200000) __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) +__AARCH64_INSN_FUNCS(ldr_imm, 0x3FC00000, 0x39400000) __AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) 
__AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) __AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000) @@ -407,6 +411,14 @@ u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg, enum aarch64_insn_register offset, enum aarch64_insn_size_type size, enum aarch64_insn_ldst_type type); +u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg, + enum aarch64_insn_register base, + unsigned int imm, + enum aarch64_insn_size_type size, + enum aarch64_insn_ldst_type type); +u32 aarch64_insn_gen_load_literal(unsigned long pc, unsigned long addr, + enum aarch64_insn_register reg, + bool is64bit); u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, enum aarch64_insn_register reg2, enum aarch64_insn_register base, @@ -491,6 +503,8 @@ u32 aarch64_set_branch_offset(u32 insn, s32 offset); int aarch64_insn_patch_text_nosync(void *addr, u32 insn); int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt); +void aarch64_literal64_write(void *addr, u64 data); + s32 aarch64_insn_adrp_get_offset(u32 insn); u32 aarch64_insn_adrp_set_offset(u32 insn, s32 offset); diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 5f59e24c95d33c47ebebadb19edb03e7523bd69a..c247e11130db7d88fcff5c3b237864f8b87855ca 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -73,6 +73,7 @@ int main(void) DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1)); DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save)); DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); + DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); #ifdef CONFIG_AARCH32_EL0 diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 67f68c9ef94c43fb918cd493360a802c7134ad90..766468570ee6512a9097a2d241250fdb5837237c 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -14,14 +14,16 @@ #ifdef 
CONFIG_DYNAMIC_FTRACE_WITH_REGS /* - * Due to -fpatchable-function-entry=2, the compiler has placed two NOPs before - * the regular function prologue. For an enabled callsite, ftrace_init_nop() and - * ftrace_make_call() have patched those NOPs to: + * Due to -fpatchable-function-entry=2 or -fpatchable-function-entry=7,5, the + * compiler has placed two NOPs before the regular function prologue. For an + * enabled callsite, ftrace_init_nop() and ftrace_make_call() have patched those + * NOPs to: * * MOV X9, LR * BL <tramp> * - * ... where <tramp> is either ftrace_caller or ftrace_regs_caller. + * ... where <tramp> is ftrace_caller or ftrace_regs_caller or a custom + * trampoline. * * Each instrumented function follows the AAPCS, so here x0-x8 and x18-x30 are * live (x18 holds the Shadow Call Stack pointer), and x9-x17 are safe to @@ -60,6 +62,9 @@ str x29, [sp, #S_FP] .endif + /* Set custom_tramp to zero */ + str xzr, [sp, #S_ORIG_X0] + /* Save the callsite's SP and LR */ add x10, sp, #(S_FRAME_SIZE + 16) stp x9, x10, [sp, #S_LR] @@ -123,12 +128,21 @@ ftrace_common_return: /* Restore the callsite's FP, LR, PC */ ldr x29, [sp, #S_FP] ldr x30, [sp, #S_LR] - ldr x9, [sp, #S_PC] - + ldr x10, [sp, #S_PC] + + ldr x11, [sp, #S_ORIG_X0] + cbz x11, 1f + /* Set x9 to parent ip before jump to custom trampoline */ + mov x9, x30 + /* Set lr to self ip */ + ldr x30, [sp, #S_PC] + /* Set x10 (used for return address) to custom trampoline */ + mov x10, x11 +1: /* Restore the callsite's SP */ add sp, sp, #S_FRAME_SIZE + 16 - ret x9 + ret x10 SYM_CODE_END(ftrace_common) #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -349,3 +363,14 @@ SYM_CODE_START(return_to_handler) ret SYM_CODE_END(return_to_handler) #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +SYM_FUNC_START(ftrace_dummy_tramp) +#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) + bti j /* ftrace_dummy_tramp is called via "br x10" */ +#endif + mov x10, x30 + mov x30, x9 + ret x10 +SYM_FUNC_END(ftrace_dummy_tramp) +#endif /* 
CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 402a24f845b9e5aec2d76e0ef491b28503be89be..c7d4764a553fdad1b9ed02900786aaddab8081ca 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -76,6 +76,132 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr) return NULL; } +enum ftrace_callsite_action { + FC_INIT, + FC_REMOVE_CALL, + FC_ADD_CALL, + FC_REPLACE_CALL, +}; + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + +/* + * When func is 8-byte aligned, literal_call is located at func - 8 and literal + * is located at func - 16: + * + * NOP + * literal: + * .quad ftrace_dummy_tramp + * literal_call: + * LDR X16, literal + * BR X16 + * func: + * BTI C // if BTI + * MOV X9, LR + * NOP + * + * When func is not 8-byte aligned, literal_call is located at func - 8 and + * literal is located at func - 20: + * + * literal: + * .quad ftrace_dummy_tramp + * NOP + * literal_call: + * LDR X16, literal + * BR X16 + * func: + * BTI C // if BTI + * MOV X9, LR + * NOP + */ + +static unsigned long ftrace_literal_call_addr(struct dyn_ftrace *rec) +{ + if (rec->arch.func == 0UL) + return 0UL; + + return rec->arch.func - 2 * AARCH64_INSN_SIZE; +} + +static unsigned long ftrace_literal_addr(struct dyn_ftrace *rec) +{ + unsigned long addr = 0; + + addr = ftrace_literal_call_addr(rec); + if (addr == 0UL) + return 0UL; + + if (addr % sizeof(long)) + addr -= 3 * AARCH64_INSN_SIZE; + else + addr -= 2 * AARCH64_INSN_SIZE; + + return addr; +} + +static void ftrace_update_literal(unsigned long literal_addr, unsigned long call_target, + int action) +{ + unsigned long dummy_tramp = (unsigned long)&ftrace_dummy_tramp; + + if (action == FC_INIT || action == FC_REMOVE_CALL) + aarch64_literal64_write((void *)literal_addr, dummy_tramp); + else if (action == FC_ADD_CALL) + aarch64_literal64_write((void *)literal_addr, call_target); +} + +static int ftrace_init_literal(struct module 
*mod, struct dyn_ftrace *rec) +{ + int ret; + u32 old, new; + unsigned long addr; + unsigned long pc = rec->ip - AARCH64_INSN_SIZE; + + old = aarch64_insn_gen_nop(); + + addr = ftrace_literal_addr(rec); + if (addr == 0UL) + return 0UL; + + ftrace_update_literal(addr, 0, FC_INIT); + + pc = ftrace_literal_call_addr(rec); + new = aarch64_insn_gen_load_literal(pc, addr, AARCH64_INSN_REG_16, + true); + ret = ftrace_modify_code(pc, old, new, true); + if (ret) + return ret; + + pc += AARCH64_INSN_SIZE; + new = aarch64_insn_gen_branch_reg(AARCH64_INSN_REG_16, + AARCH64_INSN_BRANCH_NOLINK); + return ftrace_modify_code(pc, old, new, true); +} + +#else + +static inline unsigned long ftrace_literal_addr(struct dyn_ftrace *rec) +{ + return 0; +} + +static inline unsigned long ftrace_literal_call_addr(struct dyn_ftrace *rec) +{ + return 0; +} + +static inline void ftrace_update_literal(unsigned long literal_addr, unsigned long call_target, + int action) +{ +} + +static inline int ftrace_init_literal(struct module *mod, struct dyn_ftrace *rec) +{ + return 0; +} + +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + /* * Find the address the callsite must branch to in order to reach '*addr'. 
* @@ -87,7 +213,8 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr) */ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, struct module *mod, - unsigned long *addr) + unsigned long *addr, + int action) { unsigned long pc = rec->ip; long offset = (long)*addr - (long)pc; @@ -100,6 +227,17 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, if (offset >= -SZ_128M && offset < SZ_128M) return true; + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS)) { + unsigned long literal_addr; + + literal_addr = ftrace_literal_addr(rec); + if (literal_addr != 0UL) { + ftrace_update_literal(literal_addr, *addr, action); + *addr = ftrace_literal_call_addr(rec); + return true; + } + } + /* * When the target is outside of the range of a 'BL' instruction, we * must use a PLT to reach it. We can only place PLTs for modules, and @@ -144,7 +282,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) unsigned long pc = rec->ip; u32 old, new; - if (!ftrace_find_callable_addr(rec, NULL, &addr)) + if (!ftrace_find_callable_addr(rec, NULL, &addr, FC_ADD_CALL)) return -EINVAL; old = aarch64_insn_gen_nop(); @@ -160,9 +298,9 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long pc = rec->ip; u32 old, new; - if (!ftrace_find_callable_addr(rec, NULL, &old_addr)) + if (!ftrace_find_callable_addr(rec, NULL, &old_addr, FC_REPLACE_CALL)) return -EINVAL; - if (!ftrace_find_callable_addr(rec, NULL, &addr)) + if (!ftrace_find_callable_addr(rec, NULL, &addr, FC_ADD_CALL)) return -EINVAL; old = aarch64_insn_gen_branch_imm(pc, old_addr, @@ -187,9 +325,10 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, * | NOP | MOV X9, LR | MOV X9, LR | * | NOP | NOP | BL | * - * The LR value will be recovered by ftrace_regs_entry, and restored into LR - * before returning to the regular function prologue. 
When a function is not - * being traced, the MOV is not harmful given x9 is not live per the AAPCS. + * The LR value will be recovered by ftrace_regs_entry or custom trampoline, + * and restored into LR before returning to the regular function prologue. + * When a function is not being traced, the MOV is not harmful given x9 is + * not live per the AAPCS. * * Note: ftrace_process_locs() has pre-adjusted rec->ip to be the address of * the BL. @@ -199,6 +338,14 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) unsigned long pc = rec->ip - AARCH64_INSN_SIZE; u32 old, new; + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS)) { + int ret; + + ret = ftrace_init_literal(mod, rec); + if (ret) + return ret; + } + old = aarch64_insn_gen_nop(); new = aarch64_insn_gen_move_reg(AARCH64_INSN_REG_9, AARCH64_INSN_REG_LR, @@ -207,6 +354,69 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) } #endif +static int ftrace_nop_count(unsigned long addr) +{ + u32 insn; + u32 nop = aarch64_insn_gen_nop(); + int count = 0; + + for (;;) { + if (aarch64_insn_read((void *)addr, &insn)) + return -1; + + if (insn != nop) + break; + + count++; + addr += AARCH64_INSN_SIZE; + } + + return count; +} + +unsigned long ftrace_call_adjust(unsigned long addr) +{ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS)) { + u32 insn; + u32 nop = aarch64_insn_gen_nop(); + int count = ftrace_nop_count(addr); + + if (count != 5 && count != 7 && count != 2) + return 0; + + if (count == 5 || count == 7) { + /* Skip the first 5 NOPS */ + addr += 5 * AARCH64_INSN_SIZE; + + /* Skip bti c */ + if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) { + if (aarch64_insn_read((void *)addr, &insn)) + return 0; + + if (insn != nop) + addr += AARCH64_INSN_SIZE; + } + } + + if (ftrace_nop_count(addr) != 2) + return 0; + + return addr + AARCH64_INSN_SIZE; + } else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) { + /* + * Adjust addr to point at the BL in the callsite. 
+ * See ftrace_init_nop() for the callsite sequence. + */ + return addr + AARCH64_INSN_SIZE; + } + + /* + * addr is the address of the mcount call instruction. + * recordmcount does the necessary offset calculation. + */ + return addr; +} + /* * Turn off the call to ftrace_caller() in instrumented function */ @@ -232,7 +442,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return aarch64_insn_patch_text_nosync((void *)pc, new); } - if (!ftrace_find_callable_addr(rec, mod, &addr)) + if (!ftrace_find_callable_addr(rec, mod, &addr, FC_REMOVE_CALL)) return -EINVAL; old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); @@ -246,6 +456,28 @@ void arch_ftrace_update_code(int command) ftrace_modify_all_code(command); } +bool ftrace_directable(struct dyn_ftrace *rec) +{ +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + return rec->arch.func != 0UL; +#else + return false; +#endif +} + +void ftrace_rec_arch_init(struct dyn_ftrace *rec, unsigned long func) +{ +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + int count; + + count = ftrace_nop_count(func); + if (count == 5 || count == 7) + rec->arch.func = func + 5 * AARCH64_INSN_SIZE; + else + rec->arch.func = 0UL; +#endif +} + int __init ftrace_dyn_arch_init(void) { return 0; diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 7d4fdf9745428a6f8aaad4149571f99d4128e949..fbd2b7eec1dc5c4cb38386ec96da546bda5224fb 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -153,6 +153,20 @@ static int __kprobes __aarch64_insn_write(void *addr, __le32 insn) return ret; } +void __kprobes aarch64_literal64_write(void *addr, u64 data) +{ + u64 *waddr; + unsigned long flags = 0; + + raw_spin_lock_irqsave(&patch_lock, flags); + waddr = patch_map(addr, FIX_TEXT_POKE0); + + WRITE_ONCE(*waddr, data); + + patch_unmap(FIX_TEXT_POKE0); + raw_spin_unlock_irqrestore(&patch_lock, flags); +} + int __kprobes aarch64_insn_write(void *addr, u32 insn) { return __aarch64_insn_write(addr, 
cpu_to_le32(insn)); @@ -442,36 +456,31 @@ static u32 aarch64_insn_encode_register(enum aarch64_insn_register_type type, return insn; } +static const u32 aarch64_insn_ldst_size[] = { + [AARCH64_INSN_SIZE_8] = 0, + [AARCH64_INSN_SIZE_16] = 1, + [AARCH64_INSN_SIZE_32] = 2, + [AARCH64_INSN_SIZE_64] = 3, +}; + static u32 aarch64_insn_encode_ldst_size(enum aarch64_insn_size_type type, u32 insn) { u32 size; - switch (type) { - case AARCH64_INSN_SIZE_8: - size = 0; - break; - case AARCH64_INSN_SIZE_16: - size = 1; - break; - case AARCH64_INSN_SIZE_32: - size = 2; - break; - case AARCH64_INSN_SIZE_64: - size = 3; - break; - default: + if (type < AARCH64_INSN_SIZE_8 || type > AARCH64_INSN_SIZE_64) { pr_err("%s: unknown size encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; } + size = aarch64_insn_ldst_size[type]; insn &= ~GENMASK(31, 30); insn |= size << 30; return insn; } -static inline long branch_imm_common(unsigned long pc, unsigned long addr, +static inline long label_imm_common(unsigned long pc, unsigned long addr, long range) { long offset; @@ -502,7 +511,7 @@ u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr, * ARM64 virtual address arrangement guarantees all kernel and module * texts are within +/-128M. 
*/ - offset = branch_imm_common(pc, addr, SZ_128M); + offset = label_imm_common(pc, addr, SZ_128M); if (offset >= SZ_128M) return AARCH64_BREAK_FAULT; @@ -530,7 +539,7 @@ u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr, u32 insn; long offset; - offset = branch_imm_common(pc, addr, SZ_1M); + offset = label_imm_common(pc, addr, SZ_1M); if (offset >= SZ_1M) return AARCH64_BREAK_FAULT; @@ -569,7 +578,7 @@ u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr, u32 insn; long offset; - offset = branch_imm_common(pc, addr, SZ_1M); + offset = label_imm_common(pc, addr, SZ_1M); insn = aarch64_insn_get_bcond_value(); @@ -647,6 +656,72 @@ u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg, offset); } +u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg, + enum aarch64_insn_register base, + unsigned int imm, + enum aarch64_insn_size_type size, + enum aarch64_insn_ldst_type type) +{ + u32 insn; + u32 shift; /* log2 of the access size in bytes */ + + if (size < AARCH64_INSN_SIZE_8 || size > AARCH64_INSN_SIZE_64) { + pr_err("%s: unknown size encoding %d\n", __func__, size); + return AARCH64_BREAK_FAULT; + } + + shift = aarch64_insn_ldst_size[size]; + if (imm & ~(BIT(12 + shift) - BIT(shift))) { + pr_err("%s: invalid imm: %u\n", __func__, imm); + return AARCH64_BREAK_FAULT; + } + + imm >>= shift; /* imm12 field holds the offset scaled by the access size */ + + switch (type) { + case AARCH64_INSN_LDST_LOAD_IMM_OFFSET: + insn = aarch64_insn_get_ldr_imm_value(); + break; + case AARCH64_INSN_LDST_STORE_IMM_OFFSET: + insn = aarch64_insn_get_str_imm_value(); + break; + default: + pr_err("%s: unknown load/store encoding %d\n", __func__, type); + return AARCH64_BREAK_FAULT; + } + + insn = aarch64_insn_encode_ldst_size(size, insn); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, reg); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, + base); + + return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm); +} + +u32 aarch64_insn_gen_load_literal(unsigned long 
pc, unsigned long addr, + enum aarch64_insn_register reg, + bool is64bit) +{ + u32 insn; + long offset; + + offset = label_imm_common(pc, addr, SZ_1M); + if (offset >= SZ_1M) + return AARCH64_BREAK_FAULT; + + insn = aarch64_insn_get_ldr_lit_value(); + + if (is64bit) + insn |= BIT(30); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, reg); + + return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_19, insn, + offset >> 2); +} + u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, enum aarch64_insn_register reg2, enum aarch64_insn_register base, diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index cc0cf0f5c7c3b8d1a15d6296462282b5e8ccc8c2..864677bf08e2843e490bb2e8ec782d97a43d7933 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -66,6 +66,26 @@ #define A64_STR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, STORE) #define A64_LDR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, LOAD) +/* Load/store register (immediate offset) */ +#define A64_LS_IMM(Rt, Rn, imm, size, type) \ + aarch64_insn_gen_load_store_imm(Rt, Rn, imm, \ + AARCH64_INSN_SIZE_##size, \ + AARCH64_INSN_LDST_##type##_IMM_OFFSET) +#define A64_STRBI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 8, STORE) +#define A64_LDRBI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 8, LOAD) +#define A64_STRHI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 16, STORE) +#define A64_LDRHI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 16, LOAD) +#define A64_STR32I(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 32, STORE) +#define A64_LDR32I(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 32, LOAD) +#define A64_STR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, STORE) +#define A64_LDR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, LOAD) + +/* LDR (literal) */ +#define A64_LDR32LIT(Wt, offset) \ + aarch64_insn_gen_load_literal(0, offset, Wt, false) +#define A64_LDR64LIT(Xt, offset) \ + aarch64_insn_gen_load_literal(0, offset, Xt, true) + /* Load/store register pair */ #define A64_LS_PAIR(Rt, Rt2, Rn, offset, ls, type) \ 
aarch64_insn_gen_load_store_pair(Rt, Rt2, Rn, offset, \ @@ -218,5 +238,6 @@ #define A64_BTI_C A64_HINT(AARCH64_INSN_HINT_BTIC) #define A64_BTI_J A64_HINT(AARCH64_INSN_HINT_BTIJ) #define A64_BTI_JC A64_HINT(AARCH64_INSN_HINT_BTIJC) +#define A64_NOP A64_HINT(AARCH64_INSN_HINT_NOP) #endif /* _BPF_JIT_H */ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 18627cbd6da4ef45a90a549fd4f06a0fbce8dc6f..4b65f3dc2606f7f12f9602a22ada04b4a1e18efe 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -24,6 +25,7 @@ #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) #define TCALL_CNT (MAX_BPF_JIT_REG + 2) #define TMP_REG_3 (MAX_BPF_JIT_REG + 3) +#define FP_BOTTOM (MAX_BPF_JIT_REG + 4) /* Map BPF registers to A64 registers */ static const int bpf2a64[] = { @@ -50,6 +52,7 @@ static const int bpf2a64[] = { [TCALL_CNT] = A64_R(26), /* temporary register for blinding constants */ [BPF_REG_AX] = A64_R(9), + [FP_BOTTOM] = A64_R(27), }; struct jit_ctx { @@ -60,8 +63,18 @@ struct jit_ctx { int exentry_idx; __le32 *image; u32 stack_size; + int fpb_offset; }; +struct bpf_plt { + u32 insn_ldr; /* load target */ + u32 insn_br; /* branch to target */ + u64 target; /* target value */ +}; + +#define PLT_TARGET_SIZE sizeof_field(struct bpf_plt, target) +#define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target) + static inline void emit(const u32 insn, struct jit_ctx *ctx) { if (ctx->image != NULL) @@ -124,6 +137,12 @@ static inline void emit_a64_mov_i64(const int reg, const u64 val, } } +static inline void emit_bti(u32 insn, struct jit_ctx *ctx) +{ + if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) + emit(insn, ctx); +} + /* * Kernel addresses in the vmalloc space use at most 48 bits, and the * remaining bits are guaranteed to be 0x1. 
So we can compose the address @@ -143,6 +162,14 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val, } } +static inline void emit_call(u64 target, struct jit_ctx *ctx) +{ + u8 tmp = bpf2a64[TMP_REG_1]; + + emit_addr_mov_i64(tmp, target, ctx); + emit(A64_BLR(tmp), ctx); +} + static inline int bpf2a64_offset(int bpf_insn, int off, const struct jit_ctx *ctx) { @@ -178,15 +205,72 @@ static bool is_addsub_imm(u32 imm) return !(imm & ~0xfff) || !(imm & ~0xfff000); } +/* + * There are 3 types of AArch64 LDR/STR (immediate) instruction: + * Post-index, Pre-index, Unsigned offset. + * + * For BPF ldr/str, the "unsigned offset" type is sufficient. + * + * "Unsigned offset" type LDR(immediate) format: + * + * 3 2 1 0 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |x x|1 1 1 0 0 1 0 1| imm12 | Rn | Rt | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * scale + * + * "Unsigned offset" type STR(immediate) format: + * 3 2 1 0 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |x x|1 1 1 0 0 1 0 0| imm12 | Rn | Rt | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * scale + * + * The offset is calculated from imm12 and scale in the following way: + * + * offset = (u64)imm12 << scale + */ +static bool is_lsi_offset(int offset, int scale) +{ + if (offset < 0) + return false; + + if (offset > (0xFFF << scale)) + return false; + + if (offset & ((1 << scale) - 1)) + return false; + + return true; +} + +/* generated prologue: + * bti jc // if CONFIG_ARM64_BTI_KERNEL + * mov x9, lr + * nop // POKE_OFFSET + * stp x29, lr, [sp, #-16]! + * mov x29, sp + * stp x19, x20, [sp, #-16]! + * stp x21, x22, [sp, #-16]! + * stp x25, x26, [sp, #-16]! + * stp x27, x28, [sp, #-16]! 
+ * mov x25, sp + * mov tcc, #0 + * // PROLOGUE_OFFSET + */ + +#define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0) + +/* Offset of nop instruction in bpf prog entry to be poked */ +#define POKE_OFFSET (BTI_INSNS + 1) + /* Stack must be multiples of 16B */ #define STACK_ALIGN(sz) (((sz) + 15) & ~15) /* Tail call offset to jump into */ -#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) -#define PROLOGUE_OFFSET 8 -#else -#define PROLOGUE_OFFSET 7 -#endif +#define PROLOGUE_OFFSET (BTI_INSNS + 2 + 8) static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) { @@ -197,6 +281,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) const u8 r9 = bpf2a64[BPF_REG_9]; const u8 fp = bpf2a64[BPF_REG_FP]; const u8 tcc = bpf2a64[TCALL_CNT]; + const u8 fpb = bpf2a64[FP_BOTTOM]; const int idx0 = ctx->idx; int cur_offset; @@ -223,9 +308,16 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) * */ - /* BTI landing pad */ - if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) - emit(A64_BTI_C, ctx); + /* bpf function may be invoked by 3 instruction types: + * 1. bl, attached via freplace to bpf prog via short jump + * 2. br, attached via freplace to bpf prog via long jump + * 3. blr, working as a function pointer, used by emit_call. + * So BTI_JC should be used here to support both br and blr. 
+ */ + emit_bti(A64_BTI_JC, ctx); + + emit(A64_MOV(1, A64_R(9), A64_LR), ctx); + emit(A64_NOP, ctx); /* Save FP and LR registers to stay align with ARM64 AAPCS */ emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); @@ -235,6 +327,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) emit(A64_PUSH(r6, r7, A64_SP), ctx); emit(A64_PUSH(r8, r9, A64_SP), ctx); emit(A64_PUSH(fp, tcc, A64_SP), ctx); + emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx); /* Set up BPF prog stack base register */ emit(A64_MOV(1, fp, A64_SP), ctx); @@ -251,10 +344,11 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) } /* BTI landing pad for the tail call, done with a BR */ - if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) - emit(A64_BTI_J, ctx); + emit_bti(A64_BTI_J, ctx); } + emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx); + ctx->stack_size = STACK_ALIGN(prog->aux->stack_depth); /* Set up function call stack */ @@ -328,6 +422,57 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) #undef jmp_offset } +#ifdef ftrace_dummy_tramp +#define dummy_tramp ftrace_dummy_tramp +#else +void dummy_tramp(void); + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .global dummy_tramp\n" +" .type dummy_tramp, %function\n" +"dummy_tramp:" +#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) +" bti j\n" /* dummy_tramp is called via "br x10" */ +#endif +" mov x10, x30\n" +" mov x30, x9\n" +" ret x10\n" +" .size dummy_tramp, .-dummy_tramp\n" +" .popsection\n" +); +#endif + +/* build a plt initialized like this: + * + * plt: + * ldr tmp, target + * br tmp + * target: + * .quad dummy_tramp + * + * when a long jump trampoline is attached, target is filled with the + * trampoline address, and when the trampoline is removed, target is + * restored to dummy_tramp address. 
+ */ +static void build_plt(struct jit_ctx *ctx) +{ + const u8 tmp = bpf2a64[TMP_REG_1]; + struct bpf_plt *plt = NULL; + + /* make sure target is 64-bit aligned */ + if ((ctx->idx + PLT_TARGET_OFFSET / AARCH64_INSN_SIZE) % 2) + emit(A64_NOP, ctx); + + plt = (struct bpf_plt *)(ctx->image + ctx->idx); + /* plt is called via bl, no BTI needed here */ + emit(A64_LDR64LIT(tmp, 2 * AARCH64_INSN_SIZE), ctx); + emit(A64_BR(tmp), ctx); + + if (ctx->image) + plt->target = (u64)&dummy_tramp; +} + static void build_epilogue(struct jit_ctx *ctx) { const u8 r0 = bpf2a64[BPF_REG_0]; @@ -336,10 +481,13 @@ static void build_epilogue(struct jit_ctx *ctx) const u8 r8 = bpf2a64[BPF_REG_8]; const u8 r9 = bpf2a64[BPF_REG_9]; const u8 fp = bpf2a64[BPF_REG_FP]; + const u8 fpb = bpf2a64[FP_BOTTOM]; /* We're done with BPF stack */ emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); + /* Restore x27 and x28 */ + emit(A64_POP(fpb, A64_R(28), A64_SP), ctx); /* Restore fs (x25) and x26 */ emit(A64_POP(fp, A64_R(26), A64_SP), ctx); @@ -432,6 +580,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, const u8 tmp = bpf2a64[TMP_REG_1]; const u8 tmp2 = bpf2a64[TMP_REG_2]; const u8 tmp3 = bpf2a64[TMP_REG_3]; + const u8 fp = bpf2a64[BPF_REG_FP]; + const u8 fpb = bpf2a64[FP_BOTTOM]; const s16 off = insn->off; const s32 imm = insn->imm; const int i = insn - ctx->prog->insnsi; @@ -441,6 +591,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, u8 jmp_cond, reg; s32 jmp_offset; u32 a64_insn; + u8 src_adj; + u8 dst_adj; + int off_adj; int ret; #define check_imm(bits, imm) do { \ @@ -766,8 +919,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, &func_addr, &func_addr_fixed); if (ret < 0) return ret; - emit_addr_mov_i64(tmp, func_addr, ctx); - emit(A64_BLR(tmp), ctx); + emit_call(func_addr, ctx); emit(A64_MOV(1, r0, A64_R(0)), ctx); break; } @@ -808,19 +960,45 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx 
*ctx, case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_PROBE_MEM | BPF_H: case BPF_LDX | BPF_PROBE_MEM | BPF_B: - emit_a64_mov_i(1, tmp, off, ctx); + if (ctx->fpb_offset > 0 && src == fp) { + src_adj = fpb; + off_adj = off + ctx->fpb_offset; + } else { + src_adj = src; + off_adj = off; + } switch (BPF_SIZE(code)) { case BPF_W: - emit(A64_LDR32(dst, src, tmp), ctx); + if (is_lsi_offset(off_adj, 2)) { + emit(A64_LDR32I(dst, src_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_LDR32(dst, src, tmp), ctx); + } break; case BPF_H: - emit(A64_LDRH(dst, src, tmp), ctx); + if (is_lsi_offset(off_adj, 1)) { + emit(A64_LDRHI(dst, src_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_LDRH(dst, src, tmp), ctx); + } break; case BPF_B: - emit(A64_LDRB(dst, src, tmp), ctx); + if (is_lsi_offset(off_adj, 0)) { + emit(A64_LDRBI(dst, src_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_LDRB(dst, src, tmp), ctx); + } break; case BPF_DW: - emit(A64_LDR64(dst, src, tmp), ctx); + if (is_lsi_offset(off_adj, 3)) { + emit(A64_LDR64I(dst, src_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_LDR64(dst, src, tmp), ctx); + } break; } @@ -847,21 +1025,47 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, case BPF_ST | BPF_MEM | BPF_H: case BPF_ST | BPF_MEM | BPF_B: case BPF_ST | BPF_MEM | BPF_DW: + if (ctx->fpb_offset > 0 && dst == fp) { + dst_adj = fpb; + off_adj = off + ctx->fpb_offset; + } else { + dst_adj = dst; + off_adj = off; + } /* Load imm to a register then store it */ - emit_a64_mov_i(1, tmp2, off, ctx); emit_a64_mov_i(1, tmp, imm, ctx); switch (BPF_SIZE(code)) { case BPF_W: - emit(A64_STR32(tmp, dst, tmp2), ctx); + if (is_lsi_offset(off_adj, 2)) { + emit(A64_STR32I(tmp, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp2, off, ctx); + emit(A64_STR32(tmp, dst, tmp2), ctx); + } break; case BPF_H: - emit(A64_STRH(tmp, dst, tmp2), 
ctx); + if (is_lsi_offset(off_adj, 1)) { + emit(A64_STRHI(tmp, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp2, off, ctx); + emit(A64_STRH(tmp, dst, tmp2), ctx); + } break; case BPF_B: - emit(A64_STRB(tmp, dst, tmp2), ctx); + if (is_lsi_offset(off_adj, 0)) { + emit(A64_STRBI(tmp, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp2, off, ctx); + emit(A64_STRB(tmp, dst, tmp2), ctx); + } break; case BPF_DW: - emit(A64_STR64(tmp, dst, tmp2), ctx); + if (is_lsi_offset(off_adj, 3)) { + emit(A64_STR64I(tmp, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp2, off, ctx); + emit(A64_STR64(tmp, dst, tmp2), ctx); + } break; } break; @@ -871,19 +1075,45 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, case BPF_STX | BPF_MEM | BPF_H: case BPF_STX | BPF_MEM | BPF_B: case BPF_STX | BPF_MEM | BPF_DW: - emit_a64_mov_i(1, tmp, off, ctx); + if (ctx->fpb_offset > 0 && dst == fp) { + dst_adj = fpb; + off_adj = off + ctx->fpb_offset; + } else { + dst_adj = dst; + off_adj = off; + } switch (BPF_SIZE(code)) { case BPF_W: - emit(A64_STR32(src, dst, tmp), ctx); + if (is_lsi_offset(off_adj, 2)) { + emit(A64_STR32I(src, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_STR32(src, dst, tmp), ctx); + } break; case BPF_H: - emit(A64_STRH(src, dst, tmp), ctx); + if (is_lsi_offset(off_adj, 1)) { + emit(A64_STRHI(src, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_STRH(src, dst, tmp), ctx); + } break; case BPF_B: - emit(A64_STRB(src, dst, tmp), ctx); + if (is_lsi_offset(off_adj, 0)) { + emit(A64_STRBI(src, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_STRB(src, dst, tmp), ctx); + } break; case BPF_DW: - emit(A64_STR64(src, dst, tmp), ctx); + if (is_lsi_offset(off_adj, 3)) { + emit(A64_STR64I(src, dst_adj, off_adj), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_STR64(src, dst, tmp), ctx); + } break; } break; @@ -919,6 
+1149,73 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, return 0; } +/* + * Return 0 if FP may change at runtime, otherwise find the minimum negative + * offset to FP, converts it to positive number, and align down to 8 bytes. + */ +static int find_fpb_offset(struct bpf_prog *prog) +{ + int i; + int offset = 0; + + for (i = 0; i < prog->len; i++) { + const struct bpf_insn *insn = &prog->insnsi[i]; + const u8 class = BPF_CLASS(insn->code); + const u8 mode = BPF_MODE(insn->code); + const u8 src = insn->src_reg; + const u8 dst = insn->dst_reg; + const s16 off = insn->off; + + switch (class) { + case BPF_STX: + case BPF_ST: + /* fp holds atomic operation result */ + if (class == BPF_STX && mode == BPF_XADD && + src == BPF_REG_FP) + return 0; + + if (mode == BPF_MEM && dst == BPF_REG_FP && + off < offset) + offset = insn->off; + break; + + case BPF_JMP32: + case BPF_JMP: + break; + + case BPF_LDX: + case BPF_LD: + /* fp holds load result */ + if (dst == BPF_REG_FP) + return 0; + + if (class == BPF_LDX && mode == BPF_MEM && + src == BPF_REG_FP && off < offset) + offset = off; + break; + + case BPF_ALU: + case BPF_ALU64: + default: + /* fp holds ALU result */ + if (dst == BPF_REG_FP) + return 0; + } + } + + if (offset < 0) { + /* + * safely be converted to a positive 'int', since insn->off + * is 's16' + */ + offset = -offset; + /* align down to 8 bytes */ + offset = ALIGN_DOWN(offset, 8); + } + + return offset; +} + static int build_body(struct jit_ctx *ctx, bool extra_pass) { const struct bpf_prog *prog = ctx->prog; @@ -970,6 +1267,13 @@ static int validate_code(struct jit_ctx *ctx) if (a64_insn == AARCH64_BREAK_FAULT) return -1; } + return 0; +} + +static int validate_ctx(struct jit_ctx *ctx) +{ + if (validate_code(ctx)) + return -1; if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries)) return -1; @@ -990,7 +1294,7 @@ struct arm64_jit_data { struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { - int image_size, 
prog_size, extable_size; + int image_size, prog_size, extable_size, extable_align, extable_offset; struct bpf_prog *tmp, *orig_prog = prog; struct bpf_binary_header *header; struct arm64_jit_data *jit_data; @@ -1040,6 +1344,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) goto out_off; } + ctx.fpb_offset = find_fpb_offset(prog); + /* * 1. Initial fake pass to compute ctx->idx and ctx->offset. * @@ -1058,13 +1364,17 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ctx.epilogue_offset = ctx.idx; build_epilogue(&ctx); + build_plt(&ctx); + extable_align = __alignof__(struct exception_table_entry); extable_size = prog->aux->num_exentries * sizeof(struct exception_table_entry); /* Now we know the actual image size. */ prog_size = sizeof(u32) * ctx.idx; - image_size = prog_size + extable_size; + /* also allocate space for plt target */ + extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align); + image_size = extable_offset + extable_size; header = bpf_jit_binary_alloc(image_size, &image_ptr, sizeof(u32), jit_fill_hole); if (header == NULL) { @@ -1076,7 +1386,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ctx.image = (__le32 *)image_ptr; if (extable_size) - prog->aux->extable = (void *)image_ptr + prog_size; + prog->aux->extable = (void *)image_ptr + extable_offset; skip_init_ctx: ctx.idx = 0; ctx.exentry_idx = 0; @@ -1090,9 +1400,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) } build_epilogue(&ctx); + build_plt(&ctx); /* 3. Extra pass to validate JITed code. 
*/ - if (validate_code(&ctx)) { + if (validate_ctx(&ctx)) { bpf_jit_binary_free(header); prog = orig_prog; goto out_off; @@ -1160,3 +1471,504 @@ void bpf_jit_free_exec(void *addr) { return vfree(addr); } + +static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_prog *p, + int args_off, int retval_off, bool save_ret) +{ + if (p->aux->sleepable) { + emit_call((const u64)__bpf_prog_enter_sleepable, ctx); + } else { + emit_call((const u64)__bpf_prog_enter, ctx); + /* save return value to callee saved register x19 */ + emit(A64_MOV(1, A64_R(19), A64_R(0)), ctx); + } + + emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx); + if (!p->jited) + emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx); + + emit_call((const u64)p->bpf_func, ctx); + + if (save_ret) + emit(A64_STR64I(bpf2a64[BPF_REG_0], A64_SP, retval_off), ctx); + + if (p->aux->sleepable) { + emit_call((const u64)__bpf_prog_exit_sleepable, ctx); + } else { + /* arg1: prog */ + emit_addr_mov_i64(A64_R(0), (const u64)p, ctx); + /* arg2: start time */ + emit(A64_MOV(1, A64_R(1), A64_R(19)), ctx); + + emit_call((const u64)__bpf_prog_exit, ctx); + } +} + +static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_progs *tp, + int args_off, int retval_off, u32 **branches) +{ + int i; + + /* The first fmod_ret program will receive a garbage return value. + * Set this to 0 to avoid confusing the program. + */ + emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx); + for (i = 0; i < tp->nr_progs; i++) { + invoke_bpf_prog(ctx, tp->progs[i], args_off, retval_off, true); + /* if (*(u64 *)(sp + retval_off) != 0) + * goto do_fexit; + */ + emit(A64_LDR64I(A64_R(10), A64_SP, retval_off), ctx); + /* Save the location of branch, and generate a nop. + * This nop will be replaced with a cbnz later. 
+ */ + branches[i] = ctx->image + ctx->idx; + emit(A64_NOP, ctx); + } +} + +static void save_args(struct jit_ctx *ctx, int args_off, int nargs) +{ + int i; + + for (i = 0; i < nargs; i++) { + emit(A64_STR64I(i, A64_SP, args_off), ctx); + args_off += 8; + } +} + +static void restore_args(struct jit_ctx *ctx, int args_off, int nargs) +{ + int i; + + for (i = 0; i < nargs; i++) { + emit(A64_LDR64I(i, A64_SP, args_off), ctx); + args_off += 8; + } +} + +/* Based on the x86's implementation of arch_prepare_bpf_trampoline(). + * + * bpf prog and function entry before bpf trampoline hooked: + * mov x9, lr + * nop + * + * bpf prog and function entry after bpf trampoline hooked: + * mov x9, lr + * bl <bpf_trampoline or plt> + * + */ +static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, + struct bpf_tramp_progs *tprogs, void *orig_call, + int nargs, u32 flags) +{ + int i; + int stack_size; + int retaddr_off; + int regs_off; + int retval_off; + int args_off; + struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY]; + struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT]; + struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN]; + bool save_ret; + __le32 **branches = NULL; + + /* trampoline stack layout: + * [ parent ip ] + * [ FP ] + * SP + retaddr_off [ self ip ] + * [ FP ] + * + * [ padding ] align SP to multiples of 16 + * + * SP + regs_off [ x19 ] callee saved reg x19 + * + * SP + retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or + * BPF_TRAMP_F_RET_FENTRY_RET + * + * [ argN ] + * [ ...
] + * SP + args_off [ arg1 ] + */ + + stack_size = 0; + + args_off = stack_size; + /* room for args */ + stack_size += nargs * 8; + + /* room for return value */ + retval_off = stack_size; + save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); + if (save_ret) + stack_size += 8; + + /* room for callee saved registers, currently only x19 is used */ + regs_off = stack_size; + stack_size += 8; + + /* round up to multiples of 16 to avoid SPAlignmentFault */ + stack_size = round_up(stack_size, 16); + + /* return address locates above FP */ + retaddr_off = stack_size + 8; + + /* bpf trampoline may be invoked by 3 instruction types: + * 1. bl, attached to bpf prog or kernel function via short jump + * 2. br, attached to bpf prog or kernel function via long jump + * 3. blr, working as a function pointer, used by struct_ops. + * So BTI_JC should used here to support both br and blr. + */ + emit_bti(A64_BTI_JC, ctx); + + /* frame for parent function */ + emit(A64_PUSH(A64_FP, A64_R(9), A64_SP), ctx); + emit(A64_MOV(1, A64_FP, A64_SP), ctx); + + /* frame for patched function */ + emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); + emit(A64_MOV(1, A64_FP, A64_SP), ctx); + + /* allocate stack space */ + emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx); + + /* save args */ + save_args(ctx, args_off, nargs); + + /* save callee saved registers */ + emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx); + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + emit_addr_mov_i64(A64_R(0), (const u64)im, ctx); + emit_call((const u64)__bpf_tramp_enter, ctx); + } + + for (i = 0; i < fentry->nr_progs; i++) + invoke_bpf_prog(ctx, fentry->progs[i], args_off, + retval_off, flags & BPF_TRAMP_F_RET_FENTRY_RET); + + if (fmod_ret->nr_progs) { + branches = kcalloc(fmod_ret->nr_progs, sizeof(__le32 *), + GFP_KERNEL); + if (!branches) + return -ENOMEM; + + invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off, branches); + } + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + restore_args(ctx, args_off, 
nargs); + /* call original func */ + emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx); + emit(A64_BLR(A64_R(10)), ctx); + /* store return value */ + emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx); + /* reserve a nop for bpf_tramp_image_put */ + im->ip_after_call = ctx->image + ctx->idx; + emit(A64_NOP, ctx); + } + + /* update the branches saved in invoke_bpf_mod_ret with cbnz */ + for (i = 0; i < fmod_ret->nr_progs && ctx->image != NULL; i++) { + int offset = &ctx->image[ctx->idx] - branches[i]; + *branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset)); + } + + for (i = 0; i < fexit->nr_progs; i++) + invoke_bpf_prog(ctx, fexit->progs[i], args_off, retval_off, false); + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + im->ip_epilogue = ctx->image + ctx->idx; + emit_addr_mov_i64(A64_R(0), (const u64)im, ctx); + emit_call((const u64)__bpf_tramp_exit, ctx); + } + + if (flags & BPF_TRAMP_F_RESTORE_REGS) + restore_args(ctx, args_off, nargs); + + /* restore callee saved register x19 */ + emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx); + + if (save_ret) + emit(A64_LDR64I(A64_R(0), A64_SP, retval_off), ctx); + + /* reset SP */ + emit(A64_MOV(1, A64_SP, A64_FP), ctx); + + /* pop frames */ + emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); + emit(A64_POP(A64_FP, A64_R(9), A64_SP), ctx); + + if (flags & BPF_TRAMP_F_SKIP_FRAME) { + /* skip patched function, return to parent */ + emit(A64_MOV(1, A64_LR, A64_R(9)), ctx); + emit(A64_RET(A64_R(9)), ctx); + } else { + /* return to patched function */ + emit(A64_MOV(1, A64_R(10), A64_LR), ctx); + emit(A64_MOV(1, A64_LR, A64_R(9)), ctx); + emit(A64_RET(A64_R(10)), ctx); + } + + if (ctx->image) + bpf_flush_icache(ctx->image, ctx->image + ctx->idx); + + kfree(branches); + + return ctx->idx; +} + +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, + void *image_end, const struct btf_func_model *m, + u32 flags, struct bpf_tramp_progs *tprogs, + void *orig_call) +{ + int ret; + int nargs = m->nr_args; + int 
max_insns = ((long)image_end - (long)image) / AARCH64_INSN_SIZE; + struct jit_ctx ctx = { + .image = NULL, + .idx = 0, + }; + + /* the first 8 arguments are passed by registers */ + if (nargs > 8) + return -ENOTSUPP; + + ret = prepare_trampoline(&ctx, im, tprogs, orig_call, nargs, flags); + if (ret < 0) + return ret; + + if (ret > max_insns) + return -EFBIG; + + ctx.image = image; + ctx.idx = 0; + + jit_fill_hole(image, (unsigned int)(image_end - image)); + ret = prepare_trampoline(&ctx, im, tprogs, orig_call, nargs, flags); + + if (ret > 0 && validate_code(&ctx) < 0) + ret = -EINVAL; + + if (ret > 0) + ret *= AARCH64_INSN_SIZE; + + return ret; +} + +static bool is_long_jump(void *ip, void *target) +{ + long offset; + + /* NULL target means this is a NOP */ + if (!target) + return false; + + offset = (long)target - (long)ip; + return offset < -SZ_128M || offset >= SZ_128M; +} + +static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip, + void *addr, void *plt, u32 *insn) +{ + void *target; + + if (!addr) { + *insn = aarch64_insn_gen_nop(); + return 0; + } + + if (is_long_jump(ip, addr)) + target = plt; + else + target = addr; + + *insn = aarch64_insn_gen_branch_imm((unsigned long)ip, + (unsigned long)target, + type); + + return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT; +} + +/* Replace the branch instruction from @ip to @old_addr in a bpf prog or a bpf + * trampoline with the branch instruction from @ip to @new_addr. If @old_addr + * or @new_addr is NULL, the old or new instruction is NOP. + * + * When @ip is the bpf prog entry, a bpf trampoline is being attached or + * detached. Since bpf trampoline and bpf prog are allocated separately with + * vmalloc, the address distance may exceed 128MB, the maximum branch range. + * So long jump should be handled. + * + * When a bpf prog is constructed, a plt pointing to empty trampoline + * dummy_tramp is placed at the end: + * + * bpf_prog: + * mov x9, lr + * nop // patchsite + * ... 
+ * ret + * + * plt: + * ldr x10, target + * br x10 + * target: + * .quad dummy_tramp // plt target + * + * This is also the state when no trampoline is attached. + * + * When a short-jump bpf trampoline is attached, the patchsite is patched + * to a bl instruction to the trampoline directly: + * + * bpf_prog: + * mov x9, lr + * bl <short-jump bpf trampoline address> // patchsite + * ... + * ret + * + * plt: + * ldr x10, target + * br x10 + * target: + * .quad dummy_tramp // plt target + * + * When a long-jump bpf trampoline is attached, the plt target is filled with + * the trampoline address and the patchsite is patched to a bl instruction to + * the plt: + * + * bpf_prog: + * mov x9, lr + * bl plt // patchsite + * ... + * ret + * + * plt: + * ldr x10, target + * br x10 + * target: + * .quad <long-jump bpf trampoline address> // plt target + * + * The dummy_tramp is used to prevent another CPU from jumping to unknown + * locations during the patching process, making the patching process easier. + */ +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, + void *old_addr, void *new_addr) +{ + int ret; + u32 old_insn; + u32 new_insn; + u32 replaced; + struct bpf_plt *plt = NULL; + unsigned long size = 0UL; + unsigned long offset = ~0UL; + enum aarch64_insn_branch_type branch_type; + char namebuf[KSYM_NAME_LEN]; + void *image = NULL; + u64 plt_target = 0ULL; + bool poking_bpf_entry; + + if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf)) + /* Only poking bpf text is supported. Since kernel function + * entry is set up by ftrace, we rely on ftrace to poke kernel + * functions.
+ */ + return -ENOTSUPP; + + image = ip - offset; + /* zero offset means we're poking bpf prog entry */ + poking_bpf_entry = (offset == 0UL); + + /* bpf prog entry, find plt and the real patchsite */ + if (poking_bpf_entry) { + /* plt locates at the end of bpf prog */ + plt = image + size - PLT_TARGET_OFFSET; + + /* skip to the nop instruction in bpf prog entry: + * bti c // if BTI enabled + * mov x9, x30 + * nop + */ + ip = image + POKE_OFFSET * AARCH64_INSN_SIZE; + } + + /* long jump is only possible at bpf prog entry */ + if (WARN_ON((is_long_jump(ip, new_addr) || is_long_jump(ip, old_addr)) && + !poking_bpf_entry)) + return -EINVAL; + + if (poke_type == BPF_MOD_CALL) + branch_type = AARCH64_INSN_BRANCH_LINK; + else + branch_type = AARCH64_INSN_BRANCH_NOLINK; + + if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0) + return -EFAULT; + + if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0) + return -EFAULT; + + if (is_long_jump(ip, new_addr)) + plt_target = (u64)new_addr; + else if (is_long_jump(ip, old_addr)) + /* if the old target is a long jump and the new target is not, + * restore the plt target to dummy_tramp, so there is always a + * legal and harmless address stored in plt target, and we'll + * never jump from plt to an unknown place. + */ + plt_target = (u64)&dummy_tramp; + + if (plt_target) { + /* non-zero plt_target indicates we're patching a bpf prog, + * which is read only. + */ + if (set_memory_rw(PAGE_MASK & ((uintptr_t)&plt->target), 1)) + return -EFAULT; + WRITE_ONCE(plt->target, plt_target); + set_memory_ro(PAGE_MASK & ((uintptr_t)&plt->target), 1); + /* since plt target points to either the new trampoline + * or dummy_tramp, even if another CPU reads the old plt + * target value before fetching the bl instruction to plt, + * it will be brought back by dummy_tramp, so no barrier is + * required here. 
+ */ + } + + /* if the old target and the new target are both long jumps, no + * patching is required + */ + if (old_insn == new_insn) + return 0; + + mutex_lock(&text_mutex); + if (aarch64_insn_read(ip, &replaced)) { + ret = -EFAULT; + goto out; + } + + if (replaced != old_insn) { + ret = -EFAULT; + goto out; + } + + /* We call aarch64_insn_patch_text_nosync() to replace instruction + * atomically, so no other CPUs will fetch a half-new and half-old + * instruction. But there is chance that another CPU executes the + * old instruction after the patching operation finishes (e.g., + * pipeline not flushed, or icache not synchronized yet). + * + * 1. when a new trampoline is attached, it is not a problem for + * different CPUs to jump to different trampolines temporarily. + * + * 2. when an old trampoline is freed, we should wait for all other + * CPUs to exit the trampoline and make sure the trampoline is no + * longer reachable, since bpf_tramp_image_put() function already + * uses percpu_ref and task-based rcu to do the sync, no need to call + * the sync version here, see bpf_tramp_image_put() for details. + */ + ret = aarch64_insn_patch_text_nosync(ip, new_insn); +out: + mutex_unlock(&text_mutex); + + return ret; +} diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 352cf0a264d64480f156c08338d94d2c7755ee7a..ef86b3f02898de08295325215091ff1c9023c42b 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -215,17 +215,10 @@ static unsigned long __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) { struct kprobe *kp; - unsigned long faddr; + bool faddr; kp = get_kprobe((void *)addr); - faddr = ftrace_location(addr); - /* - * Addresses inside the ftrace location are refused by - * arch_check_ftrace_location(). Something went terribly wrong - * if such an address is checked here. 
- */ - if (WARN_ON(faddr && faddr != addr)) - return 0UL; + faddr = ftrace_location(addr) == addr; /* * Use the current code if it is not modified by Kprobe * and it cannot be modified by ftrace. diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b9dd113599eb440e4eaebb93b2b8a68197b8ae4d..9585a29ace3e3b668169b2a75bfbf9c6f836c875 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -280,6 +280,8 @@ struct ftrace_func_entry { struct dyn_ftrace; +bool ftrace_directable(struct dyn_ftrace *rec); + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS extern int ftrace_direct_func_count; int register_ftrace_direct(unsigned long ip, unsigned long addr); @@ -461,6 +463,7 @@ struct dyn_ftrace { }; int ftrace_force_update(void); +void ftrace_rec_arch_init(struct dyn_ftrace *rec, unsigned long addr); int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, int remove, int reset); int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 4c5594266fb21ab08f0395000f29dd2f00e7d521..af9fea93b48831a56a912eaf58d7ab2fa9bad668 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -87,18 +87,6 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key) return tr; } -static int is_ftrace_location(void *ip) -{ - long addr; - - addr = ftrace_location((long)ip); - if (!addr) - return 0; - if (WARN_ON_ONCE(addr != (long)ip)) - return -EFAULT; - return 1; -} - static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr) { void *ip = tr->func.addr; @@ -127,12 +115,12 @@ static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_ad static int register_fentry(struct bpf_trampoline *tr, void *new_addr) { void *ip = tr->func.addr; + unsigned long faddr; int ret; - ret = is_ftrace_location(ip); - if (ret < 0) - return ret; - tr->func.ftrace_managed = ret; + faddr = ftrace_location((unsigned long)ip); + if (faddr) + tr->func.ftrace_managed = 
true; if (tr->func.ftrace_managed) ret = register_ftrace_direct((long)ip, (long)new_addr); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index c34949d449844908f33b6c5b07b0c7f2bd75cbd9..6fd66a47b4767a2ee1f47726831d7c1fc58782f6 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1612,14 +1612,10 @@ static inline int check_kprobe_rereg(struct kprobe *p) int __weak arch_check_ftrace_location(struct kprobe *p) { - unsigned long ftrace_addr; + unsigned long addr = (unsigned long)p->addr; - ftrace_addr = ftrace_location((unsigned long)p->addr); - if (ftrace_addr) { + if (ftrace_location(addr) == addr) { #ifdef CONFIG_KPROBES_ON_FTRACE - /* Given address is not on the instruction boundary */ - if ((unsigned long)p->addr != ftrace_addr) - return -EILSEQ; p->flags |= KPROBE_FLAG_FTRACE; #else /* !CONFIG_KPROBES_ON_FTRACE */ return -EINVAL; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 9682ceb1f3dfac66a4ad141f382fd5ae6d9eadf1..40512793e8e9487d8d68234c400c72b26b3b735c 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -201,9 +201,14 @@ config DYNAMIC_FTRACE_WITH_REGS depends on HAVE_DYNAMIC_FTRACE_WITH_REGS config DYNAMIC_FTRACE_WITH_DIRECT_CALLS - def_bool y + bool "Support for calling custom trampoline from fentry directly" + default y depends on DYNAMIC_FTRACE_WITH_REGS depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + help + This option enables calling custom trampoline from ftrace fentry + directly, instead of using ftrace regs caller. This may reserve more + space in the fentry, making the kernel image larger. 
config FUNCTION_PROFILER bool "Kernel function profiler" diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6f91431bd03bb41388dd5d9ab7d6a7d50f6342b0..b2888890add04928e772a3e9e9bb2196d844af21 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1576,17 +1576,34 @@ unsigned long ftrace_location_range(unsigned long start, unsigned long end) } /** - * ftrace_location - return true if the ip giving is a traced location + * ftrace_location - return the ftrace location * @ip: the instruction pointer to check * - * Returns rec->ip if @ip given is a pointer to a ftrace location. - * That is, the instruction that is either a NOP or call to - * the function tracer. It checks the ftrace internal tables to - * determine if the address belongs or not. + * If @ip matches the ftrace location, return @ip. + * If @ip matches sym+0, return sym's ftrace location. + * Otherwise, return 0. */ unsigned long ftrace_location(unsigned long ip) { - return ftrace_location_range(ip, ip); + struct dyn_ftrace *rec; + unsigned long offset; + unsigned long size; + + rec = lookup_rec(ip, ip); + if (!rec) { + if (!kallsyms_lookup_size_offset(ip, &size, &offset)) + goto out; + + /* map sym+0 to __fentry__ */ + if (!offset) + rec = lookup_rec(ip, ip + size - 1); + } + + if (rec) + return rec->ip; + +out: + return 0; } /** @@ -4949,7 +4966,8 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) { struct ftrace_func_entry *entry; - if (!ftrace_location(ip)) + ip = ftrace_location(ip); + if (!ip) return -EINVAL; if (remove) { @@ -5074,6 +5092,11 @@ static struct ftrace_direct_func *ftrace_alloc_direct_func(unsigned long addr) return direct; } +bool __weak ftrace_directable(struct dyn_ftrace *rec) +{ + return true; +} + /** * register_ftrace_direct - Call a custom trampoline directly * @ip: The address of the nop at the beginning of a function @@ -5097,11 +5120,16 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) struct ftrace_func_entry 
*entry; struct ftrace_hash *free_hash = NULL; struct dyn_ftrace *rec; - int ret = -EBUSY; + int ret = -ENODEV; mutex_lock(&direct_mutex); + ip = ftrace_location(ip); + if (!ip) + goto out_unlock; + /* See if there's a direct function at @ip already */ + ret = -EBUSY; if (ftrace_find_rec_direct(ip)) goto out_unlock; @@ -5110,6 +5138,9 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) if (!rec) goto out_unlock; + if (!ftrace_directable(rec)) + goto out_unlock; + /* * Check if the rec says it has a direct call but we didn't * find one earlier? @@ -5230,6 +5261,10 @@ int unregister_ftrace_direct(unsigned long ip, unsigned long addr) mutex_lock(&direct_mutex); + ip = ftrace_location(ip); + if (!ip) + goto out_unlock; + entry = find_direct_entry(&ip, NULL); if (!entry) goto out_unlock; @@ -5361,6 +5396,11 @@ int modify_ftrace_direct(unsigned long ip, mutex_lock(&direct_mutex); mutex_lock(&ftrace_lock); + + ip = ftrace_location(ip); + if (!ip) + goto out_unlock; + entry = find_direct_entry(&ip, &rec); if (!entry) goto out_unlock; @@ -6193,6 +6233,10 @@ static int ftrace_cmp_ips(const void *a, const void *b) return 0; } +void __weak ftrace_rec_arch_init(struct dyn_ftrace *rec, unsigned long addr) +{ +} + static int ftrace_process_locs(struct module *mod, unsigned long *start, unsigned long *end) @@ -6248,7 +6292,9 @@ static int ftrace_process_locs(struct module *mod, pg = start_pg; while (p < end) { unsigned long end_offset; - addr = ftrace_call_adjust(*p++); + unsigned long nop_addr = *p++; + + addr = ftrace_call_adjust(nop_addr); /* * Some architecture linkers will pad between * the different mcount_loc sections of different @@ -6270,6 +6316,7 @@ static int ftrace_process_locs(struct module *mod, rec = &pg->records[pg->index++]; rec->ip = addr; + ftrace_rec_arch_init(rec, nop_addr); } if (pg->next) {