diff --git a/code/rv64-static-call/0001-implement-riscv-out-of-line-static-call.patch b/code/rv64-static-call/0001-implement-riscv-out-of-line-static-call.patch new file mode 100644 index 0000000000000000000000000000000000000000..faf4a1bfa55524f560d1cdf161cb268eb040efb1 --- /dev/null +++ b/code/rv64-static-call/0001-implement-riscv-out-of-line-static-call.patch @@ -0,0 +1,401 @@ +From ff5abb19f71efd1378f2d41fbdaad49930e8dd86 Mon Sep 17 00:00:00 2001 +From: Juhan Jin +Date: Sat, 3 Aug 2024 01:37:47 +0800 +Subject: [PATCH] riscv: Add support for out-of-line static calls + +Add support for out-of-line static calls on RISC-V. + +An out-of-line static call first jumps to a trampoline, then jumps to +the target function from the trampoline. In order for the address in +a trampoline to be updated atomically, a trampoline utilizes a +three-instruction sequence. The target address in this trampoline is +either the old address or the new address when this trampoline is +being updated. + +Out-of-line static calls work as expected. + +The "inline" version is under development. 
+ +Signed-off-by: Juhan Jin +Signed-off-by: ForrestNiu +Signed-off-by: Falcon +--- + arch/riscv/Kconfig | 1 + + arch/riscv/include/asm/static_call.h | 33 ++++ + arch/riscv/kernel/Makefile | 1 + + arch/riscv/kernel/static_call.c | 286 +++++++++++++++++++++++++++ + arch/riscv/kernel/vmlinux.lds.S | 1 + + 5 files changed, 322 insertions(+) + create mode 100644 arch/riscv/include/asm/static_call.h + create mode 100644 arch/riscv/kernel/static_call.c + +diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig +index 939ea7f6a228..88daceb02e31 100644 +--- a/arch/riscv/Kconfig ++++ b/arch/riscv/Kconfig +@@ -176,6 +176,7 @@ config RISCV + select HAVE_SAMPLE_FTRACE_DIRECT + select HAVE_SAMPLE_FTRACE_DIRECT_MULTI + select HAVE_STACKPROTECTOR ++ select HAVE_STATIC_CALL + select HAVE_SYSCALL_TRACEPOINTS + select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU + select IRQ_DOMAIN +diff --git a/arch/riscv/include/asm/static_call.h b/arch/riscv/include/asm/static_call.h +new file mode 100644 +index 000000000000..38d11fb0f346 +--- /dev/null ++++ b/arch/riscv/include/asm/static_call.h +@@ -0,0 +1,33 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef _ASM_STATIC_CALL_H ++#define _ASM_STATIC_CALL_H ++ ++#define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \ ++ asm(".pushsection .static_call.text, \"ax\" \n" \ ++ ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \ ++ STATIC_CALL_TRAMP_STR(name) ": \n" \ ++ ".option push \n" \ ++ ".option norvc \n" \ ++ ".option norelax \n" \ ++ insns " \n" \ ++ ".option pop \n" \ ++ ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \ ++ ".size " STATIC_CALL_TRAMP_STR(name) ", . 
- " STATIC_CALL_TRAMP_STR(name) " \n" \ ++ ".popsection \n") ++ ++#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ ++ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "la t0, "#func"; jalr t0, 0(t0);") ++ ++#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ ++ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; nop; nop;") ++ ++#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \ ++ ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0) ++ ++#define ARCH_ADD_TRAMP_KEY(name) \ ++ asm(".pushsection .static_call_tramp_key, \"a\" \n" \ ++ ".long " STATIC_CALL_TRAMP_STR(name) " - . \n" \ ++ ".long " STATIC_CALL_KEY_STR(name) " - . \n" \ ++ ".popsection \n") ++ ++#endif /* _ASM_STATIC_CALL_H */ +diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile +index 7f88cc4931f5..7e06efb1f65d 100644 +--- a/arch/riscv/kernel/Makefile ++++ b/arch/riscv/kernel/Makefile +@@ -55,6 +55,7 @@ obj-y += setup.o + obj-y += signal.o + obj-y += syscall_table.o + obj-y += sys_riscv.o ++obj-y += static_call.o + obj-y += sys_hwprobe.o + obj-y += time.o + obj-y += traps.o +diff --git a/arch/riscv/kernel/static_call.c b/arch/riscv/kernel/static_call.c +new file mode 100644 +index 000000000000..10303a14a67b +--- /dev/null ++++ b/arch/riscv/kernel/static_call.c +@@ -0,0 +1,286 @@ ++// SPDX-License-Identifier: GPL-2.0 ++#include ++#include ++#include ++#include ++#include ++#include ++ ++enum insn_type { ++ CALL = 0, /* site call */ ++ NOP = 1, /* site cond-call */ ++ JMP = 2, /* tramp / site tail-call */ ++ RET = 3, /* tramp / site cond-tail-call */ ++}; ++ ++static void __ref __static_call_transform(void *insn, enum insn_type type, ++ void *func, bool modinit) ++{ /* Patch the three-instruction trampoline at insn: JMP retargets it to func, RET makes it return, CALL and NOP leave it untouched; modinit is not used */ ++ u32 call[2] = {0}; ++ ++ /* Snapshot the trampoline's three 32-bit instruction slots (numbered from one) before any patching */ ++ u32 first_insn = *(u32*)(insn); ++ u32 second_insn = *(u32*)(insn + sizeof(u32)); ++ u32 third_insn = *(u32*)(insn + 2*sizeof(u32)); ++ ++ u32 ret_insn = 0x00008067U; /* encoding of "ret", i.e. "jalr zero, 0(ra)" */ ++ ++ switch (type) { ++ case CALL: /* nothing to do: call sites are not patched here */ ++ break; ++ ++ case NOP: /* nothing to do: call sites are not patched here */ ++ break; ++ ++ case JMP: 
++ if (first_insn == ret_insn) { /* trampoline currently returns: a NULL trampoline ("ret; nop; nop") or one patched by the RET case */ ++ make_call_t0((insn+sizeof(u32)), func, call); ++ ++ patch_text_nosync(insn+sizeof(u32), &call[0], sizeof(call[0])); ++ patch_text_nosync(insn+2*sizeof(u32), &call[1], sizeof(call[1])); ++ ++ /* slots two and three now hold the real jump; only then replace the "ret" in slot one with "auipc t0, 0" */ ++ call[0] &= ~(RV_U_IMM_31_12_MASK); ++ patch_text_nosync(insn, &call[0], sizeof(call[0])); ++ } else if (!riscv_insn_is_auipc(second_insn) && ++ !riscv_insn_is_jalr(second_insn)) { ++ ++ s32 imm_cur = riscv_insn_extract_utype_itype_imm(first_insn, second_insn); ++ ++ if (insn + imm_cur == func) /* already targets func */ ++ break; ++ ++ /* ++ * When a trampoline is initialized, its three ++ * instructions are as follows: ++ * ++ * auipc t0, imm20_cur ++ * addi t0, t0, imm12_cur ++ * jalr t0, 0(t0) ++ * ++ * First, replace the second instruction ++ * "addi t0, t0, imm12_cur" with "jalr t0, imm12_cur(t0)". ++ * ++ * The resulting trampoline still jumps to the current ++ * function. ++ */ ++ ++ s32 imm12_cur = RV_EXTRACT_ITYPE_IMM(second_insn); ++ /* Build "jalr t0, imm12_cur(t0)" by OR-ing the immediate into the existing third instruction, expected to be "jalr t0, 0(t0)" */ ++ u32 jalr_insn = third_insn | ((imm12_cur & RV_I_IMM_11_0_MASK) << RV_I_IMM_11_0_OPOFF); ++ ++ patch_text_nosync(insn+sizeof(u32), &jalr_insn, sizeof(jalr_insn)); ++ ++ /* ++ * After the previous adjustment, the trampoline is as ++ * follows: ++ * ++ * auipc t0, imm20_cur ++ * jalr t0, imm12_cur(t0) ++ * jalr t0, 0(t0) ++ * ++ * The first two instructions guarantee that the ++ * third instruction won't be executed, so we can ++ * replace the third instruction with ++ * "jalr t0, imm12_new(t0)". ++ * ++ * The resulting trampoline still jumps to the current ++ * function. ++ * ++ * In order for the trampoline to jump to the new ++ * function, we have to replace the second instruction ++ * with "auipc t0, imm20_new". 
++ * ++ * The updated trampoline is as follows: ++ * ++ * auipc t0, imm20_cur ++ * auipc t0, imm20_new ++ * jalr t0, imm12_new(t0) ++ */ ++ ++ /* Build the auipc/jalr pair that jumps to func from slot two; the jalr (slot three) is written first, the auipc (slot two) last */ ++ make_call_t0((insn+sizeof(u32)), func, call); ++ ++ patch_text_nosync(insn+2*sizeof(u32), &call[1], sizeof(call[1])); ++ patch_text_nosync(insn+sizeof(u32), &call[0], sizeof(call[0])); ++ } else if (riscv_insn_is_auipc(second_insn)) { ++ ++ s32 imm_cur = riscv_insn_extract_utype_itype_imm(second_insn, third_insn); ++ ++ if (insn + 4 + imm_cur == func) /* the live auipc sits in slot two, hence the +4; already targets func */ ++ break; ++ ++ /* ++ * Now, the trampoline is as follows: ++ * ++ * auipc t0, imm20_old ++ * auipc t0, imm20_cur ++ * jalr t0, imm12_cur(t0) ++ * ++ * The first instruction does not take effect. We can ++ * replace it with "auipc t0, imm20_new". ++ * ++ * The resulting trampoline still jumps to the current ++ * function. ++ * ++ * In order for the trampoline to jump to the new ++ * function, we have to replace the second instruction ++ * with "jalr t0, imm12_new(t0)". ++ * ++ * The updated trampoline is as follows: ++ * ++ * auipc t0, imm20_new ++ * jalr t0, imm12_new(t0) ++ * jalr t0, imm12_cur(t0) ++ */ ++ make_call_t0(insn, func, call); ++ patch_text_nosync(insn, &call[0], sizeof(call[0])); ++ patch_text_nosync(insn+sizeof(u32), &call[1], sizeof(call[1])); ++ } else if (riscv_insn_is_jalr(second_insn)) { ++ ++ s32 imm_cur = riscv_insn_extract_utype_itype_imm(first_insn, second_insn); ++ ++ if (insn + imm_cur == func) /* already targets func */ ++ break; ++ ++ /* ++ * Now, the trampoline is as follows: ++ * ++ * auipc t0, imm20_cur ++ * jalr t0, imm12_cur(t0) ++ * jalr t0, imm12_old(t0) ++ * ++ * The third instruction does not take effect. We can ++ * replace it with "jalr t0, imm12_new(t0)". ++ * ++ * The resulting trampoline still jumps to the current ++ * function. 
++ * ++ * In order for the trampoline to jump to the new ++ * function, we have to replace the second instruction ++ * with "auipc t0, imm20_new". ++ * ++ * The updated trampoline is as follows: ++ * ++ * auipc t0, imm20_cur ++ * auipc t0, imm20_new ++ * jalr t0, imm12_new(t0) ++ */ ++ make_call_t0((insn+sizeof(u32)), func, call); ++ ++ patch_text_nosync(insn+2*sizeof(u32), &call[1], sizeof(call[1])); ++ patch_text_nosync(insn+sizeof(u32), &call[0], sizeof(call[0])); ++ } ++ break; ++ case RET: ++ if (first_insn != ret_insn) { /* only slot one is overwritten; slots two and three keep their previous contents */ ++ patch_text_nosync(insn, &ret_insn, sizeof(ret_insn)); ++ } ++ break; ++ } ++} ++ ++static inline enum insn_type __sc_insn(bool null, bool tail) ++{ ++ /* ++ * Encode the following table without branches: ++ * ++ * tail null insn ++ * -----+-------+------ ++ * 0 | 0 | CALL ++ * 0 | 1 | NOP ++ * 1 | 0 | JMP ++ * 1 | 1 | RET ++ */ ++ return 2*tail + null; ++} ++ ++void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) ++{ /* site and tail are ignored: only the trampoline is patched, always as a tail-type insn (JMP, or RET when func is NULL), with text_mutex held */ ++ mutex_lock(&text_mutex); ++ __static_call_transform(tramp, __sc_insn(!func, true), func, false); ++ mutex_unlock(&text_mutex); ++} ++EXPORT_SYMBOL_GPL(arch_static_call_transform); ++ ++static int func_a(int x) ++{ ++ return x+1; ++} ++ ++static int func_b(int x) ++{ ++ return x+2; ++} ++ ++static int func_c(int x) ++{ ++ return x+3; ++} ++ ++DEFINE_STATIC_CALL(sc_selftest, func_a); ++ ++DEFINE_STATIC_CALL_NULL(sc_selftest1, func_a); ++ ++DEFINE_STATIC_CALL_RET0(sc_selftest2, func_a); ++ ++/* ++static struct static_call_data { ++ int (*func)(int); ++ int val; ++ int expect; ++} static_call_data [] __initdata = { ++ { NULL, 2, 3 }, ++ { func_b, 2, 4 }, ++ { func_a, 2, 3 } ++}; ++*/ ++ ++static int __init test_static_call_init(void) ++{ /* Self-test registered as an early initcall: WARNs when a static call does not return the value expected for its current target */ ++ WARN_ON(static_call(sc_selftest)(2) != 3); ++ ++ static_call_update(sc_selftest, func_a); ++ WARN_ON(static_call(sc_selftest)(2) != 3); ++ ++ static_call_update(sc_selftest, func_b); ++ WARN_ON(static_call(sc_selftest)(2) != 4); ++ ++ static_call_update(sc_selftest, 
func_b); ++ WARN_ON(static_call(sc_selftest)(2) != 4); ++ ++ static_call_update(sc_selftest, func_c); ++ WARN_ON(static_call(sc_selftest)(2) != 5); ++ ++ static_call_update(sc_selftest, func_c); ++ WARN_ON(static_call(sc_selftest)(2) != 5); ++ ++ static_call_update(sc_selftest, func_a); ++ WARN_ON(static_call(sc_selftest)(2) != 3); ++ ++ static_call_update(sc_selftest, NULL); /* a NULL target selects the RET patch type in arch_static_call_transform */ ++ ++ static_call_update(sc_selftest, NULL); /* repeated NULL update, issued while the trampoline is already NULL */ ++ ++ static_call_update(sc_selftest, func_b); ++ WARN_ON(static_call(sc_selftest)(2) != 4); ++ ++ static_call_update(sc_selftest1, func_a); ++ WARN_ON(static_call(sc_selftest1)(2) != 3); ++ ++ WARN_ON(static_call(sc_selftest2)(2) != 0); ++ ++ /* ++ for (i = 0; i < ARRAY_SIZE(static_call_data); i++ ) { ++ struct static_call_data *scd = &static_call_data[i]; ++ ++ if (scd->func) ++ static_call_update(sc_selftest, scd->func); ++ ++ WARN_ON(static_call(sc_selftest)(scd->val) != scd->expect); ++ } ++ */ ++ ++ return 0; ++} ++early_initcall(test_static_call_init); +diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S +index 002ca58dd998..2962f30b064c 100644 +--- a/arch/riscv/kernel/vmlinux.lds.S ++++ b/arch/riscv/kernel/vmlinux.lds.S +@@ -45,6 +45,7 @@ SECTIONS + ENTRY_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT ++ STATIC_CALL_TEXT + _etext = .; + } + +-- +2.39.2 +