diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index f25dd0bd7790caa81a57cb05fcb5189f2392638b..baa4851317b26fab6569988faaae4b49975d0e28 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -1926,6 +1926,7 @@ CONFIG_CGROUP_NET_CLASSID=y CONFIG_NET_RX_BUSY_POLL=y CONFIG_BQL=y CONFIG_BPF_STREAM_PARSER=y +CONFIG_HISOCK=y CONFIG_NET_FLOW_LIMIT=y # diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 12c0278294e3f6856810c630b4f54b7478e14b04..0c54a15945ac2cb37a0e4337f44e210323fd58cd 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -188,6 +188,10 @@ enum aarch64_insn_ldst_type { AARCH64_INSN_LDST_STORE_REL_EX, AARCH64_INSN_LDST_SIGNED_LOAD_IMM_OFFSET, AARCH64_INSN_LDST_SIGNED_LOAD_REG_OFFSET, +#ifdef CONFIG_HISOCK + AARCH64_INSN_LDST_LOAD_PAIR_SIGNED_OFFSET, + AARCH64_INSN_LDST_STORE_PAIR_SIGNED_OFFSET, +#endif }; enum aarch64_insn_adsb_type { diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index 7232b1e70a125f34f9f7d0cc05d51d89b8d40d1a..4609c550c055eed2ec2ae26e65a5e88bfc6e83e8 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -500,6 +500,14 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, case AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX: insn = aarch64_insn_get_stp_post_value(); break; +#ifdef CONFIG_HISOCK + case AARCH64_INSN_LDST_LOAD_PAIR_SIGNED_OFFSET: + insn = aarch64_insn_get_ldp_value(); + break; + case AARCH64_INSN_LDST_STORE_PAIR_SIGNED_OFFSET: + insn = aarch64_insn_get_stp_value(); + break; +#endif default: pr_err("%s: unknown load/store encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index 23b1b34db088ec0dda2c366cb874c1a80104112e..e30549bacf72d64e1f6fe59ed4a59572280be361 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -102,6 +102,21 @@ /* Rt = Rn[0]; Rt2 = Rn[8]; 
Rn += 16; */ #define A64_POP(Rt, Rt2, Rn) A64_LS_PAIR(Rt, Rt2, Rn, 16, LOAD, POST_INDEX) +#ifdef CONFIG_HISOCK +#define A64_STP(Rt, Rt2, Rn, offset) \ + A64_LS_PAIR(Rt, Rt2, Rn, offset, STORE, SIGNED_OFFSET) +#define A64_LDP(Rt, Rt2, Rn, offset) \ + A64_LS_PAIR(Rt, Rt2, Rn, offset, LOAD, SIGNED_OFFSET) +#define A64_STP32(Wt, Wt2, Rn, offset) \ + aarch64_insn_gen_load_store_pair(Wt, Wt2, Rn, offset, \ + AARCH64_INSN_VARIANT_32BIT, \ + AARCH64_INSN_LDST_STORE_PAIR_SIGNED_OFFSET) +#define A64_LDP32(Wt, Wt2, Rn, offset) \ + aarch64_insn_gen_load_store_pair(Wt, Wt2, Rn, offset, \ + AARCH64_INSN_VARIANT_32BIT, \ + AARCH64_INSN_LDST_LOAD_PAIR_SIGNED_OFFSET) +#endif + /* Load/store exclusive */ #define A64_SIZE(sf) \ ((sf) ? AARCH64_INSN_SIZE_64 : AARCH64_INSN_SIZE_32) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 8b957d2f60ebe2ad04b86359d03cc82c4ba0837f..92ea5a964b1b92c076eabe5ea2289ce1dc7cafbb 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -26,11 +26,26 @@ #include "bpf_jit.h" +#ifdef CONFIG_HISOCK +#define TCALL_CNT (MAX_BPF_JIT_REG + 0) +#define FP_BOTTOM (MAX_BPF_JIT_REG + 1) +#define TMP_REG_1 (MAX_BPF_JIT_REG + 2) +#define TMP_REG_2 (MAX_BPF_JIT_REG + 3) +#define TMP_REG_3 (MAX_BPF_JIT_REG + 4) +#define TMP_REG_4 (MAX_BPF_JIT_REG + 5) +#define TMP_REG_5 (MAX_BPF_JIT_REG + 6) +#define TMP_REG_6 (MAX_BPF_JIT_REG + 7) +#define TMP_REG_7 (MAX_BPF_JIT_REG + 8) +#define TMP_REG_8 (MAX_BPF_JIT_REG + 9) +#define TMP_REG_9 (MAX_BPF_JIT_REG + 10) +#define TMP_REG_10 (MAX_BPF_JIT_REG + 11) +#else #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) #define TCALL_CNT (MAX_BPF_JIT_REG + 2) #define TMP_REG_3 (MAX_BPF_JIT_REG + 3) #define FP_BOTTOM (MAX_BPF_JIT_REG + 4) +#endif #define check_imm(bits, imm) do { \ if ((((imm) > 0) && ((imm) >> (bits))) || \ @@ -64,6 +79,15 @@ static const int bpf2a64[] = { [TMP_REG_1] = A64_R(10), [TMP_REG_2] = A64_R(11), [TMP_REG_3] = A64_R(12), 
+#ifdef CONFIG_HISOCK + [TMP_REG_4] = A64_R(13), + [TMP_REG_5] = A64_R(14), + [TMP_REG_6] = A64_R(15), + [TMP_REG_7] = A64_R(5), + [TMP_REG_8] = A64_R(6), + [TMP_REG_9] = A64_R(7), + [TMP_REG_10] = A64_R(28), +#endif /* tail_call_cnt */ [TCALL_CNT] = A64_R(26), /* temporary register for blinding constants */ @@ -801,6 +825,234 @@ static int add_exception_handler(const struct bpf_insn *insn, return 0; } +#ifdef CONFIG_HISOCK +static bool support_unaligned_access(void) +{ + unsigned long sctlr = SCTLR_ELx_A; + + switch (read_sysreg(CurrentEL)) { + case CurrentEL_EL1: + sctlr = read_sysreg(sctlr_el1); + break; + case CurrentEL_EL2: + sctlr = read_sysreg(sctlr_el2); + break; + default: + /* not EL1 and EL2 ? */ + break; + } + + return (sctlr & SCTLR_ELx_A) ? false : true; +} + +extern u64 bpf_ext_memcpy(void *dst, size_t dst_sz, + const void *src, size_t src_sz); + +static void emit_memcpy(struct jit_ctx *ctx, int size) +{ + u8 dst = bpf2a64[BPF_REG_1]; + u8 src = bpf2a64[BPF_REG_3]; + u8 tmp1 = bpf2a64[TMP_REG_1]; + u8 tmp2 = bpf2a64[TMP_REG_2]; + u8 tmp3 = bpf2a64[TMP_REG_3]; + u8 tmp4 = bpf2a64[TMP_REG_4]; + u8 tmp5 = bpf2a64[TMP_REG_5]; + u8 tmp6 = bpf2a64[TMP_REG_6]; + u8 tmp7 = bpf2a64[TMP_REG_7]; + u8 tmp8 = bpf2a64[TMP_REG_8]; + u8 tmp9 = bpf2a64[TMP_REG_9]; + u8 tmp10 = bpf2a64[TMP_REG_10]; + + if (!support_unaligned_access()) { + emit_call((u64)bpf_ext_memcpy, ctx); + return; + } + + switch (size) { + case 0: + break; + case 1: + emit(A64_LDRBI(tmp1, src, 0), ctx); + emit(A64_STRBI(tmp1, dst, 0), ctx); + break; + case 2: + emit(A64_LDRHI(tmp1, src, 0), ctx); + emit(A64_STRHI(tmp1, dst, 0), ctx); + break; + case 3: + emit(A64_LDRHI(tmp1, src, 0), ctx); + emit(A64_LDRBI(tmp2, src, 2), ctx); + emit(A64_STRHI(tmp1, dst, 0), ctx); + emit(A64_STRBI(tmp2, dst, 2), ctx); + break; + case 4: + emit(A64_LDR32I(tmp1, src, 0), ctx); + emit(A64_STR32I(tmp1, dst, 0), ctx); + break; + case 5: + emit(A64_LDR32I(tmp1, src, 0), ctx); + emit(A64_LDRBI(tmp2, src, 4), ctx); + 
emit(A64_STR32I(tmp1, dst, 0), ctx); + emit(A64_STRBI(tmp2, dst, 4), ctx); + break; + case 6: + emit(A64_LDR32I(tmp1, src, 0), ctx); + emit(A64_LDRHI(tmp2, src, 4), ctx); + emit(A64_STR32I(tmp1, dst, 0), ctx); + emit(A64_STRHI(tmp2, dst, 4), ctx); + break; + case 7: + emit(A64_LDR32I(tmp1, src, 0), ctx); + emit(A64_LDRHI(tmp2, src, 4), ctx); + emit(A64_LDRBI(tmp3, src, 6), ctx); + emit(A64_STR32I(tmp1, dst, 0), ctx); /* fix: was src — stored the word back into the source buffer, leaving dst[0..3] unwritten */ + emit(A64_STRHI(tmp2, dst, 4), ctx); + emit(A64_STRBI(tmp3, dst, 6), ctx); + break; + case 8: + emit(A64_LDR64I(tmp1, src, 0), ctx); + emit(A64_STR64I(tmp1, dst, 0), ctx); + break; + case 9 ... 15: + emit(A64_ADD_I(1, tmp1, src, size), ctx); + emit(A64_ADD_I(1, tmp2, dst, size), ctx); + emit(A64_LDR64I(tmp3, src, 0), ctx); + emit(A64_LDP32(tmp4, tmp5, tmp1, -8), ctx); + emit(A64_STR64I(tmp3, dst, 0), ctx); + emit(A64_STP32(tmp4, tmp5, tmp2, -8), ctx); + break; + case 16: + emit(A64_LDP(tmp1, tmp2, src, 0), ctx); + emit(A64_STP(tmp1, tmp2, dst, 0), ctx); + break; + case 17 ... 31: + emit(A64_ADD_I(1, tmp1, src, size), ctx); + emit(A64_ADD_I(1, tmp2, dst, size), ctx); + emit(A64_LDP(tmp3, tmp4, src, 0), ctx); + emit(A64_LDP(tmp5, tmp6, tmp1, -16), ctx); + emit(A64_STP(tmp3, tmp4, dst, 0), ctx); + emit(A64_STP(tmp5, tmp6, tmp2, -16), ctx); + break; + case 32: + emit(A64_LDP(tmp1, tmp2, src, 0), ctx); + emit(A64_LDP(tmp3, tmp4, src, 16), ctx); + emit(A64_STP(tmp1, tmp2, dst, 0), ctx); + emit(A64_STP(tmp3, tmp4, dst, 16), ctx); + break; + case 33 ...
63: + emit(A64_ADD_I(1, tmp1, src, size), ctx); + emit(A64_ADD_I(1, tmp2, dst, size), ctx); + emit(A64_LDP(tmp3, tmp4, src, 0), ctx); + emit(A64_LDP(tmp5, tmp6, src, 16), ctx); + emit(A64_STP(tmp3, tmp4, dst, 0), ctx); + emit(A64_STP(tmp5, tmp6, dst, 16), ctx); + emit(A64_LDP(tmp3, tmp4, tmp1, -32), ctx); + emit(A64_LDP(tmp5, tmp6, tmp1, -16), ctx); + emit(A64_STP(tmp3, tmp4, tmp2, -32), ctx); + emit(A64_STP(tmp5, tmp6, tmp2, -16), ctx); + break; + case 64: + emit(A64_LDP(tmp1, tmp2, src, 0), ctx); + emit(A64_LDP(tmp3, tmp4, src, 16), ctx); + emit(A64_LDP(tmp5, tmp6, src, 32), ctx); + emit(A64_LDP(tmp7, tmp8, src, 48), ctx); + emit(A64_STP(tmp1, tmp2, dst, 0), ctx); + emit(A64_STP(tmp3, tmp4, dst, 16), ctx); + emit(A64_STP(tmp5, tmp6, dst, 32), ctx); + emit(A64_STP(tmp7, tmp8, dst, 48), ctx); + break; + case 65 ... 95: + /* copy first 48 bytes */ + emit(A64_LDP(tmp1, tmp2, src, 0), ctx); + emit(A64_LDP(tmp3, tmp4, src, 16), ctx); + emit(A64_LDP(tmp5, tmp6, src, 32), ctx); + + emit(A64_STP(tmp1, tmp2, dst, 0), ctx); + emit(A64_STP(tmp3, tmp4, dst, 16), ctx); + emit(A64_STP(tmp5, tmp6, dst, 32), ctx); + + /* copy last 48 bytes */ + emit(A64_ADD_I(1, tmp7, src, size), ctx); + emit(A64_ADD_I(1, tmp8, dst, size), ctx); + + emit(A64_LDP(tmp1, tmp2, tmp7, -48), ctx); + emit(A64_LDP(tmp3, tmp4, tmp7, -32), ctx); + emit(A64_LDP(tmp5, tmp6, tmp7, -16), ctx); + + emit(A64_STP(tmp1, tmp2, tmp8, -48), ctx); + emit(A64_STP(tmp3, tmp4, tmp8, -32), ctx); + emit(A64_STP(tmp5, tmp6, tmp8, -16), ctx); + break; + case 96: + emit(A64_LDP(tmp1, tmp2, src, 0), ctx); + emit(A64_LDP(tmp3, tmp4, src, 16), ctx); + emit(A64_LDP(tmp5, tmp6, src, 32), ctx); + emit(A64_LDP(tmp7, tmp8, src, 48), ctx); + + emit(A64_STP(tmp1, tmp2, dst, 0), ctx); + emit(A64_STP(tmp3, tmp4, dst, 16), ctx); + emit(A64_STP(tmp5, tmp6, dst, 32), ctx); + emit(A64_STP(tmp7, tmp8, dst, 48), ctx); + + emit(A64_LDP(tmp1, tmp2, src, 64), ctx); + emit(A64_LDP(tmp3, tmp4, src, 80), ctx); + emit(A64_STP(tmp1, tmp2, dst, 64), 
ctx); + emit(A64_STP(tmp3, tmp4, dst, 80), ctx); + break; + case 97 ... 127: + emit(A64_ADD_I(1, tmp9, src, size), ctx); + emit(A64_ADD_I(1, tmp10, dst, size), ctx); + + /* copy first 64 bytes */ + emit(A64_LDP(tmp1, tmp2, src, 0), ctx); + emit(A64_LDP(tmp3, tmp4, src, 16), ctx); + emit(A64_LDP(tmp5, tmp6, src, 32), ctx); + emit(A64_LDP(tmp7, tmp8, src, 48), ctx); + + emit(A64_STP(tmp1, tmp2, dst, 0), ctx); + emit(A64_STP(tmp3, tmp4, dst, 16), ctx); + emit(A64_STP(tmp5, tmp6, dst, 32), ctx); + emit(A64_STP(tmp7, tmp8, dst, 48), ctx); + + /* copy last 64 bytes */ + emit(A64_LDP(tmp1, tmp2, tmp9, -64), ctx); + emit(A64_LDP(tmp3, tmp4, tmp9, -48), ctx); + emit(A64_LDP(tmp5, tmp6, tmp9, -32), ctx); + emit(A64_LDP(tmp7, tmp8, tmp9, -16), ctx); + + emit(A64_STP(tmp1, tmp2, tmp10, -64), ctx); + emit(A64_STP(tmp3, tmp4, tmp10, -48), ctx); + emit(A64_STP(tmp5, tmp6, tmp10, -32), ctx); + emit(A64_STP(tmp7, tmp8, tmp10, -16), ctx); + break; + case 128: + emit(A64_LDP(tmp1, tmp2, src, 0), ctx); + emit(A64_LDP(tmp3, tmp4, src, 16), ctx); + emit(A64_LDP(tmp5, tmp6, src, 32), ctx); + emit(A64_LDP(tmp7, tmp8, src, 48), ctx); + + emit(A64_STP(tmp1, tmp2, dst, 0), ctx); + emit(A64_STP(tmp3, tmp4, dst, 16), ctx); + emit(A64_STP(tmp5, tmp6, dst, 32), ctx); + emit(A64_STP(tmp7, tmp8, dst, 48), ctx); + + emit(A64_LDP(tmp1, tmp2, src, 64), ctx); + emit(A64_LDP(tmp3, tmp4, src, 80), ctx); + emit(A64_LDP(tmp5, tmp6, src, 96), ctx); + emit(A64_LDP(tmp7, tmp8, src, 112), ctx); + + emit(A64_STP(tmp1, tmp2, dst, 64), ctx); + emit(A64_STP(tmp3, tmp4, dst, 80), ctx); + emit(A64_STP(tmp5, tmp6, dst, 96), ctx); + emit(A64_STP(tmp7, tmp8, dst, 112), ctx); + break; + default: + emit_call((u64)bpf_ext_memcpy, ctx); + break; + } +} +#endif + /* JITs an eBPF instruction. * Returns: * 0 - successfully JITed an 8-byte eBPF instruction. 
@@ -1164,6 +1416,13 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool func_addr_fixed; u64 func_addr; +#ifdef CONFIG_HISOCK + if (insn->src_reg == 0 && insn->imm == BPF_FUNC_ext_memcpy) { + emit_memcpy(ctx, insn->off); + break; + } +#endif + ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &func_addr, &func_addr_fixed); if (ret < 0) @@ -1738,6 +1997,13 @@ bool bpf_jit_supports_kfunc_call(void) return true; } +#ifdef CONFIG_HISOCK +bool bpf_jit_supports_ext_helper(void) +{ + return true; +} +#endif + u64 bpf_jit_alloc_exec_limit(void) { return VMALLOC_END - VMALLOC_START; diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index d00c4dd20b0f8a82c22b7985d7fce6d6a54ac7c1..1c595f3a89b571728457818da7dc572f60bd9af8 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -1894,6 +1894,7 @@ CONFIG_CGROUP_NET_CLASSID=y CONFIG_NET_RX_BUSY_POLL=y CONFIG_BQL=y CONFIG_BPF_STREAM_PARSER=y +# CONFIG_HISOCK is not set CONFIG_NET_FLOW_LIMIT=y # diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h index fb6adb1c3889cf3f8b723b24f5e3f8f07752014d..a9c988c8e217d7af6f624beb115a1d9db4306acc 100644 --- a/include/linux/bpf-cgroup-defs.h +++ b/include/linux/bpf-cgroup-defs.h @@ -45,6 +45,9 @@ enum cgroup_bpf_attach_type { CGROUP_INET_SOCK_RELEASE, CGROUP_LSM_START, CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1, +#ifdef CONFIG_HISOCK + KABI_BROKEN_INSERT_ENUM(HISOCK_EGRESS) +#endif MAX_CGROUP_BPF_ATTACH_TYPE }; diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index d4f2c8706042cd2e079775a1fa643cfc7793bfba..f94f57d185b864d44a45a18e4a1194e83dbdbfc6 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -62,6 +62,9 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type) CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE); +#ifdef 
CONFIG_HISOCK + CGROUP_ATYPE(HISOCK_EGRESS); +#endif default: return CGROUP_BPF_ATTACH_TYPE_INVALID; } @@ -150,6 +153,11 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, int optname, void *optval, int *optlen, int retval); +#ifdef CONFIG_HISOCK +int __cgroup_bpf_run_hisock_egress(struct sock *sk, struct sk_buff *skb, + enum cgroup_bpf_attach_type atype); +#endif + static inline enum bpf_cgroup_storage_type cgroup_storage_type( struct bpf_map *map) { @@ -401,6 +409,20 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, __ret; \ }) +#ifdef CONFIG_HISOCK +#define BPF_CGROUP_RUN_PROG_HISOCK_EGRESS(sk, skb) \ +({ \ + int __ret = HISOCK_PASS; \ + if (cgroup_bpf_enabled(HISOCK_EGRESS) && sk) { \ + typeof(sk) __sk = sk_to_full_sk(sk); \ + if (sk_fullsock(__sk) && __sk == skb_to_full_sk(skb)) \ + __ret = __cgroup_bpf_run_hisock_egress(__sk, skb, \ + HISOCK_EGRESS); \ + } \ + __ret; \ +}) +#endif + int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, struct bpf_prog *prog); int cgroup_bpf_prog_detach(const union bpf_attr *attr, @@ -498,6 +520,9 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, optlen, retval) ({ retval; }) #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ kernel_optval) ({ 0; }) +#ifdef CONFIG_HISOCK +#define BPF_CGROUP_RUN_PROG_HISOCK_EGRESS(sk, skb) ({ HISOCK_PASS; }) +#endif #define for_each_cgroup_storage_type(stype) for (; false; ) diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index f5cdd5a9e268422969ed4a0790acfd083c8262f3..15809bc5eff449276723f39964fac886ef13f1f2 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -17,6 +17,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock, struct bpf_sock, struct sock) BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr, struct bpf_sock_addr, struct bpf_sock_addr_kern) +#ifdef CONFIG_HISOCK +BPF_PROG_TYPE(BPF_PROG_TYPE_HISOCK, hisock, + struct 
__sk_buff, struct sk_buff) +#endif #endif BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in, struct __sk_buff, struct sk_buff) diff --git a/include/linux/filter.h b/include/linux/filter.h index a7c0caa8b7ad5dea532f01d4fb74a3735cfd8a0c..9f3b71bee822714e5ba50da7defc3a17e1c0eea4 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -917,6 +917,9 @@ bool bpf_jit_needs_zext(void); bool bpf_jit_supports_subprog_tailcalls(void); bool bpf_jit_supports_kfunc_call(void); bool bpf_jit_supports_far_kfunc_call(void); +#ifdef CONFIG_HISOCK +bool bpf_jit_supports_ext_helper(void); +#endif u64 bpf_arch_uaddress_limit(void); bool bpf_helper_changes_pkt_data(enum bpf_func_id func_id); diff --git a/include/net/xdp.h b/include/net/xdp.h index 31698ef493b36ccf9d82812a626612eee8ff27e2..4ca0a42e55c6d94ccb3337222b11f0671872882a 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -150,6 +150,11 @@ xdp_prepare_buff(struct xdp_buff *xdp, unsigned char *hard_start, xdp->data_meta = meta_valid ? data : data + 1; } +struct hisock_xdp_buff { + struct xdp_buff xdp; + struct sk_buff *skb; +}; + /* Reserve memory area at end-of data area. * * This macro reserves tailroom in the XDP buffer by limiting the diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a660cb68c853b83a621b7bf517d78c08eb228ed4..3e84f2deed635da9870851b78e584a937d4337ea 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1006,6 +1006,9 @@ enum bpf_prog_type { BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, BPF_PROG_TYPE_SCHED, +#ifndef __GENKSYMS__ + BPF_PROG_TYPE_HISOCK, +#endif }; enum bpf_attach_type { @@ -1059,6 +1062,9 @@ enum bpf_attach_type { BPF_TCX_EGRESS, BPF_TRACE_UPROBE_MULTI, BPF_SCHED, +#ifndef __GENKSYMS__ + BPF_HISOCK_EGRESS, +#endif __MAX_BPF_ATTACH_TYPE }; @@ -5669,6 +5675,37 @@ union bpf_attr { * 0 on success. * * **-ENOENT** if the bpf_local_storage cannot be found. 
+ * + * void *bpf_get_ingress_dst(struct bpf_sock_ops *skops) + * Description + * Get the ingress dst entry of the full sock. + * Return + * Valid ingress dst on success, or negative error + * in case of failure. + * + * int bpf_set_ingress_dst(struct xdp_buff *xdp, void *dst) + * Description + * Set valid ingress dst entry to the skb associated + * with xdp_buff. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_change_skb_dev(void *ctx, u32 ifindex) + * Description + * Change ingress or egress device of the associated skb. + * Supports only BPF_PROG_TYPE_HISOCK and BPF_PROG_TYPE_XDP + * program types. + * + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct __sk_buff** for hisock_egress programs. + * Return + * 0 on success, or negative error in case of failure. + * + * int bpf_ext_memcpy(void *dst, size_t dst_sz, const void *src, size_t src_sz) + * Description + * Copy *src_sz* bytes from *src* to *dst* if *dst_sz* >= *src_sz*. + * Return + * 0 on success, or negative error in case of failure. */ #define ___BPF_FUNC_MAPPER(FN, ctx...) 
\ FN(unspec, 0, ##ctx) \ @@ -5883,6 +5920,10 @@ union bpf_attr { FN(user_ringbuf_drain, 209, ##ctx) \ FN(cgrp_storage_get, 210, ##ctx) \ FN(cgrp_storage_delete, 211, ##ctx) \ + FN(get_ingress_dst, 212, ##ctx) \ + FN(set_ingress_dst, 213, ##ctx) \ + FN(change_skb_dev, 214, ##ctx) \ + FN(ext_memcpy, 215, ##ctx) \ /* */ /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't @@ -6310,6 +6351,7 @@ enum xdp_action { XDP_PASS, XDP_TX, XDP_REDIRECT, + XDP_HISOCK_REDIRECT = 100, }; /* user accessible metadata for XDP packet hook @@ -7352,4 +7394,11 @@ struct bpf_iter_num { __u64 __opaque[1]; } __attribute__((aligned(8))); +enum hisock_action { + HISOCK_PASS, + HISOCK_DROP, + HISOCK_REDIRECT, + __MAX_HISOCK_ACTION, +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index cf2eb0895d403c967d63236a0984f15a67ea8b4e..90cc73c762cff1e38648fbdade7482436960891a 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -663,6 +663,12 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, */ return -EPERM; +#ifdef CONFIG_HISOCK + /* Only one bpf program can be attached to HISOCK_EGRESS */ + if (atype == HISOCK_EGRESS && prog_list_length(progs) >= 1) + return -EEXIST; +#endif + if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) return -E2BIG; @@ -1548,6 +1554,43 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); +#ifdef CONFIG_HISOCK +int __cgroup_bpf_run_hisock_egress(struct sock *sk, struct sk_buff *skb, + enum cgroup_bpf_attach_type atype) +{ + struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); + struct bpf_prog_array_item *item; + struct bpf_prog *prog; + struct bpf_prog_array *array; + struct bpf_run_ctx *old_run_ctx; + struct bpf_cg_run_ctx run_ctx; + void *saved_data_end; + u32 ret = HISOCK_PASS; + + bpf_compute_and_save_data_end(skb, &saved_data_end); + + migrate_disable(); + rcu_read_lock(); + array = rcu_dereference(cgrp->bpf.effective[atype]); 
+ item = &array->items[0]; + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); + /* Only one bpf program can be attached to HISOCK_EGRESS */ + prog = READ_ONCE(item->prog); + if (prog) { + run_ctx.prog_item = item; + ret = __bpf_prog_run_save_cb(prog, skb); + } + bpf_reset_run_ctx(old_run_ctx); + rcu_read_unlock(); + migrate_enable(); + + bpf_restore_data_end(skb, saved_data_end); + + return ret < __MAX_HISOCK_ACTION ? ret : -EPERM; +} +EXPORT_SYMBOL(__cgroup_bpf_run_hisock_egress); +#endif + int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, short access, enum cgroup_bpf_attach_type atype) { diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 2ecaf891a1670354bd97f27bf09f2a30346925c0..5adf49397a67b011435680ad345ffbfd62843395 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2939,6 +2939,13 @@ u64 __weak bpf_arch_uaddress_limit(void) #endif } +#ifdef CONFIG_HISOCK +bool __weak bpf_jit_supports_ext_helper(void) +{ + return false; +} +#endif + /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call * skb_copy_bits(), so provide a weak definition of it for NET-less config. 
*/ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 41f049ecb5c8d19a327863a61316f573f97bacec..1da2fd748714f48f017eefff9dbb1666f8bd2208 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1799,6 +1799,29 @@ static const struct bpf_func_proto bpf_dynptr_data_proto = { .arg3_type = ARG_CONST_ALLOC_SIZE_OR_ZERO, }; +#ifdef CONFIG_HISOCK +BPF_CALL_4(bpf_ext_memcpy, void *, dst, size_t, dst_sz, + const void *, src, size_t, src_sz) +{ + if (dst_sz < src_sz) + return -EINVAL; + + memcpy(dst, src, src_sz); + return 0; +} + +const struct bpf_func_proto bpf_ext_memcpy_proto = { + .func = bpf_ext_memcpy, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_MEM | MEM_UNINIT, + .arg2_type = ARG_CONST_SIZE, + .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_CONST_SIZE, +}; +#endif + const struct bpf_func_proto bpf_get_current_task_proto __weak; const struct bpf_func_proto bpf_get_current_task_btf_proto __weak; const struct bpf_func_proto bpf_probe_read_user_proto __weak; @@ -1855,6 +1878,10 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_strtol_proto; case BPF_FUNC_strtoul: return &bpf_strtoul_proto; +#ifdef CONFIG_HISOCK + case BPF_FUNC_ext_memcpy: + return &bpf_ext_memcpy_proto; +#endif default: break; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 266e84baea8456f135048a584ccc63e488850600..7131d7bf92d7b1c28c2c1b96c98a0f57fecc3695 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2554,6 +2554,9 @@ static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_CGROUP_SYSCTL: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_EXT: /* extends any prog */ case BPF_PROG_TYPE_NETFILTER: @@ -3820,6 +3823,10 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) case BPF_CGROUP_GETSOCKOPT: case 
BPF_CGROUP_SETSOCKOPT: return BPF_PROG_TYPE_CGROUP_SOCKOPT; +#ifdef CONFIG_HISOCK + case BPF_HISOCK_EGRESS: + return BPF_PROG_TYPE_HISOCK; +#endif case BPF_TRACE_ITER: case BPF_TRACE_RAW_TP: case BPF_TRACE_FENTRY: @@ -3978,6 +3985,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_LSM: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif if (ptype == BPF_PROG_TYPE_LSM && prog->expected_attach_type != BPF_LSM_CGROUP) ret = -EINVAL; @@ -4043,6 +4053,9 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_LSM: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif ret = cgroup_bpf_prog_detach(attr, ptype); break; case BPF_PROG_TYPE_SCHED_CLS: @@ -4094,6 +4107,9 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_GETSOCKOPT: case BPF_CGROUP_SETSOCKOPT: case BPF_LSM_CGROUP: +#ifdef CONFIG_HISOCK + case BPF_HISOCK_EGRESS: +#endif return cgroup_bpf_prog_query(attr, uattr); case BPF_LIRC_MODE2: return lirc_prog_query(attr, uattr); @@ -5054,6 +5070,9 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_CGROUP_SOCKOPT: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif ret = cgroup_bpf_link_attach(attr, prog); break; case BPF_PROG_TYPE_EXT: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dbf2df404460123ccdac1f3990181034336d2565..85ca0f34c7c042afaab341972dc5c1f786f128ad 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5576,6 +5576,9 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, return true; case BPF_PROG_TYPE_CGROUP_SOCKOPT: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif if (t == BPF_WRITE) env->seen_direct_write = true; @@ -10311,6 +10314,21 @@ static int check_helper_call(struct 
bpf_verifier_env *env, struct bpf_insn *insn err = push_callback_call(env, insn, insn_idx, meta.subprogno, set_user_ringbuf_callback_state); break; +#ifdef CONFIG_HISOCK + case BPF_FUNC_ext_memcpy: + { + /* XXX: cleanup & check if allowed to access dst mem */ + u32 regno = BPF_REG_1 + 3; + struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno]; + struct bpf_insn *insn = &env->prog->insnsi[env->insn_idx]; + + if (!bpf_jit_supports_ext_helper() || + reg->umax_value <= 0 || reg->umax_value > 4096) + return -ENOTSUPP; + + insn->off = reg->umax_value; + } +#endif } if (err) @@ -17356,6 +17374,9 @@ static int do_check(struct bpf_verifier_env *env) if (opcode == BPF_CALL) { if (BPF_SRC(insn->code) != BPF_K || (insn->src_reg != BPF_PSEUDO_KFUNC_CALL +#ifdef CONFIG_HISOCK + && insn->imm != BPF_FUNC_ext_memcpy +#endif && insn->off != 0) || (insn->src_reg != BPF_REG_0 && insn->src_reg != BPF_PSEUDO_CALL && @@ -19661,6 +19682,12 @@ static int do_misc_fixups(struct bpf_verifier_env *env) continue; } +#ifdef CONFIG_HISOCK + /* will fixup bpf extension helper in jit */ + if (insn->imm == BPF_FUNC_ext_memcpy) + continue; +#endif + patch_call_imm: fn = env->ops->get_func_proto(insn->imm, env->prog); /* all functions that have prototype and verifier allowed diff --git a/net/Kconfig b/net/Kconfig index 2fc1860faeb40821ea39ad4146e6a0df84441f4f..fd08800cb13042e9ebacfae95261bc47da4b412f 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -348,6 +348,16 @@ config BPF_STREAM_PARSER Enabling this allows a TCP stream parser to be used with BPF_MAP_TYPE_SOCKMAP. +config HISOCK + bool "enable HiSock Redirect Framework" + depends on INET + depends on CGROUP_BPF + depends on BPF_SYSCALL + default n + help + Enable HiSock, which bypasses net filter rules for specific + connections selected by bpf prog on both TX and RX directions. 
+ config NET_FLOW_LIMIT bool depends on RPS diff --git a/net/core/dev.c b/net/core/dev.c index cbb4bd4718cdf825165c9b4bbab36ddd55f30dfd..22025bab89078a1172d4d7314db8a98cbf069d0f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5029,6 +5029,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, case XDP_REDIRECT: case XDP_TX: case XDP_PASS: +#ifdef CONFIG_HISOCK + case XDP_HISOCK_REDIRECT: +#endif break; default: bpf_warn_invalid_xdp_action(skb->dev, xdp_prog, act); @@ -5074,27 +5077,94 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) } } +#ifdef CONFIG_HISOCK +static int generic_xdp_hisock_redirect(struct sk_buff *skb) +{ + const struct iphdr *iph; + u32 len; + + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + goto out; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto free_skb; + + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4 || + ip_is_fragment(iph)) + return -EOPNOTSUPP; + + if (!pskb_may_pull(skb, iph->ihl * 4)) + goto free_skb; + + iph = ip_hdr(skb); + if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) + goto free_skb; + + len = ntohs(iph->tot_len); + if (skb->len < len || len < (iph->ihl * 4)) + goto free_skb; + + if (pskb_trim_rcsum(skb, len)) + goto free_skb; + + iph = ip_hdr(skb); + skb->transport_header = skb->network_header + iph->ihl * 4; + + skb_orphan(skb); + + if (!skb_valid_dst(skb)) { + if (ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev)) + goto free_skb; + } + + __skb_pull(skb, skb_network_header_len(skb)); + + rcu_read_lock(); + ip_protocol_deliver_rcu(dev_net(skb->dev), skb, iph->protocol); + rcu_read_unlock(); + + return 0; + +free_skb: + kfree_skb(skb); +out: + return -EFAULT; +} +#endif + static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key); int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) { if (xdp_prog) { - struct xdp_buff xdp; + struct hisock_xdp_buff hxdp; + struct xdp_buff *xdp = &hxdp.xdp; u32 act; int err; - act = 
netif_receive_generic_xdp(skb, &xdp, xdp_prog); + hxdp.skb = skb; + act = netif_receive_generic_xdp(skb, xdp, xdp_prog); if (act != XDP_PASS) { switch (act) { case XDP_REDIRECT: err = xdp_do_generic_redirect(skb->dev, skb, - &xdp, xdp_prog); + xdp, xdp_prog); if (err) goto out_redir; break; case XDP_TX: generic_xdp_tx(skb, xdp_prog); break; +#ifdef CONFIG_HISOCK + case XDP_HISOCK_REDIRECT: + err = generic_xdp_hisock_redirect(skb); + if (err == -EOPNOTSUPP) + return XDP_PASS; + break; +#endif } return XDP_DROP; } diff --git a/net/core/filter.c b/net/core/filter.c index 2968f1f8dd471b063bc11d8b46c61420e5da2c64..9b2ecc4a8ecbe1c8a8628e432d9f6fad84539cc8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3736,6 +3736,30 @@ static const struct bpf_func_proto bpf_skb_adjust_room_proto = { .arg4_type = ARG_ANYTHING, }; +#ifdef CONFIG_HISOCK +BPF_CALL_2(bpf_skb_change_skb_dev, struct sk_buff *, skb, u32, ifindex) +{ + struct net_device *dev; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + dev = dev_get_by_index_rcu(&init_net, ifindex); + if (!dev) + return -ENODEV; + + skb->dev = dev; + return 0; +} + +static const struct bpf_func_proto bpf_skb_change_skb_dev_proto = { + .func = bpf_skb_change_skb_dev, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; +#endif + static u32 __bpf_skb_min_len(const struct sk_buff *skb) { int offset = skb_network_offset(skb); @@ -7082,6 +7106,62 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = { .arg5_type = ARG_ANYTHING, }; +#ifdef CONFIG_HISOCK +BPF_CALL_2(bpf_xdp_set_ingress_dst, struct xdp_buff *, xdp, void *, dst) +{ + struct hisock_xdp_buff *hxdp = (struct hisock_xdp_buff *)xdp; + struct dst_entry *_dst = (struct dst_entry *)dst; + + if (!hxdp->skb) + return -EOPNOTSUPP; + + if (!_dst || !virt_addr_valid(_dst)) + return -EFAULT; + + /* same as skb_valid_dst */ + if (_dst->flags & DST_METADATA) + return -EINVAL; + + skb_dst_set_noref(hxdp->skb, _dst); + 
return 0; +} + +static const struct bpf_func_proto bpf_xdp_set_ingress_dst_proto = { + .func = bpf_xdp_set_ingress_dst, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + +BPF_CALL_2(bpf_xdp_change_skb_dev, struct xdp_buff *, xdp, u32, ifindex) +{ + struct hisock_xdp_buff *hxdp = (void *)xdp; + struct net_device *dev; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + if (!hxdp->skb) + return -EOPNOTSUPP; + + dev = dev_get_by_index_rcu(&init_net, ifindex); + if (!dev) + return -ENODEV; + + hxdp->skb->dev = dev; + return 0; +} + +static const struct bpf_func_proto bpf_xdp_change_skb_dev_proto = { + .func = bpf_xdp_change_skb_dev, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; +#endif + BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { @@ -7786,6 +7866,34 @@ static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = { .arg3_type = ARG_ANYTHING, }; +#ifdef CONFIG_HISOCK +BTF_ID_LIST_SINGLE(btf_dst_entity_ids, struct, dst_entry) +BPF_CALL_1(bpf_sock_ops_get_ingress_dst, struct bpf_sock_ops_kern *, sops) +{ + struct sock *sk = sops->sk; + struct dst_entry *dst; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + if (!sk || !sk_fullsock(sk)) + return (unsigned long)NULL; + + dst = rcu_dereference(sk->sk_rx_dst); + if (dst) + dst = dst_check(dst, 0); + + return (unsigned long)dst; +} + +const struct bpf_func_proto bpf_sock_ops_get_ingress_dst_proto = { + .func = bpf_sock_ops_get_ingress_dst, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .ret_btf_id = &btf_dst_entity_ids[0], +}; +#endif + BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb, u64, tstamp, u32, tstamp_type) { @@ -8152,6 +8260,31 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) } } +#ifdef 
CONFIG_HISOCK +static const struct bpf_func_proto * +hisock_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_skb_store_bytes: + return &bpf_skb_store_bytes_proto; + case BPF_FUNC_skb_load_bytes: + return &bpf_skb_load_bytes_proto; + case BPF_FUNC_skb_pull_data: + return &bpf_skb_pull_data_proto; + case BPF_FUNC_skb_change_tail: + return &bpf_skb_change_tail_proto; + case BPF_FUNC_skb_change_head: + return &bpf_skb_change_head_proto; + case BPF_FUNC_skb_adjust_room: + return &bpf_skb_adjust_room_proto; + case BPF_FUNC_change_skb_dev: + return &bpf_skb_change_skb_dev_proto; + default: + return bpf_base_func_proto(func_id); + } +} +#endif + static const struct bpf_func_proto * tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -8330,6 +8463,12 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_tcp_check_syncookie_proto; case BPF_FUNC_tcp_gen_syncookie: return &bpf_tcp_gen_syncookie_proto; +#ifdef CONFIG_HISOCK + case BPF_FUNC_set_ingress_dst: + return &bpf_xdp_set_ingress_dst_proto; + case BPF_FUNC_change_skb_dev: + return &bpf_xdp_change_skb_dev_proto; +#endif #ifdef CONFIG_SYN_COOKIES case BPF_FUNC_tcp_raw_gen_syncookie_ipv4: return &bpf_tcp_raw_gen_syncookie_ipv4_proto; @@ -8399,6 +8538,10 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sock_ops_store_hdr_opt_proto; case BPF_FUNC_reserve_hdr_opt: return &bpf_sock_ops_reserve_hdr_opt_proto; +#ifdef CONFIG_HISOCK + case BPF_FUNC_get_ingress_dst: + return &bpf_sock_ops_get_ingress_dst_proto; +#endif case BPF_FUNC_tcp_sock: return &bpf_tcp_sock_proto; #endif /* CONFIG_INET */ @@ -8741,6 +8884,33 @@ static bool cg_skb_is_valid_access(int off, int size, return bpf_skb_is_valid_access(off, size, type, prog, info); } +#ifdef CONFIG_HISOCK +static bool hisock_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct 
bpf_insn_access_aux *info) +{ + switch (off) { + case bpf_ctx_range(struct __sk_buff, tc_classid): + case bpf_ctx_range(struct __sk_buff, data_meta): + case bpf_ctx_range(struct __sk_buff, tstamp): + case bpf_ctx_range(struct __sk_buff, wire_len): + return false; + } + + switch (off) { + case bpf_ctx_range(struct __sk_buff, data): + info->reg_type = PTR_TO_PACKET; + break; + case bpf_ctx_range(struct __sk_buff, data_end): + info->reg_type = PTR_TO_PACKET_END; + break; + } + + return bpf_skb_is_valid_access(off, size, type, prog, info); +} +#endif + static bool lwt_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, @@ -11086,6 +11256,18 @@ const struct bpf_prog_ops cg_skb_prog_ops = { .test_run = bpf_prog_test_run_skb, }; +#ifdef CONFIG_HISOCK +const struct bpf_verifier_ops hisock_verifier_ops = { + .get_func_proto = hisock_func_proto, + .is_valid_access = hisock_is_valid_access, + .convert_ctx_access = bpf_convert_ctx_access, + .gen_prologue = bpf_noop_prologue, +}; + +const struct bpf_prog_ops hisock_prog_ops = { +}; +#endif + const struct bpf_verifier_ops lwt_in_verifier_ops = { .get_func_proto = lwt_in_func_proto, .is_valid_access = lwt_is_valid_access, diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index f49570e2f713943edd1453c26b56e2d27c38956c..89f5f3b178e1cb947ce13355430f984499766e38 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -457,6 +457,55 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4) iph->daddr = fl4->daddr; } +#ifdef CONFIG_HISOCK +static int hisock_egress_redirect_xmit(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct netdev_queue *txq; + bool free_skb = true; + int cpu, rc; + + rcu_read_lock_bh(); + + txq = netdev_core_pick_tx(dev, skb, NULL); + cpu = smp_processor_id(); + HARD_TX_LOCK(dev, txq, cpu); + if (!netif_xmit_stopped(txq)) { + rc = netdev_start_xmit(skb, dev, txq, 0); + if (dev_xmit_complete(rc)) + free_skb = false; + } + 
HARD_TX_UNLOCK(dev, txq); + + rcu_read_unlock_bh(); + + if (free_skb) { + rc = -ENETDOWN; + kfree_skb(skb); + } + + return rc; +} + +static int do_hisock_egress_redirect(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct iphdr *iph; + + skb->protocol = htons(ETH_P_IP); + if (!skb->dev) + skb->dev = skb_dst(skb)->dev; + + if (skb_mac_header_was_set(skb)) + return hisock_egress_redirect_xmit(skb); + + iph = ip_hdr(skb); + iph_set_totlen(iph, skb->len); + ip_send_check(iph); + + return ip_finish_output2(net, sk, skb); +} +#endif + /* Note: skb->sk can be different from sk, in case of tunnels */ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, __u8 tos) @@ -537,6 +586,25 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); +#ifdef CONFIG_HISOCK + res = BPF_CGROUP_RUN_PROG_HISOCK_EGRESS(sk, skb); + switch (res) { + case HISOCK_PASS: + break; + case HISOCK_REDIRECT: + res = do_hisock_egress_redirect(net, sk, skb); + rcu_read_unlock(); + return res; + default: + pr_warn_once("Illegal HiSock return value %d, expect packet loss!", res); + fallthrough; + case HISOCK_DROP: + kfree_skb(skb); + rcu_read_unlock(); + return NET_XMIT_DROP; + } +#endif + res = ip_local_out(net, sk, skb); rcu_read_unlock(); return res; diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore index 0002cd359fb119059156bec1133ab240433ee015..4505e51c3f4d2327424ac50985980d7e67bb4f6a 100644 --- a/samples/bpf/.gitignore +++ b/samples/bpf/.gitignore @@ -41,6 +41,7 @@ xdp_adjust_tail xdp_fwd xdp_router_ipv4 xdp_tx_iptunnel +hisock/hisock_cmd testfile.img hbm_out.log iperf.* diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 3fa16412db15cafa9538605a7db975ffe365ad31..2f19faa7fb63db0d575ebc9dac91654344aa78b2 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -46,6 +46,7 @@ tprogs-y += xdp_fwd tprogs-y += task_fd_query tprogs-y += 
ibumad tprogs-y += hbm +tprogs-y += hisock/hisock_cmd # Libbpf dependencies LIBBPF_SRC = $(TOOLS_PATH)/lib/bpf @@ -96,6 +97,7 @@ xdp_fwd-objs := xdp_fwd_user.o task_fd_query-objs := task_fd_query_user.o $(TRACE_HELPERS) ibumad-objs := ibumad_user.o hbm-objs := hbm.o $(CGROUP_HELPERS) +hisock_cmd-objs := hisock/hisock_cmd.o xdp_router_ipv4-objs := xdp_router_ipv4_user.o $(XDP_SAMPLE) @@ -149,6 +151,7 @@ always-y += task_fd_query_kern.o always-y += ibumad_kern.o always-y += hbm_out_kern.o always-y += hbm_edt_kern.o +always-y += hisock/bpf.o ifeq ($(ARCH), arm) # Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux diff --git a/samples/bpf/hisock/bpf.c b/samples/bpf/hisock/bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..375f33fdf2e797946de822cb7917430fdd8b2a6f --- /dev/null +++ b/samples/bpf/hisock/bpf.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * + * Description: End-to-End HiSock Redirect sample. 
+ */ +#include +#include +#include +#include +#include +#include + +#include +#include + +#define IP_MF 0x2000 +#define IP_OFFSET 0x1FFF +#define CSUM_SHIFT_BITS 16 + +#define SOCKOPS_SUCC 1 +#define SOCKOPS_FAIL 0 + +#define PORT_LOCAL 1 +#define PORT_REMOTE 2 + +#define MAX_NUMA 8 +#define MAX_CONN_NUMA 4096 +#define MAX_CONN (MAX_CONN_NUMA * MAX_NUMA * 2) + +struct sock_tuple { + u32 saddr; + u32 daddr; + u16 sport; + u16 dport; +}; + +struct sock_value { + struct ethhdr ingress_eth; + bool eth_updated; + u32 ingress_ifindex; + void *ingress_dst; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(struct sock_tuple)); + __uint(value_size, sizeof(struct sock_value)); + __uint(max_entries, MAX_CONN); +} connmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(u16)); + __uint(value_size, sizeof(u8)); + __uint(max_entries, 128); +} speed_port SEC(".maps"); + +static inline bool is_speed_flow(u32 local, u32 remote) +{ + u8 *val; + + val = bpf_map_lookup_elem(&speed_port, &local); + if (val && *val == PORT_LOCAL) + return true; + + val = bpf_map_lookup_elem(&speed_port, &remote); + if (val && *val == PORT_REMOTE) + return true; + + return false; +} + +SEC("hisock_sockops") +int hisock_sockops_prog(struct bpf_sock_ops *skops) +{ + struct sock_tuple key = { 0 }; + struct sock_value val = { 0 }; + void *dst; + + if (!is_speed_flow(skops->local_port, bpf_ntohl(skops->remote_port))) + return SOCKOPS_SUCC; + + switch (skops->op) { + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + dst = bpf_get_ingress_dst(skops); + if (!dst) + return SOCKOPS_FAIL; + + key.saddr = skops->remote_ip4; + key.sport = bpf_ntohl(skops->remote_port); + key.daddr = skops->local_ip4; + key.dport = skops->local_port; + + val.ingress_dst = dst; + bpf_map_update_elem(&connmap, &key, &val, BPF_ANY); + + bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG); + break; + case BPF_SOCK_OPS_STATE_CB: + 
if (skops->args[1] != BPF_TCP_CLOSE_WAIT && + skops->args[1] != BPF_TCP_FIN_WAIT1 && + skops->args[1] != BPF_TCP_CLOSE) + break; + + key.saddr = skops->remote_ip4; + key.sport = bpf_ntohl(skops->remote_port); + key.daddr = skops->local_ip4; + key.dport = skops->local_port; + + bpf_map_delete_elem(&connmap, &key); + + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_STATE_CB_FLAG); + break; + default: + break; + } + + return SOCKOPS_SUCC; +} + +SEC("hisock_ingress") +int hisock_ingress_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct sock_tuple key = { 0 }; + struct sock_value *val; + struct ethhdr *ehdr; + struct tcphdr *thdr; + struct iphdr *ihdr; + + ehdr = (struct ethhdr *)data; + if (ehdr + 1 > data_end) + return XDP_PASS; + + if (ehdr->h_proto != bpf_htons(ETH_P_IP)) + return XDP_PASS; + + ihdr = (struct iphdr *)(ehdr + 1); + if (ihdr + 1 > data_end) + return XDP_PASS; + + if (ihdr->ihl != 5 || ihdr->protocol != IPPROTO_TCP) + return XDP_PASS; + + if (ihdr->frag_off & bpf_htons(IP_MF | IP_OFFSET)) + return XDP_PASS; + + thdr = (struct tcphdr *)(ihdr + 1); + if (thdr + 1 > data_end) + return XDP_PASS; + + if (thdr->syn || thdr->fin || thdr->rst) + return XDP_PASS; + + key.saddr = ihdr->saddr; + key.sport = bpf_ntohs(thdr->source); + key.daddr = ihdr->daddr; + key.dport = bpf_ntohs(thdr->dest); + + val = bpf_map_lookup_elem(&connmap, &key); + if (!val) + return XDP_PASS; + + if (unlikely(!val->eth_updated)) { + bpf_ext_memcpy(val->ingress_eth.h_source, ETH_ALEN, + ehdr->h_dest, ETH_ALEN); + bpf_ext_memcpy(val->ingress_eth.h_dest, ETH_ALEN, + ehdr->h_source, ETH_ALEN); + val->ingress_eth.h_proto = ehdr->h_proto; + val->eth_updated = true; + } + + if (unlikely(!val->ingress_ifindex)) + val->ingress_ifindex = ctx->ingress_ifindex; + + if (likely(val->ingress_dst)) + bpf_set_ingress_dst(ctx, val->ingress_dst); + + return XDP_HISOCK_REDIRECT; +} + +static inline 
void ipv4_csum(struct iphdr *ihdr) +{ + u32 csum = 0; + u16 *next_ip_u16 = (u16 *)ihdr; + + ihdr->check = 0; + for (size_t i = 0; i < sizeof(struct iphdr) >> 1; i++) + csum += *next_ip_u16++; + + ihdr->check = ~((csum & 0xffff) + (csum >> CSUM_SHIFT_BITS)); +} + +SEC("hisock_egress") +int hisock_egress_prog(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct sock_tuple key = { 0 }; + struct sock_value *val; + struct ethhdr *ehdr; + struct iphdr *ihdr; + int ret; + + key.saddr = skb->remote_ip4; + key.sport = bpf_ntohl(skb->remote_port); + key.daddr = skb->local_ip4; + key.dport = skb->local_port; + + val = bpf_map_lookup_elem(&connmap, &key); + if (!val) + return HISOCK_PASS; + + if (unlikely(!val->eth_updated)) + goto redirect; + + ihdr = (struct iphdr *)data; + if (ihdr + 1 > data_end) + return HISOCK_PASS; + + ihdr->tot_len = bpf_htons(skb->len); + ipv4_csum(ihdr); + + ret = bpf_skb_change_head(skb, ETH_HLEN, 0); + if (ret < 0) + goto redirect; + + data = (void *)(long)skb->data; + data_end = (void *)(long)skb->data_end; + + ehdr = (struct ethhdr *)data; + if (ehdr + 1 > data_end) + return HISOCK_DROP; + + bpf_ext_memcpy(ehdr, ETH_HLEN, &val->ingress_eth, ETH_HLEN); +redirect: + if (likely(val->ingress_ifindex)) + bpf_change_skb_dev(skb, val->ingress_ifindex); + + return HISOCK_REDIRECT; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/hisock/hisock_cmd.c b/samples/bpf/hisock/hisock_cmd.c new file mode 100644 index 0000000000000000000000000000000000000000..6b64c008b6c7b0ee271c34abf7b30bfb28afc9a3 --- /dev/null +++ b/samples/bpf/hisock/hisock_cmd.c @@ -0,0 +1,405 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * + * Description: End-to-End HiSock Redirect sample. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "bpf_util.h" +#include +#include + +#define DEF_BPF_PATH "bpf.o" +#define PORT_LOCAL 1 +#define PORT_REMOTE 2 +#define MAX_IF_NUM 8 + +struct { + __u32 ifindex[MAX_IF_NUM]; + int if_num; + char *local_port; + char *remote_port; + char *cgrp_path; + char *bpf_path; + bool unload; + bool help; +} hisock; + +struct hisock_prog_info { + const char *prog_name; + enum bpf_prog_type prog_type; + enum bpf_attach_type attach_type; + int attach_flag; + int prog_fd; + bool is_xdp; +}; + +static struct hisock_prog_info prog_infos[] = { + { + .prog_name = "hisock_sockops_prog", + .prog_type = BPF_PROG_TYPE_SOCK_OPS, + .attach_type = BPF_CGROUP_SOCK_OPS, + .attach_flag = 0, + .is_xdp = false, + }, + { + .prog_name = "hisock_ingress_prog", + .prog_type = BPF_PROG_TYPE_XDP, + .attach_type = BPF_XDP, + .attach_flag = XDP_FLAGS_SKB_MODE, + .is_xdp = true, + }, + { + .prog_name = "hisock_egress_prog", + .prog_type = BPF_PROG_TYPE_HISOCK, + .attach_type = BPF_HISOCK_EGRESS, + .attach_flag = 0, + .is_xdp = false, + }, +}; + +static int set_prog_type(struct bpf_object *obj) +{ + enum bpf_attach_type attach_type; + enum bpf_prog_type prog_type; + struct bpf_program *prog; + const char *prog_name; + int i; + + bpf_object__for_each_program(prog, obj) { + prog_name = bpf_program__name(prog); + for (i = 0; i < ARRAY_SIZE(prog_infos); i++) { + if (!strcmp(prog_infos[i].prog_name, prog_name)) { + prog_type = prog_infos[i].prog_type; + attach_type = prog_infos[i].attach_type; + break; + } + } + + if (i == ARRAY_SIZE(prog_infos)) + return -1; + + bpf_program__set_type(prog, prog_type); + bpf_program__set_expected_attach_type(prog, attach_type); + } + + return 0; +} + +static int find_progs(struct bpf_object *obj) +{ + struct hisock_prog_info *info; + struct bpf_program *prog; + int i, prog_fd; + + 
for (i = 0; i < ARRAY_SIZE(prog_infos); i++) { + info = &prog_infos[i]; + prog = bpf_object__find_program_by_name(obj, info->prog_name); + if (!prog) { + fprintf(stderr, "ERROR: failed to find prog sec %s\n", info->prog_name); + return -1; + } + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + fprintf(stderr, "ERROR: failed to get fd of prog %s\n", info->prog_name); + return -1; + } + + info->prog_fd = prog_fd; + } + + return 0; +} + +static int parse_port_range(const char *port_str, __u8 status, int map_fd) +{ + char *str = strdup(port_str); + char *token, *rest = str; + __u16 port; + + while ((token = strtok_r(rest, ",", &rest))) { + char *dash = strchr(token, '-'); + + if (dash) { + *dash = '\0'; + __u16 start = atoi(token); + __u16 end = atoi(dash + 1); + + if (start > end || start == 0 || end > 65535) { + fprintf(stderr, "Invalid port range: %s\n", token); + return -1; + } + + for (port = start; port <= end; port++) + bpf_map_update_elem(map_fd, &port, &status, BPF_ANY); + + printf("Speed port range %u-%u:%u\n", start, end, status); + } else { + port = atoi(token); + if (port == 0 || port > 65535) { + fprintf(stderr, "Invalid port: %s\n", token); + return -1; + } + bpf_map_update_elem(map_fd, &port, &status, BPF_ANY); + printf("Speed port %u:%u\n", port, status); + } + } + + free(str); + return 0; +} + +static int set_speed_port(struct bpf_object *obj) +{ + int map_fd; + + map_fd = bpf_object__find_map_fd_by_name(obj, "speed_port"); + if (map_fd < 0) { + fprintf(stderr, "ERROR: failed to find map fd\n"); + return -1; + } + + if (hisock.local_port && + parse_port_range(hisock.local_port, PORT_LOCAL, map_fd)) { + fprintf(stderr, "ERROR: failed to update local port\n"); + return -1; + } + + if (hisock.remote_port && + parse_port_range(hisock.remote_port, PORT_REMOTE, map_fd)) { + fprintf(stderr, "ERROR: failed to update remote port\n"); + return -1; + } + + return 0; +} + +static int detach_progs(void) +{ + struct hisock_prog_info *info; + int i, j, 
cgrp_fd; + int err_cnt = 0; + + cgrp_fd = open(hisock.cgrp_path, O_DIRECTORY, O_RDONLY); + if (cgrp_fd < 0) { + fprintf(stderr, "ERROR: failed to open cgrp %s\n", hisock.cgrp_path); + return -1; + } + + for (i = 0; i < ARRAY_SIZE(prog_infos); i++) { + info = &prog_infos[i]; + if (info->is_xdp) { + for (j = 0; j < hisock.if_num; j++) { + if (bpf_xdp_detach(hisock.ifindex[j], + info->attach_flag, NULL)) { + fprintf(stderr, + "ERROR: failed to detach prog %s\n", + info->prog_name); + err_cnt++; + } + } + continue; + } + + if (bpf_prog_detach(cgrp_fd, info->attach_type)) { + fprintf(stderr, "ERROR: failed to detach prog %s\n", info->prog_name); + err_cnt++; + } + } + + close(cgrp_fd); + return -err_cnt; +} + +static int attach_progs(void) +{ + struct hisock_prog_info *info; + int i, j, cgrp_fd; + + cgrp_fd = open(hisock.cgrp_path, O_DIRECTORY, O_RDONLY); + if (cgrp_fd < 0) { + fprintf(stderr, "ERROR: failed to open cgrp %s\n", hisock.cgrp_path); + return -1; + } + + for (i = 0; i < ARRAY_SIZE(prog_infos); i++) { + info = &prog_infos[i]; + if (info->is_xdp) { + for (j = 0; j < hisock.if_num; j++) { + if (bpf_xdp_attach(hisock.ifindex[j], info->prog_fd, + info->attach_flag, NULL)) + goto fail; + } + continue; + } + + if (bpf_prog_attach(info->prog_fd, cgrp_fd, info->attach_type, + info->attach_flag)) + goto fail; + } + + close(cgrp_fd); + return 0; +fail: + fprintf(stderr, "ERROR: failed to attach prog %s\n", info->prog_name); + close(cgrp_fd); + detach_progs(); + return -1; +} + +static int do_hisock(void) +{ + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + struct bpf_object *obj; + + setrlimit(RLIMIT_MEMLOCK, &r); + + obj = bpf_object__open(hisock.bpf_path); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: failed to open bpf file\n"); + return -1; + } + + if (set_prog_type(obj)) { + fprintf(stderr, "ERROR: failed to set prog type\n"); + bpf_object__close(obj); + return -1; + } + + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: failed to load bpf 
obj\n"); + bpf_object__close(obj); + return -1; + } + + if (find_progs(obj)) { + fprintf(stderr, "ERROR: failed to find progs\n"); + bpf_object__close(obj); + return -1; + } + + if (set_speed_port(obj)) { + fprintf(stderr, "ERROR: failed to set speed port\n"); + bpf_object__close(obj); + return -1; + } + + if (attach_progs()) { + fprintf(stderr, "ERROR: failed to attach progs\n"); + bpf_object__close(obj); + return -1; + } + + bpf_object__close(obj); + return 0; +} + +static void do_help(void) +{ + fprintf(stderr, + "Load: hisock_cmd [-f BPF_FILE] [-c CGRP_PATH] " + "[-p LOCAL_PORT] [-r REMOTE_PORT] [-i INTERFACE]\n" + "Unload: hisock_cmd -u [-c CGRP_PATH] [-i INTERFACE]\n"); +} + +static int parse_args(int argc, char **argv) +{ + char *ifname; + int opt; + + hisock.bpf_path = DEF_BPF_PATH; + hisock.if_num = 0; + + while ((opt = getopt(argc, argv, "f:c:p:r:i:uh")) != -1) { + switch (opt) { + case 'f': + hisock.bpf_path = optarg; + break; + case 'c': + hisock.cgrp_path = optarg; + break; + case 'p': + hisock.local_port = optarg; + break; + case 'r': + hisock.remote_port = optarg; + break; + case 'i': + ifname = optarg; + hisock.ifindex[hisock.if_num] = if_nametoindex(ifname); + hisock.if_num++; + break; + case 'u': + hisock.unload = true; + break; + case 'h': + hisock.help = true; + break; + default: + fprintf(stderr, "ERROR: unknown option %c\n", opt); + return -1; + } + } + + if (hisock.cgrp_path == NULL || + hisock.if_num == 0 || + (!hisock.unload && + hisock.local_port == NULL && + hisock.remote_port == NULL)) { + do_help(); + return -1; + } + + return 0; +} + +int main(int argc, char **argv) +{ + if (parse_args(argc, argv)) { + fprintf(stderr, "ERROR: failed to parse args\n"); + return -1; + } + + if (hisock.help) { + do_help(); + return 0; + } + + if (hisock.unload) { + if (detach_progs()) { + fprintf(stderr, "ERROR: failed to detach progs\n"); + return -1; + } + + printf("Unload HiSock successfully\n"); + return 0; + } + + if (do_hisock()) { + fprintf(stderr, 
"ERROR: failed to do hisock\n"); + return -1; + } + + printf("Load HiSock successfully\n"); + return 0; +} diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9b302242be6c65a1b31a7dd8219380b7e2c675ca..f4e87e96df16208b73bfeb0a1cefd02587df3315 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1006,6 +1006,9 @@ enum bpf_prog_type { BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, BPF_PROG_TYPE_SCHED, +#ifndef __GENKSYMS__ + BPF_PROG_TYPE_HISOCK, +#endif }; enum bpf_attach_type { @@ -1059,6 +1062,9 @@ enum bpf_attach_type { BPF_TCX_EGRESS, BPF_TRACE_UPROBE_MULTI, BPF_SCHED, +#ifndef __GENKSYMS__ + BPF_HISOCK_EGRESS, +#endif __MAX_BPF_ATTACH_TYPE }; @@ -5669,6 +5675,37 @@ union bpf_attr { * 0 on success. * * **-ENOENT** if the bpf_local_storage cannot be found. + * + * void *bpf_get_ingress_dst(struct bpf_sock_ops *skops) + * Description + * Get the ingress dst entry of the full sock. + * Return + * Valid ingress dst on success, or negative error + * in case of failure. + * + * int bpf_set_ingress_dst(struct xdp_buff *xdp, void *dst) + * Description + * Set valid ingress dst entry to the skb associated + * with xdp_buff. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_change_skb_dev(void *ctx, u32 ifindex) + * Description + * Change ingress or egress device of the associated skb. + * Supports only BPF_PROG_TYPE_HISOCK and BPF_PROG_TYPE_XDP + * program types. + * + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct __sk_buff** hisock_egress programs. + * Return + * 0 on success, or negative error in case of failure. + * + * int bpf_ext_memcpy(void *dst, size_t dst_sz, const void *src, size_t src_sz) + * Description + * Copy *src_sz* bytes from *src* to *dst* if *dst_sz* >= *src_sz*. + * Return + * 0 on success, or negative error in case of failure. */ #define ___BPF_FUNC_MAPPER(FN, ctx...) 
\ FN(unspec, 0, ##ctx) \ @@ -5883,6 +5920,10 @@ union bpf_attr { FN(user_ringbuf_drain, 209, ##ctx) \ FN(cgrp_storage_get, 210, ##ctx) \ FN(cgrp_storage_delete, 211, ##ctx) \ + FN(get_ingress_dst, 212, ##ctx) \ + FN(set_ingress_dst, 213, ##ctx) \ + FN(change_skb_dev, 214, ##ctx) \ + FN(ext_memcpy, 215, ##ctx) \ /* */ /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't @@ -6313,6 +6354,7 @@ enum xdp_action { XDP_PASS, XDP_TX, XDP_REDIRECT, + XDP_HISOCK_REDIRECT = 100, }; /* user accessible metadata for XDP packet hook @@ -7355,4 +7397,11 @@ struct bpf_iter_num { __u64 __opaque[1]; } __attribute__((aligned(8))); +enum hisock_action { + HISOCK_PASS, + HISOCK_DROP, + HISOCK_REDIRECT, + __MAX_HISOCK_ACTION, +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a57f9afffe9881435d93a91cfd18f804a71a61be..828c1d2f173eadb38ff2f4ae5e406d0861f3f114 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -123,6 +123,7 @@ static const char * const attach_type_name[] = { [BPF_TCX_EGRESS] = "tcx_egress", [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi", [BPF_SCHED] = "sched", + [BPF_HISOCK_EGRESS] = "hisock_egress", }; static const char * const link_type_name[] = { @@ -212,6 +213,7 @@ static const char * const prog_type_name[] = { [BPF_PROG_TYPE_SYSCALL] = "syscall", [BPF_PROG_TYPE_NETFILTER] = "netfilter", [BPF_PROG_TYPE_SCHED] = "sched", + [BPF_PROG_TYPE_HISOCK] = "hisock", }; static int __base_pr(enum libbpf_print_level level, const char *format, @@ -8873,6 +8875,7 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE), SEC_DEF("sched/", SCHED, BPF_SCHED, SEC_ATTACH_BTF, attach_sched), + SEC_DEF("hisock_egress", HISOCK, BPF_HISOCK_EGRESS, SEC_ATTACHABLE_OPT), }; int libbpf_register_prog_handler(const char *sec,