From af8dfc02a37dbbb00edab898d0d8e79b27682412 Mon Sep 17 00:00:00 2001 From: Josh Don Date: Mon, 11 Nov 2024 10:27:38 -0800 Subject: [PATCH 01/13] sched: fix warning in sched_setaffinity mainline inclusion from mainline-6.13 commit 70ee7947a29029736a1a06c73a48ff37674a851b category: bugfix issue: #IBJOCD CVE: CVE-2024-41932 Signed-off-by: zhangshuqi --------------------------------------- Commit 8f9ea86fdf99b added some logic to sched_setaffinity that included a WARN when a per-task affinity assignment races with a cpuset update. Specifically, we can have a race where a cpuset update results in the task affinity no longer being a subset of the cpuset. That's fine; we have a fallback to instead use the cpuset mask. However, we have a WARN set up that will trigger if the cpuset mask has no overlap at all with the requested task affinity. This shouldn't be a warning condition; it's trivial to create this condition. Reproduced the warning by the following setup: - $PID inside a cpuset cgroup - another thread repeatedly switching the cpuset cpus from 1-2 to just 1 - another thread repeatedly setting the $PID affinity (via taskset) to 2 Fixes: 8f9ea86fdf99b ("sched: Always preserve the user requested cpumask") Signed-off-by: Josh Don Acked-and-tested-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Tested-by: Madadi Vineeth Reddy Link: https://lkml.kernel.org/r/20241111182738.1832953-1-joshdon@google.com Signed-off-by: zhangshuqi --- kernel/sched/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ee6d4c3db6a3..542f2125467f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8563,7 +8563,7 @@ __sched_setaffinity(struct task_struct *p, struct affinity_context *ctx) bool empty = !cpumask_and(new_mask, new_mask, ctx->user_mask); - if (WARN_ON_ONCE(empty)) + if (empty) cpumask_copy(new_mask, cpus_allowed); } __set_cpus_allowed_ptr(p, ctx); -- Gitee From 
f083a2db1ca432d76d99f45df5d516ad64f69b46 Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 11 Jul 2024 15:18:38 +0000 Subject: [PATCH 02/13] bpf, arm64: Fix trampoline for BPF_TRAMP_F_CALL_ORIG mainline inclusion from mainline-6.11 commit 19d3c179a37730caf600a97fed3794feac2b197b category: bugfix issue: #IBJOCD CVE: CVE-2024-43840 Signed-off-by: zhangshuqi --------------------------------------- When BPF_TRAMP_F_CALL_ORIG is set, the trampoline calls __bpf_tramp_enter() and __bpf_tramp_exit() functions, passing them the struct bpf_tramp_image *im pointer as an argument in R0. The trampoline generation code uses emit_addr_mov_i64() to emit instructions for moving the bpf_tramp_image address into R0, but emit_addr_mov_i64() assumes the address to be in the vmalloc() space and uses only 48 bits. Because bpf_tramp_image is allocated using kzalloc(), its address can use more than 48-bits, in this case the trampoline will pass an invalid address to __bpf_tramp_enter/exit() causing a kernel crash. Fix this by using emit_a64_mov_i64() in place of emit_addr_mov_i64() as it can work with addresses that are greater than 48-bits. 
Fixes: efc9909fdce0 ("bpf, arm64: Add bpf trampoline for arm64") Signed-off-by: Puranjay Mohan Signed-off-by: Daniel Borkmann Closes: https://lore.kernel.org/all/SJ0PR15MB461564D3F7E7A763498CA6A8CBDB2@SJ0PR15MB4615.namprd15.prod.outlook.com/ Link: https://lore.kernel.org/bpf/20240711151838.43469-1-puranjay@kernel.org Signed-off-by: zhangshuqi --- arch/arm64/net/bpf_jit_comp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 5074bd1d37b5..7c5156e7d31e 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1952,7 +1952,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx); if (flags & BPF_TRAMP_F_CALL_ORIG) { - emit_addr_mov_i64(A64_R(0), (const u64)im, ctx); + emit_a64_mov_i64(A64_R(0), (const u64)im, ctx); emit_call((const u64)__bpf_tramp_enter, ctx); } @@ -1996,7 +1996,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, if (flags & BPF_TRAMP_F_CALL_ORIG) { im->ip_epilogue = ctx->image + ctx->idx; - emit_addr_mov_i64(A64_R(0), (const u64)im, ctx); + emit_a64_mov_i64(A64_R(0), (const u64)im, ctx); emit_call((const u64)__bpf_tramp_exit, ctx); } -- Gitee From 2824e4a1422365165537420c706ee169bc47432a Mon Sep 17 00:00:00 2001 From: James Chapman Date: Mon, 29 Jul 2024 16:38:10 +0100 Subject: [PATCH 03/13] l2tp: prevent possible tunnel refcount underflow mainline inclusion from mainline-6.12 commit 24256415d18695b46da06c93135f5b51c548b950 category: bugfix issue: #IBJOCD CVE: CVE-2024-49940 Signed-off-by: zhangshuqi --------------------------------------- When a session is created, it sets a backpointer to its tunnel. When the session refcount drops to 0, l2tp_session_free drops the tunnel refcount if session->tunnel is non-NULL. 
However, session->tunnel is set in l2tp_session_create, before the tunnel refcount is incremented by l2tp_session_register, which leaves a small window where session->tunnel is non-NULL when the tunnel refcount hasn't been bumped. Moving the assignment to l2tp_session_register is trivial but l2tp_session_create calls l2tp_session_set_header_len which uses session->tunnel to get the tunnel's encap. Add an encap arg to l2tp_session_set_header_len to avoid using session->tunnel. If l2tpv3 sessions have colliding IDs, it is possible for l2tp_v3_session_get to race with l2tp_session_register and fetch a session which doesn't yet have session->tunnel set. Add a check for this case. Conflicts: net/l2tp/l2tp_core.c Signed-off-by: James Chapman Signed-off-by: Tom Parkin Signed-off-by: David S. Miller Signed-off-by: zhangshuqi --- net/l2tp/l2tp_core.c | 15 +++++++++------ net/l2tp/l2tp_core.h | 3 ++- net/l2tp/l2tp_netlink.c | 4 +++- net/l2tp/l2tp_ppp.c | 3 ++- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 70da78ab9520..65297b9ed74c 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -387,6 +387,7 @@ int l2tp_session_register(struct l2tp_session *session, l2tp_tunnel_inc_refcount(tunnel); } + WRITE_ONCE(session->tunnel, tunnel); hlist_add_head_rcu(&session->hlist, head); spin_unlock_bh(&tunnel->hlist_lock); @@ -698,7 +699,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, if (!session->lns_mode && !session->send_seq) { trace_session_seqnum_lns_enable(session); session->send_seq = 1; - l2tp_session_set_header_len(session, tunnel->version); + l2tp_session_set_header_len(session, tunnel->version, + tunnel->encap); } } else { /* No sequence numbers. 
@@ -719,7 +721,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, if (!session->lns_mode && session->send_seq) { trace_session_seqnum_lns_disable(session); session->send_seq = 0; - l2tp_session_set_header_len(session, tunnel->version); + l2tp_session_set_header_len(session, tunnel->version, + tunnel->encap); } else if (session->send_seq) { pr_debug_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n", session->name); @@ -1574,7 +1577,8 @@ EXPORT_SYMBOL_GPL(l2tp_session_delete); /* We come here whenever a session's send_seq, cookie_len or * l2specific_type parameters are set. */ -void l2tp_session_set_header_len(struct l2tp_session *session, int version) +void l2tp_session_set_header_len(struct l2tp_session *session, int version, + enum l2tp_encap_type encap) { if (version == L2TP_HDR_VER_2) { session->hdr_len = 6; @@ -1583,7 +1587,7 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version) } else { session->hdr_len = 4 + session->cookie_len; session->hdr_len += l2tp_get_l2specific_len(session); - if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP) + if (encap == L2TP_ENCAPTYPE_UDP) session->hdr_len += 4; } } @@ -1597,7 +1601,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn session = kzalloc(sizeof(*session) + priv_size, GFP_KERNEL); if (session) { session->magic = L2TP_SESSION_MAGIC; - session->tunnel = tunnel; session->session_id = session_id; session->peer_session_id = peer_session_id; @@ -1633,7 +1636,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn memcpy(&session->peer_cookie[0], &cfg->peer_cookie[0], cfg->peer_cookie_len); } - l2tp_session_set_header_len(session, tunnel->version); + l2tp_session_set_header_len(session, tunnel->version, tunnel->encap); refcount_set(&session->ref_count, 1); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 91ebf0a3f499..61f402c3e7f4 100644 --- a/net/l2tp/l2tp_core.h +++ 
b/net/l2tp/l2tp_core.h @@ -261,7 +261,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); /* Transmit path helpers for sending packets over the tunnel socket. */ -void l2tp_session_set_header_len(struct l2tp_session *session, int version); +void l2tp_session_set_header_len(struct l2tp_session *session, int version, + enum l2tp_encap_type encap); int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb); /* Pseudowire management. diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index e27e00cb16c6..05314419c7bb 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -690,8 +690,10 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf session->recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]); if (info->attrs[L2TP_ATTR_SEND_SEQ]) { + struct l2tp_tunnel *tunnel = session->tunnel; + session->send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]); - l2tp_session_set_header_len(session, session->tunnel->version); + l2tp_session_set_header_len(session, tunnel->version, tunnel->encap); } if (info->attrs[L2TP_ATTR_LNS_MODE]) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 6146e4e67bbb..eebec10a156d 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1203,7 +1203,8 @@ static int pppol2tp_session_setsockopt(struct sock *sk, po->chan.hdrlen = val ? 
PPPOL2TP_L2TP_HDR_SIZE_SEQ : PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; } - l2tp_session_set_header_len(session, session->tunnel->version); + l2tp_session_set_header_len(session, session->tunnel->version, + session->tunnel->encap); break; case PPPOL2TP_SO_LNSMODE: -- Gitee From 42eff77c8fc1d056566a3172047bd65d0b993669 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Thu, 29 Aug 2024 11:29:11 +0800 Subject: [PATCH 04/13] mm, slub: avoid zeroing kmalloc redzone mainline inclusion from mainline-6.12 commit 59090e479ac78ae18facd4c58eb332562a23020e category: bugfix issue: #IBJOCD CVE: CVE-2024-49885 Signed-off-by: zhangshuqi --------------------------------------- Since commit 946fa0dbf2d8 ("mm/slub: extend redzone check to extra allocated kmalloc space than requested"), setting orig_size treats the wasted space (object_size - orig_size) as a redzone. However with init_on_free=1 we clear the full object->size, including the redzone. Additionally we clear the object metadata, including the stored orig_size, making it zero, which makes check_object() treat the whole object as a redzone. These issues lead to the following BUG report with "slub_debug=FUZ init_on_free=1": [ 0.000000] ============================================================================= [ 0.000000] BUG kmalloc-8 (Not tainted): kmalloc Redzone overwritten [ 0.000000] ----------------------------------------------------------------------------- [ 0.000000] [ 0.000000] 0xffff000010032858-0xffff00001003285f @offset=2136. First byte 0x0 instead of 0xcc [ 0.000000] FIX kmalloc-8: Restoring kmalloc Redzone 0xffff000010032858-0xffff00001003285f=0xcc [ 0.000000] Slab 0xfffffdffc0400c80 objects=36 used=23 fp=0xffff000010032a18 flags=0x3fffe0000000200(workingset|node=0|zone=0|lastcpupid=0x1ffff) [ 0.000000] Object 0xffff000010032858 @offset=2136 fp=0xffff0000100328c8 [ 0.000000] [ 0.000000] Redzone ffff000010032850: cc cc cc cc cc cc cc cc ........ [ 0.000000] Object ffff000010032858: cc cc cc cc cc cc cc cc ........ 
[ 0.000000] Redzone ffff000010032860: cc cc cc cc cc cc cc cc ........ [ 0.000000] Padding ffff0000100328b4: 00 00 00 00 00 00 00 00 00 00 00 00 ............ [ 0.000000] CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.11.0-rc3-next-20240814-00004-g61844c55c3f4 #144 [ 0.000000] Hardware name: NXP i.MX95 19X19 board (DT) [ 0.000000] Call trace: [ 0.000000] dump_backtrace+0x90/0xe8 [ 0.000000] show_stack+0x18/0x24 [ 0.000000] dump_stack_lvl+0x74/0x8c [ 0.000000] dump_stack+0x18/0x24 [ 0.000000] print_trailer+0x150/0x218 [ 0.000000] check_object+0xe4/0x454 [ 0.000000] free_to_partial_list+0x2f8/0x5ec To address the issue, use orig_size to clear the used area, and restore the value of orig_size after clearing the remaining area. When CONFIG_SLUB_DEBUG is not defined, get_orig_size() directly returns s->object_size. So when using memset to init the area, the size can simply be orig_size, as get_orig_size() returns object_size when CONFIG_SLUB_DEBUG is not enabled. And orig_size can never be bigger than object_size. Fixes: 946fa0dbf2d8 ("mm/slub: extend redzone check to extra allocated kmalloc space than requested") Cc: Reviewed-by: Feng Tang Acked-by: David Rientjes Signed-off-by: Peng Fan Signed-off-by: Vlastimil Babka Signed-off-by: zhangshuqi --- mm/slub.c | 138 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 72 insertions(+), 66 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index d2544c88a5c4..9c6d8f28552c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -452,6 +452,26 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) *(freeptr_t *)freeptr_addr = freelist_ptr_encode(s, fp, freeptr_addr); } +/* + * See comment in calculate_sizes(). + */ +static inline bool freeptr_outside_object(struct kmem_cache *s) +{ + return s->offset >= s->inuse; +} + +/* + * Return offset of the end of info block which is inuse + free pointer if + * not overlapping with object. 
+ */ +static inline unsigned int get_info_end(struct kmem_cache *s) +{ + if (freeptr_outside_object(s)) + return s->inuse + sizeof(void *); + else + return s->inuse; +} + /* Loop over all objects in a slab */ #define for_each_object(__p, __s, __addr, __objects) \ for (__p = fixup_red_left(__s, __addr); \ @@ -627,6 +647,50 @@ static inline bool slab_update_freelist(struct kmem_cache *s, struct slab *slab, return false; } +/* + * kmalloc caches has fixed sizes (mostly power of 2), and kmalloc() API + * family will round up the real request size to these fixed ones, so + * there could be an extra area than what is requested. Save the original + * request size in the meta data area, for better debug and sanity check. + */ +static inline void set_orig_size(struct kmem_cache *s, + void *object, unsigned int orig_size) +{ + void *p = kasan_reset_tag(object); + + if (!slub_debug_orig_size(s)) + return; + +#ifdef CONFIG_KASAN_GENERIC + /* + * KASAN can save its free meta data inside of the object at offset 0. + * If this meta data size is larger than 'orig_size', it will overlap + * the data redzone in [orig_size+1, object_size]. Thus, we adjust + * 'orig_size' to be as at least as big as KASAN's meta data. + */ + if (kasan_metadata_size(s, true) > orig_size) + orig_size = kasan_metadata_size(s, true); +#endif + + p += get_info_end(s); + p += sizeof(struct track) * 2; + + *(unsigned int *)p = orig_size; +} + +static inline unsigned int get_orig_size(struct kmem_cache *s, void *object) +{ + void *p = kasan_reset_tag(object); + + if (!slub_debug_orig_size(s)) + return s->object_size; + + p += get_info_end(s); + p += sizeof(struct track) * 2; + + return *(unsigned int *)p; +} + #ifdef CONFIG_SLUB_DEBUG static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)]; static DEFINE_SPINLOCK(object_map_lock); @@ -740,26 +804,6 @@ static void print_section(char *level, char *text, u8 *addr, metadata_access_disable(); } -/* - * See comment in calculate_sizes(). 
- */ -static inline bool freeptr_outside_object(struct kmem_cache *s) -{ - return s->offset >= s->inuse; -} - -/* - * Return offset of the end of info block which is inuse + free pointer if - * not overlapping with object. - */ -static inline unsigned int get_info_end(struct kmem_cache *s) -{ - if (freeptr_outside_object(s)) - return s->inuse + sizeof(void *); - else - return s->inuse; -} - static struct track *get_track(struct kmem_cache *s, void *object, enum track_item alloc) { @@ -860,50 +904,6 @@ static void print_slab_info(const struct slab *slab) folio_flags(folio, 0)); } -/* - * kmalloc caches has fixed sizes (mostly power of 2), and kmalloc() API - * family will round up the real request size to these fixed ones, so - * there could be an extra area than what is requested. Save the original - * request size in the meta data area, for better debug and sanity check. - */ -static inline void set_orig_size(struct kmem_cache *s, - void *object, unsigned int orig_size) -{ - void *p = kasan_reset_tag(object); - - if (!slub_debug_orig_size(s)) - return; - -#ifdef CONFIG_KASAN_GENERIC - /* - * KASAN could save its free meta data in object's data area at - * offset 0, if the size is larger than 'orig_size', it will - * overlap the data redzone in [orig_size+1, object_size], and - * the check should be skipped. 
- */ - if (kasan_metadata_size(s, true) > orig_size) - orig_size = s->object_size; -#endif - - p += get_info_end(s); - p += sizeof(struct track) * 2; - - *(unsigned int *)p = orig_size; -} - -static inline unsigned int get_orig_size(struct kmem_cache *s, void *object) -{ - void *p = kasan_reset_tag(object); - - if (!slub_debug_orig_size(s)) - return s->object_size; - - p += get_info_end(s); - p += sizeof(struct track) * 2; - - return *(unsigned int *)p; -} - void skip_orig_size_check(struct kmem_cache *s, const void *object) { set_orig_size(s, (void *)object, s->object_size); @@ -1755,7 +1755,6 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects) {} static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects) {} - #ifndef CONFIG_SLUB_TINY static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab, void **freelist, void *nextfree) @@ -1795,12 +1794,19 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s, */ if (init) { int rsize; + unsigned int orig_size; + orig_size = get_orig_size(s, x); if (!kasan_has_integrated_init()) - memset(kasan_reset_tag(x), 0, s->object_size); + memset(kasan_reset_tag(x), 0, orig_size); rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad : 0; memset((char *)kasan_reset_tag(x) + s->inuse, 0, s->size - s->inuse - rsize); + /* + * Restore orig_size, otherwise a kmalloc redzone overwrite + * would be reported + */ + set_orig_size(s, x, orig_size); } /* KASAN might put x into memory quarantine, delaying its reuse. 
*/ return kasan_slab_free(s, x, init); -- Gitee From 63ac8a72d11cc93cf3fae3b33cb27e0638f1c369 Mon Sep 17 00:00:00 2001 From: Lizhi Xu Date: Wed, 5 Jun 2024 09:23:35 +0800 Subject: [PATCH 05/13] ext4: filesystems without casefold feature cannot be mounted with siphash mainline inclusion from mainline-6.12 commit 985b67cd86392310d9e9326de941c22fc9340eec category: bugfix issue: #IBJOCD CVE: CVE-2024-49968 Signed-off-by: zhangshuqi --------------------------------------- When mounting the ext4 filesystem, if the default hash version is set to DX_HASH_SIPHASH but the casefold feature is not set, exit the mounting. Reported-by: syzbot+340581ba9dceb7e06fb3@syzkaller.appspotmail.com Signed-off-by: Lizhi Xu Link: https://patch.msgid.link/20240605012335.44086-1-lizhi.xu@windriver.com Signed-off-by: Theodore Ts'o Signed-off-by: zhangshuqi Conflicts: fs/ext4/super.c --- fs/ext4/super.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f019ce64eba4..41d7b9fd6e37 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3626,6 +3626,13 @@ int ext4_feature_set_ok(struct super_block *sb, int readonly) return 0; } #endif + if (EXT4_SB(sb)->s_es->s_def_hash_version == DX_HASH_SIPHASH && + !ext4_has_feature_casefold(sb)) { + ext4_msg(sb, KERN_ERR, + "Filesystem without casefold feature cannot be " + "mounted with siphash"); + return 0; + } if (readonly) return 1; -- Gitee From abdfcc1560360c1abbbb7c03dc3fe8fcba464894 Mon Sep 17 00:00:00 2001 From: Wander Lairson Costa Date: Fri, 20 Sep 2024 16:06:59 -0300 Subject: [PATCH 06/13] bpf: Use raw_spinlock_t in ringbuf mainline inclusion from mainline-6.12 commit 8b62645b09f870d70c7910e7550289d444239a46 category: bugfix issue: #IBJOCD CVE: CVE-2024-50138 Signed-off-by: zhangshuqi --------------------------------------- The function __bpf_ringbuf_reserve is invoked from a tracepoint, which disables preemption. 
Using spinlock_t in this context can lead to a "sleep in atomic" warning in the RT variant. This issue is illustrated in the example below: BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:48 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 556208, name: test_progs preempt_count: 1, expected: 0 RCU nest depth: 1, expected: 1 INFO: lockdep is turned off. Preemption disabled at: [] migrate_enable+0xc0/0x39c CPU: 7 PID: 556208 Comm: test_progs Tainted: G Hardware name: Qualcomm SA8775P Ride (DT) Call trace: dump_backtrace+0xac/0x130 show_stack+0x1c/0x30 dump_stack_lvl+0xac/0xe8 dump_stack+0x18/0x30 __might_resched+0x3bc/0x4fc rt_spin_lock+0x8c/0x1a4 __bpf_ringbuf_reserve+0xc4/0x254 bpf_ringbuf_reserve_dynptr+0x5c/0xdc bpf_prog_ac3d15160d62622a_test_read_write+0x104/0x238 trace_call_bpf+0x238/0x774 perf_call_bpf_enter.isra.0+0x104/0x194 perf_syscall_enter+0x2f8/0x510 trace_sys_enter+0x39c/0x564 syscall_trace_enter+0x220/0x3c0 do_el0_svc+0x138/0x1dc el0_svc+0x54/0x130 el0t_64_sync_handler+0x134/0x150 el0t_64_sync+0x17c/0x180 Switch the spinlock to raw_spinlock_t to avoid this error. 
Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it") Reported-by: Brian Grech Signed-off-by: Wander Lairson Costa Signed-off-by: Wander Lairson Costa Signed-off-by: Daniel Borkmann Acked-by: Daniel Borkmann Link: https://lore.kernel.org/r/20240920190700.617253-1-wander@redhat.com Signed-off-by: zhangshuqi --- kernel/bpf/ringbuf.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 246559c3e93d..00688545742f 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -29,7 +29,7 @@ struct bpf_ringbuf { u64 mask; struct page **pages; int nr_pages; - spinlock_t spinlock ____cacheline_aligned_in_smp; + raw_spinlock_t spinlock ____cacheline_aligned_in_smp; /* For user-space producer ring buffers, an atomic_t busy bit is used * to synchronize access to the ring buffers in the kernel, rather than * the spinlock that is used for kernel-producer ring buffers. This is @@ -173,7 +173,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node) if (!rb) return NULL; - spin_lock_init(&rb->spinlock); + raw_spin_lock_init(&rb->spinlock); atomic_set(&rb->busy, 0); init_waitqueue_head(&rb->waitq); init_irq_work(&rb->work, bpf_ringbuf_notify); @@ -421,10 +421,10 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size) cons_pos = smp_load_acquire(&rb->consumer_pos); if (in_nmi()) { - if (!spin_trylock_irqsave(&rb->spinlock, flags)) + if (!raw_spin_trylock_irqsave(&rb->spinlock, flags)) return NULL; } else { - spin_lock_irqsave(&rb->spinlock, flags); + raw_spin_lock_irqsave(&rb->spinlock, flags); } pend_pos = rb->pending_pos; @@ -450,7 +450,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size) */ if (new_prod_pos - cons_pos > rb->mask || new_prod_pos - pend_pos > rb->mask) { - spin_unlock_irqrestore(&rb->spinlock, flags); + raw_spin_unlock_irqrestore(&rb->spinlock, flags); return NULL; } @@ -462,7 +462,7 @@ static void 
*__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size) /* pairs with consumer's smp_load_acquire() */ smp_store_release(&rb->producer_pos, new_prod_pos); - spin_unlock_irqrestore(&rb->spinlock, flags); + raw_spin_unlock_irqrestore(&rb->spinlock, flags); return (void *)hdr + BPF_RINGBUF_HDR_SZ; } -- Gitee From dc1bc5b540ff79517d484647d16032aa5bbe4e69 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 3 Oct 2024 14:25:58 +0200 Subject: [PATCH 07/13] thermal: core: Reference count the zone in thermal_zone_get_by_id() mainline inclusion from mainline-6.12 commit a42a5839f400e929c489bb1b58f54596c4535167 category: bugfix issue: #IBJOCD CVE: CVE-2024-50028 Signed-off-by: zhangshuqi --------------------------------------- There are places in the thermal netlink code where nothing prevents the thermal zone object from going away while being accessed after it has been returned by thermal_zone_get_by_id(). To address this, make thermal_zone_get_by_id() get a reference on the thermal zone device object to be returned with the help of get_device(), under thermal_list_lock, and adjust all of its callers to this change with the help of the cleanup.h infrastructure. Fixes: 1ce50e7d408e ("thermal: core: genetlink support for events/cmd/sampling") Cc: 6.8+ # 6.8+ Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Lukasz Luba Link: https://patch.msgid.link/6112242.lOV4Wx5bFT@rjwysocki.net Signed-off-by: zhangshuqi Conflicts: drivers/thermal/thermal_core.h drivers/thermal/thermal_netlink.c --- drivers/thermal/thermal_core.c | 1 + drivers/thermal/thermal_core.h | 3 +++ drivers/thermal/thermal_netlink.c | 9 +++------ 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index dad909547179..2476774af738 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -587,6 +587,7 @@ struct thermal_zone_device *thermal_zone_get_by_id(int id) mutex_lock(&thermal_list_lock); list_for_each_entry(tz, &thermal_tz_list, node) { if (tz->id == id) { + get_device(&tz->device); match = tz; break; } diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 024e82ebf592..cf88b7d0c490 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -56,6 +56,9 @@ int for_each_thermal_governor(int (*cb)(struct thermal_governor *, void *), struct thermal_zone_device *thermal_zone_get_by_id(int id); +DEFINE_CLASS(thermal_zone_get_by_id, struct thermal_zone_device *, + if (_T) put_device(&_T->device), thermal_zone_get_by_id(id), int id) + struct thermal_attr { struct device_attribute attr; char name[THERMAL_NAME_LENGTH]; diff --git a/drivers/thermal/thermal_netlink.c b/drivers/thermal/thermal_netlink.c index 08bc46c3ec7b..8d07531d9519 100644 --- a/drivers/thermal/thermal_netlink.c +++ b/drivers/thermal/thermal_netlink.c @@ -450,7 +450,6 @@ static int thermal_genl_cmd_tz_get_id(struct param *p) static int thermal_genl_cmd_tz_get_trip(struct param *p) { struct sk_buff *msg = p->msg; - struct thermal_zone_device *tz; struct nlattr *start_trip; struct thermal_trip trip; int ret, i, id; @@ -460,7 +459,7 @@ static int thermal_genl_cmd_tz_get_trip(struct param *p) id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]); - tz = 
thermal_zone_get_by_id(id); + CLASS(thermal_zone_get_by_id, tz)(id); if (!tz) return -EINVAL; @@ -498,7 +497,6 @@ static int thermal_genl_cmd_tz_get_trip(struct param *p) static int thermal_genl_cmd_tz_get_temp(struct param *p) { struct sk_buff *msg = p->msg; - struct thermal_zone_device *tz; int temp, ret, id; if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID]) @@ -506,7 +504,7 @@ static int thermal_genl_cmd_tz_get_temp(struct param *p) id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]); - tz = thermal_zone_get_by_id(id); + CLASS(thermal_zone_get_by_id, tz)(id); if (!tz) return -EINVAL; @@ -524,7 +522,6 @@ static int thermal_genl_cmd_tz_get_temp(struct param *p) static int thermal_genl_cmd_tz_get_gov(struct param *p) { struct sk_buff *msg = p->msg; - struct thermal_zone_device *tz; int id, ret = 0; if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID]) @@ -532,7 +529,7 @@ static int thermal_genl_cmd_tz_get_gov(struct param *p) id = nla_get_u32(p->attrs[THERMAL_GENL_ATTR_TZ_ID]); - tz = thermal_zone_get_by_id(id); + CLASS(thermal_zone_get_by_id, tz)(id); if (!tz) return -EINVAL; -- Gitee From 0dc9726ab723baa19f35bbc253bbc5850404efe5 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Wed, 21 Feb 2024 23:08:31 +0100 Subject: [PATCH 08/13] usb: gadget: uvc: fix try format returns on uncompressed formats mainline inclusion from mainline-6.10 commit 7a700d8f2431b681f2dae1118d62177719912f5d category: bugfix issue: #IBJOCD CVE: CVE-2024-50056 Signed-off-by: zhangshuqi --------------------------------------- When setting uncompressed formats, the values of bytesperline and sizeimage can already be determined by using the v4l2_fill_pixfmt helper function. We change the try_fmt function to use the helper instead. 
Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20240221-uvc-gadget-uncompressed-v1-1-f55e97287cae@pengutronix.de Signed-off-by: Greg Kroah-Hartman Signed-off-by: zhangshuqi --- drivers/usb/gadget/function/uvc_v4l2.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c index 3f0a9795c0d4..ed42485ebb2a 100644 --- a/drivers/usb/gadget/function/uvc_v4l2.c +++ b/drivers/usb/gadget/function/uvc_v4l2.c @@ -260,12 +260,26 @@ uvc_v4l2_try_format(struct file *file, void *fh, struct v4l2_format *fmt) if (!uframe) return -EINVAL; - fmt->fmt.pix.width = uframe->frame.w_width; - fmt->fmt.pix.height = uframe->frame.w_height; + if (uformat->type == UVCG_UNCOMPRESSED) { + struct uvcg_uncompressed *u = + to_uvcg_uncompressed(&uformat->group.cg_item); + if (!u) + return 0; + + v4l2_fill_pixfmt(&fmt->fmt.pix, fmt->fmt.pix.pixelformat, + uframe->frame.w_width, uframe->frame.w_height); + + if (fmt->fmt.pix.sizeimage != (uvc_v4l2_get_bytesperline(uformat, uframe) * + uframe->frame.w_height)) + return -EINVAL; + } else { + fmt->fmt.pix.width = uframe->frame.w_width; + fmt->fmt.pix.height = uframe->frame.w_height; + fmt->fmt.pix.bytesperline = uvc_v4l2_get_bytesperline(uformat, uframe); + fmt->fmt.pix.sizeimage = uvc_get_frame_size(uformat, uframe); + fmt->fmt.pix.pixelformat = to_uvc_format(uformat)->fcc; + } fmt->fmt.pix.field = V4L2_FIELD_NONE; - fmt->fmt.pix.bytesperline = uvc_v4l2_get_bytesperline(uformat, uframe); - fmt->fmt.pix.sizeimage = uvc_get_frame_size(uformat, uframe); - fmt->fmt.pix.pixelformat = to_uvc_format(uformat)->fcc; fmt->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB; fmt->fmt.pix.priv = 0; -- Gitee From 41e4735af02f0c2a8a60b5e8aa42def91d765ac4 Mon Sep 17 00:00:00 2001 From: Abhishek Tamboli Date: Thu, 15 Aug 2024 15:52:02 +0530 Subject: [PATCH 09/13] usb: gadget: uvc: Fix ERR_PTR dereference in uvc_v4l2.c mainline inclusion from 
mainline-6.12 commit a7bb96b18864225a694e3887ac2733159489e4b0 category: bugfix issue: #IBJOCD CVE: CVE-2024-50056 Signed-off-by: zhangshuqi --------------------------------------- Fix potential dereferencing of ERR_PTR() in find_format_by_pix() and uvc_v4l2_enum_format(). Fix the following smatch errors: drivers/usb/gadget/function/uvc_v4l2.c:124 find_format_by_pix() error: 'fmtdesc' dereferencing possible ERR_PTR() drivers/usb/gadget/function/uvc_v4l2.c:392 uvc_v4l2_enum_format() error: 'fmtdesc' dereferencing possible ERR_PTR() Also, fix similar issue in uvc_v4l2_try_format() for potential dereferencing of ERR_PTR(). Signed-off-by: Abhishek Tamboli Link: https://lore.kernel.org/r/20240815102202.594812-1-abhishektamboli9@gmail.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: zhangshuqi --- drivers/usb/gadget/function/uvc_v4l2.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c index ed42485ebb2a..e61bad79d0e9 100644 --- a/drivers/usb/gadget/function/uvc_v4l2.c +++ b/drivers/usb/gadget/function/uvc_v4l2.c @@ -121,6 +121,9 @@ static struct uvcg_format *find_format_by_pix(struct uvc_device *uvc, list_for_each_entry(format, &uvc->header->formats, entry) { const struct uvc_format_desc *fmtdesc = to_uvc_format(format->fmt); + if (IS_ERR(fmtdesc)) + continue; + if (fmtdesc->fcc == pixelformat) { uformat = format->fmt; break; @@ -240,6 +243,7 @@ uvc_v4l2_try_format(struct file *file, void *fh, struct v4l2_format *fmt) struct uvc_video *video = &uvc->video; struct uvcg_format *uformat; struct uvcg_frame *uframe; + const struct uvc_format_desc *fmtdesc; u8 *fcc; if (fmt->type != video->queue.queue.type) @@ -277,7 +281,10 @@ uvc_v4l2_try_format(struct file *file, void *fh, struct v4l2_format *fmt) fmt->fmt.pix.height = uframe->frame.w_height; fmt->fmt.pix.bytesperline = uvc_v4l2_get_bytesperline(uformat, uframe); fmt->fmt.pix.sizeimage = 
uvc_get_frame_size(uformat, uframe); - fmt->fmt.pix.pixelformat = to_uvc_format(uformat)->fcc; + fmtdesc = to_uvc_format(uformat); + if (IS_ERR(fmtdesc)) + return PTR_ERR(fmtdesc); + fmt->fmt.pix.pixelformat = fmtdesc->fcc; } fmt->fmt.pix.field = V4L2_FIELD_NONE; fmt->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB; @@ -389,6 +396,9 @@ uvc_v4l2_enum_format(struct file *file, void *fh, struct v4l2_fmtdesc *f) return -EINVAL; fmtdesc = to_uvc_format(uformat); + if (IS_ERR(fmtdesc)) + return PTR_ERR(fmtdesc); + f->pixelformat = fmtdesc->fcc; return 0; -- Gitee From 97e68ce0d699e1b1818a8ad5cadc3017c3dd3497 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Fri, 13 Sep 2024 08:03:26 -0700 Subject: [PATCH 10/13] bpf: Fix a sdiv overflow issue mainline inclusion from mainline-6.12 commit 7dd34d7b7dcf9309fc6224caf4dd5b35bedddcb7 category: bugfix issue: #IBJOCD CVE: CVE-2024-49888 Signed-off-by: zhangshuqi --------------------------------------- Zac Ecob reported a problem where a bpf program may cause kernel crash due to the following error: Oops: divide error: 0000 [#1] PREEMPT SMP KASAN PTI The failure is due to the below signed divide: LLONG_MIN/-1 where LLONG_MIN equals to -9,223,372,036,854,775,808. LLONG_MIN/-1 is supposed to give a positive number 9,223,372,036,854,775,808, but it is impossible since for 64-bit system, the maximum positive number is 9,223,372,036,854,775,807. On x86_64, LLONG_MIN/-1 will cause a kernel exception. On arm64, the result for LLONG_MIN/-1 is LLONG_MIN. Further investigation found all the following sdiv/smod cases may trigger an exception when bpf program is running on x86_64 platform: - LLONG_MIN/-1 for 64bit operation - INT_MIN/-1 for 32bit operation - LLONG_MIN%-1 for 64bit operation - INT_MIN%-1 for 32bit operation where -1 can be an immediate or in a register. On arm64, there are no exceptions: - LLONG_MIN/-1 = LLONG_MIN - INT_MIN/-1 = INT_MIN - LLONG_MIN%-1 = 0 - INT_MIN%-1 = 0 where -1 can be an immediate or in a register. 
Insn patching is needed to handle the above cases, and the patched code produces results aligned with the above arm64 results. Below is pseudo code to handle sdiv/smod exceptions for both divisor -1 and divisor 0, where the divisor is stored in a register. sdiv: tmp = rX tmp += 1 /* [-1, 0] -> [0, 1] */ if tmp >(unsigned) 1 goto L2 if tmp == 0 goto L1 rY = 0 L1: rY = -rY; goto L3 L2: rY /= rX L3: smod: tmp = rX tmp += 1 /* [-1, 0] -> [0, 1] */ if tmp >(unsigned) 1 goto L1 if tmp == 1 (is64 ? goto L2 : goto L3) rY = 0; goto L2 L1: rY %= rX L2: goto L4 // only when !is64 L3: wY = wY // only when !is64 L4: [1] https://lore.kernel.org/bpf/tPJLTEh7S_DxFEqAI2Ji5MBSoZVg7_G-Py2iaZpAaWtM961fFTWtsnlzwvTbzBzaUzwQAoNATXKUlt0LZOFgnDcIyKCswAnAGdUF3LBrhGQ=@protonmail.com/ Reported-by: Zac Ecob Signed-off-by: Yonghong Song Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20240913150326.1187788-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov Signed-off-by: zhangshuqi Conflicts: kernel/bpf/verifier.c --- kernel/bpf/verifier.c | 93 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 89 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d6a4102312fa..1a872b05d0b8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -18898,13 +18898,46 @@ static int do_misc_fixups(struct bpf_verifier_env *env) int i, ret, cnt, delta = 0; for (i = 0; i < insn_cnt; i++, insn++) { - /* Make divide-by-zero exceptions impossible. */ + /* Make sdiv/smod divide-by-minus-one exceptions impossible. */ + if ((insn->code == (BPF_ALU64 | BPF_MOD | BPF_K) || + insn->code == (BPF_ALU64 | BPF_DIV | BPF_K) || + insn->code == (BPF_ALU | BPF_MOD | BPF_K) || + insn->code == (BPF_ALU | BPF_DIV | BPF_K)) && + insn->off == 1 && insn->imm == -1) { + bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; + bool isdiv = BPF_OP(insn->code) == BPF_DIV; + struct bpf_insn *patchlet; + struct bpf_insn chk_and_sdiv[] = { + BPF_RAW_INSN((is64 ?
BPF_ALU64 : BPF_ALU) | + BPF_NEG | BPF_K, insn->dst_reg, + 0, 0, 0), + }; + struct bpf_insn chk_and_smod[] = { + BPF_MOV32_IMM(insn->dst_reg, 0), + }; + + patchlet = isdiv ? chk_and_sdiv : chk_and_smod; + cnt = isdiv ? ARRAY_SIZE(chk_and_sdiv) : ARRAY_SIZE(chk_and_smod); + + new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + + /* Make divide-by-zero and divide-by-minus-one exceptions impossible. */ if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || insn->code == (BPF_ALU | BPF_MOD | BPF_X) || insn->code == (BPF_ALU | BPF_DIV | BPF_X)) { bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; bool isdiv = BPF_OP(insn->code) == BPF_DIV; + bool is_sdiv = isdiv && insn->off == 1; + bool is_smod = !isdiv && insn->off == 1; struct bpf_insn *patchlet; struct bpf_insn chk_and_div[] = { /* [R,W]x div 0 -> 0 */ @@ -18924,10 +18957,62 @@ static int do_misc_fixups(struct bpf_verifier_env *env) BPF_JMP_IMM(BPF_JA, 0, 0, 1), BPF_MOV32_REG(insn->dst_reg, insn->dst_reg), }; + struct bpf_insn chk_and_sdiv[] = { + /* [R,W]x sdiv 0 -> 0 + * LLONG_MIN sdiv -1 -> LLONG_MIN + * INT_MIN sdiv -1 -> INT_MIN + */ + BPF_MOV64_REG(BPF_REG_AX, insn->src_reg), + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_ADD | BPF_K, BPF_REG_AX, + 0, 0, 1), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JGT | BPF_K, BPF_REG_AX, + 0, 4, 1), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JEQ | BPF_K, BPF_REG_AX, + 0, 1, 0), + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_MOV | BPF_K, insn->dst_reg, + 0, 0, 0), + /* BPF_NEG(LLONG_MIN) == -LLONG_MIN == LLONG_MIN */ + BPF_RAW_INSN((is64 ? 
BPF_ALU64 : BPF_ALU) | + BPF_NEG | BPF_K, insn->dst_reg, + 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + *insn, + }; + struct bpf_insn chk_and_smod[] = { + /* [R,W]x mod 0 -> [R,W]x */ + /* [R,W]x mod -1 -> 0 */ + BPF_MOV64_REG(BPF_REG_AX, insn->src_reg), + BPF_RAW_INSN((is64 ? BPF_ALU64 : BPF_ALU) | + BPF_ADD | BPF_K, BPF_REG_AX, + 0, 0, 1), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JGT | BPF_K, BPF_REG_AX, + 0, 3, 1), + BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | + BPF_JEQ | BPF_K, BPF_REG_AX, + 0, 3 + (is64 ? 0 : 1), 1), + BPF_MOV32_IMM(insn->dst_reg, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + *insn, + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV32_REG(insn->dst_reg, insn->dst_reg), + }; - patchlet = isdiv ? chk_and_div : chk_and_mod; - cnt = isdiv ? ARRAY_SIZE(chk_and_div) : - ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0); + if (is_sdiv) { + patchlet = chk_and_sdiv; + cnt = ARRAY_SIZE(chk_and_sdiv); + } else if (is_smod) { + patchlet = chk_and_smod; + cnt = ARRAY_SIZE(chk_and_smod) - (is64 ? 2 : 0); + } else { + patchlet = isdiv ? chk_and_div : chk_and_mod; + cnt = isdiv ? ARRAY_SIZE(chk_and_div) : + ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0); + } new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); if (!new_prog) -- Gitee From a539ff3fedabd0cd735ba60725f260bf3ffffb09 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Tue, 15 Oct 2024 23:02:06 +0800 Subject: [PATCH 11/13] bpf: Prevent tailcall infinite loop caused by freplace mainline inclusion from mainline-6.13 commit d6083f040d5d8f8d748462c77e90547097df936e category: bugfix issue: #IBJOCD CVE: CVE-2024-47794 Signed-off-by: zhangshuqi --------------------------------------- There is a potential infinite loop issue that can occur when using a combination of tail calls and freplace. In an upcoming selftest, the attach target for entry_freplace of tailcall_freplace.c is subprog_tc of tc_bpf2bpf.c, while the tail call in entry_freplace leads to entry_tc. 
This results in an infinite loop: entry_tc -> subprog_tc -> entry_freplace --tailcall-> entry_tc. The problem arises because the tail_call_cnt in entry_freplace resets to zero each time entry_freplace is executed, causing the tail call mechanism to never terminate, eventually leading to a kernel panic. To fix this issue, the solution is twofold: 1. Prevent updating a program extended by an freplace program to a prog_array map. 2. Prevent extending a program that is already part of a prog_array map with an freplace program. This ensures that: * If a program or its subprogram has been extended by an freplace program, it can no longer be updated to a prog_array map. * If a program has been added to a prog_array map, neither it nor its subprograms can be extended by an freplace program. Moreover, an extension program should not be tailcalled. As such, return -EINVAL if the program has a type of BPF_PROG_TYPE_EXT when adding it to a prog_array map. Additionally, fix a minor code style issue by replacing eight spaces with a tab for proper formatting. 
Reviewed-by: Eduard Zingerman Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20241015150207.70264-2-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov Signed-off-by: zhangshuqi Conflicts: include/linux/bpf.h --- include/linux/bpf.h | 17 +++++++++++---- kernel/bpf/arraymap.c | 26 +++++++++++++++++++++-- kernel/bpf/core.c | 1 + kernel/bpf/syscall.c | 7 +++--- kernel/bpf/trampoline.c | 47 ++++++++++++++++++++++++++++++++++------- 5 files changed, 81 insertions(+), 17 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 035e627f94f6..706060aa4431 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1245,8 +1245,12 @@ int bpf_dynptr_check_size(u32 size); u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); #ifdef CONFIG_BPF_JIT -int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); -int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); +int bpf_trampoline_link_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog); +int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog); struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); @@ -1327,12 +1331,14 @@ void bpf_jit_uncharge_modmem(u32 size); bool bpf_prog_has_trampoline(const struct bpf_prog *prog); #else static inline int bpf_trampoline_link_prog(struct bpf_tramp_link *link, - struct bpf_trampoline *tr) + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { return -ENOTSUPP; } static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, - struct bpf_trampoline *tr) + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { return -ENOTSUPP; } @@ -1430,6 +1436,9 @@ struct bpf_prog_aux { bool sleepable; bool tail_call_reachable; bool xdp_has_frags; + bool is_extended; /* true if extended by freplace 
program */ + u64 prog_array_member_cnt; /* counts how many times as member of prog_array */ + struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */ /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 1811efcfbd6e..7f1ef37c67be 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -909,22 +909,44 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map, struct file *map_file, int fd) { struct bpf_prog *prog = bpf_prog_get(fd); + bool is_extended; if (IS_ERR(prog)) return prog; - if (!bpf_prog_map_compatible(map, prog)) { + if (prog->type == BPF_PROG_TYPE_EXT || + !bpf_prog_map_compatible(map, prog)) { bpf_prog_put(prog); return ERR_PTR(-EINVAL); } + mutex_lock(&prog->aux->ext_mutex); + is_extended = prog->aux->is_extended; + if (!is_extended) + prog->aux->prog_array_member_cnt++; + mutex_unlock(&prog->aux->ext_mutex); + if (is_extended) { + /* Extended prog can not be tail callee. It's to prevent a + * potential infinite loop like: + * tail callee prog entry -> tail callee prog subprog -> + * freplace prog entry --tailcall-> tail callee prog entry. 
+ */ + bpf_prog_put(prog); + return ERR_PTR(-EBUSY); + } + return prog; } static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer) { + struct bpf_prog *prog = ptr; + + mutex_lock(&prog->aux->ext_mutex); + prog->aux->prog_array_member_cnt--; + mutex_unlock(&prog->aux->ext_mutex); /* bpf_prog is freed after one RCU or tasks trace grace period */ - bpf_prog_put(ptr); + bpf_prog_put(prog); } static u32 prog_fd_array_sys_lookup_elem(void *ptr) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 02f327f05fd6..855523318f91 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -122,6 +122,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode); mutex_init(&fp->aux->used_maps_mutex); + mutex_init(&fp->aux->ext_mutex); mutex_init(&fp->aux->dst_mutex); return fp; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ba38c08a9a05..e7981a9e2af6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3107,7 +3107,8 @@ static void bpf_tracing_link_release(struct bpf_link *link) container_of(link, struct bpf_tracing_link, link.link); WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link, - tr_link->trampoline)); + tr_link->trampoline, + tr_link->tgt_prog)); bpf_trampoline_put(tr_link->trampoline); @@ -3242,7 +3243,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, * in prog->aux * * - if prog->aux->dst_trampoline is NULL, the program has already been - * attached to a target and its initial target was cleared (below) + * attached to a target and its initial target was cleared (below) * * - if tgt_prog != NULL, the caller specified tgt_prog_fd + * target_btf_id using the link_create API. 
@@ -3317,7 +3318,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, if (err) goto out_unlock; - err = bpf_trampoline_link_prog(&link->link, tr); + err = bpf_trampoline_link_prog(&link->link, tr, tgt_prog); if (err) { bpf_link_cleanup(&link_primer); link = NULL; diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index e97aeda3a86b..7c0abf19b52f 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -510,7 +510,27 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog) } } -static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) +static int bpf_freplace_check_tgt_prog(struct bpf_prog *tgt_prog) +{ + struct bpf_prog_aux *aux = tgt_prog->aux; + + guard(mutex)(&aux->ext_mutex); + if (aux->prog_array_member_cnt) + /* Program extensions can not extend target prog when the target + * prog has been updated to any prog_array map as tail callee. + * It's to prevent a potential infinite loop like: + * tgt prog entry -> tgt prog subprog -> freplace prog entry + * --tailcall-> tgt prog entry. + */ + return -EBUSY; + + aux->is_extended = true; + return 0; +} + +static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { enum bpf_tramp_prog_type kind; struct bpf_tramp_link *link_exiting; @@ -531,6 +551,9 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_tr /* Cannot attach extension if fentry/fexit are in use. 
*/ if (cnt) return -EBUSY; + err = bpf_freplace_check_tgt_prog(tgt_prog); + if (err) + return err; tr->extension_prog = link->link.prog; return bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL, link->link.prog->bpf_func); @@ -557,17 +580,21 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_tr return err; } -int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) +int bpf_trampoline_link_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { int err; mutex_lock(&tr->mutex); - err = __bpf_trampoline_link_prog(link, tr); + err = __bpf_trampoline_link_prog(link, tr, tgt_prog); mutex_unlock(&tr->mutex); return err; } -static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) +static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { enum bpf_tramp_prog_type kind; int err; @@ -578,6 +605,8 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_ err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, tr->extension_prog->bpf_func, NULL); tr->extension_prog = NULL; + guard(mutex)(&tgt_prog->aux->ext_mutex); + tgt_prog->aux->is_extended = false; return err; } hlist_del_init(&link->tramp_hlist); @@ -586,12 +615,14 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_ } /* bpf_trampoline_unlink_prog() should never fail. 
*/ -int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr) +int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, + struct bpf_trampoline *tr, + struct bpf_prog *tgt_prog) { int err; mutex_lock(&tr->mutex); - err = __bpf_trampoline_unlink_prog(link, tr); + err = __bpf_trampoline_unlink_prog(link, tr, tgt_prog); mutex_unlock(&tr->mutex); return err; } @@ -606,7 +637,7 @@ static void bpf_shim_tramp_link_release(struct bpf_link *link) if (!shim_link->trampoline) return; - WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline)); + WARN_ON_ONCE(bpf_trampoline_unlink_prog(&shim_link->link, shim_link->trampoline, NULL)); bpf_trampoline_put(shim_link->trampoline); } @@ -720,7 +751,7 @@ int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog, goto err; } - err = __bpf_trampoline_link_prog(&shim_link->link, tr); + err = __bpf_trampoline_link_prog(&shim_link->link, tr, NULL); if (err) goto err; -- Gitee From bd75634ce20614479f76715ca121497122d34410 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Sun, 28 Jul 2024 19:46:12 +0800 Subject: [PATCH 12/13] selftests/bpf: Add testcase for updating attached freplace prog to prog_array map mainline inclusion from mainline-6.12 commit 7559a7a84ef83a2dd86caf623430b8d834843cec category: bugfix issue: #IBJOCD CVE: CVE-2024-47794 Signed-off-by: zhangshuqi --------------------------------------- Add a selftest to confirm the issue, which gets -EINVAL when update attached freplace prog to prog_array map, has been fixed. 
cd tools/testing/selftests/bpf; ./test_progs -t tailcalls 328/25 tailcalls/tailcall_freplace:OK 328 tailcalls:OK Summary: 1/25 PASSED, 0 SKIPPED, 0 FAILED Acked-by: Yonghong Song Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20240728114612.48486-3-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov Signed-off-by: zhangshuqi Conflicts: tools/testing/selftests/bpf/prog_tests/tailcalls.c --- .../selftests/bpf/prog_tests/tailcalls.c | 64 +++++++++++++++++++ .../selftests/bpf/progs/tailcall_freplace.c | 23 +++++++ .../testing/selftests/bpf/progs/tc_bpf2bpf.c | 22 +++++++ 3 files changed, 109 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/tailcall_freplace.c create mode 100644 tools/testing/selftests/bpf/progs/tc_bpf2bpf.c diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index 09c189761926..9e5f0f31e805 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include "tailcall_freplace.skel.h" +#include "tc_bpf2bpf.skel.h" /* test_tailcall_1 checks basic functionality by patching multiple locations * in a single program for a single tail call slot with nop->jmp, jmp->nop @@ -884,6 +886,66 @@ static void test_tailcall_bpf2bpf_6(void) tailcall_bpf2bpf6__destroy(obj); } +/* test_tailcall_freplace checks that the attached freplace prog is OK to + * update the prog_array map. 
+ */ +static void test_tailcall_freplace(void) +{ + struct tailcall_freplace *freplace_skel = NULL; + struct bpf_link *freplace_link = NULL; + struct bpf_program *freplace_prog; + struct tc_bpf2bpf *tc_skel = NULL; + int prog_fd, map_fd; + char buff[128] = {}; + int err, key; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = buff, + .data_size_in = sizeof(buff), + .repeat = 1, + ); + + freplace_skel = tailcall_freplace__open(); + if (!ASSERT_OK_PTR(freplace_skel, "tailcall_freplace__open")) + return; + + tc_skel = tc_bpf2bpf__open_and_load(); + if (!ASSERT_OK_PTR(tc_skel, "tc_bpf2bpf__open_and_load")) + goto out; + + prog_fd = bpf_program__fd(tc_skel->progs.entry_tc); + freplace_prog = freplace_skel->progs.entry_freplace; + err = bpf_program__set_attach_target(freplace_prog, prog_fd, "subprog"); + if (!ASSERT_OK(err, "set_attach_target")) + goto out; + + err = tailcall_freplace__load(freplace_skel); + if (!ASSERT_OK(err, "tailcall_freplace__load")) + goto out; + + freplace_link = bpf_program__attach_freplace(freplace_prog, prog_fd, + "subprog"); + if (!ASSERT_OK_PTR(freplace_link, "attach_freplace")) + goto out; + + map_fd = bpf_map__fd(freplace_skel->maps.jmp_table); + prog_fd = bpf_program__fd(freplace_prog); + key = 0; + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); + if (!ASSERT_OK(err, "update jmp_table")) + goto out; + + prog_fd = bpf_program__fd(tc_skel->progs.entry_tc); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 34, "test_run retval"); + +out: + bpf_link__destroy(freplace_link); + tc_bpf2bpf__destroy(tc_skel); + tailcall_freplace__destroy(freplace_skel); +} + void test_tailcalls(void) { if (test__start_subtest("tailcall_1")) @@ -910,4 +972,6 @@ void test_tailcalls(void) test_tailcall_bpf2bpf_4(true); if (test__start_subtest("tailcall_bpf2bpf_6")) test_tailcall_bpf2bpf_6(); + if (test__start_subtest("tailcall_freplace")) + test_tailcall_freplace(); } diff --git 
a/tools/testing/selftests/bpf/progs/tailcall_freplace.c b/tools/testing/selftests/bpf/progs/tailcall_freplace.c new file mode 100644 index 000000000000..6713b809df44 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_freplace.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +int count = 0; + +SEC("freplace") +int entry_freplace(struct __sk_buff *skb) +{ + count++; + bpf_tail_call_static(skb, &jmp_table, 0); + return count; +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c new file mode 100644 index 000000000000..8a0632c37839 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "bpf_misc.h" + +__noinline +int subprog(struct __sk_buff *skb) +{ + int ret = 1; + + __sink(ret); + return ret; +} + +SEC("tc") +int entry_tc(struct __sk_buff *skb) +{ + return subprog(skb); +} + +char __license[] SEC("license") = "GPL"; -- Gitee From 774986e3ee3160a2f7861e862ca4792e40cbdbe6 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Tue, 15 Oct 2024 23:02:07 +0800 Subject: [PATCH 13/13] selftests/bpf: Add test to verify tailcall and freplace restrictions mainline inclusion from mainline-6.13 commit 021611d33e78694f4bd54573093c6fc70a812644 category: bugfix issue: #IBJOCD CVE: CVE-2024-47794 Signed-off-by: zhangshuqi --------------------------------------- Add a test case to ensure that attaching a tail callee program with an freplace program fails, and that updating an extended program to a prog_array map is also prohibited. 
This test is designed to prevent the potential infinite loop issue caused by the combination of tail calls and freplace, ensuring the correct behavior and stability of the system. Additionally, fix the broken tailcalls/tailcall_freplace selftest because an extension prog should not be tailcalled. cd tools/testing/selftests/bpf; ./test_progs -t tailcalls 337/25 tailcalls/tailcall_freplace:OK 337/26 tailcalls/tailcall_bpf2bpf_freplace:OK 337 tailcalls:OK Summary: 1/26 PASSED, 0 SKIPPED, 0 FAILED Acked-by: Eduard Zingerman Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20241015150207.70264-3-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov Signed-off-by: zhangshuqi Conflicts: tools/testing/selftests/bpf/prog_tests/tailcalls.c --- .../selftests/bpf/prog_tests/tailcalls.c | 120 ++++++++++++++++-- .../testing/selftests/bpf/progs/tc_bpf2bpf.c | 5 +- 2 files changed, 109 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index 9e5f0f31e805..3693a6857699 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -886,8 +886,8 @@ static void test_tailcall_bpf2bpf_6(void) tailcall_bpf2bpf6__destroy(obj); } -/* test_tailcall_freplace checks that the attached freplace prog is OK to - * update the prog_array map. +/* test_tailcall_freplace checks that the freplace prog fails to update the + * prog_array map, no matter whether the freplace prog attaches to its target. 
*/ static void test_tailcall_freplace(void) { @@ -895,7 +895,7 @@ static void test_tailcall_freplace(void) struct bpf_link *freplace_link = NULL; struct bpf_program *freplace_prog; struct tc_bpf2bpf *tc_skel = NULL; - int prog_fd, map_fd; + int prog_fd, tc_prog_fd, map_fd; char buff[128] = {}; int err, key; @@ -913,9 +913,10 @@ static void test_tailcall_freplace(void) if (!ASSERT_OK_PTR(tc_skel, "tc_bpf2bpf__open_and_load")) goto out; - prog_fd = bpf_program__fd(tc_skel->progs.entry_tc); + tc_prog_fd = bpf_program__fd(tc_skel->progs.entry_tc); freplace_prog = freplace_skel->progs.entry_freplace; - err = bpf_program__set_attach_target(freplace_prog, prog_fd, "subprog"); + err = bpf_program__set_attach_target(freplace_prog, tc_prog_fd, + "subprog_tc"); if (!ASSERT_OK(err, "set_attach_target")) goto out; @@ -923,27 +924,116 @@ static void test_tailcall_freplace(void) if (!ASSERT_OK(err, "tailcall_freplace__load")) goto out; - freplace_link = bpf_program__attach_freplace(freplace_prog, prog_fd, - "subprog"); + map_fd = bpf_map__fd(freplace_skel->maps.jmp_table); + prog_fd = bpf_program__fd(freplace_prog); + key = 0; + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); + ASSERT_ERR(err, "update jmp_table failure"); + + freplace_link = bpf_program__attach_freplace(freplace_prog, tc_prog_fd, + "subprog_tc"); if (!ASSERT_OK_PTR(freplace_link, "attach_freplace")) goto out; - map_fd = bpf_map__fd(freplace_skel->maps.jmp_table); - prog_fd = bpf_program__fd(freplace_prog); + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); + ASSERT_ERR(err, "update jmp_table failure"); + +out: + bpf_link__destroy(freplace_link); + tailcall_freplace__destroy(freplace_skel); + tc_bpf2bpf__destroy(tc_skel); +} + +/* test_tailcall_bpf2bpf_freplace checks the failure that fails to attach a tail + * callee prog with freplace prog or fails to update an extended prog to + * prog_array map. 
+ */ +static void test_tailcall_bpf2bpf_freplace(void) +{ + struct tailcall_freplace *freplace_skel = NULL; + struct bpf_link *freplace_link = NULL; + struct tc_bpf2bpf *tc_skel = NULL; + char buff[128] = {}; + int prog_fd, map_fd; + int err, key; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = buff, + .data_size_in = sizeof(buff), + .repeat = 1, + ); + + tc_skel = tc_bpf2bpf__open_and_load(); + if (!ASSERT_OK_PTR(tc_skel, "tc_bpf2bpf__open_and_load")) + goto out; + + prog_fd = bpf_program__fd(tc_skel->progs.entry_tc); + freplace_skel = tailcall_freplace__open(); + if (!ASSERT_OK_PTR(freplace_skel, "tailcall_freplace__open")) + goto out; + + err = bpf_program__set_attach_target(freplace_skel->progs.entry_freplace, + prog_fd, "subprog_tc"); + if (!ASSERT_OK(err, "set_attach_target")) + goto out; + + err = tailcall_freplace__load(freplace_skel); + if (!ASSERT_OK(err, "tailcall_freplace__load")) + goto out; + + /* OK to attach then detach freplace prog. */ + + freplace_link = bpf_program__attach_freplace(freplace_skel->progs.entry_freplace, + prog_fd, "subprog_tc"); + if (!ASSERT_OK_PTR(freplace_link, "attach_freplace")) + goto out; + + err = bpf_link__destroy(freplace_link); + if (!ASSERT_OK(err, "destroy link")) + goto out; + + /* OK to update prog_array map then delete element from the map. */ + key = 0; + map_fd = bpf_map__fd(freplace_skel->maps.jmp_table); err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); if (!ASSERT_OK(err, "update jmp_table")) goto out; - prog_fd = bpf_program__fd(tc_skel->progs.entry_tc); - err = bpf_prog_test_run_opts(prog_fd, &topts); - ASSERT_OK(err, "test_run"); - ASSERT_EQ(topts.retval, 34, "test_run retval"); + err = bpf_map_delete_elem(map_fd, &key); + if (!ASSERT_OK(err, "delete_elem from jmp_table")) + goto out; + + /* Fail to attach a tail callee prog with freplace prog. 
*/ + + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); + if (!ASSERT_OK(err, "update jmp_table")) + goto out; + + freplace_link = bpf_program__attach_freplace(freplace_skel->progs.entry_freplace, + prog_fd, "subprog_tc"); + if (!ASSERT_ERR_PTR(freplace_link, "attach_freplace failure")) + goto out; + + err = bpf_map_delete_elem(map_fd, &key); + if (!ASSERT_OK(err, "delete_elem from jmp_table")) + goto out; + + /* Fail to update an extended prog to prog_array map. */ + + freplace_link = bpf_program__attach_freplace(freplace_skel->progs.entry_freplace, + prog_fd, "subprog_tc"); + if (!ASSERT_OK_PTR(freplace_link, "attach_freplace")) + goto out; + + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); + if (!ASSERT_ERR(err, "update jmp_table failure")) + goto out; out: bpf_link__destroy(freplace_link); - tc_bpf2bpf__destroy(tc_skel); tailcall_freplace__destroy(freplace_skel); + tc_bpf2bpf__destroy(tc_skel); } void test_tailcalls(void) @@ -974,4 +1064,6 @@ void test_tailcalls(void) test_tailcall_bpf2bpf_6(); if (test__start_subtest("tailcall_freplace")) test_tailcall_freplace(); + if (test__start_subtest("tailcall_bpf2bpf_freplace")) + test_tailcall_bpf2bpf_freplace(); } diff --git a/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c index 8a0632c37839..d1a57f7d09bd 100644 --- a/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c @@ -5,10 +5,11 @@ #include "bpf_misc.h" __noinline -int subprog(struct __sk_buff *skb) +int subprog_tc(struct __sk_buff *skb) { int ret = 1; + __sink(skb); __sink(ret); return ret; } @@ -16,7 +17,7 @@ int subprog(struct __sk_buff *skb) SEC("tc") int entry_tc(struct __sk_buff *skb) { - return subprog(skb); + return subprog_tc(skb); } char __license[] SEC("license") = "GPL"; -- Gitee