From d1d7eb7e12c09836802bfeacc4bba0a4de30d5a2 Mon Sep 17 00:00:00 2001 From: meganz009 Date: Fri, 2 Jun 2023 09:51:03 +0800 Subject: [PATCH 1/4] of: allocate / free phandle cache outside of the devtree_lock commit af2b5f6fa1369ed12974a97865df6125bbede492 upstream. The phandle cache code allocates memory while holding devtree_lock which is a raw_spinlock_t. Memory allocation (and free()) is not possible on RT while a raw_spinlock_t is held. Invoke the kfree() and kcalloc() while the lock is dropped. Cc: Rob Herring Cc: Frank Rowand Cc: devicetree@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior --- drivers/of/base.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index f0dbb7ad88cf..c59b30bab0e0 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -130,31 +130,34 @@ static u32 phandle_cache_mask; /* * Caller must hold devtree_lock. */ -static void __of_free_phandle_cache(void) +static struct device_node** __of_free_phandle_cache(void) { u32 cache_entries = phandle_cache_mask + 1; u32 k; + struct device_node **shadow; if (!phandle_cache) - return; + return NULL; for (k = 0; k < cache_entries; k++) of_node_put(phandle_cache[k]); - kfree(phandle_cache); + shadow = phandle_cache; phandle_cache = NULL; + return shadow; } int of_free_phandle_cache(void) { unsigned long flags; + struct device_node **shadow; raw_spin_lock_irqsave(&devtree_lock, flags); - __of_free_phandle_cache(); + shadow = __of_free_phandle_cache(); raw_spin_unlock_irqrestore(&devtree_lock, flags); - + kfree(shadow); return 0; } #if !defined(CONFIG_MODULES) @@ -189,10 +192,11 @@ void of_populate_phandle_cache(void) u32 cache_entries; struct device_node *np; u32 phandles = 0; + struct device_node **shadow; raw_spin_lock_irqsave(&devtree_lock, flags); - __of_free_phandle_cache(); + shadow = __of_free_phandle_cache(); for_each_of_allnodes(np) if (np->phandle && np->phandle != OF_PHANDLE_ILLEGAL) @@ -200,12 +204,14 @@ 
void of_populate_phandle_cache(void) if (!phandles) goto out; + raw_spin_unlock_irqrestore(&devtree_lock, flags); cache_entries = roundup_pow_of_two(phandles); phandle_cache_mask = cache_entries - 1; phandle_cache = kcalloc(cache_entries, sizeof(*phandle_cache), GFP_ATOMIC); + raw_spin_lock_irqsave(&devtree_lock, flags); if (!phandle_cache) goto out; @@ -217,6 +223,7 @@ void of_populate_phandle_cache(void) out: raw_spin_unlock_irqrestore(&devtree_lock, flags); + kfree(shadow); } void __init of_core_init(void) -- Gitee From 5ec5f3892e67679d898c72f88ba1190708103407 Mon Sep 17 00:00:00 2001 From: meganz009 Date: Fri, 2 Jun 2023 09:51:39 +0800 Subject: [PATCH 2/4] mm/kasan: make quarantine_lock a raw_spinlock_t commit 90ac218590a6d692a0dafeeef4bfafea30811be3 upstream. The static lock quarantine_lock is used in quarantine.c to protect the quarantine queue datastructures. It is taken inside quarantine queue manipulation routines (quarantine_put(), quarantine_reduce() and quarantine_remove_cache()), with IRQs disabled. This is not a problem on a stock kernel but is problematic on an RT kernel where spin locks are sleeping spinlocks, which can sleep and can not be acquired with disabled interrupts. Convert the quarantine_lock to a raw spinlock_t. The usage of quarantine_lock is confined to quarantine.c and the work performed while the lock is held is limited. Signed-off-by: Clark Williams Signed-off-by: Sebastian Andrzej Siewior --- mm/kasan/quarantine.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index 3a8ddf8baf7d..b209dbaefde8 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -103,7 +103,7 @@ static int quarantine_head; static int quarantine_tail; /* Total size of all objects in global_quarantine across all batches. 
*/ static unsigned long quarantine_size; -static DEFINE_SPINLOCK(quarantine_lock); +static DEFINE_RAW_SPINLOCK(quarantine_lock); DEFINE_STATIC_SRCU(remove_cache_srcu); /* Maximum size of the global queue. */ @@ -190,7 +190,7 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache) if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) { qlist_move_all(q, &temp); - spin_lock(&quarantine_lock); + raw_spin_lock(&quarantine_lock); WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes); qlist_move_all(&temp, &global_quarantine[quarantine_tail]); if (global_quarantine[quarantine_tail].bytes >= @@ -203,7 +203,7 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache) if (new_tail != quarantine_head) quarantine_tail = new_tail; } - spin_unlock(&quarantine_lock); + raw_spin_unlock(&quarantine_lock); } local_irq_restore(flags); @@ -230,7 +230,7 @@ void quarantine_reduce(void) * expected case). */ srcu_idx = srcu_read_lock(&remove_cache_srcu); - spin_lock_irqsave(&quarantine_lock, flags); + raw_spin_lock_irqsave(&quarantine_lock, flags); /* * Update quarantine size in case of hotplug. Allocate a fraction of @@ -254,7 +254,7 @@ void quarantine_reduce(void) quarantine_head = 0; } - spin_unlock_irqrestore(&quarantine_lock, flags); + raw_spin_unlock_irqrestore(&quarantine_lock, flags); qlist_free_all(&to_free, NULL); srcu_read_unlock(&remove_cache_srcu, srcu_idx); @@ -310,17 +310,17 @@ void quarantine_remove_cache(struct kmem_cache *cache) */ on_each_cpu(per_cpu_remove_cache, cache, 1); - spin_lock_irqsave(&quarantine_lock, flags); + raw_spin_lock_irqsave(&quarantine_lock, flags); for (i = 0; i < QUARANTINE_BATCHES; i++) { if (qlist_empty(&global_quarantine[i])) continue; qlist_move_cache(&global_quarantine[i], &to_free, cache); /* Scanning whole quarantine can take a while. 
*/ - spin_unlock_irqrestore(&quarantine_lock, flags); + raw_spin_unlock_irqrestore(&quarantine_lock, flags); cond_resched(); - spin_lock_irqsave(&quarantine_lock, flags); + raw_spin_lock_irqsave(&quarantine_lock, flags); } - spin_unlock_irqrestore(&quarantine_lock, flags); + raw_spin_unlock_irqrestore(&quarantine_lock, flags); qlist_free_all(&to_free, cache); -- Gitee From 98c151a7afd54e64d737fe1c59e864dc4b1be5af Mon Sep 17 00:00:00 2001 From: meganz009 Date: Fri, 2 Jun 2023 09:52:02 +0800 Subject: [PATCH 3/4] EXP rcu: Revert expedited GP parallelization cleverness commit 37d23bb703b59574c9eeebba86f8c8b9cf382d15 upstream. (Commit 258ba8e089db23f760139266c232f01bad73f85c from linux-rcu) This commit reverts a series of commits starting with fcc635436501 ("rcu: Make expedited GPs handle CPU 0 being offline") and its successors, thus queueing each rcu_node structure's expedited grace-period initialization work on the first CPU of that rcu_node structure. Suggested-by: Sebastian Andrzej Siewior Signed-off-by: Paul E. McKenney Signed-off-by: Sebastian Andrzej Siewior --- kernel/rcu/tree_exp.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 72f9f2f12834..23c7b5037cf2 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -472,7 +472,6 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp) static void sync_rcu_exp_select_cpus(struct rcu_state *rsp, smp_call_func_t func) { - int cpu; struct rcu_node *rnp; trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset")); @@ -494,13 +493,7 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp, continue; } INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus); - preempt_disable(); - cpu = cpumask_next(rnp->grplo - 1, cpu_online_mask); - /* If all offline, queue the work on an unbound CPU. 
*/ - if (unlikely(cpu > rnp->grphi)) - cpu = WORK_CPU_UNBOUND; - queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work); - preempt_enable(); + queue_work_on(rnp->grplo, rcu_par_gp_wq, &rnp->rew.rew_work); rnp->exp_need_flush = true; } -- Gitee From 4da19a7aac764be3a7682b05f793258ae4338c08 Mon Sep 17 00:00:00 2001 From: meganz009 Date: Fri, 2 Jun 2023 09:52:13 +0800 Subject: [PATCH 4/4] kmemleak: Turn kmemleak_lock to raw spinlock on RT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5a62bb92ae6df828a0b6cf14c0e1d55558f0002e upstream. kmemleak_lock, as a rwlock on RT, can possibly be held in atomic context and causes the following BUG. BUG: scheduling while atomic: migration/15/132/0x00000002 Preemption disabled at: [] cpu_stopper_thread+0x71/0x100 CPU: 15 PID: 132 Comm: migration/15 Not tainted 4.19.0-rt1-preempt-rt #1 Call Trace: schedule+0x3d/0xe0 __rt_spin_lock+0x26/0x30 __write_rt_lock+0x23/0x1a0 rt_write_lock+0x2a/0x30 find_and_remove_object+0x1e/0x80 delete_object_full+0x10/0x20 kmemleak_free+0x32/0x50 kfree+0x104/0x1f0 intel_pmu_cpu_dying+0x67/0x70 x86_pmu_dying_cpu+0x1a/0x30 cpuhp_invoke_callback+0x92/0x700 take_cpu_down+0x70/0xa0 multi_cpu_stop+0x62/0xc0 cpu_stopper_thread+0x79/0x100 smpboot_thread_fn+0x20f/0x2d0 kthread+0x121/0x140 And on v4.18 stable tree the following call trace, caused by grabbing kmemleak_lock again, is also observed. kernel BUG at kernel/locking/rtmutex.c:1048!
CPU: 5 PID: 689 Comm: mkfs.ext4 Not tainted 4.18.16-rt9-preempt-rt #1 Call Trace: rt_write_lock+0x2a/0x30 create_object+0x17d/0x2b0 kmemleak_alloc+0x34/0x50 kmem_cache_alloc+0x146/0x220 mempool_alloc_slab+0x15/0x20 mempool_alloc+0x65/0x170 sg_pool_alloc+0x21/0x60 sg_alloc_table_chained+0x8b/0xb0 … blk_flush_plug_list+0x204/0x230 schedule+0x87/0xe0 rt_write_lock+0x2a/0x30 create_object+0x17d/0x2b0 kmemleak_alloc+0x34/0x50 __kmalloc_node+0x1cd/0x340 alloc_request_size+0x30/0x70 mempool_alloc+0x65/0x170 get_request+0x4e3/0x8d0 blk_queue_bio+0x153/0x470 generic_make_request+0x1dc/0x3f0 submit_bio+0x49/0x140 … kmemleak is an error detecting feature. We would not expect as good performance as without it. As there are no raw rwlock defining helpers, we turn kmemleak_lock to a raw spinlock. Signed-off-by: He Zhe Cc: catalin.marinas@arm.com Cc: bigeasy@linutronix.de Cc: tglx@linutronix.de Cc: rostedt@goodmis.org Acked-by: Catalin Marinas Link: https://lkml.kernel.org/r/1542877459-144382-1-git-send-email-zhe.he@windriver.com Link: https://lkml.kernel.org/r/20181218150744.GB20197@arrakis.emea.arm.com Signed-off-by: Sebastian Andrzej Siewior --- mm/kmemleak.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 5eeabece0c17..92ce99b15f2b 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -26,7 +26,7 @@ * * The following locks and mutexes are used by kmemleak: * - * - kmemleak_lock (rwlock): protects the object_list modifications and + * - kmemleak_lock (raw spinlock): protects the object_list modifications and * accesses to the object_tree_root. The object_list is the main list * holding the metadata (struct kmemleak_object) for the allocated memory * blocks.
The object_tree_root is a red black tree used to look-up @@ -197,7 +197,7 @@ static LIST_HEAD(gray_list); /* search tree for object boundaries */ static struct rb_root object_tree_root = RB_ROOT; /* rw_lock protecting the access to object_list and object_tree_root */ -static DEFINE_RWLOCK(kmemleak_lock); +static DEFINE_RAW_SPINLOCK(kmemleak_lock); /* allocation caches for kmemleak internal data */ static struct kmem_cache *object_cache; @@ -491,9 +491,9 @@ static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias) struct kmemleak_object *object; rcu_read_lock(); - read_lock_irqsave(&kmemleak_lock, flags); + raw_spin_lock_irqsave(&kmemleak_lock, flags); object = lookup_object(ptr, alias); - read_unlock_irqrestore(&kmemleak_lock, flags); + raw_spin_unlock_irqrestore(&kmemleak_lock, flags); /* check whether the object is still available */ if (object && !get_object(object)) @@ -513,13 +513,13 @@ static struct kmemleak_object *find_and_remove_object(unsigned long ptr, int ali unsigned long flags; struct kmemleak_object *object; - write_lock_irqsave(&kmemleak_lock, flags); + raw_spin_lock_irqsave(&kmemleak_lock, flags); object = lookup_object(ptr, alias); if (object) { rb_erase(&object->rb_node, &object_tree_root); list_del_rcu(&object->object_list); } - write_unlock_irqrestore(&kmemleak_lock, flags); + raw_spin_unlock_irqrestore(&kmemleak_lock, flags); return object; } @@ -593,7 +593,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size, /* kernel backtrace */ object->trace_len = __save_stack_trace(object->trace); - write_lock_irqsave(&kmemleak_lock, flags); + raw_spin_lock_irqsave(&kmemleak_lock, flags); min_addr = min(min_addr, ptr); max_addr = max(max_addr, ptr + size); @@ -624,7 +624,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size, list_add_tail_rcu(&object->object_list, &object_list); out: - write_unlock_irqrestore(&kmemleak_lock, flags); + raw_spin_unlock_irqrestore(&kmemleak_lock, 
flags); return object; } @@ -1310,7 +1310,7 @@ static void scan_block(void *_start, void *_end, unsigned long *end = _end - (BYTES_PER_POINTER - 1); unsigned long flags; - read_lock_irqsave(&kmemleak_lock, flags); + raw_spin_lock_irqsave(&kmemleak_lock, flags); for (ptr = start; ptr < end; ptr++) { struct kmemleak_object *object; unsigned long pointer; @@ -1367,7 +1367,7 @@ static void scan_block(void *_start, void *_end, spin_unlock(&object->lock); } } - read_unlock_irqrestore(&kmemleak_lock, flags); + raw_spin_unlock_irqrestore(&kmemleak_lock, flags); } /* -- Gitee