From d1d7eb7e12c09836802bfeacc4bba0a4de30d5a2 Mon Sep 17 00:00:00 2001
From: meganz009 <zhang.wenya1@zte.com.cn>
Date: Fri, 2 Jun 2023 09:51:03 +0800
Subject: [PATCH 1/4] of: allocate / free phandle cache outside of the 
 devtree_lock

commit af2b5f6fa1369ed12974a97865df6125bbede492 upstream.

The phandle cache code allocates memory while holding devtree_lock which
is a raw_spinlock_t. Memory allocation (and free()) is not possible on
RT while a raw_spinlock_t is held.
Invoke the kfree() and kcalloc() while the lock is dropped.

Cc: Rob Herring <robh+dt@kernel.org>
Cc: Frank Rowand <frowand.list@gmail.com>
Cc: devicetree@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 drivers/of/base.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/of/base.c b/drivers/of/base.c
index f0dbb7ad88cf..c59b30bab0e0 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -130,31 +130,34 @@ static u32 phandle_cache_mask;
 /*
  * Caller must hold devtree_lock.
  */
-static void __of_free_phandle_cache(void)
+static struct device_node** __of_free_phandle_cache(void)
 {
 	u32 cache_entries = phandle_cache_mask + 1;
 	u32 k;
+	struct device_node **shadow;
 
 	if (!phandle_cache)
-		return;
+		return NULL;
 
 	for (k = 0; k < cache_entries; k++)
 		of_node_put(phandle_cache[k]);
 
-	kfree(phandle_cache);
+	shadow = phandle_cache;
 	phandle_cache = NULL;
+	return shadow;
 }
 
 int of_free_phandle_cache(void)
 {
 	unsigned long flags;
+	struct device_node **shadow;
 
 	raw_spin_lock_irqsave(&devtree_lock, flags);
 
-	__of_free_phandle_cache();
+	shadow = __of_free_phandle_cache();
 
 	raw_spin_unlock_irqrestore(&devtree_lock, flags);
-
+	kfree(shadow);
 	return 0;
 }
 #if !defined(CONFIG_MODULES)
@@ -189,10 +192,11 @@ void of_populate_phandle_cache(void)
 	u32 cache_entries;
 	struct device_node *np;
 	u32 phandles = 0;
+	struct device_node **shadow;
 
 	raw_spin_lock_irqsave(&devtree_lock, flags);
 
-	__of_free_phandle_cache();
+	shadow = __of_free_phandle_cache();
 
 	for_each_of_allnodes(np)
 		if (np->phandle && np->phandle != OF_PHANDLE_ILLEGAL)
@@ -200,12 +204,14 @@ void of_populate_phandle_cache(void)
 
 	if (!phandles)
 		goto out;
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
 	cache_entries = roundup_pow_of_two(phandles);
 	phandle_cache_mask = cache_entries - 1;
 
 	phandle_cache = kcalloc(cache_entries, sizeof(*phandle_cache),
 				GFP_ATOMIC);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
 	if (!phandle_cache)
 		goto out;
 
@@ -217,6 +223,7 @@ void of_populate_phandle_cache(void)
 
 out:
 	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+	kfree(shadow);
 }
 
 void __init of_core_init(void)
-- 
Gitee


From 5ec5f3892e67679d898c72f88ba1190708103407 Mon Sep 17 00:00:00 2001
From: meganz009 <zhang.wenya1@zte.com.cn>
Date: Fri, 2 Jun 2023 09:51:39 +0800
Subject: [PATCH 2/4] mm/kasan: make quarantine_lock a raw_spinlock_t

commit 90ac218590a6d692a0dafeeef4bfafea30811be3 upstream.

The static lock quarantine_lock is used in quarantine.c to protect the
quarantine queue datastructures. It is taken inside quarantine queue
manipulation routines (quarantine_put(), quarantine_reduce() and
quarantine_remove_cache()), with IRQs disabled.
This is not a problem on a stock kernel but is problematic on an RT
kernel where spin locks are sleeping spinlocks, which can sleep and can
not be acquired with disabled interrupts.

Convert the quarantine_lock to a raw spinlock_t. The usage of
quarantine_lock is confined to quarantine.c and the work performed while
the lock is held is limited.

Signed-off-by: Clark Williams <williams@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 mm/kasan/quarantine.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
index 3a8ddf8baf7d..b209dbaefde8 100644
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -103,7 +103,7 @@ static int quarantine_head;
 static int quarantine_tail;
 /* Total size of all objects in global_quarantine across all batches. */
 static unsigned long quarantine_size;
-static DEFINE_SPINLOCK(quarantine_lock);
+static DEFINE_RAW_SPINLOCK(quarantine_lock);
 DEFINE_STATIC_SRCU(remove_cache_srcu);
 
 /* Maximum size of the global queue. */
@@ -190,7 +190,7 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
 	if (unlikely(q->bytes > QUARANTINE_PERCPU_SIZE)) {
 		qlist_move_all(q, &temp);
 
-		spin_lock(&quarantine_lock);
+		raw_spin_lock(&quarantine_lock);
 		WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes);
 		qlist_move_all(&temp, &global_quarantine[quarantine_tail]);
 		if (global_quarantine[quarantine_tail].bytes >=
@@ -203,7 +203,7 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
 			if (new_tail != quarantine_head)
 				quarantine_tail = new_tail;
 		}
-		spin_unlock(&quarantine_lock);
+		raw_spin_unlock(&quarantine_lock);
 	}
 
 	local_irq_restore(flags);
@@ -230,7 +230,7 @@ void quarantine_reduce(void)
 	 * expected case).
 	 */
 	srcu_idx = srcu_read_lock(&remove_cache_srcu);
-	spin_lock_irqsave(&quarantine_lock, flags);
+	raw_spin_lock_irqsave(&quarantine_lock, flags);
 
 	/*
 	 * Update quarantine size in case of hotplug. Allocate a fraction of
@@ -254,7 +254,7 @@ void quarantine_reduce(void)
 			quarantine_head = 0;
 	}
 
-	spin_unlock_irqrestore(&quarantine_lock, flags);
+	raw_spin_unlock_irqrestore(&quarantine_lock, flags);
 
 	qlist_free_all(&to_free, NULL);
 	srcu_read_unlock(&remove_cache_srcu, srcu_idx);
@@ -310,17 +310,17 @@ void quarantine_remove_cache(struct kmem_cache *cache)
 	 */
 	on_each_cpu(per_cpu_remove_cache, cache, 1);
 
-	spin_lock_irqsave(&quarantine_lock, flags);
+	raw_spin_lock_irqsave(&quarantine_lock, flags);
 	for (i = 0; i < QUARANTINE_BATCHES; i++) {
 		if (qlist_empty(&global_quarantine[i]))
 			continue;
 		qlist_move_cache(&global_quarantine[i], &to_free, cache);
 		/* Scanning whole quarantine can take a while. */
-		spin_unlock_irqrestore(&quarantine_lock, flags);
+		raw_spin_unlock_irqrestore(&quarantine_lock, flags);
 		cond_resched();
-		spin_lock_irqsave(&quarantine_lock, flags);
+		raw_spin_lock_irqsave(&quarantine_lock, flags);
 	}
-	spin_unlock_irqrestore(&quarantine_lock, flags);
+	raw_spin_unlock_irqrestore(&quarantine_lock, flags);
 
 	qlist_free_all(&to_free, cache);
 
-- 
Gitee


From 98c151a7afd54e64d737fe1c59e864dc4b1be5af Mon Sep 17 00:00:00 2001
From: meganz009 <zhang.wenya1@zte.com.cn>
Date: Fri, 2 Jun 2023 09:52:02 +0800
Subject: [PATCH 3/4] EXP rcu: Revert expedited GP parallelization  cleverness

commit 37d23bb703b59574c9eeebba86f8c8b9cf382d15 upstream.

(Commit 258ba8e089db23f760139266c232f01bad73f85c from linux-rcu)

This commit reverts a series of commits starting with fcc635436501 ("rcu:
Make expedited GPs handle CPU 0 being offline") and its successors, thus
queueing each rcu_node structure's expedited grace-period initialization
work on the first CPU of that rcu_node structure.

Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 kernel/rcu/tree_exp.h | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 72f9f2f12834..23c7b5037cf2 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -472,7 +472,6 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
 static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
 				     smp_call_func_t func)
 {
-	int cpu;
 	struct rcu_node *rnp;
 
 	trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
@@ -494,13 +493,7 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
 			continue;
 		}
 		INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
-		preempt_disable();
-		cpu = cpumask_next(rnp->grplo - 1, cpu_online_mask);
-		/* If all offline, queue the work on an unbound CPU. */
-		if (unlikely(cpu > rnp->grphi))
-			cpu = WORK_CPU_UNBOUND;
-		queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work);
-		preempt_enable();
+		queue_work_on(rnp->grplo, rcu_par_gp_wq, &rnp->rew.rew_work);
 		rnp->exp_need_flush = true;
 	}
 
-- 
Gitee


From 4da19a7aac764be3a7682b05f793258ae4338c08 Mon Sep 17 00:00:00 2001
From: meganz009 <zhang.wenya1@zte.com.cn>
Date: Fri, 2 Jun 2023 09:52:13 +0800
Subject: [PATCH 4/4] kmemleak: Turn kmemleak_lock to raw spinlock on RT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 5a62bb92ae6df828a0b6cf14c0e1d55558f0002e upstream.

kmemleak_lock, as a rwlock on RT, can possibly be held in atomic context and
causes the follow BUG.

BUG: scheduling while atomic: migration/15/132/0x00000002
Preemption disabled at:
[<ffffffff8c927c11>] cpu_stopper_thread+0x71/0x100
CPU: 15 PID: 132 Comm: migration/15 Not tainted 4.19.0-rt1-preempt-rt #1
Call Trace:
 schedule+0x3d/0xe0
 __rt_spin_lock+0x26/0x30
 __write_rt_lock+0x23/0x1a0
 rt_write_lock+0x2a/0x30
 find_and_remove_object+0x1e/0x80
 delete_object_full+0x10/0x20
 kmemleak_free+0x32/0x50
 kfree+0x104/0x1f0
 intel_pmu_cpu_dying+0x67/0x70
 x86_pmu_dying_cpu+0x1a/0x30
 cpuhp_invoke_callback+0x92/0x700
 take_cpu_down+0x70/0xa0
 multi_cpu_stop+0x62/0xc0
 cpu_stopper_thread+0x79/0x100
 smpboot_thread_fn+0x20f/0x2d0
 kthread+0x121/0x140

And on v4.18 stable tree the following call trace, caused by grabbing
kmemleak_lock again, is also observed.

kernel BUG at kernel/locking/rtmutex.c:1048!
CPU: 5 PID: 689 Comm: mkfs.ext4 Not tainted 4.18.16-rt9-preempt-rt #1
Call Trace:
 rt_write_lock+0x2a/0x30
 create_object+0x17d/0x2b0
 kmemleak_alloc+0x34/0x50
 kmem_cache_alloc+0x146/0x220
 mempool_alloc_slab+0x15/0x20
 mempool_alloc+0x65/0x170
 sg_pool_alloc+0x21/0x60
 sg_alloc_table_chained+0x8b/0xb0
…
 blk_flush_plug_list+0x204/0x230
 schedule+0x87/0xe0
 rt_write_lock+0x2a/0x30
 create_object+0x17d/0x2b0
 kmemleak_alloc+0x34/0x50
 __kmalloc_node+0x1cd/0x340
 alloc_request_size+0x30/0x70
 mempool_alloc+0x65/0x170
 get_request+0x4e3/0x8d0
 blk_queue_bio+0x153/0x470
 generic_make_request+0x1dc/0x3f0
 submit_bio+0x49/0x140
…

kmemleak is an error detecting feature. We would not expect as good performance
as without it. As there is no raw rwlock defining helpers, we turn kmemleak_lock
to a raw spinlock.

Signed-off-by: He Zhe <zhe.he@windriver.com>
Cc: catalin.marinas@arm.com
Cc: bigeasy@linutronix.de
Cc: tglx@linutronix.de
Cc: rostedt@goodmis.org
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lkml.kernel.org/r/1542877459-144382-1-git-send-email-zhe.he@windriver.com
Link: https://lkml.kernel.org/r/20181218150744.GB20197@arrakis.emea.arm.com
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 mm/kmemleak.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 5eeabece0c17..92ce99b15f2b 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -26,7 +26,7 @@
  *
  * The following locks and mutexes are used by kmemleak:
  *
- * - kmemleak_lock (rwlock): protects the object_list modifications and
+ * - kmemleak_lock (raw spinlock): protects the object_list modifications and
  *   accesses to the object_tree_root. The object_list is the main list
  *   holding the metadata (struct kmemleak_object) for the allocated memory
  *   blocks. The object_tree_root is a red black tree used to look-up
@@ -197,7 +197,7 @@ static LIST_HEAD(gray_list);
 /* search tree for object boundaries */
 static struct rb_root object_tree_root = RB_ROOT;
 /* rw_lock protecting the access to object_list and object_tree_root */
-static DEFINE_RWLOCK(kmemleak_lock);
+static DEFINE_RAW_SPINLOCK(kmemleak_lock);
 
 /* allocation caches for kmemleak internal data */
 static struct kmem_cache *object_cache;
@@ -491,9 +491,9 @@ static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias)
 	struct kmemleak_object *object;
 
 	rcu_read_lock();
-	read_lock_irqsave(&kmemleak_lock, flags);
+	raw_spin_lock_irqsave(&kmemleak_lock, flags);
 	object = lookup_object(ptr, alias);
-	read_unlock_irqrestore(&kmemleak_lock, flags);
+	raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
 
 	/* check whether the object is still available */
 	if (object && !get_object(object))
@@ -513,13 +513,13 @@ static struct kmemleak_object *find_and_remove_object(unsigned long ptr, int ali
 	unsigned long flags;
 	struct kmemleak_object *object;
 
-	write_lock_irqsave(&kmemleak_lock, flags);
+	raw_spin_lock_irqsave(&kmemleak_lock, flags);
 	object = lookup_object(ptr, alias);
 	if (object) {
 		rb_erase(&object->rb_node, &object_tree_root);
 		list_del_rcu(&object->object_list);
 	}
-	write_unlock_irqrestore(&kmemleak_lock, flags);
+	raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
 
 	return object;
 }
@@ -593,7 +593,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
 	/* kernel backtrace */
 	object->trace_len = __save_stack_trace(object->trace);
 
-	write_lock_irqsave(&kmemleak_lock, flags);
+	raw_spin_lock_irqsave(&kmemleak_lock, flags);
 
 	min_addr = min(min_addr, ptr);
 	max_addr = max(max_addr, ptr + size);
@@ -624,7 +624,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
 
 	list_add_tail_rcu(&object->object_list, &object_list);
 out:
-	write_unlock_irqrestore(&kmemleak_lock, flags);
+	raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
 	return object;
 }
 
@@ -1310,7 +1310,7 @@ static void scan_block(void *_start, void *_end,
 	unsigned long *end = _end - (BYTES_PER_POINTER - 1);
 	unsigned long flags;
 
-	read_lock_irqsave(&kmemleak_lock, flags);
+	raw_spin_lock_irqsave(&kmemleak_lock, flags);
 	for (ptr = start; ptr < end; ptr++) {
 		struct kmemleak_object *object;
 		unsigned long pointer;
@@ -1367,7 +1367,7 @@ static void scan_block(void *_start, void *_end,
 			spin_unlock(&object->lock);
 		}
 	}
-	read_unlock_irqrestore(&kmemleak_lock, flags);
+	raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
 }
 
 /*
-- 
Gitee