From 3e19f9dd83b967850107ce57410e8edebd0533d4 Mon Sep 17 00:00:00 2001 From: meganz009 Date: Sat, 3 Jun 2023 13:58:10 +0800 Subject: [PATCH 1/4] timers: Don't block on ->expiry_lock for TIMER_IRQSAFE commit 9978859f2b4a9f1174d8e9d368d22ca740830471 upstream. PREEMPT_RT does not spin and wait until a running timer completes its callback but instead it blocks on a sleeping lock to prevent a deadlock. This blocking can not be done for workqueue's IRQ_SAFE timer which will be canceled in an IRQ-off region. It has to happen to in IRQ-off region because changing the PENDING bit and clearing the timer must not be interrupted to avoid a busy-loop. The callback invocation of IRQSAFE timer is not preempted on PREEMPT_RT so there is no need to synchronize on timer_base::expiry_lock. Don't acquire the timer_base::expiry_lock for TIMER_IRQSAFE flagged timer. Add a lockdep annotation to ensure that this function is always invoked in preemptible context on PREEMPT_RT. Reported-by: Mike Galbraith Signed-off-by: Sebastian Andrzej Siewior Cc: stable-rt@vger.kernel.org --- kernel/time/timer.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index e87e638c31bd..a4fdc7cfb723 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1287,7 +1287,7 @@ static void del_timer_wait_running(struct timer_list *timer) u32 tf; tf = READ_ONCE(timer->flags); - if (!(tf & TIMER_MIGRATING)) { + if (!(tf & (TIMER_MIGRATING | TIMER_IRQSAFE))) { struct timer_base *base = get_timer_base(tf); /* @@ -1371,6 +1371,13 @@ int del_timer_sync(struct timer_list *timer) */ WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE)); + /* + * Must be able to sleep on PREEMPT_RT because of the slowpath in + * del_timer_wait_running(). + */ + if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(timer->flags & TIMER_IRQSAFE)) + lockdep_assert_preemption_enabled(); + do { ret = try_to_del_timer_sync(timer); -- Gitee From 2fdbb339f248f8e201f2bde93cc65d0f8a9b7637 Mon Sep 17 00:00:00 2001 From: meganz009 Date: Sat, 3 Jun 2023 14:01:38 +0800 Subject: [PATCH 2/4] kthread: Move prio/affinite change into the newly created thread commit c4d528ac7f9e2c713ef8f05516b7fb3f5e607e63 upstream. With enabled threaded interrupts the nouveau driver reported the following: | Chain exists of: | &mm->mmap_lock#2 --> &device->mutex --> &cpuset_rwsem | | Possible unsafe locking scenario: | | CPU0 CPU1 --- kernel/kthread.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/kthread.c b/kernel/kthread.c index 508fe5278285..3ce6a31db7b4 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -264,6 +264,7 @@ EXPORT_SYMBOL_GPL(kthread_parkme); static int kthread(void *_create) { + static const struct sched_param param = { .sched_priority = 0 }; /* Copy data: it's on kthread's stack */ struct kthread_create_info *create = _create; int (*threadfn)(void *data) = create->threadfn; @@ -294,6 +295,13 @@ static int kthread(void *_create) init_completion(&self->parked); current->vfork_done = &self->exited; + /* + * The new thread inherited kthreadd's priority and CPU mask. Reset + * back to default in case they have been changed. + */ + sched_setscheduler_nocheck(current, SCHED_NORMAL, ¶m); + set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_KTHREAD)); + /* OK, tell user we're spawned, wait for stop or wakeup */ __set_current_state(TASK_UNINTERRUPTIBLE); create->result = current; @@ -391,7 +399,6 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), } task = create->result; if (!IS_ERR(task)) { - static const struct sched_param param = { .sched_priority = 0 }; char name[TASK_COMM_LEN]; /* @@ -400,13 +407,6 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data), */ vsnprintf(name, sizeof(name), namefmt, args); set_task_comm(task, name); - /* - * root may have changed our (kthreadd's) priority or CPU mask. - * The kernel thread should not inherit these properties. - */ - sched_setscheduler_nocheck(task, SCHED_NORMAL, ¶m); - set_cpus_allowed_ptr(task, - housekeeping_cpumask(HK_FLAG_KTHREAD)); } kfree(create); return task; -- Gitee From 5028f88076cde1caeb7fb991db2a430692a92e1e Mon Sep 17 00:00:00 2001 From: meganz009 Date: Sat, 3 Jun 2023 14:02:32 +0800 Subject: [PATCH 3/4] genirq: Move prio assignment into the newly created thread commit d1b6e0bc9c0b7f9cf1b25aa608c95da67c318464 upstream. With enabled threaded interrupts the nouveau driver reported the following: | Chain exists of: | &mm->mmap_lock#2 --> &device->mutex --> &cpuset_rwsem | | Possible unsafe locking scenario: | | CPU0 CPU1 --- kernel/irq/manage.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 942f3b09adac..5355f5639079 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1217,6 +1217,8 @@ static int irq_thread(void *data) irq_thread_set_ready(desc, action); + sched_set_fifo(current); + if (force_irqthreads && test_bit(IRQTF_FORCED_THREAD, &action->thread_flags)) handler_fn = irq_forced_thread_fn; @@ -1382,8 +1384,6 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) if (IS_ERR(t)) return PTR_ERR(t); - sched_set_fifo(t); - /* * We keep the reference to the task struct even if * the thread dies to avoid that the interrupt code -- Gitee From 7e87beafc4370f68768338d2c2ca9c13221e835e Mon Sep 17 00:00:00 2001 From: meganz009 Date: Sat, 3 Jun 2023 14:03:26 +0800 Subject: [PATCH 4/4] notifier: Make atomic_notifiers use raw_spinlock commit 1e4b833a53c7ab00e926e0ba43eb060eda69c04a upstream. Booting a recent PREEMPT_RT kernel (v5.10-rc3-rt7-rebase) on my arm64 Juno leads to the idle task blocking on an RT sleeping spinlock down some notifier path: [ 1.809101] BUG: scheduling while atomic: swapper/5/0/0x00000002 [ 1.809116] Modules linked in: [ 1.809123] Preemption disabled at: [ 1.809125] secondary_start_kernel (arch/arm64/kernel/smp.c:227) [ 1.809146] CPU: 5 PID: 0 Comm: swapper/5 Tainted: G W 5.10.0-rc3-rt7 #168 [ 1.809153] Hardware name: ARM Juno development board (r0) (DT) [ 1.809158] Call trace: [ 1.809160] dump_backtrace (arch/arm64/kernel/stacktrace.c:100 (discriminator 1)) [ 1.809170] show_stack (arch/arm64/kernel/stacktrace.c:198) [ 1.809178] dump_stack (lib/dump_stack.c:122) [ 1.809188] __schedule_bug (kernel/sched/core.c:4886) [ 1.809197] __schedule (./arch/arm64/include/asm/preempt.h:18 kernel/sched/core.c:4913 kernel/sched/core.c:5040) [ 1.809204] preempt_schedule_lock (kernel/sched/core.c:5365 (discriminator 1)) [ 1.809210] rt_spin_lock_slowlock_locked (kernel/locking/rtmutex.c:1072) [ 1.809217] rt_spin_lock_slowlock (kernel/locking/rtmutex.c:1110) [ 1.809224] rt_spin_lock (./include/linux/rcupdate.h:647 kernel/locking/rtmutex.c:1139) [ 1.809231] atomic_notifier_call_chain_robust (kernel/notifier.c:71 kernel/notifier.c:118 kernel/notifier.c:186) [ 1.809240] cpu_pm_enter (kernel/cpu_pm.c:39 kernel/cpu_pm.c:93) [ 1.809249] psci_enter_idle_state (drivers/cpuidle/cpuidle-psci.c:52 drivers/cpuidle/cpuidle-psci.c:129) [ 1.809258] cpuidle_enter_state (drivers/cpuidle/cpuidle.c:238) [ 1.809267] cpuidle_enter (drivers/cpuidle/cpuidle.c:353) [ 1.809275] do_idle (kernel/sched/idle.c:132 kernel/sched/idle.c:213 kernel/sched/idle.c:273) [ 1.809282] cpu_startup_entry (kernel/sched/idle.c:368 (discriminator 1)) [ 1.809288] secondary_start_kernel (arch/arm64/kernel/smp.c:273) Two points worth noting: 1) That this is conceptually the same issue as pointed out in: 313c8c16ee62 ("PM / CPU: replace raw_notifier with atomic_notifier") 2) Only the _robust() variant of atomic_notifier callchains suffer from this AFAICT only the cpu_pm_notifier_chain really needs to be changed, but singling it out would mean introducing a new (truly) non-blocking API. At the same time, callers that are fine with any blocking within the call chain should use blocking notifiers, so patching up all atomic_notifier's doesn't seem *too* crazy to me. Fixes: 70d932985757 ("notifier: Fix broken error handling pattern") Signed-off-by: Valentin Schneider Reviewed-by: Daniel Bristot de Oliveira Link: https://lkml.kernel.org/r/20201122201904.30940-1-valentin.schneider@arm.com Signed-off-by: Sebastian Andrzej Siewior --- include/linux/notifier.h | 6 +++--- kernel/notifier.c | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 2fb373a5c1ed..723bc2df6388 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -58,7 +58,7 @@ struct notifier_block { }; struct atomic_notifier_head { - spinlock_t lock; + raw_spinlock_t lock; struct notifier_block __rcu *head; }; @@ -78,7 +78,7 @@ struct srcu_notifier_head { }; #define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \ - spin_lock_init(&(name)->lock); \ + raw_spin_lock_init(&(name)->lock); \ (name)->head = NULL; \ } while (0) #define BLOCKING_INIT_NOTIFIER_HEAD(name) do { \ @@ -95,7 +95,7 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); cleanup_srcu_struct(&(name)->srcu); #define ATOMIC_NOTIFIER_INIT(name) { \ - .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ .head = NULL } #define BLOCKING_NOTIFIER_INIT(name) { \ .rwsem = __RWSEM_INITIALIZER((name).rwsem), \ diff --git a/kernel/notifier.c b/kernel/notifier.c index 1b019cbca594..c20782f07643 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -142,9 +142,9 @@ int atomic_notifier_chain_register(struct atomic_notifier_head *nh, unsigned long flags; int ret; - spin_lock_irqsave(&nh->lock, flags); + raw_spin_lock_irqsave(&nh->lock, flags); ret = notifier_chain_register(&nh->head, n); - spin_unlock_irqrestore(&nh->lock, flags); + raw_spin_unlock_irqrestore(&nh->lock, flags); return ret; } EXPORT_SYMBOL_GPL(atomic_notifier_chain_register); @@ -164,9 +164,9 @@ int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, unsigned long flags; int ret; - spin_lock_irqsave(&nh->lock, flags); + raw_spin_lock_irqsave(&nh->lock, flags); ret = notifier_chain_unregister(&nh->head, n); - spin_unlock_irqrestore(&nh->lock, flags); + raw_spin_unlock_irqrestore(&nh->lock, flags); synchronize_rcu(); return ret; } @@ -182,9 +182,9 @@ int atomic_notifier_call_chain_robust(struct atomic_notifier_head *nh, * Musn't use RCU; because then the notifier list can * change between the up and down traversal. */ - spin_lock_irqsave(&nh->lock, flags); + raw_spin_lock_irqsave(&nh->lock, flags); ret = notifier_call_chain_robust(&nh->head, val_up, val_down, v); - spin_unlock_irqrestore(&nh->lock, flags); + raw_spin_unlock_irqrestore(&nh->lock, flags); return ret; } -- Gitee