From f8952e472a84c9900e88fec1701202f20cc26bad Mon Sep 17 00:00:00 2001
From: "yang.yang29@zte.com.cn"
Date: Tue, 6 Jun 2023 18:19:23 +0800
Subject: [PATCH 1/3] softirq: split timer softirqs out of ksoftirqd

commit 74b99df23856c57791eccaa46030824066095f30 upstream.

The softirqd runs in -RT with SCHED_FIFO (prio 1) and deals mostly with
timer wakeups, which cannot happen in hardirq context. The priority has
been raised from the normal SCHED_OTHER so the timer wakeup does not
happen too late.
With enough networking load it is possible that the system never goes
idle and schedules ksoftirqd and everything else with a higher priority.
One of the tasks left behind is one of RCU's threads and so we see
stalls and eventually run out of memory.
This patch moves the TIMER and HRTIMER softirqs out of the `ksoftirqd`
thread into their own `ktimersoftd` thread. The former can now run at
SCHED_OTHER (same as mainline) and the latter at SCHED_FIFO due to the
wakeups.

From a networking point of view: the NAPI callback runs after the
network interrupt thread completes. If its run time takes too long, the
NAPI code itself schedules the `ksoftirqd`. Here in the thread it can
run at SCHED_OTHER priority and it won't defer RCU anymore.

Cc: stable-rt@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior
---
 kernel/softirq.c | 85 +++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 73 insertions(+), 12 deletions(-)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index 3e9333d148ad..fe4e59c80a08 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -59,6 +59,10 @@ EXPORT_PER_CPU_SYMBOL(irq_stat);
 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
 
 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
+#ifdef CONFIG_PREEMPT_RT_FULL
+#define TIMER_SOFTIRQS ((1 << TIMER_SOFTIRQ) | (1 << HRTIMER_SOFTIRQ))
+DEFINE_PER_CPU(struct task_struct *, ktimer_softirqd);
+#endif
 
 const char * const softirq_to_name[NR_SOFTIRQS] = {
         "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
@@ -172,6 +176,17 @@ static void wakeup_softirqd(void)
                 wake_up_process(tsk);
 }
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void wakeup_timer_softirqd(void)
+{
+        /* Interrupts are disabled: no need to stop preemption */
+        struct task_struct *tsk = __this_cpu_read(ktimer_softirqd);
+
+        if (tsk && tsk->state != TASK_RUNNING)
+                wake_up_process(tsk);
+}
+#endif
+
 static void handle_softirq(unsigned int vec_nr)
 {
         struct softirq_action *h = softirq_vec + vec_nr;
@@ -493,7 +508,6 @@ void __raise_softirq_irqoff(unsigned int nr)
 static inline void local_bh_disable_nort(void) { local_bh_disable(); }
 static inline void _local_bh_enable_nort(void) { _local_bh_enable(); }
 static void ksoftirqd_set_sched_params(unsigned int cpu) { }
-static void ksoftirqd_clr_sched_params(unsigned int cpu, bool online) { }
 
 #else /* !PREEMPT_RT_FULL */
 
@@ -640,8 +654,12 @@ void thread_do_softirq(void)
 
 static void do_raise_softirq_irqoff(unsigned int nr)
 {
+        unsigned int mask;
+
+        mask = 1UL << nr;
+
         trace_softirq_raise(nr);
-        or_softirq_pending(1UL << nr);
+        or_softirq_pending(mask);
 
         /*
          * If we are not in a hard interrupt and inside a bh disabled
@@ -650,16 +668,29 @@ static void do_raise_softirq_irqoff(unsigned int nr)
          * delegate it to ksoftirqd.
          */
         if (!in_irq() && current->softirq_nestcnt)
-                current->softirqs_raised |= (1U << nr);
-        else if (__this_cpu_read(ksoftirqd))
-                __this_cpu_read(ksoftirqd)->softirqs_raised |= (1U << nr);
+                current->softirqs_raised |= mask;
+        else if (!__this_cpu_read(ksoftirqd) || !__this_cpu_read(ktimer_softirqd))
+                return;
+
+        if (mask & TIMER_SOFTIRQS)
+                __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask;
+        else
+                __this_cpu_read(ksoftirqd)->softirqs_raised |= mask;
+}
+
+static void wakeup_proper_softirq(unsigned int nr)
+{
+        if ((1UL << nr) & TIMER_SOFTIRQS)
+                wakeup_timer_softirqd();
+        else
+                wakeup_softirqd();
 }
 
 void __raise_softirq_irqoff(unsigned int nr)
 {
         do_raise_softirq_irqoff(nr);
         if (!in_irq() && !current->softirq_nestcnt)
-                wakeup_softirqd();
+                wakeup_proper_softirq(nr);
 }
 
 /*
@@ -685,7 +716,7 @@ void raise_softirq_irqoff(unsigned int nr)
          * raise a WARN() if the condition is met.
          */
         if (!current->softirq_nestcnt)
-                wakeup_softirqd();
+                wakeup_proper_softirq(nr);
 }
 
 static inline int ksoftirqd_softirq_pending(void)
@@ -697,23 +728,38 @@ static inline void local_bh_disable_nort(void) { }
 static inline void _local_bh_enable_nort(void) { }
 static inline void ksoftirqd_set_sched_params(unsigned int cpu)
+{
+        /* Take over all but timer pending softirqs when starting */
+        local_irq_disable();
+        current->softirqs_raised = local_softirq_pending() & ~TIMER_SOFTIRQS;
+        local_irq_enable();
+}
+
+static inline void ktimer_softirqd_set_sched_params(unsigned int cpu)
 {
         struct sched_param param = { .sched_priority = 1 };
 
         sched_setscheduler(current, SCHED_FIFO, &param);
-        /* Take over all pending softirqs when starting */
+
+        /* Take over timer pending softirqs when starting */
         local_irq_disable();
-        current->softirqs_raised = local_softirq_pending();
+        current->softirqs_raised = local_softirq_pending() & TIMER_SOFTIRQS;
         local_irq_enable();
 }
 
-static inline void ksoftirqd_clr_sched_params(unsigned int cpu, bool online)
+static inline void ktimer_softirqd_clr_sched_params(unsigned int cpu,
+                                                     bool online)
 {
         struct sched_param param = { .sched_priority = 0 };
 
         sched_setscheduler(current, SCHED_NORMAL, &param);
 }
 
+static int ktimer_softirqd_should_run(unsigned int cpu)
+{
+        return current->softirqs_raised;
+}
+
 #endif /* PREEMPT_RT_FULL */
 
 /*
  * Enter an interrupt context.
@@ -766,6 +812,9 @@ static inline void invoke_softirq(void)
         if (__this_cpu_read(ksoftirqd) &&
             __this_cpu_read(ksoftirqd)->softirqs_raised)
                 wakeup_softirqd();
+        if (__this_cpu_read(ktimer_softirqd) &&
+            __this_cpu_read(ktimer_softirqd)->softirqs_raised)
+                wakeup_timer_softirqd();
         local_irq_restore(flags);
 #endif
 }
@@ -1153,18 +1202,30 @@ static int takeover_tasklets(unsigned int cpu)
 static struct smp_hotplug_thread softirq_threads = {
         .store = &ksoftirqd,
         .setup = ksoftirqd_set_sched_params,
-        .cleanup = ksoftirqd_clr_sched_params,
         .thread_should_run = ksoftirqd_should_run,
         .thread_fn = run_ksoftirqd,
         .thread_comm = "ksoftirqd/%u",
 };
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+static struct smp_hotplug_thread softirq_timer_threads = {
+        .store = &ktimer_softirqd,
+        .setup = ktimer_softirqd_set_sched_params,
+        .cleanup = ktimer_softirqd_clr_sched_params,
+        .thread_should_run = ktimer_softirqd_should_run,
+        .thread_fn = run_ksoftirqd,
+        .thread_comm = "ktimersoftd/%u",
+};
+#endif
+
 static __init int spawn_ksoftirqd(void)
 {
         cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
                                   takeover_tasklets);
         BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
-
+#ifdef CONFIG_PREEMPT_RT_FULL
+        BUG_ON(smpboot_register_percpu_thread(&softirq_timer_threads));
+#endif
         return 0;
 }
 early_initcall(spawn_ksoftirqd);
--
Gitee

From 764a3e5422abc4bfdc5d8c9fbe7c028f692debf4 Mon Sep 17 00:00:00 2001
From: "yang.yang29@zte.com.cn"
Date: Tue, 6 Jun 2023 18:19:37 +0800
Subject: [PATCH 2/3] softirq: Avoid "local_softirq_pending" messages if
 ksoftirqd is blocked

commit ece86c041648cc912cb0e5f6c921ce25c8ffc506 upstream.

If the ksoftirqd thread has a softirq pending and is blocked on the
`local_softirq_locks' lock, then softirq_check_pending_idle() won't
complain, because the "lock owner" will mask away this softirq from the
mask of pending softirqs.
If ksoftirqd has an additional softirq pending, then it won't be masked
out, because we never look at ksoftirqd's mask.

If there are still pending softirqs while going to idle, check
ksoftirqd's and ktimersoftd's mask before complaining about unhandled
softirqs.

Cc: stable-rt@vger.kernel.org
Tested-by: Juri Lelli
Signed-off-by: Sebastian Andrzej Siewior
---
 kernel/softirq.c | 57 ++++++++++++++++++++++++++++++++++--------------
 1 file changed, 41 insertions(+), 16 deletions(-)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index fe4e59c80a08..1920985eeb09 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -92,6 +92,31 @@ static inline void softirq_clr_runner(unsigned int sirq)
         sr->runner[sirq] = NULL;
 }
 
+static bool softirq_check_runner_tsk(struct task_struct *tsk,
+                                     unsigned int *pending)
+{
+        bool ret = false;
+
+        if (!tsk)
+                return ret;
+
+        /*
+         * The wakeup code in rtmutex.c wakes up the task
+         * _before_ it sets pi_blocked_on to NULL under
+         * tsk->pi_lock. So we need to check for both: state
+         * and pi_blocked_on.
+         */
+        raw_spin_lock(&tsk->pi_lock);
+        if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) {
+                /* Clear all bits pending in that task */
+                *pending &= ~(tsk->softirqs_raised);
+                ret = true;
+        }
+        raw_spin_unlock(&tsk->pi_lock);
+
+        return ret;
+}
+
 /*
  * On preempt-rt a softirq running context might be blocked on a
  * lock. There might be no other runnable task on this CPU because the
@@ -104,6 +129,7 @@ static inline void softirq_clr_runner(unsigned int sirq)
  */
 void softirq_check_pending_idle(void)
 {
+        struct task_struct *tsk;
         static int rate_limit;
         struct softirq_runner *sr = this_cpu_ptr(&softirq_runners);
         u32 warnpending;
@@ -113,24 +139,23 @@ void softirq_check_pending_idle(void)
                 return;
 
         warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK;
+        if (!warnpending)
+                return;
         for (i = 0; i < NR_SOFTIRQS; i++) {
-                struct task_struct *tsk = sr->runner[i];
+                tsk = sr->runner[i];
 
-                /*
-                 * The wakeup code in rtmutex.c wakes up the task
-                 * _before_ it sets pi_blocked_on to NULL under
-                 * tsk->pi_lock. So we need to check for both: state
-                 * and pi_blocked_on.
-                 */
-                if (tsk) {
-                        raw_spin_lock(&tsk->pi_lock);
-                        if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) {
-                                /* Clear all bits pending in that task */
-                                warnpending &= ~(tsk->softirqs_raised);
-                                warnpending &= ~(1 << i);
-                        }
-                        raw_spin_unlock(&tsk->pi_lock);
-                }
+                if (softirq_check_runner_tsk(tsk, &warnpending))
+                        warnpending &= ~(1 << i);
+        }
+
+        if (warnpending) {
+                tsk = __this_cpu_read(ksoftirqd);
+                softirq_check_runner_tsk(tsk, &warnpending);
+        }
+
+        if (warnpending) {
+                tsk = __this_cpu_read(ktimer_softirqd);
+                softirq_check_runner_tsk(tsk, &warnpending);
         }
 
         if (warnpending) {
--
Gitee

From 6246334daf29ba5e0b971adef3f0ec56a07d6f14 Mon Sep 17 00:00:00 2001
From: "yang.yang29@zte.com.cn"
Date: Tue, 6 Jun 2023 18:19:55 +0800
Subject: [PATCH 3/3] softirq: Avoid "local_softirq_pending" messages if task
 is in cpu_chill()

commit 7bc071d971f2caab6d8cf678fa51ac0f0f84be57 upstream.

If the softirq thread enters cpu_chill() then ->state is UNINTERRUPTIBLE
and it has no ->pi_blocked_on set, so its mask is not taken into account.

->sleeping_lock is incremented by cpu_chill() since it is also required
to avoid a splat by RCU in case cpu_chill() is used while an RCU read
lock is held. Use the same mechanism for the softirq-pending check.

Cc: stable-rt@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior
---
 kernel/softirq.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index 1920985eeb09..27a4bb2303d0 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -105,9 +105,12 @@ static bool softirq_check_runner_tsk(struct task_struct *tsk,
          * _before_ it sets pi_blocked_on to NULL under
          * tsk->pi_lock. So we need to check for both: state
          * and pi_blocked_on.
+         * The test against UNINTERRUPTIBLE + ->sleeping_lock is in case the
+         * task does cpu_chill().
          */
         raw_spin_lock(&tsk->pi_lock);
-        if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) {
+        if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING ||
+            (tsk->state == TASK_UNINTERRUPTIBLE && tsk->sleeping_lock)) {
                 /* Clear all bits pending in that task */
                 *pending &= ~(tsk->softirqs_raised);
                 ret = true;
--
Gitee
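For readers who want to check the routing rule that patch 1/3 introduces outside the kernel: a raised softirq whose bit falls inside TIMER_SOFTIRQS is accounted to ktimersoftd, everything else stays with ksoftirqd. The standalone sketch below is not part of the patches and is not kernel code; it only mirrors the mask decision of do_raise_softirq_irqoff() and wakeup_proper_softirq() in plain user-space C, with the softirq numbering copied from the kernel's enum purely for illustration.

/*
 * Standalone illustration of the TIMER_SOFTIRQS routing added by patch 1/3.
 * Not kernel code: it only reproduces the mask check used by
 * do_raise_softirq_irqoff()/wakeup_proper_softirq() for readability.
 */
#include <stdio.h>

enum {
        HI_SOFTIRQ = 0, TIMER_SOFTIRQ, NET_TX_SOFTIRQ, NET_RX_SOFTIRQ,
        BLOCK_SOFTIRQ, IRQ_POLL_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ,
        HRTIMER_SOFTIRQ, RCU_SOFTIRQ, NR_SOFTIRQS
};

#define TIMER_SOFTIRQS ((1UL << TIMER_SOFTIRQ) | (1UL << HRTIMER_SOFTIRQ))

static const char * const softirq_to_name[NR_SOFTIRQS] = {
        "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
        "TASKLET", "SCHED", "HRTIMER", "RCU"
};

/* Timer and hrtimer softirqs go to ktimersoftd, everything else to ksoftirqd. */
static const char *target_thread(unsigned int nr)
{
        return ((1UL << nr) & TIMER_SOFTIRQS) ? "ktimersoftd" : "ksoftirqd";
}

int main(void)
{
        unsigned int nr;

        for (nr = 0; nr < NR_SOFTIRQS; nr++)
                printf("%-8s -> %s\n", softirq_to_name[nr], target_thread(nr));
        return 0;
}

Compiled with any C compiler, it prints which of the two per-CPU threads each softirq vector is handed to once the split is in place: TIMER and HRTIMER map to ktimersoftd, the remaining vectors stay with ksoftirqd.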