diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index b11fcdfd077076e9593e9af3ea64f443d34572a2..0c50559987c55c4dd2aeb67d2d14f973d3831cb6 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -18,6 +18,8 @@ /* Doesn't want IPI, wait for tick: */ #define IRQ_WORK_LAZY BIT(2) +/* Run hard IRQ context, even on RT */ +#define IRQ_WORK_HARD_IRQ BIT(3) #define IRQ_WORK_CLAIMED (IRQ_WORK_PENDING | IRQ_WORK_BUSY) @@ -52,4 +54,10 @@ static inline bool irq_work_needs_cpu(void) { return false; } static inline void irq_work_run(void) { } #endif +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL) +void irq_work_tick_soft(void); +#else +static inline void irq_work_tick_soft(void) { } +#endif + #endif /* _LINUX_IRQ_WORK_H */ diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 73288914ed5e78cc44b596ab5428ac98aceb73b3..2940622da5b3b9332dde0438b509d643efb592b5 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -57,29 +58,35 @@ void __weak arch_irq_work_raise(void) } /* Enqueue on current CPU, work must already be claimed and preempt disabled */ -static void __irq_work_queue_local(struct irq_work *work) +static void __irq_work_queue_local(struct irq_work *work, struct llist_head *list) { - /* If the work is "lazy", handle it from next tick if any */ - if (work->flags & IRQ_WORK_LAZY) { - if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) && - tick_nohz_tick_stopped()) - arch_irq_work_raise(); - } else { - if (llist_add(&work->llnode, this_cpu_ptr(&raised_list))) - arch_irq_work_raise(); - } + bool empty; + + empty = llist_add(&work->llnode, list); + + if (empty && + (!(work->flags & IRQ_WORK_LAZY) || + tick_nohz_tick_stopped())) + arch_irq_work_raise(); } /* Enqueue the irq work @work on the current CPU */ bool irq_work_queue(struct irq_work *work) { + struct llist_head *list; + /* Only queue if not already pending */ if (!irq_work_claim(work)) return false; /* Queue the entry and raise the IPI if needed. */ preempt_disable(); - __irq_work_queue_local(work); + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && !(work->flags & IRQ_WORK_HARD_IRQ)) + list = this_cpu_ptr(&lazy_list); + else + list = this_cpu_ptr(&raised_list); + + __irq_work_queue_local(work, list); preempt_enable(); return true; @@ -98,6 +105,9 @@ bool irq_work_queue_on(struct irq_work *work, int cpu) return irq_work_queue(work); #else /* CONFIG_SMP: */ + struct llist_head *list; + bool lazy_work, realtime = IS_ENABLED(CONFIG_PREEMPT_RT_FULL); + /* All work should have been flushed before going offline */ WARN_ON_ONCE(cpu_is_offline(cpu)); @@ -106,13 +116,21 @@ bool irq_work_queue_on(struct irq_work *work, int cpu) return false; preempt_disable(); + + lazy_work = work->flags & IRQ_WORK_LAZY; + + if (lazy_work || (realtime && !(work->flags & IRQ_WORK_HARD_IRQ))) + list = &per_cpu(lazy_list, cpu); + else + list = &per_cpu(raised_list, cpu); + if (cpu != smp_processor_id()) { /* Arch remote IPI send/receive backend aren't NMI safe */ WARN_ON_ONCE(in_nmi()); - if (llist_add(&work->llnode, &per_cpu(raised_list, cpu))) + if (llist_add(&work->llnode, list)) arch_send_call_function_single_ipi(cpu); } else { - __irq_work_queue_local(work); + __irq_work_queue_local(work, list); } preempt_enable(); @@ -128,9 +146,8 @@ bool irq_work_needs_cpu(void) raised = this_cpu_ptr(&raised_list); lazy = this_cpu_ptr(&lazy_list); - if (llist_empty(raised) || arch_irq_work_has_interrupt()) - if (llist_empty(lazy)) - return false; + if (llist_empty(raised) && llist_empty(lazy)) + return false; /* All work should have been flushed before going offline */ WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); @@ -144,8 +161,12 @@ static void irq_work_run_list(struct llist_head *list) struct llist_node *llnode; unsigned long flags; +#ifndef CONFIG_PREEMPT_RT_FULL + /* + * nort: On RT IRQ-work may run in SOFTIRQ context. + */ BUG_ON(!irqs_disabled()); - +#endif if (llist_empty(list)) return; @@ -177,7 +198,16 @@ static void irq_work_run_list(struct llist_head *list) void irq_work_run(void) { irq_work_run_list(this_cpu_ptr(&raised_list)); - irq_work_run_list(this_cpu_ptr(&lazy_list)); + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) { + /* + * NOTE: we raise softirq via IPI for safety, + * and execute in irq_work_tick() to move the + * overhead from hard to soft irq context. + */ + if (!llist_empty(this_cpu_ptr(&lazy_list))) + raise_softirq(TIMER_SOFTIRQ); + } else + irq_work_run_list(this_cpu_ptr(&lazy_list)); } EXPORT_SYMBOL_GPL(irq_work_run); @@ -187,8 +217,17 @@ void irq_work_tick(void) if (!llist_empty(raised) && !arch_irq_work_has_interrupt()) irq_work_run_list(raised); + + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) + irq_work_run_list(this_cpu_ptr(&lazy_list)); +} + +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL) +void irq_work_tick_soft(void) +{ irq_work_run_list(this_cpu_ptr(&lazy_list)); } +#endif /* * Synchronize against the irq_work @entry, ensures the entry is not diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e809b4730e6ff3f6f224c81c01dd157544fbd4d5..982cab423c33e69538043a45e2025d8e120d9901 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1298,6 +1298,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq && (rnp->ffmask & rdp->grpmask)) { init_irq_work(&rdp->rcu_iw, rcu_iw_handler); + rdp->rcu_iw.flags = IRQ_WORK_HARD_IRQ; rdp->rcu_iw_pending = true; rdp->rcu_iw_gp_seq = rnp->gp_seq; irq_work_queue_on(&rdp->rcu_iw, rdp->cpu); diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index aea9f1255c3d05fd470f7b9a42ab16032575dc33..497b0926f392176cef5d8695d310413d2305474e 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -279,6 +279,7 @@ static int init_rootdomain(struct root_domain *rd) rd->rto_cpu = -1; raw_spin_lock_init(&rd->rto_lock); init_irq_work(&rd->rto_push_work, rto_push_irq_work_func); + rd->rto_push_work.flags |= IRQ_WORK_HARD_IRQ; #endif init_dl_bw(&rd->dl_bw); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 463a6a84e7a122c8590de257ebb1d345be0534be..2778d02bb71caa706a9815ec9607bc45d9cccbfb 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -232,6 +232,7 @@ static void nohz_full_kick_func(struct irq_work *work) static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { .func = nohz_full_kick_func, + .flags = IRQ_WORK_HARD_IRQ, }; /* diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 753b75751f0ae9a8fdae3bb4bc94084d43d144c3..bdbfd52b1cfda1937126805ed82e328b41293400 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1754,6 +1754,8 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h) { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); + irq_work_tick_soft(); + __run_timers(base); if (IS_ENABLED(CONFIG_NO_HZ_COMMON)) __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));