From 025d80ca9e34366b539ed7a99b70485b4c0f9eaf Mon Sep 17 00:00:00 2001 From: zr_yy Date: Sun, 4 Jun 2023 22:29:13 +0800 Subject: [PATCH 1/4] timers: Prepare for full preemption commit c87cd9494b5ba0149620db6bc1655edbe04f9c9d upstream. When softirqs can be preempted we need to make sure that cancelling the timer from the active thread can not deadlock vs. a running timer callback. Add a waitqueue to resolve that. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 2 +- kernel/sched/core.c | 9 +++++++-- kernel/time/timer.c | 45 +++++++++++++++++++++++++++++++++++++++---- 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/include/linux/timer.h b/include/linux/timer.h index 4831897bc704..35ac1005257d 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -177,7 +177,7 @@ extern void add_timer(struct timer_list *timer); extern int try_to_del_timer_sync(struct timer_list *timer); -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) extern int del_timer_sync(struct timer_list *timer); #else # define del_timer_sync(t) del_timer(t) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f3709be5f5a0..73a0d492f877 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -509,11 +509,14 @@ void resched_cpu(int cpu) */ int get_nohz_timer_target(void) { - int i, cpu = smp_processor_id(); + int i, cpu; struct sched_domain *sd; + preempt_disable_rt(); + cpu = smp_processor_id(); + if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER)) - return cpu; + goto preempt_en_rt; rcu_read_lock(); for_each_domain(cpu, sd) { @@ -532,6 +535,8 @@ int get_nohz_timer_target(void) cpu = housekeeping_any_cpu(HK_FLAG_TIMER); unlock: rcu_read_unlock(); +preempt_en_rt: + preempt_enable_rt(); return cpu; } diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 61e41ea3a96e..ecc23d2c21a9 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -198,6 +199,9 @@ EXPORT_SYMBOL(jiffies_64); struct timer_base { raw_spinlock_t lock; struct timer_list *running_timer; +#ifdef CONFIG_PREEMPT_RT_FULL + struct swait_queue_head wait_for_running_timer; +#endif unsigned long clk; unsigned long next_expiry; unsigned int cpu; @@ -1190,6 +1194,33 @@ void add_timer_on(struct timer_list *timer, int cpu) } EXPORT_SYMBOL_GPL(add_timer_on); +#ifdef CONFIG_PREEMPT_RT_FULL +/* + * Wait for a running timer + */ +static void wait_for_running_timer(struct timer_list *timer) +{ + struct timer_base *base; + u32 tf = timer->flags; + + if (tf & TIMER_MIGRATING) + return; + + base = get_timer_base(tf); + swait_event_exclusive(base->wait_for_running_timer, + base->running_timer != timer); +} + +# define wakeup_timer_waiters(b) swake_up_all(&(b)->wait_for_running_timer) +#else +static inline void wait_for_running_timer(struct timer_list *timer) +{ + cpu_relax(); +} + +# define wakeup_timer_waiters(b) do { } while (0) +#endif + /** * del_timer - deactivate a timer. * @timer: the timer to be deactivated @@ -1245,7 +1276,7 @@ int try_to_del_timer_sync(struct timer_list *timer) } EXPORT_SYMBOL(try_to_del_timer_sync); -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) /** * del_timer_sync - deactivate a timer and wait for the handler to finish. * @timer: the timer to be deactivated @@ -1305,7 +1336,7 @@ int del_timer_sync(struct timer_list *timer) int ret = try_to_del_timer_sync(timer); if (ret >= 0) return ret; - cpu_relax(); + wait_for_running_timer(timer); } } EXPORT_SYMBOL(del_timer_sync); @@ -1366,13 +1397,16 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) fn = timer->function; - if (timer->flags & TIMER_IRQSAFE) { + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && + timer->flags & TIMER_IRQSAFE) { raw_spin_unlock(&base->lock); call_timer_fn(timer, fn); + base->running_timer = NULL; raw_spin_lock(&base->lock); } else { raw_spin_unlock_irq(&base->lock); call_timer_fn(timer, fn); + base->running_timer = NULL; raw_spin_lock_irq(&base->lock); } } @@ -1702,8 +1736,8 @@ static inline void __run_timers(struct timer_base *base) while (levels--) expire_timers(base, heads + levels); } - base->running_timer = NULL; raw_spin_unlock_irq(&base->lock); + wakeup_timer_waiters(base); } /* @@ -1948,6 +1982,9 @@ static void __init init_timer_cpu(int cpu) base->cpu = cpu; raw_spin_lock_init(&base->lock); base->clk = jiffies; +#ifdef CONFIG_PREEMPT_RT_FULL + init_swait_queue_head(&base->wait_for_running_timer); +#endif } } -- Gitee From 54e7cc4a6bacc908a0998e4ecae9d7568715c184 Mon Sep 17 00:00:00 2001 From: zr_yy Date: Sun, 4 Jun 2023 22:29:28 +0800 Subject: [PATCH 2/4] x86: kvm Require const tsc for RT commit 70c79c38cb1e81d8cffa19dc562b629dcde98c6c upstream. Non constant TSC is a nightmare on bare metal already, but with virtualization it becomes a complete disaster because the workarounds are horrible latency wise. That's also a preliminary for running RT in a guest on top of a RT host. Signed-off-by: Thomas Gleixner --- arch/x86/kvm/x86.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8c720b05141a..ecd600d5351a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6881,6 +6881,13 @@ int kvm_arch_init(void *opaque) goto out; } +#ifdef CONFIG_PREEMPT_RT_FULL + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { + printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n"); + return -EOPNOTSUPP; + } +#endif + r = kvm_mmu_module_init(); if (r) goto out_free_percpu; -- Gitee From 95bc183fcf5c3243b4540e3b107c1e368a653ec3 Mon Sep 17 00:00:00 2001 From: zr_yy Date: Sun, 4 Jun 2023 22:29:39 +0800 Subject: [PATCH 3/4] pci/switchtec: Don't use completion's wait queue commit cffa5c3f97fb6ad83cf3d770f5297f879f6efc36 upstream. The poll callback is using completion's wait_queue_head_t member and puts it in poll_wait() so the poll() caller gets a wakeup after command completed. This does not work on RT because we don't have a wait_queue_head_t in our completion implementation. Nobody in tree does like that in tree so this is the only driver that breaks. Instead of using the completion here is waitqueue with a status flag as suggested by Logan. I don't have the HW so I have no idea if it works as expected, so please test it. Cc: Kurt Schwemmer Cc: Logan Gunthorpe Signed-off-by: Sebastian Andrzej Siewior --- drivers/pci/switch/switchtec.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index 72db2e0ebced..77d4fb86d05b 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -43,10 +43,11 @@ struct switchtec_user { enum mrpc_state state; - struct completion comp; + wait_queue_head_t cmd_comp; struct kref kref; struct list_head list; + bool cmd_done; u32 cmd; u32 status; u32 return_code; @@ -68,7 +69,7 @@ static struct switchtec_user *stuser_create(struct switchtec_dev *stdev) stuser->stdev = stdev; kref_init(&stuser->kref); INIT_LIST_HEAD(&stuser->list); - init_completion(&stuser->comp); + init_waitqueue_head(&stuser->cmd_comp); stuser->event_cnt = atomic_read(&stdev->event_cnt); dev_dbg(&stdev->dev, "%s: %p\n", __func__, stuser); @@ -151,7 +152,7 @@ static int mrpc_queue_cmd(struct switchtec_user *stuser) kref_get(&stuser->kref); stuser->read_len = sizeof(stuser->data); stuser_set_state(stuser, MRPC_QUEUED); - init_completion(&stuser->comp); + stuser->cmd_done = false; list_add_tail(&stuser->list, &stdev->mrpc_queue); mrpc_cmd_submit(stdev); @@ -188,7 +189,8 @@ static void mrpc_complete_cmd(struct switchtec_dev *stdev) stuser->read_len); out: - complete_all(&stuser->comp); + stuser->cmd_done = true; + wake_up_interruptible(&stuser->cmd_comp); list_del_init(&stuser->list); stuser_put(stuser); stdev->mrpc_busy = 0; @@ -458,10 +460,11 @@ static ssize_t switchtec_dev_read(struct file *filp, char __user *data, mutex_unlock(&stdev->mrpc_mutex); if (filp->f_flags & O_NONBLOCK) { - if (!try_wait_for_completion(&stuser->comp)) + if (!READ_ONCE(stuser->cmd_done)) return -EAGAIN; } else { - rc = wait_for_completion_interruptible(&stuser->comp); + rc = wait_event_interruptible(stuser->cmd_comp, + stuser->cmd_done); if (rc < 0) return rc; } @@ -509,7 +512,7 @@ static __poll_t switchtec_dev_poll(struct file *filp, poll_table *wait) struct switchtec_dev *stdev = stuser->stdev; __poll_t ret = 0; - poll_wait(filp, &stuser->comp.wait, wait); + poll_wait(filp, &stuser->cmd_comp, wait); poll_wait(filp, &stdev->event_wq, wait); if (lock_mutex_and_test_alive(stdev)) @@ -517,7 +520,7 @@ static __poll_t switchtec_dev_poll(struct file *filp, poll_table *wait) mutex_unlock(&stdev->mrpc_mutex); - if (try_wait_for_completion(&stuser->comp)) + if (READ_ONCE(stuser->cmd_done)) ret |= EPOLLIN | EPOLLRDNORM; if (stuser->event_cnt != atomic_read(&stdev->event_cnt)) @@ -1041,7 +1044,8 @@ static void stdev_kill(struct switchtec_dev *stdev) /* Wake up and kill any users waiting on an MRPC request */ list_for_each_entry_safe(stuser, tmpuser, &stdev->mrpc_queue, list) { - complete_all(&stuser->comp); + stuser->cmd_done = true; + wake_up_interruptible(&stuser->cmd_comp); list_del_init(&stuser->list); stuser_put(stuser); } -- Gitee From 22a032962a5c7f0963c28304a25e67cfc0895bab Mon Sep 17 00:00:00 2001 From: zr_yy Date: Sun, 4 Jun 2023 22:29:51 +0800 Subject: [PATCH 4/4] wait.h: include atomic.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0aafaa8df18cf87cf491f2592b9c378667766a62 upstream. | CC init/main.o |In file included from include/linux/mmzone.h:9:0, | from include/linux/gfp.h:4, | from include/linux/kmod.h:22, | from include/linux/module.h:13, | from init/main.c:15: |include/linux/wait.h: In function ‘wait_on_atomic_t’: |include/linux/wait.h:982:2: error: implicit declaration of function ‘atomic_read’ [-Werror=implicit-function-declaration] | if (atomic_read(val) == 0) | ^ This pops up on ARM. Non-RT gets its atomic.h include from spinlock.h Signed-off-by: Sebastian Andrzej Siewior --- include/linux/wait.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/wait.h b/include/linux/wait.h index ed7c122cb31f..2b5ef8e94d19 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -10,6 +10,7 @@ #include #include +#include typedef struct wait_queue_entry wait_queue_entry_t; -- Gitee