diff --git a/arch/arm/mach-imx/cpuidle-imx6q.c b/arch/arm/mach-imx/cpuidle-imx6q.c
index 326e870d712394fad445033defd8e3ff5975ebdd..d9ac80aa1eb0a17dae81a0aa4e883f7386b86886 100644
--- a/arch/arm/mach-imx/cpuidle-imx6q.c
+++ b/arch/arm/mach-imx/cpuidle-imx6q.c
@@ -17,22 +17,22 @@
 #include "hardware.h"
 
 static int num_idle_cpus = 0;
-static DEFINE_SPINLOCK(cpuidle_lock);
+static DEFINE_RAW_SPINLOCK(cpuidle_lock);
 
 static int imx6q_enter_wait(struct cpuidle_device *dev,
			    struct cpuidle_driver *drv, int index)
 {
-	spin_lock(&cpuidle_lock);
+	raw_spin_lock(&cpuidle_lock);
 	if (++num_idle_cpus == num_online_cpus())
 		imx6_set_lpm(WAIT_UNCLOCKED);
-	spin_unlock(&cpuidle_lock);
+	raw_spin_unlock(&cpuidle_lock);
 
 	cpu_do_idle();
 
-	spin_lock(&cpuidle_lock);
+	raw_spin_lock(&cpuidle_lock);
 	if (num_idle_cpus-- == num_online_cpus())
 		imx6_set_lpm(WAIT_CLOCKED);
-	spin_unlock(&cpuidle_lock);
+	raw_spin_unlock(&cpuidle_lock);
 
 	return index;
 }
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3e4dd4b2771d13a3cf05567565d2bf03aa33094e..5840d9033b5b7c3e36a5aad95df016f88f59099c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2261,7 +2261,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 	apic->vcpu = vcpu;
 
 	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
-		     HRTIMER_MODE_ABS_PINNED);
+		     HRTIMER_MODE_ABS_PINNED_HARD);
 	apic->lapic_timer.timer.function = apic_timer_fn;
 
 	/*
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4542fb0217773acbe99dd0c4214a1d5c079c5ba4..7ac3221b57698013ddf18a906941d76548f653d5 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3416,10 +3416,9 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
 	kt = nsecs;
 
 	mode = HRTIMER_MODE_REL;
-	hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode);
+	hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode, current);
 	hrtimer_set_expires(&hs.timer, kt);
 
-	hrtimer_init_sleeper(&hs, current);
 	do {
 		if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
 			break;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 1a71e49fcda41615753e4b563585814192cbf374..01b7bb2a97c0f58562e32086baa0133b4e3f4ae9 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -70,7 +70,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
 #include 
diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index 77d4fb86d05beecab63e35c2edbde61771f3ab36..ea70bc0b06e9f91a94fb3bcc3b17c2c2c57f5bc1 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -360,7 +360,7 @@ static int switchtec_dev_open(struct inode *inode, struct file *filp)
 		return PTR_ERR(stuser);
 
 	filp->private_data = stuser;
-	nonseekable_open(inode, filp);
+	stream_open(inode, filp);
 
 	dev_dbg(&stdev->dev, "%s: %p\n", __func__, stuser);
diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index e05426079b4df27562d9ddcf9925ed729ad5a78e..7998278221a4cf63eed5c3e1cc9d823482ecca0b 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define DRIVER_NAME "rockchip-spi"
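The timer-user conversions above (and the vsoc, futex, and pktgen hunks further below) all follow one mechanical pattern: the old two-step sequence of hrtimer_init_on_stack() plus a separate hrtimer_init_sleeper() collapses into a single hrtimer_init_sleeper_on_stack() call that takes the wake-up task directly. A minimal before/after sketch of a caller — the wait_until_*() helpers are hypothetical, not part of this patch:

    /* Before: two calls, and easy to forget the second one. */
    static void wait_until_old(ktime_t deadline)
    {
    	struct hrtimer_sleeper sl;

    	hrtimer_init_on_stack(&sl.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
    	hrtimer_init_sleeper(&sl, current);
    	/* ... set expiry, start, schedule(), cancel ... */
    }

    /* After: one call; the sleeper code can now also pick soft vs. hard
     * expiry centrally (see the kernel/time/hrtimer.c hunks below). */
    static void wait_until_new(ktime_t deadline)
    {
    	struct hrtimer_sleeper sl;

    	hrtimer_init_sleeper_on_stack(&sl, CLOCK_MONOTONIC,
    				      HRTIMER_MODE_ABS, current);
    	hrtimer_set_expires(&sl.timer, deadline);
    	set_current_state(TASK_INTERRUPTIBLE);
    	hrtimer_start_expires(&sl.timer, HRTIMER_MODE_ABS);
    	if (sl.task)
    		schedule();
    	__set_current_state(TASK_RUNNING);
    	hrtimer_cancel(&sl.timer);
    	destroy_hrtimer_on_stack(&sl.timer);
    }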
diff --git a/drivers/staging/android/vsoc.c b/drivers/staging/android/vsoc.c
index 22571abcaa4e88c0a141adf42f2a899c5669303e..78a529d363f3b5299bf2b588366ade7a9fe7df8d 100644
--- a/drivers/staging/android/vsoc.c
+++ b/drivers/staging/android/vsoc.c
@@ -437,12 +437,10 @@ static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg)
 			return -EINVAL;
 		wake_time = ktime_set(arg->wake_time_sec, arg->wake_time_nsec);
 
-		hrtimer_init_on_stack(&to->timer, CLOCK_MONOTONIC,
-				      HRTIMER_MODE_ABS);
+		hrtimer_init_sleeper_on_stack(to, CLOCK_MONOTONIC,
+					      HRTIMER_MODE_ABS, current);
 		hrtimer_set_expires_range_ns(&to->timer, wake_time,
 					     current->timer_slack_ns);
-
-		hrtimer_init_sleeper(to, current);
 	}
 
 	while (1) {
diff --git a/fs/timerfd.c b/fs/timerfd.c
index d69ad801eb8039af7f19054d32791d48b40589aa..f845093466be1fd3c6a5afae135c8bd7586c41a6 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -471,7 +471,11 @@ static int do_timerfd_settime(int ufd, int flags,
 				break;
 		}
 		spin_unlock_irq(&ctx->wqh.lock);
-		cpu_relax();
+
+		if (isalarm(ctx))
+			hrtimer_grab_expiry_lock(&ctx->t.alarm.timer);
+		else
+			hrtimer_grab_expiry_lock(&ctx->t.tmr);
 	}
 
 	/*
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 670f0019a67c32e230f5bce852c3e2139ba49f7f..d7476470957e71305be75310b8408c9ba2b6f43a 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -14,7 +14,7 @@
  *	              Nauman Rafique 
 */
 
-#include 
+#include 
 #include 
 #include 
 #include 
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 3892e9c8b2deb7993c11c1ac171375c7ba6e9e09..6c4c38186c99526c1697b47103ec0c9927d7a0d2 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -41,6 +41,7 @@ enum hrtimer_mode {
 	HRTIMER_MODE_REL	= 0x01,
 	HRTIMER_MODE_PINNED	= 0x02,
 	HRTIMER_MODE_SOFT	= 0x04,
+	HRTIMER_MODE_HARD	= 0x08,
 
 	HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED,
 	HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED,
@@ -51,6 +52,11 @@ enum hrtimer_mode {
 	HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT,
 	HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT,
 
+	HRTIMER_MODE_ABS_HARD	= HRTIMER_MODE_ABS | HRTIMER_MODE_HARD,
+	HRTIMER_MODE_REL_HARD	= HRTIMER_MODE_REL | HRTIMER_MODE_HARD,
+
+	HRTIMER_MODE_ABS_PINNED_HARD = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_HARD,
+	HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD,
 };
 
 /*
@@ -186,6 +192,8 @@ enum hrtimer_base_type {
  * @nr_retries:		Total number of hrtimer interrupt retries
  * @nr_hangs:		Total number of hrtimer interrupt hangs
  * @max_hang_time:	Maximum time spent in hrtimer_interrupt
+ * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are
+ *			 expired
  * @expires_next:	absolute time of the next event, is required for remote
  *			hrtimer enqueue; it is the total first expiry time (hard
  *			and soft hrtimer are taken into account)
@@ -213,6 +221,7 @@ struct hrtimer_cpu_base {
 	unsigned short			nr_hangs;
 	unsigned int			max_hang_time;
 #endif
+	spinlock_t			softirq_expiry_lock;
 	ktime_t				expires_next;
 	struct hrtimer			*next_timer;
 	ktime_t				softirq_expires_next;
@@ -364,10 +373,17 @@ DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 
 /* Initialize timers: */
 extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock,
			 enum hrtimer_mode mode);
+extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
+				 enum hrtimer_mode mode,
+				 struct task_struct *task);
 
 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
 extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock,
				  enum hrtimer_mode mode);
+extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+					  clockid_t clock_id,
+					  enum hrtimer_mode mode,
+					  struct task_struct *task);
 
 extern void destroy_hrtimer_on_stack(struct hrtimer *timer);
 #else
@@ -377,6 +393,15 @@ static inline void hrtimer_init_on_stack(struct hrtimer *timer,
 {
 	hrtimer_init(timer, which_clock, mode);
 }
+
+static inline void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+						 clockid_t clock_id,
+						 enum hrtimer_mode mode,
+						 struct task_struct *task)
+{
+	hrtimer_init_sleeper(sl, clock_id, mode, task);
+}
+
 static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { }
 #endif
@@ -400,6 +425,7 @@ static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim,
 
 extern int hrtimer_cancel(struct hrtimer *timer);
 extern int hrtimer_try_to_cancel(struct hrtimer *timer);
+extern void hrtimer_grab_expiry_lock(const struct hrtimer *timer);
 
 static inline void hrtimer_start_expires(struct hrtimer *timer,
					 enum hrtimer_mode mode)
@@ -480,9 +506,6 @@ extern long hrtimer_nanosleep(const struct timespec64 *rqtp,
			      const enum hrtimer_mode mode,
			      const clockid_t clockid);
 
-extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
-				 struct task_struct *tsk);
-
 extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta,
				    const enum hrtimer_mode mode);
 extern int schedule_hrtimeout_range_clock(ktime_t *expires,
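Two new concepts enter the hrtimer API here. HRTIMER_MODE_*_HARD marks a timer whose callback must keep firing from hard interrupt context even on PREEMPT_RT, where unmarked timers get pushed into the softirq clock bases. hrtimer_grab_expiry_lock() replaces the cpu_relax() retry loops in the cancel paths: on RT, spinning until a callback finishes can live-lock when the callback runs in a preemptible softirq thread, so the canceller instead blocks on the per-CPU softirq_expiry_lock that the softirq holds for its whole expiry batch. A sketch of the resulting cancel idiom — cancel_and_wait() is a hypothetical wrapper; the real users are hrtimer_cancel(), alarm_cancel(), timerfd, and the posix-timer paths below:

    static void cancel_and_wait(struct hrtimer *timer)
    {
    	for (;;) {
    		int ret = hrtimer_try_to_cancel(timer);

    		if (ret >= 0)		/* inactive, or removed by us */
    			return;
    		/*
    		 * ret < 0: the callback is running right now. Block on
    		 * the expiry lock (PI-aware on RT) until the softirq's
    		 * expiry batch has finished, then retry.
    		 */
    		hrtimer_grab_expiry_lock(timer);
    	}
    }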
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 6936d921aa53a00d67f65725a8af09e89876d7c4..26579b8233a7f97f3f5fc4859a204a3f022d0888 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -13,7 +13,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
 
@@ -248,7 +248,6 @@ extern void resume_device_irqs(void);
 * struct irq_affinity_notify - context for notification of IRQ affinity changes
 * @irq:		Interrupt to which notification applies
 * @kref:		Reference count, for internal use
- * @swork:		Swork item, for internal use
 * @work:		Work item, for internal use
 * @notify:		Function to be called on change. This will be
 *			called in process context.
@@ -261,7 +260,7 @@ struct irq_affinity_notify {
 	unsigned int irq;
 	struct kref kref;
 #ifdef CONFIG_PREEMPT_RT_BASE
-	struct swork_event swork;
+	struct kthread_work work;
 #else
 	struct work_struct work;
 #endif
diff --git a/include/linux/kthread-cgroup.h b/include/linux/kthread-cgroup.h
new file mode 100644
index 0000000000000000000000000000000000000000..53d34bca9d7249be700abcaa43c67019d463f36d
--- /dev/null
+++ b/include/linux/kthread-cgroup.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_KTHREAD_CGROUP_H
+#define _LINUX_KTHREAD_CGROUP_H
+#include 
+#include 
+
+#ifdef CONFIG_BLK_CGROUP
+void kthread_associate_blkcg(struct cgroup_subsys_state *css);
+struct cgroup_subsys_state *kthread_blkcg(void);
+#else
+static inline void kthread_associate_blkcg(struct cgroup_subsys_state *css) { }
+static inline struct cgroup_subsys_state *kthread_blkcg(void)
+{
+	return NULL;
+}
+#endif
+#endif
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index fd6403945fc3c725d5ab3a4f6f7bca42514cb930..4571e251123788f5fb373129aee848812ab02467 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -105,7 +105,7 @@ struct kthread_delayed_work {
 };
 
 #define KTHREAD_WORKER_INIT(worker)	{				\
-	.lock = __SPIN_LOCK_UNLOCKED((worker).lock),			\
+	.lock = __RAW_SPIN_LOCK_UNLOCKED((worker).lock),		\
	.work_list = LIST_HEAD_INIT((worker).work_list),		\
	.delayed_work_list = LIST_HEAD_INIT((worker).delayed_work_list),\
	}
@@ -199,14 +199,12 @@ void kthread_destroy_worker(struct kthread_worker *worker);
 
 struct cgroup_subsys_state;
 
-#ifdef CONFIG_BLK_CGROUP
-void kthread_associate_blkcg(struct cgroup_subsys_state *css);
-struct cgroup_subsys_state *kthread_blkcg(void);
-#else
-static inline void kthread_associate_blkcg(struct cgroup_subsys_state *css) { }
-static inline struct cgroup_subsys_state *kthread_blkcg(void)
+extern struct kthread_worker kthread_global_worker;
+void kthread_init_global_worker(void);
+
+static inline bool kthread_schedule_work(struct kthread_work *work)
 {
-	return NULL;
+	return kthread_queue_work(&kthread_global_worker, work);
 }
-#endif
+
 #endif /* _LINUX_KTHREAD_H */
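kthread_schedule_work() is the RT stand-in for schedule_work() in contexts where the workqueue machinery cannot be used; items go to one global kthread_worker ("kswork") that kthread_init_global_worker() spawns during boot (see the init/main.c and kernel/kthread.c hunks below). The split-out kthread-cgroup.h exists so headers like blk-cgroup.h can get the blkcg helpers without pulling in all of kthread.h. A minimal usage sketch — hypothetical driver code, not from this patch:

    static void my_deferred_fn(struct kthread_work *work)
    {
    	/* Runs in the "kswork" kthread: process context, preemptible. */
    }

    static DEFINE_KTHREAD_WORK(my_work, my_deferred_fn);

    static irqreturn_t my_irq_handler(int irq, void *data)
    {
    	kthread_schedule_work(&my_work);	/* RT-safe deferral */
    	return IRQ_HANDLED;
    }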
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index ee7e987ea1b4354ef93a149c997caf75b6269c1b..3e6c91bdf2ef8678f3319fe7b81c9f20359a4448 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -15,6 +15,7 @@ struct cpu_timer_list {
 	u64 expires, incr;
 	struct task_struct *task;
 	int firing;
+	int firing_cpu;
 };
 
 /*
@@ -114,8 +115,8 @@ struct k_itimer {
 		struct {
 			struct alarm	alarmtimer;
 		} alarm;
-		struct rcu_head		rcu;
 	} it;
+	struct rcu_head		rcu;
 };
 
 void run_posix_cpu_timers(struct task_struct *task);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 68fa3d093a97559aefdb5814c17ce00227190098..5425ccb2d689154b8a778516e76c013040069af3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -938,6 +938,9 @@ struct task_struct {
 #ifdef CONFIG_POSIX_TIMERS
 	struct task_cputime		cputime_expires;
 	struct list_head		cpu_timers[3];
+#ifdef CONFIG_PREEMPT_RT_BASE
+	struct task_struct		*posix_timer_list;
+#endif
 #endif
 
 	/* Process credentials: */
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 2b5ef8e94d19268a383a53adaaa82d14a57be71c..94bd2e841de646ed52e6f5331682aa6133a38f0b 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -489,8 +489,8 @@ do {									\
	int __ret = 0;							\
	struct hrtimer_sleeper __t;					\
									\
-	hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);	\
-	hrtimer_init_sleeper(&__t, current);				\
+	hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC, HRTIMER_MODE_REL,	\
+				      current);				\
	if ((timeout) != KTIME_MAX)					\
		hrtimer_start_range_ns(&__t.timer, timeout,		\
				       current->timer_slack_ns,		\
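The posix-timers.h change pulls the rcu_head out of the it union. The RT series below waits for running callbacks with the timer unlocked, so a concurrent deleter may free the k_itimer while a waiter still holds only an RCU reference; the grace-period bookkeeping therefore must not overlay union members that are still being read. The matching free path, taken from the kernel/time/posix-timers.c hunk further below:

    static void k_itimer_rcu_free(struct rcu_head *head)
    {
    	/* 'rcu' now sits beside the union instead of inside it */
    	struct k_itimer *tmr = container_of(head, struct k_itimer, rcu);

    	kmem_cache_free(posix_timers_cache, tmr);
    }

    /* release_posix_timer() queues the free after a grace period: */
    call_rcu(&tmr->rcu, k_itimer_rcu_free);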
diff --git a/init/init_task.c b/init/init_task.c
index 0b49b9cf55714000700f4120ef04ff90db43a3b8..9e33627482147ed06bf11f74e64463def98371c2 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -50,6 +50,12 @@ static struct sighand_struct init_sighand = {
	.signalfd_wqh	= __WAIT_QUEUE_HEAD_INITIALIZER(init_sighand.signalfd_wqh),
 };
 
+#if defined(CONFIG_POSIX_TIMERS) && defined(CONFIG_PREEMPT_RT_BASE)
+# define INIT_TIMER_LIST	.posix_timer_list = NULL,
+#else
+# define INIT_TIMER_LIST
+#endif
+
 /*
  * Set up the first task table, touch at your own risk!. Base=0,
  * limit=0x1fffff (=2MB)
@@ -119,6 +125,7 @@ struct task_struct init_task
	INIT_CPU_TIMERS(init_task)
	.pi_lock	= __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock),
	.timer_slack_ns	= 50000, /* 50 usec default slack */
+	INIT_TIMER_LIST
	.thread_pid	= &init_struct_pid,
	.thread_group	= LIST_HEAD_INIT(init_task.thread_group),
	.thread_node	= LIST_HEAD_INIT(init_signals.thread_head),
diff --git a/init/main.c b/init/main.c
index c11aed805ef58cbcadf07ec643b7d80623e28679..e244ed4045c0a4fb1dfe23422a59503e75cad274 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1136,6 +1136,7 @@ static noinline void __init kernel_init_freeable(void)
	smp_prepare_cpus(setup_max_cpus);
 
	workqueue_init();
+	kthread_init_global_worker();
 
	init_mm_internals();
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 64c8205704451ff29b7a62a08e0489ff6b0aaa33..ce1ebde1a0ec0dd71cbb383238d412c8d89883e5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1102,7 +1102,7 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
	cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
 
	raw_spin_lock_init(&cpuctx->hrtimer_lock);
-	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
	timer->function = perf_mux_hrtimer_handler;
 }
 
@@ -9326,7 +9326,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event)
	if (!is_sampling_event(event))
		return;
 
-	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
	hwc->hrtimer.function = perf_swevent_hrtimer;
 
	/*
diff --git a/kernel/fork.c b/kernel/fork.c
index e43babb1af4f0616582e43c75d4f6c345c065702..6aef8c531a9064f40d68cadfa3545deff43bef3d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1631,6 +1631,9 @@ static void rt_mutex_init_task(struct task_struct *p)
 */
 static void posix_cpu_timers_init(struct task_struct *tsk)
 {
+#ifdef CONFIG_PREEMPT_RT_BASE
+	tsk->posix_timer_list = NULL;
+#endif
	tsk->cputime_expires.prof_exp = 0;
	tsk->cputime_expires.virt_exp = 0;
	tsk->cputime_expires.sched_exp = 0;
diff --git a/kernel/futex.c b/kernel/futex.c
index 0e5b78290df648759aded198fd8406504ee8ae5d..0ef8ec8a583eca2a0617c8844a95c49395c55183 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2708,10 +2708,9 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
 
	if (abs_time) {
		to = &timeout;
 
-		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
-				      CLOCK_REALTIME : CLOCK_MONOTONIC,
-				      HRTIMER_MODE_ABS);
-		hrtimer_init_sleeper(to, current);
+		hrtimer_init_sleeper_on_stack(to, (flags & FLAGS_CLOCKRT) ?
+					      CLOCK_REALTIME : CLOCK_MONOTONIC,
+					      HRTIMER_MODE_ABS, current);
		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
					     current->timer_slack_ns);
	}
@@ -2810,9 +2809,8 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
 
	if (time) {
		to = &timeout;
-		hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME,
-				      HRTIMER_MODE_ABS);
-		hrtimer_init_sleeper(to, current);
+		hrtimer_init_sleeper_on_stack(to, CLOCK_REALTIME,
+					      HRTIMER_MODE_ABS, current);
		hrtimer_set_expires(&to->timer, *time);
	}
@@ -3249,10 +3247,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 
	if (abs_time) {
		to = &timeout;
-		hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
-				      CLOCK_REALTIME : CLOCK_MONOTONIC,
-				      HRTIMER_MODE_ABS);
-		hrtimer_init_sleeper(to, current);
+		hrtimer_init_sleeper_on_stack(to, (flags & FLAGS_CLOCKRT) ?
+					      CLOCK_REALTIME : CLOCK_MONOTONIC,
+					      HRTIMER_MODE_ABS, current);
		hrtimer_set_expires_range_ns(&to->timer, *abs_time,
					     current->timer_slack_ns);
	}
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 7e51e1d9f437359a372eec31838eb00349151320..c7440dd83e230290788f1352ad2c262871612cb8 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -324,7 +324,7 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask,
		kref_get(&desc->affinity_notify->kref);
 
 #ifdef CONFIG_PREEMPT_RT_BASE
-		swork_queue(&desc->affinity_notify->swork);
+		kthread_schedule_work(&desc->affinity_notify->work);
 #else
		schedule_work(&desc->affinity_notify->work);
 #endif
@@ -389,21 +389,11 @@ static void _irq_affinity_notify(struct irq_affinity_notify *notify)
 }
 
 #ifdef CONFIG_PREEMPT_RT_BASE
-static void init_helper_thread(void)
-{
-	static int init_sworker_once;
-
-	if (init_sworker_once)
-		return;
-	if (WARN_ON(swork_get()))
-		return;
-	init_sworker_once = 1;
-}
-
-static void irq_affinity_notify(struct swork_event *swork)
+static void irq_affinity_notify(struct kthread_work *work)
 {
	struct irq_affinity_notify *notify =
-		container_of(swork, struct irq_affinity_notify, swork);
+		container_of(work, struct irq_affinity_notify, work);
	_irq_affinity_notify(notify);
 }
 
@@ -446,8 +436,7 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
		notify->irq = irq;
		kref_init(&notify->kref);
 #ifdef CONFIG_PREEMPT_RT_BASE
-		INIT_SWORK(&notify->swork, irq_affinity_notify);
-		init_helper_thread();
+		kthread_init_work(&notify->work, irq_affinity_notify);
 #else
		INIT_WORK(&notify->work, irq_affinity_notify);
 #endif
@@ -459,8 +448,9 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
	raw_spin_unlock_irqrestore(&desc->lock, flags);
 
	if (old_notify) {
-#ifndef CONFIG_PREEMPT_RT_BASE
-		/* Need to address this for PREEMPT_RT */
+#ifdef CONFIG_PREEMPT_RT_BASE
+		kthread_cancel_work_sync(&notify->work);
+#else
		cancel_work_sync(&old_notify->work);
 #endif
		kref_put(&old_notify->kref, old_notify->release);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index fcb3a1a6e14bc64fec6c97132fcc5d125a4050f4..20d01a4bf16d02c7486ba4ef697ed967ab0e529e 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 static DEFINE_SPINLOCK(kthread_create_lock);
@@ -1181,6 +1182,19 @@ void kthread_destroy_worker(struct kthread_worker *worker)
 }
 EXPORT_SYMBOL(kthread_destroy_worker);
 
+DEFINE_KTHREAD_WORKER(kthread_global_worker);
+EXPORT_SYMBOL(kthread_global_worker);
+
+__init void kthread_init_global_worker(void)
+{
+	kthread_global_worker.task = kthread_create(kthread_worker_fn,
+						    &kthread_global_worker,
+						    "kswork");
+	if (WARN_ON(IS_ERR(kthread_global_worker.task)))
+		return;
+	wake_up_process(kthread_global_worker.task);
+}
+
 #ifdef CONFIG_BLK_CGROUP
 /**
 * kthread_associate_blkcg - associate blkcg to current kthread
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 16d8dba23329aa6bea256ac41d59aef83ed4a7cb..ed75addd3ccda42c4eaabafa91c4187863ba9033 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -69,7 +69,9 @@ module_param(rcu_expedited, int, 0);
 extern int rcu_normal; /* from sysctl */
 module_param(rcu_normal, int, 0);
 static int rcu_normal_after_boot = IS_ENABLED(CONFIG_PREEMPT_RT_FULL);
+#ifndef CONFIG_PREEMPT_RT_FULL
 module_param(rcu_normal_after_boot, int, 0);
+#endif
 #endif /* #ifndef CONFIG_TINY_RCU */
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
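The genirq conversion retires the private swork helper thread: the affinity notification becomes a kthread_work queued via kthread_schedule_work(), so there is no init_helper_thread() to race with, and teardown finally gets a real synchronous cancel on RT where the swork version only left a "need to address this" comment. One thing worth double-checking when applying: the RT branch cancels notify->work while the !RT branch cancels old_notify->work; cancelling the old notifier's pending work appears to be the intent. The caller-visible API is unchanged — roughly, with hypothetical names:

    static void my_affinity_changed(struct irq_affinity_notify *n,
    				const cpumask_t *mask)
    {
    	/* process context on both RT and !RT */
    }

    static void my_release(struct kref *kref)
    {
    	/* drop resources tied to the notifier (hypothetical) */
    }

    static struct irq_affinity_notify my_notify = {
    	.notify  = my_affinity_changed,
    	.release = my_release,
    };

    static int my_setup(unsigned int irq)
    {
    	return irq_set_affinity_notifier(irq, &my_notify);
    }

The rcu/update.c hunk is related hardening: rcu_normal_after_boot is forced on for RT and no longer exposed as a writable module parameter there.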
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index 755a58084978198b7b2438b9fad59943447aeb13..49c14137988ea5947e22c1229d2512075f2f8efa 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -72,12 +72,12 @@ do_wait_for_common(struct completion *x,
	if (!x->done) {
		DECLARE_SWAITQUEUE(wait);
 
-		__prepare_to_swait(&x->wait, &wait);
		do {
			if (signal_pending_state(state, current)) {
				timeout = -ERESTARTSYS;
				break;
			}
+			__prepare_to_swait(&x->wait, &wait);
			__set_current_state(state);
			raw_spin_unlock_irq(&x->wait.lock);
			timeout = action(timeout);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 60de304a7b097fccdd00e512441fd4f0b489bcd5..e37d8d0911b6144f65c91012b4a08c1d980a59c4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -330,7 +330,7 @@ static void hrtick_rq_init(struct rq *rq)
	rq->hrtick_csd.info = rq;
 #endif
 
-	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
	rq->hrtick_timer.function = hrtick;
 }
 #else	/* CONFIG_SCHED_HRTICK */
@@ -569,14 +569,11 @@ void resched_cpu(int cpu)
 */
 int get_nohz_timer_target(void)
 {
-	int i, cpu;
+	int i, cpu = smp_processor_id();
	struct sched_domain *sd;
 
-	preempt_disable_rt();
-	cpu = smp_processor_id();
-
	if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER))
-		goto preempt_en_rt;
+		return cpu;
 
	rcu_read_lock();
	for_each_domain(cpu, sd) {
@@ -595,8 +592,6 @@ int get_nohz_timer_target(void)
		cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
 unlock:
	rcu_read_unlock();
-preempt_en_rt:
-	preempt_enable_rt();
	return cpu;
 }
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 4b13df38c069e646997b12763e3f3b72506298ab..974a8f9b615a3555fffe3e4d09922bdac7fcbc22 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1086,7 +1086,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
 {
	struct hrtimer *timer = &dl_se->dl_timer;
 
-	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
	timer->function = dl_task_timer;
 }
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index e9a609cdd19b683f7fdb498f50e5b47db0ab2ea2..f5f522d5151a64613c16c15ddbd8be30ba0563bf 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -45,8 +45,8 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
 
	raw_spin_lock_init(&rt_b->rt_runtime_lock);
 
-	hrtimer_init(&rt_b->rt_period_timer,
-		     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL_HARD);
	rt_b->rt_period_timer.function = sched_rt_period_timer;
 }
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index f4255a65c44b2347bd4559137e53c2bf7636cd0c..61ab2c923579d30ad1fd0396774a373c99d799d7 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -436,7 +436,7 @@ int alarm_cancel(struct alarm *alarm)
		int ret = alarm_try_to_cancel(alarm);
		if (ret >= 0)
			return ret;
-		cpu_relax();
+		hrtimer_grab_expiry_lock(&alarm->timer);
	}
 }
 EXPORT_SYMBOL_GPL(alarm_cancel);
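Two independent fixes ride along here. In do_wait_for_common(), __prepare_to_swait() moves inside the loop: a swait wakeup removes the waiter from the queue, so a task that loops again (completion not yet consumed, or a spurious wakeup) must re-enqueue itself or it will go to sleep while on no wait list and never be woken — a lockup. The patched loop with the reasoning spelled out, as a sketch:

    do {
    	if (signal_pending_state(state, current)) {
    		timeout = -ERESTARTSYS;
    		break;
    	}
    	/*
    	 * A previous wakeup removed us from x->wait; re-enqueue on
    	 * every iteration, *before* going to sleep again.
    	 */
    	__prepare_to_swait(&x->wait, &wait);
    	__set_current_state(state);
    	raw_spin_unlock_irq(&x->wait.lock);
    	timeout = action(timeout);
    	raw_spin_lock_irq(&x->wait.lock);
    } while (!x->done && timeout);

Second, get_nohz_timer_target() is restructured so the housekeeping fast path simply returns the current CPU, which lets the RT-only preempt_disable_rt()/preempt_enable_rt() bracket and its goto labels go away. alarm_cancel() picks up the same expiry-lock wait idiom sketched earlier instead of cpu_relax().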
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 49feba6f076252c17badf34a43d664c4b3fdbe58..4534e7871c8c169017b5cd1c9a2168c371968191 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -730,6 +730,29 @@ static void hrtimer_switch_to_hres(void)
	retrigger_next_event(NULL);
 }
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+
+static struct swork_event clock_set_delay_work;
+
+static void run_clock_set_delay(struct swork_event *event)
+{
+	clock_was_set();
+}
+
+void clock_was_set_delayed(void)
+{
+	swork_queue(&clock_set_delay_work);
+}
+
+static __init int create_clock_set_delay_thread(void)
+{
+	WARN_ON(swork_get());
+	INIT_SWORK(&clock_set_delay_work, run_clock_set_delay);
+	return 0;
+}
+early_initcall(create_clock_set_delay_thread);
+#else /* PREEMPT_RT_FULL */
+
 static void clock_was_set_work(struct work_struct *work)
 {
	clock_was_set();
@@ -745,6 +768,7 @@ void clock_was_set_delayed(void)
 {
	schedule_work(&hrtimer_work);
 }
+#endif
 
 #else
 
@@ -939,6 +963,16 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
 }
 EXPORT_SYMBOL_GPL(hrtimer_forward);
 
+void hrtimer_grab_expiry_lock(const struct hrtimer *timer)
+{
+	struct hrtimer_clock_base *base = timer->base;
+
+	if (base && base->cpu_base) {
+		spin_lock(&base->cpu_base->softirq_expiry_lock);
+		spin_unlock(&base->cpu_base->softirq_expiry_lock);
+	}
+}
+
 /*
 * enqueue_hrtimer - internal function to (re)start a timer
 *
@@ -1108,7 +1142,9 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
	 * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
	 * match.
	 */
+#ifndef CONFIG_PREEMPT_RT_BASE
	WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
+#endif
 
	base = lock_hrtimer_base(timer, &flags);
 
@@ -1171,7 +1207,7 @@ int hrtimer_cancel(struct hrtimer *timer)
 
		if (ret >= 0)
			return ret;
-		cpu_relax();
+		hrtimer_grab_expiry_lock(timer);
	}
 }
 EXPORT_SYMBOL_GPL(hrtimer_cancel);
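With hrtimer_cancel() converted, note the interaction with the hunk just below: on PREEMPT_RT_FULL, __hrtimer_init() silently retargets every timer that is neither SOFT nor HARD into the softirq clock bases, which is also why the soft/is_soft consistency WARN_ON_ONCE above is compiled out for RT_BASE. When picking a mode in new code, the working rule is roughly as follows — a sketch with hypothetical names, not from this patch:

    /*
     * - Must fire from hard irq even on RT (tick emulation, perf,
     *   watchdog, scheduler internals): use HRTIMER_MODE_*_HARD.
     * - Everything else: use the plain modes; RT defers the callback
     *   into the soft bases (ksoftirqd) automatically.
     * - Task wake-up sleepers: leave both bits unset and let
     *   __hrtimer_init_sleeper() choose (RT tasks get hard expiry so
     *   their wake-up is not at ksoftirqd's mercy).
     */
    static struct hrtimer my_timer;

    hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
    my_timer.function = my_hardirq_safe_callback;	/* hypothetical */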
@@ -1268,10 +1304,17 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id)
 static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
			   enum hrtimer_mode mode)
 {
-	bool softtimer = !!(mode & HRTIMER_MODE_SOFT);
-	int base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
+	bool softtimer;
+	int base;
	struct hrtimer_cpu_base *cpu_base;
 
+	softtimer = !!(mode & HRTIMER_MODE_SOFT);
+#ifdef CONFIG_PREEMPT_RT_FULL
+	if (!softtimer && !(mode & HRTIMER_MODE_HARD))
+		softtimer = true;
+#endif
+	base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
+
	memset(timer, 0, sizeof(struct hrtimer));
 
	cpu_base = raw_cpu_ptr(&hrtimer_bases);
@@ -1468,6 +1511,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
	unsigned long flags;
	ktime_t now;
 
+	spin_lock(&cpu_base->softirq_expiry_lock);
	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
	now = hrtimer_update_base(cpu_base);
@@ -1477,6 +1521,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
	hrtimer_update_softirq_timer(cpu_base, true);
 
	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+	spin_unlock(&cpu_base->softirq_expiry_lock);
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
@@ -1648,13 +1693,52 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
	return HRTIMER_NORESTART;
 }
 
-void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
+static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id,
+				   enum hrtimer_mode mode,
+				   struct task_struct *task)
 {
+#ifdef CONFIG_PREEMPT_RT_FULL
+	if (!(mode & (HRTIMER_MODE_SOFT | HRTIMER_MODE_HARD))) {
+		if (task_is_realtime(current) || system_state != SYSTEM_RUNNING)
+			mode |= HRTIMER_MODE_HARD;
+		else
+			mode |= HRTIMER_MODE_SOFT;
+	}
+#endif
+	__hrtimer_init(&sl->timer, clock_id, mode);
	sl->timer.function = hrtimer_wakeup;
	sl->task = task;
 }
+
+/**
+ * hrtimer_init_sleeper - initialize sleeper to the given clock
+ * @sl:		sleeper to be initialized
+ * @clock_id:	the clock to be used
+ * @mode:	timer mode abs/rel
+ * @task:	the task to wake up
+ */
+void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
+			  enum hrtimer_mode mode, struct task_struct *task)
+{
+	debug_init(&sl->timer, clock_id, mode);
+	__hrtimer_init_sleeper(sl, clock_id, mode, task);
+
+}
 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
 
+#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
+void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id,
+				   enum hrtimer_mode mode,
+				   struct task_struct *task)
+{
+	debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr);
+	__hrtimer_init_sleeper(sl, clock_id, mode, task);
+}
+EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack);
+#endif
+
 int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts)
 {
	switch(restart->nanosleep.type) {
@@ -1678,8 +1762,6 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
 {
	struct restart_block *restart;
 
-	hrtimer_init_sleeper(t, current);
-
	do {
		set_current_state(TASK_INTERRUPTIBLE);
		hrtimer_start_expires(&t->timer, mode);
@@ -1687,12 +1769,12 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
		if (likely(t->task))
			freezable_schedule();
 
+		__set_current_state(TASK_RUNNING);
		hrtimer_cancel(&t->timer);
		mode = HRTIMER_MODE_ABS;
 
	} while (t->task && !signal_pending(current));
 
-	__set_current_state(TASK_RUNNING);
 
	if (!t->task)
		return 0;
@@ -1716,10 +1798,9 @@ static long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
	struct hrtimer_sleeper t;
	int ret;
 
-	hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid,
-			      HRTIMER_MODE_ABS);
+	hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid,
+				      HRTIMER_MODE_ABS, current);
	hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
-
	ret = do_nanosleep(&t, HRTIMER_MODE_ABS);
	destroy_hrtimer_on_stack(&t.timer);
	return ret;
@@ -1737,7 +1818,7 @@ long hrtimer_nanosleep(const struct timespec64 *rqtp,
	if (dl_task(current) || rt_task(current))
		slack = 0;
 
-	hrtimer_init_on_stack(&t.timer, clockid, mode);
+	hrtimer_init_sleeper_on_stack(&t, clockid, mode, current);
	hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack);
	ret = do_nanosleep(&t, mode);
	if (ret != -ERESTART_RESTARTBLOCK)
@@ -1850,6 +1931,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
	cpu_base->softirq_next_timer = NULL;
	cpu_base->expires_next = KTIME_MAX;
	cpu_base->softirq_expires_next = KTIME_MAX;
+	spin_lock_init(&cpu_base->softirq_expiry_lock);
	return 0;
 }
@@ -1968,11 +2050,9 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
		return -EINTR;
	}
 
-	hrtimer_init_on_stack(&t.timer, clock_id, mode);
+	hrtimer_init_sleeper_on_stack(&t, clock_id, mode, current);
	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
 
-	hrtimer_init_sleeper(&t, current);
-
	hrtimer_start_expires(&t.timer, mode);
 
	if (likely(t.task))
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index 9a65713c83093f343466bd4ed0c5435d4077309c..a5ff222df4c706d813b90c794b2bb6e9d649a76c 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -215,6 +215,7 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
		/* We are sharing ->siglock with it_real_fn() */
		if (hrtimer_try_to_cancel(timer) < 0) {
			spin_unlock_irq(&tsk->sighand->siglock);
+			hrtimer_grab_expiry_lock(timer);
			goto again;
		}
		expires = timeval_to_ktime(value->it_value);
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index d62d7ae5201c5c5a525c48aa2a3197d3e4126041..765e700962ab6582756f4a208637c144068eb9cb 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -3,8 +3,10 @@
 * Implement CPU time clocks for the POSIX clock interface.
 */
 
+#include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -15,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "posix-timers.h"
 
@@ -789,6 +792,7 @@ check_timers_list(struct list_head *timers,
			return t->expires;
 
		t->firing = 1;
+		t->firing_cpu = smp_processor_id();
		list_move_tail(&t->entry, firing);
	}
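firing_cpu records, at the moment a CPU timer is moved onto the local firing list, which CPU is about to run its callback. The next hunk adds a per-CPU cpu_timer_expiry_lock held across the whole firing pass, so a task on another CPU that must wait for a callback to finish (the settime/delete retry paths and do_cpu_nanosleep) does a remote lock/unlock on exactly that CPU's lock — the same grab-the-expiry-lock idiom as for hrtimers, just per CPU instead of per cpu_base. Waiter side, sketching cpu_timers_grab_expiry_lock() below:

    int cpu = timer->it.cpu.firing_cpu;

    if (cpu >= 0) {	/* -1 means: not being fired right now */
    	spinlock_t *lock = per_cpu_ptr(&cpu_timer_expiry_lock, cpu);

    	spin_lock_irq(lock);	/* blocks until the firing pass ends */
    	spin_unlock_irq(lock);
    }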
@@ -1135,18 +1139,31 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
	return 0;
 }
 
+static DEFINE_PER_CPU(spinlock_t, cpu_timer_expiry_lock) = __SPIN_LOCK_UNLOCKED(cpu_timer_expiry_lock);
+
+void cpu_timers_grab_expiry_lock(struct k_itimer *timer)
+{
+	int cpu = timer->it.cpu.firing_cpu;
+
+	if (cpu >= 0) {
+		spinlock_t *expiry_lock = per_cpu_ptr(&cpu_timer_expiry_lock, cpu);
+
+		spin_lock_irq(expiry_lock);
+		spin_unlock_irq(expiry_lock);
+	}
+}
+
 /*
 * This is called from the timer interrupt handler.  The irq handler has
 * already updated our counts.  We need to check if any timers fire now.
 * Interrupts are disabled.
 */
-void run_posix_cpu_timers(struct task_struct *tsk)
+static void __run_posix_cpu_timers(struct task_struct *tsk)
 {
	LIST_HEAD(firing);
	struct k_itimer *timer, *next;
	unsigned long flags;
-
-	lockdep_assert_irqs_disabled();
+	spinlock_t *expiry_lock;
 
	/*
	 * The fast path checks that there are no expired thread or thread
@@ -1155,6 +1172,9 @@ void run_posix_cpu_timers(struct task_struct *tsk)
	if (!fastpath_timer_check(tsk))
		return;
 
+	expiry_lock = this_cpu_ptr(&cpu_timer_expiry_lock);
+	spin_lock(expiry_lock);
+
	if (!lock_task_sighand(tsk, &flags))
		return;
	/*
@@ -1189,6 +1209,7 @@ void run_posix_cpu_timers(struct task_struct *tsk)
		list_del_init(&timer->it.cpu.entry);
		cpu_firing = timer->it.cpu.firing;
		timer->it.cpu.firing = 0;
+		timer->it.cpu.firing_cpu = -1;
		/*
		 * The firing flag is -1 if we collided with a reset
		 * of the timer, which already reported this
@@ -1198,8 +1219,156 @@ void run_posix_cpu_timers(struct task_struct *tsk)
			cpu_timer_fire(timer);
		spin_unlock(&timer->it_lock);
	}
+	spin_unlock(expiry_lock);
+}
+
+#ifdef CONFIG_PREEMPT_RT_BASE
+#include 
+#include 
+DEFINE_PER_CPU(struct task_struct *, posix_timer_task);
+DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist);
+DEFINE_PER_CPU(bool, posix_timer_th_active);
+
+static void posix_cpu_kthread_fn(unsigned int cpu)
+{
+	struct task_struct *tsk = NULL;
+	struct task_struct *next = NULL;
+
+	BUG_ON(per_cpu(posix_timer_task, cpu) != current);
+
+	/* grab task list */
+	raw_local_irq_disable();
+	tsk = per_cpu(posix_timer_tasklist, cpu);
+	per_cpu(posix_timer_tasklist, cpu) = NULL;
+	raw_local_irq_enable();
+
+	/* it's possible the list is empty, just return */
+	if (!tsk)
+		return;
+
+	/* Process task list */
+	while (1) {
+		/* save next */
+		next = tsk->posix_timer_list;
+
+		/* run the task timers, clear its ptr and
+		 * unreference it
+		 */
+		__run_posix_cpu_timers(tsk);
+		tsk->posix_timer_list = NULL;
+		put_task_struct(tsk);
+
+		/* check if this is the last on the list */
+		if (next == tsk)
+			break;
+		tsk = next;
+	}
+}
+
+static inline int __fastpath_timer_check(struct task_struct *tsk)
+{
+	/* tsk == current, ensure it is safe to use ->signal/sighand */
+	if (unlikely(tsk->exit_state))
+		return 0;
+
+	if (!task_cputime_zero(&tsk->cputime_expires))
+		return 1;
+
+	if (!task_cputime_zero(&tsk->signal->cputime_expires))
+		return 1;
+
+	return 0;
+}
+
+void run_posix_cpu_timers(struct task_struct *tsk)
+{
+	unsigned int cpu = smp_processor_id();
+	struct task_struct *tasklist;
+
+	BUG_ON(!irqs_disabled());
+
+	if (per_cpu(posix_timer_th_active, cpu) != true)
+		return;
+
+	/* get per-cpu references */
+	tasklist = per_cpu(posix_timer_tasklist, cpu);
+
+	/* check to see if we're already queued */
+	if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) {
+		get_task_struct(tsk);
+		if (tasklist) {
+			tsk->posix_timer_list = tasklist;
+		} else {
+			/*
+			 * The list is terminated by a self-pointing
+			 * task_struct
+			 */
+			tsk->posix_timer_list = tsk;
+		}
+		per_cpu(posix_timer_tasklist, cpu) = tsk;
+
+		wake_up_process(per_cpu(posix_timer_task, cpu));
+	}
+}
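On RT the hard-irq path no longer walks and fires the timers itself; it only chains the current task onto a per-CPU list and wakes a per-CPU SCHED_FIFO kthread (posixcputmr/N, set up below) that does the actual expiry in preemptible context. The list is threaded through task_struct::posix_timer_list with a self-pointing terminator, so no sentinel allocation is needed — condensed sketch of the enqueue above:

    /*
     * Queueing A, then B, then C builds (head in posix_timer_tasklist):
     *
     *	C -> B -> A -> A	(A points to itself = tail)
     *
     * so the consumer detects the tail with "next == tsk". A non-NULL
     * posix_timer_list doubles as the "already queued" test, and each
     * entry pins a task reference.
     */
    if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) {
    	get_task_struct(tsk);
    	tsk->posix_timer_list = tasklist ? tasklist : tsk;
    	per_cpu(posix_timer_tasklist, cpu) = tsk;
    	wake_up_process(per_cpu(posix_timer_task, cpu));
    }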
+
+static int posix_cpu_kthread_should_run(unsigned int cpu)
+{
+	return __this_cpu_read(posix_timer_tasklist) != NULL;
+}
+
+static void posix_cpu_kthread_park(unsigned int cpu)
+{
+	this_cpu_write(posix_timer_th_active, false);
+}
+
+static void posix_cpu_kthread_unpark(unsigned int cpu)
+{
+	this_cpu_write(posix_timer_th_active, true);
 }
 
+static void posix_cpu_kthread_setup(unsigned int cpu)
+{
+	struct sched_param sp;
+
+	sp.sched_priority = MAX_RT_PRIO - 1;
+	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
+	posix_cpu_kthread_unpark(cpu);
+}
+
+static struct smp_hotplug_thread posix_cpu_thread = {
+	.store			= &posix_timer_task,
+	.thread_should_run	= posix_cpu_kthread_should_run,
+	.thread_fn		= posix_cpu_kthread_fn,
+	.thread_comm		= "posixcputmr/%u",
+	.setup			= posix_cpu_kthread_setup,
+	.park			= posix_cpu_kthread_park,
+	.unpark			= posix_cpu_kthread_unpark,
+};
+
+static int __init posix_cpu_thread_init(void)
+{
+	/* Start one for boot CPU. */
+	unsigned long cpu;
+	int ret;
+
+	/* init the per-cpu posix_timer_tasklets */
+	for_each_possible_cpu(cpu)
+		per_cpu(posix_timer_tasklist, cpu) = NULL;
+
+	ret = smpboot_register_percpu_thread(&posix_cpu_thread);
+	WARN_ON(ret);
+
+	return 0;
+}
+early_initcall(posix_cpu_thread_init);
+#else /* CONFIG_PREEMPT_RT_BASE */
+void run_posix_cpu_timers(struct task_struct *tsk)
+{
+	lockdep_assert_irqs_disabled();
+	__run_posix_cpu_timers(tsk);
+}
+#endif /* CONFIG_PREEMPT_RT_BASE */
+
 /*
 * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
 * The tsk->sighand->siglock must be held by the caller.
@@ -1318,6 +1487,8 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
	spin_unlock_irq(&timer.it_lock);
 
	while (error == TIMER_RETRY) {
+
+		cpu_timers_grab_expiry_lock(&timer);
		/*
		 * We need to handle case when timer was or is in the
		 * middle of firing. In other cases we already freed
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 5a01c4fdbfef40337a5a06b9944391820df8d5cd..c7e97d421590e09301226321a135623980d47f44 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -463,7 +463,7 @@ static struct k_itimer * alloc_posix_timer(void)
 
 static void k_itimer_rcu_free(struct rcu_head *head)
 {
-	struct k_itimer *tmr = container_of(head, struct k_itimer, it.rcu);
+	struct k_itimer *tmr = container_of(head, struct k_itimer, rcu);
 
	kmem_cache_free(posix_timers_cache, tmr);
 }
@@ -480,7 +480,7 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
	}
	put_pid(tmr->it_pid);
	sigqueue_free(tmr->sigq);
-	call_rcu(&tmr->it.rcu, k_itimer_rcu_free);
+	call_rcu(&tmr->rcu, k_itimer_rcu_free);
 }
 
 static int common_timer_create(struct k_itimer *new_timer)
@@ -826,6 +826,17 @@ static int common_hrtimer_try_to_cancel(struct k_itimer *timr)
	return hrtimer_try_to_cancel(&timr->it.real.timer);
 }
 
+static void timer_wait_for_callback(const struct k_clock *kc, struct k_itimer *timer)
+{
+	if (kc->timer_arm == common_hrtimer_arm)
+		hrtimer_grab_expiry_lock(&timer->it.real.timer);
+	else if (kc == &alarm_clock)
+		hrtimer_grab_expiry_lock(&timer->it.alarm.alarmtimer.timer);
+	else
+		/* posix-cpu-timers */
+		cpu_timers_grab_expiry_lock(timer);
+}
+
 /* Set a POSIX.1b interval timer. */
 int common_timer_set(struct k_itimer *timr, int flags,
		     struct itimerspec64 *new_setting,
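timer_wait_for_callback() dispatches to the right expiry lock for the timer's clock class: hrtimer-backed clocks and alarmtimers wait on the hrtimer expiry lock, CPU-time clocks on the per-CPU lock added above. The hunks below use it in a retry pattern that must drop it_lock before waiting (the running callback needs that lock to finish) while holding an RCU read lock so the k_itimer cannot be freed under the waiter — this is what the rcu_head relocation in include/linux/posix-timers.h enables. Condensed from the do_timer_settime() hunk below:

    if (error == TIMER_RETRY) {
    	rcu_read_lock();		/* keeps *timr alive ...        */
    	unlock_timer(timr, flag);	/* ... after it_lock is dropped */
    	timer_wait_for_callback(kc, timr);
    	rcu_read_unlock();
    	goto retry;
    }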
@@ -891,11 +902,15 @@ static int do_timer_settime(timer_t timer_id, int flags,
	else
		error = kc->timer_set(timr, flags, new_spec64, old_spec64);
 
-	unlock_timer(timr, flag);
	if (error == TIMER_RETRY) {
+		rcu_read_lock();
+		unlock_timer(timr, flag);
+		timer_wait_for_callback(kc, timr);
+		rcu_read_unlock();
		old_spec64 = NULL;	// We already got the old time...
		goto retry;
	}
+	unlock_timer(timr, flag);
 
	return error;
 }
@@ -957,13 +972,21 @@ int common_timer_del(struct k_itimer *timer)
	return 0;
 }
 
-static inline int timer_delete_hook(struct k_itimer *timer)
+static int timer_delete_hook(struct k_itimer *timer)
 {
	const struct k_clock *kc = timer->kclock;
+	int ret;
 
	if (WARN_ON_ONCE(!kc || !kc->timer_del))
		return -EINVAL;
-	return kc->timer_del(timer);
+	ret = kc->timer_del(timer);
+	if (ret == TIMER_RETRY) {
+		rcu_read_lock();
+		spin_unlock_irq(&timer->it_lock);
+		timer_wait_for_callback(kc, timer);
+		rcu_read_unlock();
+	}
+	return ret;
 }
 
 /* Delete a POSIX.1b interval timer. */
@@ -977,10 +1000,8 @@ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
	if (!timer)
		return -EINVAL;
 
-	if (timer_delete_hook(timer) == TIMER_RETRY) {
-		unlock_timer(timer, flags);
+	if (timer_delete_hook(timer) == TIMER_RETRY)
		goto retry_delete;
-	}
 
	spin_lock(&current->sighand->siglock);
	list_del(&timer->list);
@@ -1006,10 +1027,9 @@ static void itimer_delete(struct k_itimer *timer)
 retry_delete:
	spin_lock_irqsave(&timer->it_lock, flags);
 
-	if (timer_delete_hook(timer) == TIMER_RETRY) {
-		unlock_timer(timer, flags);
+	if (timer_delete_hook(timer) == TIMER_RETRY)
		goto retry_delete;
-	}
+
	list_del(&timer->list);
	/*
	 * This keeps any tasks waiting on the spin lock from thinking
diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h
index ddb21145211a0280f9b20667a33b9503ce62d776..725bd230a8db425ad209eef2be931c0637cb5e9c 100644
--- a/kernel/time/posix-timers.h
+++ b/kernel/time/posix-timers.h
@@ -32,6 +32,8 @@ extern const struct k_clock clock_process;
 extern const struct k_clock clock_thread;
 extern const struct k_clock alarm_clock;
 
+extern void cpu_timers_grab_expiry_lock(struct k_itimer *timer);
+
 int posix_timer_event(struct k_itimer *timr, int si_private);
 
 void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting);
diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c
index a836efd34589595f12087f11c9798e9ebb23293b..c50e8f3262deae65aeb03da6a50a2aadf505d833 100644
--- a/kernel/time/tick-broadcast-hrtimer.c
+++ b/kernel/time/tick-broadcast-hrtimer.c
@@ -107,7 +107,7 @@ static enum hrtimer_restart bc_handler(struct hrtimer *t)
 void tick_setup_hrtimer_broadcast(void)
 {
-	hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
	bctimer.function = bc_handler;
	clockevents_register_device(&ce_broadcast_hrtimer);
 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 2778d02bb71caa706a9815ec9607bc45d9cccbfb..0e9e5ebdf8ff1e8bdb2754f9b15d3e697fbc8150 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1302,7 +1302,7 @@ void tick_setup_sched_timer(void)
	/*
	 * Emulate tick processing via per-CPU hrtimers:
	 */
-	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
	ts->sched_timer.function = tick_sched_timer;
 
	/* Get the next period (per-CPU) */
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index bdbfd52b1cfda1937126805ed82e328b41293400..716364bbe5becd4eceaca692d5715ccaeb68351e 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -45,7 +45,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 
@@ -199,9 +198,7 @@ EXPORT_SYMBOL(jiffies_64);
 struct timer_base {
	raw_spinlock_t		lock;
	struct timer_list	*running_timer;
-#ifdef CONFIG_PREEMPT_RT_FULL
-	struct swait_queue_head	wait_for_running_timer;
-#endif
+	spinlock_t		expiry_lock;
	unsigned long		clk;
	unsigned long		next_expiry;
	unsigned int		cpu;
@@ -1201,33 +1198,6 @@ void add_timer_on(struct timer_list *timer, int cpu)
 }
 EXPORT_SYMBOL_GPL(add_timer_on);
 
-#ifdef CONFIG_PREEMPT_RT_FULL
-/*
- * Wait for a running timer
- */
-static void wait_for_running_timer(struct timer_list *timer)
-{
-	struct timer_base *base;
-	u32 tf = timer->flags;
-
-	if (tf & TIMER_MIGRATING)
-		return;
-
-	base = get_timer_base(tf);
-	swait_event_exclusive(base->wait_for_running_timer,
-			      base->running_timer != timer);
-}
-
-# define wakeup_timer_waiters(b)	swake_up_all(&(b)->wait_for_running_timer)
-#else
-static inline void wait_for_running_timer(struct timer_list *timer)
-{
-	cpu_relax();
-}
-
-# define wakeup_timer_waiters(b)	do { } while (0)
-#endif
-
 /**
 * del_timer - deactivate a timer.
 * @timer: the timer to be deactivated
@@ -1257,14 +1227,8 @@ int del_timer(struct timer_list *timer)
 }
 EXPORT_SYMBOL(del_timer);
 
-/**
- * try_to_del_timer_sync - Try to deactivate a timer
- * @timer: timer to delete
- *
- * This function tries to deactivate a timer. Upon successful (ret >= 0)
- * exit the timer is not queued and the handler is not running on any CPU.
- */
-int try_to_del_timer_sync(struct timer_list *timer)
+static int __try_to_del_timer_sync(struct timer_list *timer,
+				   struct timer_base **basep)
 {
	struct timer_base *base;
	unsigned long flags;
@@ -1272,7 +1236,7 @@ int try_to_del_timer_sync(struct timer_list *timer)
 
	debug_assert_init(timer);
 
-	base = lock_timer_base(timer, &flags);
+	*basep = base = lock_timer_base(timer, &flags);
 
	if (base->running_timer != timer)
		ret = detach_if_pending(timer, base, true);
@@ -1281,9 +1245,42 @@ int try_to_del_timer_sync(struct timer_list *timer)
 
	return ret;
 }
+
+/**
+ * try_to_del_timer_sync - Try to deactivate a timer
+ * @timer: timer to delete
+ *
+ * This function tries to deactivate a timer. Upon successful (ret >= 0)
+ * exit the timer is not queued and the handler is not running on any CPU.
+ */
+int try_to_del_timer_sync(struct timer_list *timer)
+{
+	struct timer_base *base;
+
+	return __try_to_del_timer_sync(timer, &base);
+}
 EXPORT_SYMBOL(try_to_del_timer_sync);
 
 #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
+static int __del_timer_sync(struct timer_list *timer)
+{
+	struct timer_base *base;
+	int ret;
+
+	for (;;) {
+		ret = __try_to_del_timer_sync(timer, &base);
+		if (ret >= 0)
+			return ret;
+
+		/*
+		 * When accessing the lock, timers of base are no longer expired
+		 * and so timer is no longer running.
+		 */
+		spin_lock(&base->expiry_lock);
+		spin_unlock(&base->expiry_lock);
+	}
+}
+
 /**
 * del_timer_sync - deactivate a timer and wait for the handler to finish.
 * @timer: the timer to be deactivated
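timer_base gains the same expiry-lock construct as the hrtimer bases, replacing the RT-only swait queue: __run_timers() holds base->expiry_lock across the expiry run, and __del_timer_sync() waits by acquiring and releasing it instead of spinning in cpu_relax() or sleeping on a wait queue. The expire_timers() hunk below adds an unlock/lock pair after every callback, which is the handover point — a waiter blocked on the lock (PI-boosted on RT) gets in as soon as "its" timer has finished, without waiting for the rest of the batch. Expiry side, per expired timer, as a sketch of the hunk below:

    raw_spin_unlock(&base->lock);
    call_timer_fn(timer, fn);
    base->running_timer = NULL;
    spin_unlock(&base->expiry_lock);	/* window for waiters ...    */
    spin_lock(&base->expiry_lock);	/* ... in __del_timer_sync() */
    raw_spin_lock(&base->lock);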
@@ -1339,12 +1336,8 @@ int del_timer_sync(struct timer_list *timer)
	 * could lead to deadlock.
	 */
	WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));
-	for (;;) {
-		int ret = try_to_del_timer_sync(timer);
-		if (ret >= 0)
-			return ret;
-		wait_for_running_timer(timer);
-	}
+
+	return __del_timer_sync(timer);
 }
 EXPORT_SYMBOL(del_timer_sync);
 #endif
@@ -1409,11 +1402,15 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
			raw_spin_unlock(&base->lock);
			call_timer_fn(timer, fn);
			base->running_timer = NULL;
+			spin_unlock(&base->expiry_lock);
+			spin_lock(&base->expiry_lock);
			raw_spin_lock(&base->lock);
		} else {
			raw_spin_unlock_irq(&base->lock);
			call_timer_fn(timer, fn);
			base->running_timer = NULL;
+			spin_unlock(&base->expiry_lock);
+			spin_lock(&base->expiry_lock);
			raw_spin_lock_irq(&base->lock);
		}
	}
@@ -1717,6 +1714,7 @@ static inline void __run_timers(struct timer_base *base)
	if (!time_after_eq(jiffies, base->clk))
		return;
 
+	spin_lock(&base->expiry_lock);
	raw_spin_lock_irq(&base->lock);
 
	/*
@@ -1744,7 +1742,7 @@ static inline void __run_timers(struct timer_base *base)
		expire_timers(base, heads + levels);
	}
	raw_spin_unlock_irq(&base->lock);
-	wakeup_timer_waiters(base);
+	spin_unlock(&base->expiry_lock);
 }
 
 /*
@@ -1991,9 +1989,7 @@ static void __init init_timer_cpu(int cpu)
		base->cpu = cpu;
		raw_spin_lock_init(&base->lock);
		base->clk = jiffies;
-#ifdef CONFIG_PREEMPT_RT_FULL
-		init_swait_queue_head(&base->wait_for_running_timer);
-#endif
+		spin_lock_init(&base->expiry_lock);
	}
 }
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 72f18b6822245b1b19ac548f848f8e1af5bc287a..1c6c542e5f36b532952ae976d01c807209766d6b 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -558,7 +558,7 @@ static void watchdog_enable(unsigned int cpu)
	 * Start the timer first to prevent the NMI watchdog triggering
	 * before the timer has a chance to fire.
	 */
-	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
	hrtimer->function = watchdog_timer_fn;
	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
		      HRTIMER_MODE_REL_PINNED);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 092fa3d75b32a8cef12ee6112343cd44f098618a..9d472d626aaac643141341052758ab5b847e5b6e 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2160,7 +2160,8 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
	s64 remaining;
	struct hrtimer_sleeper t;
 
-	hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS,
+				      current);
	hrtimer_set_expires(&t.timer, spin_until);
 
	remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer));
@@ -2175,7 +2176,6 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
		} while (ktime_compare(end_time, spin_until) < 0);
	} else {
		/* see do_nanosleep */
-		hrtimer_init_sleeper(&t, current);
		do {
			set_current_state(TASK_INTERRUPTIBLE);
			hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);