From 6261662ca105f59bd1c8676c4985b1b004cd8b19 Mon Sep 17 00:00:00 2001 From: meganz009 Date: Tue, 13 Jun 2023 10:17:11 +0800 Subject: [PATCH 01/12] sched: Add saved_state for tasks blocked on sleeping locks commit 321628fe4e281be121ed55fdf60c24c6d58cc051 upstream. Spinlocks are state preserving in !RT. RT changes the state when a task gets blocked on a lock. So we need to remember the state before the lock contention. If a regular wakeup (not a RTmutex related wakeup) happens, the saved_state is updated to running. When the lock sleep is done, the saved state is restored. Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 3 +++ kernel/sched/core.c | 33 ++++++++++++++++++++++++++++++++- kernel/sched/sched.h | 1 + 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 5425ccb2d689..1407cc0396c9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -689,6 +689,8 @@ struct task_struct { #endif /* -1 unrunnable, 0 runnable, >0 stopped: */ volatile long state; + /* saved state for "spinlock sleepers" */ + volatile long saved_state; /* * This begins the randomizable portion of task_struct. Only @@ -1799,6 +1801,7 @@ extern struct task_struct *find_get_task_by_vpid(pid_t nr); extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); +extern int wake_up_lock_sleeper(struct task_struct *tsk); extern void wake_up_new_task(struct task_struct *tsk); #ifdef CONFIG_SMP diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e37d8d0911b6..f54a56acd40b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2090,8 +2090,27 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) */ raw_spin_lock_irqsave(&p->pi_lock, flags); smp_mb__after_spinlock(); - if (!(p->state & state)) + if (!(p->state & state)) { + /* + * The task might be running due to a spinlock sleeper + * wakeup. Check the saved state and set it to running + * if the wakeup condition is true. + */ + if (!(wake_flags & WF_LOCK_SLEEPER)) { + if (p->saved_state & state) { + p->saved_state = TASK_RUNNING; + success = 1; + } + } goto out; + } + + /* + * If this is a regular wakeup, then we can unconditionally + * clear the saved state of a "lock sleeper". + */ + if (!(wake_flags & WF_LOCK_SLEEPER)) + p->saved_state = TASK_RUNNING; trace_sched_waking(p); @@ -2208,6 +2227,18 @@ int wake_up_process(struct task_struct *p) } EXPORT_SYMBOL(wake_up_process); +/** + * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock" + * @p: The process to be woken up. + * + * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate + * the nature of the wakeup. 
+ */ +int wake_up_lock_sleeper(struct task_struct *p) +{ + return try_to_wake_up(p, TASK_UNINTERRUPTIBLE, WF_LOCK_SLEEPER); +} + int wake_up_state(struct task_struct *p, unsigned int state) { return try_to_wake_up(p, state, 0); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 459f95ee16df..b52fedabbd96 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1740,6 +1740,7 @@ static inline int task_on_rq_migrating(struct task_struct *p) #define WF_SYNC 0x01 /* Waker goes to sleep after wakeup */ #define WF_FORK 0x02 /* Child wakeup after fork */ #define WF_MIGRATED 0x4 /* Internal use, task got migrated */ +#define WF_LOCK_SLEEPER 0x08 /* wakeup spinlock "sleeper" */ /* * To aid in avoiding the subversion of "niceness" due to uneven distribution -- Gitee From 2ebea21a22780aae85bd6503e928b1105e13406d Mon Sep 17 00:00:00 2001 From: meganz009 Date: Tue, 13 Jun 2023 10:22:11 +0800 Subject: [PATCH 02/12] futex: Make the futex_hash_bucket lock raw commit 61b70039f1df8a7dca7776bf3f15ffb4f07e2fb4 upstream. [ Upstream commit f646521aadedab78801c9befe193e2e8a0c99298 ] Since commit 1a1fb985f2e2b ("futex: Handle early deadlock return correctly") we can deadlock while we attempt to acquire the HB lock if we fail to acquire the lock. The RT waiter (for the futex lock) is still enqueued and acquiring the HB lock may build up a lock chain which leads to a deadlock if the owner of the lock futex-lock holds the HB lock. Make the hash bucket lock raw so it does not participate in the lockchain. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Wenya Zhang Reviewed-by: Xuexin Jiang --- kernel/futex.c | 88 +++++++++++++++++++++++++------------------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 196310555cef..f8308688745c 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -243,7 +243,7 @@ struct futex_q { struct plist_node list; struct task_struct *task; - spinlock_t *lock_ptr; + raw_spinlock_t *lock_ptr; union futex_key key; struct futex_pi_state *pi_state; struct rt_mutex_waiter *rt_waiter; @@ -264,7 +264,7 @@ static const struct futex_q futex_q_init = { */ struct futex_hash_bucket { atomic_t waiters; - spinlock_t lock; + raw_spinlock_t lock; struct plist_head chain; } ____cacheline_aligned_in_smp; @@ -911,7 +911,7 @@ void exit_pi_state_list(struct task_struct *curr) } raw_spin_unlock_irq(&curr->pi_lock); - spin_lock(&hb->lock); + raw_spin_lock(&hb->lock); raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); raw_spin_lock(&curr->pi_lock); /* @@ -921,7 +921,7 @@ void exit_pi_state_list(struct task_struct *curr) if (head->next != next) { /* retain curr->pi_lock for the loop invariant */ raw_spin_unlock(&pi_state->pi_mutex.wait_lock); - spin_unlock(&hb->lock); + raw_spin_unlock(&hb->lock); put_pi_state(pi_state); continue; } @@ -933,7 +933,7 @@ void exit_pi_state_list(struct task_struct *curr) raw_spin_unlock(&curr->pi_lock); raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - spin_unlock(&hb->lock); + raw_spin_unlock(&hb->lock); rt_mutex_futex_unlock(&pi_state->pi_mutex); put_pi_state(pi_state); @@ -1427,7 +1427,7 @@ static void __unqueue_futex(struct futex_q *q) { struct futex_hash_bucket *hb; - if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr)) + if (WARN_ON_SMP(!q->lock_ptr || !raw_spin_is_locked(q->lock_ptr)) || WARN_ON(plist_node_empty(&q->list))) return; @@ -1555,21 +1555,21 @@ static inline void double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket 
*hb2) { if (hb1 <= hb2) { - spin_lock(&hb1->lock); + raw_spin_lock(&hb1->lock); if (hb1 < hb2) - spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING); + raw_spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING); } else { /* hb1 > hb2 */ - spin_lock(&hb2->lock); - spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING); + raw_spin_lock(&hb2->lock); + raw_spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING); } } static inline void double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) { - spin_unlock(&hb1->lock); + raw_spin_unlock(&hb1->lock); if (hb1 != hb2) - spin_unlock(&hb2->lock); + raw_spin_unlock(&hb2->lock); } /* @@ -1597,7 +1597,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) if (!hb_waiters_pending(hb)) goto out_put_key; - spin_lock(&hb->lock); + raw_spin_lock(&hb->lock); plist_for_each_entry_safe(this, next, &hb->chain, list) { if (match_futex (&this->key, &key)) { @@ -1616,7 +1616,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) } } - spin_unlock(&hb->lock); + raw_spin_unlock(&hb->lock); wake_up_q(&wake_q); out_put_key: put_futex_key(&key); @@ -2221,7 +2221,7 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) q->lock_ptr = &hb->lock; - spin_lock(&hb->lock); /* implies smp_mb(); (A) */ + raw_spin_lock(&hb->lock); /* implies smp_mb(); (A) */ return hb; } @@ -2229,7 +2229,7 @@ static inline void queue_unlock(struct futex_hash_bucket *hb) __releases(&hb->lock) { - spin_unlock(&hb->lock); + raw_spin_unlock(&hb->lock); hb_waiters_dec(hb); } @@ -2268,7 +2268,7 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) __releases(&hb->lock) { __queue_me(q, hb); - spin_unlock(&hb->lock); + raw_spin_unlock(&hb->lock); } /** @@ -2284,41 +2284,41 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) */ static int unqueue_me(struct futex_q *q) { - spinlock_t *lock_ptr; + raw_spinlock_t *lock_ptr; int ret = 0; /* In the common case we don't take the spinlock, which is nice. */ retry: /* - * q->lock_ptr can change between this read and the following spin_lock. - * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and - * optimizing lock_ptr out of the logic below. + * q->lock_ptr can change between this read and the following + * raw_spin_lock. Use READ_ONCE to forbid the compiler from reloading + * q->lock_ptr and optimizing lock_ptr out of the logic below. */ lock_ptr = READ_ONCE(q->lock_ptr); if (lock_ptr != NULL) { - spin_lock(lock_ptr); + raw_spin_lock(lock_ptr); /* * q->lock_ptr can change between reading it and - * spin_lock(), causing us to take the wrong lock. This + * raw_spin_lock(), causing us to take the wrong lock. This * corrects the race condition. * * Reasoning goes like this: if we have the wrong lock, * q->lock_ptr must have changed (maybe several times) - * between reading it and the spin_lock(). It can - * change again after the spin_lock() but only if it was - * already changed before the spin_lock(). It cannot, + * between reading it and the raw_spin_lock(). It can + * change again after the raw_spin_lock() but only if it was + * already changed before the raw_spin_lock(). It cannot, * however, change back to the original value. Therefore * we can detect whether we acquired the correct lock. 
*/ if (unlikely(lock_ptr != q->lock_ptr)) { - spin_unlock(lock_ptr); + raw_spin_unlock(lock_ptr); goto retry; } __unqueue_futex(q); BUG_ON(q->pi_state); - spin_unlock(lock_ptr); + raw_spin_unlock(lock_ptr); ret = 1; } @@ -2340,7 +2340,7 @@ static void unqueue_me_pi(struct futex_q *q) put_pi_state(q->pi_state); q->pi_state = NULL; - spin_unlock(q->lock_ptr); + raw_spin_unlock(q->lock_ptr); } static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, @@ -2473,7 +2473,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, */ handle_err: raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); - spin_unlock(q->lock_ptr); + raw_spin_unlock(q->lock_ptr); switch (err) { case -EFAULT: @@ -2491,7 +2491,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, break; } - spin_lock(q->lock_ptr); + raw_spin_lock(q->lock_ptr); raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); /* @@ -2578,7 +2578,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, /* * The task state is guaranteed to be set before another task can * wake it. set_current_state() is implemented using smp_store_mb() and - * queue_me() calls spin_unlock() upon completion, both serializing + * queue_me() calls raw_spin_unlock() upon completion, both serializing * access to the hash list and forcing another memory barrier. */ set_current_state(TASK_INTERRUPTIBLE); @@ -2869,7 +2869,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, * before __rt_mutex_start_proxy_lock() is done. */ raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); - spin_unlock(q.lock_ptr); + raw_spin_unlock(q.lock_ptr); /* * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter * such that futex_unlock_pi() is guaranteed to observe the waiter when @@ -2890,7 +2890,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); cleanup: - spin_lock(q.lock_ptr); + raw_spin_lock(q.lock_ptr); /* * If we failed to acquire the lock (deadlock/signal/timeout), we must * first acquire the hb->lock before removing the lock from the @@ -2991,7 +2991,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) return ret; hb = hash_futex(&key); - spin_lock(&hb->lock); + raw_spin_lock(&hb->lock); /* * Check waiters first. We do not trust user space values at @@ -3025,7 +3025,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) * rt_waiter. Also see the WARN in wake_futex_pi(). */ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); - spin_unlock(&hb->lock); + raw_spin_unlock(&hb->lock); /* drops pi_state->pi_mutex.wait_lock */ ret = wake_futex_pi(uaddr, uval, pi_state); @@ -3064,7 +3064,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) * owner. */ if ((ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))) { - spin_unlock(&hb->lock); + raw_spin_unlock(&hb->lock); switch (ret) { case -EFAULT: goto pi_faulted; @@ -3084,7 +3084,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) ret = (curval == uval) ? 0 : -EAGAIN; out_unlock: - spin_unlock(&hb->lock); + raw_spin_unlock(&hb->lock); out_putkey: put_futex_key(&key); return ret; @@ -3258,9 +3258,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, /* Queue the futex_q, drop the hb lock, wait for wakeup. 
*/ futex_wait_queue_me(hb, &q, to); - spin_lock(&hb->lock); + raw_spin_lock(&hb->lock); ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); - spin_unlock(&hb->lock); + raw_spin_unlock(&hb->lock); if (ret) goto out_put_keys; @@ -3280,7 +3280,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, * did a lock-steal - fix up the PI-state in that case. */ if (q.pi_state && (q.pi_state->owner != current)) { - spin_lock(q.lock_ptr); + raw_spin_lock(q.lock_ptr); ret = fixup_pi_state_owner(uaddr2, &q, current); if (ret < 0 && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { pi_state = q.pi_state; @@ -3291,7 +3291,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, * the requeue_pi() code acquired for us. */ put_pi_state(q.pi_state); - spin_unlock(q.lock_ptr); + raw_spin_unlock(q.lock_ptr); /* * Adjust the return value. It's either -EFAULT or * success (1) but the caller expects 0 for success. @@ -3310,7 +3310,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, pi_mutex = &q.pi_state->pi_mutex; ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter); - spin_lock(q.lock_ptr); + raw_spin_lock(q.lock_ptr); if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter)) ret = 0; @@ -3977,7 +3977,7 @@ static int __init futex_init(void) for (i = 0; i < futex_hashsize; i++) { atomic_set(&futex_queues[i].waiters, 0); plist_head_init(&futex_queues[i].chain); - spin_lock_init(&futex_queues[i].lock); + raw_spin_lock_init(&futex_queues[i].lock); } return 0; -- Gitee From 7ee74bb746778cb54692e2a73d5c0ce40663abe2 Mon Sep 17 00:00:00 2001 From: meganz009 Date: Tue, 13 Jun 2023 10:25:26 +0800 Subject: [PATCH 03/12] futex: Delay deallocation of pi_state commit ebba085e584e2dafd1d06328a5d4f3b3f3fc0269 upstream. [ Upstream commit d7c7cf8cb68b7df17e6e50be1f25f35d83e686c7 ] On -RT we can't invoke kfree() in a non-preemptible context. Defer the deallocation of pi_state to preemptible context. Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Wenya Zhang Reviewed-by: Xuexin Jiang --- kernel/futex.c | 55 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index f8308688745c..f05c6ff421b6 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -825,13 +825,13 @@ static void get_pi_state(struct futex_pi_state *pi_state) * Drops a reference to the pi_state object and frees or caches it * when the last reference is gone. */ -static void put_pi_state(struct futex_pi_state *pi_state) +static struct futex_pi_state *__put_pi_state(struct futex_pi_state *pi_state) { if (!pi_state) - return; + return NULL; if (!atomic_dec_and_test(&pi_state->refcount)) - return; + return NULL; /* * If pi_state->owner is NULL, the owner is most probably dying @@ -851,9 +851,7 @@ static void put_pi_state(struct futex_pi_state *pi_state) raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); } - if (current->pi_state_cache) { - kfree(pi_state); - } else { + if (!current->pi_state_cache) { /* * pi_state->list is already empty. * clear pi_state->owner. 
@@ -862,6 +860,30 @@ static void put_pi_state(struct futex_pi_state *pi_state) pi_state->owner = NULL; atomic_set(&pi_state->refcount, 1); current->pi_state_cache = pi_state; + pi_state = NULL; + } + return pi_state; +} + +static void put_pi_state(struct futex_pi_state *pi_state) +{ + kfree(__put_pi_state(pi_state)); +} + +static void put_pi_state_atomic(struct futex_pi_state *pi_state, + struct list_head *to_free) +{ + if (__put_pi_state(pi_state)) + list_add(&pi_state->list, to_free); +} + +static void free_pi_state_list(struct list_head *to_free) +{ + struct futex_pi_state *p, *next; + + list_for_each_entry_safe(p, next, to_free, list) { + list_del(&p->list); + kfree(p); } } @@ -878,6 +900,7 @@ void exit_pi_state_list(struct task_struct *curr) struct futex_pi_state *pi_state; struct futex_hash_bucket *hb; union futex_key key = FUTEX_KEY_INIT; + LIST_HEAD(to_free); if (!futex_cmpxchg_enabled) return; @@ -922,7 +945,7 @@ void exit_pi_state_list(struct task_struct *curr) /* retain curr->pi_lock for the loop invariant */ raw_spin_unlock(&pi_state->pi_mutex.wait_lock); raw_spin_unlock(&hb->lock); - put_pi_state(pi_state); + put_pi_state_atomic(pi_state, &to_free); continue; } @@ -941,6 +964,8 @@ void exit_pi_state_list(struct task_struct *curr) raw_spin_lock_irq(&curr->pi_lock); } raw_spin_unlock_irq(&curr->pi_lock); + + free_pi_state_list(&to_free); } #endif @@ -1923,6 +1948,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, struct futex_hash_bucket *hb1, *hb2; struct futex_q *this, *next; DEFINE_WAKE_Q(wake_q); + LIST_HEAD(to_free); if (nr_wake < 0 || nr_requeue < 0) return -EINVAL; @@ -2160,7 +2186,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, * object. */ this->pi_state = NULL; - put_pi_state(pi_state); + put_pi_state_atomic(pi_state, &to_free); /* * We stop queueing more waiters and let user * space deal with the mess. @@ -2177,7 +2203,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, * in futex_proxy_trylock_atomic() or in lookup_pi_state(). We * need to drop it here again. */ - put_pi_state(pi_state); + put_pi_state_atomic(pi_state, &to_free); out_unlock: double_unlock_hb(hb1, hb2); @@ -2198,6 +2224,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, out_put_key1: put_futex_key(&key1); out: + free_pi_state_list(&to_free); return ret ? ret : task_count; } @@ -2334,13 +2361,16 @@ static int unqueue_me(struct futex_q *q) static void unqueue_me_pi(struct futex_q *q) __releases(q->lock_ptr) { + struct futex_pi_state *ps; + __unqueue_futex(q); BUG_ON(!q->pi_state); - put_pi_state(q->pi_state); + ps = __put_pi_state(q->pi_state); q->pi_state = NULL; raw_spin_unlock(q->lock_ptr); + kfree(ps); } static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, @@ -3280,6 +3310,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, * did a lock-steal - fix up the PI-state in that case. */ if (q.pi_state && (q.pi_state->owner != current)) { + struct futex_pi_state *ps_free; + raw_spin_lock(q.lock_ptr); ret = fixup_pi_state_owner(uaddr2, &q, current); if (ret < 0 && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { @@ -3290,8 +3322,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, * Drop the reference to the pi state which * the requeue_pi() code acquired for us. */ - put_pi_state(q.pi_state); + ps_free = __put_pi_state(q.pi_state); raw_spin_unlock(q.lock_ptr); + kfree(ps_free); /* * Adjust the return value. 
It's either -EFAULT or * success (1) but the caller expects 0 for success. -- Gitee From 753c62010085b8e404196213f4c7256d7c59cea0 Mon Sep 17 00:00:00 2001 From: meganz009 Date: Tue, 13 Jun 2023 10:26:45 +0800 Subject: [PATCH 04/12] mm/zswap: Do not disable preemption in zswap_frontswap_store() commit b68bca0d18af8b42c058485dfde37d4acfbabcc5 upstream. [ Upstream commit 4e4cf4be79635e67144632d9135286381acbc95a ] Zswap causes "BUG: scheduling while atomic" by blocking on a rt_spin_lock() with preemption disabled. The preemption is disabled by get_cpu_var() in zswap_frontswap_store() to protect the access of the zswap_dstmem percpu variable. Use get_locked_var() to protect the percpu zswap_dstmem variable, making the code preemptive. As get_cpu_ptr() also disables preemption, replace it by this_cpu_ptr() and remove the counterpart put_cpu_ptr(). Steps to Reproduce: 1. # grubby --args "zswap.enabled=1" --update-kernel DEFAULT 2. # reboot 3. Calculate the amount o memory to be used by the test: ---> grep MemAvailable /proc/meminfo ---> Add 25% ~ 50% to that value 4. # stress --vm 1 --vm-bytes ${MemAvailable+25%} --timeout 240s Usually, in less than 5 minutes the backtrace listed below appears, followed by a kernel panic: | BUG: scheduling while atomic: kswapd1/181/0x00000002 | | Preemption disabled at: | [] zswap_frontswap_store+0x21a/0x6e1 | | Kernel panic - not syncing: scheduling while atomic | CPU: 14 PID: 181 Comm: kswapd1 Kdump: loaded Not tainted 5.0.14-rt9 #1 | Hardware name: AMD Pence/Pence, BIOS WPN2321X_Weekly_12_03_21 03/19/2012 | Call Trace: | panic+0x106/0x2a7 | __schedule_bug.cold+0x3f/0x51 | __schedule+0x5cb/0x6f0 | schedule+0x43/0xd0 | rt_spin_lock_slowlock_locked+0x114/0x2b0 | rt_spin_lock_slowlock+0x51/0x80 | zbud_alloc+0x1da/0x2d0 | zswap_frontswap_store+0x31a/0x6e1 | __frontswap_store+0xab/0x130 | swap_writepage+0x39/0x70 | pageout.isra.0+0xe3/0x320 | shrink_page_list+0xa8e/0xd10 | shrink_inactive_list+0x251/0x840 | shrink_node_memcg+0x213/0x770 | shrink_node+0xd9/0x450 | balance_pgdat+0x2d5/0x510 | kswapd+0x218/0x470 | kthread+0xfb/0x130 | ret_from_fork+0x27/0x50 Cc: stable-rt@vger.kernel.org Reported-by: Ping Fang Signed-off-by: Luis Claudio R. 
Goncalves Reviewed-by: Daniel Bristot de Oliveira Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Steven Rostedt (VMware) --- mm/zswap.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/mm/zswap.c b/mm/zswap.c index 7d748a489541..2537946c32c6 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -989,6 +990,8 @@ static void zswap_fill_page(void *ptr, unsigned long value) memset_l(page, value, PAGE_SIZE / sizeof(unsigned long)); } +/* protect zswap_dstmem from concurrency */ +static DEFINE_LOCAL_IRQ_LOCK(zswap_dstmem_lock); /********************************* * frontswap hooks **********************************/ @@ -1066,12 +1069,11 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, } /* compress */ - dst = get_cpu_var(zswap_dstmem); - tfm = *get_cpu_ptr(entry->pool->tfm); + dst = get_locked_var(zswap_dstmem_lock, zswap_dstmem); + tfm = *this_cpu_ptr(entry->pool->tfm); src = kmap_atomic(page); ret = crypto_comp_compress(tfm, src, PAGE_SIZE, dst, &dlen); kunmap_atomic(src); - put_cpu_ptr(entry->pool->tfm); if (ret) { ret = -EINVAL; goto put_dstmem; @@ -1095,7 +1097,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, memcpy(buf, &zhdr, hlen); memcpy(buf + hlen, dst, dlen); zpool_unmap_handle(entry->pool->zpool, handle); - put_cpu_var(zswap_dstmem); + put_locked_var(zswap_dstmem_lock, zswap_dstmem); /* populate entry */ entry->offset = offset; @@ -1123,7 +1125,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, return 0; put_dstmem: - put_cpu_var(zswap_dstmem); + put_locked_var(zswap_dstmem_lock, zswap_dstmem); zswap_pool_put(entry->pool); freepage: zswap_entry_cache_free(entry); -- Gitee From 796cac431714ebba016edf4d5615c03098ba7d46 Mon Sep 17 00:00:00 2001 From: meganz009 Date: Tue, 13 Jun 2023 10:27:47 +0800 Subject: [PATCH 05/12] revert-aio commit a127a24d65e5ec18ec4af24df6695fbaccf925ca upstream. revert: fs/aio: simple simple work Signed-off-by: Steven Rostedt (VMware) --- fs/aio.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 94c37e04b0f5..9b09aaa8aedd 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include @@ -122,7 +121,6 @@ struct kioctx { long nr_pages; struct rcu_work free_rwork; /* see free_ioctx() */ - struct swork_event free_swork; /* see free_ioctx() */ /* * signals when all in-flight requests are done @@ -268,7 +266,6 @@ static int __init aio_setup(void) .mount = aio_mount, .kill_sb = kill_anon_super, }; - BUG_ON(swork_get()); aio_mnt = kern_mount(&aio_fs); if (IS_ERR(aio_mnt)) panic("Failed to create aio fs mount."); @@ -610,9 +607,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref) * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - * now it's safe to cancel any that need to be. 
*/ -static void free_ioctx_users_work(struct swork_event *sev) +static void free_ioctx_users(struct percpu_ref *ref) { - struct kioctx *ctx = container_of(sev, struct kioctx, free_swork); + struct kioctx *ctx = container_of(ref, struct kioctx, users); struct aio_kiocb *req; spin_lock_irq(&ctx->ctx_lock); @@ -630,14 +627,6 @@ static void free_ioctx_users_work(struct swork_event *sev) percpu_ref_put(&ctx->reqs); } -static void free_ioctx_users(struct percpu_ref *ref) -{ - struct kioctx *ctx = container_of(ref, struct kioctx, users); - - INIT_SWORK(&ctx->free_swork, free_ioctx_users_work); - swork_queue(&ctx->free_swork); -} - static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) { unsigned i, new_nr; -- Gitee From 0367c2b687569b9a74fe7585bfc2dd0ffb19057c Mon Sep 17 00:00:00 2001 From: meganz009 Date: Tue, 13 Jun 2023 10:28:00 +0800 Subject: [PATCH 06/12] fs/aio: simple simple work commit fb686d1f8844133f15f617e0effa288bcbef4a18 upstream. [ Upstream commit 1a142116f6435ef070ecebb66d2d599507c10601 ] |BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:768 |in_atomic(): 1, irqs_disabled(): 0, pid: 26, name: rcuos/2 |2 locks held by rcuos/2/26: | #0: (rcu_callback){.+.+..}, at: [] rcu_nocb_kthread+0x1e2/0x380 | #1: (rcu_read_lock_sched){.+.+..}, at: [] percpu_ref_kill_rcu+0xa6/0x1c0 |Preemption disabled at:[] rcu_nocb_kthread+0x263/0x380 |Call Trace: | [] dump_stack+0x4e/0x9c | [] __might_sleep+0xfb/0x170 | [] rt_spin_lock+0x24/0x70 | [] free_ioctx_users+0x30/0x130 | [] percpu_ref_kill_rcu+0x1b4/0x1c0 | [] rcu_nocb_kthread+0x263/0x380 | [] kthread+0xd6/0xf0 | [] ret_from_fork+0x7c/0xb0 replace this preempt_disable() friendly swork. Reported-By: Mike Galbraith Suggested-by: Benjamin LaHaise Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Steven Rostedt (VMware) --- fs/aio.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 9b09aaa8aedd..3078645fa900 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -121,6 +121,7 @@ struct kioctx { long nr_pages; struct rcu_work free_rwork; /* see free_ioctx() */ + struct kthread_work free_kwork; /* see free_ioctx() */ /* * signals when all in-flight requests are done @@ -607,9 +608,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref) * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - * now it's safe to cancel any that need to be. */ -static void free_ioctx_users(struct percpu_ref *ref) +static void free_ioctx_users_work(struct kthread_work *work) { - struct kioctx *ctx = container_of(ref, struct kioctx, users); + struct kioctx *ctx = container_of(work, struct kioctx, free_kwork); struct aio_kiocb *req; spin_lock_irq(&ctx->ctx_lock); @@ -627,6 +628,14 @@ static void free_ioctx_users(struct percpu_ref *ref) percpu_ref_put(&ctx->reqs); } +static void free_ioctx_users(struct percpu_ref *ref) +{ + struct kioctx *ctx = container_of(ref, struct kioctx, users); + + kthread_init_work(&ctx->free_kwork, free_ioctx_users_work); + kthread_schedule_work(&ctx->free_kwork); +} + static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) { unsigned i, new_nr; -- Gitee From 1e422684e65fbca92d4d3133a134d4f76e6d85be Mon Sep 17 00:00:00 2001 From: meganz009 Date: Tue, 13 Jun 2023 10:28:20 +0800 Subject: [PATCH 07/12] revert-thermal commit 466a66edbba9ee1f7405d51c674dd3dc11bbdb13 upstream. 
Revert: thermal: Defer thermal wakups to threads Signed-off-by: Steven Rostedt (VMware) --- drivers/thermal/x86_pkg_temp_thermal.c | 52 ++------------------------ 1 file changed, 3 insertions(+), 49 deletions(-) diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c index a5991cbb408f..1ef937d799e4 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include @@ -330,7 +329,7 @@ static void pkg_thermal_schedule_work(int cpu, struct delayed_work *work) schedule_delayed_work_on(cpu, work, ms); } -static void pkg_thermal_notify_work(struct swork_event *event) +static int pkg_thermal_notify(u64 msr_val) { int cpu = smp_processor_id(); struct pkg_device *pkgdev; @@ -349,47 +348,9 @@ static void pkg_thermal_notify_work(struct swork_event *event) } spin_unlock_irqrestore(&pkg_temp_lock, flags); -} - -#ifdef CONFIG_PREEMPT_RT_FULL -static struct swork_event notify_work; - -static int pkg_thermal_notify_work_init(void) -{ - int err; - - err = swork_get(); - if (err) - return err; - - INIT_SWORK(¬ify_work, pkg_thermal_notify_work); return 0; } -static void pkg_thermal_notify_work_cleanup(void) -{ - swork_put(); -} - -static int pkg_thermal_notify(u64 msr_val) -{ - swork_queue(¬ify_work); - return 0; -} - -#else /* !CONFIG_PREEMPT_RT_FULL */ - -static int pkg_thermal_notify_work_init(void) { return 0; } - -static void pkg_thermal_notify_work_cleanup(void) { } - -static int pkg_thermal_notify(u64 msr_val) -{ - pkg_thermal_notify_work(NULL); - return 0; -} -#endif /* CONFIG_PREEMPT_RT_FULL */ - static int pkg_temp_thermal_device_add(unsigned int cpu) { int pkgid = topology_logical_package_id(cpu); @@ -554,16 +515,11 @@ static int __init pkg_temp_thermal_init(void) if (!x86_match_cpu(pkg_temp_thermal_ids)) return -ENODEV; - if (!pkg_thermal_notify_work_init()) - return -ENODEV; - max_packages = topology_max_packages(); packages = kcalloc(max_packages, sizeof(struct pkg_device *), GFP_KERNEL); - if (!packages) { - ret = -ENOMEM; - goto err; - } + if (!packages) + return -ENOMEM; ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "thermal/x86_pkg:online", pkg_thermal_cpu_online, pkg_thermal_cpu_offline); @@ -581,7 +537,6 @@ static int __init pkg_temp_thermal_init(void) return 0; err: - pkg_thermal_notify_work_cleanup(); kfree(packages); return ret; } @@ -595,7 +550,6 @@ static void __exit pkg_temp_thermal_exit(void) cpuhp_remove_state(pkg_thermal_hp_state); debugfs_remove_recursive(debugfs); kfree(packages); - pkg_thermal_notify_work_cleanup(); } module_exit(pkg_temp_thermal_exit) -- Gitee From eab8e64468598c419d40ab183fb247b6d0c5cded Mon Sep 17 00:00:00 2001 From: meganz009 Date: Tue, 13 Jun 2023 10:28:39 +0800 Subject: [PATCH 08/12] thermal: Defer thermal wakups to threads commit d68ea3ebd9e76e3a78a5034c6af647c0c948b6a8 upstream. [ Upstream commit ad2408dc248fe58536eef5b2b5734d8f9d3a280b ] On RT the spin lock in pkg_temp_thermal_platfrom_thermal_notify will call schedule while we run in irq context. [] dump_stack+0x4e/0x8f [] __schedule_bug+0xa6/0xb4 [] __schedule+0x5b4/0x700 [] schedule+0x2a/0x90 [] rt_spin_lock_slowlock+0xe5/0x2d0 [] rt_spin_lock+0x25/0x30 [] pkg_temp_thermal_platform_thermal_notify+0x45/0x134 [x86_pkg_temp_thermal] [] ? therm_throt_process+0x1b/0x160 [] intel_thermal_interrupt+0x211/0x250 [] smp_thermal_interrupt+0x21/0x40 [] thermal_interrupt+0x6d/0x80 Let's defer the work to a kthread. 
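A minimal sketch of that deferral pattern, added here for illustration and not part of the patch; kthread_schedule_work() is the helper this series layers on top of the global kthread worker, and the my_* names are hypothetical:

	#include <linux/kthread.h>

	/* Illustration only: push the heavy lifting from the MSR interrupt
	 * notifier to a kthread, where sleeping locks are allowed on RT.
	 */
	static void my_notify_work(struct kthread_work *work)
	{
		/* may take rt_spin_lock based locks here */
	}

	static DEFINE_KTHREAD_WORK(my_notify_kwork, my_notify_work);

	static int my_thermal_notify(u64 msr_val)
	{
		/* hard interrupt context: only queue, never lock */
		kthread_schedule_work(&my_notify_kwork);
		return 0;
	}
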
Signed-off-by: Daniel Wagner Signed-off-by: Steven Rostedt (VMware) [bigeasy: reoder init/denit position. TODO: flush swork on exit] Signed-off-by: Sebastian Andrzej Siewior --- drivers/thermal/x86_pkg_temp_thermal.c | 28 +++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c index 1ef937d799e4..82f21fd4afb0 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -329,7 +330,7 @@ static void pkg_thermal_schedule_work(int cpu, struct delayed_work *work) schedule_delayed_work_on(cpu, work, ms); } -static int pkg_thermal_notify(u64 msr_val) +static void pkg_thermal_notify_work(struct kthread_work *work) { int cpu = smp_processor_id(); struct pkg_device *pkgdev; @@ -348,8 +349,32 @@ static int pkg_thermal_notify(u64 msr_val) } spin_unlock_irqrestore(&pkg_temp_lock, flags); +} + +#ifdef CONFIG_PREEMPT_RT_FULL +static DEFINE_KTHREAD_WORK(notify_work, pkg_thermal_notify_work); + +static int pkg_thermal_notify(u64 msr_val) +{ + kthread_schedule_work(¬ify_work); + return 0; +} + +static void pkg_thermal_notify_flush(void) +{ + kthread_flush_work(¬ify_work); +} + +#else /* !CONFIG_PREEMPT_RT_FULL */ + +static void pkg_thermal_notify_flush(void) { } + +static int pkg_thermal_notify(u64 msr_val) +{ + pkg_thermal_notify_work(NULL); return 0; } +#endif /* CONFIG_PREEMPT_RT_FULL */ static int pkg_temp_thermal_device_add(unsigned int cpu) { @@ -548,6 +573,7 @@ static void __exit pkg_temp_thermal_exit(void) platform_thermal_package_rate_control = NULL; cpuhp_remove_state(pkg_thermal_hp_state); + pkg_thermal_notify_flush(); debugfs_remove_recursive(debugfs); kfree(packages); } -- Gitee From a8e4e792b25004859f5660c0f0a8583641c48206 Mon Sep 17 00:00:00 2001 From: meganz009 Date: Tue, 13 Jun 2023 10:32:21 +0800 Subject: [PATCH 09/12] revert-block commit 5d4341f0df610e341ef6248b72a4d24ea53b7694 upstream. Revert swork version of: block: blk-mq: move blk_queue_usage_counter_release() into process context In order to switch to upstream, we need to revert the swork code. 
Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Wenya Zhang Reviewed-by: Xuexin Jiang --- block/blk-core.c | 14 +------------- include/linux/blkdev.h | 2 -- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 2111ef5ff3fc..a4a52b742885 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1017,21 +1017,12 @@ void blk_queue_exit(struct request_queue *q) percpu_ref_put(&q->q_usage_counter); } -static void blk_queue_usage_counter_release_swork(struct swork_event *sev) -{ - struct request_queue *q = - container_of(sev, struct request_queue, mq_pcpu_wake); - - wake_up_all(&q->mq_freeze_wq); -} - static void blk_queue_usage_counter_release(struct percpu_ref *ref) { struct request_queue *q = container_of(ref, struct request_queue, q_usage_counter); - if (wq_has_sleeper(&q->mq_freeze_wq)) - swork_queue(&q->mq_pcpu_wake); + wake_up_all(&q->mq_freeze_wq); } static void blk_rq_timed_out_timer(struct timer_list *t) @@ -1130,7 +1121,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, queue_flag_set_unlocked(QUEUE_FLAG_BYPASS, q); init_waitqueue_head(&q->mq_freeze_wq); - INIT_SWORK(&q->mq_pcpu_wake, blk_queue_usage_counter_release_swork); mutex_init(&q->mq_freeze_lock); /* @@ -4015,8 +4005,6 @@ int __init blk_dev_init(void) if (!kblockd_workqueue) panic("Failed to create kblockd\n"); - BUG_ON(swork_get()); - request_cachep = kmem_cache_create("blkdev_requests", sizeof(struct request), 0, SLAB_PANIC, NULL); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9acd42a2ff2e..a4811f216cda 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -27,7 +27,6 @@ #include #include #include -#include struct module; struct scsi_ioctl_command; @@ -684,7 +683,6 @@ struct request_queue { #endif struct rcu_head rcu_head; wait_queue_head_t mq_freeze_wq; - struct swork_event mq_pcpu_wake; /* * Protect concurrent access to q_usage_counter by * percpu_ref_kill() and percpu_ref_reinit(). -- Gitee From 4d4c8a7a4c3ebfeaad7a887495c407817563ab22 Mon Sep 17 00:00:00 2001 From: meganz009 Date: Tue, 13 Jun 2023 10:38:36 +0800 Subject: [PATCH 10/12] block: blk-mq: move blk_queue_usage_counter_release() into process context commit 7de506cb172bfbca14767ae02f0309d70d3c7dcb upstream. 
[ Upstream commit 61c928ecf4fe200bda9b49a0813b5ba0f43995b5 ] | BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:914 | in_atomic(): 1, irqs_disabled(): 0, pid: 255, name: kworker/u257:6 | 5 locks held by kworker/u257:6/255: | #0: ("events_unbound"){.+.+.+}, at: [] process_one_work+0x171/0x5e0 | #1: ((&entry->work)){+.+.+.}, at: [] process_one_work+0x171/0x5e0 | #2: (&shost->scan_mutex){+.+.+.}, at: [] __scsi_add_device+0xa3/0x130 [scsi_mod] | #3: (&set->tag_list_lock){+.+...}, at: [] blk_mq_init_queue+0x96a/0xa50 | #4: (rcu_read_lock_sched){......}, at: [] percpu_ref_kill_and_confirm+0x1d/0x120 | Preemption disabled at:[] blk_mq_freeze_queue_start+0x56/0x70 | | CPU: 2 PID: 255 Comm: kworker/u257:6 Not tainted 3.18.7-rt0+ #1 | Workqueue: events_unbound async_run_entry_fn | 0000000000000003 ffff8800bc29f998 ffffffff815b3a12 0000000000000000 | 0000000000000000 ffff8800bc29f9b8 ffffffff8109aa16 ffff8800bc29fa28 | ffff8800bc5d1bc8 ffff8800bc29f9e8 ffffffff815b8dd4 ffff880000000000 | Call Trace: | [] dump_stack+0x4f/0x7c | [] __might_sleep+0x116/0x190 | [] rt_spin_lock+0x24/0x60 | [] __wake_up+0x29/0x60 | [] blk_mq_usage_counter_release+0x1e/0x20 | [] percpu_ref_kill_and_confirm+0x106/0x120 | [] blk_mq_freeze_queue_start+0x56/0x70 | [] blk_mq_update_tag_set_depth+0x40/0xd0 | [] blk_mq_init_queue+0x98c/0xa50 | [] scsi_mq_alloc_queue+0x20/0x60 [scsi_mod] | [] scsi_alloc_sdev+0x2f5/0x370 [scsi_mod] | [] scsi_probe_and_add_lun+0x9e4/0xdd0 [scsi_mod] | [] __scsi_add_device+0x126/0x130 [scsi_mod] | [] ata_scsi_scan_host+0xaf/0x200 [libata] | [] async_port_probe+0x46/0x60 [libata] | [] async_run_entry_fn+0x3b/0xf0 | [] process_one_work+0x201/0x5e0 percpu_ref_kill_and_confirm() invokes blk_mq_usage_counter_release() in a rcu-sched region. swait based wake queue can't be used due to wake_up_all() usage and disabled interrupts in !RT configs (as reported by Corey Minyard). The wq_has_sleeper() check has been suggested by Peter Zijlstra. 
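Condensed sketch of the resulting pattern (illustration only, hypothetical my_* names; the real change is in the diff below): the percpu_ref release callback, which runs with preemption disabled, only queues a kthread work item, and the actual wake_up_all() happens later from the kthread:

	#include <linux/kthread.h>
	#include <linux/percpu-refcount.h>
	#include <linux/wait.h>

	struct my_queue {
		struct percpu_ref	usage;
		wait_queue_head_t	freeze_wq;
		struct kthread_work	pcpu_wake;
	};

	static void my_usage_release_wrk(struct kthread_work *work)
	{
		struct my_queue *q = container_of(work, struct my_queue, pcpu_wake);

		wake_up_all(&q->freeze_wq);	/* may take a sleeping lock on RT */
	}

	static void my_usage_release(struct percpu_ref *ref)
	{
		struct my_queue *q = container_of(ref, struct my_queue, usage);

		/* only bother the kthread when somebody is actually waiting */
		if (wq_has_sleeper(&q->freeze_wq))
			kthread_schedule_work(&q->pcpu_wake);
	}
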
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Wenya Zhang Reviewed-by: Xuexin Jiang --- block/blk-core.c | 12 +++++++++++- include/linux/blkdev.h | 2 ++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index a4a52b742885..5ea3b4177a26 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1017,12 +1017,21 @@ void blk_queue_exit(struct request_queue *q) percpu_ref_put(&q->q_usage_counter); } +static void blk_queue_usage_counter_release_wrk(struct kthread_work *work) +{ + struct request_queue *q = + container_of(work, struct request_queue, mq_pcpu_wake); + + wake_up_all(&q->mq_freeze_wq); +} + static void blk_queue_usage_counter_release(struct percpu_ref *ref) { struct request_queue *q = container_of(ref, struct request_queue, q_usage_counter); - wake_up_all(&q->mq_freeze_wq); + if (wq_has_sleeper(&q->mq_freeze_wq)) + kthread_schedule_work(&q->mq_pcpu_wake); } static void blk_rq_timed_out_timer(struct timer_list *t) @@ -1121,6 +1130,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, queue_flag_set_unlocked(QUEUE_FLAG_BYPASS, q); init_waitqueue_head(&q->mq_freeze_wq); + kthread_init_work(&q->mq_pcpu_wake, blk_queue_usage_counter_release_wrk); mutex_init(&q->mq_freeze_lock); /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a4811f216cda..fd2c76d1832d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -683,6 +684,7 @@ struct request_queue { #endif struct rcu_head rcu_head; wait_queue_head_t mq_freeze_wq; + struct kthread_work mq_pcpu_wake; /* * Protect concurrent access to q_usage_counter by * percpu_ref_kill() and percpu_ref_reinit(). -- Gitee From 5cdbbfa86f02bf8de384a619ee725fa3d9167c81 Mon Sep 17 00:00:00 2001 From: meganz009 Date: Tue, 13 Jun 2023 10:42:07 +0800 Subject: [PATCH 11/12] workqueue: rework commit b5e4a5ce5478ed6616eec2fed6c13da28cdbb61c upstream. [ Upstream commit d15a862f24df983458533aebd6fa207ecdd1095a ] This is an all-in change of the workqueue rework. The worker_pool.lock is made to raw_spinlock_t. With this change we can schedule workitems from preempt-disable sections and sections with disabled interrupts. This change allows to remove all kthread_.* workarounds we used to have. 
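For illustration (not part of the patch, hypothetical my_* names): with the pool lock made raw, the ordinary workqueue API is usable again from preempt-disabled and IRQ-disabled sections on RT, which is what lets the kthread_work detours elsewhere in the tree go away:

	#include <linux/interrupt.h>
	#include <linux/workqueue.h>

	static void my_deferred_fn(struct work_struct *work)
	{
		/* ordinary worker context, sleeping is fine */
	}

	static DECLARE_WORK(my_deferred, my_deferred_fn);

	static irqreturn_t my_hardirq(int irq, void *dev_id)
	{
		/* legal even in hard interrupt / preempt-disabled sections */
		schedule_work(&my_deferred);
		return IRQ_HANDLED;
	}
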
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Wenya Zhang Reviewed-by: Xuexin Jiang --- block/blk-core.c | 6 +- drivers/block/loop.c | 2 +- drivers/spi/spi-rockchip.c | 1 - drivers/thermal/x86_pkg_temp_thermal.c | 28 +-- fs/aio.c | 10 +- include/linux/blk-cgroup.h | 2 +- include/linux/blkdev.h | 2 +- include/linux/interrupt.h | 5 - include/linux/kthread-cgroup.h | 17 -- include/linux/kthread.h | 15 +- include/linux/swait.h | 14 ++ include/linux/workqueue.h | 4 - init/main.c | 1 - kernel/irq/manage.c | 36 +-- kernel/kthread.c | 14 -- kernel/sched/core.c | 1 + kernel/time/hrtimer.c | 24 -- kernel/workqueue.c | 304 +++++++++++-------------- 18 files changed, 166 insertions(+), 320 deletions(-) delete mode 100644 include/linux/kthread-cgroup.h diff --git a/block/blk-core.c b/block/blk-core.c index 5ea3b4177a26..2a74c12d266d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1017,7 +1017,7 @@ void blk_queue_exit(struct request_queue *q) percpu_ref_put(&q->q_usage_counter); } -static void blk_queue_usage_counter_release_wrk(struct kthread_work *work) +static void blk_queue_usage_counter_release_wrk(struct work_struct *work) { struct request_queue *q = container_of(work, struct request_queue, mq_pcpu_wake); @@ -1031,7 +1031,7 @@ static void blk_queue_usage_counter_release(struct percpu_ref *ref) container_of(ref, struct request_queue, q_usage_counter); if (wq_has_sleeper(&q->mq_freeze_wq)) - kthread_schedule_work(&q->mq_pcpu_wake); + schedule_work(&q->mq_pcpu_wake); } static void blk_rq_timed_out_timer(struct timer_list *t) @@ -1130,7 +1130,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, queue_flag_set_unlocked(QUEUE_FLAG_BYPASS, q); init_waitqueue_head(&q->mq_freeze_wq); - kthread_init_work(&q->mq_pcpu_wake, blk_queue_usage_counter_release_wrk); + INIT_WORK(&q->mq_pcpu_wake, blk_queue_usage_counter_release_wrk); mutex_init(&q->mq_freeze_lock); /* diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 01b7bb2a97c0..1a71e49fcda4 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -70,7 +70,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index 7998278221a4..e05426079b4d 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -23,7 +23,6 @@ #include #include #include -#include #define DRIVER_NAME "rockchip-spi" diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c index 82f21fd4afb0..1ef937d799e4 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include @@ -330,7 +329,7 @@ static void pkg_thermal_schedule_work(int cpu, struct delayed_work *work) schedule_delayed_work_on(cpu, work, ms); } -static void pkg_thermal_notify_work(struct kthread_work *work) +static int pkg_thermal_notify(u64 msr_val) { int cpu = smp_processor_id(); struct pkg_device *pkgdev; @@ -349,32 +348,8 @@ static void pkg_thermal_notify_work(struct kthread_work *work) } spin_unlock_irqrestore(&pkg_temp_lock, flags); -} - -#ifdef CONFIG_PREEMPT_RT_FULL -static DEFINE_KTHREAD_WORK(notify_work, pkg_thermal_notify_work); - -static int pkg_thermal_notify(u64 msr_val) -{ - kthread_schedule_work(¬ify_work); - return 0; -} - -static void pkg_thermal_notify_flush(void) -{ - kthread_flush_work(¬ify_work); -} - -#else /* !CONFIG_PREEMPT_RT_FULL */ - -static void 
pkg_thermal_notify_flush(void) { } - -static int pkg_thermal_notify(u64 msr_val) -{ - pkg_thermal_notify_work(NULL); return 0; } -#endif /* CONFIG_PREEMPT_RT_FULL */ static int pkg_temp_thermal_device_add(unsigned int cpu) { @@ -573,7 +548,6 @@ static void __exit pkg_temp_thermal_exit(void) platform_thermal_package_rate_control = NULL; cpuhp_remove_state(pkg_thermal_hp_state); - pkg_thermal_notify_flush(); debugfs_remove_recursive(debugfs); kfree(packages); } diff --git a/fs/aio.c b/fs/aio.c index 3078645fa900..0e285c800465 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -121,7 +121,7 @@ struct kioctx { long nr_pages; struct rcu_work free_rwork; /* see free_ioctx() */ - struct kthread_work free_kwork; /* see free_ioctx() */ + struct work_struct free_work; /* see free_ioctx() */ /* * signals when all in-flight requests are done @@ -608,9 +608,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref) * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - * now it's safe to cancel any that need to be. */ -static void free_ioctx_users_work(struct kthread_work *work) +static void free_ioctx_users_work(struct work_struct *work) { - struct kioctx *ctx = container_of(work, struct kioctx, free_kwork); + struct kioctx *ctx = container_of(work, struct kioctx, free_work); struct aio_kiocb *req; spin_lock_irq(&ctx->ctx_lock); @@ -632,8 +632,8 @@ static void free_ioctx_users(struct percpu_ref *ref) { struct kioctx *ctx = container_of(ref, struct kioctx, users); - kthread_init_work(&ctx->free_kwork, free_ioctx_users_work); - kthread_schedule_work(&ctx->free_kwork); + INIT_WORK(&ctx->free_work, free_ioctx_users_work); + schedule_work(&ctx->free_work); } static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index d7476470957e..6bedcc86aaa6 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -14,7 +14,7 @@ * Nauman Rafique */ -#include +#include #include #include #include diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fd2c76d1832d..af24560b4092 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -684,7 +684,7 @@ struct request_queue { #endif struct rcu_head rcu_head; wait_queue_head_t mq_freeze_wq; - struct kthread_work mq_pcpu_wake; + struct work_struct mq_pcpu_wake; /* * Protect concurrent access to q_usage_counter by * percpu_ref_kill() and percpu_ref_reinit(). 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 26579b8233a7..432af770ad89 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -13,7 +13,6 @@ #include #include #include -#include #include #include @@ -259,11 +258,7 @@ extern void resume_device_irqs(void); struct irq_affinity_notify { unsigned int irq; struct kref kref; -#ifdef CONFIG_PREEMPT_RT_BASE - struct kthread_work work; -#else struct work_struct work; -#endif void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask); void (*release)(struct kref *ref); }; diff --git a/include/linux/kthread-cgroup.h b/include/linux/kthread-cgroup.h deleted file mode 100644 index 53d34bca9d72..000000000000 --- a/include/linux/kthread-cgroup.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_KTHREAD_CGROUP_H -#define _LINUX_KTHREAD_CGROUP_H -#include -#include - -#ifdef CONFIG_BLK_CGROUP -void kthread_associate_blkcg(struct cgroup_subsys_state *css); -struct cgroup_subsys_state *kthread_blkcg(void); -#else -static inline void kthread_associate_blkcg(struct cgroup_subsys_state *css) { } -static inline struct cgroup_subsys_state *kthread_blkcg(void) -{ - return NULL; -} -#endif -#endif diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 4571e2511237..d8fa27701295 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -4,6 +4,7 @@ /* Simple interface for creating and stopping kernel threads without mess. */ #include #include +#include __printf(4, 5) struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), @@ -199,12 +200,14 @@ void kthread_destroy_worker(struct kthread_worker *worker); struct cgroup_subsys_state; -extern struct kthread_worker kthread_global_worker; -void kthread_init_global_worker(void); - -static inline bool kthread_schedule_work(struct kthread_work *work) +#ifdef CONFIG_BLK_CGROUP +void kthread_associate_blkcg(struct cgroup_subsys_state *css); +struct cgroup_subsys_state *kthread_blkcg(void); +#else +static inline void kthread_associate_blkcg(struct cgroup_subsys_state *css) { } +static inline struct cgroup_subsys_state *kthread_blkcg(void) { - return kthread_queue_work(&kthread_global_worker, work); + return NULL; } - +#endif #endif /* _LINUX_KTHREAD_H */ diff --git a/include/linux/swait.h b/include/linux/swait.h index f426a0661aa0..21ae66cd41d3 100644 --- a/include/linux/swait.h +++ b/include/linux/swait.h @@ -299,4 +299,18 @@ do { \ __ret; \ }) +#define __swait_event_lock_irq(wq, condition, lock, cmd) \ + ___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, \ + raw_spin_unlock_irq(&lock); \ + cmd; \ + schedule(); \ + raw_spin_lock_irq(&lock)) + +#define swait_event_lock_irq(wq_head, condition, lock) \ + do { \ + if (condition) \ + break; \ + __swait_event_lock_irq(wq_head, condition, lock, ); \ + } while (0) + #endif /* _LINUX_SWAIT_H */ diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 60d673e15632..546aa73fba6a 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -455,10 +455,6 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, extern void destroy_workqueue(struct workqueue_struct *wq); -struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask); -void free_workqueue_attrs(struct workqueue_attrs *attrs); -int apply_workqueue_attrs(struct workqueue_struct *wq, - const struct workqueue_attrs *attrs); int workqueue_set_unbound_cpumask(cpumask_var_t cpumask); extern bool queue_work_on(int cpu, struct workqueue_struct *wq, 
diff --git a/init/main.c b/init/main.c index e244ed4045c0..c11aed805ef5 100644 --- a/init/main.c +++ b/init/main.c @@ -1136,7 +1136,6 @@ static noinline void __init kernel_init_freeable(void) smp_prepare_cpus(setup_max_cpus); workqueue_init(); - kthread_init_global_worker(); init_mm_internals(); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index c7440dd83e23..1c286faeba38 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -322,12 +322,7 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, if (desc->affinity_notify) { kref_get(&desc->affinity_notify->kref); - -#ifdef CONFIG_PREEMPT_RT_BASE - kthread_schedule_work(&desc->affinity_notify->work); -#else schedule_work(&desc->affinity_notify->work); -#endif } irqd_set(data, IRQD_AFFINITY_SET); @@ -365,8 +360,10 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) } EXPORT_SYMBOL_GPL(irq_set_affinity_hint); -static void _irq_affinity_notify(struct irq_affinity_notify *notify) +static void irq_affinity_notify(struct work_struct *work) { + struct irq_affinity_notify *notify = + container_of(work, struct irq_affinity_notify, work); struct irq_desc *desc = irq_to_desc(notify->irq); cpumask_var_t cpumask; unsigned long flags; @@ -388,25 +385,6 @@ static void _irq_affinity_notify(struct irq_affinity_notify *notify) kref_put(¬ify->kref, notify->release); } -#ifdef CONFIG_PREEMPT_RT_BASE - -static void irq_affinity_notify(struct kthread_work *work) -{ - struct irq_affinity_notify *notify = - container_of(work, struct irq_affinity_notify, work); - _irq_affinity_notify(notify); -} - -#else - -static void irq_affinity_notify(struct work_struct *work) -{ - struct irq_affinity_notify *notify = - container_of(work, struct irq_affinity_notify, work); - _irq_affinity_notify(notify); -} -#endif - /** * irq_set_affinity_notifier - control notification of IRQ affinity changes * @irq: Interrupt for which to enable/disable notification @@ -435,11 +413,7 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) if (notify) { notify->irq = irq; kref_init(¬ify->kref); -#ifdef CONFIG_PREEMPT_RT_BASE - kthread_init_work(¬ify->work, irq_affinity_notify); -#else INIT_WORK(¬ify->work, irq_affinity_notify); -#endif } raw_spin_lock_irqsave(&desc->lock, flags); @@ -448,11 +422,7 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) raw_spin_unlock_irqrestore(&desc->lock, flags); if (old_notify) { -#ifdef CONFIG_PREEMPT_RT_BASE - kthread_cancel_work_sync(¬ify->work); -#else cancel_work_sync(&old_notify->work); -#endif kref_put(&old_notify->kref, old_notify->release); } diff --git a/kernel/kthread.c b/kernel/kthread.c index 20d01a4bf16d..fcb3a1a6e14b 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -21,7 +21,6 @@ #include #include #include -#include #include static DEFINE_SPINLOCK(kthread_create_lock); @@ -1182,19 +1181,6 @@ void kthread_destroy_worker(struct kthread_worker *worker) } EXPORT_SYMBOL(kthread_destroy_worker); -DEFINE_KTHREAD_WORKER(kthread_global_worker); -EXPORT_SYMBOL(kthread_global_worker); - -__init void kthread_init_global_worker(void) -{ - kthread_global_worker.task = kthread_create(kthread_worker_fn, - &kthread_global_worker, - "kswork"); - if (WARN_ON(IS_ERR(kthread_global_worker.task))) - return; - wake_up_process(kthread_global_worker.task); -} - #ifdef CONFIG_BLK_CGROUP /** * kthread_associate_blkcg - associate blkcg to current kthread diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f54a56acd40b..41846b6b3f0d 
100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3681,6 +3681,7 @@ static inline void sched_submit_work(struct task_struct *tsk) { if (!tsk->state) return; + /* * If a worker went to sleep, notify and ask workqueue whether * it wants to wake up a task to maintain concurrency. diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 4534e7871c8c..67951292df58 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -730,29 +730,6 @@ static void hrtimer_switch_to_hres(void) retrigger_next_event(NULL); } -#ifdef CONFIG_PREEMPT_RT_FULL - -static struct swork_event clock_set_delay_work; - -static void run_clock_set_delay(struct swork_event *event) -{ - clock_was_set(); -} - -void clock_was_set_delayed(void) -{ - swork_queue(&clock_set_delay_work); -} - -static __init int create_clock_set_delay_thread(void) -{ - WARN_ON(swork_get()); - INIT_SWORK(&clock_set_delay_work, run_clock_set_delay); - return 0; -} -early_initcall(create_clock_set_delay_thread); -#else /* PREEMPT_RT_FULL */ - static void clock_was_set_work(struct work_struct *work) { clock_was_set(); @@ -768,7 +745,6 @@ void clock_was_set_delayed(void) { schedule_work(&hrtimer_work); } -#endif #else diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 4f41609df39e..5876fdbf1261 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -49,8 +49,6 @@ #include #include #include -#include -#include #include "workqueue_internal.h" @@ -125,11 +123,6 @@ enum { * cpu or grabbing pool->lock is enough for read access. If * POOL_DISASSOCIATED is set, it's identical to L. * - * On RT we need the extra protection via rt_lock_idle_list() for - * the list manipulations against read access from - * wq_worker_sleeping(). All other places are nicely serialized via - * pool->lock. - * * A: wq_pool_attach_mutex protected. * * PL: wq_pool_mutex protected. @@ -151,7 +144,7 @@ enum { /* struct worker is defined in workqueue_internal.h */ struct worker_pool { - spinlock_t lock; /* the pool lock */ + raw_spinlock_t lock; /* the pool lock */ int cpu; /* I: the associated cpu */ int node; /* I: the associated node ID */ int id; /* I: pool ID */ @@ -304,8 +297,8 @@ static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf; static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */ static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */ -static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */ -static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */ +static DEFINE_RAW_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */ +static DECLARE_SWAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */ static LIST_HEAD(workqueues); /* PR: list of all workqueues */ static bool workqueue_freezing; /* PL: have wqs started freezing? 
*/ @@ -357,8 +350,6 @@ EXPORT_SYMBOL_GPL(system_power_efficient_wq); struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly; EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq); -static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock); - static int worker_thread(void *__worker); static void workqueue_sysfs_unregister(struct workqueue_struct *wq); @@ -435,31 +426,6 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \ else -#ifdef CONFIG_PREEMPT_RT_BASE -static inline void rt_lock_idle_list(struct worker_pool *pool) -{ - preempt_disable(); -} -static inline void rt_unlock_idle_list(struct worker_pool *pool) -{ - preempt_enable(); -} -static inline void sched_lock_idle_list(struct worker_pool *pool) { } -static inline void sched_unlock_idle_list(struct worker_pool *pool) { } -#else -static inline void rt_lock_idle_list(struct worker_pool *pool) { } -static inline void rt_unlock_idle_list(struct worker_pool *pool) { } -static inline void sched_lock_idle_list(struct worker_pool *pool) -{ - spin_lock_irq(&pool->lock); -} -static inline void sched_unlock_idle_list(struct worker_pool *pool) -{ - spin_unlock_irq(&pool->lock); -} -#endif - - #ifdef CONFIG_DEBUG_OBJECTS_WORK static struct debug_obj_descr work_debug_descr; @@ -862,20 +828,14 @@ static struct worker *first_idle_worker(struct worker_pool *pool) * Wake up the first idle worker of @pool. * * CONTEXT: - * spin_lock_irq(pool->lock). + * raw_spin_lock_irq(pool->lock). */ static void wake_up_worker(struct worker_pool *pool) { - struct worker *worker; - - rt_lock_idle_list(pool); - - worker = first_idle_worker(pool); + struct worker *worker = first_idle_worker(pool); if (likely(worker)) wake_up_process(worker->task); - - rt_unlock_idle_list(pool); } /** @@ -904,7 +864,7 @@ void wq_worker_running(struct task_struct *task) */ void wq_worker_sleeping(struct task_struct *task) { - struct worker *worker = kthread_data(task); + struct worker *next, *worker = kthread_data(task); struct worker_pool *pool; /* @@ -921,18 +881,26 @@ void wq_worker_sleeping(struct task_struct *task) return; worker->sleeping = 1; + raw_spin_lock_irq(&pool->lock); /* * The counterpart of the following dec_and_test, implied mb, * worklist not empty test sequence is in insert_work(). * Please read comment there. + * + * NOT_RUNNING is clear. This means that we're bound to and + * running on the local cpu w/ rq lock held and preemption + * disabled, which in turn means that none else could be + * manipulating idle_list, so dereferencing idle_list without pool + * lock is safe. */ if (atomic_dec_and_test(&pool->nr_running) && !list_empty(&pool->worklist)) { - sched_lock_idle_list(pool); - wake_up_worker(pool); - sched_unlock_idle_list(pool); + next = first_idle_worker(pool); + if (next) + wake_up_process(next->task); } + raw_spin_unlock_irq(&pool->lock); } /** @@ -963,7 +931,7 @@ work_func_t wq_worker_last_func(struct task_struct *task) * Set @flags in @worker->flags and adjust nr_running accordingly. * * CONTEXT: - * spin_lock_irq(pool->lock) + * raw_spin_lock_irq(pool->lock) */ static inline void worker_set_flags(struct worker *worker, unsigned int flags) { @@ -988,7 +956,7 @@ static inline void worker_set_flags(struct worker *worker, unsigned int flags) * Clear @flags in @worker->flags and adjust nr_running accordingly. 
* * CONTEXT: - * spin_lock_irq(pool->lock) + * raw_spin_lock_irq(pool->lock) */ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) { @@ -1036,7 +1004,7 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) * actually occurs, it should be easy to locate the culprit work function. * * CONTEXT: - * spin_lock_irq(pool->lock). + * raw_spin_lock_irq(pool->lock). * * Return: * Pointer to worker which is executing @work if found, %NULL @@ -1071,7 +1039,7 @@ static struct worker *find_worker_executing_work(struct worker_pool *pool, * nested inside outer list_for_each_entry_safe(). * * CONTEXT: - * spin_lock_irq(pool->lock). + * raw_spin_lock_irq(pool->lock). */ static void move_linked_works(struct work_struct *work, struct list_head *head, struct work_struct **nextp) @@ -1149,11 +1117,9 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq) * As both pwqs and pools are RCU protected, the * following lock operations are safe. */ - rcu_read_lock(); - local_spin_lock_irq(pendingb_lock, &pwq->pool->lock); + raw_spin_lock_irq(&pwq->pool->lock); put_pwq(pwq); - local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock); - rcu_read_unlock(); + raw_spin_unlock_irq(&pwq->pool->lock); } } @@ -1186,7 +1152,7 @@ static void pwq_activate_first_delayed(struct pool_workqueue *pwq) * decrement nr_in_flight of its pwq and handle workqueue flushing. * * CONTEXT: - * spin_lock_irq(pool->lock). + * raw_spin_lock_irq(pool->lock). */ static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color) { @@ -1257,7 +1223,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, struct worker_pool *pool; struct pool_workqueue *pwq; - local_lock_irqsave(pendingb_lock, *flags); + local_irq_save(*flags); /* try to steal the timer if it exists */ if (is_dwork) { @@ -1285,7 +1251,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, if (!pool) goto fail; - spin_lock(&pool->lock); + raw_spin_lock(&pool->lock); /* * work->data is guaranteed to point to pwq only while the work * item is queued on pwq->wq, and both updating work->data to point @@ -1314,17 +1280,17 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, /* work->data points to pwq iff queued, point to pool */ set_work_pool_and_keep_pending(work, pool->id); - spin_unlock(&pool->lock); + raw_spin_unlock(&pool->lock); rcu_read_unlock(); return 1; } - spin_unlock(&pool->lock); + raw_spin_unlock(&pool->lock); fail: rcu_read_unlock(); - local_unlock_irqrestore(pendingb_lock, *flags); + local_irq_restore(*flags); if (work_is_canceling(work)) return -ENOENT; - cpu_chill(); + cpu_relax(); return -EAGAIN; } @@ -1339,7 +1305,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, * work_struct flags. * * CONTEXT: - * spin_lock_irq(pool->lock). + * raw_spin_lock_irq(pool->lock). */ static void insert_work(struct pool_workqueue *pwq, struct work_struct *work, struct list_head *head, unsigned int extra_flags) @@ -1426,13 +1392,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, * queued or lose PENDING. Grabbing PENDING and queueing should * happen with IRQ disabled. */ -#ifndef CONFIG_PREEMPT_RT_FULL - /* - * nort: On RT the "interrupts-disabled" rule has been replaced with - * pendingb_lock. 
- */ lockdep_assert_irqs_disabled(); -#endif debug_work_activate(work); @@ -1460,7 +1420,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, if (last_pool && last_pool != pwq->pool) { struct worker *worker; - spin_lock(&last_pool->lock); + raw_spin_lock(&last_pool->lock); worker = find_worker_executing_work(last_pool, work); @@ -1468,11 +1428,11 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, pwq = worker->current_pwq; } else { /* meh... not running there, queue here */ - spin_unlock(&last_pool->lock); - spin_lock(&pwq->pool->lock); + raw_spin_unlock(&last_pool->lock); + raw_spin_lock(&pwq->pool->lock); } } else { - spin_lock(&pwq->pool->lock); + raw_spin_lock(&pwq->pool->lock); } /* @@ -1485,7 +1445,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, */ if (unlikely(!pwq->refcnt)) { if (wq->flags & WQ_UNBOUND) { - spin_unlock(&pwq->pool->lock); + raw_spin_unlock(&pwq->pool->lock); cpu_relax(); goto retry; } @@ -1517,7 +1477,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, insert_work(pwq, work, worklist, work_flags); out: - spin_unlock(&pwq->pool->lock); + raw_spin_unlock(&pwq->pool->lock); rcu_read_unlock(); } @@ -1538,14 +1498,14 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq, bool ret = false; unsigned long flags; - local_lock_irqsave(pendingb_lock,flags); + local_irq_save(flags); if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { __queue_work(cpu, wq, work); ret = true; } - local_unlock_irqrestore(pendingb_lock, flags); + local_irq_restore(flags); return ret; } EXPORT_SYMBOL(queue_work_on); @@ -1553,12 +1513,11 @@ EXPORT_SYMBOL(queue_work_on); void delayed_work_timer_fn(struct timer_list *t) { struct delayed_work *dwork = from_timer(dwork, t, timer); + unsigned long flags; - /* XXX */ - /* local_lock(pendingb_lock); */ - /* should have been called from irqsafe timer with irq already off */ + local_irq_save(flags); __queue_work(dwork->cpu, dwork->wq, &dwork->work); - /* local_unlock(pendingb_lock); */ + local_irq_restore(flags); } EXPORT_SYMBOL(delayed_work_timer_fn); @@ -1613,14 +1572,14 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, unsigned long flags; /* read the comment in __queue_work() */ - local_lock_irqsave(pendingb_lock, flags); + local_irq_save(flags); if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { __queue_delayed_work(cpu, wq, dwork, delay); ret = true; } - local_unlock_irqrestore(pendingb_lock, flags); + local_irq_restore(flags); return ret; } EXPORT_SYMBOL(queue_delayed_work_on); @@ -1655,7 +1614,7 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, if (likely(ret >= 0)) { __queue_delayed_work(cpu, wq, dwork, delay); - local_unlock_irqrestore(pendingb_lock, flags); + local_irq_restore(flags); } /* -ENOENT from try_to_grab_pending() becomes %true */ @@ -1666,12 +1625,11 @@ EXPORT_SYMBOL_GPL(mod_delayed_work_on); static void rcu_work_rcufn(struct rcu_head *rcu) { struct rcu_work *rwork = container_of(rcu, struct rcu_work, rcu); - unsigned long flags; /* read the comment in __queue_work() */ - local_lock_irqsave(pendingb_lock, flags); + local_irq_disable(); __queue_work(WORK_CPU_UNBOUND, rwork->wq, &rwork->work); - local_unlock_irqrestore(pendingb_lock, flags); + local_irq_enable(); } /** @@ -1706,7 +1664,7 @@ EXPORT_SYMBOL(queue_rcu_work); * necessary. * * LOCKING: - * spin_lock_irq(pool->lock). + * raw_spin_lock_irq(pool->lock). 
*/ static void worker_enter_idle(struct worker *worker) { @@ -1723,9 +1681,7 @@ static void worker_enter_idle(struct worker *worker) worker->last_active = jiffies; /* idle_list is LIFO */ - rt_lock_idle_list(pool); list_add(&worker->entry, &pool->idle_list); - rt_unlock_idle_list(pool); if (too_many_workers(pool) && !timer_pending(&pool->idle_timer)) mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); @@ -1748,7 +1704,7 @@ static void worker_enter_idle(struct worker *worker) * @worker is leaving idle state. Update stats. * * LOCKING: - * spin_lock_irq(pool->lock). + * raw_spin_lock_irq(pool->lock). */ static void worker_leave_idle(struct worker *worker) { @@ -1758,9 +1714,7 @@ static void worker_leave_idle(struct worker *worker) return; worker_clr_flags(worker, WORKER_IDLE); pool->nr_idle--; - rt_lock_idle_list(pool); list_del_init(&worker->entry); - rt_unlock_idle_list(pool); } static struct worker *alloc_worker(int node) @@ -1888,11 +1842,11 @@ static struct worker *create_worker(struct worker_pool *pool) worker_attach_to_pool(worker, pool); /* start the newly created worker */ - spin_lock_irq(&pool->lock); + raw_spin_lock_irq(&pool->lock); worker->pool->nr_workers++; worker_enter_idle(worker); wake_up_process(worker->task); - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); return worker; @@ -1911,7 +1865,7 @@ static struct worker *create_worker(struct worker_pool *pool) * be idle. * * CONTEXT: - * spin_lock_irq(pool->lock). + * raw_spin_lock_irq(pool->lock). */ static void destroy_worker(struct worker *worker) { @@ -1928,9 +1882,7 @@ static void destroy_worker(struct worker *worker) pool->nr_workers--; pool->nr_idle--; - rt_lock_idle_list(pool); list_del_init(&worker->entry); - rt_unlock_idle_list(pool); worker->flags |= WORKER_DIE; wake_up_process(worker->task); } @@ -1939,7 +1891,7 @@ static void idle_worker_timeout(struct timer_list *t) { struct worker_pool *pool = from_timer(pool, t, idle_timer); - spin_lock_irq(&pool->lock); + raw_spin_lock_irq(&pool->lock); while (too_many_workers(pool)) { struct worker *worker; @@ -1957,7 +1909,7 @@ static void idle_worker_timeout(struct timer_list *t) destroy_worker(worker); } - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); } static void send_mayday(struct work_struct *work) @@ -1988,8 +1940,8 @@ static void pool_mayday_timeout(struct timer_list *t) struct worker_pool *pool = from_timer(pool, t, mayday_timer); struct work_struct *work; - spin_lock_irq(&pool->lock); - spin_lock(&wq_mayday_lock); /* for wq->maydays */ + raw_spin_lock_irq(&pool->lock); + raw_spin_lock(&wq_mayday_lock); /* for wq->maydays */ if (need_to_create_worker(pool)) { /* @@ -2002,8 +1954,8 @@ static void pool_mayday_timeout(struct timer_list *t) send_mayday(work); } - spin_unlock(&wq_mayday_lock); - spin_unlock_irq(&pool->lock); + raw_spin_unlock(&wq_mayday_lock); + raw_spin_unlock_irq(&pool->lock); mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL); } @@ -2022,7 +1974,7 @@ static void pool_mayday_timeout(struct timer_list *t) * may_start_working() %true. * * LOCKING: - * spin_lock_irq(pool->lock) which may be released and regrabbed + * raw_spin_lock_irq(pool->lock) which may be released and regrabbed * multiple times. Does GFP_KERNEL allocations. Called only from * manager. 
*/ @@ -2031,7 +1983,7 @@ __releases(&pool->lock) __acquires(&pool->lock) { restart: - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */ mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT); @@ -2047,7 +1999,7 @@ __acquires(&pool->lock) } del_timer_sync(&pool->mayday_timer); - spin_lock_irq(&pool->lock); + raw_spin_lock_irq(&pool->lock); /* * This is necessary even after a new worker was just successfully * created as @pool->lock was dropped and the new worker might have @@ -2070,7 +2022,7 @@ __acquires(&pool->lock) * and may_start_working() is true. * * CONTEXT: - * spin_lock_irq(pool->lock) which may be released and regrabbed + * raw_spin_lock_irq(pool->lock) which may be released and regrabbed * multiple times. Does GFP_KERNEL allocations. * * Return: @@ -2093,7 +2045,7 @@ static bool manage_workers(struct worker *worker) pool->manager = NULL; pool->flags &= ~POOL_MANAGER_ACTIVE; - wake_up(&wq_manager_wait); + swake_up_one(&wq_manager_wait); return true; } @@ -2109,7 +2061,7 @@ static bool manage_workers(struct worker *worker) * call this function to process a work. * * CONTEXT: - * spin_lock_irq(pool->lock) which is released and regrabbed. + * raw_spin_lock_irq(pool->lock) which is released and regrabbed. */ static void process_one_work(struct worker *worker, struct work_struct *work) __releases(&pool->lock) @@ -2191,7 +2143,7 @@ __acquires(&pool->lock) */ set_work_pool_and_clear_pending(work, pool->id); - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); lock_map_acquire(&pwq->wq->lockdep_map); lock_map_acquire(&lockdep_map); @@ -2246,7 +2198,7 @@ __acquires(&pool->lock) */ cond_resched(); - spin_lock_irq(&pool->lock); + raw_spin_lock_irq(&pool->lock); /* clear cpu intensive status */ if (unlikely(cpu_intensive)) @@ -2272,7 +2224,7 @@ __acquires(&pool->lock) * fetches a work from the top and executes it. * * CONTEXT: - * spin_lock_irq(pool->lock) which may be released and regrabbed + * raw_spin_lock_irq(pool->lock) which may be released and regrabbed * multiple times. */ static void process_scheduled_works(struct worker *worker) @@ -2314,11 +2266,11 @@ static int worker_thread(void *__worker) /* tell the scheduler that this is a workqueue worker */ set_pf_worker(true); woke_up: - spin_lock_irq(&pool->lock); + raw_spin_lock_irq(&pool->lock); /* am I supposed to die? 
*/ if (unlikely(worker->flags & WORKER_DIE)) { - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); WARN_ON_ONCE(!list_empty(&worker->entry)); set_pf_worker(false); @@ -2384,7 +2336,7 @@ static int worker_thread(void *__worker) */ worker_enter_idle(worker); __set_current_state(TASK_IDLE); - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); schedule(); goto woke_up; } @@ -2438,7 +2390,7 @@ static int rescuer_thread(void *__rescuer) should_stop = kthread_should_stop(); /* see whether any pwq is asking for help */ - spin_lock_irq(&wq_mayday_lock); + raw_spin_lock_irq(&wq_mayday_lock); while (!list_empty(&wq->maydays)) { struct pool_workqueue *pwq = list_first_entry(&wq->maydays, @@ -2450,11 +2402,11 @@ static int rescuer_thread(void *__rescuer) __set_current_state(TASK_RUNNING); list_del_init(&pwq->mayday_node); - spin_unlock_irq(&wq_mayday_lock); + raw_spin_unlock_irq(&wq_mayday_lock); worker_attach_to_pool(rescuer, pool); - spin_lock_irq(&pool->lock); + raw_spin_lock_irq(&pool->lock); /* * Slurp in all works issued via this workqueue and @@ -2483,7 +2435,7 @@ static int rescuer_thread(void *__rescuer) * incur MAYDAY_INTERVAL delay inbetween. */ if (need_to_create_worker(pool)) { - spin_lock(&wq_mayday_lock); + raw_spin_lock(&wq_mayday_lock); /* * Queue iff we aren't racing destruction * and somebody else hasn't queued it already. @@ -2492,7 +2444,7 @@ static int rescuer_thread(void *__rescuer) get_pwq(pwq); list_add_tail(&pwq->mayday_node, &wq->maydays); } - spin_unlock(&wq_mayday_lock); + raw_spin_unlock(&wq_mayday_lock); } } @@ -2510,14 +2462,14 @@ static int rescuer_thread(void *__rescuer) if (need_more_worker(pool)) wake_up_worker(pool); - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); worker_detach_from_pool(rescuer); - spin_lock_irq(&wq_mayday_lock); + raw_spin_lock_irq(&wq_mayday_lock); } - spin_unlock_irq(&wq_mayday_lock); + raw_spin_unlock_irq(&wq_mayday_lock); if (should_stop) { __set_current_state(TASK_RUNNING); @@ -2597,7 +2549,7 @@ static void wq_barrier_func(struct work_struct *work) * underneath us, so we can't reliably determine pwq from @target. * * CONTEXT: - * spin_lock_irq(pool->lock). + * raw_spin_lock_irq(pool->lock). 
*/ static void insert_wq_barrier(struct pool_workqueue *pwq, struct wq_barrier *barr, @@ -2684,7 +2636,7 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq, for_each_pwq(pwq, wq) { struct worker_pool *pool = pwq->pool; - spin_lock_irq(&pool->lock); + raw_spin_lock_irq(&pool->lock); if (flush_color >= 0) { WARN_ON_ONCE(pwq->flush_color != -1); @@ -2701,7 +2653,7 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq, pwq->work_color = work_color; } - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); } if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush)) @@ -2901,9 +2853,9 @@ void drain_workqueue(struct workqueue_struct *wq) for_each_pwq(pwq, wq) { bool drained; - spin_lock_irq(&pwq->pool->lock); + raw_spin_lock_irq(&pwq->pool->lock); drained = !pwq->nr_active && list_empty(&pwq->delayed_works); - spin_unlock_irq(&pwq->pool->lock); + raw_spin_unlock_irq(&pwq->pool->lock); if (drained) continue; @@ -2939,7 +2891,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, return false; } - spin_lock_irq(&pool->lock); + raw_spin_lock_irq(&pool->lock); /* see the comment in try_to_grab_pending() with the same code */ pwq = get_work_pwq(work); if (pwq) { @@ -2955,7 +2907,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, check_flush_dependency(pwq->wq, work); insert_wq_barrier(pwq, barr, work, worker); - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); /* * Force a lock recursion deadlock when using flush_work() inside a @@ -2974,7 +2926,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, rcu_read_unlock(); return true; already_gone: - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); rcu_read_unlock(); return false; } @@ -3075,7 +3027,7 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) /* tell other tasks trying to grab @work to back off */ mark_work_canceling(work); - local_unlock_irqrestore(pendingb_lock, flags); + local_irq_restore(flags); /* * This allows canceling during early boot. We know that @work @@ -3136,10 +3088,10 @@ EXPORT_SYMBOL_GPL(cancel_work_sync); */ bool flush_delayed_work(struct delayed_work *dwork) { - local_lock_irq(pendingb_lock); + local_irq_disable(); if (del_timer_sync(&dwork->timer)) __queue_work(dwork->cpu, dwork->wq, &dwork->work); - local_unlock_irq(pendingb_lock); + local_irq_enable(); return flush_work(&dwork->work); } EXPORT_SYMBOL(flush_delayed_work); @@ -3177,7 +3129,7 @@ static bool __cancel_work(struct work_struct *work, bool is_dwork) return false; set_work_pool_and_clear_pending(work, get_work_pool_id(work)); - local_unlock_irqrestore(pendingb_lock, flags); + local_irq_restore(flags); return ret; } @@ -3287,7 +3239,7 @@ EXPORT_SYMBOL_GPL(execute_in_process_context); * * Undo alloc_workqueue_attrs(). */ -void free_workqueue_attrs(struct workqueue_attrs *attrs) +static void free_workqueue_attrs(struct workqueue_attrs *attrs) { if (attrs) { free_cpumask_var(attrs->cpumask); @@ -3297,21 +3249,20 @@ void free_workqueue_attrs(struct workqueue_attrs *attrs) /** * alloc_workqueue_attrs - allocate a workqueue_attrs - * @gfp_mask: allocation mask to use * * Allocate a new workqueue_attrs, initialize with default settings and * return it. * * Return: The allocated new workqueue_attr on success. %NULL on failure. 
*/ -struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask) +static struct workqueue_attrs *alloc_workqueue_attrs(void) { struct workqueue_attrs *attrs; - attrs = kzalloc(sizeof(*attrs), gfp_mask); + attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); if (!attrs) goto fail; - if (!alloc_cpumask_var(&attrs->cpumask, gfp_mask)) + if (!alloc_cpumask_var(&attrs->cpumask, GFP_KERNEL)) goto fail; cpumask_copy(attrs->cpumask, cpu_possible_mask); @@ -3368,7 +3319,7 @@ static bool wqattrs_equal(const struct workqueue_attrs *a, */ static int init_worker_pool(struct worker_pool *pool) { - spin_lock_init(&pool->lock); + raw_spin_lock_init(&pool->lock); pool->id = -1; pool->cpu = -1; pool->node = NUMA_NO_NODE; @@ -3389,7 +3340,7 @@ static int init_worker_pool(struct worker_pool *pool) pool->refcnt = 1; /* shouldn't fail above this point */ - pool->attrs = alloc_workqueue_attrs(GFP_KERNEL); + pool->attrs = alloc_workqueue_attrs(); if (!pool->attrs) return -ENOMEM; return 0; @@ -3454,15 +3405,15 @@ static void put_unbound_pool(struct worker_pool *pool) * @pool's workers from blocking on attach_mutex. We're the last * manager and @pool gets freed with the flag set. */ - spin_lock_irq(&pool->lock); - wait_event_lock_irq(wq_manager_wait, + raw_spin_lock_irq(&pool->lock); + swait_event_lock_irq(wq_manager_wait, !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock); pool->flags |= POOL_MANAGER_ACTIVE; while ((worker = first_idle_worker(pool))) destroy_worker(worker); WARN_ON(pool->nr_workers || pool->nr_idle); - spin_unlock_irq(&pool->lock); + raw_spin_unlock_irq(&pool->lock); mutex_lock(&wq_pool_attach_mutex); if (!list_empty(&pool->workers)) @@ -3616,7 +3567,7 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq) return; /* this function can be called during early boot w/ irq disabled */ - spin_lock_irqsave(&pwq->pool->lock, flags); + raw_spin_lock_irqsave(&pwq->pool->lock, flags); /* * During [un]freezing, the caller is responsible for ensuring that @@ -3639,7 +3590,7 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq) pwq->max_active = 0; } - spin_unlock_irqrestore(&pwq->pool->lock, flags); + raw_spin_unlock_irqrestore(&pwq->pool->lock, flags); } /* initialize newly alloced @pwq which is associated with @wq and @pool */ @@ -3812,8 +3763,8 @@ apply_wqattrs_prepare(struct workqueue_struct *wq, ctx = kzalloc(struct_size(ctx, pwq_tbl, nr_node_ids), GFP_KERNEL); - new_attrs = alloc_workqueue_attrs(GFP_KERNEL); - tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL); + new_attrs = alloc_workqueue_attrs(); + tmp_attrs = alloc_workqueue_attrs(); if (!ctx || !new_attrs || !tmp_attrs) goto out_free; @@ -3949,7 +3900,7 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq, * * Return: 0 on success and -errno on failure. 
*/ -int apply_workqueue_attrs(struct workqueue_struct *wq, +static int apply_workqueue_attrs(struct workqueue_struct *wq, const struct workqueue_attrs *attrs) { int ret; @@ -3960,7 +3911,6 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, return ret; } -EXPORT_SYMBOL_GPL(apply_workqueue_attrs); /** * wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug @@ -4038,9 +3988,9 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu, use_dfl_pwq: mutex_lock(&wq->mutex); - spin_lock_irq(&wq->dfl_pwq->pool->lock); + raw_spin_lock_irq(&wq->dfl_pwq->pool->lock); get_pwq(wq->dfl_pwq); - spin_unlock_irq(&wq->dfl_pwq->pool->lock); + raw_spin_unlock_irq(&wq->dfl_pwq->pool->lock); old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq); out_unlock: mutex_unlock(&wq->mutex); @@ -4159,7 +4109,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, return NULL; if (flags & WQ_UNBOUND) { - wq->unbound_attrs = alloc_workqueue_attrs(GFP_KERNEL); + wq->unbound_attrs = alloc_workqueue_attrs(); if (!wq->unbound_attrs) goto err_free_wq; } @@ -4246,9 +4196,9 @@ void destroy_workqueue(struct workqueue_struct *wq) struct worker *rescuer = wq->rescuer; /* this prevents new queueing */ - spin_lock_irq(&wq_mayday_lock); + raw_spin_lock_irq(&wq_mayday_lock); wq->rescuer = NULL; - spin_unlock_irq(&wq_mayday_lock); + raw_spin_unlock_irq(&wq_mayday_lock); /* rescuer will empty maydays list before exiting */ kthread_stop(rescuer->task); @@ -4443,10 +4393,10 @@ unsigned int work_busy(struct work_struct *work) rcu_read_lock(); pool = get_work_pool(work); if (pool) { - spin_lock_irqsave(&pool->lock, flags); + raw_spin_lock_irqsave(&pool->lock, flags); if (find_worker_executing_work(pool, work)) ret |= WORK_BUSY_RUNNING; - spin_unlock_irqrestore(&pool->lock, flags); + raw_spin_unlock_irqrestore(&pool->lock, flags); } rcu_read_unlock(); @@ -4653,10 +4603,10 @@ void show_workqueue_state(void) pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags); for_each_pwq(pwq, wq) { - spin_lock_irqsave(&pwq->pool->lock, flags); + raw_spin_lock_irqsave(&pwq->pool->lock, flags); if (pwq->nr_active || !list_empty(&pwq->delayed_works)) show_pwq(pwq); - spin_unlock_irqrestore(&pwq->pool->lock, flags); + raw_spin_unlock_irqrestore(&pwq->pool->lock, flags); /* * We could be printing a lot from atomic context, e.g. * sysrq-t -> show_workqueue_state(). Avoid triggering @@ -4670,7 +4620,7 @@ void show_workqueue_state(void) struct worker *worker; bool first = true; - spin_lock_irqsave(&pool->lock, flags); + raw_spin_lock_irqsave(&pool->lock, flags); if (pool->nr_workers == pool->nr_idle) goto next_pool; @@ -4689,7 +4639,7 @@ void show_workqueue_state(void) } pr_cont("\n"); next_pool: - spin_unlock_irqrestore(&pool->lock, flags); + raw_spin_unlock_irqrestore(&pool->lock, flags); /* * We could be printing a lot from atomic context, e.g. * sysrq-t -> show_workqueue_state(). Avoid triggering @@ -4719,7 +4669,7 @@ void wq_worker_comm(char *buf, size_t size, struct task_struct *task) struct worker_pool *pool = worker->pool; if (pool) { - spin_lock_irq(&pool->lock); + raw_spin_lock_irq(&pool->lock); /* * ->desc tracks information (wq name or * set_worker_desc()) for the latest execution. 
If
@@ -4733,7 +4683,7 @@ void wq_worker_comm(char *buf, size_t size, struct task_struct *task)
 			scnprintf(buf + off, size - off, "-%s", worker->desc);
 		}
-		spin_unlock_irq(&pool->lock);
+		raw_spin_unlock_irq(&pool->lock);
 	}
 }
@@ -4764,7 +4714,7 @@ static void unbind_workers(int cpu)
 	for_each_cpu_worker_pool(pool, cpu) {
 		mutex_lock(&wq_pool_attach_mutex);
-		spin_lock_irq(&pool->lock);
+		raw_spin_lock_irq(&pool->lock);
 		/*
 		 * We've blocked all attach/detach operations. Make all workers
@@ -4778,7 +4728,7 @@ static void unbind_workers(int cpu)
 		pool->flags |= POOL_DISASSOCIATED;
-		spin_unlock_irq(&pool->lock);
+		raw_spin_unlock_irq(&pool->lock);
 		mutex_unlock(&wq_pool_attach_mutex);
 		/*
@@ -4804,9 +4754,9 @@ static void unbind_workers(int cpu)
 		 * worker blocking could lead to lengthy stalls. Kick off
 		 * unbound chain execution of currently pending work items.
 		 */
-		spin_lock_irq(&pool->lock);
+		raw_spin_lock_irq(&pool->lock);
 		wake_up_worker(pool);
-		spin_unlock_irq(&pool->lock);
+		raw_spin_unlock_irq(&pool->lock);
 	}
 }
@@ -4833,7 +4783,7 @@ static void rebind_workers(struct worker_pool *pool)
 		WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask) < 0);
-	spin_lock_irq(&pool->lock);
+	raw_spin_lock_irq(&pool->lock);
 	pool->flags &= ~POOL_DISASSOCIATED;
@@ -4872,7 +4822,7 @@ static void rebind_workers(struct worker_pool *pool)
 		WRITE_ONCE(worker->flags, worker_flags);
 	}
-	spin_unlock_irq(&pool->lock);
+	raw_spin_unlock_irq(&pool->lock);
 }
 /**
@@ -5324,7 +5274,7 @@ static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
 	lockdep_assert_held(&wq_pool_mutex);
-	attrs = alloc_workqueue_attrs(GFP_KERNEL);
+	attrs = alloc_workqueue_attrs();
 	if (!attrs)
 		return NULL;
@@ -5746,7 +5696,7 @@ static void __init wq_numa_init(void)
 		return;
 	}
-	wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(GFP_KERNEL);
+	wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
 	BUG_ON(!wq_update_unbound_numa_attrs_buf);
 	/*
@@ -5821,7 +5771,7 @@ int __init workqueue_init_early(void)
 	for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
 		struct workqueue_attrs *attrs;
-		BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
+		BUG_ON(!(attrs = alloc_workqueue_attrs()));
 		attrs->nice = std_nice[i];
 		unbound_std_wq_attrs[i] = attrs;
@@ -5830,7 +5780,7 @@ int __init workqueue_init_early(void)
 		 * guaranteed by max_active which is enforced by pwqs.
 		 * Turn off NUMA so that dfl_pwq is used for all nodes.
 		 */
-		BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
+		BUG_ON(!(attrs = alloc_workqueue_attrs()));
 		attrs->nice = std_nice[i];
 		attrs->no_numa = true;
 		ordered_wq_attrs[i] = attrs;
-- Gitee

From 6291565d301de75080c2784d74def02c4cb5ff6f Mon Sep 17 00:00:00 2001
From: meganz009
Date: Tue, 13 Jun 2023 10:43:01 +0800
Subject: [PATCH 12/12] Linux 4.19.94-rt38 REBASE

commit dee368fe90e9506b8a4c9948fb611fb77dda153f upstream.

---
 localversion-rt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/localversion-rt b/localversion-rt
index 1199ebade17b..49bae8d6aa67 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt16
+-rt38
-- Gitee
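
A note for readers following the series: on PREEMPT_RT a spinlock_t is substituted by a sleeping rtmutex-based lock, so any lock taken in a context that must not sleep (interrupts disabled, scheduler hooks, and similar) has to be a raw_spinlock_t, and the former local_lock sections can collapse into plain interrupt disabling once the lock underneath is raw. The sketch below only illustrates those two patterns; it is not code from the patches, and the identifiers example_pool_lock, example_counter and example_set_pending are made up for the example.

	#include <linux/spinlock.h>
	#include <linux/irqflags.h>

	/*
	 * Counterpart of pool->lock after the conversion: a raw spinlock keeps
	 * spinning on PREEMPT_RT, so it may be taken from non-preemptible,
	 * irqs-off context.
	 */
	static DEFINE_RAW_SPINLOCK(example_pool_lock);
	static int example_counter;

	static void example_update_from_atomic_context(void)
	{
		unsigned long flags;

		raw_spin_lock_irqsave(&example_pool_lock, flags);
		example_counter++;	/* stands in for idle_list/worklist updates */
		raw_spin_unlock_irqrestore(&example_pool_lock, flags);
	}

	/*
	 * Counterpart of the removed pendingb_lock sections: with the pool lock
	 * made raw, disabling interrupts is enough to keep the PENDING-bit
	 * handling on the local CPU.
	 */
	static void example_set_pending(void)
	{
		unsigned long flags;

		local_irq_save(flags);
		/* test_and_set_bit(WORK_STRUCT_PENDING_BIT, ...) would go here */
		local_irq_restore(flags);
	}

Because a raw spinlock never sleeps even on RT, the sections it protects should stay short and bounded; the wq_worker_sleeping() change above follows that pattern by doing only the check of nr_running/worklist and the wakeup of the next idle worker under pool->lock.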