From d901c22b1b2e166138e99e58709e3c200effdc89 Mon Sep 17 00:00:00 2001 From: zr_yy Date: Mon, 5 Jun 2023 10:46:18 +0800 Subject: [PATCH 1/6] work-simple: Simple work queue implemenation commit eccea3070c1d002aaa2a9cbdaaee36e65f5d06cf upstream. Provides a framework for enqueuing callbacks from irq context PREEMPT_RT_FULL safe. The callbacks are executed in kthread context. Bases on wait-simple. Cc: Sebastian Andrzej Siewior Signed-off-by: Daniel Wagner --- include/linux/swork.h | 24 ++++++ kernel/sched/Makefile | 2 +- kernel/sched/swork.c | 173 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 include/linux/swork.h create mode 100644 kernel/sched/swork.c diff --git a/include/linux/swork.h b/include/linux/swork.h new file mode 100644 index 000000000000..f175fa9a6016 --- /dev/null +++ b/include/linux/swork.h @@ -0,0 +1,24 @@ +#ifndef _LINUX_SWORK_H +#define _LINUX_SWORK_H + +#include + +struct swork_event { + struct list_head item; + unsigned long flags; + void (*func)(struct swork_event *); +}; + +static inline void INIT_SWORK(struct swork_event *event, + void (*func)(struct swork_event *)) +{ + event->flags = 0; + event->func = func; +} + +bool swork_queue(struct swork_event *sev); + +int swork_get(void); +void swork_put(void); + +#endif /* _LINUX_SWORK_H */ diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 21fb5a5662b5..eac7839b880d 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -18,7 +18,7 @@ endif obj-y += core.o loadavg.o clock.o cputime.o obj-y += idle.o fair.o rt.o deadline.o -obj-y += wait.o wait_bit.o swait.o completion.o +obj-y += wait.o wait_bit.o swait.o swork.o completion.o obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o diff --git a/kernel/sched/swork.c b/kernel/sched/swork.c new file mode 100644 index 000000000000..a5b89fdacf19 --- /dev/null +++ b/kernel/sched/swork.c @@ -0,0 +1,173 @@ +/* + * Copyright (C) 2014 BMW Car IT GmbH, Daniel Wagner daniel.wagner@bmw-carit.de + * + * Provides a framework for enqueuing callbacks from irq context + * PREEMPT_RT_FULL safe. The callbacks are executed in kthread context. + */ + +#include +#include +#include +#include +#include +#include + +#define SWORK_EVENT_PENDING (1 << 0) + +static DEFINE_MUTEX(worker_mutex); +static struct sworker *glob_worker; + +struct sworker { + struct list_head events; + struct swait_queue_head wq; + + raw_spinlock_t lock; + + struct task_struct *task; + int refs; +}; + +static bool swork_readable(struct sworker *worker) +{ + bool r; + + if (kthread_should_stop()) + return true; + + raw_spin_lock_irq(&worker->lock); + r = !list_empty(&worker->events); + raw_spin_unlock_irq(&worker->lock); + + return r; +} + +static int swork_kthread(void *arg) +{ + struct sworker *worker = arg; + + for (;;) { + swait_event_interruptible_exclusive(worker->wq, + swork_readable(worker)); + if (kthread_should_stop()) + break; + + raw_spin_lock_irq(&worker->lock); + while (!list_empty(&worker->events)) { + struct swork_event *sev; + + sev = list_first_entry(&worker->events, + struct swork_event, item); + list_del(&sev->item); + raw_spin_unlock_irq(&worker->lock); + + WARN_ON_ONCE(!test_and_clear_bit(SWORK_EVENT_PENDING, + &sev->flags)); + sev->func(sev); + raw_spin_lock_irq(&worker->lock); + } + raw_spin_unlock_irq(&worker->lock); + } + return 0; +} + +static struct sworker *swork_create(void) +{ + struct sworker *worker; + + worker = kzalloc(sizeof(*worker), GFP_KERNEL); + if (!worker) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&worker->events); + raw_spin_lock_init(&worker->lock); + init_swait_queue_head(&worker->wq); + + worker->task = kthread_run(swork_kthread, worker, "kswork"); + if (IS_ERR(worker->task)) { + kfree(worker); + return ERR_PTR(-ENOMEM); + } + + return worker; +} + +static void swork_destroy(struct sworker *worker) +{ + kthread_stop(worker->task); + + WARN_ON(!list_empty(&worker->events)); + kfree(worker); +} + +/** + * swork_queue - queue swork + * + * Returns %false if @work was already on a queue, %true otherwise. + * + * The work is queued and processed on a random CPU + */ +bool swork_queue(struct swork_event *sev) +{ + unsigned long flags; + + if (test_and_set_bit(SWORK_EVENT_PENDING, &sev->flags)) + return false; + + raw_spin_lock_irqsave(&glob_worker->lock, flags); + list_add_tail(&sev->item, &glob_worker->events); + raw_spin_unlock_irqrestore(&glob_worker->lock, flags); + + swake_up_one(&glob_worker->wq); + return true; +} +EXPORT_SYMBOL_GPL(swork_queue); + +/** + * swork_get - get an instance of the sworker + * + * Returns an negative error code if the initialization if the worker did not + * work, %0 otherwise. + * + */ +int swork_get(void) +{ + struct sworker *worker; + + mutex_lock(&worker_mutex); + if (!glob_worker) { + worker = swork_create(); + if (IS_ERR(worker)) { + mutex_unlock(&worker_mutex); + return -ENOMEM; + } + + glob_worker = worker; + } + + glob_worker->refs++; + mutex_unlock(&worker_mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(swork_get); + +/** + * swork_put - puts an instance of the sworker + * + * Will destroy the sworker thread. This function must not be called until all + * queued events have been completed. + */ +void swork_put(void) +{ + mutex_lock(&worker_mutex); + + glob_worker->refs--; + if (glob_worker->refs > 0) + goto out; + + swork_destroy(glob_worker); + glob_worker = NULL; +out: + mutex_unlock(&worker_mutex); +} +EXPORT_SYMBOL_GPL(swork_put); -- Gitee From a5a95afbeb9b03fe980a139962fce2d4fc51bf5b Mon Sep 17 00:00:00 2001 From: zr_yy Date: Mon, 5 Jun 2023 10:46:54 +0800 Subject: [PATCH 2/6] work-simple: drop a shit statement in SWORK_EVENT_PENDING commit cb2283a50926db7e39bf3643c28a9c8c73a3263a upstream. Dan Carpenter reported | smatch warnings: |kernel/sched/swork.c:63 swork_kthread() warn: test_bit() takes a bit number This is not a bug because we shift by zero (and use the same value in both places). Nevertheless I'm dropping that shift by zero to keep smatch quiet. Cc: Daniel Wagner Signed-off-by: Sebastian Andrzej Siewior --- kernel/sched/swork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/swork.c b/kernel/sched/swork.c index a5b89fdacf19..c90d14b9b126 100644 --- a/kernel/sched/swork.c +++ b/kernel/sched/swork.c @@ -12,7 +12,7 @@ #include #include -#define SWORK_EVENT_PENDING (1 << 0) +#define SWORK_EVENT_PENDING 1 static DEFINE_MUTEX(worker_mutex); static struct sworker *glob_worker; -- Gitee From aba80ff0977ec315d5a14e14ea12589d8ed18587 Mon Sep 17 00:00:00 2001 From: zr_yy Date: Mon, 5 Jun 2023 11:12:39 +0800 Subject: [PATCH 3/6] completion: Use simple wait queues commit 93e59514ca04f36ad3f87b3c0da5be1e0b9d238f upstream. Completions have no long lasting callbacks and therefor do not need the complex waitqueue variant. Use simple waitqueues which reduces the contention on the waitqueue lock. Signed-off-by: Thomas Gleixner --- arch/powerpc/platforms/ps3/device-init.c | 4 +-- .../wireless/intersil/orinoco/orinoco_usb.c | 4 +-- drivers/usb/gadget/function/f_fs.c | 2 +- drivers/usb/gadget/legacy/inode.c | 4 +-- include/linux/completion.h | 8 ++--- include/linux/suspend.h | 6 ++++ include/linux/swait.h | 2 ++ kernel/power/hibernate.c | 7 ++++ kernel/power/suspend.c | 4 +++ kernel/sched/completion.c | 34 +++++++++---------- kernel/sched/core.c | 10 ++++-- kernel/sched/swait.c | 21 +++++++++++- 12 files changed, 75 insertions(+), 31 deletions(-) diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c index e7075aaff1bb..1580464a9d5b 100644 --- a/arch/powerpc/platforms/ps3/device-init.c +++ b/arch/powerpc/platforms/ps3/device-init.c @@ -752,8 +752,8 @@ static int ps3_notification_read_write(struct ps3_notification_device *dev, } pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op); - res = wait_event_interruptible(dev->done.wait, - dev->done.done || kthread_should_stop()); + res = swait_event_interruptible_exclusive(dev->done.wait, + dev->done.done || kthread_should_stop()); if (kthread_should_stop()) res = -EINTR; if (res) { diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c index 94ad6fe29e69..52a49f0bbc19 100644 --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c @@ -697,8 +697,8 @@ static void ezusb_req_ctx_wait(struct ezusb_priv *upriv, while (!ctx->done.done && msecs--) udelay(1000); } else { - wait_event_interruptible(ctx->done.wait, - ctx->done.done); + swait_event_interruptible_exclusive(ctx->done.wait, + ctx->done.done); } break; default: diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 2050993fb58b..e2ca75a6e241 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1626,7 +1626,7 @@ static void ffs_data_put(struct ffs_data *ffs) pr_info("%s(): freeing\n", __func__); ffs_data_clear(ffs); BUG_ON(waitqueue_active(&ffs->ev.waitq) || - waitqueue_active(&ffs->ep0req_completion.wait) || + swait_active(&ffs->ep0req_completion.wait) || waitqueue_active(&ffs->wait)); destroy_workqueue(ffs->io_completion_wq); kfree(ffs->dev_name); diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 327783b5dc91..87a14f37bc21 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -343,7 +343,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len) spin_unlock_irq (&epdata->dev->lock); if (likely (value == 0)) { - value = wait_event_interruptible (done.wait, done.done); + value = swait_event_interruptible_exclusive(done.wait, done.done); if (value != 0) { spin_lock_irq (&epdata->dev->lock); if (likely (epdata->ep != NULL)) { @@ -352,7 +352,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len) usb_ep_dequeue (epdata->ep, epdata->req); spin_unlock_irq (&epdata->dev->lock); - wait_event (done.wait, done.done); + swait_event_exclusive(done.wait, done.done); if (epdata->status == -ECONNRESET) epdata->status = -EINTR; } else { diff --git a/include/linux/completion.h b/include/linux/completion.h index 519e94915d18..bf8e77001f18 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -9,7 +9,7 @@ * See kernel/sched/completion.c for details. */ -#include +#include /* * struct completion - structure used to maintain state for a "completion" @@ -25,7 +25,7 @@ */ struct completion { unsigned int done; - wait_queue_head_t wait; + struct swait_queue_head wait; }; #define init_completion_map(x, m) __init_completion(x) @@ -34,7 +34,7 @@ static inline void complete_acquire(struct completion *x) {} static inline void complete_release(struct completion *x) {} #define COMPLETION_INITIALIZER(work) \ - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } + { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) } #define COMPLETION_INITIALIZER_ONSTACK_MAP(work, map) \ (*({ init_completion_map(&(work), &(map)); &(work); })) @@ -85,7 +85,7 @@ static inline void complete_release(struct completion *x) {} static inline void __init_completion(struct completion *x) { x->done = 0; - init_waitqueue_head(&x->wait); + init_swait_queue_head(&x->wait); } /** diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 3f529ad9a9d2..328439ce71f5 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -196,6 +196,12 @@ struct platform_s2idle_ops { void (*end)(void); }; +#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION) +extern bool pm_in_action; +#else +# define pm_in_action false +#endif + #ifdef CONFIG_SUSPEND extern suspend_state_t mem_sleep_current; extern suspend_state_t mem_sleep_default; diff --git a/include/linux/swait.h b/include/linux/swait.h index 73e06e9986d4..f426a0661aa0 100644 --- a/include/linux/swait.h +++ b/include/linux/swait.h @@ -160,7 +160,9 @@ static inline bool swq_has_sleeper(struct swait_queue_head *wq) extern void swake_up_one(struct swait_queue_head *q); extern void swake_up_all(struct swait_queue_head *q); extern void swake_up_locked(struct swait_queue_head *q); +extern void swake_up_all_locked(struct swait_queue_head *q); +extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); extern void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state); extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index f5ce9f7ec132..0f00ba01376f 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -690,6 +690,10 @@ static int load_image_and_restore(void) return error; } +#ifndef CONFIG_SUSPEND +bool pm_in_action; +#endif + /** * hibernate - Carry out system hibernation, including saving the image. */ @@ -703,6 +707,8 @@ int hibernate(void) return -EPERM; } + pm_in_action = true; + lock_system_sleep(); /* The snapshot device should not be opened while we're running */ if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { @@ -781,6 +787,7 @@ int hibernate(void) atomic_inc(&snapshot_device_available); Unlock: unlock_system_sleep(); + pm_in_action = false; pr_info("hibernation exit\n"); return error; diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 0bd595a0b610..a4456772d98e 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -600,6 +600,8 @@ static int enter_state(suspend_state_t state) return error; } +bool pm_in_action; + /** * pm_suspend - Externally visible function for suspending the system. * @state: System sleep state to enter. @@ -614,6 +616,7 @@ int pm_suspend(suspend_state_t state) if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX) return -EINVAL; + pm_in_action = true; pr_info("suspend entry (%s)\n", mem_sleep_labels[state]); error = enter_state(state); if (error) { @@ -623,6 +626,7 @@ int pm_suspend(suspend_state_t state) suspend_stats.success++; } pr_info("suspend exit\n"); + pm_in_action = false; return error; } EXPORT_SYMBOL(pm_suspend); diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index a1ad5b7d5521..755a58084978 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c @@ -29,12 +29,12 @@ void complete(struct completion *x) { unsigned long flags; - spin_lock_irqsave(&x->wait.lock, flags); + raw_spin_lock_irqsave(&x->wait.lock, flags); if (x->done != UINT_MAX) x->done++; - __wake_up_locked(&x->wait, TASK_NORMAL, 1); - spin_unlock_irqrestore(&x->wait.lock, flags); + swake_up_locked(&x->wait); + raw_spin_unlock_irqrestore(&x->wait.lock, flags); } EXPORT_SYMBOL(complete); @@ -58,10 +58,10 @@ void complete_all(struct completion *x) { unsigned long flags; - spin_lock_irqsave(&x->wait.lock, flags); + raw_spin_lock_irqsave(&x->wait.lock, flags); x->done = UINT_MAX; - __wake_up_locked(&x->wait, TASK_NORMAL, 0); - spin_unlock_irqrestore(&x->wait.lock, flags); + swake_up_all_locked(&x->wait); + raw_spin_unlock_irqrestore(&x->wait.lock, flags); } EXPORT_SYMBOL(complete_all); @@ -70,20 +70,20 @@ do_wait_for_common(struct completion *x, long (*action)(long), long timeout, int state) { if (!x->done) { - DECLARE_WAITQUEUE(wait, current); + DECLARE_SWAITQUEUE(wait); - __add_wait_queue_entry_tail_exclusive(&x->wait, &wait); + __prepare_to_swait(&x->wait, &wait); do { if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; break; } __set_current_state(state); - spin_unlock_irq(&x->wait.lock); + raw_spin_unlock_irq(&x->wait.lock); timeout = action(timeout); - spin_lock_irq(&x->wait.lock); + raw_spin_lock_irq(&x->wait.lock); } while (!x->done && timeout); - __remove_wait_queue(&x->wait, &wait); + __finish_swait(&x->wait, &wait); if (!x->done) return timeout; } @@ -100,9 +100,9 @@ __wait_for_common(struct completion *x, complete_acquire(x); - spin_lock_irq(&x->wait.lock); + raw_spin_lock_irq(&x->wait.lock); timeout = do_wait_for_common(x, action, timeout, state); - spin_unlock_irq(&x->wait.lock); + raw_spin_unlock_irq(&x->wait.lock); complete_release(x); @@ -291,12 +291,12 @@ bool try_wait_for_completion(struct completion *x) if (!READ_ONCE(x->done)) return false; - spin_lock_irqsave(&x->wait.lock, flags); + raw_spin_lock_irqsave(&x->wait.lock, flags); if (!x->done) ret = false; else if (x->done != UINT_MAX) x->done--; - spin_unlock_irqrestore(&x->wait.lock, flags); + raw_spin_unlock_irqrestore(&x->wait.lock, flags); return ret; } EXPORT_SYMBOL(try_wait_for_completion); @@ -322,8 +322,8 @@ bool completion_done(struct completion *x) * otherwise we can end up freeing the completion before complete() * is done referencing it. */ - spin_lock_irqsave(&x->wait.lock, flags); - spin_unlock_irqrestore(&x->wait.lock, flags); + raw_spin_lock_irqsave(&x->wait.lock, flags); + raw_spin_unlock_irqrestore(&x->wait.lock, flags); return true; } EXPORT_SYMBOL(completion_done); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 73a0d492f877..8415bdbb44db 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7443,7 +7443,10 @@ void migrate_disable(void) return; } #ifdef CONFIG_SCHED_DEBUG - WARN_ON_ONCE(p->migrate_disable_atomic); + if (unlikely(p->migrate_disable_atomic)) { + tracing_off(); + WARN_ON_ONCE(1); + } #endif if (p->migrate_disable) { @@ -7473,7 +7476,10 @@ void migrate_enable(void) } #ifdef CONFIG_SCHED_DEBUG - WARN_ON_ONCE(p->migrate_disable_atomic); + if (unlikely(p->migrate_disable_atomic)) { + tracing_off(); + WARN_ON_ONCE(1); + } #endif WARN_ON_ONCE(p->migrate_disable <= 0); diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c index 66b59ac77c22..c7cb30cdd1b7 100644 --- a/kernel/sched/swait.c +++ b/kernel/sched/swait.c @@ -32,6 +32,25 @@ void swake_up_locked(struct swait_queue_head *q) } EXPORT_SYMBOL(swake_up_locked); +void swake_up_all_locked(struct swait_queue_head *q) +{ + struct swait_queue *curr; + int wakes = 0; + + while (!list_empty(&q->task_list)) { + + curr = list_first_entry(&q->task_list, typeof(*curr), + task_list); + wake_up_process(curr->task); + list_del_init(&curr->task_list); + wakes++; + } + if (pm_in_action) + return; + WARN(wakes > 2, "complete_all() with %d waiters\n", wakes); +} +EXPORT_SYMBOL(swake_up_all_locked); + void swake_up_one(struct swait_queue_head *q) { unsigned long flags; @@ -69,7 +88,7 @@ void swake_up_all(struct swait_queue_head *q) } EXPORT_SYMBOL(swake_up_all); -static void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait) +void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait) { wait->task = current; if (list_empty(&wait->task_list)) -- Gitee From 8401a6b6aeac63cbb6048b7604875e02cf029286 Mon Sep 17 00:00:00 2001 From: zr_yy Date: Mon, 5 Jun 2023 11:26:46 +0800 Subject: [PATCH 4/6] fs/aio: simple simple work commit 864971842ede76748f6c198f112d6857b19196fa upstream. |BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:768 |in_atomic(): 1, irqs_disabled(): 0, pid: 26, name: rcuos/2 |2 locks held by rcuos/2/26: | #0: (rcu_callback){.+.+..}, at: [] rcu_nocb_kthread+0x1e2/0x380 | #1: (rcu_read_lock_sched){.+.+..}, at: [] percpu_ref_kill_rcu+0xa6/0x1c0 |Preemption disabled at:[] rcu_nocb_kthread+0x263/0x380 |Call Trace: | [] dump_stack+0x4e/0x9c | [] __might_sleep+0xfb/0x170 | [] rt_spin_lock+0x24/0x70 | [] free_ioctx_users+0x30/0x130 | [] percpu_ref_kill_rcu+0x1b4/0x1c0 | [] rcu_nocb_kthread+0x263/0x380 | [] kthread+0xd6/0xf0 | [] ret_from_fork+0x7c/0xb0 replace this preempt_disable() friendly swork. Reported-By: Mike Galbraith Suggested-by: Benjamin LaHaise Signed-off-by: Sebastian Andrzej Siewior --- fs/aio.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 9b09aaa8aedd..94c37e04b0f5 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -121,6 +122,7 @@ struct kioctx { long nr_pages; struct rcu_work free_rwork; /* see free_ioctx() */ + struct swork_event free_swork; /* see free_ioctx() */ /* * signals when all in-flight requests are done @@ -266,6 +268,7 @@ static int __init aio_setup(void) .mount = aio_mount, .kill_sb = kill_anon_super, }; + BUG_ON(swork_get()); aio_mnt = kern_mount(&aio_fs); if (IS_ERR(aio_mnt)) panic("Failed to create aio fs mount."); @@ -607,9 +610,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref) * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - * now it's safe to cancel any that need to be. */ -static void free_ioctx_users(struct percpu_ref *ref) +static void free_ioctx_users_work(struct swork_event *sev) { - struct kioctx *ctx = container_of(ref, struct kioctx, users); + struct kioctx *ctx = container_of(sev, struct kioctx, free_swork); struct aio_kiocb *req; spin_lock_irq(&ctx->ctx_lock); @@ -627,6 +630,14 @@ static void free_ioctx_users(struct percpu_ref *ref) percpu_ref_put(&ctx->reqs); } +static void free_ioctx_users(struct percpu_ref *ref) +{ + struct kioctx *ctx = container_of(ref, struct kioctx, users); + + INIT_SWORK(&ctx->free_swork, free_ioctx_users_work); + swork_queue(&ctx->free_swork); +} + static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) { unsigned i, new_nr; -- Gitee From 1060e9eaaa47e5ec6ba70e80192ff9681a4f4c53 Mon Sep 17 00:00:00 2001 From: zr_yy Date: Mon, 5 Jun 2023 11:27:00 +0800 Subject: [PATCH 5/6] genirq: Do not invoke the affinity callback via a workqueue on RT commit 06003e1c96291d4d372f2e31c988999c2ef34f31 upstream. Joe Korty reported, that __irq_set_affinity_locked() schedules a workqueue while holding a rawlock which results in a might_sleep() warning. This patch uses swork_queue() instead. Signed-off-by: Sebastian Andrzej Siewior --- include/linux/interrupt.h | 6 +++++ kernel/irq/manage.c | 46 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 119960618dd1..8f2acd69e716 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -245,6 +246,7 @@ extern void resume_device_irqs(void); * struct irq_affinity_notify - context for notification of IRQ affinity changes * @irq: Interrupt to which notification applies * @kref: Reference count, for internal use + * @swork: Swork item, for internal use * @work: Work item, for internal use * @notify: Function to be called on change. This will be * called in process context. @@ -256,7 +258,11 @@ extern void resume_device_irqs(void); struct irq_affinity_notify { unsigned int irq; struct kref kref; +#ifdef CONFIG_PREEMPT_RT_BASE + struct swork_event swork; +#else struct work_struct work; +#endif void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask); void (*release)(struct kref *ref); }; diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 35eb5dde8a06..1051f9c29492 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -322,7 +322,12 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, if (desc->affinity_notify) { kref_get(&desc->affinity_notify->kref); + +#ifdef CONFIG_PREEMPT_RT_BASE + swork_queue(&desc->affinity_notify->swork); +#else schedule_work(&desc->affinity_notify->work); +#endif } irqd_set(data, IRQD_AFFINITY_SET); @@ -360,10 +365,8 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) } EXPORT_SYMBOL_GPL(irq_set_affinity_hint); -static void irq_affinity_notify(struct work_struct *work) +static void _irq_affinity_notify(struct irq_affinity_notify *notify) { - struct irq_affinity_notify *notify = - container_of(work, struct irq_affinity_notify, work); struct irq_desc *desc = irq_to_desc(notify->irq); cpumask_var_t cpumask; unsigned long flags; @@ -385,6 +388,35 @@ static void irq_affinity_notify(struct work_struct *work) kref_put(¬ify->kref, notify->release); } +#ifdef CONFIG_PREEMPT_RT_BASE +static void init_helper_thread(void) +{ + static int init_sworker_once; + + if (init_sworker_once) + return; + if (WARN_ON(swork_get())) + return; + init_sworker_once = 1; +} + +static void irq_affinity_notify(struct swork_event *swork) +{ + struct irq_affinity_notify *notify = + container_of(swork, struct irq_affinity_notify, swork); + _irq_affinity_notify(notify); +} + +#else + +static void irq_affinity_notify(struct work_struct *work) +{ + struct irq_affinity_notify *notify = + container_of(work, struct irq_affinity_notify, work); + _irq_affinity_notify(notify); +} +#endif + /** * irq_set_affinity_notifier - control notification of IRQ affinity changes * @irq: Interrupt for which to enable/disable notification @@ -413,7 +445,12 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) if (notify) { notify->irq = irq; kref_init(¬ify->kref); +#ifdef CONFIG_PREEMPT_RT_BASE + INIT_SWORK(¬ify->swork, irq_affinity_notify); + init_helper_thread(); +#else INIT_WORK(¬ify->work, irq_affinity_notify); +#endif } raw_spin_lock_irqsave(&desc->lock, flags); @@ -422,7 +459,10 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) raw_spin_unlock_irqrestore(&desc->lock, flags); if (old_notify) { +#ifndef CONFIG_PREEMPT_RT_BASE + /* Need to address this for PREEMPT_RT */ cancel_work_sync(&old_notify->work); +#endif kref_put(&old_notify->kref, old_notify->release); } -- Gitee From 175013467ab8dbc1c68e040db7ebd404f1322e86 Mon Sep 17 00:00:00 2001 From: zr_yy Date: Mon, 5 Jun 2023 11:27:10 +0800 Subject: [PATCH 6/6] time/hrtimer: avoid schedule_work() with interrupts disabled commit cd74856820b82cb80ddd3b12dde3caad12642b3c upstream. The NOHZ code tries to schedule a workqueue with interrupts disabled. Since this does not work -RT I am switching it to swork instead. Signed-off-by: Sebastian Andrzej Siewior --- kernel/time/timer.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index ecc23d2c21a9..753b75751f0a 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -218,8 +218,7 @@ static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]); static DEFINE_STATIC_KEY_FALSE(timers_nohz_active); static DEFINE_MUTEX(timer_keys_mutex); -static void timer_update_keys(struct work_struct *work); -static DECLARE_WORK(timer_update_work, timer_update_keys); +static struct swork_event timer_update_swork; #ifdef CONFIG_SMP unsigned int sysctl_timer_migration = 1; @@ -237,7 +236,7 @@ static void timers_update_migration(void) static inline void timers_update_migration(void) { } #endif /* !CONFIG_SMP */ -static void timer_update_keys(struct work_struct *work) +static void timer_update_keys(struct swork_event *event) { mutex_lock(&timer_keys_mutex); timers_update_migration(); @@ -247,9 +246,17 @@ static void timer_update_keys(struct work_struct *work) void timers_update_nohz(void) { - schedule_work(&timer_update_work); + swork_queue(&timer_update_swork); } +static __init int hrtimer_init_thread(void) +{ + WARN_ON(swork_get()); + INIT_SWORK(&timer_update_swork, timer_update_keys); + return 0; +} +early_initcall(hrtimer_init_thread); + int timer_migration_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) -- Gitee