From 4dbb17f665526412f2e7fbf59098ed07bd2a3b33 Mon Sep 17 00:00:00 2001 From: meganz009 Date: Fri, 9 Jun 2023 10:47:31 +0800 Subject: [PATCH 1/4] fs/dcache: use swait_queue instead of waitqueue commit 558647aa05135aa727be9198f29a041686e683af upstream. __d_lookup_done() invokes wake_up_all() while holding a hlist_bl_lock() which disables preemption. As a workaround convert it to swait. Signed-off-by: Sebastian Andrzej Siewior --- fs/cifs/readdir.c | 2 +- fs/dcache.c | 27 +++++++++++++++------------ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 3925a7bfc74d..33f7723fb83e 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -80,7 +80,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, struct inode *inode; struct super_block *sb = parent->d_sb; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); cifs_dbg(FYI, "%s: for %s\n", __func__, name->name); diff --git a/fs/dcache.c b/fs/dcache.c index 8f868760e799..f08ed98cf372 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2497,21 +2497,24 @@ static inline void end_dir_add(struct inode *dir, unsigned n) static void d_wait_lookup(struct dentry *dentry) { - if (d_in_lookup(dentry)) { - DECLARE_WAITQUEUE(wait, current); - add_wait_queue(dentry->d_wait, &wait); - do { - set_current_state(TASK_UNINTERRUPTIBLE); - spin_unlock(&dentry->d_lock); - schedule(); - spin_lock(&dentry->d_lock); - } while (d_in_lookup(dentry)); - } + struct swait_queue __wait; + + if (!d_in_lookup(dentry)) + return; + + INIT_LIST_HEAD(&__wait.task_list); + do { + prepare_to_swait_exclusive(dentry->d_wait, &__wait, TASK_UNINTERRUPTIBLE); + spin_unlock(&dentry->d_lock); + schedule(); + spin_lock(&dentry->d_lock); + } while (d_in_lookup(dentry)); + finish_swait(dentry->d_wait, &__wait); } struct dentry *d_alloc_parallel(struct dentry *parent, const struct qstr *name, - wait_queue_head_t *wq) + struct swait_queue_head *wq) { unsigned int hash = name->hash; struct hlist_bl_head *b = in_lookup_hash(parent, hash); @@ -2626,7 +2629,7 @@ void __d_lookup_done(struct dentry *dentry) hlist_bl_lock(b); dentry->d_flags &= ~DCACHE_PAR_LOOKUP; __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); - wake_up_all(dentry->d_wait); + swake_up_all(dentry->d_wait); dentry->d_wait = NULL; hlist_bl_unlock(b); INIT_HLIST_NODE(&dentry->d_u.d_alias); -- Gitee From 42615e0e32230d71c98aacc511210f34296ae918 Mon Sep 17 00:00:00 2001 From: meganz009 Date: Fri, 9 Jun 2023 10:47:49 +0800 Subject: [PATCH 2/4] workqueue: Use normal rcu commit a65796914416941351ac0cd0271c5da76bed792a upstream. There is no need for sched_rcu. The undocumented reason why sched_rcu is used is to avoid a few explicit rcu_read_lock()/unlock() pairs by abusing the fact that sched_rcu reader side critical sections are also protected by preempt or irq disabled regions. Signed-off-by: Thomas Gleixner --- kernel/workqueue.c | 95 +++++++++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 43 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index a8a260a8f4bf..d0d9b126bc0d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -127,7 +127,7 @@ enum { * * PL: wq_pool_mutex protected. * - * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads. + * PR: wq_pool_mutex protected for writes. RCU protected for reads. * * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads. * @@ -136,7 +136,7 @@ enum { * * WQ: wq->mutex protected. 
* - * WR: wq->mutex protected for writes. Sched-RCU protected for reads. + * WR: wq->mutex protected for writes. RCU protected for reads. * * MD: wq_mayday_lock protected. */ @@ -183,7 +183,7 @@ struct worker_pool { atomic_t nr_running ____cacheline_aligned_in_smp; /* - * Destruction of pool is sched-RCU protected to allow dereferences + * Destruction of pool is RCU protected to allow dereferences * from get_work_pool(). */ struct rcu_head rcu; @@ -212,7 +212,7 @@ struct pool_workqueue { /* * Release of unbound pwq is punted to system_wq. See put_pwq() * and pwq_unbound_release_workfn() for details. pool_workqueue - * itself is also sched-RCU protected so that the first pwq can be + * itself is also RCU protected so that the first pwq can be * determined without grabbing wq->mutex. */ struct work_struct unbound_release_work; @@ -357,20 +357,20 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); #include #define assert_rcu_or_pool_mutex() \ - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ !lockdep_is_held(&wq_pool_mutex), \ - "sched RCU or wq_pool_mutex should be held") + "RCU or wq_pool_mutex should be held") #define assert_rcu_or_wq_mutex(wq) \ - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ !lockdep_is_held(&wq->mutex), \ - "sched RCU or wq->mutex should be held") + "RCU or wq->mutex should be held") #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \ - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ !lockdep_is_held(&wq->mutex) && \ !lockdep_is_held(&wq_pool_mutex), \ - "sched RCU, wq->mutex or wq_pool_mutex should be held") + "RCU, wq->mutex or wq_pool_mutex should be held") #define for_each_cpu_worker_pool(pool, cpu) \ for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ @@ -382,7 +382,7 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); * @pool: iteration cursor * @pi: integer used for iteration * - * This must be called either with wq_pool_mutex held or sched RCU read + * This must be called either with wq_pool_mutex held or RCU read * locked. If the pool needs to be used beyond the locking in effect, the * caller is responsible for guaranteeing that the pool stays online. * @@ -414,7 +414,7 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); * @pwq: iteration cursor * @wq: the target workqueue * - * This must be called either with wq->mutex held or sched RCU read locked. + * This must be called either with wq->mutex held or RCU read locked. * If the pwq needs to be used beyond the locking in effect, the caller is * responsible for guaranteeing that the pwq stays online. * @@ -550,7 +550,7 @@ static int worker_pool_assign_id(struct worker_pool *pool) * @wq: the target workqueue * @node: the node ID * - * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU + * This must be called with any of wq_pool_mutex, wq->mutex or RCU * read locked. * If the pwq needs to be used beyond the locking in effect, the caller is * responsible for guaranteeing that the pwq stays online. @@ -694,8 +694,8 @@ static struct pool_workqueue *get_work_pwq(struct work_struct *work) * @work: the work item of interest * * Pools are created and destroyed under wq_pool_mutex, and allows read - * access under sched-RCU read lock. As such, this function should be - * called under wq_pool_mutex or with preemption disabled. + * access under RCU read lock. 
As such, this function should be + * called under wq_pool_mutex or inside of a rcu_read_lock() region. * * All fields of the returned pool are accessible as long as the above * mentioned locking is in effect. If the returned pool needs to be used @@ -1114,7 +1114,7 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq) { if (pwq) { /* - * As both pwqs and pools are sched-RCU protected, the + * As both pwqs and pools are RCU protected, the * following lock operations are safe. */ spin_lock_irq(&pwq->pool->lock); @@ -1242,6 +1242,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) return 0; + rcu_read_lock(); /* * The queueing is in progress, or it is already queued. Try to * steal it from ->worklist without clearing WORK_STRUCT_PENDING. @@ -1280,10 +1281,12 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, set_work_pool_and_keep_pending(work, pool->id); spin_unlock(&pool->lock); + rcu_read_unlock(); return 1; } spin_unlock(&pool->lock); fail: + rcu_read_unlock(); local_irq_restore(*flags); if (work_is_canceling(work)) return -ENOENT; @@ -1397,6 +1400,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, if (unlikely(wq->flags & __WQ_DRAINING) && WARN_ON_ONCE(!is_chained_work(wq))) return; + rcu_read_lock(); retry: if (req_cpu == WORK_CPU_UNBOUND) cpu = wq_select_unbound_cpu(raw_smp_processor_id()); @@ -1453,10 +1457,8 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, /* pwq determined, queue */ trace_workqueue_queue_work(req_cpu, pwq, work); - if (WARN_ON(!list_empty(&work->entry))) { - spin_unlock(&pwq->pool->lock); - return; - } + if (WARN_ON(!list_empty(&work->entry))) + goto out; pwq->nr_in_flight[pwq->work_color]++; work_flags = work_color_to_flags(pwq->work_color); @@ -1474,7 +1476,9 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, insert_work(pwq, work, worklist, work_flags); +out: spin_unlock(&pwq->pool->lock); + rcu_read_unlock(); } /** @@ -2878,14 +2882,14 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, might_sleep(); - local_irq_disable(); + rcu_read_lock(); pool = get_work_pool(work); if (!pool) { - local_irq_enable(); + rcu_read_unlock(); return false; } - spin_lock(&pool->lock); + spin_lock_irq(&pool->lock); /* see the comment in try_to_grab_pending() with the same code */ pwq = get_work_pwq(work); if (pwq) { @@ -2917,10 +2921,11 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, lock_map_acquire(&pwq->wq->lockdep_map); lock_map_release(&pwq->wq->lockdep_map); } - + rcu_read_unlock(); return true; already_gone: spin_unlock_irq(&pool->lock); + rcu_read_unlock(); return false; } @@ -3367,7 +3372,7 @@ static void rcu_free_pool(struct rcu_head *rcu) * put_unbound_pool - put a worker_pool * @pool: worker_pool to put * - * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU + * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU * safe manner. get_unbound_pool() calls this function on its failure path * and this function should be able to release pools which went through, * successfully or not, init_worker_pool(). 
@@ -3421,8 +3426,8 @@ static void put_unbound_pool(struct worker_pool *pool) del_timer_sync(&pool->idle_timer); del_timer_sync(&pool->mayday_timer); - /* sched-RCU protected to allow dereferences from get_work_pool() */ - call_rcu_sched(&pool->rcu, rcu_free_pool); + /* RCU protected to allow dereferences from get_work_pool() */ + call_rcu(&pool->rcu, rcu_free_pool); } /** @@ -3529,14 +3534,14 @@ static void pwq_unbound_release_workfn(struct work_struct *work) put_unbound_pool(pool); mutex_unlock(&wq_pool_mutex); - call_rcu_sched(&pwq->rcu, rcu_free_pwq); + call_rcu(&pwq->rcu, rcu_free_pwq); /* * If we're the last pwq going away, @wq is already dead and no one * is gonna access it anymore. Schedule RCU free. */ if (is_last) - call_rcu_sched(&wq->rcu, rcu_free_wq); + call_rcu(&wq->rcu, rcu_free_wq); } /** @@ -4236,7 +4241,7 @@ void destroy_workqueue(struct workqueue_struct *wq) * The base ref is never dropped on per-cpu pwqs. Directly * schedule RCU free. */ - call_rcu_sched(&wq->rcu, rcu_free_wq); + call_rcu(&wq->rcu, rcu_free_wq); } else { /* * We're the sole accessor of @wq at this point. Directly @@ -4346,7 +4351,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq) struct pool_workqueue *pwq; bool ret; - rcu_read_lock_sched(); + rcu_read_lock(); + preempt_disable(); if (cpu == WORK_CPU_UNBOUND) cpu = smp_processor_id(); @@ -4357,7 +4363,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq) pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu)); ret = !list_empty(&pwq->delayed_works); - rcu_read_unlock_sched(); + preempt_enable(); + rcu_read_unlock(); return ret; } @@ -4383,15 +4390,15 @@ unsigned int work_busy(struct work_struct *work) if (work_pending(work)) ret |= WORK_BUSY_PENDING; - local_irq_save(flags); + rcu_read_lock(); pool = get_work_pool(work); if (pool) { - spin_lock(&pool->lock); + spin_lock_irqsave(&pool->lock, flags); if (find_worker_executing_work(pool, work)) ret |= WORK_BUSY_RUNNING; - spin_unlock(&pool->lock); + spin_unlock_irqrestore(&pool->lock, flags); } - local_irq_restore(flags); + rcu_read_unlock(); return ret; } @@ -4576,7 +4583,7 @@ void show_workqueue_state(void) unsigned long flags; int pi; - rcu_read_lock_sched(); + rcu_read_lock(); pr_info("Showing busy workqueues and worker pools:\n"); @@ -4641,7 +4648,7 @@ void show_workqueue_state(void) touch_nmi_watchdog(); } - rcu_read_unlock_sched(); + rcu_read_unlock(); } /* used to show worker information through /proc/PID/{comm,stat,status} */ @@ -5028,16 +5035,16 @@ bool freeze_workqueues_busy(void) * nr_active is monotonically decreasing. It's safe * to peek without lock. 
*/ - rcu_read_lock_sched(); + rcu_read_lock(); for_each_pwq(pwq, wq) { WARN_ON_ONCE(pwq->nr_active < 0); if (pwq->nr_active) { busy = true; - rcu_read_unlock_sched(); + rcu_read_unlock(); goto out_unlock; } } - rcu_read_unlock_sched(); + rcu_read_unlock(); } out_unlock: mutex_unlock(&wq_pool_mutex); @@ -5232,7 +5239,8 @@ static ssize_t wq_pool_ids_show(struct device *dev, const char *delim = ""; int node, written = 0; - rcu_read_lock_sched(); + get_online_cpus(); + rcu_read_lock(); for_each_node(node) { written += scnprintf(buf + written, PAGE_SIZE - written, "%s%d:%d", delim, node, @@ -5240,7 +5248,8 @@ static ssize_t wq_pool_ids_show(struct device *dev, delim = " "; } written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); - rcu_read_unlock_sched(); + rcu_read_unlock(); + put_online_cpus(); return written; } -- Gitee From a020d31e95bcf0911144c20ff84bd34f9b96f6fc Mon Sep 17 00:00:00 2001 From: meganz009 Date: Fri, 9 Jun 2023 10:48:05 +0800 Subject: [PATCH 3/4] workqueue: Use local irq lock instead of irq disable regions commit 670717ab259111dcf589c98670ff74a206bf87d3 upstream. Use a local_irq_lock as a replacement for irq off regions. We keep the semantic of irq-off in regard to the pool->lock and remain preemptible. Signed-off-by: Thomas Gleixner --- kernel/workqueue.c | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d0d9b126bc0d..ce78223154f2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -49,6 +49,7 @@ #include #include #include +#include #include "workqueue_internal.h" @@ -350,6 +351,8 @@ EXPORT_SYMBOL_GPL(system_power_efficient_wq); struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly; EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq); +static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock); + static int worker_thread(void *__worker); static void workqueue_sysfs_unregister(struct workqueue_struct *wq); @@ -1117,9 +1120,11 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq) * As both pwqs and pools are RCU protected, the * following lock operations are safe. */ - spin_lock_irq(&pwq->pool->lock); + rcu_read_lock(); + local_spin_lock_irq(pendingb_lock, &pwq->pool->lock); put_pwq(pwq); - spin_unlock_irq(&pwq->pool->lock); + local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock); + rcu_read_unlock(); } } @@ -1223,7 +1228,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, struct worker_pool *pool; struct pool_workqueue *pwq; - local_irq_save(*flags); + local_lock_irqsave(pendingb_lock, *flags); /* try to steal the timer if it exists */ if (is_dwork) { @@ -1287,7 +1292,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, spin_unlock(&pool->lock); fail: rcu_read_unlock(); - local_irq_restore(*flags); + local_unlock_irqrestore(pendingb_lock, *flags); if (work_is_canceling(work)) return -ENOENT; cpu_relax(); @@ -1392,7 +1397,13 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, * queued or lose PENDING. Grabbing PENDING and queueing should * happen with IRQ disabled. */ +#ifndef CONFIG_PREEMPT_RT_FULL + /* + * nort: On RT the "interrupts-disabled" rule has been replaced with + * pendingb_lock. 
+ */ lockdep_assert_irqs_disabled(); +#endif debug_work_activate(work); @@ -1498,14 +1509,14 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq, bool ret = false; unsigned long flags; - local_irq_save(flags); + local_lock_irqsave(pendingb_lock,flags); if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { __queue_work(cpu, wq, work); ret = true; } - local_irq_restore(flags); + local_unlock_irqrestore(pendingb_lock, flags); return ret; } EXPORT_SYMBOL(queue_work_on); @@ -1514,8 +1525,11 @@ void delayed_work_timer_fn(struct timer_list *t) { struct delayed_work *dwork = from_timer(dwork, t, timer); + /* XXX */ + /* local_lock(pendingb_lock); */ /* should have been called from irqsafe timer with irq already off */ __queue_work(dwork->cpu, dwork->wq, &dwork->work); + /* local_unlock(pendingb_lock); */ } EXPORT_SYMBOL(delayed_work_timer_fn); @@ -1570,14 +1584,14 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, unsigned long flags; /* read the comment in __queue_work() */ - local_irq_save(flags); + local_lock_irqsave(pendingb_lock, flags); if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { __queue_delayed_work(cpu, wq, dwork, delay); ret = true; } - local_irq_restore(flags); + local_unlock_irqrestore(pendingb_lock, flags); return ret; } EXPORT_SYMBOL(queue_delayed_work_on); @@ -1612,7 +1626,7 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, if (likely(ret >= 0)) { __queue_delayed_work(cpu, wq, dwork, delay); - local_irq_restore(flags); + local_unlock_irqrestore(pendingb_lock, flags); } /* -ENOENT from try_to_grab_pending() becomes %true */ @@ -1623,11 +1637,12 @@ EXPORT_SYMBOL_GPL(mod_delayed_work_on); static void rcu_work_rcufn(struct rcu_head *rcu) { struct rcu_work *rwork = container_of(rcu, struct rcu_work, rcu); + unsigned long flags; /* read the comment in __queue_work() */ - local_irq_disable(); + local_lock_irqsave(pendingb_lock, flags); __queue_work(WORK_CPU_UNBOUND, rwork->wq, &rwork->work); - local_irq_enable(); + local_unlock_irqrestore(pendingb_lock, flags); } /** @@ -3025,7 +3040,7 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) /* tell other tasks trying to grab @work to back off */ mark_work_canceling(work); - local_irq_restore(flags); + local_unlock_irqrestore(pendingb_lock, flags); /* * This allows canceling during early boot. We know that @work @@ -3086,10 +3101,10 @@ EXPORT_SYMBOL_GPL(cancel_work_sync); */ bool flush_delayed_work(struct delayed_work *dwork) { - local_irq_disable(); + local_lock_irq(pendingb_lock); if (del_timer_sync(&dwork->timer)) __queue_work(dwork->cpu, dwork->wq, &dwork->work); - local_irq_enable(); + local_unlock_irq(pendingb_lock); return flush_work(&dwork->work); } EXPORT_SYMBOL(flush_delayed_work); @@ -3127,7 +3142,7 @@ static bool __cancel_work(struct work_struct *work, bool is_dwork) return false; set_work_pool_and_clear_pending(work, get_work_pool_id(work)); - local_irq_restore(flags); + local_unlock_irqrestore(pendingb_lock, flags); return ret; } -- Gitee From c73df060c460b49dca929baeb7fe35fcf56e0df0 Mon Sep 17 00:00:00 2001 From: meganz009 Date: Fri, 9 Jun 2023 10:48:16 +0800 Subject: [PATCH 4/4] workqueue: Prevent workqueue versus ata-piix livelock commit c6d8c462a41a7c4c64ade776387c5b9394464de6 upstream. An Intel i7 system regularly detected rcu_preempt stalls after the kernel was upgraded from 3.6-rt to 3.8-rt. When the stall happened, disk I/O was no longer possible, unless the system was restarted. 
The kernel message was: INFO: rcu_preempt self-detected stall on CPU { 6} [..] NMI backtrace for cpu 6 CPU 6 Pid: 119, comm: irq/19-ata_piix Not tainted 3.8.13-rt13 #11 Shuttle Inc. SX58/SX58 RIP: 0010:[] [] ip_compute_csum+0x30/0x30 RSP: 0018:ffff880333303cb0 EFLAGS: 00000002 RAX: 0000000000000006 RBX: 00000000000003e9 RCX: 0000000000000034 RDX: 0000000000000000 RSI: ffffffff81aa16d0 RDI: 0000000000000001 RBP: ffff880333303ce8 R08: ffffffff81aa16d0 R09: ffffffff81c1b8cc R10: 0000000000000000 R11: 0000000000000000 R12: 000000000005161f R13: 0000000000000006 R14: ffffffff81aa16d0 R15: 0000000000000002 FS: 0000000000000000(0000) GS:ffff880333300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000003c1b2bb420 CR3: 0000000001a0f000 CR4: 00000000000007e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process irq/19-ata_piix (pid: 119, threadinfo ffff88032d88a000, task ffff88032df80000) Stack: ffffffff8124cb32 000000000005161e 00000000000003e9 0000000000001000 0000000000009022 ffffffff81aa16d0 0000000000000002 ffff880333303cf8 ffffffff8124caa9 ffff880333303d08 ffffffff8124cad2 ffff880333303d28 Call Trace: [] ? delay_tsc+0x33/0xe3 [] __delay+0xf/0x11 [] __const_udelay+0x27/0x29 [] native_safe_apic_wait_icr_idle+0x39/0x45 [] __default_send_IPI_dest_field.constprop.0+0x1e/0x58 [] default_send_IPI_mask_sequence_phys+0x49/0x7d [] physflat_send_IPI_all+0x17/0x19 [] arch_trigger_all_cpu_backtrace+0x50/0x79 [] rcu_check_callbacks+0x1cb/0x568 [] ? raise_softirq+0x2e/0x35 [] ? tick_sched_do_timer+0x38/0x38 [] update_process_times+0x44/0x55 [] tick_sched_handle+0x4a/0x59 [] tick_sched_timer+0x3c/0x5b [] __run_hrtimer+0x9b/0x158 [] hrtimer_interrupt+0x172/0x2aa [] smp_apic_timer_interrupt+0x76/0x89 [] apic_timer_interrupt+0x6d/0x80 [] ? __local_lock_irqsave+0x17/0x4a [] try_to_grab_pending+0x42/0x17e [] mod_delayed_work_on+0x32/0x88 [] mod_delayed_work+0x1c/0x1e [] blk_run_queue_async+0x37/0x39 [] flush_end_io+0xf1/0x107 [] blk_finish_request+0x21e/0x264 [] blk_end_bidi_request+0x42/0x60 [] blk_end_request+0x10/0x12 [] scsi_io_completion+0x1bf/0x492 [] ? sd_done+0x298/0x2ef [] scsi_finish_command+0xe9/0xf2 [] scsi_softirq_done+0x106/0x10f [] blk_done_softirq+0x77/0x87 [] do_current_softirqs+0x172/0x2e1 [] ? irq_thread_fn+0x3a/0x3a [] local_bh_enable+0x43/0x72 [] irq_forced_thread_fn+0x46/0x52 [] irq_thread+0x8c/0x17c [] ? irq_thread+0x17c/0x17c [] ? wake_threads_waitq+0x44/0x44 [] kthread+0x8d/0x95 [] ? __kthread_parkme+0x65/0x65 [] ret_from_fork+0x7c/0xb0 [] ? __kthread_parkme+0x65/0x65 The state of softirqd of this CPU at the time of the crash was: ksoftirqd/6 R running task 0 53 2 0x00000000 ffff88032fc39d18 0000000000000046 ffff88033330c4c0 ffff8803303f4710 ffff88032fc39fd8 ffff88032fc39fd8 0000000000000000 0000000000062500 ffff88032df88000 ffff8803303f4710 0000000000000000 ffff88032fc38000 Call Trace: [] ? __queue_work+0x27c/0x27c [] preempt_schedule+0x61/0x76 [] migrate_enable+0xe5/0x1df [] ? __queue_work+0x27c/0x27c [] run_timer_softirq+0x161/0x1d6 [] do_current_softirqs+0x172/0x2e1 [] run_ksoftirqd+0x2d/0x45 [] smpboot_thread_fn+0x2ea/0x308 [] ? test_ti_thread_flag+0xc/0xc [] ? test_ti_thread_flag+0xc/0xc [] kthread+0x8d/0x95 [] ? __kthread_parkme+0x65/0x65 [] ret_from_fork+0x7c/0xb0 [] ? __kthread_parkme+0x65/0x65 Apparently, the softirq demon and the ata_piix IRQ handler were waiting for each other to finish ending up in a livelock. 
After the below patch was applied, the system no longer crashes.

Reported-by: Carsten Emde
Proposed-by: Thomas Gleixner
Tested-by: Carsten Emde
Signed-off-by: Carsten Emde
Signed-off-by: Thomas Gleixner
Signed-off-by: Sebastian Andrzej Siewior
---
 kernel/workqueue.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ce78223154f2..39521eca80be 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -50,6 +50,7 @@
 #include
 #include
 #include
+#include
 #include "workqueue_internal.h"
@@ -1295,7 +1296,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
 	local_unlock_irqrestore(pendingb_lock, *flags);
 	if (work_is_canceling(work))
 		return -ENOENT;
-	cpu_relax();
+	cpu_chill();
 	return -EAGAIN;
 }
--
Gitee
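
The notes below sketch the techniques these patches rely on; none of the code in them is part of the patches themselves.

The dcache change in patch 1 works because the simple wait queue ("swait") wake path only takes a raw spinlock, so swake_up_all() can run inside the preemption-disabled hlist_bl_lock() section, which the old wake_up_all() could not do on PREEMPT_RT (there the regular waitqueue lock is a sleeping spinlock). A minimal sketch of the same prepare/check/schedule/finish pattern that the new d_wait_lookup() uses -- the demo_* names are invented for illustration and only <linux/swait.h> and <linux/sched.h> are assumed:

/*
 * Minimal sketch of the swait wait/wake pattern from patch 1.
 * The demo_* names are invented; not code from the patch.
 */
#include <linux/swait.h>
#include <linux/sched.h>

static DECLARE_SWAIT_QUEUE_HEAD(demo_wq);
static bool demo_done;

/* Waiter: block until demo_done is set, re-checking after every wakeup. */
static void demo_wait_for_done(void)
{
	DECLARE_SWAITQUEUE(wait);

	for (;;) {
		prepare_to_swait_exclusive(&demo_wq, &wait, TASK_UNINTERRUPTIBLE);
		if (READ_ONCE(demo_done))
			break;
		schedule();
	}
	finish_swait(&demo_wq, &wait);
}

/*
 * Waker: safe to call from a preemption-disabled (but not IRQ-disabled)
 * section, because swake_up_all() only uses the queue's raw spinlock.
 */
static void demo_complete(void)
{
	WRITE_ONCE(demo_done, true);
	swake_up_all(&demo_wq);
}

The loop mirrors d_wait_lookup(): the task state is set before the condition is re-checked, so a wakeup that arrives between the check and schedule() is not lost.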
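
Patch 2 makes the read sides explicit: wherever the workqueue code used to rely on preempt- or irq-disabled regions acting as sched-RCU readers, it now takes rcu_read_lock()/rcu_read_unlock(), and pools/pwqs are freed with call_rcu() instead of call_rcu_sched(). A generic sketch of that reader/updater shape -- the demo_pool type and all demo_* helpers below are invented for illustration, not taken from the patch:

/*
 * Hedged sketch of the explicit RCU reader/updater pattern from patch 2.
 * The demo_pool type and demo_* helpers are invented for illustration.
 */
#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_pool {
	int id;
	struct rcu_head rcu;
};

static struct demo_pool __rcu *demo_current;

/*
 * Reader: an explicit rcu_read_lock() section instead of relying on a
 * preempt/irq-disabled region to act as a sched-RCU read side.
 */
static int demo_read_id(void)
{
	struct demo_pool *p;
	int id = -1;

	rcu_read_lock();
	p = rcu_dereference(demo_current);
	if (p)
		id = p->id;
	rcu_read_unlock();

	return id;
}

static void demo_free_pool(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct demo_pool, rcu));
}

/*
 * Updater: publish the new pool, then free the old one after a grace
 * period with call_rcu(), matching the call_rcu_sched() -> call_rcu()
 * conversion above.
 */
static void demo_replace_pool(struct demo_pool *new_pool)
{
	struct demo_pool *old;

	old = rcu_dereference_protected(demo_current, 1);
	rcu_assign_pointer(demo_current, new_pool);
	if (old)
		call_rcu(&old->rcu, demo_free_pool);
}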
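
Patches 3 and 4 belong together: the pending-bit protection moves from raw local_irq_save()/restore() to an RT local lock (pendingb_lock), which keeps the irq-off semantics relative to pool->lock while remaining preemptible, and the retry path in try_to_grab_pending() backs off with cpu_chill() instead of cpu_relax(), so it cannot livelock against a preempted owner. A minimal sketch of that idiom, assuming an RT-patched kernel that provides the locallock API and cpu_chill() -- the demo_* names are invented:

/*
 * Sketch of the RT local-lock plus cpu_chill() retry idiom from patches 3/4.
 * Assumes an RT-patched tree providing <linux/locallock.h> and cpu_chill();
 * the demo_* names are invented for illustration.
 */
#include <linux/bitops.h>
#include <linux/delay.h>
#include <linux/locallock.h>
#include <linux/workqueue.h>

static DEFINE_LOCAL_IRQ_LOCK(demo_pending_lock);

/*
 * Claim the PENDING bit of a work item. On !RT the local lock disables
 * interrupts exactly like local_irq_save(); on RT it is a per-CPU sleeping
 * lock, so the section stays preemptible.
 */
static void demo_claim_pending(struct work_struct *work)
{
	unsigned long flags;

	for (;;) {
		local_lock_irqsave(demo_pending_lock, flags);
		if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT,
				      work_data_bits(work))) {
			local_unlock_irqrestore(demo_pending_lock, flags);
			return;	/* bit claimed, work can be queued */
		}
		local_unlock_irqrestore(demo_pending_lock, flags);

		/*
		 * cpu_chill() sleeps briefly on RT instead of busy-waiting
		 * like cpu_relax(), giving a preempted owner time to run.
		 */
		cpu_chill();
	}
}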