From 1386ae22d47382b1b6dea8028a57af8f5c8efafe Mon Sep 17 00:00:00 2001 From: meganz009 Date: Thu, 1 Jun 2023 14:11:41 +0800 Subject: [PATCH 1/7] kthread: convert worker lock to raw spinlock commit e2fc3df409c6764dc2c394061aae000a86437a7c upstream. In order to enable the queuing of kthread work items from hardirq context even when PREEMPT_RT_FULL is enabled, convert the worker spin_lock to a raw_spin_lock. This is only acceptable to do because the work performed under the lock is well-bounded and minimal. Cc: Sebastian Andrzej Siewior Cc: Guenter Roeck Reported-and-tested-by: Steffen Trumtrar Reported-by: Tim Sander Signed-off-by: Julia Cartwright Signed-off-by: Sebastian Andrzej Siewior --- include/linux/kthread.h | 2 +- kernel/kthread.c | 42 ++++++++++++++++++++--------------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/include/linux/kthread.h b/include/linux/kthread.h index edf8f8636ee9..fd6403945fc3 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -84,7 +84,7 @@ enum { struct kthread_worker { unsigned int flags; - spinlock_t lock; + raw_spinlock_t lock; struct list_head work_list; struct list_head delayed_work_list; struct task_struct *task; diff --git a/kernel/kthread.c b/kernel/kthread.c index e3dfad2dbd37..fcb3a1a6e14b 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -600,7 +600,7 @@ void __kthread_init_worker(struct kthread_worker *worker, struct lock_class_key *key) { memset(worker, 0, sizeof(struct kthread_worker)); - spin_lock_init(&worker->lock); + raw_spin_lock_init(&worker->lock); lockdep_set_class_and_name(&worker->lock, key, name); INIT_LIST_HEAD(&worker->work_list); INIT_LIST_HEAD(&worker->delayed_work_list); @@ -642,21 +642,21 @@ int kthread_worker_fn(void *worker_ptr) if (kthread_should_stop()) { __set_current_state(TASK_RUNNING); - spin_lock_irq(&worker->lock); + raw_spin_lock_irq(&worker->lock); worker->task = NULL; - spin_unlock_irq(&worker->lock); + raw_spin_unlock_irq(&worker->lock); return 0; } work = NULL; - spin_lock_irq(&worker->lock); + raw_spin_lock_irq(&worker->lock); if (!list_empty(&worker->work_list)) { work = list_first_entry(&worker->work_list, struct kthread_work, node); list_del_init(&work->node); } worker->current_work = work; - spin_unlock_irq(&worker->lock); + raw_spin_unlock_irq(&worker->lock); if (work) { __set_current_state(TASK_RUNNING); @@ -813,12 +813,12 @@ bool kthread_queue_work(struct kthread_worker *worker, bool ret = false; unsigned long flags; - spin_lock_irqsave(&worker->lock, flags); + raw_spin_lock_irqsave(&worker->lock, flags); if (!queuing_blocked(worker, work)) { kthread_insert_work(worker, work, &worker->work_list); ret = true; } - spin_unlock_irqrestore(&worker->lock, flags); + raw_spin_unlock_irqrestore(&worker->lock, flags); return ret; } EXPORT_SYMBOL_GPL(kthread_queue_work); @@ -844,7 +844,7 @@ void kthread_delayed_work_timer_fn(struct timer_list *t) if (WARN_ON_ONCE(!worker)) return; - spin_lock(&worker->lock); + raw_spin_lock(&worker->lock); /* Work must not be used with >1 worker, see kthread_queue_work(). 
*/ WARN_ON_ONCE(work->worker != worker); @@ -853,7 +853,7 @@ void kthread_delayed_work_timer_fn(struct timer_list *t) list_del_init(&work->node); kthread_insert_work(worker, work, &worker->work_list); - spin_unlock(&worker->lock); + raw_spin_unlock(&worker->lock); } EXPORT_SYMBOL(kthread_delayed_work_timer_fn); @@ -909,14 +909,14 @@ bool kthread_queue_delayed_work(struct kthread_worker *worker, unsigned long flags; bool ret = false; - spin_lock_irqsave(&worker->lock, flags); + raw_spin_lock_irqsave(&worker->lock, flags); if (!queuing_blocked(worker, work)) { __kthread_queue_delayed_work(worker, dwork, delay); ret = true; } - spin_unlock_irqrestore(&worker->lock, flags); + raw_spin_unlock_irqrestore(&worker->lock, flags); return ret; } EXPORT_SYMBOL_GPL(kthread_queue_delayed_work); @@ -952,7 +952,7 @@ void kthread_flush_work(struct kthread_work *work) if (!worker) return; - spin_lock_irq(&worker->lock); + raw_spin_lock_irq(&worker->lock); /* Work must not be used with >1 worker, see kthread_queue_work(). */ WARN_ON_ONCE(work->worker != worker); @@ -964,7 +964,7 @@ void kthread_flush_work(struct kthread_work *work) else noop = true; - spin_unlock_irq(&worker->lock); + raw_spin_unlock_irq(&worker->lock); if (!noop) wait_for_completion(&fwork.done); @@ -997,9 +997,9 @@ static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork, * any queuing is blocked by setting the canceling counter. */ work->canceling++; - spin_unlock_irqrestore(&worker->lock, *flags); + raw_spin_unlock_irqrestore(&worker->lock, *flags); del_timer_sync(&dwork->timer); - spin_lock_irqsave(&worker->lock, *flags); + raw_spin_lock_irqsave(&worker->lock, *flags); work->canceling--; } @@ -1046,7 +1046,7 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker, unsigned long flags; int ret = false; - spin_lock_irqsave(&worker->lock, flags); + raw_spin_lock_irqsave(&worker->lock, flags); /* Do not bother with canceling when never queued. */ if (!work->worker) @@ -1063,7 +1063,7 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker, fast_queue: __kthread_queue_delayed_work(worker, dwork, delay); out: - spin_unlock_irqrestore(&worker->lock, flags); + raw_spin_unlock_irqrestore(&worker->lock, flags); return ret; } EXPORT_SYMBOL_GPL(kthread_mod_delayed_work); @@ -1077,7 +1077,7 @@ static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork) if (!worker) goto out; - spin_lock_irqsave(&worker->lock, flags); + raw_spin_lock_irqsave(&worker->lock, flags); /* Work must not be used with >1 worker, see kthread_queue_work(). */ WARN_ON_ONCE(work->worker != worker); @@ -1091,13 +1091,13 @@ static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork) * In the meantime, block any queuing by setting the canceling counter. */ work->canceling++; - spin_unlock_irqrestore(&worker->lock, flags); + raw_spin_unlock_irqrestore(&worker->lock, flags); kthread_flush_work(work); - spin_lock_irqsave(&worker->lock, flags); + raw_spin_lock_irqsave(&worker->lock, flags); work->canceling--; out_fast: - spin_unlock_irqrestore(&worker->lock, flags); + raw_spin_unlock_irqrestore(&worker->lock, flags); out: return ret; } -- Gitee From b33a8f6df7a26cbcd073d523ac1ad02aa042515f Mon Sep 17 00:00:00 2001 From: meganz009 Date: Thu, 1 Jun 2023 14:13:04 +0800 Subject: [PATCH 2/7] crypto: caam/qi - simplify CGR allocation, freeing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ba865f26206ec12d7d6a140c2a9f6871c7be3d5f upstream. 
[Upstream commit 29e83c757006fd751966bdc53392bb22d74179c6] CGRs (Congestion Groups) have to be freed by the same CPU that initialized them. This is why currently the driver takes special measures; however, using set_cpus_allowed_ptr() is incorrect - as reported by Sebastian. Instead of the generic solution of replacing set_cpus_allowed_ptr() with work_on_cpu_safe(), we use the qman_delete_cgr_safe() QBMan API instead of qman_delete_cgr() - which internally takes care of proper CGR deletion. Link: https://lkml.kernel.org/r/20181005125443.dfhd2asqktm22ney@linutronix.de Reported-by: Sebastian Andrzej Siewior Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/qi.c | 43 ++++------------------------------------ drivers/crypto/caam/qi.h | 2 +- 2 files changed, 5 insertions(+), 40 deletions(-) diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c index 67f7f8c42c93..b84e6c8b1e13 100644 --- a/drivers/crypto/caam/qi.c +++ b/drivers/crypto/caam/qi.c @@ -83,13 +83,6 @@ EXPORT_SYMBOL(caam_congested); static u64 times_congested; #endif -/* - * CPU from where the module initialised. This is required because QMan driver - * requires CGRs to be removed from same CPU from where they were originally - * allocated. - */ -static int mod_init_cpu; - /* * This is a a cache of buffers, from which the users of CAAM QI driver * can allocate short (CAAM_QI_MEMCACHE_SIZE) buffers. It's faster than @@ -492,12 +485,11 @@ void caam_drv_ctx_rel(struct caam_drv_ctx *drv_ctx) } EXPORT_SYMBOL(caam_drv_ctx_rel); -int caam_qi_shutdown(struct device *qidev) +void caam_qi_shutdown(struct device *qidev) { - int i, ret; + int i; struct caam_qi_priv *priv = dev_get_drvdata(qidev); const cpumask_t *cpus = qman_affine_cpus(); - struct cpumask old_cpumask = current->cpus_allowed; for_each_cpu(i, cpus) { struct napi_struct *irqtask; @@ -510,26 +502,12 @@ int caam_qi_shutdown(struct device *qidev) dev_err(qidev, "Rsp FQ kill failed, cpu: %d\n", i); } - /* - * QMan driver requires CGRs to be deleted from same CPU from where they - * were instantiated. Hence we get the module removal execute from the - * same CPU from where it was originally inserted. - */ - set_cpus_allowed_ptr(current, get_cpu_mask(mod_init_cpu)); - - ret = qman_delete_cgr(&priv->cgr); - if (ret) - dev_err(qidev, "Deletion of CGR failed: %d\n", ret); - else - qman_release_cgrid(priv->cgr.cgrid); + qman_delete_cgr_safe(&priv->cgr); + qman_release_cgrid(priv->cgr.cgrid); kmem_cache_destroy(qi_cache); - /* Now that we're done with the CGRs, restore the cpus allowed mask */ - set_cpus_allowed_ptr(current, &old_cpumask); - platform_device_unregister(priv->qi_pdev); - return ret; } static void cgr_cb(struct qman_portal *qm, struct qman_cgr *cgr, int congested) @@ -718,22 +696,11 @@ int caam_qi_init(struct platform_device *caam_pdev) struct device *ctrldev = &caam_pdev->dev, *qidev; struct caam_drv_private *ctrlpriv; const cpumask_t *cpus = qman_affine_cpus(); - struct cpumask old_cpumask = current->cpus_allowed; static struct platform_device_info qi_pdev_info = { .name = "caam_qi", .id = PLATFORM_DEVID_NONE }; - /* - * QMAN requires CGRs to be removed from same CPU+portal from where it - * was originally allocated. Hence we need to note down the - * initialisation CPU and use the same CPU for module exit. - * We select the first CPU to from the list of portal owning CPUs. - * Then we pin module init to this CPU. 
- */ - mod_init_cpu = cpumask_first(cpus); - set_cpus_allowed_ptr(current, get_cpu_mask(mod_init_cpu)); - qi_pdev_info.parent = ctrldev; qi_pdev_info.dma_mask = dma_get_mask(ctrldev); qi_pdev = platform_device_register_full(&qi_pdev_info); @@ -795,8 +762,6 @@ int caam_qi_init(struct platform_device *caam_pdev) return -ENOMEM; } - /* Done with the CGRs; restore the cpus allowed mask */ - set_cpus_allowed_ptr(current, &old_cpumask); #ifdef CONFIG_DEBUG_FS debugfs_create_file("qi_congested", 0444, ctrlpriv->ctl, ×_congested, &caam_fops_u64_ro); diff --git a/drivers/crypto/caam/qi.h b/drivers/crypto/caam/qi.h index 357b69f57072..b6c8acc30853 100644 --- a/drivers/crypto/caam/qi.h +++ b/drivers/crypto/caam/qi.h @@ -174,7 +174,7 @@ int caam_drv_ctx_update(struct caam_drv_ctx *drv_ctx, u32 *sh_desc); void caam_drv_ctx_rel(struct caam_drv_ctx *drv_ctx); int caam_qi_init(struct platform_device *pdev); -int caam_qi_shutdown(struct device *dev); +void caam_qi_shutdown(struct device *dev); /** * qi_cache_alloc - Allocate buffers from CAAM-QI cache -- Gitee From 0618d2dd0069b0fb3b7d38241dfa51080720794f Mon Sep 17 00:00:00 2001 From: meganz009 Date: Thu, 1 Jun 2023 14:23:24 +0800 Subject: [PATCH 3/7] sched/fair: Robustify CFS-bandwidth timer locking commit 147b5bb1f8cd2975bdfa1368ba792f28df1ab8a6 upstream. Traditionally hrtimer callbacks were run with IRQs disabled, but with the introduction of HRTIMER_MODE_SOFT it is possible they run from SoftIRQ context, which does _NOT_ have IRQs disabled. Allow for the CFS bandwidth timers (period_timer and slack_timer) to be ran from SoftIRQ context; this entails removing the assumption that IRQs are already disabled from the locking. While mainline doesn't strictly need this, -RT forces all timers not explicitly marked with MODE_HARD into MODE_SOFT and trips over this. And marking these timers as MODE_HARD doesn't make sense as they're not required for RT operation and can potentially be quite expensive. Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Sebastian Andrzej Siewior Reported-by: Tom Putzeys Tested-by: Mike Galbraith Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20190107125231.GE14122@hirez.programming.kicks-ass.net Signed-off-by: Sebastian Andrzej Siewior --- kernel/sched/fair.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 805686c46450..4c13c00ec6dd 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6697,7 +6697,7 @@ static void distribute_cfs_runtime(struct cfs_bandwidth *cfs_b) struct rq *rq = rq_of(cfs_rq); struct rq_flags rf; - rq_lock(rq, &rf); + rq_lock_irqsave(rq, &rf); if (!cfs_rq_throttled(cfs_rq)) goto next; @@ -6719,7 +6719,7 @@ static void distribute_cfs_runtime(struct cfs_bandwidth *cfs_b) unthrottle_cfs_rq(cfs_rq); next: - rq_unlock(rq, &rf); + rq_unlock_irqrestore(rq, &rf); if (!remaining) break; @@ -6733,7 +6733,7 @@ static void distribute_cfs_runtime(struct cfs_bandwidth *cfs_b) * period the timer is deactivated until scheduling resumes; cfs_b->idle is * used to track this state. 
*/ -static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) +static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, unsigned long flags) { int throttled; @@ -6767,10 +6767,10 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) */ while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) { cfs_b->distribute_running = 1; - raw_spin_unlock(&cfs_b->lock); + raw_spin_unlock_irqrestore(&cfs_b->lock, flags); /* we can't nest cfs_b->lock while distributing bandwidth */ distribute_cfs_runtime(cfs_b); - raw_spin_lock(&cfs_b->lock); + raw_spin_lock_irqsave(&cfs_b->lock, flags); cfs_b->distribute_running = 0; throttled = !list_empty(&cfs_b->throttled_cfs_rq); @@ -6881,17 +6881,18 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) { u64 runtime = 0, slice = sched_cfs_bandwidth_slice(); + unsigned long flags; /* confirm we're still not at a refresh boundary */ - raw_spin_lock(&cfs_b->lock); + raw_spin_lock_irqsave(&cfs_b->lock, flags); cfs_b->slack_started = false; if (cfs_b->distribute_running) { - raw_spin_unlock(&cfs_b->lock); + raw_spin_unlock_irqrestore(&cfs_b->lock, flags); return; } if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) { - raw_spin_unlock(&cfs_b->lock); + raw_spin_unlock_irqrestore(&cfs_b->lock, flags); return; } @@ -6901,16 +6902,16 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) if (runtime) cfs_b->distribute_running = 1; - raw_spin_unlock(&cfs_b->lock); + raw_spin_unlock_irqrestore(&cfs_b->lock, flags); if (!runtime) return; distribute_cfs_runtime(cfs_b); - raw_spin_lock(&cfs_b->lock); + raw_spin_lock_irqsave(&cfs_b->lock, flags); cfs_b->distribute_running = 0; - raw_spin_unlock(&cfs_b->lock); + raw_spin_unlock_irqrestore(&cfs_b->lock, flags); } /* @@ -6991,11 +6992,12 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) { struct cfs_bandwidth *cfs_b = container_of(timer, struct cfs_bandwidth, period_timer); + unsigned long flags; int overrun; int idle = 0; int count = 0; - raw_spin_lock(&cfs_b->lock); + raw_spin_lock_irqsave(&cfs_b->lock, flags); for (;;) { overrun = hrtimer_forward_now(timer, cfs_b->period); if (!overrun) @@ -7038,11 +7040,11 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) count = 0; } - idle = do_sched_cfs_period_timer(cfs_b, overrun); + idle = do_sched_cfs_period_timer(cfs_b, overrun, flags); } if (idle) cfs_b->period_active = 0; - raw_spin_unlock(&cfs_b->lock); + raw_spin_unlock_irqrestore(&cfs_b->lock, flags); return idle ? HRTIMER_NORESTART : HRTIMER_RESTART; } -- Gitee From 3127c23bf743eb8702a02d7c454c269052e87084 Mon Sep 17 00:00:00 2001 From: meganz009 Date: Thu, 1 Jun 2023 16:24:34 +0800 Subject: [PATCH 4/7] arm: Convert arm boot_lock to raw commit fd46cb23c2b2f9fe3514d2e773f289e0e2f46d73 upstream. The arm boot_lock is used by the secondary processor startup code. The locking task is the idle thread, which has idle->sched_class == &idle_sched_class. idle_sched_class->enqueue_task == NULL, so if the idle task blocks on the lock, the attempt to wake it when the lock becomes available will fail: try_to_wake_up() ... activate_task() enqueue_task() p->sched_class->enqueue_task(rq, p, flags) Fix by converting boot_lock to a raw spin lock. 
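The change is mechanical and identical in every platform SMP file touched below; condensed, the pattern looks roughly like this (the soc_* hook names are only illustrative, not taken from any single file):

    /*
     * boot_lock must be a raw lock: on PREEMPT_RT a spinlock_t can sleep,
     * and the idle thread bringing up a secondary CPU must never block.
     */
    static DEFINE_RAW_SPINLOCK(boot_lock);

    static void soc_secondary_init(unsigned int cpu)
    {
            /* Synchronise with the boot CPU without ever sleeping. */
            raw_spin_lock(&boot_lock);
            raw_spin_unlock(&boot_lock);
    }

    static int soc_boot_secondary(unsigned int cpu, struct task_struct *idle)
    {
            raw_spin_lock(&boot_lock);
            /* ... release the secondary CPU from its holding pen ... */
            raw_spin_unlock(&boot_lock);
            return 0;
    }
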
Signed-off-by: Frank Rowand Link: http://lkml.kernel.org/r/4E77B952.3010606@am.sony.com Signed-off-by: Thomas Gleixner Tested-by: Tony Lindgren Acked-by: Krzysztof Kozlowski Tested-by: Krzysztof Kozlowski [Exynos5422 Linaro PM-QA] Signed-off-by: Sebastian Andrzej Siewior --- arch/arm/mach-exynos/platsmp.c | 12 ++++++------ arch/arm/mach-hisi/platmcpm.c | 22 +++++++++++----------- arch/arm/mach-omap2/omap-smp.c | 10 +++++----- arch/arm/mach-prima2/platsmp.c | 10 +++++----- arch/arm/mach-qcom/platsmp.c | 10 +++++----- arch/arm/mach-spear/platsmp.c | 10 +++++----- arch/arm/mach-sti/platsmp.c | 10 +++++----- arch/arm/plat-versatile/platsmp.c | 10 +++++----- 8 files changed, 47 insertions(+), 47 deletions(-) diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c index 6a1e682371b3..17dca0ff336e 100644 --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c @@ -239,7 +239,7 @@ static void write_pen_release(int val) sync_cache_w(&pen_release); } -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static void exynos_secondary_init(unsigned int cpu) { @@ -252,8 +252,8 @@ static void exynos_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } int exynos_set_boot_addr(u32 core_id, unsigned long boot_addr) @@ -317,7 +317,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) * Set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -344,7 +344,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) if (timeout == 0) { printk(KERN_ERR "cpu1 power enable failed"); - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return -ETIMEDOUT; } } @@ -390,7 +390,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) * calibrations, then wait for it to finish */ fail: - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? ret : 0; } diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c index f66815c3dd07..00524abd963f 100644 --- a/arch/arm/mach-hisi/platmcpm.c +++ b/arch/arm/mach-hisi/platmcpm.c @@ -61,7 +61,7 @@ static void __iomem *sysctrl, *fabric; static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER]; -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static u32 fabric_phys_addr; /* * [0]: bootwrapper physical address @@ -113,7 +113,7 @@ static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle) if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER) return -EINVAL; - spin_lock_irq(&boot_lock); + raw_spin_lock_irq(&boot_lock); if (hip04_cpu_table[cluster][cpu]) goto out; @@ -147,7 +147,7 @@ static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle) out: hip04_cpu_table[cluster][cpu]++; - spin_unlock_irq(&boot_lock); + raw_spin_unlock_irq(&boot_lock); return 0; } @@ -162,11 +162,11 @@ static void hip04_cpu_die(unsigned int l_cpu) cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); hip04_cpu_table[cluster][cpu]--; if (hip04_cpu_table[cluster][cpu] == 1) { /* A power_up request went ahead of us. 
*/ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return; } else if (hip04_cpu_table[cluster][cpu] > 1) { pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu); @@ -174,7 +174,7 @@ static void hip04_cpu_die(unsigned int l_cpu) } last_man = hip04_cluster_is_down(cluster); - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); if (last_man) { /* Since it's Cortex A15, disable L2 prefetching. */ asm volatile( @@ -203,7 +203,7 @@ static int hip04_cpu_kill(unsigned int l_cpu) cpu >= HIP04_MAX_CPUS_PER_CLUSTER); count = TIMEOUT_MSEC / POLL_MSEC; - spin_lock_irq(&boot_lock); + raw_spin_lock_irq(&boot_lock); for (tries = 0; tries < count; tries++) { if (hip04_cpu_table[cluster][cpu]) goto err; @@ -211,10 +211,10 @@ static int hip04_cpu_kill(unsigned int l_cpu) data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster)); if (data & CORE_WFI_STATUS(cpu)) break; - spin_unlock_irq(&boot_lock); + raw_spin_unlock_irq(&boot_lock); /* Wait for clean L2 when the whole cluster is down. */ msleep(POLL_MSEC); - spin_lock_irq(&boot_lock); + raw_spin_lock_irq(&boot_lock); } if (tries >= count) goto err; @@ -231,10 +231,10 @@ static int hip04_cpu_kill(unsigned int l_cpu) goto err; if (hip04_cluster_is_down(cluster)) hip04_set_snoop_filter(cluster, 0); - spin_unlock_irq(&boot_lock); + raw_spin_unlock_irq(&boot_lock); return 1; err: - spin_unlock_irq(&boot_lock); + raw_spin_unlock_irq(&boot_lock); return 0; } #endif diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index 1c73694c871a..ac4d2f030b87 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -69,7 +69,7 @@ static const struct omap_smp_config omap5_cfg __initconst = { .startup_addr = omap5_secondary_startup, }; -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); void __iomem *omap4_get_scu_base(void) { @@ -177,8 +177,8 @@ static void omap4_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -191,7 +191,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) * Set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * Update the AuxCoreBoot0 with boot state for secondary core. @@ -270,7 +270,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) * Now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return 0; } diff --git a/arch/arm/mach-prima2/platsmp.c b/arch/arm/mach-prima2/platsmp.c index 75ef5d4be554..c17c86e5d860 100644 --- a/arch/arm/mach-prima2/platsmp.c +++ b/arch/arm/mach-prima2/platsmp.c @@ -22,7 +22,7 @@ static void __iomem *clk_base; -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static void sirfsoc_secondary_init(unsigned int cpu) { @@ -36,8 +36,8 @@ static void sirfsoc_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. 
*/ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static const struct of_device_id clk_ids[] = { @@ -75,7 +75,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) /* make sure write buffer is drained */ mb(); - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -107,7 +107,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm/mach-qcom/platsmp.c b/arch/arm/mach-qcom/platsmp.c index 5494c9e0c909..e8ce157d3548 100644 --- a/arch/arm/mach-qcom/platsmp.c +++ b/arch/arm/mach-qcom/platsmp.c @@ -46,7 +46,7 @@ extern void secondary_startup_arm(void); -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); #ifdef CONFIG_HOTPLUG_CPU static void qcom_cpu_die(unsigned int cpu) @@ -60,8 +60,8 @@ static void qcom_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static int scss_release_secondary(unsigned int cpu) @@ -284,7 +284,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int)) * set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * Send the secondary CPU a soft interrupt, thereby causing @@ -297,7 +297,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int)) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return ret; } diff --git a/arch/arm/mach-spear/platsmp.c b/arch/arm/mach-spear/platsmp.c index 39038a03836a..6da5c93872bf 100644 --- a/arch/arm/mach-spear/platsmp.c +++ b/arch/arm/mach-spear/platsmp.c @@ -32,7 +32,7 @@ static void write_pen_release(int val) sync_cache_w(&pen_release); } -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static void __iomem *scu_base = IOMEM(VA_SCU_BASE); @@ -47,8 +47,8 @@ static void spear13xx_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -59,7 +59,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) * set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -84,7 +84,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? 
-ENOSYS : 0; } diff --git a/arch/arm/mach-sti/platsmp.c b/arch/arm/mach-sti/platsmp.c index 231f19e17436..a3419b7003e6 100644 --- a/arch/arm/mach-sti/platsmp.c +++ b/arch/arm/mach-sti/platsmp.c @@ -35,7 +35,7 @@ static void write_pen_release(int val) sync_cache_w(&pen_release); } -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static void sti_secondary_init(unsigned int cpu) { @@ -48,8 +48,8 @@ static void sti_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -60,7 +60,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) * set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -91,7 +91,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm/plat-versatile/platsmp.c b/arch/arm/plat-versatile/platsmp.c index c2366510187a..6b60f582b738 100644 --- a/arch/arm/plat-versatile/platsmp.c +++ b/arch/arm/plat-versatile/platsmp.c @@ -32,7 +32,7 @@ static void write_pen_release(int val) sync_cache_w(&pen_release); } -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); void versatile_secondary_init(unsigned int cpu) { @@ -45,8 +45,8 @@ void versatile_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -57,7 +57,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) * Set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * This is really belt and braces; we hold unintended secondary @@ -87,7 +87,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } -- Gitee From 88f9fe3dede98ca6695b18c581431f549b935ee0 Mon Sep 17 00:00:00 2001 From: meganz009 Date: Thu, 1 Jun 2023 16:26:14 +0800 Subject: [PATCH 5/7] x86/ioapic: Don't let setaffinity unmask threaded EOI interrupt too early commit 2ab61b3cb2b66bd2515c8531da37be20f5cc14a9 upstream. There is an issue with threaded interrupts which are marked ONESHOT and using the fasteoi handler. if (IS_ONESHOT()) mask_irq(); .... .... cond_unmask_eoi_irq() chip->irq_eoi(); So if setaffinity is pending then the interrupt will be moved and then unmasked, which is wrong as it should be kept masked up to the point where the threaded handler finished. It's not a real problem, the interrupt will just be able to fire before the threaded handler has finished, though the irq masked state will be wrong for a bit. The patch below should cure the issue. It also renames the horribly misnomed functions so it becomes clear what they are supposed to do. 
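Condensed, the contract of the renamed helpers is roughly the following (a simplified sketch, not the literal hunks below; the real finish path additionally skips the migration while a level-triggered ack is still pending):

    static bool ioapic_prepare_move(struct irq_data *data)
    {
            /* Mask the line only when an affinity change is actually pending. */
            if (irqd_is_setaffinity_pending(data)) {
                    mask_ioapic_irq(data);
                    return true;
            }
            return false;
    }

    static void ioapic_finish_move(struct irq_data *data, bool moveit)
    {
            /* Migrate and unmask only if prepare_move() masked it for the move. */
            if (moveit) {
                    irq_move_masked_irq(data);
                    unmask_ioapic_irq(data);
            }
    }
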
Signed-off-by: Thomas Gleixner [bigeasy: add the body of the patch, use the same functions in both ifdef paths (spotted by Andy Shevchenko)] Signed-off-by: Sebastian Andrzej Siewior --- arch/x86/kernel/apic/io_apic.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ab22eded61d2..3f373aa59dd0 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1722,7 +1722,7 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data) return false; } -static inline bool ioapic_irqd_mask(struct irq_data *data) +static inline bool ioapic_prepare_move(struct irq_data *data) { /* If we are moving the irq we need to mask it */ if (unlikely(irqd_is_setaffinity_pending(data))) { @@ -1732,9 +1732,9 @@ static inline bool ioapic_irqd_mask(struct irq_data *data) return false; } -static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) +static inline void ioapic_finish_move(struct irq_data *data, bool moveit) { - if (unlikely(masked)) { + if (unlikely(moveit)) { /* Only migrate the irq if the ack has been received. * * On rare occasions the broadcast level triggered ack gets @@ -1767,11 +1767,11 @@ static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) } } #else -static inline bool ioapic_irqd_mask(struct irq_data *data) +static inline bool ioapic_prepare_move(struct irq_data *data) { return false; } -static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) +static inline void ioapic_finish_move(struct irq_data *data, bool moveit) { } #endif @@ -1780,11 +1780,11 @@ static void ioapic_ack_level(struct irq_data *irq_data) { struct irq_cfg *cfg = irqd_cfg(irq_data); unsigned long v; - bool masked; + bool moveit; int i; irq_complete_move(cfg); - masked = ioapic_irqd_mask(irq_data); + moveit = ioapic_prepare_move(irq_data); /* * It appears there is an erratum which affects at least version 0x11 @@ -1839,7 +1839,7 @@ static void ioapic_ack_level(struct irq_data *irq_data) eoi_ioapic_pin(cfg->vector, irq_data->chip_data); } - ioapic_irqd_unmask(irq_data, masked); + ioapic_finish_move(irq_data, moveit); } static void ioapic_ir_ack_level(struct irq_data *irq_data) -- Gitee From b334334f659e98cf6ad79df817087b1deb99d9ed Mon Sep 17 00:00:00 2001 From: meganz009 Date: Thu, 1 Jun 2023 16:27:40 +0800 Subject: [PATCH 6/7] cgroup: use irqsave in cgroup_rstat_flush_locked() commit 56933f479ee6e504e58236259c2fdacd09bcdeb1 upstream. All callers of cgroup_rstat_flush_locked() acquire cgroup_rstat_lock either with spin_lock_irq() or spin_lock_irqsave(). cgroup_rstat_flush_locked() itself acquires cgroup_rstat_cpu_lock which is a raw_spin_lock. This lock is also acquired in cgroup_rstat_updated() in IRQ context and therefore requires _irqsave() locking suffix in cgroup_rstat_flush_locked(). Since there is no difference between spin_lock_t and raw_spin_lock_t on !RT lockdep does not complain here. On RT lockdep complains because the interrupts were not disabled here and a deadlock is possible. Acquire the raw_spin_lock_t with disabled interrupts. 
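The rule being applied: a lock that is also taken from IRQ context must be acquired with interrupts disabled everywhere else, otherwise an interrupt arriving on the same CPU can spin on a lock that CPU already holds. A minimal illustrative sketch (the demo_* names are made up, this is not the cgroup code):

    static DEFINE_RAW_SPINLOCK(demo_lock);

    /* Runs in IRQ context, interrupts are already disabled here. */
    static void demo_update_from_irq(void)
    {
            raw_spin_lock(&demo_lock);
            /* ... record a pending update ... */
            raw_spin_unlock(&demo_lock);
    }

    /* Runs in process context, so IRQs must stay off while the lock is held. */
    static void demo_flush(void)
    {
            unsigned long flags;

            raw_spin_lock_irqsave(&demo_lock, flags);
            /* ... flush the pending updates ... */
            raw_spin_unlock_irqrestore(&demo_lock, flags);
    }
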
Signed-off-by: Sebastian Andrzej Siewior --- kernel/cgroup/rstat.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index bb95a35e8c2d..3266a9781b4e 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -159,8 +159,9 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep) raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu); struct cgroup *pos = NULL; + unsigned long flags; - raw_spin_lock(cpu_lock); + raw_spin_lock_irqsave(cpu_lock, flags); while ((pos = cgroup_rstat_cpu_pop_updated(pos, cgrp, cpu))) { struct cgroup_subsys_state *css; @@ -172,7 +173,7 @@ static void cgroup_rstat_flush_locked(struct cgroup *cgrp, bool may_sleep) css->ss->css_rstat_flush(css, cpu); rcu_read_unlock(); } - raw_spin_unlock(cpu_lock); + raw_spin_unlock_irqrestore(cpu_lock, flags); /* if @may_sleep, play nice and yield if necessary */ if (may_sleep && (need_resched() || -- Gitee From 0966c1f8779397231fb340347301b836f016313e Mon Sep 17 00:00:00 2001 From: meganz009 Date: Thu, 1 Jun 2023 16:28:53 +0800 Subject: [PATCH 7/7] fscache: initialize cookie hash table raw spinlocks commit 11a5fe1b2f98fc37eac8919e99282c1b91f5bb05 upstream. The fscache cookie mechanism uses a hash table of hlist_bl_head structures. The PREEMPT_RT patcheset adds a raw spinlock to this structure and so on PREEMPT_RT the structures get used uninitialized, causing warnings about bad magic numbers when spinlock debugging is turned on. Use the init function for fscache cookies. Signed-off-by: Clark Williams Signed-off-by: Sebastian Andrzej Siewior --- fs/fscache/cookie.c | 8 ++++++++ fs/fscache/main.c | 1 + include/linux/fscache.h | 1 + 3 files changed, 10 insertions(+) diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index c550512ce335..d5d57da32ffa 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -962,3 +962,11 @@ int __fscache_check_consistency(struct fscache_cookie *cookie, return -ESTALE; } EXPORT_SYMBOL(__fscache_check_consistency); + +void __init fscache_cookie_init(void) +{ + int i; + + for (i = 0; i < (1 << fscache_cookie_hash_shift) - 1; i++) + INIT_HLIST_BL_HEAD(&fscache_cookie_hash[i]); +} diff --git a/fs/fscache/main.c b/fs/fscache/main.c index 8fe9a991637d..2532bd0ffcab 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c @@ -150,6 +150,7 @@ static int __init fscache_init(void) ret = -ENOMEM; goto error_cookie_jar; } + fscache_cookie_init(); fscache_root = kobject_create_and_add("fscache", kernel_kobj); if (!fscache_root) diff --git a/include/linux/fscache.h b/include/linux/fscache.h index d5b732ea1125..89a92df4ee91 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -237,6 +237,7 @@ extern void __fscache_readpages_cancel(struct fscache_cookie *cookie, extern void __fscache_disable_cookie(struct fscache_cookie *, const void *, bool); extern void __fscache_enable_cookie(struct fscache_cookie *, const void *, loff_t, bool (*)(void *), void *); +extern void fscache_cookie_init(void); /** * fscache_register_netfs - Register a filesystem as desiring caching services -- Gitee
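The fscache change above amounts to explicitly initialising every bucket of a bit-locked hash table before first use: on PREEMPT_RT, struct hlist_bl_head carries a real spinlock, so a zero-filled bucket is not a validly initialised lock once spinlock debugging is enabled. A self-contained sketch of that pattern (the DEMO_* names are hypothetical):

    #include <linux/list_bl.h>

    #define DEMO_HASH_SHIFT 4

    static struct hlist_bl_head demo_hash[1 << DEMO_HASH_SHIFT];

    static void __init demo_hash_init(void)
    {
            int i;

            /* Initialise every bucket, including its embedded lock state. */
            for (i = 0; i < (1 << DEMO_HASH_SHIFT); i++)
                    INIT_HLIST_BL_HEAD(&demo_hash[i]);
    }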