From cbd2d4fef0580c2b92442b5e309029eb05b11cac Mon Sep 17 00:00:00 2001
From: "yang.yang29@zte.com.cn"
Date: Sat, 3 Jun 2023 13:24:39 +0800
Subject: [PATCH 1/3] blk-mq: Don't complete on a remote CPU in force threaded mode

commit dfce28e26ed2766a248b39dcd73f3596b1966917 upstream.

With force threaded interrupts enabled, raising softirq from an SMP
function call will always result in waking the ksoftirqd thread. This is
not optimal given that the thread runs at SCHED_OTHER priority.

Completing the request in hard IRQ-context on PREEMPT_RT (which enforces
the force threaded mode) is bad because the completion handler may
acquire sleeping locks which violate the locking context.

Disable request completing on a remote CPU in force threaded mode.

Signed-off-by: Sebastian Andrzej Siewior
Reviewed-by: Christoph Hellwig
Reviewed-by: Daniel Wagner
Signed-off-by: Jens Axboe
Signed-off-by: Sebastian Andrzej Siewior
---
 block/blk-mq.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 290112b2c70c..3882fd62f52a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -681,6 +681,14 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq)
 	if (!IS_ENABLED(CONFIG_SMP) ||
 	    !test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags))
 		return false;
+	/*
+	 * With force threaded interrupts enabled, raising softirq from an SMP
+	 * function call will always result in waking the ksoftirqd thread.
+	 * This is probably worse than completing the request on a different
+	 * cache domain.
+	 */
+	if (force_irqthreads)
+		return false;
 
 	/* same CPU or cache domain? Complete locally */
 	if (cpu == rq->mq_ctx->cpu ||
-- 
Gitee

From 31a02e8d2550fefdd14e4f70882ae888ae2b0bee Mon Sep 17 00:00:00 2001
From: "yang.yang29@zte.com.cn"
Date: Sat, 3 Jun 2023 13:25:38 +0800
Subject: [PATCH 2/3] blk-mq: Always complete remote completions requests in softirq

commit 2e72894e58eff069fb71004ba447f9e38883ef6f upstream.

Controllers with multiple queues have their IRQ-handlers pinned to a
CPU. The core shouldn't need to complete the request on a remote CPU.

Remove this case and always raise the softirq to complete the request.

Reviewed-by: Christoph Hellwig
Reviewed-by: Daniel Wagner
Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Jens Axboe
Signed-off-by: Sebastian Andrzej Siewior
---
 block/blk-mq.c | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3882fd62f52a..4eafdfc2185d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -659,19 +659,7 @@ static void __blk_mq_complete_request_remote(void *data)
 {
 	struct request *rq = data;
 
-	/*
-	 * For most of single queue controllers, there is only one irq vector
-	 * for handling I/O completion, and the only irq's affinity is set
-	 * to all possible CPUs. On most of ARCHs, this affinity means the irq
-	 * is handled on one specific CPU.
-	 *
-	 * So complete I/O requests in softirq context in case of single queue
-	 * devices to avoid degrading I/O performance due to irqsoff latency.
-	 */
-	if (rq->q->nr_hw_queues == 1)
-		blk_mq_trigger_softirq(rq);
-	else
-		rq->q->mq_ops->complete(rq);
+	blk_mq_trigger_softirq(rq);
 }
 
 static inline bool blk_mq_complete_need_ipi(struct request *rq)
-- 
Gitee

From 016cb52ee02333ffb233648541313fea9e44d201 Mon Sep 17 00:00:00 2001
From: "yang.yang29@zte.com.cn"
Date: Sat, 3 Jun 2023 13:25:56 +0800
Subject: [PATCH 3/3] blk-mq: Use llist_head for blk_cpu_done

commit 580fa099b1f8144d35e337bb060dbb4fb50f2d1e upstream.
With llist_head it is possible to avoid the locking (the irq-off region)
when items are added. This makes it possible to add items on a remote
CPU without additional locking.

llist_add() returns true if the list was previously empty. This can be
used to invoke the SMP function call / raise softirq only if the first
item was added (otherwise it is already pending). This simplifies the
code a little and reduces the IRQ-off regions.

blk_mq_raise_softirq() needs a preempt-disable section to ensure the
request is enqueued on the same CPU as the softirq is raised. Some
callers (USB-storage) invoke this path in preemptible context.

Reviewed-by: Christoph Hellwig
Reviewed-by: Daniel Wagner
Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Jens Axboe
Signed-off-by: Sebastian Andrzej Siewior
---
 block/blk-mq.c         | 101 ++++++++++++++++++-----------------------
 include/linux/blkdev.h |   2 +-
 2 files changed, 44 insertions(+), 59 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4eafdfc2185d..5b3e89fac232 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -41,7 +41,7 @@
 #include "blk-mq-sched.h"
 #include "blk-rq-qos.h"
 
-static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
+static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
 
 static void blk_mq_poll_stats_start(struct request_queue *q);
 static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
@@ -598,68 +598,29 @@ void blk_mq_end_request(struct request *rq, blk_status_t error)
 }
 EXPORT_SYMBOL(blk_mq_end_request);
 
-/*
- * Softirq action handler - move entries to local list and loop over them
- * while passing them to the queue registered handler.
- */
-static __latent_entropy void blk_done_softirq(struct softirq_action *h)
+static void blk_complete_reqs(struct llist_head *list)
 {
-	struct list_head *cpu_list, local_list;
-
-	local_irq_disable();
-	cpu_list = this_cpu_ptr(&blk_cpu_done);
-	list_replace_init(cpu_list, &local_list);
-	local_irq_enable();
-
-	while (!list_empty(&local_list)) {
-		struct request *rq;
+	struct llist_node *entry = llist_reverse_order(llist_del_all(list));
+	struct request *rq, *next;
 
-		rq = list_entry(local_list.next, struct request, ipi_list);
-		list_del_init(&rq->ipi_list);
+	llist_for_each_entry_safe(rq, next, entry, ipi_list)
 		rq->q->mq_ops->complete(rq);
-	}
 }
 
-static void blk_mq_trigger_softirq(struct request *rq)
+static __latent_entropy void blk_done_softirq(struct softirq_action *h)
 {
-	struct list_head *list;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	list = this_cpu_ptr(&blk_cpu_done);
-	list_add_tail(&rq->ipi_list, list);
-
-	/*
-	 * If the list only contains our just added request, signal a raise of
-	 * the softirq. If there are already entries there, someone already
-	 * raised the irq but it hasn't run yet.
-	 */
-	if (list->next == &rq->ipi_list)
-		raise_softirq_irqoff(BLOCK_SOFTIRQ);
-	local_irq_restore(flags);
+	blk_complete_reqs(this_cpu_ptr(&blk_cpu_done));
 }
 
 static int blk_softirq_cpu_dead(unsigned int cpu)
 {
-	/*
-	 * If a CPU goes away, splice its entries to the current CPU
-	 * and trigger a run of the softirq
-	 */
-	local_irq_disable();
-	list_splice_init(&per_cpu(blk_cpu_done, cpu),
-			 this_cpu_ptr(&blk_cpu_done));
-	raise_softirq_irqoff(BLOCK_SOFTIRQ);
-	local_irq_enable();
-
+	blk_complete_reqs(&per_cpu(blk_cpu_done, cpu));
 	return 0;
 }
 
-
 static void __blk_mq_complete_request_remote(void *data)
 {
-	struct request *rq = data;
-
-	blk_mq_trigger_softirq(rq);
+	__raise_softirq_irqoff(BLOCK_SOFTIRQ);
 }
 
 static inline bool blk_mq_complete_need_ipi(struct request *rq)
@@ -688,6 +649,32 @@ static inline bool blk_mq_complete_need_ipi(struct request *rq)
 	return cpu_online(rq->mq_ctx->cpu);
 }
 
+static void blk_mq_complete_send_ipi(struct request *rq)
+{
+	struct llist_head *list;
+	unsigned int cpu;
+
+	cpu = rq->mq_ctx->cpu;
+	list = &per_cpu(blk_cpu_done, cpu);
+	if (llist_add(&rq->ipi_list, list)) {
+		rq->csd.func = __blk_mq_complete_request_remote;
+		rq->csd.info = rq;
+		rq->csd.flags = 0;
+		smp_call_function_single_async(cpu, &rq->csd);
+	}
+}
+
+static void blk_mq_raise_softirq(struct request *rq)
+{
+	struct llist_head *list;
+
+	preempt_disable();
+	list = this_cpu_ptr(&blk_cpu_done);
+	if (llist_add(&rq->ipi_list, list))
+		raise_softirq(BLOCK_SOFTIRQ);
+	preempt_enable();
+}
+
 bool blk_mq_complete_request_remote(struct request *rq)
 {
 	WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
@@ -700,17 +687,15 @@ bool blk_mq_complete_request_remote(struct request *rq)
 		return false;
 
 	if (blk_mq_complete_need_ipi(rq)) {
-		rq->csd.func = __blk_mq_complete_request_remote;
-		rq->csd.info = rq;
-		rq->csd.flags = 0;
-		smp_call_function_single_async(rq->mq_ctx->cpu, &rq->csd);
-	} else {
-		if (rq->q->nr_hw_queues > 1)
-			return false;
-		blk_mq_trigger_softirq(rq);
+		blk_mq_complete_send_ipi(rq);
+		return true;
 	}
 
-	return true;
+	if (rq->q->nr_hw_queues == 1) {
+		blk_mq_raise_softirq(rq);
+		return true;
+	}
+	return false;
 }
 EXPORT_SYMBOL_GPL(blk_mq_complete_request_remote);
 
@@ -4015,7 +4000,7 @@ static int __init blk_mq_init(void)
 	int i;
 
 	for_each_possible_cpu(i)
-		INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
+		init_llist_head(&per_cpu(blk_cpu_done, i));
 	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
 
 	cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 10e908ec72de..aa913a32169d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -162,7 +162,7 @@ struct request {
 	 */
 	union {
 		struct hlist_node hash;	/* merge hash */
-		struct list_head ipi_list;
+		struct llist_node ipi_list;
 	};
 
 	/*
-- 
Gitee
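
The property of llist_add() that the last patch relies on, returning true only when the
list was previously empty, is what lets blk_mq_complete_send_ipi() and
blk_mq_raise_softirq() send the IPI or raise the softirq just once per batch of queued
requests. The following is a minimal userspace C sketch of that semantic, not kernel
code: the struct names and sketch_llist_add() are illustrative stand-ins for the
kernel's llist_head and llist_add().

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct node {
	struct node *next;
};

struct done_list {
	_Atomic(struct node *) first;	/* head of a lock-free singly linked list */
};

/* Push @n onto @h; return true if the list was empty before the push. */
static bool sketch_llist_add(struct node *n, struct done_list *h)
{
	struct node *old = atomic_load(&h->first);

	do {
		n->next = old;
	} while (!atomic_compare_exchange_weak(&h->first, &old, n));

	return old == NULL;	/* only the first producer observes an empty list */
}

int main(void)
{
	struct done_list done = { .first = NULL };
	struct node a, b;

	/* First add reports "was empty": the caller would raise the softirq / send the IPI. */
	printf("%d\n", sketch_llist_add(&a, &done));	/* prints 1 */
	/* Later adds report "not empty": the notification is already pending. */
	printf("%d\n", sketch_llist_add(&b, &done));	/* prints 0 */
	return 0;
}

In the kernel itself llist_add() is implemented with a cmpxchg() loop on llist_head, and
blk_mq_raise_softirq() additionally wraps the add in preempt_disable()/preempt_enable()
so the request is queued on the same per-CPU list whose BLOCK_SOFTIRQ is being raised.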