diff --git a/include/linux/sched.h b/include/linux/sched.h index 8fd8c5b7cdc62f432000e217ee9dabc66d5c9a27..2dff665cc33aaaa991cadeb4fea2667c7d30b3cc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -705,6 +705,7 @@ struct task_struct { u8 rcu_tasks_idx; int rcu_tasks_idle_cpu; struct list_head rcu_tasks_holdout_list; + struct list_head rcu_tasks_exit_list; #endif /* #ifdef CONFIG_TASKS_RCU */ struct sched_info sched_info; diff --git a/init/init_task.c b/init/init_task.c index b312a045f4b9125d030c8968db53761e5169a346..4bc8502d33d3221064900c9eb7ef671cb1173209 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -139,6 +139,7 @@ struct task_struct init_task .rcu_tasks_holdout = false, .rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list), .rcu_tasks_idle_cpu = -1, + .rcu_tasks_exit_list = LIST_HEAD_INIT(init_task.rcu_tasks_exit_list), #endif #ifdef CONFIG_CPUSETS .mems_allowed_seq = SEQCNT_ZERO(init_task.mems_allowed_seq), diff --git a/kernel/fork.c b/kernel/fork.c index bfc4534ff11656fda5133663429bc84a7de059b6..8c96c517e09eba64da61212518fb4d1fc46f7c20 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1711,6 +1711,7 @@ static inline void rcu_copy_process(struct task_struct *p) p->rcu_tasks_holdout = false; INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); p->rcu_tasks_idle_cpu = -1; + INIT_LIST_HEAD(&p->rcu_tasks_exit_list); #endif /* #ifdef CONFIG_TASKS_RCU */ } diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 759ea6881a58801c46f707a8594b905ccf14944b..af9a43172d0e43403c3a5ed9670c1ed3bc87d606 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -527,7 +527,8 @@ static DECLARE_WAIT_QUEUE_HEAD(rcu_tasks_cbs_wq); static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock); /* Track exiting tasks in order to allow them to be waited for. */ -DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu); +static LIST_HEAD(rtp_exit_list); +static DEFINE_RAW_SPINLOCK(rtp_exit_list_lock); /* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */ #define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10) @@ -661,6 +662,17 @@ static void check_holdout_task(struct task_struct *t, sched_show_task(t); } +static void rcu_tasks_pertask(struct task_struct *t, struct list_head *hop) +{ + if (t != current && READ_ONCE(t->on_rq) && + !is_idle_task(t)) { + get_task_struct(t); + t->rcu_tasks_nvcsw = READ_ONCE(t->nvcsw); + WRITE_ONCE(t->rcu_tasks_holdout, true); + list_add(&t->rcu_tasks_holdout_list, hop); + } +} + /* RCU-tasks kthread that detects grace periods and invokes callbacks. */ static int __noreturn rcu_tasks_kthread(void *arg) { @@ -726,14 +738,7 @@ static int __noreturn rcu_tasks_kthread(void *arg) */ rcu_read_lock(); for_each_process_thread(g, t) { - if (t != current && READ_ONCE(t->on_rq) && - !is_idle_task(t)) { - get_task_struct(t); - t->rcu_tasks_nvcsw = READ_ONCE(t->nvcsw); - WRITE_ONCE(t->rcu_tasks_holdout, true); - list_add(&t->rcu_tasks_holdout_list, - &rcu_tasks_holdouts); - } + rcu_tasks_pertask(t, &rcu_tasks_holdouts); } rcu_read_unlock(); @@ -744,8 +749,12 @@ static int __noreturn rcu_tasks_kthread(void *arg) * where they have disabled preemption, allowing the * later synchronize_sched() to finish the job. */ - synchronize_srcu(&tasks_rcu_exit_srcu); - + raw_spin_lock_irqsave(&rtp_exit_list_lock, flags); + list_for_each_entry(t, &rtp_exit_list, rcu_tasks_exit_list) { + if (list_empty(&t->rcu_tasks_holdout_list)) + rcu_tasks_pertask(t, &rcu_tasks_holdouts); + } + raw_spin_unlock_irqrestore(&rtp_exit_list_lock, flags); /* * Each pass through the following loop scans the list * of holdout tasks, removing any that are no longer @@ -802,8 +811,7 @@ static int __noreturn rcu_tasks_kthread(void *arg) * * In addition, this synchronize_sched() waits for exiting * tasks to complete their final preempt_disable() region - * of execution, cleaning up after the synchronize_srcu() - * above. + * of execution. */ synchronize_sched(); @@ -835,20 +843,39 @@ static int __init rcu_spawn_tasks_kthread(void) } core_initcall(rcu_spawn_tasks_kthread); -/* Do the srcu_read_lock() for the above synchronize_srcu(). */ +/* + * Protect against tasklist scan blind spot while the task is exiting and + * may be removed from the tasklist. Do this by adding the task to yet + * another list. + */ void exit_tasks_rcu_start(void) { + unsigned long flags; + struct task_struct *t = current; + + WARN_ON_ONCE(!list_empty(&t->rcu_tasks_exit_list)); + get_task_struct(t); preempt_disable(); - current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu); + raw_spin_lock_irqsave(&rtp_exit_list_lock, flags); + list_add(&t->rcu_tasks_exit_list, &rtp_exit_list); + raw_spin_unlock_irqrestore(&rtp_exit_list_lock, flags); preempt_enable(); } -/* Do the srcu_read_unlock() for the above synchronize_srcu(). */ +/* + * Remove the task from the "yet another list" because do_exit() is now + * non-preemptible, allowing synchronize_rcu() to wait beyond this point. + */ void exit_tasks_rcu_finish(void) { - preempt_disable(); - __srcu_read_unlock(&tasks_rcu_exit_srcu, current->rcu_tasks_idx); - preempt_enable(); + unsigned long flags; + struct task_struct *t = current; + + WARN_ON_ONCE(list_empty(&t->rcu_tasks_exit_list)); + raw_spin_lock_irqsave(&rtp_exit_list_lock, flags); + list_del_init(&t->rcu_tasks_exit_list); + raw_spin_unlock_irqrestore(&rtp_exit_list_lock, flags); + put_task_struct(t); } #endif /* #ifdef CONFIG_TASKS_RCU */