From a1ad6bab8497b45d5a7f61ada0238ec42d0b8587 Mon Sep 17 00:00:00 2001 From: "yang.yang29@zte.com.cn" Date: Wed, 7 Jun 2023 09:40:53 +0800 Subject: [PATCH 1/2] futex: Fix bug on when a requeued RT task times out commit 7edf91c5189d3b406d53b139fd703354cc43dd16 upstream. Requeue with timeout causes a bug with PREEMPT_RT_FULL. The bug comes from a timed out condition. TASK 1 TASK 2 ------ ------ futex_wait_requeue_pi() futex_wait_queue_me() double_lock_hb(); raw_spin_lock(pi_lock); if (current->pi_blocked_on) { } else { current->pi_blocked_on = PI_WAKE_INPROGRESS; run_spin_unlock(pi_lock); spin_lock(hb->lock); <-- blocked! plist_for_each_entry_safe(this) { rt_mutex_start_proxy_lock(); task_blocks_on_rt_mutex(); BUG_ON(task->pi_blocked_on)!!!! The BUG_ON() actually has a check for PI_WAKE_INPROGRESS, but the problem is that, after TASK 1 sets PI_WAKE_INPROGRESS, it then tries to grab the hb->lock, which it fails to do so. As the hb->lock is a mutex, it will block and set the "pi_blocked_on" to the hb->lock. When TASK 2 goes to requeue it, the check for PI_WAKE_INPROGESS fails because the task1's pi_blocked_on is no longer set to that, but instead, set to the hb->lock. The fix: When calling rt_mutex_start_proxy_lock() a check is made to see if the proxy tasks pi_blocked_on is set. If so, exit out early. Otherwise set it to a new flag PI_REQUEUE_INPROGRESS, which notifies the proxy task that it is being requeued, and will handle things appropriately. Signed-off-by: Steven Rostedt Signed-off-by: Thomas Gleixner --- kernel/locking/rtmutex.c | 31 ++++++++++++++++++++++++++++++- kernel/locking/rtmutex_common.h | 1 + 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 71d161c93b98..1c3f56d3d9b6 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -137,7 +137,8 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock) static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter) { - return waiter && waiter != PI_WAKEUP_INPROGRESS; + return waiter && waiter != PI_WAKEUP_INPROGRESS && + waiter != PI_REQUEUE_INPROGRESS; } /* @@ -1784,6 +1785,34 @@ int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, if (try_to_take_rt_mutex(lock, task, NULL)) return 1; +#ifdef CONFIG_PREEMPT_RT_FULL + /* + * In PREEMPT_RT there's an added race. + * If the task, that we are about to requeue, times out, + * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue + * to skip this task. But right after the task sets + * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then + * block on the spin_lock(&hb->lock), which in RT is an rtmutex. + * This will replace the PI_WAKEUP_INPROGRESS with the actual + * lock that it blocks on. We *must not* place this task + * on this proxy lock in that case. + * + * To prevent this race, we first take the task's pi_lock + * and check if it has updated its pi_blocked_on. If it has, + * we assume that it woke up and we return -EAGAIN. + * Otherwise, we set the task's pi_blocked_on to + * PI_REQUEUE_INPROGRESS, so that if the task is waking up + * it will know that we are in the process of requeuing it. + */ + raw_spin_lock(&task->pi_lock); + if (task->pi_blocked_on) { + raw_spin_unlock(&task->pi_lock); + return -EAGAIN; + } + task->pi_blocked_on = PI_REQUEUE_INPROGRESS; + raw_spin_unlock(&task->pi_lock); +#endif + /* We enforce deadlock detection for futexes */ ret = task_blocks_on_rt_mutex(lock, waiter, task, RT_MUTEX_FULL_CHAINWALK); diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index f4b6596d224a..461527f3f7af 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -131,6 +131,7 @@ enum rtmutex_chainwalk { * PI-futex support (proxy locking functions, etc.): */ #define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1) +#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2) extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, -- Gitee From 18640db2936702b2b69f5d9aa750229d7b4d4693 Mon Sep 17 00:00:00 2001 From: "yang.yang29@zte.com.cn" Date: Wed, 7 Jun 2023 09:46:10 +0800 Subject: [PATCH 2/2] futex: Ensure lock/unlock symetry versus pi_lock and hash bucket lock commit 6a2f8ae29dd9f3697e918a18a871efdaa4d919d9 upstream. In exit_pi_state_list() we have the following locking construct: spin_lock(&hb->lock); raw_spin_lock_irq(&curr->pi_lock); ... spin_unlock(&hb->lock); In !RT this works, but on RT the migrate_enable() function which is called from spin_unlock() sees atomic context due to the held pi_lock and just decrements the migrate_disable_atomic counter of the task. Now the next call to migrate_disable() sees the counter being negative and issues a warning. That check should be in migrate_enable() already. Fix this by dropping pi_lock before unlocking hb->lock and reaquire pi_lock after that again. This is safe as the loop code reevaluates head again under the pi_lock. Reported-by: Yong Zhang Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior --- kernel/futex.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/futex.c b/kernel/futex.c index 9a8708cb3634..25639add1e76 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -921,7 +921,9 @@ void exit_pi_state_list(struct task_struct *curr) if (head->next != next) { /* retain curr->pi_lock for the loop invariant */ raw_spin_unlock(&pi_state->pi_mutex.wait_lock); + raw_spin_unlock_irq(&curr->pi_lock); spin_unlock(&hb->lock); + raw_spin_lock_irq(&curr->pi_lock); put_pi_state(pi_state); continue; } -- Gitee