Commit fce67b3

htejun authored and gregkh committed
workqueue: replace pool->manager_arb mutex with a flag
commit 692b48258dda7c302e777d7d5f4217244478f1f6 upstream.

Josef reported a HARDIRQ-safe -> HARDIRQ-unsafe lock order detected by
lockdep:

 [ 1270.472259] WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected
 [ 1270.472783] 4.14.0-rc1-xfstests-12888-g76833e8 #110 Not tainted
 [ 1270.473240] -----------------------------------------------------
 [ 1270.473710] kworker/u5:2/5157 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
 [ 1270.474239]  (&(&lock->wait_lock)->rlock){+.+.}, at: [<ffffffff8da253d2>] __mutex_unlock_slowpath+0xa2/0x280
 [ 1270.474994]
 [ 1270.474994] and this task is already holding:
 [ 1270.475440]  (&pool->lock/1){-.-.}, at: [<ffffffff8d2992f6>] worker_thread+0x366/0x3c0
 [ 1270.476046] which would create a new lock dependency:
 [ 1270.476436]  (&pool->lock/1){-.-.} -> (&(&lock->wait_lock)->rlock){+.+.}
 [ 1270.476949]
 [ 1270.476949] but this new dependency connects a HARDIRQ-irq-safe lock:
 [ 1270.477553]  (&pool->lock/1){-.-.}
 ...
 [ 1270.488900] to a HARDIRQ-irq-unsafe lock:
 [ 1270.489327]  (&(&lock->wait_lock)->rlock){+.+.}
 ...
 [ 1270.494735] Possible interrupt unsafe locking scenario:
 [ 1270.494735]
 [ 1270.495250]        CPU0                    CPU1
 [ 1270.495600]        ----                    ----
 [ 1270.495947]   lock(&(&lock->wait_lock)->rlock);
 [ 1270.496295]                                local_irq_disable();
 [ 1270.496753]                                lock(&pool->lock/1);
 [ 1270.497205]                                lock(&(&lock->wait_lock)->rlock);
 [ 1270.497744]   <Interrupt>
 [ 1270.497948]     lock(&pool->lock/1);

This will cause an irq inversion deadlock if the above lock scenario
happens.

The root cause of this safe -> unsafe lock order is the
mutex_unlock(pool->manager_arb) in manage_workers() with pool->lock
held. Unlocking a mutex while holding an irq spinlock was never safe,
and this problem has been around forever, but it never got noticed
because the mutex is usually only trylocked while holding the irqlock,
making actual failures very unlikely, and the lockdep annotation missed
the condition until the recent b9c16a0e1f73 ("locking/mutex: Fix
lockdep_assert_held() fail").

Using a mutex for pool->manager_arb has always been a bit of a stretch.
It is primarily a mechanism to arbitrate managership between workers,
which can easily be done with a pool flag. The only reason it became a
mutex is that the pool destruction path wants to exclude parallel
managing operations.

This patch replaces the mutex with a new pool flag POOL_MANAGER_ACTIVE
and makes the destruction path wait for the current manager on a wait
queue.

v2: Drop unnecessary flag clearing before pool destruction as suggested
by Boqun.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
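To make the new scheme concrete, here is a minimal sketch of the
flag-plus-waitqueue arbitration pattern the patch adopts. It is
illustrative only: the demo_* names are invented stand-ins for
pool->lock, pool->flags and wq_manager_wait, not the actual
workqueue.c code.

#include <linux/spinlock.h>
#include <linux/wait.h>

#define MANAGER_ACTIVE		(1 << 0)	/* stands in for POOL_MANAGER_ACTIVE */

static DEFINE_SPINLOCK(demo_pool_lock);			/* stands in for pool->lock */
static DECLARE_WAIT_QUEUE_HEAD(demo_manager_wait);	/* stands in for wq_manager_wait */
static unsigned int demo_pool_flags;			/* stands in for pool->flags */

/* Worker side: called with demo_pool_lock held and irqs disabled.
 * Claiming managership is a plain test-and-set under the spinlock, so
 * nothing touches a sleeping lock's wait_lock inside the irq-disabled
 * region. */
static bool demo_try_become_manager(void)
{
	if (demo_pool_flags & MANAGER_ACTIVE)
		return false;			/* someone else is managing */
	demo_pool_flags |= MANAGER_ACTIVE;
	return true;
}

/* Worker side, still under demo_pool_lock: release managership and wake
 * a destruction path that may be waiting for the flag to clear. */
static void demo_release_manager(void)
{
	demo_pool_flags &= ~MANAGER_ACTIVE;
	wake_up(&demo_manager_wait);
}

/* Teardown side: sleep until the current manager is done, then take the
 * flag permanently.  wait_event_lock_irq() drops the spinlock around the
 * actual sleep and reacquires it before rechecking the condition. */
static void demo_teardown_become_manager(void)
{
	spin_lock_irq(&demo_pool_lock);
	wait_event_lock_irq(demo_manager_wait,
			    !(demo_pool_flags & MANAGER_ACTIVE),
			    demo_pool_lock);
	demo_pool_flags |= MANAGER_ACTIVE;	/* never cleared; pool is going away */
	spin_unlock_irq(&demo_pool_lock);
}

The point of the exercise: both claiming and releasing managership are
now plain flag operations under the already-held irq spinlock, so the
irq-disabled region no longer contains a mutex_unlock() and its
HARDIRQ-unsafe wait_lock acquisition.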
1 parent 9b36699 · commit fce67b3

1 file changed: kernel/workqueue.c (15 additions, 22 deletions)
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -68,6 +68,7 @@ enum {
 	 * attach_mutex to avoid changing binding state while
 	 * worker_attach_to_pool() is in progress.
 	 */
+	POOL_MANAGER_ACTIVE	= 1 << 0,	/* being managed */
 	POOL_DISASSOCIATED	= 1 << 2,	/* cpu can't serve workers */
 
 	/* worker flags */
@@ -163,7 +164,6 @@ struct worker_pool {
 						/* L: hash of busy workers */
 
 	/* see manage_workers() for details on the two manager mutexes */
-	struct mutex		manager_arb;	/* manager arbitration */
 	struct worker		*manager;	/* L: purely informational */
 	struct mutex		attach_mutex;	/* attach/detach exclusion */
 	struct list_head	workers;	/* A: attached workers */
@@ -295,6 +295,7 @@ static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
 
 static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
 static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
+static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
 
 static LIST_HEAD(workqueues);		/* PR: list of all workqueues */
 static bool workqueue_freezing;		/* PL: have wqs started freezing? */
@@ -808,7 +809,7 @@ static bool need_to_create_worker(struct worker_pool *pool)
 /* Do we have too many workers and should some go away? */
 static bool too_many_workers(struct worker_pool *pool)
 {
-	bool managing = mutex_is_locked(&pool->manager_arb);
+	bool managing = pool->flags & POOL_MANAGER_ACTIVE;
 	int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
 	int nr_busy = pool->nr_workers - nr_idle;
 
@@ -1952,24 +1953,17 @@ static bool manage_workers(struct worker *worker)
 {
 	struct worker_pool *pool = worker->pool;
 
-	/*
-	 * Anyone who successfully grabs manager_arb wins the arbitration
-	 * and becomes the manager.  mutex_trylock() on pool->manager_arb
-	 * failure while holding pool->lock reliably indicates that someone
-	 * else is managing the pool and the worker which failed trylock
-	 * can proceed to executing work items.  This means that anyone
-	 * grabbing manager_arb is responsible for actually performing
-	 * manager duties.  If manager_arb is grabbed and released without
-	 * actual management, the pool may stall indefinitely.
-	 */
-	if (!mutex_trylock(&pool->manager_arb))
+	if (pool->flags & POOL_MANAGER_ACTIVE)
 		return false;
+
+	pool->flags |= POOL_MANAGER_ACTIVE;
 	pool->manager = worker;
 
 	maybe_create_worker(pool);
 
 	pool->manager = NULL;
-	mutex_unlock(&pool->manager_arb);
+	pool->flags &= ~POOL_MANAGER_ACTIVE;
+	wake_up(&wq_manager_wait);
 	return true;
 }
 
@@ -3119,7 +3113,6 @@ static int init_worker_pool(struct worker_pool *pool)
 	setup_timer(&pool->mayday_timer, pool_mayday_timeout,
 		    (unsigned long)pool);
 
-	mutex_init(&pool->manager_arb);
 	mutex_init(&pool->attach_mutex);
 	INIT_LIST_HEAD(&pool->workers);
 
@@ -3189,13 +3182,15 @@ static void put_unbound_pool(struct worker_pool *pool)
 	hash_del(&pool->hash_node);
 
 	/*
-	 * Become the manager and destroy all workers.  Grabbing
-	 * manager_arb prevents @pool's workers from blocking on
-	 * attach_mutex.
+	 * Become the manager and destroy all workers.  This prevents
+	 * @pool's workers from blocking on attach_mutex.  We're the last
+	 * manager and @pool gets freed with the flag set.
 	 */
-	mutex_lock(&pool->manager_arb);
-
 	spin_lock_irq(&pool->lock);
+	wait_event_lock_irq(wq_manager_wait,
+			    !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
+	pool->flags |= POOL_MANAGER_ACTIVE;
+
 	while ((worker = first_idle_worker(pool)))
 		destroy_worker(worker);
 	WARN_ON(pool->nr_workers || pool->nr_idle);
@@ -3209,8 +3204,6 @@ static void put_unbound_pool(struct worker_pool *pool)
 	if (pool->detach_completion)
 		wait_for_completion(pool->detach_completion);
 
-	mutex_unlock(&pool->manager_arb);
-
 	/* shut down the timers */
 	del_timer_sync(&pool->idle_timer);
 	del_timer_sync(&pool->mayday_timer);
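For readers unfamiliar with wait_event_lock_irq(), the call in
put_unbound_pool() above behaves roughly like the following open-coded
loop. This is a simplified sketch, not the exact macro body from
include/linux/wait.h, but the locking shape is the same:

	DEFINE_WAIT(wait);

	spin_lock_irq(&pool->lock);
	while (pool->flags & POOL_MANAGER_ACTIVE) {
		prepare_to_wait(&wq_manager_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&pool->lock);	/* never sleep with the irq spinlock held */
		schedule();			/* woken by wake_up() in manage_workers() */
		spin_lock_irq(&pool->lock);	/* retake before rechecking the flag */
	}
	finish_wait(&wq_manager_wait, &wait);
	pool->flags |= POOL_MANAGER_ACTIVE;	/* we are the last manager */

Because the flag is only tested and modified under pool->lock, and the
sleep happens with that lock dropped, the destruction path keeps the
mutual exclusion the old manager_arb mutex provided without ever mixing
sleeping-lock internals into the irq-disabled region.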
