Skip to content

Commit 710f793

Browse files
Jack Morgensteingregkh
authored andcommitted
net/mlx4_core: Fix racy CQ (Completion Queue) free
commit 291c566a28910614ce42d0ffe82196eddd6346f4 upstream. In function mlx4_cq_completion() and mlx4_cq_event(), the radix_tree_lookup requires a rcu_read_lock. This is mandatory: if another core frees the CQ, it could run the radix_tree_node_rcu_free() call_rcu() callback while its being used by the radix tree lookup function. Additionally, in function mlx4_cq_event(), since we are adding the rcu lock around the radix-tree lookup, we no longer need to take the spinlock. Also, the synchronize_irq() call for the async event eliminates the need for incrementing the cq reference count in mlx4_cq_event(). Other changes: 1. In function mlx4_cq_free(), replace spin_lock_irq with spin_lock: we no longer take this spinlock in the interrupt context. The spinlock here, therefore, simply protects against different threads simultaneously invoking mlx4_cq_free() for different cq's. 2. In function mlx4_cq_free(), we move the radix tree delete to before the synchronize_irq() calls. This guarantees that we will not access this cq during any subsequent interrupts, and therefore can safely free the CQ after the synchronize_irq calls. The rcu_read_lock in the interrupt handlers only needs to protect against corrupting the radix tree; the interrupt handlers may access the cq outside the rcu_read_lock due to the synchronize_irq calls which protect against premature freeing of the cq. 3. In function mlx4_cq_event(), we change the mlx_warn message to mlx4_dbg. 4. We leave the cq reference count mechanism in place, because it is still needed for the cq completion tasklet mechanism. Fixes: 6d90aa5 ("net/mlx4_core: Make sure there are no pending async events when freeing CQ") Fixes: 225c7b1 ("IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters") Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il> Signed-off-by: Matan Barak <matanb@mellanox.com> Signed-off-by: Tariq Toukan <tariqt@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Sumit Semwal <sumit.semwal@linaro.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent f1e6b11 commit 710f793

1 file changed

Lines changed: 20 additions & 18 deletions

File tree

  • drivers/net/ethernet/mellanox/mlx4

drivers/net/ethernet/mellanox/mlx4/cq.c

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -101,13 +101,19 @@ void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
101101
{
102102
struct mlx4_cq *cq;
103103

104+
rcu_read_lock();
104105
cq = radix_tree_lookup(&mlx4_priv(dev)->cq_table.tree,
105106
cqn & (dev->caps.num_cqs - 1));
107+
rcu_read_unlock();
108+
106109
if (!cq) {
107110
mlx4_dbg(dev, "Completion event for bogus CQ %08x\n", cqn);
108111
return;
109112
}
110113

114+
/* Acessing the CQ outside of rcu_read_lock is safe, because
115+
* the CQ is freed only after interrupt handling is completed.
116+
*/
111117
++cq->arm_sn;
112118

113119
cq->comp(cq);
@@ -118,23 +124,19 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type)
118124
struct mlx4_cq_table *cq_table = &mlx4_priv(dev)->cq_table;
119125
struct mlx4_cq *cq;
120126

121-
spin_lock(&cq_table->lock);
122-
127+
rcu_read_lock();
123128
cq = radix_tree_lookup(&cq_table->tree, cqn & (dev->caps.num_cqs - 1));
124-
if (cq)
125-
atomic_inc(&cq->refcount);
126-
127-
spin_unlock(&cq_table->lock);
129+
rcu_read_unlock();
128130

129131
if (!cq) {
130-
mlx4_warn(dev, "Async event for bogus CQ %08x\n", cqn);
132+
mlx4_dbg(dev, "Async event for bogus CQ %08x\n", cqn);
131133
return;
132134
}
133135

136+
/* Acessing the CQ outside of rcu_read_lock is safe, because
137+
* the CQ is freed only after interrupt handling is completed.
138+
*/
134139
cq->event(cq, event_type);
135-
136-
if (atomic_dec_and_test(&cq->refcount))
137-
complete(&cq->free);
138140
}
139141

140142
static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
@@ -301,9 +303,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
301303
if (err)
302304
return err;
303305

304-
spin_lock_irq(&cq_table->lock);
306+
spin_lock(&cq_table->lock);
305307
err = radix_tree_insert(&cq_table->tree, cq->cqn, cq);
306-
spin_unlock_irq(&cq_table->lock);
308+
spin_unlock(&cq_table->lock);
307309
if (err)
308310
goto err_icm;
309311

@@ -347,9 +349,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
347349
return 0;
348350

349351
err_radix:
350-
spin_lock_irq(&cq_table->lock);
352+
spin_lock(&cq_table->lock);
351353
radix_tree_delete(&cq_table->tree, cq->cqn);
352-
spin_unlock_irq(&cq_table->lock);
354+
spin_unlock(&cq_table->lock);
353355

354356
err_icm:
355357
mlx4_cq_free_icm(dev, cq->cqn);
@@ -368,15 +370,15 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
368370
if (err)
369371
mlx4_warn(dev, "HW2SW_CQ failed (%d) for CQN %06x\n", err, cq->cqn);
370372

373+
spin_lock(&cq_table->lock);
374+
radix_tree_delete(&cq_table->tree, cq->cqn);
375+
spin_unlock(&cq_table->lock);
376+
371377
synchronize_irq(priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq);
372378
if (priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(cq->vector)].irq !=
373379
priv->eq_table.eq[MLX4_EQ_ASYNC].irq)
374380
synchronize_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
375381

376-
spin_lock_irq(&cq_table->lock);
377-
radix_tree_delete(&cq_table->tree, cq->cqn);
378-
spin_unlock_irq(&cq_table->lock);
379-
380382
if (atomic_dec_and_test(&cq->refcount))
381383
complete(&cq->free);
382384
wait_for_completion(&cq->free);

0 commit comments

Comments
 (0)