Skip to content

Commit 96c9c25

Browse files
Jianbo Liu authored and gregkh committed
net/mlx5e: Fix race condition during IPSec ESN update
[ Upstream commit beb6e2e5976a128b0cccf10d158124422210c5ef ] In IPSec full offload mode, the device reports an ESN (Extended Sequence Number) wrap event to the driver. The driver validates this event by querying the IPSec ASO and checking that the esn_event_arm field is 0x0, which indicates an event has occurred. After handling the event, the driver must re-arm the context by setting esn_event_arm back to 0x1. A race condition exists in this handling path. After validating the event, the driver calls mlx5_accel_esp_modify_xfrm() to update the kernel's xfrm state. This function temporarily releases and re-acquires the xfrm state lock. The driver therefore needs to acknowledge the event first by setting esn_event_arm to 0x1. This prevents the driver from reprocessing the same ESN update if the hardware sends events for other reasons. Since the next ESN update only occurs after nearly 2^31 packets are received, there's no risk of missing an update, as it will happen long after this handling has finished. Processing the event twice causes the ESN high-order bits (esn_msb) to be incremented incorrectly. The driver then programs the hardware with this invalid ESN state, which leads to anti-replay failures and a complete halt of IPSec traffic. Fix this by re-arming the ESN event immediately after it is validated, before calling mlx5_accel_esp_modify_xfrm(). This ensures that any spurious, duplicate events are correctly ignored, closing the race window. Fixes: fef0667 ("net/mlx5e: Fix ESN update kernel panic") Signed-off-by: Jianbo Liu <jianbol@nvidia.com> Reviewed-by: Leon Romanovsky <leonro@nvidia.com> Signed-off-by: Tariq Toukan <tariqt@nvidia.com> Link: https://patch.msgid.link/20260316094603.6999-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent 2c6a5be commit 96c9c25

1 file changed

Lines changed: 14 additions & 19 deletions

File tree

drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c

Lines changed: 14 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -310,10 +310,11 @@ static void mlx5e_ipsec_aso_update(struct mlx5e_ipsec_sa_entry *sa_entry,
310310
mlx5e_ipsec_aso_query(sa_entry, data);
311311
}
312312

313-
static void mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry,
314-
u32 mode_param)
313+
static void
314+
mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry,
315+
u32 mode_param,
316+
struct mlx5_accel_esp_xfrm_attrs *attrs)
315317
{
316-
struct mlx5_accel_esp_xfrm_attrs attrs = {};
317318
struct mlx5_wqe_aso_ctrl_seg data = {};
318319

319320
if (mode_param < MLX5E_IPSEC_ESN_SCOPE_MID) {
@@ -323,18 +324,7 @@ static void mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry,
323324
sa_entry->esn_state.overlap = 1;
324325
}
325326

326-
mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs);
327-
328-
/* It is safe to execute the modify below unlocked since the only flows
329-
* that could affect this HW object, are create, destroy and this work.
330-
*
331-
* Creation flow can't co-exist with this modify work, the destruction
332-
* flow would cancel this work, and this work is a single entity that
333-
* can't conflict with it self.
334-
*/
335-
spin_unlock_bh(&sa_entry->x->lock);
336-
mlx5_accel_esp_modify_xfrm(sa_entry, &attrs);
337-
spin_lock_bh(&sa_entry->x->lock);
327+
mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, attrs);
338328

339329
data.data_offset_condition_operand =
340330
MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET;
@@ -451,7 +441,9 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work)
451441
struct mlx5e_ipsec_work *work =
452442
container_of(_work, struct mlx5e_ipsec_work, work);
453443
struct mlx5e_ipsec_sa_entry *sa_entry = work->data;
444+
struct mlx5_accel_esp_xfrm_attrs tmp = {};
454445
struct mlx5_accel_esp_xfrm_attrs *attrs;
446+
bool need_modify = false;
455447
int ret;
456448

457449
attrs = &sa_entry->attrs;
@@ -461,19 +453,22 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work)
461453
if (ret)
462454
goto unlock;
463455

456+
if (attrs->lft.soft_packet_limit != XFRM_INF)
457+
mlx5e_ipsec_handle_limits(sa_entry);
458+
464459
if (attrs->replay_esn.trigger &&
465460
!MLX5_GET(ipsec_aso, sa_entry->ctx, esn_event_arm)) {
466461
u32 mode_param = MLX5_GET(ipsec_aso, sa_entry->ctx,
467462
mode_parameter);
468463

469-
mlx5e_ipsec_update_esn_state(sa_entry, mode_param);
464+
mlx5e_ipsec_update_esn_state(sa_entry, mode_param, &tmp);
465+
need_modify = true;
470466
}
471467

472-
if (attrs->lft.soft_packet_limit != XFRM_INF)
473-
mlx5e_ipsec_handle_limits(sa_entry);
474-
475468
unlock:
476469
spin_unlock_bh(&sa_entry->x->lock);
470+
if (need_modify)
471+
mlx5_accel_esp_modify_xfrm(sa_entry, &tmp);
477472
kfree(work);
478473
}
479474

0 commit comments

Comments
 (0)