Skip to content

Commit a20257e

Browse files
matnymangregkh
authored andcommitted
xhci: Fix handling timeouted commands on hosts in weird states.
commit 3425aa03f484d45dc21e0e791c2f6c74ea656421 upstream. If commands time out, we mark them for abortion, then stop the command ring, turn the commands into no-ops, and finally restart the command ring. If the host is working properly, the no-op commands will finish and pending completions are called. If we notice the host is failing, the driver clears the command ring and completes, deletes and frees all pending commands. There are two separate cases reported where the host is believed to work properly but is not. In the first case we successfully stop the ring, but no abort or stop command ring event is ever sent and the host locks up. The second case is if a host is removed, a command times out and the driver believes the ring is stopped, and assumes it will be restarted, but actually ends up timing out on the same command forever. If one of the pending commands has the xhci->mutex held, it will block xhci_stop() in the remove codepath, which otherwise would clean up pending commands. Add a check that clears all pending commands in case the host is removed, or we are stuck timing out on the same command. Also restart the command timeout timer when stopping the command ring to ensure we receive a ring stop/abort event. Tested-by: Joe Lawrence <joe.lawrence@stratus.com> Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent 4582ddf commit a20257e

1 file changed

Lines changed: 22 additions & 5 deletions

File tree

drivers/usb/host/xhci-ring.c

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,14 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci)
289289

290290
temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
291291
xhci->cmd_ring_state = CMD_RING_STATE_ABORTED;
292+
293+
/*
294+
* Writing the CMD_RING_ABORT bit should cause a cmd completion event,
295+
* however on some host hw the CMD_RING_RUNNING bit is correctly cleared
296+
* but the completion event is never sent. Use the cmd timeout timer to
297+
* handle those cases. Use twice the time to cover the bit polling retry
298+
*/
299+
mod_timer(&xhci->cmd_timer, jiffies + (2 * XHCI_CMD_DEFAULT_TIMEOUT));
292300
xhci_write_64(xhci, temp_64 | CMD_RING_ABORT,
293301
&xhci->op_regs->cmd_ring);
294302

@@ -313,6 +321,7 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci)
313321

314322
xhci_err(xhci, "Stopped the command ring failed, "
315323
"maybe the host is dead\n");
324+
del_timer(&xhci->cmd_timer);
316325
xhci->xhc_state |= XHCI_STATE_DYING;
317326
xhci_quiesce(xhci);
318327
xhci_halt(xhci);
@@ -1252,22 +1261,21 @@ void xhci_handle_command_timeout(unsigned long data)
12521261
int ret;
12531262
unsigned long flags;
12541263
u64 hw_ring_state;
1255-
struct xhci_command *cur_cmd = NULL;
1264+
bool second_timeout = false;
12561265
xhci = (struct xhci_hcd *) data;
12571266

12581267
/* mark this command to be cancelled */
12591268
spin_lock_irqsave(&xhci->lock, flags);
12601269
if (xhci->current_cmd) {
1261-
cur_cmd = xhci->current_cmd;
1262-
cur_cmd->status = COMP_CMD_ABORT;
1270+
if (xhci->current_cmd->status == COMP_CMD_ABORT)
1271+
second_timeout = true;
1272+
xhci->current_cmd->status = COMP_CMD_ABORT;
12631273
}
12641274

1265-
12661275
/* Make sure command ring is running before aborting it */
12671276
hw_ring_state = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
12681277
if ((xhci->cmd_ring_state & CMD_RING_STATE_RUNNING) &&
12691278
(hw_ring_state & CMD_RING_RUNNING)) {
1270-
12711279
spin_unlock_irqrestore(&xhci->lock, flags);
12721280
xhci_dbg(xhci, "Command timeout\n");
12731281
ret = xhci_abort_cmd_ring(xhci);
@@ -1279,6 +1287,15 @@ void xhci_handle_command_timeout(unsigned long data)
12791287
}
12801288
return;
12811289
}
1290+
1291+
/* command ring failed to restart, or host removed. Bail out */
1292+
if (second_timeout || xhci->xhc_state & XHCI_STATE_REMOVING) {
1293+
spin_unlock_irqrestore(&xhci->lock, flags);
1294+
xhci_dbg(xhci, "command timed out twice, ring start fail?\n");
1295+
xhci_cleanup_command_queue(xhci);
1296+
return;
1297+
}
1298+
12821299
/* command timeout on stopped ring, ring can't be aborted */
12831300
xhci_dbg(xhci, "Command timeout on stopped ring\n");
12841301
xhci_handle_stopped_cmd_ring(xhci, xhci->current_cmd);

0 commit comments

Comments
 (0)