Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ extension RunnerTests {

func privateAXSnapshotCapture(
app: XCUIApplication,
options: SnapshotOptions
options: SnapshotOptions,
deadline: Date = .distantFuture
) -> SnapshotBackendCapture? {
#if os(iOS) && targetEnvironment(simulator)
let requestedDepth = options.depth ?? 64
Expand All @@ -24,6 +25,13 @@ extension RunnerTests {
var effectiveDepth = requestedDepth
var lastError = "unknown private AX snapshot failure"
for depth in attemptDepths {
// The first rung always runs (the plan gated entry on its own budget); later rungs
// stop when the capture-plan deadline is spent so ladder retries can never stack
// past the runner's main-thread watchdog (#1105).
if depth != attemptDepths.first, Date() >= deadline {
NSLog("AGENT_DEVICE_RUNNER_PRIVATE_AX_SNAPSHOT_BUDGET_EXHAUSTED depth=%ld", depth)
break
}
response = RunnerAXSnapshotBridge.snapshotTree(
for: app,
maxDepth: depth,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,23 +233,126 @@ extension RunnerTests {
)
}

/// Tracks one main-queue dispatch so the watchdog and the dispatched block can agree —
/// under `mainThreadWorkLock` — on exactly one of: finished in time, or abandoned.
private final class MainThreadWorkState {
/// Set by the dispatched block once it completes without having been marked abandoned.
var finished = false
/// Set by the waiting caller when its wait timed out and the block had not finished yet.
var abandoned = false
}

/// Result of `currentMainThreadBusyState()`: whether abandoned main-queue work is still
/// outstanding and, if so, for how long.
enum MainThreadBusyState {
/// No abandoned main-queue work is outstanding.
case idle
/// Abandoned work is outstanding but has not exceeded `mainThreadWedgeThreshold`.
case busy(abandonedForSeconds: TimeInterval)
/// Abandoned work has been outstanding for longer than `mainThreadWedgeThreshold`.
case wedged(abandonedForSeconds: TimeInterval)
}

/// Classifies the abandoned-work bookkeeping while holding `mainThreadWorkLock`.
///
/// - Returns: `.idle` when no abandoned work is counted; `.wedged` when the time since
///   `abandonedMainThreadWorkSince` exceeds `mainThreadWedgeThreshold`; `.busy` otherwise.
///   A missing start timestamp is treated as zero elapsed seconds.
func currentMainThreadBusyState() -> MainThreadBusyState {
    mainThreadWorkLock.lock()
    defer { mainThreadWorkLock.unlock() }

    if abandonedMainThreadWorkCount <= 0 {
        return .idle
    }

    let elapsed: TimeInterval
    if let since = abandonedMainThreadWorkSince {
        elapsed = Date().timeIntervalSince(since)
    } else {
        elapsed = 0
    }

    return elapsed > mainThreadWedgeThreshold
        ? .wedged(abandonedForSeconds: elapsed)
        : .busy(abandonedForSeconds: elapsed)
}

/// Logs an `AGENT_DEVICE_RUNNER_BUSY` line for `command` and builds the failed response
/// carrying the `RUNNER_BUSY` error payload.
///
/// - Parameters:
///   - command: The command being refused; its name and id go into the log line.
///   - abandonedForSeconds: How long abandoned work has been outstanding (logged only).
/// - Returns: A `Response` with `ok: false` and the `RUNNER_BUSY` error.
private func runnerBusyResponse(command: Command, abandonedForSeconds: TimeInterval) -> Response {
    let commandName = command.command.rawValue
    let commandId = command.commandId ?? ""
    NSLog(
        "AGENT_DEVICE_RUNNER_BUSY command=%@ commandId=%@ abandonedForSeconds=%.1f",
        commandName,
        commandId,
        abandonedForSeconds
    )

    let payload = ErrorPayload(
        code: "RUNNER_BUSY",
        message:
            "The iOS runner is still finishing a previous command that exceeded its execution watchdog (usually an accessibility capture on a heavy or animating screen).",
        hint:
            "Wait a few seconds and retry. If snapshots keep failing on this screen, use screenshot as visual truth and interact by coordinates, or navigate to another screen."
    )
    return Response(ok: false, error: payload)
}

/// Logs an `AGENT_DEVICE_RUNNER_WEDGED` line for `command` and builds the failed response
/// carrying the `RUNNER_WEDGED` error payload.
///
/// - Parameters:
///   - command: The command being refused; its name and id go into the log line.
///   - abandonedForSeconds: How long abandoned work has been outstanding; logged with one
///     decimal and embedded in the message truncated to whole seconds.
/// - Returns: A `Response` with `ok: false` and the `RUNNER_WEDGED` error.
private func runnerWedgedResponse(command: Command, abandonedForSeconds: TimeInterval) -> Response {
    let commandName = command.command.rawValue
    let commandId = command.commandId ?? ""
    NSLog(
        "AGENT_DEVICE_RUNNER_WEDGED command=%@ commandId=%@ abandonedForSeconds=%.1f",
        commandName,
        commandId,
        abandonedForSeconds
    )

    let payload = ErrorPayload(
        code: "RUNNER_WEDGED",
        message:
            "The iOS runner main thread has been stuck in abandoned work for \(Int(abandonedForSeconds)) seconds and cannot recover on its own.",
        hint:
            "The runner session will be restarted. Retry the command after the restart; if this screen keeps wedging captures, use screenshot as visual truth and interact by coordinates."
    )
    return Response(ok: false, error: payload)
}

private func executeDispatched(command: Command) throws -> Response {
if Thread.isMainThread {
return try executeOnMainSafely(command: command)
}
// XCTest work cannot be cancelled mid-flight: once the watchdog abandons a main-queue
// block, queueing more main-thread commands behind it only buries the runner deeper.
// Refuse fast instead so the daemon backs off while the abandoned work drains; past the
// wedge threshold, escalate so the daemon recycles this runner (#1105).
switch currentMainThreadBusyState() {
case .idle:
break
case .busy(let abandonedForSeconds):
return runnerBusyResponse(command: command, abandonedForSeconds: abandonedForSeconds)
case .wedged(let abandonedForSeconds):
return runnerWedgedResponse(command: command, abandonedForSeconds: abandonedForSeconds)
}
var result: Result<Response, Error>?
let semaphore = DispatchSemaphore(value: 0)
let workState = MainThreadWorkState()
DispatchQueue.main.async {
do {
result = .success(try self.executeOnMainSafely(command: command))
} catch {
result = .failure(error)
}
self.mainThreadWorkLock.lock()
if workState.abandoned {
self.abandonedMainThreadWorkCount -= 1
if self.abandonedMainThreadWorkCount == 0 {
self.abandonedMainThreadWorkSince = nil
NSLog("AGENT_DEVICE_RUNNER_ABANDONED_WORK_DRAINED")
}
} else {
workState.finished = true
}
self.mainThreadWorkLock.unlock()
semaphore.signal()
}
let waitResult = semaphore.wait(timeout: .now() + mainThreadExecutionTimeout)
if waitResult == .timedOut {
// The main queue work may still be running; we stop waiting and report timeout.
mainThreadWorkLock.lock()
let stillRunning = !workState.finished
if stillRunning {
workState.abandoned = true
abandonedMainThreadWorkCount += 1
if abandonedMainThreadWorkSince == nil {
abandonedMainThreadWorkSince = Date()
}
}
mainThreadWorkLock.unlock()
if stillRunning && command.command == .snapshot {
// The next capture on this screen must not re-grind the tree backend.
penalizeSnapshotTreeBackend(
bundleId: command.appBundleId,
reason: "main_thread_watchdog"
)
}
throw NSError(
domain: RunnerErrorDomain.general,
code: RunnerErrorCode.mainThreadExecutionTimedOut,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -302,17 +302,24 @@ extension RunnerTests {
return DataPayload(nodes: nodes, truncated: false)
}

func snapshotFlatInteractive(app: XCUIApplication, options: SnapshotOptions) -> DataPayload {
func snapshotFlatInteractive(
app: XCUIApplication,
options: SnapshotOptions,
planDeadline: Date = .distantFuture
) -> DataPayload {
var nodes: [SnapshotNode] = [
interactiveRootNode(rect: .zero)
]
if options.depth == 0 {
return DataPayload(nodes: nodes, truncated: false)
}

let deadline = options.interactiveOnly
// Bounded by both its own sweep budget and the umbrella capture-plan deadline, so a
// chained recovery tier can never push the plan past the main-thread watchdog (#1105).
let sweepDeadline = options.interactiveOnly
? Date().addingTimeInterval(Self.flatInteractiveFallbackBudget)
: Date.distantFuture
let deadline = min(sweepDeadline, planDeadline)
let viewport = safeSnapshotViewport(app: app)
var seen = Set<String>()
var candidates: [SnapshotNode] = []
Expand Down Expand Up @@ -562,12 +569,14 @@ extension RunnerTests {

func makeSnapshotTraversalContext(
app: XCUIApplication,
options: SnapshotOptions
options: SnapshotOptions,
captureDeadline: Date = .distantFuture
) throws -> SnapshotTraversalContext? {
let viewport = safeSnapshotViewport(app: app)
let queryRoot = options.scope.flatMap { findScopeElement(app: app, scope: $0) } ?? app

guard let rootSnapshot = try captureSnapshotRoot(queryRoot) else {
let slice = min(treeCaptureSliceBudget, max(0.5, captureDeadline.timeIntervalSinceNow))
guard let rootSnapshot = try captureSnapshotRootBounded(queryRoot, sliceSeconds: slice) else {
return nil
}

Expand All @@ -582,6 +591,74 @@ extension RunnerTests {
)
}

/// Error code carried by the `SnapshotCaptureFailure` thrown when a bounded tree capture
/// exceeds its time slice.
static let treeCaptureTimeoutCode = "IOS_TREE_CAPTURE_TIMEOUT"

/// Reports whether at least one timed-out tree capture is still counted as abandoned.
/// The counter is read while holding `treeCaptureLock`.
func hasAbandonedTreeCapture() -> Bool {
    treeCaptureLock.lock()
    let outstanding = abandonedTreeCaptureCount
    treeCaptureLock.unlock()
    return outstanding > 0
}

/// Runs the blocking tree-snapshot XPC on a worker thread bounded by `sliceSeconds`. On
/// timeout the XPC keeps running on its worker (it cannot be cancelled); the capture is
/// marked abandoned so plans avoid XCTest-backed tiers until it drains, the tree backend is
/// penalized for this bundle, and the plan moves on to the private AX backend (#1105).
///
/// - Parameters:
///   - element: Root element handed to `captureSnapshotRoot(_:)` on the worker.
///   - sliceSeconds: Maximum time the caller waits for the worker to finish.
/// - Returns: Whatever `captureSnapshotRoot(_:)` produced, which may be `nil`.
/// - Throws: `SnapshotCaptureFailure` with code `treeCaptureTimeoutCode` when the slice
///   elapses before the worker publishes a result; otherwise rethrows the worker's error.
private func captureSnapshotRootBounded(
    _ element: XCUIElement,
    sliceSeconds: TimeInterval
) throws -> XCUIElementSnapshot? {
    /// Shared between the worker and the waiting caller; both fields are only written
    /// while `treeCaptureLock` is held.
    final class TreeCaptureBox {
        var abandoned = false
        var outcome: Result<XCUIElementSnapshot?, Error>?
    }
    let box = TreeCaptureBox()
    let semaphore = DispatchSemaphore(value: 0)
    DispatchQueue.global(qos: .userInitiated).async {
        let result: Result<XCUIElementSnapshot?, Error>
        do {
            result = .success(try self.captureSnapshotRoot(element))
        } catch {
            result = .failure(error)
        }
        self.treeCaptureLock.lock()
        if box.abandoned {
            // The caller already gave up on this capture: drop the result and release the
            // abandoned-capture count it took out on timeout.
            self.abandonedTreeCaptureCount -= 1
            self.treeCaptureLock.unlock()
            NSLog("AGENT_DEVICE_RUNNER_TREE_CAPTURE_DRAINED")
        } else {
            box.outcome = result
            self.treeCaptureLock.unlock()
        }
        semaphore.signal()
    }
    if semaphore.wait(timeout: .now() + sliceSeconds) == .timedOut {
        // Re-check under the lock: the worker may have published its outcome between the
        // wait timing out and this point, in which case the result is used normally.
        treeCaptureLock.lock()
        let timedOut = box.outcome == nil
        if timedOut {
            box.abandoned = true
            abandonedTreeCaptureCount += 1
        }
        treeCaptureLock.unlock()
        if timedOut {
            NSLog("AGENT_DEVICE_RUNNER_TREE_CAPTURE_SLICE_TIMEOUT slice=%.1f", sliceSeconds)
            penalizeSnapshotTreeBackend(bundleId: currentBundleId, reason: "tree_capture_slice_timeout")
            // Format with one decimal (matching the log line above) rather than truncating
            // through Int: sub-second slices would otherwise read "0s", and Int(_:) traps
            // on non-finite values.
            let sliceDescription = String(format: "%.1f", sliceSeconds)
            throw SnapshotCaptureFailure(
                code: Self.treeCaptureTimeoutCode,
                message: "the XCTest tree capture exceeded its \(sliceDescription)s time slice",
                hint: "The capture plan recovers through the private AX backend on this screen."
            )
        }
    }
    switch box.outcome {
    case .success(let snapshot):
        return snapshot
    case .failure(let error):
        throw error
    case .none:
        return nil
    }
}

private func captureSnapshotRoot(_ element: XCUIElement) throws -> XCUIElementSnapshot? {
var rootSnapshot: XCUIElementSnapshot?
var swiftErrorMessage: String?
Expand Down
Loading
Loading