diff --git a/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+AXSnapshotFallback.swift b/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+AXSnapshotFallback.swift index cf7828dd8..71f2f3a2d 100644 --- a/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+AXSnapshotFallback.swift +++ b/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+AXSnapshotFallback.swift @@ -12,7 +12,8 @@ extension RunnerTests { func privateAXSnapshotCapture( app: XCUIApplication, - options: SnapshotOptions + options: SnapshotOptions, + deadline: Date = .distantFuture ) -> SnapshotBackendCapture? { #if os(iOS) && targetEnvironment(simulator) let requestedDepth = options.depth ?? 64 @@ -24,6 +25,13 @@ extension RunnerTests { var effectiveDepth = requestedDepth var lastError = "unknown private AX snapshot failure" for depth in attemptDepths { + // The first rung always runs (the plan gated entry on its own budget); later rungs + // stop when the capture-plan deadline is spent so ladder retries can never stack + // past the runner's main-thread watchdog (#1105). + if depth != attemptDepths.first, Date() >= deadline { + NSLog("AGENT_DEVICE_RUNNER_PRIVATE_AX_SNAPSHOT_BUDGET_EXHAUSTED depth=%ld", depth) + break + } response = RunnerAXSnapshotBridge.snapshotTree( for: app, maxDepth: depth, diff --git a/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift b/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift index de08b2d74..34f87cf14 100644 --- a/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift +++ b/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+CommandExecution.swift @@ -233,23 +233,126 @@ extension RunnerTests { ) } + /// Tracks one main-queue dispatch so the watchdog and the dispatched block can agree — + /// under `mainThreadWorkLock` — on exactly one of: finished in time, or abandoned. + private final class MainThreadWorkState { + var finished = false + var abandoned = false + } + + enum MainThreadBusyState { + case idle + case busy(abandonedForSeconds: TimeInterval) + case wedged(abandonedForSeconds: TimeInterval) + } + + func currentMainThreadBusyState() -> MainThreadBusyState { + mainThreadWorkLock.lock() + defer { mainThreadWorkLock.unlock() } + guard abandonedMainThreadWorkCount > 0 else { return .idle } + let abandonedFor = abandonedMainThreadWorkSince.map { Date().timeIntervalSince($0) } ?? 0 + if abandonedFor > mainThreadWedgeThreshold { + return .wedged(abandonedForSeconds: abandonedFor) + } + return .busy(abandonedForSeconds: abandonedFor) + } + + private func runnerBusyResponse(command: Command, abandonedForSeconds: TimeInterval) -> Response { + NSLog( + "AGENT_DEVICE_RUNNER_BUSY command=%@ commandId=%@ abandonedForSeconds=%.1f", + command.command.rawValue, + command.commandId ?? "", + abandonedForSeconds + ) + return Response( + ok: false, + error: ErrorPayload( + code: "RUNNER_BUSY", + message: + "The iOS runner is still finishing a previous command that exceeded its execution watchdog (usually an accessibility capture on a heavy or animating screen).", + hint: + "Wait a few seconds and retry. If snapshots keep failing on this screen, use screenshot as visual truth and interact by coordinates, or navigate to another screen." + ) + ) + } + + private func runnerWedgedResponse(command: Command, abandonedForSeconds: TimeInterval) -> Response { + NSLog( + "AGENT_DEVICE_RUNNER_WEDGED command=%@ commandId=%@ abandonedForSeconds=%.1f", + command.command.rawValue, + command.commandId ?? "", + abandonedForSeconds + ) + return Response( + ok: false, + error: ErrorPayload( + code: "RUNNER_WEDGED", + message: + "The iOS runner main thread has been stuck in abandoned work for \(Int(abandonedForSeconds)) seconds and cannot recover on its own.", + hint: + "The runner session will be restarted. Retry the command after the restart; if this screen keeps wedging captures, use screenshot as visual truth and interact by coordinates." + ) + ) + } + private func executeDispatched(command: Command) throws -> Response { if Thread.isMainThread { return try executeOnMainSafely(command: command) } + // XCTest work cannot be cancelled mid-flight: once the watchdog abandons a main-queue + // block, queueing more main-thread commands behind it only buries the runner deeper. + // Refuse fast instead so the daemon backs off while the abandoned work drains; past the + // wedge threshold, escalate so the daemon recycles this runner (#1105). + switch currentMainThreadBusyState() { + case .idle: + break + case .busy(let abandonedForSeconds): + return runnerBusyResponse(command: command, abandonedForSeconds: abandonedForSeconds) + case .wedged(let abandonedForSeconds): + return runnerWedgedResponse(command: command, abandonedForSeconds: abandonedForSeconds) + } var result: Result? let semaphore = DispatchSemaphore(value: 0) + let workState = MainThreadWorkState() DispatchQueue.main.async { do { result = .success(try self.executeOnMainSafely(command: command)) } catch { result = .failure(error) } + self.mainThreadWorkLock.lock() + if workState.abandoned { + self.abandonedMainThreadWorkCount -= 1 + if self.abandonedMainThreadWorkCount == 0 { + self.abandonedMainThreadWorkSince = nil + NSLog("AGENT_DEVICE_RUNNER_ABANDONED_WORK_DRAINED") + } + } else { + workState.finished = true + } + self.mainThreadWorkLock.unlock() semaphore.signal() } let waitResult = semaphore.wait(timeout: .now() + mainThreadExecutionTimeout) if waitResult == .timedOut { // The main queue work may still be running; we stop waiting and report timeout. + mainThreadWorkLock.lock() + let stillRunning = !workState.finished + if stillRunning { + workState.abandoned = true + abandonedMainThreadWorkCount += 1 + if abandonedMainThreadWorkSince == nil { + abandonedMainThreadWorkSince = Date() + } + } + mainThreadWorkLock.unlock() + if stillRunning && command.command == .snapshot { + // The next capture on this screen must not re-grind the tree backend. + penalizeSnapshotTreeBackend( + bundleId: command.appBundleId, + reason: "main_thread_watchdog" + ) + } throw NSError( domain: RunnerErrorDomain.general, code: RunnerErrorCode.mainThreadExecutionTimedOut, diff --git a/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift b/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift index 45be79886..838ad1815 100644 --- a/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift +++ b/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Snapshot.swift @@ -302,7 +302,11 @@ extension RunnerTests { return DataPayload(nodes: nodes, truncated: false) } - func snapshotFlatInteractive(app: XCUIApplication, options: SnapshotOptions) -> DataPayload { + func snapshotFlatInteractive( + app: XCUIApplication, + options: SnapshotOptions, + planDeadline: Date = .distantFuture + ) -> DataPayload { var nodes: [SnapshotNode] = [ interactiveRootNode(rect: .zero) ] @@ -310,9 +314,12 @@ extension RunnerTests { return DataPayload(nodes: nodes, truncated: false) } - let deadline = options.interactiveOnly + // Bounded by both its own sweep budget and the umbrella capture-plan deadline, so a + // chained recovery tier can never push the plan past the main-thread watchdog (#1105). + let sweepDeadline = options.interactiveOnly ? Date().addingTimeInterval(Self.flatInteractiveFallbackBudget) : Date.distantFuture + let deadline = min(sweepDeadline, planDeadline) let viewport = safeSnapshotViewport(app: app) var seen = Set() var candidates: [SnapshotNode] = [] @@ -562,12 +569,14 @@ extension RunnerTests { func makeSnapshotTraversalContext( app: XCUIApplication, - options: SnapshotOptions + options: SnapshotOptions, + captureDeadline: Date = .distantFuture ) throws -> SnapshotTraversalContext? { let viewport = safeSnapshotViewport(app: app) let queryRoot = options.scope.flatMap { findScopeElement(app: app, scope: $0) } ?? app - guard let rootSnapshot = try captureSnapshotRoot(queryRoot) else { + let slice = min(treeCaptureSliceBudget, max(0.5, captureDeadline.timeIntervalSinceNow)) + guard let rootSnapshot = try captureSnapshotRootBounded(queryRoot, sliceSeconds: slice) else { return nil } @@ -582,6 +591,74 @@ extension RunnerTests { ) } + static let treeCaptureTimeoutCode = "IOS_TREE_CAPTURE_TIMEOUT" + + func hasAbandonedTreeCapture() -> Bool { + treeCaptureLock.lock() + defer { treeCaptureLock.unlock() } + return abandonedTreeCaptureCount > 0 + } + + /// Runs the blocking tree-snapshot XPC on a worker thread bounded by `sliceSeconds`. On + /// timeout the XPC keeps running on its worker (it cannot be cancelled); the capture is + /// marked abandoned so plans avoid XCTest-backed tiers until it drains, the tree backend is + /// penalized for this bundle, and the plan moves on to the private AX backend (#1105). + private func captureSnapshotRootBounded( + _ element: XCUIElement, + sliceSeconds: TimeInterval + ) throws -> XCUIElementSnapshot? { + final class TreeCaptureBox { + var abandoned = false + var outcome: Result? + } + let box = TreeCaptureBox() + let semaphore = DispatchSemaphore(value: 0) + DispatchQueue.global(qos: .userInitiated).async { + var result: Result + do { + result = .success(try self.captureSnapshotRoot(element)) + } catch { + result = .failure(error) + } + self.treeCaptureLock.lock() + if box.abandoned { + self.abandonedTreeCaptureCount -= 1 + self.treeCaptureLock.unlock() + NSLog("AGENT_DEVICE_RUNNER_TREE_CAPTURE_DRAINED") + } else { + box.outcome = result + self.treeCaptureLock.unlock() + } + semaphore.signal() + } + if semaphore.wait(timeout: .now() + sliceSeconds) == .timedOut { + treeCaptureLock.lock() + let timedOut = box.outcome == nil + if timedOut { + box.abandoned = true + abandonedTreeCaptureCount += 1 + } + treeCaptureLock.unlock() + if timedOut { + NSLog("AGENT_DEVICE_RUNNER_TREE_CAPTURE_SLICE_TIMEOUT slice=%.1f", sliceSeconds) + penalizeSnapshotTreeBackend(bundleId: currentBundleId, reason: "tree_capture_slice_timeout") + throw SnapshotCaptureFailure( + code: Self.treeCaptureTimeoutCode, + message: "the XCTest tree capture exceeded its \(Int(sliceSeconds))s time slice", + hint: "The capture plan recovers through the private AX backend on this screen." + ) + } + } + switch box.outcome { + case .success(let snapshot): + return snapshot + case .failure(let error): + throw error + case .none: + return nil + } + } + private func captureSnapshotRoot(_ element: XCUIElement) throws -> XCUIElementSnapshot? { var rootSnapshot: XCUIElementSnapshot? var swiftErrorMessage: String? diff --git a/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+SnapshotCapturePlan.swift b/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+SnapshotCapturePlan.swift index 3cb85eeae..373f7c4bc 100644 --- a/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+SnapshotCapturePlan.swift +++ b/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+SnapshotCapturePlan.swift @@ -63,6 +63,46 @@ extension RunnerTests { static let regularVisiblePlan: [SnapshotBackendKind] = [.recursiveTree, .querySweep, .privateAX] static let rawDiagnosticPlan: [SnapshotBackendKind] = [.recursiveTree, .privateAX] + // MARK: Tree-backend penalty (cross-attempt memory, #1105) + // + // On some deep/dynamic screens the XCTest bulk snapshot no longer fails fast with + // kAXErrorIllegalArgument (the #758 signature) — it grinds for many seconds first. One slow + // grind is tolerable; re-grinding on every subsequent capture of the same screen buries the + // main thread past the execution watchdog. After a slow or abandoned tree capture, later + // plans for the same bundle lead with the private AX backend until the penalty expires. + + func penalizeSnapshotTreeBackend(bundleId: String?, reason: String) { + snapshotTreePenaltyLock.lock() + snapshotTreePenaltyBundleId = bundleId + snapshotTreePenaltyUntil = Date().addingTimeInterval(snapshotTreePenaltyDuration) + snapshotTreePenaltyLock.unlock() + NSLog( + "AGENT_DEVICE_RUNNER_SNAPSHOT_TREE_PENALIZED bundle=%@ reason=%@", + bundleId ?? "", + reason + ) + } + + func isSnapshotTreeBackendPenalized(bundleId: String?) -> Bool { + snapshotTreePenaltyLock.lock() + defer { snapshotTreePenaltyLock.unlock() } + guard Date() < snapshotTreePenaltyUntil else { return false } + // A penalty recorded without a bundle id applies to whatever target is current. + guard let penalized = snapshotTreePenaltyBundleId else { return true } + return penalized == bundleId + } + + /// Pure plan-reorder rule: a penalized tree backend defers to the private AX backend on the + /// regular plan only. The raw diagnostic plan keeps tree-first errors, and unknown plans are + /// left untouched. + static func effectiveSnapshotCapturePlan( + _ plan: [SnapshotBackendKind], + treePenalized: Bool + ) -> (plan: [SnapshotBackendKind], treeDeferred: Bool) { + guard treePenalized, plan == Self.regularVisiblePlan else { return (plan, false) } + return ([.privateAX, .querySweep, .recursiveTree], true) + } + // MARK: Plan runner func runSnapshotCapturePlan( @@ -76,21 +116,56 @@ extension RunnerTests { var axFailure: SnapshotCaptureFailure? let deadline = Date().addingTimeInterval(Self.snapshotPlanBudget) - for kind in plan { - if kind != plan.first && Date() >= deadline { + // Reorder is gated to iOS simulators — the only place the private AX backend exists, so + // the deferred plan cannot degrade platforms where the tree backend is the strongest tier. + var treePenalized = false +#if os(iOS) && targetEnvironment(simulator) + treePenalized = isSnapshotTreeBackendPenalized(bundleId: currentBundleId) +#endif + let (effectivePlan, treeDeferred) = Self.effectiveSnapshotCapturePlan( + plan, + treePenalized: treePenalized + ) + if treeDeferred { + firstFailure = ( + "the XCTest tree backend was deferred after a recent slow capture on this screen", + "budget" + ) + NSLog("AGENT_DEVICE_RUNNER_SNAPSHOT_TREE_DEFERRED bundle=%@", currentBundleId ?? "") + } + + for kind in effectivePlan { + if kind != effectivePlan.first && Date() >= deadline { NSLog("AGENT_DEVICE_RUNNER_SNAPSHOT_PLAN_BUDGET_EXHAUSTED skipped=%@", kind.rawValue) if firstFailure == nil { firstFailure = ("the capture plan ran out of its time budget", "budget") } break } + // While an abandoned tree capture is still grinding inside testmanagerd, XCTest-backed + // tiers would block behind it; only the private AX backend stays responsive (#1105). + if kind != .privateAX, hasAbandonedTreeCapture() { + NSLog("AGENT_DEVICE_RUNNER_SNAPSHOT_TIER_SKIPPED_XCTEST_OCCUPIED tier=%@", kind.rawValue) + if firstFailure == nil { + firstFailure = ( + "the XCTest capture channel is occupied by an abandoned tree capture", + "budget" + ) + } + continue + } let capture: SnapshotBackendCapture + let backendStartedAt = Date() do { - guard let result = try captureWithBackend(kind, app: app, options: options) else { + guard let result = try captureWithBackend(kind, app: app, options: options, deadline: deadline) + else { + recordSlowTreeBackendIfNeeded(kind, startedAt: backendStartedAt) continue } capture = result + recordSlowTreeBackendIfNeeded(kind, startedAt: backendStartedAt) } catch let failure as SnapshotCaptureFailure { + recordSlowTreeBackendIfNeeded(kind, startedAt: backendStartedAt) if Self.isAxSnapshotFailure(failure) { axFailure = failure } if firstFailure == nil { firstFailure = (failure.message, Self.isAxSnapshotFailure(failure) ? "ax-rejected" : "capture-failed") @@ -111,7 +186,7 @@ extension RunnerTests { continue } - let recovered = kind != plan.first + let recovered = kind != effectivePlan.first || treeDeferred if recovered { NSLog( "AGENT_DEVICE_RUNNER_SNAPSHOT_RECOVERED backend=%@ reason=%@", @@ -157,14 +232,33 @@ extension RunnerTests { return fallbackPayload } + /// Marks the tree backend as penalized when a capture attempt ground past the slow-capture + /// threshold — even a successful one: the next capture of this screen must not re-grind. + private func recordSlowTreeBackendIfNeeded(_ kind: SnapshotBackendKind, startedAt: Date) { + guard kind == .recursiveTree else { return } + let elapsed = Date().timeIntervalSince(startedAt) + guard elapsed > snapshotTreeSlowCaptureThreshold else { return } + penalizeSnapshotTreeBackend( + bundleId: currentBundleId, + reason: "slow_tree_capture_\(Int(elapsed * 1000))ms" + ) + } + private func captureWithBackend( _ kind: SnapshotBackendKind, app: XCUIApplication, - options: SnapshotOptions + options: SnapshotOptions, + deadline: Date ) throws -> SnapshotBackendCapture? { switch kind { case .recursiveTree: - guard let context = try makeSnapshotTraversalContext(app: app, options: options) else { + guard + let context = try makeSnapshotTraversalContext( + app: app, + options: options, + captureDeadline: deadline + ) + else { return nil } let payload = options.raw @@ -173,11 +267,11 @@ extension RunnerTests { return SnapshotBackendCapture(payload: payload, effectiveDepth: nil) case .querySweep: return SnapshotBackendCapture( - payload: snapshotFlatInteractive(app: app, options: options), + payload: snapshotFlatInteractive(app: app, options: options, planDeadline: deadline), effectiveDepth: nil ) case .privateAX: - return privateAXSnapshotCapture(app: app, options: options) + return privateAXSnapshotCapture(app: app, options: options, deadline: deadline) } } @@ -405,6 +499,76 @@ extension RunnerTests { ) } + func testEffectiveSnapshotCapturePlanDefersTreeOnlyWhenPenalizedRegularPlan() { + let regular = Self.effectiveSnapshotCapturePlan(Self.regularVisiblePlan, treePenalized: true) + XCTAssertEqual(regular.plan, [.privateAX, .querySweep, .recursiveTree]) + XCTAssertTrue(regular.treeDeferred) + + let unpenalized = Self.effectiveSnapshotCapturePlan(Self.regularVisiblePlan, treePenalized: false) + XCTAssertEqual(unpenalized.plan, Self.regularVisiblePlan) + XCTAssertFalse(unpenalized.treeDeferred) + + // The raw diagnostic plan preserves tree-first error propagation even under penalty. + let raw = Self.effectiveSnapshotCapturePlan(Self.rawDiagnosticPlan, treePenalized: true) + XCTAssertEqual(raw.plan, Self.rawDiagnosticPlan) + XCTAssertFalse(raw.treeDeferred) + } + + func testSnapshotTreeBackendPenaltyMatchesBundleAndExpires() { + defer { + snapshotTreePenaltyBundleId = nil + snapshotTreePenaltyUntil = .distantPast + } + + penalizeSnapshotTreeBackend(bundleId: "xyz.blueskyweb.app", reason: "test") + XCTAssertTrue(isSnapshotTreeBackendPenalized(bundleId: "xyz.blueskyweb.app")) + XCTAssertFalse(isSnapshotTreeBackendPenalized(bundleId: "com.other.app")) + + // A penalty recorded without a bundle applies to any current target. + penalizeSnapshotTreeBackend(bundleId: nil, reason: "test") + XCTAssertTrue(isSnapshotTreeBackendPenalized(bundleId: "com.other.app")) + + // Expired penalties stop applying. + snapshotTreePenaltyUntil = Date(timeIntervalSinceNow: -1) + XCTAssertFalse(isSnapshotTreeBackendPenalized(bundleId: "com.other.app")) + } + + // Pins the record(_:) suppression class via its pure classifier. record(_:) itself is not + // invoked here: feeding it the must-record variants would record real failures and fail + // this very test run. + func testSuppressedAxSnapshotIssueClassifier() { + // AX-server rejections inside a matching-snapshot fetch are muted... + XCTAssertTrue( + Self.isSuppressedAxSnapshotIssueDescription( + "Failed to get matching snapshot: Error kAXErrorIllegalArgument getting snapshot for element {pid=33837}" + ) + ) + // ...including sibling AX server codes. + XCTAssertTrue( + Self.isSuppressedAxSnapshotIssueDescription( + "Failed to get matching snapshot: Error kAXErrorCannotComplete getting snapshot for element" + ) + ) + // The hung-query timeout variant must keep recording. + XCTAssertFalse( + Self.isSuppressedAxSnapshotIssueDescription( + "Failed to get matching snapshot: Timed out while evaluating UI query." + ) + ) + // Unrelated issues must keep recording. + XCTAssertFalse( + Self.isSuppressedAxSnapshotIssueDescription( + "XCTAssertEqual failed: (\"1\") is not equal to (\"2\")" + ) + ) + // A kAXError outside the matching-snapshot fetch context is not this class. + XCTAssertFalse( + Self.isSuppressedAxSnapshotIssueDescription( + "Error kAXErrorIllegalArgument while performing scroll" + ) + ) + } + func testSnapshotAccessibilityUnavailableCarriesSparseVerdict() { currentApp = app currentBundleId = "com.example.app" diff --git a/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Transport.swift b/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Transport.swift index c7d74684a..ec625f7b2 100644 --- a/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Transport.swift +++ b/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests+Transport.swift @@ -116,6 +116,12 @@ extension RunnerTests { completion((jsonResponse(status: 200, response: executeUptime()), false)) return } + // Re-sends of a still-executing commandId (the daemon's transport retry loop) attach to + // the in-flight execution and receive its response instead of piling a second execution + // onto the main queue behind it (#1105 capture pileup). + if attachToInFlightCommandIfNeeded(command: command, completion: completion) { + return + } NSLog( "AGENT_DEVICE_RUNNER_COMMAND_ACCEPTED command=%@ commandId=%@", command.command.rawValue, @@ -131,7 +137,11 @@ extension RunnerTests { command.commandId ?? "", response.ok ? 1 : 0 ) - completion((self.jsonResponse(status: 200, response: response), command.command == .shutdown)) + self.deliverCommandResult( + command: command, + result: (self.jsonResponse(status: 200, response: response), command.command == .shutdown), + completion: completion + ) } catch { NSLog( "AGENT_DEVICE_RUNNER_COMMAND_FAILED command=%@ commandId=%@ error=%@", @@ -139,17 +149,21 @@ extension RunnerTests { command.commandId ?? "", String(describing: error) ) - completion(( - self.jsonResponse( - status: 500, - response: self.errorResponse( - code: "COMMAND_FAILED", - message: error.localizedDescription, - hint: "Check the runner log for XCTest details, then retry after the app is foregrounded if this was a timeout or activation failure." - ) + self.deliverCommandResult( + command: command, + result: ( + self.jsonResponse( + status: 500, + response: self.errorResponse( + code: "COMMAND_FAILED", + message: error.localizedDescription, + hint: "Check the runner log for XCTest details, then retry after the app is foregrounded if this was a timeout or activation failure." + ) + ), + false ), - false - )) + completion: completion + ) } } } catch { @@ -167,6 +181,59 @@ extension RunnerTests { } } + // MARK: - In-Flight Command Coalescing + + /// Returns true when this send duplicated a still-executing commandId and was attached as a + /// waiter of the in-flight execution. Otherwise marks the commandId in flight and returns + /// false so the caller enqueues the (single) execution. + private func attachToInFlightCommandIfNeeded( + command: Command, + completion: @escaping ((data: Data, shouldFinish: Bool)) -> Void + ) -> Bool { + guard let commandId = normalizedInFlightCommandId(command.commandId) else { return false } + inFlightCommandLock.lock() + if inFlightCommandIds.contains(commandId) { + inFlightCommandWaiters[commandId, default: []].append(completion) + inFlightCommandLock.unlock() + NSLog( + "AGENT_DEVICE_RUNNER_COMMAND_COALESCED command=%@ commandId=%@", + command.command.rawValue, + commandId + ) + return true + } + inFlightCommandIds.insert(commandId) + inFlightCommandLock.unlock() + return false + } + + private func deliverCommandResult( + command: Command, + result: (data: Data, shouldFinish: Bool), + completion: ((data: Data, shouldFinish: Bool)) -> Void + ) { + var waiters: [((data: Data, shouldFinish: Bool)) -> Void] = [] + if let commandId = normalizedInFlightCommandId(command.commandId) { + inFlightCommandLock.lock() + inFlightCommandIds.remove(commandId) + waiters = inFlightCommandWaiters.removeValue(forKey: commandId) ?? [] + inFlightCommandLock.unlock() + } + completion(result) + for waiter in waiters { + waiter(result) + } + } + + private func normalizedInFlightCommandId(_ commandId: String?) -> String? { + guard let trimmed = commandId?.trimmingCharacters(in: .whitespacesAndNewlines), + !trimmed.isEmpty + else { + return nil + } + return trimmed + } + // MARK: - Response Encoding private func jsonResponse(status: Int, response: Response) -> Data { @@ -203,6 +270,9 @@ extension RunnerTests { private func finish() { listener?.cancel() listener = nil + // Guard against double-fulfill: coalesced shutdown sends deliver one result to + // multiple waiters, each of which may ask to finish. doneExpectation?.fulfill() + doneExpectation = nil } } diff --git a/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift b/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift index 4aa404a6b..b5dc7cb36 100644 --- a/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift +++ b/apple-runner/AgentDeviceRunner/AgentDeviceRunnerUITests/RunnerTests.swift @@ -54,6 +54,41 @@ final class RunnerTests: XCTestCase { var needsFirstInteractionDelay = false var activeRecording: ScreenRecorder? let commandJournal = RunnerCommandJournal() + // Coalesces duplicate transport sends of the same commandId onto the single in-flight + // execution instead of enqueueing them again behind it (#1105 capture pileup). + let inFlightCommandLock = NSLock() + var inFlightCommandIds: Set = [] + var inFlightCommandWaiters: [String: [((data: Data, shouldFinish: Bool)) -> Void]] = [:] + // Tracks main-queue work abandoned by the execution watchdog so new main-thread commands + // fail fast as busy instead of queueing behind work that cannot be cancelled (#1105). + let mainThreadWorkLock = NSLock() + var abandonedMainThreadWorkCount = 0 + var abandonedMainThreadWorkSince: Date? + // Past this age the runner stops claiming "busy, retry soon" and reports itself wedged so + // the daemon recycles it — the only cure once the main thread is stuck for good. + let mainThreadWedgeThreshold: TimeInterval = 120 + // Sticky per-bundle hint: after the XCTest tree backend ground past its slice (or a snapshot + // was abandoned by the watchdog), later capture plans lead with the private AX backend + // instead of re-grinding the tree on the same screen class (#1105). + let snapshotTreePenaltyLock = NSLock() + var snapshotTreePenaltyBundleId: String? + var snapshotTreePenaltyUntil = Date.distantPast + let snapshotTreePenaltyDuration: TimeInterval = 120 + // Bluesky-class screens grind ~4-8s before the tree backend fails; anything past this + // threshold marks the screen hostile so the next capture leads with private AX. + let snapshotTreeSlowCaptureThreshold: TimeInterval = 3 + // The blocking XCTest tree snapshot XPC runs on a worker thread with this slice so a + // content-dependent grind (#1105: seconds to minutes on live Bluesky screens) cannot pin + // the capture plan. On timeout the XPC keeps grinding on its worker; while any abandoned + // tree capture is outstanding, plans skip the XCTest-backed tiers (tree, query sweep) and + // use the private AX backend, which does not go through testmanagerd. + let treeCaptureLock = NSLock() + var abandonedTreeCaptureCount = 0 + let treeCaptureSliceBudget: TimeInterval = 8 + // Observability for the record(_:) suppression below: how many AX-broken-screen snapshot + // issues this session muted, so wedge investigations see the volume without grepping logs. + let suppressedIssueLock = NSLock() + var suppressedAxSnapshotIssueCount = 0 let interactiveTypes: Set = [ .button, .cell, @@ -88,6 +123,43 @@ final class RunnerTests: XCTestCase { continueAfterFailure = true } + /// True for the one recorded-issue class the runner deliberately mutes: an AX-server error + /// (`kAXError*`) inside a "Failed to get matching snapshot" fetch. The kAXError token + /// intentionally covers kAXErrorIllegalArgument and its sibling AX server codes (e.g. + /// kAXErrorCannotComplete): any AX-server rejection inside a matching-snapshot fetch is the + /// same capture-plan noise the plan already classifies and recovers from. The timeout + /// variant ("Failed to get matching snapshot: Timed out while evaluating UI query.") carries + /// no kAXError token and MUST keep recording — it signals a genuinely hung query, exactly + /// the pathology XCTEST_RECORDED_FAILURE must stay able to see. + static func isSuppressedAxSnapshotIssueDescription(_ description: String) -> Bool { + description.contains("Failed to get matching snapshot") && description.contains("kAXError") + } + + /// On AX-broken screens (deep RN trees, #758/#1105) XCUIApplication queries record + /// "Failed to get matching snapshot: ... kAXError..." issues; XCTest tears the whole test + /// case down once a few accumulate, killing the long-lived runner right after the command + /// completes and forcing a ~25s restart per capture. This override is deliberately + /// suite-global (all commands, not just snapshot capture): tap-triggered element queries on + /// the same screens record the same noise and would still tear the runner down, and command + /// outcomes stay honest through their own error paths — only this issue side-channel is + /// muted. Everything else still records (and still drives XCTEST_RECORDED_FAILURE). + override func record(_ issue: XCTIssue) { + let description = issue.compactDescription + if Self.isSuppressedAxSnapshotIssueDescription(description) { + suppressedIssueLock.lock() + suppressedAxSnapshotIssueCount += 1 + let count = suppressedAxSnapshotIssueCount + suppressedIssueLock.unlock() + NSLog( + "AGENT_DEVICE_RUNNER_AX_SNAPSHOT_ISSUE_SUPPRESSED count=%ld description=%@", + count, + description + ) + return + } + super.record(issue) + } + @MainActor func testCommand() throws { if RunnerEnv.isTruthy("AGENT_DEVICE_RUNNER_NOOP_STARTUP") { diff --git a/docs/adr/0004-ios-snapshot-backend-strategy.md b/docs/adr/0004-ios-snapshot-backend-strategy.md index 52ec82d81..9908e5974 100644 --- a/docs/adr/0004-ios-snapshot-backend-strategy.md +++ b/docs/adr/0004-ios-snapshot-backend-strategy.md @@ -70,6 +70,16 @@ Later work moved recovery into the regular visible capture plan so healthy apps recursive tree path while degraded simulator app classes can still return bounded, honest output when fallback query tiers are the only available source of visible controls. +Issue #1105 showed a second failure shape on the same app class: instead of failing fast with +`kAXErrorIllegalArgument`, the recursive tree capture can grind for many seconds on +heavy/animating screens before failing, pushing the chained plan past the runner's main-thread +watchdog and burying the main queue under retries. The plan now carries its umbrella deadline +into the query-sweep and private-AX tiers (later ladder rungs stop when the budget is spent), +and a slow or watchdog-abandoned tree capture penalizes the tree backend for that bundle for a +bounded window: subsequent regular plans lead with the private-AX backend +(`effectiveSnapshotCapturePlan`), stamped as `recovered` with a `budget` reason so the deferral +stays observable. The raw diagnostic plan is exempt — it keeps tree-first error propagation. + ## Consequences Regular snapshots remain the right tool for agents and Maestro compatibility because they describe diff --git a/docs/adr/0011-interaction-guarantee-contract.md b/docs/adr/0011-interaction-guarantee-contract.md index cc2993549..971755288 100644 --- a/docs/adr/0011-interaction-guarantee-contract.md +++ b/docs/adr/0011-interaction-guarantee-contract.md @@ -213,7 +213,11 @@ timeoutPolicy: { with a completeness gate over all public commands. Read-only polling commands declare `preserve-daemon`; the client derives the envelope from the declared -budget source instead of special-casing command names. +budget source instead of special-casing command names. Since #1105 the +capture-resolving interaction commands (`click`, `fill`, `longpress`, `press`, +`type`, `get`, `is`) also declare `preserve-daemon`: their dominant timeout +mode is the same blocked accessibility capture as `snapshot`, and resetting +the daemon there destroyed every healthy app session the daemon owned. ## Consequences diff --git a/src/core/command-descriptor/__tests__/timeout-policy.test.ts b/src/core/command-descriptor/__tests__/timeout-policy.test.ts index 80b051230..b331d3cda 100644 --- a/src/core/command-descriptor/__tests__/timeout-policy.test.ts +++ b/src/core/command-descriptor/__tests__/timeout-policy.test.ts @@ -48,13 +48,27 @@ test('declared timeout policies are structurally valid', () => { test('daemon-preserving timeout commands are a bounded, reviewed set', () => { // CONSERVATIVE: this list may only change in the same PR that updates it - // here. Preserving the daemon on timeout is for read-only capture/polling - // commands that can block in platform accessibility bridges — a timed-out + // here. Preserving the daemon on timeout is for commands whose dominant + // hang mode is a blocked platform accessibility bridge — a timed-out // poll must not turn into a daemon reset that loses every session (#1075). + // Interaction commands joined in #1105: their target resolution runs the + // same capture as snapshot, and resetting the daemon on a wedged capture + // destroyed healthy app sessions. const preserving = commandDescriptors .filter((descriptor) => descriptor.timeoutPolicy.onTimeout === 'preserve-daemon') .map((descriptor) => descriptor.name); - assert.deepEqual(preserving.sort(), ['find', 'snapshot', 'wait']); + assert.deepEqual(preserving.sort(), [ + 'click', + 'fill', + 'find', + 'get', + 'is', + 'longpress', + 'press', + 'snapshot', + 'type', + 'wait', + ]); }); test('budget sources deviating from the default are bounded, reviewed sets', () => { diff --git a/src/core/command-descriptor/registry.ts b/src/core/command-descriptor/registry.ts index 162cccd7d..b666c0990 100644 --- a/src/core/command-descriptor/registry.ts +++ b/src/core/command-descriptor/registry.ts @@ -451,53 +451,59 @@ const RAW_COMMAND_DESCRIPTORS = [ }, // -- interaction (route: interaction) -- + // Interaction commands resolve their target through the same platform accessibility + // capture as snapshot, so a hung capture is their dominant timeout mode. Resetting the + // daemon here destroyed every app session the daemon owned while the app itself was + // still healthy (#1105): keep the daemon (and sessions) alive like snapshot/wait/find, + // and rely on request cancellation + the per-request runner recycle budget to abort the + // stuck Apple runner work. { name: PUBLIC_COMMANDS.click, daemon: { route: 'interaction', replayScopedAction: true, androidBlockingDialogGuard: true }, capability: { apple: APPLE_SIM_AND_DEVICE, android: ANDROID_ALL, linux: LINUX_DEVICE }, - timeoutPolicy: DEFAULT_TIMEOUT_POLICY, + timeoutPolicy: PRESERVE_DAEMON_TIMEOUT_POLICY, batchable: true, }, { name: PUBLIC_COMMANDS.fill, daemon: { route: 'interaction', replayScopedAction: true, androidBlockingDialogGuard: true }, capability: { apple: APPLE_SIM_AND_DEVICE, android: ANDROID_ALL, linux: LINUX_DEVICE }, - timeoutPolicy: DEFAULT_TIMEOUT_POLICY, + timeoutPolicy: PRESERVE_DAEMON_TIMEOUT_POLICY, batchable: true, }, { name: PUBLIC_COMMANDS.longPress, daemon: { route: 'interaction', replayScopedAction: true, androidBlockingDialogGuard: true }, capability: { apple: APPLE_SIM_AND_DEVICE, android: ANDROID_ALL, linux: LINUX_DEVICE }, - timeoutPolicy: DEFAULT_TIMEOUT_POLICY, + timeoutPolicy: PRESERVE_DAEMON_TIMEOUT_POLICY, batchable: true, }, { name: PUBLIC_COMMANDS.press, daemon: { route: 'interaction', replayScopedAction: true, androidBlockingDialogGuard: true }, capability: { apple: APPLE_SIM_AND_DEVICE, android: ANDROID_ALL, linux: LINUX_DEVICE }, - timeoutPolicy: DEFAULT_TIMEOUT_POLICY, + timeoutPolicy: PRESERVE_DAEMON_TIMEOUT_POLICY, batchable: true, }, { name: PUBLIC_COMMANDS.type, daemon: { route: 'interaction', replayScopedAction: true, androidBlockingDialogGuard: true }, capability: ALL_DEVICE_COMMAND_CAPABILITY, - timeoutPolicy: DEFAULT_TIMEOUT_POLICY, + timeoutPolicy: PRESERVE_DAEMON_TIMEOUT_POLICY, batchable: true, }, { name: PUBLIC_COMMANDS.get, daemon: { route: 'interaction', replayScopedAction: true }, capability: ALL_DEVICE_COMMAND_CAPABILITY, - timeoutPolicy: DEFAULT_TIMEOUT_POLICY, + timeoutPolicy: PRESERVE_DAEMON_TIMEOUT_POLICY, batchable: true, }, { name: PUBLIC_COMMANDS.is, daemon: { route: 'interaction', replayScopedAction: true }, capability: ALL_DEVICE_COMMAND_CAPABILITY, - timeoutPolicy: DEFAULT_TIMEOUT_POLICY, + timeoutPolicy: PRESERVE_DAEMON_TIMEOUT_POLICY, batchable: true, }, diff --git a/src/platforms/apple/core/__tests__/runner-command-retry.test.ts b/src/platforms/apple/core/__tests__/runner-command-retry.test.ts index b287ccf72..c1ab796eb 100644 --- a/src/platforms/apple/core/__tests__/runner-command-retry.test.ts +++ b/src/platforms/apple/core/__tests__/runner-command-retry.test.ts @@ -57,10 +57,12 @@ import { prewarmIosRunnerSession, runAppleRunnerCommand, } from '../runner/runner-client.ts'; +import { resetRunnerRecycleLedgerForTests } from '../runner/runner-recycle-ledger.ts'; import type { RunnerXctestrunArtifact } from '../runner/runner-xctestrun.ts'; beforeEach(() => { vi.resetAllMocks(); + resetRunnerRecycleLedgerForTests(); mockMarkRunnerXctestrunArtifactBadForRun.mockResolvedValue(undefined); }); @@ -1162,3 +1164,60 @@ async function captureDiagnostics(callback: () => Promise): Promise event)); } + +test('a request pays for at most one runner recycle, then fails fast with a preserved-session hint', async () => { + // Dead-runner wedge (#1105): every send fails, every recovery boots a fresh runner. The + // request must stop after ONE recycle instead of stacking ~25s xcodebuild boots until the + // client envelope kills the daemon. + const requestId = 'req-recycle-cap'; + mockEnsureRunnerSession.mockImplementation(async () => makeRunnerSession({ ready: true })); + mockExecuteRunnerCommandWithSession.mockRejectedValue( + new AppError('COMMAND_FAILED', 'fetch failed'), + ); + + await assert.rejects( + () => runAppleRunnerCommand(IOS_SIMULATOR, { command: 'snapshot' }, { requestId }), + (error: unknown) => { + assert.ok(error instanceof AppError); + assert.equal(error.details?.recovery, 'runner_recycle_budget_exhausted'); + assert.match(String(error.details?.hint), /session is preserved/); + return true; + }, + ); + + // Attempt 1 (initial boot, free) + attempt 2 (the single recycle); attempt 3 fails fast + // before paying for another boot. + assert.equal(mockEnsureRunnerSession.mock.calls.length, 2); +}); + +test('a later command in the same request cannot pay for a second recycle boot', async () => { + const requestId = 'req-restart-cap'; + const staleSession = makeRunnerSession({ port: 8100, ready: true }); + const freshSession = makeRunnerSession({ port: 8101, ready: false }); + + mockEnsureRunnerSession.mockResolvedValueOnce(staleSession).mockResolvedValueOnce(freshSession); + mockExecuteRunnerCommandWithSession + .mockRejectedValueOnce(new AppError('COMMAND_FAILED', 'Runner did not accept connection')) + .mockResolvedValueOnce({ message: 'tapped' }); + + // First command consumes the request's only recycle via restart-and-replay. + const result = await runAppleRunnerCommand( + IOS_SIMULATOR, + { command: 'tap', x: 120, y: 240 }, + { requestId }, + ); + assert.deepEqual(result, { message: 'tapped' }); + assert.equal(mockEnsureRunnerSession.mock.calls.length, 2); + + // Second command in the same request finds no live session (runner died again): it must + // fail fast instead of booting a third runner. + await assert.rejects( + () => runAppleRunnerCommand(IOS_SIMULATOR, { command: 'tap', x: 120, y: 240 }, { requestId }), + (error: unknown) => { + assert.ok(error instanceof AppError); + assert.equal(error.details?.recovery, 'runner_recycle_budget_exhausted'); + return true; + }, + ); + assert.equal(mockEnsureRunnerSession.mock.calls.length, 2); +}); diff --git a/src/platforms/apple/core/__tests__/runner-recycle-ledger.test.ts b/src/platforms/apple/core/__tests__/runner-recycle-ledger.test.ts new file mode 100644 index 000000000..e9bc258c1 --- /dev/null +++ b/src/platforms/apple/core/__tests__/runner-recycle-ledger.test.ts @@ -0,0 +1,60 @@ +import { beforeEach, test } from 'vitest'; +import assert from 'node:assert/strict'; +import { AppError } from '../../../../kernel/errors.ts'; +import { + buildRunnerRecycleBudgetExhaustedError, + hasRunnerRequestTouchedSession, + markRunnerRequestTouchedSession, + resetRunnerRecycleLedgerForTests, + runnerRecycleLedgerKey, + tryConsumeRunnerRecycle, +} from '../runner/runner-recycle-ledger.ts'; + +beforeEach(() => { + resetRunnerRecycleLedgerForTests(); +}); + +test('ledger key prefers the request id and falls back to the command id', () => { + assert.equal( + runnerRecycleLedgerKey({ requestId: 'req-1' }, { commandId: 'cmd-1' }), + 'request:req-1', + ); + assert.equal(runnerRecycleLedgerKey({}, { commandId: 'cmd-1' }), 'command:cmd-1'); + assert.equal(runnerRecycleLedgerKey({ requestId: ' ' }, { commandId: ' ' }), undefined); +}); + +test('a request gets exactly one runner recycle, then must fail fast', () => { + const key = 'request:req-wedge'; + assert.equal(tryConsumeRunnerRecycle(key), true); + assert.equal(tryConsumeRunnerRecycle(key), false); + assert.equal(tryConsumeRunnerRecycle(key), false); + // Other requests keep their own budget. + assert.equal(tryConsumeRunnerRecycle('request:req-other'), true); +}); + +test('untracked keys never block (no scope to account against)', () => { + assert.equal(tryConsumeRunnerRecycle(undefined), true); + assert.equal(tryConsumeRunnerRecycle(undefined), true); + assert.equal(hasRunnerRequestTouchedSession(undefined), false); + markRunnerRequestTouchedSession(undefined); // no-op, must not throw +}); + +test('touched-session marker distinguishes a first boot from a recycle boot', () => { + const key = 'request:req-cold'; + assert.equal(hasRunnerRequestTouchedSession(key), false); + markRunnerRequestTouchedSession(key); + assert.equal(hasRunnerRequestTouchedSession(key), true); +}); + +test('exhausted-budget error is actionable and preserves the session contract', () => { + const error = buildRunnerRecycleBudgetExhaustedError( + { command: 'snapshot', commandId: 'cmd-9' }, + { requestId: 'req-9', logPath: '/tmp/runner.log' }, + ); + assert.ok(error instanceof AppError); + assert.equal(error.code, 'COMMAND_FAILED'); + assert.match(error.message, /already restarted/); + assert.equal(error.details?.recovery, 'runner_recycle_budget_exhausted'); + assert.match(String(error.details?.hint), /session is preserved/); + assert.match(String(error.details?.hint), /screenshot/); +}); diff --git a/src/platforms/apple/core/runner/runner-lifecycle.ts b/src/platforms/apple/core/runner/runner-lifecycle.ts index 7c13ff130..dab9efcc8 100644 --- a/src/platforms/apple/core/runner/runner-lifecycle.ts +++ b/src/platforms/apple/core/runner/runner-lifecycle.ts @@ -6,6 +6,7 @@ import { RUNNER_COMMAND_TIMEOUT_MS, RUNNER_STARTUP_TIMEOUT_MS } from './runner-t import { type RunnerSession, ensureRunnerSession, + getRunnerSessionSnapshot, invalidateRunnerSession, executeRunnerCommandWithSession, readRunnerStartupTimeoutMs, @@ -24,6 +25,13 @@ import type { } from './runner-provider.ts'; import { markRunnerXctestrunArtifactBadForRun } from './runner-xctestrun.ts'; import { handleRunnerTransportErrorAfterCommandSend } from './runner-command-recovery.ts'; +import { + buildRunnerRecycleBudgetExhaustedError, + hasRunnerRequestTouchedSession, + markRunnerRequestTouchedSession, + runnerRecycleLedgerKey, + tryConsumeRunnerRecycle, +} from './runner-recycle-ledger.ts'; export type PrepareIosRunnerOptions = AppleRunnerPrepareOptions; export type PrepareIosRunnerResult = AppleRunnerPrepareResult; @@ -241,9 +249,21 @@ export async function executeRunnerCommand( ): Promise> { assertRunnerRequestActive(options.requestId); const signal = getRequestSignal(options.requestId); + const recycleKey = runnerRecycleLedgerKey(options, command); let session: RunnerSession | undefined; try { + // A request that already used a runner session and finds none alive is about to pay for + // a recycle boot (~25s): bound that to the per-request recycle budget so a hostile screen + // fails fast with a preserved session instead of stacking runner boots (#1105). + if ( + !getRunnerSessionSnapshot(device.id)?.alive && + hasRunnerRequestTouchedSession(recycleKey) && + !tryConsumeRunnerRecycle(recycleKey) + ) { + throw buildRunnerRecycleBudgetExhaustedError(command, options); + } session = await ensureRunnerSession(device, options); + markRunnerRequestTouchedSession(recycleKey); const timeoutMs = session.ready ? RUNNER_COMMAND_TIMEOUT_MS : readRunnerStartupTimeoutMs(session); @@ -314,6 +334,12 @@ async function restartSessionAndRunCommand(params: { recoveredDiagnosticPhase?: string; }): Promise> { const { device, command, options, signal, restartReason } = params; + // At most one recycle per request: when the budget is spent, fail fast and KEEP the current + // session — if the runner is merely busy draining abandoned work it answers the next request + // cheaply, and a dead process is detected and cleaned by the next ensureRunnerSession (#1105). + if (!tryConsumeRunnerRecycle(runnerRecycleLedgerKey(options, command))) { + throw buildRunnerRecycleBudgetExhaustedError(command, options); + } await invalidateRunnerSession(params.session, restartReason); const restartedSession = await ensureRunnerSession(device, { ...options, diff --git a/src/platforms/apple/core/runner/runner-recycle-ledger.ts b/src/platforms/apple/core/runner/runner-recycle-ledger.ts new file mode 100644 index 000000000..86506834b --- /dev/null +++ b/src/platforms/apple/core/runner/runner-recycle-ledger.ts @@ -0,0 +1,119 @@ +import { AppError } from '../../../../kernel/errors.ts'; +import { emitDiagnostic } from '../../../../utils/diagnostics.ts'; +import type { RunnerCommand } from './runner-contract.ts'; + +// Caps how many times one daemon request may pay for a full runner recycle +// (invalidate + fresh xcodebuild boot, ~25s each). The #1105 wedge showed a +// single press request doing TWO full recycles before the client envelope +// killed the daemon: after one recycle the screen is proven hostile to +// capture, so the request must fail fast with an actionable hint instead of +// burning another boot. The request's FIRST boot (cold daemon) is not a +// recycle and stays free. +const MAX_RUNNER_RECYCLES_PER_REQUEST = 1; +const LEDGER_ENTRY_TTL_MS = 10 * 60_000; +const LEDGER_MAX_ENTRIES = 512; + +type RunnerRecycleLedgerEntry = { + touchedSession: boolean; + recycles: number; + lastAtMs: number; +}; + +const ledger = new Map(); + +/** Requests are the recycle scope; direct calls without a request fall back to the command id. */ +export function runnerRecycleLedgerKey( + options: { requestId?: string }, + command: Pick, +): string | undefined { + const requestId = options.requestId?.trim(); + if (requestId) return `request:${requestId}`; + const commandId = command.commandId?.trim(); + if (commandId) return `command:${commandId}`; + return undefined; +} + +/** Marks that this request has used a runner session (so a later boot counts as a recycle). */ +export function markRunnerRequestTouchedSession(key: string | undefined): void { + if (!key) return; + const entry = readEntry(key); + entry.touchedSession = true; + writeEntry(key, entry); +} + +export function hasRunnerRequestTouchedSession(key: string | undefined): boolean { + if (!key) return false; + return ledger.get(key)?.touchedSession === true; +} + +/** + * Consumes one recycle from the request's budget. Returns false when the + * budget is exhausted — the caller must fail fast instead of booting another + * runner. Untracked keys always allow (no scope to account against). + */ +export function tryConsumeRunnerRecycle(key: string | undefined): boolean { + if (!key) return true; + const entry = readEntry(key); + if (entry.recycles >= MAX_RUNNER_RECYCLES_PER_REQUEST) { + writeEntry(key, entry); + return false; + } + entry.recycles += 1; + writeEntry(key, entry); + return true; +} + +export function buildRunnerRecycleBudgetExhaustedError( + command: Pick, + options: { requestId?: string; logPath?: string }, +): AppError { + emitDiagnostic({ + level: 'warn', + phase: 'ios_runner_recycle_budget_exhausted', + data: { + command: command.command, + commandId: command.commandId, + requestId: options.requestId, + maxRecycles: MAX_RUNNER_RECYCLES_PER_REQUEST, + }, + }); + return new AppError( + 'COMMAND_FAILED', + `iOS runner was already restarted during this request and "${command.command}" still failed, so agent-device stopped instead of paying for another runner boot.`, + { + command: command.command, + commandId: command.commandId, + recovery: 'runner_recycle_budget_exhausted', + hint: 'The current screen is overwhelming the iOS accessibility capture (usually heavy or animating content). The app session is preserved: run `screenshot` for visual truth and interact with coordinate commands, or navigate to another screen and retry. Re-running the same command immediately will likely wedge again.', + logPath: options.logPath, + }, + ); +} + +/** Test-only: clears all recycle accounting. */ +export function resetRunnerRecycleLedgerForTests(): void { + ledger.clear(); +} + +function readEntry(key: string): RunnerRecycleLedgerEntry { + pruneLedger(); + return ledger.get(key) ?? { touchedSession: false, recycles: 0, lastAtMs: Date.now() }; +} + +function writeEntry(key: string, entry: RunnerRecycleLedgerEntry): void { + entry.lastAtMs = Date.now(); + ledger.delete(key); + ledger.set(key, entry); +} + +function pruneLedger(): void { + const cutoff = Date.now() - LEDGER_ENTRY_TTL_MS; + for (const [key, entry] of ledger) { + if (entry.lastAtMs < cutoff) ledger.delete(key); + } + while (ledger.size > LEDGER_MAX_ENTRIES) { + const oldest = ledger.keys().next().value; + if (oldest === undefined) break; + ledger.delete(oldest); + } +} diff --git a/src/platforms/apple/core/runner/runner-session.ts b/src/platforms/apple/core/runner/runner-session.ts index aab71e549..62fd50711 100644 --- a/src/platforms/apple/core/runner/runner-session.ts +++ b/src/platforms/apple/core/runner/runner-session.ts @@ -908,6 +908,10 @@ function resolveRunnerFatalErrorReason(error: unknown): string | undefined { if (!(error instanceof AppError)) return undefined; if (error.code === 'IOS_AX_SNAPSHOT_FAILED') return 'ax_snapshot_failure'; if (error.code === 'XCTEST_RECORDED_FAILURE') return 'xctest_recorded_failure'; + // The runner reported its main thread stuck in abandoned work past the wedge threshold + // (#1105): only a restart cures it. The per-request recycle budget still bounds how many + // boots one request pays for. + if (error.code === 'RUNNER_WEDGED') return 'runner_main_thread_wedged'; return undefined; } diff --git a/src/utils/__tests__/daemon-client.test.ts b/src/utils/__tests__/daemon-client.test.ts index 0e94dd776..eadf01bc7 100644 --- a/src/utils/__tests__/daemon-client.test.ts +++ b/src/utils/__tests__/daemon-client.test.ts @@ -212,7 +212,10 @@ test('read-only polling command timeouts preserve the daemon like snapshot', () // must not turn one timed-out poll into a daemon reset that loses every session. assert.equal(shouldResetDaemonAfterRequestTimeout('wait'), false); assert.equal(shouldResetDaemonAfterRequestTimeout('find'), false); - assert.equal(shouldResetDaemonAfterRequestTimeout('press'), true); + // Interaction commands resolve targets through the same capture, so their + // timeouts preserve the daemon too (#1105); non-capture commands still reset. + assert.equal(shouldResetDaemonAfterRequestTimeout('press'), false); + assert.equal(shouldResetDaemonAfterRequestTimeout('open'), true); }); test('wait request timeout extends past the user-supplied wait budget', () => {