Agenta-AI · mmabrouk · Jun 26, 2026 · Jun 26, 2026
diff --git a/docs/design/agent-workflows/projects/qa/f040-park-terminal-plan.md b/docs/design/agent-workflows/projects/qa/f040-park-terminal-plan.md
@@ -0,0 +1,68 @@
+# F-040 fix plan: make a HITL park END the `/run` turn gracefully
+
+## The bug (confirmed, live)
+
+On the `/messages` (HITL) path, when the responder returns `park`
+(`engines/sandbox_agent/permissions.ts`), the runner sends NO `respondPermission` and
+relies on `session.prompt()` resolving. Claude-over-ACP does NOT end the turn on an
+unanswered permission gate, so `session.prompt()` (`sandbox_agent.ts:~428`) blocks
+forever. Consequences:
+
+1. The parked turn never terminates → its temp sandbox leaks (the `finally` never runs).
+2. The egress (`vercel/stream.py`) never sees the run iterator end → no `finish` frame →
+   the SSE stream hangs.
+3. The AI-SDK resume fires while the old stream is still open → errors → "agent run
+   failed" (Deny) / 5-min hang then `ERR_ABORTED` (Approve).
+
+## The fix (one move, runner-side)
+
+When the responder returns `park`, **end the current `/run` turn gracefully** instead of
+holding the ACP connection open:
+
+- `permissions.ts` reports the park back to the orchestration loop (a callback / signal).
+- `sandbox_agent.ts` races `session.prompt()` against a "parked" signal. On park it calls
+  `sandbox.destroySession(session.id)`, which (verified in the `sandbox-agent` package):
+  - `cancelPendingPermissionsForSession` resolves the pending permission RPC with
+    `{outcome: "cancelled"}` (NOT a reject → no F-024 clobber), and
+  - sends the managed `session/cancel`, so the in-flight `prompt()` resolves with
+    `StopReason::Cancelled`.
+- The runner then returns a terminal result with `stopReason: "paused"` (a park terminal,
+  distinct from a model `cancelled`), so:
+  - the egress loop ends and emits a proper `finish` frame (`finishReason` maps to
+    `unknown`/omitted — fine; the FE re-runs on resume),
+  - the `finally` disposes the sandbox (NO leak).
+
+The resume then cold-replays as a fresh turn: `extractApprovalDecisions` resolves the
+stored decision via the name+args anchor (already built, #4854), Approve → tool runs →
+completes; Deny → clean denial (`#4859` FE path already maps deny → tool-error, model
+continues).
+
+### Why `destroySession`, not a raw `session/cancel`
+
+The `sandbox-agent` package blocks a manual `session/cancel` via `rawSend`
+(`MANUAL_CANCEL_ERROR`); only `destroySession` is allowed to send the managed cancel.
+`destroySession` is exactly the right call: it cancels the pending permission cleanly AND
+the in-flight prompt, then marks the session destroyed (we dispose the sandbox anyway).
+
+## FE resume contract (point 4) — already built, just verified
+
+`vercel/messages.py` `_tool_part_blocks` already converts an inline
+`state:"approval-responded"` tool part into the `{approved}` envelope, and
+`agentApprovalResume.ts` ensures the FE includes the responded tool part on resume. No FE
+change needed; covered by existing tests.
+
+## Slices
+
+1. **Runner park-terminal** (`permissions.ts` + `sandbox_agent.ts`): park signal →
+   `destroySession` → terminal `stopReason:"paused"`; the `finally` disposes (no leak).
+2. **Tests**: park emits a terminal result (stopReason paused, prompt unblocked); no
+   sandbox leak on park (destroySession + destroySandbox both called); the resume
+   cold-replay still resolves (existing test stays green).
+
+## Acceptance
+
+- Unit: a parked run RETURNS (does not hang), `stopReason:"paused"`, `destroySession`
+  called once, sandbox disposed once.
+- Live: Claude + haiku + gated github tool + Ask rule → Approve completes with the real
+  result (no hang); Deny → clean denial (no "agent run failed"); NO leaked runner temp
+  sandbox after a parked→resumed turn. Reproduce 2-3x.
diff --git a/sdks/python/agenta/sdk/agents/adapters/vercel/stream.py b/sdks/python/agenta/sdk/agents/adapters/vercel/stream.py
@@ -26,6 +26,11 @@
     "function_call": "tool-calls",
     "refusal": "content-filter",
     "content_filter": "content-filter",
+    # A HITL park ends the turn intentionally-but-incomplete (the FE then resumes on the
+    # user's decision). It is neither a model completion nor an error, so map it to the AI
+    # SDK's `other` rather than letting it fall through to `unknown` (F-040).
+    "paused": "other",
+    "cancelled": "other",
 }
 
 

diff --git a/sdks/python/oss/tests/pytest/unit/agents/adapters/test_vercel_stream_park.py b/sdks/python/oss/tests/pytest/unit/agents/adapters/test_vercel_stream_park.py
@@ -0,0 +1,84 @@
+"""Egress finish-on-park (F-040).
+
+A parked HITL turn now ends gracefully on the runner with a terminal ``stopReason:"paused"``
+result, so the streaming egress must drain to a clean ``finish`` frame (the FE then resumes on
+the user's decision). Before the runner fix the parked turn never terminated, the stream hung,
+and no ``finish`` ever arrived. This pins the egress side of that contract: given a parked
+record stream, ``agent_run_to_vercel_parts`` yields the approval request AND a final ``finish``.
+"""
+
+from __future__ import annotations
+
+from typing import Any, AsyncIterator, Dict, List
+
+import pytest
+
+from agenta.sdk.agents.adapters.vercel.stream import agent_run_to_vercel_parts
+from agenta.sdk.agents.streaming import AgentRun
+
+
+async def _records(items: List[Dict[str, Any]]) -> AsyncIterator[Dict[str, Any]]:
+    for item in items:
+        yield item
+
+
+def _parked_run() -> AgentRun:
+    """A runner record stream for a parked turn: the gated tool call, the approval request,
+    a ``done`` event, then the terminal ``paused`` result (what the F-040 runner fix emits)."""
+    return AgentRun(
+        _records(
+            [
+                {
+                    "kind": "event",
+                    "event": {
+                        "type": "tool_call",
+                        "id": "tool-1",
+                        "name": "github__get_user",
+                        "input": {},
+                    },
+                },
+                {
+                    "kind": "event",
+                    "event": {
+                        "type": "interaction_request",
+                        "id": "perm-1",
+                        "kind": "permission",
+                        "payload": {"toolCallId": "tool-1"},
+                    },
+                },
+                {"kind": "event", "event": {"type": "done", "stopReason": "paused"}},
+                {
+                    "kind": "result",
+                    "result": {
+                        "ok": True,
+                        "output": "",
+                        "stopReason": "paused",
+                        "sessionId": "conv-1",
+                        "traceId": "trace-1",
+                    },
+                },
+            ]
+        )
+    )
+
+
+@pytest.mark.asyncio
+async def test_parked_run_emits_approval_then_finish() -> None:
+    parts = [part async for part in agent_run_to_vercel_parts(_parked_run())]
+    types = [p.get("type") for p in parts]
+
+    # The approval request reaches the FE so it can prompt the user ...
+    assert "tool-approval-request" in types
+    # ... and the stream drains to a clean finish (no immortal-park hang, F-040).
+    assert types[-1] == "finish"
+    assert types.count("finish") == 1
+
+    # The approval request carries the gated tool-call id so it attaches to the tool part.
+    approval = next(p for p in parts if p["type"] == "tool-approval-request")
+    assert approval["approvalId"] == "perm-1"
+    assert approval["toolCallId"] == "tool-1"
+
+    # A park is intentional-but-incomplete, mapped to the AI SDK `other` finish reason (not
+    # a model completion `stop`, not `unknown`).
+    finish = parts[-1]
+    assert finish["finishReason"] == "other"
diff --git a/services/agent/src/engines/sandbox_agent.ts b/services/agent/src/engines/sandbox_agent.ts
@@ -398,9 +398,32 @@ export async function runSandboxAgent(
     // decisions the HITLResponder falls back to the base policy and is byte-identical to the
     // old PolicyResponder, so `/invoke` is unchanged.
     const hasHumanSurface = !!(request.sessionId && request.sessionId.trim());
+    // A park (cross-turn HITL) must END the turn gracefully, not hold the ACP connection open
+    // forever (F-040): Claude does not end a turn on an unanswered gate, so `session.prompt()`
+    // would block indefinitely, the sandbox would leak, and the egress would never emit a
+    // `finish` frame. On the first park we `destroySession`, which (a) resolves the pending
+    // permission RPC with `{outcome:"cancelled"}` — NOT a reject, so no F-024 clobber — and
+    // (b) sends the managed `session/cancel` so the in-flight `prompt()` resolves with a
+    // cancelled stop reason. The resume then cold-replays as a fresh turn (#4854).
+    let parked = false;
+    let resolveParked: (() => void) | undefined;
+    const parkedSignal = new Promise<void>((resolve) => {
+      resolveParked = resolve;
+    });
+    const onPark = (): void => {
+      if (parked) return; // first park wins; later gates this turn are moot once we cancel
+      parked = true;
+      resolveParked?.();
+      // Cancel the in-flight prompt so `session.prompt()` returns instead of hanging. The
+      // managed cancel lives on the SandboxAgent handle (the `Session` wrapper has none).
+      // Best effort: the `finally` disposes the sandbox regardless, and the prompt is also
+      // raced against `parkedSignal` below so the turn ends even if this call rejects.
+      void sandbox.destroySession?.(session.id).catch(() => {});
+    };
     attachPermissionResponder({
       session,
       run,
+      onPark,
       responder:
         deps.responderFactory?.(request.permissionPolicy) ??
         new HITLResponder(
@@ -425,11 +448,29 @@ export async function runSandboxAgent(
       );
     }
 
-    const result = await session.prompt([
-      { type: "text", text: plan.turnText },
+    // Race the prompt against the park signal: on a HITL park the prompt either resolves with
+    // a cancelled stop reason (the managed `session/cancel` landed) or never resolves at all
+    // (the harness ignores it) — either way `parkedSignal` ends the turn so the runner returns
+    // promptly, the `finally` disposes the sandbox (no leak, F-040), and the egress emits a
+    // `finish` frame. When the park wins, the orphaned `prompt()` may later reject as the
+    // cancelled/torn-down connection unwinds; swallow that so it is not an `unhandledRejection`
+    // (the daemon teardown's `fetch failed` is expected on a cancel, not a run error).
+    const PARKED = Symbol("parked");
+    const promptPromise = Promise.resolve(
+      session.prompt([{ type: "text", text: plan.turnText }]),
+    );
+    promptPromise.catch(() => {});
+    const raced = await Promise.race([
+      promptPromise,
+      parkedSignal.then(() => PARKED),
     ]);
     await toolRelay?.stop();
-    const stopReason = (result as any)?.stopReason;
+    // A parked turn is terminal-but-incomplete: stop reason `paused` tells the egress to emit
+    // a clean `finish` (the FE then resumes on the user's decision). A real prompt result keeps
+    // the harness's own stop reason.
+    const stopReason =
+      raced === PARKED || parked ? "paused" : (raced as any)?.stopReason;
+    const result = raced === PARKED ? undefined : raced;
     logger(`prompt stopReason=${stopReason}`);
 
     // Usage: Pi writes its totals to a file via the extension. Other harnesses report the

diff --git a/services/agent/src/engines/sandbox_agent/permissions.ts b/services/agent/src/engines/sandbox_agent/permissions.ts
@@ -5,6 +5,15 @@ export interface AttachPermissionResponderInput {
   session: any;
   run: { emitEvent: (event: AgentEvent) => void };
   responder: Responder;
+  /**
+   * Called when the responder PARKS a gate (cross-turn HITL). The orchestration loop uses
+   * this to END the turn gracefully: a parked Claude turn never resolves `session.prompt()`
+   * on its own (the harness does not end the turn on an unanswered gate), so without this the
+   * prompt blocks forever, the sandbox leaks, and the egress never emits a `finish` frame
+   * (F-040). The runner cancels the in-flight prompt (via `destroySession`) so the turn ends
+   * paused and the resume cold-replays. Fires at most once per turn (the first park wins).
+   */
+  onPark?: () => void;
 }
 
 /**
@@ -17,6 +26,7 @@ export function attachPermissionResponder({
   session,
   run,
   responder,
+  onPark,
 }: AttachPermissionResponderInput): void {
   session.onPermissionRequest((req: any) => {
     const id = String(req?.id ?? "");
@@ -37,13 +47,17 @@ export function attachPermissionResponder({
     void responder
       .onPermission({ id, availableReplies, raw: req })
       .then((decision) => {
-        if (!req?.id) return;
         // PARK (cross-turn HITL): send NO reply. The `interaction_request` above is the last
-        // word on this tool call; the harness ends the turn with the tool PENDING and the next
-        // turn's stored decision resolves it. Replying `reject` here would make Claude emit a
-        // failed tool call ("User refused permission") that clobbers the approval prompt on the
-        // same tool-call id (F-024) — do NOT map park onto a reply.
-        if (decision === "park") return;
+        // word on this tool call; the next turn's stored decision resolves it. Replying
+        // `reject` here would make Claude emit a failed tool call ("User refused permission")
+        // that clobbers the approval prompt on the same tool-call id (F-024) — do NOT map park
+        // onto a reply. Instead signal the orchestration loop to end the turn gracefully:
+        // Claude never ends a turn on an unanswered gate, so the prompt would otherwise hang.
+        if (decision === "park") {
+          onPark?.();
+          return;
+        }
+        if (!req?.id) return;
         return session.respondPermission(
           req.id,
           decisionToReply(decision, availableReplies) as any,