From 7228f8e7c73dc38eaac1ed6ad62bd2c18acfd541 Mon Sep 17 00:00:00 2001 From: anmolnoor <0xdevnoor@gmail.com> Date: Wed, 27 May 2026 04:52:30 -0700 Subject: [PATCH] Stage G: surface typed tool-result payloads to the planner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The observation builder only ever read artifact["stdout"], but typed capabilities don't use that field — file.read puts its data in "content", search/files/git in structured fields. So every typed read came back BLANK to the planner: it saw the action "executed" but none of the data. With file.read in particular the model would re-read the same file every iteration, conclude "the content isn't being returned," and loop until the no-progress detector or an empty-response flake killed the turn. (Masked until now because the model got its data from shell `gh api`, whose stdout *is* surfaced.) Add _tool_result_preview: for read-only result types (file read/read_chunk, search, files, git inspect, man/tldr) surface the payload (file content, or a compact JSON of the structured result) into the observation's stdout_preview, capped by the existing _truncate_preview limits. Writes/mutations are deliberately excluded so we don't echo just-written content back and re-bloat the prompt. Verified live: "read the report and summarize" now reads once, sees the content, and answers in a few iterations instead of looping ~20x and crashing. Tests: _tool_result_preview surfaces reads + search but not writes; an orchestrator read surfaces the file content into the next iteration's planner context. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/foundation/services/orchestrator.py | 48 +++++++++++++++++++++ tests/test_orchestrator.py | 56 +++++++++++++++++++++++++ 2 files changed, 104 insertions(+) diff --git a/src/foundation/services/orchestrator.py b/src/foundation/services/orchestrator.py index f15fd23..d5daf04 100644 --- a/src/foundation/services/orchestrator.py +++ b/src/foundation/services/orchestrator.py @@ -276,6 +276,47 @@ def _unwrap_generated_file_body(text: str) -> str: return text +# Read-only typed results whose payload should be surfaced to the planner so it +# can actually use what the tool returned (not just see that it ran). Writes and +# mutations are excluded — echoing their content back only bloats the prompt. +_RESULT_PREVIEW_TYPES = frozenset( + { + ExecutionArtifactType.FILE_READ, + ExecutionArtifactType.FILE_READ_CHUNK, + ExecutionArtifactType.SEARCH, + ExecutionArtifactType.FILES, + ExecutionArtifactType.GIT, + ExecutionArtifactType.GIT_STATUS, + ExecutionArtifactType.GIT_DIFF, + ExecutionArtifactType.GIT_SHOW, + ExecutionArtifactType.GIT_LOG, + ExecutionArtifactType.MAN, + ExecutionArtifactType.TLDR, + } +) + + +def _tool_result_preview( + artifact: dict[str, object] | None, + artifact_type: ExecutionArtifactType | None, +) -> str: + """Render a read-only tool result's payload for the planner observation. + + Typed capabilities (file reads, search, discovery, git inspect) don't use + the shell ``stdout`` field, so without this their output never reaches the + planner and it can't act on what it just fetched. + """ + if not artifact or artifact_type not in _RESULT_PREVIEW_TYPES: + return "" + content = artifact.get("content") + if isinstance(content, str) and content: + return content + try: + return json.dumps(artifact, default=str) + except (TypeError, ValueError): + return "" + + def _action_target_path(action: PlannedAction) -> str | None: if action.kind is not ActionKind.TOOL_CALL or action.tool_call is None: return None @@ -1280,6 +1321,13 @@ def _build_observation( stdout_preview = _truncate_preview( f'User answered: "{result.artifact["answer"]}"' ) + # Surface typed read-only results (file reads, search, git + # inspect) so the planner sees the data it fetched instead of an + # empty outcome — without this it re-runs the same read forever. + if not stdout_preview: + preview = _tool_result_preview(result.artifact, result.artifact_type) + if preview: + stdout_preview = _truncate_preview(preview) if result.status is ExecutionStatus.PENDING_APPROVAL: approval_outcomes.append(f"{action.id}: pending approval") diff --git a/tests/test_orchestrator.py b/tests/test_orchestrator.py index 36da344..ee24fab 100644 --- a/tests/test_orchestrator.py +++ b/tests/test_orchestrator.py @@ -3007,3 +3007,59 @@ def test_orchestrator_not_found_surfaces_siblings_for_self_correction( and r.artifact_type is ExecutionArtifactType.FILE_READ for r in result.execution_results ) + + +def test_tool_result_preview_surfaces_reads_not_writes() -> None: + from foundation.services.orchestrator import _tool_result_preview + + # File-read content is surfaced verbatim. + assert ( + _tool_result_preview( + {"path": "a.md", "content": "# Hi\nbody"}, ExecutionArtifactType.FILE_READ + ) + == "# Hi\nbody" + ) + # Structured read-only results (search) are surfaced as compact JSON. + search_preview = _tool_result_preview( + {"matches": ["a.py:1: needle"]}, ExecutionArtifactType.SEARCH + ) + assert "a.py:1: needle" in search_preview + # Writes are NOT echoed back (would only re-bloat the prompt). + assert ( + _tool_result_preview( + {"path": "a.md", "content": "huge body"}, ExecutionArtifactType.FILE_WRITE + ) + == "" + ) + assert _tool_result_preview(None, ExecutionArtifactType.FILE_READ) == "" + + +def test_orchestrator_surfaces_file_read_content_to_next_iteration( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + provider = StubProvider( + [ + _provider_response( + { + "assistant_message": "Reading the note.", + "actions": [_read_action("read_note", "note.txt")], + } + ) + # iteration 2: StubProvider returns a zero-action completion by default. + ] + ) + orchestrator, _, workspace_root = _orchestrator(tmp_path, monkeypatch, provider) + (workspace_root / "note.txt").write_text("SECRET-CONTENT-12345\n", encoding="utf-8") + + result = orchestrator.orchestrate(UserRequest(message="read note.txt")) + + # The read succeeded AND its content reached the planner's next iteration — + # previously the observation was blank and the model re-read forever. + assert any( + r.status is ExecutionStatus.EXECUTED + and r.artifact_type is ExecutionArtifactType.FILE_READ + for r in result.execution_results + ) + second_plan_text = "\n".join(m.content for m in provider.calls[1].messages) + assert "SECRET-CONTENT-12345" in second_plan_text