From e34276628693f6f1df5eb9d731605fef7aea0e9b Mon Sep 17 00:00:00 2001
From: anmolnoor <0xdevnoor@gmail.com>
Date: Wed, 27 May 2026 04:15:04 -0700
Subject: [PATCH] Stage E: fix deferred-write content generation + jitter
 repair retries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two robustness fixes found by running the report flow against a live model.

1. Deferred-write body came out wrapped in a JSON plan. _generate_file_body
   replayed the planning conversation (prior plans as assistant turns), which
   primed the model to keep emitting an actions object instead of raw file
   bytes — so the written file was a JSON blob with the real markdown buried in
   a content field, and stats were vague/hallucinated.
   - Reframe the call: drop the assistant-plan turns; pass gathered data as a
     single plain reference block with a hard "you are a file-content writer,
     not a planner — output raw bytes only" instruction.
   - Add _unwrap_generated_file_body as a defensive net: if the model still
     returns an AssistantPlan-shaped blob, extract the write action's content;
     plain text and legitimate JSON files pass through untouched.

2. Plan repair retries were deterministic. At temperature 0 a model that emits
   malformed/empty JSON reproduces it identically on retry, wasting the attempt.
   ProviderPrompt gains an optional temperature; the planner sets 0.4 on repair
   retries (first attempt stays deterministic). Ollama honors it for both JSON
   and text calls.

Verified end-to-end against qwen3.5: the report now writes as clean markdown
using real GitHub values (followers, dates) instead of a JSON wrapper.

Tests: unwrap extracts wrapped content and leaves plain/real-JSON untouched;
orchestrator unwraps a plan-wrapped generated body; provider honors a prompt
temperature; the repair retry carries temperature 0.4.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/foundation/models/orchestration.py  |  1 +
 src/foundation/services/orchestrator.py | 70 ++++++++++++++++----
 src/foundation/services/planner.py      |  8 +++
 src/foundation/services/provider.py     |  7 +-
 tests/test_orchestrator.py              | 85 +++++++++++++++++++++++++
 tests/test_provider.py                  | 22 +++++++
 6 files changed, 179 insertions(+), 14 deletions(-)

diff --git a/src/foundation/models/orchestration.py b/src/foundation/models/orchestration.py
index f2295df..2c8f179 100644
--- a/src/foundation/models/orchestration.py
+++ b/src/foundation/models/orchestration.py
@@ -133,6 +133,7 @@ class ProviderPrompt(StrictModel):
     response_format: ProviderResponseFormat = ProviderResponseFormat.TEXT
     schema_name: str | None = None
     output_schema: dict[str, Any] | None = None
+    temperature: float | None = Field(default=None, ge=0.0, le=2.0)
 
     @model_validator(mode="after")
     def _validate_schema_requirements(self) -> ProviderPrompt:
diff --git a/src/foundation/services/orchestrator.py b/src/foundation/services/orchestrator.py
index 1e0bcc4..f15fd23 100644
--- a/src/foundation/services/orchestrator.py
+++ b/src/foundation/services/orchestrator.py
@@ -244,6 +244,38 @@ def _is_soft_failure_for_path(
     return path in cumulative_changed_paths
 
 
+def _unwrap_generated_file_body(text: str) -> str:
+    """Salvage raw file content if the model wrapped it in an AssistantPlan blob.
+
+    Some models, when asked to generate file content mid-loop, keep "planning"
+    and return a ``{assistant_message, actions: [...]}`` object with the real
+    content buried in a write action's ``content`` argument. Detect that shape
+    and extract the inner content; otherwise return the text untouched.
+    """
+    stripped = text.strip()
+    if not stripped.startswith("{"):
+        return text
+    try:
+        payload = json.loads(stripped)
+    except (json.JSONDecodeError, ValueError):
+        return text
+    if not isinstance(payload, dict) or "actions" not in payload:
+        return text
+    actions = payload.get("actions")
+    if not isinstance(actions, list):
+        return text
+    for action in actions:
+        if not isinstance(action, dict):
+            continue
+        tool_call = action.get("tool_call")
+        if not isinstance(tool_call, dict):
+            continue
+        args = tool_call.get("arguments")
+        if isinstance(args, dict) and isinstance(args.get("content"), str):
+            return args["content"]
+    return text
+
+
 def _action_target_path(action: PlannedAction) -> str | None:
     if action.kind is not ActionKind.TOOL_CALL or action.tool_call is None:
         return None
@@ -691,27 +723,39 @@ def _generate_file_body(
         request: UserRequest,
         observation_messages: list[ProviderMessage],
     ) -> str:
-        messages: list[ProviderMessage] = [
+        # Fold the gathered context into a single plain-text reference block.
+        # We deliberately do NOT replay the planning conversation as assistant
+        # turns: those are JSON plans, and feeding them back primes the model to
+        # keep "planning" (emit another actions object) instead of writing the
+        # file's raw bytes.
+        reference = "\n\n".join(m.content for m in observation_messages).strip()
+        user_parts = [
+            f"Write the complete, literal contents of the file `{path}`.",
+            f"\nWhat the file should contain:\n{brief}",
+        ]
+        if reference:
+            user_parts.append(
+                "\nReference data gathered so far (use the real values from it; "
+                f"do not invent facts):\n{reference}"
+            )
+        user_parts.append(f"\nThe original user request was: {request.message}")
+        messages = [
             ProviderMessage(
                 role=ProviderMessageRole.DEVELOPER,
                 content=(
-                    f"You are generating the complete contents of the file `{path}`. "
-                    "Output ONLY the raw file body — no markdown code fences, no "
-                    "commentary, no surrounding quotes."
+                    "You are a file-content writer, not a planner. Output ONLY the "
+                    "raw, literal bytes to save to the file — nothing else. Do NOT "
+                    "wrap the output in JSON, do NOT emit an actions/plan object, do "
+                    "NOT fence the whole file, and do NOT add commentary before or "
+                    "after the content."
                 ),
-            )
+            ),
+            ProviderMessage(role=ProviderMessageRole.USER, content="\n".join(user_parts)),
         ]
-        messages.extend(observation_messages)
-        messages.append(
-            ProviderMessage(
-                role=ProviderMessageRole.USER,
-                content=f"Original request: {request.message}\n\nFile to write: {brief}",
-            )
-        )
         response = self._provider.complete(
             ProviderPrompt(messages=messages, response_format=ProviderResponseFormat.TEXT)
         )
-        return response.content
+        return _unwrap_generated_file_body(response.content)
 
     def _run_replan_loop(
         self,
diff --git a/src/foundation/services/planner.py b/src/foundation/services/planner.py
index 5045d61..b6cb4db 100644
--- a/src/foundation/services/planner.py
+++ b/src/foundation/services/planner.py
@@ -43,6 +43,9 @@
 
 _MAX_PLAN_ACTIONS = 40
 _MAX_TOTAL_ACTIONS = 200
+# Temperature used on a plan repair retry so the model doesn't deterministically
+# reproduce the same invalid/empty response it produced at temperature 0.
+_PLAN_REPAIR_TEMPERATURE = 0.4
 _COMMIT_INTENT_WORD_RE = re.compile(r"\bcommit(?:ing|ted|s)?\b", re.IGNORECASE)
 _COMMIT_INTENT_PHRASES = (
     "stop for approval",
@@ -139,11 +142,16 @@ def request_plan(
         last_error: Exception | None = None
 
         for attempt in range(1, self._max_plan_attempts + 1):
+            # First attempt decodes deterministically (temperature 0). On a
+            # repair retry, nudge temperature up so the model doesn't
+            # deterministically reproduce the same malformed/empty response.
+            retry_temperature = _PLAN_REPAIR_TEMPERATURE if attempt > 1 else None
             prompt = ProviderPrompt(
                 messages=[*base_messages, *supplemental_messages],
                 response_format=ProviderResponseFormat.JSON_OBJECT,
                 schema_name="assistant_plan",
                 output_schema=AssistantPlan.model_json_schema(),
+                temperature=retry_temperature,
             )
             try:
                 response = self._provider.complete(prompt)
diff --git a/src/foundation/services/provider.py b/src/foundation/services/provider.py
index 41899c6..b8c95c1 100644
--- a/src/foundation/services/provider.py
+++ b/src/foundation/services/provider.py
@@ -618,9 +618,14 @@ def _build_payload(self, prompt: ProviderPrompt) -> dict[str, Any]:
         if prompt.response_format is ProviderResponseFormat.JSON_OBJECT:
             assert prompt.output_schema is not None
             payload["format"] = prompt.output_schema
-            options["temperature"] = 0
+            # Default structured output to deterministic decoding, but allow a
+            # caller-supplied temperature (e.g. a repair retry nudging off 0 so
+            # it doesn't reproduce the same malformed JSON).
+            options["temperature"] = prompt.temperature if prompt.temperature is not None else 0
             if self._needs_think_for_structured_output(self._model):
                 payload["think"] = True
+        elif prompt.temperature is not None:
+            options["temperature"] = prompt.temperature
         if options:
             payload["options"] = options
         return payload
diff --git a/tests/test_orchestrator.py b/tests/test_orchestrator.py
index 4f3f301..e81f594 100644
--- a/tests/test_orchestrator.py
+++ b/tests/test_orchestrator.py
@@ -329,6 +329,9 @@ def complete(self, prompt: ProviderPrompt) -> ProviderResponse:
     repair_text = "\n".join(m.content for m in provider.calls[1].messages)
     assert "truncated" in repair_text.lower()
     assert "content_brief" in repair_text
+    # First attempt is deterministic; the repair retry nudges temperature off 0.
+    assert provider.calls[0].temperature is None
+    assert provider.calls[1].temperature == 0.4
     assert result.summary is not None
 
 
@@ -2884,3 +2887,85 @@ def git(*args: str) -> None:
         and action.requires_approval
         for action in result.iterations[-1].plan.actions
     )
+
+
+def test_unwrap_generated_file_body_extracts_plan_wrapped_content() -> None:
+    from foundation.services.orchestrator import _unwrap_generated_file_body
+
+    wrapped = json.dumps(
+        {
+            "assistant_message": "writing the file",
+            "actions": [
+                {
+                    "id": "w",
+                    "kind": "tool_call",
+                    "tool_call": {
+                        "capability_id": "foundation.file.write",
+                        "arguments": {"path": "r.md", "content": "# Real Title\n\nbody text"},
+                    },
+                }
+            ],
+        }
+    )
+    assert _unwrap_generated_file_body(wrapped) == "# Real Title\n\nbody text"
+
+
+def test_unwrap_generated_file_body_passes_through_plain_and_real_json() -> None:
+    from foundation.services.orchestrator import _unwrap_generated_file_body
+
+    assert _unwrap_generated_file_body("# Just markdown\n") == "# Just markdown\n"
+    # A legitimate JSON file (no actions array) must be left untouched.
+    config = '{"key": "value", "nested": {"a": 1}}'
+    assert _unwrap_generated_file_body(config) == config
+
+
+def test_orchestrator_unwraps_plan_wrapped_generated_body(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    clean = "# Clean Report\n\nThe actual body of the report.\n"
+    wrapped_body = json.dumps(
+        {
+            "assistant_message": "writing",
+            "actions": [
+                {
+                    "id": "w",
+                    "kind": "tool_call",
+                    "tool_call": {
+                        "capability_id": "foundation.file.write",
+                        "arguments": {"path": "report.md", "content": clean},
+                    },
+                }
+            ],
+        }
+    )
+    provider = StubProvider(
+        [
+            _provider_response(
+                {
+                    "assistant_message": "Writing the report.",
+                    "actions": [
+                        {
+                            "id": "write_report",
+                            "kind": "tool_call",
+                            "summary": "Write the report",
+                            "tool_call": {
+                                "capability_id": "foundation.file.write",
+                                "arguments": {
+                                    "path": "report.md",
+                                    "content_brief": "a report about the project",
+                                },
+                            },
+                        }
+                    ],
+                }
+            ),
+            _text_response(wrapped_body),
+        ]
+    )
+    orchestrator, _, workspace_root = _orchestrator(tmp_path, monkeypatch, provider)
+
+    orchestrator.orchestrate(UserRequest(message="write a report"))
+
+    # The plan-wrapped generation is unwrapped to the clean file body.
+    assert (workspace_root / "report.md").read_text(encoding="utf-8") == clean
diff --git a/tests/test_provider.py b/tests/test_provider.py
index 2c6f613..ce83f63 100644
--- a/tests/test_provider.py
+++ b/tests/test_provider.py
@@ -535,3 +535,25 @@ def test_openai_adapter_raises_truncated_on_incomplete_response() -> None:
         adapter.complete(_structured_prompt())
 
     assert exc_info.value.code is ProviderErrorCode.TRUNCATED
+
+
+def test_ollama_adapter_honors_prompt_temperature() -> None:
+    transport = FakeTransport(
+        [{"message": {"role": "assistant", "content": '{"assistant_message":"ok","actions":[]}'}}]
+    )
+    adapter = OllamaChatAdapter(
+        model="glm-5.1:cloud",
+        base_url="http://localhost:11434/api",
+        transport=transport,
+    )
+    prompt = ProviderPrompt(
+        messages=[ProviderMessage(role=ProviderMessageRole.USER, content="Plan this.")],
+        response_format=ProviderResponseFormat.JSON_OBJECT,
+        schema_name="assistant_plan",
+        output_schema={"type": "object"},
+        temperature=0.4,
+    )
+
+    adapter.complete(prompt)
+
+    assert transport.calls[0]["payload"]["options"]["temperature"] == 0.4