Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/foundation/models/orchestration.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ class ProviderPrompt(StrictModel):
response_format: ProviderResponseFormat = ProviderResponseFormat.TEXT
schema_name: str | None = None
output_schema: dict[str, Any] | None = None
temperature: float | None = Field(default=None, ge=0.0, le=2.0)

@model_validator(mode="after")
def _validate_schema_requirements(self) -> ProviderPrompt:
Expand Down
70 changes: 57 additions & 13 deletions src/foundation/services/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,38 @@ def _is_soft_failure_for_path(
return path in cumulative_changed_paths


def _unwrap_generated_file_body(text: str) -> str:
"""Salvage raw file content if the model wrapped it in an AssistantPlan blob.

Some models, when asked to generate file content mid-loop, keep "planning"
and return a ``{assistant_message, actions: [...]}`` object with the real
content buried in a write action's ``content`` argument. Detect that shape
and extract the inner content; otherwise return the text untouched.
"""
stripped = text.strip()
if not stripped.startswith("{"):
return text
try:
payload = json.loads(stripped)
except (json.JSONDecodeError, ValueError):
return text
if not isinstance(payload, dict) or "actions" not in payload:
return text
actions = payload.get("actions")
if not isinstance(actions, list):
return text
for action in actions:
if not isinstance(action, dict):
continue
tool_call = action.get("tool_call")
if not isinstance(tool_call, dict):
continue
args = tool_call.get("arguments")
if isinstance(args, dict) and isinstance(args.get("content"), str):
return args["content"]
return text


def _action_target_path(action: PlannedAction) -> str | None:
if action.kind is not ActionKind.TOOL_CALL or action.tool_call is None:
return None
Expand Down Expand Up @@ -691,27 +723,39 @@ def _generate_file_body(
request: UserRequest,
observation_messages: list[ProviderMessage],
) -> str:
messages: list[ProviderMessage] = [
# Fold the gathered context into a single plain-text reference block.
# We deliberately do NOT replay the planning conversation as assistant
# turns: those are JSON plans, and feeding them back primes the model to
# keep "planning" (emit another actions object) instead of writing the
# file's raw bytes.
reference = "\n\n".join(m.content for m in observation_messages).strip()
user_parts = [
f"Write the complete, literal contents of the file `{path}`.",
f"\nWhat the file should contain:\n{brief}",
]
if reference:
user_parts.append(
"\nReference data gathered so far (use the real values from it; "
f"do not invent facts):\n{reference}"
)
user_parts.append(f"\nThe original user request was: {request.message}")
messages = [
ProviderMessage(
role=ProviderMessageRole.DEVELOPER,
content=(
f"You are generating the complete contents of the file `{path}`. "
"Output ONLY the raw file body — no markdown code fences, no "
"commentary, no surrounding quotes."
"You are a file-content writer, not a planner. Output ONLY the "
"raw, literal bytes to save to the file — nothing else. Do NOT "
"wrap the output in JSON, do NOT emit an actions/plan object, do "
"NOT fence the whole file, and do NOT add commentary before or "
"after the content."
),
)
),
ProviderMessage(role=ProviderMessageRole.USER, content="\n".join(user_parts)),
]
messages.extend(observation_messages)
messages.append(
ProviderMessage(
role=ProviderMessageRole.USER,
content=f"Original request: {request.message}\n\nFile to write: {brief}",
)
)
response = self._provider.complete(
ProviderPrompt(messages=messages, response_format=ProviderResponseFormat.TEXT)
)
return response.content
return _unwrap_generated_file_body(response.content)

def _run_replan_loop(
self,
Expand Down
8 changes: 8 additions & 0 deletions src/foundation/services/planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@

_MAX_PLAN_ACTIONS = 40
_MAX_TOTAL_ACTIONS = 200
# Temperature used on a plan repair retry so the model doesn't deterministically
# reproduce the same invalid/empty response it produced at temperature 0.
_PLAN_REPAIR_TEMPERATURE = 0.4
_COMMIT_INTENT_WORD_RE = re.compile(r"\bcommit(?:ing|ted|s)?\b", re.IGNORECASE)
_COMMIT_INTENT_PHRASES = (
"stop for approval",
Expand Down Expand Up @@ -139,11 +142,16 @@ def request_plan(
last_error: Exception | None = None

for attempt in range(1, self._max_plan_attempts + 1):
# First attempt decodes deterministically (temperature 0). On a
# repair retry, nudge temperature up so the model doesn't
# deterministically reproduce the same malformed/empty response.
retry_temperature = _PLAN_REPAIR_TEMPERATURE if attempt > 1 else None
prompt = ProviderPrompt(
messages=[*base_messages, *supplemental_messages],
response_format=ProviderResponseFormat.JSON_OBJECT,
schema_name="assistant_plan",
output_schema=AssistantPlan.model_json_schema(),
temperature=retry_temperature,
)
try:
response = self._provider.complete(prompt)
Expand Down
7 changes: 6 additions & 1 deletion src/foundation/services/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -618,9 +618,14 @@ def _build_payload(self, prompt: ProviderPrompt) -> dict[str, Any]:
if prompt.response_format is ProviderResponseFormat.JSON_OBJECT:
assert prompt.output_schema is not None
payload["format"] = prompt.output_schema
options["temperature"] = 0
# Default structured output to deterministic decoding, but allow a
# caller-supplied temperature (e.g. a repair retry nudging off 0 so
# it doesn't reproduce the same malformed JSON).
options["temperature"] = prompt.temperature if prompt.temperature is not None else 0
if self._needs_think_for_structured_output(self._model):
payload["think"] = True
elif prompt.temperature is not None:
options["temperature"] = prompt.temperature
if options:
payload["options"] = options
return payload
Expand Down
85 changes: 85 additions & 0 deletions tests/test_orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,9 @@ def complete(self, prompt: ProviderPrompt) -> ProviderResponse:
repair_text = "\n".join(m.content for m in provider.calls[1].messages)
assert "truncated" in repair_text.lower()
assert "content_brief" in repair_text
# First attempt is deterministic; the repair retry nudges temperature off 0.
assert provider.calls[0].temperature is None
assert provider.calls[1].temperature == 0.4
assert result.summary is not None


Expand Down Expand Up @@ -2884,3 +2887,85 @@ def git(*args: str) -> None:
and action.requires_approval
for action in result.iterations[-1].plan.actions
)


def test_unwrap_generated_file_body_extracts_plan_wrapped_content() -> None:
from foundation.services.orchestrator import _unwrap_generated_file_body

wrapped = json.dumps(
{
"assistant_message": "writing the file",
"actions": [
{
"id": "w",
"kind": "tool_call",
"tool_call": {
"capability_id": "foundation.file.write",
"arguments": {"path": "r.md", "content": "# Real Title\n\nbody text"},
},
}
],
}
)
assert _unwrap_generated_file_body(wrapped) == "# Real Title\n\nbody text"


def test_unwrap_generated_file_body_passes_through_plain_and_real_json() -> None:
from foundation.services.orchestrator import _unwrap_generated_file_body

assert _unwrap_generated_file_body("# Just markdown\n") == "# Just markdown\n"
# A legitimate JSON file (no actions array) must be left untouched.
config = '{"key": "value", "nested": {"a": 1}}'
assert _unwrap_generated_file_body(config) == config


def test_orchestrator_unwraps_plan_wrapped_generated_body(
tmp_path: Path,
monkeypatch: pytest.MonkeyPatch,
) -> None:
clean = "# Clean Report\n\nThe actual body of the report.\n"
wrapped_body = json.dumps(
{
"assistant_message": "writing",
"actions": [
{
"id": "w",
"kind": "tool_call",
"tool_call": {
"capability_id": "foundation.file.write",
"arguments": {"path": "report.md", "content": clean},
},
}
],
}
)
provider = StubProvider(
[
_provider_response(
{
"assistant_message": "Writing the report.",
"actions": [
{
"id": "write_report",
"kind": "tool_call",
"summary": "Write the report",
"tool_call": {
"capability_id": "foundation.file.write",
"arguments": {
"path": "report.md",
"content_brief": "a report about the project",
},
},
}
],
}
),
_text_response(wrapped_body),
]
)
orchestrator, _, workspace_root = _orchestrator(tmp_path, monkeypatch, provider)

orchestrator.orchestrate(UserRequest(message="write a report"))

# The plan-wrapped generation is unwrapped to the clean file body.
assert (workspace_root / "report.md").read_text(encoding="utf-8") == clean
22 changes: 22 additions & 0 deletions tests/test_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,3 +535,25 @@ def test_openai_adapter_raises_truncated_on_incomplete_response() -> None:
adapter.complete(_structured_prompt())

assert exc_info.value.code is ProviderErrorCode.TRUNCATED


def test_ollama_adapter_honors_prompt_temperature() -> None:
transport = FakeTransport(
[{"message": {"role": "assistant", "content": '{"assistant_message":"ok","actions":[]}'}}]
)
adapter = OllamaChatAdapter(
model="glm-5.1:cloud",
base_url="http://localhost:11434/api",
transport=transport,
)
prompt = ProviderPrompt(
messages=[ProviderMessage(role=ProviderMessageRole.USER, content="Plan this.")],
response_format=ProviderResponseFormat.JSON_OBJECT,
schema_name="assistant_plan",
output_schema={"type": "object"},
temperature=0.4,
)

adapter.complete(prompt)

assert transport.calls[0]["payload"]["options"]["temperature"] == 0.4
Loading