Agenta-AI · mmabrouk · Jun 25, 2026 · Jun 25, 2026 · coderabbitai · Jun 25, 2026
diff --git a/sdks/python/agenta/sdk/agents/connections/errors.py b/sdks/python/agenta/sdk/agents/connections/errors.py
@@ -46,12 +46,16 @@ class MissingProviderError(ConnectionResolutionError):
     NOT a tolerated self-managed/OAuth fallback case: the config itself is underspecified.
     """
 
-    def __init__(self, *, model: str) -> None:
+    def __init__(self, *, model: str, hint_provider: str = "openai") -> None:
+        # The example provider in the hint is harness-appropriate: a Claude harness must read
+        # "anthropic/<model>", never "openai/<model>" (Claude reaches Anthropic only). The
+        # caller passes the harness's default provider so the suggested prefix is reachable.
         super().__init__(
-            f"model '{model}' needs a provider prefix (e.g. 'openai/{model}') "
+            f"model '{model}' needs a provider prefix (e.g. '{hint_provider}/{model}') "
             "or a structured {provider, model}; a bare model id can't resolve a credential"
         )
         self.model = model
+        self.hint_provider = hint_provider
 
 
 class AmbiguousConnectionError(ConnectionResolutionError):

diff --git a/sdks/python/agenta/sdk/agents/platform/connections.py b/sdks/python/agenta/sdk/agents/platform/connections.py
@@ -18,6 +18,10 @@
 
 from agenta.sdk.utils.logging import get_module_logger
 
+from ..capabilities import (
+    CLAUDE_MODEL_ALIASES,
+    HARNESS_CONNECTION_CAPABILITIES,
+)
 from ..connections import (
     AmbiguousConnectionError,
     ConnectionNotFoundError,
@@ -46,6 +50,47 @@
     "openrouter": "OPENROUTER_API_KEY",
 }
 
+# The Claude harness selects a model by a bare alias (``haiku``/``sonnet``/``opus`` + ``[1m]``)
+# or by a dated id (``claude-opus-4-8``), never with a ``provider/`` prefix. Those bare ids are
+# unambiguously Anthropic, so the F-017 "needs a provider prefix" rule must not reject them: a
+# bare alias resolves to ``anthropic`` here before the fail-loud check. The canonical alias set
+# lives in ``capabilities.py`` (the ``/inspect`` surface) so the two never drift.
+_CLAUDE_ALIASES: Set[str] = {alias.lower() for alias in CLAUDE_MODEL_ALIASES}
+
+
+def _inferred_claude_provider(model: ModelRef) -> Optional[str]:
+    """Return ``"anthropic"`` when a bare model id is a known Claude alias, else ``None``.
+
+    Only fires for a bare id (no explicit ``provider``). A known alias (the ``capabilities.py``
+    set) or any ``claude-*`` dated id (the Anthropic naming convention) is unambiguously
+    Anthropic, so it resolves to the ``anthropic`` provider rather than failing loud as a
+    provider-less model.
+    """
+    if model.provider:
+        return None
+    bare = (model.model or "").strip().lower()
+    if not bare:
+        return None
+    if bare in _CLAUDE_ALIASES or bare.startswith("claude-"):
+        return "anthropic"
+    return None
+
+
+def _harness_default_provider(harness: Optional[str]) -> str:
+    """The provider to suggest in a missing-provider hint for ``harness``.
+
+    Claude reaches Anthropic only, so its hint must read ``anthropic/<model>``; every other
+    harness defaults to ``openai`` (the existing hint). Derived from the capability table's
+    provider list so a harness's reachable providers stay the single source of truth.
+    """
+    caps = HARNESS_CONNECTION_CAPABILITIES.get(harness or "")
+    if caps and caps.providers:
+        if "openai" in caps.providers:
+            return "openai"
+        return caps.providers[0]
+    return "openai"
+
+
 # Extras keys the current UI stores on custom_provider secrets, normalized to harness env.
 _SNAKE_EXTRA_ENV_ALIASES: Dict[str, str] = {
     "aws_region_name": "AWS_REGION",
@@ -299,14 +344,19 @@ def _candidate_pool(
 
 
 def _choose_default(
-    candidates: Sequence[_ConnectionCandidate], model: ModelRef
+    candidates: Sequence[_ConnectionCandidate],
+    model: ModelRef,
+    harness: Optional[str] = None,
 ) -> _ConnectionCandidate:
     pool = _candidate_pool(candidates, model)
     if not pool and not model.provider:
         # A bare model id (no provider prefix) matched nothing by model id, so there is no
         # provider to look a credential up against. Fail loud with an actionable message rather
         # than degrade to no-credential and surface later as a misleading "add your key" error.
-        raise MissingProviderError(model=model.model)
+        # The hint names the harness-reachable provider (anthropic for Claude, not openai).
+        raise MissingProviderError(
+            model=model.model, hint_provider=_harness_default_provider(harness)
+        )
     if len(pool) == 1:
         return pool[0]
     default_named = [candidate for candidate in pool if candidate.slug == "default"]
@@ -350,9 +400,15 @@ def _choose_named(
 
 
 def _resolve_from_secrets(
-    *, secrets: Sequence[Any], model: ModelRef
+    *, secrets: Sequence[Any], model: ModelRef, harness: Optional[str] = None
 ) -> ResolvedConnection:
     connection = model.connection
+    # A bare Claude alias (haiku/sonnet/opus + [1m]) or a dated claude-* id is unambiguously
+    # Anthropic: infer the provider so the F-017 fail-loud rule does not reject a documented
+    # Claude model id. Inference only fills a missing provider; an explicit provider is honored.
+    inferred = _inferred_claude_provider(model)
+    if inferred:
+        model = model.model_copy(update={"provider": inferred})
     if connection.mode == "self_managed":
         return ResolvedConnection(
             provider=model.provider or "",
@@ -368,7 +424,7 @@ def _resolve_from_secrets(
     chosen = (
         _choose_named(candidates, model, slug)
         if slug
-        else _choose_default(candidates, model)
+        else _choose_default(candidates, model, harness)
     )
     provider = chosen.resolved_provider(model)
     env = chosen.resolved_env(provider)
@@ -434,7 +490,7 @@ async def resolve(
         data = response.json() or []
         if not isinstance(data, list):
             raise ConnectionResolutionError("connection resolution returned a non-list")
-        return _resolve_from_secrets(secrets=data, model=model)
+        return _resolve_from_secrets(secrets=data, model=model, harness=context.harness)
 
 
 class _StaticSecretsResolver:
@@ -447,4 +503,6 @@ async def resolve(
         model: ModelRef,
         context: RuntimeAuthContext,
     ) -> ResolvedConnection:
-        return _resolve_from_secrets(secrets=self._secrets, model=model)
+        return _resolve_from_secrets(
+            secrets=self._secrets, model=model, harness=context.harness
+        )
diff --git a/sdks/python/agenta/sdk/agents/utils/ts_runner.py b/sdks/python/agenta/sdk/agents/utils/ts_runner.py
@@ -11,8 +11,24 @@
 import os
 from typing import Any, AsyncIterator, Dict, Optional, Sequence
 
+from agenta.sdk.utils.logging import get_module_logger
+
 _DEFAULT_TIMEOUT = float(os.getenv("AGENTA_AGENT_RUNNER_TIMEOUT_SECONDS", "180"))
 
+log = get_module_logger(__name__)
+
+
+def _transport_error(user_message: str, *, detail: str) -> RuntimeError:
+    """A transport RuntimeError whose surfaced text is clean; the raw detail is logged only.
+
+    The HTTP body / process stderr / raw stdout that pinpoints a transport failure is internal:
+    log it for diagnosis but keep it out of the caller/UI-facing error, which gets only the
+    short ``user_message``. Mirrors ``wire.sanitize_runner_error`` for the result boundary.
+    """
+    if detail:
+        log.warning("agent: %s | detail: %s", user_message, detail)
+    return RuntimeError(user_message)
+
 
 async def deliver_http(
     base_url: str,
@@ -27,10 +43,12 @@ async def deliver_http(
     async with httpx.AsyncClient(timeout=timeout) as client:
         response = await client.post(url, json=payload)
     # Any non-2xx is a transport failure; 4xx left to fall through surfaces as an opaque
-    # JSON parse error instead of a clear runner failure.
+    # JSON parse error instead of a clear runner failure. The response body can carry internal
+    # detail, so it is logged, not surfaced.
     if response.status_code >= 400:
-        raise RuntimeError(
-            f"Agent runner HTTP {response.status_code}: {response.text[:1000]}"
+        raise _transport_error(
+            f"Agent runner HTTP {response.status_code}",
+            detail=response.text[:1000],
         )
     return response.json()
 
@@ -67,14 +85,16 @@ async def deliver_subprocess(
     out = stdout.decode("utf-8", "replace")
     err = stderr.decode("utf-8", "replace")
     if not out.strip():
-        raise RuntimeError(
-            f"Agent runner returned no output. exit={proc.returncode} stderr={err[-2000:]}"
+        raise _transport_error(
+            "Agent runner returned no output",
+            detail=f"exit={proc.returncode} stderr={err[-2000:]}",
         )
     try:
         return json.loads(out)
     except json.JSONDecodeError as exc:
-        raise RuntimeError(
-            f"Agent runner returned invalid JSON. stdout={out[:500]} stderr={err[-1000:]}"
+        raise _transport_error(
+            "Agent runner returned invalid JSON",
+            detail=f"stdout={out[:500]} stderr={err[-1000:]}",
         ) from exc
 
 
@@ -110,8 +130,9 @@ async def deliver_http_stream(
         ) as response:
             if response.status_code >= 400:
                 body = await response.aread()
-                raise RuntimeError(
-                    f"Agent runner HTTP {response.status_code}: {body[:1000]!r}"
+                raise _transport_error(
+                    f"Agent runner HTTP {response.status_code}",
+                    detail=repr(body[:1000]),
                 )
             async for line in response.aiter_lines():
                 line = line.strip()
@@ -175,9 +196,10 @@ async def deliver_subprocess_stream(
             err = b""
             if proc.stderr is not None:
                 err = await proc.stderr.read()
-            raise RuntimeError(
-                "Agent runner stream ended without a terminal result record. "
-                f"exit={proc.returncode} stderr={err.decode('utf-8', 'replace')[-2000:]}"
+            raise _transport_error(
+                "Agent runner stream ended without a terminal result record",
+                detail=f"exit={proc.returncode} "
+                f"stderr={err.decode('utf-8', 'replace')[-2000:]}",
             )
     finally:
         if proc.returncode is None:

diff --git a/sdks/python/agenta/sdk/agents/utils/wire.py b/sdks/python/agenta/sdk/agents/utils/wire.py
@@ -17,8 +17,11 @@
 
 from __future__ import annotations
 
+import re
 from typing import Any, Dict, List, Optional, Sequence
 
+from agenta.sdk.utils.logging import get_module_logger
+
 from ..dtos import (
     AgentEvent,
     AgentResult,
@@ -29,6 +32,38 @@
     TraceContext,
 )
 
+log = get_module_logger(__name__)
+
+# The user-facing error must not carry an internal stack/path dump. Cap the surfaced line and
+# strip the patterns that leak implementation detail; the full text is logged, never shown.
+_ERROR_MAX_LEN = 300
+# A stack frame leaked into the message ("at fn (/abs/path:12:3)" / 'File "/abs/path", line 12').
+_STACK_FRAME_RE = re.compile(r"\b(at\s+\S+\s*\(|File\s+\"|/[\w./-]+:\d+)")
+
+
+def sanitize_runner_error(error: Any) -> str:
+    """Reduce a runner ``error`` to one clean user-facing line, logging the full detail.
+
+    The runner already concise-formats its known auth/credit failures, but the fall-through case
+    returns the raw first line of an SDK/JS error, and the transport errors (HTTP/stderr/stdout
+    dumps) carry internal text. This is the single boundary that reaches the caller/UI, so it
+    keeps the actionable message, drops stack-frame and path noise, caps the length, and logs the
+    untruncated original for the trace/logs. A clean concise message passes through unchanged.
+    """
+    raw = "" if error is None else str(error)
+    if raw and (
+        len(raw) > _ERROR_MAX_LEN or "\n" in raw or _STACK_FRAME_RE.search(raw)
+    ):
+        log.warning("agent: runner reported a failure: %s", raw)
+    # Keep only the first line; a multi-line body is a stack dump, never the message.
+    message = raw.split("\n", 1)[0].strip()
+    # If even the first line is a raw stack frame, fall back to a generic line.
+    if not message or _STACK_FRAME_RE.match(message):
+        return "agent run failed"
+    if len(message) > _ERROR_MAX_LEN:
+        message = message[: _ERROR_MAX_LEN - 1].rstrip() + "…"
+    return message
+
 
 def request_to_wire(
     *,
@@ -91,10 +126,13 @@ def result_from_wire(data: Dict[str, Any]) -> AgentResult:
     """Parse a ``/run`` result JSON into an :class:`AgentResult`.
 
     Raises ``RuntimeError`` when the runner reported a failure, so the caller surfaces a
-    clear message rather than handing the model an empty reply.
+    clear message rather than handing the model an empty reply. The runner ``error`` is
+    sanitized at this boundary (one clean line, no stack/path leak); the full detail is logged.
     """
     if not data.get("ok"):
-        raise RuntimeError(f"Agent run failed: {data.get('error')}")
+        raise RuntimeError(
+            f"Agent run failed: {sanitize_runner_error(data.get('error'))}"
+        )
 
     messages: List[Message] = []
     for raw in data.get("messages") or []:

diff --git a/sdks/python/oss/tests/pytest/unit/agents/platform/test_connections_http.py b/sdks/python/oss/tests/pytest/unit/agents/platform/test_connections_http.py
@@ -128,7 +128,7 @@ async def test_bare_model_without_provider_fails_loud(fake_http, connection):
     # F-017: a bare model id (no `provider/` prefix) that matches no vault candidate by model
     # id has no provider to look a credential up against. It must fail loud with an actionable
     # message, not degrade silently to no-credential (which surfaced as a misleading auth error
-    # even when the key was present).
+    # even when the key was present). On a Pi harness the hint suggests an openai/ prefix.
     fake_http(connections, payload=[_provider_key("openai-prod", "openai", "sk-prod")])
     with pytest.raises(MissingProviderError) as exc:
         await VaultConnectionResolver(connection).resolve(
@@ -138,6 +138,56 @@ async def test_bare_model_without_provider_fails_loud(fake_http, connection):
     assert "openai/gpt-4o-mini" in str(exc.value)
 
 
+async def test_missing_provider_hint_is_harness_correct_for_claude(
+    fake_http, connection
+):
+    # F-031: the missing-provider hint must name a harness-REACHABLE provider. On a Claude
+    # harness (Anthropic only) an unrecognized bare id must read "anthropic/<m>", never
+    # "openai/<m>" (which Claude cannot reach). Use a non-alias bare id so it still fails loud.
+    fake_http(
+        connections, payload=[_provider_key("anthropic-prod", "anthropic", "sk-ant")]
+    )
+    with pytest.raises(MissingProviderError) as exc:
+        await VaultConnectionResolver(connection).resolve(
+            model=ModelRef.coerce("some-unknown-model"),
+            context=RuntimeAuthContext(harness="claude"),
+        )
+    message = str(exc.value)
+    assert "anthropic/some-unknown-model" in message
+    assert "openai/" not in message
+
+
+async def test_bare_claude_alias_resolves_to_anthropic(fake_http, connection):
+    # F-031: a bare Claude alias (haiku/sonnet/opus + [1m]) is unambiguously Anthropic, so the
+    # F-017 prefix rule must NOT reject it. It resolves against the vault's anthropic key the
+    # same way the documented `anthropic/haiku` form does, instead of failing loud.
+    fake_http(
+        connections, payload=[_provider_key("anthropic-prod", "anthropic", "sk-ant")]
+    )
+    for alias in ("haiku", "sonnet", "opus", "opus[1m]"):
+        resolved = await VaultConnectionResolver(connection).resolve(
+            model=ModelRef.coerce(alias),
+            context=RuntimeAuthContext(harness="claude"),
+        )
+        assert resolved.provider == "anthropic", alias
+        assert resolved.model == alias, alias
+        assert resolved.env == {"ANTHROPIC_API_KEY": "sk-ant"}, alias
+
+
+async def test_bare_claude_dated_id_resolves_to_anthropic(fake_http, connection):
+    # F-031: a bare dated Anthropic id (claude-opus-4-8) is also unambiguously Anthropic via the
+    # claude-* naming convention, so it resolves rather than failing loud on a missing prefix.
+    fake_http(
+        connections, payload=[_provider_key("anthropic-prod", "anthropic", "sk-ant")]
+    )
+    resolved = await VaultConnectionResolver(connection).resolve(
+        model=ModelRef.coerce("claude-opus-4-8"),
+        context=RuntimeAuthContext(harness="claude"),
+    )
+    assert resolved.provider == "anthropic"
+    assert resolved.model == "claude-opus-4-8"
+
+
 async def test_bare_model_matching_a_candidate_infers_the_provider(
     fake_http, connection
 ):