Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions sdks/python/agenta/sdk/agents/connections/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,16 @@ class MissingProviderError(ConnectionResolutionError):
NOT a tolerated self-managed/OAuth fallback case: the config itself is underspecified.
"""

def __init__(self, *, model: str) -> None:
def __init__(self, *, model: str, hint_provider: str = "openai") -> None:
# The example provider in the hint is harness-appropriate: a Claude harness must read
# "anthropic/<model>", never "openai/<model>" (Claude reaches Anthropic only). The
# caller passes the harness's default provider so the suggested prefix is reachable.
super().__init__(
f"model '{model}' needs a provider prefix (e.g. 'openai/{model}') "
f"model '{model}' needs a provider prefix (e.g. '{hint_provider}/{model}') "
"or a structured {provider, model}; a bare model id can't resolve a credential"
)
self.model = model
self.hint_provider = hint_provider


class AmbiguousConnectionError(ConnectionResolutionError):
Expand Down
70 changes: 64 additions & 6 deletions sdks/python/agenta/sdk/agents/platform/connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@

from agenta.sdk.utils.logging import get_module_logger

from ..capabilities import (
CLAUDE_MODEL_ALIASES,
HARNESS_CONNECTION_CAPABILITIES,
)
from ..connections import (
AmbiguousConnectionError,
ConnectionNotFoundError,
Expand Down Expand Up @@ -46,6 +50,47 @@
"openrouter": "OPENROUTER_API_KEY",
}

# The Claude harness selects a model by a bare alias (``haiku``/``sonnet``/``opus`` + ``[1m]``)
# or by a dated id (``claude-opus-4-8``), never with a ``provider/`` prefix. Those bare ids are
# unambiguously Anthropic, so the F-017 "needs a provider prefix" rule must not reject them: a
# bare alias resolves to ``anthropic`` here before the fail-loud check. The canonical alias set
# lives in ``capabilities.py`` (the ``/inspect`` surface) so the two never drift.
_CLAUDE_ALIASES: Set[str] = {alias.lower() for alias in CLAUDE_MODEL_ALIASES}


def _inferred_claude_provider(model: ModelRef) -> Optional[str]:
"""Return ``"anthropic"`` when a bare model id is a known Claude alias, else ``None``.

Only fires for a bare id (no explicit ``provider``). A known alias (the ``capabilities.py``
set) or any ``claude-*`` dated id (the Anthropic naming convention) is unambiguously
Anthropic, so it resolves to the ``anthropic`` provider rather than failing loud as a
provider-less model.
"""
if model.provider:
return None
bare = (model.model or "").strip().lower()
if not bare:
return None
if bare in _CLAUDE_ALIASES or bare.startswith("claude-"):
return "anthropic"
return None


def _harness_default_provider(harness: Optional[str]) -> str:
"""The provider to suggest in a missing-provider hint for ``harness``.

Claude reaches Anthropic only, so its hint must read ``anthropic/<model>``; every other
harness defaults to ``openai`` (the existing hint). Derived from the capability table's
provider list so a harness's reachable providers stay the single source of truth.
"""
caps = HARNESS_CONNECTION_CAPABILITIES.get(harness or "")
if caps and caps.providers:
if "openai" in caps.providers:
return "openai"
return caps.providers[0]
return "openai"


# Extras keys the current UI stores on custom_provider secrets, normalized to harness env.
_SNAKE_EXTRA_ENV_ALIASES: Dict[str, str] = {
"aws_region_name": "AWS_REGION",
Expand Down Expand Up @@ -299,14 +344,19 @@ def _candidate_pool(


def _choose_default(
candidates: Sequence[_ConnectionCandidate], model: ModelRef
candidates: Sequence[_ConnectionCandidate],
model: ModelRef,
harness: Optional[str] = None,
) -> _ConnectionCandidate:
pool = _candidate_pool(candidates, model)
if not pool and not model.provider:
# A bare model id (no provider prefix) matched nothing by model id, so there is no
# provider to look a credential up against. Fail loud with an actionable message rather
# than degrade to no-credential and surface later as a misleading "add your key" error.
raise MissingProviderError(model=model.model)
# The hint names the harness-reachable provider (anthropic for Claude, not openai).
raise MissingProviderError(
model=model.model, hint_provider=_harness_default_provider(harness)
)
if len(pool) == 1:
return pool[0]
default_named = [candidate for candidate in pool if candidate.slug == "default"]
Expand Down Expand Up @@ -350,9 +400,15 @@ def _choose_named(


def _resolve_from_secrets(
*, secrets: Sequence[Any], model: ModelRef
*, secrets: Sequence[Any], model: ModelRef, harness: Optional[str] = None
) -> ResolvedConnection:
connection = model.connection
# A bare Claude alias (haiku/sonnet/opus + [1m]) or a dated claude-* id is unambiguously
# Anthropic: infer the provider so the F-017 fail-loud rule does not reject a documented
# Claude model id. Inference only fills a missing provider; an explicit provider is honored.
inferred = _inferred_claude_provider(model)
if inferred:
model = model.model_copy(update={"provider": inferred})
if connection.mode == "self_managed":
return ResolvedConnection(
provider=model.provider or "",
Expand All @@ -368,7 +424,7 @@ def _resolve_from_secrets(
chosen = (
_choose_named(candidates, model, slug)
if slug
else _choose_default(candidates, model)
else _choose_default(candidates, model, harness)
)
provider = chosen.resolved_provider(model)
env = chosen.resolved_env(provider)
Expand Down Expand Up @@ -434,7 +490,7 @@ async def resolve(
data = response.json() or []
if not isinstance(data, list):
raise ConnectionResolutionError("connection resolution returned a non-list")
return _resolve_from_secrets(secrets=data, model=model)
return _resolve_from_secrets(secrets=data, model=model, harness=context.harness)


class _StaticSecretsResolver:
Expand All @@ -447,4 +503,6 @@ async def resolve(
model: ModelRef,
context: RuntimeAuthContext,
) -> ResolvedConnection:
return _resolve_from_secrets(secrets=self._secrets, model=model)
return _resolve_from_secrets(
secrets=self._secrets, model=model, harness=context.harness
)
46 changes: 34 additions & 12 deletions sdks/python/agenta/sdk/agents/utils/ts_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,24 @@
import os
from typing import Any, AsyncIterator, Dict, Optional, Sequence

from agenta.sdk.utils.logging import get_module_logger

_DEFAULT_TIMEOUT = float(os.getenv("AGENTA_AGENT_RUNNER_TIMEOUT_SECONDS", "180"))

log = get_module_logger(__name__)


def _transport_error(user_message: str, *, detail: str) -> RuntimeError:
"""A transport RuntimeError whose surfaced text is clean; the raw detail is logged only.

The HTTP body / process stderr / raw stdout that pinpoints a transport failure is internal:
log it for diagnosis but keep it out of the caller/UI-facing error, which gets only the
short ``user_message``. Mirrors ``wire.sanitize_runner_error`` for the result boundary.
"""
if detail:
log.warning("agent: %s | detail: %s", user_message, detail)
return RuntimeError(user_message)


async def deliver_http(
base_url: str,
Expand All @@ -27,10 +43,12 @@ async def deliver_http(
async with httpx.AsyncClient(timeout=timeout) as client:
response = await client.post(url, json=payload)
# Any non-2xx is a transport failure; 4xx left to fall through surfaces as an opaque
# JSON parse error instead of a clear runner failure.
# JSON parse error instead of a clear runner failure. The response body can carry internal
# detail, so it is logged, not surfaced.
if response.status_code >= 400:
raise RuntimeError(
f"Agent runner HTTP {response.status_code}: {response.text[:1000]}"
raise _transport_error(
f"Agent runner HTTP {response.status_code}",
detail=response.text[:1000],
)
return response.json()

Expand Down Expand Up @@ -67,14 +85,16 @@ async def deliver_subprocess(
out = stdout.decode("utf-8", "replace")
err = stderr.decode("utf-8", "replace")
if not out.strip():
raise RuntimeError(
f"Agent runner returned no output. exit={proc.returncode} stderr={err[-2000:]}"
raise _transport_error(
"Agent runner returned no output",
detail=f"exit={proc.returncode} stderr={err[-2000:]}",
)
try:
return json.loads(out)
except json.JSONDecodeError as exc:
raise RuntimeError(
f"Agent runner returned invalid JSON. stdout={out[:500]} stderr={err[-1000:]}"
raise _transport_error(
"Agent runner returned invalid JSON",
detail=f"stdout={out[:500]} stderr={err[-1000:]}",
) from exc


Expand Down Expand Up @@ -110,8 +130,9 @@ async def deliver_http_stream(
) as response:
if response.status_code >= 400:
body = await response.aread()
raise RuntimeError(
f"Agent runner HTTP {response.status_code}: {body[:1000]!r}"
raise _transport_error(
f"Agent runner HTTP {response.status_code}",
detail=repr(body[:1000]),
Comment on lines 131 to +135

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🚀 Performance & Scalability | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
set -euo pipefail

python3 - <<'PY'
from pathlib import Path
path = Path('sdks/python/agenta/sdk/agents/utils/ts_runner.py')
for start, end in [(1, 240)]:
    print(f'লines {start}-{end}')
    with path.open() as f:
        for i, line in enumerate(f, 1):
            if start <= i <= end:
                print(f"{i:4d}: {line.rstrip()}")
PY

Repository: Agenta-AI/agenta

Length of output: 9205


🏁 Script executed:

#!/bin/bash
set -euo pipefail

python3 - <<'PY'
from pathlib import Path
path = Path('sdks/python/agenta/sdk/agents/utils/ts_runner.py')
for start, end in [(1, 240)]:
    print(f"lines {start}-{end}")
    with path.open() as f:
        for i, line in enumerate(f, 1):
            if start <= i <= end:
                print(f"{i:4d}: {line.rstrip()}")
PY

Repository: Agenta-AI/agenta

Length of output: 9205


Avoid buffering the full streamed error body. await response.aread() still loads the whole error payload into memory even though only the first 1000 bytes are logged; read a bounded prefix instead and let the response close.

)
async for line in response.aiter_lines():
line = line.strip()
Expand Down Expand Up @@ -175,9 +196,10 @@ async def deliver_subprocess_stream(
err = b""
if proc.stderr is not None:
err = await proc.stderr.read()
raise RuntimeError(
"Agent runner stream ended without a terminal result record. "
f"exit={proc.returncode} stderr={err.decode('utf-8', 'replace')[-2000:]}"
raise _transport_error(
"Agent runner stream ended without a terminal result record",
detail=f"exit={proc.returncode} "
f"stderr={err.decode('utf-8', 'replace')[-2000:]}",
)
finally:
if proc.returncode is None:
Expand Down
42 changes: 40 additions & 2 deletions sdks/python/agenta/sdk/agents/utils/wire.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,11 @@

from __future__ import annotations

import re
from typing import Any, Dict, List, Optional, Sequence

from agenta.sdk.utils.logging import get_module_logger

from ..dtos import (
AgentEvent,
AgentResult,
Expand All @@ -29,6 +32,38 @@
TraceContext,
)

log = get_module_logger(__name__)

# The user-facing error must not carry an internal stack/path dump. Cap the surfaced line and
# strip the patterns that leak implementation detail; the full text is logged, never shown.
_ERROR_MAX_LEN = 300
# A stack frame leaked into the message ("at fn (/abs/path:12:3)" / 'File "/abs/path", line 12').
_STACK_FRAME_RE = re.compile(r"\b(at\s+\S+\s*\(|File\s+\"|/[\w./-]+:\d+)")


def sanitize_runner_error(error: Any) -> str:
"""Reduce a runner ``error`` to one clean user-facing line, logging the full detail.

The runner already concise-formats its known auth/credit failures, but the fall-through case
returns the raw first line of an SDK/JS error, and the transport errors (HTTP/stderr/stdout
dumps) carry internal text. This is the single boundary that reaches the caller/UI, so it
keeps the actionable message, drops stack-frame and path noise, caps the length, and logs the
untruncated original for the trace/logs. A clean concise message passes through unchanged.
"""
raw = "" if error is None else str(error)
if raw and (
len(raw) > _ERROR_MAX_LEN or "\n" in raw or _STACK_FRAME_RE.search(raw)
):
log.warning("agent: runner reported a failure: %s", raw)
# Keep only the first line; a multi-line body is a stack dump, never the message.
message = raw.split("\n", 1)[0].strip()
# If even the first line is a raw stack frame, fall back to a generic line.
if not message or _STACK_FRAME_RE.match(message):
return "agent run failed"
if len(message) > _ERROR_MAX_LEN:
message = message[: _ERROR_MAX_LEN - 1].rstrip() + "…"
return message
Comment on lines +59 to +65

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔒 Security & Privacy | 🟠 Major | ⚡ Quick win

Single-line path leaks still pass through.

This only falls back when the first retained line starts with a stack-frame pattern. Errors like ENOENT: ... '/tmp/session/.claude/settings.json' or Cannot find module /app/... stay intact and will still be surfaced by result_from_wire(), which misses the PR’s “no stack/path leak” contract. Please redact path-like substrings in the kept line, or fall back whenever the retained line still contains one, and add a regression test for that case.

Also applies to: 132-135



def request_to_wire(
*,
Expand Down Expand Up @@ -91,10 +126,13 @@ def result_from_wire(data: Dict[str, Any]) -> AgentResult:
"""Parse a ``/run`` result JSON into an :class:`AgentResult`.

Raises ``RuntimeError`` when the runner reported a failure, so the caller surfaces a
clear message rather than handing the model an empty reply.
clear message rather than handing the model an empty reply. The runner ``error`` is
sanitized at this boundary (one clean line, no stack/path leak); the full detail is logged.
"""
if not data.get("ok"):
raise RuntimeError(f"Agent run failed: {data.get('error')}")
raise RuntimeError(
f"Agent run failed: {sanitize_runner_error(data.get('error'))}"
)

messages: List[Message] = []
for raw in data.get("messages") or []:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ async def test_bare_model_without_provider_fails_loud(fake_http, connection):
# F-017: a bare model id (no `provider/` prefix) that matches no vault candidate by model
# id has no provider to look a credential up against. It must fail loud with an actionable
# message, not degrade silently to no-credential (which surfaced as a misleading auth error
# even when the key was present).
# even when the key was present). On a Pi harness the hint suggests an openai/ prefix.
fake_http(connections, payload=[_provider_key("openai-prod", "openai", "sk-prod")])
with pytest.raises(MissingProviderError) as exc:
await VaultConnectionResolver(connection).resolve(
Expand All @@ -138,6 +138,56 @@ async def test_bare_model_without_provider_fails_loud(fake_http, connection):
assert "openai/gpt-4o-mini" in str(exc.value)


async def test_missing_provider_hint_is_harness_correct_for_claude(
fake_http, connection
):
# F-031: the missing-provider hint must name a harness-REACHABLE provider. On a Claude
# harness (Anthropic only) an unrecognized bare id must read "anthropic/<m>", never
# "openai/<m>" (which Claude cannot reach). Use a non-alias bare id so it still fails loud.
fake_http(
connections, payload=[_provider_key("anthropic-prod", "anthropic", "sk-ant")]
)
with pytest.raises(MissingProviderError) as exc:
await VaultConnectionResolver(connection).resolve(
model=ModelRef.coerce("some-unknown-model"),
context=RuntimeAuthContext(harness="claude"),
)
message = str(exc.value)
assert "anthropic/some-unknown-model" in message
assert "openai/" not in message


async def test_bare_claude_alias_resolves_to_anthropic(fake_http, connection):
# F-031: a bare Claude alias (haiku/sonnet/opus + [1m]) is unambiguously Anthropic, so the
# F-017 prefix rule must NOT reject it. It resolves against the vault's anthropic key the
# same way the documented `anthropic/haiku` form does, instead of failing loud.
fake_http(
connections, payload=[_provider_key("anthropic-prod", "anthropic", "sk-ant")]
)
for alias in ("haiku", "sonnet", "opus", "opus[1m]"):
resolved = await VaultConnectionResolver(connection).resolve(
model=ModelRef.coerce(alias),
context=RuntimeAuthContext(harness="claude"),
)
assert resolved.provider == "anthropic", alias
assert resolved.model == alias, alias
assert resolved.env == {"ANTHROPIC_API_KEY": "sk-ant"}, alias


async def test_bare_claude_dated_id_resolves_to_anthropic(fake_http, connection):
# F-031: a bare dated Anthropic id (claude-opus-4-8) is also unambiguously Anthropic via the
# claude-* naming convention, so it resolves rather than failing loud on a missing prefix.
fake_http(
connections, payload=[_provider_key("anthropic-prod", "anthropic", "sk-ant")]
)
resolved = await VaultConnectionResolver(connection).resolve(
model=ModelRef.coerce("claude-opus-4-8"),
context=RuntimeAuthContext(harness="claude"),
)
assert resolved.provider == "anthropic"
assert resolved.model == "claude-opus-4-8"


async def test_bare_model_matching_a_candidate_infers_the_provider(
fake_http, connection
):
Expand Down
Loading
Loading