diff --git a/sdks/python/agenta/sdk/agents/connections/errors.py b/sdks/python/agenta/sdk/agents/connections/errors.py
index becd44d2c9..2d01f0888e 100644
--- a/sdks/python/agenta/sdk/agents/connections/errors.py
+++ b/sdks/python/agenta/sdk/agents/connections/errors.py
@@ -46,12 +46,16 @@ class MissingProviderError(ConnectionResolutionError):
     NOT a tolerated self-managed/OAuth fallback case: the config itself is underspecified.
     """
 
-    def __init__(self, *, model: str) -> None:
+    def __init__(self, *, model: str, hint_provider: str = "openai") -> None:
+        # The example provider in the hint is harness-appropriate: a Claude harness must read
+        # "anthropic/<model>", never "openai/<model>" (Claude reaches Anthropic only). The
+        # caller passes the harness's default provider so the suggested prefix is reachable.
         super().__init__(
-            f"model '{model}' needs a provider prefix (e.g. 'openai/{model}') "
+            f"model '{model}' needs a provider prefix (e.g. '{hint_provider}/{model}') "
             "or a structured {provider, model}; a bare model id can't resolve a credential"
         )
         self.model = model
+        self.hint_provider = hint_provider
 
 
 class AmbiguousConnectionError(ConnectionResolutionError):
diff --git a/sdks/python/agenta/sdk/agents/platform/connections.py b/sdks/python/agenta/sdk/agents/platform/connections.py
index cf175a4752..73cff4334f 100644
--- a/sdks/python/agenta/sdk/agents/platform/connections.py
+++ b/sdks/python/agenta/sdk/agents/platform/connections.py
@@ -18,6 +18,10 @@
 
 from agenta.sdk.utils.logging import get_module_logger
 
+from ..capabilities import (
+    CLAUDE_MODEL_ALIASES,
+    HARNESS_CONNECTION_CAPABILITIES,
+)
 from ..connections import (
     AmbiguousConnectionError,
     ConnectionNotFoundError,
@@ -46,6 +50,47 @@
     "openrouter": "OPENROUTER_API_KEY",
 }
 
+# The Claude harness selects a model by a bare alias (``haiku``/``sonnet``/``opus`` + ``[1m]``)
+# or by a dated id (``claude-opus-4-8``), never with a ``provider/`` prefix. Those bare ids are
+# unambiguously Anthropic, so the F-017 "needs a provider prefix" rule must not reject them: a
+# bare alias resolves to ``anthropic`` here before the fail-loud check. The canonical alias set
+# lives in ``capabilities.py`` (the ``/inspect`` surface) so the two never drift.
+_CLAUDE_ALIASES: Set[str] = {alias.lower() for alias in CLAUDE_MODEL_ALIASES}
+
+
+def _inferred_claude_provider(model: ModelRef) -> Optional[str]:
+    """Return ``"anthropic"`` when a bare model id is a known Claude alias, else ``None``.
+
+    Only fires for a bare id (no explicit ``provider``). A known alias (the ``capabilities.py``
+    set) or any ``claude-*`` dated id (the Anthropic naming convention) is unambiguously
+    Anthropic, so it resolves to the ``anthropic`` provider rather than failing loud as a
+    provider-less model.
+    """
+    if model.provider:
+        return None
+    bare = (model.model or "").strip().lower()
+    if not bare:
+        return None
+    if bare in _CLAUDE_ALIASES or bare.startswith("claude-"):
+        return "anthropic"
+    return None
+
+
+def _harness_default_provider(harness: Optional[str]) -> str:
+    """The provider to suggest in a missing-provider hint for ``harness``.
+
+    Claude reaches Anthropic only, so its hint must read ``anthropic/<model>``; every other
+    harness defaults to ``openai`` (the existing hint). Derived from the capability table's
+    provider list so a harness's reachable providers stay the single source of truth.
+    """
+    caps = HARNESS_CONNECTION_CAPABILITIES.get(harness or "")
+    if caps and caps.providers:
+        if "openai" in caps.providers:
+            return "openai"
+        return caps.providers[0]
+    return "openai"
+
+
 # Extras keys the current UI stores on custom_provider secrets, normalized to harness env.
 _SNAKE_EXTRA_ENV_ALIASES: Dict[str, str] = {
     "aws_region_name": "AWS_REGION",
@@ -299,14 +344,19 @@ def _candidate_pool(
 
 
 def _choose_default(
-    candidates: Sequence[_ConnectionCandidate], model: ModelRef
+    candidates: Sequence[_ConnectionCandidate],
+    model: ModelRef,
+    harness: Optional[str] = None,
 ) -> _ConnectionCandidate:
     pool = _candidate_pool(candidates, model)
     if not pool and not model.provider:
         # A bare model id (no provider prefix) matched nothing by model id, so there is no
         # provider to look a credential up against. Fail loud with an actionable message rather
         # than degrade to no-credential and surface later as a misleading "add your key" error.
-        raise MissingProviderError(model=model.model)
+        # The hint names the harness-reachable provider (anthropic for Claude, not openai).
+        raise MissingProviderError(
+            model=model.model, hint_provider=_harness_default_provider(harness)
+        )
     if len(pool) == 1:
         return pool[0]
     default_named = [candidate for candidate in pool if candidate.slug == "default"]
@@ -350,9 +400,15 @@ def _choose_named(
 
 
 def _resolve_from_secrets(
-    *, secrets: Sequence[Any], model: ModelRef
+    *, secrets: Sequence[Any], model: ModelRef, harness: Optional[str] = None
 ) -> ResolvedConnection:
     connection = model.connection
+    # A bare Claude alias (haiku/sonnet/opus + [1m]) or a dated claude-* id is unambiguously
+    # Anthropic: infer the provider so the F-017 fail-loud rule does not reject a documented
+    # Claude model id. Inference only fills a missing provider; an explicit provider is honored.
+    inferred = _inferred_claude_provider(model)
+    if inferred:
+        model = model.model_copy(update={"provider": inferred})
     if connection.mode == "self_managed":
         return ResolvedConnection(
             provider=model.provider or "",
@@ -368,7 +424,7 @@ def _resolve_from_secrets(
     chosen = (
         _choose_named(candidates, model, slug)
         if slug
-        else _choose_default(candidates, model)
+        else _choose_default(candidates, model, harness)
     )
     provider = chosen.resolved_provider(model)
     env = chosen.resolved_env(provider)
@@ -434,7 +490,7 @@ async def resolve(
         data = response.json() or []
         if not isinstance(data, list):
             raise ConnectionResolutionError("connection resolution returned a non-list")
-        return _resolve_from_secrets(secrets=data, model=model)
+        return _resolve_from_secrets(secrets=data, model=model, harness=context.harness)
 
 
 class _StaticSecretsResolver:
@@ -447,4 +503,6 @@ async def resolve(
         model: ModelRef,
         context: RuntimeAuthContext,
     ) -> ResolvedConnection:
-        return _resolve_from_secrets(secrets=self._secrets, model=model)
+        return _resolve_from_secrets(
+            secrets=self._secrets, model=model, harness=context.harness
+        )
diff --git a/sdks/python/agenta/sdk/agents/utils/ts_runner.py b/sdks/python/agenta/sdk/agents/utils/ts_runner.py
index 590f47cd1c..92599a8108 100644
--- a/sdks/python/agenta/sdk/agents/utils/ts_runner.py
+++ b/sdks/python/agenta/sdk/agents/utils/ts_runner.py
@@ -11,8 +11,24 @@
 import os
 from typing import Any, AsyncIterator, Dict, Optional, Sequence
 
+from agenta.sdk.utils.logging import get_module_logger
+
 _DEFAULT_TIMEOUT = float(os.getenv("AGENTA_AGENT_RUNNER_TIMEOUT_SECONDS", "180"))
 
+log = get_module_logger(__name__)
+
+
+def _transport_error(user_message: str, *, detail: str) -> RuntimeError:
+    """A transport RuntimeError whose surfaced text is clean; the raw detail is logged only.
+
+    The HTTP body / process stderr / raw stdout that pinpoints a transport failure is internal:
+    log it for diagnosis but keep it out of the caller/UI-facing error, which gets only the
+    short ``user_message``. Mirrors ``wire.sanitize_runner_error`` for the result boundary.
+    """
+    if detail:
+        log.warning("agent: %s | detail: %s", user_message, detail)
+    return RuntimeError(user_message)
+
 
 async def deliver_http(
     base_url: str,
@@ -27,10 +43,12 @@ async def deliver_http(
     async with httpx.AsyncClient(timeout=timeout) as client:
         response = await client.post(url, json=payload)
     # Any non-2xx is a transport failure; 4xx left to fall through surfaces as an opaque
-    # JSON parse error instead of a clear runner failure.
+    # JSON parse error instead of a clear runner failure. The response body can carry internal
+    # detail, so it is logged, not surfaced.
     if response.status_code >= 400:
-        raise RuntimeError(
-            f"Agent runner HTTP {response.status_code}: {response.text[:1000]}"
+        raise _transport_error(
+            f"Agent runner HTTP {response.status_code}",
+            detail=response.text[:1000],
         )
     return response.json()
 
@@ -67,14 +85,16 @@ async def deliver_subprocess(
     out = stdout.decode("utf-8", "replace")
     err = stderr.decode("utf-8", "replace")
     if not out.strip():
-        raise RuntimeError(
-            f"Agent runner returned no output. exit={proc.returncode} stderr={err[-2000:]}"
+        raise _transport_error(
+            "Agent runner returned no output",
+            detail=f"exit={proc.returncode} stderr={err[-2000:]}",
         )
     try:
         return json.loads(out)
     except json.JSONDecodeError as exc:
-        raise RuntimeError(
-            f"Agent runner returned invalid JSON. stdout={out[:500]} stderr={err[-1000:]}"
+        raise _transport_error(
+            "Agent runner returned invalid JSON",
+            detail=f"stdout={out[:500]} stderr={err[-1000:]}",
         ) from exc
 
 
@@ -110,8 +130,9 @@ async def deliver_http_stream(
         ) as response:
             if response.status_code >= 400:
                 body = await response.aread()
-                raise RuntimeError(
-                    f"Agent runner HTTP {response.status_code}: {body[:1000]!r}"
+                raise _transport_error(
+                    f"Agent runner HTTP {response.status_code}",
+                    detail=repr(body[:1000]),
                 )
             async for line in response.aiter_lines():
                 line = line.strip()
@@ -175,9 +196,10 @@ async def deliver_subprocess_stream(
             err = b""
             if proc.stderr is not None:
                 err = await proc.stderr.read()
-            raise RuntimeError(
-                "Agent runner stream ended without a terminal result record. "
-                f"exit={proc.returncode} stderr={err.decode('utf-8', 'replace')[-2000:]}"
+            raise _transport_error(
+                "Agent runner stream ended without a terminal result record",
+                detail=f"exit={proc.returncode} "
+                f"stderr={err.decode('utf-8', 'replace')[-2000:]}",
             )
     finally:
         if proc.returncode is None:
diff --git a/sdks/python/agenta/sdk/agents/utils/wire.py b/sdks/python/agenta/sdk/agents/utils/wire.py
index cee5c67b53..c78f570bc1 100644
--- a/sdks/python/agenta/sdk/agents/utils/wire.py
+++ b/sdks/python/agenta/sdk/agents/utils/wire.py
@@ -17,8 +17,11 @@
 
 from __future__ import annotations
 
+import re
 from typing import Any, Dict, List, Optional, Sequence
 
+from agenta.sdk.utils.logging import get_module_logger
+
 from ..dtos import (
     AgentEvent,
     AgentResult,
@@ -29,6 +32,38 @@
     TraceContext,
 )
 
+log = get_module_logger(__name__)
+
+# The user-facing error must not carry an internal stack/path dump. Cap the surfaced line and
+# strip the patterns that leak implementation detail; the full text is logged, never shown.
+_ERROR_MAX_LEN = 300
+# A stack frame leaked into the message ("at fn (/abs/path:12:3)" / 'File "/abs/path", line 12').
+_STACK_FRAME_RE = re.compile(r"\b(at\s+\S+\s*\(|File\s+\"|/[\w./-]+:\d+)")
+
+
+def sanitize_runner_error(error: Any) -> str:
+    """Reduce a runner ``error`` to one clean user-facing line, logging the full detail.
+
+    The runner already concise-formats its known auth/credit failures, but the fall-through case
+    returns the raw first line of an SDK/JS error, and the transport errors (HTTP/stderr/stdout
+    dumps) carry internal text. This is the single boundary that reaches the caller/UI, so it
+    keeps the actionable message, drops stack-frame and path noise, caps the length, and logs the
+    untruncated original for the trace/logs. A clean concise message passes through unchanged.
+    """
+    raw = "" if error is None else str(error)
+    if raw and (
+        len(raw) > _ERROR_MAX_LEN or "\n" in raw or _STACK_FRAME_RE.search(raw)
+    ):
+        log.warning("agent: runner reported a failure: %s", raw)
+    # Keep only the first line; a multi-line body is a stack dump, never the message.
+    message = raw.split("\n", 1)[0].strip()
+    # If even the first line is a raw stack frame, fall back to a generic line.
+    if not message or _STACK_FRAME_RE.match(message):
+        return "agent run failed"
+    if len(message) > _ERROR_MAX_LEN:
+        message = message[: _ERROR_MAX_LEN - 1].rstrip() + "…"
+    return message
+
 
 def request_to_wire(
     *,
@@ -91,10 +126,13 @@ def result_from_wire(data: Dict[str, Any]) -> AgentResult:
     """Parse a ``/run`` result JSON into an :class:`AgentResult`.
 
     Raises ``RuntimeError`` when the runner reported a failure, so the caller surfaces a
-    clear message rather than handing the model an empty reply.
+    clear message rather than handing the model an empty reply. The runner ``error`` is
+    sanitized at this boundary (one clean line, no stack/path leak); the full detail is logged.
     """
     if not data.get("ok"):
-        raise RuntimeError(f"Agent run failed: {data.get('error')}")
+        raise RuntimeError(
+            f"Agent run failed: {sanitize_runner_error(data.get('error'))}"
+        )
 
     messages: List[Message] = []
     for raw in data.get("messages") or []:
diff --git a/sdks/python/oss/tests/pytest/unit/agents/platform/test_connections_http.py b/sdks/python/oss/tests/pytest/unit/agents/platform/test_connections_http.py
index 02bd06e8af..bfa6e1daa1 100644
--- a/sdks/python/oss/tests/pytest/unit/agents/platform/test_connections_http.py
+++ b/sdks/python/oss/tests/pytest/unit/agents/platform/test_connections_http.py
@@ -128,7 +128,7 @@ async def test_bare_model_without_provider_fails_loud(fake_http, connection):
     # F-017: a bare model id (no `provider/` prefix) that matches no vault candidate by model
     # id has no provider to look a credential up against. It must fail loud with an actionable
     # message, not degrade silently to no-credential (which surfaced as a misleading auth error
-    # even when the key was present).
+    # even when the key was present). On a Pi harness the hint suggests an openai/ prefix.
     fake_http(connections, payload=[_provider_key("openai-prod", "openai", "sk-prod")])
     with pytest.raises(MissingProviderError) as exc:
         await VaultConnectionResolver(connection).resolve(
@@ -138,6 +138,56 @@ async def test_bare_model_without_provider_fails_loud(fake_http, connection):
     assert "openai/gpt-4o-mini" in str(exc.value)
 
 
+async def test_missing_provider_hint_is_harness_correct_for_claude(
+    fake_http, connection
+):
+    # F-031: the missing-provider hint must name a harness-REACHABLE provider. On a Claude
+    # harness (Anthropic only) an unrecognized bare id must read "anthropic/<m>", never
+    # "openai/<m>" (which Claude cannot reach). Use a non-alias bare id so it still fails loud.
+    fake_http(
+        connections, payload=[_provider_key("anthropic-prod", "anthropic", "sk-ant")]
+    )
+    with pytest.raises(MissingProviderError) as exc:
+        await VaultConnectionResolver(connection).resolve(
+            model=ModelRef.coerce("some-unknown-model"),
+            context=RuntimeAuthContext(harness="claude"),
+        )
+    message = str(exc.value)
+    assert "anthropic/some-unknown-model" in message
+    assert "openai/" not in message
+
+
+async def test_bare_claude_alias_resolves_to_anthropic(fake_http, connection):
+    # F-031: a bare Claude alias (haiku/sonnet/opus + [1m]) is unambiguously Anthropic, so the
+    # F-017 prefix rule must NOT reject it. It resolves against the vault's anthropic key the
+    # same way the documented `anthropic/haiku` form does, instead of failing loud.
+    fake_http(
+        connections, payload=[_provider_key("anthropic-prod", "anthropic", "sk-ant")]
+    )
+    for alias in ("haiku", "sonnet", "opus", "opus[1m]"):
+        resolved = await VaultConnectionResolver(connection).resolve(
+            model=ModelRef.coerce(alias),
+            context=RuntimeAuthContext(harness="claude"),
+        )
+        assert resolved.provider == "anthropic", alias
+        assert resolved.model == alias, alias
+        assert resolved.env == {"ANTHROPIC_API_KEY": "sk-ant"}, alias
+
+
+async def test_bare_claude_dated_id_resolves_to_anthropic(fake_http, connection):
+    # F-031: a bare dated Anthropic id (claude-opus-4-8) is also unambiguously Anthropic via the
+    # claude-* naming convention, so it resolves rather than failing loud on a missing prefix.
+    fake_http(
+        connections, payload=[_provider_key("anthropic-prod", "anthropic", "sk-ant")]
+    )
+    resolved = await VaultConnectionResolver(connection).resolve(
+        model=ModelRef.coerce("claude-opus-4-8"),
+        context=RuntimeAuthContext(harness="claude"),
+    )
+    assert resolved.provider == "anthropic"
+    assert resolved.model == "claude-opus-4-8"
+
+
 async def test_bare_model_matching_a_candidate_infers_the_provider(
     fake_http, connection
 ):
diff --git a/sdks/python/oss/tests/pytest/unit/agents/test_wire_contract.py b/sdks/python/oss/tests/pytest/unit/agents/test_wire_contract.py
index c1385940cb..0fb0d0aa1e 100644
--- a/sdks/python/oss/tests/pytest/unit/agents/test_wire_contract.py
+++ b/sdks/python/oss/tests/pytest/unit/agents/test_wire_contract.py
@@ -32,7 +32,11 @@
     ToolCallback,
     TraceContext,
 )
-from agenta.sdk.agents.utils.wire import request_to_wire, result_from_wire
+from agenta.sdk.agents.utils.wire import (
+    request_to_wire,
+    result_from_wire,
+    sanitize_runner_error,
+)
 
 # The full set of top-level keys ``request_to_wire`` may emit. The TS ``AgentRunRequest``
 # interface must declare a superset of these. Adding a key here without adding it to
@@ -361,6 +365,52 @@ def test_result_from_wire_raises_on_failure(golden):
         result_from_wire(golden("run_result.error.json"))
 
 
+def test_sanitize_runner_error_passes_clean_message_through():
+    # A concise, single-line message (what conciseError emits for known cases) is unchanged.
+    clean = "pi_core: model authentication failed — add the project's Anthropic key."
+    assert sanitize_runner_error(clean) == clean
+
+
+def test_sanitize_runner_error_strips_multiline_stack_to_first_line():
+    raw = (
+        "TypeError: cannot read x\n"
+        "    at run (/app/services/agent/src/engine.ts:12:3)\n"
+        "    at process (/app/node_modules/foo.js:99:1)"
+    )
+    # Only the first line survives; the stack frames never reach the caller.
+    assert sanitize_runner_error(raw) == "TypeError: cannot read x"
+
+
+def test_sanitize_runner_error_falls_back_when_first_line_is_a_stack_frame():
+    raw = 'File "/abs/secret/path.py", line 12, in run\n    raise ValueError("boom")'
+    assert sanitize_runner_error(raw) == "agent run failed"
+
+
+def test_sanitize_runner_error_caps_length():
+    raw = "x" * 1000
+    result = sanitize_runner_error(raw)
+    assert len(result) <= 300
+    assert result.endswith("…")
+
+
+def test_sanitize_runner_error_handles_none_and_empty():
+    assert sanitize_runner_error(None) == "agent run failed"
+    assert sanitize_runner_error("") == "agent run failed"
+
+
+def test_result_from_wire_sanitizes_a_leaky_error():
+    leaky = {
+        "ok": False,
+        "error": "boom\n    at run (/app/src/engine.ts:1:1)",
+    }
+    with pytest.raises(RuntimeError) as exc:
+        result_from_wire(leaky)
+    message = str(exc.value)
+    assert "boom" in message
+    assert "/app/src/engine.ts" not in message
+    assert "\n" not in message
+
+
 def test_result_from_wire_minimal_ok():
     # A bare success: empty output, empty collections, no capabilities.
     result = result_from_wire({"ok": True})
diff --git a/services/agent/src/engines/sandbox_agent.ts b/services/agent/src/engines/sandbox_agent.ts
index 311c650ef6..5d0c9b844c 100644
--- a/services/agent/src/engines/sandbox_agent.ts
+++ b/services/agent/src/engines/sandbox_agent.ts
@@ -190,6 +190,10 @@ export async function runSandboxAgent(
     ? buildPiExtensionEnv(request, !plan.isDaytona, {
         relayDir: plan.relayDir,
         usageOutPath: plan.usageOutPath,
+        // The materialized skill names (author + forced `_agenta.*`) so Pi's own agent span
+        // records which skills loaded (F-029); local Pi self-instruments, so the runner's
+        // sandbox-agent otel has no span to stamp here.
+        skills: plan.skillDirs.map((s) => s.name),
       })
     : {};
   Object.assign(env, piExtEnv); // local daemon inherits it; daytona gets it via envVars
@@ -293,6 +297,9 @@ export async function runSandboxAgent(
       isPi: plan.isPi,
       capabilities,
       harness: plan.harness,
+      // Daytona: skip the internal loopback HTTP MCP channel (unreachable from the in-sandbox
+      // harness); gateway tools are delivered through the Daytona file relay started below.
+      isDaytona: plan.isDaytona,
       toolSpecs: plan.toolSpecs,
       userMcpServers: request.mcpServers,
       relayDir: plan.relayDir,
@@ -321,6 +328,9 @@ export async function runSandboxAgent(
     const run = (deps.createOtel ?? createSandboxAgentOtel)({
       harness: plan.harness,
       model,
+      // The names of every skill that materialized for this run (author + forced `_agenta.*`),
+      // stamped on the agent span so the trace shows which skills loaded (F-029).
+      skills: plan.skillDirs.map((s) => s.name),
       traceparent: request.trace?.traceparent,
       baggage: request.trace?.baggage,
       endpoint: request.trace?.endpoint,
@@ -400,34 +410,38 @@ export async function runSandboxAgent(
     });
     run.setUsage(usage);
 
-    const output = run.finish();
-    await run.flush();
-
-    // Fail loud on a swallowed model error (A7 / "fail loud, not silent"). When Pi's provider
-    // call fails (out-of-quota, bad key, rate limit, unknown model, ...), Pi's pi-acp bridge
-    // reports the turn as a plain `end_turn` with NO content, so without this the run would
-    // return an `ok:true` empty turn and the user would see a silent "No response" instead of
-    // the real failure. On the LOCAL Pi path the error is recoverable from Pi's own session
-    // transcript; surface it as a run error. Only checked when the turn produced no output and
-    // ran no tools (a real tool-only turn legitimately has empty text), and never on Daytona
-    // (the transcript lives in the remote sandbox).
-    if (
+    // Peek for a swallowed model error BEFORE finishing the trace so the error message + provider
+    // can be stamped on the still-open agent span (F-030). When Pi's provider call fails
+    // (out-of-quota, bad key, rate limit, unknown model, ...), Pi's pi-acp bridge reports the
+    // turn as a plain `end_turn` with NO content, so without this the run would return an
+    // `ok:true` empty turn and the user would see a silent "No response" instead of the real
+    // failure. On the LOCAL Pi path the error is recoverable from Pi's own session transcript.
+    // Only checked when the turn produced no output and ran no tools (a real tool-only turn
+    // legitimately has empty text), and never on Daytona (the transcript lives in the remote
+    // sandbox).
+    const swallowedPiError =
       plan.isPi &&
       !plan.isDaytona &&
-      !output.trim() &&
+      !run.output().trim() &&
       !run.events().some((e) => e.type === "tool_call")
-    ) {
-      const piError = findSwallowedPiError(plan.sourcePiAgentDir, plan.cwd);
-      if (piError) {
-        return {
-          ok: false,
-          error: conciseError(
-            new Error(piError),
-            plan.harness,
-            request.provider,
-          ),
-        };
-      }
+        ? findSwallowedPiError(plan.sourcePiAgentDir, plan.cwd)
+        : undefined;
+    let swallowedError: string | undefined;
+    if (swallowedPiError) {
+      swallowedError = conciseError(
+        new Error(swallowedPiError),
+        plan.harness,
+        request.provider,
+      );
+      run.recordError(swallowedError, request.provider);
+    }
+
+    const output = run.finish();
+    await run.flush();
+
+    // Fail loud on the swallowed error detected above (A7 / "fail loud, not silent").
+    if (swallowedError) {
+      return { ok: false, error: swallowedError };
     }
 
     return {
@@ -450,11 +464,15 @@ export async function runSandboxAgent(
       traceId: run.traceId(),
     };
   } catch (err) {
+    const error = conciseError(err, plan.harness, request.provider);
+    // Stamp the error message + provider on the agent span before finishing it (F-030), so a
+    // trace carries the same diagnostic the response does (it previously held only a count).
+    otel?.recordError(error, request.provider);
     otel?.finish();
     await otel?.flush().catch(() => {});
     return {
       ok: false,
-      error: conciseError(err, plan.harness, request.provider),
+      error,
     };
   } finally {
     await toolRelay?.stop().catch(() => {});
diff --git a/services/agent/src/engines/sandbox_agent/pi-assets.ts b/services/agent/src/engines/sandbox_agent/pi-assets.ts
index 8c6b975040..50856056bc 100644
--- a/services/agent/src/engines/sandbox_agent/pi-assets.ts
+++ b/services/agent/src/engines/sandbox_agent/pi-assets.ts
@@ -33,7 +33,7 @@ export const EXTENSION_BUNDLE =
 export function buildPiExtensionEnv(
   request: AgentRunRequest,
   tracing: boolean,
-  opts: { relayDir?: string; usageOutPath?: string } = {},
+  opts: { relayDir?: string; usageOutPath?: string; skills?: string[] } = {},
 ): Record<string, string> {
   const env: Record<string, string> = {};
   const trace = tracing ? request.trace : undefined;
@@ -42,6 +42,11 @@ export function buildPiExtensionEnv(
   if (trace?.authorization) env.AGENTA_OTLP_AUTHORIZATION = trace.authorization;
   if (trace && trace.captureContent === false)
     env.AGENTA_CAPTURE_CONTENT = "false";
+  // The skills that materialized for this run (author + forced `_agenta.*`), so Pi's own agent
+  // span records which skills loaded (F-029). Only set under tracing (the extension's only span
+  // consumer); a JSON array string the extension parses.
+  if (trace && opts.skills && opts.skills.length > 0)
+    env.AGENTA_SKILLS_LOADED = JSON.stringify(opts.skills);
 
   const specs = publicToolSpecs(
     (request.customTools as ResolvedToolSpec[]) ?? [],
diff --git a/services/agent/src/extensions/agenta.ts b/services/agent/src/extensions/agenta.ts
index d44810db79..2ec51ebf7d 100644
--- a/services/agent/src/extensions/agenta.ts
+++ b/services/agent/src/extensions/agenta.ts
@@ -35,6 +35,19 @@ function log(message: string): void {
   process.stderr.write(`[agenta-pi-ext] ${message}\n`);
 }
 
+/** Parse the JSON array of loaded skill names from AGENTA_SKILLS_LOADED; [] on absent/malformed. */
+function parseSkillsLoaded(raw: string | undefined): string[] {
+  if (!raw) return [];
+  try {
+    const parsed = JSON.parse(raw);
+    return Array.isArray(parsed)
+      ? parsed.filter((s): s is string => typeof s === "string")
+      : [];
+  } catch {
+    return [];
+  }
+}
+
 /** Register public tool metadata as Pi tools whose execution relays to the runner. */
 function registerTools(pi: ExtensionAPI): void {
   const raw = process.env.AGENTA_TOOL_PUBLIC_SPECS;
@@ -78,8 +91,12 @@ function registerTools(pi: ExtensionAPI): void {
 const factory = (pi: ExtensionAPI): void => {
   // Fully inert unless Agenta wired this run (so it is safe to install globally in a
   // shared Pi agent dir — a normal `pi` session with no Agenta env does nothing).
-  const hasTracing = !!(process.env.AGENTA_TRACEPARENT || process.env.AGENTA_OTLP_ENDPOINT);
-  const hasTools = !!(process.env.AGENTA_TOOL_PUBLIC_SPECS && process.env.AGENTA_TOOL_RELAY_DIR);
+  const hasTracing = !!(
+    process.env.AGENTA_TRACEPARENT || process.env.AGENTA_OTLP_ENDPOINT
+  );
+  const hasTools = !!(
+    process.env.AGENTA_TOOL_PUBLIC_SPECS && process.env.AGENTA_TOOL_RELAY_DIR
+  );
   const usageOut = process.env.AGENTA_USAGE_OUT;
   if (!hasTracing && !hasTools && !usageOut) return;
 
@@ -96,6 +113,9 @@ const factory = (pi: ExtensionAPI): void => {
     endpoint: process.env.AGENTA_OTLP_ENDPOINT,
     authorization: process.env.AGENTA_OTLP_AUTHORIZATION,
     captureContent: process.env.AGENTA_CAPTURE_CONTENT !== "false",
+    // The skills that loaded for this run (author + forced `_agenta.*`), stamped on the agent
+    // span so a trace shows which skills surfaced (F-029). A JSON array string from the runner.
+    skills: parseSkillsLoaded(process.env.AGENTA_SKILLS_LOADED),
   });
   otel.register(pi); // lifecycle handlers (spans + usage accumulation)
 
diff --git a/services/agent/src/tracing/otel.ts b/services/agent/src/tracing/otel.ts
index eb4acbe012..589eca2da8 100644
--- a/services/agent/src/tracing/otel.ts
+++ b/services/agent/src/tracing/otel.ts
@@ -198,7 +198,8 @@ function orderParentFirst(spans: ReadableSpan[]): ReadableSpan[] {
   const ordered: ReadableSpan[] = [];
   const visit = (s: ReadableSpan) => {
     ordered.push(s);
-    for (const child of childrenOf.get(s.spanContext().spanId) ?? []) visit(child);
+    for (const child of childrenOf.get(s.spanContext().spanId) ?? [])
+      visit(child);
   };
   roots.forEach(visit);
   // Any spans not reached (defensive) get appended so nothing is dropped.
@@ -221,7 +222,8 @@ function parentContext(traceparent?: string): Context | undefined {
     traceId,
     spanId,
     // Honor the incoming sampled bit; default to sampled so child spans record.
-    traceFlags: (parseInt(flags, 16) & 1) === 1 ? TraceFlags.SAMPLED : TraceFlags.NONE,
+    traceFlags:
+      (parseInt(flags, 16) & 1) === 1 ? TraceFlags.SAMPLED : TraceFlags.NONE,
     isRemote: true,
   };
   return trace.setSpanContext(ROOT_CONTEXT, spanContext);
@@ -249,6 +251,12 @@ export interface RunConfig {
   provider?: string;
   /** Resolved model id, set after the model is picked. */
   requestModel?: string;
+  /**
+   * Skill names materialized for this run (author + forced `_agenta.*`), stamped on the agent
+   * span so a trace shows which skills loaded (F-029). Set on the local-Pi path, where Pi's own
+   * extension owns the agent span (the runner's sandbox-agent otel is span-less there).
+   */
+  skills?: string[];
   /** Filled by the extension on agent_start so the runner can flush/return it. */
   traceId?: string;
 }
@@ -354,7 +362,9 @@ function applyAssistant(span: Span, msg: any, capture: boolean): void {
     span.setAttribute("gen_ai.response.model", msg.responseModel ?? msg.model);
   if (msg.responseId) span.setAttribute("gen_ai.response.id", msg.responseId);
   if (msg.stopReason)
-    span.setAttribute("gen_ai.response.finish_reasons", [String(msg.stopReason)]);
+    span.setAttribute("gen_ai.response.finish_reasons", [
+      String(msg.stopReason),
+    ]);
 
   const u = msg.usage;
   if (u) {
@@ -372,8 +382,12 @@ function applyAssistant(span: Span, msg: any, capture: boolean): void {
     if (u.cacheRead)
       span.setAttribute("gen_ai.usage.cache_read_input_tokens", u.cacheRead);
     if (u.cacheWrite)
-      span.setAttribute("gen_ai.usage.cache_creation_input_tokens", u.cacheWrite);
-    if (u.cost?.total != null) span.setAttribute("gen_ai.usage.cost", u.cost.total);
+      span.setAttribute(
+        "gen_ai.usage.cache_creation_input_tokens",
+        u.cacheWrite,
+      );
+    if (u.cost?.total != null)
+      span.setAttribute("gen_ai.usage.cost", u.cost.total);
   }
 
   emitMessages(span, "llm.output_messages", [msg], capture);
@@ -415,6 +429,7 @@ export function createAgentaOtel(
     sessionId: init.sessionId,
     provider: init.provider,
     requestModel: init.requestModel,
+    skills: init.skills,
   };
 
   const tracer = trace.getTracer("agenta-pi-otel", "0.1.0");
@@ -457,11 +472,22 @@ export function createAgentaOtel(
       agentSpan.setAttribute("openinference.span.kind", "AGENT");
       agentSpan.setAttribute("gen_ai.operation.name", "invoke_agent");
       agentSpan.setAttribute("gen_ai.agent.name", "pi");
+      // F-029: record which skills loaded (author + forced `_agenta.*`) on Pi's own agent span,
+      // so a local-Pi trace shows the surfaced skills, not just the author config echoed
+      // elsewhere. The set is passed from the runner via AGENTA_SKILLS_LOADED.
+      if (config.skills && config.skills.length > 0) {
+        agentSpan.setAttribute("ag.agent.skills.loaded", config.skills);
+        agentSpan.setAttribute("ag.agent.skills.count", config.skills.length);
+      }
       if (config.sessionId) {
         agentSpan.setAttribute("session.id", config.sessionId);
         agentSpan.setAttribute("gen_ai.conversation.id", config.sessionId);
       }
-      setInputs(agentSpan, { prompt: pendingPrompt ?? "" }, config.captureContent);
+      setInputs(
+        agentSpan,
+        { prompt: pendingPrompt ?? "" },
+        config.captureContent,
+      );
 
       const traceId = agentSpan.spanContext().traceId;
       config.traceId = traceId;
@@ -479,24 +505,39 @@ export function createAgentaOtel(
 
     pi.on("turn_start", async (event: any) => {
       const parent = agentCtx ?? context.active();
-      const name = event?.turnIndex != null ? `turn ${event.turnIndex}` : "turn";
+      const name =
+        event?.turnIndex != null ? `turn ${event.turnIndex}` : "turn";
       const span = tracer.startSpan(name, undefined, parent);
       span.setAttribute("openinference.span.kind", "CHAIN");
-      if (event?.turnIndex != null) span.setAttribute("pi.turn.index", event.turnIndex);
-      currentTurn = { span, ctx: trace.setSpan(parent, span), index: event?.turnIndex };
+      if (event?.turnIndex != null)
+        span.setAttribute("pi.turn.index", event.turnIndex);
+      currentTurn = {
+        span,
+        ctx: trace.setSpan(parent, span),
+        index: event?.turnIndex,
+      };
     });
 
     pi.on("before_provider_request", async (_event: any, ctx: any) => {
       const parent = currentTurn?.ctx ?? agentCtx ?? context.active();
       const modelId = config.requestModel ?? ctx?.model?.id;
       const providerName = config.provider ?? ctx?.model?.provider;
-      llmSpan = tracer.startSpan(modelId ? `chat ${modelId}` : "chat", undefined, parent);
+      llmSpan = tracer.startSpan(
+        modelId ? `chat ${modelId}` : "chat",
+        undefined,
+        parent,
+      );
       llmSpan.setAttribute("openinference.span.kind", "LLM");
       llmSpan.setAttribute("gen_ai.operation.name", "chat");
       if (providerName) llmSpan.setAttribute("gen_ai.system", providerName);
       if (modelId) llmSpan.setAttribute("gen_ai.request.model", modelId);
       if (lastContextMessages)
-        emitMessages(llmSpan, "llm.input_messages", lastContextMessages, config.captureContent);
+        emitMessages(
+          llmSpan,
+          "llm.input_messages",
+          lastContextMessages,
+          config.captureContent,
+        );
     });
 
     pi.on("message_end", async (event: any) => {
@@ -510,18 +551,28 @@ export function createAgentaOtel(
 
     pi.on("tool_execution_start", async (event: any) => {
       const parent = currentTurn?.ctx ?? agentCtx ?? context.active();
-      const name = event?.toolName ? `execute_tool ${event.toolName}` : "execute_tool";
+      const name = event?.toolName
+        ? `execute_tool ${event.toolName}`
+        : "execute_tool";
       const span = tracer.startSpan(name, undefined, parent);
       span.setAttribute("openinference.span.kind", "TOOL");
       span.setAttribute("gen_ai.operation.name", "execute_tool");
-      if (event?.toolName) span.setAttribute("gen_ai.tool.name", event.toolName);
-      if (event?.toolCallId) span.setAttribute("gen_ai.tool.call.id", event.toolCallId);
-      setInputs(span, (event?.args as Record<string, unknown>) ?? {}, config.captureContent);
+      if (event?.toolName)
+        span.setAttribute("gen_ai.tool.name", event.toolName);
+      if (event?.toolCallId)
+        span.setAttribute("gen_ai.tool.call.id", event.toolCallId);
+      setInputs(
+        span,
+        (event?.args as Record<string, unknown>) ?? {},
+        config.captureContent,
+      );
       if (event?.toolCallId) toolSpans.set(event.toolCallId, span);
     });
 
     pi.on("tool_execution_end", async (event: any) => {
-      const span = event?.toolCallId ? toolSpans.get(event.toolCallId) : undefined;
+      const span = event?.toolCallId
+        ? toolSpans.get(event.toolCallId)
+        : undefined;
       if (!span) return;
       setOutput(span, toolResultText(event?.result), config.captureContent);
       if (event?.isError) span.setStatus({ code: SpanStatusCode.ERROR });
@@ -546,16 +597,24 @@ export function createAgentaOtel(
 
     pi.on("agent_end", async (event: any) => {
       if (!agentSpan) return;
-      setOutput(agentSpan, lastAssistantText(event?.messages), config.captureContent);
+      setOutput(
+        agentSpan,
+        lastAssistantText(event?.messages),
+        config.captureContent,
+      );
       // Stamp the run total on the agent span so it shows the agent's tokens/cost even
       // though Agenta cannot roll the per-turn LLM spans up across batches.
       if (runUsage.total > 0) {
         agentSpan.setAttribute("gen_ai.usage.input_tokens", runUsage.input);
         agentSpan.setAttribute("gen_ai.usage.output_tokens", runUsage.output);
         agentSpan.setAttribute("gen_ai.usage.prompt_tokens", runUsage.input);
-        agentSpan.setAttribute("gen_ai.usage.completion_tokens", runUsage.output);
+        agentSpan.setAttribute(
+          "gen_ai.usage.completion_tokens",
+          runUsage.output,
+        );
         agentSpan.setAttribute("gen_ai.usage.total_tokens", runUsage.total);
-        if (runUsage.cost > 0) agentSpan.setAttribute("gen_ai.usage.cost", runUsage.cost);
+        if (runUsage.cost > 0)
+          agentSpan.setAttribute("gen_ai.usage.cost", runUsage.cost);
       }
       agentSpan.end();
       agentSpan = undefined;
@@ -593,7 +652,8 @@ export function createAgentaOtel(
 function acpBlockText(block: any): string {
   if (!block) return "";
   if (typeof block === "string") return block;
-  if (block.type === "text" && typeof block.text === "string") return block.text;
+  if (block.type === "text" && typeof block.text === "string")
+    return block.text;
   return "";
 }
 
@@ -726,6 +786,12 @@ export interface SandboxAgentOtelInit extends Partial<RunConfig> {
   harness?: string;
   /** Resolved model id ("openai-codex/gpt-5.5"); set on the LLM span. */
   model?: string;
+  /**
+   * Skill names actually materialized for this run — BOTH the author-supplied skills and the
+   * forced Agenta platform `_agenta.*` skills the server injected. Stamped on the agent span so
+   * a trace shows which skills loaded (F-029), not just the author config echoed elsewhere.
+   */
+  skills?: string[];
   /**
    * Emit the span tree from the ACP event stream. Default true. Set false when the
    * harness instruments itself (e.g. Pi via the agenta extension propagates the trace
@@ -756,6 +822,12 @@ export interface SandboxAgentOtel {
   emitEvent(event: AgentEvent): void;
   /** End all open spans. Returns the accumulated assistant text. */
   finish(): string;
+  /**
+   * Record a run-level error on the agent span: the user-facing message (F-030) plus the
+   * provider that failed, and an OTel exception event, so a trace carries the same diagnostic
+   * the HTTP response does (it previously showed only an error COUNT). Call before finish/flush.
+   */
+  recordError(message: string, provider?: string): void;
   /** Set final run usage before finish/flush so events and exported spans carry final totals. */
   setUsage(usage: AgentUsage | undefined): void;
   /** Flush this run's trace to Agenta (invoke_agent has a remote parent). */
@@ -774,7 +846,9 @@ export interface SandboxAgentOtel {
  * Build an ACP-event-driven tracer scoped to a single sandbox-agent run. Call `start` once,
  * `handleUpdate` for every ACP session update, then `finish` + `await flush`.
  */
-export function createSandboxAgentOtel(init: SandboxAgentOtelInit): SandboxAgentOtel {
+export function createSandboxAgentOtel(
+  init: SandboxAgentOtelInit,
+): SandboxAgentOtel {
   ensureProvider();
 
   const capture = init.captureContent !== false;
@@ -918,10 +992,16 @@ export function createSandboxAgentOtel(init: SandboxAgentOtelInit): SandboxAgent
       record({ type: "reasoning_start", id: reasoningBlockId });
     }
     record({ type: "reasoning_delta", id: reasoningBlockId, delta });
-    reasoningEmitted = target.startsWith(reasoningEmitted) ? target : reasoningEmitted + delta;
+    reasoningEmitted = target.startsWith(reasoningEmitted)
+      ? target
+      : reasoningEmitted + delta;
   }
 
-  function start(input: { prompt?: string; messages?: any[]; sessionId?: string }): void {
+  function start(input: {
+    prompt?: string;
+    messages?: any[];
+    sessionId?: string;
+  }): void {
     // Span-less mode (harness self-instruments): only track the trace id so the run can
     // report it; the harness emits the spans under the propagated parent.
     if (!emitSpans) {
@@ -934,6 +1014,12 @@ export function createSandboxAgentOtel(init: SandboxAgentOtelInit): SandboxAgent
     agentSpan.setAttribute("openinference.span.kind", "AGENT");
     agentSpan.setAttribute("gen_ai.operation.name", "invoke_agent");
     agentSpan.setAttribute("gen_ai.agent.name", init.harness ?? "agent");
+    // F-029: stamp the skills that actually materialized (author + forced `_agenta.*`) so a
+    // trace shows which skills loaded, not just the author config echoed on the workflow span.
+    if (init.skills && init.skills.length > 0) {
+      agentSpan.setAttribute("ag.agent.skills.loaded", init.skills);
+      agentSpan.setAttribute("ag.agent.skills.count", init.skills.length);
+    }
     const sessionId = input.sessionId ?? init.sessionId;
     if (sessionId) {
       agentSpan.setAttribute("session.id", sessionId);
@@ -950,7 +1036,11 @@ export function createSandboxAgentOtel(init: SandboxAgentOtelInit): SandboxAgent
     turnSpan.setAttribute("pi.turn.index", 0);
     turnCtx = trace.setSpan(agentCtx, turnSpan);
 
-    llmSpan = tracer.startSpan(modelId ? `chat ${modelId}` : "chat", undefined, turnCtx);
+    llmSpan = tracer.startSpan(
+      modelId ? `chat ${modelId}` : "chat",
+      undefined,
+      turnCtx,
+    );
     llmSpan.setAttribute("openinference.span.kind", "LLM");
     llmSpan.setAttribute("gen_ai.operation.name", "chat");
     if (provider) llmSpan.setAttribute("gen_ai.system", provider);
@@ -1009,7 +1099,12 @@ export function createSandboxAgentOtel(init: SandboxAgentOtelInit): SandboxAgent
           setInputs(span, update.rawInput as Record<string, unknown>, capture);
       }
       toolSpans.set(id, { span, name: String(name) });
-      record({ type: "tool_call", id: String(id), name: String(name), input: update.rawInput });
+      record({
+        type: "tool_call",
+        id: String(id),
+        name: String(name),
+        input: update.rawInput,
+      });
       // A tool_call can arrive already completed (status set up front).
       maybeCloseTool(id, update);
       return;
@@ -1029,8 +1124,8 @@ export function createSandboxAgentOtel(init: SandboxAgentOtelInit): SandboxAgent
       usage = {
         input: usage?.input ?? 0,
         output: usage?.output ?? 0,
-        total: typeof total === "number" ? total : usage?.total ?? 0,
-        cost: typeof cost === "number" ? cost : usage?.cost ?? 0,
+        total: typeof total === "number" ? total : (usage?.total ?? 0),
+        cost: typeof cost === "number" ? cost : (usage?.cost ?? 0),
       };
       record({ type: "usage", ...usage });
     }
@@ -1043,14 +1138,68 @@ export function createSandboxAgentOtel(init: SandboxAgentOtelInit): SandboxAgent
     if (!entry) return;
     const status = update?.status;
     if (status !== "completed" && status !== "failed") return;
-    const out = acpToolContentText(update.content) || acpToolContentText(update.rawOutput);
+    const out =
+      acpToolContentText(update.content) ||
+      acpToolContentText(update.rawOutput);
     if (entry.span) {
       setOutput(entry.span, out, capture);
-      if (status === "failed") entry.span.setStatus({ code: SpanStatusCode.ERROR });
+      if (status === "failed")
+        entry.span.setStatus({ code: SpanStatusCode.ERROR });
       entry.span.end();
     }
     toolSpans.delete(id);
-    record({ type: "tool_result", id, output: out, isError: status === "failed" });
+    record({
+      type: "tool_result",
+      id,
+      output: out,
+      isError: status === "failed",
+    });
+  }
+
+  /**
+   * Stamp a run-level error (the user-facing message + the provider that failed) and an OTel
+   * exception event so a trace carries the same diagnostic the HTTP response does (F-030).
+   *
+   * Two cases:
+   *  - This tracer owns the agent span (emitSpans=true: Claude / gateway / Daytona) — stamp it
+   *    directly, before finish() ends it.
+   *  - The harness self-instruments (emitSpans=false: local Pi emits its own spans in another
+   *    process, which never carry the error and are already flushed) — emit a standalone error
+   *    span as a child of the caller's traceparent, so the error still reaches the /invoke trace.
+   * Idempotent and best-effort: a second call or a tracing failure must never break the run.
+   */
+  function recordError(message: string, errorProvider?: string): void {
+    const text = message || "agent run failed";
+    const stamp = (span: Span): void => {
+      span.setAttribute("ag.error.message", text);
+      const failedProvider = errorProvider ?? provider;
+      if (failedProvider)
+        span.setAttribute("ag.error.provider", failedProvider);
+      span.recordException({ name: "AgentRunError", message: text });
+      span.setStatus({ code: SpanStatusCode.ERROR, message: text });
+    };
+    try {
+      if (agentSpan) {
+        stamp(agentSpan);
+        return;
+      }
+      // No owned span (harness self-instruments). Emit a standalone error span under the
+      // caller's traceparent so the failure is visible in the /invoke trace.
+      const parent = parentContext(init.traceparent);
+      const errSpan = tracer.startSpan("agent_error", undefined, parent);
+      errSpan.setAttribute("openinference.span.kind", "AGENT");
+      errSpan.setAttribute("gen_ai.operation.name", "invoke_agent");
+      errSpan.setAttribute("gen_ai.agent.name", init.harness ?? "agent");
+      stamp(errSpan);
+      errSpan.end();
+      // The standalone span shares the caller's trace id; make sure the run reports it so the
+      // engine flushes this trace (a self-instrumenting run otherwise only tracked it from the
+      // traceparent, which is the same id — but set it defensively if it was never resolved).
+      runTraceId = runTraceId ?? errSpan.spanContext().traceId;
+      traceTargets.set(runTraceId, { endpoint, authorization });
+    } catch {
+      // tracing must never break the run
+    }
   }
 
   function finish(): string {
@@ -1110,6 +1259,7 @@ export function createSandboxAgentOtel(init: SandboxAgentOtelInit): SandboxAgent
     handleUpdate,
     emitEvent: record,
     finish,
+    recordError,
     setUsage,
     flush: () => flushTrace(runTraceId),
     traceId: () => runTraceId,
diff --git a/services/agent/tests/unit/otel-skills-error.test.ts b/services/agent/tests/unit/otel-skills-error.test.ts
new file mode 100644
index 0000000000..e203718f54
--- /dev/null
+++ b/services/agent/tests/unit/otel-skills-error.test.ts
@@ -0,0 +1,219 @@
+/**
+ * Unit tests for the tracing additions in `tracing/otel.ts`:
+ *
+ *  - F-029: the loaded skills (author + forced `_agenta.*`) are stamped on the agent span via
+ *    `ag.agent.skills.loaded` / `ag.agent.skills.count`, on BOTH the sandbox-agent ACP tracer
+ *    (`createSandboxAgentOtel`, non-Pi / Daytona) and Pi's own extension tracer
+ *    (`createAgentaOtel`, local Pi).
+ *  - F-030: `recordError` stamps the user-facing error message + the provider that failed plus
+ *    an exception event on the agent span, so an error run's trace carries the same diagnostic
+ *    the HTTP response does. When the harness self-instruments (no owned span) it emits a
+ *    standalone `agent_error` span so the failure still reaches the trace.
+ *
+ * Spans export over OTLP from a module-level provider, so we spy on the OTel API tracer and
+ * capture the attributes/exceptions each span records, rather than wiring an in-memory exporter.
+ *
+ * Run: pnpm exec vitest run tests/unit/otel-skills-error.test.ts
+ */
+import { afterEach, describe, expect, it, vi } from "vitest";
+import { trace, type Span } from "@opentelemetry/api";
+
+import {
+  createSandboxAgentOtel,
+  createAgentaOtel,
+} from "../../src/tracing/otel.ts";
+
+interface FakeSpan {
+  name: string;
+  attributes: Record<string, unknown>;
+  exceptions: Array<{ name?: string; message?: string }>;
+  status?: { code: number; message?: string };
+  ended: boolean;
+}
+
+/** Replace the OTel tracer so every span built records into a captured array. */
+function spyTracer(): FakeSpan[] {
+  const spans: FakeSpan[] = [];
+  const makeSpan = (name: string): Span => {
+    const span: FakeSpan = {
+      name,
+      attributes: {},
+      exceptions: [],
+      ended: false,
+    };
+    spans.push(span);
+    const api = {
+      setAttribute(key: string, value: unknown) {
+        span.attributes[key] = value;
+        return api;
+      },
+      setAttributes(attrs: Record<string, unknown>) {
+        Object.assign(span.attributes, attrs);
+        return api;
+      },
+      recordException(exc: { name?: string; message?: string }) {
+        span.exceptions.push(exc);
+      },
+      setStatus(status: { code: number; message?: string }) {
+        span.status = status;
+        return api;
+      },
+      end() {
+        span.ended = true;
+      },
+      spanContext() {
+        return {
+          traceId: "0".repeat(32),
+          spanId: "0".repeat(16),
+          traceFlags: 1,
+        };
+      },
+      isRecording() {
+        return true;
+      },
+      addEvent() {
+        return api;
+      },
+      updateName() {
+        return api;
+      },
+    };
+    return api as unknown as Span;
+  };
+  vi.spyOn(trace, "getTracer").mockReturnValue({
+    startSpan: (name: string) => makeSpan(name),
+    startActiveSpan: ((name: string, fn: (s: Span) => unknown) =>
+      fn(makeSpan(name))) as any,
+  } as any);
+  return spans;
+}
+
+afterEach(() => {
+  vi.restoreAllMocks();
+});
+
+describe("otel skills + error tracing", () => {
+  it("stamps loaded skills on the sandbox-agent agent span (F-029)", () => {
+    const spans = spyTracer();
+    const otel = createSandboxAgentOtel({
+      harness: "claude",
+      model: "anthropic/claude-haiku",
+      emitSpans: true,
+      skills: ["weather-oracle", "_agenta.agenta-getting-started"],
+    });
+    otel.start({ prompt: "hi" });
+
+    const agentSpan = spans.find((s) => s.name === "invoke_agent");
+    expect(agentSpan).toBeTruthy();
+    expect(agentSpan?.attributes["ag.agent.skills.loaded"]).toEqual([
+      "weather-oracle",
+      "_agenta.agenta-getting-started",
+    ]);
+    expect(agentSpan?.attributes["ag.agent.skills.count"]).toBe(2);
+  });
+
+  it("omits the skills attributes when no skills loaded", () => {
+    const spans = spyTracer();
+    const otel = createSandboxAgentOtel({
+      harness: "claude",
+      model: "anthropic/claude-haiku",
+      emitSpans: true,
+    });
+    otel.start({ prompt: "hi" });
+    const agentSpan = spans.find((s) => s.name === "invoke_agent");
+    expect(agentSpan?.attributes["ag.agent.skills.loaded"]).toBeUndefined();
+    expect(agentSpan?.attributes["ag.agent.skills.count"]).toBeUndefined();
+  });
+
+  it("stamps loaded skills on Pi's own agent span (F-029, local Pi)", () => {
+    const spans = spyTracer();
+    const otel = createAgentaOtel({
+      captureContent: true,
+      skills: ["weather-oracle", "_agenta.agenta-getting-started"],
+    });
+    // Drive the Pi lifecycle: register handlers, then fire before_agent_start + agent_start.
+    const handlers: Record<string, (e: any) => Promise<void>> = {};
+    otel.register({
+      on: (name: string, fn: (e: any) => Promise<void>) => {
+        handlers[name] = fn;
+      },
+    } as any);
+    return (async () => {
+      await handlers["before_agent_start"]?.({ prompt: "hi" });
+      await handlers["agent_start"]?.({});
+      const agentSpan = spans.find((s) => s.name === "invoke_agent");
+      expect(agentSpan?.attributes["ag.agent.skills.loaded"]).toEqual([
+        "weather-oracle",
+        "_agenta.agenta-getting-started",
+      ]);
+      expect(agentSpan?.attributes["ag.agent.skills.count"]).toBe(2);
+    })();
+  });
+
+  it("records the error message + provider + exception on the owned agent span (F-030)", () => {
+    const spans = spyTracer();
+    const otel = createSandboxAgentOtel({
+      harness: "claude",
+      model: "anthropic/claude-haiku",
+      emitSpans: true,
+    });
+    otel.start({ prompt: "hi" });
+    otel.recordError(
+      "claude: model authentication failed — add the project's Anthropic key.",
+      "anthropic",
+    );
+
+    const agentSpan = spans.find((s) => s.name === "invoke_agent");
+    expect(agentSpan?.attributes["ag.error.message"]).toContain(
+      "model authentication failed",
+    );
+    expect(agentSpan?.attributes["ag.error.provider"]).toBe("anthropic");
+    expect(agentSpan?.exceptions.length).toBe(1);
+    expect(agentSpan?.exceptions[0].message).toContain(
+      "model authentication failed",
+    );
+    // ERROR status (SpanStatusCode.ERROR = 2).
+    expect(agentSpan?.status?.code).toBe(2);
+  });
+
+  it("emits a standalone error span when the harness self-instruments (F-030, local Pi)", () => {
+    const spans = spyTracer();
+    const otel = createSandboxAgentOtel({
+      harness: "pi",
+      model: "openai/gpt-4o-mini",
+      // Local Pi: the runner's sandbox-agent tracer is span-less; Pi emits its own spans.
+      emitSpans: false,
+      traceparent: `00-${"a".repeat(32)}-${"b".repeat(16)}-01`,
+    });
+    otel.start({ prompt: "hi" });
+    // No invoke_agent span exists yet (span-less mode).
+    expect(spans.find((s) => s.name === "invoke_agent")).toBeUndefined();
+
+    otel.recordError(
+      "pi_core: model authentication failed — add the project's Anthropic key.",
+      "anthropic",
+    );
+
+    const errSpan = spans.find((s) => s.name === "agent_error");
+    expect(errSpan).toBeTruthy();
+    expect(errSpan?.attributes["ag.error.message"]).toContain(
+      "model authentication failed",
+    );
+    expect(errSpan?.attributes["ag.error.provider"]).toBe("anthropic");
+    expect(errSpan?.exceptions.length).toBe(1);
+    expect(errSpan?.ended).toBe(true);
+  });
+
+  it("falls back to the init model provider when recordError omits one", () => {
+    const spans = spyTracer();
+    const otel = createSandboxAgentOtel({
+      harness: "claude",
+      model: "anthropic/claude-haiku",
+      emitSpans: true,
+    });
+    otel.start({ prompt: "hi" });
+    otel.recordError("some failure");
+    const agentSpan = spans.find((s) => s.name === "invoke_agent");
+    expect(agentSpan?.attributes["ag.error.provider"]).toBe("anthropic");
+  });
+});
diff --git a/services/agent/tests/unit/sandbox-agent-orchestration.test.ts b/services/agent/tests/unit/sandbox-agent-orchestration.test.ts
index 4fe7fb9c4b..f3bff1ec0f 100644
--- a/services/agent/tests/unit/sandbox-agent-orchestration.test.ts
+++ b/services/agent/tests/unit/sandbox-agent-orchestration.test.ts
@@ -52,6 +52,7 @@ function fakeHarness(options: FakeOptions = {}) {
     }>,
     runFinished: 0,
     runFlushed: 0,
+    recordedErrors: [] as Array<{ message: string; provider?: string }>,
   };
   const events: AgentEvent[] = [];
   let eventHandler: ((event: any) => void) | undefined;
@@ -121,6 +122,12 @@ function fakeHarness(options: FakeOptions = {}) {
       calls.runFinished += 1;
       return options.output ?? "assistant output";
     },
+    recordError(message: string, provider?: string) {
+      calls.recordedErrors.push({ message, provider });
+    },
+    output() {
+      return options.output ?? "assistant output";
+    },
     async flush() {
       calls.runFlushed += 1;
     },
diff --git a/services/agent/tests/unit/sandbox-agent-pi-assets.test.ts b/services/agent/tests/unit/sandbox-agent-pi-assets.test.ts
index cb0042aca3..f5d5d5527f 100644
--- a/services/agent/tests/unit/sandbox-agent-pi-assets.test.ts
+++ b/services/agent/tests/unit/sandbox-agent-pi-assets.test.ts
@@ -107,6 +107,41 @@ describe("buildPiExtensionEnv", () => {
     assert.equal(env.AGENTA_TOOL_PUBLIC_SPECS, undefined);
     assert.equal(env.AGENTA_TOOL_RELAY_DIR, undefined);
   });
+
+  it("carries the loaded skill names under tracing (F-029)", () => {
+    const request = {
+      trace: {
+        traceparent: "00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01",
+      },
+    } as AgentRunRequest;
+
+    const env = buildPiExtensionEnv(request, true, {
+      skills: ["weather-oracle", "_agenta.agenta-getting-started"],
+    });
+
+    assert.deepEqual(JSON.parse(env.AGENTA_SKILLS_LOADED ?? "[]"), [
+      "weather-oracle",
+      "_agenta.agenta-getting-started",
+    ]);
+  });
+
+  it("omits the loaded skills env when there are none or tracing is off", () => {
+    const request = {
+      trace: {
+        traceparent: "00-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa-bbbbbbbbbbbbbbbb-01",
+      },
+    } as AgentRunRequest;
+
+    assert.equal(
+      buildPiExtensionEnv(request, true, { skills: [] }).AGENTA_SKILLS_LOADED,
+      undefined,
+    );
+    assert.equal(
+      buildPiExtensionEnv(request, false, { skills: ["x"] })
+        .AGENTA_SKILLS_LOADED,
+      undefined,
+    );
+  });
 });
 
 describe("writeSystemPromptLocal", () => {