diff --git a/api/oss/src/core/tools/dtos.py b/api/oss/src/core/tools/dtos.py index 224a956227..f0dca37e28 100644 --- a/api/oss/src/core/tools/dtos.py +++ b/api/oss/src/core/tools/dtos.py @@ -46,6 +46,9 @@ class ToolCatalogAction(BaseModel): # categories: List[str] = [] logo: Optional[str] = None + # + # From the MCP behavioral hints: True (read-only), False (mutating), None (unknown). + read_only: Optional[bool] = None class ToolCatalogActionDetails(ToolCatalogAction): @@ -274,6 +277,7 @@ class ResolvedAgentTool(BaseModel): description: Optional[str] = None input_schema: Optional[Dict[str, Any]] = None call_ref: str + read_only: Optional[bool] = None class AgentToolsResolution(BaseModel): diff --git a/api/oss/src/core/tools/providers/composio/adapter.py b/api/oss/src/core/tools/providers/composio/adapter.py index 80fcdee100..e388d7072e 100644 --- a/api/oss/src/core/tools/providers/composio/adapter.py +++ b/api/oss/src/core/tools/providers/composio/adapter.py @@ -16,7 +16,10 @@ ) from oss.src.core.tools.interfaces import ToolsGatewayInterface from oss.src.core.tools.exceptions import AdapterError -from oss.src.core.tools.providers.composio.catalog import ComposioCatalogClient +from oss.src.core.tools.providers.composio.catalog import ( + ComposioCatalogClient, + _derive_read_only, +) log = get_module_logger(__name__) @@ -173,6 +176,7 @@ async def get_action( if input_params or output_params else None, scopes=item.get("scopes") or None, + read_only=_derive_read_only(item.get("tags")), ) # ----------------------------------------------------------------------- diff --git a/api/oss/src/core/tools/providers/composio/catalog.py b/api/oss/src/core/tools/providers/composio/catalog.py index 0ffa589027..555eb995fe 100644 --- a/api/oss/src/core/tools/providers/composio/catalog.py +++ b/api/oss/src/core/tools/providers/composio/catalog.py @@ -359,6 +359,22 @@ def _parse_integration_detail(item: Dict[str, Any]) -> ToolCatalogIntegration: ) +def _derive_read_only(raw_tags: Any) -> Optional[bool]: + """Distil the MCP behavioral hint tags into a single read-only signal. + + ``readOnlyHint`` with no mutating hint -> read-only; ``destructiveHint`` or + ``updateHint`` -> mutating. No hint present -> unknown (``None``); never guessed. + """ + if not isinstance(raw_tags, list): + return None + tags = {t for t in raw_tags if isinstance(t, str)} + if "destructiveHint" in tags or "updateHint" in tags: + return False + if "readOnlyHint" in tags: + return True + return None + + def _parse_action(item: Dict[str, Any], integration_key: str) -> ToolCatalogAction: raw_tags = item.get("tags") # Tags mix MCP hint flags with semantic categories — strip the known hints @@ -381,4 +397,5 @@ def _parse_action(item: Dict[str, Any], integration_key: str) -> ToolCatalogActi name=item.get("name", ""), description=item.get("description"), categories=categories, + read_only=_derive_read_only(raw_tags), ) diff --git a/api/oss/src/core/tools/service.py b/api/oss/src/core/tools/service.py index 03c8244f5d..e304519497 100644 --- a/api/oss/src/core/tools/service.py +++ b/api/oss/src/core/tools/service.py @@ -583,4 +583,5 @@ async def _resolve_composio_tool( description=action.description, input_schema=input_schema, call_ref=call_ref, + read_only=action.read_only, ) diff --git a/api/oss/tests/pytest/unit/tools/test_agent_resolution.py b/api/oss/tests/pytest/unit/tools/test_agent_resolution.py index 12ad49266a..1689a3f635 100644 --- a/api/oss/tests/pytest/unit/tools/test_agent_resolution.py +++ b/api/oss/tests/pytest/unit/tools/test_agent_resolution.py @@ -10,6 +10,7 @@ from oss.src.apis.fastapi.tools.models import ToolResolveRequest from oss.src.core.tools.dtos import AgentBuiltinTool, AgentComposioTool +from oss.src.core.tools.providers.composio.catalog import _derive_read_only from oss.src.core.tools.service import ToolsService @@ -59,6 +60,7 @@ async def _action(**_kwargs): schemas=SimpleNamespace( inputs={"type": "object", "properties": {}}, ), + read_only=True, ) monkeypatch.setattr(service, "resolve_connection_by_slug", _connection) @@ -77,3 +79,25 @@ async def _action(**_kwargs): ) assert result.builtins == ["read"] assert result.custom[0].call_ref == "tools.composio.github.GET_USER.c1" + assert result.custom[0].read_only is True + + +@pytest.mark.parametrize( + "tags, expected", + [ + (["readOnlyHint"], True), + (["updateHint"], False), + (["destructiveHint"], False), + # A mutating hint wins even when readOnlyHint is also present. + (["destructiveHint", "readOnlyHint"], False), + (["updateHint", "readOnlyHint"], False), + # Unknown == None (never guess), not False. + ([], None), + (None, None), + (["unrelatedHint"], None), + # Non-list input is ignored. + ("readOnlyHint", None), + ], +) +def test_derive_read_only_tag_matrix(tags, expected): + assert _derive_read_only(tags) is expected diff --git a/docs/design/agent-workflows/projects/capability-config/README.md b/docs/design/agent-workflows/projects/capability-config/README.md new file mode 100644 index 0000000000..6e0d0be7eb --- /dev/null +++ b/docs/design/agent-workflows/projects/capability-config/README.md @@ -0,0 +1,31 @@ +# Capability and permission configuration + +How an author configures what an Agenta agent may do (files, network, tools, tool approvals), +and how those controls enforce end to end: from the playground form, through the SDK and agent +service, to the runner and the harness. Graduated from the scratch notes in +`../../scratch/capability-architecture.md` on 2026-06-23. + +## The shape in one paragraph + +Three configuration layers, each with one job and one enforcement point. **Layer 1, harness +configuration:** the runner translates author kwargs into the harness's own config (a +`.claude/settings.json` for Claude, `builtin_names` for Pi). **Layer 2, sandbox permission:** an +optional `sandbox_permission` field draws the network and filesystem boundary, enforced by the +backend when it provisions the sandbox. **Layer 3, tool permission:** a per-tool permission +(always-allow / ask / deny), enforced at the runner relay for resolved tools and at the harness +permission plane for builtins. The work spans the playground frontend, the schema, the SDK, the +service, and the runner. + +## Files + +- `context.md` — why this exists, goals, non-goals, background, how it relates to the sibling + projects. +- `proposal.md` — the three-layer design. The canonical spec. +- `plan.md` — phased execution plan, end to end including the playground frontend. +- `research.md` — current-state codebase findings and exact insertion points (backend, runner, + frontend), plus the library facts the design rests on. +- `status.md` — progress, decisions, and open questions. The source of truth for state. + +## Status + +Code-complete and reviewed; backend + runner + FE built and green, live-QA'd on the running stack. Live Daytona egress + Claude behavioral cells pending credentials. See `status.md`. diff --git a/docs/design/agent-workflows/projects/capability-config/context.md b/docs/design/agent-workflows/projects/capability-config/context.md new file mode 100644 index 0000000000..525a000fb6 --- /dev/null +++ b/docs/design/agent-workflows/projects/capability-config/context.md @@ -0,0 +1,73 @@ +# Context + +## Why this work exists + +Every agent run has to be governed. The author needs to say what the agent may touch, and the +system has to enforce that across two harnesses (`pi` and `claude`) and three backends +(sandbox-agent local, sandbox-agent on Daytona, and a future in-process local SDK). + +Today almost none of this is wired: + +- The runner drops Pi's `builtin_names`, so even Pi's own tool selection has no effect on the + sandbox-agent path. +- The runner never restricts Claude. It creates the session with only `cwd` and `mcpServers`, + so "Claude without web" or "Claude read-only" is not expressible. +- The runner never sets a network boundary, so a Daytona run has full egress by default. +- `permission_policy` is the only live control, and it is coarse (auto or deny, all tools at + once) and effective on Claude only. +- The playground renders every config field unconditionally, with no per-harness gating and no + way to set capability or per-tool approval. + +So a request as simple as "give this agent web access but not write access" cannot be +expressed, on either harness, from the playground or the SDK. This project makes capability and +permission a real, configurable, end-to-end feature. + +## Goals + +1. A three-layer configuration model the author can set: harness configuration, sandbox + permission, and per-tool permission. Each layer has one job and one enforcement point. +2. End to end. The playground frontend is in scope: the config form gains the new sections, and + the agent chat gains a tool-approval surface for the "ask" permission. +3. Honest enforcement. The sandbox layer is authoritative for the network and the filesystem. A + run fails loud when a backend cannot deliver a requested guarantee, rather than pretending. +4. Sensible defaults. Read-only tools default to always-allow and mutating tools to ask, using + Composio's read/write metadata, so the author does not label every tool by hand. + +## Non-goals (for now) + +- **Pi MCP.** Deferred. When built it follows the same permission pattern as Claude + (settings-style `mcp__` rules). Tracked in `../harness-capabilities/`. +- **A real filesystem jail.** No backend confines the filesystem today; the local cwd is a temp + dir, not a jail. Layer 2 ships network first; filesystem stays tool-plane only until a backend + can enforce it. +- **Durable / unattended HITL approval.** The "ask" permission this project ships asks the user + in the open chat. The global, durable approval channel that survives a closed tab or a + scheduled run is Flow 7 in `../../scratch/flows-and-capabilities.md`, a later milestone. +- **A sandbox boundary for the local backend.** The local sidecar is the host; it cannot enforce + Layer 2. That is by design, and the fail-loud rule covers it. + +## Background + +The runtime splits work across a Python agent service (`services/oss/src/agent/`, decides what +to run) and a TypeScript runner (`services/agent/`, runs it). The runner drives the harness over +an ACP bridge, `sandbox-agent`, on a chosen backend. The SDK (`sdks/python/agenta/sdk/agents/`) +owns the neutral config, the ports, and the per-harness adapters. + +Three earlier scratch documents set up this project, and their facts are folded into +`research.md`: + +- `../../scratch/capability-map.md` — the current-state web/exec/read/write cut: what each + harness can do, what is on by default, what the backend changes. +- `../../scratch/capability-architecture.md` — the design exploration this project's + `proposal.md` cleans up. +- `../../scratch/flows-and-capabilities.md` — the user-facing flows, including Flow 7 (HITL). + +## Relation to sibling projects + +- `../harness-capabilities/` declares which capabilities each harness supports (the static + capability table) and owns the deferred Pi-MCP work. This project sets the capability *values* + the author chooses; that project declares which choices a harness can honor. They meet at the + schema and the fail-loud check. +- `../model-config/` is the same static-then-dynamic pattern for the model axis. Layer 1's + Claude `model` setting overlaps it. +- `../skills-config/` configures forced skills, a different axis on the same agent config. diff --git a/docs/design/agent-workflows/projects/capability-config/plan.md b/docs/design/agent-workflows/projects/capability-config/plan.md new file mode 100644 index 0000000000..eedae886f1 --- /dev/null +++ b/docs/design/agent-workflows/projects/capability-config/plan.md @@ -0,0 +1,181 @@ +# Execution plan + +Phased, end to end. Each phase is shippable and de-risks the next. File references and exact +insertion points live in `research.md`; this plan names the work and the acceptance bar. Some +line numbers in `research.md` are approximate and must be reconfirmed at implementation time. + +The dependency spine: Phase 0 unblocks Pi and the metadata defaults. Phases 1 to 3 build the +three layers in the backend and runner. Phases 4 and 5 surface them in the playground. Phase 6 +verifies live. Phases 1 and the Composio half of Phase 0 are independent and can run in +parallel. + +## Phase 0: prerequisites + +Two latent gaps that the rest of the design assumes are closed. + +- **(S0b) Investigate Pi's real control surface.** The runner does drop `request.tools` + (`run-plan.ts:101`), but `pi-acp@0.0.29` spawns `pi --mode rpc --no-themes` with no `tools` + field on `newSession` (`index.js:135`), so honoring `builtin_names` is not simply a matter of + forwarding `request.tools`. Find the actual lever — a `pi` CLI flag, a Pi config file written + into the cwd, or an ACP session field — or conclude that Pi built-in restriction is unsupported + over sandbox-agent. Read-only; record the finding in `research.md`. +- **(S0a) Stop stripping Composio read/write hints.** Carry a `read_only` flag from the catalog + through to the resolved tool, so Phase 3 can default permissions from it. + (`api/oss/src/core/tools/providers/composio/catalog.py`, `api/oss/src/core/tools/dtos.py`, the + resolved `ToolSpec`.) + +Acceptance: S0b ends with a written finding (the lever, or "unsupported → fail loud"); S0a makes +a resolved Composio tool carry `read_only` on the wire (read-only for `readOnlyHint`, mutating for +`destructiveHint`/`updateHint`). If S0b finds Pi restriction unsupported, Layer 1 for Pi fails +loud rather than silently granting the full tool set. + +## Phase 1: Layer 2, sandbox permission (backend) + +The network boundary, enforced at sandbox provisioning. Filesystem is declared but not enforced +yet (no backend confines it). + +- Add an optional `sandbox_permission` object to `AgentConfig` and the `agent_config` schema + (presets plus `network_egress` with `mode` and `enforcement`). (`dtos.py`, + `sdk/utils/types.py` `AgentConfigSchema`, `services/oss/src/agent/schemas.py`.) +- **Thread the policy across the Python→TS wire**, because Daytona is provisioned in the runner, + not in Python: `SessionConfig` → `request_to_wire` (`sdk/agents/utils/wire.py`) → + `AgentRunRequest` (`protocol.ts`) → `buildRunPlan` (`run-plan.ts`) → `buildSandboxProvider`. + Move the golden fixtures and both wire-contract tests with the new field. The Python + `Backend.create_sandbox` also gains the typed policy for the local/in-process path. +- Apply it on Daytona: set `networkBlockAll` / `networkAllowList` on the provider `create` + object. (`services/agent/src/engines/sandbox_agent/provider.ts`.) +- Fail loud when a backend cannot enforce a requested guarantee (local sidecar, local SDK), + unless the author sets a per-axis opt-out. +- **(S1g) Close the runner-host hole before claiming `network: off`.** `relay.ts` runs `code` and + gateway/callback tools on the runner host, so a network-blocked Daytona sandbox does not confine + them. When `network: off`/`exec: off`, reject or strip `code` tools, gateway/callback tools, and + stdio MCP servers — or move their execution into the sandbox. The runner must not report + `network: off` while a runner-side tool is still reachable. See `status.md` open question 1. + +Acceptance: a `network: off` config on Daytona blocks egress (a `curl` from `bash` fails) **and** +a `code`/gateway tool under that config either refuses at plan time or runs inside the sandbox, +never on the runner host; the same config on local fails loud unless opted out. + +## Phase 2: Layer 1, harness configuration (settings.json) + +The runner renders author kwargs into the harness's native config. + +- Define the `harness_options` surface for Claude: permission mode plus per-tool allow/deny/ask + rules. Decide raw `ClaudeCodeSettings` passthrough vs a curated subset. (`dtos.py` + `harness_options`.) +- In the runner, write `.claude/settings.json` into the session cwd before `createSession`. + (`run-plan.ts` cwd creation; the engine before `sandbox.createSession` in `sandbox_agent.ts`.) +- Derive the baseline rules from Layer 2 and the read-only profile (for example `network: off` + emits `deny: ["WebFetch","WebSearch"]`; read-only emits `deny: ["Write","Edit","Bash"]`). + +Acceptance: a Claude run with a settings.json `deny` rule does not call the denied tool; a +`defaultMode` setting takes effect. + +## Phase 3: Layer 3, tool permission (backend + runner) + +Per-tool permission: always-allow / ask / deny. + +- Carry the permission on each tool's spec and each MCP server's spec. The frontend already + stores `agenta_metadata.permission_mode` on each tool; define its canonical values + (always-allow/ask/deny) and serialize them. `permission_policy` stays as the global default. + Add `read_only` to `ToolSpec` (Phase 0). (`dtos.py`, `tools/models.py`.) +- Enforce resolved tools at the relay: deny refuses, ask parks, always-allow runs. + (`services/agent/src/tools/relay.ts`.) +- Enforce Claude builtins through the responder: pass the per-tool map to the responder, which + reads the tool name off the permission request and applies the permission. + (`responder.ts`, `engines/sandbox_agent/permissions.ts`.) +- Default the permission from the `read_only` flag: read-only to always-allow, mutating to ask. + The author overrides. + +Known risk handled in S1g (Phase 1), not here: resolved `code` **and** gateway/callback tools run +on the runner host, not the sandbox, so a network-blocked Daytona run does not confine their +egress. The interim guard (reject `code`, gateway/callback, and stdio MCP when `network`/`exec` +are off) versus the target (move resolved-tool execution into the sandbox) is decided there. See +`status.md` open question 1. + +Acceptance: a tool set to deny never runs; a tool set to ask raises an approval request; a +read-only Composio tool defaults to always-allow. + +## Phase 4: playground form (the three sections) + +The form is generic-schema-driven, so most fields appear once the schema carries them. The +work is the controls and the gating. + +- Render the new sections in `AgentConfigControl.tsx`: a sandbox-permission section (presets + plus network/filesystem toggles), an advanced harness-config section, and a per-tool + permission control (allow / ask / deny on each tool). NOTE (S3a finding): `permission_mode` is + NOT round-tripped in the FE today — this control must be built from scratch, writing + `agenta_metadata.permission_mode` with the `allow|ask|deny` vocabulary (the SDK already accepts + that key via `AliasChoices`, so no mapping is needed). +- Default the per-tool control from the tool's `read_only` metadata. +- Gate fields by the live `harness` value (hide `permission`-mode controls for Pi, hide + `mcp_servers` where not applicable), reading the harness-capabilities map from `/inspect`. + (`AgentConfigControl.tsx`, `SchemaPropertyRenderer.tsx`.) + +Acceptance: an author sets a preset and a per-tool permission in the playground, saves, and the +values persist on the variant config and reach the run. + +## Phase 5: playground HITL approval surface (the "ask" path) + +The chat already exposes `addToolApprovalResponse` (`AgentChatPanel.tsx:86`) **and** already +renders approve/deny buttons (`ToolPart.tsx:153`). So the UI is largely built; the missing piece +is the runtime parked/resumed path, not the buttons. + +- Map the runner's `interaction_request` (permission) event onto the ai-sdk approval part, so a + Layer 3 "ask" in the runtime surfaces as the existing approval UI, and route the user's + decision back through `addToolApprovalResponse` to resume (or reject) the parked call. + (`web/oss/src/components/AgentChatSlice/`: `AgentChatPanel.tsx`, `AgentMessage.tsx`, + `ToolPart.tsx`; runner responder seam `responder.ts`, `permissions.ts`.) +- Confirm the responder actually parks the call and resumes it on the answer (the underestimated + part), rather than only rendering a prompt. + +Acceptance: a tool set to ask shows the approve/deny prompt in the playground chat, and the +answer resolves the parked call end to end. + +## Phase 6 (S6): tests and live verification + +**Unit / golden — DONE.** The settings.json builder, the relay permission enforcement, the +`resolvePermission` table, the `HITLResponder`, the Composio `read_only` mapping, and the +permission default ladder are all unit-tested; the new wire fields cross the golden fixtures and +both contract tests. 293 SDK + 15 API + 10 services/oss + 177 TS pass. + +**Live — PENDING (needs a deployed stack).** The running dev stack is +`agenta-ee-dev-wp-b2-rendering` (traefik `:8280`, sandbox-agent sidecar `:8765`). The SDK changes +need a container reload and the TS-runner changes need a sandbox-agent image rebuild before these +checks reflect this branch. Run each check and record pass/fail here: + +1. **L2 Daytona `network: off` blocks egress (E3).** Config `sandbox_permission.network.mode=off`, + harness `pi` or `claude`, sandbox `daytona`. Make the agent run `curl https://example.com` (or + a `code` tool doing a fetch). Expect egress to FAIL. Confirm the Daytona create call carried + `networkBlockAll: true` (sidecar logs). Re-run with `enforcement=strict` on the LOCAL sidecar: + expect the run to fail loud at plan time, not silently. +2. **L2 runner-host guard (S1g).** Config `network: off`, `enforcement=strict`, plus a `code` or + gateway tool (or a stdio MCP server). Expect the run to be REJECTED at plan time with the + runner-host message. With `enforcement=best_effort`, expect it to run. +3. **L1 Claude `settings.json` deny (E2/E3, claude).** Author `harness_options.claude.permissions.deny=["WebFetch"]` + (or `network:off` to derive it). Ask Claude to fetch a URL. Expect Claude NOT to call WebFetch. + Confirm `/.claude/settings.json` was written (sidecar logs / Daytona FS). Also verify a + `defaultMode` takes effect. +4. **L1 MCP `mcp__` rule.** An MCP server with `permission:"deny"` → confirm Claude cannot + call its tools; `permission:"allow"` → confirm it runs without a prompt. +5. **L3 per-tool deny / read-only default.** A gateway tool with `permission:"deny"` → the relay + refuses it (refusal string in the transcript, tool not executed). A read-only Composio tool with + no explicit permission → defaults to allow (runs without a prompt). A mutating one → defaults to + ask. Verify the permission survived the save (it is a TOP-LEVEL `permission` key on the tool, + not in `agenta_metadata`). +6. **L3 HITL multi-turn round-trip (the deferred S5 unknown).** In the `/messages` playground chat, + trigger an `ask` tool. Turn 1: expect a `tool-approval-request` to surface (approve/deny buttons) + and the tool NOT to run. Approve. Turn 2: expect the gate to re-raise and the tool to actually + run; deny variant: expect refusal. THIS is the unverified design assumption (cold-replay + re-raise) — see `../../scratch/open-issues.md`. If turn 2 does not re-raise, apply the fallback + noted there. +7. **Surfaces.** Drive checks 1–5 via BOTH the SDK (E4 script pulling config + running on host) and + the playground UI (`mcp__chrome-devtools__*` against `:8280`). The playground form (S4) must + render the three sections, gate the Claude section to the claude harness, and persist values + that reach the run. + +**Pin a replay regression.** Capture one green `/run` per layer, redact volatile fields, and write +a replay test (`agent-replay-test` skill) so the cells stay green cost-free in CI. + +Acceptance: the matrix in `../qa/` gains capability + per-tool-permission rows, green on the +harness/backend pairings the enforceability table claims. diff --git a/docs/design/agent-workflows/projects/capability-config/proposal.md b/docs/design/agent-workflows/projects/capability-config/proposal.md new file mode 100644 index 0000000000..a40fb76d25 --- /dev/null +++ b/docs/design/agent-workflows/projects/capability-config/proposal.md @@ -0,0 +1,222 @@ +# Proposal: configuring agent capabilities and permissions + +The canonical design. How an author controls what an agent may do, and how those controls reach +each harness and backend. Why this matters and how it fits the other agent-workflows projects is +in `context.md`; the codebase findings are in `research.md`; the work is phased in `plan.md`. + +## The problem + +Every agent run has to be governed. The author needs to say what the agent may touch (files, +the network, which tools), and the system has to enforce that across two harnesses (`pi` and +`claude`) and three backends (sandbox-agent local, sandbox-agent on Daytona, and a future +in-process local SDK). + +Today almost none of this is wired. The runner drops Pi's tool selection, never restricts +Claude, and never sets a network boundary. So a request as simple as "give this agent web +access but not write access" is not expressible on either harness. + +The fix is three configuration layers, each with one job and one enforcement point. This +document defines the three and shows how each reaches `pi` and `claude`. + +## The three layers + +The three layers answer three different questions. Keep them separate. Collapsing them is what +made the current code confusing. + +1. **Harness configuration** sets how the harness itself behaves: its permission mode and its + own tool rules. +2. **Sandbox permission** sets what the running process can physically do: reach the network, + write the filesystem. +3. **Tool permission** sets what happens to a single tool call: run it, ask a human, or refuse + it. + +Layer 1 configures the agent. Layer 2 draws the security boundary. Layer 3 governs each call. +The next three sections take them in turn. + +### Layer 1: harness configuration (author kwargs to a settings file) + +The author sets harness-specific options in the existing generic `harness_options` kwargs (a map +keyed by harness: `harness_options.claude.permissions`, `harness_options.pi.*`, and so on, room +for any future harness). The author never writes harness-native config by hand and the generic +interface stays free of per-harness fields. The SDK's per-harness adapter (in Python) translates +those neutral options into the harness's native config files, and the runner writes those files +into the session cwd before the session starts. The runner stays harness-agnostic: it materializes +a generic `harnessFiles` list (`{path, content}`) and knows nothing about Claude. This scales: +ten harnesses means ten Python adapters, not ten first-party interface fields. + +For **Claude**, that native config is a `.claude/settings.json` file written into the session's +working directory. The Python claude adapter renders it; the Claude ACP adapter reads it. The Claude ACP adapter reads it, because it builds the underlying SDK query +with `settingSources: ["user", "project", "local"]` (`acp-agent.js:954`). Through that one file +the runner sets: + +- the permission mode, via `permissions.defaultMode` (`default`, `acceptEdits`, `plan`, or + `bypassPermissions`), which the adapter reads at session start (`acp-agent.js:935`); +- per-tool rules, via `permissions.allow` / `deny` / `ask`, in Claude's rule syntax (`"Read"`, + `"Bash(npm run:*)"`, `"mcp____"`); +- `env` and `model`. + +This is the clean delivery path, and it is the only one. The other channel, the +`_meta.claudeCode.options` passthrough, never arrives: sandbox-agent strips `_meta` from the +session request (`index.d.ts:2778`). The settings file does not depend on that channel, so it +works over sandbox-agent today with no upstream change. The runner already owns the working +directory (a temp dir), so it writes the file before it creates the session. For the mode alone +there is also a runtime control, `session.setMode(modeId)`, if we ever want to switch mode +mid-run. + +For **Pi**, the lever is thinner. Pi exposes no settings file and no permission mode over the +ACP bridge; its probe reports `permissions: false`. Pi's Layer 1 is its built-in tool selection, +`builtin_names`: which native tools (read, write, edit, bash) the model is given at all. The +runner must honor that list, which it drops today. If Pi later grows a config surface, it +attaches here. + +### Layer 2: sandbox permission (the security boundary) + +An optional `sandbox_permission` field on `AgentConfig` declares what the process may physically +do: its network egress and its filesystem access. The backend enforces it when it provisions +the sandbox. The harness does not, and the runner logic does not. + +This layer is the only real boundary for the network and the filesystem. A harness tool rule can +hide Claude's `WebFetch`, but Pi can still `curl` from `bash`, so "no web" is a guarantee only +when the sandbox blocks egress. On Daytona the backend sets `networkBlockAll` or a +`networkAllowList` (CIDR ranges) at create time. The local sidecar and the future local SDK have +no sandbox, so they cannot enforce this layer at all. + +That gap must stay honest. When a config asks for a network or filesystem guarantee that the +chosen backend cannot deliver, the run fails loud, unless the author sets an explicit per-axis +opt-out for local development. We never tell the author the web is off when it is not. Filesystem +confinement is not real on any backend today, so Layer 2 ships network first and declares +filesystem without enforcing it. + +### Layer 3: tool permission (the sidecar-managed permission policy) + +This layer is the sidecar's own permission policy, and it carries the human-in-the-loop gate. It +subsumes `permission_policy`: that auto/deny switch is just this layer's global default. + +For each tool, the author assigns one permission, stored on the tool's own spec: + +- **allow.** The call runs with no prompt. If the tool is one we resolved (a gateway or + code tool, not a harness builtin), the runner runs it through the relay. This is today's + auto-accept behavior. +- **ask.** The runner raises a human-in-the-loop request and waits for the answer. For now it + asks the user in the playground chat. Later it raises a durable approval event, so a triggered + or scheduled run can be answered even when no one has the chat open (Flow 7). +- **deny.** The call never runs. + +The permission lives next to the thing it governs. A resolved tool carries its permission on +its tool spec; an MCP server carries one on its server spec, which the runner renders as a +whole-server `mcp__` rule or a per-tool `mcp____` rule. Anything with no +explicit permission falls back to the global default, `permission_policy`. Harness builtins have +no spec, so their permission is rendered in Layer 1 instead: Claude builtins as settings.json +rules, Pi builtins as `builtin_names` selection. + +Where Layer 3 is enforced depends on where the tool runs, and this is the subtle part. There +are two cases. + +Resolved tools (gateway, code) run in the runner, through the relay. The runner is the choke +point, so it applies the permission directly: allow runs the call, ask parks it, deny +refuses it. This works the same on `pi` and `claude`. + +Harness builtins (Claude `Bash`/`Edit`/`Read`, Pi `bash`/`read`) run inside the harness, where +the runner cannot intercept them. For Claude, the settings.json `allow`/`deny`/`ask` rules set +the static baseline, and any call that still needs a decision arrives at the runner's responder +through Claude's permission callback, carrying the tool name. The responder applies the +permission there. For Pi, builtins cannot be gated, because Pi never asks. The only way to deny +a Pi builtin is to not grant it in Layer 1. + +The author should not have to label every tool by hand. Composio already tells us whether a tool +reads or writes, through hint tags (`readOnlyHint`, `destructiveHint`, `updateHint`) that the +catalog parser strips today. We will keep them, carry a read-only flag onto the tool, and +default read-only tools to allow and mutating tools to ask. The author overrides any +default. + +## Where `permission_policy` fits + +Folded in. `permission_policy` (auto or deny) is the global default of Layer 3: the answer the +sidecar gives for any tool with no explicit permission and no human. The HITL gate, the +per-tool permissions, and `permission_policy` are one thing, the sidecar-managed permission +policy. There is no separate permission plane. + +One implementation caution, so the fold does not blur two ideas in code. Layer 3 carries two +distinct things, and they must stay distinct internally even though they are one policy to the +author: + +- the **permission** of a tool — allow, ask, or deny. This is static capability config and + rides on the tool's spec. +- the **responder mode** for an `ask` that reaches the runtime with no human — block and wait for + the UI, emit a durable approval event, auto-allow, or deny. This is a runtime answering choice, + and `permission_policy` is its default. + +A tool set to `ask` always asks; what happens to that ask when nobody is watching is the +responder mode. Collapsing the two — treating `permission_policy` as if it were a fourth +permission — is the mistake to avoid. + +## A worked example: web off, read-only, on Daytona + +Take an author who sets `sandbox_permission.network: off` and a read-only tool profile, and runs +on Daytona. + +- Layer 1 gives Claude a `.claude/settings.json` that denies `Write`, `Edit`, `Bash`, + `WebFetch`, and `WebSearch`. Pi gets a `builtin_names` of `read` only. +- Layer 2 tells the Daytona backend to create the sandbox with `networkBlockAll: true`. +- Layer 3 has little to do here, because the write and web tools are already gone. Any resolved + tool the agent still calls runs through the relay under its permission. + +Claude now holds no write or web tools, and the VM has no egress. Pi holds only `read`, and its +`bash` is gone, so it cannot `curl` even if it tried, and the VM would block it anyway. Both +harnesses are safe, with the tool layer and the sandbox layer reinforcing each other. + +Run the same config on the local sidecar and Layer 1 still works: Claude loses the tools, Pi +gets only `read`. Layer 2 cannot: the local provider is the host, with no egress control. So the +run fails loud unless the author opted into unsafe local execution. That is the +honest-degradation rule in action. + +## Known risk: the runner-host execution surface + +The "sandbox layer is authoritative" claim holds only for tools that run inside the sandbox. +Resolved `code` **and** gateway/callback tools do not: the relay runs both in the runner process +(`tools/relay.ts` — `runCodeTool` for `code`, `callAgentaTool` for gateway). So a network-blocked +Daytona sandbox confines the harness's own `bash` and `WebFetch`, but it does not confine a +resolved code tool or a gateway tool, which run on the runner host with the runner's network. A +network-blocked agent can still egress through either. + +This project does not hand-wave that. The guarantee is gated at Phase 1, with a target and an +interim guard: + +- **Target:** move resolved-tool execution into the sandbox, so one boundary covers everything. +- **Interim guard:** when `network: off` or `exec: off`, reject or remove `code` tools, + gateway/callback tools, and stdio MCP servers (which are arbitrary commands), and confine the + runner host separately. + +The decision is tracked in `status.md`. Until it lands, `network: off` is only a full guarantee +for harness-native tools, so the runner must not *claim* `network: off` while a runner-side tool +is still reachable. + +## What each pairing can enforce + +The guarantees vary by harness and backend. The design must record that variance, not discover +it at runtime. + +| Capability | Pi tool layer | Claude tool layer | Daytona sandbox layer | Local sandbox layer | +| --- | --- | --- | --- | --- | +| network off | none (no web tool; curl remains) | deny WebFetch/WebSearch | enforce (networkBlockAll) | cannot, fail loud | +| network allowlist | none | none (no per-host tool gate) | enforce (networkAllowList CIDR) | cannot, fail loud | +| no code execution | drop `bash` from builtins | deny Bash + mode | partial (interpreters remain) | tool layer only | +| read-only | drop write/edit/bash | deny Write/Edit/Bash | no fs jail today | no fs jail today | + +The empty cells are the honest gaps. Pi has no web tool, so its web access is purely a sandbox +concern. Neither harness has a per-host web gate, so a network allowlist is a sandbox guarantee +for both. This is why the network boundary has to live in Layer 2 to be real. + +## Decisions + +1. Three layers, three jobs. Harness configuration, sandbox permission, tool permission. +2. Claude config ships as `.claude/settings.json`, written into the session cwd before session + start. No `_meta`, no upstream change. +3. MCP permissions are settings.json `mcp__` rules. The separate per-server `tools` allowlist + that the runner parses but never enforces is dropped. +4. Pi MCP stays out of scope, and follows the Claude pattern when built. +5. Composio hints drive Layer 3 defaults: keep them, carry a read-only flag, default read-only + to allow and mutating to ask. +6. The sandbox layer is authoritative for the network; it declares filesystem confinement but + enforces none today (no backend has a real fs jail). The tool layer is best effort. A run + fails loud when a backend cannot deliver a requested guarantee. diff --git a/docs/design/agent-workflows/projects/capability-config/research.md b/docs/design/agent-workflows/projects/capability-config/research.md new file mode 100644 index 0000000000..8c630e6b2e --- /dev/null +++ b/docs/design/agent-workflows/projects/capability-config/research.md @@ -0,0 +1,204 @@ +# Research: current state and insertion points + +Self-contained map of how the runtime governs an agent today and where the three layers attach. +All claims trace to code or installed package source. The deep web/exec/read/write current-state +cut lives in `../../scratch/capability-map.md`; this doc adds the enforcement mechanics and the +exact insertion points for the backend, the runner, and the frontend. Line numbers are accurate +as of 2026-06-23 and a few are approximate (flagged); reconfirm at implementation time. + +## 1. Enforcement mechanics (the load-bearing library facts) + +**Claude config reaches the harness through `.claude/settings.json`.** The Claude ACP adapter +builds the underlying SDK query with `settingSources: ["user", "project", "local"]` +(`@zed-industries/claude-agent-acp` `acp-agent.js:954`), so the SDK reads +`/.claude/settings.json` and honors its `permissions.allow` / `deny` / `ask` rules. The +adapter reads `permissions.defaultMode` for the initial permission mode (`acp-agent.js:935`). +The `_meta.claudeCode.options` channel (disallowedTools, permissionMode) is unusable, because +sandbox-agent strips `_meta` from the session request +(`sessionInit?: Omit`, `sandbox-agent/dist/index.d.ts:2778`). So the +settings file is the one clean delivery path. For the mode alone there is a runtime control, +`session.setMode(modeId)` (`index.d.ts:3064`; modes `default`/`acceptEdits`/`plan`/ +`bypassPermissions`, `acp-agent.js:1046-1078`). The runner owns the cwd (a temp dir), so it can +write the file before `createSession`. + +**Pi has no permission gate over ACP, and its built-in tool restriction is backend-dependent.** +Its permission probe reports `permissions: false`. The built-in tool lever (S0b finding, verified +in code) is real but only reachable on one backend: + +- The Pi SDK natively supports restriction: `createAgentSession({ tools, excludeTools, noTools })` + and the CLI flags `--tools` / `--no-builtin-tools` / `--exclude-tools` + (`@earendil-works/pi-coding-agent@0.79.4` `dist/core/sdk.d.ts`, `dist/cli/args.js:79-96`). +- The **in-process** Pi engine already uses it: `pi.ts:311-314` passes `tools: toolAllowlist` + (built from `request.tools`) into `createAgentSession()`. So Pi Layer 1 **works on the + in-process / local backend**. +- The **sandbox-agent ACP** path does not. `pi-acp@0.0.29` hardcodes the spawn as + `pi --mode rpc --no-themes` (`pi-acp/dist/index.js:134-142`) and its `newSession` accepts only + `cwd` + `mcpServers` (`index.js:1701`) — no `tools`/`excludeTools`/`noTools` forwarded. And + `sandbox_agent.ts:208-212` passes only `{ agent, cwd, sessionInit: { cwd, mcpServers } }`, + dropping `request.tools` (`run-plan.ts:101`). + +So Pi's Layer 1 is supported in-process and **unsupported over sandbox-agent ACP** until pi-acp +forwards the flags. Design consequence: honor `builtin_names` on the in-process backend, and +**fail loud** when an author requests Pi built-in restriction on a sandbox-agent backend +(`buildRunPlan` validation in `sandbox_agent.ts`, ~:108-115), rather than silently granting the +full set. Future lever: patch/fork pi-acp to add `--tools`/`--exclude-tools` to the spawn args +(or upstream it), which would lift the restriction to the ACP path too. + +**The permission responder seam already exists.** When the harness raises an ACP permission +request, `attachPermissionResponder` emits an `interaction_request` event and asks a `Responder` +(`engines/sandbox_agent/permissions.ts:16-45`). Today `PolicyResponder` answers headlessly from +`permission_policy` (`responder.ts:44-62`). The request carries the tool call, so a per-tool +responder can branch on the tool name. + +**Resolved tools run in the runner, not the sandbox.** Gateway and code tools execute runner-side +through the relay (`tools/relay.ts`), regardless of backend. This is the basis for enforcing +Layer 3 at the relay, and it is also the security caveat in section 5. + +## 2. Backend, SDK, and runner insertion points + +From a code sweep on 2026-06-23. Reconfirm line numbers when implementing. + +**Config DTOs** (`sdks/python/agenta/sdk/agents/dtos.py`): +- `AgentConfig` (~309-369) holds `instructions`, `model`, `tools`, `mcp_servers`, `skills`, + `harness_options`. Add optional `sandbox_permission` here. +- `RunSelection` (~372-395) already holds `harness`, `sandbox`, `permission_policy`. +- `SessionConfig` (~571-620) is the wire bag. Per-tool permissions ride on each `ToolSpec`, not + a separate map here; MCP permissions ride on each `mcp_servers` entry. +- `PiAgentConfig` / `ClaudeAgentConfig` / `AgentaAgentConfig` (~468-564) and their `wire_tools()` + are where tool/permission serialization happens. +- `ToolSpec` (`tools/models.py:95-157`) already has `needs_approval` and `render`; add the + permission (always-allow/ask/deny) and the `read_only` flag, and serialize both in + `to_wire()`. Each `mcp_servers` entry gains a matching permission field. + +**Schema generation:** +- `AgentConfigSchema` (`sdk/utils/types.py` ~1065-1138) is the `agent_config` catalog type the + playground renders. Add `sandbox_permission`, and a per-tool permission shape on `tools`. +- `services/oss/src/agent/schemas.py` holds `_DEFAULT_AGENT_CONFIG` and the `x-ag-type-ref` + reference; add the default for the new field. + +**Concrete `sandbox_permission` schema (decision 11).** The first-slice shape, kept minimal and +extensible: + +```python +class NetworkEgress(BaseModel): + mode: Literal["on", "off", "allowlist"] = "on" + allowlist: list[str] = [] # CIDR ranges; honored when mode == "allowlist" + +class SandboxPermission(BaseModel): + network: NetworkEgress = NetworkEgress() + filesystem: Optional[Literal["on", "readonly", "off"]] = None # declared, NOT enforced today + enforcement: Literal["strict", "best_effort"] = "strict" # strict = fail loud; best_effort = local opt-out +``` + +Daytona maps `network.mode == "off"` → `networkBlockAll: true`, and `"allowlist"` → +`networkAllowList: `. `filesystem` is carried and surfaced but enforces nothing until a +backend gains an fs jail. `enforcement: "strict"` makes a backend that cannot deliver a requested +network guarantee (local sidecar, local SDK) fail loud; `"best_effort"` is the per-axis opt-out +for local development. Named presets (e.g. "locked down") are FE sugar deferred to S4, not a wire +concept. + +**Ports for Layer 2 — the policy crosses the Python→TS wire.** Daytona is provisioned in the TS +runner (`buildSandboxProvider`), not in the Python `Backend.create_sandbox`, so the policy must +travel on the run request, not only as a Python call argument. The full path (Codex-confirmed): + +- Python authoring → wire: add `sandbox_permission` to `AgentConfig` + (`sdks/python/agenta/sdk/agents/dtos.py`), carry it on `SessionConfig`, and serialize it in + `request_to_wire` (`sdk/agents/utils/wire.py`) so it lands in the `/run` request. +- TS runner: surface it on `AgentRunRequest` (`services/agent/src/protocol.ts`), thread it through + `buildRunPlan` (`engines/sandbox_agent/run-plan.ts`) into `buildSandboxProvider`, and set it on + the Daytona `create` object (`engines/sandbox_agent/provider.ts` ~14-43, which sets + `snapshot`/`target`/`envVars`/`ephemeral` only today) as `networkBlockAll` / `networkAllowList`. +- The golden wire fixtures (`.../golden/run_request.*.json`) and both contract tests + (`test_wire_contract.py`, `tests/unit/wire-contract.test.ts`) move together with the new field. +- The Python `Backend.create_sandbox` (`interfaces.py` ~155, parameterless) still gains the typed + policy for the local/in-process backend path, and `Environment` (~177-232) derives it from + `session_config`. For shared sandboxes (`sandbox_per_session=False`), reject a post-create + policy change or cache by policy. But the wire path above is the one that reaches Daytona. + +**Runner enforcement:** +- `run-plan.ts` (~67-145) builds the cwd; write `.claude/settings.json` here or just before + `sandbox.createSession` in `sandbox_agent.ts` (~195). +- `responder.ts` `PolicyResponder` and `permissions.ts` `attachPermissionResponder`: thread the + per-tool map into the responder. +- `relay.ts` `executeRelayedTool` (~92-113): enforce deny/ask/allow for resolved tools. +- `mcp.ts:26`: the per-server `tools` allowlist is parsed but unenforced; replace it with + settings.json `mcp__` / `mcp____` rules. + +## 3. Frontend insertion points + +From a code sweep on 2026-06-23. The form is generic-schema-driven, which is the good news: +fields that the schema declares render through the existing pipeline. + +**The agent config form** (`web/packages/agenta-entity-ui/src/DrillInView/SchemaControls/`): +- `AgentConfigControl.tsx` (the composite, ~186-323) renders instructions, model, tools, + mcp_servers, harness, sandbox, permission_policy. It reads the live `harness` off the config + object. New sections render here. +- `SchemaPropertyRenderer.tsx` (~80-217) routes a schema field to a control by type + (`enum` → EnumSelectControl, `grouped_choice` → GroupedChoiceControl, `agent_config` → + AgentConfigControl). A custom `x-ag-type` dispatches to a custom control. +- `McpServerItemControl.tsx` and `ToolItemControl.tsx` are the per-item controls. + +**Already present and reusable:** +- CORRECTION (S3a, verified 2026-06-23): the FE does **not** round-trip + `tool.agenta_metadata.permission_mode` today — there are zero `permission_mode` references in + `web/`. The earlier "head start" claim was a future-state assumption. `ToolItemControl` / + `AgentConfigControl` round-trip a free-form `agenta_metadata` bag with no permission. So S4 must + **build** the per-tool permission control, not just wire it. Good news: the SDK `permission` + field accepts `permission_mode` / `permissionMode` via `AliasChoices`, so if S4 writes + `agenta_metadata.permission_mode` with the `allow|ask|deny` vocabulary, it deserializes into + `permission` with no mapping and no breaking change. +- The HITL "ask" surface DOES have a head start (confirmed in S2 review): the agent chat uses + ai-sdk `useChat` and already exposes `addToolApprovalResponse` + (`web/oss/src/components/AgentChatSlice/AgentChatPanel.tsx:86`), and `ToolPart.tsx:153` already + renders approve/deny buttons. The missing piece for S5 is the runtime parked/resumed path: + mapping the runner's `interaction_request` onto the ai-sdk approval part and resuming the call. + +**Schema and persistence flow:** the schema arrives through the workflow molecule +(`agenta-entities/src/workflow/state/molecule.ts`, `parametersSchema`), and config edits persist +through `updateConfiguration` → `updateWorkflowDraftAtom`. New fields under `data.parameters.*` +persist automatically once the schema declares them and `AgentConfigControl` calls `setField`. + +**Per-harness gating is not built.** Today every field renders unconditionally. Gating reads the +live `harness` value and a harness-capabilities map (the `/inspect` capabilities document from +`../harness-capabilities/`) and hides inapplicable fields. + +## 4. Composio read/write metadata + +Composio returns MCP behavioral hint tags per action (`readOnlyHint`, `destructiveHint`, +`updateHint`, `idempotentHint`, ...). The catalog parser strips them as noise +(`api/oss/src/core/tools/providers/composio/catalog.py:278,362`), and `ToolCatalogAction` / +`ToolCatalogActionDetails` (`api/oss/src/core/tools/dtos.py:41-54`) carry no mutation field. So +the read-vs-write signal exists at the source and we discard it. Phase 0 keeps it: derive +`read_only` from `readOnlyHint` (and treat `destructiveHint`/`updateHint` as mutating), carry it +on the catalog action and the resolved tool, and default Layer 3 permissions from it. + +## 5. Security caveat: the runner-host execution surface + +This is the sharpest correctness risk and the design must not hide it. `executeRelayedTool` +(`tools/relay.ts` ~:92-113) runs **both** resolved `code` tools (`runCodeTool`, ~:101) and +gateway/callback tools (`callAgentaTool` over HTTP, ~:106) in the runner process, not in the +sandbox. So Daytona's `networkBlockAll` confines the harness's own `bash`/`WebFetch` (which run +in the VM) but does **not** confine a resolved `code` tool *or* a gateway/callback tool, both of +which run on the runner host with the runner's network. A network-blocked Daytona agent can still +egress through either. Two ways out, gating the `network: off` guarantee in Phase 1 (S1g): + +- **(a) target:** move resolved-tool execution into the sandbox, so the sandbox plane is truly + authoritative for every tool. +- **(b) interim guard:** keep the relay runner-side, but when `network: off` or `exec: off`, + reject or remove `code` tools, **gateway/callback tools**, and stdio MCP servers (MCP servers + are arbitrary commands), and confine the runner host separately. The guard must cover + gateway/callback, not only `code` and MCP — that was the gap Codex caught. + +## 6. Uncertainties to resolve during implementation + +1. How the Layer 2 policy threads into `buildSandboxProvider` (no config param today). +2. Mutation detection for `read_only` enforcement on arbitrary resolved-tool code (explicit flag + vs input inspection). The honest first cut is to treat `read_only` as an advisory default for + the permission, not a hard runtime block on resolved tools. +3. `LocalBackend.create_sandbox` signature parity with the new policy parameter. +4. The exact `.claude/settings.json` contents validated against Claude Code's settings schema, + and the `mcp____` naming validated on a live run. +5. Resolved: per-tool permissions live on the tool spec, and MCP permissions live on the MCP + server spec. (The FE does NOT round-trip `permission_mode` yet — see the S3a correction in + section 3; S4 builds that control.) + Keep the wire field name consistent across SDK, wire, and FE. diff --git a/docs/design/agent-workflows/projects/capability-config/status.md b/docs/design/agent-workflows/projects/capability-config/status.md new file mode 100644 index 0000000000..095d2d541d --- /dev/null +++ b/docs/design/agent-workflows/projects/capability-config/status.md @@ -0,0 +1,330 @@ +# Status + +Source of truth for where this project stands. Keep it current. + +## State + +**Code-complete, reviewed, green. Live QA + branch pending.** All three layers plus Phase 0 +are implemented across backend, runner, and frontend, each slice paired with a review and tests. +Full suites pass together: **293 SDK agents + 15 API tools + 10 services/oss + 177 TS**, tsc and +ruff clean. What remains is (1) the live QA pass against a deployed stack (checklist in +`plan.md` S6) and (2) landing the work on a GitButler lane. Codex reviewed the project (below) +and its feedback is folded in. Built under the `implement-feature` skill, slice by slice. + +## Live QA (2026-06-24, against :8280, direct sandbox-agent /run) + +Ran on the deployed stack (`agenta-ee-dev-wp-b2-rendering`, sidecar mounts `services/agent/src` + +tsx, so a restart deployed the runner changes). Drove `/run` directly with forced un-guessable +tokens. **No feature-code bugs found.** + +- **L3 per-tool `deny` — PASS.** Identical code tool, only `permission` flipped: `deny` → + `tool_result="Tool 'get_build_id' is denied by policy."`, token `BUILDID_A7F3_QA` absent, code + never ran; `allow` → token present. (relay.ts enforcement.) +- **L1 Claude `.claude/settings.json` — write PASS, behavior BLOCKED.** Captured the file the + runner actually wrote mid-run: `{"permissions":{"deny":["WebFetch","WebSearch"]}}` (exactly the + `claudeSettings.deny`). The behavioral half (Claude refusing WebFetch) needs a real + `secrets.ANTHROPIC_API_KEY` — none was obtainable (vault scan correctly safety-denied), runs die + at Claude auth. The write is the mechanism; Claude honoring it is its documented `settingSources` + behavior (code-verified earlier). +- **L2 runner-host guard — PASS.** daytona+code+`network:off`/strict and local+`network:off`/strict + both rejected at plan time with the runner-host / local-cannot-enforce messages; `best_effort` + control allowed the run. (run-plan.ts guard.) +- **L2 Daytona `networkBlockAll` — INCONCLUSIVE (environment, not feature).** Code maps + `off`→`{networkBlockAll:true}` correctly and the field reaches Daytona without error, but the + Daytona org blocks ALL outbound, so the `network:on` positive control also fails. Needs a Daytona + target with working egress to demonstrate off-vs-on. + +**Finding — backend-routing footgun (now hardened):** `backend:"pi"` (legacy in-process +`engines/pi.ts`) enforced NONE of the three layers — a silent bypass. Only `backend:"sandbox-agent"` +(the default) enforces. Addressed by making the in-process engine fail loud (see changelog). + +What still needs a live run before full sign-off: the Daytona egress positive control (needs an +egress-capable target) and the Claude behavioral refusal + HITL multi-turn round-trip (need a +project Anthropic key). The enforcement LOGIC is otherwise live-proven. + +## Decisions made + +1. **Three layers, three jobs.** Harness configuration (Layer 1), sandbox permission (Layer 2), + tool permission (Layer 3). Each has one enforcement point. +2. **Claude config ships as `.claude/settings.json`,** written into the session cwd before + session start. It carries the permission mode and the per-tool allow/deny/ask rules. No + `_meta`, no upstream sandbox-agent change. +3. **MCP permissions are settings.json `mcp__` rules** (`mcp__` or + `mcp____`). The unenforced per-server `tools` allowlist is dropped. +4. **Pi MCP stays out of scope,** and adopts the Claude pattern when built (tracked in + `../harness-capabilities/`). +5. **Composio read/write hints drive Layer 3 defaults:** keep the hints, carry a `read_only` + flag, default read-only to always-allow and mutating to ask. +6. **The sandbox layer is authoritative for the network.** It declares filesystem confinement but enforces none today (no fs jail on any backend). The tool layer is best + effort. A run fails loud when a backend cannot deliver a requested guarantee. +7. **Scope is end to end, including the playground frontend:** the config form (Phase 4) and the + chat tool-approval surface (Phase 5). +8. **`sandbox_permission` is the confirmed name for Layer 2.** +9. **`permission_policy` folds into Layer 3 as its global default.** The HITL gate, the per-tool + permissions, and `permission_policy` are one sidecar-managed permission policy. There is no + separate permission plane. +10. **Permissions live on the spec they govern:** a resolved tool's permission on its tool + spec, an MCP server's permission on its server spec. Not a separate map. Harness builtins + have no spec, so their permission is rendered in Layer 1 (Claude settings.json rules, Pi + `builtin_names`). +11. **`sandbox_permission` shape (S1).** A typed object: `network` = `{ mode: on|off|allowlist, + allowlist: [cidr] }` (default `on`); `filesystem` = `on|readonly|off` declared but **not + enforced** today; `enforcement` = `strict|best_effort` (default `strict` = fail loud when the + backend cannot deliver the requested guarantee; `best_effort` is the per-axis local opt-out). + The first slice ships `network: off` enforcement on Daytona; `allowlist` and named presets are + follow-ups (presets become FE sugar in S4). Concrete schema in `research.md` §2. +12. **Runner-host hole → interim guard now, sandbox-execution later (S1g).** Best-judgment call + (user authorized "use best judgement, note it"): ship the interim guard — when `network: off` + or `exec: off`, reject at plan time any runner-side-executed tool (`code`, gateway/callback, + stdio MCP) unless it can run inside the sandbox. The target (move resolved-tool execution into + the sandbox so one boundary covers everything) is recorded as deferred future work, not built + in this project. The runner must never *report* `network: off` while a runner-side tool is + still reachable. + +## Resolved (2026-06-23, user) + +- `sandbox_permission` is the confirmed name for Layer 2. +- `permission_policy` folds into Layer 3 as the global default of the sidecar-managed permission + policy. The HITL gate is the same thing, not a separate plane. +- Per-tool permissions live on the tool spec; MCP permissions live on the MCP server spec. + +## Codex review (2026-06-23) + +Codex (gpt-5.5, xhigh, read-only) reviewed the project. Verdict: "Good direction, not ready to +implement as written" — the three-layer shape is right, three seams need work. Summary of +its seven points: + +**Acting on (Codex correct, verified in code):** + +1. **Runner-host guard is under-scoped and too late.** `relay.ts` `executeRelayedTool` runs + *both* `code` (`runCodeTool`, ~:101) and gateway/callback (`callAgentaTool`, ~:106) tools in + the runner process. The interim guard must cover gateway/callback too, not just `code` + stdio + MCP, and it gates Phase 1's `network: off` acceptance, not Phase 3. Folded into the guard + scope in `proposal.md`/`research.md` and into Phase 1 acceptance in `plan.md`. +2. **Layer 2 wire path was vague.** Daytona is provisioned in the TS runner's + `buildSandboxProvider`, not in `Backend.create_sandbox` (parameterless). The policy must cross + the Python→TS wire: `SessionConfig` → `request_to_wire` → `AgentRunRequest` → `buildRunPlan` → + provider `create`. Pinned in `research.md` section 2 and Phase 1. +3. **Pi Phase 0 may be impossible as written.** The runner does drop `request.tools` + (`run-plan.ts:101`), but `pi-acp@0.0.29` spawns `pi --mode rpc --no-themes` with **no `tools` + field on `newSession`** (`index.js:135`). Phase 0's Pi half is now an investigation: find Pi's + real control surface (CLI args, a Pi config file, or an ACP session field) or mark Pi + builtin-restriction unsupported on sandbox-agent and fail loud. Verified in code. +4. **Filesystem-authority contradiction.** The proposal said the sandbox layer is authoritative + for the filesystem in one place and "declared, not enforced" in another. Corrected to: Layer 2 + is authoritative for the network; it declares filesystem but enforces nothing today. +5. **Phase 5 is more "wire the runtime" than "build UI."** The approve/deny buttons already exist + (`ToolPart.tsx:153`) on top of `addToolApprovalResponse`. The real work is the parked/resumed + responder path, not rendering. Sharpened in `plan.md` Phase 5. + +**Holding the user's decisions (Codex conflicts, recorded not applied):** + +6. **Keep `sandbox_permission`.** Codex prefers `sandbox_policy`/`environment_policy` because + "permission" is overloaded. The user confirmed `sandbox_permission` twice; keeping it. Note + recorded so the objection is not lost. +7. **Keep `permission_policy` folded into Layer 3.** The user decided this. Codex's fair + sub-distinction is captured as a note: Layer 3 holds two things that must not be conflated in + code — the per-tool *permission* (allow/ask/deny, static, on the spec) and the *responder + mode* (what a headless `ask` does: block for UI / emit durable approval / auto-allow / deny). + `permission_policy` is the responder-mode default; both live in the one sidecar-managed policy. + +## Open questions + +1. **The runner-host execution hole (Phase 1/3).** Resolved tools — `code` **and** + gateway/callback — run on the runner host, so a network-blocked Daytona sandbox does not + confine them. Pick the target (move execution into the sandbox) versus the interim guard + (reject `code`, gateway/callback, and stdio MCP when exec/network are off). Blocks the + `network: off` guarantee. See `research.md` section 5. +2. **`read_only` enforcement strength.** Is `read_only` a hard runtime block on resolved tools, + or only an advisory default for the permission? The honest first cut is advisory. +3. ~~**Pi's real control surface (Phase 0).**~~ **Resolved by S0b.** Backend-dependent: supported + in-process (`pi.ts:311` passes `tools`), unsupported over sandbox-agent ACP (`pi-acp@0.0.29` + forwards nothing). Design: honor `builtin_names` in-process, fail loud over sandbox-agent. +4. **The implementation branch.** No pre-named `capability-config` lane exists. Best-judgment + plan: a new GitButler lane `feat/agent-capability-config` created at Phase 6, anchored on the + appropriate parent (the `docs/agent-harness-capabilities` lane or `main`), confirmed with the + user before any push. Recorded per the "use best judgement, note it" instruction. + +## Slices (implementation cut) + +Smallest shippable, independently reviewable units. Each names its acceptance check. Status: +`todo` / `wip` / `done`. + +- **S0a — Composio `read_only` on the wire** (`done`, reviewed). Stopped stripping the Composio + mutation hints; `read_only` now flows catalog (`_derive_read_only`) → `ToolCatalogAction` / + `ResolvedAgentTool` → SDK `ToolSpec.to_wire()` (camelCase `readOnly`, `exclude_none`) → TS + `protocol.ts` `ResolvedToolSpec`. Golden + both contract tests updated. 218 SDK + 15 API + 7 TS + tests pass; reviewer APPROVED (precedence both-hints→mutating, no behavior change without a hint, + single resolution site covered). Unblocks S3 defaults. +- **S0b — Pi control-surface investigation** (`done`). Finding (in `research.md` §1): Pi built-in + restriction is **backend-dependent**. The Pi SDK supports `tools`/`excludeTools`/`noTools` + (+ CLI `--tools`/`--no-builtin-tools`); the in-process engine already passes it (`pi.ts:311`). + But `pi-acp@0.0.29` hardcodes `pi --mode rpc --no-themes` and forwards nothing, so restriction + is **unsupported over sandbox-agent ACP** → honor `builtin_names` in-process, fail loud over + sandbox-agent. Future lever: patch/fork pi-acp to add the flags to its spawn. +- **S1a — `sandbox_permission` config + wire plumbing** (`done`, reviewed). Model + (`NetworkEgress`/`SandboxPermission`) on `AgentConfig`, threaded `from_params` → + `_to_harness_config` (all 3 harnesses) → `request_to_wire` (`sandboxPermission`) → `protocol.ts` + `AgentRunRequest` → `buildRunPlan`/`RunPlan`. Catalog schema + `_DEFAULT_AGENT_CONFIG` declare + it; both `KNOWN_REQUEST_KEYS` guards + both contract tests updated; optionality proven (no key + when unset, claude golden omits it). 220 SDK + 111 TS pass, tsc + ruff clean. Reviewer APPROVED. + NO enforcement (deferred to S1b via `TODO(S1b)`). +- **S1b — Daytona enforcement + fail-loud** (`done`, reviewed). `daytonaNetworkFields` maps + `off`→`networkBlockAll:true`, `allowlist`(non-empty)→`networkAllowList:"a,b"`, + `allowlist`(empty)→`networkBlockAll:true` (block-all, the must-fix from review — empty list = + allow nothing, never default-open); `buildSandboxProvider` spreads it into the Daytona `create`. + `buildRunPlan` fails loud (`strict` + restricted) on the local sidecar. Daytona network fields + verified against `@daytonaio/sdk` (`networkBlockAll?: boolean`, `networkAllowList?: string`). + Live `network: off` egress check deferred to Phase 3. typecheck + 126 TS tests pass. +- **S1g — Runner-host guard** (`done`, reviewed). `buildRunPlan` rejects, at plan time (before + any cwd/temp-dir allocation), a `strict` + restricted-network run that carries a runner-side + tool: `executableToolSpecs` (code **and** gateway/callback — reviewer confirmed it filters out + only `kind:"client"`, so Codex's hole is closed) or a stdio MCP server (`hasStdioMcpServer`, + mirrors `mcp.ts` delivery rule). `best_effort` is the opt-out. Covered by run-plan unit tests. + +## Deferred follow-ups (defer-todo) + +- **S1b-py — Python in-process fail-loud.** A `network: off` + `strict` run on the in-process + Python SDK backend does NOT fail loud (it has no sandbox). `SessionConfig` intentionally omits + sandbox concerns, so this needs the typed policy threaded through `Backend.create_sandbox` / + `Environment.create_session` / `LocalBackend` (`sdks/python/agenta/sdk/agents/interfaces.py`, + `adapters/local.py`). Repro: run the in-process SDK path with `sandbox_permission.network=off`, + `enforcement=strict` — expect a loud failure, observe a silent unconfined run. +- **S1b-pi-inproc — TS in-process Pi fail-loud.** Symmetric gap inside `services/agent`: + `engines/pi.ts` (in-process Pi, not the sandbox-agent path) has no sandbox-permission handling, + so `network: off` + `strict` runs unconfined there while the local-sidecar path correctly + rejects. Add the same fail-loud check to the in-process engine. Reviewer-flagged, nice-to-have. +- **S4-readonly-fe — surface Composio `read_only` in the FE tool catalog.** The per-tool + permission control defaults from `read_only`, but the FE tool catalog / `ToolSelectionMeta` + (`ToolSelectorPopover.tsx`) doesn't carry `read_only` yet, so the default shows "Inherit policy" + until an author picks. The backend catalog already exposes `read_only` (S0a); plumb it from the + `/tools` catalog response into the added tool object so the default auto-populates. Layer 3 works + on explicit author choice without this; nice-to-have. +- **S2 — Layer 1 Claude `.claude/settings.json`** (`done`, reviewed). `claudeSettings` (mode + + allow/deny/ask) flows `harness_options["claude"]["permissions"]` → wire → `prepareWorkspace`, + which writes `/.claude/settings.json` for Claude (local `mkdirSync`+`writeFileSync`, + Daytona `mkdirFs`+`writeFsFile`), merging author rules with Layer-2-derived denies + (`network≠on`→WebFetch/WebSearch; `filesystem` readonly/off→Write/Edit), deduped. Pure + `buildClaudeSettings` returns `undefined` for Pi (no file) and when fully-open+unset. Structured + as `RuleSet[]` for S3. Reviewer live-verified the ACP adapter reads it + rejects invalid modes; + mkdir-before-write safe both ways. 136 TS + 274 SDK pass, tsc + ruff clean. Live rule-syntax + check deferred to Phase 3. mcp__ tool-spec rules + Layer-3 permissions deferred to S3. +- **S3a — Layer 3 permission plumbing** (`done`, self-reviewed). `permission` + (`allow`/`ask`/`deny`) on `ToolSpecBase` + `MCPServerConfig`/`ResolvedMCPServer` + `protocol.ts` + `ResolvedToolSpec`/`McpServerConfig`. `effective_permission()` default ladder: explicit wins → + `needs_approval`→ask → `read_only` true→allow/false→ask → unset (runner falls to + `permissionPolicy`). `AliasChoices` accepts the FE's `permission_mode`. Goldens + contract tests + updated (sub-key, no `KNOWN_REQUEST_KEYS` change). 148 TS + 286 SDK pass. NO enforcement (S3b). +- **S3b — Layer 3 enforcement** (`done`, reviewed). Relay enforces resolved-tool permission + (`resolvePermission`: deny→refusal string before any execution, allow→run, ask/unset→headless + `permissionPolicy`); `permissionPolicy` threaded into `startToolRelay`, same resolver as the + Claude-builtin responder. `claude-settings.ts` renders per-MCP-server `mcp__` rules + (name verified against `toAcpMcpServers`). Responder untouched (Claude builtins handled by S2 + settings.json + existing `PolicyResponder`); no fragile per-resolved-tool mcp rules. HITL "ask" + surfacing deferred to S5 (`TODO(S5)`). 166 TS pass. Reviewer APPROVED (deny-before-execute + + MCP-name both verified against downstream consumers). +- **S4 — Playground form** (`done`, reviewed + fixed). New controls: `SandboxPermissionControl` + (network mode/allowlist/filesystem/enforcement), `ClaudePermissionsControl` (mode + + allow/deny/ask, gated to Claude harness, collapsible advanced), per-tool `ToolPermissionControl` + (allow/ask/deny). Persist under `data.parameters.agent.*` (`sandbox_permission`, + `harness_options.claude.permissions` preserving sibling slices, top-level tool `permission`). + Non-destructive; lint + typecheck clean. Live browser check deferred to Phase 3. +- **S4b — Layer 3 persistence fix** (`done`). Review caught that per-tool permission was written + into `agenta_metadata` (stripped on save) AND that the authored config layer didn't carry it. + Fixed end to end: `ToolConfigBase.permission` (AliasChoices), `_copy_tool_metadata` + + `_apply_tool_metadata` + `platform/gateway.py` `CallbackToolSpec` (the gateway/playground path, + an extra miss) all carry it, FE writes top-level `permission` (survives strip). Round-trip + tests prove config dict → `ToolSpec.to_wire()` carries it (52 pass). Also fixed the ` ` + placeholder cosmetic. +- **S5 — Playground HITL parked/resume** (`done` core, live round-trip deferred). The HITL path + was ~60% built (responder seam, `interaction_request` emission, Vercel egress/ingress, FE + approve/deny UI + auto-resume). This slice added the missing runtime: `HITLResponder` + (responder.ts) applies a stored approval decision on resume, parks (deny) on first occurrence + when a human surface is present (`hasHumanSurface = !!request.sessionId`; verified `/invoke` + leaves it None so headless is byte-identical to `PolicyResponder`), and falls to `basePolicy` + headless. `extractApprovalDecisions` reads the existing Vercel-converted `tool_result` + `{approved}` envelope by `toolCallId` — no new wire carrier. 177 TS pass. DEFERRED to live + verification: the multi-turn round-trip (does cold-replay re-raise the gate on turn 2 so the + decision applies, and does the harness re-attempt the tool) — exact live test in `open-issues.md`. + Relay-tool HITL deferred as S5.2 (relay is fire-and-forget; see `open-issues.md`). +- **S6 — Tests + live verification** (`todo`). Unit, golden wire, and the QA-matrix cells the + enforceability table claims; pin one green run as a replay test. + +Current run starts with **S0a** (independent, low-risk) and **S0b** (read-only investigation) in +parallel. + +## Next steps + +1. **Live QA** against a deployed stack: run the `plan.md` S6 checklist (Daytona egress block, + Claude settings.json, per-tool deny, HITL round-trip). Needs SDK container reload + a + sandbox-agent image rebuild + Daytona/Anthropic keys. +2. **Land the branch.** The work is on the GitButler lane `feat/agent-capability-config`. Push + + open the PR when ready (`but push ` then `gh pr create --base `). +3. **After live QA:** update the user-facing `documentation/` pages and resolve the deferred + follow-ups (S1b-py, S1b-pi-inproc, S4-readonly-fe, S5.2 relay HITL, the S5 multi-turn round-trip). + +## Landing (branch handoff) + +The non-shared work is committed to the GitButler lane **`feat/agent-capability-config`** +(commit `97581d25ef`, based on `main`): 32 files, +2451 lines — all new code + the project +docs. Verified the concurrent skills-config slice was NOT swept in. + +**Left UNASSIGNED on purpose** (co-edited by the concurrent skills-config slice; both slices +extend the same core config/wire surface, so wholesale-staging would steal the other session's +hunks). Hunk-split these from the skills slice — or land the two slices together — before the +lane builds/pushes: + +- `sdks/python/agenta/sdk/agents/dtos.py` (SandboxPermission, ClaudePermissions, wire methods) +- `sdks/python/agenta/sdk/agents/utils/wire.py`, `adapters/harnesses.py`, `__init__.py` +- `sdks/python/agenta/sdk/utils/types.py` (AgentConfigSchema fields) +- `services/oss/src/agent/schemas.py` (`_DEFAULT_AGENT_CONFIG` default) +- `services/agent/src/protocol.ts` (the new wire interfaces), `engines/sandbox_agent.ts` + (HITLResponder + relay policy wiring), `engines/sandbox_agent/run-plan.ts` +- `web/.../SchemaControls/AgentConfigControl.tsx`, `index.ts` +- `sdks/python/oss/tests/.../golden/run_request.{pi,claude}.json`, `test_wire_contract.py` +- `services/agent/tests/unit/{wire-contract,sandbox-agent-run-plan,sandbox-agent-orchestration}.test.ts` + +**Finish + push** (after the shared files are assigned to the lane): + +```bash +but rub feat/agent-capability-config # per shared file/hunk +but commit feat/agent-capability-config --only -m "..." # the shared remainder +but push feat/agent-capability-config +gh pr create --head feat/agent-capability-config --base main +``` + +## Changelog + +- 2026-06-23: Project created from `../../scratch/capability-architecture.md`. Design, plan, + research, and status written. Scope extended to the playground frontend. +- 2026-06-23: Confirmed `sandbox_permission` naming; folded `permission_policy` into Layer 3 as + its global default; placed per-tool permissions on the tool spec and MCP permissions on the + MCP server spec. +- 2026-06-23: Codex reviewed (verdict above). Verified two claims in code: `pi-acp@0.0.29` + spawns `pi --mode rpc` with no `tools` field, and `relay.ts` runs gateway/callback tools + runner-side alongside `code`. Folded the feedback in, broadened the runner-host guard to + gateway/callback, fixed the filesystem-authority contradiction, and re-scoped Pi Phase 0 to an + investigation. Started `implement-feature`; cut the slice list (S0a–S6). +- 2026-06-23: Landed Phase 0 + Layer 2 (code-complete, reviewed). S0a (Composio `read_only`, + reviewed), S0b (Pi finding), S1a (`sandbox_permission` config + wire, reviewed), S1b+S1g + (Daytona enforcement + runner-host guard, reviewed; fixed the empty-allowlist→default-open + footgun to block-all). All green: 220 SDK + 126 TS tests, tsc + ruff clean. Live Daytona/Claude + end-to-end verification batched to a later Phase-3 pass. Two in-process fail-loud gaps deferred + (S1b-py, S1b-pi-inproc). Next: S2 (Layer 1 Claude settings.json). +- 2026-06-24: LIVE QA on :8280 + PR. Pushed PR #4811 (non-shared slice). Live-proved L3 deny + (token absent on deny, present on allow), the L1 settings.json write (captured the real file), + and the L2 runner-host guard (both reject messages + best_effort control). Daytona egress block + is code-correct but env-inconclusive (org blocks all egress); Claude behavioral refusal needs a + project Anthropic key. Found + FIXED + live-verified a footgun: `backend:"pi"` (in-process engine) + silently ignored all 3 layers; added `unenforceableCapabilityConfig` fail-loud guard in + `engines/pi.ts` (rejects restrictive sandbox_permission + deny/ask permissions), 8 tests, live + rejection confirmed. 185 TS green. Note: `engines/pi.ts` is in the shared-files set (carries the + guard); test `pi-capability-guard.test.ts` is new. +- 2026-06-24: Landed Layers 1 + 3 + the playground (code-complete, reviewed). S2 (Claude + `.claude/settings.json`, reviewed), S3a (permission plumbing), S3b (relay + MCP-rule + enforcement, reviewed), S4 (playground form, reviewed) + S4b (fixed the `agenta_metadata`-strip + + authored-config-layer gap so per-tool permission persists end to end), S5 (HITL cross-turn + responder core; multi-turn round-trip + relay HITL deferred to live verification / S5.2). Full + suites green together: 293 SDK + 15 API + 10 services/oss + 177 TS, tsc + ruff clean. Corrected + two stale research claims (FE `permission_mode` round-trip did not exist; Pi restriction is + backend-dependent). Remaining: live QA (S6 checklist) + push the branch. diff --git a/sdks/python/agenta/sdk/agents/adapters/claude_settings.py b/sdks/python/agenta/sdk/agents/adapters/claude_settings.py new file mode 100644 index 0000000000..ca9bf842fa --- /dev/null +++ b/sdks/python/agenta/sdk/agents/adapters/claude_settings.py @@ -0,0 +1,194 @@ +"""Layer 1 for Claude: render the harness's permission settings into a ``.claude/settings.json``. + +This is the claude adapter. It builds the full file CONTENT in Python (the translation used to +live in the TS runner's ``claude-settings.ts``); the runner is now a dumb file-writer that drops +whatever ``harnessFiles`` the adapter produced into the session cwd. The Claude ACP adapter reads +``/.claude/settings.json`` because it builds its SDK query with +``settingSources: ["user", "project", "local"]`` (and applies ``permissions.defaultMode``); that +file is the only clean Claude-config path because the sandbox-agent daemon strips ACP ``_meta``. + +Three rule sources merge here: + - the AUTHOR's options (Layer 1), read from the generic ``harness_options["claude"]["permissions"]`` + slice: ``default_mode`` + per-tool ``allow``/``deny``/``ask`` strings. This is the only place the + claude-specific shape of that slice is known. + - rules DERIVED from ``sandbox_permission`` (Layer 2): baseline reinforcement of the sandbox + boundary as Claude-tool rules (block web tools when egress is off, block edits when the + filesystem is read-only/off). A safety floor, not the primary enforcement. + - rules DERIVED from per-MCP-server ``permission`` (Layer 3, S3b): each user MCP server with a + set permission becomes a whole-server ``mcp__`` allow/ask/deny rule. + +Layer 3 enforcement is split by tool source: resolved tools (code / gateway-callback) run +runner-side and are enforced at the relay, NOT here. Only the per-MCP-server permission lands in +this file. +""" + +from __future__ import annotations + +import json +from typing import Any, Dict, List, Optional, Sequence + +# Claude Code's four permission modes (its ``permissions.defaultMode``); any other authored value +# is dropped. +PERMISSION_MODES = frozenset({"default", "acceptEdits", "plan", "bypassPermissions"}) + +# Where the rendered settings land, relative to the session cwd. +SETTINGS_PATH = ".claude/settings.json" + + +def _string_list(value: Any) -> List[str]: + """Keep only the string entries of an authored allow/deny/ask value; default to ``[]``.""" + if not isinstance(value, list): + return [] + return [item for item in value if isinstance(item, str)] + + +def _parse_author_permissions(slice_: Any) -> Dict[str, Any]: + """Parse the untyped author block from ``harness_options["claude"]["permissions"]``. + + ``default_mode`` (also accepted as ``defaultMode``) survives only when it is one of the four + valid modes; ``allow``/``deny``/``ask`` become string lists. This is where the claude-specific + knowledge of that slice lives. Returns ``{mode?, allow, deny, ask}`` (mode omitted when unset + or invalid). + """ + if not isinstance(slice_, dict): + return {"allow": [], "deny": [], "ask": []} + out: Dict[str, Any] = {} + mode = slice_.get("default_mode", slice_.get("defaultMode")) + if isinstance(mode, str) and mode in PERMISSION_MODES: + out["mode"] = mode + out["allow"] = _string_list(slice_.get("allow")) + out["deny"] = _string_list(slice_.get("deny")) + out["ask"] = _string_list(slice_.get("ask")) + return out + + +def _dedupe(values: Sequence[str]) -> List[str]: + """Dedupe in first-seen order, dropping falsy entries.""" + seen: set[str] = set() + out: List[str] = [] + for value in values: + if not value or value in seen: + continue + seen.add(value) + out.append(value) + return out + + +def _rules_from_sandbox_permission(sandbox_permission: Any) -> Dict[str, List[str]]: + """Derive baseline Claude-tool rules from the Layer-2 sandbox boundary. + + These reinforce the declared boundary at the harness level (the sandbox provider is the real + enforcement): + - network not fully ``on`` (off / allowlist) -> deny the web tools (``WebFetch``, ``WebSearch``); + - filesystem ``readonly`` or ``off`` -> deny the mutating file tools (``Write``, ``Edit``). + + Accepts either a :class:`~agenta.sdk.agents.dtos.SandboxPermission` or a plain dict (network is + a nested object with a ``mode``; filesystem is a plain string). + """ + deny: List[str] = [] + if sandbox_permission is None: + return {"deny": deny} + + network = _get(sandbox_permission, "network") + network_mode = _get(network, "mode") if network is not None else None + if network is not None and (network_mode or "on") != "on": + deny.extend(["WebFetch", "WebSearch"]) + + filesystem = _get(sandbox_permission, "filesystem") + if filesystem in ("readonly", "off"): + deny.extend(["Write", "Edit"]) + + return {"deny": deny} + + +def _rules_from_mcp_permissions(mcp_servers: Any) -> Dict[str, List[str]]: + """Derive whole-server Claude rules from each MCP server's Layer-3 ``permission`` (S3b). + + Claude addresses a whole MCP server as ``mcp__`` (a per-tool rule is + ``mcp____``); the server name is the ``name`` carried to the runtime verbatim. + ``allow``/``ask``/``deny`` route to the matching list; a server with no permission contributes + nothing (falls back to the global policy). Accepts a list of + :class:`~agenta.sdk.agents.mcp.models.ResolvedMCPServer` or plain dicts. + """ + allow: List[str] = [] + ask: List[str] = [] + deny: List[str] = [] + for server in mcp_servers or []: + name = _get(server, "name") + permission = _get(server, "permission") + if not permission or not name: + continue + rule = f"mcp__{name}" + if permission == "allow": + allow.append(rule) + elif permission == "ask": + ask.append(rule) + elif permission == "deny": + deny.append(rule) + return {"allow": allow, "ask": ask, "deny": deny} + + +def _get(obj: Any, key: str) -> Any: + """Read ``key`` off a pydantic model (attribute) or a plain dict (item).""" + if obj is None: + return None + if isinstance(obj, dict): + return obj.get(key) + return getattr(obj, key, None) + + +def build_claude_settings_files( + harness_options: Optional[Dict[str, Any]], + sandbox_permission: Any = None, + mcp_servers: Any = None, +) -> List[Dict[str, str]]: + """Build the Claude ``settings.json`` as a generic ``harnessFiles`` entry, or ``[]`` if none. + + Reads the author's Layer-1 options from ``harness_options["claude"]["permissions"]``, merges + them with the Layer-2-derived rules (from ``sandbox_permission``) and the Layer-3-derived MCP + rules (from ``mcp_servers``), dedupes each list, and emits the smallest valid file: + ``permissions.defaultMode`` is set only when authored (and valid), and each allow/deny/ask list + appears only when non-empty. When there is nothing to write at all (no author options AND no + derived rules) it returns ``[]`` so the runner writes no file. + + Returns ``[{"path": ".claude/settings.json", "content": }]`` or ``[]``. + """ + author = _parse_author_permissions(_claude_permissions_slice(harness_options)) + + # Merge order: author rules first, then derived rules (Layer 2, then Layer 3). ``_dedupe`` + # keeps first-seen order, so an author rule wins its position and derived rules append. + sandbox_rules = _rules_from_sandbox_permission(sandbox_permission) + mcp_rules = _rules_from_mcp_permissions(mcp_servers) + + allow = _dedupe([*author["allow"], *mcp_rules.get("allow", [])]) + deny = _dedupe( + [*author["deny"], *sandbox_rules.get("deny", []), *mcp_rules.get("deny", [])] + ) + ask = _dedupe([*author["ask"], *mcp_rules.get("ask", [])]) + + permissions: Dict[str, Any] = {} + if "mode" in author: + permissions["defaultMode"] = author["mode"] + if allow: + permissions["allow"] = allow + if deny: + permissions["deny"] = deny + if ask: + permissions["ask"] = ask + + # Nothing authored and nothing derived -> no file (the boundary-free Claude run is unchanged). + if not permissions: + return [] + + content = json.dumps({"permissions": permissions}, indent=2) + return [{"path": SETTINGS_PATH, "content": content}] + + +def _claude_permissions_slice(harness_options: Optional[Dict[str, Any]]) -> Any: + """Pull the ``claude.permissions`` slice from the generic per-harness options map.""" + if not isinstance(harness_options, dict): + return None + claude = harness_options.get("claude") + if not isinstance(claude, dict): + return None + return claude.get("permissions") diff --git a/sdks/python/agenta/sdk/agents/mcp/models.py b/sdks/python/agenta/sdk/agents/mcp/models.py index 37c3f6806b..f782dd8c0b 100644 --- a/sdks/python/agenta/sdk/agents/mcp/models.py +++ b/sdks/python/agenta/sdk/agents/mcp/models.py @@ -4,7 +4,14 @@ from typing import Any, Dict, List, Literal, Optional -from pydantic import BaseModel, ConfigDict, Field, model_validator +from pydantic import AliasChoices, BaseModel, ConfigDict, Field, model_validator + + +# Layer-3 per-server permission (same value set as a tool's): ``allow`` runs with +# no prompt, ``ask`` raises a human-in-the-loop request, ``deny`` never runs. Absent means the +# runner falls back to the global ``permissionPolicy`` default. An MCP server carries no +# ``read_only`` hint, so there is no default to compute: an explicit author value or nothing. +Permission = Literal["allow", "ask", "deny"] class MCPServerConfig(BaseModel): @@ -18,6 +25,12 @@ class MCPServerConfig(BaseModel): url: Optional[str] = None secrets: Dict[str, str] = Field(default_factory=dict) tools: List[str] = Field(default_factory=list) + permission: Optional[Permission] = Field( + default=None, + validation_alias=AliasChoices( + "permission", "permission_mode", "permissionMode" + ), + ) @model_validator(mode="after") def _validate_transport(self) -> "MCPServerConfig": @@ -38,6 +51,7 @@ class ResolvedMCPServer(BaseModel): env: Dict[str, str] = Field(default_factory=dict, repr=False) url: Optional[str] = None tools: List[str] = Field(default_factory=list) + permission: Optional[Permission] = None @model_validator(mode="after") def _validate_transport(self) -> "ResolvedMCPServer": @@ -62,4 +76,6 @@ def to_wire(self) -> Dict[str, Any]: wire["url"] = self.url if self.tools: wire["tools"] = list(self.tools) + if self.permission is not None: + wire["permission"] = self.permission return wire diff --git a/sdks/python/agenta/sdk/agents/mcp/resolver.py b/sdks/python/agenta/sdk/agents/mcp/resolver.py index 6ce78162dd..1593f9de8d 100644 --- a/sdks/python/agenta/sdk/agents/mcp/resolver.py +++ b/sdks/python/agenta/sdk/agents/mcp/resolver.py @@ -63,6 +63,7 @@ async def resolve( env=env, url=server_config.url, tools=list(server_config.tools), + permission=server_config.permission, ) ) return resolved diff --git a/sdks/python/agenta/sdk/agents/platform/gateway.py b/sdks/python/agenta/sdk/agents/platform/gateway.py index 3fc9a41692..c297aee048 100644 --- a/sdks/python/agenta/sdk/agents/platform/gateway.py +++ b/sdks/python/agenta/sdk/agents/platform/gateway.py @@ -195,6 +195,8 @@ async def resolve( call_ref=str(raw_spec["call_ref"]), needs_approval=tool_config.needs_approval, render=tool_config.render, + permission=tool_config.permission, + read_only=raw_spec.get("read_only"), ) ) diff --git a/sdks/python/agenta/sdk/agents/tools/compat.py b/sdks/python/agenta/sdk/agents/tools/compat.py index d2ddf16b4b..033cafdc78 100644 --- a/sdks/python/agenta/sdk/agents/tools/compat.py +++ b/sdks/python/agenta/sdk/agents/tools/compat.py @@ -56,6 +56,12 @@ def _copy_tool_metadata( result["needs_approval"] = source["needs_approval"] if isinstance(source.get("render"), dict): result["render"] = dict(source["render"]) + # Layer-3 permission: accept any of the keys the FE may write (the playground used + # ``permission_mode``); the config model's ``Permission`` field validates the value. + for key in ("permission", "permission_mode", "permissionMode"): + if key in source: + result["permission"] = source[key] + break return result diff --git a/sdks/python/agenta/sdk/agents/tools/models.py b/sdks/python/agenta/sdk/agents/tools/models.py index 6e467f51dd..ecf2a5416a 100644 --- a/sdks/python/agenta/sdk/agents/tools/models.py +++ b/sdks/python/agenta/sdk/agents/tools/models.py @@ -19,13 +19,30 @@ def _empty_object_schema() -> Dict[str, Any]: return {"type": "object", "properties": {}} +# Layer-3 per-tool permission: ``allow`` runs with no prompt, ``ask`` raises a +# human-in-the-loop request, ``deny`` never runs. Absent means "fall back to the global +# ``permissionPolicy`` default" (the runner resolves that, in a later slice). +Permission = Literal["allow", "ask", "deny"] + + class ToolConfigBase(BaseModel): """Fields shared by every persisted tool declaration.""" - model_config = ConfigDict(extra="forbid") + model_config = ConfigDict(extra="forbid", populate_by_name=True) needs_approval: bool = False render: Optional[Dict[str, Any]] = None + # Layer-3 permission the author set on this tool. Mirrors + # ``ToolSpecBase.permission``: accepts ``permission_mode``/``permissionMode`` too (the keys + # the playground writes), so an FE-set value deserializes onto the ``extra="forbid"`` config + # without a breaking change. ``_apply_tool_metadata`` then carries it onto the resolved spec. + permission: Optional[Permission] = Field( + default=None, + validation_alias=AliasChoices( + "permission", "permission_mode", "permissionMode" + ), + serialization_alias="permission", + ) class BuiltinToolConfig(ToolConfigBase): @@ -114,6 +131,43 @@ class ToolSpecBase(BaseModel): serialization_alias="needsApproval", ) render: Optional[Dict[str, Any]] = None + read_only: Optional[bool] = Field( + default=None, + validation_alias=AliasChoices("read_only", "readOnly"), + serialization_alias="readOnly", + ) + # Layer-3 permission the author set on this tool. Accepts ``permission_mode`` + # too, the key the playground writes into ``agenta_metadata``, so an FE-set value + # deserializes without a later breaking change. Unset means "no explicit author choice"; + # ``effective_permission`` then derives a default from ``read_only`` / ``needs_approval``. + permission: Optional[Permission] = Field( + default=None, + validation_alias=AliasChoices( + "permission", "permission_mode", "permissionMode" + ), + serialization_alias="permission", + ) + + def effective_permission(self) -> Optional[Permission]: + """Resolve the permission that rides the wire, by this precedence: + + 1. An explicit author ``permission`` wins outright. + 2. Else, when ``needs_approval`` is set, the default is ``"ask"`` (approval beats the + read-only auto-allow: an author who asked to be prompted still gets prompted). + 3. Else, default from ``read_only``: ``True`` -> ``"allow"`` (read-only tools are safe to + auto-run), ``False`` -> ``"ask"`` (mutating tools prompt). + 4. Else (``read_only`` is ``None`` and nothing explicit) -> ``None`` (unset), so the runner + falls back to the global ``permissionPolicy`` default in a later slice. + """ + if self.permission is not None: + return self.permission + if self.needs_approval: + return "ask" + if self.read_only is True: + return "allow" + if self.read_only is False: + return "ask" + return None def to_wire(self) -> Dict[str, Any]: wire = self.model_dump( @@ -125,6 +179,11 @@ def to_wire(self) -> Dict[str, Any]: wire.pop("needsApproval", None) if not wire.get("env"): wire.pop("env", None) + permission = self.effective_permission() + if permission is not None: + wire["permission"] = permission + else: + wire.pop("permission", None) return wire diff --git a/sdks/python/agenta/sdk/agents/tools/resolver.py b/sdks/python/agenta/sdk/agents/tools/resolver.py index 54f4c8b03f..306b9d0134 100644 --- a/sdks/python/agenta/sdk/agents/tools/resolver.py +++ b/sdks/python/agenta/sdk/agents/tools/resolver.py @@ -40,6 +40,7 @@ def _apply_tool_metadata(tool_spec: ToolSpec, tool_config: ToolConfig) -> ToolSp update={ "needs_approval": tool_config.needs_approval, "render": tool_config.render, + "permission": tool_config.permission, } ) diff --git a/sdks/python/oss/tests/pytest/unit/agents/adapters/__init__.py b/sdks/python/oss/tests/pytest/unit/agents/adapters/__init__.py new file mode 100644 index 0000000000..0597a675ca --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/adapters/__init__.py @@ -0,0 +1 @@ +# Unit tests for the agent runtime adapters. diff --git a/sdks/python/oss/tests/pytest/unit/agents/adapters/test_claude_settings.py b/sdks/python/oss/tests/pytest/unit/agents/adapters/test_claude_settings.py new file mode 100644 index 0000000000..7b5c26a7b3 --- /dev/null +++ b/sdks/python/oss/tests/pytest/unit/agents/adapters/test_claude_settings.py @@ -0,0 +1,176 @@ +"""The Python claude adapter: ``build_claude_settings_files`` (Layer 1 + Layer 2/3 derivation). + +This is the translation that used to live in the TS runner's ``claude-settings.ts``. It merges +three rule sources into one ``.claude/settings.json``: the author's +``harness_options["claude"]["permissions"]`` slice, the Layer-2 sandbox-boundary derivation, and +the per-MCP-server Layer-3 permissions. The runner is now a dumb writer of the rendered +``harnessFiles`` entry this produces. +""" + +from __future__ import annotations + +import json + +from agenta.sdk.agents.adapters.claude_settings import build_claude_settings_files +from agenta.sdk.agents.dtos import SandboxPermission +from agenta.sdk.agents.mcp import ResolvedMCPServer + + +def _settings(files): + """Unwrap the single rendered file and parse its JSON content.""" + assert len(files) == 1 + assert files[0]["path"] == ".claude/settings.json" + return json.loads(files[0]["content"]) + + +def _claude(permissions): + return {"claude": {"permissions": permissions}} + + +def test_renders_author_mode_and_rules(): + files = build_claude_settings_files( + _claude( + { + "default_mode": "acceptEdits", + "allow": ["Read", "Bash(npm run:*)"], + "deny": ["Write"], + "ask": ["mcp__github__create_issue"], + } + ) + ) + assert _settings(files) == { + "permissions": { + "defaultMode": "acceptEdits", + "allow": ["Read", "Bash(npm run:*)"], + "deny": ["Write"], + "ask": ["mcp__github__create_issue"], + } + } + + +def test_content_is_json_dumps_indent_2(): + # The exact serialization matters (it is pinned by the golden); assert the indent-2 form. + files = build_claude_settings_files(_claude({"deny": ["Write"]})) + assert files[0]["content"] == json.dumps( + {"permissions": {"deny": ["Write"]}}, indent=2 + ) + + +def test_network_off_denies_web_tools(): + files = build_claude_settings_files( + None, SandboxPermission(network={"mode": "off"}) + ) + assert _settings(files)["permissions"]["deny"] == ["WebFetch", "WebSearch"] + + +def test_network_allowlist_denies_web_tools(): + files = build_claude_settings_files( + None, + SandboxPermission(network={"mode": "allowlist", "allowlist": ["10.0.0.0/8"]}), + ) + assert _settings(files)["permissions"]["deny"] == ["WebFetch", "WebSearch"] + + +def test_filesystem_readonly_denies_write_edit(): + files = build_claude_settings_files(None, SandboxPermission(filesystem="readonly")) + assert _settings(files)["permissions"]["deny"] == ["Write", "Edit"] + + +def test_filesystem_off_denies_write_edit(): + files = build_claude_settings_files(None, SandboxPermission(filesystem="off")) + assert _settings(files)["permissions"]["deny"] == ["Write", "Edit"] + + +def test_mcp_permission_deny_renders_server_rule(): + server = ResolvedMCPServer( + name="github", transport="http", url="https://x", permission="deny" + ) + files = build_claude_settings_files(None, None, [server]) + perms = _settings(files)["permissions"] + assert perms["deny"] == ["mcp__github"] + assert "allow" not in perms + assert "ask" not in perms + + +def test_mcp_permissions_route_to_their_lists_and_skip_unset(): + servers = [ + ResolvedMCPServer( + name="filesystem", transport="http", url="https://x", permission="allow" + ), + ResolvedMCPServer( + name="github", transport="http", url="https://x", permission="ask" + ), + ResolvedMCPServer( + name="shell", transport="http", url="https://x", permission="deny" + ), + ResolvedMCPServer(name="unset", transport="http", url="https://x"), + ] + perms = _settings(build_claude_settings_files(None, None, servers))["permissions"] + assert perms["allow"] == ["mcp__filesystem"] + assert perms["ask"] == ["mcp__github"] + assert perms["deny"] == ["mcp__shell"] + + +def test_merges_author_with_derived_and_dedupes(): + # Author `WebFetch` keeps its position; the network-derived `WebFetch` is deduped, and the + # filesystem-derived `Write`/`Edit` append. + files = build_claude_settings_files( + _claude({"default_mode": "plan", "deny": ["WebFetch"]}), + SandboxPermission( + network={"mode": "allowlist", "allowlist": ["10.0.0.0/8"]}, + filesystem="readonly", + ), + ) + perms = _settings(files)["permissions"] + assert perms["deny"] == ["WebFetch", "WebSearch", "Write", "Edit"] + assert perms["defaultMode"] == "plan" + + +def test_invalid_default_mode_dropped(): + files = build_claude_settings_files( + _claude({"default_mode": "yolo", "deny": ["Write"]}) + ) + perms = _settings(files)["permissions"] + assert "defaultMode" not in perms + assert perms["deny"] == ["Write"] + + +def test_accepts_camelcase_default_mode_alias(): + files = build_claude_settings_files(_claude({"defaultMode": "plan"})) + assert _settings(files)["permissions"]["defaultMode"] == "plan" + + +def test_accepts_plain_dicts_for_sandbox_and_mcp(): + # The builder duck-types its inputs, so plain dicts (not pydantic models) work too. + files = build_claude_settings_files( + None, + {"network": {"mode": "off"}}, + [{"name": "github", "permission": "deny"}], + ) + perms = _settings(files)["permissions"] + assert perms["deny"] == ["WebFetch", "WebSearch", "mcp__github"] + + +def test_empty_inputs_render_nothing(): + assert build_claude_settings_files(None) == [] + assert build_claude_settings_files({}) == [] + assert build_claude_settings_files(_claude({})) == [] + # network `on` + filesystem `on` derive nothing. + assert ( + build_claude_settings_files( + None, SandboxPermission(network={"mode": "on"}, filesystem="on") + ) + == [] + ) + # an MCP server with no permission contributes nothing. + assert ( + build_claude_settings_files( + None, None, [ResolvedMCPServer(name="x", transport="http", url="https://x")] + ) + == [] + ) + + +def test_non_claude_slice_renders_nothing(): + # Only the `claude` slice is the claude adapter's concern; a `pi` slice contributes nothing. + assert build_claude_settings_files({"pi": {"system": "You are Pi."}}) == [] diff --git a/sdks/python/oss/tests/pytest/unit/agents/mcp/test_resolver.py b/sdks/python/oss/tests/pytest/unit/agents/mcp/test_resolver.py index a8a97ab6f0..983571f379 100644 --- a/sdks/python/oss/tests/pytest/unit/agents/mcp/test_resolver.py +++ b/sdks/python/oss/tests/pytest/unit/agents/mcp/test_resolver.py @@ -60,6 +60,32 @@ async def test_missing_mcp_secret_is_explicit(): ) +async def test_permission_rides_the_wire_when_set(): + # An author's per-server permission is carried onto the resolved server and serialized. + servers = await MCPResolver(secret_provider=DictSecretProvider({})).resolve( + [MCPServerConfig(name="github", command="npx", permission="ask")] + ) + assert servers[0].permission == "ask" + assert servers[0].to_wire()["permission"] == "ask" + + +async def test_permission_absent_from_wire_when_unset(): + # No permission declared -> no `permission` key (a server has no read_only to default from). + servers = await MCPResolver(secret_provider=DictSecretProvider({})).resolve( + [MCPServerConfig(name="github", command="npx")] + ) + assert servers[0].permission is None + assert "permission" not in servers[0].to_wire() + + +def test_permission_accepts_fe_permission_mode_alias(): + # The playground writes `permission_mode`; the server config deserializes it. + config = MCPServerConfig.model_validate( + {"name": "github", "command": "npx", "permission_mode": "deny"} + ) + assert config.permission == "deny" + + async def test_mcp_compatibility_policy_can_omit_missing_secret(): servers = await MCPResolver( secret_provider=DictSecretProvider({}), diff --git a/sdks/python/oss/tests/pytest/unit/agents/platform/test_gateway_http.py b/sdks/python/oss/tests/pytest/unit/agents/platform/test_gateway_http.py index d41407a07d..d44008aa57 100644 --- a/sdks/python/oss/tests/pytest/unit/agents/platform/test_gateway_http.py +++ b/sdks/python/oss/tests/pytest/unit/agents/platform/test_gateway_http.py @@ -42,6 +42,7 @@ async def test_gateway_metadata_and_description_fallback_are_preserved( "description": None, "input_schema": {"type": "object"}, "call_ref": "tools.composio.github.GET_USER.c1", + "read_only": True, } ] }, @@ -58,7 +59,12 @@ async def test_gateway_metadata_and_description_fallback_are_preserved( assert spec.description == "get_user" # falls back to name when null assert spec.needs_approval is True assert spec.render == {"kind": "component", "component": "User"} + assert spec.read_only is True # the catalog read-only hint reaches the spec assert spec.to_wire()["needsApproval"] is True + assert spec.to_wire()["readOnly"] is True + # needs_approval beats the read-only auto-allow: the gateway resolver's effective + # permission is "ask" (pins the real precedence end to end, not just the model helper). + assert spec.to_wire()["permission"] == "ask" assert isinstance(resolved.tool_callback, ToolCallback) assert resolved.tool_callback.endpoint == "https://api.x/api/tools/call" assert resolved.tool_callback.authorization == "Access tok" diff --git a/sdks/python/oss/tests/pytest/unit/agents/tools/test_models.py b/sdks/python/oss/tests/pytest/unit/agents/tools/test_models.py index f823b4f32c..ace0549665 100644 --- a/sdks/python/oss/tests/pytest/unit/agents/tools/test_models.py +++ b/sdks/python/oss/tests/pytest/unit/agents/tools/test_models.py @@ -40,6 +40,8 @@ def test_code_spec_serializes_only_runner_fields(): "env": {"TOKEN": "secret"}, "needsApproval": True, "render": {"kind": "component", "component": "Calculator"}, + # needs_approval with no explicit permission -> derived `ask`. + "permission": "ask", } @@ -61,3 +63,62 @@ def test_secret_values_are_hidden_from_repr(): env={"TOKEN": "do-not-print"}, ) assert "do-not-print" not in repr(spec) + + +# --- Layer-3 permission default ladder (S3a) ----------------------------------------- + + +def _spec(**kwargs): + return CallbackToolSpec( + name="t", + description="t", + call_ref="tools.composio.x.Y.c1", + **kwargs, + ) + + +def test_permission_explicit_author_value_wins(): + # An explicit author permission wins over any read_only/needs_approval default. + spec = _spec(read_only=False, permission="allow") + assert spec.effective_permission() == "allow" + assert spec.to_wire()["permission"] == "allow" + + +def test_permission_default_from_read_only_true_is_allow(): + spec = _spec(read_only=True) + assert spec.effective_permission() == "allow" + assert spec.to_wire()["permission"] == "allow" + + +def test_permission_default_from_read_only_false_is_ask(): + spec = _spec(read_only=False) + assert spec.effective_permission() == "ask" + assert spec.to_wire()["permission"] == "ask" + + +def test_permission_needs_approval_beats_read_only_auto_allow(): + # needs_approval (no explicit permission) forces `ask` even when read_only would allow. + spec = _spec(read_only=True, needs_approval=True) + assert spec.effective_permission() == "ask" + assert spec.to_wire()["permission"] == "ask" + + +def test_permission_absent_when_all_unset(): + # read_only is None and nothing explicit -> no permission on the wire (runner falls back). + spec = _spec() + assert spec.effective_permission() is None + assert "permission" not in spec.to_wire() + + +def test_permission_accepts_fe_permission_mode_alias(): + # The playground writes `permission_mode` into agenta_metadata; the spec deserializes it. + spec = CallbackToolSpec.model_validate( + { + "name": "t", + "description": "t", + "callRef": "tools.composio.x.Y.c1", + "permission_mode": "deny", + } + ) + assert spec.permission == "deny" + assert spec.to_wire()["permission"] == "deny" diff --git a/sdks/python/oss/tests/pytest/unit/agents/tools/test_parsing.py b/sdks/python/oss/tests/pytest/unit/agents/tools/test_parsing.py index 2f707a7ab5..aa7b9c18bf 100644 --- a/sdks/python/oss/tests/pytest/unit/agents/tools/test_parsing.py +++ b/sdks/python/oss/tests/pytest/unit/agents/tools/test_parsing.py @@ -65,6 +65,60 @@ def test_compat_parser_does_not_flip_string_false_needs_approval(): assert approved.needs_approval is True +def test_compat_parser_carries_top_level_permission_on_typed_config(): + # A canonical typed config dict carries the author's Layer-3 permission end to end. + gateway = coerce_tool_config( + { + "type": "gateway", + "integration": "github", + "action": "GET_USER", + "connection": "c1", + "permission": "deny", + } + ) + assert isinstance(gateway, GatewayToolConfig) + assert gateway.permission == "deny" + + +def test_compat_parser_carries_permission_from_gateway_slug(): + # The playground writes a gateway slug + a sibling permission; both survive coercion. + gateway = coerce_tool_config( + { + "function": {"name": "tools.composio.github.GET_USER.c1"}, + "permission": "deny", + } + ) + assert isinstance(gateway, GatewayToolConfig) + assert gateway.permission == "deny" + + +def test_compat_parser_accepts_permission_mode_alias_for_permission(): + # The legacy FE key `permission_mode` deserializes to the same `permission` field. + gateway = coerce_tool_config( + { + "type": "gateway", + "integration": "github", + "action": "GET_USER", + "connection": "c1", + "permission_mode": "deny", + } + ) + assert gateway.permission == "deny" + + +def test_compat_parser_omits_permission_when_absent(): + # Backward compatible: a tool with no permission leaves the field unset. + gateway = coerce_tool_config( + { + "type": "gateway", + "integration": "github", + "action": "GET_USER", + "connection": "c1", + } + ) + assert gateway.permission is None + + def test_coerce_tool_configs_rejects_invalid_on_error(): with pytest.raises(ValueError): coerce_tool_configs(["read"], on_error="bogus") # type: ignore[arg-type] diff --git a/services/agent/src/engines/sandbox_agent/provider.ts b/services/agent/src/engines/sandbox_agent/provider.ts index 6ab03dd052..13fd392fa5 100644 --- a/services/agent/src/engines/sandbox_agent/provider.ts +++ b/services/agent/src/engines/sandbox_agent/provider.ts @@ -1,15 +1,44 @@ import { local } from "sandbox-agent/local"; import { daytona } from "sandbox-agent/daytona"; +import type { SandboxPermission } from "../../protocol.ts"; import { applyDaytonaClientEnv, daytonaEnvVars } from "./daytona.ts"; +/** + * Translate the Layer 2 network policy into Daytona create fields. Daytona enforces egress + * at the sandbox boundary: `networkBlockAll` blocks all outbound, `networkAllowList` is a + * COMMA-SEPARATED CIDR string (not an array). `mode: "on"` (or no policy) leaves both unset + * so the sandbox stays default-open. The create object is cast `as any` at the call site, so + * these pass through even though the daytona wrapper's create type does not surface them. + * + * `mode: "allowlist"` with an EMPTY list maps to `networkBlockAll` (block-all), not default-open: + * "allow these zero ranges" is faithfully read as "allow nothing", and it keeps this mapping + * consistent with `buildRunPlan`, which already treats any `mode !== "on"` as a restricted + * boundary. Leaving it unset would silently grant full egress — the opposite of the author's + * intent — so an empty allowlist locks down rather than opens up. + */ +export function daytonaNetworkFields( + sandboxPermission: SandboxPermission | undefined, +): { networkBlockAll: true } | { networkAllowList: string } | {} { + const network = sandboxPermission?.network; + if (network?.mode === "off") return { networkBlockAll: true }; + if (network?.mode === "allowlist") { + const allowlist = network.allowlist ?? []; + if (allowlist.length > 0) return { networkAllowList: allowlist.join(",") }; + return { networkBlockAll: true }; + } + return {}; +} + /** * Build the sandbox-agent provider for the requested axis. * * Daytona needs an image or snapshot that carries the daemon and harness CLI. The * code-evaluator `DAYTONA_SNAPSHOT` is intentionally not reused because it has no daemon. * Provider keys come from the request secrets. Pi's self-managed login is only uploaded - * when no key is available. + * when no key is available. The Layer 2 network policy (S1b) is enforced on Daytona via + * `networkBlockAll` / `networkAllowList`; `buildRunPlan` rejects restricted policies the + * local provider cannot enforce before this is reached. */ export function buildSandboxProvider( sandboxId: string, @@ -17,6 +46,7 @@ export function buildSandboxProvider( binaryPath: string | undefined, piExtEnv: Record, secrets: Record, + sandboxPermission?: SandboxPermission, ) { if (sandboxId === "daytona") { applyDaytonaClientEnv(); @@ -31,6 +61,7 @@ export function buildSandboxProvider( // suppresses that so the snapshot is used as-is. ...(snapshot ? { snapshot, image: undefined } : {}), ...(target ? { target } : {}), + ...daytonaNetworkFields(sandboxPermission), envVars: daytonaEnvVars(piExtEnv, secrets), ephemeral: true, } as any, diff --git a/services/agent/src/responder.ts b/services/agent/src/responder.ts index 63348764e7..ea2fe92e0f 100644 --- a/services/agent/src/responder.ts +++ b/services/agent/src/responder.ts @@ -9,14 +9,18 @@ * * - `PolicyResponder` is the headless answer (a fixed `auto` / `deny` policy, no human). * It reproduces the previous behavior exactly and is what `/invoke` uses. - * - A cross-turn responder (the `/messages` HITL path) slots in here later: it surfaces the - * request to the browser, ends the turn, and resolves on the next turn's reply. The - * harness adapter does not change when the responder does. + * - `HITLResponder` is the cross-turn responder (the `/messages` HITL path): it parks an + * un-decided permission (the `interaction_request` already went to the browser), ends the + * turn, and resolves on the next turn's stored reply. With no decision and no human + * surface it falls back to the base policy, so the headless path is unchanged. The harness + * adapter does not change when the responder does. * * Resolution is modeled as `allow` / `deny`; the adapter maps that onto the harness's * available ACP replies via `decisionToReply`. */ +import type { AgentRunRequest, ContentBlock } from "./protocol.ts"; + export type PermissionPolicy = "auto" | "deny"; export type PermissionDecision = "allow" | "deny"; @@ -49,13 +53,121 @@ export class PolicyResponder implements Responder { } } +/** + * A lookup of approval decisions the user already made on a prior turn, keyed by the + * identifier carried on the permission request. Both the tool-call id and the tool name are + * indexed because a cold-replayed run mints fresh ACP permission/tool-call ids each turn, so + * the stable cross-turn anchor is whichever the harness re-presents. `toolCallId` is the + * precise match; `toolName` is the fallback when the id was not preserved across the boundary. + */ +export type ApprovalDecisions = ReadonlyMap; + +/** + * The cross-turn human-in-the-loop responder for the `/messages` path. + * + * It answers a permission gate three ways, in order: + * 1. The user already decided (a stored `decisions` entry for this tool-call id or tool + * name) -> apply it. THIS IS THE RESUME PATH: turn N parks, turn N+1 carries the reply. + * 2. No stored decision and there is a human surface (`hasHumanSurface`) -> `deny` to PARK. + * The `interaction_request` was already emitted upstream (the FE prompts), so denying + * here just declines to run the unapproved tool this turn; the turn ends safely and a + * later turn carrying the decision resolves it via branch 1. + * 3. No stored decision and no human surface (headless `/invoke`) -> the `basePolicy` + * decision. This branch is byte-identical to `PolicyResponder`, so `/invoke` is unchanged. + * + * Pure: every input (decisions, base policy, surface flag) is injected; no I/O. + */ +export class HITLResponder implements Responder { + constructor( + private readonly decisions: ApprovalDecisions, + private readonly basePolicy: PermissionPolicy, + private readonly hasHumanSurface: boolean, + ) {} + + async onPermission(request: PermissionRequest): Promise { + const stored = this.lookup(request); + if (stored) return stored; + if (this.hasHumanSurface) return "deny"; // park: do not run the unapproved tool this turn + return this.basePolicy === "deny" ? "deny" : "allow"; // headless: PolicyResponder parity + } + + private lookup(request: PermissionRequest): PermissionDecision | undefined { + for (const key of permissionRequestKeys(request)) { + const decision = this.decisions.get(key); + if (decision) return decision; + } + return undefined; + } +} + +/** The identifiers a stored decision may be keyed by: the gated tool-call id, then its name. */ +function permissionRequestKeys(request: PermissionRequest): string[] { + const keys: string[] = []; + const raw = request.raw as + | { toolCall?: { toolCallId?: unknown; name?: unknown } } + | undefined; + const toolCallId = raw?.toolCall?.toolCallId; + if (typeof toolCallId === "string" && toolCallId) keys.push(toolCallId); + const name = raw?.toolCall?.name; + if (typeof name === "string" && name) keys.push(name); + return keys; +} + +/** + * Build the approval-decision lookup from the inbound run request's message history. + * + * The signal is the converted approval reply that the Vercel adapter + * (`_approval_response_blocks`) already produces: a `tool_result` content block keyed by + * `toolCallId` whose `output` is an `{ approved: boolean }` envelope. That envelope shape is + * unique to an approval response (an ordinary tool result carries the tool's real output, not + * an `approved` flag), so no new wire carrier is needed. We index each decision by its + * `toolCallId` and, when the block also names a tool, by `toolName` (the cross-turn fallback + * when a cold replay did not preserve the id). + */ +export function extractApprovalDecisions( + request: AgentRunRequest, +): Map { + const decisions = new Map(); + for (const message of request.messages ?? []) { + const content = message?.content; + if (!Array.isArray(content)) continue; + for (const block of content) { + const decision = approvalDecisionOf(block); + if (!decision) continue; + if (block.toolCallId) decisions.set(block.toolCallId, decision); + if (block.toolName) decisions.set(block.toolName, decision); + } + } + return decisions; +} + +/** A `tool_result` block whose `output` is an `{ approved: boolean }` envelope -> a decision. */ +function approvalDecisionOf( + block: ContentBlock, +): PermissionDecision | undefined { + if (!block || block.type !== "tool_result") return undefined; + const output = block.output; + if ( + output && + typeof output === "object" && + !Array.isArray(output) && + typeof (output as { approved?: unknown }).approved === "boolean" + ) { + return (output as { approved: boolean }).approved ? "allow" : "deny"; + } + return undefined; +} + /** * Resolve the permission policy with the same precedence as before: an explicit per-run * `permissionPolicy: "deny"` or the `SANDBOX_AGENT_DENY_PERMISSIONS` env flips to deny; the * default is auto-allow, because backend-resolved tools are trusted and the run is headless. */ export function policyFromRequest(permissionPolicy?: string): PermissionPolicy { - if (permissionPolicy === "deny" || process.env.SANDBOX_AGENT_DENY_PERMISSIONS === "true") { + if ( + permissionPolicy === "deny" || + process.env.SANDBOX_AGENT_DENY_PERMISSIONS === "true" + ) { return "deny"; } return "auto"; diff --git a/services/agent/src/tools/code.ts b/services/agent/src/tools/code.ts index d4d8db92b8..14544b0cfd 100644 --- a/services/agent/src/tools/code.ts +++ b/services/agent/src/tools/code.ts @@ -1,197 +1,27 @@ /** - * Code-tool executor: run a resolved `code` tool's snippet in the agent sandbox. + * Code-tool sidecar execution gate. * - * A code tool ships a snippet (`code`) + a runtime (`python` | `node`) + a scoped `env` (the - * tool's declared vault secrets, resolved server-side). Unlike a `callback` tool, it never - * touches Agenta's /tools/call — it runs locally where the harness runs, which is exactly why - * its secrets are injected here as subprocess env (and nowhere else). - * - * Entry convention (same for both runtimes): the snippet defines a top-level `main`. A bare - * `def main(**inputs)` / `function main(inputs)` is found automatically; an explicit export - * (`module.exports.main` / `exports.main` / `module.exports = fn` in Node) is also accepted. - * Python calls `main(**inputs)` (keyword args from the tool input object); Node calls - * `main(inputs)` (the input object) and may return a promise. The return value is - * JSON-serialized and handed to the model as the tool result. - * - * Shared by every delivery path that runs code locally: engines/pi.ts (in-process Pi), - * extensions/agenta.ts (Pi under sandbox-agent), tools/mcp-server.ts (the MCP bridge for other - * harnesses). + * The code-tool interface still exists and code tools are still advertised to harnesses. The + * sidecar no longer executes author-supplied snippets locally, though: every delivery path + * funnels a `kind: "code"` call through this function, so throwing here makes direct Pi, + * sandbox Pi, and the ACP/MCP bridge fail consistently without changing the public wire shape. */ -import { spawn } from "node:child_process"; -import { mkdtempSync, rmSync, writeFileSync } from "node:fs"; -import { tmpdir } from "node:os"; -import { join } from "node:path"; - -/** Per-call budget for a code tool. Surfaced as a tool error on timeout. */ -export const CODE_TOOL_TIMEOUT_MS = Number( - process.env.AGENTA_AGENT_CODE_TOOL_TIMEOUT_MS ?? 30000, -); - -// argv[1] is the snippet path (python `-c`/node `-e` put the first extra arg at argv[1]); -// the tool input arrives as JSON on stdin. Both bootstraps evaluate the snippet in a fresh -// scope and pick up a top-level `main` (a bare `def main`/`function main`), falling back to an -// explicit export. Either way the contract is: define a callable `main`. -const PY_BOOTSTRAP = `import sys, json -_path = sys.argv[1] -with open(_path) as _f: - _src = _f.read() -_ns = {} -exec(compile(_src, _path, "exec"), _ns) -if not callable(_ns.get("main")): - sys.stderr.write("code tool must define a callable main(**inputs)") - sys.exit(1) -_args = json.loads(sys.stdin.read() or "{}") -_out = _ns["main"](**_args) -sys.stdout.write(json.dumps(_out)) -`; - -// `require(path)` would only see CommonJS exports, so a bare top-level `function main` (which -// exports nothing under CommonJS) would be invisible. Instead read the source and evaluate it -// in a scope that captures a top-level `main`, while still honoring an explicit -// `module.exports.main` / `exports.main` / `module.exports = fn`. -const NODE_BOOTSTRAP = `const fs = require("fs"); -const path = process.argv[1]; -const src = fs.readFileSync(path, "utf8"); -const mod = { exports: {} }; -const factory = new Function( - "exports", - "require", - "module", - "__filename", - "__dirname", - src + - "\\n;return (typeof main !== 'undefined' ? main : (module.exports && (module.exports.main || module.exports.default)) || module.exports);", -); -const fn = factory(mod.exports, require, mod, path, require("path").dirname(path)); -if (typeof fn !== "function") { - process.stderr.write("code tool must define or export a callable main(inputs) function"); - process.exit(1); -} -const args = JSON.parse(fs.readFileSync(0, "utf8") || "{}"); -Promise.resolve(fn(args)) - .then((out) => process.stdout.write(JSON.stringify(out === undefined ? null : out))) - .catch((err) => { process.stderr.write(String((err && err.stack) || err)); process.exit(1); }); -`; export type CodeRuntime = "python" | "node"; -// The minimal set of host env vars a python3/node runtime needs to start. Deliberately -// excludes everything secret-bearing or sidecar-specific: no AGENTA_*, no *_API_KEY / -// *_TOKEN, no COMPOSIO_* / DAYTONA_*, no provider keys that the in-process Pi path writes -// into process.env before a run. Only the tool's declared scoped `env` is layered on top. -const BASE_ENV_ALLOWLIST = [ - "PATH", - "HOME", - "LANG", - "LC_ALL", - "LC_CTYPE", - "TZ", - "TMPDIR", - "TEMP", - "TMP", - "NODE_PATH", - // Windows essentials, copied only when present. - "SystemRoot", - "ComSpec", -]; - -/** Build the child env from a minimal allowlist (copied only when set) plus scoped secrets. */ -function buildChildEnv( - env: Record | undefined, -): Record { - const base: Record = {}; - for (const key of BASE_ENV_ALLOWLIST) { - const value = process.env[key]; - if (value !== undefined) base[key] = value; - } - return { ...base, ...(env ?? {}) }; -} +export const CODE_TOOL_UNSUPPORTED_MESSAGE = + "Code tools are not supported by the sidecar."; /** - * Run a code tool's snippet and return its JSON-serialized output as text. Throws on a - * non-zero exit, a timeout, or an abort; callers turn the throw into a tool-error result so - * the model loop continues. + * Fail a code-tool invocation. Callers turn this throw into a tool-error result so the model + * loop continues rather than crashing the whole run. */ export async function runCodeTool( - runtime: CodeRuntime | undefined, - code: string, - env: Record | undefined, - args: unknown, - signal?: AbortSignal, + _runtime: CodeRuntime | undefined, + _code: string, + _env: Record | undefined, + _args: unknown, + _signal?: AbortSignal, ): Promise { - const dir = mkdtempSync(join(tmpdir(), "agenta-code-")); - try { - const isNode = runtime === "node"; - const scriptPath = join(dir, isNode ? "tool.js" : "tool.py"); - writeFileSync(scriptPath, code ?? "", "utf8"); - - const command = isNode ? "node" : "python3"; - const childArgs = isNode - ? ["-e", NODE_BOOTSTRAP, scriptPath] - : ["-c", PY_BOOTSTRAP, scriptPath]; - - return await new Promise((resolve, reject) => { - const child = spawn(command, childArgs, { - // The child inherits ONLY a minimal startup allowlist (PATH, HOME, locale/temp, and - // Windows essentials when present) plus the tool's declared scoped secrets. It does - // NOT inherit the sidecar's process.env, so provider keys (OPENAI_API_KEY, etc.) that - // the in-process Pi path writes into process.env, AGENTA_* config, and other secret- - // bearing vars never reach an author-supplied snippet. Nothing is written to the - // agent-visible filesystem beyond the temp dir. - env: buildChildEnv(env), - stdio: ["pipe", "pipe", "pipe"], - }); - - let stdout = ""; - let stderr = ""; - let settled = false; - const finish = (fn: () => void) => { - if (settled) return; - settled = true; - clearTimeout(timer); - if (signal) signal.removeEventListener("abort", onAbort); - fn(); - }; - - const timer = setTimeout(() => { - child.kill("SIGKILL"); - finish(() => - reject(new Error(`code tool timed out after ${CODE_TOOL_TIMEOUT_MS}ms`)), - ); - }, CODE_TOOL_TIMEOUT_MS); - - const onAbort = () => { - child.kill("SIGKILL"); - finish(() => reject(new Error("aborted"))); - }; - if (signal) { - if (signal.aborted) onAbort(); - else signal.addEventListener("abort", onAbort, { once: true }); - } - - child.stdout.on("data", (d) => (stdout += d)); - child.stderr.on("data", (d) => (stderr += d)); - child.on("error", (err) => finish(() => reject(err))); - child.on("close", (exitCode) => - finish(() => { - if (exitCode === 0) resolve(stdout.trim()); - else - reject( - new Error( - `code tool exited ${exitCode}: ${stderr.slice(0, 500) || "(no stderr)"}`, - ), - ); - }), - ); - - child.stdin.write(JSON.stringify(args ?? {})); - child.stdin.end(); - }); - } finally { - try { - rmSync(dir, { recursive: true, force: true }); - } catch { - // best-effort cleanup of the throwaway snippet dir - } - } + throw new Error(CODE_TOOL_UNSUPPORTED_MESSAGE); } diff --git a/services/agent/src/tools/dispatch.ts b/services/agent/src/tools/dispatch.ts index e8e2d25e38..89fdda10d1 100644 --- a/services/agent/src/tools/dispatch.ts +++ b/services/agent/src/tools/dispatch.ts @@ -9,7 +9,7 @@ * advertise/skip behavior for `client` tools — only the execution itself is shared. * * The three executor kinds (see `ResolvedToolSpec`): - * - `code`: run the snippet in a sandbox subprocess with its scoped secret `env`. + * - `code`: advertised to harnesses, but rejected by the sidecar as unsupported. * - `client`: browser-fulfilled across a turn boundary; never executed in-sandbox (throws). * - `callback` (default): POST back through Agenta's /tools/call so the Composio key and * connection auth stay server-side. On Daytona the in-sandbox process can't reach Agenta, @@ -96,7 +96,7 @@ export async function relayToolCall( * Execute one resolved tool and return its result text. Throws on failure; every call site * turns the throw into a tool-error result so the model loop continues rather than crashing. * - * - `code` → run the snippet locally (scoped secret env), no callback/relay. + * - `code` -> reject as unsupported by the sidecar, no callback/relay. * - `client` → throw: browser-fulfilled, never executed in-sandbox. * - default/`callback` → relay through the runner when `opts.relayDir` is set (Daytona), * else POST directly to `opts.endpoint`. diff --git a/services/agent/src/tools/relay.ts b/services/agent/src/tools/relay.ts index 4889b110af..824cbe16dd 100644 --- a/services/agent/src/tools/relay.ts +++ b/services/agent/src/tools/relay.ts @@ -20,6 +20,7 @@ import { existsSync, mkdirSync, readdirSync, readFileSync, writeFileSync } from import { callAgentaTool } from "./callback.ts"; import { runCodeTool } from "./code.ts"; import type { ResolvedToolSpec, ToolCallbackContext } from "../protocol.ts"; +import type { PermissionPolicy } from "../responder.ts"; export const RELAY_REQ_SUFFIX = ".req.json"; export const RELAY_RES_SUFFIX = ".res.json"; @@ -44,6 +45,27 @@ export function sanitizeRelayId(id: string): string { export const sleep = (ms: number): Promise => new Promise((r) => setTimeout(r, ms)); +/** + * Layer 3 enforcement (S3b): resolve a resolved-tool spec's `permission` to a concrete + * runner-side decision. `allow` runs, `deny` never runs; `ask` and an UNSET permission + * degrade to the run's headless `permissionPolicy` (`auto` -> allow, `deny` -> deny). + * + * Resolved tools (code / gateway-callback) run runner-side via the relay, harness-agnostic, so + * this is where their permission is enforced (Claude builtins are enforced at Layer 1 via + * .claude/settings.json instead). Surfacing an `ask` to a live human is the cross-turn HITL + * path (S5); here `ask` is a headless run, so it collapses onto the policy. + */ +export function resolvePermission( + permission: string | undefined, + policy: PermissionPolicy, +): "allow" | "deny" { + if (permission === "allow") return "allow"; + if (permission === "deny") return "deny"; + // `ask` or unset: headless, so defer to the run policy. + // TODO(S5): surface ask to HITL instead of collapsing onto permissionPolicy. + return policy === "deny" ? "deny" : "allow"; +} + export interface RelayHost { list: (dir: string) => Promise; read: (path: string) => Promise; @@ -93,7 +115,19 @@ async function executeRelayedTool( spec: ResolvedToolSpec, req: RelayRequest, callback: ToolCallbackContext | undefined, + policy: PermissionPolicy, ): Promise { + // Layer 3 enforcement (S3b): gate the call on the spec's permission before it runs. + // `deny` returns a refusal string (not a throw) so the harness folds it into the tool + // result and the model loop continues. `ask`/unset degrade to the headless policy. + const decision = resolvePermission(spec.permission, policy); + if (decision === "deny") { + if (spec.permission === "deny") { + return `Tool '${spec.name}' is denied by policy.`; + } + // ask/unset that the headless policy refused. TODO(S5): surface ask to HITL. + return `Tool '${spec.name}' requires approval; denied in headless mode.`; + } if (spec.kind === "client") { throw new Error(`client tool '${spec.name}' is browser-fulfilled and cannot be executed`); } @@ -114,7 +148,7 @@ async function executeRelayedTool( /** * Runner-side relay loop. Polls the sandbox relay dir for request files, executes each - * against Agenta's /tools/call (which the runner can reach), and writes the response file + * against the private spec in memory, and writes the response file * the in-sandbox extension is waiting on. Returns `stop()` to end the loop and drain any * in-flight executions; call it once the prompt resolves. */ @@ -123,6 +157,7 @@ export function startToolRelay( relayDir: string, specs: ResolvedToolSpec[], callback: ToolCallbackContext | undefined, + policy: PermissionPolicy, ): { stop: () => Promise } { let active = true; const seen = new Set(); @@ -141,6 +176,7 @@ export function startToolRelay( spec, { ...req, toolCallId: req.toolCallId ?? id }, callback, + policy, ); res = { ok: true, text }; } catch (err) { diff --git a/services/agent/tests/unit/code-tool.test.ts b/services/agent/tests/unit/code-tool.test.ts index 5a3566614d..5914797459 100644 --- a/services/agent/tests/unit/code-tool.test.ts +++ b/services/agent/tests/unit/code-tool.test.ts @@ -1,89 +1,25 @@ /** - * Unit test for the code-tool executor (runCodeTool). + * Unit test for the code-tool sidecar execution gate. * - * Exercises both runtimes end-to-end through real subprocesses: a python tool, node tools - * written as a bare top-level `function main` (the F2 regression) and as an explicit - * `module.exports.main`, an async node `main`, the F3 env-isolation guarantee (provider keys - * do NOT leak in; declared scoped secrets DO), and the non-zero-exit reject path. - * - * Needs `python3` and `node` on PATH (both present locally and on ubuntu CI runners). - * - * Run: pnpm test (or: pnpm exec vitest run tests/unit/code-tool.test.ts) + * Code tools remain part of the public tool interface and can still be advertised to harnesses, + * but the sidecar must not execute author-supplied snippets locally. */ import { describe, it } from "vitest"; import assert from "node:assert/strict"; -import { runCodeTool } from "../../src/tools/code.ts"; +import { CODE_TOOL_UNSUPPORTED_MESSAGE, runCodeTool } from "../../src/tools/code.ts"; describe("runCodeTool", () => { - it("runs a python bare `def main(**kw)`", async () => { - const code = 'def main(**kw):\n return {"sum": kw.get("a", 0) + kw.get("b", 0)}\n'; - const out = await runCodeTool("python", code, undefined, { a: 2, b: 3 }); - assert.deepEqual(JSON.parse(out), { sum: 5 }, "python bare main returns the right JSON"); - }); - - it("runs a node bare top-level `function main` (F2 regression)", async () => { - const code = "function main(inputs) { return { got: inputs }; }"; - const out = await runCodeTool("node", code, undefined, { hello: "world" }); - assert.deepEqual( - JSON.parse(out), - { got: { hello: "world" } }, - "node bare function main executes and echoes the input", - ); - }); - - it("runs a node explicit `module.exports.main`", async () => { - const code = "module.exports.main = function (inputs) { return { via: 'exports', got: inputs }; };"; - const out = await runCodeTool("node", code, undefined, { x: 1 }); - assert.deepEqual( - JSON.parse(out), - { via: "exports", got: { x: 1 } }, - "node module.exports.main works", - ); - }); - - it("runs an async node `main` returning a Promise", async () => { - const code = - "async function main(inputs) { await new Promise((r) => setTimeout(r, 5)); return { doubled: inputs.n * 2 }; }"; - const out = await runCodeTool("node", code, undefined, { n: 21 }); - assert.deepEqual(JSON.parse(out), { doubled: 42 }, "node async main resolves"); - }); - - it("F3: provider keys do NOT leak; scoped secrets DO", async () => { - const hadKey = "OPENAI_API_KEY" in process.env; - const prevKey = process.env.OPENAI_API_KEY; - process.env.OPENAI_API_KEY = "leak-me-xyz"; - try { - // The provider key sits in process.env but must not reach the snippet. - const leakCode = "function main() { return { key: process.env.OPENAI_API_KEY ?? 'absent' }; }"; - const leakOut = await runCodeTool("node", leakCode, undefined, {}); - assert.deepEqual( - JSON.parse(leakOut), - { key: "absent" }, - "F3: OPENAI_API_KEY did NOT leak into the snippet env", - ); - - // A secret declared on the tool (passed via the scoped `env` arg) must be visible. - const scopedCode = - "function main() { return { secret: process.env.MY_TOOL_SECRET ?? 'absent' }; }"; - const scopedOut = await runCodeTool("node", scopedCode, { MY_TOOL_SECRET: "ok" }, {}); - assert.deepEqual( - JSON.parse(scopedOut), - { secret: "ok" }, - "F3: scoped MY_TOOL_SECRET IS visible to the snippet", - ); - } finally { - if (hadKey) process.env.OPENAI_API_KEY = prevKey; - else delete process.env.OPENAI_API_KEY; - } - }); - - it("rejects when the snippet throws / exits non-zero", async () => { - const code = "function main() { throw new Error('boom'); }"; + it("fails with a clear unsupported error instead of executing code", async () => { await assert.rejects( - () => runCodeTool("node", code, undefined, {}), - /boom|exited/, - "a throwing snippet rejects", + () => + runCodeTool( + "node", + "function main() { return { executed: true }; }", + { MY_TOOL_SECRET: "secret" }, + { input: true }, + ), + new RegExp(CODE_TOOL_UNSUPPORTED_MESSAGE), ); }); }); diff --git a/services/agent/tests/unit/pi-capability-guard.test.ts b/services/agent/tests/unit/pi-capability-guard.test.ts new file mode 100644 index 0000000000..653fe084d9 --- /dev/null +++ b/services/agent/tests/unit/pi-capability-guard.test.ts @@ -0,0 +1,76 @@ +import { describe, it } from "vitest"; +import assert from "node:assert"; + +import { unenforceableCapabilityConfig } from "../../src/engines/pi.ts"; +import type { AgentRunRequest } from "../../src/protocol.ts"; + +/** + * The in-process `pi` backend has no sandbox and runs tools without the relay, so it must reject + * capability config it cannot enforce instead of silently ignoring it (the backend-routing footgun + * found in live QA). `sandbox-agent` is the enforcing backend. + */ +function req(extra: Partial): AgentRunRequest { + return { harness: "pi", messages: [], ...extra } as AgentRunRequest; +} + +describe("unenforceableCapabilityConfig (in-process pi guard)", () => { + it("passes a plain request (no capability config)", () => { + assert.equal(unenforceableCapabilityConfig(req({})), undefined); + }); + + it("passes a permissive sandbox_permission (network on, no fs restriction)", () => { + assert.equal( + unenforceableCapabilityConfig( + req({ sandboxPermission: { network: { mode: "on" }, enforcement: "strict" } }), + ), + undefined, + ); + }); + + it("rejects a restricted network", () => { + const msg = unenforceableCapabilityConfig( + req({ sandboxPermission: { network: { mode: "off" }, enforcement: "strict" } }), + ); + assert.match(msg ?? "", /cannot enforce sandbox_permission\.network='off'/); + }); + + it("rejects an allowlist network too", () => { + const msg = unenforceableCapabilityConfig( + req({ + sandboxPermission: { network: { mode: "allowlist", allowlist: ["10.0.0.0/8"] }, enforcement: "strict" }, + }), + ); + assert.match(msg ?? "", /network='allowlist'/); + }); + + it("rejects a restricting filesystem", () => { + const msg = unenforceableCapabilityConfig( + req({ sandboxPermission: { network: { mode: "on" }, filesystem: "readonly", enforcement: "strict" } }), + ); + assert.match(msg ?? "", /filesystem='readonly'/); + }); + + it("rejects a deny tool permission and names the tool", () => { + const msg = unenforceableCapabilityConfig( + req({ customTools: [{ kind: "code", name: "danger", permission: "deny" } as never] }), + ); + assert.match(msg ?? "", /does not enforce tool permissions/); + assert.match(msg ?? "", /danger/); + }); + + it("rejects an ask tool permission", () => { + const msg = unenforceableCapabilityConfig( + req({ customTools: [{ kind: "code", name: "t", permission: "ask" } as never] }), + ); + assert.match(msg ?? "", /deny\/ask/); + }); + + it("passes an allow permission (no enforcement needed)", () => { + assert.equal( + unenforceableCapabilityConfig( + req({ customTools: [{ kind: "code", name: "t", permission: "allow" } as never] }), + ), + undefined, + ); + }); +}); diff --git a/services/agent/tests/unit/sandbox-agent-provider.test.ts b/services/agent/tests/unit/sandbox-agent-provider.test.ts new file mode 100644 index 0000000000..2e33286f95 --- /dev/null +++ b/services/agent/tests/unit/sandbox-agent-provider.test.ts @@ -0,0 +1,54 @@ +/** + * Unit tests for the Layer 2 network policy -> Daytona create field mapping (S1b). + * + * The mapping is tested directly because the real `daytona()` provider closes over its + * create object and constructs a Daytona client (needs API-key env), so it cannot be + * inspected through `buildSandboxProvider`. The orchestration test covers that the run + * plan's `sandboxPermission` reaches `buildSandboxProvider` as the new argument. + * + * Run: pnpm test (or: pnpm exec vitest run tests/unit/sandbox-agent-provider.test.ts) + */ +import { describe, it } from "vitest"; +import assert from "node:assert/strict"; + +import { daytonaNetworkFields } from "../../src/engines/sandbox_agent/provider.ts"; + +describe("daytonaNetworkFields", () => { + it("blocks all egress for network:off", () => { + assert.deepEqual( + daytonaNetworkFields({ network: { mode: "off" }, enforcement: "strict" }), + { networkBlockAll: true }, + ); + }); + + it("renders a non-empty allowlist as a comma-separated CIDR string", () => { + assert.deepEqual( + daytonaNetworkFields({ + network: { mode: "allowlist", allowlist: ["a", "b"] }, + enforcement: "strict", + }), + { networkAllowList: "a,b" }, + ); + }); + + it("blocks all egress for an empty allowlist (allow zero ranges == allow nothing)", () => { + assert.deepEqual( + daytonaNetworkFields({ + network: { mode: "allowlist", allowlist: [] }, + enforcement: "strict", + }), + { networkBlockAll: true }, + ); + }); + + it("leaves the sandbox default-open for network:on", () => { + assert.deepEqual( + daytonaNetworkFields({ network: { mode: "on" }, enforcement: "strict" }), + {}, + ); + }); + + it("leaves the sandbox default-open when no policy is set", () => { + assert.deepEqual(daytonaNetworkFields(undefined), {}); + }); +}); diff --git a/services/agent/tests/unit/tool-bridge.test.ts b/services/agent/tests/unit/tool-bridge.test.ts index fcd7eb6a13..0371633b20 100644 --- a/services/agent/tests/unit/tool-bridge.test.ts +++ b/services/agent/tests/unit/tool-bridge.test.ts @@ -2,8 +2,8 @@ * Unit tests for buildToolMcpServers (the tool MCP bridge attachment decision). * * Regression cover for F4: attachment must be decided per tool kind, not on the callback - * endpoint alone. A `code` tool runs locally in mcp-server.ts and needs no endpoint, so a run - * whose tools are all `code` must still attach the `agenta-tools` server. Only `callback`-kind + * endpoint alone. A `code` tool is still advertised through mcp-server.ts and needs no endpoint, + * so a run whose tools are all `code` must still attach the `agenta-tools` server. Only `callback`-kind * tools require AGENTA_TOOL_CALLBACK_ENDPOINT; missing it must degrade those tools, not drop the * whole server. `client` tools are browser-fulfilled and never justify attaching the bridge. * @@ -109,7 +109,7 @@ describe("buildToolMcpServers", () => { ]; const out = buildToolMcpServers(specs, relayDir); assert.notDeepEqual(out, [], "mixed run with no endpoint must not return []"); - assert.equal(out.length, 1, "still attaches the server so the code tool works"); + assert.equal(out.length, 1, "still attaches the server so the code tool is advertised"); assert.equal( envValue(out[0].env, "AGENTA_TOOL_CALLBACK_ENDPOINT"), undefined, diff --git a/services/agent/tests/unit/tool-dispatch.test.ts b/services/agent/tests/unit/tool-dispatch.test.ts index af27dc991f..a6ca4e8e85 100644 --- a/services/agent/tests/unit/tool-dispatch.test.ts +++ b/services/agent/tests/unit/tool-dispatch.test.ts @@ -7,9 +7,9 @@ * advertising behavior that stays per-site: * - buildCustomTools (pi.ts) skips `client` specs, builds a tool per `code`/`callback` spec, * and skips a `callback` spec with no callback endpoint. - * - runResolvedTool runs a real `code` snippet end-to-end (python) and throws for `client`. + * - runResolvedTool advertises code tools but fails their sidecar execution, and throws for `client`. * - * No network and no harness: the `code` path shells out to python3 (available locally); the + * No network and no harness: the `code` path now fails before any subprocess; the * `callback`/relay paths are not exercised here (they need a live /tools/call or a relay dir). * * Run: pnpm test (or: pnpm exec vitest run tests/unit/tool-dispatch.test.ts) @@ -66,15 +66,14 @@ describe("buildCustomTools routing", () => { }); describe("runResolvedTool", () => { - it("runs a code spec end-to-end (python)", async () => { - const text = await runResolvedTool(codeSpec, { greeting: "hi", n: 3 }, { - toolCallId: "call-1", - }); - const parsed = JSON.parse(text); - assert.deepEqual( - parsed, - { echo: { greeting: "hi", n: 3 } }, - "code tool runs the snippet and returns its JSON output containing the input", + it("fails code specs with a clear unsupported error", async () => { + await assert.rejects( + () => + runResolvedTool(codeSpec, { greeting: "hi", n: 3 }, { + toolCallId: "call-1", + }), + /Code tools are not supported by the sidecar\./, + "code tools remain advertised but are not executable by the sidecar", ); }); diff --git a/services/agent/tests/unit/tool-relay-permission.test.ts b/services/agent/tests/unit/tool-relay-permission.test.ts new file mode 100644 index 0000000000..5c4cff38db --- /dev/null +++ b/services/agent/tests/unit/tool-relay-permission.test.ts @@ -0,0 +1,109 @@ +/** + * Unit tests for Layer-3 permission enforcement in the runner-side tool relay (S3b). + * + * - `resolvePermission` is the pure ladder: allow/deny are honored as-is; ask/unset degrade to + * the headless permission policy (auto -> allow, deny -> deny). + * - `startToolRelay` enforces that ladder before executing a relayed tool: a `deny` spec is + * refused before execution and an `allow` code spec reaches the sidecar unsupported gate. + * + * The relay loop is driven over a real temp dir via `localRelayHost`: write a `.req.json`, + * poll for the `.res.json` the runner writes back. A `code` spec is used so execution + * needs no network or callback; the sidecar now returns a deterministic unsupported error + * instead of spawning a runtime. + * + * Run: pnpm test (or: pnpm exec vitest run tests/unit/tool-relay-permission.test.ts) + */ +import { describe, it } from "vitest"; +import assert from "node:assert/strict"; +import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { + localRelayHost, + resolvePermission, + startToolRelay, + type RelayResponse, +} from "../../src/tools/relay.ts"; +import type { ResolvedToolSpec } from "../../src/protocol.ts"; + +const codeSpec = ( + name: string, + permission?: ResolvedToolSpec["permission"], +): ResolvedToolSpec => ({ + name, + kind: "code", + runtime: "python", + code: 'def main(**kw):\n return {"ran": True, "echo": kw}\n', + permission, +}); + +/** Drive one tool call through the relay loop and return the response the runner wrote. */ +async function relayOnce( + spec: ResolvedToolSpec, + policy: "auto" | "deny", +): Promise { + const dir = mkdtempSync(join(tmpdir(), "agenta-relay-disp-")); + try { + const id = "call-1"; + writeFileSync( + join(dir, `${id}.req.json`), + JSON.stringify({ toolName: spec.name, toolCallId: id, args: { a: 1 } }), + ); + const relay = startToolRelay(localRelayHost(), dir, [spec], undefined, policy); + const resPath = join(dir, `${id}.res.json`); + const deadline = Date.now() + 5000; + while (Date.now() < deadline && !existsSync(resPath)) { + await new Promise((r) => setTimeout(r, 20)); + } + await relay.stop(); + assert.ok(existsSync(resPath), "the relay wrote a response file"); + return JSON.parse(readFileSync(resPath, "utf-8")) as RelayResponse; + } finally { + rmSync(dir, { recursive: true, force: true }); + } +} + +describe("resolvePermission", () => { + const cases: Array<[ResolvedToolSpec["permission"], "auto" | "deny", "allow" | "deny"]> = [ + ["allow", "auto", "allow"], + ["allow", "deny", "allow"], + ["deny", "auto", "deny"], + ["deny", "deny", "deny"], + ["ask", "auto", "allow"], + ["ask", "deny", "deny"], + [undefined, "auto", "allow"], + [undefined, "deny", "deny"], + // A garbage/unrecognized permission must fall to the policy (never auto-allow). + ["bogus" as ResolvedToolSpec["permission"], "auto", "allow"], + ["bogus" as ResolvedToolSpec["permission"], "deny", "deny"], + ]; + + for (const [permission, policy, expected] of cases) { + it(`permission=${permission ?? "unset"} policy=${policy} -> ${expected}`, () => { + assert.equal(resolvePermission(permission, policy), expected); + }); + } +}); + +describe("startToolRelay permission enforcement", () => { + it("refuses a deny spec without executing its code", async () => { + const res = await relayOnce(codeSpec("blocked", "deny"), "auto"); + assert.equal(res.ok, true, "a policy refusal rides as an ok tool result, not an error"); + assert.equal(res.text, "Tool 'blocked' is denied by policy."); + // The refusal string is the whole result: the snippet's `{"ran": true}` never appears. + assert.ok(!String(res.text).includes("ran"), "the denied tool's code did not run"); + }); + + it("returns unsupported for an allow code spec", async () => { + const res = await relayOnce(codeSpec("permitted", "allow"), "auto"); + assert.equal(res.ok, false); + assert.match(res.error ?? "", /Code tools are not supported by the sidecar\./); + }); + + it("refuses an unset spec when the headless policy is deny", async () => { + const res = await relayOnce(codeSpec("gated", undefined), "deny"); + assert.equal(res.ok, true); + assert.equal(res.text, "Tool 'gated' requires approval; denied in headless mode."); + }); +}); diff --git a/web/packages/agenta-entity-ui/src/DrillInView/SchemaControls/ClaudePermissionsControl.tsx b/web/packages/agenta-entity-ui/src/DrillInView/SchemaControls/ClaudePermissionsControl.tsx new file mode 100644 index 0000000000..96445f3d83 --- /dev/null +++ b/web/packages/agenta-entity-ui/src/DrillInView/SchemaControls/ClaudePermissionsControl.tsx @@ -0,0 +1,166 @@ +/** + * ClaudePermissionsControl + * + * Layer 1 of the agent capability config, Claude-only: the Claude harness's own permission knobs. + * These map 1:1 to Claude Code's `permissions` settings block and persist into the neutral + * `harness_options.claude.permissions` bag on the agent config (the SDK's `ClaudePermissions`, + * agenta.sdk.agents.dtos). The shape: + * { default_mode?: "default"|"acceptEdits"|"plan"|"bypassPermissions", + * allow: string[], deny: string[], ask: string[] } + * + * It is rendered as a collapsed "advanced" section by AgentConfigControl and is hidden when the + * harness is not Claude (nothing here applies to Pi, which never gates tool use). Non-destructive: + * an author who touches nothing leaves `harness_options` untouched. + */ +import {memo, useCallback, useMemo} from "react" + +import {LabeledField} from "@agenta/ui/components/presentational" +import {cn} from "@agenta/ui/styles" +import {Input, Select} from "antd" + +type ClaudePermissionMode = "default" | "acceptEdits" | "plan" | "bypassPermissions" + +interface ClaudePermissionsValue { + default_mode?: ClaudePermissionMode | null + allow?: string[] + deny?: string[] + ask?: string[] +} + +export interface ClaudePermissionsControlProps { + /** The `harness_options.claude.permissions` value (object or null/undefined). */ + value?: Record | null + /** Called with the next permissions object. */ + onChange: (value: Record) => void + /** Disable the control */ + disabled?: boolean + /** Additional CSS classes */ + className?: string +} + +const MODE_OPTIONS: {value: ClaudePermissionMode; label: string}[] = [ + {value: "default", label: "Default (prompt on each gated tool)"}, + {value: "acceptEdits", label: "Accept edits (auto-accept file edits)"}, + {value: "plan", label: "Plan (read-only planning)"}, + {value: "bypassPermissions", label: "Bypass (skip every gate)"}, +] + +/** Read the current value, defaulting the rule lists to empty arrays. */ +function readValue(value: Record | null | undefined): { + defaultMode: ClaudePermissionMode | null + allow: string[] + deny: string[] + ask: string[] +} { + const v = (value ?? {}) as ClaudePermissionsValue + return { + defaultMode: (v.default_mode as ClaudePermissionMode | null | undefined) ?? null, + allow: Array.isArray(v.allow) ? v.allow : [], + deny: Array.isArray(v.deny) ? v.deny : [], + ask: Array.isArray(v.ask) ? v.ask : [], + } +} + +/** Split a textarea (one rule per line) into a trimmed, non-empty rule list. */ +function parseRules(text: string): string[] { + return text + .split("\n") + .map((s) => s.trim()) + .filter(Boolean) +} + +export const ClaudePermissionsControl = memo(function ClaudePermissionsControl({ + value, + onChange, + disabled = false, + className, +}: ClaudePermissionsControlProps) { + const current = useMemo(() => readValue(value), [value]) + + // Compose the full permissions object, overriding one slice. `default_mode` is only written + // when set so an author who only edits rules never emits a null mode. + const write = useCallback( + (patch: { + defaultMode?: ClaudePermissionMode | null + allow?: string[] + deny?: string[] + ask?: string[] + }) => { + const next = {...current, ...patch} + const out: Record = { + allow: next.allow, + deny: next.deny, + ask: next.ask, + } + if (next.defaultMode) out.default_mode = next.defaultMode + onChange(out) + }, + [current, onChange], + ) + + return ( +
+ + + value={current.defaultMode ?? undefined} + onChange={(v) => write({defaultMode: v ?? null})} + options={MODE_OPTIONS} + disabled={disabled} + placeholder="Claude default" + allowClear + className="w-full" + size="small" + /> + + + + write({allow: parseRules(e.target.value)})} + disabled={disabled} + placeholder={"Read\nBash(npm run:*)"} + rows={2} + className="resize-y font-mono text-xs" + /> + + + + write({ask: parseRules(e.target.value)})} + disabled={disabled} + placeholder="Bash(rm:*)" + rows={2} + className="resize-y font-mono text-xs" + /> + + + + write({deny: parseRules(e.target.value)})} + disabled={disabled} + placeholder={"Write\nmcp__server__tool"} + rows={2} + className="resize-y font-mono text-xs" + /> + +
+ ) +}) diff --git a/web/packages/agenta-entity-ui/src/DrillInView/SchemaControls/SandboxPermissionControl.tsx b/web/packages/agenta-entity-ui/src/DrillInView/SchemaControls/SandboxPermissionControl.tsx new file mode 100644 index 0000000000..eb6ddd4aa1 --- /dev/null +++ b/web/packages/agenta-entity-ui/src/DrillInView/SchemaControls/SandboxPermissionControl.tsx @@ -0,0 +1,192 @@ +/** + * SandboxPermissionControl + * + * Layer 2 of the agent capability config: the sandbox security boundary the agent runs inside + * (`sandbox_permission` on the agent config). It applies to every harness. The shape mirrors + * the SDK's `SandboxPermission` (agenta.sdk.agents.dtos): + * { network: { mode: "on"|"off"|"allowlist", allowlist: string[] }, + * filesystem?: "on"|"readonly"|"off", + * enforcement: "strict"|"best_effort" } + * + * The emitted `agent_config` schema exposes `sandbox_permission` as a nullable object (anyOf), + * so the generic SchemaPropertyRenderer can only show a "click to expand" drill-in indicator for + * it. This control renders the four knobs as a real form instead: a network-mode selector, a + * CIDR allowlist shown only when mode = allowlist, a filesystem selector, and an enforcement + * toggle. It is non-destructive: an unset value stays `null` (no declared boundary) until the + * author changes something. + */ +import {memo, useCallback, useMemo} from "react" + +import {LabeledField} from "@agenta/ui/components/presentational" +import {cn} from "@agenta/ui/styles" +import {Input, Select, Typography} from "antd" + +type NetworkMode = "on" | "off" | "allowlist" +type FilesystemMode = "on" | "readonly" | "off" +type Enforcement = "strict" | "best_effort" + +interface NetworkEgress { + mode: NetworkMode + allowlist: string[] +} + +interface SandboxPermissionValue { + network?: NetworkEgress + filesystem?: FilesystemMode | null + enforcement?: Enforcement +} + +export interface SandboxPermissionControlProps { + /** The `sandbox_permission` value (object or null) from the agent config. */ + value?: Record | null + /** Called with the next `sandbox_permission` object (never null once the author touches it). */ + onChange: (value: Record) => void + /** Disable the control */ + disabled?: boolean + /** Additional CSS classes */ + className?: string +} + +const NETWORK_MODE_OPTIONS: {value: NetworkMode; label: string}[] = [ + {value: "on", label: "Allow all egress"}, + {value: "off", label: "Block all egress"}, + {value: "allowlist", label: "Allowlist (CIDR ranges)"}, +] + +const FILESYSTEM_OPTIONS: {value: FilesystemMode; label: string}[] = [ + {value: "on", label: "Read/write"}, + {value: "readonly", label: "Read-only"}, + {value: "off", label: "No access"}, +] + +const ENFORCEMENT_OPTIONS: {value: Enforcement; label: string}[] = [ + {value: "strict", label: "Strict (fail if unenforceable)"}, + {value: "best_effort", label: "Best effort"}, +] + +/** Read the current value with the same defaults the SDK applies, so the form is never blank. */ +function readValue(value: Record | null | undefined): { + networkMode: NetworkMode + allowlist: string[] + filesystem: FilesystemMode | null + enforcement: Enforcement +} { + const v = (value ?? {}) as SandboxPermissionValue + const network = (v.network ?? {}) as NetworkEgress + return { + networkMode: network.mode ?? "on", + allowlist: Array.isArray(network.allowlist) ? network.allowlist : [], + filesystem: (v.filesystem as FilesystemMode | null | undefined) ?? null, + enforcement: v.enforcement ?? "strict", + } +} + +export const SandboxPermissionControl = memo(function SandboxPermissionControl({ + value, + onChange, + disabled = false, + className, +}: SandboxPermissionControlProps) { + const current = useMemo(() => readValue(value), [value]) + + // Compose the full `sandbox_permission` object from the current state, overriding one slice. + // We always write the network object (with mode + allowlist) and enforcement; filesystem is + // only written when set (it is declared, not enforced, and stays omitted otherwise). + const write = useCallback( + (patch: { + networkMode?: NetworkMode + allowlist?: string[] + filesystem?: FilesystemMode | null + enforcement?: Enforcement + }) => { + const next = {...current, ...patch} + const out: Record = { + network: { + mode: next.networkMode, + allowlist: next.networkMode === "allowlist" ? next.allowlist : [], + }, + enforcement: next.enforcement, + } + if (next.filesystem) out.filesystem = next.filesystem + onChange(out) + }, + [current, onChange], + ) + + return ( +
+ Sandbox permissions + + + + value={current.networkMode} + onChange={(v) => write({networkMode: v})} + options={NETWORK_MODE_OPTIONS} + disabled={disabled} + className="w-full" + size="small" + /> + + + {current.networkMode === "allowlist" && ( + + + write({ + allowlist: e.target.value + .split("\n") + .map((s) => s.trim()) + .filter(Boolean), + }) + } + disabled={disabled} + placeholder={"10.0.0.0/8\n192.168.0.0/16"} + rows={3} + className="resize-y font-mono text-xs" + /> + + )} + + + + value={current.filesystem ?? undefined} + onChange={(v) => write({filesystem: v ?? null})} + options={FILESYSTEM_OPTIONS} + disabled={disabled} + placeholder="Not declared" + allowClear + className="w-full" + size="small" + /> + + + + + value={current.enforcement} + onChange={(v) => write({enforcement: v})} + options={ENFORCEMENT_OPTIONS} + disabled={disabled} + className="w-full" + size="small" + /> + +
+ ) +}) diff --git a/web/packages/agenta-entity-ui/src/DrillInView/SchemaControls/ToolItemControl.tsx b/web/packages/agenta-entity-ui/src/DrillInView/SchemaControls/ToolItemControl.tsx index 205bb21c85..70bfee0613 100644 --- a/web/packages/agenta-entity-ui/src/DrillInView/SchemaControls/ToolItemControl.tsx +++ b/web/packages/agenta-entity-ui/src/DrillInView/SchemaControls/ToolItemControl.tsx @@ -23,7 +23,7 @@ import {CollapseToggleButton, getCollapseStyle} from "@agenta/ui/components/pres import {useDrillInUI} from "@agenta/ui/drill-in" import {getProviderIcon} from "@agenta/ui/select-llm-provider" import {CopySimple, MinusCircle} from "@phosphor-icons/react" -import {Button, Tooltip, Typography} from "antd" +import {Button, Select, Tooltip, Typography} from "antd" import clsx from "clsx" import {TOOL_PROVIDERS_META, TOOL_SPECS, parseGatewayFunctionName, type ToolObj} from "./toolUtils" @@ -435,6 +435,98 @@ function defaultRenderProviderIcon(providerKey: string): React.ReactNode { return } +// ============================================================================ +// PER-TOOL PERMISSION (Layer 3: allow / ask / deny) +// ============================================================================ + +/** + * The tool-use permission vocabulary. Stored as a TOP-LEVEL `permission` key on the tool + * object (not inside `agenta_metadata`, which `stripAgentaMetadataDeep` removes on every + * save/run path). The SDK tool config/spec deserializes it with no mapping + * (`AliasChoices("permission", "permission_mode", "permissionMode")`). Unset means the tool + * inherits the global permission policy. + */ +type ToolPermission = "allow" | "ask" | "deny" + +const PERMISSION_OPTIONS: {value: ToolPermission; label: string}[] = [ + {value: "allow", label: "Allow"}, + {value: "ask", label: "Ask"}, + {value: "deny", label: "Deny"}, +] + +/** + * Default the permission from the tool's `read_only` metadata, when the catalog supplied it: + * `read_only === true` → allow (no side effect), `read_only === false` → ask, unknown → unset + * (inherit the global policy). This is a display default only — it is not written back unless + * the author changes it. + */ +function defaultPermissionFromReadOnly( + metadata: Record | undefined, +): ToolPermission | undefined { + const readOnly = metadata?.read_only + if (readOnly === true) return "allow" + if (readOnly === false) return "ask" + return undefined +} + +function isPermission(value: unknown): value is ToolPermission { + return value === "allow" || value === "ask" || value === "deny" +} + +/** + * Read the stored permission. Canonical home is the TOP-LEVEL `permission` key (it survives + * `stripAgentaMetadataDeep`); fall back to any legacy `agenta_metadata.permission_mode` so values + * written before this change still display. + */ +function readPermission( + topLevel: unknown, + metadata: Record | undefined, +): ToolPermission | undefined { + if (isPermission(topLevel)) return topLevel + if (isPermission(metadata?.permission_mode)) return metadata.permission_mode + return undefined +} + +interface ToolPermissionControlProps { + /** The current top-level `permission` value on the tool (canonical store). */ + permission: unknown + /** The current `agenta_metadata` of the tool (read for the read_only default + legacy mode). */ + metadata: Record | undefined + /** Called with the chosen permission (or null to clear back to the global policy). */ + onChange: (permission: ToolPermission | null) => void + disabled?: boolean +} + +/** Compact allow/ask/deny selector for one tool. Displays the read_only-derived default unwritten. */ +function ToolPermissionControl({ + permission, + metadata, + onChange, + disabled, +}: ToolPermissionControlProps) { + const stored = readPermission(permission, metadata) + const displayDefault = defaultPermissionFromReadOnly(metadata) + return ( +
+ + + Permission + + + + value={stored ?? undefined} + onChange={(v) => onChange(v ?? null)} + options={PERMISSION_OPTIONS} + disabled={disabled} + placeholder={displayDefault ? `${displayDefault} (default)` : "Inherit policy"} + allowClear + size="small" + className="w-[160px]" + /> +
+ ) +} + export interface ToolItemControlProps { /** Tool value (object or JSON string) */ value: unknown @@ -500,6 +592,16 @@ export const ToolItemControl = memo(function ToolItemControl({ [onChange, agentaMetadata], ) + // The current `agenta_metadata` as an object (read for the permission control's read_only + // default and any legacy `permission_mode`). + const metadataObj = useMemo( + () => + agentaMetadata && typeof agentaMetadata === "object" && !Array.isArray(agentaMetadata) + ? (agentaMetadata as Record) + : undefined, + [agentaMetadata], + ) + const { toolObj, editorText, @@ -507,6 +609,47 @@ export const ToolItemControl = memo(function ToolItemControl({ onEditorChange, } = useToolState(cleanedValue, isReadOnly, handleChange) + // The current top-level `permission` on the tool (canonical store, survives metadata strip). + // Read off the LIVE editor object (`toolObj`), not the `cleanedValue` prop: the JSON editor + // mutates `toolObj` inside `useToolState` and only propagates to the parent (and back into + // `value`/`cleanedValue`) asynchronously, so the prop lags in-flight edits. + const topLevelPermission = useMemo( + () => + toolObj && typeof toolObj === "object" && !Array.isArray(toolObj) + ? (toolObj as Record).permission + : undefined, + [toolObj], + ) + + // Set (or clear) the TOP-LEVEL `permission` key on this tool, leaving the rest of the tool + // definition intact. It lives at the top level (not inside `agenta_metadata`) so it survives + // `stripAgentaMetadataDeep` on the save/run path. Clearing removes the key (and any legacy + // `agenta_metadata.permission_mode`) so the tool falls back to the global policy. + // + // Compose off the LIVE editor object (`toolObj`), the same source `handleChange` propagates + // through `useToolState`, so a permission change merges with in-flight JSON edits instead of + // clobbering them with the stale `cleanedValue` prop. + const handlePermissionChange = useCallback( + (permission: "allow" | "ask" | "deny" | null) => { + if (!onChange) return + const base = (toolObj && typeof toolObj === "object" ? toolObj : {}) as Record< + string, + unknown + > + const {permission: _dropPermission, ...restBase} = base + // Drop any legacy `permission_mode` stored inside `agenta_metadata`; the top-level key + // is now canonical. + const {permission_mode: _dropLegacy, ...restMeta} = metadataObj ?? {} + const withMeta = + Object.keys(restMeta).length > 0 + ? {...restBase, agenta_metadata: restMeta} + : restBase + const merged = permission ? {...withMeta, permission} : withMeta + onChange(merged as ToolObj) + }, + [onChange, metadataObj, toolObj], + ) + const functionName = (toolObj as Record)?.function && typeof (toolObj as Record).function === "object" @@ -655,12 +798,20 @@ export const ToolItemControl = memo(function ToolItemControl({ containerRef={containerRef} /> {!minimized && ( -