Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions api/oss/src/core/tools/dtos.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ class ToolCatalogAction(BaseModel):
#
categories: List[str] = []
logo: Optional[str] = None
#
# Derived from the provider's behavioral hint tags: True (read-only), False (mutating), None (unknown).
read_only: Optional[bool] = None


class ToolCatalogActionDetails(ToolCatalogAction):
Expand Down Expand Up @@ -274,6 +277,7 @@ class ResolvedAgentTool(BaseModel):
description: Optional[str] = None
input_schema: Optional[Dict[str, Any]] = None
call_ref: str
read_only: Optional[bool] = None


class AgentToolsResolution(BaseModel):
Expand Down
6 changes: 5 additions & 1 deletion api/oss/src/core/tools/providers/composio/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@
)
from oss.src.core.tools.interfaces import ToolsGatewayInterface
from oss.src.core.tools.exceptions import AdapterError
from oss.src.core.tools.providers.composio.catalog import ComposioCatalogClient
from oss.src.core.tools.providers.composio.catalog import (
ComposioCatalogClient,
_derive_read_only,
)


log = get_module_logger(__name__)
Expand Down Expand Up @@ -173,6 +176,7 @@ async def get_action(
if input_params or output_params
else None,
scopes=item.get("scopes") or None,
read_only=_derive_read_only(item.get("tags")),
)

# -----------------------------------------------------------------------
Expand Down
17 changes: 17 additions & 0 deletions api/oss/src/core/tools/providers/composio/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,22 @@ def _parse_integration_detail(item: Dict[str, Any]) -> ToolCatalogIntegration:
)


def _derive_read_only(raw_tags: Any) -> Optional[bool]:
"""Distil the MCP behavioral hint tags into a single read-only signal.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how is this an mcp behavior hint, its composio and composio is not mcp


``readOnlyHint`` with no mutating hint -> read-only; ``destructiveHint`` or
``updateHint`` -> mutating. No hint present -> unknown (``None``); never guessed.
"""
if not isinstance(raw_tags, list):
return None
tags = {t for t in raw_tags if isinstance(t, str)}
if "destructiveHint" in tags or "updateHint" in tags:
return False
if "readOnlyHint" in tags:
return True
return None


def _parse_action(item: Dict[str, Any], integration_key: str) -> ToolCatalogAction:
raw_tags = item.get("tags")
# Tags mix behavioral hint flags with semantic categories — strip the known hints
Expand All @@ -381,4 +397,5 @@ def _parse_action(item: Dict[str, Any], integration_key: str) -> ToolCatalogActi
name=item.get("name", ""),
description=item.get("description"),
categories=categories,
read_only=_derive_read_only(raw_tags),
)
1 change: 1 addition & 0 deletions api/oss/src/core/tools/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,4 +583,5 @@ async def _resolve_composio_tool(
description=action.description,
input_schema=input_schema,
call_ref=call_ref,
read_only=action.read_only,
)
24 changes: 24 additions & 0 deletions api/oss/tests/pytest/unit/tools/test_agent_resolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from oss.src.apis.fastapi.tools.models import ToolResolveRequest
from oss.src.core.tools.dtos import AgentBuiltinTool, AgentComposioTool
from oss.src.core.tools.providers.composio.catalog import _derive_read_only
from oss.src.core.tools.service import ToolsService


Expand Down Expand Up @@ -59,6 +60,7 @@ async def _action(**_kwargs):
schemas=SimpleNamespace(
inputs={"type": "object", "properties": {}},
),
read_only=True,
)

monkeypatch.setattr(service, "resolve_connection_by_slug", _connection)
Expand All @@ -77,3 +79,25 @@ async def _action(**_kwargs):
)
assert result.builtins == ["read"]
assert result.custom[0].call_ref == "tools.composio.github.GET_USER.c1"
assert result.custom[0].read_only is True


@pytest.mark.parametrize(
    "tags, expected",
    [
        (["readOnlyHint"], True),
        (["updateHint"], False),
        (["destructiveHint"], False),
        # A mutating hint wins even when readOnlyHint is also present.
        (["destructiveHint", "readOnlyHint"], False),
        (["updateHint", "readOnlyHint"], False),
        # Unknown == None (never guess), not False.
        ([], None),
        (None, None),
        (["unrelatedHint"], None),
        # Non-list input is ignored, even when it contains a hint name.
        ("readOnlyHint", None),
        (("readOnlyHint",), None),
        ({"readOnlyHint": True}, None),
        # Non-string entries (including unhashable ones) are skipped, not read as hints.
        ([None, 1, "readOnlyHint"], True),
        ([{"name": "destructiveHint"}, "readOnlyHint"], True),
        # Unrelated tags alongside a hint do not change the result.
        (["unrelatedHint", "readOnlyHint"], True),
    ],
)
def test_derive_read_only_tag_matrix(tags, expected):
    """Pin the tag -> read_only mapping, including the 'unknown stays None' rule."""
    # `is` (not `==`) so that False and None can never be confused.
    assert _derive_read_only(tags) is expected
31 changes: 31 additions & 0 deletions docs/design/agent-workflows/projects/capability-config/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Capability and permission configuration

How an author configures what an Agenta agent may do (files, network, tools, tool approvals),
and how those controls enforce end to end: from the playground form, through the SDK and agent
service, to the runner and the harness. Graduated from the scratch notes in
`../../scratch/capability-architecture.md` on 2026-06-23.

## The shape in one paragraph

Three configuration layers, each with one job and one enforcement point. **Layer 1, harness
configuration:** the runner translates author kwargs into the harness's own config (a
`.claude/settings.json` for Claude, `builtin_names` for Pi). **Layer 2, sandbox permission:** an
optional `sandbox_permission` field draws the network and filesystem boundary, enforced by the
backend when it provisions the sandbox. **Layer 3, tool permission:** a per-tool permission
(always-allow / ask / deny), enforced at the runner relay for resolved tools and at the harness
permission plane for builtins. The work spans the playground frontend, the schema, the SDK, the
service, and the runner.

## Files

- `context.md` — why this exists, goals, non-goals, background, how it relates to the sibling
projects.
- `proposal.md` — the three-layer design. The canonical spec.
- `plan.md` — phased execution plan, end to end including the playground frontend.
- `research.md` — current-state codebase findings and exact insertion points (backend, runner,
frontend), plus the library facts the design rests on.
- `status.md` — progress, decisions, and open questions. The source of truth for state.

## Status

Code-complete and reviewed. The backend, runner, and frontend are built and green, and have been live-QA'd on the running stack. The live Daytona egress checks and the Claude behavioral cells are still pending credentials. See `status.md`.
73 changes: 73 additions & 0 deletions docs/design/agent-workflows/projects/capability-config/context.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Context

## Why this work exists

Every agent run has to be governed. The author needs to say what the agent may touch, and the
system has to enforce that across two harnesses (`pi` and `claude`) and three backends
(sandbox-agent local, sandbox-agent on Daytona, and a future in-process local SDK).

Today almost none of this is wired:

- The runner drops Pi's `builtin_names`, so even Pi's own tool selection has no effect on the
sandbox-agent path.
- The runner never restricts Claude. It creates the session with only `cwd` and `mcpServers`,
so "Claude without web" or "Claude read-only" is not expressible.
- The runner never sets a network boundary, so a Daytona run has full egress by default.
- `permission_policy` is the only live control, and it is coarse (auto or deny, all tools at
once) and effective on Claude only.
- The playground renders every config field unconditionally, with no per-harness gating and no
way to set capability or per-tool approval.

So a request as simple as "give this agent web access but not write access" cannot be
expressed, on either harness, from the playground or the SDK. This project makes capability and
permission a real, configurable, end-to-end feature.

## Goals

1. A three-layer configuration model the author can set: harness configuration, sandbox
permission, and per-tool permission. Each layer has one job and one enforcement point.
2. End to end. The playground frontend is in scope: the config form gains the new sections, and
the agent chat gains a tool-approval surface for the "ask" permission.
3. Honest enforcement. The sandbox layer is authoritative for the network and the filesystem. A
run fails loud when a backend cannot deliver a requested guarantee, rather than pretending.
4. Sensible defaults. Read-only tools default to always-allow and mutating tools to ask, using
Composio's read/write metadata, so the author does not label every tool by hand.

## Non-goals (for now)

- **Pi MCP.** Deferred. When built, it will follow the same permission pattern as Claude
(settings-style `mcp__<server>` rules). Tracked in `../harness-capabilities/`.
- **A real filesystem jail.** No backend confines the filesystem today; the local cwd is a temp
dir, not a jail. Layer 2 ships network first; filesystem stays tool-plane only until a backend
can enforce it.
- **Durable / unattended HITL approval.** The "ask" permission this project ships asks the user
in the open chat. The global, durable approval channel that survives a closed tab or a
scheduled run is Flow 7 in `../../scratch/flows-and-capabilities.md`, a later milestone.
- **A sandbox boundary for the local backend.** The local sidecar is the host; it cannot enforce
Layer 2. That is by design, and the fail-loud rule covers it.

## Background

The runtime splits work across a Python agent service (`services/oss/src/agent/`, decides what
to run) and a TypeScript runner (`services/agent/`, runs it). The runner drives the harness over
an ACP bridge, `sandbox-agent`, on a chosen backend. The SDK (`sdks/python/agenta/sdk/agents/`)
owns the neutral config, the ports, and the per-harness adapters.

Three earlier scratch documents set up this project, and their facts are folded into
`research.md`:

- `../../scratch/capability-map.md` — the current-state web/exec/read/write cut: what each
harness can do, what is on by default, what the backend changes.
- `../../scratch/capability-architecture.md` — the design exploration this project's
`proposal.md` cleans up.
- `../../scratch/flows-and-capabilities.md` — the user-facing flows, including Flow 7 (HITL).

## Relation to sibling projects

- `../harness-capabilities/` declares which capabilities each harness supports (the static
capability table) and owns the deferred Pi-MCP work. This project sets the capability *values*
the author chooses; that project declares which choices a harness can honor. They meet at the
schema and the fail-loud check.
- `../model-config/` is the same static-then-dynamic pattern for the model axis. Layer 1's
Claude `model` setting overlaps it.
- `../skills-config/` configures forced skills, a different axis on the same agent config.
Loading
Loading