From 59bf20a98fda115c3cb96a0de197b140faae1587 Mon Sep 17 00:00:00 2001 From: Mahmoud Mabrouk Date: Tue, 16 Jun 2026 20:17:40 +0200 Subject: [PATCH] feat(agent): runnable tools as agent configuration (WP-7) --- api/oss/src/apis/fastapi/tools/models.py | 18 ++ api/oss/src/apis/fastapi/tools/router.py | 89 ++++--- api/oss/src/core/tools/dtos.py | 62 ++++- api/oss/src/core/tools/exceptions.py | 18 ++ api/oss/src/core/tools/service.py | 159 ++++++++++++ .../tracing-in-the-agent-service.md | 5 +- .../wp-2-agent-service/implementation-plan.md | 8 + .../agent-workflows/wp-2-agent-service/qa.md | 176 +++++++++++++ .../agent-workflows/wp-7-tools/README.md | 97 ++++++- .../docker-compose/ee/docker-compose.dev.yml | 8 +- services/agent/README.md | 38 ++- services/agent/docker-compose.agent.yml | 98 ------- services/agent/docker-compose.stack.yml | 86 ------- services/agent/scripts/register_agent_app.py | 166 ------------ services/agent/src/runPi.ts | 165 +++++++++++- services/entrypoints/agent_main.py | 47 ---- services/oss/src/agent.py | 241 +++++++++++++++++- services/oss/src/agent_pi/config.py | 8 +- services/oss/src/agent_pi/pi_harness.py | 4 + services/oss/src/agent_pi/pi_http_harness.py | 4 + services/oss/src/agent_pi/ports.py | 26 ++ services/oss/src/agent_pi/schemas.py | 32 ++- 22 files changed, 1089 insertions(+), 466 deletions(-) create mode 100644 docs/design/agent-workflows/wp-2-agent-service/qa.md delete mode 100644 services/agent/docker-compose.agent.yml delete mode 100644 services/agent/docker-compose.stack.yml delete mode 100644 services/agent/scripts/register_agent_app.py delete mode 100644 services/entrypoints/agent_main.py diff --git a/api/oss/src/apis/fastapi/tools/models.py b/api/oss/src/apis/fastapi/tools/models.py index 891b276c22..768574f23c 100644 --- a/api/oss/src/apis/fastapi/tools/models.py +++ b/api/oss/src/apis/fastapi/tools/models.py @@ -15,6 +15,9 @@ ToolConnectionCreate, # Tool Calls ToolResult, + # Agent tools + AgentToolReference, + 
ResolvedAgentTool, ) @@ -87,3 +90,18 @@ class ToolConnectionsResponse(BaseModel): class ToolCallResponse(BaseModel): call: ToolResult + + +# --------------------------------------------------------------------------- +# Agent tool resolution +# --------------------------------------------------------------------------- + + +class ToolResolveRequest(BaseModel): + tools: List[AgentToolReference] = [] + + +class ToolResolveResponse(BaseModel): + count: int = 0 + builtins: List[str] = [] + custom: List[ResolvedAgentTool] = [] diff --git a/api/oss/src/apis/fastapi/tools/router.py b/api/oss/src/apis/fastapi/tools/router.py index 043d114fa7..3cc689a055 100644 --- a/api/oss/src/apis/fastapi/tools/router.py +++ b/api/oss/src/apis/fastapi/tools/router.py @@ -29,6 +29,9 @@ ToolConnectionsResponse, # ToolCallResponse, + # + ToolResolveRequest, + ToolResolveResponse, ) from oss.src.core.shared.dtos import Status @@ -42,10 +45,12 @@ ToolResultData, ) from oss.src.core.tools.exceptions import ( + ActionNotFoundError, AdapterError, ConnectionInactiveError, ConnectionInvalidError, ConnectionNotFoundError, + ToolSlugInvalidError, ) from oss.src.core.tools.service import ( ToolsService, @@ -208,6 +213,14 @@ def __init__( ) # --- Tool operations --- + self.router.add_api_route( + "/resolve", + self.resolve_tools, + methods=["POST"], + operation_id="resolve_agent_tools", + response_model=ToolResolveResponse, + response_model_exclude_none=True, + ) self.router.add_api_route( "/call", self.call_tool, @@ -886,6 +899,51 @@ async def callback_connection( # Tool Calls # ----------------------------------------------------------------------- + @intercept_exceptions() + @handle_adapter_exceptions() + async def resolve_tools( + self, + request: Request, + *, + body: ToolResolveRequest, + ) -> ToolResolveResponse: + """Resolve an agent's tool references into model-ready specs. + + Validates Composio connections up front and enriches each action from the + catalog, so a running agent (e.g. 
Pi) gets ``customTools`` whose ``execute`` + routes back through ``POST /tools/call`` — provider keys stay server-side. + """ + if is_ee(): + has_permission = await check_action_access( + user_uid=request.state.user_id, + project_id=request.state.project_id, + permission=Permission.VIEW_TOOLS, + ) + if not has_permission: + raise FORBIDDEN_EXCEPTION + + try: + resolution = await self.tools_service.resolve_agent_tools( + project_id=UUID(request.state.project_id), + tools=body.tools, + ) + except ConnectionNotFoundError as e: + raise HTTPException(status_code=404, detail=e.message) from e + except ConnectionInactiveError as e: + raise HTTPException(status_code=400, detail=e.message) from e + except ConnectionInvalidError as e: + raise HTTPException(status_code=400, detail=e.message) from e + except ToolSlugInvalidError as e: + raise HTTPException(status_code=400, detail=e.message) from e + except ActionNotFoundError as e: + raise HTTPException(status_code=404, detail=e.message) from e + + return ToolResolveResponse( + count=len(resolution.builtins) + len(resolution.custom), + builtins=resolution.builtins, + custom=resolution.custom, + ) + @intercept_exceptions() @handle_adapter_exceptions() async def call_tool( @@ -931,39 +989,12 @@ async def call_tool( connection_slug = slug_parts[4] try: - connections = await self.tools_service.query_connections( + connection = await self.tools_service.resolve_connection_by_slug( project_id=UUID(request.state.project_id), provider_key=provider_key, integration_key=integration_key, + connection_slug=connection_slug, ) - - connection = next( - (c for c in connections if c.slug == connection_slug), None - ) - - if not connection: - raise ConnectionNotFoundError( - connection_slug=connection_slug, - provider_key=provider_key, - integration_key=integration_key, - ) - - if not connection.is_active: - raise ConnectionInactiveError(connection_id=connection_slug) - - if not connection.is_valid: - raise ConnectionInvalidError( - 
connection_slug=connection_slug, - detail="Please refresh the connection.", - ) - - if not connection.provider_connection_id: - raise ConnectionNotFoundError( - connection_slug=connection_slug, - provider_key=provider_key, - integration_key=integration_key, - ) - except ConnectionNotFoundError as e: raise HTTPException(status_code=404, detail=e.message) from e except ConnectionInactiveError as e: diff --git a/api/oss/src/core/tools/dtos.py b/api/oss/src/core/tools/dtos.py index a588965f61..3c3f0ec53e 100644 --- a/api/oss/src/core/tools/dtos.py +++ b/api/oss/src/core/tools/dtos.py @@ -1,8 +1,8 @@ from enum import Enum -from typing import Any, Dict, List, Optional +from typing import Annotated, Any, Dict, List, Literal, Optional, Union from agenta.sdk.models.workflows import JsonSchemas -from pydantic import BaseModel +from pydantic import BaseModel, Field from oss.src.core.shared.dtos import ( Header, @@ -238,3 +238,61 @@ class ToolExecutionResponse(BaseModel): data: Optional[Json] = None error: Optional[str] = None successful: bool = False + + +# --------------------------------------------------------------------------- +# Agent tools (config references + resolution) +# --------------------------------------------------------------------------- + +# A provider-agnostic list of tool references lives under an agent revision's +# ``parameters["tools"]``. Each entry is a discriminated union on ``type``: config +# holds references and display metadata only, never secrets. The backend resolves +# them into model-ready specs at invoke time (see ToolsService.resolve_agent_tools). + + +class AgentBuiltinTool(BaseModel): + """A Pi built-in tool, referenced by name (e.g. 
``read``, ``bash``).""" + + type: Literal["builtin"] = "builtin" + name: str + + +class AgentComposioTool(BaseModel): + """A Composio action, carrying the slug segments ``/tools/call`` parses.""" + + type: Literal["composio"] = "composio" + integration: str + action: str + connection: str + # Function name shown to the model. Defaults to ``{integration}__{action}``. + name: Optional[str] = None + + +AgentToolReference = Annotated[ + Union[AgentBuiltinTool, AgentComposioTool], + Field(discriminator="type"), +] + + +class ResolvedAgentTool(BaseModel): + """A runnable reference resolved into a model-ready tool spec. + + ``call_ref`` is the ``tools.{provider}.{integration}.{action}.{connection}`` slug + the execution bridge sends back to ``POST /tools/call``. + """ + + name: str + description: Optional[str] = None + input_schema: Optional[Dict[str, Any]] = None + call_ref: str + + +class AgentToolsResolution(BaseModel): + """Outcome of resolving an agent's ``tools`` list. + + ``builtins`` pass straight into Pi's ``tools: string[]``; ``custom`` become Pi + ``customTools`` whose ``execute`` routes through ``/tools/call``. 
+ """ + + builtins: List[str] = [] + custom: List[ResolvedAgentTool] = [] diff --git a/api/oss/src/core/tools/exceptions.py b/api/oss/src/core/tools/exceptions.py index f46c08b6cd..e9dbd54f3f 100644 --- a/api/oss/src/core/tools/exceptions.py +++ b/api/oss/src/core/tools/exceptions.py @@ -40,6 +40,24 @@ def __init__( super().__init__(msg) +class ActionNotFoundError(ToolsError): + """Raised when a catalog action cannot be found for an integration.""" + + def __init__( + self, + *, + provider_key: str, + integration_key: str, + action_key: str, + ): + self.provider_key = provider_key + self.integration_key = integration_key + self.action_key = action_key + super().__init__( + f"Action not found: {provider_key}/{integration_key}/{action_key}" + ) + + class ConnectionSlugConflictError(ToolsError): """Raised when a connection slug already exists for the integration.""" diff --git a/api/oss/src/core/tools/service.py b/api/oss/src/core/tools/service.py index f603bc4d42..a9e1e4c779 100644 --- a/api/oss/src/core/tools/service.py +++ b/api/oss/src/core/tools/service.py @@ -1,3 +1,4 @@ +import re from typing import Any, Dict, List, Optional, Tuple from uuid import UUID @@ -6,6 +7,11 @@ from oss.src.core.tools.utils import make_oauth_state from oss.src.core.tools.dtos import ( + AgentBuiltinTool, + AgentComposioTool, + AgentToolReference, + AgentToolsResolution, + ResolvedAgentTool, ToolCatalogAction, ToolCatalogActionDetails, ToolCatalogIntegration, @@ -15,17 +21,27 @@ ToolConnectionRequest, ToolExecutionRequest, ToolExecutionResponse, + ToolProviderKind, ) from oss.src.core.tools.interfaces import ( ToolsDAOInterface, ) from oss.src.core.tools.registry import ToolsGatewayRegistry from oss.src.core.tools.exceptions import ( + ActionNotFoundError, ConnectionInactiveError, + ConnectionInvalidError, ConnectionNotFoundError, + ToolSlugInvalidError, ) +# A slug segment is safe for the ``tools.{provider}.{integration}.{action}.{connection}`` +# call-ref. 
``__`` is forbidden because ``/tools/call`` round-trips ``__`` <-> ``.`` when +# parsing function names, so a ``__`` inside a segment would corrupt the split. +_SLUG_SEGMENT_RE = re.compile(r"^[a-zA-Z0-9-]+(?:_[a-zA-Z0-9-]+)*$") + + log = get_module_logger(__name__) @@ -408,3 +424,146 @@ async def execute_tool( arguments=arguments, ), ) + + # ----------------------------------------------------------------------- + # Connection resolution (shared by the call endpoint and the agent resolver) + # ----------------------------------------------------------------------- + + async def resolve_connection_by_slug( + self, + *, + project_id: UUID, + provider_key: str, + integration_key: str, + connection_slug: str, + ) -> ToolConnection: + """Resolve a project-scoped connection slug to a usable connection row. + + Raises a domain exception when the connection is missing, inactive, invalid, + or never finished its provider handshake. Shared by ``call_tool`` (execution) + and ``resolve_agent_tools`` (up-front validation). + """ + # Query all (not active-only) so an inactive connection yields a precise + # "inactive" error instead of an indistinguishable "not found". 
+ connections = await self.query_connections( + project_id=project_id, + provider_key=provider_key, + integration_key=integration_key, + is_active=None, + ) + + connection = next( + (c for c in connections if c.slug == connection_slug), + None, + ) + + if not connection: + raise ConnectionNotFoundError( + provider_key=provider_key, + integration_key=integration_key, + connection_slug=connection_slug, + ) + + if not connection.is_active: + raise ConnectionInactiveError(connection_id=connection_slug) + + if not connection.is_valid: + raise ConnectionInvalidError( + connection_slug=connection_slug, + detail="Please refresh the connection.", + ) + + if not connection.provider_connection_id: + raise ConnectionNotFoundError( + provider_key=provider_key, + integration_key=integration_key, + connection_slug=connection_slug, + ) + + return connection + + # ----------------------------------------------------------------------- + # Agent tool resolution + # ----------------------------------------------------------------------- + + async def resolve_agent_tools( + self, + *, + project_id: UUID, + tools: List[AgentToolReference], + ) -> AgentToolsResolution: + """Resolve an agent's tool references into model-ready specs. + + ``builtin`` references pass through as names. ``composio`` references are + validated against the project's connections up front and enriched from the + catalog (description + input schema), so the model never sees a stale schema + and the invoke fails fast on a missing/invalid connection rather than mid-loop. 
+ """ + builtins: List[str] = [] + custom: List[ResolvedAgentTool] = [] + + for ref in tools: + if isinstance(ref, AgentBuiltinTool): + if ref.name: + builtins.append(ref.name) + continue + + if isinstance(ref, AgentComposioTool): + custom.append( + await self._resolve_composio_tool( + project_id=project_id, + ref=ref, + ) + ) + + return AgentToolsResolution(builtins=builtins, custom=custom) + + async def _resolve_composio_tool( + self, + *, + project_id: UUID, + ref: AgentComposioTool, + ) -> ResolvedAgentTool: + provider_key = ToolProviderKind.COMPOSIO.value + + for segment in (ref.integration, ref.action, ref.connection): + if not _SLUG_SEGMENT_RE.match(segment): + raise ToolSlugInvalidError( + slug=f"{provider_key}.{ref.integration}.{ref.action}.{ref.connection}", + detail=f"Invalid slug segment: {segment!r}", + ) + + # Fail fast if the connection is missing/inactive/invalid for this project. + await self.resolve_connection_by_slug( + project_id=project_id, + provider_key=provider_key, + integration_key=ref.integration, + connection_slug=ref.connection, + ) + + action = await self.get_action( + provider_key=provider_key, + integration_key=ref.integration, + action_key=ref.action, + ) + if not action: + raise ActionNotFoundError( + provider_key=provider_key, + integration_key=ref.integration, + action_key=ref.action, + ) + + input_schema = ( + action.schemas.inputs if action.schemas and action.schemas.inputs else None + ) + name = ref.name or f"{ref.integration}__{ref.action}" + call_ref = ( + f"tools.{provider_key}.{ref.integration}.{ref.action}.{ref.connection}" + ) + + return ResolvedAgentTool( + name=name, + description=action.description, + input_schema=input_schema, + call_ref=call_ref, + ) diff --git a/docs/design/agent-workflows/wp-1-pi-tracing/tracing-in-the-agent-service.md b/docs/design/agent-workflows/wp-1-pi-tracing/tracing-in-the-agent-service.md index 0bb4b12777..977427469c 100644 --- 
a/docs/design/agent-workflows/wp-1-pi-tracing/tracing-in-the-agent-service.md +++ b/docs/design/agent-workflows/wp-1-pi-tracing/tracing-in-the-agent-service.md @@ -92,8 +92,9 @@ network the internal hostname resolves from both; if it does not, the sidecar's ## How to verify -1. Start the service (`entrypoints.agent_main:app`) with `AGENTA_HOST` and - `AGENTA_API_KEY` set and a Pi login or provider key available. +1. Start the services app (`entrypoints.main:app`, which mounts the agent at + `/agent/v0`) with `AGENTA_HOST` and `AGENTA_API_KEY` set and a Pi login or provider + key available. 2. POST a chat-style body to `/agent/v0/invoke` and read `x-ag-trace-id` from the response headers (it equals `trace_id` in the body). 3. Fetch the trace and confirm the merged tree and the totals: diff --git a/docs/design/agent-workflows/wp-2-agent-service/implementation-plan.md b/docs/design/agent-workflows/wp-2-agent-service/implementation-plan.md index 81f8cb6e88..f905fd6d7a 100644 --- a/docs/design/agent-workflows/wp-2-agent-service/implementation-plan.md +++ b/docs/design/agent-workflows/wp-2-agent-service/implementation-plan.md @@ -4,6 +4,14 @@ Status: MVP built and verified by curl (2026-06-15). Decisions below were taken; "Implemented" section records what shipped. Original decision points are kept marked **[DECISION]** for history. +> Note (current state): the sections below describe the iterative MVP, including a +> standalone entrypoint (`agent_main.py`) and dedicated composes +> (`docker-compose.agent.yml`, `docker-compose.stack.yml`). Those were **removed** in +> favor of the integrated path only: the agent is mounted in `entrypoints/main.py` at +> `/agent/v0` and the `agent-pi` sidecar lives in +> `hosting/docker-compose/ee/docker-compose.dev.yml`. The standalone run commands below +> are historical. See `qa.md` for the rationale. 
+ ## Implemented (MVP, verified by curl) Per the decisions: a Python service exposes the Agenta `/invoke` contract (auth, diff --git a/docs/design/agent-workflows/wp-2-agent-service/qa.md b/docs/design/agent-workflows/wp-2-agent-service/qa.md new file mode 100644 index 0000000000..b7d25221d9 --- /dev/null +++ b/docs/design/agent-workflows/wp-2-agent-service/qa.md @@ -0,0 +1,176 @@ +# Agent service: Q&A + +Running notes answering review questions about the agent workflow implementation +(branch `feat/agent-workflows`). Questions are in no particular order. + +--- + +## Q: Why a separate entrypoint `agent_main.py` instead of `main.py`? + +Short answer: `agent_main.py` is not a replacement for `main.py`. It is an extra, +lightweight runner for testing the agent in isolation. The real integration lives in +`main.py`, and that is what the 8280 stack actually runs. + +The two entrypoints: + +- `services/entrypoints/main.py` is the full services app. It mounts every service + (chat, completion, all the managed evaluators, and now the agent at `/agent/v0`). This + is the production/dev container entrypoint and the path the playground uses + (`/services/agent/v0/...`). The agent is a first-class part of it: + `app.mount("/agent/v0", agent_v0_app)`. + +- `services/entrypoints/agent_main.py` mounts only the agent app plus `/health`. + +Why we added `agent_main.py`: + +1. Isolated, fast iteration. Early on the deliverable was "a standalone agent service + verified by curl" (no full stack). Running `main.py` pulls in the whole managed + evaluator surface (litellm, all the builtins) and `ag.init()` for the full app. + `agent_main.py` lets you run just the agent: + `uv run uvicorn entrypoints.agent_main:app --port 8090` and curl it, without the rest. + +2. The dedicated `:8092` Docker compose. Before the agent was integrated into the real + stack, it ran standalone in its own compose. That container ran `agent_main.py`. + +3. A place for cross-origin CORS. 
When the playground had to call the agent on a + different port (`:8092` vs the web on `:8280`), the browser needed a credentialed CORS + policy (echo the specific origin + allow credentials). `agent_main.py` sets that + (`allow_origin_regex` + `allow_credentials=True`). `main.py` keeps the stricter + shared services CORS, which is fine for it because, once integrated, the agent is + served same-origin (`/services/agent/v0`) so there is no CORS at all. + +Net: `main.py` is the real, integrated path (same-origin, used by the 8280 stack). +`agent_main.py` was a convenience runner for isolated local/standalone testing and the +old dedicated compose. + +**Update (decision): dropped.** We removed `agent_main.py` and the two standalone +composes (`docker-compose.agent.yml`, `docker-compose.stack.yml`) to keep only the +integrated path: the agent mounted in `entrypoints/main.py` at `/agent/v0`, served by +the normal services container, with the `agent-pi` sidecar wired into +`hosting/docker-compose/ee/docker-compose.dev.yml`. If we ever want isolated runs again, +the cleaner approach is a profile/override on the real compose rather than a parallel +entrypoint. + +--- + +## Q: How does the agent service use the workflow middleware? Which parts does it have access to (secrets, invoke, inspect, ...)? + +The agent gets the whole Agenta workflow machinery "for free" because it is built the +same way as chat and completion: `ag.create_app()` + `ag.workflow(schemas=...)` + +`ag.route("/", flags={"is_chat": True})` in `services/oss/src/agent.py`. That was the +point of the Python-front decision: the Python layer provides auth, middleware, +tracing, secrets, and the invoke/inspect contract; the Node wrapper only runs Pi. + +There are **two middleware layers**. + +### Layer 1 — HTTP/ASGI middleware (per request) + +Added by `ag.create_app()` (`sdks/.../decorators/routing.py:64`). Outermost first: + +- **CORSMiddleware** — cross-origin headers. 
Irrelevant on the integrated same-origin + path; it mattered only for the old cross-port setup. +- **AuthMiddleware** — verifies the caller against `{host}/api/access/permissions/check` + and puts the resolved credential on `request.state.auth["credentials"]` (a signed + `Secret`). With `AGENTA_SERVICES_MIDDLEWARE_AUTH_ENABLED=false` it passes the raw + `Authorization` through without a remote check. This is the credential everything + downstream uses. +- **OTelMiddleware** — opens the request's tracing context, i.e. the workflow span the + whole run nests under. + +### Layer 2 — Workflow middleware (inside `wf.invoke`) + +Set on the workflow object (`decorators/running.py:197`), run in order around the +handler: + +- **VaultMiddleware** — resolves secrets for the credential: it fetches the project's + vault secrets from `{api_url}/secrets/`, combines them with any local secrets, checks + access, and exposes them on the running context. (More on "access" below.) +- **ResolverMiddleware** — resolves which handler to run from the revision URI, hydrates + references / revision / config from the backend when needed, and resolves embeds in + parameters. +- **NormalizerMiddleware** — maps the request to the handler's arguments by inspecting + its signature (`inputs`, `messages`, `parameters` pulled from `data`), calls + `_agent(...)`, and wraps the return value into the response envelope, attaching + `trace_id` / `span_id`. + +### What the agent actually has access to / uses + +- **invoke** — yes, fully. `POST /services/agent/v0/invoke` runs the entire chain + (auth -> vault -> resolver -> normalizer -> `_agent`). `_agent` receives `inputs`, + `messages`, and `parameters` already mapped for it. +- **inspect** — yes. `POST /services/agent/v0/inspect` returns the agent's interface, + i.e. `AGENT_SCHEMAS` (chat `messages` in, `message` out, config = `model` + + `agents_md`). This is what tells the playground to render a chat box and the two + config fields. 
(Known bug: inspect currently 500s under session-cookie auth; it did + not block the playground because the create flow takes the schema from the catalog + template.) +- **auth / credentials** — yes. The resolved `Secret` credential is available to the + handler and to tracing export. +- **tracing** — yes. `_agent` reads the active workflow span via `_trace_context()` and + threads the `traceparent` (plus endpoint/auth) to the Pi sidecar, so the Pi spans + nest under the `/invoke` span in one trace. +- **secrets** — available but **not consumed yet**. VaultMiddleware resolves the + project's secrets on every invoke and exposes them on the running context. Chat and + completion use them automatically because litellm reads them. The agent handler does + not read them today; the Pi model auth currently comes from the mounted + `~/.pi/agent` (Codex login) or `AGENTA_API_KEY`/provider env on the sidecar. Wiring + the resolved secrets into the Pi run (the "startup hook injects the provider/tool + keys" step) is exactly where this plugs in: read the secrets in `_agent`, pass them in + the harness request, and have the wrapper inject them (`setRuntimeApiKey` / env). That + is the planned secrets work, not yet built. + +One detail: the route passes `secrets=None` into `wf.invoke`, so the agent does not +hand secrets in; VaultMiddleware fetches them itself from the credential. The gap is +only on the consuming side (the handler), not the resolving side. + +--- + +## Q: Why does tracing look different / broken now vs the old trace? + +Reference old trace `6ab51033...`: root `invoke_agent`, four `turn`s, several +`chat gpt-5.5` spans, and `execute_tool ls/read/bash/write` — 14 spans, with +cumulative token + cost rolled up onto the `turn` and `invoke_agent` spans. + +Current trace (e.g. `329698f7...`): `_agent -> invoke_agent -> turn 0 -> chat` — 4 +spans; the `chat` span has tokens + cost, the parents do not. 
+ +Tracing is **not broken** (spans land, nest correctly, the `chat` span carries model, +tokens, cost). Two things changed: + +### 1. Different agent and task (the big, expected difference) + +The old trace is the WP-1 POC: tools enabled (`read/bash/edit/write/ls`) and a task +that needs them ("read notes.txt, write greeting.txt"). That drives a multi-turn loop +with tool calls, so you get many turns, many `chat` spans, and `execute_tool` spans. + +The current app is the hello-world chat agent: `tools=[]` and "answer in one or two +short sentences". So it does exactly one turn, no tools, one `chat`. Same +instrumentation, a trivial run. To get a rich trace again, give the agent tools +(built-in `read/bash/...` or the WP-7 runnable tools) and a task that uses them. + +### 2. Cumulative token/cost rollup is lost across the process boundary (a real regression) + +In the old (standalone) trace, all spans were exported by one process in one batch, so +Agenta's per-ingest-batch cumulative computation could build the roll-up tree and put +cumulative tokens/cost on `turn` and `invoke_agent`. + +Now the trace is split across **two exporters**: +- Python (services container) exports `_agent` (the workflow span). +- Node (`agent-pi`) exports `invoke_agent -> turn -> chat` (the Pi spans), where + `invoke_agent`'s parent is the **remote** `_agent`. + +Agenta builds the cumulative tree per ingest batch and "attaches a span only if its +parent is already seen" (see the `orderParentFirst` comment in `agenta-otel.ts`). In the +Node batch, `invoke_agent`'s parent (`_agent`) is in the **other** (Python) batch, so the +Pi subtree is dropped from the cumulative tree. Result: the leaf `chat` keeps its raw +`incremental` tokens, but `cumulative` is missing on `chat` and there is no token/cost +rollup on `turn` / `invoke_agent` / `_agent`. (Duration still rolls up because it is +computed differently.) + +So the agent- and turn-level token/cost totals you used to see are gone. 
This is a +side effect of nesting the agent under the Agenta workflow span (the integration goal). +The fix belongs on the tracing side (owned by the instrumentation work): compute the +cumulative roll-up across the whole trace by `trace_id` rather than per ingest batch, so +a trace split between the Python workflow span and the Node Pi spans still aggregates. +Until then, per-span (leaf `chat`) tokens/cost are correct; the rolled-up agent totals +are not. diff --git a/docs/design/agent-workflows/wp-7-tools/README.md b/docs/design/agent-workflows/wp-7-tools/README.md index 225c77eb26..483f5dc688 100644 --- a/docs/design/agent-workflows/wp-7-tools/README.md +++ b/docs/design/agent-workflows/wp-7-tools/README.md @@ -1,6 +1,8 @@ # WP-7: Runnable tools as agent configuration -Status: design draft. Builds on WP-2 (agent service) and WP-6 (workflow type and template). +Status: Composio MVP implemented. Resolution lives in `api`; the bridge routes Pi tool +calls back through `POST /tools/call`. Builds on WP-2 (agent service) and WP-6 (workflow +type and template). See [Implementation status](#implementation-status-composio-mvp) below. ## Goal @@ -205,6 +207,99 @@ dispatches purely by `provider_key` through the registry, the agent side stays p smoke run, with the call nested under the agent invoke span and the Composio key absent from the sandbox. +## Implementation status (Composio MVP) + +What landed, by seam. WP-6 is not started, so resolution runs in `api` behind a thin +endpoint that the agent service calls over HTTP; when WP-6 lands, its invoke path calls the +same `ToolsService.resolve_agent_tools(...)` in-process and the HTTP hop drops out. + +**Backend (`api`) — the resolver and the shared connection lookup.** + +- `core/tools/dtos.py`: `AgentToolReference` (discriminated `builtin` | `composio`), + `ResolvedAgentTool` (`name`, `description`, `input_schema`, `call_ref`), and + `AgentToolsResolution` (`builtins`, `custom`). 
+- `core/tools/service.py`: `resolve_connection_by_slug(...)` (extracted from `call_tool`, now + shared) and `resolve_agent_tools(...)`. Composio refs validate the connection up front, + enrich `description` + `input_schema` from the catalog (`get_action`), and build the + `call_ref` `tools.composio.{integration}.{action}.{connection}`. Slug segments are validated + and `__` is rejected so the `/tools/call` `__`↔`.` round-trip can't corrupt the split. +- `apis/fastapi/tools/router.py`: `POST /tools/resolve` (project-scoped, EE `VIEW_TOOLS`) + returns the resolution; `call_tool` now reuses `resolve_connection_by_slug`. `call_tool` is + otherwise unchanged as the execution endpoint. + +**Agent service (`services/oss`) — thin driver.** + +- `agent_pi/ports.py`: `ToolCallback` (endpoint + authorization) and `custom_tools` / + `tool_callback` on `HarnessRequest`, serialized onto the wire by both harness adapters. +- `agent.py`: reads `parameters["tools"]` (or the file config), POSTs them to `/tools/resolve`, + and threads the result plus a `/tools/call` callback into the harness. The callback endpoint + and credential reuse the OTLP-credential mechanism (`inject()` Authorization, API-base derived + from `ag.tracing.otlp_url`, with `AGENTA_AGENT_TOOLS_API_URL` / `AGENTA_API_KEY` fallbacks). An + agent with no tools never touches the backend, preserving the tool-less WP-2 path. + +**TS wrapper (`services/agent`) — the bridge.** + +- `runPi.ts`: `buildCustomTools(...)` turns each resolved spec into a Pi `customTool` whose + `execute` does one `POST {endpoint}` with the OpenAI envelope + `{ data: { id, type, function: { name: callRef, arguments } } }` and the callback + Authorization. Arguments go as an object (no double-encoding); the result `content` returns + verbatim; an HTTP/timeout failure throws, which Pi turns into a tool-error result rather than a + run failure. 
Custom tool names are added to the `createAgentSession` `tools` allowlist, because + the allowlist gates custom tools too (an empty allowlist would hide them). + +**Config schema as shipped.** Under the agent revision `parameters["tools"]`, each entry is a +built-in tool name (string, normalized to `{"type": "builtin", "name": ...}`) or a discriminated +object. Example: + +```json +{ + "model": "gpt-5.5", + "tools": [ + "read", + { "type": "composio", "integration": "gmail", "action": "GMAIL_SEND_EMAIL", + "connection": "gmail-team", "name": "gmail__SEND_EMAIL" } + ] +} +``` + +**Playground integration: reuse the existing tool picker.** The chat/completion tool picker +only renders inside the prompt control, which the playground shows for a config field marked +`x-ag-type-ref: "prompt-template"`. So the agent advertises its config as a `prompt` +prompt-template (`agent_pi/schemas.py`) instead of a bespoke form: the playground then renders +the same model selector + system-message editor + tool picker, with no new frontend code. The +handler (`agent.py` `_resolve_run_config`) reads the system message as the AGENTS.md, the model +and tools from `prompt.llm_config`, and still accepts the flat `{model, agents_md, tools}` an API +caller may send. The picker encodes a Composio action as a gateway function name, +`tools__{provider}__{integration}__{action}__{connection}` (connection = the connection slug); +`agent.py` `_parse_gateway_slug` turns that into the same `composio` ref the resolver already +takes, so no backend change was needed. Non-Composio picker entries (provider built-ins, inline +functions) are skipped. 
+ +**Verified live (2026-06-16, dev stack, pi-agents project).** A real GitHub Composio connection +(`github-tvn`) plus a `GET_THE_AUTHENTICATED_USER` reference, passed via `parameters["tools"]` to +the agent `/invoke`, drove the whole path: `/tools/resolve` built the spec, Pi registered the +`github_whoami` customTool, called it, and the bridge executed the real action through +`/tools/call`. The agent answered with live data (login `mmabrouk`, follower count, public-repo +count) that only comes from executing the action. The trace nests the tool call correctly: +`_agent → invoke_agent → turn 0 → {chat, execute_tool github_whoami} → turn 1 → chat`. The same +run also works end to end from the playground: the picker shows the GitHub tool as a gateway card, +and Run returns the live answer. + +Earlier unit-level checks still hold: the resolver builds correct specs and raises the right +errors for missing / inactive / invalid connections, bad slugs, and missing actions; the bridge +sends the right envelope, forwards Authorization, sends object-form arguments, returns content +verbatim, and throws on HTTP error; Pi's validator accepts and coerces the plain Composio JSON +Schema. + +**Deployment hardening found and fixed.** The DoD wants the Composio key absent from the sandbox. +The WP-7 *data path* already guarantees this (the key is never sent to Pi). But the dev +`agent-pi` sidecar was loading the whole stack `env_file`, so the container inherited +`COMPOSIO_API_KEY` and other secrets anyway. Dropping `env_file` from the `agent-pi` service in +`hosting/docker-compose/ee/docker-compose.dev.yml` (it reads only `PORT`, `PI_CODING_AGENT_DIR`, +`AGENTA_HOST`, `AGENTA_API_KEY`, and two optional vars; Pi auth comes from the mounted login) makes +the property hold in the local sidecar too. A real sandbox (WP-3 Daytona) is isolated and never +saw these. 
+ ## Links - [`wp-2-agent-service/`](../wp-2-agent-service/README.md) diff --git a/hosting/docker-compose/ee/docker-compose.dev.yml b/hosting/docker-compose/ee/docker-compose.dev.yml index 27974d996f..e09b82b29f 100644 --- a/hosting/docker-compose/ee/docker-compose.dev.yml +++ b/hosting/docker-compose/ee/docker-compose.dev.yml @@ -428,8 +428,12 @@ services: sh -c "mkdir -p /pi-agent && cp -a /pi-agent-ro/. /pi-agent/ 2>/dev/null || true; exec node_modules/.bin/tsx src/server.ts" # === CONFIGURATION ======================================== # - env_file: - - ${ENV_FILE:-./.env.ee.dev} + # Deliberately NO env_file: the Pi sandbox must not inherit the stack's + # secrets (COMPOSIO_API_KEY, STRIPE/POSTHOG/GOOGLE/DAYTONA keys, ...). Tools + # run server-side via /tools/call, so the sandbox only needs its own port, + # the Pi login (mounted below), and the OTLP export fallback. The wrapper + # reads exactly: PORT, PI_CODING_AGENT_DIR, AGENTA_HOST, AGENTA_API_KEY, and + # the optional AGENTA_AGENT_TOOL_CALL_TIMEOUT_MS / OTEL_SERVICE_NAME. environment: PORT: "8765" PI_CODING_AGENT_DIR: /pi-agent diff --git a/services/agent/README.md b/services/agent/README.md index c920de19f9..f566acb704 100644 --- a/services/agent/README.md +++ b/services/agent/README.md @@ -1,4 +1,4 @@ -# Agent service: Pi wrapper (WP-2) +# Agent service: Pi wrapper (WP-2 + WP-7) This is the TypeScript side of the agent workflow service. It is a thin wrapper that drives the [Pi](https://pi.dev) agent harness for a single run. The Python service @@ -54,6 +54,34 @@ With no `trace` block the run is traced standalone using `AGENTA_HOST` / `AGENTA_API_KEY`, or not at all when neither is set. The extension lives in `src/agenta-otel.ts`. +## Tools (WP-7) + +The agent's runnable tools are resolved in the backend (not here) and arrive on the +request as `customTools` plus a `toolCallback`. 
`buildCustomTools` in `src/runPi.ts` +turns each spec into a Pi `customTool` whose `execute` does one +`POST {toolCallback.endpoint}` (Agenta's `/tools/call`) with the `callRef` slug and the +threaded `authorization`. Pi drives the loop and runs the tool in-process; the provider +key and connection auth stay server-side behind `/tools/call` and never enter this +sandbox. See `docs/design/agent-workflows/wp-7-tools/README.md`. + +```json +{ + "prompt": "What is my GitHub username?", + "customTools": [ + { + "name": "github__GET_THE_AUTHENTICATED_USER", + "description": "Gets the authenticated GitHub user.", + "inputSchema": {"type": "object", "properties": {}}, + "callRef": "tools.composio.github.GET_THE_AUTHENTICATED_USER.github-tvn" + } + ], + "toolCallback": { + "endpoint": "https://host/api/tools/call", + "authorization": "ApiKey ..." + } +} +``` + ## Auth `AuthStorage.create()` reads `~/.pi/agent/auth.json`. Log in once with `pnpm exec pi` @@ -68,6 +96,8 @@ echo '{"agentsMd":"You are a hello-world agent.","prompt":"Hi"}' | pnpm run run: ## Config -`config/AGENTS.md` and `config/agent.json` hold the hardcoded MVP config. They are read -by the Python service and passed into the request, so editing them changes the agent -without a code change. +The live config comes from the agent revision in the playground: a `prompt-template` +whose system message is the AGENTS.md, with the model and the picked tools under +`llm_config`. The Python service (`services/oss/src/agent.py`) reads that and fills the +request. `config/AGENTS.md` and `config/agent.json` are only the file fallback used when +the request carries no config. diff --git a/services/agent/docker-compose.agent.yml b/services/agent/docker-compose.agent.yml deleted file mode 100644 index 43f733d1c7..0000000000 --- a/services/agent/docker-compose.agent.yml +++ /dev/null @@ -1,98 +0,0 @@ -# Dedicated, self-contained compose for the agent service (WP-2). 
-# -# Runs the agent fully in Docker, invokable by curl, without touching any other stack: -# -# agent-pi - the TypeScript Pi wrapper as an HTTP sidecar. Uses the local Pi login -# (~/.pi/agent) copied in at startup so token refresh never writes to the -# host. Reachable only on the internal network. -# agent-api - the Python agent service (reuses the prebuilt services dev image). Speaks -# the Agenta /invoke contract and calls agent-pi over HTTP. Published on a -# host port for curl. -# -# Bring up: -# docker compose -f services/agent/docker-compose.agent.yml up --build -# Verify: -# curl localhost:8092/health -# curl -X POST localhost:8092/agent/v0/invoke -H 'Content-Type: application/json' \ -# -d '{"data":{"inputs":{"messages":[{"role":"user","content":"hi"}]}}}' -# Tear down: -# docker compose -f services/agent/docker-compose.agent.yml down - -name: agenta-agent - -services: - agent-pi: - build: - context: . - dockerfile: docker/Dockerfile.dev - # Copy the read-only mounted login into a writable container path so OAuth token - # refresh works and never writes back to the host ~/.pi/agent. - command: > - sh -c "mkdir -p /pi-agent && cp -a /pi-agent-ro/. /pi-agent/ 2>/dev/null || true; - exec node_modules/.bin/tsx watch src/server.ts" - environment: - PORT: "8765" - PI_CODING_AGENT_DIR: /pi-agent - # Tracing export fallback when the request carries no Authorization - # (auth disabled locally). Must be reachable from this container. - AGENTA_HOST: ${AGENTA_HOST:-http://144.76.237.122:8280} - AGENTA_API_KEY: ${AGENTA_API_KEY:-} - volumes: - - ./src:/app/src - - ${HOME}/.pi/agent:/pi-agent-ro:ro - networks: - - agent-net - restart: unless-stopped - - agent-api: - # Built from the current services dev Dockerfile (Python 3.13, current SDK + - # deps). A dedicated tag so we never clobber other stacks' images. - image: agenta-agent-api:dev - build: - context: ../.. 
- dockerfile: services/oss/docker/Dockerfile.dev - command: - [ - "uvicorn", - "entrypoints.agent_main:app", - "--host", - "0.0.0.0", - "--port", - "8080", - "--reload", - "--reload-dir", - "/app/oss/src", - "--reload-dir", - "/app/entrypoints", - "--reload-dir", - "/sdks/python/agenta", - "--reload-exclude", - "*.pyc", - "--reload-exclude", - "__pycache__", - ] - environment: - # Local curl: skip the remote credential check (the Python layer still runs - # its auth/middleware stack, it just passes the header through). - AGENTA_SERVICES_MIDDLEWARE_AUTH_ENABLED: "false" - # Drives the harness selection: HTTP harness -> the agent-pi sidecar. - AGENTA_AGENT_PI_URL: http://agent-pi:8765 - # Tracing export target. Must be reachable from this container AND from the - # agent-pi sidecar (the endpoint is passed across to nest the Pi spans), so - # use the host IP, not localhost. The API key authorizes the OTLP export. - AGENTA_HOST: ${AGENTA_HOST:-http://144.76.237.122:8280} - AGENTA_API_KEY: ${AGENTA_API_KEY:-} - volumes: - - ..:/app - - ../../sdks/python:/sdks/python - - ../../clients/python:/clients/python - ports: - - "8092:8080" - depends_on: - - agent-pi - networks: - - agent-net - restart: unless-stopped - -networks: - agent-net: diff --git a/services/agent/docker-compose.stack.yml b/services/agent/docker-compose.stack.yml deleted file mode 100644 index 774e942517..0000000000 --- a/services/agent/docker-compose.stack.yml +++ /dev/null @@ -1,86 +0,0 @@ -# Same-origin demo: the agent served exactly like chat and completion. -# -# Runs the FULL services app (entrypoints.main, which now mounts /agent/v0 next to -# /chat/v0 and /completion/v0) behind its own traefik, so the agent answers at -# {origin}/services/agent/v0/invoke just like {origin}/services/chat/v0/invoke. The -# Pi sidecar is called in-network. This is the integration; a full dev stack (with the -# web app) would serve the playground at the same origin so there is no CORS at all. 
-# -# Bring up (creds for tracing/export come from the shell): -# set -a && source .env.test.local && set +a -# docker compose -f services/agent/docker-compose.stack.yml up --build -d -# Verify: -# curl -X POST localhost:8480/services/agent/v0/invoke -H 'content-type: application/json' \ -# -d '{"data":{"inputs":{"messages":[{"role":"user","content":"hi"}]}}}' - -name: agenta-agent-stack - -services: - traefik: - image: traefik:2 - command: - - --providers.docker - - --providers.docker.constraints=Label(`com.docker.compose.project`,`agenta-agent-stack`) - - --entrypoints.web.address=:80 - volumes: - - /var/run/docker.sock:/var/run/docker.sock - ports: - - "8480:80" - networks: - - stack-net - restart: unless-stopped - - services: - image: agenta-agent-api:dev - command: - [ - "uvicorn", - "entrypoints.main:app", - "--host", - "0.0.0.0", - "--port", - "8080", - "--root-path", - "/services", - ] - environment: - AGENTA_SERVICES_MIDDLEWARE_AUTH_ENABLED: "false" - AGENTA_AGENT_PI_URL: http://agent-pi:8765 - AGENTA_HOST: ${AGENTA_HOST:-http://144.76.237.122:8280} - AGENTA_API_KEY: ${AGENTA_API_KEY:-} - volumes: - - ..:/app - - ../../sdks/python:/sdks/python - - ../../clients/python:/clients/python - networks: - - stack-net - labels: - - "traefik.http.routers.aservices.rule=PathPrefix(`/services/`)" - - "traefik.http.routers.aservices.entrypoints=web" - - "traefik.http.middlewares.aservices-strip.stripprefix.prefixes=/services" - - "traefik.http.middlewares.aservices-strip.stripprefix.forceslash=true" - - "traefik.http.routers.aservices.middlewares=aservices-strip" - - "traefik.http.services.aservices.loadbalancer.server.port=8080" - restart: unless-stopped - - agent-pi: - build: - context: . - dockerfile: docker/Dockerfile.dev - command: > - sh -c "mkdir -p /pi-agent && cp -a /pi-agent-ro/. 
/pi-agent/ 2>/dev/null || true; - exec node_modules/.bin/tsx src/server.ts" - environment: - PORT: "8765" - PI_CODING_AGENT_DIR: /pi-agent - AGENTA_HOST: ${AGENTA_HOST:-http://144.76.237.122:8280} - AGENTA_API_KEY: ${AGENTA_API_KEY:-} - volumes: - - ./src:/app/src - - ${HOME}/.pi/agent:/pi-agent-ro:ro - networks: - - stack-net - restart: unless-stopped - -networks: - stack-net: diff --git a/services/agent/scripts/register_agent_app.py b/services/agent/scripts/register_agent_app.py deleted file mode 100644 index 1e73c0515f..0000000000 --- a/services/agent/scripts/register_agent_app.py +++ /dev/null @@ -1,166 +0,0 @@ -# /// script -# requires-python = ">=3.11" -# dependencies = ["requests"] -# /// -"""Register the agent as an app in a running Agenta stack, pointing at the dockerized -agent service. Run it, then open the app in the playground and chat. - -It creates a workflow + default variant and commits a revision whose `data.url` points -at the agent service and whose `data.schemas` is the chat interface the agent serves -from /inspect (so the playground renders a chat box). This is the "custom workflow" -path: no static SDK interface, the agent self-describes. - -Env: - AGENTA_HOST base host (default http://144.76.237.122:8280) - AGENTA_API_KEY api key for that stack (Authorization: ApiKey ...) - AGENT_URL agent service invoke base (default http://144.76.237.122:8092/agent/v0) - PROJECT_ID optional; defaults to the stack's default project - APP_SLUG optional; defaults to wp2-agent- - -Usage: - AGENTA_API_KEY=... 
uv run services/agent/scripts/register_agent_app.py -""" - -import os -import secrets -import sys - -import requests - -HOST = os.environ.get("AGENTA_HOST", "http://144.76.237.122:8280").rstrip("/") -API = HOST + "/api" -KEY = os.environ.get("AGENTA_API_KEY") -AGENT_URL = os.environ.get("AGENT_URL", "http://144.76.237.122:8092/agent/v0").rstrip( - "/" -) -PROJECT_ID = os.environ.get("PROJECT_ID") -APP_SLUG = os.environ.get("APP_SLUG") or f"wp2-agent-{secrets.token_hex(3)}" - -if not KEY: - sys.exit("Set AGENTA_API_KEY") - -H = {"Authorization": f"ApiKey {KEY}", "Content-Type": "application/json"} - -# The chat interface the agent advertises via /inspect (kept in sync with -# services/oss/src/agent_pi/schemas.py). -SCHEMA = "https://json-schema.org/draft/2020-12/schema" -AGENT_SCHEMAS = { - "inputs": { - "$schema": SCHEMA, - "type": "object", - "additionalProperties": True, - "properties": { - "messages": { - "x-ag-type-ref": "messages", - "type": "array", - "description": "Ordered list of normalized chat messages.", - } - }, - }, - "parameters": { - "$schema": SCHEMA, - "type": "object", - "additionalProperties": True, - "properties": {"model": {"type": "string", "description": "Model override."}}, - }, - "outputs": { - "$schema": SCHEMA, - "x-ag-type-ref": "message", - "type": "object", - "description": "Final assistant message returned by the agent.", - }, -} - - -def _id() -> str: - return secrets.token_hex(6) - - -def post(path: str, body: dict) -> dict: - r = requests.post( - f"{API}{path}", - json=body, - headers=H, - params={"project_id": PROJECT_ID}, - timeout=60, - ) - if r.status_code >= 300: - sys.exit(f"POST {path} -> {r.status_code}: {r.text[:600]}") - return r.json() - - -def main() -> None: - global PROJECT_ID - if not PROJECT_ID: - projects = requests.get(f"{API}/projects", headers=H, timeout=30).json() - default = next( - (p for p in projects if p.get("is_default_project")), projects[0] - ) - PROJECT_ID = default["project_id"] - 
print(f"project_id={PROJECT_ID} app_slug={APP_SLUG} agent_url={AGENT_URL}") - - wf = post( - "/workflows/", - { - "workflow": { - "slug": APP_SLUG, - "name": APP_SLUG, - "flags": {"is_application": True}, - } - }, - ) - workflow_id = wf["workflow"]["id"] - - var = post( - "/workflows/variants/", - { - "workflow_variant": { - "workflow_id": workflow_id, - "slug": f"{APP_SLUG}.default", - "name": "default", - } - }, - ) - variant_id = var["workflow_variant"]["id"] - - # Seed v0 (tables dismiss v0), then commit v1 with the real data. - post( - "/workflows/revisions/commit", - { - "workflow_revision": { - "workflow_id": workflow_id, - "workflow_variant_id": variant_id, - "slug": _id(), - "name": "default", - "message": "Initial commit", - } - }, - ) - rev = post( - "/workflows/revisions/commit", - { - "workflow_revision": { - "workflow_id": workflow_id, - "workflow_variant_id": variant_id, - "slug": _id(), - "name": "default", - "message": "Agent service", - "flags": {"is_chat": True}, - "data": { - "url": AGENT_URL, - "parameters": {"model": "gpt-5.5"}, - "schemas": AGENT_SCHEMAS, - }, - } - }, - ) - revision = rev["workflow_revision"] - print(f"workflow_id={workflow_id}") - print(f"variant_id={variant_id}") - print(f"revision_id={revision['id']} flags={revision.get('flags')}") - print(f"stored url={revision.get('data', {}).get('url')}") - print(f"\nOpen the playground: {HOST}/apps/{workflow_id}/playground") - - -if __name__ == "__main__": - main() diff --git a/services/agent/src/runPi.ts b/services/agent/src/runPi.ts index cabf603701..4056d0dce7 100644 --- a/services/agent/src/runPi.ts +++ b/services/agent/src/runPi.ts @@ -4,8 +4,9 @@ * This is the concrete "harness" behind the service's Harness port. It drives the * Pi SDK (`createAgentSession`) for a single run: it injects the agent's AGENTS.md * in memory, resolves the model, sends one user turn, and returns the final - * assistant text. No streaming, no tools by default, no session persistence. 
Those - * are later work packages. + * assistant text. It also turns the backend-resolved runnable tools (WP-7) into Pi + * customTools that route back through Agenta's /tools/call. No streaming and no + * session persistence yet; those are later work packages. * * Auth: uses `AuthStorage.create()`, which reads ~/.pi/agent/auth.json (the local * Pi login). Set OPENAI_API_KEY / ANTHROPIC_API_KEY in the environment as an @@ -54,6 +55,34 @@ export interface TraceContext { captureContent?: boolean; } +/** + * A runnable tool the backend already resolved from the agent config: name + + * description + JSON-Schema params for the model, plus the `callRef` slug the + * execution bridge sends back to Agenta's /tools/call. The Composio key and the + * connection auth stay server-side; this sandbox never sees them. + */ +export interface ResolvedToolSpec { + /** Function name shown to the model (e.g. "gmail__SEND_EMAIL"). */ + name: string; + /** Description shown to the model. Resolved live from the provider catalog. */ + description?: string; + /** JSON Schema for the tool arguments. Pi accepts plain JSON Schema here. */ + inputSchema?: Record | null; + /** "tools.{provider}.{integration}.{action}.{connection}" — the /tools/call slug. */ + callRef: string; +} + +/** + * Where and how to route a tool call back through Agenta. The backend builds the + * full /tools/call URL and threads the same credential the OTLP export rides on. + */ +export interface ToolCallbackContext { + /** Full /tools/call URL. */ + endpoint: string; + /** Authorization header value for the callback (project-scoped). */ + authorization?: string; +} + export interface AgentRunRequest { /** AGENTS.md text injected as the agent's instructions (in memory). */ agentsMd?: string; @@ -65,6 +94,10 @@ export interface AgentRunRequest { messages?: ChatMessage[]; /** Built-in tools to enable. MVP default: none. */ tools?: string[]; + /** Resolved runnable tools (WP-7), turned into Pi customTools below. 
*/ + customTools?: ResolvedToolSpec[]; + /** Where customTools route their calls back to. Required when customTools is set. */ + toolCallback?: ToolCallbackContext; /** Tracing: thread the Agenta trace context across the boundary. */ trace?: TraceContext; } @@ -126,6 +159,117 @@ function extractAssistantText(messages: any[]): string { return ""; } +/** Per-tool budget for the /tools/call round-trip. Surfaced as a tool error on timeout. */ +const TOOL_CALL_TIMEOUT_MS = Number( + process.env.AGENTA_AGENT_TOOL_CALL_TIMEOUT_MS ?? 30000, +); + +/** Permissive default when a resolved tool has no input schema. */ +const EMPTY_OBJECT_SCHEMA = { + type: "object", + properties: {}, + additionalProperties: true, +}; + +/** + * Turn resolved tool specs into Pi customTools. Each tool's `execute` does one + * POST back through Agenta's /tools/call, so Pi runs the loop while the Composio + * key and connection auth stay server-side. A failed call throws, which Pi turns + * into a tool-error result (the loop continues) rather than a run failure. + */ +export function buildCustomTools( + specs: ResolvedToolSpec[], + callback: ToolCallbackContext | undefined, +): any[] { + if (specs.length === 0) return []; + if (!callback?.endpoint) { + log(`skipping ${specs.length} custom tool(s): missing toolCallback endpoint`); + return []; + } + + return specs.map((spec) => ({ + name: spec.name, + label: spec.name, + description: spec.description ?? spec.name, + // Pi accepts a plain JSON Schema for `parameters` (its validator has a + // non-TypeBox path); the schema is resolved live from the provider catalog. + parameters: (spec.inputSchema as any) ?? EMPTY_OBJECT_SCHEMA, + async execute(toolCallId: string, params: unknown, signal?: AbortSignal) { + const text = await callAgentaTool( + callback, + spec.callRef, + toolCallId, + params, + signal, + ); + return { + content: [{ type: "text", text }], + details: { callRef: spec.callRef }, + }; + }, + })); +} + +/** One /tools/call round-trip. 
Returns the result string; throws on failure. */ +async function callAgentaTool( + callback: ToolCallbackContext, + callRef: string, + toolCallId: string, + params: unknown, + signal?: AbortSignal, +): Promise { + const headers: Record = { "content-type": "application/json" }; + if (callback.authorization) headers["authorization"] = callback.authorization; + + // Combine Pi's abort signal (if any) with a per-tool timeout. + const timeoutSignal = AbortSignal.timeout(TOOL_CALL_TIMEOUT_MS); + const anyOf = (AbortSignal as any).any; + const combined = + signal && typeof anyOf === "function" + ? anyOf([signal, timeoutSignal]) + : timeoutSignal; + + let response: Response; + try { + response = await fetch(callback.endpoint, { + method: "POST", + headers, + body: JSON.stringify({ + data: { + id: toolCallId, + type: "function", + // Arguments as an object (not a JSON string) to avoid double-encoding. + function: { name: callRef, arguments: params ?? {} }, + }, + }), + signal: combined, + }); + } catch (err) { + throw new Error( + `tool call ${callRef} failed: ${err instanceof Error ? err.message : String(err)}`, + ); + } + + const bodyText = await response.text(); + if (!response.ok) { + throw new Error( + `tool call ${callRef} returned HTTP ${response.status}: ${bodyText.slice(0, 500)}`, + ); + } + + // ToolCallResponse -> { call: { data: { content }, status } }. `content` is the + // execution result serialized as a JSON string; hand it to the model verbatim. 
+ try { + const parsed = JSON.parse(bodyText); + const content = parsed?.call?.data?.content; + if (typeof content === "string") return content; + if (content != null) return JSON.stringify(content); + return bodyText; + } catch { + return bodyText; + } +} + export async function runPi(request: AgentRunRequest): Promise { const prompt = resolvePrompt(request); if (!prompt) { @@ -176,12 +320,27 @@ export async function runPi(request: AgentRunRequest): Promise { }); await loader.reload(); + // Build runnable tools from the resolved specs. Pi's allowlist gates custom + // tools too, so their names must be in `tools` for the model to see them. + const customTools = buildCustomTools( + request.customTools ?? [], + request.toolCallback, + ); + const toolAllowlist = [ + ...(request.tools ?? []), + ...customTools.map((tool) => tool.name), + ]; + if (customTools.length > 0) { + log(`custom tools: ${customTools.map((t) => t.name).join(", ")}`); + } + const { session } = await createAgentSession({ cwd, model, authStorage, modelRegistry, - tools: request.tools ?? [], + tools: toolAllowlist, + customTools, sessionManager: SessionManager.inMemory(cwd), settingsManager: SettingsManager.inMemory(), resourceLoader: loader, diff --git a/services/entrypoints/agent_main.py b/services/entrypoints/agent_main.py deleted file mode 100644 index 595e60ad27..0000000000 --- a/services/entrypoints/agent_main.py +++ /dev/null @@ -1,47 +0,0 @@ -"""Standalone entrypoint for the agent service (WP-2 local verification). - -Mounts only the agent app plus a health check, so the agent ``/invoke`` can be -exercised with curl without bringing up the full services app. The real integration -point is ``entrypoints/main.py`` (one import + one mount), kept separate so this -isolated runner stays light. 
- -Run locally (auth disabled for curl): - - cd services - AGENTA_SERVICES_MIDDLEWARE_AUTH_ENABLED=false \\ - uv run uvicorn entrypoints.agent_main:app --host 0.0.0.0 --port 8090 -""" - -from fastapi import FastAPI -from fastapi.middleware.cors import CORSMiddleware - -import agenta as ag -from oss.src.agent import agent_v0_app - -ag.init() - -app = FastAPI( - openapi_url=None, - docs_url=None, - redoc_url=None, -) - -app.add_middleware( - CORSMiddleware, - # The playground invokes cross-origin (web on a different port) with credentials - # (cookies + Authorization). Browsers reject a "*" origin on credentialed requests, - # so echo the specific origin and allow credentials. Matches the dev box on any - # port and localhost. Same-origin (served under /services) would avoid CORS entirely. - allow_origin_regex=r"https?://(144\.76\.237\.122|localhost|0\.0\.0\.0)(:\d+)?", - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], -) - - -@app.get("/health") -async def health(): - return {"status": "ok"} - - -app.mount("/agent/v0", agent_v0_app) diff --git a/services/oss/src/agent.py b/services/oss/src/agent.py index 1203f1560a..42f9b1832c 100644 --- a/services/oss/src/agent.py +++ b/services/oss/src/agent.py @@ -1,4 +1,4 @@ -"""Agent workflow service (WP-2). +"""Agent workflow service (WP-2 + WP-7). Mirrors the chat/completion services: an Agenta app exposing ``/invoke`` and ``/inspect`` through ``ag.create_app`` + ``ag.workflow`` + ``ag.route``, so the @@ -6,13 +6,17 @@ builds the user turn from the request and runs it through the Harness port, whose Pi adapter drives the TypeScript wrapper in ``services/agent``. -MVP: hardcoded config (AGENTS.md text, model) read from files, a single -non-streaming reply, no tools. Streaming, multi-message output, tools, and Daytona -are later work packages. 
+Config is a ``prompt-template`` (system message as AGENTS.md, model, and tools): the +playground renders the same prompt control as chat/completion, including the tool +picker. Runnable tools (WP-7) are resolved in the backend (``/tools/resolve``) and +executed back through ``/tools/call`` while Pi drives the loop. Streaming, +multi-message output, and the Daytona sandbox are later work packages. """ import os -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Tuple + +import httpx import agenta as ag from agenta.sdk.engines.tracing.propagation import inject @@ -22,7 +26,7 @@ from oss.src.agent_pi.local_runtime import LocalRuntime from oss.src.agent_pi.pi_harness import PiHarness from oss.src.agent_pi.pi_http_harness import PiHttpHarness -from oss.src.agent_pi.ports import Harness, HarnessRequest, TraceContext +from oss.src.agent_pi.ports import Harness, HarnessRequest, ToolCallback, TraceContext from oss.src.agent_pi.schemas import AGENT_SCHEMAS log = get_module_logger(__name__) @@ -33,6 +37,9 @@ "no", ) +# Budget for the backend tool-resolution round-trip (catalog + connection check). +_TOOLS_RESOLVE_TIMEOUT = float(os.getenv("AGENTA_AGENT_TOOLS_TIMEOUT", "30")) + def _build_harness() -> Harness: """Pick the harness adapter for the current deployment. 
@@ -46,6 +53,53 @@ def _build_harness() -> Harness: return PiHarness(LocalRuntime(), wrapper_dir=str(wrapper_dir())) +def _system_text(messages: Optional[List[Any]]) -> str: + """Join the system-message content of a prompt-template into AGENTS.md text.""" + parts: List[str] = [] + for message in messages or []: + if not isinstance(message, dict) or message.get("role") != "system": + continue + content = message.get("content") + if isinstance(content, str): + parts.append(content) + elif isinstance(content, list): + parts.extend( + block.get("text", "") + for block in content + if isinstance(block, dict) and block.get("type") == "text" + ) + return "\n\n".join(part for part in parts if part) + + +def _resolve_run_config( + params: Dict[str, Any], + config: Any, +) -> Tuple[str, str, Any]: + """Pull model, instructions, and raw tools from the request parameters. + + Accepts both shapes: the playground's ``prompt`` (a ``prompt-template`` whose + system message is the AGENTS.md and whose ``llm_config`` carries model + picker + tools) and the flat ``{model, agents_md, tools}`` an API caller may send. Falls + back to the service file config for any unset field. 
+ """ + prompt_cfg = params.get("prompt") + if isinstance(prompt_cfg, dict): + llm_config = prompt_cfg.get("llm_config") or {} + model = llm_config.get("model") or config.model + agents_md = _system_text(prompt_cfg.get("messages")) or config.agents_md + raw_tools = llm_config.get("tools") + if raw_tools is None: + raw_tools = prompt_cfg.get("tools") + else: + model = params.get("model") or config.model + agents_md = params.get("agents_md") or config.agents_md + raw_tools = params.get("tools") + + if raw_tools is None: + raw_tools = config.tools + return model, agents_md, raw_tools + + def _latest_user_message(messages: Optional[List[Any]]) -> str: for message in reversed(messages or []): if not isinstance(message, dict): @@ -91,6 +145,162 @@ def _trace_context() -> Optional[TraceContext]: return None +def _agenta_api_base() -> Optional[str]: + """Resolve the Agenta backend base URL (``.../api``) for tool calls. + + Prefers an explicit override, then derives it from the OTLP endpoint the SDK is + configured with (``{host}/api/otlp/v1/traces``), then falls back to env. Returns + ``None`` when nothing is configured; callers only need this when tools are set. + """ + override = os.getenv("AGENTA_AGENT_TOOLS_API_URL") + if override: + return override.rstrip("/") + + try: + otlp_url = ag.tracing.otlp_url + except Exception: # pylint: disable=broad-except + otlp_url = None + if otlp_url and "/otlp/" in otlp_url: + return otlp_url.split("/otlp/", 1)[0].rstrip("/") + + api_url = os.getenv("AGENTA_API_URL") + if api_url: + return api_url.rstrip("/") + + return None + + +def _request_authorization() -> Optional[str]: + """The project-scoped credential to call ``/tools/resolve`` and ``/tools/call``. + + Reuses the same propagation the OTLP credential rides on (the caller's + Authorization), falling back to the service's own API key the way the tracing + sidecar does. 
Scoping to the caller keeps an agent run from invoking tools the + user could not (see WP-7 risk: RUN_TOOLS scoping). + """ + try: + authorization = inject({}).get("Authorization") + except Exception: # pylint: disable=broad-except + authorization = None + if authorization: + return authorization + + api_key = os.getenv("AGENTA_API_KEY") + if api_key: + return f"ApiKey {api_key}" + + return None + + +def _parse_gateway_slug(slug: Any) -> Optional[Dict[str, Any]]: + """Parse a gateway tool slug into a Composio reference, or ``None``. + + The playground tool picker encodes a Composio action as a function name like + ``tools__composio__github__GET_THE_AUTHENTICATED_USER__github-tvn`` (the same + 5-segment slug ``/tools/call`` parses; ``__`` or ``.`` separated). Anything that + is not a 5-segment ``tools.composio.*`` slug returns ``None`` so the caller can + skip it. + """ + if not isinstance(slug, str): + return None + parts = slug.replace("__", ".").split(".") + if len(parts) == 5 and parts[0] == "tools" and parts[1] == "composio": + return { + "type": "composio", + "integration": parts[2], + "action": parts[3], + "connection": parts[4], + } + return None + + +def _normalize_tool_ref(ref: Any) -> Optional[Dict[str, Any]]: + """Coerce a config entry into a discriminated tool reference the resolver parses. + + Handles three shapes: a bare string (or single-key ``{"name": ...}``) is the + existing built-in tool name; a dict already carrying ``type`` passes through; and + the playground picker's gateway entry (``{"function": {"name": + "tools__composio__..."}}``) is parsed into a ``composio`` ref. Unsupported picker + entries (provider built-ins, inline custom functions) return ``None`` and are + skipped rather than failing the run. 
+ """ + if isinstance(ref, str): + return {"type": "builtin", "name": ref} + if isinstance(ref, dict): + if ref.get("type") in ("builtin", "composio"): + return ref + function = ref.get("function") if isinstance(ref.get("function"), dict) else {} + gateway = _parse_gateway_slug(function.get("name") or ref.get("name")) + if gateway: + return gateway + if "type" not in ref and isinstance(ref.get("name"), str): + return {"type": "builtin", "name": ref["name"]} + return None + return None + + +async def _resolve_tools( + tools: List[Any], +) -> Tuple[List[str], List[Dict[str, Any]], Optional[ToolCallback]]: + """Resolve config tool references into builtins + Pi customTool specs. + + Calls the backend resolver (``POST /tools/resolve``), which validates Composio + connections up front and enriches each action from the catalog. Returns the + built-in tool names, the camelCase customTool specs for the wire, and the + ``/tools/call`` callback. Raises on resolution failure so the invoke fails early + with a clear message rather than the model hitting a runtime tool error. + """ + refs = [ref for ref in (_normalize_tool_ref(t) for t in tools if t) if ref] + if not refs: + return [], [], None + + api_base = _agenta_api_base() + if not api_base: + raise RuntimeError( + "Agent has tools configured but the Agenta API base URL is unknown. " + "Set AGENTA_AGENT_TOOLS_API_URL or AGENTA_API_URL." 
+ ) + + authorization = _request_authorization() + headers = {"Content-Type": "application/json"} + if authorization: + headers["Authorization"] = authorization + + async with httpx.AsyncClient(timeout=_TOOLS_RESOLVE_TIMEOUT) as client: + response = await client.post( + f"{api_base}/tools/resolve", + json={"tools": refs}, + headers=headers, + ) + + if response.status_code >= 400: + raise RuntimeError( + f"Tool resolution failed (HTTP {response.status_code}): " + f"{response.text[:500]}" + ) + + data = response.json() + builtins = data.get("builtins") or [] + custom = data.get("custom") or [] + + custom_tools = [ + { + "name": spec["name"], + "description": spec.get("description"), + "inputSchema": spec.get("input_schema"), + "callRef": spec["call_ref"], + } + for spec in custom + ] + + callback = ToolCallback( + endpoint=f"{api_base}/tools/call", + authorization=authorization, + ) + + return builtins, custom_tools, callback + + async def _agent( inputs: Optional[Dict[str, Any]] = None, messages: Optional[List[Any]] = None, @@ -98,15 +308,22 @@ async def _agent( ): config = load_config() - # Config (model + AGENTS.md instructions) comes from parameters when the - # playground/caller sets it, falling back to the service's file config. + # Config comes from parameters when the playground/caller sets it, falling back + # to the service file config. Accepts both the playground prompt-template shape + # and a flat {model, agents_md, tools} (see _resolve_run_config). 
params = parameters or {} - model = params.get("model") or config.model - agents_md = params.get("agents_md") or config.agents_md + model, agents_md, tools_config = _resolve_run_config(params, config) + + if isinstance(tools_config, dict): + tools_config = [tools_config] + elif not isinstance(tools_config, list): + tools_config = [] msgs = messages or (inputs or {}).get("messages") or [] prompt = _latest_user_message(msgs) + builtins, custom_tools, tool_callback = await _resolve_tools(tools_config) + harness = _build_harness() await harness.setup() @@ -117,7 +334,9 @@ async def _agent( model=model, prompt=prompt, messages=msgs, - tools=config.tools, + tools=builtins, + custom_tools=custom_tools, + tool_callback=tool_callback, trace=_trace_context(), ) ) diff --git a/services/oss/src/agent_pi/config.py b/services/oss/src/agent_pi/config.py index b630a3063e..8c2f5bf660 100644 --- a/services/oss/src/agent_pi/config.py +++ b/services/oss/src/agent_pi/config.py @@ -9,7 +9,7 @@ import os from dataclasses import dataclass, field from pathlib import Path -from typing import List, Optional +from typing import Any, List, Optional # services/oss/src/agent_pi/config.py -> parents[3] == services/ _SERVICES_DIR = Path(__file__).resolve().parents[3] @@ -30,7 +30,11 @@ class AgentConfig: agents_md: str model: Optional[str] = None - tools: List[str] = field(default_factory=list) + # Provider-agnostic tool references (WP-7). Each entry is either a plain string + # (a Pi built-in name, normalized to a ``builtin`` ref downstream) or a + # discriminated dict (``{"type": "composio", ...}``). Resolution happens in the + # backend at invoke time; the service just forwards the list. 
+ tools: List[Any] = field(default_factory=list) def wrapper_dir() -> Path: diff --git a/services/oss/src/agent_pi/pi_harness.py b/services/oss/src/agent_pi/pi_harness.py index f4c5fc3e5c..266e9cb9a0 100644 --- a/services/oss/src/agent_pi/pi_harness.py +++ b/services/oss/src/agent_pi/pi_harness.py @@ -48,6 +48,10 @@ async def invoke(self, request: HarnessRequest) -> HarnessResult: "prompt": request.prompt, "messages": request.messages, "tools": request.tools, + "customTools": request.custom_tools, + "toolCallback": request.tool_callback.to_wire() + if request.tool_callback + else None, "trace": request.trace.to_wire() if request.trace else None, } ).encode("utf-8") diff --git a/services/oss/src/agent_pi/pi_http_harness.py b/services/oss/src/agent_pi/pi_http_harness.py index 1e4b8a0d2e..0435319011 100644 --- a/services/oss/src/agent_pi/pi_http_harness.py +++ b/services/oss/src/agent_pi/pi_http_harness.py @@ -42,6 +42,10 @@ async def invoke(self, request: HarnessRequest) -> HarnessResult: "prompt": request.prompt, "messages": request.messages, "tools": request.tools, + "customTools": request.custom_tools, + "toolCallback": request.tool_callback.to_wire() + if request.tool_callback + else None, "trace": request.trace.to_wire() if request.trace else None, } diff --git a/services/oss/src/agent_pi/ports.py b/services/oss/src/agent_pi/ports.py index f556de8cf7..4b436db6c6 100644 --- a/services/oss/src/agent_pi/ports.py +++ b/services/oss/src/agent_pi/ports.py @@ -84,6 +84,28 @@ def to_wire(self) -> Dict[str, Any]: } +@dataclass +class ToolCallback: + """How the harness routes a tool call back through Agenta's ``/tools/call``. + + The backend resolves runnable tool references into specs and hands the harness + this callback. The TS wrapper turns each spec into a Pi ``customTool`` whose + ``execute`` POSTs the OpenAI-style envelope to ``endpoint`` with + ``authorization``. The provider key and connection auth never enter the sandbox; + they stay behind ``/tools/call``. 
Same mechanism that threads the OTLP credential. + """ + + endpoint: str # full ``/tools/call`` URL + authorization: Optional[str] = None # full Authorization header value + + def to_wire(self) -> Dict[str, Any]: + """Serialize to the camelCase shape the TS wrapper expects on the wire.""" + return { + "endpoint": self.endpoint, + "authorization": self.authorization, + } + + @dataclass class HarnessRequest: """One agent run: instructions, model, the user turn, and optional history.""" @@ -93,6 +115,10 @@ class HarnessRequest: prompt: Optional[str] = None messages: List[Any] = field(default_factory=list) tools: List[str] = field(default_factory=list) + # Resolved runnable tool specs, already in the camelCase wire shape the TS + # wrapper turns into Pi customTools: {name, description, inputSchema, callRef}. + custom_tools: List[Dict[str, Any]] = field(default_factory=list) + tool_callback: Optional[ToolCallback] = None trace: Optional[TraceContext] = None diff --git a/services/oss/src/agent_pi/schemas.py b/services/oss/src/agent_pi/schemas.py index 93a22c6532..cef2440679 100644 --- a/services/oss/src/agent_pi/schemas.py +++ b/services/oss/src/agent_pi/schemas.py @@ -34,24 +34,30 @@ }, } -# Parameters: the agent config the playground renders as editable fields. Exposes -# the two values that actually drive a run: the model and the AGENTS.md instructions. -# `x-parameters.multiline` is the hint the playground honors to render a textarea. +# Parameters: the agent config the playground renders. We reuse the existing +# `prompt-template` control (model selector + tool picker + message editor) instead +# of a bespoke agent form: the `x-ag-type-ref: prompt-template` marker makes the +# playground render the same prompt UI chat/completion use, so the tool picker comes +# for free. The agent reads the system message as its AGENTS.md, `llm_config.model` +# as the model, and `llm_config.tools` (the picker output) as its runnable tools. 
AGENT_PARAMETERS_SCHEMA = { "$schema": _SCHEMA, "type": "object", "additionalProperties": True, "properties": { - "model": { - "type": "string", - "default": _DEFAULT_MODEL, - "description": "Model the agent runs on.", - }, - "agents_md": { - "type": "string", - "default": _DEFAULT_AGENTS_MD, - "description": "The agent's instructions (AGENTS.md).", - "x-parameters": {"multiline": True}, + "prompt": { + "x-ag-type-ref": "prompt-template", + "type": "object", + "description": ( + "The agent's instructions (system message), model, and tools. Tools " + "are picked from connected providers (e.g. Composio) and run " + "server-side via /tools/call." + ), + "default": { + "messages": [{"role": "system", "content": _DEFAULT_AGENTS_MD}], + "template_format": "mustache", + "llm_config": {"model": _DEFAULT_MODEL, "tools": []}, + }, }, }, }