From 59bf20a98fda115c3cb96a0de197b140faae1587 Mon Sep 17 00:00:00 2001
From: Mahmoud Mabrouk <mahmoud@agenta.ai>
Date: Tue, 16 Jun 2026 20:17:40 +0200
Subject: [PATCH] feat(agent): runnable tools as agent configuration (WP-7)

---
 api/oss/src/apis/fastapi/tools/models.py      |  18 ++
 api/oss/src/apis/fastapi/tools/router.py      |  89 ++++---
 api/oss/src/core/tools/dtos.py                |  62 ++++-
 api/oss/src/core/tools/exceptions.py          |  18 ++
 api/oss/src/core/tools/service.py             | 159 ++++++++++++
 .../tracing-in-the-agent-service.md           |   5 +-
 .../wp-2-agent-service/implementation-plan.md |   8 +
 .../agent-workflows/wp-2-agent-service/qa.md  | 176 +++++++++++++
 .../agent-workflows/wp-7-tools/README.md      |  97 ++++++-
 .../docker-compose/ee/docker-compose.dev.yml  |   8 +-
 services/agent/README.md                      |  38 ++-
 services/agent/docker-compose.agent.yml       |  98 -------
 services/agent/docker-compose.stack.yml       |  86 -------
 services/agent/scripts/register_agent_app.py  | 166 ------------
 services/agent/src/runPi.ts                   | 165 +++++++++++-
 services/entrypoints/agent_main.py            |  47 ----
 services/oss/src/agent.py                     | 241 +++++++++++++++++-
 services/oss/src/agent_pi/config.py           |   8 +-
 services/oss/src/agent_pi/pi_harness.py       |   4 +
 services/oss/src/agent_pi/pi_http_harness.py  |   4 +
 services/oss/src/agent_pi/ports.py            |  26 ++
 services/oss/src/agent_pi/schemas.py          |  32 ++-
 22 files changed, 1089 insertions(+), 466 deletions(-)
 create mode 100644 docs/design/agent-workflows/wp-2-agent-service/qa.md
 delete mode 100644 services/agent/docker-compose.agent.yml
 delete mode 100644 services/agent/docker-compose.stack.yml
 delete mode 100644 services/agent/scripts/register_agent_app.py
 delete mode 100644 services/entrypoints/agent_main.py

diff --git a/api/oss/src/apis/fastapi/tools/models.py b/api/oss/src/apis/fastapi/tools/models.py
index 891b276c22..768574f23c 100644
--- a/api/oss/src/apis/fastapi/tools/models.py
+++ b/api/oss/src/apis/fastapi/tools/models.py
@@ -15,6 +15,9 @@
     ToolConnectionCreate,
     # Tool Calls
     ToolResult,
+    # Agent tools
+    AgentToolReference,
+    ResolvedAgentTool,
 )
 
 
@@ -87,3 +90,18 @@ class ToolConnectionsResponse(BaseModel):
 
 class ToolCallResponse(BaseModel):
     call: ToolResult
+
+
+# ---------------------------------------------------------------------------
+# Agent tool resolution
+# ---------------------------------------------------------------------------
+
+
+class ToolResolveRequest(BaseModel):
+    tools: List[AgentToolReference] = []
+
+
+class ToolResolveResponse(BaseModel):
+    count: int = 0
+    builtins: List[str] = []
+    custom: List[ResolvedAgentTool] = []
diff --git a/api/oss/src/apis/fastapi/tools/router.py b/api/oss/src/apis/fastapi/tools/router.py
index 043d114fa7..3cc689a055 100644
--- a/api/oss/src/apis/fastapi/tools/router.py
+++ b/api/oss/src/apis/fastapi/tools/router.py
@@ -29,6 +29,9 @@
     ToolConnectionsResponse,
     #
     ToolCallResponse,
+    #
+    ToolResolveRequest,
+    ToolResolveResponse,
 )
 
 from oss.src.core.shared.dtos import Status
@@ -42,10 +45,12 @@
     ToolResultData,
 )
 from oss.src.core.tools.exceptions import (
+    ActionNotFoundError,
     AdapterError,
     ConnectionInactiveError,
     ConnectionInvalidError,
     ConnectionNotFoundError,
+    ToolSlugInvalidError,
 )
 from oss.src.core.tools.service import (
     ToolsService,
@@ -208,6 +213,14 @@ def __init__(
         )
 
         # --- Tool operations ---
+        self.router.add_api_route(
+            "/resolve",
+            self.resolve_tools,
+            methods=["POST"],
+            operation_id="resolve_agent_tools",
+            response_model=ToolResolveResponse,
+            response_model_exclude_none=True,
+        )
         self.router.add_api_route(
             "/call",
             self.call_tool,
@@ -886,6 +899,51 @@ async def callback_connection(
     # Tool Calls
     # -----------------------------------------------------------------------
 
+    @intercept_exceptions()
+    @handle_adapter_exceptions()
+    async def resolve_tools(
+        self,
+        request: Request,
+        *,
+        body: ToolResolveRequest,
+    ) -> ToolResolveResponse:
+        """Resolve an agent's tool references into model-ready specs.
+
+        Validates Composio connections up front and enriches each action from the
+        catalog, so a running agent (e.g. Pi) gets ``customTools`` whose ``execute``
+        routes back through ``POST /tools/call`` — provider keys stay server-side.
+        """
+        if is_ee():
+            has_permission = await check_action_access(
+                user_uid=request.state.user_id,
+                project_id=request.state.project_id,
+                permission=Permission.VIEW_TOOLS,
+            )
+            if not has_permission:
+                raise FORBIDDEN_EXCEPTION
+
+        try:
+            resolution = await self.tools_service.resolve_agent_tools(
+                project_id=UUID(request.state.project_id),
+                tools=body.tools,
+            )
+        except ConnectionNotFoundError as e:
+            raise HTTPException(status_code=404, detail=e.message) from e
+        except ConnectionInactiveError as e:
+            raise HTTPException(status_code=400, detail=e.message) from e
+        except ConnectionInvalidError as e:
+            raise HTTPException(status_code=400, detail=e.message) from e
+        except ToolSlugInvalidError as e:
+            raise HTTPException(status_code=400, detail=e.message) from e
+        except ActionNotFoundError as e:
+            raise HTTPException(status_code=404, detail=e.message) from e
+
+        return ToolResolveResponse(
+            count=len(resolution.builtins) + len(resolution.custom),
+            builtins=resolution.builtins,
+            custom=resolution.custom,
+        )
+
     @intercept_exceptions()
     @handle_adapter_exceptions()
     async def call_tool(
@@ -931,39 +989,12 @@ async def call_tool(
         connection_slug = slug_parts[4]
 
         try:
-            connections = await self.tools_service.query_connections(
+            connection = await self.tools_service.resolve_connection_by_slug(
                 project_id=UUID(request.state.project_id),
                 provider_key=provider_key,
                 integration_key=integration_key,
+                connection_slug=connection_slug,
             )
-
-            connection = next(
-                (c for c in connections if c.slug == connection_slug), None
-            )
-
-            if not connection:
-                raise ConnectionNotFoundError(
-                    connection_slug=connection_slug,
-                    provider_key=provider_key,
-                    integration_key=integration_key,
-                )
-
-            if not connection.is_active:
-                raise ConnectionInactiveError(connection_id=connection_slug)
-
-            if not connection.is_valid:
-                raise ConnectionInvalidError(
-                    connection_slug=connection_slug,
-                    detail="Please refresh the connection.",
-                )
-
-            if not connection.provider_connection_id:
-                raise ConnectionNotFoundError(
-                    connection_slug=connection_slug,
-                    provider_key=provider_key,
-                    integration_key=integration_key,
-                )
-
         except ConnectionNotFoundError as e:
             raise HTTPException(status_code=404, detail=e.message) from e
         except ConnectionInactiveError as e:
diff --git a/api/oss/src/core/tools/dtos.py b/api/oss/src/core/tools/dtos.py
index a588965f61..3c3f0ec53e 100644
--- a/api/oss/src/core/tools/dtos.py
+++ b/api/oss/src/core/tools/dtos.py
@@ -1,8 +1,8 @@
 from enum import Enum
-from typing import Any, Dict, List, Optional
+from typing import Annotated, Any, Dict, List, Literal, Optional, Union
 
 from agenta.sdk.models.workflows import JsonSchemas
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 
 from oss.src.core.shared.dtos import (
     Header,
@@ -238,3 +238,61 @@ class ToolExecutionResponse(BaseModel):
     data: Optional[Json] = None
     error: Optional[str] = None
     successful: bool = False
+
+
+# ---------------------------------------------------------------------------
+# Agent tools (config references + resolution)
+# ---------------------------------------------------------------------------
+
+# A provider-agnostic list of tool references lives under an agent revision's
+# ``parameters["tools"]``. Each entry is a discriminated union on ``type``: config
+# holds references and display metadata only, never secrets. The backend resolves
+# them into model-ready specs at invoke time (see ToolsService.resolve_agent_tools).
+
+
+class AgentBuiltinTool(BaseModel):
+    """A Pi built-in tool, referenced by name (e.g. ``read``, ``bash``)."""
+
+    type: Literal["builtin"] = "builtin"
+    name: str
+
+
+class AgentComposioTool(BaseModel):
+    """A Composio action, carrying the slug segments ``/tools/call`` parses."""
+
+    type: Literal["composio"] = "composio"
+    integration: str
+    action: str
+    connection: str
+    # Function name shown to the model. Defaults to ``{integration}__{action}``.
+    name: Optional[str] = None
+
+
+AgentToolReference = Annotated[
+    Union[AgentBuiltinTool, AgentComposioTool],
+    Field(discriminator="type"),
+]
+
+
+class ResolvedAgentTool(BaseModel):
+    """A runnable reference resolved into a model-ready tool spec.
+
+    ``call_ref`` is the ``tools.{provider}.{integration}.{action}.{connection}`` slug
+    the execution bridge sends back to ``POST /tools/call``.
+    """
+
+    name: str
+    description: Optional[str] = None
+    input_schema: Optional[Dict[str, Any]] = None
+    call_ref: str
+
+
+class AgentToolsResolution(BaseModel):
+    """Outcome of resolving an agent's ``tools`` list.
+
+    ``builtins`` pass straight into Pi's ``tools: string[]``; ``custom`` become Pi
+    ``customTools`` whose ``execute`` routes through ``/tools/call``.
+    """
+
+    builtins: List[str] = []
+    custom: List[ResolvedAgentTool] = []
diff --git a/api/oss/src/core/tools/exceptions.py b/api/oss/src/core/tools/exceptions.py
index f46c08b6cd..e9dbd54f3f 100644
--- a/api/oss/src/core/tools/exceptions.py
+++ b/api/oss/src/core/tools/exceptions.py
@@ -40,6 +40,24 @@ def __init__(
         super().__init__(msg)
 
 
+class ActionNotFoundError(ToolsError):
+    """Raised when a catalog action cannot be found for an integration."""
+
+    def __init__(
+        self,
+        *,
+        provider_key: str,
+        integration_key: str,
+        action_key: str,
+    ):
+        self.provider_key = provider_key
+        self.integration_key = integration_key
+        self.action_key = action_key
+        super().__init__(
+            f"Action not found: {provider_key}/{integration_key}/{action_key}"
+        )
+
+
 class ConnectionSlugConflictError(ToolsError):
     """Raised when a connection slug already exists for the integration."""
 
diff --git a/api/oss/src/core/tools/service.py b/api/oss/src/core/tools/service.py
index f603bc4d42..a9e1e4c779 100644
--- a/api/oss/src/core/tools/service.py
+++ b/api/oss/src/core/tools/service.py
@@ -1,3 +1,4 @@
+import re
 from typing import Any, Dict, List, Optional, Tuple
 from uuid import UUID
 
@@ -6,6 +7,11 @@
 from oss.src.core.tools.utils import make_oauth_state
 
 from oss.src.core.tools.dtos import (
+    AgentBuiltinTool,
+    AgentComposioTool,
+    AgentToolReference,
+    AgentToolsResolution,
+    ResolvedAgentTool,
     ToolCatalogAction,
     ToolCatalogActionDetails,
     ToolCatalogIntegration,
@@ -15,17 +21,27 @@
     ToolConnectionRequest,
     ToolExecutionRequest,
     ToolExecutionResponse,
+    ToolProviderKind,
 )
 from oss.src.core.tools.interfaces import (
     ToolsDAOInterface,
 )
 from oss.src.core.tools.registry import ToolsGatewayRegistry
 from oss.src.core.tools.exceptions import (
+    ActionNotFoundError,
     ConnectionInactiveError,
+    ConnectionInvalidError,
     ConnectionNotFoundError,
+    ToolSlugInvalidError,
 )
 
 
+# Matches one segment of a ``tools.{provider}.{integration}.{action}.{connection}``
+# call-ref. ``__`` is rejected because ``/tools/call`` round-trips ``__`` <-> ``.`` when
+# parsing function names, so a ``__`` inside a segment would corrupt the split.
+_SLUG_SEGMENT_RE = re.compile(r"^[a-zA-Z0-9-]+(?:_[a-zA-Z0-9-]+)*$")
+
+
 log = get_module_logger(__name__)
 
 
@@ -408,3 +424,146 @@ async def execute_tool(
                 arguments=arguments,
             ),
         )
+
+    # -----------------------------------------------------------------------
+    # Connection resolution (shared by the call endpoint and the agent resolver)
+    # -----------------------------------------------------------------------
+
+    async def resolve_connection_by_slug(
+        self,
+        *,
+        project_id: UUID,
+        provider_key: str,
+        integration_key: str,
+        connection_slug: str,
+    ) -> ToolConnection:
+        """Resolve a project-scoped connection slug to a usable connection row.
+
+        Raises a domain exception when the connection is missing, inactive, invalid,
+        or never finished its provider handshake. Shared by ``call_tool`` (execution)
+        and ``resolve_agent_tools`` (up-front validation).
+        """
+        # Query all (not active-only) so an inactive connection yields a precise
+        # "inactive" error instead of an indistinguishable "not found".
+        connections = await self.query_connections(
+            project_id=project_id,
+            provider_key=provider_key,
+            integration_key=integration_key,
+            is_active=None,
+        )
+
+        connection = next(
+            (c for c in connections if c.slug == connection_slug),
+            None,
+        )
+
+        if not connection:
+            raise ConnectionNotFoundError(
+                provider_key=provider_key,
+                integration_key=integration_key,
+                connection_slug=connection_slug,
+            )
+
+        if not connection.is_active:
+            raise ConnectionInactiveError(connection_id=connection_slug)
+
+        if not connection.is_valid:
+            raise ConnectionInvalidError(
+                connection_slug=connection_slug,
+                detail="Please refresh the connection.",
+            )
+
+        if not connection.provider_connection_id:
+            raise ConnectionNotFoundError(
+                provider_key=provider_key,
+                integration_key=integration_key,
+                connection_slug=connection_slug,
+            )
+
+        return connection
+
+    # -----------------------------------------------------------------------
+    # Agent tool resolution
+    # -----------------------------------------------------------------------
+
+    async def resolve_agent_tools(
+        self,
+        *,
+        project_id: UUID,
+        tools: List[AgentToolReference],
+    ) -> AgentToolsResolution:
+        """Resolve an agent's tool references into model-ready specs.
+
+        ``builtin`` references pass through as names. ``composio`` references are
+        validated against the project's connections up front and enriched from the
+        catalog (description + input schema), so the model never sees a stale schema
+        and the invoke fails fast on a missing/invalid connection rather than mid-loop.
+        """
+        builtins: List[str] = []
+        custom: List[ResolvedAgentTool] = []
+
+        for ref in tools:
+            if isinstance(ref, AgentBuiltinTool):
+                if ref.name:
+                    builtins.append(ref.name)
+                continue
+
+            if isinstance(ref, AgentComposioTool):
+                custom.append(
+                    await self._resolve_composio_tool(
+                        project_id=project_id,
+                        ref=ref,
+                    )
+                )
+
+        return AgentToolsResolution(builtins=builtins, custom=custom)
+
+    async def _resolve_composio_tool(
+        self,
+        *,
+        project_id: UUID,
+        ref: AgentComposioTool,
+    ) -> ResolvedAgentTool:
+        provider_key = ToolProviderKind.COMPOSIO.value
+
+        for segment in (ref.integration, ref.action, ref.connection):
+            if not _SLUG_SEGMENT_RE.match(segment):
+                raise ToolSlugInvalidError(
+                    slug=f"{provider_key}.{ref.integration}.{ref.action}.{ref.connection}",
+                    detail=f"Invalid slug segment: {segment!r}",
+                )
+
+        # Fail fast if the connection is missing/inactive/invalid for this project.
+        await self.resolve_connection_by_slug(
+            project_id=project_id,
+            provider_key=provider_key,
+            integration_key=ref.integration,
+            connection_slug=ref.connection,
+        )
+
+        action = await self.get_action(
+            provider_key=provider_key,
+            integration_key=ref.integration,
+            action_key=ref.action,
+        )
+        if not action:
+            raise ActionNotFoundError(
+                provider_key=provider_key,
+                integration_key=ref.integration,
+                action_key=ref.action,
+            )
+
+        input_schema = (
+            action.schemas.inputs if action.schemas and action.schemas.inputs else None
+        )
+        name = ref.name or f"{ref.integration}__{ref.action}"
+        call_ref = (
+            f"tools.{provider_key}.{ref.integration}.{ref.action}.{ref.connection}"
+        )
+
+        return ResolvedAgentTool(
+            name=name,
+            description=action.description,
+            input_schema=input_schema,
+            call_ref=call_ref,
+        )
diff --git a/docs/design/agent-workflows/wp-1-pi-tracing/tracing-in-the-agent-service.md b/docs/design/agent-workflows/wp-1-pi-tracing/tracing-in-the-agent-service.md
index 0bb4b12777..977427469c 100644
--- a/docs/design/agent-workflows/wp-1-pi-tracing/tracing-in-the-agent-service.md
+++ b/docs/design/agent-workflows/wp-1-pi-tracing/tracing-in-the-agent-service.md
@@ -92,8 +92,9 @@ network the internal hostname resolves from both; if it does not, the sidecar's
 
 ## How to verify
 
-1. Start the service (`entrypoints.agent_main:app`) with `AGENTA_HOST` and
-   `AGENTA_API_KEY` set and a Pi login or provider key available.
+1. Start the services app (`entrypoints.main:app`, which mounts the agent at
+   `/agent/v0`) with `AGENTA_HOST` and `AGENTA_API_KEY` set and a Pi login or provider
+   key available.
 2. POST a chat-style body to `/agent/v0/invoke` and read `x-ag-trace-id` from the
    response headers (it equals `trace_id` in the body).
 3. Fetch the trace and confirm the merged tree and the totals:
diff --git a/docs/design/agent-workflows/wp-2-agent-service/implementation-plan.md b/docs/design/agent-workflows/wp-2-agent-service/implementation-plan.md
index 81f8cb6e88..f905fd6d7a 100644
--- a/docs/design/agent-workflows/wp-2-agent-service/implementation-plan.md
+++ b/docs/design/agent-workflows/wp-2-agent-service/implementation-plan.md
@@ -4,6 +4,14 @@ Status: MVP built and verified by curl (2026-06-15). Decisions below were taken;
 "Implemented" section records what shipped. Original decision points are kept marked
 **[DECISION]** for history.
 
+> Note (current state): the sections below describe the iterative MVP, including a
+> standalone entrypoint (`agent_main.py`) and dedicated composes
+> (`docker-compose.agent.yml`, `docker-compose.stack.yml`). Those were **removed** in
+> favor of the integrated path only: the agent is mounted in `entrypoints/main.py` at
+> `/agent/v0` and the `agent-pi` sidecar lives in
+> `hosting/docker-compose/ee/docker-compose.dev.yml`. The standalone run commands below
+> are historical. See `qa.md` for the rationale.
+
 ## Implemented (MVP, verified by curl)
 
 Per the decisions: a Python service exposes the Agenta `/invoke` contract (auth,
diff --git a/docs/design/agent-workflows/wp-2-agent-service/qa.md b/docs/design/agent-workflows/wp-2-agent-service/qa.md
new file mode 100644
index 0000000000..b7d25221d9
--- /dev/null
+++ b/docs/design/agent-workflows/wp-2-agent-service/qa.md
@@ -0,0 +1,176 @@
+# Agent service: Q&A
+
+Running notes answering review questions about the agent workflow implementation
+(branch `feat/agent-workflows`). Questions are in no particular order.
+
+---
+
+## Q: Why a separate entrypoint `agent_main.py` instead of `main.py`?
+
+Short answer: `agent_main.py` is not a replacement for `main.py`. It is an extra,
+lightweight runner for testing the agent in isolation. The real integration lives in
+`main.py`, and that is what the 8280 stack actually runs.
+
+The two entrypoints:
+
+- `services/entrypoints/main.py` is the full services app. It mounts every service
+  (chat, completion, all the managed evaluators, and now the agent at `/agent/v0`). This
+  is the production/dev container entrypoint and the path the playground uses
+  (`/services/agent/v0/...`). The agent is a first-class part of it:
+  `app.mount("/agent/v0", agent_v0_app)`.
+
+- `services/entrypoints/agent_main.py` mounts only the agent app plus `/health`.
+
+Why we added `agent_main.py`:
+
+1. Isolated, fast iteration. Early on the deliverable was "a standalone agent service
+   verified by curl" (no full stack). Running `main.py` pulls in the whole managed
+   evaluator surface (litellm, all the builtins) and `ag.init()` for the full app.
+   `agent_main.py` lets you run just the agent:
+   `uv run uvicorn entrypoints.agent_main:app --port 8090` and curl it, without the rest.
+
+2. The dedicated `:8092` Docker compose. Before the agent was integrated into the real
+   stack, it ran standalone in its own compose. That container ran `agent_main.py`.
+
+3. A place for cross-origin CORS. When the playground had to call the agent on a
+   different port (`:8092` vs the web on `:8280`), the browser needed a credentialed CORS
+   policy (echo the specific origin + allow credentials). `agent_main.py` set that
+   (`allow_origin_regex` + `allow_credentials=True`). `main.py` keeps the stricter
+   shared services CORS, which is fine for it because, once integrated, the agent is
+   served same-origin (`/services/agent/v0`) so there is no CORS at all.
+
+Net: `main.py` is the real, integrated path (same-origin, used by the 8280 stack).
+`agent_main.py` was a convenience runner for isolated local/standalone testing and the
+old dedicated compose.
+
+**Update (decision): dropped.** We removed `agent_main.py` and the two standalone
+composes (`docker-compose.agent.yml`, `docker-compose.stack.yml`) to keep only the
+integrated path: the agent mounted in `entrypoints/main.py` at `/agent/v0`, served by
+the normal services container, with the `agent-pi` sidecar wired into
+`hosting/docker-compose/ee/docker-compose.dev.yml`. If we ever want isolated runs again,
+the cleaner approach is a profile/override on the real compose rather than a parallel
+entrypoint.
+
+---
+
+## Q: How does the agent service use the workflow middleware? Which parts does it have access to (secrets, invoke, inspect, ...)?
+
+The agent gets the whole Agenta workflow machinery "for free" because it is built the
+same way as chat and completion: `ag.create_app()` + `ag.workflow(schemas=...)` +
+`ag.route("/", flags={"is_chat": True})` in `services/oss/src/agent.py`. That was the
+point of the Python-front decision: the Python layer provides auth, middleware,
+tracing, secrets, and the invoke/inspect contract; the Node wrapper only runs Pi.
+
+There are **two middleware layers**.
+
+### Layer 1 — HTTP/ASGI middleware (per request)
+
+Added by `ag.create_app()` (`sdks/.../decorators/routing.py:64`). Outermost first:
+
+- **CORSMiddleware** — cross-origin headers. Irrelevant on the integrated same-origin
+  path; it mattered only for the old cross-port setup.
+- **AuthMiddleware** — verifies the caller against `{host}/api/access/permissions/check`
+  and puts the resolved credential on `request.state.auth["credentials"]` (a signed
+  `Secret`). With `AGENTA_SERVICES_MIDDLEWARE_AUTH_ENABLED=false` it passes the raw
+  `Authorization` through without a remote check. This is the credential everything
+  downstream uses.
+- **OTelMiddleware** — opens the request's tracing context, i.e. the workflow span the
+  whole run nests under.
+
+### Layer 2 — Workflow middleware (inside `wf.invoke`)
+
+Set on the workflow object (`decorators/running.py:197`), run in order around the
+handler:
+
+- **VaultMiddleware** — resolves secrets for the credential: it fetches the project's
+  vault secrets from `{api_url}/secrets/`, combines them with any local secrets, checks
+  access, and exposes them on the running context. (More on "access" below.)
+- **ResolverMiddleware** — resolves which handler to run from the revision URI, hydrates
+  references / revision / config from the backend when needed, and resolves embeds in
+  parameters.
+- **NormalizerMiddleware** — maps the request to the handler's arguments by inspecting
+  its signature (`inputs`, `messages`, `parameters` pulled from `data`), calls
+  `_agent(...)`, and wraps the return value into the response envelope, attaching
+  `trace_id` / `span_id`.
+
+### What the agent actually has access to / uses
+
+- **invoke** — yes, fully. `POST /services/agent/v0/invoke` runs the entire chain
+  (auth -> vault -> resolver -> normalizer -> `_agent`). `_agent` receives `inputs`,
+  `messages`, and `parameters` already mapped for it.
+- **inspect** — yes. `POST /services/agent/v0/inspect` returns the agent's interface,
+  i.e. `AGENT_SCHEMAS` (chat `messages` in, `message` out, config = `model` +
+  `agents_md`). This is what tells the playground to render a chat box and the two
+  config fields. (Known bug: inspect currently 500s under session-cookie auth; it did
+  not block the playground because the create flow takes the schema from the catalog
+  template.)
+- **auth / credentials** — yes. The resolved `Secret` credential is available to the
+  handler and to tracing export.
+- **tracing** — yes. `_agent` reads the active workflow span via `_trace_context()` and
+  threads the `traceparent` (plus endpoint/auth) to the Pi sidecar, so the Pi spans
+  nest under the `/invoke` span in one trace.
+- **secrets** — available but **not consumed yet**. VaultMiddleware resolves the
+  project's secrets on every invoke and exposes them on the running context. Chat and
+  completion use them automatically because litellm reads them. The agent handler does
+  not read them today; the Pi model auth currently comes from the mounted
+  `~/.pi/agent` (Codex login) or `AGENTA_API_KEY`/provider env on the sidecar. Wiring
+  the resolved secrets into the Pi run (the "startup hook injects the provider/tool
+  keys" step) is exactly where this plugs in: read the secrets in `_agent`, pass them in
+  the harness request, and have the wrapper inject them (`setRuntimeApiKey` / env). That
+  is the planned secrets work, not yet built.
+
+One detail: the route passes `secrets=None` into `wf.invoke`, so the agent does not
+hand secrets in; VaultMiddleware fetches them itself from the credential. The gap is
+only on the consuming side (the handler), not the resolving side.
+
+---
+
+## Q: Why does tracing look different / broken now vs the old trace?
+
+Reference old trace `6ab51033...`: root `invoke_agent`, four `turn`s, several
+`chat gpt-5.5` spans, and `execute_tool ls/read/bash/write` — 14 spans, with
+cumulative token + cost rolled up onto the `turn` and `invoke_agent` spans.
+
+Current trace (e.g. `329698f7...`): `_agent -> invoke_agent -> turn 0 -> chat` — 4
+spans; the `chat` span has tokens + cost, the parents do not.
+
+Tracing is **not broken** (spans land, nest correctly, the `chat` span carries model,
+tokens, cost). Two things changed:
+
+### 1. Different agent and task (the big, expected difference)
+
+The old trace is the WP-1 POC: tools enabled (`read/bash/edit/write/ls`) and a task
+that needs them ("read notes.txt, write greeting.txt"). That drives a multi-turn loop
+with tool calls, so you get many turns, many `chat` spans, and `execute_tool` spans.
+
+The current app is the hello-world chat agent: `tools=[]` and "answer in one or two
+short sentences". So it does exactly one turn, no tools, one `chat`. Same
+instrumentation, a trivial run. To get a rich trace again, give the agent tools
+(built-in `read/bash/...` or the WP-7 runnable tools) and a task that uses them.
+
+### 2. Cumulative token/cost rollup is lost across the process boundary (a real regression)
+
+In the old (standalone) trace, all spans were exported by one process in one batch, so
+Agenta's per-ingest-batch cumulative computation could build the roll-up tree and put
+cumulative tokens/cost on `turn` and `invoke_agent`.
+
+Now the trace is split across **two exporters**:
+- Python (services container) exports `_agent` (the workflow span).
+- Node (`agent-pi`) exports `invoke_agent -> turn -> chat` (the Pi spans), where
+  `invoke_agent`'s parent is the **remote** `_agent`.
+
+Agenta builds the cumulative tree per ingest batch and "attaches a span only if its
+parent is already seen" (see the `orderParentFirst` comment in `agenta-otel.ts`). In the
+Node batch, `invoke_agent`'s parent (`_agent`) is in the **other** (Python) batch, so the
+Pi subtree is dropped from the cumulative tree. Result: the leaf `chat` keeps its raw
+`incremental` tokens, but `cumulative` is missing on `chat` and there is no token/cost
+rollup on `turn` / `invoke_agent` / `_agent`. (Duration still rolls up because it is
+computed differently.)
+
+So the agent- and turn-level token/cost totals you used to see are gone. This is a
+side effect of nesting the agent under the Agenta workflow span (the integration goal).
+The fix belongs on the tracing side (owned by the instrumentation work): compute the
+cumulative roll-up across the whole trace by `trace_id` rather than per ingest batch, so
+a trace split between the Python workflow span and the Node Pi spans still aggregates.
+Until then, per-span (leaf `chat`) tokens/cost are correct; the rolled-up agent totals
+are not.
diff --git a/docs/design/agent-workflows/wp-7-tools/README.md b/docs/design/agent-workflows/wp-7-tools/README.md
index 225c77eb26..483f5dc688 100644
--- a/docs/design/agent-workflows/wp-7-tools/README.md
+++ b/docs/design/agent-workflows/wp-7-tools/README.md
@@ -1,6 +1,8 @@
 # WP-7: Runnable tools as agent configuration
 
-Status: design draft. Builds on WP-2 (agent service) and WP-6 (workflow type and template).
+Status: Composio MVP implemented. Resolution lives in `api`; the bridge routes Pi tool
+calls back through `POST /tools/call`. Builds on WP-2 (agent service) and WP-6 (workflow
+type and template). See [Implementation status](#implementation-status-composio-mvp) below.
 
 ## Goal
 
@@ -205,6 +207,99 @@ dispatches purely by `provider_key` through the registry, the agent side stays p
   smoke run, with the call nested under the agent invoke span and the Composio key absent from the
   sandbox.
 
+## Implementation status (Composio MVP)
+
+What landed, by seam. WP-6 is not started, so resolution runs in `api` behind a thin
+endpoint that the agent service calls over HTTP; when WP-6 lands, its invoke path calls the
+same `ToolsService.resolve_agent_tools(...)` in-process and the HTTP hop drops out.
+
+**Backend (`api`) — the resolver and the shared connection lookup.**
+
+- `core/tools/dtos.py`: `AgentToolReference` (discriminated `builtin` | `composio`),
+  `ResolvedAgentTool` (`name`, `description`, `input_schema`, `call_ref`), and
+  `AgentToolsResolution` (`builtins`, `custom`).
+- `core/tools/service.py`: `resolve_connection_by_slug(...)` (extracted from `call_tool`, now
+  shared) and `resolve_agent_tools(...)`. Composio refs validate the connection up front,
+  enrich `description` + `input_schema` from the catalog (`get_action`), and build the
+  `call_ref` `tools.composio.{integration}.{action}.{connection}`. Slug segments are validated
+  and `__` is rejected so the `/tools/call` `__`↔`.` round-trip can't corrupt the split.
+- `apis/fastapi/tools/router.py`: `POST /tools/resolve` (project-scoped, EE `VIEW_TOOLS`)
+  returns the resolution; `call_tool` now reuses `resolve_connection_by_slug`. `call_tool` is
+  otherwise unchanged as the execution endpoint.
+
+**Agent service (`services/oss`) — thin driver.**
+
+- `agent_pi/ports.py`: `ToolCallback` (endpoint + authorization) and `custom_tools` /
+  `tool_callback` on `HarnessRequest`, serialized onto the wire by both harness adapters.
+- `agent.py`: reads `parameters["tools"]` (or the file config), POSTs them to `/tools/resolve`,
+  and threads the result plus a `/tools/call` callback into the harness. The callback endpoint
+  and credential reuse the OTLP-credential mechanism (`inject()` Authorization, API-base derived
+  from `ag.tracing.otlp_url`, with `AGENTA_AGENT_TOOLS_API_URL` / `AGENTA_API_KEY` fallbacks). An
+  agent with no tools never touches the backend, preserving the tool-less WP-2 path.
+
+**TS wrapper (`services/agent`) — the bridge.**
+
+- `runPi.ts`: `buildCustomTools(...)` turns each resolved spec into a Pi `customTool` whose
+  `execute` does one `POST {endpoint}` with the OpenAI envelope
+  `{ data: { id, type, function: { name: callRef, arguments } } }` and the callback
+  Authorization. Arguments go as an object (no double-encoding); the result `content` is returned
+  verbatim; an HTTP/timeout failure throws, which Pi turns into a tool-error result rather than a
+  run failure. Custom tool names are added to the `createAgentSession` `tools` allowlist, because
+  the allowlist gates custom tools too (an empty allowlist would hide them).
+
+**Config schema as shipped.** Under the agent revision `parameters["tools"]`, each entry is a
+built-in tool name (string, normalized to `{"type": "builtin", "name": ...}`) or a discriminated
+object. Example:
+
+```json
+{
+  "model": "gpt-5.5",
+  "tools": [
+    "read_file",
+    { "type": "composio", "integration": "gmail", "action": "GMAIL_SEND_EMAIL",
+      "connection": "gmail-team", "name": "gmail__SEND_EMAIL" }
+  ]
+}
+```
+
+**Playground integration: reuse the existing tool picker.** The chat/completion tool picker
+only renders inside the prompt control, which the playground shows for a config field marked
+`x-ag-type-ref: "prompt-template"`. So the agent advertises its config as a `prompt` field of
+type prompt-template (`agent_pi/schemas.py`) instead of a bespoke form: the playground then renders
+the same model selector + system-message editor + tool picker, with no new frontend code. The
+handler (`agent.py` `_resolve_run_config`) reads the system message as the AGENTS.md, the model
+and tools from `prompt.llm_config`, and still accepts the flat `{model, agents_md, tools}` an API
+caller may send. The picker encodes a Composio action as a gateway function name,
+`tools__{provider}__{integration}__{action}__{connection}` (connection = the connection slug);
+`agent.py` `_parse_gateway_slug` turns that into the same `composio` ref the resolver already
+takes, so no backend change was needed. Non-Composio picker entries (provider built-ins, inline
+functions) are skipped.
+
+**Verified live (2026-06-16, dev stack, pi-agents project).** A real GitHub Composio connection
+(`github-tvn`) plus a `GET_THE_AUTHENTICATED_USER` reference, passed via `parameters["tools"]` to
+the agent `/invoke`, drove the whole path: `/tools/resolve` built the spec, Pi registered the
+`github_whoami` customTool, called it, and the bridge executed the real action through
+`/tools/call`. The agent answered with live data (login `mmabrouk`, follower count, public-repo
+count) that only comes from executing the action. The trace nests the tool call correctly:
+`_agent → invoke_agent → turn 0 → {chat, execute_tool github_whoami} → turn 1 → chat`. The same
+run also works end to end from the playground: the picker shows the GitHub tool as a gateway card,
+and Run returns the live answer.
+
+Earlier unit-level checks still hold: the resolver builds correct specs and raises the right
+errors for missing / inactive / invalid connections, bad slugs, and missing actions; the bridge
+sends the right envelope, forwards Authorization, sends object-form arguments, returns content
+verbatim, and throws on HTTP error; Pi's validator accepts and coerces the plain Composio JSON
+Schema.
+
+**Deployment hardening found and fixed.** The DoD wants the Composio key absent from the sandbox.
+The WP-7 *data path* already guarantees this (the key is never sent to Pi). But the dev
+`agent-pi` sidecar was loading the whole stack `env_file`, so the container inherited
+`COMPOSIO_API_KEY` and other secrets anyway. Dropping `env_file` from the `agent-pi` service in
+`hosting/docker-compose/ee/docker-compose.dev.yml` (it reads only `PORT`, `PI_CODING_AGENT_DIR`,
+`AGENTA_HOST`, `AGENTA_API_KEY`, and two optional vars; Pi auth comes from the mounted login) makes
+the property hold in the local sidecar too. A real sandbox (WP-3 Daytona) is isolated and never
+saw these.
+
 ## Links
 
 - [`wp-2-agent-service/`](../wp-2-agent-service/README.md)
diff --git a/hosting/docker-compose/ee/docker-compose.dev.yml b/hosting/docker-compose/ee/docker-compose.dev.yml
index 27974d996f..e09b82b29f 100644
--- a/hosting/docker-compose/ee/docker-compose.dev.yml
+++ b/hosting/docker-compose/ee/docker-compose.dev.yml
@@ -428,8 +428,12 @@ services:
             sh -c "mkdir -p /pi-agent && cp -a /pi-agent-ro/. /pi-agent/ 2>/dev/null || true;
             exec node_modules/.bin/tsx src/server.ts"
         # === CONFIGURATION ======================================== #
-        env_file:
-            - ${ENV_FILE:-./.env.ee.dev}
+        # Deliberately NO env_file: the Pi sandbox must not inherit the stack's
+        # secrets (COMPOSIO_API_KEY, STRIPE/POSTHOG/GOOGLE/DAYTONA keys, ...). Tools
+        # run server-side via /tools/call, so the sandbox only needs its own port,
+        # the Pi login (mounted below), and the OTLP export fallback. The wrapper
+        # reads exactly: PORT, PI_CODING_AGENT_DIR, AGENTA_HOST, AGENTA_API_KEY, and
+        # the optional AGENTA_AGENT_TOOL_CALL_TIMEOUT_MS / OTEL_SERVICE_NAME.
         environment:
             PORT: "8765"
             PI_CODING_AGENT_DIR: /pi-agent
diff --git a/services/agent/README.md b/services/agent/README.md
index c920de19f9..f566acb704 100644
--- a/services/agent/README.md
+++ b/services/agent/README.md
@@ -1,4 +1,4 @@
-# Agent service: Pi wrapper (WP-2)
+# Agent service: Pi wrapper (WP-2 + WP-7)
 
 This is the TypeScript side of the agent workflow service. It is a thin wrapper that
 drives the [Pi](https://pi.dev) agent harness for a single run. The Python service
@@ -54,6 +54,34 @@ With no `trace` block the run is traced standalone using `AGENTA_HOST` /
 `AGENTA_API_KEY`, or not at all when neither is set. The extension lives in
 `src/agenta-otel.ts`.
 
+## Tools (WP-7)
+
+The agent's runnable tools are resolved in the backend (not here) and arrive on the
+request as `customTools` plus a `toolCallback`. `buildCustomTools` in `src/runPi.ts`
+turns each spec into a Pi `customTool` whose `execute` does one
+`POST {toolCallback.endpoint}` (Agenta's `/tools/call`) with the `callRef` slug and the
+threaded `authorization`. Pi drives the loop and calls `execute` in-process, but the action
+itself runs server-side: the provider key and connection auth stay behind `/tools/call` and
+never enter this sandbox. See `docs/design/agent-workflows/wp-7-tools/README.md`.
+
+```json
+{
+  "prompt": "What is my GitHub username?",
+  "customTools": [
+    {
+      "name": "github__GET_THE_AUTHENTICATED_USER",
+      "description": "Gets the authenticated GitHub user.",
+      "inputSchema": {"type": "object", "properties": {}},
+      "callRef": "tools.composio.github.GET_THE_AUTHENTICATED_USER.github-tvn"
+    }
+  ],
+  "toolCallback": {
+    "endpoint": "https://host/api/tools/call",
+    "authorization": "ApiKey ..."
+  }
+}
+```
+
 ## Auth
 
 `AuthStorage.create()` reads `~/.pi/agent/auth.json`. Log in once with `pnpm exec pi`
@@ -68,6 +96,8 @@ echo '{"agentsMd":"You are a hello-world agent.","prompt":"Hi"}' | pnpm run run:
 
 ## Config
 
-`config/AGENTS.md` and `config/agent.json` hold the hardcoded MVP config. They are read
-by the Python service and passed into the request, so editing them changes the agent
-without a code change.
+The live config comes from the agent revision in the playground: a `prompt-template`
+whose system message is the AGENTS.md, with the model and the picked tools under
+`llm_config`. The Python service (`services/oss/src/agent.py`) reads that and fills the
+request. `config/AGENTS.md` and `config/agent.json` are only the file fallback used when
+the request carries no config.
diff --git a/services/agent/docker-compose.agent.yml b/services/agent/docker-compose.agent.yml
deleted file mode 100644
index 43f733d1c7..0000000000
--- a/services/agent/docker-compose.agent.yml
+++ /dev/null
@@ -1,98 +0,0 @@
-# Dedicated, self-contained compose for the agent service (WP-2).
-#
-# Runs the agent fully in Docker, invokable by curl, without touching any other stack:
-#
-#   agent-pi   - the TypeScript Pi wrapper as an HTTP sidecar. Uses the local Pi login
-#                (~/.pi/agent) copied in at startup so token refresh never writes to the
-#                host. Reachable only on the internal network.
-#   agent-api  - the Python agent service (reuses the prebuilt services dev image). Speaks
-#                the Agenta /invoke contract and calls agent-pi over HTTP. Published on a
-#                host port for curl.
-#
-# Bring up:
-#   docker compose -f services/agent/docker-compose.agent.yml up --build
-# Verify:
-#   curl localhost:8092/health
-#   curl -X POST localhost:8092/agent/v0/invoke -H 'Content-Type: application/json' \
-#     -d '{"data":{"inputs":{"messages":[{"role":"user","content":"hi"}]}}}'
-# Tear down:
-#   docker compose -f services/agent/docker-compose.agent.yml down
-
-name: agenta-agent
-
-services:
-    agent-pi:
-        build:
-            context: .
-            dockerfile: docker/Dockerfile.dev
-        # Copy the read-only mounted login into a writable container path so OAuth token
-        # refresh works and never writes back to the host ~/.pi/agent.
-        command: >
-            sh -c "mkdir -p /pi-agent && cp -a /pi-agent-ro/. /pi-agent/ 2>/dev/null || true;
-            exec node_modules/.bin/tsx watch src/server.ts"
-        environment:
-            PORT: "8765"
-            PI_CODING_AGENT_DIR: /pi-agent
-            # Tracing export fallback when the request carries no Authorization
-            # (auth disabled locally). Must be reachable from this container.
-            AGENTA_HOST: ${AGENTA_HOST:-http://144.76.237.122:8280}
-            AGENTA_API_KEY: ${AGENTA_API_KEY:-}
-        volumes:
-            - ./src:/app/src
-            - ${HOME}/.pi/agent:/pi-agent-ro:ro
-        networks:
-            - agent-net
-        restart: unless-stopped
-
-    agent-api:
-        # Built from the current services dev Dockerfile (Python 3.13, current SDK +
-        # deps). A dedicated tag so we never clobber other stacks' images.
-        image: agenta-agent-api:dev
-        build:
-            context: ../..
-            dockerfile: services/oss/docker/Dockerfile.dev
-        command:
-            [
-                "uvicorn",
-                "entrypoints.agent_main:app",
-                "--host",
-                "0.0.0.0",
-                "--port",
-                "8080",
-                "--reload",
-                "--reload-dir",
-                "/app/oss/src",
-                "--reload-dir",
-                "/app/entrypoints",
-                "--reload-dir",
-                "/sdks/python/agenta",
-                "--reload-exclude",
-                "*.pyc",
-                "--reload-exclude",
-                "__pycache__",
-            ]
-        environment:
-            # Local curl: skip the remote credential check (the Python layer still runs
-            # its auth/middleware stack, it just passes the header through).
-            AGENTA_SERVICES_MIDDLEWARE_AUTH_ENABLED: "false"
-            # Drives the harness selection: HTTP harness -> the agent-pi sidecar.
-            AGENTA_AGENT_PI_URL: http://agent-pi:8765
-            # Tracing export target. Must be reachable from this container AND from the
-            # agent-pi sidecar (the endpoint is passed across to nest the Pi spans), so
-            # use the host IP, not localhost. The API key authorizes the OTLP export.
-            AGENTA_HOST: ${AGENTA_HOST:-http://144.76.237.122:8280}
-            AGENTA_API_KEY: ${AGENTA_API_KEY:-}
-        volumes:
-            - ..:/app
-            - ../../sdks/python:/sdks/python
-            - ../../clients/python:/clients/python
-        ports:
-            - "8092:8080"
-        depends_on:
-            - agent-pi
-        networks:
-            - agent-net
-        restart: unless-stopped
-
-networks:
-    agent-net:
diff --git a/services/agent/docker-compose.stack.yml b/services/agent/docker-compose.stack.yml
deleted file mode 100644
index 774e942517..0000000000
--- a/services/agent/docker-compose.stack.yml
+++ /dev/null
@@ -1,86 +0,0 @@
-# Same-origin demo: the agent served exactly like chat and completion.
-#
-# Runs the FULL services app (entrypoints.main, which now mounts /agent/v0 next to
-# /chat/v0 and /completion/v0) behind its own traefik, so the agent answers at
-# {origin}/services/agent/v0/invoke just like {origin}/services/chat/v0/invoke. The
-# Pi sidecar is called in-network. This is the integration; a full dev stack (with the
-# web app) would serve the playground at the same origin so there is no CORS at all.
-#
-# Bring up (creds for tracing/export come from the shell):
-#   set -a && source .env.test.local && set +a
-#   docker compose -f services/agent/docker-compose.stack.yml up --build -d
-# Verify:
-#   curl -X POST localhost:8480/services/agent/v0/invoke -H 'content-type: application/json' \
-#     -d '{"data":{"inputs":{"messages":[{"role":"user","content":"hi"}]}}}'
-
-name: agenta-agent-stack
-
-services:
-    traefik:
-        image: traefik:2
-        command:
-            - --providers.docker
-            - --providers.docker.constraints=Label(`com.docker.compose.project`,`agenta-agent-stack`)
-            - --entrypoints.web.address=:80
-        volumes:
-            - /var/run/docker.sock:/var/run/docker.sock
-        ports:
-            - "8480:80"
-        networks:
-            - stack-net
-        restart: unless-stopped
-
-    services:
-        image: agenta-agent-api:dev
-        command:
-            [
-                "uvicorn",
-                "entrypoints.main:app",
-                "--host",
-                "0.0.0.0",
-                "--port",
-                "8080",
-                "--root-path",
-                "/services",
-            ]
-        environment:
-            AGENTA_SERVICES_MIDDLEWARE_AUTH_ENABLED: "false"
-            AGENTA_AGENT_PI_URL: http://agent-pi:8765
-            AGENTA_HOST: ${AGENTA_HOST:-http://144.76.237.122:8280}
-            AGENTA_API_KEY: ${AGENTA_API_KEY:-}
-        volumes:
-            - ..:/app
-            - ../../sdks/python:/sdks/python
-            - ../../clients/python:/clients/python
-        networks:
-            - stack-net
-        labels:
-            - "traefik.http.routers.aservices.rule=PathPrefix(`/services/`)"
-            - "traefik.http.routers.aservices.entrypoints=web"
-            - "traefik.http.middlewares.aservices-strip.stripprefix.prefixes=/services"
-            - "traefik.http.middlewares.aservices-strip.stripprefix.forceslash=true"
-            - "traefik.http.routers.aservices.middlewares=aservices-strip"
-            - "traefik.http.services.aservices.loadbalancer.server.port=8080"
-        restart: unless-stopped
-
-    agent-pi:
-        build:
-            context: .
-            dockerfile: docker/Dockerfile.dev
-        command: >
-            sh -c "mkdir -p /pi-agent && cp -a /pi-agent-ro/. /pi-agent/ 2>/dev/null || true;
-            exec node_modules/.bin/tsx src/server.ts"
-        environment:
-            PORT: "8765"
-            PI_CODING_AGENT_DIR: /pi-agent
-            AGENTA_HOST: ${AGENTA_HOST:-http://144.76.237.122:8280}
-            AGENTA_API_KEY: ${AGENTA_API_KEY:-}
-        volumes:
-            - ./src:/app/src
-            - ${HOME}/.pi/agent:/pi-agent-ro:ro
-        networks:
-            - stack-net
-        restart: unless-stopped
-
-networks:
-    stack-net:
diff --git a/services/agent/scripts/register_agent_app.py b/services/agent/scripts/register_agent_app.py
deleted file mode 100644
index 1e73c0515f..0000000000
--- a/services/agent/scripts/register_agent_app.py
+++ /dev/null
@@ -1,166 +0,0 @@
-# /// script
-# requires-python = ">=3.11"
-# dependencies = ["requests"]
-# ///
-"""Register the agent as an app in a running Agenta stack, pointing at the dockerized
-agent service. Run it, then open the app in the playground and chat.
-
-It creates a workflow + default variant and commits a revision whose `data.url` points
-at the agent service and whose `data.schemas` is the chat interface the agent serves
-from /inspect (so the playground renders a chat box). This is the "custom workflow"
-path: no static SDK interface, the agent self-describes.
-
-Env:
-  AGENTA_HOST     base host (default http://144.76.237.122:8280)
-  AGENTA_API_KEY  api key for that stack (Authorization: ApiKey ...)
-  AGENT_URL       agent service invoke base (default http://144.76.237.122:8092/agent/v0)
-  PROJECT_ID      optional; defaults to the stack's default project
-  APP_SLUG        optional; defaults to wp2-agent-<n>
-
-Usage:
-  AGENTA_API_KEY=... uv run services/agent/scripts/register_agent_app.py
-"""
-
-import os
-import secrets
-import sys
-
-import requests
-
-HOST = os.environ.get("AGENTA_HOST", "http://144.76.237.122:8280").rstrip("/")
-API = HOST + "/api"
-KEY = os.environ.get("AGENTA_API_KEY")
-AGENT_URL = os.environ.get("AGENT_URL", "http://144.76.237.122:8092/agent/v0").rstrip(
-    "/"
-)
-PROJECT_ID = os.environ.get("PROJECT_ID")
-APP_SLUG = os.environ.get("APP_SLUG") or f"wp2-agent-{secrets.token_hex(3)}"
-
-if not KEY:
-    sys.exit("Set AGENTA_API_KEY")
-
-H = {"Authorization": f"ApiKey {KEY}", "Content-Type": "application/json"}
-
-# The chat interface the agent advertises via /inspect (kept in sync with
-# services/oss/src/agent_pi/schemas.py).
-SCHEMA = "https://json-schema.org/draft/2020-12/schema"
-AGENT_SCHEMAS = {
-    "inputs": {
-        "$schema": SCHEMA,
-        "type": "object",
-        "additionalProperties": True,
-        "properties": {
-            "messages": {
-                "x-ag-type-ref": "messages",
-                "type": "array",
-                "description": "Ordered list of normalized chat messages.",
-            }
-        },
-    },
-    "parameters": {
-        "$schema": SCHEMA,
-        "type": "object",
-        "additionalProperties": True,
-        "properties": {"model": {"type": "string", "description": "Model override."}},
-    },
-    "outputs": {
-        "$schema": SCHEMA,
-        "x-ag-type-ref": "message",
-        "type": "object",
-        "description": "Final assistant message returned by the agent.",
-    },
-}
-
-
-def _id() -> str:
-    return secrets.token_hex(6)
-
-
-def post(path: str, body: dict) -> dict:
-    r = requests.post(
-        f"{API}{path}",
-        json=body,
-        headers=H,
-        params={"project_id": PROJECT_ID},
-        timeout=60,
-    )
-    if r.status_code >= 300:
-        sys.exit(f"POST {path} -> {r.status_code}: {r.text[:600]}")
-    return r.json()
-
-
-def main() -> None:
-    global PROJECT_ID
-    if not PROJECT_ID:
-        projects = requests.get(f"{API}/projects", headers=H, timeout=30).json()
-        default = next(
-            (p for p in projects if p.get("is_default_project")), projects[0]
-        )
-        PROJECT_ID = default["project_id"]
-    print(f"project_id={PROJECT_ID}  app_slug={APP_SLUG}  agent_url={AGENT_URL}")
-
-    wf = post(
-        "/workflows/",
-        {
-            "workflow": {
-                "slug": APP_SLUG,
-                "name": APP_SLUG,
-                "flags": {"is_application": True},
-            }
-        },
-    )
-    workflow_id = wf["workflow"]["id"]
-
-    var = post(
-        "/workflows/variants/",
-        {
-            "workflow_variant": {
-                "workflow_id": workflow_id,
-                "slug": f"{APP_SLUG}.default",
-                "name": "default",
-            }
-        },
-    )
-    variant_id = var["workflow_variant"]["id"]
-
-    # Seed v0 (tables dismiss v0), then commit v1 with the real data.
-    post(
-        "/workflows/revisions/commit",
-        {
-            "workflow_revision": {
-                "workflow_id": workflow_id,
-                "workflow_variant_id": variant_id,
-                "slug": _id(),
-                "name": "default",
-                "message": "Initial commit",
-            }
-        },
-    )
-    rev = post(
-        "/workflows/revisions/commit",
-        {
-            "workflow_revision": {
-                "workflow_id": workflow_id,
-                "workflow_variant_id": variant_id,
-                "slug": _id(),
-                "name": "default",
-                "message": "Agent service",
-                "flags": {"is_chat": True},
-                "data": {
-                    "url": AGENT_URL,
-                    "parameters": {"model": "gpt-5.5"},
-                    "schemas": AGENT_SCHEMAS,
-                },
-            }
-        },
-    )
-    revision = rev["workflow_revision"]
-    print(f"workflow_id={workflow_id}")
-    print(f"variant_id={variant_id}")
-    print(f"revision_id={revision['id']}  flags={revision.get('flags')}")
-    print(f"stored url={revision.get('data', {}).get('url')}")
-    print(f"\nOpen the playground: {HOST}/apps/{workflow_id}/playground")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/services/agent/src/runPi.ts b/services/agent/src/runPi.ts
index cabf603701..4056d0dce7 100644
--- a/services/agent/src/runPi.ts
+++ b/services/agent/src/runPi.ts
@@ -4,8 +4,9 @@
  * This is the concrete "harness" behind the service's Harness port. It drives the
  * Pi SDK (`createAgentSession`) for a single run: it injects the agent's AGENTS.md
  * in memory, resolves the model, sends one user turn, and returns the final
- * assistant text. No streaming, no tools by default, no session persistence. Those
- * are later work packages.
+ * assistant text. It also turns the backend-resolved runnable tools (WP-7) into Pi
+ * customTools that route back through Agenta's /tools/call. No streaming and no
+ * session persistence yet; those are later work packages.
  *
  * Auth: uses `AuthStorage.create()`, which reads ~/.pi/agent/auth.json (the local
  * Pi login). Set OPENAI_API_KEY / ANTHROPIC_API_KEY in the environment as an
@@ -54,6 +55,34 @@ export interface TraceContext {
   captureContent?: boolean;
 }
 
+/**
+ * A runnable tool the backend already resolved from the agent config: name +
+ * description + JSON-Schema params for the model, plus the `callRef` slug the
+ * execution bridge sends back to Agenta's /tools/call. The Composio key and the
+ * connection auth stay server-side; this sandbox never sees them.
+ */
+export interface ResolvedToolSpec {
+  /** Function name shown to the model (e.g. "gmail__SEND_EMAIL"). */
+  name: string;
+  /** Description shown to the model. Resolved live from the provider catalog. */
+  description?: string;
+  /** JSON Schema for the tool arguments. Pi accepts plain JSON Schema here. */
+  inputSchema?: Record<string, unknown> | null;
+  /** "tools.{provider}.{integration}.{action}.{connection}" — the /tools/call slug. */
+  callRef: string;
+}
+
+/**
+ * Where and how to route a tool call back through Agenta. The backend builds the
+ * full /tools/call URL and threads the same credential the OTLP export rides on.
+ */
+export interface ToolCallbackContext {
+  /** Full /tools/call URL. */
+  endpoint: string;
+  /** Authorization header value for the callback (project-scoped). */
+  authorization?: string;
+}
+
 export interface AgentRunRequest {
   /** AGENTS.md text injected as the agent's instructions (in memory). */
   agentsMd?: string;
@@ -65,6 +94,10 @@ export interface AgentRunRequest {
   messages?: ChatMessage[];
   /** Built-in tools to enable. MVP default: none. */
   tools?: string[];
+  /** Resolved runnable tools (WP-7), turned into Pi customTools below. */
+  customTools?: ResolvedToolSpec[];
+  /** Where customTools route their calls back to. Required when customTools is set. */
+  toolCallback?: ToolCallbackContext;
   /** Tracing: thread the Agenta trace context across the boundary. */
   trace?: TraceContext;
 }
@@ -126,6 +159,117 @@ function extractAssistantText(messages: any[]): string {
   return "";
 }
 
+/** Per-tool /tools/call budget in ms (a timeout surfaces as a tool error). Unset, empty, or invalid env -> 30000. */
+const TOOL_CALL_TIMEOUT_MS = ((ms: number) => (Number.isFinite(ms) && ms >= 1 ? Math.trunc(ms) : 30000))(
+  Number(process.env.AGENTA_AGENT_TOOL_CALL_TIMEOUT_MS),
+);
+
+/** Permissive default when a resolved tool has no input schema. */
+const EMPTY_OBJECT_SCHEMA = {
+  type: "object",
+  properties: {},
+  additionalProperties: true,
+};
+
+/**
+ * Turn resolved specs into Pi customTools. Returns [] when `specs` is empty or (with a
+ * log line) when the callback endpoint is missing. Each `execute` does one POST through
+ * Agenta's /tools/call, so the Composio key and connection auth stay server-side. A failed
+ * call throws, which Pi turns into a tool-error result (the loop continues), not a run failure.
+ */
+export function buildCustomTools(
+  specs: ResolvedToolSpec[],
+  callback: ToolCallbackContext | undefined,
+): any[] {
+  if (specs.length === 0) return [];
+  if (!callback?.endpoint) {
+    log(`skipping ${specs.length} custom tool(s): missing toolCallback endpoint`);
+    return [];
+  }
+
+  return specs.map((spec) => ({
+    name: spec.name,
+    label: spec.name,
+    description: spec.description ?? spec.name,
+    // Pi accepts a plain JSON Schema for `parameters` (its validator has a
+    // non-TypeBox path); the schema is resolved live from the provider catalog.
+    parameters: (spec.inputSchema as any) ?? EMPTY_OBJECT_SCHEMA,
+    async execute(toolCallId: string, params: unknown, signal?: AbortSignal) {
+      const text = await callAgentaTool(
+        callback,
+        spec.callRef,
+        toolCallId,
+        params,
+        signal,
+      );
+      return {
+        content: [{ type: "text", text }],
+        details: { callRef: spec.callRef },
+      };
+    },
+  }));
+}
+
+/** One POST to /tools/call. Returns the result text; throws on fetch failure, timeout/abort, or non-2xx. */
+async function callAgentaTool(
+  callback: ToolCallbackContext,
+  callRef: string,
+  toolCallId: string,
+  params: unknown,
+  signal?: AbortSignal,
+): Promise<string> {
+  const headers: Record<string, string> = { "content-type": "application/json" };
+  if (callback.authorization) headers["authorization"] = callback.authorization;
+
+  // Combine Pi's abort signal (if any) with a per-tool timeout; without AbortSignal.any, timeout only.
+  const timeoutSignal = AbortSignal.timeout(TOOL_CALL_TIMEOUT_MS);
+  const anyOf = (AbortSignal as any).any;
+  const combined =
+    signal && typeof anyOf === "function"
+      ? anyOf([signal, timeoutSignal])
+      : timeoutSignal;
+
+  let response: Response;
+  try {
+    response = await fetch(callback.endpoint, {
+      method: "POST",
+      headers,
+      body: JSON.stringify({
+        data: {
+          id: toolCallId,
+          type: "function",
+          // Arguments as an object (not a JSON string) to avoid double-encoding.
+          function: { name: callRef, arguments: params ?? {} },
+        },
+      }),
+      signal: combined,
+    });
+  } catch (err) {
+    throw new Error(
+      `tool call ${callRef} failed: ${err instanceof Error ? err.message : String(err)}`,
+    );
+  }
+
+  const bodyText = await response.text();
+  if (!response.ok) {
+    throw new Error(
+      `tool call ${callRef} returned HTTP ${response.status}: ${bodyText.slice(0, 500)}`,
+    );
+  }
+
+  // ToolCallResponse -> { call: { data: { content }, status } }. A string `content` goes to the
+  // model verbatim; other non-null values are JSON-stringified; no content / non-JSON body -> raw body.
+  try {
+    const parsed = JSON.parse(bodyText);
+    const content = parsed?.call?.data?.content;
+    if (typeof content === "string") return content;
+    if (content != null) return JSON.stringify(content);
+    return bodyText;
+  } catch {
+    return bodyText;
+  }
+}
+
 export async function runPi(request: AgentRunRequest): Promise<AgentRunResult> {
   const prompt = resolvePrompt(request);
   if (!prompt) {
@@ -176,12 +320,27 @@ export async function runPi(request: AgentRunRequest): Promise<AgentRunResult> {
     });
     await loader.reload();
 
+    // Build runnable tools from the resolved specs. Pi's allowlist gates custom
+    // tools too, so their names must be in `tools` for the model to see them.
+    const customTools = buildCustomTools(
+      request.customTools ?? [],
+      request.toolCallback,
+    );
+    const toolAllowlist = [
+      ...(request.tools ?? []),
+      ...customTools.map((tool) => tool.name),
+    ];
+    if (customTools.length > 0) {
+      log(`custom tools: ${customTools.map((t) => t.name).join(", ")}`);
+    }
+
     const { session } = await createAgentSession({
       cwd,
       model,
       authStorage,
       modelRegistry,
-      tools: request.tools ?? [],
+      tools: toolAllowlist,
+      customTools,
       sessionManager: SessionManager.inMemory(cwd),
       settingsManager: SettingsManager.inMemory(),
       resourceLoader: loader,
diff --git a/services/entrypoints/agent_main.py b/services/entrypoints/agent_main.py
deleted file mode 100644
index 595e60ad27..0000000000
--- a/services/entrypoints/agent_main.py
+++ /dev/null
@@ -1,47 +0,0 @@
-"""Standalone entrypoint for the agent service (WP-2 local verification).
-
-Mounts only the agent app plus a health check, so the agent ``/invoke`` can be
-exercised with curl without bringing up the full services app. The real integration
-point is ``entrypoints/main.py`` (one import + one mount), kept separate so this
-isolated runner stays light.
-
-Run locally (auth disabled for curl):
-
-    cd services
-    AGENTA_SERVICES_MIDDLEWARE_AUTH_ENABLED=false \\
-        uv run uvicorn entrypoints.agent_main:app --host 0.0.0.0 --port 8090
-"""
-
-from fastapi import FastAPI
-from fastapi.middleware.cors import CORSMiddleware
-
-import agenta as ag
-from oss.src.agent import agent_v0_app
-
-ag.init()
-
-app = FastAPI(
-    openapi_url=None,
-    docs_url=None,
-    redoc_url=None,
-)
-
-app.add_middleware(
-    CORSMiddleware,
-    # The playground invokes cross-origin (web on a different port) with credentials
-    # (cookies + Authorization). Browsers reject a "*" origin on credentialed requests,
-    # so echo the specific origin and allow credentials. Matches the dev box on any
-    # port and localhost. Same-origin (served under /services) would avoid CORS entirely.
-    allow_origin_regex=r"https?://(144\.76\.237\.122|localhost|0\.0\.0\.0)(:\d+)?",
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-
-@app.get("/health")
-async def health():
-    return {"status": "ok"}
-
-
-app.mount("/agent/v0", agent_v0_app)
diff --git a/services/oss/src/agent.py b/services/oss/src/agent.py
index 1203f1560a..42f9b1832c 100644
--- a/services/oss/src/agent.py
+++ b/services/oss/src/agent.py
@@ -1,4 +1,4 @@
-"""Agent workflow service (WP-2).
+"""Agent workflow service (WP-2 + WP-7).
 
 Mirrors the chat/completion services: an Agenta app exposing ``/invoke`` and
 ``/inspect`` through ``ag.create_app`` + ``ag.workflow`` + ``ag.route``, so the
@@ -6,13 +6,17 @@
 builds the user turn from the request and runs it through the Harness port, whose Pi
 adapter drives the TypeScript wrapper in ``services/agent``.
 
-MVP: hardcoded config (AGENTS.md text, model) read from files, a single
-non-streaming reply, no tools. Streaming, multi-message output, tools, and Daytona
-are later work packages.
+Config is a ``prompt-template`` (system message as AGENTS.md, model, and tools): the
+playground renders the same prompt control as chat/completion, including the tool
+picker. Runnable tools (WP-7) are resolved in the backend (``/tools/resolve``) and
+executed back through ``/tools/call`` while Pi drives the loop. Streaming,
+multi-message output, and the Daytona sandbox are later work packages.
 """
 
 import os
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Tuple
+
+import httpx
 
 import agenta as ag
 from agenta.sdk.engines.tracing.propagation import inject
@@ -22,7 +26,7 @@
 from oss.src.agent_pi.local_runtime import LocalRuntime
 from oss.src.agent_pi.pi_harness import PiHarness
 from oss.src.agent_pi.pi_http_harness import PiHttpHarness
-from oss.src.agent_pi.ports import Harness, HarnessRequest, TraceContext
+from oss.src.agent_pi.ports import Harness, HarnessRequest, ToolCallback, TraceContext
 from oss.src.agent_pi.schemas import AGENT_SCHEMAS
 
 log = get_module_logger(__name__)
@@ -33,6 +37,9 @@
     "no",
 )
 
+# Seconds allowed for the backend tool-resolution call (catalog + connection check).
+_TOOLS_RESOLVE_TIMEOUT = float(os.getenv("AGENTA_AGENT_TOOLS_TIMEOUT", "30"))
+
 
 def _build_harness() -> Harness:
     """Pick the harness adapter for the current deployment.
@@ -46,6 +53,53 @@ def _build_harness() -> Harness:
     return PiHarness(LocalRuntime(), wrapper_dir=str(wrapper_dir()))
 
 
+def _system_text(messages: Optional[List[Any]]) -> str:
+    """Join the system-message content of a prompt-template into AGENTS.md text."""
+    parts: List[str] = []
+    for message in messages or []:
+        if not isinstance(message, dict) or message.get("role") != "system":
+            continue
+        content = message.get("content")
+        if isinstance(content, str):
+            parts.append(content)
+        elif isinstance(content, list):
+            parts.extend(
+                block.get("text", "")
+                for block in content
+                if isinstance(block, dict) and block.get("type") == "text"
+            )
+    return "\n\n".join(part for part in parts if part)
+
+
+def _resolve_run_config(
+    params: Dict[str, Any],
+    config: Any,
+) -> Tuple[Optional[str], str, Any]:
+    """Pull model, instructions, and raw tools from the request parameters.
+
+    Accepts both shapes: the playground's ``prompt`` (a ``prompt-template`` whose
+    system message is the AGENTS.md and whose ``llm_config`` carries model + picker
+    tools) and the flat ``{model, agents_md, tools}`` an API caller may send. Falls
+    back to the service file config for any unset field; ``model`` may stay ``None``.
+    """
+    prompt_cfg = params.get("prompt")
+    if isinstance(prompt_cfg, dict):
+        llm_config = prompt_cfg.get("llm_config") or {}
+        model = llm_config.get("model") or config.model
+        agents_md = _system_text(prompt_cfg.get("messages")) or config.agents_md
+        raw_tools = llm_config.get("tools")
+        if raw_tools is None:
+            raw_tools = prompt_cfg.get("tools")
+    else:
+        model = params.get("model") or config.model
+        agents_md = params.get("agents_md") or config.agents_md
+        raw_tools = params.get("tools")
+
+    if raw_tools is None:
+        raw_tools = config.tools
+    return model, agents_md, raw_tools
+
+
 def _latest_user_message(messages: Optional[List[Any]]) -> str:
     for message in reversed(messages or []):
         if not isinstance(message, dict):
@@ -91,6 +145,162 @@ def _trace_context() -> Optional[TraceContext]:
         return None
 
 
+def _agenta_api_base() -> Optional[str]:
+    """Resolve the Agenta backend base URL (``.../api``) for tool calls.
+
+    Prefers ``AGENTA_AGENT_TOOLS_API_URL``, then the prefix of the SDK's OTLP URL
+    (``{host}/api/otlp/v1/traces``), then ``AGENTA_API_URL``. Returns ``None`` when
+    nothing is configured; callers only need this when tools are set.
+    """
+    override = os.getenv("AGENTA_AGENT_TOOLS_API_URL")
+    if override:
+        return override.rstrip("/")
+
+    try:
+        otlp_url = ag.tracing.otlp_url
+    except Exception:  # pylint: disable=broad-except
+        otlp_url = None
+    if otlp_url and "/otlp/" in otlp_url:
+        return otlp_url.split("/otlp/", 1)[0].rstrip("/")
+
+    api_url = os.getenv("AGENTA_API_URL")
+    if api_url:
+        return api_url.rstrip("/")
+
+    return None
+
+
+def _request_authorization() -> Optional[str]:
+    """The project-scoped credential to call ``/tools/resolve`` and ``/tools/call``.
+
+    Reuses the same propagation the OTLP credential rides on (the caller's
+    Authorization), falling back to the service's own API key the way the tracing
+    sidecar does. Scoping to the caller keeps an agent run from invoking tools the
+    user could not (see WP-7 risk: RUN_TOOLS scoping).
+    """
+    try:
+        authorization = inject({}).get("Authorization")
+    except Exception:  # pylint: disable=broad-except
+        authorization = None
+    if authorization:
+        return authorization
+
+    api_key = os.getenv("AGENTA_API_KEY")
+    if api_key:
+        return f"ApiKey {api_key}"
+
+    return None
+
+
+def _parse_gateway_slug(slug: Any) -> Optional[Dict[str, Any]]:
+    """Parse a gateway tool slug into a Composio reference, or ``None``.
+
+    The playground tool picker encodes a Composio action as a function name like
+    ``tools__composio__github__GET_THE_AUTHENTICATED_USER__github-tvn`` (the same
+    5-segment slug ``/tools/call`` parses; ``__`` or ``.`` separated). Anything that
+    is not a 5-segment ``tools.composio.*`` slug returns ``None`` so the caller can
+    skip it.
+    """
+    if not isinstance(slug, str):
+        return None
+    parts = slug.replace("__", ".").split(".")
+    if len(parts) == 5 and parts[0] == "tools" and parts[1] == "composio":
+        return {
+            "type": "composio",
+            "integration": parts[2],
+            "action": parts[3],
+            "connection": parts[4],
+        }
+    return None
+
+
+def _normalize_tool_ref(ref: Any) -> Optional[Dict[str, Any]]:
+    """Coerce a config entry into a discriminated tool reference the resolver parses.
+
+    Handles three shapes: a bare string (or an untyped ``{"name": ...}``) is the
+    existing built-in tool name; a dict whose ``type`` is ``builtin`` or ``composio``
+    passes through; and the playground picker's gateway entry (``{"function":
+    {"name": "tools__composio__..."}}``) is parsed into a ``composio`` ref. Other
+    picker entries (provider built-ins, inline custom functions) return ``None`` and
+    are skipped rather than failing the run.
+    """
+    if isinstance(ref, str):
+        return {"type": "builtin", "name": ref}
+    if isinstance(ref, dict):
+        if ref.get("type") in ("builtin", "composio"):
+            return ref
+        function = ref.get("function") if isinstance(ref.get("function"), dict) else {}
+        gateway = _parse_gateway_slug(function.get("name") or ref.get("name"))
+        if gateway:
+            return gateway
+        if "type" not in ref and isinstance(ref.get("name"), str):
+            return {"type": "builtin", "name": ref["name"]}
+        return None
+    return None
+
+
+async def _resolve_tools(
+    tools: List[Any],
+) -> Tuple[List[str], List[Dict[str, Any]], Optional[ToolCallback]]:
+    """Resolve config tool references into builtins + Pi customTool specs.
+
+    Calls the backend resolver (``POST /tools/resolve``), which validates Composio
+    connections up front and enriches each action from the catalog. Returns the
+    built-in tool names, the camelCase customTool specs for the wire, and the
+    ``/tools/call`` callback. Raises on resolution failure so the invoke fails early
+    with a clear message rather than the model hitting a runtime tool error.
+    """
+    refs = [ref for ref in (_normalize_tool_ref(t) for t in tools if t) if ref]
+    if not refs:
+        return [], [], None
+
+    api_base = _agenta_api_base()
+    if not api_base:
+        raise RuntimeError(
+            "Agent has tools configured but the Agenta API base URL is unknown. "
+            "Set AGENTA_AGENT_TOOLS_API_URL or AGENTA_API_URL."
+        )
+
+    authorization = _request_authorization()
+    headers = {"Content-Type": "application/json"}
+    if authorization:
+        headers["Authorization"] = authorization
+
+    async with httpx.AsyncClient(timeout=_TOOLS_RESOLVE_TIMEOUT) as client:
+        response = await client.post(
+            f"{api_base}/tools/resolve",
+            json={"tools": refs},
+            headers=headers,
+        )
+
+    if response.status_code >= 400:
+        raise RuntimeError(
+            f"Tool resolution failed (HTTP {response.status_code}): "
+            f"{response.text[:500]}"
+        )
+
+    data = response.json()
+    builtins = data.get("builtins") or []
+    custom = data.get("custom") or []
+
+    custom_tools = [
+        {
+            "name": spec["name"],
+            "description": spec.get("description"),
+            "inputSchema": spec.get("input_schema"),
+            "callRef": spec["call_ref"],
+        }
+        for spec in custom
+    ]
+
+    callback = ToolCallback(
+        endpoint=f"{api_base}/tools/call",
+        authorization=authorization,
+    )
+
+    return builtins, custom_tools, callback
+
+
 async def _agent(
     inputs: Optional[Dict[str, Any]] = None,
     messages: Optional[List[Any]] = None,
@@ -98,15 +308,22 @@ async def _agent(
 ):
     config = load_config()
 
-    # Config (model + AGENTS.md instructions) comes from parameters when the
-    # playground/caller sets it, falling back to the service's file config.
+    # Config comes from parameters when the playground/caller sets it, falling back
+    # to the service file config. Accepts both the playground prompt-template shape
+    # and a flat {model, agents_md, tools} (see _resolve_run_config).
     params = parameters or {}
-    model = params.get("model") or config.model
-    agents_md = params.get("agents_md") or config.agents_md
+    model, agents_md, tools_config = _resolve_run_config(params, config)
+
+    if isinstance(tools_config, dict):
+        tools_config = [tools_config]
+    elif not isinstance(tools_config, list):
+        tools_config = []
 
     msgs = messages or (inputs or {}).get("messages") or []
     prompt = _latest_user_message(msgs)
 
+    builtins, custom_tools, tool_callback = await _resolve_tools(tools_config)
+
     harness = _build_harness()
 
     await harness.setup()
@@ -117,7 +334,9 @@ async def _agent(
                 model=model,
                 prompt=prompt,
                 messages=msgs,
-                tools=config.tools,
+                tools=builtins,
+                custom_tools=custom_tools,
+                tool_callback=tool_callback,
                 trace=_trace_context(),
             )
         )
diff --git a/services/oss/src/agent_pi/config.py b/services/oss/src/agent_pi/config.py
index b630a3063e..8c2f5bf660 100644
--- a/services/oss/src/agent_pi/config.py
+++ b/services/oss/src/agent_pi/config.py
@@ -9,7 +9,7 @@
 import os
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import List, Optional
+from typing import Any, List, Optional
 
 # services/oss/src/agent_pi/config.py -> parents[3] == services/
 _SERVICES_DIR = Path(__file__).resolve().parents[3]
@@ -30,7 +30,11 @@
 class AgentConfig:
     agents_md: str
     model: Optional[str] = None
-    tools: List[str] = field(default_factory=list)
+    # Provider-agnostic tool references (WP-7). Each entry is either a plain string
+    # (a Pi built-in name, normalized to a ``builtin`` ref downstream) or a
+    # discriminated dict (``{"type": "composio", ...}``). Resolution happens in the
+    # backend at invoke time; the service just forwards the list.
+    tools: List[Any] = field(default_factory=list)
 
 
 def wrapper_dir() -> Path:
diff --git a/services/oss/src/agent_pi/pi_harness.py b/services/oss/src/agent_pi/pi_harness.py
index f4c5fc3e5c..266e9cb9a0 100644
--- a/services/oss/src/agent_pi/pi_harness.py
+++ b/services/oss/src/agent_pi/pi_harness.py
@@ -48,6 +48,10 @@ async def invoke(self, request: HarnessRequest) -> HarnessResult:
                 "prompt": request.prompt,
                 "messages": request.messages,
                 "tools": request.tools,
+                "customTools": request.custom_tools,
+                "toolCallback": request.tool_callback.to_wire()
+                if request.tool_callback
+                else None,
                 "trace": request.trace.to_wire() if request.trace else None,
             }
         ).encode("utf-8")
diff --git a/services/oss/src/agent_pi/pi_http_harness.py b/services/oss/src/agent_pi/pi_http_harness.py
index 1e4b8a0d2e..0435319011 100644
--- a/services/oss/src/agent_pi/pi_http_harness.py
+++ b/services/oss/src/agent_pi/pi_http_harness.py
@@ -42,6 +42,10 @@ async def invoke(self, request: HarnessRequest) -> HarnessResult:
             "prompt": request.prompt,
             "messages": request.messages,
             "tools": request.tools,
+            "customTools": request.custom_tools,
+            "toolCallback": request.tool_callback.to_wire()
+            if request.tool_callback
+            else None,
             "trace": request.trace.to_wire() if request.trace else None,
         }
 
diff --git a/services/oss/src/agent_pi/ports.py b/services/oss/src/agent_pi/ports.py
index f556de8cf7..4b436db6c6 100644
--- a/services/oss/src/agent_pi/ports.py
+++ b/services/oss/src/agent_pi/ports.py
@@ -84,6 +84,28 @@ def to_wire(self) -> Dict[str, Any]:
         }
 
 
+@dataclass
+class ToolCallback:
+    """How the harness routes a tool call back through Agenta's ``/tools/call``.
+
+    The backend resolves runnable tool references into specs and hands the harness
+    this callback. The TS wrapper turns each spec into a Pi ``customTool`` whose
+    ``execute`` POSTs the OpenAI-style envelope to ``endpoint`` with
+    ``authorization``. The provider key and connection auth never enter the sandbox;
+    they stay behind ``/tools/call``. Same mechanism that threads the OTLP credential.
+    """
+
+    endpoint: str  # full ``/tools/call`` URL
+    authorization: Optional[str] = None  # full Authorization header value
+
+    def to_wire(self) -> Dict[str, Any]:
+        """Serialize to the camelCase shape the TS wrapper expects on the wire."""
+        return {
+            "endpoint": self.endpoint,
+            "authorization": self.authorization,
+        }
+
+
 @dataclass
 class HarnessRequest:
     """One agent run: instructions, model, the user turn, and optional history."""
@@ -93,6 +115,10 @@ class HarnessRequest:
     prompt: Optional[str] = None
     messages: List[Any] = field(default_factory=list)
     tools: List[str] = field(default_factory=list)
+    # Resolved runnable tool specs, already in the camelCase wire shape the TS
+    # wrapper turns into Pi customTools: {name, description, inputSchema, callRef}.
+    custom_tools: List[Dict[str, Any]] = field(default_factory=list)
+    tool_callback: Optional[ToolCallback] = None
     trace: Optional[TraceContext] = None
 
 
diff --git a/services/oss/src/agent_pi/schemas.py b/services/oss/src/agent_pi/schemas.py
index 93a22c6532..cef2440679 100644
--- a/services/oss/src/agent_pi/schemas.py
+++ b/services/oss/src/agent_pi/schemas.py
@@ -34,24 +34,30 @@
     },
 }
 
-# Parameters: the agent config the playground renders as editable fields. Exposes
-# the two values that actually drive a run: the model and the AGENTS.md instructions.
-# `x-parameters.multiline` is the hint the playground honors to render a textarea.
+# Parameters: the agent config the playground renders. We reuse the existing
+# `prompt-template` control (model selector + tool picker + message editor) instead
+# of a bespoke agent form: the `x-ag-type-ref: prompt-template` marker makes the
+# playground render the same prompt UI chat/completion use, so the tool picker comes
+# for free. The agent reads the system message as its AGENTS.md, `llm_config.model`
+# as the model, and `llm_config.tools` (the picker output) as its runnable tools.
 AGENT_PARAMETERS_SCHEMA = {
     "$schema": _SCHEMA,
     "type": "object",
     "additionalProperties": True,
     "properties": {
-        "model": {
-            "type": "string",
-            "default": _DEFAULT_MODEL,
-            "description": "Model the agent runs on.",
-        },
-        "agents_md": {
-            "type": "string",
-            "default": _DEFAULT_AGENTS_MD,
-            "description": "The agent's instructions (AGENTS.md).",
-            "x-parameters": {"multiline": True},
+        "prompt": {
+            "x-ag-type-ref": "prompt-template",
+            "type": "object",
+            "description": (
+                "The agent's instructions (system message), model, and tools. Tools "
+                "are picked from connected providers (e.g. Composio) and run "
+                "server-side via /tools/call."
+            ),
+            "default": {
+                "messages": [{"role": "system", "content": _DEFAULT_AGENTS_MD}],
+                "template_format": "mustache",
+                "llm_config": {"model": _DEFAULT_MODEL, "tools": []},
+            },
         },
     },
 }