diff --git a/services/agent/src/tools/code.ts b/services/agent/src/tools/code.ts index d4d8db92b8..14544b0cfd 100644 --- a/services/agent/src/tools/code.ts +++ b/services/agent/src/tools/code.ts @@ -1,197 +1,27 @@ /** - * Code-tool executor: run a resolved `code` tool's snippet in the agent sandbox. + * Code-tool sidecar execution gate. * - * A code tool ships a snippet (`code`) + a runtime (`python` | `node`) + a scoped `env` (the - * tool's declared vault secrets, resolved server-side). Unlike a `callback` tool, it never - * touches Agenta's /tools/call — it runs locally where the harness runs, which is exactly why - * its secrets are injected here as subprocess env (and nowhere else). - * - * Entry convention (same for both runtimes): the snippet defines a top-level `main`. A bare - * `def main(**inputs)` / `function main(inputs)` is found automatically; an explicit export - * (`module.exports.main` / `exports.main` / `module.exports = fn` in Node) is also accepted. - * Python calls `main(**inputs)` (keyword args from the tool input object); Node calls - * `main(inputs)` (the input object) and may return a promise. The return value is - * JSON-serialized and handed to the model as the tool result. - * - * Shared by every delivery path that runs code locally: engines/pi.ts (in-process Pi), - * extensions/agenta.ts (Pi under sandbox-agent), tools/mcp-server.ts (the MCP bridge for other - * harnesses). + * The code-tool interface still exists and code tools are still advertised to harnesses. The + * sidecar no longer executes author-supplied snippets locally, though: every delivery path + * funnels a `kind: "code"` call through this function, so throwing here makes direct Pi, + * sandbox Pi, and the ACP/MCP bridge fail consistently without changing the public wire shape. */ -import { spawn } from "node:child_process"; -import { mkdtempSync, rmSync, writeFileSync } from "node:fs"; -import { tmpdir } from "node:os"; -import { join } from "node:path"; - -/** Per-call budget for a code tool. Surfaced as a tool error on timeout. */ -export const CODE_TOOL_TIMEOUT_MS = Number( - process.env.AGENTA_AGENT_CODE_TOOL_TIMEOUT_MS ?? 30000, -); - -// argv[1] is the snippet path (python `-c`/node `-e` put the first extra arg at argv[1]); -// the tool input arrives as JSON on stdin. Both bootstraps evaluate the snippet in a fresh -// scope and pick up a top-level `main` (a bare `def main`/`function main`), falling back to an -// explicit export. Either way the contract is: define a callable `main`. -const PY_BOOTSTRAP = `import sys, json -_path = sys.argv[1] -with open(_path) as _f: - _src = _f.read() -_ns = {} -exec(compile(_src, _path, "exec"), _ns) -if not callable(_ns.get("main")): - sys.stderr.write("code tool must define a callable main(**inputs)") - sys.exit(1) -_args = json.loads(sys.stdin.read() or "{}") -_out = _ns["main"](**_args) -sys.stdout.write(json.dumps(_out)) -`; - -// `require(path)` would only see CommonJS exports, so a bare top-level `function main` (which -// exports nothing under CommonJS) would be invisible. Instead read the source and evaluate it -// in a scope that captures a top-level `main`, while still honoring an explicit -// `module.exports.main` / `exports.main` / `module.exports = fn`. -const NODE_BOOTSTRAP = `const fs = require("fs"); -const path = process.argv[1]; -const src = fs.readFileSync(path, "utf8"); -const mod = { exports: {} }; -const factory = new Function( - "exports", - "require", - "module", - "__filename", - "__dirname", - src + - "\\n;return (typeof main !== 'undefined' ? main : (module.exports && (module.exports.main || module.exports.default)) || module.exports);", -); -const fn = factory(mod.exports, require, mod, path, require("path").dirname(path)); -if (typeof fn !== "function") { - process.stderr.write("code tool must define or export a callable main(inputs) function"); - process.exit(1); -} -const args = JSON.parse(fs.readFileSync(0, "utf8") || "{}"); -Promise.resolve(fn(args)) - .then((out) => process.stdout.write(JSON.stringify(out === undefined ? null : out))) - .catch((err) => { process.stderr.write(String((err && err.stack) || err)); process.exit(1); }); -`; export type CodeRuntime = "python" | "node"; -// The minimal set of host env vars a python3/node runtime needs to start. Deliberately -// excludes everything secret-bearing or sidecar-specific: no AGENTA_*, no *_API_KEY / -// *_TOKEN, no COMPOSIO_* / DAYTONA_*, no provider keys that the in-process Pi path writes -// into process.env before a run. Only the tool's declared scoped `env` is layered on top. -const BASE_ENV_ALLOWLIST = [ - "PATH", - "HOME", - "LANG", - "LC_ALL", - "LC_CTYPE", - "TZ", - "TMPDIR", - "TEMP", - "TMP", - "NODE_PATH", - // Windows essentials, copied only when present. - "SystemRoot", - "ComSpec", -]; - -/** Build the child env from a minimal allowlist (copied only when set) plus scoped secrets. */ -function buildChildEnv( - env: Record | undefined, -): Record { - const base: Record = {}; - for (const key of BASE_ENV_ALLOWLIST) { - const value = process.env[key]; - if (value !== undefined) base[key] = value; - } - return { ...base, ...(env ?? {}) }; -} +export const CODE_TOOL_UNSUPPORTED_MESSAGE = + "Code tools are not supported by the sidecar."; /** - * Run a code tool's snippet and return its JSON-serialized output as text. Throws on a - * non-zero exit, a timeout, or an abort; callers turn the throw into a tool-error result so - * the model loop continues. + * Fail a code-tool invocation. Callers turn this throw into a tool-error result so the model + * loop continues rather than crashing the whole run. */ export async function runCodeTool( - runtime: CodeRuntime | undefined, - code: string, - env: Record | undefined, - args: unknown, - signal?: AbortSignal, + _runtime: CodeRuntime | undefined, + _code: string, + _env: Record | undefined, + _args: unknown, + _signal?: AbortSignal, ): Promise { - const dir = mkdtempSync(join(tmpdir(), "agenta-code-")); - try { - const isNode = runtime === "node"; - const scriptPath = join(dir, isNode ? "tool.js" : "tool.py"); - writeFileSync(scriptPath, code ?? "", "utf8"); - - const command = isNode ? "node" : "python3"; - const childArgs = isNode - ? ["-e", NODE_BOOTSTRAP, scriptPath] - : ["-c", PY_BOOTSTRAP, scriptPath]; - - return await new Promise((resolve, reject) => { - const child = spawn(command, childArgs, { - // The child inherits ONLY a minimal startup allowlist (PATH, HOME, locale/temp, and - // Windows essentials when present) plus the tool's declared scoped secrets. It does - // NOT inherit the sidecar's process.env, so provider keys (OPENAI_API_KEY, etc.) that - // the in-process Pi path writes into process.env, AGENTA_* config, and other secret- - // bearing vars never reach an author-supplied snippet. Nothing is written to the - // agent-visible filesystem beyond the temp dir. - env: buildChildEnv(env), - stdio: ["pipe", "pipe", "pipe"], - }); - - let stdout = ""; - let stderr = ""; - let settled = false; - const finish = (fn: () => void) => { - if (settled) return; - settled = true; - clearTimeout(timer); - if (signal) signal.removeEventListener("abort", onAbort); - fn(); - }; - - const timer = setTimeout(() => { - child.kill("SIGKILL"); - finish(() => - reject(new Error(`code tool timed out after ${CODE_TOOL_TIMEOUT_MS}ms`)), - ); - }, CODE_TOOL_TIMEOUT_MS); - - const onAbort = () => { - child.kill("SIGKILL"); - finish(() => reject(new Error("aborted"))); - }; - if (signal) { - if (signal.aborted) onAbort(); - else signal.addEventListener("abort", onAbort, { once: true }); - } - - child.stdout.on("data", (d) => (stdout += d)); - child.stderr.on("data", (d) => (stderr += d)); - child.on("error", (err) => finish(() => reject(err))); - child.on("close", (exitCode) => - finish(() => { - if (exitCode === 0) resolve(stdout.trim()); - else - reject( - new Error( - `code tool exited ${exitCode}: ${stderr.slice(0, 500) || "(no stderr)"}`, - ), - ); - }), - ); - - child.stdin.write(JSON.stringify(args ?? {})); - child.stdin.end(); - }); - } finally { - try { - rmSync(dir, { recursive: true, force: true }); - } catch { - // best-effort cleanup of the throwaway snippet dir - } - } + throw new Error(CODE_TOOL_UNSUPPORTED_MESSAGE); } diff --git a/services/agent/src/tools/dispatch.ts b/services/agent/src/tools/dispatch.ts index e8e2d25e38..89fdda10d1 100644 --- a/services/agent/src/tools/dispatch.ts +++ b/services/agent/src/tools/dispatch.ts @@ -9,7 +9,7 @@ * advertise/skip behavior for `client` tools — only the execution itself is shared. * * The three executor kinds (see `ResolvedToolSpec`): - * - `code`: run the snippet in a sandbox subprocess with its scoped secret `env`. + * - `code`: advertised to harnesses, but rejected by the sidecar as unsupported. * - `client`: browser-fulfilled across a turn boundary; never executed in-sandbox (throws). * - `callback` (default): POST back through Agenta's /tools/call so the Composio key and * connection auth stay server-side. On Daytona the in-sandbox process can't reach Agenta, @@ -96,7 +96,7 @@ export async function relayToolCall( * Execute one resolved tool and return its result text. Throws on failure; every call site * turns the throw into a tool-error result so the model loop continues rather than crashing. * - * - `code` → run the snippet locally (scoped secret env), no callback/relay. + * - `code` -> reject as unsupported by the sidecar, no callback/relay. * - `client` → throw: browser-fulfilled, never executed in-sandbox. * - default/`callback` → relay through the runner when `opts.relayDir` is set (Daytona), * else POST directly to `opts.endpoint`. diff --git a/services/agent/tests/unit/code-tool.test.ts b/services/agent/tests/unit/code-tool.test.ts index 5a3566614d..5914797459 100644 --- a/services/agent/tests/unit/code-tool.test.ts +++ b/services/agent/tests/unit/code-tool.test.ts @@ -1,89 +1,25 @@ /** - * Unit test for the code-tool executor (runCodeTool). + * Unit test for the code-tool sidecar execution gate. * - * Exercises both runtimes end-to-end through real subprocesses: a python tool, node tools - * written as a bare top-level `function main` (the F2 regression) and as an explicit - * `module.exports.main`, an async node `main`, the F3 env-isolation guarantee (provider keys - * do NOT leak in; declared scoped secrets DO), and the non-zero-exit reject path. - * - * Needs `python3` and `node` on PATH (both present locally and on ubuntu CI runners). - * - * Run: pnpm test (or: pnpm exec vitest run tests/unit/code-tool.test.ts) + * Code tools remain part of the public tool interface and can still be advertised to harnesses, + * but the sidecar must not execute author-supplied snippets locally. */ import { describe, it } from "vitest"; import assert from "node:assert/strict"; -import { runCodeTool } from "../../src/tools/code.ts"; +import { CODE_TOOL_UNSUPPORTED_MESSAGE, runCodeTool } from "../../src/tools/code.ts"; describe("runCodeTool", () => { - it("runs a python bare `def main(**kw)`", async () => { - const code = 'def main(**kw):\n return {"sum": kw.get("a", 0) + kw.get("b", 0)}\n'; - const out = await runCodeTool("python", code, undefined, { a: 2, b: 3 }); - assert.deepEqual(JSON.parse(out), { sum: 5 }, "python bare main returns the right JSON"); - }); - - it("runs a node bare top-level `function main` (F2 regression)", async () => { - const code = "function main(inputs) { return { got: inputs }; }"; - const out = await runCodeTool("node", code, undefined, { hello: "world" }); - assert.deepEqual( - JSON.parse(out), - { got: { hello: "world" } }, - "node bare function main executes and echoes the input", - ); - }); - - it("runs a node explicit `module.exports.main`", async () => { - const code = "module.exports.main = function (inputs) { return { via: 'exports', got: inputs }; };"; - const out = await runCodeTool("node", code, undefined, { x: 1 }); - assert.deepEqual( - JSON.parse(out), - { via: "exports", got: { x: 1 } }, - "node module.exports.main works", - ); - }); - - it("runs an async node `main` returning a Promise", async () => { - const code = - "async function main(inputs) { await new Promise((r) => setTimeout(r, 5)); return { doubled: inputs.n * 2 }; }"; - const out = await runCodeTool("node", code, undefined, { n: 21 }); - assert.deepEqual(JSON.parse(out), { doubled: 42 }, "node async main resolves"); - }); - - it("F3: provider keys do NOT leak; scoped secrets DO", async () => { - const hadKey = "OPENAI_API_KEY" in process.env; - const prevKey = process.env.OPENAI_API_KEY; - process.env.OPENAI_API_KEY = "leak-me-xyz"; - try { - // The provider key sits in process.env but must not reach the snippet. - const leakCode = "function main() { return { key: process.env.OPENAI_API_KEY ?? 'absent' }; }"; - const leakOut = await runCodeTool("node", leakCode, undefined, {}); - assert.deepEqual( - JSON.parse(leakOut), - { key: "absent" }, - "F3: OPENAI_API_KEY did NOT leak into the snippet env", - ); - - // A secret declared on the tool (passed via the scoped `env` arg) must be visible. - const scopedCode = - "function main() { return { secret: process.env.MY_TOOL_SECRET ?? 'absent' }; }"; - const scopedOut = await runCodeTool("node", scopedCode, { MY_TOOL_SECRET: "ok" }, {}); - assert.deepEqual( - JSON.parse(scopedOut), - { secret: "ok" }, - "F3: scoped MY_TOOL_SECRET IS visible to the snippet", - ); - } finally { - if (hadKey) process.env.OPENAI_API_KEY = prevKey; - else delete process.env.OPENAI_API_KEY; - } - }); - - it("rejects when the snippet throws / exits non-zero", async () => { - const code = "function main() { throw new Error('boom'); }"; + it("fails with a clear unsupported error instead of executing code", async () => { await assert.rejects( - () => runCodeTool("node", code, undefined, {}), - /boom|exited/, - "a throwing snippet rejects", + () => + runCodeTool( + "node", + "function main() { return { executed: true }; }", + { MY_TOOL_SECRET: "secret" }, + { input: true }, + ), + new RegExp(CODE_TOOL_UNSUPPORTED_MESSAGE), ); }); }); diff --git a/services/agent/tests/unit/tool-bridge.test.ts b/services/agent/tests/unit/tool-bridge.test.ts index fcd7eb6a13..0371633b20 100644 --- a/services/agent/tests/unit/tool-bridge.test.ts +++ b/services/agent/tests/unit/tool-bridge.test.ts @@ -2,8 +2,8 @@ * Unit tests for buildToolMcpServers (the tool MCP bridge attachment decision). * * Regression cover for F4: attachment must be decided per tool kind, not on the callback - * endpoint alone. A `code` tool runs locally in mcp-server.ts and needs no endpoint, so a run - * whose tools are all `code` must still attach the `agenta-tools` server. Only `callback`-kind + * endpoint alone. A `code` tool is still advertised through mcp-server.ts and needs no endpoint, + * so a run whose tools are all `code` must still attach the `agenta-tools` server. Only `callback`-kind * tools require AGENTA_TOOL_CALLBACK_ENDPOINT; missing it must degrade those tools, not drop the * whole server. `client` tools are browser-fulfilled and never justify attaching the bridge. * @@ -109,7 +109,7 @@ describe("buildToolMcpServers", () => { ]; const out = buildToolMcpServers(specs, relayDir); assert.notDeepEqual(out, [], "mixed run with no endpoint must not return []"); - assert.equal(out.length, 1, "still attaches the server so the code tool works"); + assert.equal(out.length, 1, "still attaches the server so the code tool is advertised"); assert.equal( envValue(out[0].env, "AGENTA_TOOL_CALLBACK_ENDPOINT"), undefined, diff --git a/services/agent/tests/unit/tool-dispatch.test.ts b/services/agent/tests/unit/tool-dispatch.test.ts index af27dc991f..a6ca4e8e85 100644 --- a/services/agent/tests/unit/tool-dispatch.test.ts +++ b/services/agent/tests/unit/tool-dispatch.test.ts @@ -7,9 +7,9 @@ * advertising behavior that stays per-site: * - buildCustomTools (pi.ts) skips `client` specs, builds a tool per `code`/`callback` spec, * and skips a `callback` spec with no callback endpoint. - * - runResolvedTool runs a real `code` snippet end-to-end (python) and throws for `client`. + * - runResolvedTool advertises code tools but fails their sidecar execution, and throws for `client`. * - * No network and no harness: the `code` path shells out to python3 (available locally); the + * No network and no harness: the `code` path now fails before any subprocess; the * `callback`/relay paths are not exercised here (they need a live /tools/call or a relay dir). * * Run: pnpm test (or: pnpm exec vitest run tests/unit/tool-dispatch.test.ts) @@ -66,15 +66,14 @@ describe("buildCustomTools routing", () => { }); describe("runResolvedTool", () => { - it("runs a code spec end-to-end (python)", async () => { - const text = await runResolvedTool(codeSpec, { greeting: "hi", n: 3 }, { - toolCallId: "call-1", - }); - const parsed = JSON.parse(text); - assert.deepEqual( - parsed, - { echo: { greeting: "hi", n: 3 } }, - "code tool runs the snippet and returns its JSON output containing the input", + it("fails code specs with a clear unsupported error", async () => { + await assert.rejects( + () => + runResolvedTool(codeSpec, { greeting: "hi", n: 3 }, { + toolCallId: "call-1", + }), + /Code tools are not supported by the sidecar\./, + "code tools remain advertised but are not executable by the sidecar", ); });