Agenta-AI · mmabrouk · Jun 24, 2026 · Jun 24, 2026
diff --git a/services/agent/src/tools/code.ts b/services/agent/src/tools/code.ts
@@ -1,197 +1,27 @@
 /**
- * Code-tool executor: run a resolved `code` tool's snippet in the agent sandbox.
+ * Code-tool sidecar execution gate.
  *
- * A code tool ships a snippet (`code`) + a runtime (`python` | `node`) + a scoped `env` (the
- * tool's declared vault secrets, resolved server-side). Unlike a `callback` tool, it never
- * touches Agenta's /tools/call — it runs locally where the harness runs, which is exactly why
- * its secrets are injected here as subprocess env (and nowhere else).
- *
- * Entry convention (same for both runtimes): the snippet defines a top-level `main`. A bare
- * `def main(**inputs)` / `function main(inputs)` is found automatically; an explicit export
- * (`module.exports.main` / `exports.main` / `module.exports = fn` in Node) is also accepted.
- * Python calls `main(**inputs)` (keyword args from the tool input object); Node calls
- * `main(inputs)` (the input object) and may return a promise. The return value is
- * JSON-serialized and handed to the model as the tool result.
- *
- * Shared by every delivery path that runs code locally: engines/pi.ts (in-process Pi),
- * extensions/agenta.ts (Pi under sandbox-agent), tools/mcp-server.ts (the MCP bridge for other
- * harnesses).
+ * The code-tool interface still exists and code tools are still advertised to harnesses. The
+ * sidecar no longer executes author-supplied snippets locally, though: every delivery path
+ * funnels a `kind: "code"` call through this function, so throwing here makes direct Pi,
+ * sandbox Pi, and the ACP/MCP bridge fail consistently without changing the public wire shape.
  */
-import { spawn } from "node:child_process";
-import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
-import { tmpdir } from "node:os";
-import { join } from "node:path";
-
-/** Per-call budget for a code tool. Surfaced as a tool error on timeout. */
-export const CODE_TOOL_TIMEOUT_MS = Number(
-  process.env.AGENTA_AGENT_CODE_TOOL_TIMEOUT_MS ?? 30000,
-);
-
-// argv[1] is the snippet path (python `-c`/node `-e` put the first extra arg at argv[1]);
-// the tool input arrives as JSON on stdin. Both bootstraps evaluate the snippet in a fresh
-// scope and pick up a top-level `main` (a bare `def main`/`function main`), falling back to an
-// explicit export. Either way the contract is: define a callable `main`.
-const PY_BOOTSTRAP = `import sys, json
-_path = sys.argv[1]
-with open(_path) as _f:
-    _src = _f.read()
-_ns = {}
-exec(compile(_src, _path, "exec"), _ns)
-if not callable(_ns.get("main")):
-    sys.stderr.write("code tool must define a callable main(**inputs)")
-    sys.exit(1)
-_args = json.loads(sys.stdin.read() or "{}")
-_out = _ns["main"](**_args)
-sys.stdout.write(json.dumps(_out))
-`;
-
-// `require(path)` would only see CommonJS exports, so a bare top-level `function main` (which
-// exports nothing under CommonJS) would be invisible. Instead read the source and evaluate it
-// in a scope that captures a top-level `main`, while still honoring an explicit
-// `module.exports.main` / `exports.main` / `module.exports = fn`.
-const NODE_BOOTSTRAP = `const fs = require("fs");
-const path = process.argv[1];
-const src = fs.readFileSync(path, "utf8");
-const mod = { exports: {} };
-const factory = new Function(
-  "exports",
-  "require",
-  "module",
-  "__filename",
-  "__dirname",
-  src +
-    "\\n;return (typeof main !== 'undefined' ? main : (module.exports && (module.exports.main || module.exports.default)) || module.exports);",
-);
-const fn = factory(mod.exports, require, mod, path, require("path").dirname(path));
-if (typeof fn !== "function") {
-  process.stderr.write("code tool must define or export a callable main(inputs) function");
-  process.exit(1);
-}
-const args = JSON.parse(fs.readFileSync(0, "utf8") || "{}");
-Promise.resolve(fn(args))
-  .then((out) => process.stdout.write(JSON.stringify(out === undefined ? null : out)))
-  .catch((err) => { process.stderr.write(String((err && err.stack) || err)); process.exit(1); });
-`;
 
 export type CodeRuntime = "python" | "node";
 
-// The minimal set of host env vars a python3/node runtime needs to start. Deliberately
-// excludes everything secret-bearing or sidecar-specific: no AGENTA_*, no *_API_KEY /
-// *_TOKEN, no COMPOSIO_* / DAYTONA_*, no provider keys that the in-process Pi path writes
-// into process.env before a run. Only the tool's declared scoped `env` is layered on top.
-const BASE_ENV_ALLOWLIST = [
-  "PATH",
-  "HOME",
-  "LANG",
-  "LC_ALL",
-  "LC_CTYPE",
-  "TZ",
-  "TMPDIR",
-  "TEMP",
-  "TMP",
-  "NODE_PATH",
-  // Windows essentials, copied only when present.
-  "SystemRoot",
-  "ComSpec",
-];
-
-/** Build the child env from a minimal allowlist (copied only when set) plus scoped secrets. */
-function buildChildEnv(
-  env: Record<string, string> | undefined,
-): Record<string, string> {
-  const base: Record<string, string> = {};
-  for (const key of BASE_ENV_ALLOWLIST) {
-    const value = process.env[key];
-    if (value !== undefined) base[key] = value;
-  }
-  return { ...base, ...(env ?? {}) };
-}
+export const CODE_TOOL_UNSUPPORTED_MESSAGE =
+  "Code tools are not supported by the sidecar.";
 
 /**
- * Run a code tool's snippet and return its JSON-serialized output as text. Throws on a
- * non-zero exit, a timeout, or an abort; callers turn the throw into a tool-error result so
- * the model loop continues.
+ * Fail a code-tool invocation. Callers turn this throw into a tool-error result so the model
+ * loop continues rather than crashing the whole run.
  */
 export async function runCodeTool(
-  runtime: CodeRuntime | undefined,
-  code: string,
-  env: Record<string, string> | undefined,
-  args: unknown,
-  signal?: AbortSignal,
+  _runtime: CodeRuntime | undefined,
+  _code: string,
+  _env: Record<string, string> | undefined,
+  _args: unknown,
+  _signal?: AbortSignal,
 ): Promise<string> {
-  const dir = mkdtempSync(join(tmpdir(), "agenta-code-"));
-  try {
-    const isNode = runtime === "node";
-    const scriptPath = join(dir, isNode ? "tool.js" : "tool.py");
-    writeFileSync(scriptPath, code ?? "", "utf8");
-
-    const command = isNode ? "node" : "python3";
-    const childArgs = isNode
-      ? ["-e", NODE_BOOTSTRAP, scriptPath]
-      : ["-c", PY_BOOTSTRAP, scriptPath];
-
-    return await new Promise<string>((resolve, reject) => {
-      const child = spawn(command, childArgs, {
-        // The child inherits ONLY a minimal startup allowlist (PATH, HOME, locale/temp, and
-        // Windows essentials when present) plus the tool's declared scoped secrets. It does
-        // NOT inherit the sidecar's process.env, so provider keys (OPENAI_API_KEY, etc.) that
-        // the in-process Pi path writes into process.env, AGENTA_* config, and other secret-
-        // bearing vars never reach an author-supplied snippet. Nothing is written to the
-        // agent-visible filesystem beyond the temp dir.
-        env: buildChildEnv(env),
-        stdio: ["pipe", "pipe", "pipe"],
-      });
-
-      let stdout = "";
-      let stderr = "";
-      let settled = false;
-      const finish = (fn: () => void) => {
-        if (settled) return;
-        settled = true;
-        clearTimeout(timer);
-        if (signal) signal.removeEventListener("abort", onAbort);
-        fn();
-      };
-
-      const timer = setTimeout(() => {
-        child.kill("SIGKILL");
-        finish(() =>
-          reject(new Error(`code tool timed out after ${CODE_TOOL_TIMEOUT_MS}ms`)),
-        );
-      }, CODE_TOOL_TIMEOUT_MS);
-
-      const onAbort = () => {
-        child.kill("SIGKILL");
-        finish(() => reject(new Error("aborted")));
-      };
-      if (signal) {
-        if (signal.aborted) onAbort();
-        else signal.addEventListener("abort", onAbort, { once: true });
-      }
-
-      child.stdout.on("data", (d) => (stdout += d));
-      child.stderr.on("data", (d) => (stderr += d));
-      child.on("error", (err) => finish(() => reject(err)));
-      child.on("close", (exitCode) =>
-        finish(() => {
-          if (exitCode === 0) resolve(stdout.trim());
-          else
-            reject(
-              new Error(
-                `code tool exited ${exitCode}: ${stderr.slice(0, 500) || "(no stderr)"}`,
-              ),
-            );
-        }),
-      );
-
-      child.stdin.write(JSON.stringify(args ?? {}));
-      child.stdin.end();
-    });
-  } finally {
-    try {
-      rmSync(dir, { recursive: true, force: true });
-    } catch {
-      // best-effort cleanup of the throwaway snippet dir
-    }
-  }
+  throw new Error(CODE_TOOL_UNSUPPORTED_MESSAGE);
 }
diff --git a/services/agent/src/tools/dispatch.ts b/services/agent/src/tools/dispatch.ts
@@ -9,7 +9,7 @@
  * advertise/skip behavior for `client` tools — only the execution itself is shared.
  *
  * The three executor kinds (see `ResolvedToolSpec`):
- *  - `code`: run the snippet in a sandbox subprocess with its scoped secret `env`.
+ *  - `code`: advertised to harnesses, but rejected by the sidecar as unsupported.
  *  - `client`: browser-fulfilled across a turn boundary; never executed in-sandbox (throws).
  *  - `callback` (default): POST back through Agenta's /tools/call so the Composio key and
  *    connection auth stay server-side. On Daytona the in-sandbox process can't reach Agenta,
@@ -96,7 +96,7 @@ export async function relayToolCall(
  * Execute one resolved tool and return its result text. Throws on failure; every call site
  * turns the throw into a tool-error result so the model loop continues rather than crashing.
  *
- *  - `code`   → run the snippet locally (scoped secret env), no callback/relay.
+ *  - `code`   -> reject as unsupported by the sidecar, no callback/relay.
  *  - `client` → throw: browser-fulfilled, never executed in-sandbox.
  *  - default/`callback` → relay through the runner when `opts.relayDir` is set (Daytona),
  *    else POST directly to `opts.endpoint`.

diff --git a/services/agent/tests/unit/code-tool.test.ts b/services/agent/tests/unit/code-tool.test.ts
@@ -1,89 +1,25 @@
 /**
- * Unit test for the code-tool executor (runCodeTool).
+ * Unit test for the code-tool sidecar execution gate.
  *
- * Exercises both runtimes end-to-end through real subprocesses: a python tool, node tools
- * written as a bare top-level `function main` (the F2 regression) and as an explicit
- * `module.exports.main`, an async node `main`, the F3 env-isolation guarantee (provider keys
- * do NOT leak in; declared scoped secrets DO), and the non-zero-exit reject path.
- *
- * Needs `python3` and `node` on PATH (both present locally and on ubuntu CI runners).
- *
- * Run: pnpm test (or: pnpm exec vitest run tests/unit/code-tool.test.ts)
+ * Code tools remain part of the public tool interface and can still be advertised to harnesses,
+ * but the sidecar must not execute author-supplied snippets locally.
  */
 import { describe, it } from "vitest";
 import assert from "node:assert/strict";
 
-import { runCodeTool } from "../../src/tools/code.ts";
+import { CODE_TOOL_UNSUPPORTED_MESSAGE, runCodeTool } from "../../src/tools/code.ts";
 
 describe("runCodeTool", () => {
-  it("runs a python bare `def main(**kw)`", async () => {
-    const code = 'def main(**kw):\n    return {"sum": kw.get("a", 0) + kw.get("b", 0)}\n';
-    const out = await runCodeTool("python", code, undefined, { a: 2, b: 3 });
-    assert.deepEqual(JSON.parse(out), { sum: 5 }, "python bare main returns the right JSON");
-  });
-
-  it("runs a node bare top-level `function main` (F2 regression)", async () => {
-    const code = "function main(inputs) { return { got: inputs }; }";
-    const out = await runCodeTool("node", code, undefined, { hello: "world" });
-    assert.deepEqual(
-      JSON.parse(out),
-      { got: { hello: "world" } },
-      "node bare function main executes and echoes the input",
-    );
-  });
-
-  it("runs a node explicit `module.exports.main`", async () => {
-    const code = "module.exports.main = function (inputs) { return { via: 'exports', got: inputs }; };";
-    const out = await runCodeTool("node", code, undefined, { x: 1 });
-    assert.deepEqual(
-      JSON.parse(out),
-      { via: "exports", got: { x: 1 } },
-      "node module.exports.main works",
-    );
-  });
-
-  it("runs an async node `main` returning a Promise", async () => {
-    const code =
-      "async function main(inputs) { await new Promise((r) => setTimeout(r, 5)); return { doubled: inputs.n * 2 }; }";
-    const out = await runCodeTool("node", code, undefined, { n: 21 });
-    assert.deepEqual(JSON.parse(out), { doubled: 42 }, "node async main resolves");
-  });
-
-  it("F3: provider keys do NOT leak; scoped secrets DO", async () => {
-    const hadKey = "OPENAI_API_KEY" in process.env;
-    const prevKey = process.env.OPENAI_API_KEY;
-    process.env.OPENAI_API_KEY = "leak-me-xyz";
-    try {
-      // The provider key sits in process.env but must not reach the snippet.
-      const leakCode = "function main() { return { key: process.env.OPENAI_API_KEY ?? 'absent' }; }";
-      const leakOut = await runCodeTool("node", leakCode, undefined, {});
-      assert.deepEqual(
-        JSON.parse(leakOut),
-        { key: "absent" },
-        "F3: OPENAI_API_KEY did NOT leak into the snippet env",
-      );
-
-      // A secret declared on the tool (passed via the scoped `env` arg) must be visible.
-      const scopedCode =
-        "function main() { return { secret: process.env.MY_TOOL_SECRET ?? 'absent' }; }";
-      const scopedOut = await runCodeTool("node", scopedCode, { MY_TOOL_SECRET: "ok" }, {});
-      assert.deepEqual(
-        JSON.parse(scopedOut),
-        { secret: "ok" },
-        "F3: scoped MY_TOOL_SECRET IS visible to the snippet",
-      );
-    } finally {
-      if (hadKey) process.env.OPENAI_API_KEY = prevKey;
-      else delete process.env.OPENAI_API_KEY;
-    }
-  });
-
-  it("rejects when the snippet throws / exits non-zero", async () => {
-    const code = "function main() { throw new Error('boom'); }";
+  it("fails with a clear unsupported error instead of executing code", async () => {
     await assert.rejects(
-      () => runCodeTool("node", code, undefined, {}),
-      /boom|exited/,
-      "a throwing snippet rejects",
+      () =>
+        runCodeTool(
+          "node",
+          "function main() { return { executed: true }; }",
+          { MY_TOOL_SECRET: "secret" },
+          { input: true },
+        ),
+      new RegExp(CODE_TOOL_UNSUPPORTED_MESSAGE),
     );
   });
 });
diff --git a/services/agent/tests/unit/tool-bridge.test.ts b/services/agent/tests/unit/tool-bridge.test.ts
@@ -2,8 +2,8 @@
  * Unit tests for buildToolMcpServers (the tool MCP bridge attachment decision).
  *
  * Regression cover for F4: attachment must be decided per tool kind, not on the callback
- * endpoint alone. A `code` tool runs locally in mcp-server.ts and needs no endpoint, so a run
- * whose tools are all `code` must still attach the `agenta-tools` server. Only `callback`-kind
+ * endpoint alone. A `code` tool is still advertised through mcp-server.ts and needs no endpoint,
+ * so a run whose tools are all `code` must still attach the `agenta-tools` server. Only `callback`-kind
  * tools require AGENTA_TOOL_CALLBACK_ENDPOINT; missing it must degrade those tools, not drop the
  * whole server. `client` tools are browser-fulfilled and never justify attaching the bridge.
  *
@@ -109,7 +109,7 @@ describe("buildToolMcpServers", () => {
     ];
     const out = buildToolMcpServers(specs, relayDir);
     assert.notDeepEqual(out, [], "mixed run with no endpoint must not return []");
-    assert.equal(out.length, 1, "still attaches the server so the code tool works");
+    assert.equal(out.length, 1, "still attaches the server so the code tool is advertised");
     assert.equal(
       envValue(out[0].env, "AGENTA_TOOL_CALLBACK_ENDPOINT"),
       undefined,

diff --git a/services/agent/tests/unit/tool-dispatch.test.ts b/services/agent/tests/unit/tool-dispatch.test.ts
@@ -7,9 +7,9 @@
  * advertising behavior that stays per-site:
  *  - buildCustomTools (pi.ts) skips `client` specs, builds a tool per `code`/`callback` spec,
  *    and skips a `callback` spec with no callback endpoint.
- *  - runResolvedTool runs a real `code` snippet end-to-end (python) and throws for `client`.
+ *  - runResolvedTool advertises code tools but fails their sidecar execution, and throws for `client`.
  *
- * No network and no harness: the `code` path shells out to python3 (available locally); the
+ * No network and no harness: the `code` path now fails before any subprocess; the
  * `callback`/relay paths are not exercised here (they need a live /tools/call or a relay dir).
  *
  * Run: pnpm test (or: pnpm exec vitest run tests/unit/tool-dispatch.test.ts)
@@ -66,15 +66,14 @@ describe("buildCustomTools routing", () => {
 });
 
 describe("runResolvedTool", () => {
-  it("runs a code spec end-to-end (python)", async () => {
-    const text = await runResolvedTool(codeSpec, { greeting: "hi", n: 3 }, {
-      toolCallId: "call-1",
-    });
-    const parsed = JSON.parse(text);
-    assert.deepEqual(
-      parsed,
-      { echo: { greeting: "hi", n: 3 } },
-      "code tool runs the snippet and returns its JSON output containing the input",
+  it("fails code specs with a clear unsupported error", async () => {
+    await assert.rejects(
+      () =>
+        runResolvedTool(codeSpec, { greeting: "hi", n: 3 }, {
+          toolCallId: "call-1",
+        }),
+      /Code tools are not supported by the sidecar\./,
+      "code tools remain advertised but are not executable by the sidecar",
     );
   });