From c8f517782f47a04f610a655316d2f0a12d9fb6f7 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Wed, 10 Jun 2026 14:30:54 -0700 Subject: [PATCH 01/12] Add hack/README.md documenting scripts --- hack/README.md | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 hack/README.md diff --git a/hack/README.md b/hack/README.md new file mode 100644 index 0000000..4ed40e1 --- /dev/null +++ b/hack/README.md @@ -0,0 +1,59 @@ +# Helper Scripts (`hack/`) + +This directory contains utility scripts to install, run, and test AX Orchestrator and its agent harnesses. + +## Available Scripts + +### 1. `run-ax-environment.sh` +This script boots the complete local development environment in a single terminal session. + +- **What it does**: + 1. Compiles the Go `ax` CLI with `-tags harness`. + 2. Starts the **Python gRPC Harness Server** in the background on port `50053`. + 3. Waits for the harness to become healthy. + 4. Starts the **AX Orchestrator Server** (`ax serve`) in the background on port `8494`. + 5. Waits for the orchestrator to become healthy. + 6. Runs the **AX Monitor Dashboard** (`ax monitor`) in the foreground, opening your web browser to the dashboard interface (`http://localhost:8080`). + 7. On exit or `Ctrl+C`, cleans up all background processes gracefully. + +- **Usage**: + ```bash + export GEMINI_API_KEY="your-gemini-api-key" + ./hack/run-ax-environment.sh + ``` + +--- + +### 2. `run-antigravity-streaming.sh` +This script executes a local E2E test turn against a persistent gRPC harness. + +- **What it does**: + 1. Boots the Python gRPC server in the background. + 2. Compiles the Go E2E client binary (`cmd/e2e/main.go`). + 3. Runs the E2E verification test suite. + 4. Automatically cleans up the background server on exit. + +- **Usage**: + ```bash + export GEMINI_API_KEY="your-gemini-api-key" + ./hack/run-antigravity-streaming.sh + ``` + +--- + +### 3. 
`install-ax.sh` +This script deploys the AX server components to a Kubernetes/SubstrATE cluster. + +- **What it does**: + - Leverages `ko` to build and deploy AX server containers. + - Automatically handles namespace and dependency setups. + +- **Usage**: + - **Deploy to Cluster**: + ```bash + ./hack/install-ax.sh --deploy-ax-server + ``` + - **Tear Down**: + ```bash + ./hack/install-ax.sh --delete-ax-server + ``` From 86dad8de9c103fb191b182c4a61d37518bb903e3 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Wed, 10 Jun 2026 14:47:43 -0700 Subject: [PATCH 02/12] Consolidate developer running and deployment scripts into hack/ax-dev.sh --- hack/README.md | 42 +++-- hack/ax-dev.sh | 300 ++++++++++++++++++++++++++++++ hack/install-ax.sh | 117 ------------ hack/run-antigravity-streaming.sh | 74 -------- internal/hack/install-ax.sh | 112 ----------- internal/manifests/README.md | 4 +- manifests/README.md | 4 +- 7 files changed, 326 insertions(+), 327 deletions(-) create mode 100755 hack/ax-dev.sh delete mode 100755 hack/install-ax.sh delete mode 100755 hack/run-antigravity-streaming.sh delete mode 100755 internal/hack/install-ax.sh diff --git a/hack/README.md b/hack/README.md index 4ed40e1..a34d189 100644 --- a/hack/README.md +++ b/hack/README.md @@ -1,11 +1,15 @@ -# Helper Scripts (`hack/`) +# Developer Utility Tool (`hack/`) -This directory contains utility scripts to install, run, and test AX Orchestrator and its agent harnesses. +This directory contains the unified AX developer CLI utility tool `ax-dev.sh` to compile, run, and deploy the AX Orchestrator and its harnesses. -## Available Scripts +--- + +## Unified Command: `ax-dev.sh` + +The `ax-dev.sh` script consolidates all local running, E2E testing, and Kubernetes/SubstrATE cluster deployments. -### 1. `run-ax-environment.sh` -This script boots the complete local development environment in a single terminal session. +### 1. 
Local Development Mode (`local`) +Starts the Python harness, AX Orchestrator, and the AX Monitor Dashboard, launching the visual web interface automatically in a single terminal session. - **What it does**: 1. Compiles the Go `ax` CLI with `-tags harness`. @@ -13,19 +17,19 @@ This script boots the complete local development environment in a single termina 3. Waits for the harness to become healthy. 4. Starts the **AX Orchestrator Server** (`ax serve`) in the background on port `8494`. 5. Waits for the orchestrator to become healthy. - 6. Runs the **AX Monitor Dashboard** (`ax monitor`) in the foreground, opening your web browser to the dashboard interface (`http://localhost:8080`). + 6. Starts the **AX Monitor Dashboard** (`ax monitor`) in the foreground, opening your web browser to the dashboard interface (`http://localhost:8080`). 7. On exit or `Ctrl+C`, cleans up all background processes gracefully. - **Usage**: ```bash export GEMINI_API_KEY="your-gemini-api-key" - ./hack/run-ax-environment.sh + ./hack/ax-dev.sh local ``` --- -### 2. `run-antigravity-streaming.sh` -This script executes a local E2E test turn against a persistent gRPC harness. +### 2. Local E2E Integration Tests (`test`) +Runs local end-to-end integration tests using the stateful Python harness and Go E2E test client. - **What it does**: 1. Boots the Python gRPC server in the background. @@ -36,24 +40,22 @@ This script executes a local E2E test turn against a persistent gRPC harness. - **Usage**: ```bash export GEMINI_API_KEY="your-gemini-api-key" - ./hack/run-antigravity-streaming.sh + ./hack/ax-dev.sh test ``` --- -### 3. `install-ax.sh` -This script deploys the AX server components to a Kubernetes/SubstrATE cluster. +### 3. Cloud / SubstrATE Deployment Mode (`cloud`) +Deploys or deletes the AX Orchestrator server resources on a SubstrATE-enabled Kubernetes cluster. -- **What it does**: - - Leverages `ko` to build and deploy AX server containers. 
- - Automatically handles namespace and dependency setups. - -- **Usage**: +- **Commands**: - **Deploy to Cluster**: ```bash - ./hack/install-ax.sh --deploy-ax-server + export GEMINI_API_KEY="your-gemini-api-key" + export BUCKET_NAME="your-gcs-bucket-name" + ./hack/ax-dev.sh cloud deploy ``` - - **Tear Down**: + - **Tear Down / Delete**: ```bash - ./hack/install-ax.sh --delete-ax-server + ./hack/ax-dev.sh cloud delete ``` diff --git a/hack/ax-dev.sh b/hack/ax-dev.sh new file mode 100755 index 0000000..f881c2b --- /dev/null +++ b/hack/ax-dev.sh @@ -0,0 +1,300 @@ +#!/bin/bash +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -e + +COLOR_CYAN='\033[1;36m' +COLOR_RED='\033[1;31m' +COLOR_RESET='\033[0m' + +log_step() { + echo -e "${COLOR_CYAN}[step]: $1${COLOR_RESET}" +} + +log_error() { + echo -e "${COLOR_RED}ERROR: $1${COLOR_RESET}" >&2 +} + +usage() { + echo "AX Developer Utility Tool" + echo "" + echo "Usage: $0 [options]" + echo "" + echo "Commands:" + echo " local Start the local AX development environment (Harness + Server + Monitor Dashboard)" + echo " test Run local end-to-end integration tests (Harness + E2E client)" + echo " cloud deploy Deploy AX server and harnesses to a SubstrATE Kubernetes cluster" + echo " cloud delete Tear down AX server and harnesses from a SubstrATE Kubernetes cluster" + echo "" + echo "Cloud Options:" + echo " --harness, --v2 Target the experimental harness-path configuration (ax-deployment2.yaml)" + echo "" + echo "Options:" + echo " -h, --help Show this help message" +} + +# Find appropriate Python virtualenv or fallback to python3 +resolve_python() { + if [ -f ".venv/bin/python" ]; then + echo ".venv/bin/python" + elif [ -f "/Users/anjalisridhar/ax/.venv/bin/python" ]; then + echo "/Users/anjalisridhar/ax/.venv/bin/python" + else + echo "python3" + fi +} + +check_gemini_key() { + if [ -z "$GEMINI_API_KEY" ]; then + log_error "GEMINI_API_KEY environment variable is not set." + echo "Please set it using: export GEMINI_API_KEY=\"your-key\"" + exit 1 + fi +} + +# Wait for a local TCP port to open +wait_for_port() { + local port=$1 + local name=$2 + local max_attempts=30 + local attempt=1 + while [ $attempt -le $max_attempts ]; do + if nc -z localhost "$port"; then + return 0 + fi + sleep 0.2 + attempt=$((attempt + 1)) + done + log_error "$name failed to bind to port $port." + return 1 +} + +run_local() { + check_gemini_key + + local port=50053 + local agent_file="examples/antigravity_agent/agent.py" + local config_file="internal/ax2.yaml" + local python_bin + python_bin=$(resolve_python) + + if [ ! 
-f "bin/ax" ]; then + log_step "Building AX CLI..." + go build -tags harness -o bin/ax ./cmd/ax + fi + + local server_pid="" + local ax_pid="" + + cleanup() { + echo "" + log_step "Shutting down local AX environment..." + if [ -n "$server_pid" ]; then + kill "$server_pid" 2>/dev/null || true + fi + if [ -n "$ax_pid" ]; then + kill "$ax_pid" 2>/dev/null || true + fi + wait "$server_pid" 2>/dev/null || true + wait "$ax_pid" 2>/dev/null || true + log_step "Shutdown complete!" + } + trap cleanup EXIT INT TERM + + log_step "Starting Python gRPC Harness Server on port $port..." + PYTHONPATH=python:. "$python_bin" -m python.antigravity.harness_server --agent_file "$agent_file" --port "$port" > /tmp/antigravity_harness.log 2>&1 & + server_pid=$! + + log_step "Waiting for Python Harness Server to become healthy..." + if ! wait_for_port "$port" "Python Harness Server"; then + cat /tmp/antigravity_harness.log + exit 1 + fi + echo "Python Harness Server is active!" + + log_step "Starting AX Orchestrator Server (ax serve)..." + ./bin/ax serve --config "$config_file" > /tmp/ax_serve.log 2>&1 & + ax_pid=$! + + log_step "Waiting for AX Orchestrator to bind on port 8494..." + if ! wait_for_port 8494 "AX Orchestrator"; then + cat /tmp/ax_serve.log + exit 1 + fi + echo "AX Orchestrator is active!" + + log_step "Starting AX Monitor Dashboard (ax monitor)..." + echo "Press Ctrl+C to terminate all services." + ./bin/ax monitor --config "$config_file" --addr localhost:8080 +} + +run_test() { + check_gemini_key + + local port=50053 + local agent_file="examples/antigravity_agent/agent.py" + local python_bin + python_bin=$(resolve_python) + + local server_pid="" + + cleanup() { + if [ -n "$server_pid" ]; then + log_step "Killing Python server (PID: $server_pid)..." + kill "$server_pid" 2>/dev/null || true + wait "$server_pid" 2>/dev/null || true + fi + } + trap cleanup EXIT INT TERM + + log_step "Starting Python gRPC Harness Server on port $port..." + PYTHONPATH=python:. 
"$python_bin" -m python.antigravity.harness_server --agent_file "$agent_file" --port "$port" > /tmp/antigravity_harness.log 2>&1 & + server_pid=$! + + log_step "Waiting for Python Harness Server to become healthy..." + if ! wait_for_port "$port" "Python Harness Server"; then + cat /tmp/antigravity_harness.log + exit 1 + fi + + log_step "Building E2E test client..." + go build -o bin/e2e ./cmd/e2e + + log_step "Executing E2E integration test suite..." + bin/e2e + + echo "Success!" +} + +# SubstrATE Cloud commands helper +run_kubectl() { + kubectl ${KUBECTL_CONTEXT:+--context=${KUBECTL_CONTEXT}} "$@" +} + +run_ko() { + GOFLAGS="-tags=ate" ko apply ${KUBECTL_CONTEXT:+--context=${KUBECTL_CONTEXT}} "$@" +} + +cloud_deploy() { + check_gemini_key + + if [ -z "$BUCKET_NAME" ]; then + log_error "BUCKET_NAME environment variable is not set." + echo "Please set it using: export BUCKET_NAME=\"your-gcs-bucket\"" + exit 1 + fi + + local manifest="manifests/ax-deployment.yaml.tmpl" + local service="manifests/ax-service.yaml" + + while [[ "$#" -gt 0 ]]; do + case $1 in + --harness|--v2) + manifest="internal/manifests/ax-deployment2.yaml" + service="" + shift + ;; + *) + shift + ;; + esac + done + + log_step "Deploying AX Server and Harnesses from $manifest to Kubernetes cluster..." + sed -e "s|\${GEMINI_API_KEY}|${GEMINI_API_KEY}|g" \ + -e "s|\${BUCKET_NAME}|${BUCKET_NAME}|g" \ + "$manifest" \ + | run_ko -f - + + if [ -n "$service" ]; then + run_kubectl apply -f "$service" + fi + log_step "Deployment applied successfully!" +} + +cloud_delete() { + local manifest="manifests/ax-deployment.yaml.tmpl" + local service="manifests/ax-service.yaml" + + while [[ "$#" -gt 0 ]]; do + case $1 in + --harness|--v2) + manifest="internal/manifests/ax-deployment2.yaml" + service="" + shift + ;; + *) + shift + ;; + esac + done + + log_step "Deleting AX Server and Harnesses ($manifest) from Kubernetes cluster..." 
+ sed -e "s|\${GEMINI_API_KEY}|dummy-key|g" \ + -e "s|\${BUCKET_NAME}|dummy-bucket|g" \ + "$manifest" \ + | run_kubectl delete --ignore-not-found -f - + + if [ -n "$service" ]; then + run_kubectl delete --ignore-not-found -f "$service" + fi + log_step "Deletion complete!" +} + +# Main routing logic +if [ "$#" -eq 0 ]; then + usage + exit 1 +fi + +case $1 in + -h|--help) + usage + exit 0 + ;; + local) + run_local + ;; + test) + run_test + ;; + cloud) + if [ "$#" -lt 2 ]; then + log_error "Missing cloud action (deploy or delete)." + usage + exit 1 + fi + action=$2 + shift 2 + case $action in + deploy) + cloud_deploy "$@" + ;; + delete) + cloud_delete "$@" + ;; + *) + log_error "Unknown cloud command: $action" + usage + exit 1 + ;; + esac + ;; + *) + log_error "Unknown command: $1" + usage + exit 1 + ;; +esac diff --git a/hack/install-ax.sh b/hack/install-ax.sh deleted file mode 100755 index 9885092..0000000 --- a/hack/install-ax.sh +++ /dev/null @@ -1,117 +0,0 @@ -#!/bin/bash -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -set -e -set -u -set -o pipefail - -ROOT=$(git rev-parse --show-toplevel) -cd "${ROOT}" - -# ANSI color codes for prettier output -COLOR_CYAN='\033[1;36m' -COLOR_RESET='\033[0m' - -function log_step() { - local step_name="$1" - echo -e "${COLOR_CYAN}[step]: ${step_name}${COLOR_RESET}" -} - -function usage() { - echo "Usage: $0 [options]" - echo "" - echo "Options:" - echo " --deploy-ax-server Deploy AX server and components using ko" - echo " --delete-ax-server Delete AX server and components from cluster" - echo " -h, --help Show this help message" -} - -run_kubectl() { - kubectl \ - ${KUBECTL_CONTEXT:+--context=${KUBECTL_CONTEXT}} \ - "$@" -} - -run_ko() { - GOFLAGS="-tags=ate" ko apply \ - ${KUBECTL_CONTEXT:+--context=${KUBECTL_CONTEXT}} \ - "$@" -} - -deploy_ax_server() { - log_step "deploy_ax_server" - - # Check dependencies - if [[ -z "${GEMINI_API_KEY:-}" ]]; then - echo "Error: GEMINI_API_KEY environment variable must be set" >&2 - exit 1 - fi - if [[ -z "${BUCKET_NAME:-}" ]]; then - echo "Error: BUCKET_NAME environment variable must be set" >&2 - exit 1 - fi - - echo "Using GCS Bucket: ${BUCKET_NAME}" - - # Render template and apply with ko - sed -e "s|\${GEMINI_API_KEY}|${GEMINI_API_KEY}|g" \ - -e "s|\${BUCKET_NAME}|${BUCKET_NAME}|g" \ - manifests/ax-deployment.yaml.tmpl \ - | run_ko -f - - - # Apply service - run_kubectl apply -f manifests/ax-service.yaml -} - -delete_ax_server() { - log_step "delete_ax_server" - - # Delete resources using a dummy key and bucket so credentials aren't required for deletion - sed -e "s|\${GEMINI_API_KEY}|dummy-key|g" \ - -e "s|\${BUCKET_NAME}|dummy-bucket|g" \ - manifests/ax-deployment.yaml.tmpl \ - | run_kubectl delete --ignore-not-found -f - - - run_kubectl delete --ignore-not-found -f manifests/ax-service.yaml -} - -if [ "$#" -eq 0 ]; then - usage - exit 1 -fi - -# If -h or --help appears anywhere in the command line, print the usage and exit. 
-for arg in "$@"; do - case "$arg" in - -h|--help) - usage - exit 0 - ;; - esac -done - -while [[ "$#" -gt 0 ]]; do - case $1 in - --deploy-ax-server) deploy_ax_server ;; - --delete-ax-server) delete_ax_server ;; - *) - echo "Error: unknown option: $1" >&2 - echo "" - usage - exit 1 - ;; - esac - shift -done diff --git a/hack/run-antigravity-streaming.sh b/hack/run-antigravity-streaming.sh deleted file mode 100755 index 7e80428..0000000 --- a/hack/run-antigravity-streaming.sh +++ /dev/null @@ -1,74 +0,0 @@ -#!/bin/bash -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -e - -# Check if GEMINI_API_KEY is set -if [ -z "$GEMINI_API_KEY" ]; then - echo "ERROR: GEMINI_API_KEY environment variable is not set." - echo "Please set it using: export GEMINI_API_KEY=\"your-key\"" - exit 1 -fi - -PORT=50053 -ADDRESS="localhost:$PORT" -AGENT_FILE="examples/antigravity_agent/agent.py" - -# 1. Start Python gRPC server in the background -echo "Starting Python gRPC Harness Server on port $PORT..." -PYTHONPATH=python:. .venv/bin/python -m python.antigravity.harness_server --agent_file "$AGENT_FILE" --port "$PORT" > /tmp/antigravity_harness.log 2>&1 & -SERVER_PID=$! - -# Register trap to ensure server is killed on script exit -cleanup() { - echo "Cleaning up: killing Python server (PID: $SERVER_PID)..." - kill "$SERVER_PID" || true - wait "$SERVER_PID" 2>/dev/null || true - echo "Cleanup complete!" -} -trap cleanup EXIT - -# 2. 
Wait for the Python server to be healthy -echo "Waiting for Python server to become healthy..." -MAX_ATTEMPTS=30 -ATTEMPT=1 -HEALTHY=false - -while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do - # We can check if the port is open using nc (netcat) - if nc -z localhost "$PORT"; then - HEALTHY=true - break - fi - sleep 0.2 - ATTEMPT=$((ATTEMPT + 1)) -done - -if [ "$HEALTHY" = false ]; then - echo "ERROR: Python server failed to start within 6 seconds." - echo "Server logs (/tmp/antigravity_harness.log):" - cat /tmp/antigravity_harness.log - exit 1 -fi -echo "Python server is active!" - -# 3. Build and run the Go E2E V2 demonstration -echo "Building e2e..." -/opt/homebrew/bin/go build -o bin/e2e ./cmd/e2e - -echo "Executing E2E Demo with Antigravity gRPC Harness..." -bin/e2e - -echo "Success!" diff --git a/internal/hack/install-ax.sh b/internal/hack/install-ax.sh deleted file mode 100755 index c7f53e8..0000000 --- a/internal/hack/install-ax.sh +++ /dev/null @@ -1,112 +0,0 @@ -#!/bin/bash -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -set -e -set -u -set -o pipefail - -ROOT=$(git rev-parse --show-toplevel) -cd "${ROOT}" - -# ANSI color codes for prettier output -COLOR_CYAN='\033[1;36m' -COLOR_RESET='\033[0m' - -function log_step() { - local step_name="$1" - echo -e "${COLOR_CYAN}[step]: ${step_name}${COLOR_RESET}" -} - -function usage() { - echo "Usage: $0 [options]" - echo "" - echo "Options:" - echo " --deploy-ax-server Deploy AX server and components using ko" - echo " --delete-ax-server Delete AX server and components from cluster" - echo " -h, --help Show this help message" -} - -run_kubectl() { - kubectl \ - ${KUBECTL_CONTEXT:+--context=${KUBECTL_CONTEXT}} \ - "$@" -} - -run_ko() { - GOFLAGS="-tags=harness" ko apply \ - ${KUBECTL_CONTEXT:+--context=${KUBECTL_CONTEXT}} \ - "$@" -} - -deploy_ax_server() { - log_step "deploy_ax_server" - - # Check dependencies - if [[ -z "${GEMINI_API_KEY:-}" ]]; then - echo "Error: GEMINI_API_KEY environment variable must be set" >&2 - exit 1 - fi - if [[ -z "${BUCKET_NAME:-}" ]]; then - echo "Error: BUCKET_NAME environment variable must be set" >&2 - exit 1 - fi - - echo "Using GCS Bucket: ${BUCKET_NAME}" - - # Render template and apply with ko - sed -e "s|\${GEMINI_API_KEY}|${GEMINI_API_KEY}|g" \ - -e "s|\${BUCKET_NAME}|${BUCKET_NAME}|g" \ - internal/manifests/ax-deployment2.yaml \ - | run_ko -f - -} - -delete_ax_server() { - log_step "delete_ax_server" - - # Delete resources using a dummy key and bucket so credentials aren't required for deletion - sed -e "s|\${GEMINI_API_KEY}|dummy-key|g" \ - -e "s|\${BUCKET_NAME}|dummy-bucket|g" \ - internal/manifests/ax-deployment2.yaml \ - | run_kubectl delete --ignore-not-found -f - -} - -if [ "$#" -eq 0 ]; then - usage - exit 1 -fi - -# If -h or --help appears anywhere in the command line, print the usage and exit. 
-for arg in "$@"; do - case "$arg" in - -h|--help) - usage - exit 0 - ;; - esac -done - -while [[ "$#" -gt 0 ]]; do - case $1 in - --deploy-ax-server) deploy_ax_server ;; - --delete-ax-server) delete_ax_server ;; - *) - echo "Error: unknown option: $1" >&2 - echo "" - usage - exit 1 - ;; - esac - shift -done diff --git a/internal/manifests/README.md b/internal/manifests/README.md index faa090f..2e1c3ef 100644 --- a/internal/manifests/README.md +++ b/internal/manifests/README.md @@ -49,7 +49,7 @@ export BUCKET_NAME="snapshot-substrate-test-$PROJECT_ID" export KO_DOCKER_REPO="gcr.io/$PROJECT_ID/ate-images" export KO_DEFAULTPLATFORMS="linux/amd64" -./internal/hack/install-ax.sh --deploy-ax-server +./hack/ax-dev.sh cloud deploy --harness ``` This command will: @@ -110,7 +110,7 @@ hello world To remove AX resources from your cluster, run: ```bash -./internal/hack/install-ax.sh --delete-ax-server +./hack/ax-dev.sh cloud delete --harness ``` --- diff --git a/manifests/README.md b/manifests/README.md index acbbca5..d966848 100644 --- a/manifests/README.md +++ b/manifests/README.md @@ -22,7 +22,7 @@ Use the core installation script to build the images and apply the resolved mani ```bash export GEMINI_API_KEY="your-api-key" export BUCKET_NAME="your-gcs-bucket" -./hack/install-ax.sh --deploy-ax-server +./hack/ax-dev.sh cloud deploy ``` This command will: @@ -58,7 +58,7 @@ ax exec --server=localhost:8001 --input="hello" To remove AX resources from your cluster, run: ```bash -./hack/install-ax.sh --delete-ax-server +./hack/ax-dev.sh cloud delete ``` --- From b37d42ff06c9283d0762bb9d3d3b0e3b9011a807 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Wed, 10 Jun 2026 15:36:57 -0700 Subject: [PATCH 03/12] Fix duplicate cancel logs in ax-dev.sh and implement trace data fallback for V2 runs in trace.go --- cmd/ax/trace.go | 64 +++++++++++++++++++++++++++++++++++++++++++++++++ hack/ax-dev.sh | 4 ++-- 2 files changed, 66 insertions(+), 2 deletions(-) diff --git a/cmd/ax/trace.go 
b/cmd/ax/trace.go index a95f2d5..c26501b 100644 --- a/cmd/ax/trace.go +++ b/cmd/ax/trace.go @@ -28,6 +28,7 @@ import ( "github.com/google/ax/internal/controller/executor" "github.com/google/ax/proto" "github.com/spf13/cobra" + "google.golang.org/protobuf/types/known/timestamppb" ) var ( @@ -158,20 +159,83 @@ func fetch(ctx context.Context, cfg *cliutil.Config, convID string) ([]*proto.Ex } var allEvents []*proto.ExecutionEvent + var hasExecLogs bool for _, eID := range execIDs { events, err := evLog.ExecEvents(ctx, eID) if err != nil { return nil, "", nil, fmt.Errorf("failed to query events for exec %s: %w", eID, err) } + if len(events) > 0 { + hasExecLogs = true + } allEvents = append(allEvents, events...) } // Use the first execID as the rootExecID as requested by user rootExecID := execIDs[0] + if !hasExecLogs { + allEvents = reconstructExecEvents(convEvents) + } + return allEvents, rootExecID, execIDs, nil } +func reconstructExecEvents(convEvents []*proto.ConversationEvent) []*proto.ExecutionEvent { + eventsMap := make(map[string][]*proto.ConversationEvent) + var execIDsOrdered []string + seenExec := make(map[string]bool) + + for _, ev := range convEvents { + if ev.ExecId == "" { + continue + } + if !seenExec[ev.ExecId] { + execIDsOrdered = append(execIDsOrdered, ev.ExecId) + seenExec[ev.ExecId] = true + } + eventsMap[ev.ExecId] = append(eventsMap[ev.ExecId], ev) + } + + var result []*proto.ExecutionEvent + baseTime := time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC) + + for _, eID := range execIDsOrdered { + evs := eventsMap[eID] + if len(evs) == 0 { + continue + } + + var inputs []*proto.Message + var outputs []*proto.Message + var finalState proto.State + + for i, ev := range evs { + if i == 0 { + inputs = append(inputs, ev.Messages...) + } else { + outputs = append(outputs, ev.Messages...) 
+ } + finalState = ev.State + } + + seqOffset := evs[0].Seq + mockTime := baseTime.Add(time.Duration(seqOffset) * time.Second) + + execEv := &proto.ExecutionEvent{ + ExecId: eID, + AgentId: "unknown", + Inputs: inputs, + Outputs: outputs, + State: finalState, + Timestamp: timestamppb.New(mockTime), + } + result = append(result, execEv) + } + + return result +} + func buildExecTraces(execIDs []string, events []*proto.ExecutionEvent) []ExecTrace { execsMap := make(map[string][]ExecutionEvent) diff --git a/hack/ax-dev.sh b/hack/ax-dev.sh index f881c2b..f885d75 100755 --- a/hack/ax-dev.sh +++ b/hack/ax-dev.sh @@ -111,7 +111,7 @@ run_local() { wait "$ax_pid" 2>/dev/null || true log_step "Shutdown complete!" } - trap cleanup EXIT INT TERM + trap cleanup EXIT log_step "Starting Python gRPC Harness Server on port $port..." PYTHONPATH=python:. "$python_bin" -m python.antigravity.harness_server --agent_file "$agent_file" --port "$port" > /tmp/antigravity_harness.log 2>&1 & @@ -157,7 +157,7 @@ run_test() { wait "$server_pid" 2>/dev/null || true fi } - trap cleanup EXIT INT TERM + trap cleanup EXIT log_step "Starting Python gRPC Harness Server on port $port..." PYTHONPATH=python:. "$python_bin" -m python.antigravity.harness_server --agent_file "$agent_file" --port "$port" > /tmp/antigravity_harness.log 2>&1 & From 42e86c221760b3fe9e325388022804949841327d Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Thu, 11 Jun 2026 10:49:21 -0700 Subject: [PATCH 04/12] Ensure Go bin paths are included in script PATH for tool resolution (e.g. 
ko) --- hack/ax-dev.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hack/ax-dev.sh b/hack/ax-dev.sh index f885d75..2e537d2 100755 --- a/hack/ax-dev.sh +++ b/hack/ax-dev.sh @@ -15,6 +15,9 @@ set -e +# Ensure Go bin paths are included in PATH +export PATH=$PATH:$(go env GOPATH)/bin:~/go/bin + COLOR_CYAN='\033[1;36m' COLOR_RED='\033[1;31m' COLOR_RESET='\033[0m' From 3d8b362528d8584048e02e35e1b37458d87a3f15 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Thu, 11 Jun 2026 11:00:17 -0700 Subject: [PATCH 05/12] Add GKE-specific SubstrATE deployment templates and route cloud deploy/delete to them automatically --- hack/ax-dev.sh | 34 +++- internal/manifests/ax-deployment2-gke.yaml | 134 +++++++++++++ manifests/ax-deployment-gke.yaml.tmpl | 219 +++++++++++++++++++++ 3 files changed, 383 insertions(+), 4 deletions(-) create mode 100644 internal/manifests/ax-deployment2-gke.yaml create mode 100644 manifests/ax-deployment-gke.yaml.tmpl diff --git a/hack/ax-dev.sh b/hack/ax-dev.sh index 2e537d2..ece613b 100755 --- a/hack/ax-dev.sh +++ b/hack/ax-dev.sh @@ -201,11 +201,20 @@ cloud_deploy() { local manifest="manifests/ax-deployment.yaml.tmpl" local service="manifests/ax-service.yaml" - + local is_gke=false + + if run_kubectl get crd workerpools.ate.gke.io &>/dev/null; then + is_gke=true + fi + while [[ "$#" -gt 0 ]]; do case $1 in --harness|--v2) - manifest="internal/manifests/ax-deployment2.yaml" + if [ "$is_gke" = true ]; then + manifest="internal/manifests/ax-deployment2-gke.yaml" + else + manifest="internal/manifests/ax-deployment2.yaml" + fi service="" shift ;; @@ -215,6 +224,10 @@ cloud_deploy() { esac done + if [ "$is_gke" = true ] && [ "$manifest" = "manifests/ax-deployment.yaml.tmpl" ]; then + manifest="manifests/ax-deployment-gke.yaml.tmpl" + fi + log_step "Deploying AX Server and Harnesses from $manifest to Kubernetes cluster..." 
sed -e "s|\${GEMINI_API_KEY}|${GEMINI_API_KEY}|g" \ -e "s|\${BUCKET_NAME}|${BUCKET_NAME}|g" \ @@ -230,11 +243,20 @@ cloud_deploy() { cloud_delete() { local manifest="manifests/ax-deployment.yaml.tmpl" local service="manifests/ax-service.yaml" - + local is_gke=false + + if run_kubectl get crd workerpools.ate.gke.io &>/dev/null; then + is_gke=true + fi + while [[ "$#" -gt 0 ]]; do case $1 in --harness|--v2) - manifest="internal/manifests/ax-deployment2.yaml" + if [ "$is_gke" = true ]; then + manifest="internal/manifests/ax-deployment2-gke.yaml" + else + manifest="internal/manifests/ax-deployment2.yaml" + fi service="" shift ;; @@ -244,6 +266,10 @@ cloud_delete() { esac done + if [ "$is_gke" = true ] && [ "$manifest" = "manifests/ax-deployment.yaml.tmpl" ]; then + manifest="manifests/ax-deployment-gke.yaml.tmpl" + fi + log_step "Deleting AX Server and Harnesses ($manifest) from Kubernetes cluster..." sed -e "s|\${GEMINI_API_KEY}|dummy-key|g" \ -e "s|\${BUCKET_NAME}|dummy-bucket|g" \ diff --git a/internal/manifests/ax-deployment2-gke.yaml b/internal/manifests/ax-deployment2-gke.yaml new file mode 100644 index 0000000..de8c688 --- /dev/null +++ b/internal/manifests/ax-deployment2-gke.yaml @@ -0,0 +1,134 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: v1 +kind: Namespace +metadata: + name: ax +--- +apiVersion: v1 +kind: Namespace +metadata: + name: custom-harness +--- +apiVersion: ate.gke.io/v1alpha1 +kind: WorkerPool +metadata: + name: ax-harness-workerpool + namespace: ax +spec: + replicas: 5 + containers: + - name: "axharness" + image: ko://github.com/google/ax/cmd/ax + command: ["/ko-app/ax", "harness"] + ports: + - containerPort: 50053 +--- +apiVersion: ate.gke.io/v1alpha1 +kind: ActorTemplate +metadata: + name: antigravity-template + namespace: ax +spec: + workerPoolRef: + name: ax-harness-workerpool + snapshotsConfig: + location: + bucket: "${BUCKET_NAME}" + folder: "antigravity" +--- +apiVersion: ate.gke.io/v1alpha1 +kind: WorkerPool +metadata: + name: custom-harness-workerpool + namespace: custom-harness +spec: + replicas: 3 + containers: + - name: "axharness" + image: ko://github.com/google/ax/cmd/ax + command: ["/ko-app/ax", "harness"] + ports: + - containerPort: 50053 +--- +apiVersion: ate.gke.io/v1alpha1 +kind: ActorTemplate +metadata: + name: hello-world-template + namespace: custom-harness +spec: + workerPoolRef: + name: custom-harness-workerpool + snapshotsConfig: + location: + bucket: "${BUCKET_NAME}" + folder: "hello-world" +--- +apiVersion: apps/v1 +kind: ReplicaSet +metadata: + name: ax-server + namespace: ax + labels: + app: ax-server +spec: + replicas: 3 + selector: + matchLabels: + app: ax-server + template: + metadata: + labels: + app: ax-server + spec: + containers: + - name: ax-server + image: ko://github.com/google/ax/cmd/ax + command: ["/ko-app/ax", "serve", "--config", "/etc/ax/ax.yaml"] + ports: + - containerPort: 8494 + env: + - name: GEMINI_API_KEY + value: "${GEMINI_API_KEY}" + - name: AX_SUBSTRATE + value: "1" + - name: AX_SUBSTRATE_ENDPOINT + value: "api.ate-system.svc:443" + volumeMounts: + - name: ax-config + mountPath: /etc/ax + volumes: + - name: ax-config + configMap: + name: ax-server-config +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: 
ax-server-config + namespace: ax +data: + ax.yaml: | + harnesses: + default: antigravity + # Built-in harness + antigravity: + - id: antigravity + # Custom harness + substrate: + - id: hello-world + namespace: custom-harness + template: hello-world-template + port: 50053 diff --git a/manifests/ax-deployment-gke.yaml.tmpl b/manifests/ax-deployment-gke.yaml.tmpl new file mode 100644 index 0000000..90bd7c0 --- /dev/null +++ b/manifests/ax-deployment-gke.yaml.tmpl @@ -0,0 +1,219 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: v1 +kind: Namespace +metadata: + name: ax +--- +apiVersion: ate.gke.io/v1alpha1 +kind: WorkerPool +metadata: + name: ax-workerpool + namespace: ax +spec: + replicas: 5 + containers: + - name: "ax" + image: ko://github.com/google/ax/cmd/ax + command: ["/ko-app/ax", "serve"] + ports: + - containerPort: 8494 + env: + - name: GEMINI_API_KEY + value: "${GEMINI_API_KEY}" + - name: AX_SUBSTRATE + value: "1" +--- +apiVersion: ate.gke.io/v1alpha1 +kind: ActorTemplate +metadata: + name: ax-template + namespace: ax +spec: + workerPoolRef: + name: ax-workerpool + snapshotsConfig: + location: + bucket: "${BUCKET_NAME}" + folder: "ax" +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: ax-router + namespace: ax +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: ax-router-config + namespace: ax +data: + envoy.yaml: | + admin: + address: + socket_address: + address: 0.0.0.0 + port_value: 9901 + static_resources: + listeners: + - name: listener_0 + address: + socket_address: + address: 0.0.0.0 + port_value: 8080 + filter_chains: + - filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + stat_prefix: ingress_http + stream_idle_timeout: 0s + generate_request_id: true + access_log: + - name: envoy.access_loggers.stdout + typed_config: + "@type": type.googleapis.com/envoy.extensions.access_loggers.stream.v3.StdoutAccessLog + http_filters: + - name: envoy.filters.http.ext_authz + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.ext_authz.v3.ExtAuthz + grpc_service: + envoy_grpc: + cluster_name: epp-cluster + timeout: 10s + transport_api_version: V3 + with_request_body: + max_request_bytes: 1048576 + allow_partial_message: true + pack_as_bytes: true + failure_mode_allow: false + - name: envoy.filters.http.dynamic_forward_proxy + typed_config: + "@type": 
type.googleapis.com/envoy.extensions.filters.http.dynamic_forward_proxy.v3.FilterConfig + dns_cache_config: + name: dynamic_forward_proxy_cache_config + dns_lookup_family: V4_ONLY + - name: envoy.filters.http.router + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + route_config: + name: ax-route + virtual_hosts: + - name: ax-virtual-host + domain: ["*"] + routes: + - match: + prefix: "/" + route: + cluster: dynamic_forward_proxy_cluster + timeout: 0s + idle_timeout: 0s + typed_per_filter_config: + envoy.filters.http.dynamic_forward_proxy: + "@type": type.googleapis.com/envoy.extensions.filters.http.dynamic_forward_proxy.v3.PerRouteConfig + host_rewrite_header: x-backend-ip + clusters: + - name: dynamic_forward_proxy_cluster + lb_policy: CLUSTER_PROVIDED + cluster_type: + name: envoy.clusters.dynamic_forward_proxy + typed_config: + "@type": type.googleapis.com/envoy.extensions.clusters.dynamic_forward_proxy.v3.ClusterConfig + dns_cache_config: + name: dynamic_forward_proxy_cache_config + dns_lookup_family: V4_ONLY + allow_insecure_cluster_options: true + typed_extension_protocol_options: + envoy.extensions.upstreams.http.v3.HttpProtocolOptions: + "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions + explicit_http_config: + http2_protocol_options: {} + - name: epp-cluster + connect_timeout: 0.25s + type: STATIC + typed_extension_protocol_options: + envoy.extensions.upstreams.http.v3.HttpProtocolOptions: + "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions + explicit_http_config: + http2_protocol_options: {} + load_assignment: + cluster_name: epp-cluster + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: 127.0.0.1 + port_value: 50051 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ax-router + namespace: ax + labels: + app: ax-router +spec: + replicas: 1 + selector: + matchLabels: + app: ax-router + template: + 
metadata: + labels: + app: ax-router + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + serviceAccount: ax-router + containers: + - name: envoy + image: envoyproxy/envoy:v1.30-latest + command: + - /usr/local/bin/envoy + - -c + - /etc/envoy/envoy.yaml + - --component-log-level + - upstream:debug,router:debug,ext_authz:debug + ports: + - containerPort: 8080 + name: http + - containerPort: 9901 + name: admin + volumeMounts: + - name: envoy-config + mountPath: /etc/envoy + - name: axepp + image: ko://github.com/google/ax/cmd/axepp + args: + - "--grpc-server-cred-bundle=/run/servicedns.podcert.ate.dev/credential-bundle.pem" + ports: + - containerPort: 50051 + name: grpc + volumeMounts: + - name: "servicedns" + mountPath: "/run/servicedns.podcert.ate.dev" + volumes: + - name: envoy-config + configMap: + name: ax-router-config + - name: "servicedns" + projected: + sources: + - podCertificate: + signerName: servicedns.podcert.ate.dev/identity + keyType: ECDSAP256 + credentialBundlePath: credential-bundle.pem From 3c04dc9f41fef168a99ab75e9c24c65bd6ae9a8a Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Thu, 11 Jun 2026 11:25:50 -0700 Subject: [PATCH 06/12] Fix GKE Envoy router mountPath signerName and indentation typos --- manifests/ax-deployment-gke.yaml.tmpl | 30 +++++++++++++-------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/manifests/ax-deployment-gke.yaml.tmpl b/manifests/ax-deployment-gke.yaml.tmpl index 90bd7c0..2042b91 100644 --- a/manifests/ax-deployment-gke.yaml.tmpl +++ b/manifests/ax-deployment-gke.yaml.tmpl @@ -113,18 +113,18 @@ data: name: ax-route virtual_hosts: - name: ax-virtual-host - domain: ["*"] - routes: - - match: - prefix: "/" - route: - cluster: dynamic_forward_proxy_cluster - timeout: 0s - idle_timeout: 0s - typed_per_filter_config: - envoy.filters.http.dynamic_forward_proxy: - "@type": type.googleapis.com/envoy.extensions.filters.http.dynamic_forward_proxy.v3.PerRouteConfig - host_rewrite_header: 
x-backend-ip + domains: ["*"] + routes: + - match: + prefix: "/" + route: + cluster: dynamic_forward_proxy_cluster + timeout: 0s + idle_timeout: 0s + typed_per_filter_config: + envoy.filters.http.dynamic_forward_proxy: + "@type": type.googleapis.com/envoy.extensions.filters.http.dynamic_forward_proxy.v3.PerRouteConfig + host_rewrite_header: x-backend-ip clusters: - name: dynamic_forward_proxy_cluster lb_policy: CLUSTER_PROVIDED @@ -199,13 +199,13 @@ spec: - name: axepp image: ko://github.com/google/ax/cmd/axepp args: - - "--grpc-server-cred-bundle=/run/servicedns.podcert.ate.dev/credential-bundle.pem" + - "--grpc-server-cred-bundle=/run/servicedns.podcert.gke.io/credential-bundle.pem" ports: - containerPort: 50051 name: grpc volumeMounts: - name: "servicedns" - mountPath: "/run/servicedns.podcert.ate.dev" + mountPath: "/run/servicedns.podcert.gke.io" volumes: - name: envoy-config configMap: @@ -214,6 +214,6 @@ spec: projected: sources: - podCertificate: - signerName: servicedns.podcert.ate.dev/identity + signerName: servicedns.podcert.gke.io/identity keyType: ECDSAP256 credentialBundlePath: credential-bundle.pem From 3bd4d64bb95d1d9037735ff02aae1587aa9dfcc2 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Thu, 11 Jun 2026 11:50:01 -0700 Subject: [PATCH 07/12] Update codebase to use GKE SubstrATE protobuf schemas (ActorKey nested fields, ActiveWorker IP resolution) and local module replacement --- cmd/axepp/main.go | 18 ++++++++---- go.mod | 21 +++++++------- go.sum | 38 +++++++++---------------- internal/experimental/agent/ate.go | 6 ++-- internal/experimental/k8s/ate/client.go | 22 ++++++++++---- internal/harness/substrate.go | 6 ++-- 6 files changed, 58 insertions(+), 53 deletions(-) diff --git a/cmd/axepp/main.go b/cmd/axepp/main.go index 207cd0f..bc5c5af 100644 --- a/cmd/axepp/main.go +++ b/cmd/axepp/main.go @@ -29,7 +29,7 @@ import ( "net" "os" - "github.com/agent-substrate/substrate/proto/ateapipb" + "github.com/ai-on-gke/SubstrATE/proto/ateapipb" corepb 
"github.com/envoyproxy/go-control-plane/envoy/config/core/v3" authv3 "github.com/envoyproxy/go-control-plane/envoy/service/auth/v3" "github.com/google/ax/proto" @@ -56,9 +56,11 @@ type authServer struct { func runSession(ctx context.Context, sc ateapipb.ControlClient, sessionID string) (*authv3.CheckResponse, error) { slog.InfoContext(ctx, "About to call CreateActor", slog.Any("actor-id", sessionID)) if _, err := sc.CreateActor(ctx, &ateapipb.CreateActorRequest{ - ActorId: sessionID, - ActorTemplateNamespace: *actorTemplateNamespace, - ActorTemplateName: *actorTemplateName, + ActorKey: &ateapipb.ActorKey{ + ActorTemplateNamespace: *actorTemplateNamespace, + ActorTemplateName: *actorTemplateName, + ActorId: sessionID, + }, }); err != nil { if status.Code(err) != codes.AlreadyExists { slog.ErrorContext(ctx, "CreateActor error", slog.Any("error", err)) @@ -70,7 +72,11 @@ func runSession(ctx context.Context, sc ateapipb.ControlClient, sessionID string slog.InfoContext(ctx, "About to call ResumeActor", slog.Any("actor-id", sessionID)) resp, err := sc.ResumeActor(ctx, &ateapipb.ResumeActorRequest{ - ActorId: sessionID, + ActorKey: &ateapipb.ActorKey{ + ActorTemplateNamespace: *actorTemplateNamespace, + ActorTemplateName: *actorTemplateName, + ActorId: sessionID, + }, }) if err != nil { slog.InfoContext(ctx, "ResumeActor error", slog.Any("error", err)) @@ -79,7 +85,7 @@ func runSession(ctx context.Context, sc ateapipb.ControlClient, sessionID string }, nil } - destinationIP := resp.GetActor().GetAteomPodIp() + destinationIP := resp.GetActor().GetActiveWorker().GetIp() destrinationAddr := net.JoinHostPort(destinationIP, *axPort) slog.InfoContext(ctx, "Redirecting request to backend", slog.String("address", destrinationAddr)) diff --git a/go.mod b/go.mod index 47fc344..9edb9fd 100644 --- a/go.mod +++ b/go.mod @@ -1,12 +1,12 @@ module github.com/google/ax -go 1.26.1 +go 1.26.3 require ( charm.land/huh/v2 v2.0.3 charm.land/lipgloss/v2 v2.0.3 github.com/a2aproject/a2a-go/v2 
v2.2.0 - github.com/agent-substrate/substrate v0.0.0 + github.com/ai-on-gke/SubstrATE v0.0.0-00010101000000-000000000000 github.com/envoyproxy/go-control-plane/envoy v1.37.0 github.com/google/uuid v1.6.0 github.com/spf13/cobra v1.10.2 @@ -49,7 +49,6 @@ require ( github.com/googleapis/enterprise-certificate-proxy v0.3.14 // indirect github.com/googleapis/gax-go/v2 v2.21.0 // indirect github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect - github.com/hashicorp/go-reap v0.0.0-20260220095743-4e27870b4f51 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/lucasb-eyer/go-colorful v1.4.0 // indirect github.com/mattn/go-isatty v0.0.20 // indirect @@ -61,21 +60,21 @@ require ( github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/spf13/pflag v1.0.10 // indirect - github.com/vishvananda/netlink v1.3.1 // indirect - github.com/vishvananda/netns v0.0.5 // indirect github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.64.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0 // indirect go.opentelemetry.io/otel v1.43.0 // indirect go.opentelemetry.io/otel/metric v1.43.0 // indirect go.opentelemetry.io/otel/trace v1.43.0 // indirect - golang.org/x/crypto v0.49.0 // indirect - golang.org/x/mod v0.33.0 // indirect - golang.org/x/net v0.52.0 // indirect - golang.org/x/sys v0.43.0 // indirect - golang.org/x/text v0.35.0 // indirect + golang.org/x/crypto v0.51.0 // indirect + golang.org/x/mod v0.35.0 // indirect + golang.org/x/net v0.55.0 // indirect + golang.org/x/sys v0.45.0 // indirect + golang.org/x/text v0.37.0 // indirect google.golang.org/api v0.274.0 // indirect modernc.org/libc v1.70.0 // indirect modernc.org/mathutil v1.7.1 // indirect modernc.org/memory v1.11.0 // indirect ) + +replace 
github.com/ai-on-gke/SubstrATE => /Users/anjalisridhar/SubstrATE diff --git a/go.sum b/go.sum index bb1ac88..dc96bbb 100644 --- a/go.sum +++ b/go.sum @@ -16,8 +16,6 @@ github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE= github.com/a2aproject/a2a-go/v2 v2.2.0 h1:eayiNXYpyTOLVhhQrGmIHlcy8GnOdnwaNdYQPvS84Ik= github.com/a2aproject/a2a-go/v2 v2.2.0/go.mod h1:htTxMwicNXXXEwwfjuB/Pd1g7UHDrswhSievncmTVcE= -github.com/agent-substrate/substrate v0.0.0 h1:XEX4QAjzaIcv4amBqBvPE/f40WV5WHRWo7u04xvqv/g= -github.com/agent-substrate/substrate v0.0.0/go.mod h1:8Z4SJqPWDMPBa76JgIdpiX0jTY1JXcfLTXEAtkUv7go= github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4= github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI= github.com/aymanbagabas/go-udiff v0.4.1 h1:OEIrQ8maEeDBXQDoGCbbTTXYJMYRCRO1fnodZ12Gv5o= @@ -90,8 +88,6 @@ github.com/googleapis/gax-go/v2 v2.21.0 h1:h45NjjzEO3faG9Lg/cFrBh2PgegVVgzqKzuZl github.com/googleapis/gax-go/v2 v2.21.0/go.mod h1:But/NJU6TnZsrLai/xBAQLLz+Hc7fHZJt/hsCz3Fih4= github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 h1:JeSE6pjso5THxAzdVpqr6/geYxZytqFMBCOtn/ujyeo= github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674/go.mod h1:r4w70xmWCQKmi1ONH4KIaBptdivuRPyosB9RmPlGEwA= -github.com/hashicorp/go-reap v0.0.0-20260220095743-4e27870b4f51 h1:MpKgm7VEcOAD3dIR+cRoK4rbCcjqYXsMGCnFWTcHfds= -github.com/hashicorp/go-reap v0.0.0-20260220095743-4e27870b4f51/go.mod h1:qIFzeFcJU3OIFk/7JreWXcUjFmcCaeHTH9KoNyHYVCs= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= @@ -130,16 +126,12 @@ github.com/spf13/pflag v1.0.10 
h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= -github.com/vishvananda/netlink v1.3.1 h1:3AEMt62VKqz90r0tmNhog0r/PpWKmrEShJU0wJW6bV0= -github.com/vishvananda/netlink v1.3.1/go.mod h1:ARtKouGSTGchR8aMwmkzC0qiNPrrWO5JS/XMVl45+b4= -github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zdEY= -github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.64.0 h1:ssfIgGNANqpVFCndZvcuyKbl0g+UAVcbBcqGkG28H0Y= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.64.0/go.mod h1:GQ/474YrbE4Jx8gZ4q5I4hrhUzM6UPzyrqJYV2AqPoQ= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0 h1:7iP2uCb7sGddAr30RRS6xjKy7AZ2JtTOPA3oolgVSw8= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0/go.mod h1:c7hN3ddxs/z6q9xwvfLPk+UHlWRQyaeR1LdgfL/66l0= go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I= go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0= go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM= @@ -151,25 +143,23 @@ go.opentelemetry.io/otel/sdk/metric v1.43.0/go.mod h1:C/RJtwSEJ5hzTiUz5pXF1kILHS go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A= 
go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= -golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4= -golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA= +golang.org/x/crypto v0.51.0 h1:IBPXwPfKxY7cWQZ38ZCIRPI50YLeevDLlLnyC5wRGTI= +golang.org/x/crypto v0.51.0/go.mod h1:8AdwkbraGNABw2kOX6YFPs3WM22XqI4EXEd8g+x7Oc8= golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 h1:mgKeJMpvi0yx/sU5GsxQ7p6s2wtOnGAHZWCHUM4KGzY= golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546/go.mod h1:j/pmGrbnkbPtQfxEe5D0VQhZC6qKbfKifgD0oM7sR70= -golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8= -golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w= -golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0= -golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw= +golang.org/x/mod v0.35.0 h1:Ww1D637e6Pg+Zb2KrWfHQUnH2dQRLBQyAtpr/haaJeM= +golang.org/x/mod v0.35.0/go.mod h1:+GwiRhIInF8wPm+4AoT6L0FA1QWAad3OMdTRx4tFYlU= +golang.org/x/net v0.55.0 h1:bcvxaJn3e1U6InsFWt1JUq1aSjnRxLzT2rtD2KfkDF8= +golang.org/x/net v0.55.0/go.mod h1:L5U2KuzuOe1lY7Z+aWVIKK6qEeJXnXV9yzGA+WCHJww= golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= -golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI= -golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= -golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8= -golang.org/x/text v0.35.0/go.mod 
h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA= -golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k= -golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0= +golang.org/x/sys v0.45.0 h1:dO4czNzziLiiXplLQgBCEpCvXQ3dnkn0SdaZSYdQ+FY= +golang.org/x/sys v0.45.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/text v0.37.0 h1:Cqjiwd9eSg8e0QAkyCaQTNHFIIzWtidPahFWR83rTrc= +golang.org/x/text v0.37.0/go.mod h1:a5sjxXGs9hsn/AJVwuElvCAo9v8QYLzvavO5z2PiM38= +golang.org/x/tools v0.44.0 h1:UP4ajHPIcuMjT1GqzDWRlalUEoY+uzoZKnhOjbIPD2c= +golang.org/x/tools v0.44.0/go.mod h1:KA0AfVErSdxRZIsOVipbv3rQhVXTnlU6UhKxHd1seDI= gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4= gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E= google.golang.org/api v0.274.0 h1:aYhycS5QQCwxHLwfEHRRLf9yNsfvp1JadKKWBE54RFA= diff --git a/internal/experimental/agent/ate.go b/internal/experimental/agent/ate.go index 8197b43..a3f5497 100644 --- a/internal/experimental/agent/ate.go +++ b/internal/experimental/agent/ate.go @@ -70,11 +70,11 @@ func (a *SubstrateAgent) Connect(ctx context.Context, conversationID string, exe if actor == nil { return fmt.Errorf("received nil actor in response") } - if actor.AteomPodIp == "" { - return fmt.Errorf("actor has no active worker IP address (AteomPodIp is empty)") + if actor.GetActiveWorker().GetIp() == "" { + return fmt.Errorf("actor has no active worker IP address (ActiveWorker.Ip is empty)") } - workerAddr := fmt.Sprintf("%s:%d", actor.AteomPodIp, a.config.Port) + workerAddr := fmt.Sprintf("%s:%d", actor.GetActiveWorker().GetIp(), a.config.Port) // 2. Connect to the Actor. 
var activeAgent agent.Agent switch strings.ToLower(a.config.Protocol) { diff --git a/internal/experimental/k8s/ate/client.go b/internal/experimental/k8s/ate/client.go index 4237285..3a8e402 100644 --- a/internal/experimental/k8s/ate/client.go +++ b/internal/experimental/k8s/ate/client.go @@ -19,7 +19,7 @@ import ( "context" "fmt" - "github.com/agent-substrate/substrate/proto/ateapipb" + "github.com/ai-on-gke/SubstrATE/proto/ateapipb" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" ) @@ -59,9 +59,11 @@ func NewClient(ns, template, target string, opts ...grpc.DialOption) (*Client, e func (c *Client) CreateActor(ctx context.Context, id string) (*ateapipb.CreateActorResponse, error) { client := ateapipb.NewControlClient(c.conn) resp, err := client.CreateActor(ctx, &ateapipb.CreateActorRequest{ - ActorId: id, - ActorTemplateNamespace: c.namespace, - ActorTemplateName: c.template, + ActorKey: &ateapipb.ActorKey{ + ActorTemplateNamespace: c.namespace, + ActorTemplateName: c.template, + ActorId: id, + }, }) if err != nil { return nil, fmt.Errorf("error when calling Control.CreateActor: %w", err) @@ -74,7 +76,11 @@ func (c *Client) CreateActor(ctx context.Context, id string) (*ateapipb.CreateAc func (c *Client) ResumeActor(ctx context.Context, id string) (*ateapipb.ResumeActorResponse, error) { client := ateapipb.NewControlClient(c.conn) resp, err := client.ResumeActor(ctx, &ateapipb.ResumeActorRequest{ - ActorId: id, + ActorKey: &ateapipb.ActorKey{ + ActorTemplateNamespace: c.namespace, + ActorTemplateName: c.template, + ActorId: id, + }, }) if err != nil { return nil, fmt.Errorf("error when calling Control.ResumeActor: %w", err) @@ -86,7 +92,11 @@ func (c *Client) ResumeActor(ctx context.Context, id string) (*ateapipb.ResumeAc func (c *Client) SuspendActor(ctx context.Context, id string) (*ateapipb.SuspendActorResponse, error) { client := ateapipb.NewControlClient(c.conn) resp, err := client.SuspendActor(ctx, &ateapipb.SuspendActorRequest{ - 
ActorId: id, + ActorKey: &ateapipb.ActorKey{ + ActorTemplateNamespace: c.namespace, + ActorTemplateName: c.template, + ActorId: id, + }, }) if err != nil { return nil, fmt.Errorf("error when calling Control.SuspendActor: %w", err) diff --git a/internal/harness/substrate.go b/internal/harness/substrate.go index 6ca7bee..d482ca7 100644 --- a/internal/harness/substrate.go +++ b/internal/harness/substrate.go @@ -96,12 +96,12 @@ func (h *SubstrateHarness) Start(ctx context.Context, conversationID string) (Ex if actor == nil { return nil, fmt.Errorf("received nil actor in response for %s", conversationID) } - if actor.AteomPodIp == "" { + if actor.GetActiveWorker().GetIp() == "" { return nil, fmt.Errorf("actor %s has no active worker IP address", conversationID) } - // Establish connection to the actor's worker IP - workerAddr := fmt.Sprintf("%s:%d", actor.AteomPodIp, h.port) + // 2. Establish connection to the actor's worker IP + workerAddr := fmt.Sprintf("%s:%d", actor.GetActiveWorker().GetIp(), h.port) conn, err := grpc.NewClient(workerAddr, h.dialOpts...) if err != nil { return nil, fmt.Errorf("failed to dial remote harness service at %s: %w", workerAddr, err) From 5bf4d480e494f1d8017f12f2755c584c22360dc8 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Thu, 11 Jun 2026 11:53:29 -0700 Subject: [PATCH 08/12] Recreate hack/install-ax.sh delegation wrapper for backward compatibility --- hack/install-ax.sh | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100755 hack/install-ax.sh diff --git a/hack/install-ax.sh b/hack/install-ax.sh new file mode 100755 index 0000000..dade69c --- /dev/null +++ b/hack/install-ax.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e +set -u +set -o pipefail + +ROOT=$(git rev-parse --show-toplevel) +cd "${ROOT}" + +# Delegate to the consolidated ax-dev.sh script +case "${1:-}" in + --deploy-ax-server) + shift + ./hack/ax-dev.sh cloud deploy "$@" + ;; + --delete-ax-server) + shift + ./hack/ax-dev.sh cloud delete "$@" + ;; + -h|--help) + echo "Usage: $0 [options]" + echo "" + echo "Options:" + echo " --deploy-ax-server Deploy AX server and components using ko" + echo " --delete-ax-server Delete AX server and components from cluster" + echo " -h, --help Show this help message" + ;; + *) + echo "Usage: $0 --deploy-ax-server | --delete-ax-server" + exit 1 + ;; +esac From 8d449fd3941e10738d3a3fedfe299d6549178040 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Thu, 11 Jun 2026 12:08:49 -0700 Subject: [PATCH 09/12] Document GKE vs Open-Source SubstrATE compatibility and auto-routing features in READMEs --- internal/manifests/README.md | 3 +++ manifests/README.md | 14 ++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/internal/manifests/README.md b/internal/manifests/README.md index 2e1c3ef..f290320 100644 --- a/internal/manifests/README.md +++ b/internal/manifests/README.md @@ -52,6 +52,9 @@ export KO_DEFAULTPLATFORMS="linux/amd64" ./hack/ax-dev.sh cloud deploy --harness ``` +> [!NOTE] +> If GKE SubstrATE is detected on your cluster, the script automatically applies GKE-specific harness configurations from `internal/manifests/ax-deployment2-gke.yaml` rather than the default open-source template. 
+ This command will: - Build the AX images using `ko` with the `harness` build tag. - Create the `ax` namespace (AX control plane + built-in harnesses) and the diff --git a/manifests/README.md b/manifests/README.md index d966848..b0241da 100644 --- a/manifests/README.md +++ b/manifests/README.md @@ -63,6 +63,20 @@ To remove AX resources from your cluster, run: --- +## ☁️ GKE SubstrATE vs. Open-Source SubstrATE Compatibility + +AX supports both self-managed open-source SubstrATE (`ate.dev/v1alpha1`) and managed GKE SubstrATE (`ate.gke.io/v1alpha1`) clusters. + +### Key Architectural & Schema Differences: +* **Container Configuration**: In open-source SubstrATE, containers (such as the AX server or harnesses) are defined inside the `ActorTemplate` resource. In GKE, the managed sandboxing engine requires containers to be declared inside the `WorkerPool` resource's `spec.containers` instead. +* **Snapshot Storage (`snapshotsConfig.location`)**: Open-source SubstrATE takes a single string URI (`gs://bucket/folder/`), whereas GKE validates this as a structured object containing separate `bucket` and `folder` string keys. +* **Envoy Router Cert Signer**: The SPIFFE certificate signer name is `servicedns.podcert.ate.dev/identity` in open-source SubstrATE, but is `servicedns.podcert.gke.io/identity` in GKE. + +### Automatic Routing: +The unified deploy script (`./hack/ax-dev.sh`) checks whether the `workerpools.ate.gke.io` CRD is installed on your active cluster. If it is, the script automatically uses the GKE-specific manifests (`manifests/ax-deployment-gke.yaml.tmpl`, or `internal/manifests/ax-deployment2-gke.yaml` with `--harness`/`--v2`), so no manual manifest changes are needed. 
+ +--- + ## 🛠️ Inspection & Diagnostics Use the **`kubectl ate`** CLI tool to inspect the live states of From ee679a04edb1e4b8b595a384c2bf09260acab14e Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Thu, 11 Jun 2026 12:18:44 -0700 Subject: [PATCH 10/12] Document ax+substrate+harness cloud testing steps in hack/README.md --- hack/README.md | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/hack/README.md b/hack/README.md index a34d189..2918e1b 100644 --- a/hack/README.md +++ b/hack/README.md @@ -49,13 +49,38 @@ Runs local end-to-end integration tests using the stateful Python harness and Go Deploys or deletes the AX Orchestrator server resources on a SubstrATE-enabled Kubernetes cluster. - **Commands**: - - **Deploy to Cluster**: + * **Standard Deployment**: ```bash export GEMINI_API_KEY="your-gemini-api-key" export BUCKET_NAME="your-gcs-bucket-name" ./hack/ax-dev.sh cloud deploy ``` - - **Tear Down / Delete**: + * **Harness E2E Path Deployment (V2 Experimental)**: + Deploys AX server along with isolated warm harness worker pools (`antigravity` and `hello-world` actors): ```bash - ./hack/ax-dev.sh cloud delete + export GEMINI_API_KEY="your-gemini-api-key" + export BUCKET_NAME="your-gcs-bucket-name" + export KO_DOCKER_REPO="gcr.io/your-project-id/ate-images" + export KO_DEFAULTPLATFORMS="linux/amd64" + ./hack/ax-dev.sh cloud deploy --harness + ``` + * **Tear Down / Delete**: + ```bash + ./hack/ax-dev.sh cloud delete [--harness] ``` + +--- + +### 4. Running E2E Harness Tests on Cloud / GKE +To test the deployed AX + SubstrATE + Harness stack end-to-end: + +1. **Port-Forward AX Server**: + ```bash + kubectl port-forward -n ax rs/ax-server 8494:8494 + ``` +2. 
**Execute request targeting local tunnel**: + Compile the local CLI and execute the plan request (it will launch the default `antigravity` harness actor on SubstrATE automatically): + ```bash + go build -o bin/ax ./cmd/ax + ./bin/ax exec --server localhost:8494 --input "hello" + ``` From be15087f17992969cfaf72ed78a3c5b355732f5c Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Thu, 11 Jun 2026 12:44:33 -0700 Subject: [PATCH 11/12] Explicitly document --v2 flag alias in READMEs --- hack/README.md | 4 ++-- internal/manifests/README.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hack/README.md b/hack/README.md index 2918e1b..e2a8dce 100644 --- a/hack/README.md +++ b/hack/README.md @@ -62,11 +62,11 @@ Deploys or deletes the AX Orchestrator server resources on a SubstrATE-enabled K export BUCKET_NAME="your-gcs-bucket-name" export KO_DOCKER_REPO="gcr.io/your-project-id/ate-images" export KO_DEFAULTPLATFORMS="linux/amd64" - ./hack/ax-dev.sh cloud deploy --harness + ./hack/ax-dev.sh cloud deploy --harness # or --v2 ``` * **Tear Down / Delete**: ```bash - ./hack/ax-dev.sh cloud delete [--harness] + ./hack/ax-dev.sh cloud delete [--harness | --v2] ``` --- diff --git a/internal/manifests/README.md b/internal/manifests/README.md index f290320..be186d3 100644 --- a/internal/manifests/README.md +++ b/internal/manifests/README.md @@ -49,7 +49,7 @@ export BUCKET_NAME="snapshot-substrate-test-$PROJECT_ID" export KO_DOCKER_REPO="gcr.io/$PROJECT_ID/ate-images" export KO_DEFAULTPLATFORMS="linux/amd64" -./hack/ax-dev.sh cloud deploy --harness +./hack/ax-dev.sh cloud deploy --harness # or --v2 ``` > [!NOTE] From e711fff425c4578e468bd62a466d0205ea17f6a5 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Thu, 11 Jun 2026 12:52:52 -0700 Subject: [PATCH 12/12] Remove trace.go fallback changes --- cmd/ax/trace.go | 64 ------------------------------------------------- 1 file changed, 64 deletions(-) diff --git a/cmd/ax/trace.go b/cmd/ax/trace.go index 
c26501b..a95f2d5 100644 --- a/cmd/ax/trace.go +++ b/cmd/ax/trace.go @@ -28,7 +28,6 @@ import ( "github.com/google/ax/internal/controller/executor" "github.com/google/ax/proto" "github.com/spf13/cobra" - "google.golang.org/protobuf/types/known/timestamppb" ) var ( @@ -159,83 +158,20 @@ func fetch(ctx context.Context, cfg *cliutil.Config, convID string) ([]*proto.Ex } var allEvents []*proto.ExecutionEvent - var hasExecLogs bool for _, eID := range execIDs { events, err := evLog.ExecEvents(ctx, eID) if err != nil { return nil, "", nil, fmt.Errorf("failed to query events for exec %s: %w", eID, err) } - if len(events) > 0 { - hasExecLogs = true - } allEvents = append(allEvents, events...) } // Use the first execID as the rootExecID as requested by user rootExecID := execIDs[0] - if !hasExecLogs { - allEvents = reconstructExecEvents(convEvents) - } - return allEvents, rootExecID, execIDs, nil } -func reconstructExecEvents(convEvents []*proto.ConversationEvent) []*proto.ExecutionEvent { - eventsMap := make(map[string][]*proto.ConversationEvent) - var execIDsOrdered []string - seenExec := make(map[string]bool) - - for _, ev := range convEvents { - if ev.ExecId == "" { - continue - } - if !seenExec[ev.ExecId] { - execIDsOrdered = append(execIDsOrdered, ev.ExecId) - seenExec[ev.ExecId] = true - } - eventsMap[ev.ExecId] = append(eventsMap[ev.ExecId], ev) - } - - var result []*proto.ExecutionEvent - baseTime := time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC) - - for _, eID := range execIDsOrdered { - evs := eventsMap[eID] - if len(evs) == 0 { - continue - } - - var inputs []*proto.Message - var outputs []*proto.Message - var finalState proto.State - - for i, ev := range evs { - if i == 0 { - inputs = append(inputs, ev.Messages...) - } else { - outputs = append(outputs, ev.Messages...) 
- } - finalState = ev.State - } - - seqOffset := evs[0].Seq - mockTime := baseTime.Add(time.Duration(seqOffset) * time.Second) - - execEv := &proto.ExecutionEvent{ - ExecId: eID, - AgentId: "unknown", - Inputs: inputs, - Outputs: outputs, - State: finalState, - Timestamp: timestamppb.New(mockTime), - } - result = append(result, execEv) - } - - return result -} - func buildExecTraces(execIDs []string, events []*proto.ExecutionEvent) []ExecTrace { execsMap := make(map[string][]ExecutionEvent)