diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index e87842781..2688d5e37 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -67,6 +67,9 @@ jobs: - name: Install ko uses: ko-build/setup-ko@v0.7 + - name: Install Helm + uses: azure/setup-helm@v4 + - name: Log in to GHCR uses: docker/login-action@v3 with: @@ -85,7 +88,7 @@ jobs: run: | set -o errexit -o nounset -o pipefail - for component in ateapi atelet ateom-gvisor podcertcontroller atenet; do + for component in ateapi atecontroller atelet ateom-gvisor podcertcontroller atenet; do KO_DOCKER_REPO="${IMAGE_REPOSITORY}/${component}" \ ./hack/run-tool.sh ko build \ --tags "${IMAGE_TAGS}" \ @@ -94,6 +97,41 @@ jobs: "./cmd/${component}" done + - name: Package and push Helm charts + if: inputs.create_release + env: + HELM_EXPERIMENTAL_OCI: "1" + CHART_REPOSITORY: oci://ghcr.io/kagent-dev/substrate/helm + # Pass workflow expressions through env instead of interpolating them + # into the script, so their values are never parsed as shell code. + RELEASE_TAG: ${{ steps.tag.outputs.value }} + REGISTRY_USERNAME: ${{ github.actor }} + REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }} + run: | + set -o errexit -o nounset -o pipefail + + tag="${RELEASE_TAG}" + chart_version="${tag#v}" + package_dir="${RUNNER_TEMP}/helm-packages" + mkdir -p "${package_dir}" + + echo "${REGISTRY_PASSWORD}" \ + | helm registry login ghcr.io \ + --username "${REGISTRY_USERNAME}" \ + --password-stdin + + helm package charts/substrate-crds \ + --destination "${package_dir}" \ + --version "${chart_version}" \ + --app-version "${tag}" + helm package charts/substrate \ + --destination "${package_dir}" \ + --version "${chart_version}" \ + --app-version "${tag}" + + helm push "${package_dir}/substrate-crds-${chart_version}.tgz" "${CHART_REPOSITORY}" + helm push "${package_dir}/substrate-${chart_version}.tgz" "${CHART_REPOSITORY}" + - name: Create GitHub Release if: inputs.create_release uses: softprops/action-gh-release@v2 diff --git a/Makefile b/Makefile index c6b70cc5e..770d426e7 100644 --- a/Makefile +++ b/Makefile @@ -44,10 +44,11 @@ build: build-images build-atectl .PHONY: build-images build-images: - $(KO) build --ldflags "$(LDFLAGS)" 
./cmd/ateapi - $(KO) build --ldflags "$(LDFLAGS)" ./cmd/atelet - $(KO) build --ldflags "$(LDFLAGS)" ./cmd/podcertcontroller - $(KO) build --ldflags "$(LDFLAGS)" ./cmd/atenet + $(KO) build --base-import-paths --ldflags "$(LDFLAGS)" ./cmd/ateapi + $(KO) build --base-import-paths --ldflags "$(LDFLAGS)" ./cmd/atecontroller + $(KO) build --base-import-paths --ldflags "$(LDFLAGS)" ./cmd/atelet + $(KO) build --base-import-paths --ldflags "$(LDFLAGS)" ./cmd/podcertcontroller + $(KO) build --base-import-paths --ldflags "$(LDFLAGS)" ./cmd/atenet .PHONY: build-atectl build-atectl: @@ -92,3 +93,19 @@ verify: test .PHONY: clean clean: rm -rf $(BINDIR) + +# Render the substrate Helm chart into manifests/ate-install/ (mTLS mode, +# the historical default install). Run this whenever charts/substrate/ changes. +.PHONY: helm-template +helm-template: + @./hack/render-manifests.sh + +# Verify that manifests/ate-install/ matches the chart output. Used in CI. +.PHONY: verify-helm-template +verify-helm-template: + @./hack/render-manifests.sh --check + +# Verify that the CRD chart mirrors the generated CRDs. +.PHONY: verify-crd-chart +verify-crd-chart: + @./hack/verify/crd-chart.sh diff --git a/README.md b/README.md index fb63afb54..6bb2de64c 100644 --- a/README.md +++ b/README.md @@ -102,7 +102,7 @@ To quickly set up the complete environment: 2. Run the following steps: ```shell -# create cluster and local registry +# create cluster and local registry (enables podcert feature gates for mTLS) hack/create-kind-cluster.sh # install ate, valkey, rustfs @@ -126,6 +126,25 @@ kubectl port-forward -n ate-system svc/atenet-router 8000:80 curl -X POST -H "Host: my-counter-1.actors.resources.substrate.ate.dev" -i http://localhost:8000/ ``` +#### JWT mode (no feature gates) + +For clusters where you can't enable the `ClusterTrustBundle` / +`PodCertificateRequest` feature gates (most managed Kubernetes), use the +JWT install path. 
Authentication is via projected ServiceAccount tokens +verified against the cluster's OIDC issuer; server certs come from a +self-signed pair bootstrapped by the install script. + +```shell +# create cluster WITHOUT podcert feature gates +KIND_ENABLE_PODCERT=false hack/create-kind-cluster.sh + +# install ate via Helm in JWT mode (auto-bootstraps Secret/ConfigMap) +hack/install-ate-kind-jwt.sh + +# the demo + kubectl-ate + port-forward steps from the mTLS Quickstart +# above work identically from here. +``` + ### GKE Quickstart (Development) 1. Create and configure your environment file: diff --git a/manifests/ate-install/ate-system-namespace.yaml b/charts/substrate-crds/Chart.yaml similarity index 65% rename from manifests/ate-install/ate-system-namespace.yaml rename to charts/substrate-crds/Chart.yaml index 4fa19da0a..a69dcee0e 100644 --- a/manifests/ate-install/ate-system-namespace.yaml +++ b/charts/substrate-crds/Chart.yaml @@ -12,7 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -apiVersion: v1 -kind: Namespace -metadata: - name: ate-system \ No newline at end of file +apiVersion: v2 +name: substrate-crds +description: Agent Substrate CustomResourceDefinitions. +type: application +version: 0.1.0 +appVersion: "0.1.0" +home: https://github.com/agent-substrate/substrate +sources: +- https://github.com/agent-substrate/substrate +keywords: +- agent +- actor +- substrate +- crds diff --git a/charts/substrate-crds/README.md b/charts/substrate-crds/README.md new file mode 100644 index 000000000..12fa31f0a --- /dev/null +++ b/charts/substrate-crds/README.md @@ -0,0 +1,13 @@ +# substrate-crds + +Helm chart for installing the Agent Substrate CRDs. 
+ +Install this chart before installing the main `substrate` chart: + +```bash +helm upgrade --install substrate-crds ./charts/substrate-crds +helm upgrade --install substrate ./charts/substrate --namespace ate-system --create-namespace +``` + +The CRD YAMLs in `templates/` mirror `manifests/ate-install/generated/`. +Run `hack/verify/crd-chart.sh` to verify they are in sync. diff --git a/charts/substrate-crds/templates/ate.dev_actortemplates.yaml b/charts/substrate-crds/templates/ate.dev_actortemplates.yaml new file mode 100644 index 000000000..cdb07e788 --- /dev/null +++ b/charts/substrate-crds/templates/ate.dev_actortemplates.yaml @@ -0,0 +1,357 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.20.1 + name: actortemplates.ate.dev +spec: + group: ate.dev + names: + kind: ActorTemplate + listKind: ActorTemplateList + plural: actortemplates + shortNames: + - actortemplate + singular: actortemplate + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec defines the desired state of ActorTemplate + properties: + containers: + description: Containers is the workload definition. + items: + description: A single application container that you want to run + within a WorkerPool. + properties: + command: + description: Entrypoint array. Not executed within a shell. + items: + type: string + maxItems: 64 + type: array + x-kubernetes-list-type: atomic + env: + description: Environment variables to set in the worker replicas. + items: + description: |- + EnvVar represents an environment variable supplied to a container in an + ActorTemplate. It models only a subset of Kubernetes Pod env behavior: + literal values are not expanded with Kubernetes-style $(VAR) references, + envFrom is not supported, and valueFrom currently supports only secretKeyRef. + properties: + name: + description: |- + Name is the name of the environment variable. May be any printable ASCII + character except '='. + minLength: 1 + pattern: ^[ -<>-~]+$ + type: string + value: + description: |- + Variable value. Mutually exclusive with ValueFrom. + Value is the literal value of the environment variable. Unlike in + Kubernetes pods, this value is not interpolated, and $(VAR) + references are not expanded. + minLength: 0 + type: string + valueFrom: + description: |- + Source for the environment variable's value. Mutually exclusive with + Value. 
+ maxProperties: 1 + minProperties: 1 + properties: + secretKeyRef: + description: Selects a key of a Secret in the ActorTemplate's + namespace. + properties: + key: + description: Key to select within the Secret. + minLength: 1 + pattern: ^[-._a-zA-Z0-9]+$ + type: string + name: + description: Name of the referent Secret. + maxLength: 253 + type: string + x-kubernetes-validations: + - message: Name must be a valid DNS subdomain + rule: '!format.dns1123Subdomain().validate(self).hasValue()' + optional: + description: Specify whether the Secret or its + key must be defined. + type: boolean + required: + - key + - name + type: object + type: object + required: + - name + type: object + x-kubernetes-validations: + - message: exactly one of the fields in [value valueFrom] + must be set + rule: '[has(self.value),has(self.valueFrom)].filter(x,x==true).size() + == 1' + maxItems: 32 + type: array + image: + description: Image to use for the worker replicas. + type: string + x-kubernetes-validations: + - message: All images must be pinned (changing the image invalidates + snapshots) + rule: self.contains('@') + name: + description: Name of the container. + maxLength: 63 + type: string + x-kubernetes-validations: + - message: Name must be a valid DNS label + rule: '!format.dns1123Label().validate(self).hasValue()' + required: + - image + - name + type: object + maxItems: 10 + type: array + pauseImage: + description: |- + PauseImage is the container to use as the root sandbox container. 
+ + Typically, set it to [1] for on-gcp, and [2] for off-gcp + + - [1] gcr.io/gke-release/pause@sha256:bcbd57ba5653580ec647b16d8163cdd1112df3609129b01f912a8032e48265da + - [2] registry.k8s.io/pause:3.10.2@sha256:f548e0e8e3dc1896ca956272154dde3314e8cc4fde0a57577ee9fa1c63f5baf4 + type: string + x-kubernetes-validations: + - message: All images must be pinned (changing the image invalidates + snapshots) + rule: self.contains('@') + runsc: + description: Parameters for fetching the runsc binary to use. + properties: + amd64: + description: Configuration for the amd64 binary. + properties: + sha256Hash: + description: |- + The SHA256 hash of the binary to download. Used both to name the + downloaded file (for preventing conflicts), and to check the integrity of + the downloaded file. + pattern: ^[a-z0-9]+$ + type: string + url: + description: | + A gs:// URL pointing to a runsc binary that can be downloaded (possibly + with atelet's credentials). + minLength: 1 + type: string + required: + - sha256Hash + - url + type: object + arm64: + description: Configuration for the arm64 binary. + properties: + sha256Hash: + description: |- + The SHA256 hash of the binary to download. Used both to name the + downloaded file (for preventing conflicts), and to check the integrity of + the downloaded file. + pattern: ^[a-z0-9]+$ + type: string + url: + description: | + A gs:// URL pointing to a runsc binary that can be downloaded (possibly + with atelet's credentials). + minLength: 1 + type: string + required: + - sha256Hash + - url + type: object + authentication: + description: How should atelet authenticate to download the runsc + binary? + properties: + gcp: + description: Use GCP application-default credentials. + type: object + type: object + type: object + snapshotsConfig: + description: Snapshots configuration for the actor. + properties: + location: + description: Location to store snapshots in. 
+ minLength: 1 + type: string + required: + - location + type: object + workerPoolRef: + description: | + Name of the worker pool to use for the actor. + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: |- + If referring to a piece of an object instead of an entire object, this string + should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. + For example, if the object reference is to a container within a pod, this would take on a value like: + "spec.containers{name}" (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" (container with + index 2 in this pod). This syntax is chosen only to have some well-defined way of + referencing a part of an object. + type: string + kind: + description: |- + Kind of the referent. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + namespace: + description: |- + Namespace of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ + type: string + resourceVersion: + description: |- + Specific resourceVersion to which this reference is made, if any. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency + type: string + uid: + description: |- + UID of the referent. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids + type: string + type: object + x-kubernetes-map-type: atomic + required: + - pauseImage + - runsc + - snapshotsConfig + - workerPoolRef + type: object + status: + description: status is the observed state of ActorTemplate + properties: + conditions: + description: conditions defines the status conditions array + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. 
+ enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + goldenActorID: + type: string + goldenSnapshot: + type: string + phase: + description: Phase of the actor template. + type: string + takeGoldenSnapshotAt: + format: date-time + type: string + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} diff --git a/charts/substrate-crds/templates/ate.dev_workerpools.yaml b/charts/substrate-crds/templates/ate.dev_workerpools.yaml new file mode 100644 index 000000000..3e2387802 --- /dev/null +++ b/charts/substrate-crds/templates/ate.dev_workerpools.yaml @@ -0,0 +1,99 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.20.1 + name: workerpools.ate.dev +spec: + group: ate.dev + names: + kind: WorkerPool + listKind: WorkerPoolList + plural: workerpools + shortNames: + - workerpool + singular: workerpool + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.replicas + name: Desired + type: integer + - jsonPath: .status.replicas + name: Replicas + type: integer + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: WorkerPool is the Schema for the workerpools API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec defines the desired state of WorkerPool + properties: + ateomImage: + description: AteomImage is the ateom container image to deploy as + workers. + minLength: 1 + type: string + replicas: + description: Replicas is the number of worker pods to run. + format: int32 + minimum: 0 + type: integer + required: + - ateomImage + - replicas + type: object + status: + description: status is the observed state of WorkerPool + properties: + replicas: + description: Replicas is the total number of worker pods. 
+ format: int32 + minimum: 0 + type: integer + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + scale: + specReplicasPath: .spec.replicas + statusReplicasPath: .status.replicas + status: {} diff --git a/charts/substrate/Chart.yaml b/charts/substrate/Chart.yaml new file mode 100644 index 000000000..52bd74800 --- /dev/null +++ b/charts/substrate/Chart.yaml @@ -0,0 +1,27 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v2 +name: substrate +description: Agent Substrate — actor runtime, control plane, and data-plane router. +type: application +version: 0.1.0 +appVersion: "0.1.0" +home: https://github.com/agent-substrate/substrate +sources: +- https://github.com/agent-substrate/substrate +keywords: +- agent +- actor +- substrate diff --git a/charts/substrate/README.md b/charts/substrate/README.md new file mode 100644 index 000000000..b515c26f1 --- /dev/null +++ b/charts/substrate/README.md @@ -0,0 +1,71 @@ +# substrate + +Helm chart for installing Agent Substrate. + +## Install modes + +| Mode | Default? | Cluster requirements | Trade-off | +|------|----------|----------------------|-----------| +| `mtls` | yes | feature gates `ClusterTrustBundle`, `ClusterTrustBundleProjection`, `PodCertificateRequest` + `certificates.k8s.io/v1beta1` API | Full in-cluster mTLS via the bundled `podcertcontroller`. 
| +| `jwt` | | none beyond stock K8s | Server certs and session signing pools are generated by the chart; clients authenticate via projected ServiceAccount tokens. Valkey runs plaintext intra-cluster. | + +```bash +# CRDs +helm upgrade --install substrate-crds ./charts/substrate-crds + +# mTLS mode (default) +helm upgrade --install substrate ./charts/substrate + +# JWT mode (no off-by-default feature gates) +helm upgrade --install substrate ./charts/substrate \ + --set auth.mode=jwt \ + --set auth.jwt.issuer=https://kubernetes.default.svc.cluster.local +``` + +By default, component images are pulled from `ghcr.io/kagent-dev/substrate` +using the chart `appVersion` as the tag. Override `image.registry` and +`image.tag` to install from a different image repository or tag. + +## JWT-mode bootstrap + +JWT mode is standalone by default. The chart generates: + +- `Secret/ateapi-tls` +- `ConfigMap/ateapi-ca` +- `Secret/session-id-jwt-pool` +- `Secret/session-id-ca-pool` + +Existing generated data is reused on upgrade so key material does not rotate +during normal chart upgrades. Set `auth.jwt.bootstrap.enabled=false` to bring +your own resources with those names. + +## Render manifests without applying + +```bash +helm template substrate ./charts/substrate # mtls +helm template substrate ./charts/substrate --set auth.mode=jwt \ + --set auth.jwt.issuer=https://kubernetes.default.svc.cluster.local +``` + +`manifests/ate-install/` in the repo is the rendered mTLS output and is +regenerated by `make helm-template`. The separate `substrate-crds` chart +mirrors `manifests/ate-install/generated/`. 
+ +## Values + +See `values.yaml` for the full set; the important keys: + +| Key | Default | Notes | +|-----|---------|-------| +| `auth.mode` | `mtls` | `mtls` or `jwt` | +| `auth.jwt.issuer` | `""` | required when `auth.mode=jwt` | +| `auth.jwt.audience` | `api.ate-system.svc` | SA token audience | +| `auth.jwt.bootstrap.enabled` | `true` | Generate JWT TLS and session signing material | +| `auth.jwt.serverCertSecret` | `ateapi-tls` | Secret name | +| `auth.jwt.caBundleConfigMap` | `ateapi-ca` | ConfigMap name | +| `valkey.enabled` | `true` | Set false if you bring your own Redis/Valkey | +| `valkey.replicas` | `6` | StatefulSet size | +| `redis.clusterAddress` | `""` (in-cluster) | Override to use external Redis | +| `redis.useIAMAuth` | `false` | Google IAM auth | +| `atelet.gcpAuthForImagePulls` | `false` | Enable only when using GCP registry auth | +| `otel.endpoint` | `""` | Set to an OTLP endpoint to export traces/metrics | diff --git a/charts/substrate/templates/NOTES.txt b/charts/substrate/templates/NOTES.txt new file mode 100644 index 000000000..f736c32f7 --- /dev/null +++ b/charts/substrate/templates/NOTES.txt @@ -0,0 +1,21 @@ +substrate {{ .Chart.AppVersion }} installed in mode: {{ .Values.auth.mode }} + +{{ if eq .Values.auth.mode "mtls" -}} +NOTE: mtls mode REQUIRES the following Kubernetes feature gates to be enabled: + - ClusterTrustBundle + - ClusterTrustBundleProjection + - PodCertificateRequest +plus the v1beta1 certificates API. They are off by default; enable them explicitly +(e.g. on kind), or pick auth.mode=jwt where you cannot (most managed Kubernetes). +{{- else }} +JWT mode is active. + +{{- if .Values.auth.jwt.bootstrap.enabled }} +JWT bootstrap resources are managed by this chart. Existing key material is +reused on upgrade. +{{- else }} +JWT bootstrap is disabled. Provide {{ .Values.auth.jwt.serverCertSecret }}, +{{ .Values.auth.jwt.caBundleConfigMap }}, session-id-jwt-pool, and +session-id-ca-pool before pods become healthy. 
+{{- end }} +{{- end }} diff --git a/charts/substrate/templates/_helpers.tpl b/charts/substrate/templates/_helpers.tpl new file mode 100644 index 000000000..2f8ecc8e6 --- /dev/null +++ b/charts/substrate/templates/_helpers.tpl @@ -0,0 +1,80 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{/* +Qualified resource name for a chart component. + +Usage: + {{ include "substrate.fullname" (list "ate-api-server" .) }} + +When the release name equals the chart name (the canonical render in +hack/render-manifests.sh — `helm template substrate charts/substrate`), this +returns the bare component name, so the generated manifests/ate-install/ +files keep their historical names ("ate-api-server", "ate-controller", ...). + +Otherwise resources are prefixed with the release name in the standard Helm +style ("foo-ate-api-server", ...) so multiple releases coexist without +colliding. +*/}} +{{- define "substrate.fullname" -}} +{{- $name := index . 0 -}} +{{- $ctx := index . 1 -}} +{{- if eq $ctx.Release.Name $ctx.Chart.Name -}} +{{- $name -}} +{{- else -}} +{{- printf "%s-%s" $ctx.Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{/* +Build an image reference for a substrate component binary. + +Usage: + {{ include "substrate.componentImage" (list "ateapi" .) }} + +Produces {image.registry}/{name}:{tag} where tag is resolved as: + 1. image.tag, if it is non-empty + 2. .Chart.AppVersion, if image.tag is empty + 3. 
no tag (no colon), only when both of the above are empty + +NOTE(review): hack/render-manifests.sh is said to need untagged ko:// refs (for +`ko resolve`), but an empty image.tag yields .Chart.AppVersion here — confirm. +*/}} +{{- define "substrate.componentImage" -}} +{{- $name := index . 0 -}} +{{- $ctx := index . 1 -}} +{{- $registry := $ctx.Values.image.registry -}} +{{- $tag := $ctx.Values.image.tag | default $ctx.Chart.AppVersion -}} +{{- if ne $tag "" -}} +{{- printf "%s/%s:%s" $registry $name $tag -}} +{{- else -}} +{{- printf "%s/%s" $registry $name -}} +{{- end -}} +{{- end -}} + +{{/* +Validate auth.mode at template time; jwt mode also requires auth.jwt.issuer. +*/}} +{{- define "substrate.validateAuthMode" -}} +{{- if not (or (eq .Values.auth.mode "mtls") (eq .Values.auth.mode "jwt")) -}} +{{- fail (printf "auth.mode must be 'mtls' or 'jwt', got %q" .Values.auth.mode) -}} +{{- end -}} +{{- if eq .Values.auth.mode "jwt" -}} +{{- if not .Values.auth.jwt.issuer -}} +{{- fail "auth.jwt.issuer is required when auth.mode=jwt" -}} +{{- end -}} +{{- end -}} +{{- end -}} diff --git a/charts/substrate/templates/ate-api-server-envvars.yaml b/charts/substrate/templates/ate-api-server-envvars.yaml new file mode 100644 index 000000000..d294a8046 --- /dev/null +++ b/charts/substrate/templates/ate-api-server-envvars.yaml @@ -0,0 +1,27 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/}} + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Values.ateApiServerEnvVarsConfigMap }} + namespace: {{ .Release.Namespace }} +data: # every value falls back via "default" so an unset key renders as a string, never as YAML null + ATE_API_REDIS_ADDRESS: {{ .Values.redis.clusterAddress | default (printf "valkey-cluster.%s.svc:6379" .Release.Namespace) | quote }} + ATE_API_REDIS_USE_IAM_AUTH: {{ .Values.redis.useIAMAuth | default false | toString | quote }} + ATE_API_REDIS_TLS_SERVER_NAME: {{ .Values.redis.tlsServerName | default "" | quote }} + ATE_API_REDIS_CLIENT_CERT: {{ .Values.redis.clientCert | default "" | quote }} + ATE_API_K8SJWT_ISSUER: {{ .Values.auth.jwt.issuer | default "" | quote }} diff --git a/charts/substrate/templates/ate-api-server.yaml b/charts/substrate/templates/ate-api-server.yaml new file mode 100644 index 000000000..fcf5ccc7e --- /dev/null +++ b/charts/substrate/templates/ate-api-server.yaml @@ -0,0 +1,232 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "substrate.fullname" (list "ate-api-server-role" .) }} +rules: +- apiGroups: [""] + resources: ["pods"] + verbs: ["get", "watch", "list"] +- apiGroups: ["ate.dev"] + resources: ["actortemplates"] + verbs: ["get", "watch", "list"] +# Secret reads for env source resolution are intentionally NOT granted +# cluster-wide here. Each demo / tenant is responsible for granting +# ate-api-server read access only to the specific Secrets referenced by its +# ActorTemplates (e.g. 
via a namespace-scoped Role + RoleBinding using +# resourceNames). +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "substrate.fullname" (list "ate-api-server" .) }} + namespace: {{ .Release.Namespace }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "substrate.fullname" (list "ate-api-server-binding" .) }} +subjects: +- kind: ServiceAccount + name: {{ include "substrate.fullname" (list "ate-api-server" .) }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ include "substrate.fullname" (list "ate-api-server-role" .) }} + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "substrate.fullname" (list "ate-api-server-deployment" .) }} + namespace: {{ .Release.Namespace }} +spec: + replicas: 1 + selector: + matchLabels: + app: ate-api-server + template: + metadata: + labels: + app: ate-api-server + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9090" + spec: + serviceAccountName: {{ include "substrate.fullname" (list "ate-api-server" .) }} +{{- if eq .Values.auth.mode "jwt" }} + initContainers: + - name: assemble-cred-bundle + image: {{ .Values.images.busybox }} + command: + - sh + - -c + - cat /run/ateapi-tls-src/tls.crt /run/ateapi-tls-src/tls.key > /run/ateapi-tls/credential-bundle.pem + volumeMounts: + - { name: ateapi-tls-src, mountPath: /run/ateapi-tls-src, readOnly: true } + - { name: ateapi-tls, mountPath: /run/ateapi-tls } +{{- end }} + containers: + - name: ate-api-server + image: {{ include "substrate.componentImage" (list "ateapi" .) 
}} + args: + - "--grpc-listen-addr=0.0.0.0:443" +{{- if eq .Values.auth.mode "mtls" }} + - "--grpc-server-cred-bundle=/run/servicedns.podcert.ate.dev/credential-bundle.pem" + - "--redis-cluster-address=@env" + - "--redis-ca-certs=/etc/valkey-ca/ca.crt" + - "--redis-use-iam-auth=@env" + - "--redis-tls-server-name=@env" + - "--redis-client-cert=@env" + - "--client-jwt-issuer=@env" + - "--client-jwt-audience={{ .Values.auth.jwt.audience }}" + - "--session-id-jwt-pool=/run/session-id-jwt-pool/pool.json" + - "--session-id-ca-pool=/run/session-id-ca-pool/pool.json" + - "--workerpool-ca-certs=/run/workerpool-ca-certs/trust-bundle.pem" +{{- else }} + - "--grpc-server-cred-bundle=/run/ateapi-tls/credential-bundle.pem" + - "--auth-mode=jwt" + - "--redis-cluster-address=@env" + - "--redis-no-tls=true" + - "--redis-use-iam-auth=@env" + - "--client-jwt-issuer={{ .Values.auth.jwt.issuer }}" + - "--client-jwt-audience={{ .Values.auth.jwt.audience }}" + - "--session-id-jwt-pool=/run/session-id-jwt-pool/pool.json" + - "--session-id-ca-pool=/run/session-id-ca-pool/pool.json" + - "--client-jwt-ca-cert=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" +{{- end }} + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid + - name: OTEL_RESOURCE_ATTRIBUTES + value: k8s.namespace.name=$(POD_NAMESPACE),k8s.pod.name=$(POD_NAME),k8s.pod.uid=$(POD_UID),service.instance.id=$(POD_UID) +{{- if .Values.otel.endpoint }} + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: {{ .Values.otel.endpoint | quote }} +{{- end }} + envFrom: + - configMapRef: + name: {{ .Values.ateApiServerEnvVarsConfigMap }} + optional: true + volumeMounts: +{{- if eq .Values.auth.mode "mtls" }} + - { name: servicedns, mountPath: /run/servicedns.podcert.ate.dev } + - { name: session-id-jwt-pool, mountPath: /run/session-id-jwt-pool } + - { name: 
valkey-ca-certs, mountPath: /etc/valkey-ca, readOnly: true } + - { name: session-id-ca-pool, mountPath: /run/session-id-ca-pool, readOnly: true } + - { name: workerpool-ca-certs, mountPath: /run/workerpool-ca-certs, readOnly: true } +{{- else }} + - { name: ateapi-tls, mountPath: /run/ateapi-tls, readOnly: true } + - { name: session-id-jwt-pool, mountPath: /run/session-id-jwt-pool } + - { name: session-id-ca-pool, mountPath: /run/session-id-ca-pool, readOnly: true } +{{- end }} + ports: + - containerPort: 443 + - name: prometheus + containerPort: 9090 + readinessProbe: + httpGet: + path: /readyz + port: 9090 + initialDelaySeconds: 5 + periodSeconds: 2 + volumes: +{{- if eq .Values.auth.mode "mtls" }} + - name: servicedns + projected: + sources: + - podCertificate: + signerName: servicedns.podcert.ate.dev/identity + keyType: ECDSAP256 + credentialBundlePath: credential-bundle.pem + - name: session-id-jwt-pool + projected: + sources: + - secret: + name: session-id-jwt-pool + items: + - { key: pool, path: pool.json } + - name: valkey-ca-certs + projected: + sources: + - secret: + name: valkey-ca-certs + items: + - { key: ca.crt, path: ca.crt } + - name: session-id-ca-pool + projected: + sources: + - secret: + name: session-id-ca-pool + items: + - { key: pool, path: pool.json } + - name: workerpool-ca-certs + projected: + sources: + - clusterTrustBundle: + signerName: podidentity.podcert.ate.dev/identity + labelSelector: + matchLabels: + podcert.ate.dev/canarying: live + path: trust-bundle.pem +{{- else }} + - name: ateapi-tls-src + secret: + secretName: {{ .Values.auth.jwt.serverCertSecret }} + - name: ateapi-tls + emptyDir: {} + - name: session-id-jwt-pool + projected: + sources: + - secret: + name: session-id-jwt-pool + items: + - { key: pool, path: pool.json } + - name: session-id-ca-pool + projected: + sources: + - secret: + name: session-id-ca-pool + items: + - { key: pool, path: pool.json } +{{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ 
include "substrate.fullname" (list "api" .) }} + namespace: {{ .Release.Namespace }} +spec: + type: ClusterIP + selector: + app: ate-api-server + ports: + - name: grpc + protocol: TCP + port: 443 + targetPort: 443 diff --git a/charts/substrate/templates/ate-controller.yaml b/charts/substrate/templates/ate-controller.yaml new file mode 100644 index 000000000..5c403837c --- /dev/null +++ b/charts/substrate/templates/ate-controller.yaml @@ -0,0 +1,102 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +# ServiceAccount used by the ate-controller Deployment; bound below to the ate-controller ClusterRole. +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "substrate.fullname" (list "ate-controller" .) }} + namespace: {{ .Release.Namespace }} + labels: + # Label key is "app", matching the ate-controller Service and Deployment in this template. + app: ate-controller +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "substrate.fullname" (list "ate-controller" .) }} +subjects: +- kind: ServiceAccount + name: {{ include "substrate.fullname" (list "ate-controller" .) }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ include "substrate.fullname" (list "ate-controller" .) }} + apiGroup: rbac.authorization.k8s.io +--- +kind: Service +apiVersion: v1 +metadata: + name: {{ include "substrate.fullname" (list "ate-controller" .)
}} + namespace: {{ .Release.Namespace }} + labels: + app: ate-controller +spec: + selector: + app: ate-controller + ports: + - name: metrics + port: 8080 + targetPort: metrics + protocol: TCP +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: {{ include "substrate.fullname" (list "ate-controller" .) }} + namespace: {{ .Release.Namespace }} +spec: + replicas: 1 + selector: + matchLabels: + app: ate-controller + template: + metadata: + labels: + app: ate-controller + spec: + serviceAccountName: {{ include "substrate.fullname" (list "ate-controller" .) }} + containers: + - name: ate-controller + image: {{ include "substrate.componentImage" (list "atecontroller" .) }} +{{- if eq .Values.auth.mode "jwt" }} + args: + - "--ateapi-auth=jwt" + - "--ateapi-ca-file=/run/ateapi-ca/ca.crt" + - "--ateapi-server-name={{ include "substrate.fullname" (list "api" .) }}.{{ .Release.Namespace }}.svc" + - "--ateapi-token-file=/var/run/secrets/tokens/ateapi/token" +{{- end }} + ports: + - name: metrics + containerPort: 8080 + protocol: TCP + - name: healthz + containerPort: 8081 + protocol: TCP +{{- if eq .Values.auth.mode "jwt" }} + volumeMounts: + - { name: ateapi-ca, mountPath: /run/ateapi-ca, readOnly: true } + - { name: ateapi-token, mountPath: /var/run/secrets/tokens/ateapi, readOnly: true } + volumes: + - name: ateapi-ca + configMap: + name: {{ .Values.auth.jwt.caBundleConfigMap }} + - name: ateapi-token + projected: + sources: + - serviceAccountToken: + audience: {{ .Values.auth.jwt.audience }} + expirationSeconds: 3600 + path: token +{{- end }} diff --git a/charts/substrate/templates/atelet.yaml b/charts/substrate/templates/atelet.yaml new file mode 100644 index 000000000..c2726fa36 --- /dev/null +++ b/charts/substrate/templates/atelet.yaml @@ -0,0 +1,105 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +# atelet — identical across auth modes (does not dial ateapi). +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "substrate.fullname" (list "atelet" .) }} + namespace: {{ .Release.Namespace }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "substrate.fullname" (list "atelet-role" .) }} +rules: +- apiGroups: [""] + resources: ["pods"] + verbs: ["get", "watch", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "substrate.fullname" (list "atelet-binding" .) }} +subjects: +- kind: ServiceAccount + name: {{ include "substrate.fullname" (list "atelet" .) }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ include "substrate.fullname" (list "atelet-role" .) }} + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "substrate.fullname" (list "atelet" .) }} + namespace: {{ .Release.Namespace }} + labels: + app: atelet +spec: + selector: + matchLabels: + app: atelet + template: + metadata: + labels: + app: atelet + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9090" + spec: + serviceAccountName: {{ include "substrate.fullname" (list "atelet" .) }} + containers: + - name: atelet + image: {{ include "substrate.componentImage" (list "atelet" .) }} + args: + - --gcp-auth-for-image-pulls={{ .Values.atelet.gcpAuthForImagePulls }} +{{- with .Values.atelet.extraArgs }} +{{ toYaml . 
| indent 8 }} +{{- end }} + securityContext: + privileged: true + env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName +{{- if .Values.otel.endpoint }} + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: {{ .Values.otel.endpoint | quote }} +{{- end }} + - name: ATE_STORAGE_BACKEND + value: {{ .Values.atelet.storageBackend | quote }} +{{- with .Values.atelet.extraEnv }} +{{ toYaml . | indent 8 }} +{{- end }} + ports: + - name: grpc + containerPort: 8085 + hostPort: 8085 + - name: prometheus + containerPort: 9090 + hostPort: 9090 + protocol: TCP + volumeMounts: + - name: run-ateom + mountPath: /var/lib/ateom-gvisor + volumes: + - name: run-ateom + hostPath: + path: /var/lib/ateom-gvisor + type: DirectoryOrCreate diff --git a/charts/substrate/templates/atenet-dns.yaml b/charts/substrate/templates/atenet-dns.yaml new file mode 100644 index 000000000..0838d2c0b --- /dev/null +++ b/charts/substrate/templates/atenet-dns.yaml @@ -0,0 +1,177 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +# atenet-dns — identical across auth modes (does not dial ateapi). +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + namespace: {{ .Release.Namespace }} + labels: + app: dns +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "substrate.fullname" (list "atenet-dns" .) 
}} + namespace: {{ .Release.Namespace }} +rules: +- apiGroups: [""] + resources: ["services"] + verbs: ["get", "list", "watch"] +- apiGroups: [""] + resources: ["configmaps"] + verbs: ["get", "list", "watch", "create", "update", "patch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + namespace: {{ .Release.Namespace }} +subjects: +- kind: ServiceAccount + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: Role + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + namespace: kube-system +rules: +- apiGroups: [""] + resources: ["configmaps"] + verbs: ["get", "list", "watch", "create", "update", "patch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + namespace: kube-system +subjects: +- kind: ServiceAccount + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: Role + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "substrate.fullname" (list "dns" .) }} + namespace: {{ .Release.Namespace }} + labels: + app: dns +spec: + replicas: 1 + selector: + matchLabels: + app: dns + template: + metadata: + labels: + app: dns + spec: + serviceAccountName: {{ include "substrate.fullname" (list "atenet-dns" .) 
}} + shareProcessNamespace: true + initContainers: + - name: init-dns + image: {{ .Values.images.busybox }} + command: ["sh", "-c"] + args: + - | + cat <<'EOF' > /etc/coredns/Corefile + .:53 { + errors + health :8080 + ready :8181 + reload + } + EOF + volumeMounts: + - name: dns-config-volume + mountPath: /etc/coredns + containers: + - name: coredns + image: {{ .Values.images.coredns }} + imagePullPolicy: IfNotPresent + args: [ "-conf", "/etc/coredns/Corefile" ] + volumeMounts: + - name: dns-config-volume + mountPath: /etc/coredns + ports: + - name: dns + containerPort: 53 + protocol: UDP + - name: dns-tcp + containerPort: 53 + protocol: TCP + livenessProbe: + httpGet: + path: /health + port: 8080 + scheme: HTTP + initialDelaySeconds: 10 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 5 + readinessProbe: + httpGet: + path: /ready + port: 8181 + scheme: HTTP + initialDelaySeconds: 5 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + - name: dns-controller + image: {{ include "substrate.componentImage" (list "atenet" .) }} + args: + - "dns" + - "--log-level=debug" + - "--interval=10s" + - "--corefile-path=/etc/coredns/Corefile" + volumeMounts: + - name: dns-config-volume + mountPath: /etc/coredns + volumes: + - name: dns-config-volume + emptyDir: {} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ include "substrate.fullname" (list "dns" .) 
}} + namespace: {{ .Release.Namespace }} + labels: + app: dns +spec: + selector: + app: dns + type: ClusterIP + ports: + - name: dns + port: 53 + protocol: UDP + - name: dns-tcp + port: 53 + protocol: TCP diff --git a/charts/substrate/templates/atenet-router.yaml b/charts/substrate/templates/atenet-router.yaml new file mode 100644 index 000000000..01536184a --- /dev/null +++ b/charts/substrate/templates/atenet-router.yaml @@ -0,0 +1,270 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "substrate.fullname" (list "atenet-router" .) }} + namespace: {{ .Release.Namespace }} + labels: + app: atenet-router +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "substrate.fullname" (list "atenet-router" .) }} +rules: +- apiGroups: + - "ate.dev" + resources: + - actortemplates + verbs: + - get + - watch + - list +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "substrate.fullname" (list "atenet-router" .) }} +subjects: +- kind: ServiceAccount + name: {{ include "substrate.fullname" (list "atenet-router" .) }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ include "substrate.fullname" (list "atenet-router" .) 
}} + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "substrate.fullname" (list "atenet-router-agentgateway-config" .) }} + namespace: {{ .Release.Namespace }} +data: + config.yaml: | + # yaml-language-server: $schema=https://agentgateway.dev/schema/config + config: + adminAddr: "127.0.0.1:15000" + readinessAddr: "0.0.0.0:15021" + statsAddr: "0.0.0.0:15020" + binds: + - port: 8080 + listeners: + - name: http + protocol: HTTP + routes: + - name: substrate-http + matches: + - path: + pathPrefix: / + policies: + extProc: + host: "127.0.0.1:50051" + failureMode: failClosed + processingOptions: + requestBodyMode: none + responseBodyMode: none + requestHeaderMode: send + responseHeaderMode: skip + requestTrailerMode: skip + responseTrailerMode: skip + backends: + - dynamic: {} + - port: 8443 + listeners: + - name: https + protocol: HTTPS + tls: +{{ if eq .Values.auth.mode "mtls" }} + cert: "/run/servicedns.podcert.ate.dev/cert.pem" + key: "/run/servicedns.podcert.ate.dev/key.pem" +{{ else }} + cert: "/run/agentgateway-tls/tls.crt" + key: "/run/agentgateway-tls/tls.key" +{{ end }} + routes: + - name: substrate-https + matches: + - path: + pathPrefix: / + policies: + extProc: + host: "127.0.0.1:50051" + failureMode: failClosed + processingOptions: + requestBodyMode: none + responseBodyMode: none + requestHeaderMode: send + responseHeaderMode: skip + requestTrailerMode: skip + responseTrailerMode: skip + backends: + - dynamic: {} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "substrate.fullname" (list "atenet-router" .) }} + namespace: {{ .Release.Namespace }} + labels: + app: atenet-router +spec: + replicas: 1 + selector: + matchLabels: + app: atenet-router + template: + metadata: + labels: + app: atenet-router + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9090" + spec: + serviceAccountName: {{ include "substrate.fullname" (list "atenet-router" .) 
}} + containers: + - name: atenet-router + image: {{ include "substrate.componentImage" (list "atenet" .) }} + args: + - "router" + - "--standalone" + - "--networking-mode=agentgateway" + - "--namespace={{ .Release.Namespace }}" + - "--port-http=8080" + - "--port-extproc=50051" + - "--extproc-address=127.0.0.1" + - "--ateapi-address={{ include "substrate.fullname" (list "api" .) }}.{{ .Release.Namespace }}.svc:443" +{{- if eq .Values.auth.mode "jwt" }} + - "--ateapi-auth=jwt" + - "--ateapi-ca-file=/run/ateapi-ca/ca.crt" + - "--ateapi-server-name={{ include "substrate.fullname" (list "api" .) }}.{{ .Release.Namespace }}.svc" + - "--ateapi-token-file=/var/run/secrets/tokens/ateapi/token" +{{- end }} + - "--status-port=4040" + - "--port-https=8443" + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid + - name: OTEL_RESOURCE_ATTRIBUTES + value: k8s.namespace.name=$(POD_NAMESPACE),k8s.pod.name=$(POD_NAME),k8s.pod.uid=$(POD_UID),service.instance.id=$(POD_UID) +{{- if .Values.otel.endpoint }} + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: {{ .Values.otel.endpoint | quote }} +{{- end }} + ports: + - name: extproc + containerPort: 50051 + - name: status + containerPort: 4040 + - name: metrics + containerPort: 9090 +{{- if eq .Values.auth.mode "jwt" }} + volumeMounts: + - name: ateapi-ca + mountPath: /run/ateapi-ca + readOnly: true + - name: ateapi-token + mountPath: /var/run/secrets/tokens/ateapi + readOnly: true +{{- end }} + - name: agentgateway + image: {{ .Values.images.agentgateway }} + args: + - "-f" + - "/etc/agentgateway/config.yaml" + ports: + - name: http + containerPort: 8080 + - name: https + containerPort: 8443 + - name: readiness + containerPort: 15021 + - name: gw-metrics + containerPort: 15020 + volumeMounts: + - name: agentgateway-config + mountPath: /etc/agentgateway +{{- if eq 
.Values.auth.mode "mtls" }} + - name: "servicedns" + mountPath: "/run/servicedns.podcert.ate.dev" +{{- else }} + - name: agentgateway-tls + mountPath: /run/agentgateway-tls + readOnly: true +{{- end }} + readinessProbe: + httpGet: + path: /healthz/ready + port: readiness + periodSeconds: 10 + volumes: + - name: agentgateway-config + configMap: + name: {{ include "substrate.fullname" (list "atenet-router-agentgateway-config" .) }} +{{- if eq .Values.auth.mode "mtls" }} + - name: "servicedns" + projected: + sources: + - podCertificate: + signerName: servicedns.podcert.ate.dev/identity + keyType: ECDSAP256 + certificateChainPath: cert.pem + keyPath: key.pem +{{- else }} + - name: agentgateway-tls + secret: + secretName: {{ .Values.auth.jwt.serverCertSecret }} + - name: ateapi-ca + configMap: + name: {{ .Values.auth.jwt.caBundleConfigMap }} + - name: ateapi-token + projected: + sources: + - serviceAccountToken: + audience: {{ .Values.auth.jwt.audience }} + expirationSeconds: 3600 + path: token +{{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ include "substrate.fullname" (list "atenet-router" .) }} + namespace: {{ .Release.Namespace }} +spec: + type: ClusterIP + selector: + app: atenet-router + ports: + - name: http + port: 80 + targetPort: 8080 + protocol: TCP + - name: https + port: 443 + targetPort: 8443 + protocol: TCP diff --git a/charts/substrate/templates/jwt-bootstrap.yaml b/charts/substrate/templates/jwt-bootstrap.yaml new file mode 100644 index 000000000..e3299fe08 --- /dev/null +++ b/charts/substrate/templates/jwt-bootstrap.yaml @@ -0,0 +1,73 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if and (eq .Values.auth.mode "jwt") .Values.auth.jwt.bootstrap.enabled }} +{{- $apiName := include "substrate.fullname" (list "api" .) }} +{{- $routerName := include "substrate.fullname" (list "atenet-router" .) }} +{{- $apiHost := printf "%s.%s.svc" $apiName .Release.Namespace }} +{{- $ca := genCA (printf "%s-ca" $apiName) 3650 }} +{{- $serverCert := genSignedCert $apiHost nil (list $apiHost (printf "%s.%s.svc.cluster.local" $apiName .Release.Namespace) (printf "%s.%s.svc" $routerName .Release.Namespace)) 365 $ca }} +{{- $sessionJWTKey := genPrivateKey "ecdsa" }} +{{- $sessionCA := genCA "session-id-ca" 3650 }} +{{- if .Values.auth.jwt.bootstrap.serverCert.enabled }} +{{- $existingTLS := lookup "v1" "Secret" .Release.Namespace .Values.auth.jwt.serverCertSecret }} +{{- $existingCA := lookup "v1" "ConfigMap" .Release.Namespace .Values.auth.jwt.caBundleConfigMap }} +{{- /* The server certificate and the CA bundle are only valid as a pair. Reuse the stored objects only when BOTH already exist; if either one is missing, emit the freshly generated cert together with the CA that signed it, so clients never verify against a CA that did not issue the serving cert. lookup returns an empty map when the object is absent or when rendering offline, which is falsy here. */}} +{{- $reusePKI := and $existingTLS $existingCA }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ .Values.auth.jwt.serverCertSecret }} + namespace: {{ .Release.Namespace }} +type: kubernetes.io/tls +data: + tls.crt: {{ if $reusePKI }}{{ index $existingTLS.data "tls.crt" }}{{ else }}{{ $serverCert.Cert | b64enc }}{{ end }} + tls.key: {{ if $reusePKI }}{{ index $existingTLS.data "tls.key" }}{{ else }}{{ $serverCert.Key | b64enc }}{{ end }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Values.auth.jwt.caBundleConfigMap }} + namespace: {{ .Release.Namespace }} +data: + ca.crt: | +{{- if $reusePKI }} +{{ index $existingCA.data "ca.crt" | nindent 4 }} +{{- else }} +{{ $ca.Cert | nindent 4 }} +{{- end }} +{{- end }} +{{- if
.Values.auth.jwt.bootstrap.sessionPools.enabled }} +--- +{{- $existingJWTSecret := lookup "v1" "Secret" .Release.Namespace "session-id-jwt-pool" }} +apiVersion: v1 +kind: Secret +metadata: + name: session-id-jwt-pool + namespace: {{ .Release.Namespace }} +type: Opaque +data: + pool: {{ if $existingJWTSecret }}{{ index $existingJWTSecret.data "pool" }}{{ else }}{{ dict "Authorities" (list (dict "ID" "1" "Algorithm" "ES256" "SigningKeyPEM" $sessionJWTKey)) | toJson | b64enc }}{{ end }} +--- +{{- $existingCASecret := lookup "v1" "Secret" .Release.Namespace "session-id-ca-pool" }} +apiVersion: v1 +kind: Secret +metadata: + name: session-id-ca-pool + namespace: {{ .Release.Namespace }} +type: Opaque +data: + pool: {{ if $existingCASecret }}{{ index $existingCASecret.data "pool" }}{{ else }}{{ dict "CAs" (list (dict "ID" "1" "SigningKeyPEM" $sessionCA.Key "RootCertificatePEM" $sessionCA.Cert)) | toJson | b64enc }}{{ end }} +{{- end }} +{{- end }} diff --git a/charts/substrate/templates/jwt-oidc-rbac.yaml b/charts/substrate/templates/jwt-oidc-rbac.yaml new file mode 100644 index 000000000..a9fd499e9 --- /dev/null +++ b/charts/substrate/templates/jwt-oidc-rbac.yaml @@ -0,0 +1,42 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if eq .Values.auth.mode "jwt" }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "substrate.fullname" (list "oidc-discovery-viewer" .) 
}} +rules: +- nonResourceURLs: + - /.well-known/openid-configuration + - /openid/v1/jwks + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "substrate.fullname" (list "oidc-discovery-viewer" .) }} +subjects: +- kind: ServiceAccount + name: {{ include "substrate.fullname" (list "ate-api-server" .) }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ include "substrate.fullname" (list "oidc-discovery-viewer" .) }} + apiGroup: rbac.authorization.k8s.io +{{- end }} diff --git a/charts/substrate/templates/namespace.yaml b/charts/substrate/templates/namespace.yaml new file mode 100644 index 000000000..63401c00d --- /dev/null +++ b/charts/substrate/templates/namespace.yaml @@ -0,0 +1,23 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- include "substrate.validateAuthMode" . -}} +{{- if .Values.createNamespace }} +apiVersion: v1 +kind: Namespace +metadata: + name: {{ .Release.Namespace }} +{{- end }} diff --git a/charts/substrate/templates/pod-certificate-controller.yaml b/charts/substrate/templates/pod-certificate-controller.yaml new file mode 100644 index 000000000..3aaaa9df9 --- /dev/null +++ b/charts/substrate/templates/pod-certificate-controller.yaml @@ -0,0 +1,200 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if eq .Values.auth.mode "mtls" -}} +apiVersion: v1 +kind: Namespace +metadata: + name: podcertificate-controller-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "substrate.fullname" (list "podcert-ate-dev-signer" .) }} +rules: +# The service signer needs to be able to read services and pods. +- apiGroups: + - "" + resources: + - services + - pods + verbs: + - get + - list + - watch +- apiGroups: + - certificates.k8s.io + resources: + - podcertificaterequests + verbs: + - get + - list + - watch + - update +- apiGroups: + - certificates.k8s.io + resources: + - clustertrustbundles + verbs: + - create + - get + - list + - watch + - update + - delete +- apiGroups: + - certificates.k8s.io + resources: + - podcertificaterequests/status + verbs: + - update +- apiGroups: + - certificates.k8s.io + resources: + - signers + resourceNames: + - servicedns.podcert.ate.dev/* + - podidentity.podcert.ate.dev/* + verbs: + - sign + - attest +- apiGroups: + - events.k8s.io + resources: + - events + verbs: + - create +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "substrate.fullname" (list "podcert-ate-dev-signer" .) }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "substrate.fullname" (list "podcert-ate-dev-signer" .) 
}} +subjects: +- kind: ServiceAccount + namespace: podcertificate-controller-system + name: default +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + namespace: podcertificate-controller-system + name: coordinator +rules: +- apiGroups: + - "coordination.k8s.io" + resources: + - "leases" + verbs: + - create + - get + - list + - watch + - update + - delete +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: podcertificate-controller-is-a-coordinator + namespace: podcertificate-controller-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: coordinator +subjects: +- kind: ServiceAccount + namespace: podcertificate-controller-system + name: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: podcertificate-controller + namespace: podcertificate-controller-system + labels: + app: podcertificate-controller +spec: + replicas: 1 + selector: + matchLabels: + app: podcertificate-controller + template: + metadata: + labels: + app: podcertificate-controller + spec: + containers: + - name: controller + image: {{ include "substrate.componentImage" (list "podcertcontroller" .) 
}} + args: + - --in-cluster=true + - --sharding-pod-namespace=$(POD_NAMESPACE) + - --sharding-pod-name=$(POD_NAME) + - --sharding-pod-uid=$(POD_UID) + - --sharding-application-name=podcertificate-controller + - --service-dns-ca-pool=/run/ca-state/service-dns-pool.json + - --pod-identity-ca-pool=/run/ca-state/pod-identity-pool.json + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid + volumeMounts: + - name: "ca-state" + mountPath: "/run/ca-state" + securityContext: + allowPrivilegeEscalation: false + capabilities: + add: + - NET_BIND_SERVICE + drop: + - ALL + readOnlyRootFilesystem: true + volumes: + - name: "ca-state" + projected: + sources: + - secret: + name: "service-dns-ca-pool" + items: + - key: "pool" + path: "service-dns-pool.json" + - secret: + name: "pod-identity-ca-pool" + items: + - key: "pool" + path: "pod-identity-pool.json" + dnsPolicy: Default + nodeSelector: + kubernetes.io/os: linux + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + serviceAccountName: default + terminationGracePeriodSeconds: 30 +{{- end }} diff --git a/manifests/ate-install/generated/role.yaml b/charts/substrate/templates/role.yaml similarity index 95% rename from manifests/ate-install/generated/role.yaml rename to charts/substrate/templates/role.yaml index 7341d28dd..8e3f7117d 100644 --- a/manifests/ate-install/generated/role.yaml +++ b/charts/substrate/templates/role.yaml @@ -16,7 +16,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: - name: ate-controller + name: {{ include "substrate.fullname" (list "ate-controller" .) 
}} rules: - apiGroups: - "" diff --git a/charts/substrate/templates/valkey.yaml b/charts/substrate/templates/valkey.yaml new file mode 100644 index 000000000..b8233eea9 --- /dev/null +++ b/charts/substrate/templates/valkey.yaml @@ -0,0 +1,253 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.valkey.enabled -}} +{{- $sts := include "substrate.fullname" (list "valkey-cluster" .) -}} +{{- $headless := include "substrate.fullname" (list "valkey-cluster-service" .) -}} +{{- $ns := .Release.Namespace -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "substrate.fullname" (list "valkey-config" .) }} + namespace: {{ .Release.Namespace }} +data: + valkey.conf: | +{{- if eq .Values.auth.mode "mtls" }} + # Enforce TLS and disable standard port + port 0 + tls-port 6379 + tls-cluster yes + tls-replication yes + + # Load certificates from projected volume + tls-cert-file /run/servicedns.podcert.ate.dev/credential-bundle.pem + tls-key-file /run/servicedns.podcert.ate.dev/credential-bundle.pem + tls-ca-cert-file /etc/valkey-ca/ca.crt + tls-auth-clients yes + + # Enable cluster mode +{{- else }} + # Plaintext: serve on the standard port, no TLS. 
+ port 6379 + +{{- end }} + cluster-enabled yes + cluster-config-file nodes.conf + cluster-node-timeout 5000 + appendonly yes + protected-mode no +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ $headless }} + namespace: {{ .Release.Namespace }} +spec: + clusterIP: None + selector: + app: valkey-cluster + ports: + - name: valkey + port: 6379 + targetPort: 6379 + - name: bus + port: 16379 + targetPort: 16379 +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ $sts }} + namespace: {{ .Release.Namespace }} +spec: + selector: + app: valkey-cluster + ports: + - name: valkey + port: 6379 + targetPort: 6379 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ $sts }} + namespace: {{ .Release.Namespace }} +spec: + serviceName: {{ $headless }} + replicas: {{ .Values.valkey.replicas }} + podManagementPolicy: Parallel + selector: + matchLabels: + app: valkey-cluster + template: + metadata: + labels: + app: valkey-cluster + spec: + containers: + - name: valkey + image: {{ .Values.images.valkey }} + command: ["valkey-server", "/etc/valkey/valkey.conf"] + ports: + - name: valkey + containerPort: 6379 + - name: bus + containerPort: 16379 + volumeMounts: + - name: config + mountPath: /etc/valkey +{{- if eq .Values.auth.mode "mtls" }} + - name: servicedns + mountPath: /run/servicedns.podcert.ate.dev + - name: valkey-ca-certs + mountPath: /etc/valkey-ca + readOnly: true +{{- end }} + - name: data + mountPath: /data + volumes: + - name: config + configMap: + name: {{ include "substrate.fullname" (list "valkey-config" .) 
}} +{{- if eq .Values.auth.mode "mtls" }} + - name: servicedns + projected: + sources: + - podCertificate: + signerName: servicedns.podcert.ate.dev/identity + keyType: ECDSAP256 + credentialBundlePath: credential-bundle.pem + - name: valkey-ca-certs + projected: + sources: + - secret: + name: valkey-ca-certs + items: + - key: ca.crt + path: ca.crt +{{- end }} + volumeClaimTemplates: + - metadata: + name: data + spec: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: {{ .Values.valkey.storageSize }} +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "substrate.fullname" (list "valkey-cluster-init" .) }} + namespace: {{ .Release.Namespace }} +spec: + template: + metadata: + labels: + app: valkey-cluster-init + spec: + restartPolicy: OnFailure + containers: + - name: init + image: {{ .Values.images.valkey }} +{{- if eq .Values.auth.mode "mtls" }} + volumeMounts: + - name: servicedns + mountPath: /run/servicedns.podcert.ate.dev + - name: valkey-ca-certs + mountPath: /etc/valkey-ca + readOnly: true +{{- end }} + command: + - /bin/sh + - -c + - | + set -e + echo "Waiting for all Valkey pods to resolve..." + for i in {{ until (int .Values.valkey.replicas) | join " " }}; do {{- /* one ordinal per StatefulSet replica (valkey.replicas); renders "0 1 2 3 4 5" for the default of 6 */}} + until getent hosts {{ $sts }}-${i}.{{ $headless }}.{{ $ns }}.svc >/dev/null 2>&1; do + echo "Waiting for {{ $sts }}-${i} DNS..." + sleep 2 + done + done + + echo "All pods resolved. Getting IPs..." + POD_IPS="" + for i in {{ until (int .Values.valkey.replicas) | join " " }}; do + ip=$(getent hosts {{ $sts }}-${i}.{{ $headless }}.{{ $ns }}.svc | awk '{print $1}') + POD_IPS="${POD_IPS} ${ip}:6379" + done + + echo "Checking if Valkey cluster is already initialized..." +{{- if eq .Values.auth.mode "mtls" }} + until valkey-cli --tls --cacert /etc/valkey-ca/ca.crt --cert /run/servicedns.podcert.ate.dev/credential-bundle.pem --key /run/servicedns.podcert.ate.dev/credential-bundle.pem -h {{ $sts }}-0.{{ $headless }}.{{ $ns }}.svc ping >/dev/null 2>&1; do + echo "Waiting for {{ $sts }}-0 to respond to ping..."
+ sleep 2 + done + + INIT_STATUS=$(valkey-cli --tls --cacert /etc/valkey-ca/ca.crt --cert /run/servicedns.podcert.ate.dev/credential-bundle.pem --key /run/servicedns.podcert.ate.dev/credential-bundle.pem -h {{ $sts }}-0.{{ $headless }}.{{ $ns }}.svc cluster info 2>/dev/null | grep cluster_state || true) + + if [ -z "${INIT_STATUS}" ] || ! echo "${INIT_STATUS}" | grep -q "cluster_state:ok"; then + echo "Initializing Valkey cluster..." + valkey-cli --tls \ + --cacert /etc/valkey-ca/ca.crt \ + --cert /run/servicedns.podcert.ate.dev/credential-bundle.pem \ + --key /run/servicedns.podcert.ate.dev/credential-bundle.pem \ + --cluster create ${POD_IPS} \ + --cluster-replicas 1 \ + --cluster-yes + echo "Cluster initialization complete!" + else + echo "Cluster already initialized." + fi +{{- else }} + until valkey-cli -h {{ $sts }}-0.{{ $headless }}.{{ $ns }}.svc -p 6379 ping >/dev/null 2>&1; do + echo "Waiting for {{ $sts }}-0 to respond to ping..." + sleep 2 + done + + INIT_STATUS=$(valkey-cli -h {{ $sts }}-0.{{ $headless }}.{{ $ns }}.svc -p 6379 cluster info 2>/dev/null | grep cluster_state || true) + + if [ -z "${INIT_STATUS}" ] || ! echo "${INIT_STATUS}" | grep -q "cluster_state:ok"; then + echo "Initializing Valkey cluster..." + valkey-cli \ + --cluster create ${POD_IPS} \ + --cluster-replicas 1 \ + --cluster-yes + echo "Cluster initialization complete!" + else + echo "Cluster already initialized." 
+ fi +{{- end }} +{{- if eq .Values.auth.mode "mtls" }} + volumes: + - name: servicedns + projected: + sources: + - podCertificate: + signerName: servicedns.podcert.ate.dev/identity + keyType: ECDSAP256 + credentialBundlePath: credential-bundle.pem + - name: valkey-ca-certs + projected: + sources: + - secret: + name: valkey-ca-certs + items: + - key: ca.crt + path: ca.crt +{{- end }} +{{- end }} diff --git a/charts/substrate/values.yaml b/charts/substrate/values.yaml new file mode 100644 index 000000000..e34cc244d --- /dev/null +++ b/charts/substrate/values.yaml @@ -0,0 +1,114 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default values for the substrate chart. +# +# The chart supports two installation modes via `auth.mode`: +# +# - "mtls" (default): Today's behavior. Server certs are issued by the +# in-cluster podcertcontroller via PodCertificateRequest + projected +# into pods via the ClusterTrustBundle / podCertificate projection +# sources. Valkey runs with full TLS + client-cert verification. +# REQUIRES the off-by-default Kubernetes feature gates: +# ClusterTrustBundle, ClusterTrustBundleProjection, PodCertificateRequest +# and the v1beta1 certificates API. +# +# - "jwt": No PodCertificateRequest / ClusterTrustBundle usage. Server +# certs and session signing pools are generated by the chart by default, +# and can be disabled when you want to provide your own key material. 
+# Clients authenticate to ateapi with a projected Kubernetes ServiceAccount +# token. Valkey runs plaintext. + +auth: + mode: mtls # mtls | jwt + + jwt: + # OIDC issuer URL the cluster uses to mint SA tokens. Required when + # mode=jwt. Examples: + # GKE: https://container.googleapis.com/v1/projects/<project>/locations/<location>/clusters/<cluster> + # kind: https://kubernetes.default.svc.cluster.local + # EKS: https://oidc.eks.<region>.amazonaws.com/id/<id> + issuer: "" + + # Audience SA tokens are minted for, and that ateapi expects. + audience: api.ate-system.svc + + bootstrap: + # Generate JWT-mode TLS and session-signing key material with Helm. + # Existing generated resources are reused on upgrade via lookup. + enabled: true + serverCert: + enabled: true + sessionPools: + enabled: true + + # Name of a kubernetes.io/tls Secret in the release namespace, with keys + # tls.crt and tls.key. Created by the chart when + # auth.jwt.bootstrap.serverCert.enabled=true. + serverCertSecret: ateapi-tls + + # Name of a ConfigMap in the release namespace with key "ca.crt" holding + # the CA(s) that signed serverCertSecret. Clients mount it to verify the + # ateapi server certificate. Created by the chart when + # auth.jwt.bootstrap.serverCert.enabled=true. + caBundleConfigMap: ateapi-ca + +# Set to true to have the chart create the release namespace. +# Off by default — most helm workflows expect the namespace to already exist +# (helm install -n <namespace> --create-namespace). Enable for the generated +# manifests/ate-install/ install path (kubectl apply). +createNamespace: false + +valkey: + enabled: true + replicas: 6 + storageSize: 1Gi + +# atelet daemonset overrides. Defaults avoid cloud-specific integrations. +# extraArgs / extraEnv are appended verbatim for installer-specific knobs +# (e.g. AWS_* for rustfs/S3 storage). +atelet: + gcpAuthForImagePulls: false + storageBackend: gcs + extraArgs: [] + extraEnv: [] + +redis: + # Override the cluster address.
Empty -> derived from valkey.enabled + # (defaults to "valkey-cluster.ate-system.svc:6379"). + clusterAddress: "" + # Google IAM auth (for managed Memorystore / cloud Valkey). + useIAMAuth: false + # Override TLS server name for Redis hostname verification (mtls mode). + tlsServerName: "" + # File path for Redis client TLS credential bundle (mtls mode). + clientCert: "" + +# Name of a ConfigMap in the release namespace that supplies per-environment +# overrides for ate-api-server (ATE_API_REDIS_*, ATE_API_K8SJWT_ISSUER, ...). +# Mounted via envFrom with optional=true. Created by the chart from these values. +ateApiServerEnvVarsConfigMap: ate-api-server-envvars + +otel: + endpoint: "" + +image: + registry: ghcr.io/kagent-dev/substrate + tag: "" + +images: + valkey: valkey/valkey:8.0 + agentgateway: cr.agentgateway.dev/agentgateway:v1.3.0-alpha.1 + coredns: coredns/coredns:1.11.1 + busybox: busybox:1.36 diff --git a/cmd/ateapi/internal/sessionidentity/sessionidentity.go b/cmd/ateapi/internal/sessionidentity/sessionidentity.go index 71fa2a6bc..c69fbbb48 100644 --- a/cmd/ateapi/internal/sessionidentity/sessionidentity.go +++ b/cmd/ateapi/internal/sessionidentity/sessionidentity.go @@ -21,6 +21,7 @@ import ( "crypto/x509/pkix" "fmt" "log/slog" + "net/http" "net/url" "os" "path" @@ -51,17 +52,19 @@ type Server struct { sessionIDCAPoolFile string workerCACerts string + httpClient *http.Client } var _ ateapipb.SessionIdentityServer = (*Server)(nil) -func New(clientJWTIssuer, clientJWTAudience, sessionIDJWTPoolFile, sessionIDCAPoolFile, workerCACerts string) *Server { +func New(clientJWTIssuer, clientJWTAudience, sessionIDJWTPoolFile, sessionIDCAPoolFile, workerCACerts string, httpClient *http.Client) *Server { return &Server{ clientJWTIssuer: clientJWTIssuer, clientJWTAudience: clientJWTAudience, sessionIDJWTPoolFile: sessionIDJWTPoolFile, sessionIDCAPoolFile: sessionIDCAPoolFile, workerCACerts: workerCACerts, + httpClient: httpClient, } } @@ -78,7 +81,7 @@ func (s 
*Server) MintJWT(ctx context.Context, req *ateapipb.MintJWTRequest) (*at clientJWT := strings.TrimPrefix(authorization[0], "Bearer ") - clientClaims, err := k8sjwt.Verify(ctx, clientJWT, s.clientJWTIssuer, s.clientJWTAudience, time.Now()) + clientClaims, err := k8sjwt.Verify(ctx, s.httpClient, clientJWT, s.clientJWTIssuer, s.clientJWTAudience, time.Now()) if err != nil { slog.ErrorContext(ctx, "Error while verifying client JWT", slog.Any("err", err)) return nil, status.Errorf(codes.Unauthenticated, "Unauthenticated") diff --git a/cmd/ateapi/main.go b/cmd/ateapi/main.go index 87d5a7433..76705db6e 100644 --- a/cmd/ateapi/main.go +++ b/cmd/ateapi/main.go @@ -21,12 +21,15 @@ import ( "fmt" "log/slog" "net" + "net/http" "os" + "strings" "time" "github.com/agent-substrate/substrate/cmd/ateapi/internal/controlapi" "github.com/agent-substrate/substrate/cmd/ateapi/internal/sessionidentity" "github.com/agent-substrate/substrate/cmd/ateapi/internal/store/ateredis" + "github.com/agent-substrate/substrate/internal/ateapiauth" "github.com/agent-substrate/substrate/internal/ateinterceptors" "github.com/agent-substrate/substrate/internal/credbundle" "github.com/agent-substrate/substrate/internal/serverboot" @@ -56,6 +59,7 @@ var ( redisUseIAMAuth = pflag.String("redis-use-iam-auth", "true", "Whether to use Google IAM authentication for Redis/Valkey.") redisTLSServerName = pflag.String("redis-tls-server-name", "", "The ServerName to use for Redis TLS hostname verification.") redisClientCert = pflag.String("redis-client-cert", "", "The file containing client TLS certificate/key credential bundle for Redis/Valkey.") + redisNoTLS = pflag.Bool("redis-no-tls", false, "If true, connect to Redis/Valkey in plaintext (no TLS). 
For development / installs that don't enable Valkey TLS.") clientJWTIssuer = pflag.String("client-jwt-issuer", "", "The expected issuer URL for client JWTs.") clientJWTAudience = pflag.String("client-jwt-audience", "", "The expected audience for client JWTs.") @@ -64,7 +68,9 @@ var ( sessionIDCAPoolFile = pflag.String("session-id-ca-pool", "", "The file that contains the CA pool for signing session JWTs") workerpoolCACerts = pflag.String("workerpool-ca-certs", "", "The file that contains the CA for verifying workerpool client certificates.") - showVersion = pflag.Bool("version", false, "Print version and exit.") + showVersion = pflag.Bool("version", false, "Print version and exit.") + authMode = pflag.String("auth-mode", "mtls", "Auth mode for incoming gRPC: mtls|jwt. 'mtls' (default) relies on transport-level mTLS for client identity. 'jwt' additionally requires a Kubernetes ServiceAccount Bearer token on every RPC.") + clientJWTCAFile = pflag.String("client-jwt-ca-cert", "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt", "CA cert file used to verify TLS when fetching the OIDC discovery document and JWKS for JWT authentication. 
Defaults to the in-cluster service account CA.") ) func main() { @@ -94,6 +100,11 @@ func main() { loadFlagsFromEnv() logFlagValues(ctx) + authModeParsed, err := ateapiauth.ParseMode(*authMode) + if err != nil { + serverboot.Fatal(ctx, "Invalid --auth-mode", err) + } + redisClient, err := connectRedis(ctx) if err != nil { serverboot.Fatal(ctx, "Failed to set up Redis/Valkey", err) @@ -133,7 +144,9 @@ func main() { dialer := controlapi.NewAteletDialer(workerPodInformer.GetIndexer(), ateletPodInformer.GetIndexer()) sm := controlapi.NewService(redisPersistence, actorTemplateLister, dialer, clientset) - sessionIdentitySrv := sessionidentity.New(*clientJWTIssuer, *clientJWTAudience, *sessionIDJWTPoolFile, *sessionIDCAPoolFile, *workerpoolCACerts) + jwtHTTPClient := buildJWTHTTPClient(ctx, *clientJWTCAFile) + + sessionIdentitySrv := sessionidentity.New(*clientJWTIssuer, *clientJWTAudience, *sessionIDJWTPoolFile, *sessionIDCAPoolFile, *workerpoolCACerts, jwtHTTPClient) lisCfg := &net.ListenConfig{} lis, err := lisCfg.Listen(ctx, "tcp", *listenAddr) @@ -141,10 +154,23 @@ func main() { serverboot.Fatal(ctx, "Failed to start listener", err) } + authCfg := ateapiauth.ServerConfig{ + Mode: authModeParsed, + Issuer: *clientJWTIssuer, + Audience: *clientJWTAudience, + HTTPClient: jwtHTTPClient, + } + mux := grpc.NewServer( grpc.Creds(serverCreds), grpc.StatsHandler(otelgrpc.NewServerHandler()), - grpc.UnaryInterceptor(ateinterceptors.ServerUnaryInterceptor), + grpc.ChainUnaryInterceptor( + ateapiauth.UnaryServerInterceptor(authCfg), + ateinterceptors.ServerUnaryInterceptor, + ), + grpc.ChainStreamInterceptor( + ateapiauth.StreamServerInterceptor(authCfg), + ), ) reflection.Register(mux) ateapipb.RegisterControlServer(mux, sm) @@ -191,25 +217,30 @@ func logFlagValues(ctx context.Context) { slog.String("redis-use-iam-auth", *redisUseIAMAuth), slog.String("redis-tls-server-name", *redisTLSServerName), slog.String("redis-client-cert", *redisClientCert), + slog.Bool("redis-no-tls", 
*redisNoTLS), slog.String("client-jwt-issuer", *clientJWTIssuer), slog.String("client-jwt-audience", *clientJWTAudience), slog.String("session-id-jwt-pool", *sessionIDJWTPoolFile), slog.String("session-id-ca-pool", *sessionIDCAPoolFile), slog.String("workerpool-ca-certs", *workerpoolCACerts), + slog.String("auth-mode", *authMode), ) } // connectRedis builds the Redis/Valkey TLS config, plumbs IAM auth if // requested, opens the cluster client, and pings with retries. func connectRedis(ctx context.Context) (*redis.ClusterClient, error) { - tlsConfig, err := buildRedisTLSConfig(ctx) - if err != nil { - return nil, err - } - clusterOpts := &redis.ClusterOptions{ - Addrs: []string{*redisClusterAddress}, - TLSConfig: tlsConfig, + Addrs: []string{*redisClusterAddress}, + } + if *redisNoTLS { + slog.InfoContext(ctx, "Connecting to Redis/Valkey without TLS (--redis-no-tls=true)") + } else { + tlsConfig, err := buildRedisTLSConfig(ctx) + if err != nil { + return nil, err + } + clusterOpts.TLSConfig = tlsConfig } if *redisUseIAMAuth != "false" { @@ -325,3 +356,48 @@ func buildServerCreds(ctx context.Context) (credentials.TransportCredentials, er ClientCAs: clientCAs, }), nil } + +const saTokenFile = "/var/run/secrets/kubernetes.io/serviceaccount/token" + +// buildJWTHTTPClient returns an *http.Client that trusts caFile for TLS +// verification and injects the pod's ServiceAccount Bearer token, used when +// fetching the OIDC discovery document and JWKS from the in-cluster Kubernetes +// API server. Returns nil (use http.DefaultClient) if caFile is empty or unreadable. 
+func buildJWTHTTPClient(ctx context.Context, caFile string) *http.Client { + if caFile == "" { + return nil + } + ca, err := os.ReadFile(caFile) + if err != nil { + slog.WarnContext(ctx, "Could not read JWT CA cert file; OIDC discovery will use system trust", slog.String("path", caFile), slog.Any("err", err)) + return nil + } + pool := x509.NewCertPool() + if !pool.AppendCertsFromPEM(ca) { + slog.WarnContext(ctx, "Could not parse JWT CA cert file; OIDC discovery will use system trust", slog.String("path", caFile)) + return nil + } + return &http.Client{ + Transport: &saTokenTransport{ + base: &http.Transport{ + TLSClientConfig: &tls.Config{RootCAs: pool}, + }, + }, + } +} + +// saTokenTransport injects the pod's ServiceAccount Bearer token on every +// request. Reads the token file fresh on each request so token rotation is +// handled automatically. +type saTokenTransport struct { + base http.RoundTripper +} + +func (t *saTokenTransport) RoundTrip(req *http.Request) (*http.Response, error) { + token, err := os.ReadFile(saTokenFile) + if err == nil && len(token) > 0 { + req = req.Clone(req.Context()) + req.Header.Set("Authorization", "Bearer "+strings.TrimSpace(string(token))) + } + return t.base.RoundTrip(req) +} diff --git a/cmd/atecontroller/main.go b/cmd/atecontroller/main.go index f7e922273..4db44cc02 100644 --- a/cmd/atecontroller/main.go +++ b/cmd/atecontroller/main.go @@ -14,15 +14,14 @@ package main import ( - "crypto/tls" "os" + "github.com/agent-substrate/substrate/internal/ateapiauth" "github.com/agent-substrate/substrate/internal/controllers" clientv1alpha1 "github.com/agent-substrate/substrate/pkg/api/v1alpha1" "github.com/agent-substrate/substrate/pkg/proto/ateapipb" "github.com/spf13/pflag" "google.golang.org/grpc" - "google.golang.org/grpc/credentials" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" @@ -39,6 +38,11 @@ var ( setupLog = 
ctrl.Log.WithName("setup") ateAPIConnSpec = pflag.String("ateapi-conn-spec", "dns:///api.ate-system.svc:443", "") + + ateapiAuthMode = pflag.String("ateapi-auth", "mtls", "Client auth to ateapi: mtls|jwt. 'mtls' (default) dials with insecure TLS and relies on pod-projected mTLS credentials for identity. 'jwt' verifies the server cert and sends a Bearer SA token.") + ateapiCAFile = pflag.String("ateapi-ca-file", "", "PEM file with CAs trusted to verify the ateapi server cert. Required for jwt.") + ateapiServerName = pflag.String("ateapi-server-name", "", "SNI / hostname expected on the ateapi server cert. Optional.") + ateapiTokenFile = pflag.String("ateapi-token-file", "", "Projected SA token file used as Bearer credential. Required for jwt.") ) func init() { @@ -47,15 +51,27 @@ func init() { } func main() { + pflag.Parse() ctrl.SetLogger(zap.New(zap.UseDevMode(true))) - // TODO: Verify server certificate, pass client certificate. - clientTLSConfig := &tls.Config{ - InsecureSkipVerify: true, // Temporarily bypass standard checks + mode, err := ateapiauth.ParseMode(*ateapiAuthMode) + if err != nil { + setupLog.Error(err, "invalid --ateapi-auth") + os.Exit(1) + } + + dialOpts, err := ateapiauth.DialOptions(ateapiauth.ClientConfig{ + Mode: mode, + CAFile: *ateapiCAFile, + ServerName: *ateapiServerName, + TokenFile: *ateapiTokenFile, + }) + if err != nil { + setupLog.Error(err, "building ateapi dial options") + os.Exit(1) } - clientCreds := credentials.NewTLS(clientTLSConfig) - ateapiConn, err := grpc.NewClient(*ateAPIConnSpec, grpc.WithTransportCredentials(clientCreds)) + ateapiConn, err := grpc.NewClient(*ateAPIConnSpec, dialOpts...) 
if err != nil { setupLog.Error(err, "Error creating grpc connection to ate api") os.Exit(1) diff --git a/cmd/atenet/internal/app/router/provider.go b/cmd/atenet/internal/app/router/provider.go index ccd68cf79..ecab5f8ae 100644 --- a/cmd/atenet/internal/app/router/provider.go +++ b/cmd/atenet/internal/app/router/provider.go @@ -44,10 +44,10 @@ type proxyProvider interface { func newProxyProvider(cfg RouterConfig) (proxyProvider, error) { switch strings.ToLower(cfg.NetworkingMode) { - case "", NetworkingModeEnvoy: - return envoyProvider{cfg: cfg}, nil - case NetworkingModeAgentgateway: + case "", NetworkingModeAgentgateway: return agentgatewayProvider{cfg: cfg}, nil + case NetworkingModeEnvoy: + return envoyProvider{cfg: cfg}, nil default: return nil, fmt.Errorf("unsupported networking mode %q", cfg.NetworkingMode) } diff --git a/cmd/atenet/internal/app/router/router.go b/cmd/atenet/internal/app/router/router.go index 0449aacbb..15c757078 100644 --- a/cmd/atenet/internal/app/router/router.go +++ b/cmd/atenet/internal/app/router/router.go @@ -18,7 +18,6 @@ import ( "context" "crypto/rand" "crypto/rsa" - "crypto/tls" "crypto/x509" "crypto/x509/pkix" "encoding/pem" @@ -37,7 +36,6 @@ import ( "github.com/spf13/cobra" "golang.org/x/sync/errgroup" "google.golang.org/grpc" - "google.golang.org/grpc/credentials" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/client-go/kubernetes" @@ -46,6 +44,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/config" + "github.com/agent-substrate/substrate/internal/ateapiauth" "github.com/agent-substrate/substrate/internal/serverboot" v1alpha1 "github.com/agent-substrate/substrate/pkg/api/v1alpha1" "github.com/agent-substrate/substrate/pkg/proto/ateapipb" @@ -81,6 +80,11 @@ type RouterConfig struct { TLSKeyPath string LogLevel string MetricsAddr string + + AteapiAuthMode string + AteapiCAFile string + AteapiServerName string + AteapiTokenFile 
string } // RouterServer instantiates and coordinates runtime threads executing system modules. @@ -154,7 +158,7 @@ func NewCmd() *cobra.Command { cmd.Flags().IntVar(&cfg.XdsPort, "port-xds", 18000, "TCP port listening for the xDS dynamic Envoy connections") cmd.Flags().IntVar(&cfg.ExtprocPort, "port-extproc", 50051, "Listen port for the Envoy dynamic External Processing (ext_proc) server") cmd.Flags().StringVar(&cfg.ExtprocAddr, "extproc-address", "127.0.0.1", "Host IP or address of the Envoy External Processing (ext_proc) server") - cmd.Flags().StringVar(&cfg.NetworkingMode, "networking-mode", NetworkingModeEnvoy, "Networking proxy mode: envoy or agentgateway") + cmd.Flags().StringVar(&cfg.NetworkingMode, "networking-mode", NetworkingModeAgentgateway, "Networking proxy mode: agentgateway or envoy") cmd.Flags().StringVar(&cfg.EnvoyImage, "envoy-image", "envoyproxy/envoy:v1.30-latest", "Image URI used for dynamically launched router instances") cmd.Flags().StringVar(&cfg.AgentgatewayImage, "agentgateway-image", "cr.agentgateway.dev/agentgateway:v1.3.0-alpha.1", "Image URI used for Agentgateway router instances") cmd.Flags().StringVar(&cfg.TemplatesFile, "actor-templates-file", "", "Path to offline YAML configuration file listing ActorTemplates") @@ -164,12 +168,17 @@ func NewCmd() *cobra.Command { cmd.Flags().StringVar(&cfg.TLSCertPath, "tls-cert-path", "", "Path to the proxy TLS certificate file") cmd.Flags().StringVar(&cfg.TLSKeyPath, "tls-key-path", "", "Path to the proxy TLS private key file") + cmd.Flags().StringVar(&cfg.AteapiAuthMode, "ateapi-auth", "mtls", "Client auth to ateapi: mtls|jwt. 'mtls' (default) dials with insecure TLS and relies on pod-projected mTLS credentials for identity. 'jwt' verifies the server cert and sends a Bearer SA token.") + cmd.Flags().StringVar(&cfg.AteapiCAFile, "ateapi-ca-file", "", "PEM file with CAs trusted to verify the ateapi server cert. 
Required for jwt.") + cmd.Flags().StringVar(&cfg.AteapiServerName, "ateapi-server-name", "", "SNI / hostname expected on the ateapi server cert. Optional.") + cmd.Flags().StringVar(&cfg.AteapiTokenFile, "ateapi-token-file", "", "Projected SA token file used as Bearer credential. Required for jwt.") + return cmd } func NewRouterServer(cfg RouterConfig) (*RouterServer, error) { if cfg.NetworkingMode == "" { - cfg.NetworkingMode = NetworkingModeEnvoy + cfg.NetworkingMode = NetworkingModeAgentgateway } var k8sClient client.Client @@ -203,11 +212,24 @@ func NewRouterServer(cfg RouterConfig) (*RouterServer, error) { } } - conn, err := grpc.NewClient(cfg.AteapiAddr, grpc.WithTransportCredentials(credentials.NewTLS(&tls.Config{InsecureSkipVerify: true}))) + authMode, err := ateapiauth.ParseMode(cfg.AteapiAuthMode) + if err != nil { + return nil, fmt.Errorf("invalid --ateapi-auth: %w", err) + } + dialOpts, err := ateapiauth.DialOptions(ateapiauth.ClientConfig{ + Mode: authMode, + CAFile: cfg.AteapiCAFile, + ServerName: cfg.AteapiServerName, + TokenFile: cfg.AteapiTokenFile, + }) + if err != nil { + return nil, fmt.Errorf("building ateapi dial options: %w", err) + } + conn, err := grpc.NewClient(cfg.AteapiAddr, dialOpts...) 
if err != nil { return nil, fmt.Errorf("failed to establish grpc channel to ateapi client: %w", err) } - slog.Info("Connecting to ateapi", slog.String("address", cfg.AteapiAddr)) + slog.Info("Connecting to ateapi", slog.String("address", cfg.AteapiAddr), slog.String("auth", string(authMode))) apiClient := ateapipb.NewControlClient(conn) diff --git a/cmd/atenet/internal/app/router/xds.go b/cmd/atenet/internal/app/router/xds.go index 964fc5e92..6b248f53b 100644 --- a/cmd/atenet/internal/app/router/xds.go +++ b/cmd/atenet/internal/app/router/xds.go @@ -62,6 +62,7 @@ const ( IngressHTTPSListener = "ingress_https_listener" RouteName = "substrate_routes" ClusterName = "ate-cluster" + websocketUpgradeType = "websocket" ) // XdsServer implements an aggregated discovery service server for dynamic Envoy router nodes. @@ -287,6 +288,9 @@ func (x *XdsServer) buildRoutes() *routev3.RouteConfiguration { Cluster: "dynamic_forward_proxy_cluster", }, Timeout: durationpb.New(10 * time.Second), + UpgradeConfigs: []*routev3.RouteAction_UpgradeConfig{ + {UpgradeType: websocketUpgradeType}, + }, }, }, }, @@ -334,6 +338,10 @@ func (x *XdsServer) buildHcm(statPrefix string) *anypb.Any { hcm, _ := anypb.New(&hcmv3.HttpConnectionManager{ StatPrefix: statPrefix, GenerateRequestId: &wrapperspb.BoolValue{Value: true}, + UpgradeConfigs: []*hcmv3.HttpConnectionManager_UpgradeConfig{ + {UpgradeType: websocketUpgradeType}, + }, + StreamIdleTimeout: durationpb.New(0), AccessLog: []*accesslogv3.AccessLog{ { Name: "envoy.access_loggers.stdout", diff --git a/cmd/atenet/internal/app/router/xds_test.go b/cmd/atenet/internal/app/router/xds_test.go index 92e347648..f6548f41a 100644 --- a/cmd/atenet/internal/app/router/xds_test.go +++ b/cmd/atenet/internal/app/router/xds_test.go @@ -24,6 +24,7 @@ import ( clusterv3 "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" listenerv3 "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" routev3 
"github.com/envoyproxy/go-control-plane/envoy/config/route/v3" + hcmv3 "github.com/envoyproxy/go-control-plane/envoy/extensions/filters/network/http_connection_manager/v3" cachev3 "github.com/envoyproxy/go-control-plane/pkg/cache/v3" resourcev3 "github.com/envoyproxy/go-control-plane/pkg/resource/v3" ) @@ -116,6 +117,10 @@ func TestXdsServer_UpdateSnapshot(t *testing.T) { if fallbackRoute.GetMatch().GetPrefix() != "/" { t.Errorf("Expected path mapping prefix '/', got '%s'", fallbackRoute.GetMatch().GetPrefix()) } + routeAction := fallbackRoute.GetRoute() + if len(routeAction.GetUpgradeConfigs()) != 1 || routeAction.GetUpgradeConfigs()[0].GetUpgradeType() != websocketUpgradeType { + t.Errorf("Expected route websocket upgrade config, got %+v", routeAction.GetUpgradeConfigs()) + } } // Verify listeners generated @@ -135,6 +140,15 @@ func TestXdsServer_UpdateSnapshot(t *testing.T) { if sa.GetAddress() != "0.0.0.0" { t.Errorf("Expected address '0.0.0.0', got %s", sa.GetAddress()) } + + hcmAny := l.GetFilterChains()[0].GetFilters()[0].GetTypedConfig() + hcm := &hcmv3.HttpConnectionManager{} + if err := hcmAny.UnmarshalTo(hcm); err != nil { + t.Fatalf("Failed to unmarshal HCM: %v", err) + } + if len(hcm.GetUpgradeConfigs()) != 1 || hcm.GetUpgradeConfigs()[0].GetUpgradeType() != websocketUpgradeType { + t.Errorf("Expected HCM websocket upgrade config, got %+v", hcm.GetUpgradeConfigs()) + } } } diff --git a/hack/create-kind-cluster.sh b/hack/create-kind-cluster.sh index 2915a95a2..bd3ecebef 100755 --- a/hack/create-kind-cluster.sh +++ b/hack/create-kind-cluster.sh @@ -18,6 +18,12 @@ set -o errexit -o nounset -o pipefail ROOT="$(cd "$(dirname "$0")/.." && pwd)" KIND_CLUSTER_NAME="${KIND_CLUSTER_NAME:-kind}" +# Enable the off-by-default certificate feature gates required by the mTLS +# install path (cmd/podcertcontroller). On by default — the Quickstart's +# `hack/install-ate-kind.sh --deploy-ate-system` uses mTLS. 
Opt out +# (KIND_ENABLE_PODCERT=false) only when installing JWT-mode manifests, which +# do not require these gates. +KIND_ENABLE_PODCERT="${KIND_ENABLE_PODCERT:-true}" reg_name="kind-registry" reg_port="5001" @@ -43,12 +49,16 @@ if [ "$(docker inspect -f '{{.State.Running}}' "${reg_name}" 2>/dev/null || true fi # 2. Create kind configuration with containerdConfigPatches and feature gates -echo "Creating kind configuration for cluster '${KIND_CLUSTER_NAME}'..." +echo "Creating kind configuration for cluster '${KIND_CLUSTER_NAME}' (KIND_ENABLE_PODCERT=${KIND_ENABLE_PODCERT})..." cat <<EOF > "${ROOT}/bin/kind-config.yaml" kind: Cluster apiVersion: kind.x-k8s.io/v1alpha4 nodes: - role: control-plane +EOF + +if [ "${KIND_ENABLE_PODCERT}" = "true" ]; then +cat <<EOF >> "${ROOT}/bin/kind-config.yaml" # cmd/podcertcontroller depends on ClusterTrustBundle & PodCertificateRequest. # They are not enabled by default as of Kubernetes v1.36 # https://github.com/kubernetes/kubernetes/blob/master/test/compatibility_lifecycle/reference/versioned_feature_list.yaml @@ -59,6 +69,7 @@ featureGates: runtimeConfig: "certificates.k8s.io/v1beta1": "true" EOF +fi echo "Deleting existing kind cluster '${KIND_CLUSTER_NAME}' if it exists..." "${ROOT}"/hack/kind.sh delete cluster --name "${KIND_CLUSTER_NAME}" || true diff --git a/hack/gen-rbac.sh b/hack/gen-rbac.sh new file mode 100755 index 000000000..b8f9d9647 --- /dev/null +++ b/hack/gen-rbac.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +# Generate the controller ClusterRole into the Helm chart and templatize its +# name so multi-release installs do not collide on a cluster-scoped resource. +# +# controller-gen emits a YAML file with a fixed `roleName=` value. We post- +# process that file to swap the static name for the chart's fullname helper, +# matching the convention used by every other resource in charts/substrate/. +# +# Invoked via `go generate ./internal/controllers/...`. +set -o errexit -o nounset -o pipefail + +ROOT="$(cd "$(dirname "$0")/.." && pwd)" +OUT="${ROOT}/charts/substrate/templates/role.yaml" + +bash "${ROOT}/hack/run-tool.sh" controller-gen \ + "rbac:headerFile=${ROOT}/hack/boilerplate/sh.txt,roleName=ate-controller" \ + paths="${ROOT}/internal/controllers/..." \ + "output:rbac:artifacts:config=${ROOT}/charts/substrate/templates/" + +# Templatize the ClusterRole name. controller-gen emits ` name: ate-controller` +# at column 0; the substitution is exact-match to stay robust. +sed -i 's|^ name: ate-controller$| name: {{ include "substrate.fullname" (list "ate-controller" .) }}|' "${OUT}" diff --git a/hack/install-ate-kind-jwt.sh b/hack/install-ate-kind-jwt.sh new file mode 100755 index 000000000..322e31492 --- /dev/null +++ b/hack/install-ate-kind-jwt.sh @@ -0,0 +1,146 @@ +#!/usr/bin/env bash + +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Install Agent Substrate on a kind cluster in JWT auth mode. +# +# Unlike the mTLS install path (hack/install-ate-kind.sh), this works on a +# stock Kubernetes cluster — no ClusterTrustBundle / PodCertificateRequest +# feature gates required. Suitable for a kind cluster created with +# KIND_ENABLE_PODCERT=false hack/create-kind-cluster.sh. +# +# Steps: +# 1. Render the chart with auth.mode=jwt + kind-specific values, resolve +# ko:// image refs against a local registry, and apply. +# 2. Apply the kind-only extras (rustfs storage, OTel collector) from +# manifests/ate-install/kind/. +set -o errexit -o nounset -o pipefail + +ROOT="$(cd "$(dirname "$0")/.." && pwd)" +NS="${NS:-ate-system}" +KIND_CLUSTER_NAME="${KIND_CLUSTER_NAME:-kind}" +KUBECTL_CONTEXT="${KUBECTL_CONTEXT:-}" +KO_DOCKER_REPO="${KO_DOCKER_REPO:-localhost:5001}" +KO_DEFAULTPLATFORMS="${KO_DEFAULTPLATFORMS:-linux/$(go env GOARCH)}" +reg_name="kind-registry" +reg_port="5001" + +export KO_DOCKER_REPO KO_DEFAULTPLATFORMS + +run_kubectl() { + kubectl ${KUBECTL_CONTEXT:+--context=${KUBECTL_CONTEXT}} "$@" +} + +run_helm() { + helm ${KUBECTL_CONTEXT:+--kube-context=${KUBECTL_CONTEXT}} "$@" +} + +log_step() { + echo -e "\033[1;36m[step]:\033[0m $1" +} + +ensure_namespace() { + log_step "ensure_namespace ${NS}" + run_kubectl create namespace "${NS}" --dry-run=client -o yaml | run_kubectl apply -f - +} + +ensure_kind_local_registry() { + log_step "ensure_kind_local_registry" + + if [ "$(docker inspect -f '{{.State.Running}}' "${reg_name}" 2>/dev/null || true)" == "true" ]; then + if ! docker port "${reg_name}" | grep -q "${reg_port}"; then + echo "Registry exists but is not mapped to port ${reg_port}. Recreating..." 
+ docker rm -f "${reg_name}" + fi + fi + + if [ "$(docker inspect -f '{{.State.Running}}' "${reg_name}" 2>/dev/null || true)" != "true" ]; then + docker run \ + -d --restart=always \ + --label created-by=agent-substrate \ + -p "127.0.0.1:${reg_port}:5000" \ + -p "[::1]:${reg_port}:5000" \ + --network bridge --name "${reg_name}" \ + registry:3 + fi + + if [ "$(docker inspect -f='{{json .NetworkSettings.Networks.kind}}' "${reg_name}")" = "null" ]; then + docker network connect "kind" "${reg_name}" + fi + + local registry_dir="/etc/containerd/certs.d/localhost:${reg_port}" + local node + for node in $("${ROOT}"/hack/kind.sh get nodes --name "${KIND_CLUSTER_NAME}"); do + docker exec "${node}" mkdir -p "${registry_dir}" + cat <') + + # ko resolve replaces ko:// refs with built+pushed image refs. + echo "${rendered}" | bash "${ROOT}/hack/run-tool.sh" ko resolve -f - \ + | run_kubectl apply -f - +} + +apply_crds() { + log_step "apply_crds" + run_helm upgrade --install substrate-crds "${ROOT}/charts/substrate-crds" +} + +apply_kind_extras() { + log_step "apply_kind_extras (rustfs + otel-collector)" + run_kubectl apply -f "${ROOT}/manifests/ate-install/kind/rustfs.yaml" + run_kubectl apply -f "${ROOT}/manifests/ate-install/kind/otel-collector.yaml" +} + +wait_rollouts() { + log_step "wait_rollouts" + run_kubectl -n "${NS}" rollout status deployment/ate-api-server-deployment --timeout=180s + run_kubectl -n "${NS}" rollout status deployment/ate-controller --timeout=180s + run_kubectl -n "${NS}" rollout status deployment/atenet-router --timeout=180s + run_kubectl -n "${NS}" rollout status daemonset/atelet --timeout=180s + run_kubectl -n "${NS}" rollout status statefulset/valkey-cluster --timeout=180s +} + +ensure_namespace +ensure_kind_local_registry +apply_crds +apply_chart +apply_kind_extras +wait_rollouts + +echo "Substrate (JWT mode) installed in namespace ${NS}." 
diff --git a/hack/install-ate.sh b/hack/install-ate.sh index c8abaf3c4..12140c130 100755 --- a/hack/install-ate.sh +++ b/hack/install-ate.sh @@ -62,7 +62,7 @@ function usage() { echo "Overall infrastructure (all infrastructure components):" echo "" echo " --deploy-ate-system Deploy core system (CRDs, atelet, apiserver)" - echo " --router=envoy|agentgateway Select atenet-router implementation (default: agentgateway)" + echo " --router=agentgateway Select atenet-router implementation (default: agentgateway)" echo " --delete-ate-system Delete core system" echo " --delete-all Delete core system and all registered demos" echo "" @@ -120,11 +120,11 @@ run_ko() { set_atenet_router() { case "$1" in - envoy|agentgateway) + agentgateway) ATE_INSTALL_ATENET_ROUTER="$1" ;; *) - echo "unsupported atenet router mode: $1" >&2 + echo "unsupported atenet router mode: $1 (only agentgateway is supported)" >&2 exit 1 ;; esac @@ -132,14 +132,11 @@ set_atenet_router() { atenet_router_manifest() { case "${ATE_INSTALL_ATENET_ROUTER}" in - envoy) - echo "manifests/ate-install/atenet-router.yaml" - ;; agentgateway) - echo "manifests/ate-install/atenet-router-agentgateway.yaml" + echo "manifests/ate-install/atenet-router.yaml" ;; *) - echo "unsupported atenet router mode: ${ATE_INSTALL_ATENET_ROUTER}" >&2 + echo "unsupported atenet router mode: ${ATE_INSTALL_ATENET_ROUTER} (only agentgateway is supported)" >&2 exit 1 ;; esac @@ -147,14 +144,11 @@ atenet_router_manifest() { ate_install_kustomize_base_dir() { case "${ATE_INSTALL_ATENET_ROUTER}" in - envoy) - echo "manifests/ate-install/base" - ;; agentgateway) - echo "manifests/ate-install/base-agentgateway" + echo "manifests/ate-install/base" ;; *) - echo "unsupported atenet router mode: ${ATE_INSTALL_ATENET_ROUTER}" >&2 + echo "unsupported atenet router mode: ${ATE_INSTALL_ATENET_ROUTER} (only agentgateway is supported)" >&2 exit 1 ;; esac @@ -162,14 +156,11 @@ ate_install_kustomize_base_dir() { ate_install_kustomize_dir() { case 
"${ATE_INSTALL_ATENET_ROUTER}" in - envoy) - echo "manifests/ate-install/kind" - ;; agentgateway) - echo "manifests/ate-install/kind-agentgateway" + echo "manifests/ate-install/kind" ;; *) - echo "unsupported atenet router mode: ${ATE_INSTALL_ATENET_ROUTER}" >&2 + echo "unsupported atenet router mode: ${ATE_INSTALL_ATENET_ROUTER} (only agentgateway is supported)" >&2 exit 1 ;; esac @@ -271,6 +262,7 @@ ensure_crds() { deploy_crds() { log_step "deploy_crds" run_ko apply -f manifests/ate-install/generated + run_kubectl apply -f manifests/ate-install/role.yaml } deploy_ate_system() { @@ -278,7 +270,7 @@ deploy_ate_system() { ensure_crds # Ensure namespace exists - run_kubectl apply -f manifests/ate-install/ate-system-namespace.yaml \ + run_kubectl apply -f manifests/ate-install/namespace.yaml \ && run_kubectl wait --for=jsonpath='{.status.phase}'=Active namespace/ate-system --timeout=60s ensure_apiserver_prerequisites @@ -335,7 +327,7 @@ deploy_ate_apiserver() { ensure_crds # Ensure namespace exists - run_kubectl apply -f manifests/ate-install/ate-system-namespace.yaml \ + run_kubectl apply -f manifests/ate-install/namespace.yaml \ && run_kubectl wait --for=jsonpath='{.status.phase}'=Active namespace/ate-system --timeout=60s ensure_apiserver_prerequisites @@ -349,7 +341,7 @@ deploy_atelet() { ensure_crds # Ensure namespace exists - run_kubectl apply -f manifests/ate-install/ate-system-namespace.yaml \ + run_kubectl apply -f manifests/ate-install/namespace.yaml \ && run_kubectl wait --for=jsonpath='{.status.phase}'=Active namespace/ate-system --timeout=60s local manifest="" @@ -369,7 +361,7 @@ deploy_atenet() { ensure_crds # Ensure namespace exists - run_kubectl apply -f manifests/ate-install/ate-system-namespace.yaml \ + run_kubectl apply -f manifests/ate-install/namespace.yaml \ && run_kubectl wait --for=jsonpath='{.status.phase}'=Active namespace/ate-system --timeout=60s run_ko apply -f "$(atenet_router_manifest)" diff --git a/hack/render-manifests.sh 
b/hack/render-manifests.sh new file mode 100755 index 000000000..bbe50befb --- /dev/null +++ b/hack/render-manifests.sh @@ -0,0 +1,120 @@ +#!/usr/bin/env bash + +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Render the substrate Helm chart into manifests/ate-install/ (mTLS-mode +# install) — the canonical kubectl-apply install path. The chart at +# charts/substrate/ is the single source of truth; this script only renders. +# +# Usage: +# hack/render-manifests.sh # write into manifests/ate-install/ +# hack/render-manifests.sh --check # fail if rendered output differs +# +set -o errexit -o nounset -o pipefail + +ROOT="$(cd "$(dirname "$0")/.." && pwd)" +OUT_DIR="${ROOT}/manifests/ate-install" +CHART_DIR="${ROOT}/charts/substrate" +CHECK_MODE="false" + +if [ "${1:-}" = "--check" ]; then + CHECK_MODE="true" +fi + +if ! command -v helm >/dev/null 2>&1; then + echo "helm not found in PATH" >&2 + exit 1 +fi + +TMP_DIR="$(mktemp -d)" +trap 'rm -rf "$TMP_DIR"' EXIT + +helm template substrate "${CHART_DIR}" \ + --namespace ate-system \ + --set auth.mode=mtls \ + --set createNamespace=true \ + --set image.registry=ko://github.com/agent-substrate/substrate/cmd \ + --set image.tag="" \ + > "${TMP_DIR}/all.yaml" + +# Split into per-source files so the directory structure mirrors the chart +# templates, making diffs friendlier. 
+python3 - "${TMP_DIR}/all.yaml" "${TMP_DIR}/out" <<'PY' +import os, re, sys, yaml +in_path, out_dir = sys.argv[1], sys.argv[2] +os.makedirs(out_dir, exist_ok=True) + +with open(in_path) as f: + raw = f.read() + +# Helm prepends a "# Source: /templates/" comment to each doc. +docs_by_source = {} +for doc in raw.split('\n---\n'): + m = re.search(r'#\s*Source:\s*\S+/templates/(\S+)', doc) + src = m.group(1) if m else "misc.yaml" + # Drop the leading "# Source:" line from the written file. + cleaned = re.sub(r'^\s*#\s*Source:.*\n', '', doc, count=1, flags=re.MULTILINE) + if not cleaned.strip(): + continue + docs_by_source.setdefault(src, []).append(cleaned.strip()) + +for src, docs in docs_by_source.items(): + header = ( + "# Copyright 2026 Google LLC\n" + "#\n" + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n" + "# you may not use this file except in compliance with the License.\n" + "# You may obtain a copy of the License at\n" + "#\n" + "# http://www.apache.org/licenses/LICENSE-2.0\n" + "#\n" + "# Unless required by applicable law or agreed to in writing, software\n" + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n" + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n" + "# See the License for the specific language governing permissions and\n" + "# limitations under the License.\n" + "\n" + "# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh.\n" + "# Run `make helm-template` to regenerate.\n" + "\n" + ) + with open(os.path.join(out_dir, src), "w") as out: + out.write(header) + out.write("\n---\n".join(docs)) + out.write("\n") +PY + +if [ "${CHECK_MODE}" = "true" ]; then + # Only compare top-level files; subdirs like generated/ and kind/ are not + # produced by the chart and live alongside it intentionally. 
+ CHECK_TMP="$(mktemp -d)" + trap 'rm -rf "$TMP_DIR" "$CHECK_TMP"' EXIT + mkdir -p "${CHECK_TMP}/current" + find "${OUT_DIR}" -maxdepth 1 -type f -name '*.yaml' -exec cp {} "${CHECK_TMP}/current/" \; + if ! diff -ruN "${CHECK_TMP}/current" "${TMP_DIR}/out" >/dev/null 2>&1; then + echo "manifests/ate-install/ is out of date. Run: make helm-template" >&2 + diff -ruN "${CHECK_TMP}/current" "${TMP_DIR}/out" | head -60 >&2 || true + exit 1 + fi + echo "manifests/ate-install/ matches chart output." + exit 0 +fi + +# Replace contents (preserve kind/ and generated/ subdirs which are not chart output). +mkdir -p "${OUT_DIR}" +find "${OUT_DIR}" -maxdepth 1 -type f -name '*.yaml' -delete +cp "${TMP_DIR}/out/"*.yaml "${OUT_DIR}/" +rendered_count="$(find "${OUT_DIR}" -maxdepth 1 -type f -name '*.yaml' | wc -l | xargs)" +echo "Rendered ${rendered_count} manifest files into ${OUT_DIR}" diff --git a/hack/values-kind-jwt.yaml b/hack/values-kind-jwt.yaml new file mode 100644 index 000000000..00cc324b7 --- /dev/null +++ b/hack/values-kind-jwt.yaml @@ -0,0 +1,54 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Helm values for installing substrate on a kind cluster in JWT mode. +# Used by hack/install-ate-kind-jwt.sh — does NOT require the off-by-default +# certificate feature gates that the mTLS install path needs. + +auth: + mode: jwt + jwt: + # Kind's default API server issuer. 
+ issuer: https://kubernetes.default.svc.cluster.local + audience: api.ate-system.svc + serverCertSecret: ateapi-tls + caBundleConfigMap: ateapi-ca + +createNamespace: false + +# In-cluster OTel collector deployed alongside via manifests/ate-install/kind/otel-collector.yaml +otel: + endpoint: http://opentelemetry-collector.otel-system.svc:4317 + +monitoring: + gkePodMonitoring: + enabled: false + +atelet: + gcpAuthForImagePulls: false + storageBackend: s3 + extraArgs: + - --localhost-registry-replacement=kind-registry:5000 + extraEnv: + - name: AWS_REGION + value: us-east-1 + - name: AWS_ENDPOINT_URL + value: http://rustfs.ate-system.svc:9000 + - name: AWS_S3_USE_PATH_STYLE + value: "true" + # TODO: use a secret / identity management for rustfs credentials. + - name: AWS_ACCESS_KEY_ID + value: rustfsadmin + - name: AWS_SECRET_ACCESS_KEY + value: rustfsadmin diff --git a/hack/verify/crd-chart.sh b/hack/verify/crd-chart.sh new file mode 100755 index 000000000..0837a8473 --- /dev/null +++ b/hack/verify/crd-chart.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash + +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -o errexit -o nounset -o pipefail + +ROOT="$(git rev-parse --show-toplevel)" +cd "${ROOT}" + +GENERATED_DIR="manifests/ate-install/generated" +CHART_TEMPLATES_DIR="charts/substrate-crds/templates" + +TMP_DIR="$(mktemp -d)" +trap 'rm -rf "${TMP_DIR}"' EXIT + +mkdir -p "${TMP_DIR}/generated" "${TMP_DIR}/chart" +cp "${GENERATED_DIR}/"*.yaml "${TMP_DIR}/generated/" +cp "${CHART_TEMPLATES_DIR}/"*.yaml "${TMP_DIR}/chart/" + +# The generated CRDs start with a leading document separator after the +# boilerplate header. In chart templates that separator renders as a +# comment-only YAML document, so the chart copies intentionally omit it. +for file in "${TMP_DIR}/generated/"*.yaml; do + awk 'BEGIN { removed = 0 } /^---$/ && removed == 0 { removed = 1; next } { print }' "${file}" > "${file}.tmp" + mv "${file}.tmp" "${file}" +done + +if ! diff -ruN "${TMP_DIR}/generated" "${TMP_DIR}/chart" >/dev/null 2>&1; then + echo "charts/substrate-crds/templates is out of sync with ${GENERATED_DIR}" >&2 + echo "Copy updated CRDs into charts/substrate-crds/templates." >&2 + diff -ruN "${TMP_DIR}/generated" "${TMP_DIR}/chart" | head -80 >&2 || true + exit 1 +fi + +echo "charts/substrate-crds/templates matches generated CRDs." diff --git a/internal/ateapiauth/client.go b/internal/ateapiauth/client.go new file mode 100644 index 000000000..db00807f7 --- /dev/null +++ b/internal/ateapiauth/client.go @@ -0,0 +1,110 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package ateapiauth + +import ( + "context" + "crypto/tls" + "crypto/x509" + "fmt" + "os" + "strings" + + "google.golang.org/grpc" + "google.golang.org/grpc/credentials" +) + +// ClientConfig configures how to dial the ateapi gRPC server. +// +// - Mode=ModeMTLS: insecure TLS dial (InsecureSkipVerify=true). Client +// identity is expected to come from mTLS credentials projected into +// the pod (servicedns.podcert.ate.dev). No app-level credentials. +// - Mode=ModeJWT: validates the server cert against CAFile, sends a Bearer +// token from TokenFile as per-RPC credentials. +type ClientConfig struct { + Mode Mode + + // CAFile is a PEM file containing CA certs that sign the server cert. + // Required for ModeJWT. Ignored for ModeMTLS. + CAFile string + + // ServerName overrides SNI / hostname verification. Optional. + ServerName string + + // TokenFile is a path to a Kubernetes projected ServiceAccount token used + // as a Bearer credential. Required for ModeJWT. + TokenFile string +} + +// DialOptions returns the grpc.DialOption set described by cfg, suitable to +// pass to grpc.NewClient. 
+func DialOptions(cfg ClientConfig) ([]grpc.DialOption, error) { + switch cfg.Mode { + case "", ModeMTLS: + tlsCfg := &tls.Config{InsecureSkipVerify: true} //nolint:gosec // explicit opt-in + return []grpc.DialOption{ + grpc.WithTransportCredentials(credentials.NewTLS(tlsCfg)), + }, nil + + case ModeJWT: + if cfg.CAFile == "" { + return nil, fmt.Errorf("ateapiauth: jwt mode requires CAFile") + } + if cfg.TokenFile == "" { + return nil, fmt.Errorf("ateapiauth: jwt mode requires TokenFile") + } + caPEM, err := os.ReadFile(cfg.CAFile) + if err != nil { + return nil, fmt.Errorf("ateapiauth: reading CA file: %w", err) + } + pool := x509.NewCertPool() + if !pool.AppendCertsFromPEM(caPEM) { + return nil, fmt.Errorf("ateapiauth: no certificates found in CA file %q", cfg.CAFile) + } + tlsCfg := &tls.Config{ + MinVersion: tls.VersionTLS12, + RootCAs: pool, + ServerName: cfg.ServerName, + } + return []grpc.DialOption{ + grpc.WithTransportCredentials(credentials.NewTLS(tlsCfg)), + grpc.WithPerRPCCredentials(&fileTokenCreds{path: cfg.TokenFile}), + }, nil + + default: + return nil, fmt.Errorf("ateapiauth: unknown client mode %q", cfg.Mode) + } +} + +// fileTokenCreds reads a Kubernetes projected SA token from disk for every +// RPC. Kubernetes refreshes the file in place; reading it each time picks up +// rotations. 
+type fileTokenCreds struct { + path string +} + +func (c *fileTokenCreds) GetRequestMetadata(_ context.Context, _ ...string) (map[string]string, error) { + b, err := os.ReadFile(c.path) + if err != nil { + return nil, fmt.Errorf("ateapiauth: reading token file %q: %w", c.path, err) + } + tok := strings.TrimSpace(string(b)) + if tok == "" { + return nil, fmt.Errorf("ateapiauth: token file %q is empty", c.path) + } + return map[string]string{"authorization": "Bearer " + tok}, nil +} + +func (c *fileTokenCreds) RequireTransportSecurity() bool { return true } diff --git a/internal/ateapiauth/server.go b/internal/ateapiauth/server.go new file mode 100644 index 000000000..e924b0c79 --- /dev/null +++ b/internal/ateapiauth/server.go @@ -0,0 +1,158 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package ateapiauth adds optional Kubernetes ServiceAccount JWT +// authentication on top of the ateapi gRPC server, and a matching client +// dial helper. It does not replace the existing TLS / mTLS path — the +// server's transport credentials still apply unchanged. Set Mode=ModeJWT +// on the server to require an `authorization: Bearer ` header +// on every RPC; Mode=ModeMTLS (the default) leaves identity to the +// transport-layer mTLS credentials. 
+package ateapiauth + +import ( + "context" + "fmt" + "net/http" + "strings" + "time" + + "github.com/agent-substrate/substrate/internal/k8sjwt" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/metadata" + "google.golang.org/grpc/status" +) + +// Mode selects whether the JWT interceptor enforces a Bearer token. +type Mode string + +const ( + ModeMTLS Mode = "mtls" + ModeJWT Mode = "jwt" +) + +// ParseMode parses a flag value into a Mode, defaulting to ModeMTLS on empty. +// ModeMTLS means identity is established by the transport-layer mTLS +// credentials; the interceptor performs no app-level checks. ModeJWT +// additionally requires a Kubernetes SA Bearer token on every RPC. +func ParseMode(s string) (Mode, error) { + switch Mode(s) { + case "", ModeMTLS: + return ModeMTLS, nil + case ModeJWT: + return ModeJWT, nil + default: + return "", fmt.Errorf("unknown auth mode %q (want mtls|jwt)", s) + } +} + +// ServerConfig configures the server-side JWT interceptor. +type ServerConfig struct { + Mode Mode + Issuer string // OIDC issuer URL for JWT verification + Audience string // expected audience claim for JWT verification + + // HTTPClient is used for OIDC discovery and JWKS fetches. Nil uses http.DefaultClient. + // Set this to a client that trusts the cluster CA when verifying tokens issued by + // the in-cluster Kubernetes API server (https://kubernetes.default.svc.cluster.local). + HTTPClient *http.Client + + // Now returns the current time; nil uses time.Now. Exposed for tests. + Now func() time.Time +} + +type ctxKey struct{} + +// ClaimsFromContext returns the verified Kubernetes JWT claims that the +// interceptor attached to ctx, if any. 
+func ClaimsFromContext(ctx context.Context) (*k8sjwt.KubernetesClaims, bool) { + c, ok := ctx.Value(ctxKey{}).(*k8sjwt.KubernetesClaims) + return c, ok +} + +func contextWithClaims(ctx context.Context, c *k8sjwt.KubernetesClaims) context.Context { + return context.WithValue(ctx, ctxKey{}, c) +} + +// UnaryServerInterceptor returns a gRPC unary interceptor enforcing cfg. +func UnaryServerInterceptor(cfg ServerConfig) grpc.UnaryServerInterceptor { + return func(ctx context.Context, req any, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (any, error) { + newCtx, err := authenticate(ctx, cfg) + if err != nil { + return nil, err + } + return handler(newCtx, req) + } +} + +// StreamServerInterceptor returns a gRPC stream interceptor enforcing cfg. +func StreamServerInterceptor(cfg ServerConfig) grpc.StreamServerInterceptor { + return func(srv any, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error { + newCtx, err := authenticate(ss.Context(), cfg) + if err != nil { + return err + } + return handler(srv, &wrappedStream{ServerStream: ss, ctx: newCtx}) + } +} + +type wrappedStream struct { + grpc.ServerStream + ctx context.Context +} + +func (w *wrappedStream) Context() context.Context { return w.ctx } + +func authenticate(ctx context.Context, cfg ServerConfig) (context.Context, error) { + if cfg.Mode == "" || cfg.Mode == ModeMTLS { + return ctx, nil + } + + now := time.Now + if cfg.Now != nil { + now = cfg.Now + } + + bearer, ok := bearerToken(ctx) + if !ok { + return nil, status.Error(codes.Unauthenticated, "missing bearer token") + } + claims, err := k8sjwt.Verify(ctx, cfg.HTTPClient, bearer, cfg.Issuer, cfg.Audience, now()) + if err != nil { + return nil, status.Errorf(codes.Unauthenticated, "invalid bearer token: %v", err) + } + return contextWithClaims(ctx, claims), nil +} + +func bearerToken(ctx context.Context) (string, bool) { + md, ok := metadata.FromIncomingContext(ctx) + if !ok { + return "", false + } + vals := 
md.Get("authorization") + if len(vals) == 0 { + return "", false + } + const prefix = "Bearer " + v := vals[0] + if !strings.HasPrefix(v, prefix) { + return "", false + } + tok := strings.TrimSpace(strings.TrimPrefix(v, prefix)) + if tok == "" { + return "", false + } + return tok, true +} diff --git a/internal/ateapiauth/server_test.go b/internal/ateapiauth/server_test.go new file mode 100644 index 000000000..a2dda17aa --- /dev/null +++ b/internal/ateapiauth/server_test.go @@ -0,0 +1,103 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package ateapiauth + +import ( + "context" + "testing" + + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/metadata" + "google.golang.org/grpc/status" +) + +func TestParseMode(t *testing.T) { + cases := []struct { + in string + want Mode + wantErr bool + }{ + {"", ModeMTLS, false}, + {"mtls", ModeMTLS, false}, + {"jwt", ModeJWT, false}, + {"none", "", true}, + {"bogus", "", true}, + } + for _, tc := range cases { + got, err := ParseMode(tc.in) + if (err != nil) != tc.wantErr { + t.Errorf("ParseMode(%q) err=%v wantErr=%v", tc.in, err, tc.wantErr) + } + if !tc.wantErr && got != tc.want { + t.Errorf("ParseMode(%q)=%v want %v", tc.in, got, tc.want) + } + } +} + +func TestAuthenticate_MTLS_AllowsAnonymous(t *testing.T) { + _, err := authenticate(context.Background(), ServerConfig{Mode: ModeMTLS}) + if err != nil { + t.Fatalf("ModeMTLS should not error: %v", err) + } +} + +func TestAuthenticate_JWT_RequiresBearer(t *testing.T) { + cfg := ServerConfig{Mode: ModeJWT, Issuer: "https://example", Audience: "ateapi"} + + // Missing header -> Unauthenticated. + _, err := authenticate(context.Background(), cfg) + if code := status.Code(err); code != codes.Unauthenticated { + t.Fatalf("missing bearer: want Unauthenticated, got %v (err=%v)", code, err) + } + + // Garbage bearer -> Unauthenticated (k8sjwt.Verify will fail). 
+ ctx := metadata.NewIncomingContext(context.Background(), metadata.Pairs("authorization", "Bearer not-a-jwt")) + _, err = authenticate(ctx, cfg) + if code := status.Code(err); code != codes.Unauthenticated { + t.Fatalf("bad bearer: want Unauthenticated, got %v (err=%v)", code, err) + } +} + +func TestBearerToken(t *testing.T) { + cases := []struct { + name string + hdr string + want string + found bool + }{ + {"missing", "", "", false}, + {"no prefix", "abc", "", false}, + {"prefix", "Bearer abc", "abc", true}, + {"prefix with spaces", "Bearer abc ", "abc", true}, + {"empty after prefix", "Bearer ", "", false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + ctx := context.Background() + if tc.hdr != "" { + ctx = metadata.NewIncomingContext(ctx, metadata.Pairs("authorization", tc.hdr)) + } + got, ok := bearerToken(ctx) + if ok != tc.found || got != tc.want { + t.Errorf("bearerToken=(%q,%v) want (%q,%v)", got, ok, tc.want, tc.found) + } + }) + } +} + +// Build-time check. +var _ grpc.UnaryServerInterceptor = UnaryServerInterceptor(ServerConfig{}) +var _ grpc.StreamServerInterceptor = StreamServerInterceptor(ServerConfig{}) diff --git a/internal/controllers/gen.go b/internal/controllers/gen.go index df7df00a8..7ac77eff7 100644 --- a/internal/controllers/gen.go +++ b/internal/controllers/gen.go @@ -14,4 +14,4 @@ package controllers -//go:generate bash ../../hack/run-tool.sh controller-gen rbac:headerFile=../../hack/boilerplate/sh.txt,roleName=ate-controller paths="./..." output:rbac:artifacts:config=../../manifests/ate-install/generated/ +//go:generate bash ../../hack/gen-rbac.sh diff --git a/internal/k8sjwt/k8sjwt.go b/internal/k8sjwt/k8sjwt.go index e4403d0df..5b343ec4a 100644 --- a/internal/k8sjwt/k8sjwt.go +++ b/internal/k8sjwt/k8sjwt.go @@ -119,7 +119,9 @@ var permittedSkew = 5 * time.Minute // the object binding claims. 
If needed for your use case, you will need check the object bindings // by connecting to the cluster and seeing if the object(s) the bindings name still exist within the // cluster. -func Verify(ctx context.Context, jwt string, expectedIssuer, expectedAudience string, now time.Time) (*KubernetesClaims, error) { +// +// httpClient is used for OIDC discovery and JWKS fetches; nil uses http.DefaultClient. +func Verify(ctx context.Context, httpClient *http.Client, jwt string, expectedIssuer, expectedAudience string, now time.Time) (*KubernetesClaims, error) { segments := strings.Split(jwt, ".") if len(segments) != 3 { return nil, fmt.Errorf("malformed JWT") @@ -169,7 +171,7 @@ func Verify(ctx context.Context, jwt string, expectedIssuer, expectedAudience st } // TODO: Cache keys, and only fetch new keys if the JWT's key ID is not in the cache. - keys, err := discoverKeysForIssuer(ctx, rawClaims.Issuer) + keys, err := discoverKeysForIssuer(ctx, httpClient, rawClaims.Issuer) if err != nil { return nil, fmt.Errorf("while discovering keys from issuer: %w", err) } @@ -358,7 +360,7 @@ type jwkT struct { RSAE string `json:"e"` } -func discoverKeysForIssuer(ctx context.Context, issuer string) ([]*KeyAndID, error) { +func discoverKeysForIssuer(ctx context.Context, httpClient *http.Client, issuer string) ([]*KeyAndID, error) { var discoveryDocURL string if strings.HasSuffix(issuer, "/") { discoveryDocURL = issuer + ".well-known/openid-configuration" @@ -366,14 +368,14 @@ func discoverKeysForIssuer(ctx context.Context, issuer string) ([]*KeyAndID, err discoveryDocURL = issuer + "/.well-known/openid-configuration" } - oidcConfig, err := fetchJSON[oidcConfigT](discoveryDocURL) + oidcConfig, err := fetchJSON[oidcConfigT](httpClient, discoveryDocURL) if err != nil { return nil, fmt.Errorf("while fetching OIDC Discovery document: %w", err) } slog.InfoContext(ctx, "Fetched discovery doc", slog.Any("doc", oidcConfig)) - jwkSet, err := fetchJSON[jwkSetT](oidcConfig.JWKSURI) + jwkSet, err 
:= fetchJSON[jwkSetT](httpClient, oidcConfig.JWKSURI) if err != nil { return nil, fmt.Errorf("while fetching JWKS: %w", err) } @@ -424,10 +426,12 @@ func discoverKeysForIssuer(ctx context.Context, issuer string) ([]*KeyAndID, err return ret, nil } -func fetchJSON[T any](url string) (T, error) { +func fetchJSON[T any](httpClient *http.Client, url string) (T, error) { var parsedBody T - - resp, err := http.Get(url) + if httpClient == nil { + httpClient = http.DefaultClient + } + resp, err := httpClient.Get(url) if err != nil { return parsedBody, fmt.Errorf("while making HTTP request: %w", err) } diff --git a/internal/localca/localca.go b/internal/localca/localca.go index eb8370905..3078435ed 100644 --- a/internal/localca/localca.go +++ b/internal/localca/localca.go @@ -22,6 +22,7 @@ import ( "crypto/rand" "crypto/x509" "encoding/json" + "encoding/pem" "fmt" "time" ) @@ -43,7 +44,9 @@ type serializedPool struct { type serializedCA struct { ID string SigningKeyPKCS8 []byte + SigningKeyPEM string RootCertificateDER []byte + RootCertificatePEM string IntermediateCertificatesDER [][]byte } @@ -92,12 +95,12 @@ func Unmarshal(wireBytes []byte) (*Pool, error) { ID: wireCA.ID, } - ca.SigningKey, err = x509.ParsePKCS8PrivateKey(wireCA.SigningKeyPKCS8) + ca.SigningKey, err = parsePrivateKey(wireCA.SigningKeyPKCS8, wireCA.SigningKeyPEM) if err != nil { return nil, fmt.Errorf("while parsing signing key: %w", err) } - ca.RootCertificate, err = x509.ParseCertificate(wireCA.RootCertificateDER) + ca.RootCertificate, err = parseCertificate(wireCA.RootCertificateDER, wireCA.RootCertificatePEM) if err != nil { return nil, fmt.Errorf("while parsing root certificate: %w", err) } @@ -116,6 +119,43 @@ func Unmarshal(wireBytes []byte) (*Pool, error) { return pool, nil } +func parsePrivateKey(pkcs8 []byte, pemData string) (crypto.PrivateKey, error) { + if len(pkcs8) != 0 { + return x509.ParsePKCS8PrivateKey(pkcs8) + } + + block, _ := pem.Decode([]byte(pemData)) + if block == nil { + return 
nil, fmt.Errorf("missing PEM block") + } + + if key, err := x509.ParsePKCS8PrivateKey(block.Bytes); err == nil { + return key, nil + } + if key, err := x509.ParseECPrivateKey(block.Bytes); err == nil { + return key, nil + } + if key, err := x509.ParsePKCS1PrivateKey(block.Bytes); err == nil { + return key, nil + } + return nil, fmt.Errorf("unsupported private key PEM type %q", block.Type) +} + +func parseCertificate(der []byte, pemData string) (*x509.Certificate, error) { + if len(der) != 0 { + return x509.ParseCertificate(der) + } + + block, _ := pem.Decode([]byte(pemData)) + if block == nil { + return nil, fmt.Errorf("missing PEM block") + } + if block.Type != "CERTIFICATE" { + return nil, fmt.Errorf("unsupported certificate PEM type %q", block.Type) + } + return x509.ParseCertificate(block.Bytes) +} + func GenerateED25519CA(id string) (*CA, error) { rootPubKey, rootPrivKey, err := ed25519.GenerateKey(rand.Reader) if err != nil { diff --git a/internal/localca/localca_test.go b/internal/localca/localca_test.go index 9b5d6247f..c470a4d48 100644 --- a/internal/localca/localca_test.go +++ b/internal/localca/localca_test.go @@ -18,8 +18,12 @@ import ( "bytes" "crypto/ed25519" "crypto/rand" + "crypto/rsa" "crypto/x509" + "crypto/x509/pkix" "encoding/json" + "encoding/pem" + "math/big" "strings" "testing" "time" @@ -198,6 +202,53 @@ func TestMarshalUnmarshalWithIntermediates(t *testing.T) { } } +func TestUnmarshalPEMPool(t *testing.T) { + key, err := rsa.GenerateKey(rand.Reader, 2048) + if err != nil { + t.Fatalf("GenerateKey(): %v", err) + } + template := &x509.Certificate{ + SerialNumber: big.NewInt(1), + Subject: pkix.Name{CommonName: "session-id-ca"}, + NotBefore: time.Now(), + NotAfter: time.Now().Add(24 * time.Hour), + IsCA: true, + BasicConstraintsValid: true, + KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageCertSign, + } + certDER, err := x509.CreateCertificate(rand.Reader, template, template, &key.PublicKey, key) + if err != nil { + 
t.Fatalf("CreateCertificate(): %v", err) + } + keyPEM := string(pem.EncodeToMemory(&pem.Block{Type: "RSA PRIVATE KEY", Bytes: x509.MarshalPKCS1PrivateKey(key)})) + certPEM := string(pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: certDER})) + + data, err := json.Marshal(&serializedPool{ + CAs: []*serializedCA{{ + ID: "1", + SigningKeyPEM: keyPEM, + RootCertificatePEM: certPEM, + }}, + }) + if err != nil { + t.Fatalf("Marshal(): %v", err) + } + + pool, err := Unmarshal(data) + if err != nil { + t.Fatalf("Unmarshal(): %v", err) + } + if len(pool.CAs) != 1 { + t.Fatalf("CAs length = %d, want 1", len(pool.CAs)) + } + if _, ok := pool.CAs[0].SigningKey.(*rsa.PrivateKey); !ok { + t.Fatalf("SigningKey type = %T, want *rsa.PrivateKey", pool.CAs[0].SigningKey) + } + if pool.CAs[0].RootCertificate.Subject.CommonName != "session-id-ca" { + t.Fatalf("RootCertificate CN = %q, want session-id-ca", pool.CAs[0].RootCertificate.Subject.CommonName) + } +} + func TestUnmarshalErrors(t *testing.T) { ca, err := GenerateED25519CA("err-test") if err != nil { diff --git a/internal/localjwtauthority/localjwtauthority.go b/internal/localjwtauthority/localjwtauthority.go index 62f647b55..97021fb2d 100644 --- a/internal/localjwtauthority/localjwtauthority.go +++ b/internal/localjwtauthority/localjwtauthority.go @@ -22,6 +22,7 @@ import ( "crypto/rand" "crypto/x509" "encoding/json" + "encoding/pem" "fmt" ) @@ -43,6 +44,7 @@ type serializedAuthority struct { ID string Algorithm string SigningKeyPKCS8 []byte + SigningKeyPEM string } // Marshal serializes a Pool to JSON. 
@@ -86,7 +88,7 @@ func Unmarshal(wireBytes []byte) (*Pool, error) { Algorithm: wireAuthority.Algorithm, } - signingKey, err := x509.ParsePKCS8PrivateKey(wireAuthority.SigningKeyPKCS8) + signingKey, err := parsePrivateKey(wireAuthority.SigningKeyPKCS8, wireAuthority.SigningKeyPEM) if err != nil { return nil, fmt.Errorf("while parsing signing key: %w", err) } @@ -98,6 +100,28 @@ func Unmarshal(wireBytes []byte) (*Pool, error) { return pool, nil } +func parsePrivateKey(pkcs8 []byte, pemData string) (crypto.PrivateKey, error) { + if len(pkcs8) != 0 { + return x509.ParsePKCS8PrivateKey(pkcs8) + } + + block, _ := pem.Decode([]byte(pemData)) + if block == nil { + return nil, fmt.Errorf("missing PEM block") + } + + if key, err := x509.ParsePKCS8PrivateKey(block.Bytes); err == nil { + return key, nil + } + if key, err := x509.ParseECPrivateKey(block.Bytes); err == nil { + return key, nil + } + if key, err := x509.ParsePKCS1PrivateKey(block.Bytes); err == nil { + return key, nil + } + return nil, fmt.Errorf("unsupported private key PEM type %q", block.Type) +} + // GenerateECDSAP256Authority generates an ECDSA P256 JWT signing key. func GenerateECDSAP256Authority(id string) (*Authority, error) { privKey, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) diff --git a/internal/localjwtauthority/localjwtauthority_test.go b/internal/localjwtauthority/localjwtauthority_test.go new file mode 100644 index 000000000..7f25a72ea --- /dev/null +++ b/internal/localjwtauthority/localjwtauthority_test.go @@ -0,0 +1,62 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package localjwtauthority + +import ( + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/x509" + "encoding/json" + "encoding/pem" + "testing" +) + +func TestUnmarshalPEMSigningKey(t *testing.T) { + key, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + if err != nil { + t.Fatalf("GenerateKey(): %v", err) + } + keyDER, err := x509.MarshalECPrivateKey(key) + if err != nil { + t.Fatalf("MarshalECPrivateKey(): %v", err) + } + keyPEM := string(pem.EncodeToMemory(&pem.Block{Type: "EC PRIVATE KEY", Bytes: keyDER})) + + data, err := json.Marshal(&serializedPool{ + Authorities: []*serializedAuthority{{ + ID: "1", + Algorithm: "ES256", + SigningKeyPEM: keyPEM, + }}, + }) + if err != nil { + t.Fatalf("Marshal(): %v", err) + } + + pool, err := Unmarshal(data) + if err != nil { + t.Fatalf("Unmarshal(): %v", err) + } + if len(pool.Authorities) != 1 { + t.Fatalf("Authorities length = %d, want 1", len(pool.Authorities)) + } + if pool.Authorities[0].Algorithm != "ES256" { + t.Fatalf("Algorithm = %q, want ES256", pool.Authorities[0].Algorithm) + } + if _, ok := pool.Authorities[0].SigningKey.(*ecdsa.PrivateKey); !ok { + t.Fatalf("SigningKey type = %T, want *ecdsa.PrivateKey", pool.Authorities[0].SigningKey) + } +} diff --git a/manifests/ate-install/ate-api-server-envvars.yaml b/manifests/ate-install/ate-api-server-envvars.yaml new file mode 100644 index 000000000..f0df64a0f --- /dev/null +++ b/manifests/ate-install/ate-api-server-envvars.yaml @@ -0,0 +1,28 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. +# Run `make helm-template` to regenerate. + +apiVersion: v1 +kind: ConfigMap +metadata: + name: ate-api-server-envvars + namespace: ate-system +data: + ATE_API_REDIS_ADDRESS: "valkey-cluster.ate-system.svc:6379" + ATE_API_REDIS_USE_IAM_AUTH: "false" + ATE_API_REDIS_TLS_SERVER_NAME: "" + ATE_API_REDIS_CLIENT_CERT: "" + ATE_API_K8SJWT_ISSUER: "" diff --git a/manifests/ate-install/ate-api-server.yaml b/manifests/ate-install/ate-api-server.yaml index 8d3c17086..46b5daf4a 100644 --- a/manifests/ate-install/ate-api-server.yaml +++ b/manifests/ate-install/ate-api-server.yaml @@ -1,19 +1,26 @@ -# Copyright 2026 Google LLC +# Copyright 2026 Google LLC # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. +# Run `make helm-template` to regenerate. -# Define Permissions (Read-Only for Pods) +apiVersion: v1 +kind: ServiceAccount +metadata: + name: ate-api-server + namespace: ate-system +--- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: @@ -31,14 +38,6 @@ rules: # ActorTemplates (e.g. via a namespace-scoped Role + RoleBinding using # resourceNames). --- -# Create Service Account for Workload Identity -apiVersion: v1 -kind: ServiceAccount -metadata: - name: ate-api-server - namespace: ate-system ---- -# 4. Bind Identity to Permissions apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: @@ -52,7 +51,21 @@ roleRef: name: ate-api-server-role apiGroup: rbac.authorization.k8s.io --- -# 5. 
Deploy the API Server +apiVersion: v1 +kind: Service +metadata: + name: api + namespace: ate-system +spec: + type: ClusterIP + selector: + app: ate-api-server + ports: + - name: grpc + protocol: TCP + port: 443 + targetPort: 443 +--- apiVersion: apps/v1 kind: Deployment metadata: @@ -76,18 +89,18 @@ spec: - name: ate-api-server image: ko://github.com/agent-substrate/substrate/cmd/ateapi args: - - "--grpc-listen-addr=0.0.0.0:443" - - "--grpc-server-cred-bundle=/run/servicedns.podcert.ate.dev/credential-bundle.pem" - - --redis-cluster-address=@env - - --redis-ca-certs=/etc/valkey-ca/ca.crt - - --redis-use-iam-auth=@env - - --redis-tls-server-name=@env - - --redis-client-cert=@env - - --client-jwt-issuer=@env - - --client-jwt-audience=api.ate-system.svc - - --session-id-jwt-pool=/run/session-id-jwt-pool/pool.json - - --session-id-ca-pool=/run/session-id-ca-pool/pool.json - - --workerpool-ca-certs=/run/workerpool-ca-certs/trust-bundle.pem + - "--grpc-listen-addr=0.0.0.0:443" + - "--grpc-server-cred-bundle=/run/servicedns.podcert.ate.dev/credential-bundle.pem" + - "--redis-cluster-address=@env" + - "--redis-ca-certs=/etc/valkey-ca/ca.crt" + - "--redis-use-iam-auth=@env" + - "--redis-tls-server-name=@env" + - "--redis-client-cert=@env" + - "--client-jwt-issuer=@env" + - "--client-jwt-audience=api.ate-system.svc" + - "--session-id-jwt-pool=/run/session-id-jwt-pool/pool.json" + - "--session-id-ca-pool=/run/session-id-ca-pool/pool.json" + - "--workerpool-ca-certs=/run/workerpool-ca-certs/trust-bundle.pem" env: - name: POD_NAME valueFrom: @@ -103,31 +116,16 @@ spec: fieldPath: metadata.uid - name: OTEL_RESOURCE_ATTRIBUTES value: k8s.namespace.name=$(POD_NAMESPACE),k8s.pod.name=$(POD_NAME),k8s.pod.uid=$(POD_UID),service.instance.id=$(POD_UID) - - name: OTEL_EXPORTER_OTLP_ENDPOINT - value: http://opentelemetry-collector.gke-managed-otel.svc.cluster.local:4317 - # Inject env vars from a ConfigMap created by each developer. 
This lets - # each developer customize their own redis address, etc, without having - # to edit this manifest, which can remain constant across all - # developers. envFrom: - configMapRef: name: ate-api-server-envvars optional: true volumeMounts: - - name: "servicedns" - mountPath: "/run/servicedns.podcert.ate.dev" - - name: "session-id-jwt-pool" - mountPath: "/run/session-id-jwt-pool" - # Note: See README.md for how to generate this secret. - - name: "valkey-ca-certs" - mountPath: "/etc/valkey-ca" - readOnly: true - - name: "session-id-ca-pool" - mountPath: "/run/session-id-ca-pool" - readOnly: true - - name: "workerpool-ca-certs" - mountPath: "/run/workerpool-ca-certs" - readOnly: true + - { name: servicedns, mountPath: /run/servicedns.podcert.ate.dev } + - { name: session-id-jwt-pool, mountPath: /run/session-id-jwt-pool } + - { name: valkey-ca-certs, mountPath: /etc/valkey-ca, readOnly: true } + - { name: session-id-ca-pool, mountPath: /run/session-id-ca-pool, readOnly: true } + - { name: workerpool-ca-certs, mountPath: /run/workerpool-ca-certs, readOnly: true } ports: - containerPort: 443 - name: prometheus @@ -139,38 +137,35 @@ spec: initialDelaySeconds: 5 periodSeconds: 2 volumes: - - name: "servicedns" + - name: servicedns projected: sources: - podCertificate: signerName: servicedns.podcert.ate.dev/identity keyType: ECDSAP256 credentialBundlePath: credential-bundle.pem - - name: "session-id-jwt-pool" + - name: session-id-jwt-pool projected: sources: - secret: - name: "session-id-jwt-pool" + name: session-id-jwt-pool items: - - key: "pool" - path: "pool.json" - - name: "valkey-ca-certs" + - { key: pool, path: pool.json } + - name: valkey-ca-certs projected: sources: - secret: - name: "valkey-ca-certs" + name: valkey-ca-certs items: - - key: "ca.crt" - path: "ca.crt" - - name: "session-id-ca-pool" + - { key: ca.crt, path: ca.crt } + - name: session-id-ca-pool projected: sources: - secret: - name: "session-id-ca-pool" + name: session-id-ca-pool items: - - key: 
"pool" - path: "pool.json" - - name: "workerpool-ca-certs" + - { key: pool, path: pool.json } + - name: workerpool-ca-certs projected: sources: - clusterTrustBundle: @@ -179,19 +174,3 @@ spec: matchLabels: podcert.ate.dev/canarying: live path: trust-bundle.pem ---- -# 6. Expose the Session Assigner -apiVersion: v1 -kind: Service -metadata: - name: api - namespace: ate-system -spec: - type: ClusterIP - selector: - app: ate-api-server - ports: - - name: grpc - protocol: TCP - port: 443 - targetPort: 443 diff --git a/manifests/ate-install/ate-controller.yaml b/manifests/ate-install/ate-controller.yaml index 1f2756a27..2cfbeecb4 100644 --- a/manifests/ate-install/ate-controller.yaml +++ b/manifests/ate-install/ate-controller.yaml @@ -1,23 +1,20 @@ -# Copyright 2026 Google LLC +# Copyright 2026 Google LLC # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
-kind: Namespace -apiVersion: v1 -metadata: - name: ate-system +# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. +# Run `make helm-template` to regenerate. ---- apiVersion: v1 kind: ServiceAccount metadata: @@ -25,7 +22,6 @@ metadata: namespace: ate-system labels: apps: ate-controller - --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding @@ -39,9 +35,7 @@ roleRef: kind: ClusterRole name: ate-controller apiGroup: rbac.authorization.k8s.io - --- - kind: Service apiVersion: v1 metadata: @@ -57,9 +51,7 @@ spec: port: 8080 targetPort: metrics protocol: TCP - --- - kind: Deployment apiVersion: apps/v1 metadata: diff --git a/manifests/ate-install/atelet.yaml b/manifests/ate-install/atelet.yaml index c9564c40d..e43ae9ab0 100644 --- a/manifests/ate-install/atelet.yaml +++ b/manifests/ate-install/atelet.yaml @@ -1,25 +1,27 @@ -# Copyright 2026 Google LLC +# Copyright 2026 Google LLC # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. -# 1. Create Service Account +# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. +# Run `make helm-template` to regenerate. + +# atelet — identical across auth modes (does not dial ateapi). apiVersion: v1 kind: ServiceAccount metadata: name: atelet namespace: ate-system --- -# 2. Define Permissions apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: @@ -29,7 +31,6 @@ rules: resources: ["pods"] verbs: ["get", "watch", "list"] --- -# 3. Bind Identity to Permissions apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: @@ -43,7 +44,6 @@ roleRef: name: atelet-role apiGroup: rbac.authorization.k8s.io --- -# 4. Create DaemonSet apiVersion: apps/v1 kind: DaemonSet metadata: @@ -68,7 +68,7 @@ spec: - name: atelet image: ko://github.com/agent-substrate/substrate/cmd/atelet args: - - --gcp-auth-for-image-pulls=true + - --gcp-auth-for-image-pulls=false securityContext: privileged: true env: @@ -76,22 +76,6 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: POD_UID - valueFrom: - fieldRef: - fieldPath: metadata.uid - - name: OTEL_RESOURCE_ATTRIBUTES - value: k8s.namespace.name=$(POD_NAMESPACE),k8s.pod.name=$(POD_NAME),k8s.pod.uid=$(POD_UID),service.instance.id=$(POD_UID) - - name: OTEL_EXPORTER_OTLP_ENDPOINT - value: http://opentelemetry-collector.gke-managed-otel.svc.cluster.local:4317 - name: ATE_STORAGE_BACKEND value: "gcs" ports: diff --git a/manifests/ate-install/atenet-dns.yaml b/manifests/ate-install/atenet-dns.yaml index 46968a386..c925ada82 100644 --- a/manifests/ate-install/atenet-dns.yaml +++ b/manifests/ate-install/atenet-dns.yaml @@ -1,17 +1,21 @@ -# Copyright 2026 Google LLC +# Copyright 2026 Google LLC # -# Licensed under the Apache 
License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. +# Run `make helm-template` to regenerate. + +# atenet-dns — identical across auth modes (does not dial ateapi). 
apiVersion: v1 kind: ServiceAccount metadata: @@ -34,6 +38,16 @@ rules: verbs: ["get", "list", "watch", "create", "update", "patch"] --- apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: atenet-dns + namespace: kube-system +rules: +- apiGroups: [""] + resources: ["configmaps"] + verbs: ["get", "list", "watch", "create", "update", "patch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: atenet-dns @@ -48,16 +62,6 @@ roleRef: apiGroup: rbac.authorization.k8s.io --- apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: atenet-dns - namespace: kube-system -rules: -- apiGroups: [""] - resources: ["configmaps"] - verbs: ["get", "list", "watch", "create", "update", "patch"] ---- -apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: atenet-dns @@ -71,6 +75,25 @@ roleRef: name: atenet-dns apiGroup: rbac.authorization.k8s.io --- +apiVersion: v1 +kind: Service +metadata: + name: dns + namespace: ate-system + labels: + app: dns +spec: + selector: + app: dns + type: ClusterIP + ports: + - name: dns + port: 53 + protocol: UDP + - name: dns-tcp + port: 53 + protocol: TCP +--- apiVersion: apps/v1 kind: Deployment metadata: @@ -94,8 +117,6 @@ spec: - name: init-dns image: busybox:1.36 command: ["sh", "-c"] - # Initial core file is sufficient to start CoreDNS but does not contain - # any additional configuration. The controller will update the Corefile. 
args: - | cat <<'EOF' > /etc/coredns/Corefile @@ -155,22 +176,3 @@ spec: volumes: - name: dns-config-volume emptyDir: {} ---- -apiVersion: v1 -kind: Service -metadata: - name: dns - namespace: ate-system - labels: - app: dns -spec: - selector: - app: dns - type: ClusterIP - ports: - - name: dns - port: 53 - protocol: UDP - - name: dns-tcp - port: 53 - protocol: TCP \ No newline at end of file diff --git a/manifests/ate-install/atenet-router-agentgateway.yaml b/manifests/ate-install/atenet-router-agentgateway.yaml deleted file mode 100644 index d39972906..000000000 --- a/manifests/ate-install/atenet-router-agentgateway.yaml +++ /dev/null @@ -1,197 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: atenet-router - namespace: ate-system - labels: - app: atenet-router ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: atenet-router -rules: -- apiGroups: - - "ate.dev" - resources: - - actortemplates - verbs: - - get - - watch - - list ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: atenet-router -subjects: -- kind: ServiceAccount - name: atenet-router - namespace: ate-system -roleRef: - kind: ClusterRole - name: atenet-router - apiGroup: rbac.authorization.k8s.io ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: atenet-router-agentgateway-config - namespace: ate-system -data: - config.yaml: | - binds: - - port: 8080 - listeners: - - name: http - protocol: HTTP - routes: - - name: substrate-http - matches: - - path: - pathPrefix: / - policies: - extProc: - host: "127.0.0.1:50051" - processingOptions: - requestBodyMode: none - responseBodyMode: none - requestHeaderMode: send - responseHeaderMode: skip - requestTrailerMode: skip - responseTrailerMode: skip - backends: - - dynamic: {} - - port: 8443 - listeners: - - name: https - protocol: HTTPS - tls: - cert: "/run/servicedns.podcert.ate.dev/cert.pem" - key: "/run/servicedns.podcert.ate.dev/key.pem" - routes: - - name: substrate-https - matches: - - path: - pathPrefix: / - policies: - extProc: - host: "127.0.0.1:50051" - processingOptions: - requestBodyMode: none - responseBodyMode: none - requestHeaderMode: send - responseHeaderMode: skip - requestTrailerMode: skip - responseTrailerMode: skip - backends: - - dynamic: {} ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: atenet-router - namespace: ate-system - labels: - app: atenet-router -spec: - replicas: 1 - selector: - matchLabels: - app: atenet-router - template: - metadata: - labels: - app: atenet-router - spec: - serviceAccountName: atenet-router - containers: - - name: atenet-router - image: 
ko://github.com/agent-substrate/substrate/cmd/atenet - args: - - "router" - - "--networking-mode=agentgateway" - - "--standalone" - - "--namespace=ate-system" - - "--port-http=8080" - - "--port-extproc=50051" - - "--extproc-address=127.0.0.1" - - "--ateapi-address=api.ate-system.svc:443" - - "--status-port=4040" - - "--port-https=8443" - - "--tls-cert-path=/run/servicedns.podcert.ate.dev/cert.pem" - - "--tls-key-path=/run/servicedns.podcert.ate.dev/key.pem" - ports: - - name: extproc - containerPort: 50051 - - name: status - containerPort: 4040 - - name: agentgateway - image: cr.agentgateway.dev/agentgateway:v1.3.0-alpha.1 - args: - - "-f" - - "/etc/agentgateway/config.yaml" - ports: - - name: http - containerPort: 8080 - - name: https - containerPort: 8443 - - name: readiness - containerPort: 15021 - - name: metrics - containerPort: 15020 - readinessProbe: - httpGet: - path: /healthz/ready - port: 15021 - periodSeconds: 10 - volumeMounts: - - name: agentgateway-config - mountPath: /etc/agentgateway - - name: "servicedns" - mountPath: "/run/servicedns.podcert.ate.dev" - volumes: - - name: agentgateway-config - configMap: - name: atenet-router-agentgateway-config - - name: "servicedns" - projected: - sources: - - podCertificate: - signerName: servicedns.podcert.ate.dev/identity - keyType: ECDSAP256 - certificateChainPath: cert.pem - keyPath: key.pem ---- -apiVersion: v1 -kind: Service -metadata: - name: atenet-router - namespace: ate-system -spec: - type: ClusterIP - selector: - app: atenet-router - ports: - - name: http - port: 80 - targetPort: 8080 - protocol: TCP - - name: https - port: 443 - targetPort: 8443 - protocol: TCP diff --git a/manifests/ate-install/atenet-router-monitoring.yaml b/manifests/ate-install/atenet-router-monitoring.yaml deleted file mode 100644 index 6a42ac904..000000000 --- a/manifests/ate-install/atenet-router-monitoring.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 
(the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Scrape the Envoy sidecar's admin /stats/prometheus endpoint so its end-to-end -# request-latency histogram (envoy_http_downstream_rq_time, milliseconds) reaches -# Google Managed Prometheus. This is E2E *context* for the per-stage latency -# dashboard, not an SLI we own (the SLI is the OTLP atenet.router.route.duration -# histogram). Envoy only speaks Prometheus, so it needs an explicit scrape; the -# admin port (9901) is already exposed by the envoy container above. -apiVersion: monitoring.googleapis.com/v1 -kind: PodMonitoring -metadata: - name: atenet-router-envoy - namespace: ate-system - labels: - app: atenet-router -spec: - selector: - matchLabels: - app: atenet-router - endpoints: - - port: admin - path: /stats/prometheus - interval: 30s diff --git a/manifests/ate-install/atenet-router.yaml b/manifests/ate-install/atenet-router.yaml index cddf7d367..54eb77e05 100644 --- a/manifests/ate-install/atenet-router.yaml +++ b/manifests/ate-install/atenet-router.yaml @@ -1,16 +1,19 @@ -# Copyright 2026 Google LLC +# Copyright 2026 Google LLC # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. +# Run `make helm-template` to regenerate. apiVersion: v1 kind: ServiceAccount @@ -20,6 +23,69 @@ metadata: labels: app: atenet-router --- +apiVersion: v1 +kind: ConfigMap +metadata: + name: atenet-router-agentgateway-config + namespace: ate-system +data: + config.yaml: | + # yaml-language-server: $schema=https://agentgateway.dev/schema/config + config: + adminAddr: "127.0.0.1:15000" + readinessAddr: "0.0.0.0:15021" + statsAddr: "0.0.0.0:15020" + binds: + - port: 8080 + listeners: + - name: http + protocol: HTTP + routes: + - name: substrate-http + matches: + - path: + pathPrefix: / + policies: + extProc: + host: "127.0.0.1:50051" + failureMode: failClosed + processingOptions: + requestBodyMode: none + responseBodyMode: none + requestHeaderMode: send + responseHeaderMode: skip + requestTrailerMode: skip + responseTrailerMode: skip + backends: + - dynamic: {} + - port: 8443 + listeners: + - name: https + protocol: HTTPS + tls: + + cert: "/run/servicedns.podcert.ate.dev/cert.pem" + key: "/run/servicedns.podcert.ate.dev/key.pem" + + routes: + - name: substrate-https + matches: + - path: + pathPrefix: / + policies: + 
extProc: + host: "127.0.0.1:50051" + failureMode: failClosed + processingOptions: + requestBodyMode: none + responseBodyMode: none + requestHeaderMode: send + responseHeaderMode: skip + requestTrailerMode: skip + responseTrailerMode: skip + backends: + - dynamic: {} +--- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: @@ -48,56 +114,23 @@ roleRef: apiGroup: rbac.authorization.k8s.io --- apiVersion: v1 -kind: ConfigMap +kind: Service metadata: - name: atenet-router-envoy-config + name: atenet-router namespace: ate-system -data: - envoy.yaml: | - admin: - address: - socket_address: - address: 0.0.0.0 - port_value: 9901 - - node: - id: substrate-envoy-node - cluster: substrate-router-cluster - - dynamic_resources: - lds_config: - resource_api_version: V3 - ads: {} - cds_config: - resource_api_version: V3 - ads: {} - ads_config: - api_type: GRPC - transport_api_version: V3 - grpc_services: - - envoy_grpc: - cluster_name: xds_cluster - - static_resources: - clusters: - - name: xds_cluster - connect_timeout: 0.25s - type: STRICT_DNS - lb_policy: ROUND_ROBIN - typed_extension_protocol_options: - envoy.extensions.upstreams.http.v3.HttpProtocolOptions: - "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions - explicit_http_config: - http2_protocol_options: {} - load_assignment: - cluster_name: xds_cluster - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: 127.0.0.1 - port_value: 18000 +spec: + type: ClusterIP + selector: + app: atenet-router + ports: + - name: http + port: 80 + targetPort: 8080 + protocol: TCP + - name: https + port: 443 + targetPort: 8443 + protocol: TCP --- apiVersion: apps/v1 kind: Deployment @@ -125,18 +158,15 @@ spec: image: ko://github.com/agent-substrate/substrate/cmd/atenet args: - "router" - - "--networking-mode=envoy" - "--standalone" + - "--networking-mode=agentgateway" - "--namespace=ate-system" - "--port-http=8080" - - "--port-xds=18000" - 
"--port-extproc=50051" - "--extproc-address=127.0.0.1" - "--ateapi-address=api.ate-system.svc:443" - "--status-port=4040" - "--port-https=8443" - - "--tls-cert-path=/run/servicedns.podcert.ate.dev/credential-bundle.pem" - - "--tls-key-path=/run/servicedns.podcert.ate.dev/credential-bundle.pem" env: - name: POD_NAME valueFrom: @@ -152,64 +182,46 @@ spec: fieldPath: metadata.uid - name: OTEL_RESOURCE_ATTRIBUTES value: k8s.namespace.name=$(POD_NAMESPACE),k8s.pod.name=$(POD_NAME),k8s.pod.uid=$(POD_UID),service.instance.id=$(POD_UID) - - name: OTEL_EXPORTER_OTLP_ENDPOINT - value: http://opentelemetry-collector.gke-managed-otel.svc.cluster.local:4317 ports: - - name: xds - containerPort: 18000 - name: extproc containerPort: 50051 - name: status containerPort: 4040 - name: metrics containerPort: 9090 - - name: envoy - image: envoyproxy/envoy:v1.30-latest - command: - - "/usr/local/bin/envoy" - - "-c" - - "/etc/envoy/envoy.yaml" - - "--component-log-level" - - "upstream:debug,router:debug,ext_proc:debug" + - name: agentgateway + image: cr.agentgateway.dev/agentgateway:v1.3.0-alpha.1 + args: + - "-f" + - "/etc/agentgateway/config.yaml" ports: - name: http containerPort: 8080 - name: https containerPort: 8443 - - name: admin - containerPort: 9901 + - name: readiness + containerPort: 15021 + - name: gw-metrics + containerPort: 15020 volumeMounts: - - name: envoy-config - mountPath: /etc/envoy + - name: agentgateway-config + mountPath: /etc/agentgateway - name: "servicedns" mountPath: "/run/servicedns.podcert.ate.dev" + readinessProbe: + httpGet: + path: /healthz/ready + port: readiness + periodSeconds: 10 volumes: - - name: envoy-config + - name: agentgateway-config configMap: - name: atenet-router-envoy-config + name: atenet-router-agentgateway-config - name: "servicedns" projected: sources: - podCertificate: signerName: servicedns.podcert.ate.dev/identity keyType: ECDSAP256 - credentialBundlePath: credential-bundle.pem ---- -apiVersion: v1 -kind: Service -metadata: - name: 
atenet-router - namespace: ate-system -spec: - type: ClusterIP - selector: - app: atenet-router - ports: - - name: http - port: 80 - targetPort: 8080 - protocol: TCP - - name: https - port: 443 - targetPort: 8443 - protocol: TCP + certificateChainPath: cert.pem + keyPath: key.pem diff --git a/manifests/ate-install/base-agentgateway/kustomization.yaml b/manifests/ate-install/base-agentgateway/kustomization.yaml index d5883ba79..61cde5ace 100644 --- a/manifests/ate-install/base-agentgateway/kustomization.yaml +++ b/manifests/ate-install/base-agentgateway/kustomization.yaml @@ -20,6 +20,6 @@ resources: - ../ate-controller.yaml - ../atelet.yaml - ../atenet-dns.yaml - - ../atenet-router-agentgateway.yaml + - ../atenet-router.yaml - ../valkey.yaml - ../pod-certificate-controller.yaml diff --git a/manifests/ate-install/kind-agentgateway/kustomization.yaml b/manifests/ate-install/kind-agentgateway/kustomization.yaml index d9c9192b9..b3f849937 100644 --- a/manifests/ate-install/kind-agentgateway/kustomization.yaml +++ b/manifests/ate-install/kind-agentgateway/kustomization.yaml @@ -20,7 +20,7 @@ resources: - ../ate-controller.yaml - ../kind/atelet - ../atenet-dns.yaml - - ../atenet-router-agentgateway.yaml + - ../atenet-router.yaml - ../valkey.yaml - ../pod-certificate-controller.yaml - ../kind/rustfs.yaml diff --git a/manifests/ate-install/kind/kustomization.yaml b/manifests/ate-install/kind/kustomization.yaml index 43683e075..26d5d170f 100644 --- a/manifests/ate-install/kind/kustomization.yaml +++ b/manifests/ate-install/kind/kustomization.yaml @@ -23,6 +23,7 @@ resources: - ../atenet-router.yaml - ../valkey.yaml - ../pod-certificate-controller.yaml + - ../role.yaml - rustfs.yaml - ./otel-collector.yaml - ./prometheus.yaml diff --git a/manifests/ate-install/namespace.yaml b/manifests/ate-install/namespace.yaml new file mode 100644 index 000000000..9b6e06c5a --- /dev/null +++ b/manifests/ate-install/namespace.yaml @@ -0,0 +1,22 @@ +# Copyright 2026 Google LLC +# +# 
Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. +# Run `make helm-template` to regenerate. + +--- +apiVersion: v1 +kind: Namespace +metadata: + name: ate-system diff --git a/manifests/ate-install/pod-certificate-controller.yaml b/manifests/ate-install/pod-certificate-controller.yaml index 17c7b6fd2..987c1974b 100644 --- a/manifests/ate-install/pod-certificate-controller.yaml +++ b/manifests/ate-install/pod-certificate-controller.yaml @@ -1,16 +1,19 @@ -# Copyright 2026 Google LLC +# Copyright 2026 Google LLC # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
+# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. +# Run `make helm-template` to regenerate. apiVersion: v1 kind: Namespace diff --git a/manifests/ate-install/role.yaml b/manifests/ate-install/role.yaml new file mode 100644 index 000000000..6a3558617 --- /dev/null +++ b/manifests/ate-install/role.yaml @@ -0,0 +1,98 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. +# Run `make helm-template` to regenerate. 
+ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: ate-controller +rules: +- apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - pods + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - apps + resources: + - deployments + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - ate.dev + resources: + - actortemplates + - workerpools + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - ate.dev + resources: + - actortemplates/finalizers + - workerpools/finalizers + verbs: + - update +- apiGroups: + - ate.dev + resources: + - actortemplates/status + - workerpools/status + verbs: + - get + - patch + - update +--- +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/manifests/ate-install/valkey.yaml b/manifests/ate-install/valkey.yaml index ac649a555..f36724382 100644 --- a/manifests/ate-install/valkey.yaml +++ b/manifests/ate-install/valkey.yaml @@ -1,16 +1,19 @@ -# Copyright 2026 Google LLC +# Copyright 2026 Google LLC # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. +# Run `make helm-template` to regenerate. apiVersion: v1 kind: ConfigMap @@ -210,4 +213,3 @@ spec: items: - key: ca.crt path: ca.crt -