From fb918dd6e902c21beaeca7bf8115e2ee88f8a93d Mon Sep 17 00:00:00 2001 From: Nicholas Loke Date: Thu, 4 Jun 2026 11:11:49 -0400 Subject: [PATCH 1/2] fix(openclaw): fall back to conditions when AgentHarness phase=UNSPECIFIED When the openclaw backend returns phase=UNSPECIFIED (or UNKNOWN, which is handled by the same switch case), the controller would never surface Ready=True to the AgentHarness. Fall back to checking status.conditions[Ready=True] in that case. See: #1958 --- go/core/pkg/sandboxbackend/openshell/translate.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/go/core/pkg/sandboxbackend/openshell/translate.go b/go/core/pkg/sandboxbackend/openshell/translate.go index c6ae5a0d0d..e1d547c6bd 100644 --- a/go/core/pkg/sandboxbackend/openshell/translate.go +++ b/go/core/pkg/sandboxbackend/openshell/translate.go @@ -78,6 +78,14 @@ func phaseToCondition(sb *openshellv1.Sandbox) (metav1.ConditionStatus, string, case openshellv1.SandboxPhase_SANDBOX_PHASE_DELETING: return metav1.ConditionFalse, "SandboxDeleting", msg case openshellv1.SandboxPhase_SANDBOX_PHASE_UNKNOWN, openshellv1.SandboxPhase_SANDBOX_PHASE_UNSPECIFIED: + // Gateway may omit the phase field (NVIDIA/OpenShell#1710). + // Fall back to status.conditions so an older gateway does not + // permanently block AgentHarness readiness. 
+ for _, c := range sb.GetStatus().GetConditions() { + if c.GetType() == "Ready" && c.GetStatus() == "True" { + return metav1.ConditionTrue, "SandboxReady", msg + } + } return metav1.ConditionUnknown, "SandboxPhaseUnknown", msg default: return metav1.ConditionUnknown, "SandboxPhaseUnrecognized", fmt.Sprintf("unrecognized phase %s", sb.GetPhase()) From ec051a7236f080f5a387b9fd26f4d9667115c9f1 Mon Sep 17 00:00:00 2001 From: Nicholas Loke Date: Thu, 4 Jun 2026 11:12:04 -0400 Subject: [PATCH 2/2] fix(openclaw): upsert and attach inference provider at sandbox creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An AgentHarness with the openclaw backend fails all LLM calls because the OpenShell inference provider is never attached to the sandbox. The openclaw proxy resolves 'openshell:resolve:env:OPENAI_API_KEY' by looking up credentials from the sandbox's attached provider — if none is attached, the proxy closes the connection. Fix: wire upsertInferenceProviderForHarness into EnsureAgentHarness. It reads the ModelConfig, resolves the API key from the referenced k8s secret, upserts the provider via OpenShell gRPC, and passes the provider name into attachMessagingProviders so the sandbox is created with the provider attached. If the upsert fails, the error is logged and sandbox creation proceeds without the inference provider, so a transient key-lookup failure does not block harness creation. The sandbox process never holds the real key — credential resolution happens in the proxy at request time, preserving the security model. 
--- .../pkg/sandboxbackend/openshell/openclaw.go | 41 ++++++++++++++++- .../openshell/openclaw/modelconfig.go | 6 +++ .../openshell/openclaw/provider.go | 6 +++ .../pkg/sandboxbackend/openshell/providers.go | 44 +++++++++++++++++++ 4 files changed, 96 insertions(+), 1 deletion(-) diff --git a/go/core/pkg/sandboxbackend/openshell/openclaw.go b/go/core/pkg/sandboxbackend/openshell/openclaw.go index 9f95a407a5..5fa3a7a323 100644 --- a/go/core/pkg/sandboxbackend/openshell/openclaw.go +++ b/go/core/pkg/sandboxbackend/openshell/openclaw.go @@ -50,7 +50,46 @@ func (b *ClawBackend) EnsureAgentHarness(ctx context.Context, ah *v1alpha2.Agent if res, found, err := b.findExistingSandbox(ctx, ah); err != nil || found { return res, err } - return b.ensureAgentHarnessSandbox(ctx, ah, buildClawCreateRequest) + + // Upsert the inference provider so the OpenShell proxy can resolve + // openshell:resolve:env: placeholders in LLM Authorization headers. + // The real API key lives only in the OpenShell provider record; the sandbox + // process env never receives it directly. + inferenceProviderName, err := b.upsertInferenceProviderForHarness(ctx, ah) + if err != nil { + ctrllog.FromContext(ctx).Error(err, "failed to upsert inference provider; LLM credential resolution may fail", + "agentHarness", ah.Namespace+"/"+ah.Name) + // non-fatal: proceed so harness creation is not blocked by a transient key-lookup failure + inferenceProviderName = "" + } + + builder := func(ah *v1alpha2.AgentHarness, msgProviders []string) (*openshellv1.CreateSandboxRequest, []string) { + req, unsupported := buildClawCreateRequest(ah, msgProviders) + if inferenceProviderName != "" { + attachMessagingProviders(req, []string{inferenceProviderName}) + } + return req, unsupported + } + + return b.ensureAgentHarnessSandbox(ctx, ah, builder) +} + +// upsertInferenceProviderForHarness fetches the ModelConfig referenced by the harness and upserts +// an OpenShell provider carrying the LLM credentials. 
Returns "" when no modelConfigRef is set. +func (b *ClawBackend) upsertInferenceProviderForHarness(ctx context.Context, ah *v1alpha2.AgentHarness) (string, error) { + ref := strings.TrimSpace(ah.Spec.ModelConfigRef) + if ref == "" { + return "", nil + } + modelConfigRef, err := utils.ParseRefString(ref, ah.Namespace) + if err != nil { + return "", fmt.Errorf("parse modelConfigRef: %w", err) + } + mc := &v1alpha2.ModelConfig{} + if err := b.kubeClient.Get(ctx, modelConfigRef, mc); err != nil { + return "", fmt.Errorf("get ModelConfig: %w", err) + } + return UpsertInferenceProvider(ctx, b.clients, b.kubeClient, ah, mc) } const defaultOpenclawGatewayPort = 18800 diff --git a/go/core/pkg/sandboxbackend/openshell/openclaw/modelconfig.go b/go/core/pkg/sandboxbackend/openshell/openclaw/modelconfig.go index 3bb29e88fd..f0fbe751af 100644 --- a/go/core/pkg/sandboxbackend/openshell/openclaw/modelconfig.go +++ b/go/core/pkg/sandboxbackend/openshell/openclaw/modelconfig.go @@ -16,6 +16,12 @@ func GatewayProviderRecordName(provider v1alpha2.ModelProvider) string { return strings.ToLower(string(provider)) } +// InferenceProviderName returns the OpenShell provider name used to attach LLM credentials to a sandbox. +// This name is stable and unique per (sandbox, provider) pair. +func InferenceProviderName(sandboxName string, provider v1alpha2.ModelProvider) string { + return fmt.Sprintf("%s-inference-%s", sandboxName, GatewayProviderRecordName(provider)) +} + // ResolveModelConfigAPIKey reads the API key from the Secret referenced by ModelConfig. 
func ResolveModelConfigAPIKey(ctx context.Context, kube client.Client, mc *v1alpha2.ModelConfig) (string, error) { if mc.Spec.APIKeyPassthrough { diff --git a/go/core/pkg/sandboxbackend/openshell/openclaw/provider.go b/go/core/pkg/sandboxbackend/openshell/openclaw/provider.go index 70a075a272..0c0da1a010 100644 --- a/go/core/pkg/sandboxbackend/openshell/openclaw/provider.go +++ b/go/core/pkg/sandboxbackend/openshell/openclaw/provider.go @@ -7,6 +7,12 @@ import ( "github.com/kagent-dev/kagent/go/api/v1alpha2" ) +// BootstrapProviderBaseURL returns the LLM base URL from the ModelConfig, falling back to DefaultInferenceBaseURL. +// Exported so the openshell package can include it in the OpenShell provider credentials. +func BootstrapProviderBaseURL(mc *v1alpha2.ModelConfig) string { + return bootstrapProviderBaseURL(mc) +} + func bootstrapProviderBaseURL(mc *v1alpha2.ModelConfig) string { switch mc.Spec.Provider { case v1alpha2.ModelProviderOpenAI: diff --git a/go/core/pkg/sandboxbackend/openshell/providers.go b/go/core/pkg/sandboxbackend/openshell/providers.go index 6969e5dc52..a3b258c5fe 100644 --- a/go/core/pkg/sandboxbackend/openshell/providers.go +++ b/go/core/pkg/sandboxbackend/openshell/providers.go @@ -7,9 +7,12 @@ import ( "github.com/kagent-dev/kagent/go/api/openshell/gen/datamodelv1" openshellv1 "github.com/kagent-dev/kagent/go/api/openshell/gen/openshellv1" + "github.com/kagent-dev/kagent/go/api/v1alpha2" "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openshell/channels" + "github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openshell/openclaw" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" + "sigs.k8s.io/controller-runtime/pkg/client" ) const genericProviderType = "generic" @@ -86,3 +89,44 @@ func messagingDefsToGateway(defs []channels.MessagingProviderDef) []GatewayProvi } return out } + +// UpsertInferenceProvider registers an OpenShell provider carrying the LLM credentials +// (API key, base URL) for the given AgentHarness 
+ ModelConfig pair. Attaching this +// provider to the sandbox allows the OpenShell proxy to resolve +// openshell:resolve:env: placeholders in Authorization headers at request time, +// so the real API key is never stored in the sandbox process environment. +// Returns the provider name to include in CreateSandboxRequest.spec.providers. +func UpsertInferenceProvider( + ctx context.Context, + oc *OpenShellClients, + kube client.Client, + ah *v1alpha2.AgentHarness, + mc *v1alpha2.ModelConfig, +) (string, error) { + if oc == nil || oc.OpenShell == nil { + return "", fmt.Errorf("openshell: OpenShell client is required for inference provider") + } + apiKey, err := openclaw.ResolveModelConfigAPIKey(ctx, kube, mc) + if err != nil { + return "", fmt.Errorf("resolve model API key: %w", err) + } + apiKeyEnv := openclaw.DefaultAPIKeyEnvVar(mc.Spec.Provider) + sandboxName := agentHarnessGatewayName(ah) + providerName := openclaw.InferenceProviderName(sandboxName, mc.Spec.Provider) + + creds := map[string]string{ + apiKeyEnv: apiKey, + } + if baseURL := openclaw.BootstrapProviderBaseURL(mc); baseURL != "" { + creds["OPENAI_BASE_URL"] = baseURL + } + + if err := UpsertGatewayProvider(ctx, oc.OpenShell, GatewayProviderDef{ + Name: providerName, + Type: genericProviderType, + Credentials: creds, + }); err != nil { + return "", fmt.Errorf("upsert inference provider %s: %w", providerName, err) + } + return providerName, nil +}