From fb918dd6e902c21beaeca7bf8115e2ee88f8a93d Mon Sep 17 00:00:00 2001
From: Nicholas Loke <nloke@users.noreply.github.com>
Date: Thu, 4 Jun 2026 11:11:49 -0400
Subject: [PATCH 1/2] fix(openclaw): fall back to conditions when AgentHarness
 phase=UNSPECIFIED

When the openclaw backend returns phase=UNSPECIFIED (or UNKNOWN), the
controller never surfaces Ready=True on the AgentHarness. Fall back to
checking status.conditions for a Ready=True condition in that case.

See: #1958
---
 go/core/pkg/sandboxbackend/openshell/translate.go | 8 ++++++++
 1 file changed, 8 insertions(+)
diff --git a/go/core/pkg/sandboxbackend/openshell/translate.go b/go/core/pkg/sandboxbackend/openshell/translate.go
index c6ae5a0d0d..e1d547c6bd 100644
--- a/go/core/pkg/sandboxbackend/openshell/translate.go
+++ b/go/core/pkg/sandboxbackend/openshell/translate.go
@@ -78,6 +78,14 @@ func phaseToCondition(sb *openshellv1.Sandbox) (metav1.ConditionStatus, string,
 	case openshellv1.SandboxPhase_SANDBOX_PHASE_DELETING:
 		return metav1.ConditionFalse, "SandboxDeleting", msg
 	case openshellv1.SandboxPhase_SANDBOX_PHASE_UNKNOWN, openshellv1.SandboxPhase_SANDBOX_PHASE_UNSPECIFIED:
+		// Gateway may omit the phase field (NVIDIA/OpenShell#1710).
+		// Fall back to status.conditions so an older gateway does not
+		// permanently block AgentHarness readiness.
+		for _, c := range sb.GetStatus().GetConditions() {
+			if c.GetType() == "Ready" && c.GetStatus() == "True" {
+				return metav1.ConditionTrue, "SandboxReady", msg
+			}
+		}
 		return metav1.ConditionUnknown, "SandboxPhaseUnknown", msg
 	default:
 		return metav1.ConditionUnknown, "SandboxPhaseUnrecognized", fmt.Sprintf("unrecognized phase %s", sb.GetPhase())

From ec051a7236f080f5a387b9fd26f4d9667115c9f1 Mon Sep 17 00:00:00 2001
From: Nicholas Loke <nloke@users.noreply.github.com>
Date: Thu, 4 Jun 2026 11:12:04 -0400
Subject: [PATCH 2/2] fix(openclaw): upsert and attach inference provider at
 sandbox creation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

AgentHarness with openclaw backend fails all LLM calls because the
OpenShell inference provider is never attached to the sandbox. The
openclaw proxy resolves 'openshell:resolve:env:OPENAI_API_KEY' by
looking up credentials from the sandbox's attached provider — if none
is attached, the proxy closes the connection.

Fix: wire upsertInferenceProviderForHarness into EnsureAgentHarness.
It reads the ModelConfig, resolves the API key from the referenced
Kubernetes Secret, upserts the provider via OpenShell gRPC, and attaches
the provider name through attachMessagingProviders so the sandbox is
created with the provider attached. If the upsert fails, the error is
logged and sandbox creation proceeds without the inference provider.

The sandbox process never holds the real key — credential resolution
happens in the proxy at request time, preserving the security model.
---
 .../pkg/sandboxbackend/openshell/openclaw.go  | 41 ++++++++++++++++-
 .../openshell/openclaw/modelconfig.go         |  6 +++
 .../openshell/openclaw/provider.go            |  6 +++
 .../pkg/sandboxbackend/openshell/providers.go | 44 +++++++++++++++++++
 4 files changed, 96 insertions(+), 1 deletion(-)

diff --git a/go/core/pkg/sandboxbackend/openshell/openclaw.go b/go/core/pkg/sandboxbackend/openshell/openclaw.go
index 9f95a407a5..5fa3a7a323 100644
--- a/go/core/pkg/sandboxbackend/openshell/openclaw.go
+++ b/go/core/pkg/sandboxbackend/openshell/openclaw.go
@@ -50,7 +50,46 @@ func (b *ClawBackend) EnsureAgentHarness(ctx context.Context, ah *v1alpha2.Agent
 	if res, found, err := b.findExistingSandbox(ctx, ah); err != nil || found {
 		return res, err
 	}
-	return b.ensureAgentHarnessSandbox(ctx, ah, buildClawCreateRequest)
+
+	// Upsert the inference provider so the OpenShell proxy can resolve
+	// openshell:resolve:env:<VAR> placeholders in LLM Authorization headers.
+	// The real API key lives only in the OpenShell provider record; the sandbox
+	// process env never receives it directly.
+	inferenceProviderName, err := b.upsertInferenceProviderForHarness(ctx, ah)
+	if err != nil {
+		ctrllog.FromContext(ctx).Error(err, "failed to upsert inference provider; LLM credential resolution may fail",
+			"agentHarness", ah.Namespace+"/"+ah.Name)
+		// non-fatal: proceed so harness creation is not blocked by a transient key-lookup failure
+		inferenceProviderName = ""
+	}
+
+	builder := func(ah *v1alpha2.AgentHarness, msgProviders []string) (*openshellv1.CreateSandboxRequest, []string) {
+		req, unsupported := buildClawCreateRequest(ah, msgProviders)
+		if inferenceProviderName != "" {
+			attachMessagingProviders(req, []string{inferenceProviderName})
+		}
+		return req, unsupported
+	}
+
+	return b.ensureAgentHarnessSandbox(ctx, ah, builder)
+}
+
+// upsertInferenceProviderForHarness fetches the ModelConfig referenced by the harness and upserts
+// an OpenShell provider carrying the LLM credentials. Returns "" when no modelConfigRef is set.
+func (b *ClawBackend) upsertInferenceProviderForHarness(ctx context.Context, ah *v1alpha2.AgentHarness) (string, error) {
+	ref := strings.TrimSpace(ah.Spec.ModelConfigRef)
+	if ref == "" {
+		return "", nil
+	}
+	modelConfigRef, err := utils.ParseRefString(ref, ah.Namespace)
+	if err != nil {
+		return "", fmt.Errorf("parse modelConfigRef: %w", err)
+	}
+	mc := &v1alpha2.ModelConfig{}
+	if err := b.kubeClient.Get(ctx, modelConfigRef, mc); err != nil {
+		return "", fmt.Errorf("get ModelConfig: %w", err)
+	}
+	return UpsertInferenceProvider(ctx, b.clients, b.kubeClient, ah, mc)
 }
 
 const defaultOpenclawGatewayPort = 18800
diff --git a/go/core/pkg/sandboxbackend/openshell/openclaw/modelconfig.go b/go/core/pkg/sandboxbackend/openshell/openclaw/modelconfig.go
index 3bb29e88fd..f0fbe751af 100644
--- a/go/core/pkg/sandboxbackend/openshell/openclaw/modelconfig.go
+++ b/go/core/pkg/sandboxbackend/openshell/openclaw/modelconfig.go
@@ -16,6 +16,12 @@ func GatewayProviderRecordName(provider v1alpha2.ModelProvider) string {
 	return strings.ToLower(string(provider))
 }
 
+// InferenceProviderName returns the OpenShell provider name used to attach LLM credentials to a sandbox.
+// This name is stable and unique per (sandbox, provider) pair.
+func InferenceProviderName(sandboxName string, provider v1alpha2.ModelProvider) string {
+	return fmt.Sprintf("%s-inference-%s", sandboxName, GatewayProviderRecordName(provider))
+}
+
 // ResolveModelConfigAPIKey reads the API key from the Secret referenced by ModelConfig.
 func ResolveModelConfigAPIKey(ctx context.Context, kube client.Client, mc *v1alpha2.ModelConfig) (string, error) {
 	if mc.Spec.APIKeyPassthrough {
diff --git a/go/core/pkg/sandboxbackend/openshell/openclaw/provider.go b/go/core/pkg/sandboxbackend/openshell/openclaw/provider.go
index 70a075a272..0c0da1a010 100644
--- a/go/core/pkg/sandboxbackend/openshell/openclaw/provider.go
+++ b/go/core/pkg/sandboxbackend/openshell/openclaw/provider.go
@@ -7,6 +7,12 @@ import (
 	"github.com/kagent-dev/kagent/go/api/v1alpha2"
 )
 
+// BootstrapProviderBaseURL returns the LLM base URL from the ModelConfig, falling back to DefaultInferenceBaseURL.
+// Exported so the openshell package can include it in the OpenShell provider credentials.
+func BootstrapProviderBaseURL(mc *v1alpha2.ModelConfig) string {
+	return bootstrapProviderBaseURL(mc)
+}
+
 func bootstrapProviderBaseURL(mc *v1alpha2.ModelConfig) string {
 	switch mc.Spec.Provider {
 	case v1alpha2.ModelProviderOpenAI:
diff --git a/go/core/pkg/sandboxbackend/openshell/providers.go b/go/core/pkg/sandboxbackend/openshell/providers.go
index 6969e5dc52..a3b258c5fe 100644
--- a/go/core/pkg/sandboxbackend/openshell/providers.go
+++ b/go/core/pkg/sandboxbackend/openshell/providers.go
@@ -7,9 +7,12 @@ import (
 
 	"github.com/kagent-dev/kagent/go/api/openshell/gen/datamodelv1"
 	openshellv1 "github.com/kagent-dev/kagent/go/api/openshell/gen/openshellv1"
+	"github.com/kagent-dev/kagent/go/api/v1alpha2"
 	"github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openshell/channels"
+	"github.com/kagent-dev/kagent/go/core/pkg/sandboxbackend/openshell/openclaw"
 	"google.golang.org/grpc/codes"
 	"google.golang.org/grpc/status"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
 const genericProviderType = "generic"
@@ -86,3 +89,44 @@ func messagingDefsToGateway(defs []channels.MessagingProviderDef) []GatewayProvi
 	}
 	return out
 }
+
+// UpsertInferenceProvider registers an OpenShell provider carrying the LLM credentials
+// (API key, base URL) for the given AgentHarness + ModelConfig pair. Attaching this
+// provider to the sandbox allows the OpenShell proxy to resolve
+// openshell:resolve:env:<VAR> placeholders in Authorization headers at request time,
+// so the real API key is never stored in the sandbox process environment.
+// Returns the provider name to include in CreateSandboxRequest.spec.providers.
+func UpsertInferenceProvider(
+	ctx context.Context,
+	oc *OpenShellClients,
+	kube client.Client,
+	ah *v1alpha2.AgentHarness,
+	mc *v1alpha2.ModelConfig,
+) (string, error) {
+	if oc == nil || oc.OpenShell == nil {
+		return "", fmt.Errorf("openshell: OpenShell client is required for inference provider")
+	}
+	apiKey, err := openclaw.ResolveModelConfigAPIKey(ctx, kube, mc)
+	if err != nil {
+		return "", fmt.Errorf("resolve model API key: %w", err)
+	}
+	apiKeyEnv := openclaw.DefaultAPIKeyEnvVar(mc.Spec.Provider)
+	sandboxName := agentHarnessGatewayName(ah)
+	providerName := openclaw.InferenceProviderName(sandboxName, mc.Spec.Provider)
+
+	creds := map[string]string{
+		apiKeyEnv: apiKey,
+	}
+	if baseURL := openclaw.BootstrapProviderBaseURL(mc); baseURL != "" {
+		creds["OPENAI_BASE_URL"] = baseURL
+	}
+
+	if err := UpsertGatewayProvider(ctx, oc.OpenShell, GatewayProviderDef{
+		Name:        providerName,
+		Type:        genericProviderType,
+		Credentials: creds,
+	}); err != nil {
+		return "", fmt.Errorf("upsert inference provider %s: %w", providerName, err)
+	}
+	return providerName, nil
+}