From d2a44bdb3c5351386897904215040e20193b07f8 Mon Sep 17 00:00:00 2001 From: Peter Jausovec Date: Thu, 11 Jun 2026 16:24:14 -0700 Subject: [PATCH] add reliability docs Signed-off-by: Peter Jausovec --- public/sitemap.xml | 243 +++++++++--------- src/app/docs/kagent/concepts/page.mdx | 1 + .../docs/kagent/concepts/reliability/page.mdx | 219 ++++++++++++++++ src/app/docs/kagent/operations/debug/page.mdx | 9 + src/config/navigation.json | 5 + 5 files changed, 359 insertions(+), 118 deletions(-) create mode 100644 src/app/docs/kagent/concepts/reliability/page.mdx diff --git a/public/sitemap.xml b/public/sitemap.xml index f5bf2a95..a5cb6a42 100644 --- a/public/sitemap.xml +++ b/public/sitemap.xml @@ -2,826 +2,833 @@ https://kagent.dev/agents - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/blog - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/community - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/concepts/agent-harness - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/concepts/agent-memory - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/concepts/agents - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/concepts/architecture - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/concepts - 2026-05-26 + 2026-06-11 + weekly + 0.8 + + + + https://kagent.dev/docs/kagent/concepts/reliability + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/concepts/tools - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/examples/a2a-agents - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/examples/a2a-byo - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/examples/agent-harness - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/examples/agent-sandbox - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/examples/agents-mcp - 2026-05-26 + 2026-06-11 weekly 0.8 
https://kagent.dev/docs/kagent/examples/crewai-byo - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/examples/discord-a2a - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/examples/documentation - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/examples/human-in-the-loop - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/examples/langchain-byo - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/examples - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/examples/skills - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/examples/slack-a2a - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/examples/telegram-bot - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/getting-started/first-agent - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/getting-started/first-mcp-tool - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/getting-started/local-development - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/getting-started - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/getting-started/quickstart - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/getting-started/system-prompts - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/introduction/features - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/introduction/installation - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/introduction - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/introduction/what-is-kagent - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/observability/audit-prompts - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/observability/launch-ui - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/observability - 2026-05-26 + 2026-06-11 weekly 0.8 
https://kagent.dev/docs/kagent/observability/tracing - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/operations/debug - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/operations/operational-considerations - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/operations - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/operations/uninstall - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/operations/upgrade - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/resources/api-ref - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/resources/cli/kagent-add-mcp - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/resources/cli/kagent-bug-report - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/resources/cli/kagent-build - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/resources/cli/kagent-completion - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/resources/cli/kagent-dashboard - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/resources/cli/kagent-deploy - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/resources/cli/kagent-get - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/resources/cli/kagent-help - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/resources/cli/kagent-init - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/resources/cli/kagent-install - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/resources/cli/kagent-invoke - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/resources/cli/kagent-mcp - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/resources/cli/kagent-run - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/resources/cli/kagent-uninstall 
- 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/resources/cli/kagent-version - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/resources/cli - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/resources/faq - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/resources/helm - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/resources - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/resources/release-notes - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/resources/tools-ecosystem - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/supported-providers/amazon-bedrock - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/supported-providers/anthropic - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/supported-providers/azure-openai - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/supported-providers/byo-openai - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/supported-providers/gemini - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/supported-providers/google-vertexai - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/supported-providers/ollama - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/supported-providers/openai - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/supported-providers - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/supported-providers/sap-ai-core - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kagent/supported-providers/xai - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kmcp/deploy/install-controller - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kmcp/deploy - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kmcp/deploy/server - 2026-05-26 + 2026-06-11 weekly 0.8 
https://kagent.dev/docs/kmcp/develop/fastmcp-python - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kmcp/develop/mcp-go - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kmcp/develop - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kmcp/introduction - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kmcp - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kmcp/quickstart - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kmcp/reference/api-ref - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kmcp/reference/kmcp-add-tool - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kmcp/reference/kmcp-build - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kmcp/reference/kmcp-completion - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kmcp/reference/kmcp-deploy - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kmcp/reference/kmcp-help - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kmcp/reference/kmcp-init - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kmcp/reference/kmcp-install - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kmcp/reference/kmcp-run - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kmcp/reference/kmcp-secrets - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kmcp/reference - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs/kmcp/secrets - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/docs - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/enterprise - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/page.tsx - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/tools - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/agents/argo-rollouts-conversion-agent - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/agents/cilium-crd-agent - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/agents/helm-agent - 2026-05-26 + 2026-06-11 weekly 0.8 
https://kagent.dev/agents/istio-agent - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/agents/k8s-agent - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/agents/kgateway-agent - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/agents/observability-agent - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/agents/promql-agent - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/tools/istio - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/tools/kubernetes - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/tools/prometheus - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/tools/documentation - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/tools/helm - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/tools/argo - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/tools/grafana - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/tools/other - 2026-05-26 + 2026-06-11 weekly 0.8 https://kagent.dev/tools/cilium - 2026-05-26 + 2026-06-11 weekly 0.8 diff --git a/src/app/docs/kagent/concepts/page.mdx b/src/app/docs/kagent/concepts/page.mdx index 70c7ed1c..8bd89080 100644 --- a/src/app/docs/kagent/concepts/page.mdx +++ b/src/app/docs/kagent/concepts/page.mdx @@ -32,5 +32,6 @@ import QuickLink from '@/components/quick-link'; + diff --git a/src/app/docs/kagent/concepts/reliability/page.mdx b/src/app/docs/kagent/concepts/reliability/page.mdx new file mode 100644 index 00000000..02d94233 --- /dev/null +++ b/src/app/docs/kagent/concepts/reliability/page.mdx @@ -0,0 +1,219 @@ +--- +title: "Reliability" +pageOrder: 6 +description: "Configure self-healing and safety behaviors for kagent agents: tool call retries with reflection, model call limits, debug logging, and LLM request retries." 
+--- + +export const metadata = { + title: "Agent reliability features in kagent", + description: "Configure self-healing and safety behaviors for kagent agents: tool call retries with reflection, model call limits, debug logging, and LLM request retries.", + author: "kagent.dev" +}; + +# Reliability + +Agents fail in ways traditional applications don't: tool calls error out, LLM providers rate-limit requests, and a confused model can loop forever burning tokens. kagent provides a set of reliability features that make agents self-healing, bounded, and observable: + +| Feature | Resource | Field | What it does | +|---------|----------|-------|--------------| +| [Tool retries](#tool-retries-with-reflection) | Agent | `spec.declarative.reliability.toolRetries` | Retries failed tool calls with reflection guidance so the agent can self-correct | +| [Max LLM calls](#max-llm-calls) | Agent | `spec.declarative.reliability.maxLLMCalls` | Caps model calls per request to prevent runaway loops | +| [Debug logging](#debug-logging) | Agent | `spec.declarative.reliability.debugLogging` | Logs every LLM request/response and tool call to the agent pod logs | +| [LLM request retries](#llm-request-retries) | ModelConfig | `spec.retry.attempts` | Automatically retries failed LLM HTTP requests (429, 408, 5xx) with exponential backoff | + +All features work with both the Python (default) and Go agent runtimes. + +## Tool retries with reflection + +Tool calls fail for many reasons: a malformed manifest, a missing resource, a temporarily unavailable backend. By default, a failed tool call simply returns the error to the model, which may repeat the exact same failing call. 
+ +Setting `reliability.toolRetries` enables a *reflect-and-retry* behavior: when a tool call fails, the runtime injects structured reflection guidance into the model context — the error details, the error type, and instructions to analyze what went wrong — so the agent can correct its approach instead of blindly repeating the same call. The value is the maximum number of consecutive failures for a tool before the agent stops retrying it (between 1 and 10). + +```yaml +apiVersion: kagent.dev/v1alpha2 +kind: Agent +metadata: + name: k8s-agent + namespace: kagent +spec: + type: Declarative + declarative: + modelConfig: default-model-config + systemMessage: | + You're a Kubernetes agent that helps users manage their cluster resources. + reliability: + toolRetries: 3 + tools: + - type: McpServer + mcpServer: + name: kagent-tool-server + kind: RemoteMCPServer + toolNames: + - k8s_create_resource + - k8s_describe_resource +``` + +When a tool call fails, the model receives a function response like this instead of a raw error: + +```json +{ + "error_details": "[Kubernetes] create -f /tmp/k8s-resource.yaml failed: exit status 1", + "error_type": "ToolError", + "reflection_guidance": "The call to tool `k8s_create_resource` failed.\n\n**Error Details:**\n..." +} +``` + +The agent then has a chance to fix its inputs (for example, correct an invalid manifest) and try again. After `toolRetries` consecutive failures of the same tool, the agent stops retrying and reports the failure gracefully in its final answer rather than erroring out. + +## Max LLM calls + +An agent that gets stuck in a tool-call loop can make hundreds of model calls for a single request, which gets expensive fast. `reliability.maxLLMCalls` is a cost safety rail that caps the total number of model calls the agent can make while handling a single request. 
+ +```yaml +apiVersion: kagent.dev/v1alpha2 +kind: Agent +metadata: + name: k8s-agent + namespace: kagent +spec: + type: Declarative + declarative: + modelConfig: default-model-config + systemMessage: | + You're a Kubernetes agent that helps users manage their cluster resources. + reliability: + maxLLMCalls: 20 +``` + +If the agent exceeds the limit, the run stops with a clear error instead of looping: + +``` +Agent stopped: exceeded the configured limit of 20 model calls for a single +request. This safety rail prevents runaway loops. If the task legitimately +needs more model calls, increase reliability.maxLLMCalls on the agent. +``` + +If unset, the runtime default of 500 calls per request applies. Note that each tool call round-trip costs at least one model call, so set the limit with your agent's expected workflow in mind — a value that's too low will cut off legitimate multi-step tasks. + +## Debug logging + +When an agent misbehaves, the first question is always "what did the model actually see, and how did it respond?" Setting `reliability.debugLogging: true` enables verbose runtime logging of every LLM request, LLM response, tool call, and tool result to the agent pod logs. It's off by default because the output is verbose and includes full prompt and response content. + +```yaml +apiVersion: kagent.dev/v1alpha2 +kind: Agent +metadata: + name: k8s-agent + namespace: kagent +spec: + type: Declarative + declarative: + modelConfig: default-model-config + systemMessage: | + You're a Kubernetes agent that helps users manage their cluster resources. 
+ reliability: + debugLogging: true +``` + +View the output in the agent's pod logs: + +```shell +kubectl logs -n kagent deploy/k8s-agent -f +``` + +```console +[logging_plugin] 🚀 USER MESSAGE RECEIVED +[logging_plugin] Invocation ID: e-712e920d-00bf-40fb-8fd3-130071ab80f5 +[logging_plugin] Session ID: 2ed4bd5d-79f0-433b-b938-dfb75e8d256a +[logging_plugin] User Content: text: 'Describe the kagent-controller deployment' +[logging_plugin] 🧠 LLM REQUEST +[logging_plugin] Model: gpt-4.1-mini +[logging_plugin] System Instruction: 'You're a Kubernetes agent...' +[logging_plugin] 🔧 TOOL STARTED +[logging_plugin] Tool Name: k8s_describe_resource +[logging_plugin] 🔧 TOOL COMPLETED +[logging_plugin] Result: {'content': [{'type': 'text', 'text': 'Name: kagent-controller...'}]} +``` + +> **Note:** Debug logging records full prompts, model responses, and tool results. Avoid enabling it permanently in environments where conversation content is sensitive, and prefer [tracing](/docs/kagent/observability/tracing) for long-term observability. + +## LLM request retries + +LLM provider APIs fail transiently: rate limits (429), request timeouts (408), and server errors (5xx) are routine at scale. The `retry` field on a ModelConfig configures the provider SDK to automatically retry failed HTTP requests with exponential backoff before surfacing an error to the agent. + +```yaml +apiVersion: kagent.dev/v1alpha2 +kind: ModelConfig +metadata: + name: default-model-config + namespace: kagent +spec: + apiKeySecret: kagent-openai + apiKeySecretKey: OPENAI_API_KEY + model: gpt-4.1-mini + provider: OpenAI + openAI: {} + retry: + attempts: 5 +``` + +`attempts` is the maximum number of retry attempts after the initial request fails (0 to 20). Set it to `0` to disable retries entirely. + +A few things to know: + +- Retries are supported for the **OpenAI**, **Azure OpenAI**, **Anthropic**, and **Gemini** providers. Other providers ignore the setting and log a warning in the agent pod. 
+- The OpenAI and Anthropic SDKs already retry twice by default — the `retry` setting overrides that default. Use a higher value for production workloads that must ride out rate-limit bursts, or `0` if you'd rather fail fast. +- These retries happen at the HTTP transport level and are invisible to the model; they're complementary to [tool retries](#tool-retries-with-reflection), which operate at the agent reasoning level. + +## Combining the features + +The features are independent and compose naturally. A production-hardened agent typically combines transport-level retries with reasoning-level retries and a cost cap: + +```yaml +apiVersion: kagent.dev/v1alpha2 +kind: ModelConfig +metadata: + name: prod-model + namespace: kagent +spec: + apiKeySecret: kagent-openai + apiKeySecretKey: OPENAI_API_KEY + model: gpt-4.1-mini + provider: OpenAI + openAI: {} + retry: + attempts: 5 # ride out rate limits and transient 5xx errors +--- +apiVersion: kagent.dev/v1alpha2 +kind: Agent +metadata: + name: prod-k8s-agent + namespace: kagent +spec: + type: Declarative + declarative: + modelConfig: prod-model + systemMessage: | + You're a Kubernetes agent that helps users manage their cluster resources. + reliability: + toolRetries: 3 # self-correct failed tool calls + maxLLMCalls: 50 # hard cap on cost per request + tools: + - type: McpServer + mcpServer: + name: kagent-tool-server + kind: RemoteMCPServer + toolNames: + - k8s_get_resources + - k8s_describe_resource + - k8s_create_resource +``` + +Enable `debugLogging: true` temporarily when you need to diagnose agent behavior, and turn it off again once you're done. + +## Configuring via the UI + +All of these settings are also available in the kagent dashboard: + +- **Agent reliability** (`toolRetries`, `maxLLMCalls`, `debugLogging`): in the agent create/edit form. +- **LLM request retries** (`retry.attempts`): in the **Advanced** section of the model configuration form. 
diff --git a/src/app/docs/kagent/operations/debug/page.mdx b/src/app/docs/kagent/operations/debug/page.mdx index 54db99da..b9199d24 100644 --- a/src/app/docs/kagent/operations/debug/page.mdx +++ b/src/app/docs/kagent/operations/debug/page.mdx @@ -45,4 +45,13 @@ spec: .. ``` +To see exactly what the agent sends to and receives from the LLM — every model request, response, and tool call — enable [debug logging](/docs/kagent/concepts/reliability#debug-logging) on the agent: + +```yaml +spec: + declarative: + reliability: + debugLogging: true +``` + You can also ask for help in the [community](https://discord.gg/Fu3k65f2k3) or log an issue on [GitHub](https://github.com/kagent-dev/kagent). You can create a bug report using the kagent CLI by running `kagent bug-report`. Before attaching files to your bug report, make sure they don't contain any sensitive information! diff --git a/src/config/navigation.json b/src/config/navigation.json index e2a0fd09..e3507fe3 100644 --- a/src/config/navigation.json +++ b/src/config/navigation.json @@ -98,6 +98,11 @@ "title": "Agent Memory", "href": "/docs/kagent/concepts/agent-memory", "description": "Enable vector-backed long-term memory for agents to learn from past interactions." + }, + { + "title": "Reliability", + "href": "/docs/kagent/concepts/reliability", + "description": "Configure self-healing and safety behaviors for kagent agents: tool call retries with reflection, model call limits, debug logging, and LLM request retries." } ] },