Skip to content

Commit 5b50503

Browse files
authored
add graceful shutdown and propagate context in readiness-condition-reporter (#174)
Signed-off-by: Anish Ramasekar <anish.ramasekar@gmail.com>
1 parent 65f9662 commit 5b50503

File tree

2 files changed

+58
-20
lines changed

2 files changed

+58
-20
lines changed

cmd/readiness-condition-reporter/main.go

Lines changed: 34 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ import (
2222
"io"
2323
"net/http"
2424
"os"
25+
"os/signal"
26+
"syscall"
2527
"time"
2628

2729
corev1 "k8s.io/api/core/v1"
@@ -109,29 +111,42 @@ func main() {
109111
Timeout: defaultHTTPTimeout,
110112
}
111113

114+
// Create a context that cancels on SIGTERM or SIGINT
115+
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT)
116+
defer cancel()
117+
112118
klog.InfoS("Starting readiness condition reporter", "node", nodeName, "condition", conditionType, "interval", interval)
113119

114-
// Main loop to check health and update condition
120+
ticker := time.NewTicker(interval)
121+
defer ticker.Stop()
122+
123+
// Run immediately on startup, then on each tick
124+
runCheck(ctx, httpClient, clientset, checkEndpoint, nodeName, conditionType)
115125
for {
116-
// Check health
117-
health, err := checkHealth(context.TODO(), httpClient, checkEndpoint)
118-
if err != nil {
119-
klog.ErrorS(err, "Health check failed", "endpoint", checkEndpoint)
120-
// Report unhealthy on error
121-
health = &HealthResponse{
122-
Healthy: false,
123-
Reason: "HealthCheckFailed",
124-
Message: fmt.Sprintf("Health check failed: %v", err),
125-
}
126+
select {
127+
case <-ctx.Done():
128+
klog.InfoS("Shutting down readiness condition reporter", "reason", ctx.Err())
129+
return
130+
case <-ticker.C:
131+
runCheck(ctx, httpClient, clientset, checkEndpoint, nodeName, conditionType)
126132
}
133+
}
134+
}
127135

128-
// Update node condition
129-
if err := updateNodeCondition(clientset, nodeName, conditionType, health); err != nil {
130-
klog.ErrorS(err, "Failed to update node condition", "node", nodeName, "condition", conditionType)
136+
// runCheck performs a single health check and updates the node condition.
137+
func runCheck(ctx context.Context, httpClient *http.Client, clientset kubernetes.Interface, checkEndpoint, nodeName, conditionType string) {
138+
health, err := checkHealth(ctx, httpClient, checkEndpoint)
139+
if err != nil {
140+
klog.ErrorS(err, "Health check failed", "endpoint", checkEndpoint)
141+
health = &HealthResponse{
142+
Healthy: false,
143+
Reason: "HealthCheckFailed",
144+
Message: fmt.Sprintf("Health check failed: %v", err),
131145
}
146+
}
132147

133-
// Wait for next check
134-
time.Sleep(interval)
148+
if err := updateNodeCondition(ctx, clientset, nodeName, conditionType, health); err != nil {
149+
klog.ErrorS(err, "Failed to update node condition", "node", nodeName, "condition", conditionType)
135150
}
136151
}
137152

@@ -183,10 +198,10 @@ func checkHealth(ctx context.Context, client *http.Client, endpoint string) (*He
183198
}
184199

185200
// updateNodeCondition updates the node condition based on health check.
186-
func updateNodeCondition(client kubernetes.Interface, nodeName, conditionType string, health *HealthResponse) error {
201+
func updateNodeCondition(ctx context.Context, client kubernetes.Interface, nodeName, conditionType string, health *HealthResponse) error {
187202
return retry.RetryOnConflict(retry.DefaultRetry, func() error {
188203
// Get the node
189-
node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{})
204+
node, err := client.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
190205
if err != nil {
191206
return err
192207
}
@@ -237,7 +252,7 @@ func updateNodeCondition(client kubernetes.Interface, nodeName, conditionType st
237252
node.Status.Conditions = append(node.Status.Conditions, condition)
238253
}
239254

240-
_, err = client.CoreV1().Nodes().UpdateStatus(context.TODO(), node, metav1.UpdateOptions{})
255+
_, err = client.CoreV1().Nodes().UpdateStatus(ctx, node, metav1.UpdateOptions{})
241256
return err
242257
})
243258
}

cmd/readiness-condition-reporter/main_test.go

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,29 @@ func TestCheckHealth(t *testing.T) {
8383
}
8484
}
8585

86+
func TestCheckHealthCancelledContext(t *testing.T) {
87+
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
88+
w.WriteHeader(http.StatusOK)
89+
}))
90+
defer server.Close()
91+
92+
ctx, cancel := context.WithCancel(context.Background())
93+
cancel()
94+
95+
httpClient := &http.Client{Timeout: 1 * time.Second}
96+
health, err := checkHealth(ctx, httpClient, server.URL)
97+
// checkHealth wraps connection errors into a HealthResponse rather than returning an error
98+
if err != nil {
99+
t.Fatalf("checkHealth() returned unexpected error: %v", err)
100+
}
101+
if health.Healthy {
102+
t.Error("checkHealth() with cancelled context should report unhealthy")
103+
}
104+
if health.Reason != "EndpointConnectionError" {
105+
t.Errorf("checkHealth() reason = %v, want EndpointConnectionError", health.Reason)
106+
}
107+
}
108+
86109
func TestUpdateNodeCondition(t *testing.T) {
87110
nodeName := "test-node"
88111
conditionType := "TestCondition"
@@ -134,7 +157,7 @@ func TestUpdateNodeCondition(t *testing.T) {
134157
t.Run(tt.name, func(t *testing.T) {
135158
client := fake.NewSimpleClientset(tt.existingNode)
136159

137-
err := updateNodeCondition(client, nodeName, conditionType, tt.health)
160+
err := updateNodeCondition(context.Background(), client, nodeName, conditionType, tt.health)
138161
if err != nil {
139162
t.Errorf("updateNodeCondition() error = %v", err)
140163
}

0 commit comments

Comments
 (0)