Skip to content

Commit 82d5579

Browse files
authored
Restart the listener if pod is evicted (#4332)
1 parent 5402698 commit 82d5579

File tree

2 files changed

+104
-26
lines changed

2 files changed

+104
-26
lines changed

controllers/actions.github.com/autoscalinglistener_controller.go

Lines changed: 55 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,14 @@ func (r *AutoscalingListenerReconciler) Reconcile(ctx context.Context, req ctrl.
211211
// TODO: make sure the role binding has the up-to-date role and service account
212212

213213
listenerPod := new(corev1.Pod)
214-
if err := r.Get(ctx, client.ObjectKey{Namespace: autoscalingListener.Namespace, Name: autoscalingListener.Name}, listenerPod); err != nil {
214+
if err := r.Get(
215+
ctx,
216+
client.ObjectKey{
217+
Namespace: autoscalingListener.Namespace,
218+
Name: autoscalingListener.Name,
219+
},
220+
listenerPod,
221+
); err != nil {
215222
if !kerrors.IsNotFound(err) {
216223
log.Error(err, "Unable to get listener pod", "namespace", autoscalingListener.Namespace, "name", autoscalingListener.Name)
217224
return ctrl.Result{}, err
@@ -229,37 +236,30 @@ func (r *AutoscalingListenerReconciler) Reconcile(ctx context.Context, req ctrl.
229236

230237
cs := listenerContainerStatus(listenerPod)
231238
switch {
239+
case listenerPod.Status.Reason == "Evicted":
240+
log.Info(
241+
"Listener pod is evicted",
242+
"phase", listenerPod.Status.Phase,
243+
"reason", listenerPod.Status.Reason,
244+
"message", listenerPod.Status.Message,
245+
)
246+
247+
return ctrl.Result{}, r.deleteListenerPod(ctx, autoscalingListener, listenerPod, log)
248+
232249
case cs == nil:
233250
log.Info("Listener pod is not ready", "namespace", listenerPod.Namespace, "name", listenerPod.Name)
234251
return ctrl.Result{}, nil
235252
case cs.State.Terminated != nil:
236-
log.Info("Listener pod is terminated", "namespace", listenerPod.Namespace, "name", listenerPod.Name, "reason", cs.State.Terminated.Reason, "message", cs.State.Terminated.Message)
237-
238-
if err := r.publishRunningListener(autoscalingListener, false); err != nil {
239-
log.Error(err, "Unable to publish runner listener down metric", "namespace", listenerPod.Namespace, "name", listenerPod.Name)
240-
}
253+
log.Info(
254+
"Listener pod is terminated",
255+
"namespace", listenerPod.Namespace,
256+
"name", listenerPod.Name,
257+
"reason", cs.State.Terminated.Reason,
258+
"message", cs.State.Terminated.Message,
259+
)
241260

242-
if listenerPod.DeletionTimestamp.IsZero() {
243-
log.Info("Deleting the listener pod", "namespace", listenerPod.Namespace, "name", listenerPod.Name)
244-
if err := r.Delete(ctx, listenerPod); err != nil && !kerrors.IsNotFound(err) {
245-
log.Error(err, "Unable to delete the listener pod", "namespace", listenerPod.Namespace, "name", listenerPod.Name)
246-
return ctrl.Result{}, err
247-
}
261+
return ctrl.Result{}, r.deleteListenerPod(ctx, autoscalingListener, listenerPod, log)
248262

249-
// delete the listener config secret as well, so it gets recreated when the listener pod is recreated, with any new data if it exists
250-
var configSecret corev1.Secret
251-
err := r.Get(ctx, types.NamespacedName{Namespace: autoscalingListener.Namespace, Name: scaleSetListenerConfigName(autoscalingListener)}, &configSecret)
252-
switch {
253-
case err == nil && configSecret.DeletionTimestamp.IsZero():
254-
log.Info("Deleting the listener config secret")
255-
if err := r.Delete(ctx, &configSecret); err != nil {
256-
return ctrl.Result{}, fmt.Errorf("failed to delete listener config secret: %w", err)
257-
}
258-
case !kerrors.IsNotFound(err):
259-
return ctrl.Result{}, fmt.Errorf("failed to get the listener config secret: %w", err)
260-
}
261-
}
262-
return ctrl.Result{}, nil
263263
case cs.State.Running != nil:
264264
if err := r.publishRunningListener(autoscalingListener, true); err != nil {
265265
log.Error(err, "Unable to publish running listener", "namespace", listenerPod.Namespace, "name", listenerPod.Name)
@@ -269,10 +269,39 @@ func (r *AutoscalingListenerReconciler) Reconcile(ctx context.Context, req ctrl.
269269
return ctrl.Result{}, nil
270270
}
271271
return ctrl.Result{}, nil
272+
272273
}
273274
return ctrl.Result{}, nil
274275
}
275276

277+
func (r *AutoscalingListenerReconciler) deleteListenerPod(ctx context.Context, autoscalingListener *v1alpha1.AutoscalingListener, listenerPod *corev1.Pod, log logr.Logger) error {
278+
if err := r.publishRunningListener(autoscalingListener, false); err != nil {
279+
log.Error(err, "Unable to publish runner listener down metric", "namespace", listenerPod.Namespace, "name", listenerPod.Name)
280+
}
281+
282+
if listenerPod.DeletionTimestamp.IsZero() {
283+
log.Info("Deleting the listener pod", "namespace", listenerPod.Namespace, "name", listenerPod.Name)
284+
if err := r.Delete(ctx, listenerPod); err != nil && !kerrors.IsNotFound(err) {
285+
log.Error(err, "Unable to delete the listener pod", "namespace", listenerPod.Namespace, "name", listenerPod.Name)
286+
return err
287+
}
288+
289+
// delete the listener config secret as well, so it gets recreated when the listener pod is recreated, with any new data if it exists
290+
var configSecret corev1.Secret
291+
err := r.Get(ctx, types.NamespacedName{Namespace: autoscalingListener.Namespace, Name: scaleSetListenerConfigName(autoscalingListener)}, &configSecret)
292+
switch {
293+
case err == nil && configSecret.DeletionTimestamp.IsZero():
294+
log.Info("Deleting the listener config secret")
295+
if err := r.Delete(ctx, &configSecret); err != nil {
296+
return fmt.Errorf("failed to delete listener config secret: %w", err)
297+
}
298+
case !kerrors.IsNotFound(err):
299+
return fmt.Errorf("failed to get the listener config secret: %w", err)
300+
}
301+
}
302+
return nil
303+
}
304+
276305
func (r *AutoscalingListenerReconciler) cleanupResources(ctx context.Context, autoscalingListener *v1alpha1.AutoscalingListener, logger logr.Logger) (requeue bool, err error) {
277306
logger.Info("Cleaning up the listener pod")
278307
listenerPod := new(corev1.Pod)

controllers/actions.github.com/autoscalinglistener_controller_test.go

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -671,6 +671,55 @@ var _ = Describe("Test AutoScalingListener customization", func() {
671671
autoscalingListenerTestInterval,
672672
).ShouldNot(BeEquivalentTo(oldPodUID), "Pod should be created")
673673
})
674+
675+
It("Should re-create pod when the listener pod is evicted", func() {
	// Key used for every lookup of the listener pod in this test.
	podKey := client.ObjectKey{
		Namespace: autoscalingListener.Namespace,
		Name:      autoscalingListener.Name,
	}

	// Poll until a pod with the listener's name can be fetched.
	original := new(corev1.Pod)
	Eventually(
		func() (string, error) {
			if err := k8sClient.Get(ctx, podKey, original); err != nil {
				return "", err
			}

			return original.Name, nil
		},
		autoscalingListenerTestTimeout,
		autoscalingListenerTestInterval,
	).Should(
		BeEquivalentTo(autoscalingListener.Name),
		"Pod should be created",
	)

	// Remember the UID of the fetched pod so a replacement can be told apart.
	oldPodUID := string(original.UID)

	// Mark the pod as evicted through the status subresource.
	evicted := original.DeepCopy()
	evicted.Status.Reason = "Evicted"
	Expect(k8sClient.Status().Update(ctx, evicted)).NotTo(HaveOccurred(), "failed to update pod status")

	// Poll until the pod under the same key reports a different UID.
	replacement := new(corev1.Pod)
	Eventually(
		func() (string, error) {
			if err := k8sClient.Get(ctx, podKey, replacement); err != nil {
				return "", err
			}

			return string(replacement.UID), nil
		},
		autoscalingListenerTestTimeout,
		autoscalingListenerTestInterval,
	).ShouldNot(
		BeEquivalentTo(oldPodUID),
		"Pod should be created",
	)
})
674723
})
675724
})
676725

0 commit comments

Comments
 (0)