Skip to content

Commit 32ae917

Browse files
mumoshuLink-
andauthored
Make EphemeralRunnerReconciler create runner pods earlier (#3831)
Co-authored-by: Bassem Dghaidi <568794+Link-@users.noreply.github.com>
1 parent 3998f6d commit 32ae917

File tree

1 file changed

+38
-17
lines changed

1 file changed

+38
-17
lines changed

controllers/actions.github.com/ephemeralrunner_controller.go

Lines changed: 38 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,9 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
178178

179179
if ephemeralRunner.Status.RunnerId == 0 {
180180
log.Info("Creating new ephemeral runner registration and updating status with runner config")
181-
return r.updateStatusWithRunnerConfig(ctx, ephemeralRunner, log)
181+
if r, err := r.updateStatusWithRunnerConfig(ctx, ephemeralRunner, log); r != nil {
182+
return *r, err
183+
}
182184
}
183185

184186
secret := new(corev1.Secret)
@@ -189,7 +191,17 @@ func (r *EphemeralRunnerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
189191
}
190192
// create secret if not created
191193
log.Info("Creating new ephemeral runner secret for jitconfig.")
192-
return r.createSecret(ctx, ephemeralRunner, log)
194+
if r, err := r.createSecret(ctx, ephemeralRunner, log); r != nil {
195+
return *r, err
196+
}
197+
198+
// Retry to get the secret that was just created.
199+
// Otherwise, even though we want to continue to create the pod,
200+
// it fails due to the missing secret resulting in an invalid pod spec.
201+
if err := r.Get(ctx, req.NamespacedName, secret); err != nil {
202+
log.Error(err, "Failed to fetch secret")
203+
return ctrl.Result{}, err
204+
}
193205
}
194206

195207
pod := new(corev1.Pod)
@@ -511,12 +523,12 @@ func (r *EphemeralRunnerReconciler) deletePodAsFailed(ctx context.Context, ephem
511523

512524
// updateStatusWithRunnerConfig fetches runtime configuration needed by the runner
513525
// This method should always set .status.runnerId and .status.runnerJITConfig
514-
func (r *EphemeralRunnerReconciler) updateStatusWithRunnerConfig(ctx context.Context, ephemeralRunner *v1alpha1.EphemeralRunner, log logr.Logger) (ctrl.Result, error) {
526+
func (r *EphemeralRunnerReconciler) updateStatusWithRunnerConfig(ctx context.Context, ephemeralRunner *v1alpha1.EphemeralRunner, log logr.Logger) (*ctrl.Result, error) {
515527
// Runner is not registered with the service. We need to register it first
516528
log.Info("Creating ephemeral runner JIT config")
517529
actionsClient, err := r.actionsClientFor(ctx, ephemeralRunner)
518530
if err != nil {
519-
return ctrl.Result{}, fmt.Errorf("failed to get actions client for generating JIT config: %v", err)
531+
return &ctrl.Result{}, fmt.Errorf("failed to get actions client for generating JIT config: %v", err)
520532
}
521533

522534
jitSettings := &actions.RunnerScaleSetJitRunnerSetting{
@@ -534,12 +546,12 @@ func (r *EphemeralRunnerReconciler) updateStatusWithRunnerConfig(ctx context.Con
534546
if err != nil {
535547
actionsError := &actions.ActionsError{}
536548
if !errors.As(err, &actionsError) {
537-
return ctrl.Result{}, fmt.Errorf("failed to generate JIT config with generic error: %v", err)
549+
return &ctrl.Result{}, fmt.Errorf("failed to generate JIT config with generic error: %v", err)
538550
}
539551

540552
if actionsError.StatusCode != http.StatusConflict ||
541553
!actionsError.IsException("AgentExistsException") {
542-
return ctrl.Result{}, fmt.Errorf("failed to generate JIT config with Actions service error: %v", err)
554+
return &ctrl.Result{}, fmt.Errorf("failed to generate JIT config with Actions service error: %v", err)
543555
}
544556

545557
// If the runner with the name we want already exists it means:
@@ -552,29 +564,29 @@ func (r *EphemeralRunnerReconciler) updateStatusWithRunnerConfig(ctx context.Con
552564
log.Info("Getting runner jit config failed with conflict error, trying to get the runner by name", "runnerName", ephemeralRunner.Name)
553565
existingRunner, err := actionsClient.GetRunnerByName(ctx, ephemeralRunner.Name)
554566
if err != nil {
555-
return ctrl.Result{}, fmt.Errorf("failed to get runner by name: %v", err)
567+
return &ctrl.Result{}, fmt.Errorf("failed to get runner by name: %v", err)
556568
}
557569

558570
if existingRunner == nil {
559571
log.Info("Runner with the same name does not exist, re-queuing the reconciliation")
560-
return ctrl.Result{Requeue: true}, nil
572+
return &ctrl.Result{Requeue: true}, nil
561573
}
562574

563575
log.Info("Found the runner with the same name", "runnerId", existingRunner.Id, "runnerScaleSetId", existingRunner.RunnerScaleSetId)
564576
if existingRunner.RunnerScaleSetId == ephemeralRunner.Spec.RunnerScaleSetId {
565577
log.Info("Removing the runner with the same name")
566578
err := actionsClient.RemoveRunner(ctx, int64(existingRunner.Id))
567579
if err != nil {
568-
return ctrl.Result{}, fmt.Errorf("failed to remove runner from the service: %v", err)
580+
return &ctrl.Result{}, fmt.Errorf("failed to remove runner from the service: %v", err)
569581
}
570582

571583
log.Info("Removed the runner with the same name, re-queuing the reconciliation")
572-
return ctrl.Result{Requeue: true}, nil
584+
return &ctrl.Result{Requeue: true}, nil
573585
}
574586

575587
// TODO: Do we want to mark the ephemeral runner as failed, and let EphemeralRunnerSet to clean it up, so we can recover from this situation?
576588
// The situation is that the EphemeralRunner's name is already used by something else to register a runner, and we can't take the control back.
577-
return ctrl.Result{}, fmt.Errorf("runner with the same name but doesn't belong to this RunnerScaleSet: %v", err)
589+
return &ctrl.Result{}, fmt.Errorf("runner with the same name but doesn't belong to this RunnerScaleSet: %v", err)
578590
}
579591
log.Info("Created ephemeral runner JIT config", "runnerId", jitConfig.Runner.Id)
580592

@@ -585,11 +597,20 @@ func (r *EphemeralRunnerReconciler) updateStatusWithRunnerConfig(ctx context.Con
585597
obj.Status.RunnerJITConfig = jitConfig.EncodedJITConfig
586598
})
587599
if err != nil {
588-
return ctrl.Result{}, fmt.Errorf("failed to update runner status for RunnerId/RunnerName/RunnerJITConfig: %v", err)
600+
return &ctrl.Result{}, fmt.Errorf("failed to update runner status for RunnerId/RunnerName/RunnerJITConfig: %v", err)
589601
}
590602

603+
// We want to continue without a requeue for faster pod creation.
604+
//
605+
// To do so, we update the status in-place, so that both continuing the loop and
606+
// and requeuing and skipping updateStatusWithRunnerConfig in the next loop, will
607+
// have the same effect.
608+
ephemeralRunner.Status.RunnerId = jitConfig.Runner.Id
609+
ephemeralRunner.Status.RunnerName = jitConfig.Runner.Name
610+
ephemeralRunner.Status.RunnerJITConfig = jitConfig.EncodedJITConfig
611+
591612
log.Info("Updated ephemeral runner status with runnerId and runnerJITConfig")
592-
return ctrl.Result{}, nil
613+
return nil, nil
593614
}
594615

595616
func (r *EphemeralRunnerReconciler) createPod(ctx context.Context, runner *v1alpha1.EphemeralRunner, secret *corev1.Secret, log logr.Logger) (ctrl.Result, error) {
@@ -665,21 +686,21 @@ func (r *EphemeralRunnerReconciler) createPod(ctx context.Context, runner *v1alp
665686
return ctrl.Result{}, nil
666687
}
667688

668-
func (r *EphemeralRunnerReconciler) createSecret(ctx context.Context, runner *v1alpha1.EphemeralRunner, log logr.Logger) (ctrl.Result, error) {
689+
func (r *EphemeralRunnerReconciler) createSecret(ctx context.Context, runner *v1alpha1.EphemeralRunner, log logr.Logger) (*ctrl.Result, error) {
669690
log.Info("Creating new secret for ephemeral runner")
670691
jitSecret := r.ResourceBuilder.newEphemeralRunnerJitSecret(runner)
671692

672693
if err := ctrl.SetControllerReference(runner, jitSecret, r.Scheme); err != nil {
673-
return ctrl.Result{}, fmt.Errorf("failed to set controller reference: %v", err)
694+
return &ctrl.Result{}, fmt.Errorf("failed to set controller reference: %v", err)
674695
}
675696

676697
log.Info("Created new secret spec for ephemeral runner")
677698
if err := r.Create(ctx, jitSecret); err != nil {
678-
return ctrl.Result{}, fmt.Errorf("failed to create jit secret: %v", err)
699+
return &ctrl.Result{}, fmt.Errorf("failed to create jit secret: %v", err)
679700
}
680701

681702
log.Info("Created ephemeral runner secret", "secretName", jitSecret.Name)
682-
return ctrl.Result{Requeue: true}, nil
703+
return nil, nil
683704
}
684705

685706
// updateRunStatusFromPod is responsible for updating non-exiting statuses.

0 commit comments

Comments
 (0)