Skip to content

Commit 9d15ae1

Browse files
authored
Merge pull request #3013 from Nordix/lentzi90/oss-conditions-propagation
✨ Propagate OpenStackServer dependency errors to OpenStackMachine
2 parents bd72091 + 62fa13f commit 9d15ae1

File tree

5 files changed

+282
-105
lines changed

5 files changed

+282
-105
lines changed

controllers/openstackmachine_controller.go

Lines changed: 49 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -366,11 +366,21 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope
366366

367367
scope.Logger().Info("Reconciling Machine")
368368

369-
machineServer, waitingForServer, err := r.reconcileMachineServer(ctx, scope, openStackMachine, openStackCluster, machine)
370-
if err != nil || waitingForServer {
369+
machineServer, err := r.reconcileMachineServer(ctx, scope, openStackMachine, openStackCluster, machine)
370+
if err != nil {
371371
return ctrl.Result{}, err
372372
}
373373

374+
// Reconcile machine state early to propagate any errors from the OpenStackServer
375+
// (e.g., image not found, instance creation failed) to the OpenStackMachine.
376+
// This must happen before checking server readiness to ensure error states are visible.
377+
result := r.reconcileMachineState(scope, openStackMachine, machine, machineServer)
378+
if result != nil {
379+
return *result, nil
380+
}
381+
382+
// At this point the instance is ACTIVE. We can proceed with the rest of the reconciliation.
383+
374384
computeService, err := compute.NewService(scope)
375385
if err != nil {
376386
return ctrl.Result{}, err
@@ -415,19 +425,34 @@ func (r *OpenStackMachineReconciler) reconcileNormal(ctx context.Context, scope
415425
v1beta1conditions.MarkTrue(openStackMachine, infrav1.APIServerIngressReadyCondition)
416426
}
417427

418-
result := r.reconcileMachineState(scope, openStackMachine, machine, machineServer)
419-
if result != nil {
420-
return *result, nil
421-
}
422-
423428
scope.Logger().Info("Reconciled Machine create successfully")
424429
return ctrl.Result{}, nil
425430
}
426431

427432
// reconcileMachineState updates the conditions of the OpenStackMachine instance based on the instance state
428433
// and sets the ProviderID and Ready fields when the instance is active.
429-
// It returns a reconcile request if the instance is not yet active.
434+
// It returns a reconcile request if the instance is not yet active or in an error state.
430435
func (r *OpenStackMachineReconciler) reconcileMachineState(scope *scope.WithLogger, openStackMachine *infrav1.OpenStackMachine, machine *clusterv1.Machine, openStackServer *infrav1alpha1.OpenStackServer) *ctrl.Result {
436+
// Handle the case where the instance state is not yet available.
437+
// This can happen when the server is still being created or when
438+
// there was an error during resource resolution (e.g., image not found).
439+
if openStackServer.Status.InstanceState == nil {
440+
scope.Logger().Info("Waiting for OpenStackServer instance state", "name", openStackServer.Name)
441+
442+
// Check if there's a condition on the OpenStackServer that we should propagate.
443+
// This helps surface errors like "image not found" or status like "waiting for dependencies" to the OpenStackMachine.
444+
if condition := v1beta1conditions.Get(openStackServer, infrav1.InstanceReadyCondition); condition != nil && condition.Status == corev1.ConditionFalse {
445+
// Propagate the condition from OpenStackServer with its severity
446+
v1beta1conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, condition.Reason, condition.Severity, "%s", condition.Message)
447+
v1beta1conditions.MarkFalse(openStackMachine, clusterv1beta1.ReadyCondition, condition.Reason, condition.Severity, "%s", condition.Message)
448+
return &ctrl.Result{RequeueAfter: waitForBuildingInstanceToReconcile}
449+
}
450+
451+
v1beta1conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceNotReadyReason, clusterv1beta1.ConditionSeverityInfo, "Waiting for instance to be created")
452+
v1beta1conditions.MarkFalse(openStackMachine, clusterv1beta1.ReadyCondition, infrav1.InstanceNotReadyReason, clusterv1beta1.ConditionSeverityInfo, "Waiting for instance to be created")
453+
return &ctrl.Result{RequeueAfter: waitForBuildingInstanceToReconcile}
454+
}
455+
431456
switch *openStackServer.Status.InstanceState {
432457
case infrav1.InstanceStateActive:
433458
scope.Logger().Info("Machine instance state is ACTIVE", "id", openStackServer.Status.InstanceID)
@@ -463,11 +488,19 @@ func (r *OpenStackMachineReconciler) reconcileMachineState(scope *scope.WithLogg
463488
// If not, it is more likely a configuration error so we set failure and never retry.
464489
scope.Logger().Info("Machine instance state is ERROR", "id", openStackServer.Status.InstanceID)
465490
if !machine.Status.NodeRef.IsDefined() {
466-
err := fmt.Errorf("instance state %v is unexpected", openStackServer.Status.InstanceState)
491+
// Include the instance state in the error message for better debugging
492+
err := fmt.Errorf("instance state %v is unexpected", *openStackServer.Status.InstanceState)
467493
openStackMachine.SetFailure(capoerrors.DeprecatedCAPIUpdateMachineError, err)
468494
}
469-
v1beta1conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceStateErrorReason, clusterv1beta1.ConditionSeverityError, "")
470-
v1beta1conditions.MarkFalse(openStackMachine, clusterv1beta1.ReadyCondition, infrav1.InstanceStateErrorReason, clusterv1beta1.ConditionSeverityError, "Instance is in ERROR state")
495+
// Propagate the error message from the InstanceReady condition on the OpenStackServer if available
496+
var errorMessage string
497+
if condition := v1beta1conditions.Get(openStackServer, infrav1.InstanceReadyCondition); condition != nil && condition.Message != "" {
498+
errorMessage = condition.Message
499+
} else {
500+
errorMessage = "Instance is in ERROR state"
501+
}
502+
v1beta1conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceStateErrorReason, clusterv1beta1.ConditionSeverityError, "%s", errorMessage)
503+
v1beta1conditions.MarkFalse(openStackMachine, clusterv1beta1.ReadyCondition, infrav1.InstanceStateErrorReason, clusterv1beta1.ConditionSeverityError, "%s", errorMessage)
471504
return &ctrl.Result{}
472505
case infrav1.InstanceStateDeleted:
473506
// we should avoid further actions for DELETED VM
@@ -600,22 +633,18 @@ func openStackMachineSpecToOpenStackServerSpec(openStackMachineSpec *infrav1.Ope
600633
}
601634

602635
// reconcileMachineServer reconciles the OpenStackServer object for the OpenStackMachine.
603-
// It returns the OpenStackServer object and a boolean indicating if the OpenStackServer is ready.
604-
func (r *OpenStackMachineReconciler) reconcileMachineServer(ctx context.Context, scope *scope.WithLogger, openStackMachine *infrav1.OpenStackMachine, openStackCluster *infrav1.OpenStackCluster, machine *clusterv1.Machine) (*infrav1alpha1.OpenStackServer, bool, error) {
605-
var server *infrav1alpha1.OpenStackServer
636+
// It returns the OpenStackServer object. The caller should check the server status
637+
// using reconcileMachineState to handle different server states appropriately.
638+
func (r *OpenStackMachineReconciler) reconcileMachineServer(ctx context.Context, scope *scope.WithLogger, openStackMachine *infrav1.OpenStackMachine, openStackCluster *infrav1.OpenStackCluster, machine *clusterv1.Machine) (*infrav1alpha1.OpenStackServer, error) {
606639
server, err := r.getOrCreateMachineServer(ctx, openStackCluster, openStackMachine, machine)
607640
if err != nil {
608641
// If an error occurs while getting or creating the OpenStackServer,
609642
// we won't requeue the request so reconcileNormal can add conditions to the OpenStackMachine
610643
// and we can see the error in the logs.
611644
scope.Logger().Error(err, "Failed to get or create OpenStackServer")
612-
return server, false, err
613-
}
614-
if !server.Status.Ready {
615-
scope.Logger().Info("Waiting for OpenStackServer to be ready", "name", server.Name)
616-
return server, true, nil
645+
return server, err
617646
}
618-
return server, false, nil
647+
return server, nil
619648
}
620649

621650
// getOrCreateMachineServer gets or creates the OpenStackServer object for the OpenStackMachine.

controllers/openstackmachine_controller_test.go

Lines changed: 126 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -446,20 +446,96 @@ func TestGetPortIDs(t *testing.T) {
446446
}
447447
}
448448

449-
func TestReconcileMachineState(t *testing.T) {
449+
func TestReconcileMachineState(t *testing.T) { //nolint:gocyclo,cyclop // this is test code
450450
tests := []struct {
451451
name string
452-
instanceState infrav1.InstanceState
452+
instanceState *infrav1.InstanceState
453453
machineHasNodeRef bool
454+
serverConditions []clusterv1beta1.Condition
454455
expectRequeue bool
455456
expectedInstanceReadyCondition *clusterv1beta1.Condition
456457
expectedReadyCondition *clusterv1beta1.Condition
457458
expectInitializationProvisioned bool
458459
expectFailureSet bool
459460
}{
461+
{
462+
name: "Nil InstanceState with DependencyFailed condition propagates error to machine",
463+
instanceState: nil,
464+
serverConditions: []clusterv1beta1.Condition{
465+
{
466+
Type: infrav1.InstanceReadyCondition,
467+
Status: corev1.ConditionFalse,
468+
Severity: clusterv1beta1.ConditionSeverityError,
469+
Reason: infrav1.DependencyFailedReason,
470+
Message: "Failed to resolve server spec: image not found",
471+
},
472+
},
473+
expectRequeue: true,
474+
expectedInstanceReadyCondition: &clusterv1beta1.Condition{
475+
Type: infrav1.InstanceReadyCondition,
476+
Status: corev1.ConditionFalse,
477+
Severity: clusterv1beta1.ConditionSeverityError,
478+
Reason: infrav1.DependencyFailedReason,
479+
Message: "Failed to resolve server spec: image not found",
480+
},
481+
expectedReadyCondition: &clusterv1beta1.Condition{
482+
Type: clusterv1beta1.ReadyCondition,
483+
Status: corev1.ConditionFalse,
484+
Severity: clusterv1beta1.ConditionSeverityError,
485+
Reason: infrav1.DependencyFailedReason,
486+
Message: "Failed to resolve server spec: image not found",
487+
},
488+
},
489+
{
490+
name: "Nil InstanceState with InstanceNotReady condition propagates info to machine",
491+
instanceState: nil,
492+
serverConditions: []clusterv1beta1.Condition{
493+
{
494+
Type: infrav1.InstanceReadyCondition,
495+
Status: corev1.ConditionFalse,
496+
Severity: clusterv1beta1.ConditionSeverityInfo,
497+
Reason: infrav1.InstanceNotReadyReason,
498+
Message: "Waiting for dependencies",
499+
},
500+
},
501+
expectRequeue: true,
502+
expectedInstanceReadyCondition: &clusterv1beta1.Condition{
503+
Type: infrav1.InstanceReadyCondition,
504+
Status: corev1.ConditionFalse,
505+
Severity: clusterv1beta1.ConditionSeverityInfo,
506+
Reason: infrav1.InstanceNotReadyReason,
507+
Message: "Waiting for dependencies",
508+
},
509+
expectedReadyCondition: &clusterv1beta1.Condition{
510+
Type: clusterv1beta1.ReadyCondition,
511+
Status: corev1.ConditionFalse,
512+
Severity: clusterv1beta1.ConditionSeverityInfo,
513+
Reason: infrav1.InstanceNotReadyReason,
514+
Message: "Waiting for dependencies",
515+
},
516+
},
517+
{
518+
name: "Nil InstanceState with no server condition sets default waiting conditions",
519+
instanceState: nil,
520+
expectRequeue: true,
521+
expectedInstanceReadyCondition: &clusterv1beta1.Condition{
522+
Type: infrav1.InstanceReadyCondition,
523+
Status: corev1.ConditionFalse,
524+
Severity: clusterv1beta1.ConditionSeverityInfo,
525+
Reason: infrav1.InstanceNotReadyReason,
526+
Message: "Waiting for instance to be created",
527+
},
528+
expectedReadyCondition: &clusterv1beta1.Condition{
529+
Type: clusterv1beta1.ReadyCondition,
530+
Status: corev1.ConditionFalse,
531+
Severity: clusterv1beta1.ConditionSeverityInfo,
532+
Reason: infrav1.InstanceNotReadyReason,
533+
Message: "Waiting for instance to be created",
534+
},
535+
},
460536
{
461537
name: "Instance state ACTIVE sets conditions to True and initialization.provisioned",
462-
instanceState: infrav1.InstanceStateActive,
538+
instanceState: ptr.To(infrav1.InstanceStateActive),
463539
expectRequeue: false,
464540
expectedInstanceReadyCondition: &clusterv1beta1.Condition{
465541
Type: infrav1.InstanceReadyCondition,
@@ -473,7 +549,7 @@ func TestReconcileMachineState(t *testing.T) {
473549
},
474550
{
475551
name: "Instance state ERROR sets conditions to False without NodeRef",
476-
instanceState: infrav1.InstanceStateError,
552+
instanceState: ptr.To(infrav1.InstanceStateError),
477553
machineHasNodeRef: false,
478554
expectRequeue: true,
479555
expectedInstanceReadyCondition: &clusterv1beta1.Condition{
@@ -492,7 +568,7 @@ func TestReconcileMachineState(t *testing.T) {
492568
},
493569
{
494570
name: "Instance state ERROR with NodeRef does not set failure",
495-
instanceState: infrav1.InstanceStateError,
571+
instanceState: ptr.To(infrav1.InstanceStateError),
496572
machineHasNodeRef: true,
497573
expectRequeue: true,
498574
expectedInstanceReadyCondition: &clusterv1beta1.Condition{
@@ -509,9 +585,39 @@ func TestReconcileMachineState(t *testing.T) {
509585
},
510586
expectFailureSet: false,
511587
},
588+
{
589+
name: "Instance state ERROR propagates error message from server condition",
590+
instanceState: ptr.To(infrav1.InstanceStateError),
591+
machineHasNodeRef: false,
592+
serverConditions: []clusterv1beta1.Condition{
593+
{
594+
Type: infrav1.InstanceReadyCondition,
595+
Status: corev1.ConditionFalse,
596+
Severity: clusterv1beta1.ConditionSeverityError,
597+
Reason: infrav1.InstanceStateErrorReason,
598+
Message: "Server entered ERROR state: No valid host was found",
599+
},
600+
},
601+
expectRequeue: true,
602+
expectedInstanceReadyCondition: &clusterv1beta1.Condition{
603+
Type: infrav1.InstanceReadyCondition,
604+
Status: corev1.ConditionFalse,
605+
Severity: clusterv1beta1.ConditionSeverityError,
606+
Reason: infrav1.InstanceStateErrorReason,
607+
Message: "Server entered ERROR state: No valid host was found",
608+
},
609+
expectedReadyCondition: &clusterv1beta1.Condition{
610+
Type: clusterv1beta1.ReadyCondition,
611+
Status: corev1.ConditionFalse,
612+
Severity: clusterv1beta1.ConditionSeverityError,
613+
Reason: infrav1.InstanceStateErrorReason,
614+
Message: "Server entered ERROR state: No valid host was found",
615+
},
616+
expectFailureSet: true,
617+
},
512618
{
513619
name: "Instance state DELETED sets conditions to False",
514-
instanceState: infrav1.InstanceStateDeleted,
620+
instanceState: ptr.To(infrav1.InstanceStateDeleted),
515621
expectRequeue: true,
516622
expectedInstanceReadyCondition: &clusterv1beta1.Condition{
517623
Type: infrav1.InstanceReadyCondition,
@@ -528,7 +634,7 @@ func TestReconcileMachineState(t *testing.T) {
528634
},
529635
{
530636
name: "Instance state BUILD sets ReadyCondition to False",
531-
instanceState: infrav1.InstanceStateBuild,
637+
instanceState: ptr.To(infrav1.InstanceStateBuild),
532638
expectRequeue: true,
533639
expectedReadyCondition: &clusterv1beta1.Condition{
534640
Type: clusterv1beta1.ReadyCondition,
@@ -539,7 +645,7 @@ func TestReconcileMachineState(t *testing.T) {
539645
},
540646
{
541647
name: "Instance state SHUTOFF sets conditions to Unknown",
542-
instanceState: infrav1.InstanceStateShutoff,
648+
instanceState: ptr.To(infrav1.InstanceStateShutoff),
543649
expectRequeue: true,
544650
expectedInstanceReadyCondition: &clusterv1beta1.Condition{
545651
Type: infrav1.InstanceReadyCondition,
@@ -590,10 +696,15 @@ func TestReconcileMachineState(t *testing.T) {
590696
},
591697
Status: infrav1alpha1.OpenStackServerStatus{
592698
InstanceID: ptr.To(testInstanceID),
593-
InstanceState: ptr.To(tt.instanceState),
699+
InstanceState: tt.instanceState,
594700
},
595701
}
596702

703+
// Set any pre-existing conditions on the OpenStackServer
704+
for i := range tt.serverConditions {
705+
v1beta1conditions.Set(openStackServer, &tt.serverConditions[i])
706+
}
707+
597708
r := &OpenStackMachineReconciler{}
598709
result := r.reconcileMachineState(scope.NewWithLogger(nil, logr.Discard()), openStackMachine, machine, openStackServer)
599710

@@ -620,6 +731,9 @@ func TestReconcileMachineState(t *testing.T) {
620731
if tt.expectedInstanceReadyCondition.Severity != "" && condition.Severity != tt.expectedInstanceReadyCondition.Severity {
621732
t.Errorf("expected %s severity %s, got %s", tt.expectedInstanceReadyCondition.Type, tt.expectedInstanceReadyCondition.Severity, condition.Severity)
622733
}
734+
if tt.expectedInstanceReadyCondition.Message != "" && condition.Message != tt.expectedInstanceReadyCondition.Message {
735+
t.Errorf("expected %s message %q, got %q", tt.expectedInstanceReadyCondition.Type, tt.expectedInstanceReadyCondition.Message, condition.Message)
736+
}
623737
}
624738
}
625739

@@ -638,6 +752,9 @@ func TestReconcileMachineState(t *testing.T) {
638752
if tt.expectedReadyCondition.Severity != "" && condition.Severity != tt.expectedReadyCondition.Severity {
639753
t.Errorf("expected %s severity %s, got %s", tt.expectedReadyCondition.Type, tt.expectedReadyCondition.Severity, condition.Severity)
640754
}
755+
if tt.expectedReadyCondition.Message != "" && condition.Message != tt.expectedReadyCondition.Message {
756+
t.Errorf("expected %s message %q, got %q", tt.expectedReadyCondition.Type, tt.expectedReadyCondition.Message, condition.Message)
757+
}
641758
}
642759
}
643760

controllers/openstackserver_controller.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -319,10 +319,20 @@ func (r *OpenStackServerReconciler) reconcileNormal(ctx context.Context, scope *
319319
openStackServer.SetLabels(labels)
320320
}
321321

322-
changed, resolveDone, err := compute.ResolveServerSpec(ctx, scope, r.Client, openStackServer)
323-
if err != nil || !resolveDone {
322+
changed, resolveDone, pendingDependencies, err := compute.ResolveServerSpec(ctx, scope, r.Client, openStackServer)
323+
if err != nil {
324+
// Set a condition to make the error visible on the OpenStackServer status.
325+
// This helps users understand why the server is not being created.
326+
v1beta1conditions.MarkFalse(openStackServer, infrav1.InstanceReadyCondition, infrav1.DependencyFailedReason, clusterv1beta1.ConditionSeverityError, "Failed to resolve server spec: %v", err)
324327
return ctrl.Result{}, err
325328
}
329+
if !resolveDone {
330+
// Set a condition to indicate we're waiting for dependencies (e.g., ORC Image not ready yet).
331+
// This helps users understand why the server is not being created.
332+
// Include the list of pending dependencies to help with debugging.
333+
v1beta1conditions.MarkFalse(openStackServer, infrav1.InstanceReadyCondition, infrav1.InstanceNotReadyReason, clusterv1beta1.ConditionSeverityInfo, "Waiting for server dependencies to be resolved: %v", pendingDependencies)
334+
return ctrl.Result{}, nil
335+
}
326336

327337
// Also add the finalizer when writing resolved resources so we can start creating resources on the next reconcile.
328338
if controllerutil.AddFinalizer(openStackServer, infrav1alpha1.OpenStackServerFinalizer) {

0 commit comments

Comments
 (0)