Commit 76ab594

Add missing conditions on network error paths in OpenStackCluster reconciler
Several error paths in reconcileNetworkComponents and related functions were missing condition updates, so the cluster status did not accurately reflect failures in network reconciliation.

Changes:
- ReconcileExternalNetwork failure now sets NetworkReadyCondition=False
- ManagedSubnets > 1 validation error now sets NetworkReadyCondition=False and calls handleUpdateOSCError (previously the error was returned without updating any condition)
- resolveLoadBalancerNetwork failure now sets NetworkReadyCondition=False
- loadbalancer.NewService failure in reconcileControlPlaneEndpoint now calls handleUpdateOSCError to set ReadyCondition=False
- resolveLoadBalancerNetwork: always call handleUpdateOSCError on GetNetworkByParam failure, not only for ErrFilterMatch errors

Tests added for external network failure, ManagedSubnets validation, and clearing ReadyCondition when security group reconciliation fails on a previously Ready cluster.

Signed-off-by: Dong Ma <winterma.dong@gmail.com>
1 parent 1266f9e commit 76ab594

2 files changed (+216 −4 lines)
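Each of the three failure paths touched in reconcileNetworkComponents now follows the same shape: set NetworkReadyCondition=False with NetworkReconcileFailedReason, call handleUpdateOSCError (which also marks the cluster-level ReadyCondition False), and return the wrapped error. A minimal, hypothetical sketch of that repeated pattern as a shared helper — the name setNetworkNotReady is invented here for illustration and is not part of this commit, and it assumes the controller file's existing imports:

// Hypothetical helper (not in this commit): the failure paths in the diff
// below all follow this shape after the change.
func setNetworkNotReady(openStackCluster *infrav1.OpenStackCluster, err error) error {
	// Record why the network is not ready on the component condition.
	conditions.Set(openStackCluster, metav1.Condition{
		Type:    infrav1.NetworkReadyCondition,
		Status:  metav1.ConditionFalse,
		Reason:  infrav1.NetworkReconcileFailedReason,
		Message: err.Error(),
	})
	// handleUpdateOSCError additionally flips the cluster-level ReadyCondition to False.
	handleUpdateOSCError(openStackCluster, err)
	return err
}

Under that assumption, each failing branch in reconcileNetworkComponents would reduce to a single return setNetworkNotReady(openStackCluster, fmt.Errorf("...: %w", err)) call; the commit instead inlines the pattern at each site, as shown in the diff below.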

controllers/openstackcluster_controller.go

Lines changed: 23 additions & 4 deletions
@@ -687,9 +687,7 @@ func resolveLoadBalancerNetwork(openStackCluster *infrav1.OpenStackCluster, netw
 	if lbSpec.Network != nil {
 		lbNet, err := networkingService.GetNetworkByParam(lbSpec.Network)
 		if err != nil {
-			if errors.Is(err, capoerrors.ErrFilterMatch) {
-				handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to find loadbalancer network: %w", err))
-			}
+			handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to find loadbalancer network: %w", err))
 			return fmt.Errorf("failed to find network: %w", err)
 		}

@@ -742,6 +740,12 @@ func reconcileNetworkComponents(scope *scope.WithLogger, cluster *clusterv1.Clus

 	err = networkingService.ReconcileExternalNetwork(openStackCluster)
 	if err != nil {
+		conditions.Set(openStackCluster, metav1.Condition{
+			Type:    infrav1.NetworkReadyCondition,
+			Status:  metav1.ConditionFalse,
+			Reason:  infrav1.NetworkReconcileFailedReason,
+			Message: fmt.Sprintf("Failed to reconcile external network: %v", err),
+		})
 		handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to reconcile external network: %w", err))
 		return fmt.Errorf("failed to reconcile external network: %w", err)
 	}

@@ -755,11 +759,25 @@ func reconcileNetworkComponents(scope *scope.WithLogger, cluster *clusterv1.Clus
 			return err
 		}
 	} else {
-		return fmt.Errorf("failed to reconcile network: ManagedSubnets only supports one element, %d provided", len(openStackCluster.Spec.ManagedSubnets))
+		err := fmt.Errorf("failed to reconcile network: ManagedSubnets only supports one element, %d provided", len(openStackCluster.Spec.ManagedSubnets))
+		conditions.Set(openStackCluster, metav1.Condition{
+			Type:    infrav1.NetworkReadyCondition,
+			Status:  metav1.ConditionFalse,
+			Reason:  infrav1.NetworkReconcileFailedReason,
+			Message: err.Error(),
+		})
+		handleUpdateOSCError(openStackCluster, err)
+		return err
 	}

 	err = resolveLoadBalancerNetwork(openStackCluster, networkingService)
 	if err != nil {
+		conditions.Set(openStackCluster, metav1.Condition{
+			Type:    infrav1.NetworkReadyCondition,
+			Status:  metav1.ConditionFalse,
+			Reason:  infrav1.NetworkReconcileFailedReason,
+			Message: fmt.Sprintf("Failed to reconcile load balancer network: %v", err),
+		})
 		handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to reconcile loadbalancer network: %w", err))
 		return fmt.Errorf("failed to reconcile loadbalancer network: %w", err)
 	}

@@ -985,6 +1003,7 @@ func reconcileControlPlaneEndpoint(scope *scope.WithLogger, networkingService *n
 	case openStackCluster.Spec.APIServerLoadBalancer.IsEnabled():
 		loadBalancerService, err := loadbalancer.NewService(scope)
 		if err != nil {
+			handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to create load balancer service: %w", err))
 			return err
 		}

controllers/openstackcluster_controller_test.go

Lines changed: 193 additions & 0 deletions
@@ -925,6 +925,95 @@ var _ = Describe("OpenStackCluster controller", func() {
 		Expect(conditions.IsTrue(testCluster, infrav1.APIEndpointReadyCondition)).To(BeTrue())
 	})

+	It("should set NetworkReadyCondition to False when ManagedSubnets has more than one element", func() {
+		testCluster.SetName("managed-subnets-too-many")
+		testCluster.Spec = infrav1.OpenStackClusterSpec{
+			IdentityRef: infrav1.OpenStackIdentityReference{
+				Name:      "test-creds",
+				CloudName: "openstack",
+			},
+			DisableExternalNetwork:     ptr.To(true),
+			DisableAPIServerFloatingIP: ptr.To(true),
+			APIServerFixedIP:           ptr.To("192.168.0.10"),
+			ManagedSubnets: []infrav1.SubnetSpec{
+				{CIDR: "192.168.0.0/24", DNSNameservers: []string{"8.8.8.8"}},
+			},
+		}
+		err := k8sClient.Create(ctx, testCluster)
+		Expect(err).To(BeNil())
+		err = k8sClient.Create(ctx, capiCluster)
+		Expect(err).To(BeNil())
+
+		// Add a second managed subnet in memory to bypass CRD validation
+		// (maxItems: 1) and test the controller-level check.
+		testCluster.Spec.ManagedSubnets = append(testCluster.Spec.ManagedSubnets,
+			infrav1.SubnetSpec{CIDR: "192.168.1.0/24", DNSNameservers: []string{"8.8.8.8"}},
+		)
+
+		log := GinkgoLogr
+		clientScope, err := mockScopeFactory.NewClientScopeFromObject(ctx, k8sClient, nil, log, testCluster)
+		Expect(err).To(BeNil())
+		scope := scope.NewWithLogger(clientScope, log)
+
+		err = reconcileNetworkComponents(scope, capiCluster, testCluster)
+		Expect(err).ToNot(BeNil())
+		Expect(err.Error()).To(ContainSubstring("ManagedSubnets only supports one element"))
+
+		// Verify NetworkReadyCondition is set to False
+		Expect(conditions.IsFalse(testCluster, infrav1.NetworkReadyCondition)).To(BeTrue())
+		condition := conditions.Get(testCluster, infrav1.NetworkReadyCondition)
+		Expect(condition).ToNot(BeNil())
+		Expect(condition.Reason).To(Equal(infrav1.NetworkReconcileFailedReason))
+
+		// Verify ReadyCondition is set to False
+		Expect(conditions.IsFalse(testCluster, clusterv1.ReadyCondition)).To(BeTrue())
+	})
+
+	It("should set NetworkReadyCondition to False when external network reconciliation fails", func() {
+		const externalNetworkID = "a42211a2-4d2c-426f-9413-830e4b4abbbc"
+
+		testCluster.SetName("external-network-failure")
+		testCluster.Spec = infrav1.OpenStackClusterSpec{
+			IdentityRef: infrav1.OpenStackIdentityReference{
+				Name:      "test-creds",
+				CloudName: "openstack",
+			},
+			ExternalNetwork: &infrav1.NetworkParam{
+				ID: ptr.To(externalNetworkID),
+			},
+			DisableAPIServerFloatingIP: ptr.To(true),
+			APIServerFixedIP:           ptr.To("192.168.0.10"),
+		}
+		err := k8sClient.Create(ctx, testCluster)
+		Expect(err).To(BeNil())
+		err = k8sClient.Create(ctx, capiCluster)
+		Expect(err).To(BeNil())
+
+		log := GinkgoLogr
+		clientScope, err := mockScopeFactory.NewClientScopeFromObject(ctx, k8sClient, nil, log, testCluster)
+		Expect(err).To(BeNil())
+		scope := scope.NewWithLogger(clientScope, log)
+
+		networkClientRecorder := mockScopeFactory.NetworkClient.EXPECT()
+
+		// External network lookup fails
+		networkClientRecorder.GetNetwork(externalNetworkID).Return(nil, fmt.Errorf("external network not found"))
+
+		err = reconcileNetworkComponents(scope, capiCluster, testCluster)
+		Expect(err).ToNot(BeNil())
+		Expect(err.Error()).To(ContainSubstring("failed to reconcile external network"))
+
+		// Verify NetworkReadyCondition is set to False
+		Expect(conditions.IsFalse(testCluster, infrav1.NetworkReadyCondition)).To(BeTrue())
+		condition := conditions.Get(testCluster, infrav1.NetworkReadyCondition)
+		Expect(condition).ToNot(BeNil())
+		Expect(condition.Reason).To(Equal(infrav1.NetworkReconcileFailedReason))
+		Expect(condition.Message).To(ContainSubstring("Failed to reconcile external network"))
+
+		// Verify ReadyCondition is set to False
+		Expect(conditions.IsFalse(testCluster, clusterv1.ReadyCondition)).To(BeTrue())
+	})
+
 	It("should set NetworkReadyCondition to False when network lookup fails", func() {
 		const clusterNetworkID = "6c90b532-7ba0-418a-a276-5ae55060b5b0"

@@ -1161,6 +1250,110 @@ var _ = Describe("OpenStackCluster controller", func() {
 		Expect(conditions.IsTrue(testCluster, infrav1.NetworkReadyCondition)).To(BeTrue())
 	})

+	It("should clear ReadyCondition when security group reconciliation fails on a previously ready cluster", func() {
+		const clusterNetworkID = "6c90b532-7ba0-418a-a276-5ae55060b5b0"
+		const clusterSubnetID = "cad5a91a-36de-4388-823b-b0cc82cadfdc"
+
+		testCluster.SetName("sg-failure-previously-ready")
+		testCluster.Spec = infrav1.OpenStackClusterSpec{
+			IdentityRef: infrav1.OpenStackIdentityReference{
+				Name:      "test-creds",
+				CloudName: "openstack",
+			},
+			Network: &infrav1.NetworkParam{
+				ID: ptr.To(clusterNetworkID),
+			},
+			DisableExternalNetwork:     ptr.To(true),
+			DisableAPIServerFloatingIP: ptr.To(true),
+			APIServerFixedIP:           ptr.To("192.168.0.10"),
+			ManagedSecurityGroups: &infrav1.ManagedSecurityGroups{
+				AllNodesSecurityGroupRules: []infrav1.SecurityGroupRuleSpec{
+					{
+						Direction: "ingress",
+						Protocol:  ptr.To("tcp"),
+						RemoteManagedGroups: []infrav1.ManagedSecurityGroupName{
+							"worker",
+						},
+					},
+				},
+			},
+		}
+		err := k8sClient.Create(ctx, testCluster)
+		Expect(err).To(BeNil())
+		err = k8sClient.Create(ctx, capiCluster)
+		Expect(err).To(BeNil())
+
+		// Simulate a previously successful reconcile by pre-setting conditions to True.
+		// This is the scenario from https://github.com/kubernetes-sigs/cluster-api-provider-openstack/issues/2993
+		// where the cluster was already Ready and a subsequent security group update fails.
+		conditions.Set(testCluster, metav1.Condition{
+			Type:   clusterv1.ReadyCondition,
+			Status: metav1.ConditionTrue,
+			Reason: infrav1.ReadyConditionReason,
+		})
+		conditions.Set(testCluster, metav1.Condition{
+			Type:   infrav1.SecurityGroupsReadyCondition,
+			Status: metav1.ConditionTrue,
+			Reason: infrav1.ReadyConditionReason,
+		})
+		conditions.Set(testCluster, metav1.Condition{
+			Type:   infrav1.NetworkReadyCondition,
+			Status: metav1.ConditionTrue,
+			Reason: infrav1.ReadyConditionReason,
+		})
+
+		// Verify preconditions: cluster appears Ready
+		Expect(conditions.IsTrue(testCluster, clusterv1.ReadyCondition)).To(BeTrue())
+		Expect(conditions.IsTrue(testCluster, infrav1.SecurityGroupsReadyCondition)).To(BeTrue())
+
+		log := GinkgoLogr
+		clientScope, err := mockScopeFactory.NewClientScopeFromObject(ctx, k8sClient, nil, log, testCluster)
+		Expect(err).To(BeNil())
+		scope := scope.NewWithLogger(clientScope, log)
+
+		networkClientRecorder := mockScopeFactory.NetworkClient.EXPECT()
+
+		// Network lookup succeeds
+		networkClientRecorder.GetNetwork(clusterNetworkID).Return(&networks.Network{
+			ID:   clusterNetworkID,
+			Name: "cluster-network",
+		}, nil)
+
+		// Subnet lookup succeeds
+		networkClientRecorder.ListSubnet(subnets.ListOpts{
+			NetworkID: clusterNetworkID,
+		}).Return([]subnets.Subnet{
+			{
+				ID:   clusterSubnetID,
+				Name: "cluster-subnet",
+				CIDR: "192.168.0.0/24",
+			},
+		}, nil)
+
+		// Security group reconciliation fails (e.g. rule conflict as reported in #2993)
+		networkClientRecorder.ListSecGroup(gomock.Any()).Return([]groups.SecGroup{}, nil).AnyTimes()
+		networkClientRecorder.CreateSecGroup(gomock.Any()).Return(nil, fmt.Errorf("SecurityGroupRuleExists")).AnyTimes()
+
+		err = reconcileNetworkComponents(scope, capiCluster, testCluster)
+		Expect(err).ToNot(BeNil())
+		Expect(err.Error()).To(ContainSubstring("failed to reconcile security groups"))
+
+		// Verify ReadyCondition is now False (was True before)
+		Expect(conditions.IsFalse(testCluster, clusterv1.ReadyCondition)).To(BeTrue())
+		readyCondition := conditions.Get(testCluster, clusterv1.ReadyCondition)
+		Expect(readyCondition).ToNot(BeNil())
+		Expect(readyCondition.Reason).To(Equal(infrav1.OpenStackErrorReason))
+
+		// Verify SecurityGroupsReadyCondition is now False (was True before)
+		Expect(conditions.IsFalse(testCluster, infrav1.SecurityGroupsReadyCondition)).To(BeTrue())
+		sgCondition := conditions.Get(testCluster, infrav1.SecurityGroupsReadyCondition)
+		Expect(sgCondition).ToNot(BeNil())
+		Expect(sgCondition.Reason).To(Equal(infrav1.SecurityGroupReconcileFailedReason))
+
+		// NetworkReadyCondition should remain True since network reconciliation succeeded
+		Expect(conditions.IsTrue(testCluster, infrav1.NetworkReadyCondition)).To(BeTrue())
+	})
+
 	It("should set APIEndpointReadyCondition to False when floating IP creation fails", func() {
 		const externalNetworkID = "a42211a2-4d2c-426f-9413-830e4b4abbbc"
 		const clusterNetworkID = "6c90b532-7ba0-418a-a276-5ae55060b5b0"
