Skip to content

Commit c52814e

Browse files
authored
Merge pull request #1032 from Danil-Grigorev/fix-hc-reconcile-on-restart
🐛 Permit create events on HC reconciler and refactor implementation to watch provider objects
2 parents 4597b11 + 95ade98 commit c52814e

File tree

3 files changed

+62
-58
lines changed

3 files changed

+62
-58
lines changed

cmd/main.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -321,9 +321,7 @@ func setupReconcilers(ctx context.Context, mgr ctrl.Manager, watchConfigSecretCh
321321
os.Exit(1)
322322
}
323323

324-
if err := (&healtchcheckcontroller.ProviderHealthCheckReconciler{
325-
Client: mgr.GetClient(),
326-
}).SetupWithManager(mgr, concurrency(concurrencyNumber)); err != nil {
324+
if err := (&healtchcheckcontroller.ProviderHealthCheckReconciler{}).SetupWithManager(mgr, concurrency(concurrencyNumber)); err != nil {
327325
setupLog.Error(err, "unable to create controller", "controller", "Healthcheck")
328326
os.Exit(1)
329327
}

internal/controller/healthcheck/healthcheck_controller.go

Lines changed: 60 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,15 @@ limitations under the License.
1717
package healthcheck
1818

1919
import (
20+
"cmp"
2021
"context"
2122
"fmt"
22-
"time"
2323

2424
appsv1 "k8s.io/api/apps/v1"
2525
"k8s.io/apimachinery/pkg/runtime/schema"
2626
"k8s.io/apimachinery/pkg/types"
2727

2828
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
29-
"k8s.io/apimachinery/pkg/runtime"
3029
kerrors "k8s.io/apimachinery/pkg/util/errors"
3130
operatorv1 "sigs.k8s.io/cluster-api-operator/api/v1alpha2"
3231
clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2"
@@ -38,7 +37,7 @@ import (
3837
"sigs.k8s.io/controller-runtime/pkg/builder"
3938
"sigs.k8s.io/controller-runtime/pkg/client"
4039
"sigs.k8s.io/controller-runtime/pkg/controller"
41-
"sigs.k8s.io/controller-runtime/pkg/event"
40+
"sigs.k8s.io/controller-runtime/pkg/handler"
4241
"sigs.k8s.io/controller-runtime/pkg/predicate"
4342
"sigs.k8s.io/controller-runtime/pkg/reconcile"
4443
)
@@ -59,12 +58,10 @@ const providerLabelKey = "cluster.x-k8s.io/provider"
5958

6059
var deploymentPredicate predicate.Predicate
6160

62-
type ProviderHealthCheckReconciler struct {
63-
Client client.Client
64-
}
61+
type ProviderHealthCheckReconciler struct{}
6562

6663
type GenericProviderHealthCheckReconciler struct {
67-
Client client.Client
64+
client.Client
6865
Provider operatorv1.GenericProvider
6966
providerGVK schema.GroupVersionKind
7067
}
@@ -103,7 +100,7 @@ func (r *ProviderHealthCheckReconciler) SetupWithManager(mgr ctrl.Manager, optio
103100
}
104101

105102
func (r *GenericProviderHealthCheckReconciler) SetupWithManager(mgr ctrl.Manager, options controller.Options) error {
106-
kinds, _, err := r.Client.Scheme().ObjectKinds(r.Provider)
103+
kinds, _, err := r.Scheme().ObjectKinds(r.Provider)
107104
if err != nil {
108105
return err
109106
}
@@ -116,43 +113,61 @@ func (r *GenericProviderHealthCheckReconciler) SetupWithManager(mgr ctrl.Manager
116113

117114
return ctrl.NewControllerManagedBy(mgr).
118115
Named(name).
119-
For(&appsv1.Deployment{}, builder.WithPredicates(r.providerDeploymentPredicates())).
116+
For(&appsv1.Deployment{}, builder.WithPredicates(predicate.NewPredicateFuncs(r.isProviderDeployment))).
117+
Watches(r.Provider, handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, provider client.Object) []reconcile.Request {
118+
deploymentList := &appsv1.DeploymentList{}
119+
if err := r.List(ctx, deploymentList, client.InNamespace(provider.GetNamespace()), client.HasLabels{providerLabelKey}); err != nil {
120+
ctrl.LoggerFrom(ctx).Error(err, "Failed to list deployments for provider", "provider", client.ObjectKeyFromObject(provider))
121+
return nil
122+
}
123+
124+
requests := []reconcile.Request{}
125+
126+
for _, dep := range deploymentList.Items {
127+
if r.getProviderName(&dep) == provider.GetName() {
128+
requests = append(requests, reconcile.Request{
129+
NamespacedName: client.ObjectKeyFromObject(&dep),
130+
})
131+
}
132+
}
133+
134+
return requests
135+
})).
120136
WithEventFilter(deploymentPredicate).
121137
WithOptions(options).
122-
Complete(r)
138+
Complete(reconcile.AsReconciler(mgr.GetClient(), r))
123139
}
124140

125-
func (r *GenericProviderHealthCheckReconciler) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, reterr error) {
126-
log := ctrl.LoggerFrom(ctx)
127-
128-
log.Info("Checking provider health")
141+
func (r *GenericProviderHealthCheckReconciler) Reconcile(ctx context.Context, deployment *appsv1.Deployment) (_ reconcile.Result, reterr error) {
142+
log := ctrl.LoggerFrom(ctx, "provider", r.providerGVK.Kind, "providerName", r.getProviderName(deployment), "deployment", client.ObjectKeyFromObject(deployment))
129143

130144
result := ctrl.Result{}
131145

132-
deployment := &appsv1.Deployment{}
146+
typedProvider, ok := r.Provider.DeepCopyObject().(operatorv1.GenericProvider)
147+
if !ok {
148+
log.Error(fmt.Errorf("failed to cast provider object as GenericProvider"), "unexpected provider type")
133149

134-
if err := r.Client.Get(ctx, req.NamespacedName, deployment); err != nil {
135-
// Error reading the object - requeue the request.
136-
return result, err
150+
return result, nil
137151
}
138152

139153
// There should be one owner pointing to the Provider resource.
140-
if err := r.Client.Get(ctx, r.getProviderKey(deployment), r.Provider); err != nil {
154+
if err := r.Get(ctx, r.getProviderKey(deployment), typedProvider); err != nil {
141155
// Error reading the object - requeue the request.
156+
log.Error(err, "Failed to get provider for deployment")
142157
return result, err
143158
}
144159

145160
deploymentAvailableCondition := getDeploymentCondition(deployment.Status, appsv1.DeploymentAvailable)
146161

147-
typedProvider := r.Provider
148-
149162
// Stop earlier if this provider is not fully installed yet.
150163
if !conditions.IsTrue(typedProvider, operatorv1.ProviderInstalledCondition) {
151164
log.V(2).Info("Provider not fully installed yet, requeueing")
152165

153-
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
166+
return result, nil
154167
}
155168

169+
log.Info("Checking provider health")
170+
156171
// Compare provider's Ready condition with the deployment's Available condition and stop if they already match.
157172
currentReadyCondition := conditions.Get(typedProvider, clusterv1.ReadyCondition)
158173
if currentReadyCondition != nil && deploymentAvailableCondition != nil && currentReadyCondition.Status == metav1.ConditionStatus(deploymentAvailableCondition.Status) {
@@ -167,35 +182,37 @@ func (r *GenericProviderHealthCheckReconciler) Reconcile(ctx context.Context, re
167182
return result, err
168183
}
169184

170-
if deploymentAvailableCondition != nil {
171-
reason := deploymentAvailableCondition.Reason
172-
if reason == "" {
173-
reason = operatorv1.DeploymentAvailableReason
185+
defer func() {
186+
if err := patchHelper.Patch(ctx, typedProvider, patch.WithOwnedConditions{Conditions: []string{clusterv1.ReadyCondition}}); err != nil {
187+
log.Error(err, "Failed to patch provider status")
188+
189+
reterr = kerrors.NewAggregate([]error{reterr, err})
174190
}
191+
}()
175192

193+
if deploymentAvailableCondition != nil {
176194
log.Info("Updating provider health status", "available", deploymentAvailableCondition.Status)
177195

178196
conditions.Set(typedProvider, metav1.Condition{
179-
Type: clusterv1.ReadyCondition,
180-
Status: metav1.ConditionStatus(deploymentAvailableCondition.Status),
181-
Reason: reason,
197+
Type: clusterv1.ReadyCondition,
198+
Status: metav1.ConditionStatus(deploymentAvailableCondition.Status),
199+
Reason: cmp.Or(deploymentAvailableCondition.Reason, operatorv1.DeploymentAvailableReason),
200+
Message: deploymentAvailableCondition.Message,
182201
})
183202
} else {
184203
conditions.Set(typedProvider, metav1.Condition{
185-
Type: clusterv1.ReadyCondition,
186-
Status: metav1.ConditionFalse,
187-
Reason: operatorv1.NoDeploymentAvailableConditionReason,
204+
Type: clusterv1.ReadyCondition,
205+
Status: metav1.ConditionFalse,
206+
Reason: operatorv1.NoDeploymentAvailableConditionReason,
207+
Message: "No minimum availability condition found on the provider deployment",
188208
})
189209
}
190210

191-
// Don't requeue immediately if the deployment is not ready, but rather wait 5 seconds.
192-
if conditions.IsFalse(typedProvider, clusterv1.ReadyCondition) {
193-
result = ctrl.Result{RequeueAfter: 5 * time.Second}
211+
if !conditions.IsTrue(typedProvider, clusterv1.ReadyCondition) {
212+
log.V(2).Info("Provider is not ready yet")
194213
}
195214

196-
options := patch.WithOwnedConditions{Conditions: []string{clusterv1.ReadyCondition}}
197-
198-
return result, patchHelper.Patch(ctx, typedProvider, options)
215+
return result, nil
199216
}
200217

201218
func (r *GenericProviderHealthCheckReconciler) getProviderName(deploy client.Object) string {
@@ -227,20 +244,11 @@ func getDeploymentCondition(status appsv1.DeploymentStatus, condType appsv1.Depl
227244
return nil
228245
}
229246

230-
func (r *GenericProviderHealthCheckReconciler) providerDeploymentPredicates() predicate.Funcs {
231-
isProviderDeployment := func(obj runtime.Object) bool {
232-
deployment, ok := obj.(*appsv1.Deployment)
233-
if !ok {
234-
panic("expected to get an of object of type appsv1.Deployment")
235-
}
236-
237-
return r.getProviderName(deployment) != ""
247+
func (r *GenericProviderHealthCheckReconciler) isProviderDeployment(obj client.Object) bool {
248+
deployment, ok := obj.(*appsv1.Deployment)
249+
if !ok {
250+
panic("expected to get an of object of type appsv1.Deployment")
238251
}
239252

240-
return predicate.Funcs{
241-
CreateFunc: func(e event.CreateEvent) bool { return false },
242-
UpdateFunc: func(e event.UpdateEvent) bool { return isProviderDeployment(e.ObjectNew) },
243-
GenericFunc: func(e event.GenericEvent) bool { return false },
244-
DeleteFunc: func(e event.DeleteEvent) bool { return false },
245-
}
253+
return r.getProviderName(deployment) != ""
246254
}

internal/controller/healthcheck/suite_test.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,7 @@ func TestMain(m *testing.M) {
5252
panic(fmt.Sprintf("Failed to start CoreProviderReconciler: %v", err))
5353
}
5454

55-
if err := (&ProviderHealthCheckReconciler{
56-
Client: env,
57-
}).SetupWithManager(env.Manager, controller.Options{MaxConcurrentReconciles: 1}); err != nil {
55+
if err := (&ProviderHealthCheckReconciler{}).SetupWithManager(env.Manager, controller.Options{MaxConcurrentReconciles: 1}); err != nil {
5856
panic(fmt.Sprintf("Failed to start Healthcheck controller: %v", err))
5957
}
6058

0 commit comments

Comments
 (0)