-
Notifications
You must be signed in to change notification settings - Fork 70
Workload should still be resilient when catalog is deleted #2439
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We'll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -303,21 +303,34 @@ func (bc *Boxcutter) createOrUpdate(ctx context.Context, obj client.Object) erro | |
| return bc.Client.Patch(ctx, obj, client.Apply, client.FieldOwner(bc.FieldOwner), client.ForceOwnership) | ||
| } | ||
|
|
||
| func (bc *Boxcutter) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExtension, objectLabels, revisionAnnotations map[string]string) error { | ||
| // Generate desired revision | ||
| desiredRevision, err := bc.RevisionGenerator.GenerateRevision(ctx, contentFS, ext, objectLabels, revisionAnnotations) | ||
| func (bc *Boxcutter) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExtension, objectLabels, revisionAnnotations map[string]string) (bool, string, error) { | ||
| // List all existing revisions | ||
| existingRevisions, err := bc.getExistingRevisions(ctx, ext.GetName()) | ||
| if err != nil { | ||
| return err | ||
| return false, "", err | ||
| } | ||
|
Comment on lines
+307
to
311
|
||
|
|
||
| if err := controllerutil.SetControllerReference(ext, desiredRevision, bc.Scheme); err != nil { | ||
| return fmt.Errorf("set ownerref: %w", err) | ||
| // If contentFS is nil, we're maintaining the current state without catalog access. | ||
| // In this case, we should use the existing installed revision without generating a new one. | ||
| if contentFS == nil { | ||
| if len(existingRevisions) == 0 { | ||
| return false, "", fmt.Errorf("cannot maintain workload: no catalog content available and no previously installed revision found") | ||
| } | ||
| // Returning true here signals that the rollout has succeeded using the current revision. The | ||
| // ClusterExtensionRevision controller will continue to reconcile, apply, and maintain the | ||
| // resources defined in that revision via Server-Side Apply, ensuring the workload keeps running | ||
| // even when catalog access (and thus new revision content) is unavailable. | ||
| return true, "", nil | ||
| } | ||
|
|
||
| // List all existing revisions | ||
| existingRevisions, err := bc.getExistingRevisions(ctx, ext.GetName()) | ||
| // Generate desired revision | ||
| desiredRevision, err := bc.RevisionGenerator.GenerateRevision(ctx, contentFS, ext, objectLabels, revisionAnnotations) | ||
| if err != nil { | ||
| return err | ||
| return false, "", err | ||
| } | ||
|
|
||
| if err := controllerutil.SetControllerReference(ext, desiredRevision, bc.Scheme); err != nil { | ||
| return false, "", fmt.Errorf("set ownerref: %w", err) | ||
| } | ||
|
|
||
| currentRevision := &ocv1.ClusterExtensionRevision{} | ||
|
|
@@ -339,7 +352,7 @@ func (bc *Boxcutter) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.Clust | |
| // inplace patch was successful, no changes in phases | ||
| state = StateUnchanged | ||
| default: | ||
| return fmt.Errorf("patching %s Revision: %w", desiredRevision.Name, err) | ||
| return false, "", fmt.Errorf("patching %s Revision: %w", desiredRevision.Name, err) | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -353,7 +366,7 @@ func (bc *Boxcutter) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.Clust | |
| case StateNeedsInstall: | ||
| err := preflight.Install(ctx, plainObjs) | ||
| if err != nil { | ||
| return err | ||
| return false, "", err | ||
| } | ||
| // TODO: jlanford's IDE says that "StateNeedsUpgrade" condition is always true, but | ||
| // it isn't immediately obvious why that is. Perhaps len(existingRevisions) is | ||
|
|
@@ -362,7 +375,7 @@ func (bc *Boxcutter) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.Clust | |
| case StateNeedsUpgrade: | ||
| err := preflight.Upgrade(ctx, plainObjs) | ||
| if err != nil { | ||
| return err | ||
| return false, "", err | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -376,15 +389,15 @@ func (bc *Boxcutter) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.Clust | |
| desiredRevision.Spec.Revision = revisionNumber | ||
|
|
||
| if err = bc.garbageCollectOldRevisions(ctx, prevRevisions); err != nil { | ||
| return fmt.Errorf("garbage collecting old revisions: %w", err) | ||
| return false, "", fmt.Errorf("garbage collecting old revisions: %w", err) | ||
| } | ||
|
|
||
| if err := bc.createOrUpdate(ctx, desiredRevision); err != nil { | ||
| return fmt.Errorf("creating new Revision: %w", err) | ||
| return false, "", fmt.Errorf("creating new Revision: %w", err) | ||
| } | ||
| } | ||
|
|
||
| return nil | ||
| return true, "", nil | ||
| } | ||
|
|
||
| // garbageCollectOldRevisions deletes archived revisions beyond ClusterExtensionRevisionRetentionLimit. | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -103,6 +103,16 @@ func (h *Helm) runPreAuthorizationChecks(ctx context.Context, ext *ocv1.ClusterE | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| func (h *Helm) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExtension, objectLabels map[string]string, storageLabels map[string]string) (bool, string, error) { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| // If contentFS is nil, we're maintaining the current state without catalog access. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| // In this case, reconcile the existing Helm release if it exists. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| if contentFS == nil { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
camilamacedo86 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| ac, err := h.ActionClientGetter.ActionClientFor(ctx, ext) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| if err != nil { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| return false, "", err | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| return h.reconcileExistingRelease(ctx, ac, ext) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| chrt, err := h.buildHelmChart(contentFS, ext) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| if err != nil { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| return false, "", err | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
@@ -197,6 +207,45 @@ func (h *Helm) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExte | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| return true, "", nil | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| // reconcileExistingRelease reconciles an existing Helm release without catalog access. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| // This is used when the catalog is unavailable but we need to maintain the current installation. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| // It reconciles the release and sets up watchers to ensure resources are maintained. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| // It reconciles the release and sets up watchers to ensure resources are maintained. | |
| // It reconciles the release to maintain resources, and sets up watchers for monitoring/observability only. |
Copilot
AI
Jan 17, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When Manager or Watcher fields are nil in the Helm struct, calling Manager.Get() or cache.Watch() will cause a nil pointer dereference panic. While these fields should be properly initialized during setup, defensive nil checks would prevent runtime panics if there's a misconfiguration.
Consider adding nil checks for h.Manager and h.Watcher before using them, returning an appropriate error if they are nil.
| klog.FromContext(ctx).Info("watching managed objects") | |
| cache, err := h.Manager.Get(ctx, ext) | |
| if err != nil { | |
| return true, "", err | |
| } | |
| klog.FromContext(ctx).Info("watching managed objects") | |
| if h.Manager == nil { | |
| return true, "", fmt.Errorf("manager is nil") | |
| } | |
| cache, err := h.Manager.Get(ctx, ext) | |
| if err != nil { | |
| return true, "", err | |
| } | |
| if h.Watcher == nil { | |
| return true, "", fmt.Errorf("watcher is nil") | |
| } |
Copilot
AI
Jan 17, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When setting up monitoring/watches fails after successful reconciliation, the function returns true (indicating success) but also returns an error. This creates inconsistent state: the boolean indicates success but the error suggests failure. This could lead to confusing behavior in the caller.
Consider whether monitoring setup failures should truly be non-fatal. If monitoring is essential for proper operation, these errors should prevent returning success. If monitoring failures are acceptable, the errors should be logged rather than returned, or wrapped in a way that clearly indicates they are warnings rather than failures.
| // so we return true (rollout succeeded) even though monitoring setup failed. | |
| relObjects, err := util.ManifestObjects(strings.NewReader(rel.Manifest), fmt.Sprintf("%s-release-manifest", rel.Name)) | |
| if err != nil { | |
| return true, "", err | |
| } | |
| klog.FromContext(ctx).Info("watching managed objects") | |
| cache, err := h.Manager.Get(ctx, ext) | |
| if err != nil { | |
| return true, "", err | |
| } | |
| if err := cache.Watch(ctx, h.Watcher, relObjects...); err != nil { | |
| return true, "", err | |
| // so we return true (rollout succeeded) and log the monitoring failure as a warning. | |
| logger := klog.FromContext(ctx) | |
| relObjects, err := util.ManifestObjects(strings.NewReader(rel.Manifest), fmt.Sprintf("%s-release-manifest", rel.Name)) | |
| if err != nil { | |
| logger.Error(err, "failed to parse release manifest objects for watching", "release", rel.Name) | |
| return true, "", nil | |
| } | |
| logger.Info("watching managed objects", "release", rel.Name) | |
| cache, err := h.Manager.Get(ctx, ext) | |
| if err != nil { | |
| logger.Error(err, "failed to get cache for watching managed objects", "release", rel.Name) | |
| return true, "", nil | |
| } | |
| if err := cache.Watch(ctx, h.Watcher, relObjects...); err != nil { | |
| logger.Error(err, "failed to establish watches on managed objects", "release", rel.Name) | |
| return true, "", nil |
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -13,7 +13,7 @@ import ( | |||||||||
| ) | ||||||||||
|
|
||||||||||
| func TestClusterExtensionSourceConfig(t *testing.T) { | ||||||||||
|
||||||||||
| func TestClusterExtensionSourceConfig(t *testing.T) { | |
| func TestClusterExtensionSourceConfig(t *testing.T) { | |
| // NOTE: The Kubernetes validation error for a JSON null sourceType renders as "null" | |
| // (with quotes) in the message. This expectation intentionally matches that format. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The Boxcutter.Apply method signature was changed to return
(bool, string, error), but the ApplyBundleWithBoxcutter function still expects a function that returns onlyerror. This will cause a compilation failure.The ApplyBundleWithBoxcutter function in
internal/operator-controller/controllers/boxcutter_reconcile_steps.goneeds to be updated to accept the new signature and handle the rollout status similarly to how ApplyBundle handles it for Helm.