Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cmd/operator-controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -623,7 +623,7 @@ func (c *boxcutterReconcilerConfigurator) Configure(ceReconciler *controllers.Cl
controllers.HandleFinalizers(c.finalizers),
controllers.MigrateStorage(storageMigrator),
controllers.RetrieveRevisionStates(revisionStatesGetter),
controllers.ResolveBundle(c.resolver),
controllers.ResolveBundle(c.resolver, c.mgr.GetClient()),
controllers.UnpackBundle(c.imagePuller, c.imageCache),
controllers.ApplyBundleWithBoxcutter(appl.Apply),
}
Expand Down Expand Up @@ -742,7 +742,7 @@ func (c *helmReconcilerConfigurator) Configure(ceReconciler *controllers.Cluster
ceReconciler.ReconcileSteps = []controllers.ReconcileStepFunc{
controllers.HandleFinalizers(c.finalizers),
controllers.RetrieveRevisionStates(revisionStatesGetter),
controllers.ResolveBundle(c.resolver),
controllers.ResolveBundle(c.resolver, c.mgr.GetClient()),
controllers.UnpackBundle(c.imagePuller, c.imageCache),
controllers.ApplyBundle(appl),
}
Expand Down
43 changes: 28 additions & 15 deletions internal/operator-controller/applier/boxcutter.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,21 +303,34 @@ func (bc *Boxcutter) createOrUpdate(ctx context.Context, obj client.Object) erro
return bc.Client.Patch(ctx, obj, client.Apply, client.FieldOwner(bc.FieldOwner), client.ForceOwnership)
}

Copy link

Copilot AI Jan 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Boxcutter.Apply method signature was changed to return (bool, string, error), but the ApplyBundleWithBoxcutter function still expects a function that returns only error. This will cause a compilation failure.

The ApplyBundleWithBoxcutter function in internal/operator-controller/controllers/boxcutter_reconcile_steps.go needs to be updated to accept the new signature and handle the rollout status similarly to how ApplyBundle handles it for Helm.

Suggested change
// ApplyErrorOnly is a compatibility adapter that preserves the previous
// Boxcutter.Apply signature (returning only error) for callers that do not
// yet consume rollout status information. It delegates to Apply and
// discards the rollout status and message.
func (bc *Boxcutter) ApplyErrorOnly(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExtension, objectLabels, revisionAnnotations map[string]string) error {
_, _, err := bc.Apply(ctx, contentFS, ext, objectLabels, revisionAnnotations)
return err
}

Copilot uses AI. Check for mistakes.
func (bc *Boxcutter) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExtension, objectLabels, revisionAnnotations map[string]string) error {
// Generate desired revision
desiredRevision, err := bc.RevisionGenerator.GenerateRevision(ctx, contentFS, ext, objectLabels, revisionAnnotations)
func (bc *Boxcutter) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExtension, objectLabels, revisionAnnotations map[string]string) (bool, string, error) {
// List all existing revisions
existingRevisions, err := bc.getExistingRevisions(ctx, ext.GetName())
if err != nil {
return err
return false, "", err
}
Comment on lines +307 to 311
Copy link

Copilot AI Jan 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The order of operations is problematic. The function calls getExistingRevisions before checking if contentFS is nil. If getExistingRevisions fails (e.g., due to API errors), the function returns an error even when contentFS is nil and we're just trying to maintain the current state.

This means that if there's a transient API error listing revisions, fallback mode will fail when it should succeed. Consider moving the contentFS nil check before the getExistingRevisions call, so that catalog deletion resilience isn't dependent on successfully listing revisions when we're just maintaining the current state.

Copilot uses AI. Check for mistakes.

if err := controllerutil.SetControllerReference(ext, desiredRevision, bc.Scheme); err != nil {
return fmt.Errorf("set ownerref: %w", err)
// If contentFS is nil, we're maintaining the current state without catalog access.
// In this case, we should use the existing installed revision without generating a new one.
if contentFS == nil {
if len(existingRevisions) == 0 {
return false, "", fmt.Errorf("cannot maintain workload: no catalog content available and no previously installed revision found")
}
// Returning true here signals that the rollout has succeeded using the current revision. The
// ClusterExtensionRevision controller will continue to reconcile, apply, and maintain the
// resources defined in that revision via Server-Side Apply, ensuring the workload keeps running
// even when catalog access (and thus new revision content) is unavailable.
return true, "", nil
}

// List all existing revisions
existingRevisions, err := bc.getExistingRevisions(ctx, ext.GetName())
// Generate desired revision
desiredRevision, err := bc.RevisionGenerator.GenerateRevision(ctx, contentFS, ext, objectLabels, revisionAnnotations)
if err != nil {
return err
return false, "", err
}

if err := controllerutil.SetControllerReference(ext, desiredRevision, bc.Scheme); err != nil {
return false, "", fmt.Errorf("set ownerref: %w", err)
}

currentRevision := &ocv1.ClusterExtensionRevision{}
Expand All @@ -339,7 +352,7 @@ func (bc *Boxcutter) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.Clust
// inplace patch was successful, no changes in phases
state = StateUnchanged
default:
return fmt.Errorf("patching %s Revision: %w", desiredRevision.Name, err)
return false, "", fmt.Errorf("patching %s Revision: %w", desiredRevision.Name, err)
}
}

Expand All @@ -353,7 +366,7 @@ func (bc *Boxcutter) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.Clust
case StateNeedsInstall:
err := preflight.Install(ctx, plainObjs)
if err != nil {
return err
return false, "", err
}
// TODO: jlanford's IDE says that "StateNeedsUpgrade" condition is always true, but
// it isn't immediately obvious why that is. Perhaps len(existingRevisions) is
Expand All @@ -362,7 +375,7 @@ func (bc *Boxcutter) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.Clust
case StateNeedsUpgrade:
err := preflight.Upgrade(ctx, plainObjs)
if err != nil {
return err
return false, "", err
}
}
}
Expand All @@ -376,15 +389,15 @@ func (bc *Boxcutter) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.Clust
desiredRevision.Spec.Revision = revisionNumber

if err = bc.garbageCollectOldRevisions(ctx, prevRevisions); err != nil {
return fmt.Errorf("garbage collecting old revisions: %w", err)
return false, "", fmt.Errorf("garbage collecting old revisions: %w", err)
}

if err := bc.createOrUpdate(ctx, desiredRevision); err != nil {
return fmt.Errorf("creating new Revision: %w", err)
return false, "", fmt.Errorf("creating new Revision: %w", err)
}
}

return nil
return true, "", nil
}

// garbageCollectOldRevisions deletes archived revisions beyond ClusterExtensionRevisionRetentionLimit.
Expand Down
6 changes: 5 additions & 1 deletion internal/operator-controller/applier/boxcutter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -926,14 +926,18 @@ func TestBoxcutter_Apply(t *testing.T) {
labels.PackageNameKey: "test-package",
}
}
err := boxcutter.Apply(t.Context(), testFS, ext, nil, revisionAnnotations)
completed, status, err := boxcutter.Apply(t.Context(), testFS, ext, nil, revisionAnnotations)

// Assert
if tc.expectedErr != "" {
require.Error(t, err)
assert.Contains(t, err.Error(), tc.expectedErr)
assert.False(t, completed)
assert.Empty(t, status)
} else {
require.NoError(t, err)
assert.True(t, completed)
assert.Empty(t, status)
}

if tc.validate != nil {
Expand Down
49 changes: 49 additions & 0 deletions internal/operator-controller/applier/helm.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,16 @@ func (h *Helm) runPreAuthorizationChecks(ctx context.Context, ext *ocv1.ClusterE
}

func (h *Helm) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExtension, objectLabels map[string]string, storageLabels map[string]string) (bool, string, error) {
// If contentFS is nil, we're maintaining the current state without catalog access.
// In this case, reconcile the existing Helm release if it exists.
if contentFS == nil {
ac, err := h.ActionClientGetter.ActionClientFor(ctx, ext)
if err != nil {
return false, "", err
}
return h.reconcileExistingRelease(ctx, ac, ext)
}

chrt, err := h.buildHelmChart(contentFS, ext)
if err != nil {
return false, "", err
Expand Down Expand Up @@ -197,6 +207,45 @@ func (h *Helm) Apply(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExte
return true, "", nil
}

// reconcileExistingRelease reconciles an existing Helm release without catalog access.
// This is used when the catalog is unavailable but we need to maintain the current installation.
// It reconciles the release and sets up watchers to ensure resources are maintained.
Copy link

Copilot AI Jan 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function comment states it "sets up watchers to ensure resources are maintained," but this is misleading. The watchers set up here are for monitoring purposes only - they don't actively maintain or reconcile resources. The actual resource maintenance is done by the Helm Reconcile call on line 223. The watchers only observe changes; the actual reconciliation/maintenance happens elsewhere in the Helm operator plugin's controller loop.

Consider updating the comment to clarify that watchers are for monitoring/observability rather than active maintenance.

Suggested change
// It reconciles the release and sets up watchers to ensure resources are maintained.
// It reconciles the release to maintain resources, and sets up watchers for monitoring/observability only.

Copilot uses AI. Check for mistakes.
func (h *Helm) reconcileExistingRelease(ctx context.Context, ac helmclient.ActionInterface, ext *ocv1.ClusterExtension) (bool, string, error) {
rel, err := ac.Get(ext.GetName())
if errors.Is(err, driver.ErrReleaseNotFound) {
return false, "", fmt.Errorf("cannot maintain workload: no catalog content available and no previously installed Helm release found")
}
if err != nil {
return false, "", fmt.Errorf("getting current release: %w", err)
}

// Reconcile the existing release to ensure resources are maintained
if err := ac.Reconcile(rel); err != nil {
// Reconcile failed - resources NOT maintained
// Return false (rollout failed) with error
return false, "", err
}

// At this point: Reconcile succeeded - resources ARE maintained
// The operations below are for setting up monitoring (watches).
// If they fail, the resources are still successfully reconciled and maintained,
// so we return true (rollout succeeded) even though monitoring setup failed.
relObjects, err := util.ManifestObjects(strings.NewReader(rel.Manifest), fmt.Sprintf("%s-release-manifest", rel.Name))
if err != nil {
return true, "", err
}
klog.FromContext(ctx).Info("watching managed objects")
cache, err := h.Manager.Get(ctx, ext)
if err != nil {
return true, "", err
}
Comment on lines +237 to +241
Copy link

Copilot AI Jan 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When Manager or Watcher fields are nil in the Helm struct, calling Manager.Get() or cache.Watch() will cause a nil pointer dereference panic. While these fields should be properly initialized during setup, defensive nil checks would prevent runtime panics if there's a misconfiguration.

Consider adding nil checks for h.Manager and h.Watcher before using them, returning an appropriate error if they are nil.

Suggested change
klog.FromContext(ctx).Info("watching managed objects")
cache, err := h.Manager.Get(ctx, ext)
if err != nil {
return true, "", err
}
klog.FromContext(ctx).Info("watching managed objects")
if h.Manager == nil {
return true, "", fmt.Errorf("manager is nil")
}
cache, err := h.Manager.Get(ctx, ext)
if err != nil {
return true, "", err
}
if h.Watcher == nil {
return true, "", fmt.Errorf("watcher is nil")
}

Copilot uses AI. Check for mistakes.
if err := cache.Watch(ctx, h.Watcher, relObjects...); err != nil {
return true, "", err
Comment on lines +232 to +243
Copy link

Copilot AI Jan 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When setting up monitoring/watches fails after successful reconciliation, the function returns true (indicating success) but also returns an error. This creates inconsistent state: the boolean indicates success but the error suggests failure. This could lead to confusing behavior in the caller.

Consider whether monitoring setup failures should truly be non-fatal. If monitoring is essential for proper operation, these errors should prevent returning success. If monitoring failures are acceptable, the errors should be logged rather than returned, or wrapped in a way that clearly indicates they are warnings rather than failures.

Suggested change
// so we return true (rollout succeeded) even though monitoring setup failed.
relObjects, err := util.ManifestObjects(strings.NewReader(rel.Manifest), fmt.Sprintf("%s-release-manifest", rel.Name))
if err != nil {
return true, "", err
}
klog.FromContext(ctx).Info("watching managed objects")
cache, err := h.Manager.Get(ctx, ext)
if err != nil {
return true, "", err
}
if err := cache.Watch(ctx, h.Watcher, relObjects...); err != nil {
return true, "", err
// so we return true (rollout succeeded) and log the monitoring failure as a warning.
logger := klog.FromContext(ctx)
relObjects, err := util.ManifestObjects(strings.NewReader(rel.Manifest), fmt.Sprintf("%s-release-manifest", rel.Name))
if err != nil {
logger.Error(err, "failed to parse release manifest objects for watching", "release", rel.Name)
return true, "", nil
}
logger.Info("watching managed objects", "release", rel.Name)
cache, err := h.Manager.Get(ctx, ext)
if err != nil {
logger.Error(err, "failed to get cache for watching managed objects", "release", rel.Name)
return true, "", nil
}
if err := cache.Watch(ctx, h.Watcher, relObjects...); err != nil {
logger.Error(err, "failed to establish watches on managed objects", "release", rel.Name)
return true, "", nil

Copilot uses AI. Check for mistakes.
}

return true, "", nil
}

func (h *Helm) buildHelmChart(bundleFS fs.FS, ext *ocv1.ClusterExtension) (*chart.Chart, error) {
if h.HelmChartProvider == nil {
return nil, errors.New("HelmChartProvider is nil")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ func MigrateStorage(m StorageMigrator) ReconcileStepFunc {
}
}

func ApplyBundleWithBoxcutter(apply func(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExtension, objectLabels, revisionAnnotations map[string]string) error) ReconcileStepFunc {
func ApplyBundleWithBoxcutter(apply func(ctx context.Context, contentFS fs.FS, ext *ocv1.ClusterExtension, objectLabels, revisionAnnotations map[string]string) (bool, string, error)) ReconcileStepFunc {
return func(ctx context.Context, state *reconcileState, ext *ocv1.ClusterExtension) (*ctrl.Result, error) {
l := log.FromContext(ctx)
revisionAnnotations := map[string]string{
Expand All @@ -109,7 +109,8 @@ func ApplyBundleWithBoxcutter(apply func(ctx context.Context, contentFS fs.FS, e
}

l.Info("applying bundle contents")
if err := apply(ctx, state.imageFS, ext, objLbls, revisionAnnotations); err != nil {
_, _, err := apply(ctx, state.imageFS, ext, objLbls, revisionAnnotations)
if err != nil {
// If there was an error applying the resolved bundle,
// report the error via the Progressing condition.
setStatusProgressing(ext, wrapErrorWithResolutionInfo(state.resolvedRevisionMetadata.BundleMetadata, err))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,8 @@ func TestApplyBundleWithBoxcutter(t *testing.T) {
imageFS: fstest.MapFS{},
}

stepFunc := ApplyBundleWithBoxcutter(func(_ context.Context, _ fs.FS, _ *ocv1.ClusterExtension, _, _ map[string]string) error {
return nil
stepFunc := ApplyBundleWithBoxcutter(func(_ context.Context, _ fs.FS, _ *ocv1.ClusterExtension, _, _ map[string]string) (bool, string, error) {
return true, "", nil
})
result, err := stepFunc(ctx, state, ext)
require.NoError(t, err)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import (
)

func TestClusterExtensionSourceConfig(t *testing.T) {
Copy link

Copilot AI Jan 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test change updates the expected error string format from 'Invalid value: null' to 'Invalid value: "null"' (adding quotes around null). While this appears to be fixing a test to match updated validation error formatting, there's no context in the PR about why the error format changed. This could indicate:

  1. An intentional change in validation error formatting that should be documented
  2. An unintended side effect of a dependency update
  3. A change in how null values are serialized in error messages

Consider documenting why this error format changed, especially if it affects user-facing error messages.

Suggested change
func TestClusterExtensionSourceConfig(t *testing.T) {
func TestClusterExtensionSourceConfig(t *testing.T) {
// NOTE: The Kubernetes validation error for a JSON null sourceType renders as "null"
// (with quotes) in the message. This expectation intentionally matches that format.

Copilot uses AI. Check for mistakes.
sourceTypeEmptyError := "Invalid value: null"
sourceTypeEmptyError := "Invalid value: \"null\""
sourceTypeMismatchError := "spec.source.sourceType: Unsupported value"
sourceConfigInvalidError := "spec.source: Invalid value"
// unionField represents the required Catalog or (future) Bundle field required by SourceConfig
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,8 @@ func (r *ClusterExtensionReconciler) Reconcile(ctx context.Context, req ctrl.Req

// ensureAllConditionsWithReason checks that all defined condition types exist in the given ClusterExtension,
// and assigns a specified reason and custom message to any missing condition.
//
//nolint:unparam // reason parameter is designed to be flexible, even if current callers use the same value
func ensureAllConditionsWithReason(ext *ocv1.ClusterExtension, reason v1alpha1.ConditionReason, message string) {
for _, condType := range conditionsets.ConditionTypes {
cond := apimeta.FindStatusCondition(ext.Status.Conditions, condType)
Expand Down
Loading
Loading