Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/example/helloworld/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ func (c *addManagerConfig) runController(ctx context.Context, kubeConfig *rest.C
utils.NewAddOnDeploymentConfigGetter(addonClient),
),
).
WithConfigCheckEnabledOption().
WithAgentHealthProber(helloworld.AgentHealthProber()).
BuildTemplateAgentAddon()
if err != nil {
Expand Down
4 changes: 3 additions & 1 deletion cmd/example/helloworld_helm/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,9 @@ func runController(ctx context.Context, kubeConfig *rest.Config) error {
utils.AgentInstallNamespaceFromDeploymentConfigFunc(
utils.NewAddOnDeploymentConfigGetter(addonClient),
),
).WithAgentHealthProber(helloworld_helm.AgentHealthProber()).
).
WithConfigCheckEnabledOption().
WithAgentHealthProber(helloworld_helm.AgentHealthProber()).
BuildHelmAgentAddon()
if err != nil {
klog.Errorf("failed to build agent %v", err)
Expand Down
10 changes: 10 additions & 0 deletions pkg/addonmanager/controllers/agentdeploy/healthcheck_sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,11 @@ func (s *healthCheckSyncer) probeWorkAddonStatus(
}

// update Available condition after addon manifestWorks are applied
// NOTE: This check indirectly guards against the race where addon config is not yet synced.
// ManifestApplied is only set after the deploy syncer successfully applies manifests, and the
// deploy syncer checks ConfigCheckEnabled + Configured condition before calling Manifests().
// Therefore, by the time we reach this point, the Configured condition is True and
// Status.ConfigReferences has been populated by OCM's addonconfiguration controller.
if meta.FindStatusCondition(addon.Status.Conditions, addonapiv1beta1.ManagedClusterAddOnManifestApplied) == nil {
return nil
}
Expand All @@ -112,6 +117,11 @@ func (s *healthCheckSyncer) probeWorkloadAvailabilityAddonStatus(
}

// wait for the addon manifest applied
// NOTE: This check indirectly guards against the race where addon config is not yet synced.
// ManifestApplied is only set after the deploy syncer successfully applies manifests, and the
// deploy syncer checks ConfigCheckEnabled + Configured condition before calling Manifests().
// Therefore, by the time we reach this point, the Configured condition is True and
// Status.ConfigReferences has been populated by OCM's addonconfiguration controller.
if meta.FindStatusCondition(addon.Status.Conditions, addonapiv1beta1.ManagedClusterAddOnManifestApplied) == nil {
return nil
}
Expand Down
7 changes: 7 additions & 0 deletions pkg/addonmanager/controllers/registration/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,13 @@ func (c *addonRegistrationController) sync(ctx context.Context, syncCtx factory.
return nil
}

// If ConfigCheckEnabled, wait for addon config to be ready before proceeding
if agentAddon.GetAgentAddonOptions().ConfigCheckEnabled &&
!meta.IsStatusConditionTrue(managedClusterAddonCopy.Status.Conditions, addonapiv1beta1.ManagedClusterAddOnConditionConfigured) {
klog.InfoS("Addon configured condition is not set, skipping registration", "addonName", addonName)
return nil
}

addonPatcher := patcher.NewPatcher[
*addonapiv1beta1.ManagedClusterAddOn,
addonapiv1beta1.ManagedClusterAddOnSpec,
Expand Down
95 changes: 93 additions & 2 deletions pkg/addonmanager/controllers/registration/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ type testAgent struct {
registrations []agent.RegistrationConfig
agentInstallNamespace func(ctx context.Context, addon *addonapiv1beta1.ManagedClusterAddOn) (string, error)
permissionConfig agent.PermissionConfigFunc
configCheckEnabled bool
}

func (t *testAgent) Manifests(ctx context.Context, cluster *clusterv1.ManagedCluster, addon *addonapiv1beta1.ManagedClusterAddOn) ([]runtime.Object, error) {
Expand All @@ -37,11 +38,13 @@ func (t *testAgent) Manifests(ctx context.Context, cluster *clusterv1.ManagedClu
func (t *testAgent) GetAgentAddonOptions() agent.AgentAddonOptions {
if len(t.registrations) == 0 {
return agent.AgentAddonOptions{
AddonName: t.name,
AddonName: t.name,
ConfigCheckEnabled: t.configCheckEnabled,
}
}
agentOption := agent.AgentAddonOptions{
AddonName: t.name,
AddonName: t.name,
ConfigCheckEnabled: t.configCheckEnabled,
Registration: &agent.RegistrationOption{
Configurations: func(ctx context.Context, cluster *clusterv1.ManagedCluster, addon *addonapiv1beta1.ManagedClusterAddOn) ([]agent.RegistrationConfig, error) {
return t.registrations, nil
Expand Down Expand Up @@ -465,6 +468,94 @@ func TestReconcile(t *testing.T) {
},
},
},
{
name: "config check enabled but configured condition is false",
cluster: []runtime.Object{addontesting.NewManagedCluster("cluster1")},
addon: []runtime.Object{func() *addonapiv1beta1.ManagedClusterAddOn {
addon := addontesting.NewAddon("test", "cluster1", metav1.OwnerReference{
Kind: "ClusterManagementAddOn",
Name: "test"})
// Set Configured condition to False
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
Type: addonapiv1beta1.ManagedClusterAddOnConditionConfigured,
Status: metav1.ConditionFalse,
Reason: "ConfigNotReady",
})
return addon
}()},
validateAddonActions: addontesting.AssertNoActions, // Should skip registration when config not ready
testaddon: &testAgent{
name: "test",
namespace: "test-ns",
configCheckEnabled: true,
registrations: []agent.RegistrationConfig{
&agent.KubeClientRegistration{},
},
},
},
{
name: "config check enabled and configured condition is true",
cluster: []runtime.Object{addontesting.NewManagedCluster("cluster1")},
addon: []runtime.Object{func() *addonapiv1beta1.ManagedClusterAddOn {
addon := addontesting.NewAddon("test", "cluster1", metav1.OwnerReference{
Kind: "ClusterManagementAddOn",
Name: "test"})
// Set Configured condition to True
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
Type: addonapiv1beta1.ManagedClusterAddOnConditionConfigured,
Status: metav1.ConditionTrue,
Reason: "ConfigReady",
})
return addon
}()},
validateAddonActions: func(t *testing.T, actions []clienttesting.Action) {
addontesting.AssertActions(t, actions, "patch")
actual := actions[0].(clienttesting.PatchActionImpl).Patch
addOn := &addonapiv1beta1.ManagedClusterAddOn{}
err := json.Unmarshal(actual, addOn)
if err != nil {
t.Fatal(err)
}
if !meta.IsStatusConditionTrue(addOn.Status.Conditions, addonapiv1beta1.ManagedClusterAddOnRegistrationApplied) {
t.Errorf("Unexpected status condition patch, got %s", string(actual))
}
},
testaddon: &testAgent{
name: "test",
namespace: "test-ns",
configCheckEnabled: true,
registrations: []agent.RegistrationConfig{
&agent.KubeClientRegistration{},
},
},
},
{
name: "config check disabled, no configured condition check",
cluster: []runtime.Object{addontesting.NewManagedCluster("cluster1")},
addon: []runtime.Object{addontesting.NewAddon("test", "cluster1", metav1.OwnerReference{
Kind: "ClusterManagementAddOn",
Name: "test"})},
validateAddonActions: func(t *testing.T, actions []clienttesting.Action) {
addontesting.AssertActions(t, actions, "patch")
actual := actions[0].(clienttesting.PatchActionImpl).Patch
addOn := &addonapiv1beta1.ManagedClusterAddOn{}
err := json.Unmarshal(actual, addOn)
if err != nil {
t.Fatal(err)
}
if !meta.IsStatusConditionTrue(addOn.Status.Conditions, addonapiv1beta1.ManagedClusterAddOnRegistrationApplied) {
t.Errorf("Unexpected status condition patch, got %s", string(actual))
}
},
testaddon: &testAgent{
name: "test",
namespace: "test-ns",
configCheckEnabled: false,
registrations: []agent.RegistrationConfig{
&agent.KubeClientRegistration{},
},
},
},
}

for _, c := range cases {
Expand Down
28 changes: 26 additions & 2 deletions pkg/utils/addon_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,35 @@ func AgentInstallNamespaceFromDeploymentConfigFunc(
return "", fmt.Errorf("failed to get deployment config for addon %s: %v", addon.Name, err)
}

// For now, we have no way of knowing if the addon depleoyment config is not configured, or
// For now, we have no way of knowing if the addon deployment config is not configured, or
// is configured but not yet been added to the managedclusteraddon status config references,
// we expect no error will be returned when the addon deployment config is not configured
// so we can use the default namespace.
// TODO: Find a way to distinguish between the above two cases
//
// RACE CONDITION FIX: This function is typically called to get the namespace from addon deployment
// config. It's used in several paths:
//
// 1. Built-in implementations (helm_agentaddon.go, template_agentaddon.go) call this via
// AgentInstallNamespace() inside their Manifests() implementations.
// 2. Custom AgentAddon implementations may call AgentInstallNamespace() or this function directly
// in their Manifests() implementations.
// 3. Registration controller calls AgentInstallNamespace() directly to set Status.Namespace.
//
// All these paths are protected by guards in the FRAMEWORK CONTROLLERS (not in addon code):
// - Registration controller: checks ConfigCheckEnabled + Configured before calling AgentInstallNamespace()
// - Deploy/hook syncers: check ConfigCheckEnabled + Configured before calling Manifests()
// - Health checks: wait for ManifestApplied (which requires deploy to succeed first)
//
// Addons enable this protection by using WithConfigCheckEnabledOption(). The guards wait for the
// Configured condition, which is set by OCM's addonconfiguration controller atomically with
// Status.ConfigReferences, so Configured=True guarantees ConfigReferences is populated.
//
// Custom addon implementations (implementing AgentAddon directly) are automatically protected by
// these same framework guards - no special code needed in the addon itself.
//
// WARNING: If calling this function directly outside these protected paths, YOU must check the
// Configured condition first, or accept that you may get an empty namespace when config is not
// ready yet.
if config == nil {
klog.V(4).InfoS("Addon deployment config is nil, return an empty string for agent install namespace",
"addonNamespace", addon.Namespace, "addonName", addon.Name)
Expand Down
Loading