Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 47 additions & 12 deletions cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ func (ng *nodegroup) DeleteNodes(nodes []*corev1.Node) error {
return err
}

// if we are at minSize already we wail early.
// if we are at minSize already we fail early.
if replicas <= ng.MinSize() {
return fmt.Errorf("min size reached, nodes will not be deleted")
}
Expand All @@ -119,7 +119,7 @@ func (ng *nodegroup) DeleteNodes(nodes []*corev1.Node) error {
for _, node := range nodes {
actualNodeGroup, err := ng.machineController.nodeGroupForNode(node)
if err != nil {
return nil
return err
}

if actualNodeGroup == nil {
Expand All @@ -135,18 +135,58 @@ func (ng *nodegroup) DeleteNodes(nodes []*corev1.Node) error {
// < minSize, then the request to delete that many nodes is bogus
// and we fail fast.
if replicas-len(nodes) < ng.MinSize() {
return fmt.Errorf("unable to delete %d machines in %q, machine replicas are %q, minSize is %q ", len(nodes), ng.Id(), replicas, ng.MinSize())
return fmt.Errorf("unable to delete %d machines in %q, machine replicas are %d, minSize is %d", len(nodes), ng.Id(), replicas, ng.MinSize())
}

// Step 3: annotate the corresponding machine that it is a
// suitable candidate for deletion and drop the replica count
// by 1. Fail fast on any error.
// Step 3: when a backing Machine exists, mark it as a deletion candidate
// and decrease the replica count by 1. For MachinePool-backed node groups,
// if no per-node Machine can be resolved, fall back to replica decrement
// after verifying the node belongs to the MachinePool providerID list.
for _, node := range nodes {
nodeGroup, err := ng.machineController.nodeGroupForNode(node)
if err != nil {
return err
}

machine, err := ng.machineController.findMachineByProviderID(normalizedProviderString(node.Spec.ProviderID))
if err != nil {
return err
}

if machine == nil {
// Fallback for MachinePool-based providers where no per-node Machine
// objects exist. In that case, allow scale-down by decreasing replicas,
// but only if the node providerID is explicitly present in the
// MachinePool providerIDList.
if nodeGroup.scalableResource.Kind() == machinePoolKind {
providerIDs, err := nodeGroup.scalableResource.ProviderIDs()
if err != nil {
return err
}

nodeProviderID := normalizedProviderString(node.Spec.ProviderID)
found := false
for _, id := range providerIDs {
if normalizedProviderString(id) == nodeProviderID {
found = true
break
}
}

if !found {
return fmt.Errorf("node %q is not present in MachinePool providerIDList for nodegroup %q", node.Spec.ProviderID, nodeGroup.Id())
}

klog.Warningf("No Machine found for node %q in MachinePool %q, falling back to replica decrement only", node.Spec.ProviderID, nodeGroup.Id())

if err := nodeGroup.scalableResource.SetSize(replicas - 1); err != nil {
return err
}

replicas--
continue
}

return fmt.Errorf("unknown machine for node %q", node.Spec.ProviderID)
}

Expand All @@ -157,16 +197,11 @@ func (ng *nodegroup) DeleteNodes(nodes []*corev1.Node) error {
continue
}

nodeGroup, err := ng.machineController.nodeGroupForNode(node)
if err != nil {
return err
}

if err := nodeGroup.scalableResource.MarkMachineForDeletion(machine); err != nil {
return err
}

if err := ng.scalableResource.SetSize(replicas - 1); err != nil {
if err := nodeGroup.scalableResource.SetSize(replicas - 1); err != nil {
_ = nodeGroup.scalableResource.UnmarkMachineForDeletion(machine)
return err
}
Expand Down