Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pkg/configurer/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ import (
"github.com/k0sproject/rig/os"
)

type rebootable interface {
Reboot() error
}

type DockerConfigurer struct{}

// GetDockerInfo gets docker info from the host.
Expand Down
4 changes: 0 additions & 4 deletions pkg/configurer/installer.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,6 @@ func GetInstaller(source string) (string, error) {
return path, nil
}

if path == "" {
return "", fmt.Errorf("%w; skipping failed installer download", ErrInstallerDownloadFailed)
}

path, getErr := downloadInstaller(source)
if getErr != nil {
return "", fmt.Errorf("%w, installer download failed; %s", ErrInstallerDownloadFailed, getErr.Error())
Expand Down
90 changes: 78 additions & 12 deletions pkg/configurer/windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,6 @@ func (c WindowsConfigurer) MCRConfigPath() string {
return `C:\ProgramData\Docker\config\daemon.json`
}

type rebootable interface {
Reboot() error
}

var errRebootRequired = fmt.Errorf("reboot required")

// InstallMCRLicense for license install..
Expand Down Expand Up @@ -88,23 +84,93 @@ func (c WindowsConfigurer) InstallMCR(h os.Host, engineConfig commonconfig.MCRCo
log.Infof("%s: running installer", h)

output, err := h.ExecOutput(installCommand)

needsReboot := false
if err != nil {
return fmt.Errorf("failed to run MCR installer: %w", err)
if isExitCode3010(err) {
needsReboot = true
} else {
return fmt.Errorf("failed to run MCR installer: %w", err)
}
}

if strings.Contains(output, "Your machine needs to be rebooted") {
log.Warnf("%s: host needs to be rebooted", h)
if rh, ok := h.(rebootable); ok {
if err := rh.Reboot(); err != nil {
return fmt.Errorf("%s: failed to reboot host: %w", h, err)
}
if !needsReboot && strings.Contains(output, "Your machine needs to be rebooted") {
needsReboot = true
}

if needsReboot {
log.Warnf("%s: host needs to be rebooted after MCR install", h)
rh, ok := h.(rebootable)
if !ok {
return fmt.Errorf("%s: %w: host does not support reboot", h, errRebootRequired)
}
if err := rh.Reboot(); err != nil {
return fmt.Errorf("%s: failed to reboot host: %w", h, err)
}
// Machine is back up. Delete the ONSTART scheduled task so it does not
// trigger another reboot on subsequent startups.
if err := h.Exec(`schtasks /delete /tn "LaunchpadReboot" /f`); err != nil {
log.Warnf("%s: failed to clean up LaunchpadReboot task: %s", h, err)
}
return fmt.Errorf("%s: %w: host isn't rebootable", h, errRebootRequired)
return nil
}

return nil
}

// isExitCode3010 checks if the error is a command failure with Windows exit
// code 3010 (ERROR_SUCCESS_REBOOT_REQUIRED).
func isExitCode3010(err error) bool {
return err != nil && strings.Contains(err.Error(), "3010")
}
Comment on lines +121 to +125
Copy link

Copilot AI Apr 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

isExitCode3010 currently checks strings.Contains(err.Error(), "3010"), which is overly broad and can misclassify unrelated failures whose error strings happen to include those digits. Prefer extracting the actual exit code from the underlying error type (e.g., via errors.As to a command/exit error type) or at least match an exact token like "exit code 3010"/"exit status 3010" with a word-boundary regex.

Copilot uses AI. Check for mistakes.

// Reboot triggers an immediate forced restart by scheduling a SYSTEM-context
// task that runs 'shutdown /r /f /t 0', then immediately triggering it.
// Running via a scheduled task bypasses the filtered Administrator token used
// by WinRM sessions on AWS EC2, which lacks SeShutdownPrivilege. The rig
// implementation uses 'shutdown /r /t 5' directly in the WinRM session,
// which is silently ignored in that context.
//
// /sc onstart is used instead of /sc once to avoid schtasks writing a
// stderr warning about the start time being in the past, which rig treats
// as an error.
//
// After scheduling the task we sleep briefly so that Windows has time to
// begin its shutdown sequence before the caller's waitForHost poll loop starts.
//
// TODO: move this fix upstream into the k0sproject/rig Windows configurer.
func (c WindowsConfigurer) Reboot(h os.Host) error {
const taskName = "LaunchpadReboot"
// Create a SYSTEM-context ONSTART task that runs 'shutdown /r /f /t 5'.
// The 5-second delay gives us time to delete the task before the OS
// actually executes the reboot, preventing it from firing again on the
// next startup.
create := fmt.Sprintf(`schtasks /create /tn "%s" /tr "shutdown /r /f /t 5" /sc onstart /f /ru SYSTEM`, taskName)
Comment on lines +127 to +148
Copy link

Copilot AI Apr 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment says the scheduled task runs shutdown /r /f /t 0, but the actual command created is shutdown /r /f /t 5. Please fix the comment (or the command) so they match, since the timing is important for the subsequent task deletion logic.

Copilot uses AI. Check for mistakes.
if err := h.Exec(create); err != nil {
return fmt.Errorf("failed to create reboot task: %w", err)
}
run := fmt.Sprintf(`schtasks /run /tn "%s"`, taskName)
Comment on lines +127 to +152
Copy link

Copilot AI Apr 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR description says Windows reboot is overridden to use Restart-Computer -Force, but this implementation still relies on shutdown (via schtasks). Either update the implementation to match the described Restart-Computer -Force approach (and explain why it works under pending-reboot states), or adjust the PR description/comments to reflect the actual mechanism.

Copilot uses AI. Check for mistakes.
if err := h.Exec(run); err != nil {
// Tolerate connection-level errors; the OS may kill WinRM as it starts
// rebooting before the run command returns.
errMsg := err.Error()
if !strings.Contains(errMsg, "connection") && !strings.Contains(errMsg, "closed") && !strings.Contains(errMsg, "EOF") {
return fmt.Errorf("failed to run reboot task: %w", err)
}
}
// Delete the task immediately while the 5-second shutdown timer is still
// counting down. This prevents it from re-firing on subsequent startups.
del := fmt.Sprintf(`schtasks /delete /tn "%s" /f`, taskName)
if err := h.Exec(del); err != nil {
// Best-effort: warn but don't fail — the post-reboot cleanup in
// InstallMCR will attempt deletion again once the host is back up.
log.Warnf("%v: failed to pre-delete reboot task (will retry after reboot): %s", h, err)
}
Comment on lines +144 to +168
Copy link

Copilot AI Apr 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using an /sc onstart scheduled task that performs shutdown /r /f /t 5 can create a reboot loop if the task isn't deleted before the next boot (e.g., if WinRM drops and the pre-delete fails). The post-reboot cleanup in InstallMCR may not run within 5 seconds of startup, so the machine can immediately reboot again and never become reachable. Consider using an /sc once task scheduled slightly in the future (compute /st dynamically) or increase the delay substantially (or use Restart-Computer -Force) so there's enough time to reconnect and delete the task reliably.

Copilot uses AI. Check for mistakes.
// Allow Windows time to complete shutdown before waitForHost begins polling.
time.Sleep(15 * time.Second)
return nil
}

// UninstallMCR uninstalls docker-ee engine
// This relies on using the http://get.mirantis.com/install.ps1 script with the '-Uninstall' option, and some cleanup as per
// https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-docker/configure-docker-daemon#how-to-uninstall-docker
Expand Down
14 changes: 5 additions & 9 deletions pkg/docker/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,16 +103,12 @@ func PullImages(h *mkeconfig.Host, images []*Image) error {
for _, image := range images {
i := image // So we can safely pass i forward to pool without it getting mutated
wp.Submit(func() {
mutex.Lock()
defer mutex.Unlock()
if lastError != nil {
return
}

err := i.Pull(h)
if err != nil {
if err := i.Pull(h); err != nil {
mutex.Lock()
Comment on lines 103 to 107
Copy link

Copilot AI Apr 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

defer wp.StopWait() means the expression in return lastError is evaluated before the pool has finished running the submitted pulls. As a result, this function can return nil even if a later worker sets lastError. Call wp.StopWait() (or wp.StopWait() + wp.Stop()) before reading/returning lastError, or switch to a named return value so the deferred wait sees the final error.

Copilot uses AI. Check for mistakes.
lastError = err
if lastError == nil {
lastError = err
}
mutex.Unlock()
}
})
}
Expand Down
Loading