Skip to content

Commit c2e942c

Browse files
authored
update (#612)
1 parent 86bf1df commit c2e942c

File tree

9 files changed

+372
-124
lines changed

9 files changed

+372
-124
lines changed

.github/workflows/cluster-provisioning.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,11 @@ jobs:
6969
repository: rancher/tfp-automation
7070
path: tfp-automation
7171

72+
- name: Setup kubectl
73+
uses: azure/setup-kubectl@15650b3ad78fff148532a140b8a4c821796b2d7b #v5
74+
with:
75+
version: "latest"
76+
7277
- name: Configure AWS credentials
7378
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 #v6
7479
with:
@@ -417,6 +422,11 @@ jobs:
417422
repository: rancher/tfp-automation
418423
path: tfp-automation
419424

425+
- name: Setup kubectl
426+
uses: azure/setup-kubectl@15650b3ad78fff148532a140b8a4c821796b2d7b #v5
427+
with:
428+
version: "latest"
429+
420430
- name: Configure AWS credentials
421431
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 #v6
422432
with:
@@ -765,6 +775,11 @@ jobs:
765775
repository: rancher/tfp-automation
766776
path: tfp-automation
767777

778+
- name: Setup kubectl
779+
uses: azure/setup-kubectl@15650b3ad78fff148532a140b8a4c821796b2d7b #v5
780+
with:
781+
version: "latest"
782+
768783
- name: Configure AWS credentials
769784
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 #v6
770785
with:

.github/workflows/rancher-upgrade-cluster-provisioning.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,11 @@ jobs:
6969
repository: rancher/tfp-automation
7070
path: tfp-automation
7171

72+
- name: Setup kubectl
73+
uses: azure/setup-kubectl@15650b3ad78fff148532a140b8a4c821796b2d7b #v5
74+
with:
75+
version: "latest"
76+
7277
- name: Configure AWS credentials
7378
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 #v6
7479
with:
@@ -423,6 +428,11 @@ jobs:
423428
repository: rancher/tfp-automation
424429
path: tfp-automation
425430

431+
- name: Setup kubectl
432+
uses: azure/setup-kubectl@15650b3ad78fff148532a140b8a4c821796b2d7b #v5
433+
with:
434+
version: "latest"
435+
426436
- name: Configure AWS credentials
427437
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 #v6
428438
with:
@@ -777,6 +787,11 @@ jobs:
777787
repository: rancher/tfp-automation
778788
path: tfp-automation
779789

790+
- name: Setup kubectl
791+
uses: azure/setup-kubectl@15650b3ad78fff148532a140b8a4c821796b2d7b #v5
792+
with:
793+
version: "latest"
794+
780795
- name: Configure AWS credentials
781796
uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 #v6
782797
with:

actions/provisioning/verify.go

Lines changed: 182 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,32 +3,33 @@ package provisioning
33
import (
44
"context"
55
"fmt"
6+
"os"
7+
"os/exec"
68
"strings"
79
"testing"
810
"time"
911

1012
provv1 "github.com/rancher/rancher/pkg/apis/provisioning.cattle.io/v1"
11-
12-
"github.com/rancher/tests/actions/clusters"
13-
"github.com/rancher/tests/actions/provisioninginput"
14-
wranglername "github.com/rancher/wrangler/pkg/name"
15-
1613
"github.com/rancher/shepherd/clients/rancher"
1714
management "github.com/rancher/shepherd/clients/rancher/generated/management/v3"
1815
steveV1 "github.com/rancher/shepherd/clients/rancher/v1"
1916
shepherdclusters "github.com/rancher/shepherd/extensions/clusters"
2017
"github.com/rancher/shepherd/extensions/clusters/bundledclusters"
2118
"github.com/rancher/shepherd/extensions/defaults"
19+
shephDefaults "github.com/rancher/shepherd/extensions/defaults"
2220
"github.com/rancher/shepherd/extensions/defaults/namespaces"
2321
"github.com/rancher/shepherd/extensions/defaults/stevetypes"
2422
"github.com/rancher/shepherd/extensions/kubeconfig"
2523
nodestat "github.com/rancher/shepherd/extensions/nodes"
2624
"github.com/rancher/shepherd/extensions/sshkeys"
2725
"github.com/rancher/shepherd/extensions/workloads/pods"
2826
"github.com/rancher/shepherd/pkg/wait"
27+
"github.com/rancher/tests/actions/clusters"
28+
"github.com/rancher/tests/actions/provisioninginput"
2929
psadeploy "github.com/rancher/tests/actions/psact"
3030
"github.com/rancher/tests/actions/registries"
3131
"github.com/rancher/tests/actions/reports"
32+
wranglername "github.com/rancher/wrangler/pkg/name"
3233
"github.com/sirupsen/logrus"
3334
"github.com/stretchr/testify/assert"
3435
"github.com/stretchr/testify/require"
@@ -37,6 +38,8 @@ import (
3738
"k8s.io/apimachinery/pkg/runtime/schema"
3839
kwait "k8s.io/apimachinery/pkg/util/wait"
3940
"k8s.io/apimachinery/pkg/watch"
41+
"k8s.io/client-go/kubernetes"
42+
"k8s.io/client-go/tools/clientcmd"
4043
capi "sigs.k8s.io/cluster-api/api/v1beta1"
4144
)
4245

@@ -523,6 +526,179 @@ func VerifyACE(t *testing.T, client *rancher.Client, cluster *steveV1.SteveAPIOb
523526
}
524527
}
525528

529+
// VerifyACELocalUnavailable validates that the ACE resources are healthy in a given cluster when the local cluster is unavailable.
530+
func VerifyACELocalUnavailable(t *testing.T, rancherClient *rancher.Client, cluster *steveV1.SteveAPIObject, clusterStatus *provv1.ClusterStatus, pemFilePath string, sshUser string) {
531+
localKubeconfigPath := "./local-kubeconfig.yaml"
532+
533+
kubeConfigPtr, err := kubeconfig.GetKubeconfig(rancherClient, "local")
534+
require.NoError(t, err, "failed to get local cluster kubeconfig")
535+
require.NotNil(t, kubeConfigPtr, "local kubeconfig is nil")
536+
kubeConfig := *kubeConfigPtr
537+
538+
rawConfig, err := kubeConfig.RawConfig()
539+
require.NoError(t, err, "failed to get raw kubeconfig")
540+
541+
localRestConfig, err := clientcmd.NewDefaultClientConfig(rawConfig, &clientcmd.ConfigOverrides{}).ClientConfig()
542+
require.NoError(t, err, "failed to create REST config from local kubeconfig")
543+
544+
localClient, err := kubernetes.NewForConfig(localRestConfig)
545+
require.NoError(t, err, "failed to create local Kubernetes client")
546+
547+
nodes, err := localClient.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
548+
require.NoError(t, err, "failed to list nodes in local cluster")
549+
550+
var controlPlaneIP string
551+
552+
for _, node := range nodes.Items {
553+
if node.Labels["node-role.kubernetes.io/control-plane"] == "true" {
554+
for _, addr := range node.Status.Addresses {
555+
if addr.Type == corev1.NodeExternalIP {
556+
controlPlaneIP = addr.Address
557+
break
558+
}
559+
}
560+
if controlPlaneIP != "" {
561+
break
562+
}
563+
}
564+
}
565+
require.NotEmpty(t, controlPlaneIP, "could not find controlplane node IP")
566+
567+
scpCmd := exec.Command(
568+
"ssh",
569+
"-i", pemFilePath,
570+
"-o", "StrictHostKeyChecking=no",
571+
fmt.Sprintf("%s@%s", sshUser, controlPlaneIP),
572+
fmt.Sprintf("sudo cat /etc/rancher/rke2/rke2.yaml"),
573+
)
574+
575+
scpOutput, err := scpCmd.CombinedOutput()
576+
require.NoErrorf(t, err, "failed to fetch kubeconfig: %s", string(scpOutput))
577+
err = os.WriteFile(localKubeconfigPath, scpOutput, 0600)
578+
require.NoError(t, err)
579+
580+
rawLocalConfig, err := clientcmd.LoadFromFile(localKubeconfigPath)
581+
require.NoError(t, err, "failed to load local kubeconfig for patching")
582+
583+
for _, cluster := range rawLocalConfig.Clusters {
584+
cluster.Server = fmt.Sprintf("https://%s:6443", controlPlaneIP)
585+
}
586+
587+
err = clientcmd.WriteToFile(*rawLocalConfig, localKubeconfigPath)
588+
require.NoError(t, err, "failed to write patched kubeconfig")
589+
590+
downstreamConfigPtr, err := kubeconfig.GetKubeconfig(rancherClient, clusterStatus.ClusterName)
591+
require.NoError(t, err)
592+
require.NotNil(t, downstreamConfigPtr)
593+
downstreamConfig := *downstreamConfigPtr
594+
595+
rawDownstreamConfig, err := downstreamConfig.RawConfig()
596+
require.NoError(t, err)
597+
598+
for name, ctx := range rawDownstreamConfig.Contexts {
599+
cluster := rawDownstreamConfig.Clusters[ctx.Cluster]
600+
if strings.Contains(cluster.Server, ":6443") && !strings.Contains(cluster.Server, "/k8s/clusters/") {
601+
rawDownstreamConfig.CurrentContext = name
602+
break
603+
}
604+
}
605+
606+
downstreamClientConfig := clientcmd.NewDefaultClientConfig(rawDownstreamConfig, &clientcmd.ConfigOverrides{})
607+
downstreamRestConfig, err := downstreamClientConfig.ClientConfig()
608+
require.NoError(t, err)
609+
610+
downstreamClient, err := kubernetes.NewForConfig(downstreamRestConfig)
611+
require.NoError(t, err)
612+
613+
logrus.Info("Scaling Rancher deployment to 0 replicas")
614+
localDeployment, err := rancherClient.Steve.SteveType("apps.deployment").ByID("cattle-system/rancher")
615+
require.NoError(t, err)
616+
617+
obj := localDeployment.JSONResp
618+
spec, ok := obj["spec"].(map[string]any)
619+
require.True(t, ok)
620+
spec["replicas"] = int64(0)
621+
622+
_, err = rancherClient.Steve.SteveType("apps.deployment").Update(localDeployment, obj)
623+
require.NoError(t, err)
624+
625+
logrus.Info("Waiting for Rancher deployment to scale down")
626+
_ = kwait.PollUntilContextTimeout(context.TODO(), 1*time.Second, shephDefaults.OneMinuteTimeout, true, func(ctx context.Context) (bool, error) {
627+
_, err = rancherClient.Steve.SteveType("apps.deployment").ByID("cattle-system/rancher")
628+
if err != nil && (strings.Contains(err.Error(), "500") ||
629+
strings.Contains(err.Error(), "502") ||
630+
strings.Contains(err.Error(), "503") ||
631+
strings.Contains(err.Error(), "504") ||
632+
strings.Contains(err.Error(), "EOF")) {
633+
634+
logrus.Info("Rancher deployment scaled to 0")
635+
return true, nil
636+
}
637+
return false, nil
638+
})
639+
640+
podsClient := downstreamClient.CoreV1().Pods("cattle-system")
641+
642+
podList, err := podsClient.List(context.TODO(), metav1.ListOptions{
643+
LabelSelector: "app=kube-api-auth",
644+
})
645+
if err != nil {
646+
require.NoError(t, err)
647+
}
648+
require.NotEmpty(t, podList.Items, "kube-api-auth pod not found in cattle-system namespace")
649+
650+
kubeAPIPod := podList.Items[0]
651+
var image string
652+
for _, c := range kubeAPIPod.Spec.Containers {
653+
if c.Name == "kube-api-auth" {
654+
image = c.Image
655+
break
656+
}
657+
}
658+
require.NotEmpty(t, image, "kube-api-auth container image not found")
659+
660+
parts := strings.Split(image, ":")
661+
version := parts[len(parts)-1]
662+
logrus.Infof("kube-api-auth pod version (downstream): %s", version)
663+
664+
allPods, err := downstreamClient.CoreV1().Pods("").List(context.TODO(), metav1.ListOptions{})
665+
if err != nil {
666+
require.NoError(t, err)
667+
} else {
668+
for _, p := range allPods.Items {
669+
logrus.Debugf("Pod %s (downstream, ns=%s)", p.Name, p.Namespace)
670+
}
671+
}
672+
673+
scaleCmd := exec.Command(
674+
"kubectl",
675+
"--kubeconfig", localKubeconfigPath,
676+
"-n", "cattle-system",
677+
"scale", "deployment/rancher",
678+
"--replicas=3",
679+
)
680+
output, err := scaleCmd.CombinedOutput()
681+
require.NoErrorf(t, err, "failed to scale Rancher back up: %s", string(output))
682+
logrus.Infof("Rancher deployment scale command output:\n%s", string(output))
683+
684+
waitCmd := exec.Command(
685+
"kubectl",
686+
"--kubeconfig", localKubeconfigPath,
687+
"-n", "cattle-system",
688+
"rollout", "status", "deployment/rancher",
689+
"--timeout=5m",
690+
)
691+
waitOutput, err := waitCmd.CombinedOutput()
692+
require.NoErrorf(t, err, "Rancher rollout did not complete: %s", string(waitOutput))
693+
logrus.Infof("Rancher deployment rollout complete:\n%s", string(waitOutput))
694+
695+
if err := os.Remove(localKubeconfigPath); err != nil {
696+
logrus.Warnf("failed to remove local kubeconfig %s: %v", localKubeconfigPath, err)
697+
} else {
698+
logrus.Infof("Removed local kubeconfig: %s", localKubeconfigPath)
699+
}
700+
}
701+
526702
// VerifyHostnameLength validates that the hostnames of the nodes in a cluster are of the correct length
527703
func VerifyHostnameLength(t *testing.T, client *rancher.Client, clusterObject *steveV1.SteveAPIObject) {
528704
clusterSpec := &provv1.ClusterSpec{}
@@ -631,4 +807,4 @@ func VerifyDataDirectories(t *testing.T, client *rancher.Client, cluster *steveV
631807
assert.Error(t, err)
632808
logrus.Debugf("Verified that the default data directory(%s) on node(%s) does not exist", clusterSpec.RKEConfig.DataDirectories.SystemAgent, clusterNode.NodeID)
633809
}
634-
}
810+
}

validation/Dockerfile.validation

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ RUN for pem_file in .ssh/jenkins-*; do \
1919
ssh-keygen -f "$pem_file" -y > "/root/.ssh/$(basename "$pem_file").pub"; \
2020
done
2121

22+
# Install the latest stable kubectl. --fail makes curl exit non-zero on an HTTP error instead of
# saving the error page, which would otherwise be installed as the kubectl binary.
RUN curl -fsSLO "https://dl.k8s.io/release/$(curl -fsSL https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" && \
    install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl && \
    rm kubectl
25+
2226
RUN CGO_ENABLED=0
2327

2428
# necessary to run if statements using [[ ]]

validation/provisioning/k3s/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ ACE(Authorized Cluster Endpoint) test verifies that a node driver cluster can be
2222
2. [Cluster Config](#cluster-config)
2323
3. [Machine Config](#machine-config)
2424

25+
NOTE: ACE tests are only supported when the local cluster is an RKE2 cluster.
26+
2527
#### Table Tests:
2628
1. `K3S_ACE`
2729

0 commit comments

Comments
 (0)