@@ -3,32 +3,33 @@ package provisioning
33import (
44 "context"
55 "fmt"
6+ "os"
7+ "os/exec"
68 "strings"
79 "testing"
810 "time"
911
1012 provv1 "github.com/rancher/rancher/pkg/apis/provisioning.cattle.io/v1"
11-
12- "github.com/rancher/tests/actions/clusters"
13- "github.com/rancher/tests/actions/provisioninginput"
14- wranglername "github.com/rancher/wrangler/pkg/name"
15-
1613 "github.com/rancher/shepherd/clients/rancher"
1714 management "github.com/rancher/shepherd/clients/rancher/generated/management/v3"
1815 steveV1 "github.com/rancher/shepherd/clients/rancher/v1"
1916 shepherdclusters "github.com/rancher/shepherd/extensions/clusters"
2017 "github.com/rancher/shepherd/extensions/clusters/bundledclusters"
2118 "github.com/rancher/shepherd/extensions/defaults"
19+ shephDefaults "github.com/rancher/shepherd/extensions/defaults"
2220 "github.com/rancher/shepherd/extensions/defaults/namespaces"
2321 "github.com/rancher/shepherd/extensions/defaults/stevetypes"
2422 "github.com/rancher/shepherd/extensions/kubeconfig"
2523 nodestat "github.com/rancher/shepherd/extensions/nodes"
2624 "github.com/rancher/shepherd/extensions/sshkeys"
2725 "github.com/rancher/shepherd/extensions/workloads/pods"
2826 "github.com/rancher/shepherd/pkg/wait"
27+ "github.com/rancher/tests/actions/clusters"
28+ "github.com/rancher/tests/actions/provisioninginput"
2929 psadeploy "github.com/rancher/tests/actions/psact"
3030 "github.com/rancher/tests/actions/registries"
3131 "github.com/rancher/tests/actions/reports"
32+ wranglername "github.com/rancher/wrangler/pkg/name"
3233 "github.com/sirupsen/logrus"
3334 "github.com/stretchr/testify/assert"
3435 "github.com/stretchr/testify/require"
@@ -37,6 +38,8 @@ import (
3738 "k8s.io/apimachinery/pkg/runtime/schema"
3839 kwait "k8s.io/apimachinery/pkg/util/wait"
3940 "k8s.io/apimachinery/pkg/watch"
41+ "k8s.io/client-go/kubernetes"
42+ "k8s.io/client-go/tools/clientcmd"
4043 capi "sigs.k8s.io/cluster-api/api/v1beta1"
4144)
4245
@@ -523,6 +526,179 @@ func VerifyACE(t *testing.T, client *rancher.Client, cluster *steveV1.SteveAPIOb
523526 }
524527}
525528
529+ // VerifyACELocalUnavailable validates that the ACE resources are healthy in a given cluster when the local cluster is unavailable.
530+ func VerifyACELocalUnavailable (t * testing.T , rancherClient * rancher.Client , cluster * steveV1.SteveAPIObject , clusterStatus * provv1.ClusterStatus , pemFilePath string , sshUser string ) {
531+ localKubeconfigPath := "./local-kubeconfig.yaml"
532+
533+ kubeConfigPtr , err := kubeconfig .GetKubeconfig (rancherClient , "local" )
534+ require .NoError (t , err , "failed to get local cluster kubeconfig" )
535+ require .NotNil (t , kubeConfigPtr , "local kubeconfig is nil" )
536+ kubeConfig := * kubeConfigPtr
537+
538+ rawConfig , err := kubeConfig .RawConfig ()
539+ require .NoError (t , err , "failed to get raw kubeconfig" )
540+
541+ localRestConfig , err := clientcmd .NewDefaultClientConfig (rawConfig , & clientcmd.ConfigOverrides {}).ClientConfig ()
542+ require .NoError (t , err , "failed to create REST config from local kubeconfig" )
543+
544+ localClient , err := kubernetes .NewForConfig (localRestConfig )
545+ require .NoError (t , err , "failed to create local Kubernetes client" )
546+
547+ nodes , err := localClient .CoreV1 ().Nodes ().List (context .TODO (), metav1.ListOptions {})
548+ require .NoError (t , err , "failed to list nodes in local cluster" )
549+
550+ var controlPlaneIP string
551+
552+ for _ , node := range nodes .Items {
553+ if node .Labels ["node-role.kubernetes.io/control-plane" ] == "true" {
554+ for _ , addr := range node .Status .Addresses {
555+ if addr .Type == corev1 .NodeExternalIP {
556+ controlPlaneIP = addr .Address
557+ break
558+ }
559+ }
560+ if controlPlaneIP != "" {
561+ break
562+ }
563+ }
564+ }
565+ require .NotEmpty (t , controlPlaneIP , "could not find controlplane node IP" )
566+
567+ scpCmd := exec .Command (
568+ "ssh" ,
569+ "-i" , pemFilePath ,
570+ "-o" , "StrictHostKeyChecking=no" ,
571+ fmt .Sprintf ("%s@%s" , sshUser , controlPlaneIP ),
572+ fmt .Sprintf ("sudo cat /etc/rancher/rke2/rke2.yaml" ),
573+ )
574+
575+ scpOutput , err := scpCmd .CombinedOutput ()
576+ require .NoErrorf (t , err , "failed to fetch kubeconfig: %s" , string (scpOutput ))
577+ err = os .WriteFile (localKubeconfigPath , scpOutput , 0600 )
578+ require .NoError (t , err )
579+
580+ rawLocalConfig , err := clientcmd .LoadFromFile (localKubeconfigPath )
581+ require .NoError (t , err , "failed to load local kubeconfig for patching" )
582+
583+ for _ , cluster := range rawLocalConfig .Clusters {
584+ cluster .Server = fmt .Sprintf ("https://%s:6443" , controlPlaneIP )
585+ }
586+
587+ err = clientcmd .WriteToFile (* rawLocalConfig , localKubeconfigPath )
588+ require .NoError (t , err , "failed to write patched kubeconfig" )
589+
590+ downstreamConfigPtr , err := kubeconfig .GetKubeconfig (rancherClient , clusterStatus .ClusterName )
591+ require .NoError (t , err )
592+ require .NotNil (t , downstreamConfigPtr )
593+ downstreamConfig := * downstreamConfigPtr
594+
595+ rawDownstreamConfig , err := downstreamConfig .RawConfig ()
596+ require .NoError (t , err )
597+
598+ for name , ctx := range rawDownstreamConfig .Contexts {
599+ cluster := rawDownstreamConfig .Clusters [ctx .Cluster ]
600+ if strings .Contains (cluster .Server , ":6443" ) && ! strings .Contains (cluster .Server , "/k8s/clusters/" ) {
601+ rawDownstreamConfig .CurrentContext = name
602+ break
603+ }
604+ }
605+
606+ downstreamClientConfig := clientcmd .NewDefaultClientConfig (rawDownstreamConfig , & clientcmd.ConfigOverrides {})
607+ downstreamRestConfig , err := downstreamClientConfig .ClientConfig ()
608+ require .NoError (t , err )
609+
610+ downstreamClient , err := kubernetes .NewForConfig (downstreamRestConfig )
611+ require .NoError (t , err )
612+
613+ logrus .Info ("Scaling Rancher deployment to 0 replicas" )
614+ localDeployment , err := rancherClient .Steve .SteveType ("apps.deployment" ).ByID ("cattle-system/rancher" )
615+ require .NoError (t , err )
616+
617+ obj := localDeployment .JSONResp
618+ spec , ok := obj ["spec" ].(map [string ]any )
619+ require .True (t , ok )
620+ spec ["replicas" ] = int64 (0 )
621+
622+ _ , err = rancherClient .Steve .SteveType ("apps.deployment" ).Update (localDeployment , obj )
623+ require .NoError (t , err )
624+
625+ logrus .Info ("Waiting for Rancher deployment to scale down" )
626+ _ = kwait .PollUntilContextTimeout (context .TODO (), 1 * time .Second , shephDefaults .OneMinuteTimeout , true , func (ctx context.Context ) (bool , error ) {
627+ _ , err = rancherClient .Steve .SteveType ("apps.deployment" ).ByID ("cattle-system/rancher" )
628+ if err != nil && (strings .Contains (err .Error (), "500" ) ||
629+ strings .Contains (err .Error (), "502" ) ||
630+ strings .Contains (err .Error (), "503" ) ||
631+ strings .Contains (err .Error (), "504" ) ||
632+ strings .Contains (err .Error (), "EOF" )) {
633+
634+ logrus .Info ("Rancher deployment scaled to 0" )
635+ return true , nil
636+ }
637+ return false , nil
638+ })
639+
640+ podsClient := downstreamClient .CoreV1 ().Pods ("cattle-system" )
641+
642+ podList , err := podsClient .List (context .TODO (), metav1.ListOptions {
643+ LabelSelector : "app=kube-api-auth" ,
644+ })
645+ if err != nil {
646+ require .NoError (t , err )
647+ }
648+ require .NotEmpty (t , podList .Items , "kube-api-auth pod not found in cattle-system namespace" )
649+
650+ kubeAPIPod := podList .Items [0 ]
651+ var image string
652+ for _ , c := range kubeAPIPod .Spec .Containers {
653+ if c .Name == "kube-api-auth" {
654+ image = c .Image
655+ break
656+ }
657+ }
658+ require .NotEmpty (t , image , "kube-api-auth container image not found" )
659+
660+ parts := strings .Split (image , ":" )
661+ version := parts [len (parts )- 1 ]
662+ logrus .Infof ("kube-api-auth pod version (downstream): %s" , version )
663+
664+ allPods , err := downstreamClient .CoreV1 ().Pods ("" ).List (context .TODO (), metav1.ListOptions {})
665+ if err != nil {
666+ require .NoError (t , err )
667+ } else {
668+ for _ , p := range allPods .Items {
669+ logrus .Debugf ("Pod %s (downstream, ns=%s)" , p .Name , p .Namespace )
670+ }
671+ }
672+
673+ scaleCmd := exec .Command (
674+ "kubectl" ,
675+ "--kubeconfig" , localKubeconfigPath ,
676+ "-n" , "cattle-system" ,
677+ "scale" , "deployment/rancher" ,
678+ "--replicas=3" ,
679+ )
680+ output , err := scaleCmd .CombinedOutput ()
681+ require .NoErrorf (t , err , "failed to scale Rancher back up: %s" , string (output ))
682+ logrus .Infof ("Rancher deployment scale command output:\n %s" , string (output ))
683+
684+ waitCmd := exec .Command (
685+ "kubectl" ,
686+ "--kubeconfig" , localKubeconfigPath ,
687+ "-n" , "cattle-system" ,
688+ "rollout" , "status" , "deployment/rancher" ,
689+ "--timeout=5m" ,
690+ )
691+ waitOutput , err := waitCmd .CombinedOutput ()
692+ require .NoErrorf (t , err , "Rancher rollout did not complete: %s" , string (waitOutput ))
693+ logrus .Infof ("Rancher deployment rollout complete:\n %s" , string (waitOutput ))
694+
695+ if err := os .Remove (localKubeconfigPath ); err != nil {
696+ logrus .Warnf ("failed to remove local kubeconfig %s: %v" , localKubeconfigPath , err )
697+ } else {
698+ logrus .Infof ("Removed local kubeconfig: %s" , localKubeconfigPath )
699+ }
700+ }
701+
526702// VerifyHostnameLength validates that the hostnames of the nodes in a cluster are of the correct length
527703func VerifyHostnameLength (t * testing.T , client * rancher.Client , clusterObject * steveV1.SteveAPIObject ) {
528704 clusterSpec := & provv1.ClusterSpec {}
@@ -631,4 +807,4 @@ func VerifyDataDirectories(t *testing.T, client *rancher.Client, cluster *steveV
631807 assert .Error (t , err )
632808 logrus .Debugf ("Verified that the default data directory(%s) on node(%s) does not exist" , clusterSpec .RKEConfig .DataDirectories .SystemAgent , clusterNode .NodeID )
633809 }
634- }
810+ }
0 commit comments