|
| 1 | +// Unless explicitly stated otherwise all files in this repository are licensed |
| 2 | +// under the Apache License Version 2.0. |
| 3 | +// This product includes software developed at Datadog (https://www.datadoghq.com/). |
| 4 | +// Copyright 2016-present Datadog, Inc. |
| 5 | + |
| 6 | +package installscript |
| 7 | + |
| 8 | +import ( |
| 9 | + "fmt" |
| 10 | + "os" |
| 11 | + "regexp" |
| 12 | + "strings" |
| 13 | + "testing" |
| 14 | + "time" |
| 15 | + |
| 16 | + "github.com/stretchr/testify/assert" |
| 17 | + "github.com/stretchr/testify/require" |
| 18 | + |
| 19 | + e2eos "github.com/DataDog/datadog-agent/test/e2e-framework/components/os" |
| 20 | + "github.com/DataDog/datadog-agent/test/e2e-framework/scenarios/aws/ec2" |
| 21 | + "github.com/DataDog/datadog-agent/test/e2e-framework/testing/e2e" |
| 22 | + awshost "github.com/DataDog/datadog-agent/test/e2e-framework/testing/provisioners/aws/host" |
| 23 | + installer "github.com/DataDog/datadog-agent/test/new-e2e/tests/installer/unix" |
| 24 | +) |
| 25 | + |
const (
	// apmInjectServiceName is the systemd unit shipped by the apminject package.
	apmInjectServiceName = "datadog-apm-inject.service"
	// apmInjectServicePath is the on-disk location of that unit: the
	// systemd.UserUnitsPath directory ("/etc/systemd/system") followed by the
	// unit name.
	apmInjectServicePath = "/etc/systemd/system/" + apmInjectServiceName
	// productionSSIScriptURL points at the install script for the current GA SSI
	// stack. That stack is older than the systemd-managed ld.so.preload feature:
	// the `apm` command of its installer has neither an `instrument-start` nor an
	// `instrument-stop` subcommand.
	productionSSIScriptURL = "https://install.datadoghq.com/scripts/install-ssi.sh"
	// launcherPreloadPath is the entry that the apm-inject service has to keep in
	// /etc/ld.so.preload; without it host instrumentation is not effective.
	launcherPreloadPath = "/opt/datadog-packages/datadog-apm-inject/stable/inject/launcher.preload.so"
)
| 39 | + |
// execStartInstallerRe captures, as its single submatch, the installer binary
// named on the unit's ExecStart line. Given a line such as
// "ExecStart=/opt/.../installer apm instrument-start host" the captured group is
// "/opt/.../installer". The pattern is multi-line, so it can be applied to the
// full unit file content.
var execStartInstallerRe = regexp.MustCompile(`(?m)^ExecStart=(\S+) apm instrument-start`)
| 43 | + |
| 44 | +// ssiUpgradeSuite verifies that upgrading a host that already has an older SSI |
| 45 | +// stack (whose datadog-installer lacks the `apm instrument-start`/`instrument-stop` |
| 46 | +// subcommands) to a build that enables systemd-managed ld.so.preload produces a |
| 47 | +// datadog-apm-inject.service that points at a *working* installer — and that host |
| 48 | +// instrumentation survives a reboot, when only the service (not the install-time |
| 49 | +// direct fallback) is responsible for populating /etc/ld.so.preload. |
| 50 | +type ssiUpgradeSuite struct { |
| 51 | + installerScriptBaseSuite |
| 52 | +} |
| 53 | + |
| 54 | +// TestSSIUpgrade provisions a single host and runs the upgrade suite. The upgrade |
| 55 | +// target is the current pipeline build, so a pipeline id (or commit sha) is required. |
| 56 | +func TestSSIUpgrade(t *testing.T) { |
| 57 | + requirePipeline(t) |
| 58 | + |
| 59 | + flavor := e2eos.Ubuntu2404 |
| 60 | + flavor.Architecture = e2eos.AMD64Arch |
| 61 | + |
| 62 | + suite := &ssiUpgradeSuite{ |
| 63 | + installerScriptBaseSuite: newInstallerScriptSuite( |
| 64 | + "installer-ssi-upgrade", flavor, flavor.Architecture, |
| 65 | + awshost.WithRunOptions(ec2.WithoutFakeIntake()), |
| 66 | + awshost.WithRunOptions(ec2.WithoutAgent()), |
| 67 | + ), |
| 68 | + } |
| 69 | + |
| 70 | + opts := []awshost.ProvisionerOption{ |
| 71 | + awshost.WithRunOptions( |
| 72 | + ec2.WithEC2InstanceOptions(ec2.WithOSArch(flavor, flavor.Architecture)), |
| 73 | + ec2.WithoutAgent(), |
| 74 | + ), |
| 75 | + } |
| 76 | + opts = append(opts, suite.ProvisionerOptions()...) |
| 77 | + |
| 78 | + e2e.Run(t, suite, |
| 79 | + e2e.WithProvisioner(awshost.Provisioner(opts...)), |
| 80 | + e2e.WithStackName(suite.Name()), |
| 81 | + ) |
| 82 | +} |
| 83 | + |
| 84 | +func (s *ssiUpgradeSuite) TestUpgradePreservesHostInjection() { |
| 85 | + defer s.Purge() |
| 86 | + |
| 87 | + // 1. Install the current GA SSI stack from production. This lands an older |
| 88 | + // datadog-installer on disk — one whose `apm` command has no |
| 89 | + // instrument-start/instrument-stop subcommands. |
| 90 | + s.installProductionSSI() |
| 91 | + s.host.AssertPackageInstalledByInstaller("datadog-apm-inject") |
| 92 | + s.assertLDPreloadInstrumented("after fresh GA install") |
| 93 | + |
| 94 | + // 2. Upgrade to the pipeline build and opt into systemd-managed preload. |
| 95 | + s.RunInstallScript( |
| 96 | + s.scriptURLPrefix+"install-ssi.sh", |
| 97 | + "DD_SITE=datadoghq.com", |
| 98 | + "DD_APM_INSTRUMENTATION_LIBRARIES=python:4", |
| 99 | + "DD_APM_INSTRUMENTATION_ENABLED=host", |
| 100 | + "DD_APM_INSTRUMENTATION_PRELOAD_MODE=systemd", |
| 101 | + "DD_NO_AGENT_INSTALL=true", |
| 102 | + "DD_INSTALLER_REGISTRY_URL_AGENT_PACKAGE=installtesting.datad0g.com.internal.dda-testing.com", |
| 103 | + ) |
| 104 | + |
| 105 | + // 3. The unit must exist, be enabled, and — crucially — reference an installer |
| 106 | + // that actually supports the subcommand it invokes. A stale installer |
| 107 | + // resolved from a higher-priority candidate path would silently break SSI |
| 108 | + // on the next boot. |
| 109 | + state := s.host.State() |
| 110 | + state.AssertFileExists(apmInjectServicePath, 0644, "root", "root") |
| 111 | + state.AssertUnitsLoaded(apmInjectServiceName) |
| 112 | + state.AssertUnitsEnabled(apmInjectServiceName) |
| 113 | + |
| 114 | + installerPath := s.execStartInstallerPath() |
| 115 | + _, err := s.Env().RemoteHost.Execute(fmt.Sprintf("sudo %s apm instrument-start --help", installerPath)) |
| 116 | + require.NoErrorf(s.T(), err, |
| 117 | + "datadog-apm-inject.service ExecStart references %q, which does not support `apm instrument-start`; "+ |
| 118 | + "the service would fail on every boot and host injection would silently break", installerPath) |
| 119 | + |
| 120 | + // Right after the upgrade, ld.so.preload is correct regardless of the service |
| 121 | + // because Instrument() also writes it directly. The reboot below is what proves |
| 122 | + // the *service* (and thus the resolved installer) is healthy. |
| 123 | + s.assertLDPreloadInstrumented("after upgrade, before reboot") |
| 124 | + |
| 125 | + // 4. Reboot. Now only datadog-apm-inject.service's ExecStart can populate |
| 126 | + // /etc/ld.so.preload — the install-time direct write does not run again. |
| 127 | + s.reboot() |
| 128 | + |
| 129 | + require.EventuallyWithT(s.T(), func(c *assert.CollectT) { |
| 130 | + out, err := s.Env().RemoteHost.Execute("systemctl is-active " + apmInjectServiceName) |
| 131 | + assert.NoErrorf(c, err, "apm-inject service not active after reboot (status: %s)", strings.TrimSpace(out)) |
| 132 | + }, 2*time.Minute, 5*time.Second) |
| 133 | + s.assertLDPreloadInstrumented("after reboot") |
| 134 | +} |
| 135 | + |
| 136 | +// installProductionSSI installs the GA SSI stack directly from production, without |
| 137 | +// any pipeline/testing registry overrides, so a genuinely older installer ends up |
| 138 | +// on disk. |
| 139 | +func (s *ssiUpgradeSuite) installProductionSSI() { |
| 140 | + s.Env().RemoteHost.MustExecute(fmt.Sprintf("curl -L %s > install_ssi_prod", productionSSIScriptURL)) |
| 141 | + params := []string{ |
| 142 | + "DD_API_KEY=" + installer.GetAPIKey(), |
| 143 | + "DD_SITE=datadoghq.com", |
| 144 | + "DD_APM_INSTRUMENTATION_LIBRARIES=python:4", |
| 145 | + "DD_APM_INSTRUMENTATION_ENABLED=host", |
| 146 | + "DD_NO_AGENT_INSTALL=true", |
| 147 | + } |
| 148 | + _, err := s.Env().RemoteHost.Execute(strings.Join(params, " ") + " bash install_ssi_prod") |
| 149 | + require.NoError(s.T(), err, "failed to install production SSI stack") |
| 150 | +} |
| 151 | + |
| 152 | +// execStartInstallerPath returns the installer binary path baked into the unit's |
| 153 | +// ExecStart line. |
| 154 | +func (s *ssiUpgradeSuite) execStartInstallerPath() string { |
| 155 | + content, err := s.host.ReadFile(apmInjectServicePath) |
| 156 | + require.NoError(s.T(), err) |
| 157 | + m := execStartInstallerRe.FindStringSubmatch(string(content)) |
| 158 | + require.Lenf(s.T(), m, 2, "could not find ExecStart installer path in unit:\n%s", string(content)) |
| 159 | + return m[1] |
| 160 | +} |
| 161 | + |
| 162 | +// assertLDPreloadInstrumented asserts the injector launcher is present in |
| 163 | +// /etc/ld.so.preload. |
| 164 | +func (s *ssiUpgradeSuite) assertLDPreloadInstrumented(when string) { |
| 165 | + out := s.Env().RemoteHost.MustExecute("cat /etc/ld.so.preload || true") |
| 166 | + require.Containsf(s.T(), out, launcherPreloadPath, |
| 167 | + "injector launcher missing from /etc/ld.so.preload (%s); contents:\n%s", when, out) |
| 168 | +} |
| 169 | + |
| 170 | +// reboot reboots the host and waits for it to come back with a new boot id. |
| 171 | +func (s *ssiUpgradeSuite) reboot() { |
| 172 | + before := strings.TrimSpace(s.Env().RemoteHost.MustExecute("cat /proc/sys/kernel/random/boot_id")) |
| 173 | + // The connection drops as the host goes down; ignore the error. |
| 174 | + _, _ = s.Env().RemoteHost.Execute("sudo reboot") |
| 175 | + require.EventuallyWithT(s.T(), func(c *assert.CollectT) { |
| 176 | + if err := s.Env().RemoteHost.Reconnect(); err != nil { |
| 177 | + assert.NoError(c, err) |
| 178 | + return |
| 179 | + } |
| 180 | + out, err := s.Env().RemoteHost.Execute("cat /proc/sys/kernel/random/boot_id") |
| 181 | + if !assert.NoError(c, err) { |
| 182 | + return |
| 183 | + } |
| 184 | + assert.NotEqualf(c, before, strings.TrimSpace(out), "host has not rebooted yet") |
| 185 | + }, 5*time.Minute, 10*time.Second) |
| 186 | +} |
| 187 | + |
// requirePipeline skips the calling test unless E2E_PIPELINE_ID or
// CI_COMMIT_SHA is set to a non-empty value.
//
// A variable that is exported but empty (e.g. `E2E_PIPELINE_ID=` from a CI
// template) identifies no build, so it is treated the same as an unset one;
// a plain presence check would let the test run without an upgrade target.
func requirePipeline(t *testing.T) {
	t.Helper()
	if os.Getenv("E2E_PIPELINE_ID") != "" || os.Getenv("CI_COMMIT_SHA") != "" {
		return
	}
	t.Skip("E2E_PIPELINE_ID / CI_COMMIT_SHA not set; this test upgrades to the current pipeline build")
}
0 commit comments