Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 0 additions & 17 deletions Dockerfiles/agent/cont-init.d/60-sysprobe-check.sh

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
ad_identifiers:
- _oom_kill

init_config:

instances:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
ad_identifiers:
- _tcp_queue_length

init_config:

instances:
Expand Down
5 changes: 5 additions & 0 deletions comp/core/autodiscovery/impl/autoconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
logComp "github.com/DataDog/datadog-agent/comp/core/log/def"
secrets "github.com/DataDog/datadog-agent/comp/core/secrets/def"
"github.com/DataDog/datadog-agent/comp/core/status"
sysprobeconfig "github.com/DataDog/datadog-agent/comp/core/sysprobeconfig/def"
Comment thread
vboulineau marked this conversation as resolved.
tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def"
telemetry "github.com/DataDog/datadog-agent/comp/core/telemetry/def"
workloadfilter "github.com/DataDog/datadog-agent/comp/core/workloadfilter/def"
Expand Down Expand Up @@ -69,6 +70,7 @@ type Requires struct {
TaggerComp tagger.Component
Secrets secrets.Component
WMeta option.Option[workloadmeta.Component]
SysProbeConfig option.Option[sysprobeconfig.Component] `optional:"true"`
FilterStore workloadfilter.Component
Telemetry telemetry.Component
HealthPlatform option.Option[healthplatformdef.Component]
Expand Down Expand Up @@ -103,6 +105,7 @@ type AutoConfig struct {
healthPlatform option.Option[healthplatformdef.Component]
staticConfigIndex *listeners.StaticConfigIndex
serviceTracker adtypes.ServiceTracker
sysProbeConfig option.Option[sysprobeconfig.Component]

// m covers the `configPollers`, `listenerCandidates`, `listeners`, and `listenerRetryStop`, but
// not the values they point to.
Expand Down Expand Up @@ -181,6 +184,7 @@ func newAutoConfig(deps Requires) autodiscoverydef.Component {
}()

ac := createNewAutoConfig(schController, deps.Secrets, deps.WMeta, deps.TaggerComp, deps.Log, deps.Telemetry, deps.FilterStore, deps.HealthPlatform, deps.ServiceTracker)
ac.sysProbeConfig = deps.SysProbeConfig
deps.Lc.Append(compdef.Hook{
OnStart: func(_ context.Context) error {
ac.start()
Expand Down Expand Up @@ -600,6 +604,7 @@ func (ac *AutoConfig) addListenerCandidates(listenerConfigs []pkgconfigsetup.Lis
Filter: ac.filterStore,
Tagger: ac.taggerComp,
Wmeta: ac.wmeta,
SysProbeConfig: ac.sysProbeConfig,
StaticConfigIndex: ac.staticConfigIndex,
ServiceTracker: ac.serviceTracker,
}
Expand Down
28 changes: 25 additions & 3 deletions comp/core/autodiscovery/listeners/environment.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,27 @@ package listeners

import (
"github.com/DataDog/datadog-agent/comp/core/autodiscovery/integration"
sysprobeconfig "github.com/DataDog/datadog-agent/comp/core/sysprobeconfig/def"
workloadfilter "github.com/DataDog/datadog-agent/comp/core/workloadfilter/def"
workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def"
"github.com/DataDog/datadog-agent/pkg/config/env"
"github.com/DataDog/datadog-agent/pkg/util/log"
"github.com/DataDog/datadog-agent/pkg/util/option"
)

// sysProbeConfigCheck pairs the autodiscovery identifier of a check with the
// system-probe configuration key that switches the check on.
type sysProbeConfigCheck struct {
	adIdentifier string
	configKey    string
}

// sysProbeConfigChecks lists the checks that are activated straight from the
// system-probe configuration state, so that users who enable a feature there
// do not have to activate the matching check a second time.
var sysProbeConfigChecks = []sysProbeConfigCheck{
	{
		adIdentifier: "_oom_kill",
		configKey:    "system_probe_config.enable_oom_kill",
	},
	{
		adIdentifier: "_tcp_queue_length",
		configKey:    "system_probe_config.enable_tcp_queue_length",
	},
}

// EnvironmentListener implements a ServiceListener based on current environment
type EnvironmentListener struct {
	// newService is the send-only channel on which detected services are
	// published by createServices.
	newService chan<- Service
	// sysProbeConfig is the optional system-probe configuration component; when
	// it is present, createServices reads it to decide which
	// configuration-driven checks to emit.
	sysProbeConfig option.Option[sysprobeconfig.Component]
}

// EnvironmentService represents services generated from EnvironmentListener
Expand All @@ -27,8 +39,8 @@ type EnvironmentService struct {
var _ Service = &EnvironmentService{}

// NewEnvironmentListener creates an EnvironmentListener
func NewEnvironmentListener(ServiceListernerDeps) (ServiceListener, error) {
return &EnvironmentListener{}, nil
func NewEnvironmentListener(deps ServiceListernerDeps) (ServiceListener, error) {
return &EnvironmentListener{sysProbeConfig: deps.SysProbeConfig}, nil
}

// Listen starts the goroutine to detect checks based on environment
Expand Down Expand Up @@ -69,6 +81,16 @@ func (l *EnvironmentListener) createServices() {
log.Infof("Listener created container service from environment")
l.newService <- &EnvironmentService{adIdentifier: "_container"}
}

// Handle checks auto-activated from system-probe configuration state.
if sysProbeConfig, ok := l.sysProbeConfig.Get(); ok {
for _, check := range sysProbeConfigChecks {
if sysProbeConfig.GetBool(check.configKey) {
log.Infof("Listener created %s service from system-probe configuration", check.adIdentifier)
l.newService <- &EnvironmentService{adIdentifier: check.adIdentifier}
Comment on lines +86 to +90

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Keep sysprobe activation independent of env AD opt-out

When DD_AUTOCONFIG_FROM_ENVIRONMENT=false/autoconfig_from_environment: false, setupAutoDiscovery skips DiscoverComponentsFromEnv, so this EnvironmentListener is never added unless users configured it manually. The deleted container init script was not gated by that setting and still enabled OOM/TCPQ checks from DD_SYSTEM_PROBE_CONFIG_ENABLE_*, so existing container deployments that disable environment autodiscovery now stop scheduling these checks; move the sysprobe-config service emission to an unconditional startup path or add it independently of env feature autodiscovery.

Useful? React with 👍 / 👎.

}
}
}
}

// Equal returns whether the two EnvironmentService are equal
Expand Down
93 changes: 93 additions & 0 deletions comp/core/autodiscovery/listeners/environment_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2017-present Datadog, Inc.

package listeners

import (
"testing"

"github.com/stretchr/testify/assert"

sysprobeconfig "github.com/DataDog/datadog-agent/comp/core/sysprobeconfig/def"
sysprobeconfigmock "github.com/DataDog/datadog-agent/comp/core/sysprobeconfig/mock"
"github.com/DataDog/datadog-agent/pkg/util/option"
)

// collectEnvironmentServices points the listener at a private buffered channel,
// runs createServices synchronously, and returns the GetServiceID value of
// every service that was emitted, in emission order. The result is nil when
// nothing was emitted.
func collectEnvironmentServices(l *EnvironmentListener) []string {
	// The buffer lets createServices complete without a concurrent reader, so it
	// has to be at least as large as the number of services emitted in one run.
	services := make(chan Service, 16)
	l.newService = services
	l.createServices()
	close(services)

	var identifiers []string
	for {
		svc, open := <-services
		if !open {
			// Channel drained and closed: everything emitted has been collected.
			return identifiers
		}
		identifiers = append(identifiers, svc.GetServiceID())
	}
}

// TestEnvironmentListenerSysProbeChecks checks that the environment listener
// emits the _oom_kill and _tcp_queue_length services only when the matching
// system-probe configuration key is enabled.
func TestEnvironmentListenerSysProbeChecks(t *testing.T) {
	const (
		oomKillID         = "_oom_kill"
		tcpQueueLengthID  = "_tcp_queue_length"
		oomKillKey        = "system_probe_config.enable_oom_kill"
		tcpQueueLengthKey = "system_probe_config.enable_tcp_queue_length"
	)

	type scenario struct {
		name     string
		settings map[string]any // system-probe configuration overrides
		present  []string       // identifiers that must be emitted
		missing  []string       // identifiers that must not be emitted
	}

	scenarios := []scenario{
		{
			name:     "nothing enabled",
			settings: map[string]any{},
			missing:  []string{oomKillID, tcpQueueLengthID},
		},
		{
			name:     "oom_kill enabled",
			settings: map[string]any{oomKillKey: true},
			present:  []string{oomKillID},
			missing:  []string{tcpQueueLengthID},
		},
		{
			name:     "tcp_queue_length enabled",
			settings: map[string]any{tcpQueueLengthKey: true},
			present:  []string{tcpQueueLengthID},
			missing:  []string{oomKillID},
		},
		{
			name: "both enabled",
			settings: map[string]any{
				oomKillKey:        true,
				tcpQueueLengthKey: true,
			},
			present: []string{oomKillID, tcpQueueLengthID},
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			mockCfg := sysprobeconfigmock.NewMockWithOverrides(t, sc.settings)
			listener := &EnvironmentListener{sysProbeConfig: option.New(mockCfg)}

			emitted := collectEnvironmentServices(listener)

			// Other environment-derived services may also be emitted, so only
			// membership is asserted, never the exact list.
			for _, want := range sc.present {
				assert.Contains(t, emitted, want)
			}
			for _, unwanted := range sc.missing {
				assert.NotContains(t, emitted, unwanted)
			}
		})
	}
}

// TestEnvironmentListenerNoSysProbeConfig covers the case where no
// sysprobeconfig component is available: the listener must run without
// panicking and must not emit any of the system-probe driven services.
func TestEnvironmentListenerNoSysProbeConfig(t *testing.T) {
	listener := &EnvironmentListener{
		sysProbeConfig: option.None[sysprobeconfig.Component](),
	}

	emitted := collectEnvironmentServices(listener)

	for _, unwanted := range []string{"_oom_kill", "_tcp_queue_length"} {
		assert.NotContains(t, emitted, unwanted)
	}
}
2 changes: 2 additions & 0 deletions comp/core/autodiscovery/listeners/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/DataDog/datadog-agent/comp/core/autodiscovery/common/types"
"github.com/DataDog/datadog-agent/comp/core/autodiscovery/integration"
"github.com/DataDog/datadog-agent/comp/core/autodiscovery/telemetry"
sysprobeconfig "github.com/DataDog/datadog-agent/comp/core/sysprobeconfig/def"
tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def"
workloadfilter "github.com/DataDog/datadog-agent/comp/core/workloadfilter/def"
workloadmeta "github.com/DataDog/datadog-agent/comp/core/workloadmeta/def"
Expand Down Expand Up @@ -76,6 +77,7 @@ type ServiceListernerDeps struct {
Tagger tagger.Component
Filter workloadfilter.Component
Wmeta option.Option[workloadmeta.Component]
SysProbeConfig option.Option[sysprobeconfig.Component]
StaticConfigIndex *StaticConfigIndex
ServiceTracker types.ServiceTracker
}
Expand Down
Loading