Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions kinetic/cli/commands/pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def pool_add(
zone=state.zone,
cluster_name=state.cluster_name,
node_pools=all_pools,
force_destroy=state.force_destroy,
)

if preview:
Expand Down Expand Up @@ -153,6 +154,7 @@ def pool_remove(project, zone, cluster_name, pool_name, yes, preview):
zone=state.zone,
cluster_name=state.cluster_name,
node_pools=remaining,
force_destroy=state.force_destroy,
)

if preview:
Expand Down
47 changes: 46 additions & 1 deletion kinetic/cli/commands/pool_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
_SENTINEL = object()


def _make_state(node_pools=None, stack=_SENTINEL):
def _make_state(node_pools=None, stack=_SENTINEL, force_destroy=True):
"""Create a StackState for testing."""
if stack is _SENTINEL:
stack = mock.MagicMock()
Expand All @@ -24,6 +24,7 @@ def _make_state(node_pools=None, stack=_SENTINEL):
cluster_name="kinetic-cluster",
node_pools=node_pools or [],
stack=stack,
force_destroy=force_destroy,
)


Expand Down Expand Up @@ -287,6 +288,50 @@ def test_add_no_stack_shows_friendly_error(self):
self.assertIn("No Pulumi stack found", result.output)


class PoolForceDestroyPreservationTest(absltest.TestCase):
    """Pool commands must pass the existing force_destroy through unchanged."""

    def setUp(self):
        super().setUp()
        self.runner = CliRunner()
        # Patch the collaborators `pool` touches so no real infra work happens.
        self.mock_load = self.enterContext(
            mock.patch("kinetic.cli.commands.pool.load_state")
        )
        self.mock_apply = self.enterContext(
            mock.patch("kinetic.cli.commands.pool.apply_update", return_value=True)
        )
        self.enterContext(
            mock.patch(
                "kinetic.cli.commands.pool.generate_pool_name",
                return_value="gpu-l4-abcd",
            )
        )

    def _applied_config(self, args):
        """Invoke `pool` with *args*, assert it succeeded, and return the
        config object that was handed to apply_update."""
        result = self.runner.invoke(pool, args)
        self.assertEqual(result.exit_code, 0, result.output)
        return self.mock_apply.call_args[0][0]

    def test_add_preserves_no_force_destroy(self):
        self.mock_load.return_value = _make_state(force_destroy=False)

        self.assertFalse(self._applied_config(_ADD_ARGS).force_destroy)

    def test_remove_preserves_no_force_destroy(self):
        pool_cfg = NodePoolConfig(
            "gpu-l4-abcd",
            GpuConfig("l4", 1, "nvidia-l4", "g2-standard-4"),
        )
        self.mock_load.return_value = _make_state(
            node_pools=[pool_cfg], force_destroy=False
        )

        self.assertFalse(self._applied_config(_REMOVE_ARGS).force_destroy)


class PoolRemoveNoStackTest(absltest.TestCase):
def setUp(self):
super().setUp()
Expand Down
20 changes: 18 additions & 2 deletions kinetic/cli/commands/up.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from kinetic.cli.constants import DEFAULT_CLUSTER_NAME, DEFAULT_ZONE
from kinetic.cli.infra.post_deploy import configure_kubectl
from kinetic.cli.infra.state import apply_preview, apply_update, load_state
from kinetic.cli.options import common_options
from kinetic.cli.options import common_options, force_destroy_option
from kinetic.cli.output import (
banner,
config_summary,
Expand All @@ -23,6 +23,7 @@

@click.command()
@common_options
@force_destroy_option
@click.option(
"--accelerator",
default=None,
Expand All @@ -41,7 +42,16 @@
is_flag=True,
help="Preview infrastructure changes without applying them",
)
def up(project, zone, accelerator, cluster_name, min_nodes, yes, preview):
def up(
project,
zone,
accelerator,
cluster_name,
min_nodes,
yes,
preview,
force_destroy,
):
"""Provision GCP infrastructure for kinetic."""
banner("kinetic Setup")

Expand Down Expand Up @@ -75,10 +85,16 @@ def up(project, zone, accelerator, cluster_name, min_nodes, yes, preview):
check_prerequisites=False,
)

# Precedence: explicit CLI flag > existing stack state > default True.
resolved_force_destroy = (
force_destroy if force_destroy is not None else state.force_destroy
)

config = InfraConfig(
project=project,
zone=zone,
cluster_name=cluster_name,
force_destroy=resolved_force_destroy,
)

if state.node_pools:
Expand Down
48 changes: 47 additions & 1 deletion kinetic/cli/commands/up_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@
from kinetic.core.accelerators import GpuConfig, TpuConfig


def _make_state(node_pools=None):
def _make_state(node_pools=None, force_destroy=True):
    """Create a StackState for testing."""
    # Fixed fixture values; only node_pools and force_destroy vary per test.
    fixture = dict(
        project="test-project",
        zone="us-central2-b",
        cluster_name="kinetic-cluster",
        node_pools=node_pools or [],
        stack=mock.MagicMock(),
        force_destroy=force_destroy,
    )
    return StackState(**fixture)


Expand Down Expand Up @@ -210,5 +211,50 @@ def test_first_run_no_existing_stack(self):
self.assertIn("Setup Complete", result.output)


class UpCommandForceDestroyTest(absltest.TestCase):
    """Precedence for force_destroy: explicit CLI flag > state > default True."""

    def setUp(self):
        super().setUp()
        self.runner = CliRunner()
        self.mocks = _start_patches(self)

    def _force_destroy_after(self, state, extra_args=None):
        """Run `up` against *state*, assert success, and return the
        force_destroy value on the config passed to apply_update."""
        self.mocks["load_state"].return_value = state
        result = self.runner.invoke(up, _CLI_ARGS + (extra_args or []))
        self.assertEqual(result.exit_code, 0, result.output)
        return self.mocks["apply_update"].call_args[0][0].force_destroy

    def test_default_when_no_flag_and_no_existing_state(self):
        self.assertTrue(self._force_destroy_after(_make_state()))

    def test_preserves_false_from_existing_state(self):
        self.assertFalse(
            self._force_destroy_after(_make_state(force_destroy=False))
        )

    def test_explicit_flag_overrides_state(self):
        self.assertTrue(
            self._force_destroy_after(
                _make_state(force_destroy=False), ["--force-destroy"]
            )
        )

    def test_no_force_destroy_flag_overrides_default(self):
        self.assertFalse(
            self._force_destroy_after(_make_state(), ["--no-force-destroy"])
        )


if __name__ == "__main__":
absltest.main()
4 changes: 4 additions & 0 deletions kinetic/cli/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,7 @@ class InfraConfig:
zone: str = DEFAULT_ZONE
cluster_name: str = DEFAULT_CLUSTER_NAME
node_pools: list[NodePoolConfig] = field(default_factory=list)
# When True, GCS buckets are created with force_destroy=True so that
# `kinetic down` can delete them even when non-empty. Set to False to
# require manually emptying the buckets before teardown.
force_destroy: bool = True
9 changes: 7 additions & 2 deletions kinetic/cli/infra/program.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def _create_buckets(
region: str,
ar_location: str,
enabled_apis: list[gcp.projects.Service],
force_destroy: bool,
) -> tuple[gcp.storage.Bucket, gcp.storage.Bucket]:
"""Create Cloud Storage buckets for jobs and build artifacts."""
api_deps = pulumi.ResourceOptions(depends_on=enabled_apis)
Expand All @@ -119,7 +120,7 @@ def _create_buckets(
name=f"{project_id}-kn-{cluster_name}-jobs",
location=region,
project=project_id,
force_destroy=True,
force_destroy=force_destroy,
uniform_bucket_level_access=True,
lifecycle_rules=_BUCKET_LIFECYCLE_30D,
opts=api_deps,
Expand All @@ -129,7 +130,7 @@ def _create_buckets(
name=f"{project_id}-kn-{cluster_name}-builds",
location=ar_location,
project=project_id,
force_destroy=True,
force_destroy=force_destroy,
uniform_bucket_level_access=True,
lifecycle_rules=_BUCKET_LIFECYCLE_30D,
opts=api_deps,
Expand Down Expand Up @@ -485,13 +486,15 @@ def _export_stack_outputs(
ar_location: str,
cluster_name: str,
pool_entries: list[tuple[GpuConfig | TpuConfig, gcp.container.NodePool, int]],
force_destroy: bool,
) -> None:
"""Export all Pulumi stack outputs."""
pulumi.export("project", project_id)
pulumi.export("zone", zone)
pulumi.export("cluster_name", cluster.name)
pulumi.export("cluster_endpoint", cluster.endpoint)
pulumi.export("node_sa_email", node_sa.email)
pulumi.export("force_destroy", force_destroy)
pulumi.export(
"ar_registry",
repo.name.apply(
Expand Down Expand Up @@ -569,6 +572,7 @@ def pulumi_program() -> None:
region,
ar_location,
enabled_apis,
config.force_destroy,
)

node_sa, _build_sa = _create_service_accounts(
Expand Down Expand Up @@ -613,6 +617,7 @@ def pulumi_program() -> None:
ar_location,
cluster_name,
pool_entries,
config.force_destroy,
)

return pulumi_program
Expand Down
44 changes: 43 additions & 1 deletion kinetic/cli/infra/program_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,14 @@ def test_placement_policy(self, gcp_mock, tpu, expect_placement):
self.assertIsNone(placement)


def _make_config(node_pools=None):
def _make_config(node_pools=None, force_destroy=True):
"""Create a mock InfraConfig for testing."""
config = mock.MagicMock()
config.project = "test-project"
config.zone = "us-central1-a"
config.cluster_name = "test-cluster"
config.node_pools = node_pools or []
config.force_destroy = force_destroy
return config


Expand Down Expand Up @@ -140,5 +141,46 @@ def test_not_installed_for_tpu_only(self):
self.assertEmpty(gpu_calls)


class TestForceDestroy(parameterized.TestCase):
    """force_destroy on InfraConfig must flow to both GCS buckets."""

    @parameterized.named_parameters(
        dict(testcase_name="enabled", force_destroy=True),
        dict(testcase_name="disabled", force_destroy=False),
    )
    def test_buckets_receive_force_destroy(self, force_destroy):
        config = _make_config(force_destroy=force_destroy)

        # Stub out every external SDK so the Pulumi program runs inline.
        with (
            mock.patch.object(program, "pulumi"),
            mock.patch.object(program, "command"),
            mock.patch.object(program, "gcp") as gcp_mock,
            mock.patch.object(program, "k8s"),
        ):
            program.create_program(config)()

        calls = gcp_mock.storage.Bucket.call_args_list
        self.assertLen(calls, 2)
        for bucket_call in calls:
            self.assertEqual(bucket_call.kwargs["force_destroy"], force_destroy)

    def test_force_destroy_is_exported(self):
        config = _make_config(force_destroy=False)

        with (
            mock.patch.object(program, "pulumi") as pulumi_mock,
            mock.patch.object(program, "command"),
            mock.patch.object(program, "gcp"),
            mock.patch.object(program, "k8s"),
        ):
            program.create_program(config)()

        # Each export call is pulumi.export(name, value).
        exported = dict(call.args for call in pulumi_mock.export.call_args_list)
        self.assertIn("force_destroy", exported)
        self.assertFalse(exported["force_destroy"])


if __name__ == "__main__":
absltest.main()
13 changes: 13 additions & 0 deletions kinetic/cli/infra/stack_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,19 @@ def get_stack(program_fn, config):
return stack


def get_current_force_destroy(stack) -> bool:
    """Read ``force_destroy`` from stack outputs, defaulting to True.

    Stacks created before this setting was exported do not have the output;
    those stacks implicitly behaved as ``force_destroy=True``, so the same
    default is returned for backward compatibility.
    """
    try:
        exported = stack.outputs()["force_destroy"]
    except KeyError:
        return True
    return bool(exported.value)


def get_current_node_pools(stack) -> list[NodePoolConfig]:
"""Read the current node pool list from Pulumi stack exports.

Expand Down
4 changes: 4 additions & 0 deletions kinetic/cli/infra/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from kinetic.cli.constants import DEFAULT_CLUSTER_NAME, DEFAULT_ZONE
from kinetic.cli.infra.program import create_program
from kinetic.cli.infra.stack_manager import (
get_current_force_destroy,
get_current_node_pools,
get_stack,
)
Expand All @@ -32,6 +33,7 @@ class StackState:
cluster_name: str
node_pools: list[NodePoolConfig] = field(default_factory=list)
stack: auto.Stack | None = None
force_destroy: bool = True


def load_state(
Expand Down Expand Up @@ -91,13 +93,15 @@ def load_state(
warning("State refresh encountered an issue (using cached state).")

node_pools = get_current_node_pools(stack)
force_destroy = get_current_force_destroy(stack)

return StackState(
project=project,
zone=zone,
cluster_name=cluster_name,
node_pools=node_pools,
stack=stack,
force_destroy=force_destroy,
)


Expand Down
Loading