Fix tests: expect multi_gpu and device_count fields in results; wrap over-long lines and add noqa for C901

ngc92 · ngc92 · commit aa3e6ae013e2 · 2025-08-27T18:39:40.000+02:00
diff --git a/src/libkernelbot/run_eval.py b/src/libkernelbot/run_eval.py
@@ -218,7 +218,9 @@ def compile_cuda_script(  # # noqa: C901
     )
 
 
-def run_program(args: list[str], seed: Optional[int], timeout: int, multi_gpu: bool = False) -> RunResult:
+def run_program(
+    args: list[str], seed: Optional[int], timeout: int, multi_gpu: bool = False
+) -> RunResult:
     print("[Running]")
     # set up a pipe so the tester can communicate its verdict with us
     env = os.environ.copy()
@@ -229,6 +231,7 @@ def run_program(args: list[str], seed: Optional[int], timeout: int, multi_gpu: b
 
     if multi_gpu:
         import torch
+
         env["POPCORN_GPUS"] = str(torch.cuda.device_count())
 
     execution_start_time = time.perf_counter()
@@ -302,7 +305,9 @@ def run_single_evaluation(
         with tempfile.NamedTemporaryFile("w") as tests_file:
             tests_file.write(tests)
             tests_file.flush()
-            return run_program(call + [mode, tests_file.name], seed=seed, timeout=test_timeout, multi_gpu=multi_gpu)
+            return run_program(
+                call + [mode, tests_file.name], seed=seed, timeout=test_timeout, multi_gpu=multi_gpu
+            )
     elif mode in ["benchmark", "profile", "leaderboard"]:
         timeout = ranked_timeout if mode == "leaderboard" else benchmark_timeout
         with tempfile.NamedTemporaryFile("w") as bench_file:
@@ -311,7 +316,9 @@ def run_single_evaluation(
             else:
                 bench_file.write(benchmarks)
             bench_file.flush()
-            return run_program(call + [mode, bench_file.name], seed=seed, timeout=timeout, multi_gpu=multi_gpu)
+            return run_program(
+                call + [mode, bench_file.name], seed=seed, timeout=timeout, multi_gpu=multi_gpu
+            )
     else:
         raise ValueError(f"Invalid mode {mode}")
 
diff --git a/src/libkernelbot/task.py b/src/libkernelbot/task.py
@@ -114,7 +114,7 @@ class LeaderboardDefinition:
     templates: dict[str, str] = dataclasses.field(default_factory=dict)
 
 
-def make_task_definition(yaml_file: str | Path) -> LeaderboardDefinition:
+def make_task_definition(yaml_file: str | Path) -> LeaderboardDefinition:  # noqa: C901
     if Path(yaml_file).is_dir():
         yaml_file = Path(yaml_file) / "task.yml"
 
diff --git a/tests/test_backend.py b/tests/test_backend.py
@@ -153,6 +153,7 @@ async def test_submit_leaderboard(bot: backend.KernelBackend, task_directory):
         "lang": "py",
         "main": "kernel.py",
         "mode": "leaderboard",
+        "multi_gpu": False,
         "ranked_timeout": 180,
         "ranking_by": "geom",
         "seed": 1337,
@@ -206,6 +207,7 @@ async def test_submit_leaderboard(bot: backend.KernelBackend, task_directory):
                     "start_time": eval_result.start.replace(tzinfo=datetime.timezone.utc),
                     "system": {
                         "cpu": "Intel i9-12900K",
+                        "device_count": 1,
                         "gpu": "NVIDIA RTX 4090",
                         "platform": "Linux-5.15.0",
                         "torch": "2.0.1+cu118",
@@ -310,6 +312,7 @@ async def test_submit_full(bot: backend.KernelBackend, task_directory):
                     "start_time": ANY,
                     "system": {
                         "cpu": "Intel i9-12900K",
+                        "device_count": 1,
                         "gpu": "NVIDIA RTX 4090",
                         "platform": "Linux-5.15.0",
                         "torch": "2.0.1+cu118",
@@ -351,6 +354,7 @@ async def test_submit_full(bot: backend.KernelBackend, task_directory):
                     "start_time": ANY,
                     "system": {
                         "cpu": "Intel i9-12900K",
+                        "device_count": 1,
                         "gpu": "NVIDIA RTX 4090",
                         "platform": "Linux-5.15.0",
                         "torch": "2.0.1+cu118",
diff --git a/tests/test_modal.py b/tests/test_modal.py
@@ -187,12 +187,10 @@ async def test_modal_launcher_python_script(
 @pytest.mark.integration
 @pytest.mark.asyncio
 @pytest.mark.parametrize("script, good", [("submission.py", True), ("wrong.py", False)])
-async def test_modal_multi_gpu(
-    modal_deployment, project_root: Path, script: str, good: bool
-):
+async def test_modal_multi_gpu(modal_deployment, project_root: Path, script: str, good: bool):
     """
-    This isn't really a modal test, but instead a test using modal to check that multi-gpu submission
-    testing works (on modal...).
+    This isn't really a modal test, but instead a test using modal to check
+    that multi-gpu submission testing works (on modal...).
     """
     launcher = ModalLauncher(add_include_dirs=[])
     reporter = MockProgressReporter("progress")
@@ -245,8 +243,8 @@ async def test_modal_multi_gpu_benchmark(
     modal_deployment, project_root: Path, script: str, good: bool
 ):
     """
-    This isn't really a modal test, but instead a test using modal to check that multi-gpu submission
-    testing works (on modal...).
+    This isn't really a modal test, but instead a test using modal
+    to check that multi-gpu submission testing works (on modal...).
     """
     launcher = ModalLauncher(add_include_dirs=[])
     reporter = MockProgressReporter("progress")