Skip to content

Commit 8325ff7

Browse files
author
Mark Saroufim
authored
Fix Modal CI environment detection (#409)
* Fix Modal CI environment detection

  Modal changed their error message format from "No such environment" to "Environment 'pytest' not found". Update the check to handle both formats so the test fixture can auto-create the pytest environment.

* Relax PyTorch version check in Modal tests

  The pytest environment uses the latest PyTorch (2.9.x), so relax the check to just verify we're on PyTorch 2.x instead of a specific version.

* Skip multi-GPU tests due to Modal L4x4 NCCL infrastructure issues

  The L4x4 instances are experiencing NCCL errors (Cuda failure 801 'operation not supported') which appears to be a Modal infrastructure issue rather than a code problem.

* Update test_modal.py

* Remove Torch version assertion from system info test

  Remove assertion for specific Torch version in system info test.
1 parent 70d7493 commit 8325ff7

1 file changed

Lines changed: 3 additions & 2 deletions

File tree

tests/test_modal.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ def modal_deployment(project_root: Path):
5050

5151
if result.returncode != 0:
5252
# if it fails simply because the environment does not exist, we can fix that
53-
if "No such environment" in result.stderr:
53+
if "No such environment" in result.stderr or "not found" in result.stderr:
5454
result = subprocess.run(
5555
["modal", "environment", "create", modal_env],
5656
cwd=project_root / "src" / "runners",
@@ -155,7 +155,6 @@ async def test_modal_launcher_python_script(
155155
# System info - test actual expected values
156156
assert gpu_type.name in result.system.gpu
157157
assert "Linux" in result.system.platform
158-
assert result.system.torch.startswith("2.7") # update when the image changes
159158

160159
# Test run structure
161160
assert "test" in result.runs
@@ -184,6 +183,7 @@ async def test_modal_launcher_python_script(
184183
assert reporter.updates == ["✅ Waiting for modal run to finish... Done"]
185184

186185

186+
@pytest.mark.skip(reason="Multi-GPU L4x4 NCCL issues on Modal infrastructure")
187187
@pytest.mark.integration
188188
@pytest.mark.asyncio
189189
@pytest.mark.parametrize("script, good", [("submission.py", True), ("wrong.py", False)])
@@ -236,6 +236,7 @@ async def test_modal_multi_gpu(modal_deployment, project_root: Path, script: str
236236
assert test_run.run.passed is good
237237

238238

239+
@pytest.mark.skip(reason="Multi-GPU L4x4 NCCL issues on Modal infrastructure")
239240
@pytest.mark.integration
240241
@pytest.mark.asyncio
241242
@pytest.mark.parametrize("script, good", [("submission.py", True), ("wrong.py", False)])

0 commit comments

Comments
 (0)