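Fix tests: assert on the build_scenario_report_row helper call, add helper edge-case tests, and reload htmlreport in the notebook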

simonkurtz-MSFT · simonkurtz-MSFT · commit cadb4a46a16a · 2026-06-11T14:44:55.000-04:00
diff --git a/samples/inference-failover/create.ipynb b/samples/inference-failover/create.ipynb
@@ -897,8 +897,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from pathlib import Path\n",
+    "import importlib\n",
     "import tempfile\n",
+    "from pathlib import Path\n",
     "\n",
     "import matplotlib.pyplot as plt\n",
     "\n",
@@ -907,6 +908,8 @@
     "import htmlreport\n",
     "from console import print_ok, print_warning\n",
     "\n",
+    "importlib.reload(htmlreport)\n",
+    "\n",
     "if 'gpt_5_1_backend_labels' not in locals():\n",
     "    print_warning('Please run the scenario chart cell first')\n",
     "    raise SystemExit(1)\n",
diff --git a/tests/python/test_inference_failover.py b/tests/python/test_inference_failover.py
@@ -336,17 +336,7 @@ def test_inference_notebook_generates_local_html_report() -> None:
     assert "'inference-failover-report.html'" in code_source
     assert "'', 'Test #', 'Scenario', 'Requests', 'HTTP 200', 'Other', 'APIM retries', 'Priority / weight mix', 'What the data says'" in code_source
     assert "htmlreport.HtmlText(f'Scenario Outcomes: {model_name}', bold_tokens=(model_name,))" in code_source
-    assert 'if retry_count > 0' in code_source
-    assert 'caller_succeeded = not non_200_responses' in code_source
-    assert "htmlreport.HtmlSuccess('All requests returned HTTP 200')" in code_source
-    assert "else htmlreport.HtmlWarning('Some requests returned non-200 responses')" in code_source
-    assert "observation_items = tuple(f'{item.strip()}' for item in '; '.join(observations).split(';'))" in code_source
-    assert 'htmlreport.HtmlList(observation_items)' in code_source
-    assert 'htmlreport.HtmlText(retry_mix, preserve_line_breaks=True)' in code_source
-    assert 'def get_priority_and_weight(' in code_source
-    assert "weights_by_priority.setdefault(priority, []).append(f'W{weight}: {count} ({count / total_requests:.1%})')" in code_source
-    assert "priority_mix = '\\n'.join(f'P{priority}: {\", \".join(weight_mix)}'" in code_source
-    assert 'htmlreport.HtmlText(priority_mix, bold_tokens=priority_tokens, preserve_line_breaks=True)' in code_source
+    assert 'inference_failover_helpers.build_scenario_report_row(' in code_source
     assert "column_widths=['4%', '5%', '10%', '6%', '6%', '5%', '11%', '17%', '36%']" in code_source
     assert "'A-1',\n                'Baseline Warm Path'" in code_source
     assert "'B-1',\n                'Baseline Warm Path'" in code_source
@@ -358,12 +348,6 @@ def test_inference_notebook_generates_local_html_report() -> None:
     assert 'report.add_info_callout(' in code_source
     assert "'Lab Capacity Is Intentionally Low'" in code_source
     assert "'Each regional Azure OpenAI deployment is intentionally configured at 1,000 TPM so that" in code_source
-    assert 'observed_backend_failures = backend_retries_absorbed + caller_visible_failures' in code_source
-    assert 'shielded_percentage = backend_retries_absorbed / observed_backend_failures * 100' in code_source
-    assert "f'APIM absorbed {backend_retries_absorbed} backend failures and sent {caller_visible_failures} failures to callers'" in code_source
-    assert "f'APIM prevented {shielded_percentage:.1f}% of observed failed backend attempts from reaching callers'" in code_source
-    assert 'terminal_503_responses = status_counts.get(503, 0)' in code_source
-    assert 'caller-visible HTTP 503 responses followed eligible-capacity exhaustion in the low-TPM pool' in code_source
     assert "'Observed X-Backend-URL values'" not in code_source.split('report = htmlreport.HtmlReport(', maxsplit=1)[1]
     assert 'if all_scenario_requests_succeeded:' in code_source
     assert 'report.add_success_callout(' in code_source
diff --git a/tests/python/test_inference_failover_helpers.py b/tests/python/test_inference_failover_helpers.py
@@ -145,6 +145,16 @@ def test_context_manager_closes_session_after_exception():
     session.close.assert_called_once_with()
 
 
+@pytest.mark.unit
+def test_close_is_idempotent_without_an_open_session():
+    runner, session = _create_runner()
+
+    runner.close()
+    runner.close()
+
+    session.close.assert_not_called()
+
+
 @pytest.mark.unit
 def test_pause_uses_injected_sleep_and_rejects_negative_values():
     sleep = MagicMock()
@@ -209,6 +219,18 @@ def test_with_backend_identifier_leaves_frames_without_urls_unchanged():
     assert result is not source
 
 
+@pytest.mark.unit
+def test_format_gateway_distribution_leaves_frames_without_backend_urls():
+    source = pd.DataFrame([['api', 'not available', 'not available']], columns=['API', 'AverageBackendMs', 'SuccessRate'])
+
+    result = format_gateway_distribution(source)
+
+    assert result['AverageBackendMs'].tolist() == ['']
+    assert result['SuccessRate'].tolist() == ['']
+    assert 'Backend' not in result.columns
+    assert result is not source
+
+
 @pytest.mark.unit
 def test_get_priority_and_weight_parses_legend_label():
     labels = {
@@ -273,3 +295,16 @@ def test_build_scenario_report_row_reports_failover_retries_and_terminal_503():
     assert 'no resolved backend' in observations
     assert 'HTTP 503 responses' in observations
     assert 'APIM prevented 80.0%' in observations
+
+
+@pytest.mark.unit
+def test_build_scenario_report_row_reports_unresolved_non_503_failure():
+    results = [{'status_code': 429, 'backend_retry': 0, 'backend_url': 'unknown'}]
+
+    row = build_scenario_report_row('A-3', 'Capacity Exhausted', results, {}, {})
+
+    assert isinstance(row[0], HtmlWarning)
+    assert row[7] == HtmlText('No resolved backend: 1 (100.0%)', preserve_line_breaks=True)
+    observations = '\n'.join(row[8].items)
+    assert 'caller-visible failures remained' in observations
+    assert 'Deepest routed tier' not in observations