Skip to content

Commit 90697ba

Browse files
vyadavmsft authored and LiliDeng committed
Fix passthrough TCP NTTTCP setup after reboot
NTTTCP setup can reboot guests when it raises TasksMax for the 20480-connection TCP case. In passthrough scenarios, that reboot drops the test NIC's DHCP state, but the performance helper continued to use the pre-reboot NIC names and internal addresses. Add an optional post_ntttcp_setup hook to perf_ntttcp so that callers can refresh platform-specific NIC state after setup_system completes. Use it in the passthrough host/guest and two-guest TCP NTTTCP tests to rerun the passthrough NIC configuration before lagscope and NTTTCP start. Also allow passthrough TCP NTTTCP tests to use a larger client timeout tolerance, so the high-fanout client can finish emitting its final output instead of being killed during cooldown/drain. Validation: ran black, flake8, and py_compile on common.py and networkperf_passthrough.py.
1 parent 6ac3296 commit 90697ba

2 files changed

Lines changed: 40 additions & 1 deletion

File tree

lisa/microsoft/testsuites/performance/common.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import pathlib
55
import time
66
from functools import partial
7-
from typing import Any, Dict, List, Optional, Union, cast
7+
from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
88

99
from assertpy import assert_that
1010
from retry import retry
@@ -59,6 +59,9 @@
5959
from lisa.util import LisaException
6060
from lisa.util.process import ExecutableResult, Process
6161

62+
# NTTTCP may need extra time after the requested run duration to emit totals.
63+
DEFAULT_NTTTCP_CLIENT_TIMEOUT_TOLERANCE_SECONDS = 60
64+
6265

6366
def perf_nvme(
6467
node: Node,
@@ -353,6 +356,12 @@ def perf_ntttcp( # noqa: C901
353356
client_nic_name: Optional[str] = None,
354357
variables: Optional[Dict[str, Any]] = None,
355358
skip_server_task_max: bool = False,
359+
post_ntttcp_setup: Optional[
360+
Callable[[], Tuple[Optional[str], Optional[str]]]
361+
] = None,
362+
client_ntttcp_timeout_tolerance_seconds: int = (
363+
DEFAULT_NTTTCP_CLIENT_TIMEOUT_TOLERANCE_SECONDS
364+
),
356365
) -> List[Union[NetworkTCPPerformanceMessage, NetworkUDPPerformanceMessage]]:
357366
# Either server and client are set explicitly or we use the first two nodes
358367
# from the environment. We never combine the two options. We need to specify
@@ -414,6 +423,11 @@ def perf_ntttcp( # noqa: C901
414423
client_ntttcp.setup_system(udp_mode, set_task_max)
415424
# skip_server_task_max: don't reboot the baremetal host (NIC DHCP state lost).
416425
server_ntttcp.setup_system(udp_mode, set_task_max and not skip_server_task_max)
426+
if post_ntttcp_setup:
427+
# Platform setup may reboot guests and drop test NIC DHCP state.
428+
refreshed_client_nic_name, refreshed_server_nic_name = post_ntttcp_setup()
429+
client_nic_name = refreshed_client_nic_name or client_nic_name
430+
server_nic_name = refreshed_server_nic_name or server_nic_name
417431
for lagscope in [client_lagscope, server_lagscope]:
418432
lagscope.set_busy_poll()
419433
client_nic = client.nics.default_nic
@@ -556,6 +570,7 @@ def perf_ntttcp( # noqa: C901
556570
ports_count=num_threads_p,
557571
dev_differentiator=dev_differentiator,
558572
udp_mode=udp_mode,
573+
tolerance_seconds=client_ntttcp_timeout_tolerance_seconds,
559574
)
560575

561576
# Stop the server and collect results from both client

lisa/microsoft/testsuites/performance/networkperf_passthrough.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ class NetworkPerformance(TestSuite):
6262
# PPS_TIMEOUT: 3000s (50 min) - shorter for PPS tests which are less intensive
6363
TIMEOUT = 12000
6464
PPS_TIMEOUT = 3000
65+
NTTTCP_TCP_CLIENT_TIMEOUT_TOLERANCE_SECONDS = 180 # High-fanout TCP drain.
6566

6667
# Track baremetal host nodes for cleanup
6768
_baremetal_hosts: list[RemoteNode] = []
@@ -251,13 +252,23 @@ def perf_tcp_ntttcp_passthrough_host_guest(
251252
node, log_path, host_node=server
252253
)
253254

255+
def refresh_passthrough_nics() -> Tuple[Optional[str], Optional[str]]:
256+
_, refreshed_client_nic_name = self._configure_passthrough_nic_for_node(
257+
node, log_path, host_node=server
258+
)
259+
return refreshed_client_nic_name, None
260+
254261
perf_ntttcp(
255262
test_result=result,
256263
client=client,
257264
server=server,
258265
server_nic_name=self._get_host_nic_name(server),
259266
client_nic_name=client_nic_name,
260267
skip_server_task_max=True, # host: TasksMax reboot clears NIC DHCP state
268+
post_ntttcp_setup=refresh_passthrough_nics,
269+
client_ntttcp_timeout_tolerance_seconds=(
270+
self.NTTTCP_TCP_CLIENT_TIMEOUT_TOLERANCE_SECONDS
271+
),
261272
)
262273

263274
@TestCaseMetadata(
@@ -491,12 +502,25 @@ def perf_tcp_ntttcp_passthrough_two_guest(
491502
server_node, log_path
492503
)
493504

505+
def refresh_passthrough_nics() -> Tuple[Optional[str], Optional[str]]:
506+
_, refreshed_client_nic_name = self._configure_passthrough_nic_for_node(
507+
client_node, log_path
508+
)
509+
_, refreshed_server_nic_name = self._configure_passthrough_nic_for_node(
510+
server_node, log_path
511+
)
512+
return refreshed_client_nic_name, refreshed_server_nic_name
513+
494514
perf_ntttcp(
495515
test_result=result,
496516
client=client,
497517
server=server,
498518
server_nic_name=server_nic_name,
499519
client_nic_name=client_nic_name,
520+
post_ntttcp_setup=refresh_passthrough_nics,
521+
client_ntttcp_timeout_tolerance_seconds=(
522+
self.NTTTCP_TCP_CLIENT_TIMEOUT_TOLERANCE_SECONDS
523+
),
500524
)
501525

502526
@TestCaseMetadata(

0 commit comments

Comments
 (0)