Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/evaluation.md
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,8 @@ When evaluating multiple environments, the display shows an overview panel at th
| Flag | Short | Default | Description |
|------|-------|---------|-------------|
| `--verbose` | `-v` | false | Enable debug logging |
| `--fullscreen` | `-f` | false | Use alternate screen buffer (fullscreen) for the Rich display |
| `--disable-tui` | `-d` | false | Disable Rich display; use normal logging and tqdm progress |
| `--tui` | `-u` | false | Use alternate screen mode (TUI) for display |
| `--debug` | `-d` | false | Disable Rich display; use normal logging and tqdm progress |
| `--abbreviated-summary` | `-A` | false | Abbreviated summary: show settings and stats, skip example prompts |
| `--output-dir` | `-o` | — | Custom output directory for evaluation results and logs |
| `--save-results` | `-s` | false | Save results to disk |
Expand Down
4 changes: 2 additions & 2 deletions tests/test_eval_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ def _run_cli(
"hf_hub_dataset_name": "",
"extra_env_kwargs": {},
"max_retries": 0,
"fullscreen": False,
"disable_tui": False,
"tui": False,
"debug": False,
"abbreviated_summary": False,
"heartbeat_url": None,
}
Expand Down
23 changes: 8 additions & 15 deletions verifiers/scripts/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,14 +392,14 @@ def build_parser() -> argparse.ArgumentParser:
help='Extra environment as JSON object (e.g., \'{"key": "value", "num": 42}\'). Passed to environment constructor.',
)
parser.add_argument(
"--fullscreen",
"-f",
"--tui",
"-u",
default=False,
action="store_true",
help="Use fullscreen (alternate-screen) mode for the Rich live evaluation display",
help="Use TUI mode for live evaluation display",
)
parser.add_argument(
"--disable-tui",
"--debug",
"-d",
default=False,
action="store_true",
Expand Down Expand Up @@ -449,14 +449,7 @@ def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
def main(argv: list[str] | None = None):
args = parse_args(argv)

if args.disable_tui and args.fullscreen:
raise SystemExit(
"error: --disable-tui and --fullscreen are mutually exclusive "
"(--disable-tui turns off the Rich display entirely; --fullscreen only "
"controls whether the Rich display uses the alternate screen buffer)."
)

if args.disable_tui: # only set up console logging when TUI is disabled
if args.debug: # only set up console logging in debug mode
setup_logging(get_log_level(args.verbose))

# Build raw configs: both paths produce list[dict]
Expand Down Expand Up @@ -738,7 +731,7 @@ def build_eval_config(raw: dict) -> EvalConfig:
num_workers=raw.get("num_workers", "auto"),
disable_env_server=raw.get("disable_env_server", False),
verbose=raw.get("verbose", False),
disable_tui=raw.get("disable_tui", False),
debug=raw.get("debug", False),
state_columns=raw.get("state_columns", []),
save_results=raw.get("save_results", False),
resume_path=resume_path,
Expand Down Expand Up @@ -767,13 +760,13 @@ def build_eval_config(raw: dict) -> EvalConfig:
eval_run_config = EvalRunConfig(
evals=eval_configs, heartbeat_url=args.heartbeat_url
)
if args.disable_tui:
if args.debug:
asyncio.run(run_evaluations(eval_run_config))
else:
asyncio.run(
run_evaluations_tui(
eval_run_config,
fullscreen=args.fullscreen,
tui_mode=args.tui,
compact=args.abbreviated_summary,
)
)
Expand Down
2 changes: 1 addition & 1 deletion verifiers/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ class EvalConfig(BaseModel):
disable_env_server: bool = False
# logging
verbose: bool = False
disable_tui: bool = False
debug: bool = False
# saving
output_dir: str | None = None
state_columns: list[str] | None = None
Expand Down
2 changes: 1 addition & 1 deletion verifiers/utils/display_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def refresh(self) -> None:

def get_log_hint(self) -> Text | None:
"""Return an optional hint for viewing full logs."""
return Text("full logs: --disable-tui", style="dim")
return Text("full logs: --debug", style="dim")

def _make_log_panel(self) -> Panel:
"""Create a panel showing recent log messages with placeholder lines."""
Expand Down
13 changes: 6 additions & 7 deletions verifiers/utils/eval_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,7 @@ def load_toml_config(
"disable_env_server",
# logging
"verbose",
"disable_tui",
"debug",
# saving
"output_dir",
"state_columns",
Expand Down Expand Up @@ -818,7 +818,7 @@ async def run_evaluation(
num_workers=num_workers,
log_level=get_log_level(config.verbose),
log_dir=log_dir,
console_logging=config.disable_tui,
console_logging=config.debug,
)
if on_log_file is not None:
from verifiers.serve import EnvServer
Expand Down Expand Up @@ -916,14 +916,13 @@ async def run_evaluations(config: EvalRunConfig) -> None:


async def run_evaluations_tui(
config: EvalRunConfig, fullscreen: bool = False, compact: bool = False
config: EvalRunConfig, tui_mode: bool = True, compact: bool = False
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default value of the `tui_mode` parameter was changed from `False` to `True`

Medium Severity

The run_evaluations_tui function's tui_mode parameter default was changed from False (as fullscreen had before this revert) to True. This means any caller that doesn't explicitly pass tui_mode will now get alternate-screen (fullscreen) mode by default, which is the opposite of the previous behavior. The CLI call site in eval.py passes args.tui explicitly so it's unaffected, but this changes the public API contract for anyone calling the function directly.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit cd863d7. Configure here.

) -> None:
"""Run multi-environment evaluation with a Rich display.

Args:
config: Evaluation run configuration.
fullscreen: If True, use alternate screen buffer (--fullscreen flag).
If False, refresh in-place.
tui_mode: If True, use alternate screen (--tui flag). If False, refresh in-place.
compact: If True, show compact summary (settings + stats, skip example prompts).
"""
from verifiers.utils.eval_display import EvalDisplay, is_tty
Expand All @@ -940,7 +939,7 @@ async def run_evaluations_tui(

heart = Heartbeat(config.heartbeat_url)

display = EvalDisplay(config.evals, screen=fullscreen, compact=compact)
display = EvalDisplay(config.evals, screen=tui_mode, compact=compact)

async def run_with_progress(
env_config: EvalConfig, env_idx: int
Expand Down Expand Up @@ -1044,7 +1043,7 @@ def refresh_loop() -> None:
)

display.refresh()
if fullscreen:
if tui_mode:
await display.wait_for_exit()
finally:
refresh_stop.set()
Expand Down
Loading