neuroinformatics-unit · themattinthehatt · May 29, 2026 · May 29, 2026 · May 29, 2026 · May 29, 2026
diff --git a/docs/source/api_index.rst b/docs/source/api_index.rst
@@ -25,7 +25,8 @@ clips
     :toctree: api_generated
     :template: function.rst
 
-    extract_clip
+    extract_single_clip
+    extract_clips
 
 
 utils

diff --git a/docs/source/benchmark-dataset.md b/docs/source/benchmark-dataset.md
@@ -248,12 +248,12 @@ Here `id: 0` through `id: 4` are the local clip indices, while `frame-1000` thro
 #### Intermediate file `videolabels.json`
 
 :::{note}
-This file is **not a required part of a benchmark dataset**. It is an intermediate cache file useful for data contributors when preparing labelled clips, and it is documented here only because it is optionally auto-discovered by the `extract-clip` command and the corresponding {func}`~poseinterface.clips.extract_clip` function.
+This file is **not a required part of a benchmark dataset**. It is an intermediate cache file useful for data contributors when preparing labelled clips, and it is documented here only because it is optionally auto-discovered by the `extract-clips` command and the corresponding {func}`~poseinterface.clips.extract_clips` function.
 :::
 
 * A `videolabels.json` file uses the **same schema as [`cliplabels.json`](target-cliplabels)**, but it refers to a full video rather than to a clip of it.
 * It is produced once per video (e.g. by converting model predictions for the entire video into the `cliplabels` schema) and reused to extract any number of clip label files from that video.
-* When present alongside a session video as `sub-<subjectID>_ses-<sessionID>_cam-<camID>_videolabels.json`, the `extract-clip` command will slice it into per-clip `cliplabels.json` files matching the requested frame ranges.
+* When present alongside a session video as `sub-<subjectID>_ses-<sessionID>_cam-<camID>_videolabels.json`, the `extract-clips` command will slice it into per-clip `cliplabels.json` files matching the requested frame ranges.
 * In the `videolabels.json` file, each entry in the `images` list uses the **0-based frame index in the video** as its `id` (same convention as [frame labels](target-framelabels)).
 
 (target-startlabels)=

diff --git a/examples/convert_dlc_to_benchmark.py b/examples/convert_dlc_to_benchmark.py
@@ -16,7 +16,7 @@
 from pathlib import Path
 
 import poseinterface
-from poseinterface.clips import extract_clip
+from poseinterface.clips import extract_single_clip
 from poseinterface.io import (
     annotations_to_poseinterface,
     frames_to_poseinterface,
@@ -270,7 +270,7 @@
     )
 
     for start_frame in start_frames:
-        clip_path, _ = extract_clip(
+        clip_path, _ = extract_single_clip(
             video_path=session_dir / f"{sub_ses_cam_prefix}.mp4",
             start_frame=start_frame,
             duration=duration,

diff --git a/poseinterface/clips.py b/poseinterface/clips.py
@@ -1,4 +1,35 @@
-"""Functions to extract clips from ``poseinterface`` videos."""
+"""Functions to extract clips from ``poseinterface`` videos.
+
+Module structure
+----------------
+The module is organised around three layers:
+
+**Core extraction** (:func:`extract_clip`, :func:`extract_clips`)
+    ``extract_clip`` writes a single ``.mp4`` clip (and its
+    ``_cliplabels.json`` if a sibling ``*_videolabels.json`` exists).
+    ``extract_clips`` applies it over a list of start frames.
+
+**Start-frame generators** (:func:`_uniform_start_frames`, …)
+    Pure functions that return a ``list[int]`` of start frames given
+    video-level parameters.  They know nothing about I/O.
+
+**Strategy wrappers** (:func:`extract_clips_uniform`, …)
+    Thin public functions that pair a start-frame generator with
+    ``extract_clips``.  These are the entry points wired to the CLI.
+
+Adding a new sampling strategy
+-------------------------------
+1. Write a start-frame generator ``_<name>_start_frames(...)  -> list[int]``
+   that accepts whatever parameters the strategy needs and returns start
+   frames.  Validate its own inputs and raise ``ValueError`` on bad input.
+2. Write a wrapper ``extract_clips_<name>(video_path, duration, ...)``
+   that calls your generator, then passes the result to :func:`extract_clips`.
+3. Register the strategy in :func:`parse_args` by adding ``"<name>"`` to
+   the ``choices`` list of ``--sampling``, and a corresponding
+   ``--<parameter>`` argument.
+4. Dispatch to ``extract_clips_<name>`` in :func:`main`, following the
+   pattern of the existing ``"uniform"`` branch.
+"""
 
 import argparse
 import json
@@ -9,7 +40,16 @@
 import sleap_io as sio
 
 
-def extract_clip(
+def _validate_clip_request(start_frame: int, duration: int) -> None:
+    if start_frame < 0:
+        raise ValueError(
+            f"start_frame must be non-negative, got {start_frame}"
+        )
+    if duration <= 0:
+        raise ValueError(f"duration must be positive, got {duration}")
+
+
+def extract_single_clip(
     video_path: str | Path,
     start_frame: int,
     duration: int,
@@ -23,7 +63,6 @@ def extract_clip(
     matching ``_cliplabels.json`` containing only the annotations within
     the requested frame range is also written.
 
-
     Parameters
     ----------
     video_path
@@ -66,13 +105,7 @@ def extract_clip(
     source ``*_videolabels.json`` corresponds to 0-based global frame indices
     of the full video.
     """
-    # Check input values
-    if start_frame < 0:
-        raise ValueError(
-            f"start_frame must be non-negative, got {start_frame}"
-        )
-    if duration <= 0:
-        raise ValueError(f"duration must be positive, got {duration}")
+    _validate_clip_request(start_frame, duration)
 
     # Create "Clips" directory if it doesn't exist
     video_path = Path(video_path)
@@ -121,6 +154,103 @@ def extract_clip(
     return clip_path, clip_json
 
 
+def extract_clips(
+    video_path: Path,
+    duration: int,
+    start_frames: list[int],
+) -> list[tuple[Path, Path | None]]:
+    """Extract multiple clips from a video.
+
+    Parameters
+    ----------
+    video_path
+        Path to the input ``.mp4`` video. See :func:`extract_clip` for
+        naming conventions.
+    duration
+        Number of frames per clip, common to all clips.
+    start_frames
+        Start frame indices (0-based) for each clip.
+
+    Returns
+    -------
+    list[tuple[Path, Path | None]]
+        One ``(clip_path, clip_json)`` tuple per extracted clip, in the
+        same order as ``start_frames``. ``clip_json`` is ``None`` when no
+        sibling ``*_videolabels.json`` file exists.
+    """
+    # duration and start_frame validated in each call to extract_clip()
+    return [
+        extract_single_clip(video_path, sf, duration) for sf in start_frames
+    ]
+
+
+def extract_clips_uniform(
+    video_path: Path,
+    duration: int,
+    num_clips: int,
+) -> list[tuple[Path, Path | None]]:
+    """Extract uniformly spaced clips from a video.
+
+    Parameters
+    ----------
+    video_path
+        Path to the input ``.mp4`` video. See :func:`extract_clip` for
+        naming conventions.
+    duration
+        Number of frames per clip.
+    num_clips
+        Number of clips to extract, spaced evenly across the video via
+        :func:`_uniform_start_frames`.
+
+    Returns
+    -------
+    list[tuple[Path, Path | None]]
+        See :func:`extract_clips`.
+    """
+    n_frames = sio.load_video(Path(video_path)).shape[0]
+    frames = _uniform_start_frames(num_clips, duration, n_frames)
+    return extract_clips(video_path, duration, frames)
+
+
+def _uniform_start_frames(
+    num_clips: int, duration: int, n_frames: int
+) -> list[int]:
+    """Compute uniformly spaced clip start frames.
+
+    The video is divided into ``num_clips`` equal segments of
+    ``n_frames / num_clips`` frames each; each clip starts at the
+    beginning of its segment.
+
+    Parameters
+    ----------
+    num_clips
+        Number of clips to extract.
+    duration
+        Length of each clip in frames.
+    n_frames
+        Total number of frames in the video.
+
+    Returns
+    -------
+    list[int]
+        Sorted list of ``num_clips`` start frames.
+
+    Raises
+    ------
+    ValueError
+        If ``num_clips`` is not positive or ``duration`` exceeds
+        ``n_frames``.
+    """
+    if num_clips <= 0:
+        raise ValueError(f"num_clips must be positive, got {num_clips}")
+    if duration > n_frames:
+        raise ValueError(
+            f"duration ({duration}) exceeds video length ({n_frames})"
+        )
+    step = n_frames / num_clips
+    return [round(i * step) for i in range(num_clips)]
+
+
 def _extract_cliplabels(
     video_path: Path, clips_dir: Path, start_frame: int, duration: int
 ) -> Path:
@@ -179,20 +309,28 @@ def _extract_cliplabels(
 
 
 def main(args: argparse.Namespace) -> None:
-    """Run clip extraction from parsed command-line arguments.
-
-    Parameters
-    ----------
-    args
-        Parsed arguments containing ``video_path``, ``start_frame``,
-        and ``duration``.
-    """
-    # Extract clip
-    extract_clip(args.video_path, args.start_frame, args.duration)
+    """Run multi-clip extraction from parsed command-line arguments."""
+    try:
+        if args.sampling == "uniform":
+            if args.num_clips is None:
+                raise SystemExit(
+                    "error: --num_clips is required when --sampling uniform"
+                )
+            extract_clips_uniform(
+                args.video_path, args.duration, args.num_clips
+            )
+        elif args.sampling == "manual":
+            if not args.start_frames:
+                raise SystemExit(
+                    "error: --start_frames is required when --sampling manual"
+                )
+            extract_clips(args.video_path, args.duration, args.start_frames)
+    except ValueError as e:
+        raise SystemExit(f"error: {e}")
 
 
 def parse_args(args: list[str]) -> argparse.Namespace:
-    """Parse command-line arguments for clip extraction.
+    """Parse command-line arguments for multi-clip extraction.
 
     Parameters
     ----------
@@ -203,11 +341,12 @@ def parse_args(args: list[str]) -> argparse.Namespace:
     -------
     argparse.Namespace
         Parsed arguments with attributes ``video_path`` (str),
-        ``start_frame`` (int), and ``duration`` (int).
+        ``duration`` (int), ``sampling`` (str), ``num_clips``
+        (int | None), and ``start_frames`` (list[int] | None).
     """
     parser = argparse.ArgumentParser(
         description=(
-            "Extract clips from video (and corresponding "
+            "Extract multiple clips from a video (and corresponding "
             "clip labels if available)."
         )
     )
@@ -221,22 +360,45 @@ def parse_args(args: list[str]) -> argparse.Namespace:
         "``sub-<subjectID>_ses-<sessionID>_cam-<camID>_videolabels.json``.",
     )
     parser.add_argument(
-        "--start_frame",
+        "--duration",
         type=int,
         required=True,
-        help="Start frame of the clip as a 0-based index.",
+        help="Number of frames per clip.",
     )
     parser.add_argument(
-        "--duration",
-        type=int,
+        "--sampling",
+        type=str,
         required=True,
-        help="Total length of the output clip in frames",
+        choices=["uniform", "manual"],
+        help=(
+            "Clip selection strategy. "
+            "'uniform': evenly space clips across the video "
+            "(requires --num_clips). "
+            "'manual': use explicit start frames "
+            "(requires --start_frames)."
+        ),
+    )
+    parser.add_argument(
+        "--num_clips",
+        type=int,
+        default=None,
+        help="Number of clips to extract. Required when --sampling uniform.",
+    )
+    parser.add_argument(
+        "--start_frames",
+        type=int,
+        nargs="+",
+        default=None,
+        help=(
+            "Start frame indices (0-based, space-separated). "
+            "Required when --sampling manual."
+        ),
     )
     return parser.parse_args(args)
 
 
 def wrapper() -> None:
-    """Entry point for the ``extract-clip`` console script."""
+    """Entry point for the ``extract-clips`` console script."""
     args = parse_args(sys.argv[1:])
     main(args)
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -63,7 +63,7 @@ docs = [
 ]
 
 [project.scripts]
-extract-clip = "poseinterface.clips:wrapper"
+extract-clips = "poseinterface.clips:wrapper"
 
 [build-system]
 requires = [

diff --git a/tests/test_unit/test_cli.py b/tests/test_unit/test_cli.py
@@ -3,5 +3,5 @@
 
 def test_smoke():
     """Smoke test to check entry point."""
-    result = subprocess.run(["extract-clip", "--help"])
+    result = subprocess.run(["extract-clips", "--help"])
     assert result.returncode == 0