neuroinformatics-unit
diff --git a/‎docs/source/api_index.rst‎
Lines changed: 11 additions & 0 deletions b/‎docs/source/api_index.rst‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎docs/source/benchmark-dataset.md‎
Lines changed: 12 additions & 0 deletions b/‎docs/source/benchmark-dataset.md‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎poseinterface/clips.py‎
Lines changed: 245 additions & 0 deletions b/‎poseinterface/clips.py‎
Lines changed: 245 additions & 0 deletions
@@ -11,3 +11,14 @@ io
     :template: function.rst
 
     annotations_to_poseinterface
+    video_to_poseinterface
+
+clips
+-----
+.. currentmodule:: poseinterface.clips
+
+.. autosummary::
+    :toctree: api_generated
+    :template: function.rst
+
+    extract_clip
@@ -243,6 +243,18 @@ For a clip starting at frame 1000 with a duration of 5 frames, the `images` arra
 Here `id: 0` through `id: 4` are the local clip indices, while `frame-1000` through `frame-1004` in the `file_name` values refer to the original frame positions in the session video.
 :::
 
+(target-videolabels)=
+#### Intermediate file `videolabels.json`
+
+:::{note}
+This file is **not a required part of a benchmark dataset**. It is an intermediate cache file useful for data contributors when preparing labelled clips. It is documented here only because, when present, it is automatically discovered and used by the `extract-clip` command and the corresponding {func}`~poseinterface.clips.extract_clip` function.
+:::
+
+* A `videolabels.json` file uses the **same schema as [`cliplabels.json`](target-cliplabels)**, but it refers to a full video rather than to a clip of it.
+* It is produced once per video (e.g. by converting model predictions for the entire video into the `cliplabels` schema) and reused to extract any number of clip label files from that video.
+* When present alongside a session video as `sub-<subjectID>_ses-<sessionID>_cam-<camID>_videolabels.json`, the `extract-clip` command will slice it into per-clip `cliplabels.json` files matching the requested frame ranges.
+* In the `videolabels.json` file, each entry in the `images` list uses the **0-based frame index in the video** as its `id` (same convention as [frame labels](target-framelabels)).
+
 (target-startlabels)=
 ### Clip start labels (`startlabels.json`)
 
 
@@ -0,0 +1,245 @@
+"""Functions to extract clips from ``poseinterface`` videos."""
+
+import argparse
+import json
+import logging
+import sys
+from pathlib import Path
+
+import sleap_io as sio
+
+
+def extract_clip(
+    video_path: str | Path,
+    start_frame: int,
+    duration: int,
+) -> tuple[Path, Path | None]:
+    """Extract a video clip (and its clip labels if available).
+
+    Reads the source video and saves a ``.mp4`` clip to a ``Clips/``
+    subdirectory next to the source video. If a sibling
+    ``*_videolabels.json`` file exists (holding labels for the entire
+    session video, using the same schema as ``cliplabels.json``), a
+    matching ``_cliplabels.json`` containing only the annotations within
+    the requested frame range is also written.
+
+
+    Parameters
+    ----------
+    video_path
+        Path to the input ``.mp4`` video. The filename should follow
+        the convention ``sub-<subjectID>_ses-<sessionID>_cam-<camID>.mp4``, and
+        if a sibling labels file exists, its filename should be
+        ``sub-<subjectID>_ses-<sessionID>_cam-<camID>_videolabels.json``.
+    start_frame
+        Index of the first frame to include in the clip (0-based).
+    duration
+        Number of frames to include in the clip.  If ``start_frame +
+        duration`` exceeds the video length, the duration is clamped to the
+        remaining frames and a warning is logged.
+
+    Returns
+    -------
+    clip_path : Path
+        Path to the output clip file.
+    clip_json : Path | None
+        Path to the ``_cliplabels.json`` file for the clip if extracted,
+        None otherwise.
+
+    Raises
+    ------
+    ValueError
+        If ``start_frame`` is negative or ``duration`` is not positive.
+
+    Notes
+    -----
+    This function optionally consumes a ``*_videolabels.json`` file, sibling
+    to the input video file and holding labels for the entire video. This
+    file is an intermediate cache useful for data contributors: it follows
+    the same schema as ``cliplabels.json`` but it refers to the full video,
+    rather than to a clip of it. The ``*_videolabels.json`` file is not part
+    of the published benchmark dataset. For further details, see the
+    "Intermediate file: `videolabels.json`" section of the benchmark
+    dataset specification.
+
+    This function assumes that the ``id`` field in the ``images`` list of the
+    source ``*_videolabels.json`` corresponds to 0-based global frame indices
+    of the full video.
+    """
+    # Check input values
+    if start_frame < 0:
+        raise ValueError(
+            f"start_frame must be non-negative, got {start_frame}"
+        )
+    if duration <= 0:
+        raise ValueError(f"duration must be positive, got {duration}")
+
+    # Create "Clips" directory if it doesn't exist
+    video_path = Path(video_path)
+    clips_dir = video_path.parent / "Clips"
+    clips_dir.mkdir(parents=True, exist_ok=True)
+
+    # Read video as array
+    video = sio.load_video(video_path)
+    logging.info(
+        f"filename: {video_path.name}, fps: {video.fps}, shape: {video.shape}"
+    )
+
+    # Clamp duration if it exceeds the video length
+    if start_frame + duration > video.shape[0]:
+        duration = video.shape[0] - start_frame
+        logging.warning(
+            "Clip exceeds video length. "
+            f"Clamping duration to {duration} frames."
+        )
+
+    # Slice clip and save as mp4
+    clip = video[start_frame : start_frame + duration]
+    clip_path = (
+        clips_dir / f"{video_path.stem}_start-{start_frame}_dur-{duration}.mp4"
+    )
+    sio.save_video(clip, clip_path, fps=video.fps)
+
+    # Generate cliplabels.json only if a companion videolabels.json file exists
+    video_json = video_path.parent / f"{video_path.stem}_videolabels.json"
+    if video_json.exists():
+        clip_json = _extract_cliplabels(
+            video_path, clips_dir, start_frame, duration
+        )
+        logging.info(
+            f"Extracted clip {clip_path.name} with labels {clip_json.name} "
+            f"({duration} frames from start_frame={start_frame})."
+        )
+    else:
+        clip_json = None
+        logging.info(
+            f"Extracted clip {clip_path.name} "
+            f"({duration} frames from start_frame={start_frame}). "
+            "No companion *_videolabels.json found; skipping label extraction."
+        )
+
+    return clip_path, clip_json
+
+
+def _extract_cliplabels(
+    video_path: Path, clips_dir: Path, start_frame: int, duration: int
+) -> Path:
+    """Extract clip labels from the sibling *_videolabels.json file."""
+    # Read file with labels for the whole video
+    video_json = video_path.parent / f"{video_path.stem}_videolabels.json"
+    with open(video_json) as f:
+        video_labels = json.load(f)
+
+    # Compute clip end frame
+    end_frame = start_frame + duration
+
+    # Keep only data from the images in the clip, re-indexing ids to be
+    # 0-based within the clip. file_name is left untouched to retain in it
+    # the global (video-based) frame index
+    clip_labels = {}
+    clip_labels["images"] = [
+        {
+            **img,
+            "id": img["id"] - start_frame,  # overwrite id
+        }
+        for img in video_labels["images"]
+        if start_frame <= img["id"] < end_frame
+    ]
+
+    # Keep only annotations within the clip, remapping image_id to the local
+    # (clip-based) frame index, and renumbering annotation ids to be 1-based
+    # within the clip.
+    clip_labels["annotations"] = [
+        {
+            **annot,
+            "image_id": annot["image_id"] - start_frame,  # overwrite image_id
+            "id": new_id,
+        }
+        for new_id, annot in enumerate(
+            (
+                ant
+                for ant in video_labels["annotations"]
+                if start_frame <= ant["image_id"] < end_frame
+            ),  # generator lazily yields only annotations within the clip
+            start=1,  # annotation ids are 1-based within clip
+        )
+    ]
+    # pass categories unchanged
+    clip_labels["categories"] = video_labels["categories"]
+
+    # Save json with filtered data to clips directory
+    clip_json = (
+        clips_dir / f"{video_path.stem}_"
+        f"start-{start_frame}_dur-{duration}_cliplabels.json"
+    )
+    with open(clip_json, "w") as f:
+        json.dump(clip_labels, f)
+
+    return clip_json
+
+
+def main(args: argparse.Namespace) -> None:
+    """Run clip extraction from parsed command-line arguments.
+
+    Parameters
+    ----------
+    args
+        Parsed arguments containing ``video_path``, ``start_frame``,
+        and ``duration``.
+    """
+    # Extract clip
+    extract_clip(args.video_path, args.start_frame, args.duration)
+
+
+def parse_args(args: list[str]) -> argparse.Namespace:
+    """Parse command-line arguments for clip extraction.
+
+    Parameters
+    ----------
+    args
+        List of command-line argument strings (e.g. ``sys.argv[1:]``).
+
+    Returns
+    -------
+    argparse.Namespace
+        Parsed arguments with attributes ``video_path`` (str),
+        ``start_frame`` (int), and ``duration`` (int).
+    """
+    parser = argparse.ArgumentParser(
+        description=(
+            "Extract clips from video (and corresponding "
+            "clip labels if available)."
+        )
+    )
+    parser.add_argument(
+        "--video_path",
+        type=str,
+        required=True,
+        help="Path to video file to clip. The filename should follow "
+        "the convention ``sub-<subjectID>_ses-<sessionID>_cam-<camID>.mp4``, "
+        "and if a sibling labels file exists, its filename should be "
+        "``sub-<subjectID>_ses-<sessionID>_cam-<camID>_videolabels.json``.",
+    )
+    parser.add_argument(
+        "--start_frame",
+        type=int,
+        required=True,
+        help="Start frame of the clip as a 0-based index.",
+    )
+    parser.add_argument(
+        "--duration",
+        type=int,
+        required=True,
+        help="Total length of the output clip in frames",
+    )
+    return parser.parse_args(args)
+
+
+def wrapper() -> None:
+    """Entry point for the ``extract-clip`` console script."""
+    args = parse_args(sys.argv[1:])
+    main(args)
+
+
+# Allow running this module directly as a script; it goes through the same
+# ``wrapper`` function used as the console-script entry point.
+if __name__ == "__main__":
+    wrapper()