Skip to content
Open
3 changes: 2 additions & 1 deletion docs/source/api_index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ clips
:toctree: api_generated
:template: function.rst

extract_clip
extract_single_clip
extract_clips


utils
Expand Down
4 changes: 2 additions & 2 deletions docs/source/benchmark-dataset.md
Original file line number Diff line number Diff line change
Expand Up @@ -248,12 +248,12 @@ Here `id: 0` through `id: 4` are the local clip indices, while `frame-1000` thro
#### Intermediate file `videolabels.json`

:::{note}
This file is **not a required part of a benchmark dataset**. It is an intermediate cache file useful for data contributors when preparing labelled clips, and it is documented here only because it is optionally auto-discovered by the `extract-clip` command and the corresponding {func}`~poseinterface.clips.extract_clip` function.
This file is **not a required part of a benchmark dataset**. It is an intermediate cache file useful for data contributors when preparing labelled clips, and it is documented here only because it is optionally auto-discovered by the `extract-clips` command and the corresponding {func}`~poseinterface.clips.extract_clips` function.
:::

* A `videolabels.json` file uses the **same schema as [`cliplabels.json`](target-cliplabels)**, but it refers to a full video rather than to a clip of it.
* It is produced once per video (e.g. by converting model predictions for the entire video into the `cliplabels` schema) and reused to extract any number of clip label files from that video.
* When present alongside a session video as `sub-<subjectID>_ses-<sessionID>_cam-<camID>_videolabels.json`, the `extract-clip` command will slice it into per-clip `cliplabels.json` files matching the requested frame ranges.
* When present alongside a session video as `sub-<subjectID>_ses-<sessionID>_cam-<camID>_videolabels.json`, the `extract-clips` command will slice it into per-clip `cliplabels.json` files matching the requested frame ranges.
* In the `videolabels.json` file, each entry in the `images` list uses the **0-based frame index in the video** as its `id` (same convention as [frame labels](target-framelabels)).

(target-startlabels)=
Expand Down
4 changes: 2 additions & 2 deletions examples/convert_dlc_to_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from pathlib import Path

import poseinterface
from poseinterface.clips import extract_clip
from poseinterface.clips import extract_single_clip
from poseinterface.io import (
annotations_to_poseinterface,
frames_to_poseinterface,
Expand Down Expand Up @@ -270,7 +270,7 @@
)

for start_frame in start_frames:
clip_path, _ = extract_clip(
clip_path, _ = extract_single_clip(
video_path=session_dir / f"{sub_ses_cam_prefix}.mp4",
start_frame=start_frame,
duration=duration,
Expand Down
220 changes: 191 additions & 29 deletions poseinterface/clips.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,35 @@
"""Functions to extract clips from ``poseinterface`` videos."""
"""Functions to extract clips from ``poseinterface`` videos.

Module structure
----------------
The module is organised around three layers:

**Core extraction** (:func:`extract_clip`, :func:`extract_clips`)
``extract_clip`` writes a single ``.mp4`` clip (and its
``_cliplabels.json`` if a sibling ``*_videolabels.json`` exists).
``extract_clips`` applies it over a list of start frames.

**Start-frame generators** (:func:`_uniform_start_frames`, …)
Pure functions that return a ``list[int]`` of start frames given
video-level parameters. They know nothing about I/O.

**Strategy wrappers** (:func:`extract_clips_uniform`, …)
Thin public functions that pair a start-frame generator with
``extract_clips``. These are the entry points wired to the CLI.

Adding a new sampling strategy
-------------------------------
1. Write a start-frame generator ``_<name>_start_frames(...) -> list[int]``
that accepts whatever parameters the strategy needs and returns start
frames. Validate its own inputs and raise ``ValueError`` on bad input.
2. Write a wrapper ``extract_clips_<name>(video_path, duration, ...)``
that calls your generator, then passes the result to :func:`extract_clips`.
3. Register the strategy in :func:`parse_args` by adding ``"<name>"`` to
the ``choices`` list of ``--sampling``, and a corresponding
``--<parameter>`` argument.
4. Dispatch to ``extract_clips_<name>`` in :func:`main`, following the
pattern of the existing ``"uniform"`` branch.
"""

import argparse
import json
Expand All @@ -9,7 +40,16 @@
import sleap_io as sio


def extract_clip(
def _validate_clip_request(start_frame: int, duration: int) -> None:
if start_frame < 0:
raise ValueError(
f"start_frame must be non-negative, got {start_frame}"
)
if duration <= 0:
raise ValueError(f"duration must be positive, got {duration}")


def extract_single_clip(
video_path: str | Path,
start_frame: int,
duration: int,
Expand All @@ -23,7 +63,6 @@ def extract_clip(
matching ``_cliplabels.json`` containing only the annotations within
the requested frame range is also written.


Parameters
----------
video_path
Expand Down Expand Up @@ -66,13 +105,7 @@ def extract_clip(
source ``*_videolabels.json`` corresponds to 0-based global frame indices
of the full video.
"""
# Check input values
if start_frame < 0:
raise ValueError(
f"start_frame must be non-negative, got {start_frame}"
)
if duration <= 0:
raise ValueError(f"duration must be positive, got {duration}")
_validate_clip_request(start_frame, duration)

# Create "Clips" directory if it doesn't exist
video_path = Path(video_path)
Expand Down Expand Up @@ -121,6 +154,103 @@ def extract_clip(
return clip_path, clip_json


def extract_clips(
video_path: Path,
duration: int,
start_frames: list[int],
) -> list[tuple[Path, Path | None]]:
"""Extract multiple clips from a video.

Parameters
----------
video_path
Path to the input ``.mp4`` video. See :func:`extract_clip` for
naming conventions.
duration
Number of frames per clip, common to all clips.
start_frames
Start frame indices (0-based) for each clip.

Returns
-------
list[tuple[Path, Path | None]]
One ``(clip_path, clip_json)`` tuple per extracted clip, in the
same order as ``start_frames``. ``clip_json`` is ``None`` when no
sibling ``*_videolabels.json`` file exists.
"""
# duration and start_frame validated in each call to extract_clip()
return [
extract_single_clip(video_path, sf, duration) for sf in start_frames
]


def extract_clips_uniform(
video_path: Path,
duration: int,
num_clips: int,
) -> list[tuple[Path, Path | None]]:
"""Extract uniformly spaced clips from a video.

Parameters
----------
video_path
Path to the input ``.mp4`` video. See :func:`extract_clip` for
naming conventions.
duration
Number of frames per clip.
num_clips
Number of clips to extract, spaced evenly across the video via
:func:`_uniform_start_frames`.

Returns
-------
list[tuple[Path, Path | None]]
See :func:`extract_clips`.
"""
n_frames = sio.load_video(Path(video_path)).shape[0]
frames = _uniform_start_frames(num_clips, duration, n_frames)
return extract_clips(video_path, duration, frames)


def _uniform_start_frames(

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current approach in extract_clips bundles together input preparation (i.e. computing start_frames based on the sampling strategy) and the action (i.e. the extraction of the frames). The same applies for the suggestions I gave. This makes a more convenient function call, but the coupling of these two objectives has some downsides.

We can decouple it (initially this is what I had in mind). The extract_clips function can take start_frames as input, and then we have several functions that generate these start frames (e.g., the current _uniform_start_frames). For convenience, we can have wrappers that we can wire to the CLI. These wrappers combine extract_clips with a start-frame-production function:

# "core" function, only extracts
def extract_clips(
    video_path: Path,
    duration: int,
    start_frames: list[int],
) -> list[tuple[Path, Path | None]]:
	# potentially some validation here
    return [extract_clip(video_path, sf, duration) for sf in start_frames]

# wrapper, prepares the input. We link this to the CLI
def extract_clips_uniform(video_path, duration, num_clips):
    n_frames = sio.load_video(video_path).shape[0]
    frames = _uniform_start_frames(num_clips, duration, n_frames)
    return extract_clips(video_path, duration, frames)

This decoupled approach has a few benefits:

  • simpler and smaller unit tests (because each function does one thing)
  • it scales nicely to other strategies (contributors need to add a "start-frames" function and then write the thin wrapper that collects the call; extract_clips stays the same)
  • the dispatch that translates a string (uniform) into a function (_uniform_start_frames) now occurs at the CLI main (it is removed entirely from here). We would need to keep the None-guards in main as mentioned above tho.

I think it would also help with the validation of inputs:

  • Right now, start_frames values aren't validated as a list of positive integers anywhere (each frame is only checked individually inside the loop that calls extract_clip, which is late to catch the error). This is easy to miss with the current approach I think, because validation is a bit spread across different places. The start frame validation would belong nicely within the decoupled extract_clips, and that way we can report all bad frames at once instead of dying on the first.
  • Claude suggests to use a small validation function, to use in extract_clip and extract_clips. But in the multi-clip approach, we would validate the inputs up-front.
def _validate_clip_request(start_frame: int, duration: int) -> None:
    if start_frame < 0:
        raise ValueError(f"start_frame must be non-negative, got {start_frame}")
    if duration <= 0:
        raise ValueError(f"duration must be positive, got {duration}")

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P.S.: Claude says "argparse can enforce the conditional requirement via subcommands". It would look something like this:

extract-clips uniform --video_path v.mp4 --duration 100 --num_clips 5
extract-clips manual  --video_path v.mp4 --duration 100 --start_frames 0 500 1000

I have never done this, and I think I prefer the argparse to have no "important" logic, but just in case you want to look into it.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

another great suggestion @sfmig! I also agree that I don't want argparse performing this logic, it's simple in the manual case but we'll likely have more/more complex logic checks for other sampling strategies, so this should be handled in extract_clips_*.

num_clips: int, duration: int, n_frames: int
) -> list[int]:
"""Compute uniformly spaced clip start frames.

The video is divided into ``num_clips`` equal segments of
``n_frames / num_clips`` frames each; each clip starts at the
beginning of its segment.

Parameters
----------
num_clips
Number of clips to extract.
duration
Length of each clip in frames.
n_frames
Total number of frames in the video.

Returns
-------
list[int]
Sorted list of ``num_clips`` start frames.

Raises
------
ValueError
If ``num_clips`` is not positive or ``duration`` exceeds
``n_frames``.
"""
if num_clips <= 0:
raise ValueError(f"num_clips must be positive, got {num_clips}")
if duration > n_frames:
raise ValueError(
f"duration ({duration}) exceeds video length ({n_frames})"
)
step = n_frames / num_clips
return [round(i * step) for i in range(num_clips)]


def _extract_cliplabels(
video_path: Path, clips_dir: Path, start_frame: int, duration: int
) -> Path:
Expand Down Expand Up @@ -179,20 +309,28 @@ def _extract_cliplabels(


def main(args: argparse.Namespace) -> None:
"""Run clip extraction from parsed command-line arguments.

Parameters
----------
args
Parsed arguments containing ``video_path``, ``start_frame``,
and ``duration``.
"""
# Extract clip
extract_clip(args.video_path, args.start_frame, args.duration)
"""Run multi-clip extraction from parsed command-line arguments."""
try:
if args.sampling == "uniform":
if args.num_clips is None:
raise SystemExit(
"error: --num_clips is required when --sampling uniform"
)
extract_clips_uniform(
args.video_path, args.duration, args.num_clips
)
elif args.sampling == "manual":
if not args.start_frames:
raise SystemExit(
"error: --start_frames is required when --sampling manual"
)
extract_clips(args.video_path, args.duration, args.start_frames)
except ValueError as e:
raise SystemExit(f"error: {e}")


def parse_args(args: list[str]) -> argparse.Namespace:
"""Parse command-line arguments for clip extraction.
"""Parse command-line arguments for multi-clip extraction.

Parameters
----------
Expand All @@ -203,11 +341,12 @@ def parse_args(args: list[str]) -> argparse.Namespace:
-------
argparse.Namespace
Parsed arguments with attributes ``video_path`` (str),
``start_frame`` (int), and ``duration`` (int).
``duration`` (int), ``sampling`` (str), ``num_clips``
(int | None), and ``start_frames`` (list[int] | None).
"""
parser = argparse.ArgumentParser(
description=(
"Extract clips from video (and corresponding "
"Extract multiple clips from a video (and corresponding "
"clip labels if available)."
)
)
Expand All @@ -221,22 +360,45 @@ def parse_args(args: list[str]) -> argparse.Namespace:
"``sub-<subjectID>_ses-<sessionID>_cam-<camID>_videolabels.json``.",
)
parser.add_argument(
"--start_frame",
"--duration",
type=int,
required=True,
help="Start frame of the clip as a 0-based index.",
help="Number of frames per clip.",
)
parser.add_argument(
"--duration",
type=int,
"--sampling",
type=str,
required=True,
help="Total length of the output clip in frames",
choices=["uniform", "manual"],
help=(
"Clip selection strategy. "
"'uniform': evenly space clips across the video "
"(requires --num_clips). "
"'manual': use explicit start frames "
"(requires --start_frames)."
),
)
parser.add_argument(
"--num_clips",
type=int,
default=None,
help="Number of clips to extract. Required when --sampling uniform.",
)
parser.add_argument(
"--start_frames",
type=int,
nargs="+",
default=None,
help=(
"Start frame indices (0-based, space-separated). "
"Required when --sampling manual."
),
)
return parser.parse_args(args)


def wrapper() -> None:
"""Entry point for the ``extract-clip`` console script."""
"""Entry point for the ``extract-clips`` console script."""
args = parse_args(sys.argv[1:])
main(args)

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ docs = [
]

[project.scripts]
extract-clip = "poseinterface.clips:wrapper"
extract-clips = "poseinterface.clips:wrapper"

[build-system]
requires = [
Expand Down
2 changes: 1 addition & 1 deletion tests/test_unit/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@

def test_smoke():
"""Smoke test to check entry point."""
result = subprocess.run(["extract-clip", "--help"])
result = subprocess.run(["extract-clips", "--help"])
assert result.returncode == 0
Loading
Loading