Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions surfsense_backend/app/podcasts/api/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@ async def create_podcast(
session,
search_space_id=body.search_space_id,
speaker_count=body.speaker_count,
min_minutes=body.min_minutes,
max_minutes=body.max_minutes,
min_seconds=body.min_seconds,
max_seconds=body.max_seconds,
focus=body.focus,
)
await service.attach_brief(podcast, spec)
Expand Down
20 changes: 16 additions & 4 deletions surfsense_backend/app/podcasts/api/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,19 @@

from pydantic import BaseModel, ConfigDict, Field

from app.podcasts.duration_limits import (
DEFAULT_MAX_SECONDS,
DEFAULT_MIN_SECONDS,
MAX_DURATION_SECONDS,
MIN_DURATION_SECONDS,
)
from app.podcasts.persistence import Podcast, PodcastStatus
from app.podcasts.schemas import PodcastSpec, Transcript
from app.podcasts.service import has_stored_episode, read_spec, read_transcript

# Defaults applied when a create request omits brief sizing; the brief gate lets
# the user adjust before any cost is incurred.
DEFAULT_SPEAKER_COUNT = 2
DEFAULT_MIN_MINUTES = 10
DEFAULT_MAX_MINUTES = 20


class CreatePodcastRequest(BaseModel):
Expand All @@ -30,8 +34,16 @@ class CreatePodcastRequest(BaseModel):
source_content: str = Field(..., min_length=1)
thread_id: int | None = None
speaker_count: int = Field(default=DEFAULT_SPEAKER_COUNT, ge=1, le=6)
min_minutes: int = Field(default=DEFAULT_MIN_MINUTES, ge=1)
max_minutes: int = Field(default=DEFAULT_MAX_MINUTES, ge=1)
min_seconds: int = Field(
default=DEFAULT_MIN_SECONDS,
ge=MIN_DURATION_SECONDS,
le=MAX_DURATION_SECONDS,
)
max_seconds: int = Field(
default=DEFAULT_MAX_SECONDS,
ge=MIN_DURATION_SECONDS,
le=MAX_DURATION_SECONDS,
)
Comment on lines +37 to +46

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "Check CreatePodcastRequest duration validation:"
rg -n -C2 'class CreatePodcastRequest|min_seconds|max_seconds|model_validator|_check_duration_order' surfsense_backend/app/podcasts/api/schemas.py

echo
echo "Check downstream DurationTarget construction that currently relies on ordered bounds:"
rg -n -C2 'DurationTarget\(|min_seconds=brief\.min_seconds|max_seconds=brief\.max_seconds' surfsense_backend/app/podcasts/generation/brief/nodes.py

Repository: MODSetter/SurfSense

Length of output: 922


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "=== Check complete CreatePodcastRequest class ==="
rg -A 30 'class CreatePodcastRequest' surfsense_backend/app/podcasts/api/schemas.py

echo
echo "=== Find and examine DurationTarget ==="
fd -e py | xargs rg -l 'class DurationTarget' 2>/dev/null | head -5

echo
echo "=== Check DurationTarget implementation ==="
rg -B 2 -A 15 'class DurationTarget' --type py

Repository: MODSetter/SurfSense

Length of output: 2683


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "=== Read full DurationTarget class ==="
rg -A 50 'class DurationTarget' surfsense_backend/app/podcasts/schemas/spec.py

Repository: MODSetter/SurfSense

Length of output: 2097


Enforce duration ordering in CreatePodcastRequest.

min_seconds and max_seconds are validated independently, so inverted ranges pass request validation and fail later when DurationTarget is instantiated in surfsense_backend/app/podcasts/generation/brief/nodes.py (line 82) with a 500 error. Reject this at the API boundary with a 422 validation error instead.

Suggested fix
-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, ConfigDict, Field, model_validator
@@
 class CreatePodcastRequest(BaseModel):
@@
     max_seconds: int = Field(
         default=DEFAULT_MAX_SECONDS,
         ge=MIN_DURATION_SECONDS,
         le=MAX_DURATION_SECONDS,
     )
     focus: str | None = Field(default=None, max_length=2000)
+
+    `@model_validator`(mode="after")
+    def _check_duration_order(self) -> "CreatePodcastRequest":
+        if self.max_seconds < self.min_seconds:
+            raise ValueError("max_seconds must be >= min_seconds")
+        return self
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@surfsense_backend/app/podcasts/api/schemas.py` around lines 37 - 46, The
CreatePodcastRequest schema validates min_seconds and max_seconds independently,
allowing inverted ranges like min_seconds=100, max_seconds=50 to pass validation
at the API boundary. Add a Pydantic validator (root_validator or field_validator
depending on your Pydantic version) to the CreatePodcastRequest class that
enforces the constraint min_seconds <= max_seconds, ensuring invalid duration
ranges are rejected with a 422 validation error at the API boundary instead of
causing a 500 error later when DurationTarget is instantiated.

focus: str | None = Field(default=None, max_length=2000)


Expand Down
6 changes: 6 additions & 0 deletions surfsense_backend/app/podcasts/duration_limits.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""Shared bounds and defaults for podcast target duration."""

MAX_DURATION_SECONDS = 24 * 60 * 60
MIN_DURATION_SECONDS = 15
DEFAULT_MIN_SECONDS = 20
DEFAULT_MAX_SECONDS = 30
11 changes: 7 additions & 4 deletions surfsense_backend/app/podcasts/generation/brief/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,22 @@

from langchain_core.runnables import RunnableConfig

from app.podcasts.duration_limits import (
DEFAULT_MAX_SECONDS,
DEFAULT_MIN_SECONDS,
)

# Sensible defaults for a fresh brief; the user adjusts the range at the gate.
DEFAULT_SPEAKER_COUNT = 2
DEFAULT_MIN_MINUTES = 10
DEFAULT_MAX_MINUTES = 20


@dataclass(kw_only=True)
class BriefConfig:
"""Signals used to propose a brief; everything here is non-LLM context."""

speaker_count: int = DEFAULT_SPEAKER_COUNT
min_minutes: int = DEFAULT_MIN_MINUTES
max_minutes: int = DEFAULT_MAX_MINUTES
min_seconds: int = DEFAULT_MIN_SECONDS
max_seconds: int = DEFAULT_MAX_SECONDS
focus: str | None = None
last_used_language: str | None = None
last_used_voices: list[str] = field(default_factory=list)
Expand Down
2 changes: 1 addition & 1 deletion surfsense_backend/app/podcasts/generation/brief/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def propose_spec(state: BriefState, config: RunnableConfig) -> dict[str, Any]:
style=PodcastStyle.CONVERSATIONAL,
speakers=speakers,
duration=DurationTarget(
min_minutes=brief.min_minutes, max_minutes=brief.max_minutes
min_seconds=brief.min_seconds, max_seconds=brief.max_seconds
),
focus=brief.focus,
)
Expand Down
11 changes: 6 additions & 5 deletions surfsense_backend/app/podcasts/generation/brief/propose.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@

from sqlalchemy.ext.asyncio import AsyncSession

from app.podcasts.duration_limits import DEFAULT_MAX_SECONDS, DEFAULT_MIN_SECONDS
from app.podcasts.persistence import PodcastRepository
from app.podcasts.schemas import PodcastSpec
from app.podcasts.service import preferences_from

from .config import DEFAULT_MAX_MINUTES, DEFAULT_MIN_MINUTES, DEFAULT_SPEAKER_COUNT
from .config import DEFAULT_SPEAKER_COUNT
from .graph import graph as brief_graph
from .state import BriefState

Expand All @@ -18,8 +19,8 @@ async def propose_brief(
*,
search_space_id: int,
speaker_count: int = DEFAULT_SPEAKER_COUNT,
min_minutes: int = DEFAULT_MIN_MINUTES,
max_minutes: int = DEFAULT_MAX_MINUTES,
min_seconds: int = DEFAULT_MIN_SECONDS,
max_seconds: int = DEFAULT_MAX_SECONDS,
focus: str | None = None,
) -> PodcastSpec:
"""Reuse the last-used language and voices, else English; return the spec."""
Expand All @@ -29,8 +30,8 @@ async def propose_brief(
config = {
"configurable": {
"speaker_count": speaker_count,
"min_minutes": min_minutes,
"max_minutes": max_minutes,
"min_seconds": min_seconds,
"max_seconds": max_seconds,
"focus": focus,
"last_used_language": last_language,
"last_used_voices": last_voices,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ async def plan_outline(
tc = TranscriptConfig.from_runnable_config(config)
llm = await _require_llm(state, tc)

target_words = round(tc.spec.duration.midpoint_minutes * _WORDS_PER_MINUTE)
target_words = round(tc.spec.duration.midpoint_seconds * _WORDS_PER_MINUTE / 60)
suggested_segments = max(1, round(target_words / _WORDS_PER_SEGMENT))

messages = [
Expand Down
43 changes: 32 additions & 11 deletions surfsense_backend/app/podcasts/schemas/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,19 @@

import re
from enum import StrEnum
from typing import Any

from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator

from app.podcasts.duration_limits import (
MAX_DURATION_SECONDS,
MIN_DURATION_SECONDS,
)

# A speaker count beyond this is almost never a real podcast and explodes the
# voice/turn-attribution space, so we reject it at the brief gate.
MAX_SPEAKERS = 6

# Long-form is a goal, but an open-ended upper bound invites runaway TTS bills.
# One day of audio is a generous ceiling that still blocks obvious mistakes.
MAX_DURATION_MINUTES = 24 * 60

# BCP-47 primary subtag plus optional region (e.g. ``en``, ``en-US``, ``pt-BR``).
# Kept deliberately permissive: the voice catalog, not the brief, decides which
# languages can actually be synthesised. Casing is normalised after matching.
Expand Down Expand Up @@ -91,7 +93,7 @@ def _strip_required_text(cls, value: str) -> str:


class DurationTarget(BaseModel):
"""The desired finished length as an inclusive minute range.
"""The desired finished length as an inclusive second range.

Drafting aims for the midpoint and treats the bounds as soft guardrails;
storing a range (rather than a point) keeps long-form expectations honest
Expand All @@ -100,19 +102,38 @@ class DurationTarget(BaseModel):

model_config = ConfigDict(extra="forbid")

min_minutes: int = Field(..., ge=1, le=MAX_DURATION_MINUTES)
max_minutes: int = Field(..., ge=1, le=MAX_DURATION_MINUTES)
min_seconds: int = Field(..., ge=MIN_DURATION_SECONDS, le=MAX_DURATION_SECONDS)
max_seconds: int = Field(..., ge=MIN_DURATION_SECONDS, le=MAX_DURATION_SECONDS)

@model_validator(mode="before")
@classmethod
def _coerce_legacy_minutes(cls, data: Any) -> Any:
"""Rows stored before seconds-based briefs still load from JSONB."""
if (
isinstance(data, dict)
and "min_seconds" not in data
and "min_minutes" in data
):
migrated = dict(data)
migrated["min_seconds"] = int(migrated.pop("min_minutes")) * 60
migrated["max_seconds"] = int(migrated.pop("max_minutes")) * 60
return migrated
return data

@model_validator(mode="after")
def _check_order(self) -> DurationTarget:
if self.max_minutes < self.min_minutes:
raise ValueError("max_minutes must be >= min_minutes")
if self.max_seconds < self.min_seconds:
raise ValueError("max_seconds must be >= min_seconds")
return self

@property
def midpoint_minutes(self) -> float:
def midpoint_seconds(self) -> float:
"""The runtime drafting should aim for within the range."""
return (self.min_minutes + self.max_minutes) / 2
return (self.min_seconds + self.max_seconds) / 2

@property
def midpoint_minutes(self) -> float:
return self.midpoint_seconds / 60


class PodcastSpec(BaseModel):
Expand Down
2 changes: 1 addition & 1 deletion surfsense_backend/tests/integration/podcasts/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def build_spec(
slot=1, name="Guest", role=SpeakerRole.GUEST, voice_id=voice_ids[1]
),
],
duration=DurationTarget(min_minutes=10, max_minutes=20),
duration=DurationTarget(min_seconds=600, max_seconds=1200),
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,7 @@ async def test_quota_denial_fails_the_podcast_without_a_transcript(
async def _deny(**_kwargs):
raise QuotaInsufficientError(
usage_type="podcast_generation",
used_micros=5_000_000,
limit_micros=5_000_000,
balance_micros=5_000_000,
remaining_micros=0,
)
yield # pragma: no cover - unreachable, satisfies the CM protocol
Expand Down
6 changes: 3 additions & 3 deletions surfsense_backend/tests/unit/podcasts/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ def _make(
language: str = "en",
style: PodcastStyle = PodcastStyle.CONVERSATIONAL,
speakers: list[SpeakerSpec] | None = None,
min_minutes: int = 10,
max_minutes: int = 20,
min_seconds: int = 600,
max_seconds: int = 1200,
focus: str | None = None,
) -> PodcastSpec:
if speakers is None:
Expand All @@ -54,7 +54,7 @@ def _make(
language=language,
style=style,
speakers=speakers,
duration=DurationTarget(min_minutes=min_minutes, max_minutes=max_minutes),
duration=DurationTarget(min_seconds=min_seconds, max_seconds=max_seconds),
focus=focus,
)

Expand Down
2 changes: 1 addition & 1 deletion surfsense_backend/tests/unit/podcasts/test_renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def _spec(voice_id: str) -> PodcastSpec:
speakers=[
SpeakerSpec(slot=0, name="Host", role=SpeakerRole.HOST, voice_id=voice_id)
],
duration=DurationTarget(min_minutes=5, max_minutes=10),
duration=DurationTarget(min_seconds=300, max_seconds=600),
)


Expand Down
27 changes: 17 additions & 10 deletions surfsense_backend/tests/unit/podcasts/test_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def test_spec_normalizes_its_language_on_construction():
spec = PodcastSpec(
language="EN-us",
speakers=[_speaker(0)],
duration=DurationTarget(min_minutes=5, max_minutes=10),
duration=DurationTarget(min_seconds=300, max_seconds=600),
)
assert spec.language == "en-us"

Expand All @@ -68,7 +68,7 @@ def test_speakers_must_have_unique_slots():
PodcastSpec(
language="en",
speakers=[_speaker(0), _speaker(0, voice_id="kokoro:af_bella")],
duration=DurationTarget(min_minutes=5, max_minutes=10),
duration=DurationTarget(min_seconds=300, max_seconds=600),
)


Expand All @@ -77,7 +77,7 @@ def test_a_brief_needs_at_least_one_speaker():
PodcastSpec(
language="en",
speakers=[],
duration=DurationTarget(min_minutes=5, max_minutes=10),
duration=DurationTarget(min_seconds=300, max_seconds=600),
)


Expand All @@ -86,7 +86,7 @@ def test_a_monologue_brief_carries_exactly_one_speaker():
language="en",
style=PodcastStyle.MONOLOGUE,
speakers=[_speaker(0)],
duration=DurationTarget(min_minutes=5, max_minutes=10),
duration=DurationTarget(min_seconds=300, max_seconds=600),
)
assert spec.style is PodcastStyle.MONOLOGUE

Expand All @@ -98,26 +98,33 @@ def test_a_monologue_brief_rejects_multiple_speakers():
language="en",
style=PodcastStyle.MONOLOGUE,
speakers=[_speaker(0), _speaker(1, voice_id="kokoro:af_bella")],
duration=DurationTarget(min_minutes=5, max_minutes=10),
duration=DurationTarget(min_seconds=300, max_seconds=600),
)


def test_duration_rejects_an_inverted_range():
"""A max below the min is a user error caught at the brief gate."""
with pytest.raises(ValidationError):
DurationTarget(min_minutes=20, max_minutes=10)
DurationTarget(min_seconds=1200, max_seconds=600)


def test_duration_midpoint_is_where_drafting_aims():
assert DurationTarget(min_minutes=10, max_minutes=20).midpoint_minutes == 15
assert DurationTarget(min_seconds=600, max_seconds=1200).midpoint_seconds == 900
assert DurationTarget(min_seconds=600, max_seconds=1200).midpoint_minutes == 15


def test_duration_loads_legacy_minute_fields_from_json():
duration = DurationTarget.model_validate({"min_minutes": 10, "max_minutes": 20})
assert duration.min_seconds == 600
assert duration.max_seconds == 1200


def test_blank_focus_becomes_absent():
"""Whitespace-only steer is treated as no steer."""
spec = PodcastSpec(
language="en",
speakers=[_speaker(0)],
duration=DurationTarget(min_minutes=5, max_minutes=10),
duration=DurationTarget(min_seconds=300, max_seconds=600),
focus=" ",
)
assert spec.focus is None
Expand All @@ -127,7 +134,7 @@ def test_speaker_for_returns_the_speaker_bound_to_a_slot():
spec = PodcastSpec(
language="en",
speakers=[_speaker(0), _speaker(1, voice_id="kokoro:af_bella")],
duration=DurationTarget(min_minutes=5, max_minutes=10),
duration=DurationTarget(min_seconds=300, max_seconds=600),
)
assert spec.speaker_for(1).voice_id == "kokoro:af_bella"

Expand All @@ -136,7 +143,7 @@ def test_speaker_for_raises_when_no_speaker_matches():
spec = PodcastSpec(
language="en",
speakers=[_speaker(0)],
duration=DurationTarget(min_minutes=5, max_minutes=10),
duration=DurationTarget(min_seconds=300, max_seconds=600),
)
with pytest.raises(KeyError):
spec.speaker_for(99)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,7 @@ async def test_ainvoke_propagates_quota_insufficient_error(monkeypatch):
async def _denying_billable_call(**_kwargs):
raise QuotaInsufficientError(
usage_type="vision_extraction",
used_micros=5_000_000,
limit_micros=5_000_000,
balance_micros=5_000_000,
remaining_micros=0,
)
yield # unreachable but required for asynccontextmanager type
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,7 @@ async def _denying_billable_call(**kwargs):
_CALL_LOG.append(kwargs)
raise QuotaInsufficientError(
usage_type=kwargs.get("usage_type", "?"),
used_micros=5_000_000,
limit_micros=5_000_000,
balance_micros=5_000_000,
remaining_micros=0,
)
yield SimpleNamespace() # pragma: no cover
Expand Down
Loading
Loading