Skip to content

Commit 683a827

Browse files
authored
Merge pull request #1500 from CREDO23/fix/podcast-stream-missing-audio
fix(podcasts): guard stream when audio missing and share object store volume
2 parents 3e53931 + a7be41d commit 683a827

8 files changed

Lines changed: 71 additions & 8 deletions

File tree

docker/docker-compose.dev.yml

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,7 @@ services:
106106
volumes:
107107
- ../surfsense_backend/app:/app/app
108108
- shared_temp:/shared_tmp
109+
- object_store:/app/.local_object_store
109110
env_file:
110111
- ../surfsense_backend/.env
111112
extra_hosts:
@@ -119,6 +120,7 @@ services:
119120
- PYTHONPATH=/app
120121
- UVICORN_LOOP=asyncio
121122
- UNSTRUCTURED_HAS_PATCHED_LOOP=1
123+
- FILE_STORAGE_LOCAL_PATH=/app/.local_object_store
122124
- LANGCHAIN_TRACING_V2=false
123125
- LANGSMITH_TRACING=false
124126
- AUTH_TYPE=${AUTH_TYPE:-LOCAL}
@@ -171,6 +173,7 @@ services:
171173
volumes:
172174
- ../surfsense_backend/app:/app/app
173175
- shared_temp:/shared_tmp
176+
- object_store:/app/.local_object_store
174177
env_file:
175178
- ../surfsense_backend/.env
176179
extra_hosts:
@@ -182,6 +185,7 @@ services:
182185
- REDIS_APP_URL=${REDIS_URL:-redis://redis:6379/0}
183186
- CELERY_TASK_DEFAULT_QUEUE=surfsense
184187
- PYTHONPATH=/app
188+
- FILE_STORAGE_LOCAL_PATH=/app/.local_object_store
185189
- SEARXNG_DEFAULT_HOST=${SEARXNG_DEFAULT_HOST:-http://searxng:8080}
186190
- SERVICE_ROLE=worker
187191
depends_on:
@@ -278,6 +282,8 @@ volumes:
278282
name: surfsense-dev-redis
279283
shared_temp:
280284
name: surfsense-dev-shared-temp
285+
object_store:
286+
name: surfsense-dev-object-store
281287
zero_cache_data:
282288
name: surfsense-dev-zero-cache
283289
whatsapp_sessions:

docker/docker-compose.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ services:
100100
- "${BACKEND_PORT:-8929}:8000"
101101
volumes:
102102
- shared_temp:/shared_tmp
103+
- object_store:/app/.local_object_store
103104
env_file:
104105
- .env
105106
extra_hosts:
@@ -113,6 +114,7 @@ services:
113114
PYTHONPATH: /app
114115
UVICORN_LOOP: asyncio
115116
UNSTRUCTURED_HAS_PATCHED_LOOP: "1"
117+
FILE_STORAGE_LOCAL_PATH: /app/.local_object_store
116118
NEXT_FRONTEND_URL: ${NEXT_FRONTEND_URL:-http://localhost:${FRONTEND_PORT:-3929}}
117119
SEARXNG_DEFAULT_HOST: ${SEARXNG_DEFAULT_HOST:-http://searxng:8080}
118120
WHATSAPP_BRIDGE_URL: ${WHATSAPP_BRIDGE_URL:-http://whatsapp-bridge:9929}
@@ -165,6 +167,7 @@ services:
165167
image: ghcr.io/modsetter/surfsense-backend:${SURFSENSE_VERSION:-latest}${SURFSENSE_VARIANT:+-${SURFSENSE_VARIANT}}
166168
volumes:
167169
- shared_temp:/shared_tmp
170+
- object_store:/app/.local_object_store
168171
env_file:
169172
- .env
170173
extra_hosts:
@@ -176,6 +179,7 @@ services:
176179
REDIS_APP_URL: ${REDIS_URL:-redis://redis:6379/0}
177180
CELERY_TASK_DEFAULT_QUEUE: surfsense
178181
PYTHONPATH: /app
182+
FILE_STORAGE_LOCAL_PATH: /app/.local_object_store
179183
SEARXNG_DEFAULT_HOST: ${SEARXNG_DEFAULT_HOST:-http://searxng:8080}
180184
SERVICE_ROLE: worker
181185
depends_on:
@@ -278,6 +282,8 @@ volumes:
278282
name: surfsense-redis
279283
shared_temp:
280284
name: surfsense-shared-temp
285+
object_store:
286+
name: surfsense-object-store
281287
zero_cache_data:
282288
name: surfsense-zero-cache
283289
whatsapp_sessions:

surfsense_backend/app/podcasts/api/routes.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,14 @@
2727
get_async_session,
2828
)
2929
from app.podcasts.generation.brief import propose_brief
30-
from app.podcasts.persistence import Podcast, PodcastRepository
30+
from app.podcasts.persistence import Podcast, PodcastRepository, PodcastStatus
3131
from app.podcasts.service import (
3232
InvalidTransitionError,
3333
PodcastService,
3434
PreconditionFailedError,
3535
SpecConflictError,
3636
)
37-
from app.podcasts.storage import open_audio_stream, purge_audio
37+
from app.podcasts.storage import audio_exists, open_audio_stream, purge_audio
3838
from app.podcasts.tasks import draft_transcript_task
3939
from app.podcasts.tts import get_text_to_speech
4040
from app.podcasts.voices import (
@@ -272,6 +272,11 @@ async def stream_podcast(
272272
podcast = await _load(session, user, podcast_id, Permission.PODCASTS_READ)
273273

274274
if podcast.storage_key:
275+
# Verify first so a missing object is a 404, not a mid-stream crash.
276+
if not await audio_exists(podcast):
277+
raise HTTPException(
278+
status_code=404, detail="Podcast audio is no longer available"
279+
)
275280
return StreamingResponse(
276281
open_audio_stream(podcast),
277282
media_type="audio/mpeg",
@@ -295,7 +300,10 @@ def iterfile():
295300
},
296301
)
297302

298-
raise HTTPException(status_code=404, detail="Podcast audio not found")
303+
# No audio: terminal states never will have any, otherwise it's in flight.
304+
if PodcastStatus(podcast.status).is_terminal:
305+
raise HTTPException(status_code=404, detail="Podcast audio not found")
306+
raise HTTPException(status_code=409, detail="Podcast audio is not ready yet")
299307

300308

301309
async def _require(

surfsense_backend/app/podcasts/storage.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,13 @@ def open_audio_stream(podcast: Podcast) -> AsyncIterator[bytes]:
4242
return get_storage_backend().open_stream(podcast.storage_key)
4343

4444

45+
async def audio_exists(podcast: Podcast) -> bool:
46+
"""Whether the podcast's stored audio object is actually present."""
47+
return bool(podcast.storage_key) and await get_storage_backend().exists(
48+
podcast.storage_key
49+
)
50+
51+
4552
async def purge_audio(podcast: Podcast) -> None:
4653
"""Delete a podcast's stored audio if present; a missing object is fine."""
4754
await purge_audio_object(podcast.storage_key)

surfsense_backend/app/routes/public_chat_routes.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,14 @@ async def stream_public_podcast(
103103
if storage_key:
104104
from app.file_storage.factory import get_storage_backend
105105

106+
backend = get_storage_backend()
107+
# Verify first so a missing object is a 404, not a mid-stream crash.
108+
if not await backend.exists(storage_key):
109+
raise HTTPException(
110+
status_code=404, detail="Podcast audio is no longer available"
111+
)
106112
return StreamingResponse(
107-
get_storage_backend().open_stream(storage_key),
113+
backend.open_stream(storage_key),
108114
media_type="audio/mpeg",
109115
headers={"Accept-Ranges": "bytes"},
110116
)

surfsense_backend/tests/integration/podcasts/conftest.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,9 @@ async def put(self, key: str, data: bytes, content_type: str | None = None) -> N
120120
async def open_stream(self, key: str) -> AsyncIterator[bytes]:
121121
yield self.objects.get(key, b"audio-bytes")
122122

123+
async def exists(self, key: str) -> bool:
124+
return key in self.objects
125+
123126
async def delete(self, key: str) -> None:
124127
self.deleted.append(key)
125128

surfsense_backend/tests/integration/podcasts/test_public_stream.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,22 @@ async def test_public_stream_serves_audio_via_storage_key(
4848
assert resp.content == b"public-audio"
4949

5050

51+
async def test_public_stream_404_when_object_missing(
52+
client, db_session, db_search_space, db_user, fake_storage
53+
):
54+
await _snapshot(
55+
db_session,
56+
search_space_id=db_search_space.id,
57+
user=db_user,
58+
token="tok-gone",
59+
podcasts=[{"original_id": 556, "storage_key": "podcasts/gone.mp3"}],
60+
)
61+
62+
resp = await client.get("/api/v1/public/tok-gone/podcasts/556/stream")
63+
64+
assert resp.status_code == 404
65+
66+
5167
async def test_public_stream_404_when_podcast_absent_from_snapshot(
5268
client, db_session, db_search_space, db_user
5369
):

surfsense_backend/tests/integration/podcasts/test_streaming.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
"""Streaming a podcast's rendered audio over HTTP.
22
3-
A ready podcast streams its bytes from the storage backend; a podcast with no
4-
stored audio returns 404. Storage is an in-memory backend (the object store is a
5-
system boundary).
3+
A ready podcast streams its bytes; an in-flight one is 409, a stored-but-missing
4+
object is 404. Storage is an in-memory backend (the object store is a boundary).
65
"""
76

87
from __future__ import annotations
@@ -31,11 +30,23 @@ async def test_stream_serves_stored_audio(
3130
assert resp.content == b"the-audio"
3231

3332

34-
async def test_stream_404_when_no_audio(client, db_search_space, make_podcast):
33+
async def test_stream_409_while_in_flight(client, db_search_space, make_podcast):
3534
podcast = await make_podcast(
3635
search_space_id=db_search_space.id, status=PodcastStatus.DRAFTING
3736
)
3837

3938
resp = await client.get(f"{BASE}/{podcast.id}/stream")
4039

40+
assert resp.status_code == 409
41+
42+
43+
async def test_stream_404_when_object_missing(
44+
client, db_search_space, make_podcast, fake_storage
45+
):
46+
podcast = await make_podcast(
47+
search_space_id=db_search_space.id, status=PodcastStatus.READY
48+
)
49+
50+
resp = await client.get(f"{BASE}/{podcast.id}/stream")
51+
4152
assert resp.status_code == 404

0 commit comments

Comments
 (0)