Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 20 additions & 13 deletions streamrip/client/downloadable.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,29 +37,36 @@ def generate_temp_path(url: str):
)


def _do_download(path, url, headers, callback, loop):
    """Download ``url`` to ``path`` with a blocking ``requests`` stream.

    This function blocks until the whole body has been written, so it is
    meant to be run in a worker thread (``fast_async_download`` submits it
    to the event loop's executor) rather than called from a coroutine.

    A large chunk size is used on purpose: the earlier aiohttp/aiofiles
    approach yielded to the event loop for every small read, which made
    downloads CPU-bound and capped total speed at roughly 10MB/s.

    Args:
        path: Destination file path; opened in binary write mode, so an
            existing file is truncated.
        url: URL to fetch. Redirects are followed.
        headers: HTTP headers sent with the request.
        callback: Progress callable invoked with the byte length of each
            chunk written. It is never called from the worker thread;
            each call is scheduled onto ``loop`` instead.
        loop: Event loop on which ``callback`` is scheduled via
            ``call_soon_threadsafe``. Progress reporting (rich's Live
            display) must only be updated from the loop's thread.
    """
    chunk_size: int = 2**17  # 128 KiB (131072 bytes)
    with open(path, "wb") as file:
        with requests.get(
            url,
            headers=headers,
            allow_redirects=True,
            stream=True,
        ) as resp:
            for chunk in resp.iter_content(chunk_size=chunk_size):
                file.write(chunk)
                # Hand progress updates back to the event loop thread
                # instead of touching the UI from this worker thread.
                loop.call_soon_threadsafe(callback, len(chunk))


async def fast_async_download(path, url, headers, callback):
    """Run the blocking download in a thread pool so the event loop stays free.

    The transfer itself is performed by ``_do_download`` (requests with
    large chunks, for throughput). Offloading it to the loop's default
    executor keeps other concurrent downloads from being frozen while one
    connection is active.

    Args:
        path: Destination file path passed through to ``_do_download``.
        url: URL to download.
        headers: HTTP headers sent with the request.
        callback: Progress callable receiving the size of each chunk; it
            is scheduled back onto this coroutine's event loop.

    Any exception raised by ``_do_download`` in the worker thread
    propagates to the awaiter of this coroutine.
    """
    # We are inside a coroutine, so a loop is guaranteed to be running;
    # get_running_loop() is the non-deprecated way to obtain it.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, _do_download, path, url, headers, callback, loop)


@dataclass(slots=True)
Expand Down