Skip to content

Commit cd6887a

Browse files
author
pytorchbot
committed
2026-06-01 nightly release (2ab858e)
1 parent eb85a1b commit cd6887a

15 files changed

Lines changed: 199 additions & 244 deletions

.github/workflows/linux_cuda_wheel.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ on:
99
- release/*
1010
tags:
1111
- v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
12+
- v[0-9]+.[0-9]+.[0-9]+
1213
workflow_dispatch:
1314

1415
concurrency:

docs/source/conf.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,10 @@ def __call__(self, filename):
143143
source_suffix = [".rst"]
144144

145145
version = ".".join(torchcodec.__version__.split(".")[:2])
146+
# Strip CUDA suffix (e.g. "0.14.0-cu126" -> "0.14.0") for display
147+
release = torchcodec.__version__.split("-")[0]
146148

147-
html_title = f"TorchCodec {torchcodec.__version__} Documentation"
149+
html_title = f"TorchCodec {release} Documentation"
148150

149151
# The master toctree document.
150152
master_doc = "index"

examples/decoding/audio_decoding.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,10 @@
2222
from IPython.display import Audio
2323

2424

25-
def play_audio(samples):
26-
return Audio(samples.data, rate=samples.sample_rate)
25+
def play_5s(samples):
26+
# Play 5 seconds of the audio. Playing the entire file would take too much
27+
# space in our docs (~40 MB!).
28+
return Audio(samples.data[:, :5 * samples.sample_rate], rate=samples.sample_rate)
2729

2830

2931
# sphinx_gallery_thumbnail_path = '_static/thumbnails/grumps_audio.jpg'
@@ -67,7 +69,7 @@ def play_audio(samples):
6769
samples = decoder.get_all_samples()
6870

6971
print(samples)
70-
play_audio(samples)
72+
play_5s(samples)
7173

7274
# %%
7375
# The ``.data`` field is a tensor of shape ``(num_channels, num_samples)`` and
@@ -88,7 +90,7 @@ def play_audio(samples):
8890
samples = decoder.get_samples_played_in_range(start_seconds=10, stop_seconds=70)
8991

9092
print(samples)
91-
play_audio(samples)
93+
play_5s(samples)
9294

9395
# %%
9496
# Custom sample rate
@@ -103,4 +105,4 @@ def play_audio(samples):
103105
samples = decoder.get_all_samples()
104106

105107
print(samples)
106-
play_audio(samples)
108+
play_5s(samples)

examples/encoding/video_encoding.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,9 @@ def play_video(encoded_bytes):
268268
with encoder.open_file_like(buf, format="mp4"):
269269
vs.add_frames(frames)
270270

271-
play_video(buf.getvalue())
271+
# play_video is disabled because crf=0 creates a 50+ MB video that we don't want
272+
# to check into our docs
273+
# play_video(buf.getvalue())
272274

273275
# %%
274276

src/torchcodec/_core/AVIOContextHolder.cpp

Lines changed: 47 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,41 @@
99

1010
namespace facebook::torchcodec {
1111

12-
void AVIOContextHolder::createAVIOContext(
13-
AVIOReadFunction read,
14-
AVIOWriteFunction write,
15-
AVIOSeekFunction seek,
16-
void* heldData,
17-
bool isForWriting,
18-
int bufferSize) {
12+
// --------------------------------------------------------------------------
13+
// FFmpeg AVIO callbacks — delegate to virtual methods via opaque=this
14+
// --------------------------------------------------------------------------
15+
16+
int AVIOContextHolder::readCallback(void* opaque, uint8_t* buf, int buf_size) {
17+
auto self = static_cast<AVIOContextHolder*>(opaque);
18+
int result = self->read(buf, buf_size);
19+
return result < 0 ? AVERROR_EOF : result;
20+
}
21+
22+
int AVIOContextHolder::writeCallback(
23+
void* opaque,
24+
const uint8_t* buf,
25+
int buf_size) {
26+
auto self = static_cast<AVIOContextHolder*>(opaque);
27+
return self->write(buf, buf_size);
28+
}
29+
30+
int64_t
31+
AVIOContextHolder::seekCallback(void* opaque, int64_t offset, int whence) {
32+
auto self = static_cast<AVIOContextHolder*>(opaque);
33+
if (whence == AVSEEK_SIZE) {
34+
int64_t size = self->getSize();
35+
// INT64_MAX means "unknown size" (e.g. streaming file-like objects).
36+
// Tell FFmpeg the size is unavailable rather than passing a bogus value.
37+
return size == INT64_MAX ? AVERROR(EIO) : size;
38+
}
39+
return self->seek(offset, whence);
40+
}
41+
42+
// --------------------------------------------------------------------------
43+
// AVIO context creation and lifecycle
44+
// --------------------------------------------------------------------------
45+
46+
void AVIOContextHolder::createAVIOContext(bool isForWriting, int bufferSize) {
1947
STD_TORCH_CHECK(
2048
bufferSize > 0,
2149
"Buffer size must be greater than 0; is " + std::to_string(bufferSize));
@@ -24,23 +52,14 @@ void AVIOContextHolder::createAVIOContext(
2452
buffer != nullptr,
2553
"Failed to allocate buffer of size " + std::to_string(bufferSize));
2654

27-
STD_TORCH_CHECK(seek != nullptr, "seek method must be defined");
28-
29-
if (isForWriting) {
30-
STD_TORCH_CHECK(
31-
write != nullptr, "write method must be defined for writing");
32-
} else {
33-
STD_TORCH_CHECK(read != nullptr, "read method must be defined for reading");
34-
}
35-
3655
avioContext_.reset(avioAllocContext(
3756
buffer,
3857
bufferSize,
3958
/*write_flag=*/isForWriting,
40-
heldData,
41-
read,
42-
write,
43-
seek));
59+
/*opaque=*/this,
60+
isForWriting ? nullptr : &readCallback,
61+
isForWriting ? &writeCallback : nullptr,
62+
&seekCallback));
4463

4564
if (!avioContext_) {
4665
av_freep(&buffer);
@@ -58,10 +77,18 @@ AVIOContext* AVIOContextHolder::getAVIOContext() {
5877
return avioContext_.get();
5978
}
6079

80+
// --------------------------------------------------------------------------
81+
// Default virtual method implementations
82+
// --------------------------------------------------------------------------
83+
6184
int AVIOContextHolder::read(uint8_t*, int) {
6285
STD_TORCH_CHECK(false, "read() is not supported by this AVIOContextHolder");
6386
}
6487

88+
int AVIOContextHolder::write(const uint8_t*, int) {
89+
STD_TORCH_CHECK(false, "write() is not supported by this AVIOContextHolder");
90+
}
91+
6592
int64_t AVIOContextHolder::seek(int64_t, int) {
6693
STD_TORCH_CHECK(false, "seek() is not supported by this AVIOContextHolder");
6794
}

src/torchcodec/_core/AVIOContextHolder.h

Lines changed: 20 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -11,58 +11,41 @@
1111

1212
namespace facebook::torchcodec {
1313

14-
// The AVIOContextHolder serves several purposes:
14+
// The AVIOContextHolder is a base class for I/O backends. It serves as:
1515
//
16-
// 1. It is a smart pointer for the AVIOContext. It has the logic to create
17-
// a new AVIOContext and will appropriately free the AVIOContext when it
18-
// goes out of scope. Note that this requires more than just having a
19-
// UniqueAVIOContext, as the AVIOContext points to a buffer which must be
20-
// freed.
21-
// 2. It is a base class for AVIOContext specializations. When specializing a
22-
// AVIOContext, we need to provide four things:
23-
// 1. A read callback function, for decoding.
24-
// 2. A seek callback function, for decoding and encoding.
25-
// 3. A write callback function, for encoding.
26-
// 4. A pointer to some context object that has the same lifetime as the
27-
// AVIOContext itself. This context object holds the custom state that
28-
// tracks the custom behavior of reading, seeking and writing. It is
29-
// provided upon AVIOContext creation and to the read, seek and
30-
// write callback functions.
31-
// The callback functions do not need to be members of the derived class,
32-
// but the derived class must have access to them. The context object must
33-
// be a member of the derived class. Derived classes need to call
34-
// createAVIOContext(), ideally in their constructor.
35-
// 3. A generic handle for those that just need to manage having access to an
36-
// AVIOContext, but aren't necessarily concerned with how it was customized:
37-
// typically, the SingleStreamDecoder.
16+
// 1. A generic I/O interface: derived classes override virtual methods
17+
// (read, write, seek, getSize) to implement their specific I/O.
18+
// These can be called directly by consumers like WavDecoder.
19+
//
20+
// 2. An FFmpeg AVIO adapter: calling createAVIOContext() sets up an
21+
// FFmpeg AVIOContext whose callbacks automatically delegate to the
22+
// virtual methods. This is used by SingleStreamDecoder and Encoder.
23+
//
24+
// 3. A smart pointer for the AVIOContext, freeing it and its buffer
25+
// on destruction.
3826
class FORCE_PUBLIC_VISIBILITY AVIOContextHolder {
3927
public:
4028
virtual ~AVIOContextHolder();
4129
AVIOContext* getAVIOContext();
4230

43-
// Generic I/O primitives used by consumers that don't go through
44-
// FFmpeg's AVIO layer (e.g. WavDecoder). Derived classes override
45-
// the ones they support.
4631
virtual int read(uint8_t* buf, int size);
32+
virtual int write(const uint8_t* buf, int size);
4733
virtual int64_t seek(int64_t offset, int whence);
4834
virtual int64_t getSize();
4935

5036
protected:
51-
// Make constructor protected to prevent anyone from constructing
52-
// an AVIOContextHolder without deriving it. (Ordinarily this would be
53-
// enforced by having a pure virtual methods, but we don't have any.)
5437
AVIOContextHolder() = default;
5538

56-
// Deriving classes should call this function in their constructor.
57-
void createAVIOContext(
58-
AVIOReadFunction read,
59-
AVIOWriteFunction write,
60-
AVIOSeekFunction seek,
61-
void* heldData,
62-
bool isForWriting,
63-
int bufferSize = defaultBufferSize);
39+
// Sets up an FFmpeg AVIOContext whose callbacks delegate to the
40+
// virtual methods above. Derived classes that need FFmpeg AVIO
41+
// should call this in their constructor.
42+
void createAVIOContext(bool isForWriting, int bufferSize = defaultBufferSize);
6443

6544
private:
45+
static int readCallback(void* opaque, uint8_t* buf, int buf_size);
46+
static int writeCallback(void* opaque, const uint8_t* buf, int buf_size);
47+
static int64_t seekCallback(void* opaque, int64_t offset, int whence);
48+
6649
UniqueAVIOContext avioContext_;
6750

6851
// Defaults to 64 KB

src/torchcodec/_core/AVIOFileLikeContext.cpp

Lines changed: 7 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -33,38 +33,7 @@ AVIOFileLikeContext::AVIOFileLikeContext(
3333
py::hasattr(fileLike, "seek"),
3434
"File like object must implement a seek method.");
3535
}
36-
createAVIOContext(
37-
&readCallback, &writeCallback, &seekCallback, this, isForWriting);
38-
}
39-
40-
int AVIOFileLikeContext::readCallback(
41-
void* opaque,
42-
uint8_t* buf,
43-
int buf_size) {
44-
auto self = static_cast<AVIOFileLikeContext*>(opaque);
45-
int result = self->read(buf, buf_size);
46-
return result < 0 ? AVERROR_EOF : result;
47-
}
48-
49-
int64_t
50-
AVIOFileLikeContext::seekCallback(void* opaque, int64_t offset, int whence) {
51-
if (whence == AVSEEK_SIZE) {
52-
// Size of file-like is typically unknown, since the data is potentially
53-
// streaming.
54-
return AVERROR(EIO);
55-
}
56-
auto self = static_cast<AVIOFileLikeContext*>(opaque);
57-
return self->seek(offset, whence);
58-
}
59-
60-
int AVIOFileLikeContext::writeCallback(
61-
void* opaque,
62-
const uint8_t* buf,
63-
int buf_size) {
64-
auto self = static_cast<AVIOFileLikeContext*>(opaque);
65-
py::gil_scoped_acquire gil;
66-
py::bytes bytes_obj(reinterpret_cast<const char*>(buf), buf_size);
67-
return py::cast<int>(self->fileLike_->attr("write")(bytes_obj));
36+
createAVIOContext(isForWriting);
6837
}
6938

7039
int AVIOFileLikeContext::read(uint8_t* buf, int size) {
@@ -105,6 +74,12 @@ int AVIOFileLikeContext::read(uint8_t* buf, int size) {
10574
return totalNumRead == 0 ? -1 : totalNumRead;
10675
}
10776

77+
int AVIOFileLikeContext::write(const uint8_t* buf, int size) {
78+
py::gil_scoped_acquire gil;
79+
py::bytes bytes_obj(reinterpret_cast<const char*>(buf), size);
80+
return py::cast<int>(fileLike_->attr("write")(bytes_obj));
81+
}
82+
10883
int64_t AVIOFileLikeContext::seek(int64_t offset, int whence) {
10984
py::gil_scoped_acquire gil;
11085
return py::cast<int64_t>(fileLike_->attr("seek")(offset, whence));

src/torchcodec/_core/AVIOFileLikeContext.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,11 @@ class AVIOFileLikeContext : public AVIOContextHolder {
2828
explicit AVIOFileLikeContext(const py::object& fileLike, bool isForWriting);
2929

3030
int read(uint8_t* buf, int size) override;
31+
int write(const uint8_t* buf, int size) override;
3132
int64_t seek(int64_t offset, int whence) override;
3233
int64_t getSize() override;
3334

3435
private:
35-
static int readCallback(void* opaque, uint8_t* buf, int buf_size);
36-
static int64_t seekCallback(void* opaque, int64_t offset, int whence);
37-
static int writeCallback(void* opaque, const uint8_t* buf, int buf_size);
38-
3936
// Note that we dynamically allocate the Python object because we need to
4037
// strictly control when its destructor is called. We must hold the GIL
4138
// when its destructor gets called, as it needs to update the reference

0 commit comments

Comments
 (0)