meta-pytorch
diff --git a/.github/workflows/linux_cuda_wheel.yaml b/.github/workflows/linux_cuda_wheel.yaml
Lines changed: 1 addition & 0 deletions
diff --git a/docs/source/conf.py b/docs/source/conf.py
Lines changed: 3 additions & 1 deletion
diff --git a/examples/decoding/audio_decoding.py b/examples/decoding/audio_decoding.py
Lines changed: 7 additions & 5 deletions
diff --git a/examples/encoding/video_encoding.py b/examples/encoding/video_encoding.py
Lines changed: 3 additions & 1 deletion
diff --git a/src/torchcodec/_core/AVIOContextHolder.cpp b/src/torchcodec/_core/AVIOContextHolder.cpp
Lines changed: 47 additions & 20 deletions
diff --git a/src/torchcodec/_core/AVIOContextHolder.h b/src/torchcodec/_core/AVIOContextHolder.h
Lines changed: 20 additions & 37 deletions
diff --git a/src/torchcodec/_core/AVIOFileLikeContext.cpp b/src/torchcodec/_core/AVIOFileLikeContext.cpp
Lines changed: 7 additions & 32 deletions
diff --git a/src/torchcodec/_core/AVIOFileLikeContext.h b/src/torchcodec/_core/AVIOFileLikeContext.h
Lines changed: 1 addition & 4 deletions
@@ -9,6 +9,7 @@ on:
       - release/*
     tags:
         - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
+        - v[0-9]+.[0-9]+.[0-9]+
   workflow_dispatch:
 
 concurrency:
 
@@ -143,8 +143,10 @@ def __call__(self, filename):
 source_suffix = [".rst"]
 
 version = ".".join(torchcodec.__version__.split(".")[:2])
+# Strip CUDA suffix (e.g. "0.14.0-cu126" -> "0.14.0") for display
+release = torchcodec.__version__.split("-")[0]
 
-html_title = f"TorchCodec {torchcodec.__version__} Documentation"
+html_title = f"TorchCodec {release} Documentation"
 
 # The master toctree document.
 master_doc = "index"
 
@@ -22,8 +22,10 @@
 from IPython.display import Audio
 
 
-def play_audio(samples):
-    return Audio(samples.data, rate=samples.sample_rate)
+def play_5s(samples):
+    # Play 5 seconds of the audio. Playing the entire file would take too much
+    # space in our docs (~40 MB!).
+    return Audio(samples.data[:, :5 * samples.sample_rate], rate=samples.sample_rate)
 
 
 # sphinx_gallery_thumbnail_path = '_static/thumbnails/grumps_audio.jpg'
@@ -67,7 +69,7 @@ def play_audio(samples):
 samples = decoder.get_all_samples()
 
 print(samples)
-play_audio(samples)
+play_5s(samples)
 
 # %%
 # The ``.data`` field is a tensor of shape ``(num_channels, num_samples)`` and
@@ -88,7 +90,7 @@ def play_audio(samples):
 samples = decoder.get_samples_played_in_range(start_seconds=10, stop_seconds=70)
 
 print(samples)
-play_audio(samples)
+play_5s(samples)
 
 # %%
 # Custom sample rate
@@ -103,4 +105,4 @@ def play_audio(samples):
 samples = decoder.get_all_samples()
 
 print(samples)
-play_audio(samples)
+play_5s(samples)
@@ -268,7 +268,9 @@ def play_video(encoded_bytes):
 with encoder.open_file_like(buf, format="mp4"):
     vs.add_frames(frames)
 
-play_video(buf.getvalue())
+# play_video is disabled because crf=0 creates a 50+ MB video that we don't want
+# to check into our docs
+# play_video(buf.getvalue())
 
 # %%
 
 
@@ -9,13 +9,41 @@
 
 namespace facebook::torchcodec {
 
-void AVIOContextHolder::createAVIOContext(
-    AVIOReadFunction read,
-    AVIOWriteFunction write,
-    AVIOSeekFunction seek,
-    void* heldData,
-    bool isForWriting,
-    int bufferSize) {
+// --------------------------------------------------------------------------
+// FFmpeg AVIO callbacks — delegate to virtual methods via opaque=this
+// --------------------------------------------------------------------------
+
+int AVIOContextHolder::readCallback(void* opaque, uint8_t* buf, int buf_size) {
+  auto self = static_cast<AVIOContextHolder*>(opaque); // opaque is the holder that createAVIOContext() registered as `this`
+  int result = self->read(buf, buf_size); // dispatches to the virtual read() of the concrete holder
+  return result < 0 ? AVERROR_EOF : result; // any negative result from read() is reported to FFmpeg as end-of-file
+}
+
+int AVIOContextHolder::writeCallback(
+    void* opaque,
+    const uint8_t* buf,
+    int buf_size) {
+  auto self = static_cast<AVIOContextHolder*>(opaque); // opaque is the holder that createAVIOContext() registered as `this`
+  return self->write(buf, buf_size); // the virtual write() result is handed back to FFmpeg unchanged
+}
+
+int64_t
+AVIOContextHolder::seekCallback(void* opaque, int64_t offset, int whence) {
+  auto self = static_cast<AVIOContextHolder*>(opaque); // opaque is the holder that createAVIOContext() registered as `this`
+  if (whence == AVSEEK_SIZE) { // size query: answered from getSize() instead of calling seek()
+    int64_t size = self->getSize();
+    // INT64_MAX means "unknown size" (e.g. streaming file-like objects).
+    // Tell FFmpeg the size is unavailable rather than passing a bogus value.
+    return size == INT64_MAX ? AVERROR(EIO) : size;
+  }
+  return self->seek(offset, whence); // every other whence value is forwarded to the virtual seek()
+}
+
+// --------------------------------------------------------------------------
+// AVIO context creation and lifecycle
+// --------------------------------------------------------------------------
+
+void AVIOContextHolder::createAVIOContext(bool isForWriting, int bufferSize) {
   STD_TORCH_CHECK(
       bufferSize > 0,
       "Buffer size must be greater than 0; is " + std::to_string(bufferSize));
@@ -24,23 +52,14 @@ void AVIOContextHolder::createAVIOContext(
       buffer != nullptr,
       "Failed to allocate buffer of size " + std::to_string(bufferSize));
 
-  STD_TORCH_CHECK(seek != nullptr, "seek method must be defined");
-
-  if (isForWriting) {
-    STD_TORCH_CHECK(
-        write != nullptr, "write method must be defined for writing");
-  } else {
-    STD_TORCH_CHECK(read != nullptr, "read method must be defined for reading");
-  }
-
   avioContext_.reset(avioAllocContext(
       buffer,
       bufferSize,
       /*write_flag=*/isForWriting,
-      heldData,
-      read,
-      write,
-      seek));
+      /*opaque=*/this,
+      isForWriting ? nullptr : &readCallback,
+      isForWriting ? &writeCallback : nullptr,
+      &seekCallback));
 
   if (!avioContext_) {
     av_freep(&buffer);
@@ -58,10 +77,18 @@ AVIOContext* AVIOContextHolder::getAVIOContext() {
   return avioContext_.get();
 }
 
+// --------------------------------------------------------------------------
+// Default virtual method implementations
+// --------------------------------------------------------------------------
+
 int AVIOContextHolder::read(uint8_t*, int) {
   STD_TORCH_CHECK(false, "read() is not supported by this AVIOContextHolder");
 }
 
+int AVIOContextHolder::write(const uint8_t*, int) { // base-class default, used when a derived holder does not override write()
+  STD_TORCH_CHECK(false, "write() is not supported by this AVIOContextHolder"); // condition is always false, so the check always trips
+}
+
 int64_t AVIOContextHolder::seek(int64_t, int) {
   STD_TORCH_CHECK(false, "seek() is not supported by this AVIOContextHolder");
 }
 
@@ -11,58 +11,41 @@
 
 namespace facebook::torchcodec {
 
-// The AVIOContextHolder serves several purposes:
+// The AVIOContextHolder is a base class for I/O backends. It serves as:
 //
-//   1. It is a smart pointer for the AVIOContext. It has the logic to create
-//      a new AVIOContext and will appropriately free the AVIOContext when it
-//      goes out of scope. Note that this requires more than just having a
-//      UniqueAVIOContext, as the AVIOContext points to a buffer which must be
-//      freed.
-//   2. It is a base class for AVIOContext specializations. When specializing a
-//      AVIOContext, we need to provide four things:
-//        1. A read callback function, for decoding.
-//        2. A seek callback function, for decoding and encoding.
-//        3. A write callback function, for encoding.
-//        4. A pointer to some context object that has the same lifetime as the
-//           AVIOContext itself. This context object holds the custom state that
-//           tracks the custom behavior of reading, seeking and writing. It is
-//           provided upon AVIOContext creation and to the read, seek and
-//           write callback functions.
-//      The callback functions do not need to be members of the derived class,
-//      but the derived class must have access to them. The context object must
-//      be a member of the derived class. Derived classes need to call
-//      createAVIOContext(), ideally in their constructor.
-//  3. A generic handle for those that just need to manage having access to an
-//     AVIOContext, but aren't necessarily concerned with how it was customized:
-//     typically, the SingleStreamDecoder.
+//   1. A generic I/O interface: derived classes override virtual methods
+//      (read, write, seek, getSize) to implement their specific I/O.
+//      These can be called directly by consumers like WavDecoder.
+//
+//   2. An FFmpeg AVIO adapter: calling createAVIOContext() sets up an
+//      FFmpeg AVIOContext whose callbacks automatically delegate to the
+//      virtual methods. This is used by SingleStreamDecoder and Encoder.
+//
+//   3. A smart pointer for the AVIOContext, freeing it and its buffer
+//      on destruction.
 class FORCE_PUBLIC_VISIBILITY AVIOContextHolder {
  public:
   virtual ~AVIOContextHolder();
   AVIOContext* getAVIOContext();
 
-  // Generic I/O primitives used by consumers that don't go through
-  // FFmpeg's AVIO layer (e.g. WavDecoder). Derived classes override
-  // the ones they support.
   virtual int read(uint8_t* buf, int size);
+  virtual int write(const uint8_t* buf, int size);
   virtual int64_t seek(int64_t offset, int whence);
   virtual int64_t getSize();
 
  protected:
-  // Make constructor protected to prevent anyone from constructing
-  // an AVIOContextHolder without deriving it. (Ordinarily this would be
-  // enforced by having a pure virtual methods, but we don't have any.)
   AVIOContextHolder() = default;
 
-  // Deriving classes should call this function in their constructor.
-  void createAVIOContext(
-      AVIOReadFunction read,
-      AVIOWriteFunction write,
-      AVIOSeekFunction seek,
-      void* heldData,
-      bool isForWriting,
-      int bufferSize = defaultBufferSize);
+  // Sets up an FFmpeg AVIOContext whose callbacks delegate to the
+  // virtual methods above. Derived classes that need FFmpeg AVIO
+  // should call this in their constructor.
+  void createAVIOContext(bool isForWriting, int bufferSize = defaultBufferSize);
 
  private:
+  static int readCallback(void* opaque, uint8_t* buf, int buf_size);
+  static int writeCallback(void* opaque, const uint8_t* buf, int buf_size);
+  static int64_t seekCallback(void* opaque, int64_t offset, int whence);
+
   UniqueAVIOContext avioContext_;
 
   // Defaults to 64 KB
 
@@ -33,38 +33,7 @@ AVIOFileLikeContext::AVIOFileLikeContext(
         py::hasattr(fileLike, "seek"),
         "File like object must implement a seek method.");
   }
-  createAVIOContext(
-      &readCallback, &writeCallback, &seekCallback, this, isForWriting);
-}
-
-int AVIOFileLikeContext::readCallback(
-    void* opaque,
-    uint8_t* buf,
-    int buf_size) {
-  auto self = static_cast<AVIOFileLikeContext*>(opaque);
-  int result = self->read(buf, buf_size);
-  return result < 0 ? AVERROR_EOF : result;
-}
-
-int64_t
-AVIOFileLikeContext::seekCallback(void* opaque, int64_t offset, int whence) {
-  if (whence == AVSEEK_SIZE) {
-    // Size of file-like is typically unknown, since the data is potentially
-    // streaming.
-    return AVERROR(EIO);
-  }
-  auto self = static_cast<AVIOFileLikeContext*>(opaque);
-  return self->seek(offset, whence);
-}
-
-int AVIOFileLikeContext::writeCallback(
-    void* opaque,
-    const uint8_t* buf,
-    int buf_size) {
-  auto self = static_cast<AVIOFileLikeContext*>(opaque);
-  py::gil_scoped_acquire gil;
-  py::bytes bytes_obj(reinterpret_cast<const char*>(buf), buf_size);
-  return py::cast<int>(self->fileLike_->attr("write")(bytes_obj));
+  createAVIOContext(isForWriting);
 }
 
 int AVIOFileLikeContext::read(uint8_t* buf, int size) {
@@ -105,6 +74,12 @@ int AVIOFileLikeContext::read(uint8_t* buf, int size) {
   return totalNumRead == 0 ? -1 : totalNumRead;
 }
 
+int AVIOFileLikeContext::write(const uint8_t* buf, int size) {
+  py::gil_scoped_acquire gil; // acquire the GIL before touching any Python object
+  py::bytes bytes_obj(reinterpret_cast<const char*>(buf), size); // build a Python bytes object from the `size` bytes at buf
+  return py::cast<int>(fileLike_->attr("write")(bytes_obj)); // returns whatever the file-like's write() returned, cast to int
+}
+
 int64_t AVIOFileLikeContext::seek(int64_t offset, int whence) {
   py::gil_scoped_acquire gil;
   return py::cast<int64_t>(fileLike_->attr("seek")(offset, whence));
 
@@ -28,14 +28,11 @@ class AVIOFileLikeContext : public AVIOContextHolder {
   explicit AVIOFileLikeContext(const py::object& fileLike, bool isForWriting);
 
   int read(uint8_t* buf, int size) override;
+  int write(const uint8_t* buf, int size) override;
   int64_t seek(int64_t offset, int whence) override;
   int64_t getSize() override;
 
  private:
-  static int readCallback(void* opaque, uint8_t* buf, int buf_size);
-  static int64_t seekCallback(void* opaque, int64_t offset, int whence);
-  static int writeCallback(void* opaque, const uint8_t* buf, int buf_size);
-
   // Note that we dynamically allocate the Python object because we need to
   // strictly control when its destructor is called. We must hold the GIL
   // when its destructor gets called, as it needs to update the reference