Skip to content

Commit 4612dfa

Browse files
authored
Expose HDR Color metadata (#1271)
1 parent a37ccef commit 4612dfa

6 files changed

Lines changed: 112 additions & 3 deletions

File tree

src/torchcodec/_core/Metadata.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77
#include "Metadata.h"
88
#include "StableABICompat.h"
99

10+
extern "C" {
11+
#include <libavutil/pixdesc.h>
12+
}
13+
1014
namespace facebook::torchcodec {
1115

1216
std::optional<double> StreamMetadata::getDurationSeconds(
@@ -123,4 +127,38 @@ std::optional<double> StreamMetadata::getAverageFps(SeekMode seekMode) const {
123127
}
124128
}
125129

130+
std::optional<std::string> StreamMetadata::getColorPrimariesName() const {
131+
if (!colorPrimaries.has_value()) {
132+
return std::nullopt;
133+
}
134+
const char* name = av_color_primaries_name(*colorPrimaries);
135+
if (name == nullptr) {
136+
return std::nullopt;
137+
}
138+
return std::string(name);
139+
}
140+
141+
std::optional<std::string> StreamMetadata::getColorSpaceName() const {
142+
if (!colorSpace.has_value()) {
143+
return std::nullopt;
144+
}
145+
const char* name = av_color_space_name(*colorSpace);
146+
if (name == nullptr) {
147+
return std::nullopt;
148+
}
149+
return std::string(name);
150+
}
151+
152+
std::optional<std::string> StreamMetadata::getColorTransferCharacteristicName()
153+
const {
154+
if (!colorTransferCharacteristic.has_value()) {
155+
return std::nullopt;
156+
}
157+
const char* name = av_color_transfer_name(*colorTransferCharacteristic);
158+
if (name == nullptr) {
159+
return std::nullopt;
160+
}
161+
return std::string(name);
162+
}
163+
126164
} // namespace facebook::torchcodec

src/torchcodec/_core/Metadata.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
extern "C" {
1414
#include <libavcodec/avcodec.h>
1515
#include <libavutil/avutil.h>
16+
#include <libavutil/pixfmt.h>
1617
#include <libavutil/rational.h>
1718
}
1819

@@ -59,6 +60,9 @@ struct StreamMetadata {
5960
std::optional<AVRational> sampleAspectRatio;
6061
// Rotation angle in degrees from display matrix, in the range [-180, 180].
6162
std::optional<double> rotation;
63+
std::optional<AVColorPrimaries> colorPrimaries;
64+
std::optional<AVColorSpace> colorSpace;
65+
std::optional<AVColorTransferCharacteristic> colorTransferCharacteristic;
6266
// The pixel format of the encoded video, e.g. "yuv420p".
6367
std::optional<std::string> pixelFormat;
6468

@@ -73,6 +77,12 @@ struct StreamMetadata {
7377
std::optional<double> getEndStreamSeconds(SeekMode seekMode) const;
7478
std::optional<int64_t> getNumFrames(SeekMode seekMode) const;
7579
std::optional<double> getAverageFps(SeekMode seekMode) const;
80+
81+
// Color metadata name accessors. These return nullopt if the field is unset
82+
// or if FFmpeg returns NULL for the name.
83+
std::optional<std::string> getColorPrimariesName() const;
84+
std::optional<std::string> getColorSpaceName() const;
85+
std::optional<std::string> getColorTransferCharacteristicName() const;
7686
};
7787

7888
struct ContainerMetadata {

src/torchcodec/_core/SingleStreamDecoder.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,16 @@ void SingleStreamDecoder::initializeDecoder() {
171171
streamMetadata.sampleAspectRatio =
172172
avStream->codecpar->sample_aspect_ratio;
173173

174+
if (avStream->codecpar->color_primaries != AVCOL_PRI_UNSPECIFIED) {
175+
streamMetadata.colorPrimaries = avStream->codecpar->color_primaries;
176+
}
177+
if (avStream->codecpar->color_space != AVCOL_SPC_UNSPECIFIED) {
178+
streamMetadata.colorSpace = avStream->codecpar->color_space;
179+
}
180+
if (avStream->codecpar->color_trc != AVCOL_TRC_UNSPECIFIED) {
181+
streamMetadata.colorTransferCharacteristic =
182+
avStream->codecpar->color_trc;
183+
}
174184
AVPixelFormat pixelFormat =
175185
static_cast<AVPixelFormat>(avStream->codecpar->format);
176186
// If the AVPixelFormat is not recognized, we get back nullptr. We have

src/torchcodec/_core/_metadata.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,17 @@ class VideoStreamMetadata(StreamMetadata):
117117
or -90 degree rotation, this means width and height are swapped
118118
compared to the raw encoded dimensions in the container.
119119
"""
120+
color_primaries: str | None
121+
"""Color primaries as reported by FFmpeg. E.g. ``"bt709"``, ``"bt2020"``."""
122+
color_space: str | None
123+
"""Color space as reported by FFmpeg. E.g. ``"bt709"``,
124+
``"bt2020nc"``."""
125+
color_transfer_characteristic: str | None
126+
"""Color transfer characteristic as reported by FFmpeg.
127+
E.g. ``"bt709"``, ``"smpte2084"`` (PQ), ``"arib-std-b67"`` (HLG)."""
120128
pixel_format: str | None
121-
"""The source pixel format of the video, as reported by FFmpeg. E.g. ``'yuv420p'``, ``'yuv444p'``, etc."""
129+
"""The source pixel format of the video as reported by FFmpeg.
130+
E.g. ``'yuv420p'``, ``'yuv444p'``, etc."""
122131

123132
# Computed fields (computed in C++ with fallback logic)
124133
end_stream_seconds: float | None
@@ -245,6 +254,11 @@ def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata:
245254
average_fps_from_header=stream_dict.get("averageFpsFromHeader"),
246255
pixel_aspect_ratio=_get_optional_par_fraction(stream_dict),
247256
rotation=stream_dict.get("rotation"),
257+
color_primaries=stream_dict.get("colorPrimaries"),
258+
color_space=stream_dict.get("colorSpace"),
259+
color_transfer_characteristic=stream_dict.get(
260+
"colorTransferCharacteristic"
261+
),
248262
pixel_format=stream_dict.get("pixelFormat"),
249263
**common_meta,
250264
)

src/torchcodec/_core/custom_ops.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@
99
#include <cstdint>
1010
#include <sstream>
1111
#include <string>
12+
13+
extern "C" {
14+
#include <libavutil/pixdesc.h>
15+
}
16+
1217
#include "AVIOFileLikeContext.h"
1318
#include "AVIOTensorContext.h"
1419
#include "Encoder.h"
@@ -987,6 +992,15 @@ std::string get_stream_json_metadata(
987992
if (streamMetadata.rotation.has_value()) {
988993
map["rotation"] = std::to_string(*streamMetadata.rotation);
989994
}
995+
if (auto name = streamMetadata.getColorPrimariesName()) {
996+
map["colorPrimaries"] = quoteValue(*name);
997+
}
998+
if (auto name = streamMetadata.getColorSpaceName()) {
999+
map["colorSpace"] = quoteValue(*name);
1000+
}
1001+
if (auto name = streamMetadata.getColorTransferCharacteristicName()) {
1002+
map["colorTransferCharacteristic"] = quoteValue(*name);
1003+
}
9901004
if (streamMetadata.pixelFormat.has_value()) {
9911005
map["pixelFormat"] = quoteValue(streamMetadata.pixelFormat.value());
9921006
}

test/test_metadata.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from fractions import Fraction
99

1010
import pytest
11-
1211
from torchcodec import ffmpeg_major_version
1312
from torchcodec._core import (
1413
add_video_stream,
@@ -20,7 +19,12 @@
2019
)
2120
from torchcodec.decoders import AudioDecoder, VideoDecoder
2221

23-
from .utils import NASA_AUDIO_MP3, NASA_VIDEO, NASA_VIDEO_ROTATED
22+
from .utils import (
23+
BT2020_LIMITED_RANGE_10BIT,
24+
NASA_AUDIO_MP3,
25+
NASA_VIDEO,
26+
NASA_VIDEO_ROTATED,
27+
)
2428

2529

2630
# TODO: Expected values in these tests should be based on the assets'
@@ -169,6 +173,22 @@ def test_rotation_metadata():
169173
)
170174

171175

176+
def test_color_metadata():
    """Check the color metadata fields and pixel format reported for two assets."""
    # First asset: a BT.2020 10-bit HEVC video with PQ transfer.
    bt2020_decoder = VideoDecoder(BT2020_LIMITED_RANGE_10BIT.path)
    bt2020_meta = bt2020_decoder.metadata
    assert bt2020_meta.color_primaries == "bt2020"
    assert bt2020_meta.color_space == "bt2020nc"
    assert bt2020_meta.color_transfer_characteristic == "smpte2084"
    assert bt2020_meta.pixel_format == "yuv420p10le"

    # Second asset: NASA_VIDEO, which carries BT.709 color metadata.
    nasa_decoder = VideoDecoder(NASA_VIDEO.path)
    nasa_meta = nasa_decoder.metadata
    assert nasa_meta.color_primaries == "bt709"
    assert nasa_meta.color_space == "bt709"
    assert nasa_meta.color_transfer_characteristic == "bt709"
    assert nasa_meta.pixel_format == "yuv420p"
190+
191+
172192
def test_repr():
173193
# Test for calls to print(), str(), etc. Useful to make sure we don't forget
174194
# to add additional @properties to __repr__
@@ -191,6 +211,9 @@ def test_repr():
191211
average_fps_from_header: 29.97002997002997
192212
pixel_aspect_ratio: 1
193213
rotation: None
214+
color_primaries: bt709
215+
color_space: bt709
216+
color_transfer_characteristic: bt709
194217
pixel_format: yuv420p
195218
end_stream_seconds: 13.013
196219
num_frames: 390

0 commit comments

Comments
 (0)