-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_rustfrecord.py
More file actions
96 lines (75 loc) · 2.38 KB
/
test_rustfrecord.py
File metadata and controls
96 lines (75 loc) · 2.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import torch
from rustfrecord import Reader
from torch import Tensor
# torch.set_num_threads(1)
# torch.set_num_interop_threads(1)
# Relative path of the TFRecord file read by every test in this module; each
# call site derives its `compressed` flag from whether this name ends in ".gz".
filename = "tf_example/sample_images.tfrecord"
class TFRecordDataset(torch.utils.data.IterableDataset):
    """Iterable-style dataset backed by a ``Reader`` over a TFRecord file.

    The constructor only stores its arguments; a new ``Reader`` is built
    every time the dataset is iterated.

    Args:
        filename: Path handed to ``Reader`` as its first positional argument.
        compressed: Forwarded to ``Reader`` as the ``compressed`` keyword.
        features: Forwarded to ``Reader`` as the ``features`` keyword;
            ``None`` is passed through unchanged.
    """

    def __init__(self, filename: str, compressed: bool = True, features: list = None):
        super().__init__()
        self.filename, self.compressed, self.features = filename, compressed, features

    def __iter__(self):
        """Build a fresh ``Reader`` from the stored settings and return its iterator."""
        reader_options = {"compressed": self.compressed, "features": self.features}
        return iter(Reader(self.filename, **reader_options))
def test_loader():
    """Stream the sample file through a ``DataLoader`` and print one line per batch.

    Builds a ``TFRecordDataset`` over the module-level ``filename`` requesting
    the "label", "image/encoded" and "image/shape" features, batches it 256
    records at a time, and for every batch reshapes the first image with its
    own shape entry before printing a summary line.
    """
    wanted = ["label", "image/encoded", "image/shape"]
    is_gzipped = filename.endswith(".gz")
    dataset = TFRecordDataset(filename, compressed=is_gzipped, features=wanted)
    batches = torch.utils.data.DataLoader(dataset, batch_size=256)

    print()
    j = 0  # only the first record of each batch is inspected
    for i, batch in enumerate(batches):
        labels: Tensor = batch["label"]
        shape_rows: Tensor = batch["image/shape"]
        encoded: Tensor = batch["image/encoded"]

        label = labels[j]
        dims = torch.Size(tuple(shape_rows[j]))
        image = encoded[j].reshape(dims)
        print(f"batch={i} ({len(labels)}), {j=}, {label=}, {image.shape}")
def test_dataset():
    """Iterate a ``TFRecordDataset`` directly and print every record.

    The dataset is built over the module-level ``filename`` with the "label",
    "image/encoded" and "image/shape" features. For each record the label is
    decoded from its raw bytes as UTF-8 and the encoded image is reshaped
    using the record's own shape feature; the index, label and resulting
    image shape are printed.
    """
    ds = TFRecordDataset(
        filename,
        compressed=filename.endswith(".gz"),
        features=[
            "label",
            "image/encoded",
            "image/shape",
        ],
    )
    print()
    for i, features in enumerate(ds):
        # The label is text after decoding, so it is annotated as str.
        label: str = features["label"].tobytes().decode("utf-8")
        shape = torch.Size(tuple(features["image/shape"]))
        # NOTE(review): `.tobytes()` above suggests the reader yields array-like
        # values rather than torch.Tensor here -- confirm before annotating `image`.
        image = features["image/encoded"].reshape(shape)
        print(i, label, image.shape)
def test_reader():
    """Read the sample file with a bare ``Reader`` and print every 1000th record.

    Every record's "image/encoded" feature is reshaped with its
    "image/shape" feature; only records whose index is a multiple of 1000
    are printed (index, label, image shape).
    """
    wanted = ["label", "image/encoded", "image/shape"]
    records = Reader(filename, compressed=filename.endswith(".gz"), features=wanted)

    print()
    for i, features in enumerate(records):
        label: Tensor = features["label"]
        dims = torch.Size(tuple(features["image/shape"]))
        image: Tensor = features["image/encoded"].reshape(dims)
        if i % 1000:
            # Skip everything except indices 0, 1000, 2000, ...
            continue
        print(i, label, image.shape)