Skip to content

Commit dcc3fb2

Browse files
add OCI metadata, created timestamp, and simplify per-arch layout
- Add standard OCI annotations (manifest) and labels (config) to all platform manifests: created, source, revision, version, title, description, vendor, and licenses.
- Add crane.set_created() to set the image config 'created' field so that `docker images` shows the correct timestamp instead of N/A.
- Add a labels parameter to crane.mutate() for writing to the image config.
- Bump crane to v0.21.2 in Dockerfile.release.

Simplify the multi-arch image layout:

Combined (no suffix):
  linux/amd64: [A1..A4, B1..B4] (native-first, all 8 layers)
  linux/arm64: [B1..B4, A1..A4]

Per-arch (-amd64 / -arm64 suffix):
  linux/amd64: [X1..X4] (same 4 layers under both platforms)
  linux/arm64: [X1..X4]

Pulling the combined image and then the native per-arch image gives Docker cache hits because the per-arch layers form a prefix of the combined chain. Cross-arch caching is not possible (Docker chain-IDs encode full layer ancestry) and is no longer attempted.

Remove _other_arch_available() and the cross-chain mirroring logic. Remove --platform selection from pull() since per-arch platform entries are now identical.

Signed-off-by: Jacob Weinstock <jakobweinstock@gmail.com>
1 parent 437cd7b commit dcc3fb2

3 files changed

Lines changed: 136 additions & 77 deletions

File tree

Dockerfile.release

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# docker run --rm -v $(pwd):/work captainos-release release publish
77
FROM python:3.12-slim
88

9-
ARG CRANE_VERSION=v0.20.3
9+
ARG CRANE_VERSION=v0.21.2
1010

1111
# Install git (needed for version tag computation) and tar (for crane export)
1212
RUN apt-get update && apt-get install -y --no-install-recommends \

captain/crane.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
from __future__ import annotations
1414

15+
import json
1516
import os
1617
import subprocess
1718
import tarfile
@@ -48,16 +49,24 @@ def mutate(
4849
*,
4950
platform: str | None = None,
5051
annotations: dict[str, str] | None = None,
52+
labels: dict[str, str] | None = None,
5153
tag: str | None = None,
5254
logger: StageLogger | None = None,
5355
) -> None:
54-
"""Mutate metadata on *image_ref* (platform, annotations, re-tag)."""
56+
"""Mutate metadata on *image_ref* (platform, annotations, labels, re-tag).
57+
58+
*annotations* are written to the OCI manifest (visible via
59+
``crane manifest``). *labels* are written to the image config
60+
(visible via ``docker inspect``).
61+
"""
5562
_log = logger or _default_log
5663
cmd: list[str] = ["crane", "mutate", image_ref]
5764
if platform:
5865
cmd += ["--set-platform", platform]
5966
for key, value in (annotations or {}).items():
6067
cmd += ["-a", f"{key}={value}"]
68+
for key, value in (labels or {}).items():
69+
cmd += ["-l", f"{key}={value}"]
6170
if tag:
6271
cmd += ["-t", tag]
6372
_log.log(f"crane mutate {image_ref}")
@@ -143,3 +152,33 @@ def tag(src_ref: str, new_tag: str, *, logger: StageLogger | None = None) -> Non
143152
_log = logger or _default_log
144153
_log.log(f"crane tag {src_ref} {new_tag}")
145154
run(["crane", "tag", src_ref, new_tag])
155+
156+
157+
def set_created(
    image_ref: str,
    created: str,
    *,
    logger: StageLogger | None = None,
) -> None:
    """Set the ``created`` timestamp in the image config of *image_ref*.

    Uses ``crane config`` to read the config JSON, patches the
    ``created`` field, and pipes it back through ``crane edit config``.
    This sets the top-level config timestamp that ``docker images``
    displays in the CREATED column.

    Args:
        image_ref: Image reference handed to both ``crane`` invocations.
        created: Value stored verbatim in the config's ``created`` field.
        logger: Optional stage logger; the module default is used when
            omitted.

    Raises:
        json.JSONDecodeError: If the captured ``crane config`` output is
            not valid JSON.
        subprocess.CalledProcessError: If ``crane edit config`` exits
            with a non-zero status.
    """
    _log = logger or _default_log
    _log.log(f"crane set-created {image_ref}")
    cfg_result = run(["crane", "config", image_ref], capture=True)
    config = json.loads(cfg_result.stdout)
    config["created"] = created
    edit_cmd = ["crane", "edit", "config", image_ref]
    # subprocess.run feeds the patched JSON on stdin, waits for the child,
    # and kills/reaps it if we are interrupted mid-write -- a bare Popen
    # would leave the process behind in that case.  check=True raises the
    # same CalledProcessError (returncode + command) on failure.
    subprocess.run(
        edit_cmd,
        input=json.dumps(config).encode(),
        check=True,
    )

captain/oci.py

Lines changed: 95 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,20 @@
33
Each artifact file is pushed as its own layer so that OCI registries
44
can deduplicate blobs between per-arch and combined images.
55
6-
Per-arch images (``-amd64``, ``-arm64``) are multi-arch OCI indexes
7-
where both platforms point to the same content, so any host can pull.
6+
Combined image (``target="both"``, no tag suffix):
7+
A multi-arch index where each platform manifest has the native
8+
arch's layers first, then the other arch's layers (8 layers total).
9+
``linux/amd64`` → ``[A1‥A4, B1‥B4]``,
10+
``linux/arm64`` → ``[B1‥B4, A1‥A4]``.
811
9-
The combined image (no suffix) is a multi-arch index where each
10-
platform entry contains **only** that platform's artifacts. This
11-
keeps layer chains identical to the per-arch images so that Docker
12-
caches layers between the combined and per-arch pulls.
12+
Per-arch image (``target="amd64"`` or ``"arm64"``, tag suffix ``-{arch}``):
13+
A multi-arch index where both platform entries contain the same
14+
4 layers (only that arch's artifacts).
15+
16+
Docker layer caching: pulling the combined image first and then the
17+
native per-arch image gives cache hits, because the per-arch layers
18+
form a prefix of the combined chain. Cross-arch caching is not
19+
possible due to Docker's chain-ID Merkle structure.
1320
1421
* **containerd** can pull it (valid ``rootfs.diff_ids`` in the config) —
1522
Kubernetes image-volume mounts work.
@@ -20,6 +27,7 @@
2027

2128
import subprocess
2229
import tarfile
30+
from datetime import datetime, timezone
2331
from pathlib import Path
2432

2533
from captain import artifacts, crane
@@ -142,47 +150,32 @@ def _push_platform_manifest(
142150
sha: str,
143151
repository: str,
144152
logger: StageLogger,
153+
*,
154+
created: str,
155+
tag: str,
156+
artifact_name: str,
145157
) -> None:
146158
"""Push artifact layers and set platform metadata on a temp manifest."""
147159
for i, tar_path in enumerate(layer_tars):
148160
crane.append(tar_path, temp_ref, base=temp_ref if i > 0 else None, logger=logger)
161+
oci_metadata = {
162+
"org.opencontainers.image.created": created,
163+
"org.opencontainers.image.source": f"https://github.com/{repository}",
164+
"org.opencontainers.image.revision": sha,
165+
"org.opencontainers.image.version": tag,
166+
"org.opencontainers.image.title": artifact_name,
167+
"org.opencontainers.image.description": "CaptainOS build artifacts",
168+
"org.opencontainers.image.vendor": "Tinkerbell",
169+
"org.opencontainers.image.licenses": "Apache-2.0",
170+
}
149171
crane.mutate(
150172
temp_ref,
151173
platform=platform,
152-
annotations={
153-
"org.opencontainers.image.source": f"https://github.com/{repository}",
154-
"org.opencontainers.image.revision": sha,
155-
},
174+
annotations=oci_metadata,
175+
labels=oci_metadata,
156176
logger=logger,
157177
)
158-
159-
160-
def _publish_arch(
161-
cfg: Config,
162-
arch: str,
163-
final_ref: str,
164-
image_base: str,
165-
sha: str,
166-
repository: str,
167-
logger: StageLogger,
168-
) -> tuple[list[str], list[str]]:
169-
"""Collect artifacts for *arch*, push a platform manifest, return (digest_refs, names).
170-
171-
The image is pushed to *final_ref* with platform ``linux/{arch}``.
172-
The caller captures the digest before the next push overwrites the tag.
173-
"""
174-
out = ensure_dir(cfg.output_dir)
175-
arch_files = _collect_arch_artifacts(cfg.project_dir, out, arch, logger)
176-
layer_tars = [_deterministic_tar(f, out) for f in arch_files]
177-
try:
178-
_push_platform_manifest(
179-
layer_tars, final_ref, f"linux/{arch}", sha, repository, logger,
180-
)
181-
d = crane.digest(final_ref, logger=logger)
182-
finally:
183-
for t in layer_tars:
184-
t.unlink(missing_ok=True)
185-
return [f"{image_base}@{d}"], [f.name for f in arch_files]
178+
crane.set_created(temp_ref, created, logger=logger)
186179

187180

188181
def publish(
@@ -198,60 +191,87 @@ def publish(
198191
) -> None:
199192
"""Collect artifacts and publish a multi-arch OCI index.
200193
201-
Each artifact file becomes its own layer so that OCI registries
202-
deduplicate shared blobs between per-arch and combined images.
203-
204-
When *target* is ``"both"``, each platform entry in the index
205-
contains only that platform's artifacts (linux/amd64 → amd64
206-
layers, linux/arm64 → arm64 layers). This keeps the layer
207-
chains identical to the per-arch images so that Docker can
208-
cache layers between the combined and per-arch pulls.
194+
Each artifact file becomes its own layer. Deterministic tar
195+
generation ensures byte-identical layers across publish runs,
196+
so OCI registries deduplicate blobs automatically.
209197
210198
*target* selects which artifacts to include: ``"amd64"``,
211-
``"arm64"``, or ``"both"`` (all artifacts from both arches).
199+
``"arm64"``, or ``"both"``.
212200
"""
213201
_log = logger or _default_log
214202
arches = list(_ARCHES) if target == "both" else [target]
215203
tag_suffix = "" if target == "both" else f"-{target}"
216-
final_ref = _image_ref(registry, repository, artifact_name, f"{tag}{tag_suffix}")
204+
full_tag = f"{tag}{tag_suffix}"
205+
final_ref = _image_ref(registry, repository, artifact_name, full_tag)
217206
out = ensure_dir(cfg.output_dir)
218207
image_base = f"{registry}/{repository}/{artifact_name}"
208+
created = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
209+
210+
# Collect artifacts for every requested architecture.
211+
arch_files: dict[str, list[Path]] = {}
212+
for arch in arches:
213+
arch_files[arch] = _collect_arch_artifacts(
214+
cfg.project_dir,
215+
out,
216+
arch,
217+
_log,
218+
)
219219

220-
digest_refs: list[str] = []
221-
artifact_names: list[str] = []
220+
# Create deterministic layer tars (shared across manifest pushes).
221+
arch_layer_tars: dict[str, list[Path]] = {}
222+
for arch, files in arch_files.items():
223+
arch_layer_tars[arch] = [_deterministic_tar(f, out) for f in files]
222224

223-
if target == "both":
224-
# Each arch gets its own platform manifest so that the layer
225-
# chains match the per-arch images exactly (enabling Docker
226-
# layer caching between the combined and arch-specific tags).
227-
for arch in _ARCHES:
228-
refs, names = _publish_arch(
229-
cfg, arch, final_ref, image_base, sha, repository, _log,
230-
)
231-
digest_refs.extend(refs)
232-
artifact_names.extend(names)
233-
else:
234-
# Single-arch: publish the same layers under both platforms so
235-
# the image is pullable from any host architecture.
236-
arch_files = _collect_arch_artifacts(cfg.project_dir, out, target, _log)
237-
artifact_names = [f.name for f in arch_files]
238-
layer_tars = [_deterministic_tar(f, out) for f in arch_files]
239-
try:
240-
for platform in [f"linux/{a}" for a in _ARCHES]:
225+
try:
226+
digest_refs: list[str] = []
227+
228+
if target == "both":
229+
# Combined image: native-first ordering per platform.
230+
for arch in _ARCHES:
231+
other = next(a for a in _ARCHES if a != arch)
232+
ordered = list(arch_layer_tars[arch]) + list(arch_layer_tars[other])
241233
_push_platform_manifest(
242-
layer_tars, final_ref, platform, sha, repository, _log,
234+
ordered,
235+
final_ref,
236+
f"linux/{arch}",
237+
sha,
238+
repository,
239+
_log,
240+
created=created,
241+
tag=full_tag,
242+
artifact_name=artifact_name,
243+
)
244+
d = crane.digest(final_ref, logger=_log)
245+
digest_refs.append(f"{image_base}@{d}")
246+
else:
247+
# Per-arch: same layers under both platforms.
248+
for arch in _ARCHES:
249+
_push_platform_manifest(
250+
arch_layer_tars[target],
251+
final_ref,
252+
f"linux/{arch}",
253+
sha,
254+
repository,
255+
_log,
256+
created=created,
257+
tag=full_tag,
258+
artifact_name=artifact_name,
243259
)
244260
d = crane.digest(final_ref, logger=_log)
245261
digest_refs.append(f"{image_base}@{d}")
246-
finally:
247-
for t in layer_tars:
248-
t.unlink(missing_ok=True)
249262

250-
# Create multi-arch index (overwrites the tag with the index)
251-
crane.index_append(final_ref, digest_refs, logger=_log)
263+
# Create multi-arch index (overwrites the tag with the index)
264+
crane.index_append(final_ref, digest_refs, logger=_log)
265+
finally:
266+
for tars in arch_layer_tars.values():
267+
for t in tars:
268+
t.unlink(missing_ok=True)
252269

253270
# Recap
254-
platforms = [f"linux/{a}" for a in arches]
271+
artifact_names: list[str] = []
272+
for arch in arches:
273+
artifact_names.extend(f.name for f in arch_files.get(arch, []))
274+
platforms = [f"linux/{a}" for a in _ARCHES]
255275
_log.log("")
256276
_log.log("Publish complete")
257277
_log.log(f" Image: {final_ref}")

0 commit comments

Comments
 (0)