Skip to content

Commit fbfeeb8

Browse files
authored
OCI image fixes (#41)
## Description

Add OCI metadata, created timestamp, and simplify per-arch layout

- Add standard OCI annotations (manifest) and labels (config) to all platform manifests: created, source, revision, version, title, description, vendor, and licenses.
- Add `crane.set_created()` to set the image config `created` field so `docker images` shows the correct timestamp instead of N/A.
- Add `labels` parameter to `crane.mutate()` for writing to image config.
- Bump crane to v0.21.2 in `Dockerfile.release`.

Simplify the multi-arch image layout:

Combined (no suffix):
  linux/amd64: [A1..A4, B1..B4] (native-first, all 8 layers)
  linux/arm64: [B1..B4, A1..A4]

Per-arch (-amd64 / -arm64 suffix):
  linux/amd64: [X1..X4] (same 4 layers under both platforms)
  linux/arm64: [X1..X4]

Pulling the combined image then the native per-arch image gives Docker cache hits because the per-arch layers form a prefix of the combined chain. Cross-arch caching is not possible (Docker chain-IDs encode full layer ancestry) and is no longer attempted.

Remove `_other_arch_available()` and the cross-chain mirroring logic. Remove `--platform` selection from `pull()` since per-arch platform entries are now identical.

Fixes: #

## How Has This Been Tested?

## How are existing users impacted? What migration steps/scripts do we need?

## Checklist:

I have:

- [ ] updated the documentation and/or roadmap (if required)
- [ ] added unit or e2e tests
- [ ] provided instructions on how to upgrade
2 parents 7540d73 + dcc3fb2 commit fbfeeb8

3 files changed

Lines changed: 140 additions & 35 deletions

File tree

Dockerfile.release

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# docker run --rm -v $(pwd):/work captainos-release release publish
77
FROM python:3.12-slim
88

9-
ARG CRANE_VERSION=v0.20.3
9+
ARG CRANE_VERSION=v0.21.2
1010

1111
# Install git (needed for version tag computation) and tar (for crane export)
1212
RUN apt-get update && apt-get install -y --no-install-recommends \

captain/crane.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
from __future__ import annotations
1414

15+
import json
1516
import os
1617
import subprocess
1718
import tarfile
@@ -48,16 +49,24 @@ def mutate(
4849
*,
4950
platform: str | None = None,
5051
annotations: dict[str, str] | None = None,
52+
labels: dict[str, str] | None = None,
5153
tag: str | None = None,
5254
logger: StageLogger | None = None,
5355
) -> None:
54-
"""Mutate metadata on *image_ref* (platform, annotations, re-tag)."""
56+
"""Mutate metadata on *image_ref* (platform, annotations, labels, re-tag).
57+
58+
*annotations* are written to the OCI manifest (visible via
59+
``crane manifest``). *labels* are written to the image config
60+
(visible via ``docker inspect``).
61+
"""
5562
_log = logger or _default_log
5663
cmd: list[str] = ["crane", "mutate", image_ref]
5764
if platform:
5865
cmd += ["--set-platform", platform]
5966
for key, value in (annotations or {}).items():
6067
cmd += ["-a", f"{key}={value}"]
68+
for key, value in (labels or {}).items():
69+
cmd += ["-l", f"{key}={value}"]
6170
if tag:
6271
cmd += ["-t", tag]
6372
_log.log(f"crane mutate {image_ref}")
@@ -143,3 +152,33 @@ def tag(src_ref: str, new_tag: str, *, logger: StageLogger | None = None) -> Non
143152
_log = logger or _default_log
144153
_log.log(f"crane tag {src_ref} {new_tag}")
145154
run(["crane", "tag", src_ref, new_tag])
155+
156+
157+
def set_created(
    image_ref: str,
    created: str,
    *,
    logger: StageLogger | None = None,
) -> None:
    """Set the ``created`` timestamp in the image config of *image_ref*.

    Reads the config JSON with ``crane config``, overwrites its top-level
    ``created`` field with *created*, and feeds the patched JSON to
    ``crane edit config`` on stdin.  This sets the top-level config
    timestamp that ``docker images`` displays in the CREATED column.

    *created* is written verbatim; no format validation is done here.

    Raises ``subprocess.CalledProcessError`` when ``crane edit config``
    exits non-zero, and ``json.JSONDecodeError`` when the output of
    ``crane config`` is not valid JSON.
    """
    _log = logger or _default_log
    _log.log(f"crane set-created {image_ref}")
    cfg_result = run(["crane", "config", image_ref], capture=True)
    config = json.loads(cfg_result.stdout)
    config["created"] = created
    # subprocess.run() replaces the manual Popen/communicate/returncode
    # sequence: check=True raises the same CalledProcessError on failure,
    # and the child is killed and reaped if the wait is interrupted.
    subprocess.run(
        ["crane", "edit", "config", image_ref],
        input=json.dumps(config).encode(),
        check=True,
    )

captain/oci.py

Lines changed: 99 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,22 @@
11
"""High-level OCI artifact operations for publishing and retrieving releases.
22
33
Each artifact file is pushed as its own layer so that OCI registries
4-
can deduplicate blobs between per-arch and combined images. Every
5-
image is a multi-arch OCI index (linux/amd64 + linux/arm64 entries
6-
pointing to the same content) so that any platform can pull it.
4+
can deduplicate blobs between per-arch and combined images.
5+
6+
Combined image (``target="both"``, no tag suffix):
7+
A multi-arch index where each platform manifest has the native
8+
arch's layers first, then the other arch's layers (8 layers total).
9+
``linux/amd64`` → ``[A1‥A4, B1‥B4]``,
10+
``linux/arm64`` → ``[B1‥B4, A1‥A4]``.
11+
12+
Per-arch image (``target="amd64"`` or ``"arm64"``, tag suffix ``-{arch}``):
13+
A multi-arch index where both platform entries contain the same
14+
4 layers (only that arch's artifacts).
15+
16+
Docker layer caching: pulling the combined image first and then the
17+
native per-arch image gives cache hits, because the per-arch layers
18+
form a prefix of the combined chain. Cross-arch caching is not
19+
possible due to Docker's chain-ID Merkle structure.
720
821
* **containerd** can pull it (valid ``rootfs.diff_ids`` in the config) —
922
Kubernetes image-volume mounts work.
@@ -14,6 +27,7 @@
1427

1528
import subprocess
1629
import tarfile
30+
from datetime import datetime, timezone
1731
from pathlib import Path
1832

1933
from captain import artifacts, crane
@@ -136,19 +150,32 @@ def _push_platform_manifest(
136150
sha: str,
137151
repository: str,
138152
logger: StageLogger,
153+
*,
154+
created: str,
155+
tag: str,
156+
artifact_name: str,
139157
) -> None:
140158
"""Push artifact layers and set platform metadata on a temp manifest."""
141159
for i, tar_path in enumerate(layer_tars):
142160
crane.append(tar_path, temp_ref, base=temp_ref if i > 0 else None, logger=logger)
161+
oci_metadata = {
162+
"org.opencontainers.image.created": created,
163+
"org.opencontainers.image.source": f"https://github.com/{repository}",
164+
"org.opencontainers.image.revision": sha,
165+
"org.opencontainers.image.version": tag,
166+
"org.opencontainers.image.title": artifact_name,
167+
"org.opencontainers.image.description": "CaptainOS build artifacts",
168+
"org.opencontainers.image.vendor": "Tinkerbell",
169+
"org.opencontainers.image.licenses": "Apache-2.0",
170+
}
143171
crane.mutate(
144172
temp_ref,
145173
platform=platform,
146-
annotations={
147-
"org.opencontainers.image.source": f"https://github.com/{repository}",
148-
"org.opencontainers.image.revision": sha,
149-
},
174+
annotations=oci_metadata,
175+
labels=oci_metadata,
150176
logger=logger,
151177
)
178+
crane.set_created(temp_ref, created, logger=logger)
152179

153180

154181
def publish(
@@ -164,54 +191,93 @@ def publish(
164191
) -> None:
165192
"""Collect artifacts and publish a multi-arch OCI index.
166193
167-
Each artifact file becomes its own layer so that OCI registries
168-
deduplicate shared blobs between per-arch and combined images.
194+
Each artifact file becomes its own layer. Deterministic tar
195+
generation ensures byte-identical layers across publish runs,
196+
so OCI registries deduplicate blobs automatically.
169197
170198
*target* selects which artifacts to include: ``"amd64"``,
171-
``"arm64"``, or ``"both"`` (all artifacts from both arches).
199+
``"arm64"``, or ``"both"``.
172200
"""
173201
_log = logger or _default_log
174202
arches = list(_ARCHES) if target == "both" else [target]
175203
tag_suffix = "" if target == "both" else f"-{target}"
176-
final_ref = _image_ref(registry, repository, artifact_name, f"{tag}{tag_suffix}")
204+
full_tag = f"{tag}{tag_suffix}"
205+
final_ref = _image_ref(registry, repository, artifact_name, full_tag)
177206
out = ensure_dir(cfg.output_dir)
207+
image_base = f"{registry}/{repository}/{artifact_name}"
208+
created = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
209+
210+
# Collect artifacts for every requested architecture.
211+
arch_files: dict[str, list[Path]] = {}
212+
for arch in arches:
213+
arch_files[arch] = _collect_arch_artifacts(
214+
cfg.project_dir,
215+
out,
216+
arch,
217+
_log,
218+
)
178219

179-
all_files: list[Path] = []
180-
for a in arches:
181-
files = _collect_arch_artifacts(cfg.project_dir, out, a, _log)
182-
all_files.extend(files)
220+
# Create deterministic layer tars (shared across manifest pushes).
221+
arch_layer_tars: dict[str, list[Path]] = {}
222+
for arch, files in arch_files.items():
223+
arch_layer_tars[arch] = [_deterministic_tar(f, out) for f in files]
183224

184-
# Create deterministic per-file tars for layer dedup
185-
layer_tars: list[Path] = []
186225
try:
187-
for f in all_files:
188-
layer_tars.append(_deterministic_tar(f, out))
189-
190-
# Push platform manifests and capture their digests.
191-
# Each push overwrites the same tag; the digest is captured before
192-
# the next overwrite. This avoids leftover intermediate tags.
193-
image_base = f"{registry}/{repository}/{artifact_name}"
194-
platforms = ["linux/amd64", "linux/arm64"]
195226
digest_refs: list[str] = []
196-
for platform in platforms:
197-
_push_platform_manifest(layer_tars, final_ref, platform, sha, repository, _log)
198-
d = crane.digest(final_ref, logger=_log)
199-
digest_refs.append(f"{image_base}@{d}")
227+
228+
if target == "both":
229+
# Combined image: native-first ordering per platform.
230+
for arch in _ARCHES:
231+
other = next(a for a in _ARCHES if a != arch)
232+
ordered = list(arch_layer_tars[arch]) + list(arch_layer_tars[other])
233+
_push_platform_manifest(
234+
ordered,
235+
final_ref,
236+
f"linux/{arch}",
237+
sha,
238+
repository,
239+
_log,
240+
created=created,
241+
tag=full_tag,
242+
artifact_name=artifact_name,
243+
)
244+
d = crane.digest(final_ref, logger=_log)
245+
digest_refs.append(f"{image_base}@{d}")
246+
else:
247+
# Per-arch: same layers under both platforms.
248+
for arch in _ARCHES:
249+
_push_platform_manifest(
250+
arch_layer_tars[target],
251+
final_ref,
252+
f"linux/{arch}",
253+
sha,
254+
repository,
255+
_log,
256+
created=created,
257+
tag=full_tag,
258+
artifact_name=artifact_name,
259+
)
260+
d = crane.digest(final_ref, logger=_log)
261+
digest_refs.append(f"{image_base}@{d}")
200262

201263
# Create multi-arch index (overwrites the tag with the index)
202264
crane.index_append(final_ref, digest_refs, logger=_log)
203265
finally:
204-
for t in layer_tars:
205-
t.unlink(missing_ok=True)
266+
for tars in arch_layer_tars.values():
267+
for t in tars:
268+
t.unlink(missing_ok=True)
206269

207270
# Recap
208-
artifact_names = [f.name for f in all_files]
271+
artifact_names: list[str] = []
272+
for arch in arches:
273+
artifact_names.extend(f.name for f in arch_files.get(arch, []))
274+
platforms = [f"linux/{a}" for a in _ARCHES]
209275
_log.log("")
210276
_log.log("Publish complete")
211277
_log.log(f" Image: {final_ref}")
212278
_log.log(f" Target: {target}")
213279
_log.log(f" Platforms: {', '.join(platforms)}")
214-
_log.log(f" Layers: {len(layer_tars)}")
280+
_log.log(f" Layers: {len(artifact_names)}")
215281
_log.log(" Artifacts:")
216282
for name in artifact_names:
217283
_log.log(f" - {name}")

0 commit comments

Comments
 (0)