Skip to content

Commit c2c19c3

Browse files
committed
feat(meta): add META_TRIM_META size limit for meta payload (Fixes #132)
Add a configurable `meta_trim_meta` option (default 128,000 bytes; 0 = no limit) to CoreOptions that validates the size of the decoded `meta` payload in `gen_meta_code_v0`. Despite the "trim" in its name, the option never truncates: an oversized payload raises ValueError, since meta contains structured data that cannot be meaningfully truncated. The option is also added to the conformance-critical options set.
1 parent 333a63e commit c2c19c3

4 files changed

Lines changed: 60 additions & 7 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
## [1.3.0] - Unreleased
44

5+
- Added `meta_trim_meta` option (default 128,000 bytes, 0 = no limit) that makes `gen_meta_code_v0` raise `ValueError` when the decoded `meta` payload exceeds the limit (Fixes #132)
56
- Added conformance test vectors for JCS number canonicalization in Meta-Code (Fixes #131)
67
- Migrated from Poetry to uv for dependency management and build tooling
78
- Migrated pyproject.toml from Poetry format to PEP 621 with setuptools backend

iscc_core/code_meta.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -64,18 +64,26 @@ def gen_meta_code_v0(name, description=None, meta=None, bits=ic.core_opts.meta_b
6464
# Data-URL expected
6565
durl = meta
6666
payload = DataURL.from_url(durl).data
67-
meta_code_digest = soft_hash_meta_v0(name, payload)
68-
metahash = ic.multi_hash_blake3(payload)
69-
metadata_value = durl
7067
elif isinstance(meta, dict):
7168
payload = jcs.canonicalize(meta)
72-
meta_code_digest = soft_hash_meta_v0(name, payload)
73-
metahash = ic.multi_hash_blake3(payload)
69+
else:
70+
raise TypeError(f"metadata must be Data-URL string or dict not {type(meta)}")
71+
72+
limit = ic.core_opts.meta_trim_meta
73+
if limit and len(payload) > limit:
74+
raise ValueError(
75+
f"meta payload size ({len(payload)} bytes) exceeds META_TRIM_META ({limit} bytes)"
76+
)
77+
78+
meta_code_digest = soft_hash_meta_v0(name, payload)
79+
metahash = ic.multi_hash_blake3(payload)
80+
81+
if isinstance(meta, str):
82+
metadata_value = durl
83+
else:
7484
media_type = "application/ld+json" if "@context" in meta else "application/json"
7585
durl_obj = DataURL.from_byte_data(media_type, data=payload)
7686
metadata_value = durl_obj.url
77-
else:
78-
raise TypeError(f"metadata must be Data-URL string or dict not {type(meta)}")
7987
else:
8088
payload = " ".join((name, description)).strip().encode("utf-8")
8189
meta_code_digest = soft_hash_meta_v0(name, description)

iscc_core/options.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ class Config:
3434
meta_trim_description: int = Field(
3535
4096, description="Trim `description` to this number of bytes"
3636
)
37+
meta_trim_meta: int = Field(
38+
128_000,
39+
description="Maximum decoded payload size in bytes for the meta element. 0 = no limit.",
40+
)
3741
meta_ngram_size_text: int = Field(
3842
3, description="Sliding window width (characters) for metadata"
3943
)
@@ -366,6 +370,7 @@ class Config:
366370
conformanc_critical = {
367371
"meta_trim_name",
368372
"meta_trim_description",
373+
"meta_trim_meta",
369374
"meta_ngram_size_text",
370375
"meta_ngram_size_bytes",
371376
"text_ngram_size",

tests/test_code_meta.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# -*- coding: utf-8 -*-
2+
import base64
23
import pytest
34
import iscc_core as ic
45

@@ -179,3 +180,41 @@ def test_remove_newlines():
179180
txt = " Hello\nWorld! - How Are you "
180181
exp = "Hello World! - How Are you"
181182
assert ic.text_remove_newlines(txt) == exp
183+
184+
185+
def test_meta_trim_meta_at_limit(monkeypatch):
186+
"""Payload exactly at limit should succeed."""
187+
monkeypatch.setattr(ic.core_opts, "meta_trim_meta", 100)
188+
payload = b"x" * 100
189+
encoded = base64.b64encode(payload).decode("ascii")
190+
durl = f"data:application/octet-stream;base64,{encoded}"
191+
result = ic.gen_meta_code_v0("Test", meta=durl)
192+
assert "iscc" in result
193+
194+
195+
def test_meta_trim_meta_over_limit(monkeypatch):
196+
"""Payload exceeding limit should raise ValueError."""
197+
monkeypatch.setattr(ic.core_opts, "meta_trim_meta", 100)
198+
payload = b"x" * 101
199+
encoded = base64.b64encode(payload).decode("ascii")
200+
durl = f"data:application/octet-stream;base64,{encoded}"
201+
with pytest.raises(ValueError, match="META_TRIM_META"):
202+
ic.gen_meta_code_v0("Test", meta=durl)
203+
204+
205+
def test_meta_trim_meta_disabled(monkeypatch):
206+
"""Limit of 0 disables the check."""
207+
monkeypatch.setattr(ic.core_opts, "meta_trim_meta", 0)
208+
payload = b"x" * 200_000
209+
encoded = base64.b64encode(payload).decode("ascii")
210+
durl = f"data:application/octet-stream;base64,{encoded}"
211+
result = ic.gen_meta_code_v0("Test", meta=durl)
212+
assert "iscc" in result
213+
214+
215+
def test_meta_trim_meta_dict_over_limit(monkeypatch):
216+
"""Dict meta exceeding limit after JCS canonicalization should raise ValueError."""
217+
monkeypatch.setattr(ic.core_opts, "meta_trim_meta", 50)
218+
big_dict = {"key": "v" * 100}
219+
with pytest.raises(ValueError, match="META_TRIM_META"):
220+
ic.gen_meta_code_v0("Test", meta=big_dict)

0 commit comments

Comments
 (0)