"""Canonicalisation: same data must always hash to the same bytes. Article 1 (source of truth) and Article 8 (audit ledger) both depend on a stable canonical form: votes are signed over canonical bytes and the ledger chains canonical hashes. These tests pin the properties that the rest of the pipeline relies on. """ from __future__ import annotations import re from govtool import canonical from tests.helpers import resolve_fn HEX64 = re.compile(r"^[0-9a-f]{64}$") def _dumps(obj) -> bytes: fn = resolve_fn(canonical, "canonical_json", "canonical_dumps", "dumps", "to_canonical") out = fn(obj) return out if isinstance(out, bytes) else str(out).encode("utf-8") def _hash(obj) -> str: fn = resolve_fn(canonical, "canonical_hash", "hash_obj", "sha256_hex", "digest", "content_hash") return str(fn(obj)) def test_key_order_does_not_matter(): a = {"beta": 1, "alpha": [1, 2, {"z": 0, "y": None}], "gamma": "x"} b = {"gamma": "x", "alpha": [1, 2, {"y": None, "z": 0}], "beta": 1} assert _dumps(a) == _dumps(b) assert _hash(a) == _hash(b) def test_list_order_does_matter(): assert _hash({"votes": ["yes", "no"]}) != _hash({"votes": ["no", "yes"]}) def test_deterministic_across_calls(): obj = {"id": "prop-001", "tally": {"yes": 4, "no": 1}, "nested": [{"a": 1}, {"b": 2}]} assert _dumps(obj) == _dumps(obj) assert _hash(obj) == _hash(obj) def test_hash_is_sha256_hex(): digest = _hash({"hello": "world"}) assert HEX64.match(digest), f"expected 64 lowercase hex chars, got {digest!r}" def test_distinct_objects_distinct_hashes(): assert _hash({"yes": 1}) != _hash({"yes": 2}) assert _hash({"yes": 1}) != _hash({"no": 1}) def test_unicode_is_stable(): obj = {"name": "Citizen \u00e9\u00e8\u00ea \u4e16\u754c", "emoji": "\U0001f5f3\ufe0f"} first = _dumps(obj) second = _dumps(obj) assert first == second assert _hash(obj) == _hash(obj) def test_scalar_types_are_distinguished(): # The string "1" and the integer 1 must not collide in canonical form. assert _hash({"v": 1}) != _hash({"v": "1"})