"""Canonicalization tests.

The wire format (milestone #2) requires that every signed operation is
serialised through a single canonical JSON encoding: UTF-8, lexicographically
sorted object keys, no insignificant whitespace, deterministic for equal
values regardless of construction order. Signature and hash stability across
implementations depends entirely on these properties.
"""

from __future__ import annotations

import json

import pytest

from pmp.canonical import canonical_json, hash_bytes, hash_canonical
from pmp.errors import PMPError


def test_returns_bytes():
    """The encoder output is a bytes-like object."""
    encoded = canonical_json({"a": 1})
    assert isinstance(encoded, (bytes, bytearray))


def test_key_order_is_sorted_and_deterministic():
    """Insertion order of keys does not affect output; keys appear sorted."""
    first = canonical_json({"b": 1, "a": 2, "c": 3})
    second = canonical_json({"c": 3, "a": 2, "b": 1})
    assert first == second

    text = first.decode("utf-8")
    # Locate each quoted key; the offsets must be strictly increasing.
    pos_a, pos_b, pos_c = (text.index(f'"{key}"') for key in ("a", "b", "c"))
    assert pos_a < pos_b < pos_c


def test_no_insignificant_whitespace():
    """Output for a whitespace-free value contains no space, newline or tab."""
    text = canonical_json({"a": [1, 2], "b": {"c": True}}).decode("utf-8")
    # Canonical form must not contain spaces outside string values; the
    # input has no string values, so any whitespace would be insignificant.
    for forbidden in (" ", "\n", "\t"):
        assert forbidden not in text


def test_nested_structures_roundtrip():
    """A nested value survives encode -> UTF-8 decode -> ``json.loads``."""
    original = {
        "list": [3, 1, 2],  # list order must be preserved, not sorted
        "nested": {"y": None, "x": [True, False]},
        "empty_list": [],
        "empty_obj": {},
        "int": 42,
        "str": "hello",
    }
    encoded = canonical_json(original)
    decoded = json.loads(encoded.decode("utf-8"))
    assert decoded == original


def test_list_order_preserved():
    """Lists with the same elements in different order encode differently."""
    descending = canonical_json({"k": [2, 1]})
    ascending = canonical_json({"k": [1, 2]})
    assert descending != ascending


def test_unicode_values_roundtrip():
    """Non-ASCII string values (including astral-plane) round-trip intact."""
    original = {"name": "Çağrı", "city": "Zürich", "emoji": "🥐"}
    encoded = canonical_json(original)
    decoded = json.loads(encoded.decode("utf-8"))
    assert decoded == original


def test_equal_values_hash_equal():
    """Equal values built in different key order hash to the same result."""
    forward = hash_canonical({"x": 1, "y": [1, 2, 3]})
    backward = hash_canonical({"y": [1, 2, 3], "x": 1})
    assert forward == backward


def test_different_values_hash_differently():
    """A changed value, or a changed value type, changes the hash."""
    for other in ({"x": 2}, {"x": "1"}):
        assert hash_canonical({"x": 1}) != hash_canonical(other)


def test_hash_format_is_sha256_hex():
    """The part after the last ``:`` is 64 lowercase hexadecimal characters."""
    # rpartition yields the whole string as the tail when no ":" is present,
    # matching the behaviour of split(":")[-1].
    digest = hash_canonical({"x": 1}).rpartition(":")[2]
    assert len(digest) == 64
    assert set(digest).issubset("0123456789abcdef")


def test_hash_bytes_matches_known_input_shape():
    """``hash_bytes`` is repeatable, input-sensitive, and 64 chars long."""
    baseline = hash_bytes(b"hello")
    repeated = hash_bytes(b"hello")
    altered = hash_bytes(b"hellp")
    assert baseline == repeated
    assert baseline != altered

    digest = baseline.rpartition(":")[2]
    assert len(digest) == 64


def test_non_serialisable_values_rejected():
    """Encoding an arbitrary ``object()`` raises one of the accepted errors."""
    accepted_errors = (PMPError, TypeError, ValueError)
    with pytest.raises(accepted_errors):
        canonical_json({"x": object()})