"""Tests for canonical JSON encoding (``mnema.core.canonical``). The canonical encoding is the foundation of every content id and signature in the protocol: two nodes must derive byte-identical encodings for semantically identical values, or ids and signatures will not interoperate. """ import json import pytest from mnema.core.canonical import canonical_json def test_returns_bytes(): assert isinstance(canonical_json({"a": 1}), bytes) def test_key_order_independence(): a = {"b": 2, "a": 1, "c": [1, 2]} b = {"c": [1, 2], "a": 1, "b": 2} assert canonical_json(a) == canonical_json(b) def test_nested_key_order_independence(): a = {"outer": {"y": [{"k": 1, "j": 2}], "x": 0}} b = {"outer": {"x": 0, "y": [{"j": 2, "k": 1}]}} assert canonical_json(a) == canonical_json(b) def test_array_order_is_significant(): # Canonicalisation must never reorder arrays - only object keys. assert canonical_json([1, 2]) != canonical_json([2, 1]) def test_round_trip_preserves_value(): value = { "s": "héllo", "n": 3, "f": 0.5, "b": True, "z": None, "l": [1, "two", {"three": 3}], } assert json.loads(canonical_json(value).decode("utf-8")) == value def test_deterministic_across_calls(): value = {"k": list(range(50)), "m": {"x": "y"}} assert canonical_json(value) == canonical_json(value) def test_distinct_values_produce_distinct_bytes(): assert canonical_json({"a": 1}) != canonical_json({"a": 2}) # Type distinctions must survive: the number 1 is not the string "1". assert canonical_json({"a": 1}) != canonical_json({"a": "1"}) def test_empty_containers_are_distinct(): assert canonical_json({}) != canonical_json([]) @pytest.mark.parametrize("bad", [float("nan"), float("inf"), float("-inf")]) def test_rejects_non_finite_numbers(bad): # NaN/Infinity have no JSON representation; allowing them would produce # encodings other implementations cannot parse. with pytest.raises((ValueError, TypeError)): canonical_json({"x": bad}) def test_unicode_round_trip(): value = {"text": "café ☕ — 東京"} decoded = json.loads(canonical_json(value).decode("utf-8")) assert decoded == value