"""Canonical serialization tests (spec/02-wire-format/01-encoding-and-identifiers.md). The canonical form is a JCS-style deterministic JSON encoding: UTF-8, keys sorted, no insignificant whitespace, integers only (no floats), minimal string escaping with literal UTF-8 for non-ASCII characters. """ from __future__ import annotations import pytest from fpcf import canonical from fpcf.errors import ConformanceError def enc(value) -> bytes: return canonical.canonical_encode(value) # --------------------------------------------------------------------------- # Object key ordering and structure # --------------------------------------------------------------------------- def test_keys_are_sorted(): assert enc({"b": 1, "a": 2}) == b'{"a":2,"b":1}' def test_key_sorting_ignores_insertion_order(): a = {"x": 1, "y": 2, "z": 3} b = {"z": 3, "x": 1, "y": 2} assert enc(a) == enc(b) def test_nested_objects_sorted_recursively(): value = {"outer": {"b": [1, {"d": 4, "c": 3}], "a": 0}} assert enc(value) == b'{"outer":{"a":0,"b":[1,{"c":3,"d":4}]}}' def test_array_order_is_preserved(): assert enc([3, 1, 2]) == b"[3,1,2]" def test_no_insignificant_whitespace(): out = enc({"a": [1, 2], "b": {"c": True}}) assert b" " not in out assert b"\n" not in out assert b"\t" not in out def test_empty_containers(): assert enc({}) == b"{}" assert enc([]) == b"[]" # --------------------------------------------------------------------------- # Scalars # --------------------------------------------------------------------------- def test_integers(): assert enc(0) == b"0" assert enc(10) == b"10" assert enc(-7) == b"-7" # Largest exactly-representable integer permitted by the spec. assert enc(2**53 - 1) == b"9007199254740991" def test_booleans_and_null(): assert enc(True) == b"true" assert enc(False) == b"false" assert enc(None) == b"null" def test_floats_are_rejected(): with pytest.raises(ConformanceError): enc(1.5) def test_float_valued_integers_are_rejected(): # 1.0 is still a float at the data-model level; the wire format has no # float type, so encoders must refuse it rather than silently coerce. with pytest.raises(ConformanceError): enc(1.0) def test_nan_and_infinity_are_rejected(): for bad in (float("nan"), float("inf"), float("-inf")): with pytest.raises(ConformanceError): enc(bad) def test_non_string_keys_are_rejected(): with pytest.raises(ConformanceError): enc({1: "a"}) # --------------------------------------------------------------------------- # Strings # --------------------------------------------------------------------------- def test_string_short_escapes(): assert enc('a\nb"c\\d') == b'"a\\nb\\"c\\\\d"' def test_control_characters_use_unicode_escapes(): assert enc("\x01") == b'"\\u0001"' def test_non_ascii_is_literal_utf8(): # JCS-style: non-ASCII characters are emitted as literal UTF-8 bytes, # not \u escapes. assert enc("é") == b'"\xc3\xa9"' # --------------------------------------------------------------------------- # Decode and round-trips # --------------------------------------------------------------------------- def test_roundtrip(): value = {"a": [1, 2, {"b": None, "c": True}], "d": "x\ny", "e": -3} assert canonical.canonical_decode(enc(value)) == value def test_encode_is_idempotent_on_canonical_bytes(): data = b'{"a":1,"b":[true,null,"x"]}' assert enc(canonical.canonical_decode(data)) == data def test_decode_rejects_duplicate_keys(): with pytest.raises(ConformanceError): canonical.canonical_decode(b'{"a":1,"a":2}') def test_decode_rejects_invalid_utf8(): with pytest.raises(ConformanceError): canonical.canonical_decode(b'{"a":"\xff"}') def test_errors_carry_machine_readable_codes(): try: enc(1.5) except ConformanceError as exc: assert isinstance(exc.code, str) and exc.code.startswith("ERR_") else: # pragma: no cover - defensive pytest.fail("expected ConformanceError")