"""Content-addressed identifiers. All identifiers in Mnema are derived from BLAKE2b-256 over MNC-1 canonical bytes and carry a short type prefix: ``op_<64 hex>`` Operation id: hash of the full signed envelope (including ``sig``). ``ck_<64 hex>`` Claim key: a *stable identity* for a claim, derived from ``(subject, predicate, discriminator)``. Re-derivations of the same logical claim share a claim key, which is what lets corrections and refutations target "this belief" rather than one log entry. ``vf_<64 hex>`` Value fingerprint: hash of a claim's value object, used so a refutation of a *specific value* can suppress re-assertion of that value without suppressing the whole claim key. ``mnk_<64 hex>`` Author id: hex of an ed25519 public key. """ from __future__ import annotations import hashlib from typing import Any, Mapping, Optional, Union from mnema.core.canonical import canonical_bytes __all__ = [ "OP_ID_PREFIX", "CLAIM_KEY_PREFIX", "VALUE_FP_PREFIX", "AUTHOR_ID_PREFIX", "blake2b_256", "op_id_for_wire", "claim_key", "value_fingerprint", "is_op_id", "is_claim_key", "is_author_id", ] OP_ID_PREFIX = "op_" CLAIM_KEY_PREFIX = "ck_" VALUE_FP_PREFIX = "vf_" AUTHOR_ID_PREFIX = "mnk_" _HEX_LEN = 64 _HEX_CHARS = frozenset("0123456789abcdef") def blake2b_256(data: bytes) -> bytes: """BLAKE2b with a 32-byte digest (the protocol hash function).""" return hashlib.blake2b(data, digest_size=32).digest() def _hashed_id(prefix: str, obj: Any) -> str: return prefix + blake2b_256(canonical_bytes(obj)).hex() def op_id_for_wire(envelope_without_id: Mapping[str, Any]) -> str: """Compute the operation id for a signed envelope. The input must contain exactly the keys ``v, type, author, ts, prev, body, sig`` (i.e. the wire envelope minus ``op_id``). """ expected = {"v", "type", "author", "ts", "prev", "body", "sig"} got = set(envelope_without_id.keys()) if got != expected: raise ValueError( f"op_id input must have exactly keys {sorted(expected)}, got {sorted(got)}" ) return _hashed_id(OP_ID_PREFIX, dict(envelope_without_id)) def claim_key( subject: str, predicate: str, discriminator: Optional[Union[str, Mapping[str, Any]]] = None, ) -> str: """Stable identity for a logical claim. *discriminator* distinguishes claims that share a subject and predicate but are about different things — e.g. two ``routine.weekly`` claims about different activities. It must be a canonical-encodable string or mapping, and derivers must build it only from *normalized* fields so that re-derivation yields the same key. """ if not subject: raise ValueError("claim_key: subject must be non-empty") if not predicate: raise ValueError("claim_key: predicate must be non-empty") payload: dict = {"s": subject, "p": predicate} if discriminator is not None: payload["d"] = discriminator if isinstance(discriminator, str) else dict(discriminator) return _hashed_id(CLAIM_KEY_PREFIX, payload) def value_fingerprint(value: Mapping[str, Any]) -> str: """Content fingerprint of a claim value object.""" return _hashed_id(VALUE_FP_PREFIX, dict(value)) def _has_form(s: object, prefix: str) -> bool: if not isinstance(s, str): return False if not s.startswith(prefix): return False hexpart = s[len(prefix):] return len(hexpart) == _HEX_LEN and all(c in _HEX_CHARS for c in hexpart) def is_op_id(s: object) -> bool: return _has_form(s, OP_ID_PREFIX) def is_claim_key(s: object) -> bool: return _has_form(s, CLAIM_KEY_PREFIX) def is_author_id(s: object) -> bool: return _has_form(s, AUTHOR_ID_PREFIX)