"""Data model for the mnema derivation layer. This module defines the two node types of the derivation graph: * :class:`EvidenceRecord` -- an immutable observation imported from a source (calendar, notes, photo metadata, ...). Evidence is never edited; it can only be retracted, which mechanically cascades to everything derived from it. * :class:`Claim` -- a derived statement about the subject ("user"). Every claim records: - its **identity** (`subject`, `predicate`, `identity` dict): *which* fact it asserts, independent of its current value or confidence; - its full **value** payload; - the **deriver** that produced it and that deriver's version; - its exact **inputs** (evidence and/or claim ids) -- the provenance chain; - a calibrated **confidence** in ``[0.01, 0.99]``; - a lifecycle **status** driven by signed operations in the log. Two derivations of "the user does yoga on Tuesdays" share the same *claim key* even when occurrence counts or confidence differ; the newer one supersedes the older one. The claim key is also what gets *blocked* when a user refutes a claim, so the machine never silently re-asserts it. """ from __future__ import annotations import hashlib import json from dataclasses import dataclass, field from enum import Enum from typing import Any, Mapping, Optional class ClaimStatus(str, Enum): """Lifecycle of a claim, transitioned only via logged operations.""" ACTIVE = "active" #: Replaced by a newer derivation of the same claim key (new evidence, #: changed inputs, or updated confidence). Not an error state. SUPERSEDED = "superseded" #: An input was refuted or retracted; invalidation cascaded mechanically. INVALIDATED = "invalidated" #: The user explicitly said this claim is wrong. REFUTED = "refuted" def _canonical(obj: Any) -> str: """Deterministic JSON used for content-addressed keys and ids.""" return json.dumps(obj, sort_keys=True, separators=(",", ":"), ensure_ascii=False) def content_id(prefix: str, obj: Any) -> str: """Content-addressed identifier: ``_``.""" digest = hashlib.sha256(_canonical(obj).encode("utf-8")).hexdigest() return f"{prefix}_{digest[:24]}" def claim_key(subject: str, predicate: str, identity: Mapping[str, Any]) -> str: """Stable identity of a claim: *which* fact it asserts. Excludes value details, confidence, inputs and timestamps so that re-derivations of the same fact map onto the same key (supersession), and refutations of the key persist across re-derivation attempts. """ digest = hashlib.sha256( _canonical([subject, predicate, dict(identity)]).encode("utf-8") ).hexdigest() return f"ck_{digest[:32]}" @dataclass class EvidenceRecord: """One imported observation. Immutable once logged; retractable.""" evidence_id: str kind: str # e.g. "calendar.event", "note", "photo.meta", "contact" source: str # adapter id, e.g. "adapter.ics", "adapter.notes" payload: dict observed_at: str # ISO 8601 timestamp of the underlying observation op_id: Optional[str] = None # log operation that introduced it def to_dict(self) -> dict: return { "evidence_id": self.evidence_id, "kind": self.kind, "source": self.source, "payload": self.payload, "observed_at": self.observed_at, "op_id": self.op_id, } @classmethod def from_dict(cls, d: Mapping[str, Any]) -> "EvidenceRecord": return cls( evidence_id=d["evidence_id"], kind=d["kind"], source=d.get("source", "unknown"), payload=dict(d.get("payload", {})), observed_at=d["observed_at"], op_id=d.get("op_id"), ) @dataclass class Claim: """A derived statement with full provenance and a lifecycle status.""" claim_id: str subject: str # usually "user" predicate: str # e.g. "routine.weekly", "relationship.frequent_contact" identity: dict # the fields that define *which* claim this is value: dict # full payload (a superset of identity) confidence: float deriver: str # deriver id, or "user" for direct assertions deriver_version: str inputs: list = field(default_factory=list) # evidence/claim ids derived_at: str = "" status: ClaimStatus = ClaimStatus.ACTIVE status_reason: Optional[str] = None explanation_id: Optional[str] = None supersedes: Optional[str] = None # claim id this replaced, if any op_id: Optional[str] = None # log operation that asserted it def key(self) -> str: return claim_key(self.subject, self.predicate, self.identity) def to_dict(self) -> dict: return { "claim_id": self.claim_id, "subject": self.subject, "predicate": self.predicate, "identity": self.identity, "value": self.value, "confidence": self.confidence, "deriver": self.deriver, "deriver_version": self.deriver_version, "inputs": list(self.inputs), "derived_at": self.derived_at, "status": self.status.value, "status_reason": self.status_reason, "explanation_id": self.explanation_id, "supersedes": self.supersedes, "op_id": self.op_id, } @classmethod def from_dict(cls, d: Mapping[str, Any]) -> "Claim": return cls( claim_id=d["claim_id"], subject=d["subject"], predicate=d["predicate"], identity=dict(d.get("identity", {})), value=dict(d.get("value", {})), confidence=float(d["confidence"]), deriver=d["deriver"], deriver_version=d.get("deriver_version", "0"), inputs=list(d.get("inputs", [])), derived_at=d.get("derived_at", ""), status=ClaimStatus(d.get("status", "active")), status_reason=d.get("status_reason"), explanation_id=d.get("explanation_id"), supersedes=d.get("supersedes"), op_id=d.get("op_id"), )