"""Deriver interface and shared helpers. A deriver is a pure function from a :class:`DerivationContext` (a read-only view of active evidence and claims at a point in time) to a list of :class:`CandidateClaim`. Derivers do **not** mutate the graph, emit operations, mint ids, or decide supersession -- the engine owns all of that. This keeps derivers trivially testable and makes the fixpoint loop safe. Contract: * ``derive(ctx)`` must be deterministic given the context (no wall-clock reads -- use ``ctx.now``; no randomness; stable output order). * Candidates must list *every* input (evidence or claim id) that contributed, because those edges are what cascade invalidation walks. * Confidence must be composed from :mod:`mnema.derive.confidence` primitives and accounted for in ``confidence_account`` so explanations can show the arithmetic. """ from __future__ import annotations import re import unicodedata from abc import ABC, abstractmethod from collections import Counter from dataclasses import dataclass, field from datetime import datetime, timezone from typing import Any, Iterable, List, Optional, Sequence from mnema.derive.graph import DerivationGraph from mnema.derive.model import Claim, EvidenceRecord WEEKDAYS = ( "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday", ) _WS = re.compile(r"\s+") def parse_ts(value: str) -> datetime: """Parse an ISO-8601 timestamp; ``Z`` suffix accepted; naive => UTC.""" if not isinstance(value, str) or not value: raise ValueError(f"not a timestamp: {value!r}") raw = value.strip() if raw.endswith("Z") or raw.endswith("z"): raw = raw[:-1] + "+00:00" dt = datetime.fromisoformat(raw) if dt.tzinfo is None: dt = dt.replace(tzinfo=timezone.utc) return dt def iso(dt: datetime) -> str: if dt.tzinfo is None: dt = dt.replace(tzinfo=timezone.utc) return dt.astimezone(timezone.utc).isoformat().replace("+00:00", "Z") def normalize_text(s: str) -> str: """NFKC-normalise, lowercase, collapse whitespace.""" return _WS.sub(" ", unicodedata.normalize("NFKC", s or "").lower()).strip() def most_common(items: Iterable[Any]) -> Optional[Any]: counts = Counter(x for x in items if x not in (None, "")) if not counts: return None # Counter.most_common ties are insertion-ordered; sort for determinism. best = sorted(counts.items(), key=lambda kv: (-kv[1], str(kv[0]))) return best[0][0] @dataclass class CandidateClaim: """What a deriver proposes; the engine turns it into a logged claim.""" subject: str predicate: str identity: dict # fields defining *which* claim this is (drives the key) value: dict # full payload; must be a superset of identity confidence: float inputs: List[str] # evidence/claim ids -- the provenance edges summary: str # one human sentence: why the system believes this reasoning: List[str] = field(default_factory=list) # step-by-step trace confidence_account: dict = field(default_factory=dict) # the arithmetic @dataclass(frozen=True) class DeriverInfo: deriver_id: str version: str consumes_evidence: Sequence[str] # evidence kinds read consumes_predicates: Sequence[str] # claim predicates read (higher-order) produces_predicates: Sequence[str] class DerivationContext: """Read-only, point-in-time view handed to derivers.""" def __init__(self, graph: DerivationGraph, now: datetime) -> None: self._graph = graph self._now = now if now.tzinfo else now.replace(tzinfo=timezone.utc) @property def now(self) -> datetime: return self._now def evidence(self, kind: Optional[str] = None) -> List[EvidenceRecord]: """Active (non-retracted) evidence, deterministically ordered.""" return self._graph.evidence(kind=kind) def claims( self, predicate: Optional[str] = None, min_confidence: float = 0.0 ) -> List[Claim]: """ACTIVE claims only -- invalidated/refuted inputs never feed forward.""" return [ c for c in self._graph.claims(predicate=predicate) if c.confidence >= min_confidence ] def age_days(self, ts: str) -> float: """Days between ``ts`` and ``now`` (clamped at 0 for future stamps).""" delta = (self._now - parse_ts(ts)).total_seconds() / 86400.0 return max(0.0, delta) class Deriver(ABC): """Base class for all derivers. Subclasses set ``info`` and implement :meth:`derive`.""" info: DeriverInfo @abstractmethod def derive(self, ctx: DerivationContext) -> List[CandidateClaim]: """Propose candidate claims from the current context.""" raise NotImplementedError def __repr__(self) -> str: # pragma: no cover - cosmetic return f"<{type(self).__name__} {self.info.deriver_id}@{self.info.version}>"