"""
Shared fixtures and helpers for the derivation-layer test suite.

These tests exercise the public derive API delivered in earlier passes of
this milestone.  The contract they rely on (and which the implementation
modules must uphold) is:

    mnema.derive.model
        EvidenceRecord(evidence_id, source, kind, payload, observed_at)
            - evidence_id: opaque string, unique per record
            - source:      "calendar" | "notes" | "photos"
            - kind:        e.g. "calendar.event", "note.entry", "photo.meta"
            - payload:     source-shaped dict (see builders below)
            - observed_at: ISO-8601 timestamp string
        Claim
            - claim_id:    opaque string
            - predicate:   dotted string, prefixed by family ("routine.*",
                           "place.*", "relationship.*", "preference.*")
            - value:       dict describing the claim
            - confidence:  float in (0, 1]
            - inputs:      list of node ids (evidence and/or claim ids) the
                           claim was derived from -- the provenance edge set
            - status:      claim lifecycle status

    mnema.derive.engine
        DerivationEngine(derivers=[...])   # omitting derivers => default set
            .run(evidence)     -> result exposing the produced claims
            .explain(claim_id) -> explanation record for the claim

    mnema.derive.derivers.{routines,places,relationships,preferences}
        RoutineDeriver, PlaceDeriver, RelationshipDeriver, PreferenceDeriver

All helpers are exposed to tests through the single `kit` fixture so that the
test modules do not depend on test-package import mechanics.
"""

from __future__ import annotations

import itertools
import json
from datetime import datetime, timedelta, timezone

import pytest

from mnema.derive.model import EvidenceRecord

# A Monday morning, fixed so every test run sees identical timestamps.
T0 = datetime(2025, 1, 6, 8, 0, tzinfo=timezone.utc)  # 2025-01-06 08:00 UTC

# One counter shared by every builder: the numeric suffix of an evidence id is
# unique within a process, but its value depends on how many records were
# built before it.
_seq = itertools.count(1)


def _iso(dt: datetime) -> str:
    """Return `dt` rendered with `datetime.isoformat()`."""
    return dt.isoformat()


def _email_for(name: str) -> str:
    """Synthesise an example.com address from a display name.

    The name is lower-cased and each space becomes a dot, e.g.
    "Ada Lovelace" -> "ada.lovelace@example.com".
    """
    return name.lower().replace(" ", ".") + "@example.com"


# ---------------------------------------------------------------------------
# Evidence builders
# ---------------------------------------------------------------------------

def make_event(
    title, start: datetime, *, duration_min=60, attendees=(), location=None, **extra
) -> EvidenceRecord:
    """Build a calendar.event EvidenceRecord.

    `attendees` may be plain names (emails are synthesised) or full dicts,
    which are passed through unchanged.  The event ends `duration_min`
    minutes after `start`, and that end time is also used as `observed_at`.

    Any `extra` keyword arguments are merged into the payload last, so they
    can add new keys or override the standard ones.
    """
    end = start + timedelta(minutes=duration_min)
    payload = {
        "title": title,
        "start": _iso(start),
        "end": _iso(end),
        "location": location,
        "attendees": [
            a if isinstance(a, dict) else {"name": a, "email": _email_for(a)}
            for a in attendees
        ],
    }
    payload.update(extra)
    return EvidenceRecord(
        evidence_id=f"ev-cal-{next(_seq):04d}",
        source="calendar",
        kind="calendar.event",
        payload=payload,
        observed_at=_iso(end),
    )


def make_note(text, created_at: datetime | None = None, *, tags=()) -> EvidenceRecord:
    """Build a note.entry EvidenceRecord.

    `created_at` defaults to `T0`; the same timestamp is used for the
    payload's "created_at" and for the record's `observed_at`.  `tags` is
    copied into a list.
    """
    created = T0 if created_at is None else created_at
    payload = {"text": text, "tags": list(tags), "created_at": _iso(created)}
    return EvidenceRecord(
        evidence_id=f"ev-note-{next(_seq):04d}",
        source="notes",
        kind="note.entry",
        payload=payload,
        observed_at=_iso(created),
    )


def make_photo(taken_at: datetime, lat, lon, *, place_name=None, **extra) -> EvidenceRecord:
    """Build a photo.meta EvidenceRecord (mock photo metadata, no pixels).

    "place_name" is only present in the payload when one is given.  Any
    `extra` keyword arguments are merged into the payload last, so they can
    add new keys or override the standard ones.
    """
    payload = {"taken_at": _iso(taken_at), "lat": lat, "lon": lon}
    if place_name is not None:
        payload["place_name"] = place_name
    payload.update(extra)
    return EvidenceRecord(
        evidence_id=f"ev-photo-{next(_seq):04d}",
        source="photos",
        kind="photo.meta",
        payload=payload,
        observed_at=_iso(taken_at),
    )


def weekly_events(
    title, *, weekday: int, hour: int, minute: int = 0, count: int, start=None, **kwargs
) -> list:
    """`count` events with `title`, repeating weekly on `weekday` at hour:minute.

    weekday follows datetime convention: Monday == 0.

    The series is anchored on `start` (default `T0`): the first event falls
    on the first date on or after the anchor's date that matches `weekday`.
    Only the date is compared, so when the anchor is already on `weekday`
    the first event is on that same day even if hour:minute is earlier than
    the anchor's own time.  Remaining keyword arguments are forwarded to
    `make_event`.
    """
    base = T0 if start is None else start
    days_ahead = (weekday - base.weekday()) % 7
    first = (base + timedelta(days=days_ahead)).replace(
        hour=hour, minute=minute, second=0, microsecond=0
    )
    return [make_event(title, first + timedelta(weeks=i), **kwargs) for i in range(count)]


# ---------------------------------------------------------------------------
# Engine helpers
# ---------------------------------------------------------------------------

def new_engine(derivers=None):
    """Create a DerivationEngine.

    With `derivers=None` the engine is constructed with no arguments;
    otherwise the given derivers are copied into a list and passed as
    `derivers=`.
    """
    # Imported lazily so that importing this module does not itself import
    # the engine module.
    from mnema.derive.engine import DerivationEngine

    if derivers is None:
        return DerivationEngine()
    return DerivationEngine(derivers=list(derivers))


def run_claims(engine, evidence) -> list:
    """Run the engine over evidence and normalise the result to a claim list."""
    result = engine.run(list(evidence))
    # Accept either a result object exposing `.claims` or a bare iterable of
    # claims.
    claims = getattr(result, "claims", result)
    return list(claims)


def by_predicate(claims, prefix: str) -> list:
    """Return the claims whose predicate starts with `prefix`, in input order."""
    return [c for c in claims if c.predicate.startswith(prefix)]


def best(claims):
    """Highest-confidence claim in a non-empty list.

    Fails with an AssertionError when `claims` is empty.
    """
    assert claims, "expected at least one claim"
    return max(claims, key=lambda c: c.confidence)


def vdump(claim) -> str:
    """Lower-cased JSON dump of a claim's value, for substring assertions.

    Keys are sorted; values JSON cannot encode natively are rendered with
    `str()`.
    """
    return json.dumps(claim.value, default=str, sort_keys=True).lower()


def explanation_text(engine, claim) -> str:
    """Best-effort extraction of the human-readable explanation for a claim.

    Resolution order for the value returned by `engine.explain(claim_id)`:

    1. `None` yields an empty string.
    2. A string is returned as-is.
    3. Otherwise the first non-empty string attribute among `text`,
       `summary`, `narrative`, `human_text` is returned.
    4. Failing that, the whole explanation is dumped as JSON, using each
       object's `__dict__` (or its `str()` when it has none).
    """
    exp = engine.explain(claim.claim_id)
    if exp is None:
        return ""
    if isinstance(exp, str):
        return exp
    for attr in ("text", "summary", "narrative", "human_text"):
        v = getattr(exp, attr, None)
        if isinstance(v, str) and v:
            return v
    return json.dumps(exp, default=lambda o: getattr(o, "__dict__", str(o)))


_NEGATIVE_TOKENS = ("dislike", "hate", "avoid", "negative", "aversion")
_POSITIVE_TOKENS = ("like", "love", "prefer", "enjoy", "positive", "favorite", "favourite")


def polarity(claim) -> str:
    """Classify a preference claim as 'positive' or 'negative'.

    Both the lower-cased predicate and the lower-cased JSON dump of the value
    are searched by plain substring match, predicate first.  Negative tokens
    are checked first because 'dislike' contains 'like'; as a result a
    negative token in either place wins over any positive token.

    Returns 'unknown' when no token matches.
    """
    haystacks = (claim.predicate.lower(), vdump(claim))
    for hay in haystacks:
        if any(tok in hay for tok in _NEGATIVE_TOKENS):
            return "negative"
    for hay in haystacks:
        if any(tok in hay for tok in _POSITIVE_TOKENS):
            return "positive"
    return "unknown"


class Kit:
    """Namespace object handed to tests via the `kit` fixture."""

    # Shared reference timestamp (the module-level constant of the same name).
    T0 = T0

    # Evidence builders.
    make_event = staticmethod(make_event)
    make_note = staticmethod(make_note)
    make_photo = staticmethod(make_photo)
    weekly_events = staticmethod(weekly_events)

    # Engine and claim helpers.  Everything is wrapped in `staticmethod` so
    # the functions are not bound when reached through an instance.
    new_engine = staticmethod(new_engine)
    run_claims = staticmethod(run_claims)
    by_predicate = staticmethod(by_predicate)
    best = staticmethod(best)
    vdump = staticmethod(vdump)
    explanation_text = staticmethod(explanation_text)
    polarity = staticmethod(polarity)


@pytest.fixture
def kit():
    """Provide the `Kit` helper namespace (the class itself, not an instance)."""
    return Kit