"""Relationship inference from calendar attendees, photo people, and note mentions. Two layers: 1. ``relationship.frequent_contact`` (evidence -> claim): a person who co-occurs with the user in >= :data:`MIN_INTERACTIONS` observations. Per-channel support scores are combined with noisy-OR, so multi-channel presence (calendar **and** photos **and** notes) earns more confidence than the same count in one channel. 2. ``relationship.close`` (claim -> claim): a frequent contact whose claim confidence is >= :data:`CLOSE_MIN_CONFIDENCE` and who appears in >= 2 channels. Its only input is the frequent-contact *claim*, which is what makes refuting the lower claim cascade into this one -- the canonical cascade-invalidation demo. """ from __future__ import annotations import re from collections import Counter from dataclasses import dataclass, field from typing import Dict, Iterable, List, Optional, Tuple from mnema.derive.confidence import ( clamp, combine_independent, discount, support_curve, temporal_decay, ) from mnema.derive.derivers.base import ( CandidateClaim, DerivationContext, Deriver, DeriverInfo, iso, normalize_text, parse_ts, ) MIN_INTERACTIONS = 3 CHANNEL_SUPPORT_MIDPOINT = 2.0 CHANNEL_SUPPORT_CEILING = 0.7 HALF_LIFE_DAYS = 180.0 GRACE_DAYS = 21.0 CLOSE_MIN_CONFIDENCE = 0.55 CLOSE_MIN_CHANNELS = 2 CLOSE_DISCOUNT = 0.9 _NAME_EMAIL = re.compile(r"^\s*(?P[^<]*?)\s*<\s*(?P[^>]+?)\s*>\s*$") _MENTION = re.compile(r"@([A-Za-z][\w'\-]*)") def _parse_attendee(raw) -> Optional[Tuple[str, str]]: """Return (person_key, display_name) or None.""" name, email = "", "" if isinstance(raw, dict): name = str(raw.get("name") or "").strip() email = str(raw.get("email") or "").strip() elif isinstance(raw, str): m = _NAME_EMAIL.match(raw) if m: name, email = m.group("name").strip(), m.group("email").strip() elif "@" in raw: email = raw.strip() else: name = raw.strip() if email: return email.lower(), (name or email) if name: return normalize_text(name), name return None def _note_text(payload: dict) -> str: return str(payload.get("text") or payload.get("body") or payload.get("content") or "") @dataclass class _PersonStats: display: str channels: Counter = field(default_factory=Counter) evidence_ids: set = field(default_factory=set) last_seen: Optional[object] = None # datetime def record(self, channel: str, evidence_id: str, seen) -> None: self.channels[channel] += 1 self.evidence_ids.add(evidence_id) if seen is not None and (self.last_seen is None or seen > self.last_seen): self.last_seen = seen class RelationshipsDeriver(Deriver): info = DeriverInfo( deriver_id="mnema.relationships", version="1.0.0", consumes_evidence=("calendar.event", "photo.meta", "note"), consumes_predicates=("relationship.frequent_contact",), produces_predicates=("relationship.frequent_contact", "relationship.close"), ) def __init__(self, self_aliases: Iterable[str] = ()) -> None: #: Identifiers of the user themself (own emails/names), to be skipped. self.self_aliases = {normalize_text(a) for a in self_aliases} # ------------------------------------------------------------------ # def derive(self, ctx: DerivationContext) -> List[CandidateClaim]: return self._frequent_contacts(ctx) + self._close_ties(ctx) def _is_self(self, key: str, display: str) -> bool: return key in self.self_aliases or normalize_text(display) in self.self_aliases def _frequent_contacts(self, ctx: DerivationContext) -> List[CandidateClaim]: people: Dict[str, _PersonStats] = {} def record(key: str, display: str, channel: str, evidence_id: str, seen) -> None: if self._is_self(key, display): return stats = people.setdefault(key, _PersonStats(display=display)) stats.record(channel, evidence_id, seen) for ev in ctx.evidence("calendar.event"): seen = None for k in ("start", "start_time", "dtstart", "begin"): if ev.payload.get(k): try: seen = parse_ts(ev.payload[k]) except ValueError: seen = None break for raw in ev.payload.get("attendees") or []: parsed = _parse_attendee(raw) if parsed: record(parsed[0], parsed[1], "calendar", ev.evidence_id, seen) for ev in ctx.evidence("photo.meta"): try: seen = parse_ts(ev.payload.get("taken_at") or ev.observed_at) except ValueError: seen = None for raw in ev.payload.get("people") or []: name = str(raw).strip() if name: record(normalize_text(name), name, "photos", ev.evidence_id, seen) for ev in ctx.evidence("note"): try: seen = parse_ts(ev.observed_at) except ValueError: seen = None for name in _MENTION.findall(_note_text(ev.payload)): record(normalize_text(name), name, "notes", ev.evidence_id, seen) out: List[CandidateClaim] = [] for key in sorted(people): stats = people[key] total = sum(stats.channels.values()) if total < MIN_INTERACTIONS: continue channel_scores = { ch: support_curve( count, midpoint=CHANNEL_SUPPORT_MIDPOINT, ceiling=CHANNEL_SUPPORT_CEILING, ) for ch, count in sorted(stats.channels.items()) } base = combine_independent(channel_scores.values()) age = 0.0 if stats.last_seen is not None: age = max(0.0, ctx.age_days(iso(stats.last_seen)) - GRACE_DAYS) conf = clamp(temporal_decay(base, age, HALF_LIFE_DAYS)) channels = dict(sorted(stats.channels.items())) out.append( CandidateClaim( subject="user", predicate="relationship.frequent_contact", identity={"person": key}, value={ "person": key, "name": stats.display, "channels": channels, "interactions": total, "last_seen": iso(stats.last_seen) if stats.last_seen else None, }, confidence=conf, inputs=sorted(stats.evidence_ids), summary=( f"{stats.display} appears in {total} observations " f"across {len(channels)} channel(s): " + ", ".join(f"{ch}={n}" for ch, n in channels.items()) + "." ), reasoning=[ f"Co-occurrences with the user: " + ", ".join(f"{n}x via {ch}" for ch, n in channels.items()) + f" (total {total}, threshold {MIN_INTERACTIONS}).", "Per-channel support scores combined with noisy-OR, " "so multi-channel presence earns extra confidence.", ], confidence_account={ "method": "noisy_or(per-channel support_curve) * temporal_decay", "channel_scores": { ch: round(s, 6) for ch, s in channel_scores.items() }, "support_midpoint": CHANNEL_SUPPORT_MIDPOINT, "support_ceiling": CHANNEL_SUPPORT_CEILING, "base": round(base, 6), "age_days_past_grace": round(age, 3), "half_life_days": HALF_LIFE_DAYS, "result": round(conf, 6), }, ) ) return out # ------------------------------------------------------------------ # def _close_ties(self, ctx: DerivationContext) -> List[CandidateClaim]: out: List[CandidateClaim] = [] for claim in ctx.claims( "relationship.frequent_contact", min_confidence=CLOSE_MIN_CONFIDENCE ): channels = claim.value.get("channels") or {} active_channels = [ch for ch, n in channels.items() if n > 0] if len(active_channels) < CLOSE_MIN_CHANNELS: continue conf = clamp(discount(claim.confidence, CLOSE_DISCOUNT)) person = claim.value.get("person", claim.identity.get("person", "")) name = claim.value.get("name", person) out.append( CandidateClaim( subject="user", predicate="relationship.close", identity={"person": person}, value={ "person": person, "name": name, "channels": sorted(active_channels), "based_on": claim.claim_id, }, confidence=conf, inputs=[claim.claim_id], summary=( f"{name} is likely a close tie: frequent contact " f"(confidence {claim.confidence:.2f}) across " f"{len(active_channels)} channels." ), reasoning=[ f"Derived from claim {claim.claim_id} " f"(relationship.frequent_contact, " f"confidence {claim.confidence:.4f}).", f"Presence in >= {CLOSE_MIN_CHANNELS} channels " f"({', '.join(sorted(active_channels))}) suggests the " "relationship is personal, not incidental.", "If the frequent-contact claim is refuted, this claim " "is mechanically invalidated.", ], confidence_account={ "method": "input_confidence * close_discount", "input_claim": claim.claim_id, "input_confidence": round(claim.confidence, 6), "close_discount": CLOSE_DISCOUNT, "result": round(conf, 6), }, ) ) out.sort(key=lambda c: c.identity["person"]) return out