"""Demo dataset for the FablePool inspection CLI. Seeds a node with a small but realistic personal memory graph for the fictional user "Avery": calendar events, notes, and mock photo metadata as raw evidence; a three-level derivation graph of claims on top of it. The graph is intentionally shaped so the canonical demo flow works: * refuting ``yoga_habit`` cascades through ``exercise_pattern`` into ``wellness_profile`` and ``packing_suggestion`` (three levels deep); * correcting ``training_for`` cascades through ``trip_purpose`` into ``packing_suggestion``. All timestamps are fixed so output is stable for tests and documentation. """ from __future__ import annotations from dataclasses import dataclass, field from typing import Any, Dict from fablepool.errors import FablePoolError @dataclass class SeedResult: """Identifiers of everything the seed created, keyed by logical name.""" evidence: Dict[str, str] = field(default_factory=dict) claims: Dict[str, str] = field(default_factory=dict) @property def evidence_count(self) -> int: return len(self.evidence) @property def claim_count(self) -> int: return len(self.claims) #: Derivation depth of each seeded claim, used by docs and the seed command. CLAIM_LEVELS: Dict[str, int] = { "yoga_habit": 1, "runs_mornings": 1, "dentist": 1, "home_city": 1, "work_schedule": 1, "training_for": 1, "buys_oat_milk": 1, "peanut_caution": 1, "upcoming_trip": 1, "exercise_pattern": 2, "diet_leaning": 2, "trip_purpose": 2, "wellness_profile": 3, "packing_suggestion": 3, } def seed_demo(node: Any) -> SeedResult: """Populate ``node`` with the Avery demo dataset. Refuses to double-seed.""" for op in node.operations(): if getattr(op, "kind", None) == "claim": raise FablePoolError( "this node already contains claims; seed onto a fresh node " "(fablepool init && fablepool --node seed)" ) result = SeedResult() ev = result.evidence cl = result.claims def _id(obj: Any) -> str: for attr in ("id", "claim_id", "evidence_id", "op_id"): value = getattr(obj, attr, None) if value: return str(value) raise FablePoolError(f"seeded object {obj!r} has no identifier") # ------------------------------------------------------------------ # Raw evidence: calendar # ------------------------------------------------------------------ for name, day in (("yoga1", "02"), ("yoga2", "09"), ("yoga3", "16")): ev[name] = _id( node.record_evidence( source="calendar", media_type="calendar/event", content={ "summary": "Yoga class", "location": "Harbor Yoga, Alameda", "start": f"2025-09-{day}T18:00:00Z", "end": f"2025-09-{day}T19:00:00Z", "recurrence_hint": "weekly Tuesday", }, observed_at=f"2025-09-{day}T18:00:00Z", ) ) for name, day in (("run1", "04"), ("run2", "11")): ev[name] = _id( node.record_evidence( source="calendar", media_type="calendar/event", content={ "summary": "Morning run — half marathon plan wk 6", "start": f"2025-09-{day}T06:45:00Z", "end": f"2025-09-{day}T07:30:00Z", }, observed_at=f"2025-09-{day}T06:45:00Z", ) ) for name, date in (("dentist1", "2025-03-14"), ("dentist2", "2025-09-12")): ev[name] = _id( node.record_evidence( source="calendar", media_type="calendar/event", content={ "summary": "Dentist — Dr. Park", "location": "Alameda Dental, 1200 Park St", "start": f"{date}T10:00:00Z", "end": f"{date}T10:45:00Z", }, observed_at=f"{date}T10:00:00Z", ) ) for name, day in (("standup1", "08"), ("standup2", "15")): ev[name] = _id( node.record_evidence( source="calendar", media_type="calendar/event", content={ "summary": "Team standup", "start": f"2025-09-{day}T09:30:00Z", "end": f"2025-09-{day}T09:45:00Z", "recurrence_hint": "weekdays", }, observed_at=f"2025-09-{day}T09:30:00Z", ) ) ev["flight"] = _id( node.record_evidence( source="calendar", media_type="calendar/event", content={ "summary": "Flight UA512 SFO → PDX", "start": "2025-10-10T16:05:00Z", "end": "2025-10-10T17:55:00Z", "location": "San Francisco International Airport", "notes": "return UA517 Oct 13", }, observed_at="2025-09-20T12:00:00Z", ) ) # ------------------------------------------------------------------ # Raw evidence: notes # ------------------------------------------------------------------ ev["note_training"] = _id( node.record_evidence( source="notes", media_type="text/markdown", content={ "title": "Portland Half Marathon — training plan", "text": ( "Race day Oct 12. Week 6 of 10. Long run Sundays, tempo " "Thursdays. Goal: under 1:55. Booked hotel near Waterfront Park." ), }, observed_at="2025-09-07T20:15:00Z", ) ) ev["note_grocery"] = _id( node.record_evidence( source="notes", media_type="text/markdown", content={ "title": "Groceries", "text": "oat milk x2, bananas, lentils, sourdough, peanut-free granola, tofu", }, observed_at="2025-09-13T09:10:00Z", ) ) ev["note_allergy"] = _id( node.record_evidence( source="notes", media_type="text/markdown", content={ "title": "After dinner", "text": "Mild itchiness after pad thai again — peanuts? Watch this, maybe ask Dr. Park for a referral.", }, observed_at="2025-08-29T21:40:00Z", ) ) # ------------------------------------------------------------------ # Raw evidence: mock photo metadata # ------------------------------------------------------------------ for name, (day, hour) in ( ("photo_home1", ("05", "08")), ("photo_home2", ("12", "19")), ("photo_home3", ("19", "07")), ): ev[name] = _id( node.record_evidence( source="photos", media_type="image/metadata", content={ "filename": f"IMG_2025-09-{day}_{hour}00.jpg", "taken_at": f"2025-09-{day}T{hour}:00:00Z", "gps": {"lat": 37.7652, "lon": -122.2416}, "camera": "Pixel 8", }, observed_at=f"2025-09-{day}T{hour}:05:00Z", ) ) ev["photo_trail"] = _id( node.record_evidence( source="photos", media_type="image/metadata", content={ "filename": "IMG_2025-09-14_0712.jpg", "taken_at": "2025-09-14T07:12:00Z", "gps": {"lat": 37.8324, "lon": -122.1986}, "camera": "Pixel 8", "scene_hint": "outdoor trail", }, observed_at="2025-09-14T07:20:00Z", ) ) # ------------------------------------------------------------------ # Level 1 claims (derived directly from evidence) # ------------------------------------------------------------------ cl["yoga_habit"] = _id( node.record_claim( predicate="attends_weekly_yoga", value="Tuesdays 18:00 at Harbor Yoga", topic="fitness", confidence=0.86, sources=[ev["yoga1"], ev["yoga2"], ev["yoga3"]], rule="recurring_event_pattern", ) ) cl["runs_mornings"] = _id( node.record_claim( predicate="runs_in_the_morning", value="roughly twice a week before 08:00", topic="fitness", confidence=0.74, sources=[ev["run1"], ev["run2"], ev["photo_trail"]], rule="recurring_event_pattern", ) ) cl["dentist"] = _id( node.record_claim( predicate="dentist_is", value="Dr. Park (Alameda Dental)", topic="health", confidence=0.90, sources=[ev["dentist1"], ev["dentist2"]], rule="repeated_provider_appointment", ) ) cl["home_city"] = _id( node.record_claim( predicate="home_city", value="Alameda, CA", topic="location", confidence=0.78, sources=[ev["photo_home1"], ev["photo_home2"], ev["photo_home3"]], rule="photo_gps_cluster", ) ) cl["work_schedule"] = _id( node.record_claim( predicate="works_weekday_mornings", value="daily standup at 09:30, Monday–Friday", topic="work", confidence=0.81, sources=[ev["standup1"], ev["standup2"]], rule="recurring_event_pattern", ) ) cl["training_for"] = _id( node.record_claim( predicate="training_for_race", value="Portland Half Marathon (Oct 12)", topic="fitness", confidence=0.83, sources=[ev["note_training"], ev["run1"], ev["run2"]], rule="note_goal_extraction", ) ) cl["buys_oat_milk"] = _id( node.record_claim( predicate="buys_oat_milk", value="recurring grocery item", topic="preferences", confidence=0.66, sources=[ev["note_grocery"]], rule="note_item_frequency", ) ) cl["peanut_caution"] = _id( node.record_claim( predicate="possible_peanut_sensitivity", value="suspected, unconfirmed", topic="health", confidence=0.52, sources=[ev["note_allergy"], ev["note_grocery"]], rule="note_health_mention", ) ) cl["upcoming_trip"] = _id( node.record_claim( predicate="upcoming_trip", value="Portland, OR — Oct 10 to Oct 13", topic="travel", confidence=0.88, sources=[ev["flight"]], rule="travel_event_detection", ) ) # ------------------------------------------------------------------ # Level 2 claims (derived from level-1 claims) # ------------------------------------------------------------------ cl["exercise_pattern"] = _id( node.record_claim( predicate="exercise_pattern", value="regularly active: weekly yoga plus morning runs", topic="fitness", confidence=0.80, sources=[cl["yoga_habit"], cl["runs_mornings"]], rule="habit_aggregation", ) ) cl["diet_leaning"] = _id( node.record_claim( predicate="diet_leaning", value="plant-leaning (non-dairy milk, tofu, lentils)", topic="preferences", confidence=0.58, sources=[cl["buys_oat_milk"]], rule="preference_inference", ) ) cl["trip_purpose"] = _id( node.record_claim( predicate="trip_purpose", value="race weekend — Portland Half Marathon", topic="travel", confidence=0.76, sources=[cl["upcoming_trip"], cl["training_for"]], rule="goal_event_correlation", ) ) # ------------------------------------------------------------------ # Level 3 claims (derived from level-2 claims) # ------------------------------------------------------------------ cl["wellness_profile"] = _id( node.record_claim( predicate="wellness_profile", value="active lifestyle with a plant-leaning diet", topic="health", confidence=0.62, sources=[cl["exercise_pattern"], cl["diet_leaning"]], rule="profile_synthesis", ) ) cl["packing_suggestion"] = _id( node.record_claim( predicate="suggests_packing", value="running gear and race kit for the Portland trip", topic="travel", confidence=0.70, sources=[cl["trip_purpose"], cl["exercise_pattern"]], rule="trip_preparation", ) ) return result