"""Photo-metadata import adapter: JSON sidecar dumps to evidence records. The project's threat model keeps raw media off the protocol: a photo's *pixels* never enter the log, only its metadata (the things a personal AI would actually reason over — when, where, with what device, tagged how). The expected input is a JSON file shaped like:: {"photos": [ {"id": "IMG_0001", "filename": "IMG_0001.jpg", "taken_at": "2024-03-02T09:14:00", "gps": {"lat": 47.3769, "lon": 8.5417}, "camera": {"make": "Apple", "model": "iPhone 13"}, "tags": ["trail", "running"], "width": 4032, "height": 3024} ]} A bare JSON array of the same objects is also accepted. EXIF-style timestamps (``2024:03:02 09:14:00``) are normalised to ISO 8601, and GPS coordinates outside valid ranges are dropped rather than propagated. """ from __future__ import annotations import json import re from pathlib import Path from typing import Any, Dict, List, Optional from . import EvidenceRecord __all__ = ["load", "normalize_photo"] _EXIF_TS_RE = re.compile(r"^(\d{4}):(\d{2}):(\d{2})[ T](\d{2}:\d{2}:\d{2})$") def _normalize_timestamp(value: Any) -> Optional[str]: if not isinstance(value, str): return None value = value.strip() match = _EXIF_TS_RE.match(value) if match: year, month, day, time = match.groups() return f"{year}-{month}-{day}T{time}" if re.match(r"^\d{4}-\d{2}-\d{2}([T ]\d{2}:\d{2}(:\d{2})?)?", value): return value.replace(" ", "T", 1) return None def _normalize_gps(value: Any) -> Optional[Dict[str, float]]: if not isinstance(value, dict): return None try: lat = float(value.get("lat")) lon = float(value.get("lon")) except (TypeError, ValueError): return None if not (-90.0 <= lat <= 90.0 and -180.0 <= lon <= 180.0): return None gps: Dict[str, float] = {"lat": round(lat, 6), "lon": round(lon, 6)} alt = value.get("alt") if isinstance(alt, (int, float)): gps["alt"] = round(float(alt), 1) return gps def normalize_photo(item: Dict[str, Any]) -> Dict[str, Any]: """Normalise one raw photo-metadata object into a JSON-safe payload.""" payload: Dict[str, Any] = {} filename = item.get("filename") or item.get("file") if filename: payload["filename"] = str(filename) taken_at = _normalize_timestamp(item.get("taken_at") or item.get("datetime")) if taken_at: payload["taken_at"] = taken_at gps = _normalize_gps(item.get("gps")) if gps: payload["gps"] = gps camera = item.get("camera") if isinstance(camera, dict): cam = { key: str(camera[key]).strip() for key in ("make", "model") if isinstance(camera.get(key), str) and camera[key].strip() } if cam: payload["camera"] = cam tags = item.get("tags") if isinstance(tags, list): clean = sorted({str(tag).strip().lower() for tag in tags if str(tag).strip()}) if clean: payload["tags"] = clean for dim in ("width", "height"): value = item.get(dim) if isinstance(value, int) and value > 0: payload[dim] = value return payload def load(path: str) -> List[EvidenceRecord]: """Load a photo-metadata JSON file into evidence records.""" file = Path(path) if file.is_dir(): candidates = sorted(file.glob("*.json")) if not candidates: raise FileNotFoundError(f"no .json photo metadata found under: {path}") files = candidates elif file.is_file(): files = [file] else: raise FileNotFoundError(f"photo metadata source not found: {path}") records: List[EvidenceRecord] = [] for source_file in files: data = json.loads(source_file.read_text(encoding="utf-8")) if isinstance(data, dict): items = data.get("photos", []) elif isinstance(data, list): items = data else: raise ValueError(f"unsupported photo metadata shape in {source_file}") for index, item in enumerate(items): if not isinstance(item, dict): continue payload = normalize_photo(item) external_id = str( item.get("id") or item.get("filename") or f"photo-{index:05d}" ) records.append( EvidenceRecord( source=f"photos:{source_file.name}", kind="photo.metadata", external_id=external_id, payload=payload, observed_at=payload.get("taken_at"), ) ) return records