"""Photo metadata adapter tests against the synthetic EXIF-style dataset.""" from __future__ import annotations import json import shutil from pathlib import Path from helpers import blob from pmp.adapters.photos import PhotosAdapter def _record_count(data) -> int | None: """Number of photo records in the sample JSON, tolerant of either a top-level list or a single wrapping object containing a list.""" if isinstance(data, list): return len(data) if isinstance(data, dict): for value in data.values(): if isinstance(value, list): return len(value) return None def test_sample_photos_ingest(samples_dir: Path): sample = samples_dir / "photos" / "avery-photos.json" items = list(PhotosAdapter().ingest(sample)) assert len(items) >= 1 expected = _record_count(json.loads(sample.read_text(encoding="utf-8"))) if expected is not None: assert len(items) == expected, "one evidence item per photo record" rendered = [blob(item.content) for item in items] assert all(rendered) assert len(set(rendered)) == len(rendered), "photo evidence items must be distinct" def test_items_carry_provenance_fields(samples_dir: Path): sample = samples_dir / "photos" / "avery-photos.json" for item in PhotosAdapter().ingest(sample): assert item.content assert item.source_ref assert "photo" in str(item.source_type).lower() def test_ingest_is_deterministic(samples_dir: Path): sample = samples_dir / "photos" / "avery-photos.json" first = [blob(item.content) for item in PhotosAdapter().ingest(sample)] second = [blob(item.content) for item in PhotosAdapter().ingest(sample)] assert first == second def test_identical_copy_yields_identical_content(samples_dir: Path, tmp_path: Path): """Content extracted from a byte-identical copy must match, which is the property node-level deduplication relies on.""" sample = samples_dir / "photos" / "avery-photos.json" copy = tmp_path / "copy.json" shutil.copyfile(sample, copy) original = [blob(item.content) for item in PhotosAdapter().ingest(sample)] copied = [blob(item.content) for item in PhotosAdapter().ingest(copy)] assert original == copied