"""Notes adapter tests: markdown and plain-text parsing, sample ingestion.""" from __future__ import annotations from pathlib import Path from helpers import blob from pmp.adapters.notes import NotesAdapter def _make_notes_dir(tmp_path: Path) -> Path: notes = tmp_path / "notes" notes.mkdir() (notes / "knee.md").write_text( "# Knee check-in\n\nKnee pain came back after the Sunday long run. " "Physio suggested reducing mileage for two weeks.\n", encoding="utf-8", ) (notes / "shopping.txt").write_text( "rye flour\nolive oil\nespresso beans\n", encoding="utf-8", ) return notes def test_ingest_single_markdown_file(tmp_path: Path): notes = _make_notes_dir(tmp_path) items = list(NotesAdapter().ingest(notes / "knee.md")) assert len(items) == 1 rendered = blob(items[0].content) assert "Knee" in rendered assert "Physio" in rendered def test_ingest_directory_picks_up_md_and_txt(tmp_path: Path): notes = _make_notes_dir(tmp_path) items = list(NotesAdapter().ingest(notes)) assert len(items) == 2 combined = blob([item.content for item in items]) assert "Knee pain" in combined assert "espresso beans" in combined def test_non_text_files_are_skipped(tmp_path: Path): notes = _make_notes_dir(tmp_path) # A binary blob that is not valid UTF-8 and is not a note format. (notes / "IMG_0001.jpg").write_bytes(b"\xff\xd8\xff\xe0" + bytes(range(256))) items = list(NotesAdapter().ingest(notes)) assert len(items) == 2, "binary non-note files must not become note evidence" def test_items_carry_provenance_fields(tmp_path: Path): notes = _make_notes_dir(tmp_path) for item in NotesAdapter().ingest(notes): assert item.content assert item.source_ref assert "note" in str(item.source_type).lower() def test_ingest_is_deterministic(tmp_path: Path): notes = _make_notes_dir(tmp_path) first = sorted(blob(item.content) for item in NotesAdapter().ingest(notes)) second = sorted(blob(item.content) for item in NotesAdapter().ingest(notes)) assert first == second def test_sample_notes_ingest(samples_dir: Path): items = list(NotesAdapter().ingest(samples_dir / "notes")) # Five sample note files ship with the repo (four markdown, one txt). assert len(items) >= 5 combined = blob([item.content for item in items]).lower() assert "sourdough" in combined assert "berlin" in combined # Each evidence item is distinct. rendered = [blob(item.content) for item in items] assert len(set(rendered)) == len(rendered)