"""Notes import adapter: Markdown / plain-text note files to evidence records.

Understands the conventions most note apps export to:

* optional ``---`` front matter with simple ``key: value`` pairs
  (``title``, ``date``/``created``, ``tags: [a, b]`` or comma lists),
* a first-level ``# Heading`` as the title fallback,
* inline ``#hashtags`` in the body.

No YAML dependency: front matter is parsed with a deliberately small
``key: value`` reader. Lines it cannot read are skipped, and a block that is
never closed is left untouched in the body text.
External ids are the file's path relative to the ingest root, so re-exports
of the same notebook map onto the same evidence.
"""

from __future__ import annotations

import datetime
import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

from . import EvidenceRecord

# Public entry points of this adapter.
__all__ = ["load", "parse_note"]

# File suffixes treated as notes when walking a directory (compared lower-cased).
_NOTE_SUFFIXES = (".md", ".markdown", ".txt")
# Inline ``#hashtag``: must follow the start of the text, whitespace, ``(`` or
# ``[``; starts with a letter and is 2-41 characters long.  Captures the tag
# without its ``#``.
_TAG_RE = re.compile(r"(?:^|[\s(\[])#([A-Za-z][A-Za-z0-9_-]{1,40})\b")
# First-level Markdown heading (a single ``#`` followed by whitespace) at the
# start of any line; captures the heading text without trailing whitespace.
_HEADING_RE = re.compile(r"^#\s+(.+?)\s*$", re.MULTILINE)
# Leading ``YYYY-MM-DD`` with an optional ``HH:MM[:SS]`` time after ``T`` or a
# space.  Anchored at the start only, so trailing text is allowed.
_DATE_RE = re.compile(r"^(\d{4}-\d{2}-\d{2})(?:[T ](\d{2}:\d{2}(?::\d{2})?))?")
# Runs of word characters; used to compute the payload's word count.
_WORD_RE = re.compile(r"\w+", re.UNICODE)


def _split_tags(raw: str) -> List[str]:
    """Turn a front-matter tag value into clean, lower-cased tag names.

    Accepts ``[a, b]``, comma- or whitespace-separated text, or a single
    item; surrounding quotes and leading ``#`` are removed from every name
    and empty pieces are discarded.
    """
    pieces = (
        piece.strip("\"'").lstrip("#")
        for piece in re.split(r"[,\s]+", raw.strip().strip("[]"))
    )
    return [piece.lower() for piece in pieces if piece]


def _parse_front_matter(text: str) -> Tuple[Dict[str, Any], str]:
    """Split optional ``---`` front matter from the note body.

    The block must open with a ``---`` line at the very top of *text* and be
    closed by a ``---`` or ``...`` line.  Inside it, ``key: value`` lines are
    collected with lower-cased keys and unquoted values; ``#`` comment lines
    and anything else that is not ``key: value`` are skipped.  ``tags``
    accepts an inline list (``[a, b]``, comma- or space-separated) or a YAML
    block list of ``- item`` lines directly under a bare ``tags:`` key, and
    always yields a list of lower-cased names.

    Returns:
        ``(meta, body)``.  When the block is missing or never closed,
        ``meta`` is empty and ``body`` is *text* unchanged.
    """
    lines = text.split("\n")  # never empty: "".split("\n") == [""]
    if lines[0].strip() != "---":
        return {}, text

    meta: Dict[str, Any] = {}
    # True while reading ``- item`` lines that belong to a bare ``tags:`` key.
    in_tag_list = False
    for index, line in enumerate(lines[1:], start=1):
        stripped = line.strip()
        if stripped in ("---", "..."):
            return meta, "\n".join(lines[index + 1 :])
        if stripped.startswith("#"):
            continue
        if in_tag_list and (stripped == "-" or stripped.startswith("- ")):
            meta["tags"].extend(_split_tags(stripped[1:]))
            continue
        if ":" not in stripped:
            continue
        key, _, value = stripped.partition(":")
        key = key.strip().lower()
        value = value.strip()
        # Any new key ends a block list; only an empty ``tags:`` starts one.
        in_tag_list = key == "tags" and not value
        if key == "tags":
            meta[key] = _split_tags(value)
        elif value:
            meta[key] = value.strip("\"'")
    # No closing fence: treat the whole file as body.
    return {}, text


def _normalize_date(value: Optional[str]) -> Optional[str]:
    """Return *value* as ``YYYY-MM-DD`` or ``YYYY-MM-DDTHH:MM:SS``.

    Only the leading date, and an optional ``HH:MM[:SS]`` time after ``T`` or
    a space, is read; whatever follows (fractional seconds, a UTC offset,
    free text) is ignored.

    Returns:
        The normalized string, or ``None`` for non-string input, for text
        that does not start with a date, and for dates that do not exist on
        the calendar (``2024-02-31``).  An out-of-range time is dropped and
        the date alone is returned.
    """
    if not isinstance(value, str):
        return None
    match = _DATE_RE.match(value.strip())
    if match is None:
        return None
    day, clock = match.groups()
    # The regex only checks the digit layout; reject impossible dates here so
    # they never reach ``observed_at``.
    try:
        datetime.date.fromisoformat(day)
    except ValueError:
        return None
    if not clock:
        return day
    if len(clock) == 5:  # "HH:MM" -> "HH:MM:00"
        clock += ":00"
    try:
        datetime.time.fromisoformat(clock)
    except ValueError:
        return day
    return f"{day}T{clock}"


def parse_note(text: str, fallback_title: str) -> Dict[str, Any]:
    """Parse one note file's text into a JSON-safe payload.

    Args:
        text: Full note text, optionally starting with ``---`` front matter.
        fallback_title: Title used when neither the front matter nor a
            first-level ``# Heading`` in the body provides one.

    Returns:
        A dict with ``title``, ``text`` (the body without front matter and
        without leading/trailing newlines), ``tags`` (sorted union of
        front-matter tags and inline ``#hashtags``) and ``word_count``.
        ``created`` is added when a date could be read, and ``meta`` when
        the front matter holds any other keys.
    """
    meta, body = _parse_front_matter(text)
    body = body.strip("\n")

    title = meta.get("title")
    if not title:
        heading = _HEADING_RE.search(body)
        title = heading.group(1) if heading else fallback_title

    tags = set(meta.get("tags") or [])
    tags.update(tag.lower() for tag in _TAG_RE.findall(body))

    # Try the date keys in priority order and keep the first one that
    # normalizes, so an unparseable ``date`` does not hide a valid ``created``.
    date_keys = ("date", "created", "created_at")
    created = None
    for key in date_keys:
        created = _normalize_date(meta.get(key))
        if created:
            break

    payload: Dict[str, Any] = {
        "title": str(title),
        "text": body,
        "tags": sorted(tags),
        "word_count": len(_WORD_RE.findall(body)),
    }
    if created:
        payload["created"] = created

    # Everything not already surfaced as a dedicated payload field.
    reserved = ("title", "tags") + date_keys
    extra_meta = {key: value for key, value in meta.items() if key not in reserved}
    if extra_meta:
        payload["meta"] = extra_meta
    return payload


def load(path: str) -> List[EvidenceRecord]:
    """Load one note file, or every note file under a directory tree.

    Args:
        path: A note file, or a directory searched recursively for files
            whose suffix is in ``_NOTE_SUFFIXES`` (case-insensitive).

    Returns:
        One ``note.document`` record per file, ordered by path.  Each
        record's ``external_id`` is the file's POSIX-style path relative to
        the ingest root (the directory itself, or a single file's parent),
        and ``observed_at`` is the note's ``created`` value when present.

    Raises:
        FileNotFoundError: If *path* is neither a file nor a directory.
    """
    root = Path(path)
    if root.is_dir():
        files = sorted(
            p for p in root.rglob("*") if p.is_file() and p.suffix.lower() in _NOTE_SUFFIXES
        )
        base = root
    elif root.is_file():
        files = [root]
        base = root.parent
    else:
        raise FileNotFoundError(f"notes source not found: {path}")

    source = f"notes:{base.name}"
    records: List[EvidenceRecord] = []
    for file in files:
        # ``utf-8-sig`` drops a leading byte-order mark; with plain ``utf-8``
        # the BOM stays glued to line 1 and hides a ``---`` fence or heading.
        text = file.read_text(encoding="utf-8-sig", errors="replace")
        payload = parse_note(text, fallback_title=file.stem.replace("-", " ").replace("_", " "))
        records.append(
            EvidenceRecord(
                source=source,
                kind="note.document",
                external_id=file.relative_to(base).as_posix(),
                payload=payload,
                observed_at=payload.get("created"),
            )
        )
    return records