"""Notes adapter: Markdown / plain-text files -> ``note.document`` evidence.

Handles ``.md``, ``.markdown``, and ``.txt`` files.  For Markdown it
extracts a small amount of useful structure without pulling in a parser
dependency:

* YAML-style front matter between leading ``---`` fences -- a restricted
  dialect: ``key: value`` scalars, inline ``[a, b]`` lists, and indented
  ``- item`` block lists.  Values are kept as strings (no type coercion;
  interpretation is derivation's job).
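* the title: front matter ``title``, else the note's first heading when
  that heading is a level-1 ``# `` heading (Markdown only), else the first
  non-empty line truncated to 120 characters (plain text only), else the
  filename stem;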
* headings, ``#hashtags``, ``[[wikilinks]]``, and ``[text](url)`` links;
* the full body text (this is the user's own local data; evidence is
  supposed to be faithful) plus a word count.

``external_id`` is the path relative to the import root, so editing a
note produces a superseding evidence op rather than a duplicate.
``observed_at`` prefers the first non-empty front matter value among
``updated``, ``modified``, ``date`` and ``created`` (in that order),
falling back to the file's mtime in UTC.
"""
from __future__ import annotations

import re
from datetime import datetime, timezone
from pathlib import Path
from typing import Iterator, Optional

from pmp.adapters.base import Adapter, EvidenceItem, prune, register
from pmp.errors import AdapterError

# ATX heading: 1-6 ``#`` at the start of a line, same-line whitespace, then
# the heading text (trailing whitespace trimmed).  The separator excludes
# newlines on purpose: with plain ``\s+`` a bare ``#`` line would swallow the
# *next* line as its heading text.
_HEADING_RE = re.compile(r"^(#{1,6})[^\S\n]+(.+?)\s*$", re.MULTILINE)
# ``#tag``: must start with an ASCII letter and must not be preceded by a
# word character, ``&`` or ``#`` (so ``a#b``, ``&#...`` and ``##`` are ignored).
_HASHTAG_RE = re.compile(r"(?<![\w&#])#([A-Za-z][\w\-/]*)")
# ``[[target]]`` or ``[[target|alias]]``; only the target is captured.
_WIKILINK_RE = re.compile(r"\[\[([^\[\]|]+)(?:\|[^\[\]]*)?\]\]")
# ``[text](url)``; the url may not contain whitespace or ``)``.
_MDLINK_RE = re.compile(r"\[([^\]]+)\]\(([^)\s]+)\)")
# Runs of word characters, used for the word count.
_WORD_RE = re.compile(r"\w+", re.UNICODE)
# ``- item`` block-list entry inside front matter (any indentation).
_LIST_ITEM_RE = re.compile(r"^\s*-\s+(.*)$")


def _strip_quotes(value: str) -> str:
    """Trim whitespace, then drop one pair of matching surrounding quotes.

    Only a matching pair of ``'`` or ``"`` at both ends is removed; a lone
    quote character or mismatched quotes are left untouched.
    """
    trimmed = value.strip()
    if len(trimmed) < 2:
        return trimmed
    opener, closer = trimmed[0], trimmed[-1]
    if opener == closer and opener in ("'", '"'):
        return trimmed[1:-1]
    return trimmed


def parse_front_matter(text: str) -> tuple[dict, str]:
    """Split off and parse front matter; returns (metadata, body).

    Only a restricted YAML dialect is supported (see module docstring).
    Lines that don't fit the dialect are skipped rather than failing the
    whole import -- front matter is auxiliary metadata, not the payload.

    Args:
        text: Full contents of the note.

    Returns:
        ``(meta, body)``.  ``meta`` maps each key to a ``str`` or a list of
        ``str``; ``body`` is everything after the closing fence.  When
        *text* does not open with a ``---`` line, or no closing ``---`` /
        ``...`` line follows, the result is ``({}, text)`` with *text*
        returned unchanged.
    """
    lines = text.split("\n")
    # A UTF-8 byte-order mark decodes to U+FEFF, which str.strip() does not
    # treat as whitespace; without this a BOM-prefixed file would never be
    # recognised as having front matter.
    if lines[0].lstrip("\ufeff").strip() != "---":
        return {}, text

    end = None
    for index, line in enumerate(lines[1:], start=1):
        if line.strip() in ("---", "..."):
            end = index
            break
    if end is None:
        # Unterminated fence: treat the whole file as body.
        return {}, text

    meta: dict[str, object] = {}
    # Key whose ``- item`` lines are currently being collected, if any.
    current_list_key: Optional[str] = None
    for raw in lines[1:end]:
        stripped = raw.strip()
        if not stripped or stripped.startswith("#"):
            continue  # blank line or comment

        item = _LIST_ITEM_RE.match(raw)
        if item:
            # A list item only means something under an open ``key:``.  An
            # orphan item is outside the dialect and is skipped -- otherwise
            # ``- a: b`` would be recorded under the bogus key ``"- a"``.
            if current_list_key is not None:
                target = meta.setdefault(current_list_key, [])
                if isinstance(target, list):
                    target.append(_strip_quotes(item.group(1)))
            continue

        # Split on the first colon only, so values such as URLs or
        # timestamps keep their own colons.
        key, sep, value = raw.partition(":")
        key = key.strip()
        if not sep or not key:
            continue
        value = value.strip()

        if value == "":
            # ``key:`` with nothing after it opens a block list.
            meta[key] = []
            current_list_key = key
        elif value.startswith("[") and value.endswith("]"):
            # Inline list; items are split on commas with no quote awareness.
            inner = value[1:-1].strip()
            meta[key] = (
                [_strip_quotes(v) for v in inner.split(",") if v.strip()]
                if inner
                else []
            )
            current_list_key = None
        else:
            meta[key] = _strip_quotes(value)
            current_list_key = None

    body = "\n".join(lines[end + 1:])
    return meta, body


def _mtime_iso(path: Path) -> str:
    """Return *path*'s modification time as a UTC ISO-8601 string.

    The value has second precision and uses a ``Z`` suffix instead of
    ``+00:00``.
    """
    modified = datetime.fromtimestamp(path.stat().st_mtime, tz=timezone.utc)
    stamp = modified.isoformat(timespec="seconds")
    return stamp.replace("+00:00", "Z")


@register
class NotesAdapter(Adapter):
    """Imports markdown and plain-text notes as ``note.document`` evidence.

    Each file yielded by ``iter_files`` for :attr:`SUFFIXES` becomes one
    evidence item.  ``.md`` / ``.markdown`` files get front matter, heading,
    wikilink and link extraction; ``.txt`` files are kept as plain text.
    ``#hashtags`` are collected for both formats.
    """

    name = "notes"
    version = "1.0.0"
    content_types = ("note.document",)

    SUFFIXES = (".md", ".markdown", ".txt")

    @staticmethod
    def _note_title(
        meta: dict, headings: list, body: str, path: Path, is_markdown: bool
    ) -> str:
        """Pick the title: front matter, leading H1, first text line, or stem."""
        fm_title = meta.get("title")
        # A blank title (e.g. ``title: ""``) carries no information; fall
        # through to the other sources, as empty date keys already do.
        if isinstance(fm_title, str) and fm_title.strip():
            return fm_title
        # Only a level-1 heading that is also the first heading counts.
        if headings and headings[0]["level"] == 1:
            return headings[0]["text"]
        if not is_markdown:
            first_line = next(
                (ln.strip() for ln in body.splitlines() if ln.strip()), ""
            )
            if first_line:
                return first_line[:120]
        return path.stem

    @staticmethod
    def _note_tags(meta: dict, body: str) -> list:
        """Merge front matter ``tags`` with body hashtags, de-duplicated in order."""
        tags: list[str] = []
        fm_tags = meta.get("tags")
        if isinstance(fm_tags, list):
            tags.extend(str(t).lstrip("#") for t in fm_tags)
        elif isinstance(fm_tags, str):
            # A scalar ``tags: a, b`` is treated as a comma-separated list.
            tags.extend(
                t.strip().lstrip("#") for t in fm_tags.split(",") if t.strip()
            )
        tags.extend(_HASHTAG_RE.findall(body))
        # dict.fromkeys keeps first-seen order while dropping duplicates.
        return [t for t in dict.fromkeys(tags) if t]

    @staticmethod
    def _note_observed_at(meta: dict, fallback: str) -> str:
        """Return the first non-empty front matter date string, else *fallback*.

        The front matter value is passed through verbatim; it is not parsed
        or validated here.
        """
        for key in ("updated", "modified", "date", "created"):
            value = meta.get(key)
            if isinstance(value, str) and value:
                return value
        return fallback

    def parse(self, source: Path) -> Iterator[EvidenceItem]:
        """Yield one ``note.document`` evidence item per note under *source*.

        Args:
            source: A note file or a directory.  Candidate files come from
                ``self.iter_files(source, self.SUFFIXES)``.

        Yields:
            An ``EvidenceItem`` whose ``external_id`` is the file's POSIX
            path relative to the import root (*source* itself when it is a
            directory, otherwise its parent directory).

        Raises:
            AdapterError: If a file cannot be read or is not valid UTF-8.
        """
        source = Path(source)
        root = source if source.is_dir() else source.parent
        for path in self.iter_files(source, self.SUFFIXES):
            try:
                # ``utf-8-sig`` is plain UTF-8 that also drops a leading
                # byte-order mark.  Left in place, the BOM hides the front
                # matter fence and a first-line heading, and leaks into the
                # stored text and plain-text titles.
                text = path.read_text(encoding="utf-8-sig")
            except (OSError, UnicodeDecodeError) as exc:
                raise AdapterError(f"cannot read {path}: {exc}") from exc

            is_markdown = path.suffix.lower() in (".md", ".markdown")
            rel_path = path.relative_to(root).as_posix()

            if is_markdown:
                meta, body = parse_front_matter(text)
                headings = [
                    {"level": len(hashes), "text": heading}
                    for hashes, heading in _HEADING_RE.findall(body)
                ]
                wikilinks = _WIKILINK_RE.findall(body)
                links = [
                    {"text": label, "url": url}
                    for label, url in _MDLINK_RE.findall(body)
                ]
            else:
                # Plain text: no structure extraction beyond hashtags.
                meta, body = {}, text
                headings, wikilinks, links = [], [], []

            # Front matter values must be canonical-safe: coerce to str/list[str].
            front_matter = {
                k: ([str(x) for x in v] if isinstance(v, list) else str(v))
                for k, v in meta.items()
            }

            mtime = _mtime_iso(path)

            content = prune(
                {
                    "path": rel_path,
                    "title": self._note_title(
                        meta, headings, body, path, is_markdown
                    ),
                    "format": "markdown" if is_markdown else "text",
                    "text": body,
                    "front_matter": front_matter,
                    "tags": self._note_tags(meta, body),
                    "headings": headings,
                    "wikilinks": wikilinks,
                    "links": links,
                    "word_count": len(_WORD_RE.findall(body)),
                    "modified_at": mtime,
                }
            )

            yield EvidenceItem(
                content_type="note.document",
                content=content,
                external_id=rel_path,
                observed_at=self._note_observed_at(meta, mtime),
                origin=path.resolve().as_uri(),
            )