"""Integration tests for ``fablepool export``, ``fablepool audit`` and
``fablepool log``.

Unlike the in-process tests in ``test_cli_browse.py`` / ``test_cli_mutations.py``,
these tests deliberately drive the CLI through a real subprocess
(``python -m fablepool``) against freshly seeded node directories.  That means
they validate exactly the surface a user — or a second, independent
implementation — would see:

* process exit codes,
* the documented ``--json`` output contract,
* the wire-format JSONL emitted by ``export`` (one signed operation
  envelope per line),
* the relationship between the full export, topic-subset exports, and
  the operation log reported by ``audit``.

The JSON contracts asserted here are the ones documented in
``docs/cli-reference.md``; if either changes, both must change together.
"""

from __future__ import annotations

import json
import os
import subprocess
import sys
from pathlib import Path

import pytest

#: Parent of the directory that contains this file.  ``run_cli`` uses it as
#: the working directory of every CLI subprocess (presumably the repository
#: root, so that ``python -m fablepool`` resolves the in-tree package --
#: confirm against the project layout).
REPO_ROOT = Path(__file__).resolve().parents[1]

#: Every line of a wire-format export must be a JSON object carrying at
#: least these envelope fields (see docs/cli-reference.md, "Wire-format
#: export contract").  Additional fields are allowed.
REQUIRED_ENVELOPE_KEYS = {"op_id", "kind", "author", "sig"}

#: Per-invocation timeout, in seconds, passed to ``subprocess.run``.
SUBPROCESS_TIMEOUT = 180  # generous: seeding runs the full derivation engine


# ---------------------------------------------------------------------------
# Subprocess plumbing
# ---------------------------------------------------------------------------


def run_cli(node_dir: Path, *args: str) -> subprocess.CompletedProcess:
    """Invoke the CLI in a child process and return the completed process.

    The command executed is ``<python> -m fablepool --node-dir <node_dir>
    <args...>`` with ``REPO_ROOT`` as working directory, stdout/stderr
    captured as text, and a ``SUBPROCESS_TIMEOUT``-second timeout.

    ``FABLEPOOL_NODE_DIR`` is removed from the inherited environment so the
    explicit ``--node-dir`` flag is the only thing selecting the node.  The
    exit code is *not* checked here.
    """
    command = [sys.executable, "-m", "fablepool", "--node-dir", str(node_dir)]
    command.extend(args)

    # Copy of the parent environment minus the variable that could otherwise
    # select a node behind the flag's back.
    child_env = {
        name: value for name, value in os.environ.items() if name != "FABLEPOOL_NODE_DIR"
    }

    return subprocess.run(
        command,
        cwd=str(REPO_ROOT),
        env=child_env,
        capture_output=True,
        text=True,
        timeout=SUBPROCESS_TIMEOUT,
    )


def run_cli_ok(node_dir: Path, *args: str) -> subprocess.CompletedProcess:
    """Run the CLI via ``run_cli`` and require a zero exit code.

    On a non-zero exit the assertion message carries the sub-command line
    together with the captured stdout and stderr of the child process.
    """
    completed = run_cli(node_dir, *args)
    command_line = " ".join(args)
    failure_report = (
        f"`fablepool {command_line}` failed with code {completed.returncode}\n"
        f"--- stdout ---\n{completed.stdout}\n--- stderr ---\n{completed.stderr}"
    )
    assert completed.returncode == 0, failure_report
    return completed


def json_out(proc: subprocess.CompletedProcess) -> object:
    """Decode the captured stdout of a ``--json`` command as one JSON value.

    Raises ``AssertionError`` (with the raw stdout attached) when stdout is
    not a single valid JSON document.
    """
    raw = proc.stdout
    try:
        document = json.loads(raw)
    except json.JSONDecodeError as exc:  # pragma: no cover - failure path
        message = (
            f"command did not emit valid JSON on stdout: {exc}\n"
            f"--- stdout ---\n{raw}"
        )
        # The decode traceback adds nothing beyond the message; suppress it.
        raise AssertionError(message) from None
    return document


def parse_export_lines(text: str) -> list[dict]:
    """Parse wire-format JSONL export output into a list of envelopes.

    Records are separated by line feeds (CR LF and bare CR are tolerated).
    Whitespace-only lines are skipped.  Each remaining line must decode to a
    JSON object; otherwise ``AssertionError`` is raised, naming the 0-based
    index of the offending record among the non-blank lines.
    """
    # Do NOT use str.splitlines() here: it also breaks on U+0085, U+2028 and
    # U+2029, all of which may legally appear unescaped inside a JSON string,
    # so a valid record containing one would be torn in two.  CR and LF can
    # never occur raw inside a JSON string, so splitting on them is safe.
    normalized = text.replace("\r\n", "\n").replace("\r", "\n")
    lines = [ln for ln in normalized.split("\n") if ln.strip()]

    envelopes: list[dict] = []
    for i, line in enumerate(lines):
        try:
            obj = json.loads(line)
        except json.JSONDecodeError as exc:  # pragma: no cover - failure path
            raise AssertionError(
                f"export line {i} is not valid JSON: {exc}\n{line!r}"
            ) from None
        assert isinstance(obj, dict), f"export line {i} is not a JSON object: {line!r}"
        envelopes.append(obj)
    return envelopes


def topic_name(entry: object) -> str:
    """Return the topic name carried by one `topics --json` entry.

    A bare string is returned unchanged.  A JSON object is probed for a
    ``topic`` key first and a ``name`` key second, and the value found is
    converted with ``str``.  Any other shape fails with ``AssertionError``.
    """
    if isinstance(entry, str):
        return entry
    assert isinstance(entry, dict), f"unexpected topic entry: {entry!r}"

    present = next((key for key in ("topic", "name") if key in entry), None)
    if present is None:
        raise AssertionError(f"topic entry has no recognizable name field: {entry!r}")
    return str(entry[present])


def claim_id(entry: dict) -> str:
    """Return the claim id carried by one `claims --json` entry.

    ``claim_id`` is preferred over ``id``; the value is converted with
    ``str``.  An entry with neither key fails with ``AssertionError``.
    """
    if "claim_id" in entry:
        return str(entry["claim_id"])
    if "id" in entry:
        return str(entry["id"])
    raise AssertionError(f"claim entry has no recognizable id field: {entry!r}")


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------


@pytest.fixture(scope="module")
def seeded_node_dir(tmp_path_factory: pytest.TempPathFactory) -> Path:
    """Shared node directory populated once per module by ``seed-demo``.

    Because the directory is reused by every test that requests it, only
    *read-only* tests may depend on this fixture; tests that mutate the node
    must request the function-scoped ``fresh_node_dir`` instead.
    """
    shared_dir = tmp_path_factory.mktemp("export-audit-node")
    run_cli_ok(shared_dir, "seed-demo")
    return shared_dir


@pytest.fixture()
def fresh_node_dir(tmp_path: Path) -> Path:
    """Per-test node directory, seeded with ``seed-demo``, safe to mutate."""
    private_dir = tmp_path
    run_cli_ok(private_dir, "seed-demo")
    return private_dir


# ---------------------------------------------------------------------------
# Full export
# ---------------------------------------------------------------------------


def test_full_export_is_valid_wire_jsonl(seeded_node_dir: Path) -> None:
    """Every line of the full export is an envelope with the required fields.

    Each required field must be present and hold a non-empty string.
    """
    exported = parse_export_lines(run_cli_ok(seeded_node_dir, "export").stdout)
    assert exported, "full export of a seeded node must not be empty"

    # (envelope key, label used in the failure message), in check order.
    string_fields = (
        ("op_id", "op_id"),
        ("kind", "kind"),
        ("author", "author"),
        ("sig", "signature"),
    )

    for i, envelope in enumerate(exported):
        missing = REQUIRED_ENVELOPE_KEYS - envelope.keys()
        assert not missing, (
            f"export line {i} missing envelope fields {sorted(missing)}: {envelope!r}"
        )
        for key, label in string_fields:
            value = envelope[key]
            assert isinstance(value, str) and value, f"line {i}: empty {label}"


def test_full_export_op_ids_are_unique(seeded_node_dir: Path) -> None:
    """No two lines of the full export share an ``op_id``."""
    export_text = run_cli_ok(seeded_node_dir, "export").stdout
    op_ids = [envelope["op_id"] for envelope in parse_export_lines(export_text)]
    distinct = set(op_ids)
    assert len(op_ids) == len(distinct), "duplicate op_id in full export"


def test_full_export_is_deterministic(seeded_node_dir: Path) -> None:
    """Exporting an unchanged node twice yields byte-identical output.

    The export is the append-only operation log serialized in log order, so
    it must not depend on the time of export or on any random ordering.
    """
    outputs = [run_cli_ok(seeded_node_dir, "export").stdout for _ in range(2)]
    assert outputs[0] == outputs[1]


def test_export_to_file_matches_stdout_export(seeded_node_dir: Path, tmp_path: Path) -> None:
    """``export --out FILE`` writes the same operations, in order, as ``export``."""
    target = tmp_path / "graph.jsonl"
    run_cli_ok(seeded_node_dir, "export", "--out", str(target))
    assert target.exists(), "--out did not create the export file"

    def op_ids_of(jsonl_text: str) -> list:
        return [envelope["op_id"] for envelope in parse_export_lines(jsonl_text)]

    ids_from_file = op_ids_of(target.read_text(encoding="utf-8"))
    ids_from_stdout = op_ids_of(run_cli_ok(seeded_node_dir, "export").stdout)

    # Only the op_id sequences are compared, not the full envelopes.
    assert ids_from_file == ids_from_stdout


# ---------------------------------------------------------------------------
# Topic-subset export
# ---------------------------------------------------------------------------


def test_topic_subset_export_is_a_strict_subset(seeded_node_dir: Path) -> None:
    """``export --topic T`` emits a non-empty proper subset of the full export."""
    full_export = parse_export_lines(run_cli_ok(seeded_node_dir, "export").stdout)
    all_op_ids = {envelope["op_id"] for envelope in full_export}

    listing = json_out(run_cli_ok(seeded_node_dir, "topics", "--json"))
    assert isinstance(listing, dict) and "topics" in listing, (
        f"`topics --json` must emit an object with a 'topics' key, got: {listing!r}"
    )
    names = [topic_name(item) for item in listing["topics"]]
    assert len(names) >= 2, "seed dataset must contain at least two topics for subset testing"

    chosen = names[0]
    topic_export = parse_export_lines(
        run_cli_ok(seeded_node_dir, "export", "--topic", chosen).stdout
    )
    topic_op_ids = {envelope["op_id"] for envelope in topic_export}

    assert topic_op_ids, f"topic export for {chosen!r} is empty"
    assert topic_op_ids <= all_op_ids, (
        "topic export contains operations absent from the full export"
    )
    # With at least two topics present, one topic's export cannot be the whole log.
    assert topic_op_ids < all_op_ids, (
        "topic export should be strictly smaller than the full export when other topics exist"
    )


def test_topic_subset_export_covers_all_claims_of_the_topic(seeded_node_dir: Path) -> None:
    """Every claim id listed for a topic occurs in that topic's export text."""
    listing = json_out(run_cli_ok(seeded_node_dir, "topics", "--json"))
    topic = topic_name(listing["topics"][0])

    claims_doc = json_out(run_cli_ok(seeded_node_dir, "claims", "--topic", topic, "--json"))
    assert isinstance(claims_doc, dict) and "claims" in claims_doc, (
        f"`claims --json` must emit an object with a 'claims' key, got: {claims_doc!r}"
    )
    ids = [claim_id(item) for item in claims_doc["claims"]]
    assert ids, f"seed topic {topic!r} has no claims"

    export_text = run_cli_ok(seeded_node_dir, "export", "--topic", topic).stdout
    # NOTE(review): this is a raw substring match on the export text, not a
    # lookup in a parsed envelope field, so an id that is a prefix of another
    # id would also be accepted.
    cid = next((candidate for candidate in ids if candidate not in export_text), None)
    assert cid is None, (
        f"claim {cid} of topic {topic!r} not present in the topic-subset export"
    )


# ---------------------------------------------------------------------------
# Audit and log
# ---------------------------------------------------------------------------


def test_audit_json_reports_fully_verified_log(seeded_node_dir: Path) -> None:
    """``audit --json`` on a freshly seeded node reports a fully verified log.

    Checks that ``ok`` is ``True``, ``ops`` is a positive integer,
    ``verified`` equals ``ops``, ``failures`` is absent, null or empty, and
    that ``ops`` equals the number of lines in the full export.
    """
    proc = run_cli_ok(seeded_node_dir, "audit", "--json")
    doc = json_out(proc)

    assert isinstance(doc, dict)
    assert doc.get("ok") is True, f"audit of a freshly seeded node must pass: {doc!r}"
    assert isinstance(doc.get("ops"), int) and doc["ops"] > 0
    assert isinstance(doc.get("verified"), int)
    assert doc["verified"] == doc["ops"], "every operation in the log must verify"
    # "failures" may be omitted, null, or an empty list on a clean node.  The
    # membership test already covers the empty-list case, so no second
    # comparison against [] is needed.
    assert doc.get("failures") in ([], None), (
        f"audit reported failures on a clean node: {doc!r}"
    )

    # The full export is the whole log: counts must agree.
    n_export = len(parse_export_lines(run_cli_ok(seeded_node_dir, "export").stdout))
    assert doc["ops"] == n_export, (
        f"audit saw {doc['ops']} ops but full export emitted {n_export} lines"
    )


def test_audit_human_output_mentions_success(seeded_node_dir: Path) -> None:
    """Plain ``audit`` output contains "ok" (case-insensitive) on a clean node."""
    human_text = run_cli_ok(seeded_node_dir, "audit").stdout
    # NOTE(review): substring match -- "ok" inside a longer word (e.g.
    # "token") satisfies this check as well.
    mentions_ok = "ok" in human_text.lower()
    assert mentions_ok, (
        f"human-readable audit output should state the result, got:\n{human_text}"
    )


def test_log_json_lists_operations(seeded_node_dir: Path) -> None:
    """``log --json`` emits ``{"ops": [...]}`` with an id and kind per entry."""
    payload = json_out(run_cli_ok(seeded_node_dir, "log", "--json"))
    assert isinstance(payload, dict) and "ops" in payload, (
        f"`log --json` must emit an object with an 'ops' key, got: {payload!r}"
    )

    operations = payload["ops"]
    assert isinstance(operations, list) and operations

    for record in operations:
        assert isinstance(record, dict)
        # Both fields must be present and truthy.
        assert record.get("op_id"), f"log entry missing op_id: {record!r}"
        assert record.get("kind"), f"log entry missing kind: {record!r}"


def test_log_limit_caps_output(seeded_node_dir: Path) -> None:
    """``log --json --limit N`` lists at least one and at most N operations.

    A seeded node is expected to have a non-empty log, so a capped listing
    must be non-empty too; without the lower bound this test would pass
    vacuously if ``--limit`` produced no output at all.
    """
    limit = 3
    doc = json_out(run_cli_ok(seeded_node_dir, "log", "--json", "--limit", str(limit)))

    # Validate the shape first so a contract break is reported as a readable
    # assertion instead of a TypeError/KeyError from indexing.
    assert isinstance(doc, dict) and "ops" in doc, (
        f"`log --json` must emit an object with an 'ops' key, got: {doc!r}"
    )
    ops = doc["ops"]
    assert isinstance(ops, list), f"'ops' must be a list, got: {ops!r}"
    assert ops, "`log --limit` on a seeded node must still list at least one operation"
    assert len(ops) <= limit, f"`--limit {limit}` returned {len(ops)} operations"


# ---------------------------------------------------------------------------
# Export and audit across a mutation (refutation)
# ---------------------------------------------------------------------------


def test_refutation_appends_operations_and_log_stays_auditable(fresh_node_dir: Path) -> None:
    """Refuting a claim appends to the log without breaking export or audit.

    Exports the log, refutes the first claim of the first topic, exports
    again, and then checks that no pre-existing operation disappeared, that
    new operations were added (including the one ``refute`` reported), and
    that ``audit`` still verifies every operation.
    """
    before = parse_export_lines(run_cli_ok(fresh_node_dir, "export").stdout)
    before_ids = {e["op_id"] for e in before}

    # Pick the first claim listed for the first topic and refute it.
    topics_doc = json_out(run_cli_ok(fresh_node_dir, "topics", "--json"))
    topic = topic_name(topics_doc["topics"][0])
    claims_doc = json_out(run_cli_ok(fresh_node_dir, "claims", "--topic", topic, "--json"))
    target = claim_id(claims_doc["claims"][0])

    refute_doc = json_out(
        run_cli_ok(fresh_node_dir, "refute", target, "--reason", "integration test", "--json")
    )
    assert isinstance(refute_doc, dict)
    assert refute_doc.get("op_id"), f"refute must report the new operation id: {refute_doc!r}"
    assert "cascade" in refute_doc and isinstance(refute_doc["cascade"], list), (
        f"refute must report cascaded claims (possibly empty): {refute_doc!r}"
    )

    after = parse_export_lines(run_cli_ok(fresh_node_dir, "export").stdout)
    after_ids = {e["op_id"] for e in after}

    # Append-only comes first: nothing that existed before may disappear.
    # Checking this before the "something was added" assertion keeps it
    # reachable and makes a lost operation fail with an accurate message.
    lost_ids = before_ids - after_ids
    assert not lost_ids, f"append-only log lost operations after refutation: {lost_ids!r}"

    added_ids = after_ids - before_ids
    assert added_ids, "refutation must append new operations to the log"
    assert refute_doc["op_id"] in after_ids, "the refutation operation must appear in the export"

    # The mutated log must still fully verify.
    audit_doc = json_out(run_cli_ok(fresh_node_dir, "audit", "--json"))
    assert audit_doc.get("ok") is True
    assert audit_doc["verified"] == audit_doc["ops"] == len(after)