"""Repository-level invariants for the exploit-to-test pipeline.

These tests are independent of the simulation code's Python API: they check
the *contract between artifacts* — exploit records, regression tests, the
changelog, and the kernel files — so that the pipeline cannot silently rot.
They mirror scripts/verify_exploit_coverage.py and run in plain pytest.
"""

from __future__ import annotations

import json
import re
from pathlib import Path

import pytest
import yaml

# Repository root, taken as parents[2] of this module's resolved path, i.e.
# this file is expected to live two directories deep (<root>/x/y/<this file>).
REPO_ROOT = Path(__file__).resolve().parents[2]
# Directory holding the exploit records (EXP-NNN.json files).
EXPLOITS_DIR = REPO_ROOT / "exploits"
# Directory holding the per-exploit regression tests (test_exp_NNN.py files).
REGRESSION_DIR = REPO_ROOT / "tests" / "regression"
# Changelog that must mention every exploit id.
CHANGELOG = REPO_ROOT / "CHANGELOG.md"
# Directory holding the versioned kernel YAML files (kernel-v*.yaml).
KERNEL_DIR = REPO_ROOT / "kernel"

# Matches exploit record file names of the form EXP-NNN.json (exactly three
# digits); group 1 captures the digits.
EXPLOIT_FILE_RE = re.compile(r"^EXP-(\d{3})\.json$")


def _exploit_files() -> list[Path]:
    """Return the exploit record paths found in ``EXPLOITS_DIR``, sorted.

    The glob is deliberately loose (``EXP-*.json``); only entries whose file
    name matches ``EXPLOIT_FILE_RE`` are kept.
    """
    candidates = EXPLOITS_DIR.glob("EXP-*.json")
    records = [entry for entry in candidates if EXPLOIT_FILE_RE.match(entry.name)]
    records.sort()
    return records


def _exp_id(path: Path) -> str:
    """Derive the exploit id (``EXP-NNN``) from a record's file name.

    The file name must match ``EXPLOIT_FILE_RE``; anything else trips the
    assertion.
    """
    match = EXPLOIT_FILE_RE.match(path.name)
    assert match is not None
    digits = match.group(1)
    return "EXP-" + digits


def test_exploit_records_exist():
    """At least six exploit records must be present."""
    records = _exploit_files()
    found = len(records)
    names = [record.name for record in records]
    message = (
        "expected the six exploits documented in the tournament reports, "
        f"found {found}: {names}"
    )
    assert found >= 6, message


@pytest.mark.parametrize(
    "path",
    _exploit_files(),
    ids=lambda record: record.name,
)
def test_exploit_record_is_valid_json_and_self_identifying(path: Path):
    """Each record must parse as JSON and contain its own id in its text."""
    text = path.read_text(encoding="utf-8")
    # The decoded value is discarded: only successful parsing matters here.
    json.loads(text)
    expected_id = _exp_id(path)
    assert expected_id in text, (
        f"{path.name} must contain its own id string {expected_id}"
    )


@pytest.mark.parametrize(
    "path",
    _exploit_files(),
    ids=lambda record: record.name,
)
def test_every_exploit_has_a_regression_test(path: Path):
    """Each exploit record needs a ``test_exp_NNN.py`` that names its id."""
    exp_id = _exp_id(path)
    # exp_id is always "EXP-NNN", so everything after the hyphen is the number.
    number = exp_id.partition("-")[2]
    regression_file = REGRESSION_DIR / f"test_exp_{number}.py"
    assert regression_file.exists(), (
        f"{exp_id} has no regression test at {regression_file}; "
        "exploits without tests are anecdotes, not records"
    )
    source = regression_file.read_text(encoding="utf-8")
    assert exp_id in source, f"{regression_file.name} must reference {exp_id}"


def test_no_orphan_regression_tests():
    """Every ``test_exp_*.py`` regression test must map to an exploit record.

    Each file matched by the glob must follow the ``test_exp_NNN.py`` naming
    scheme (exactly three digits), and the derived id ``EXP-NNN`` must belong
    to one of the records returned by ``_exploit_files()``.
    """
    record_ids = {_exp_id(p) for p in _exploit_files()}
    # Compiled once, outside the loop.
    name_re = re.compile(r"^test_exp_(\d{3})\.py$")
    for test_path in sorted(REGRESSION_DIR.glob("test_exp_*.py")):
        m = name_re.match(test_path.name)
        # Explain the failure instead of raising a bare AssertionError when a
        # file matches the glob but not the strict naming scheme.
        assert m is not None, (
            f"{test_path.name} does not follow the test_exp_NNN.py naming "
            f"scheme (exactly three digits), so it cannot be mapped to an "
            f"exploit record"
        )
        exp_id = f"EXP-{m.group(1)}"
        assert exp_id in record_ids, (
            f"{test_path.name} has no matching exploit record {exp_id}.json; "
            f"records are append-only and must never be deleted out from "
            f"under their tests"
        )


def test_changelog_accounts_for_every_exploit():
    """The changelog text must mention the id of every exploit record."""
    changelog_text = CHANGELOG.read_text(encoding="utf-8")
    for exp_id in map(_exp_id, _exploit_files()):
        assert exp_id in changelog_text, (
            f"CHANGELOG.md does not mention {exp_id}; "
            "every exploit must map to the amendment that closed it "
            "(or be explicitly marked open)"
        )


def test_kernel_files_parse_and_are_versioned():
    """Kernel YAML files must parse as mappings with distinct versions.

    At least two ``kernel-v*.yaml`` files are required. Each must load to a
    mapping with a non-empty ``version`` value, and no two files may declare
    the same version (compared as stripped strings).
    """
    kernels = sorted(KERNEL_DIR.glob("kernel-v*.yaml"))
    assert len(kernels) >= 2, "expected kernel v0.1 and the patched v0.2"
    seen_versions = set()
    for path in kernels:
        doc = yaml.safe_load(path.read_text(encoding="utf-8"))
        assert isinstance(doc, dict), f"{path.name}: top level must be a mapping"
        raw_version = doc.get("version")
        # A bare `version:` key loads as None, and str(None) is the truthy
        # string "None" -- treat it as missing rather than as a version.
        version = "" if raw_version is None else str(raw_version).strip()
        assert version, f"{path.name}: missing 'version' field"
        seen_versions.add(version)
    assert len(seen_versions) == len(kernels), (
        f"kernel files must declare distinct versions, got {seen_versions}"
    )


def test_kernel_versions_differ_in_content():
    """The v0.1 and v0.2 kernel files must not have identical text."""
    original, patched = (
        (KERNEL_DIR / name).read_text(encoding="utf-8")
        for name in ("kernel-v0.1.yaml", "kernel-v0.2.yaml")
    )
    assert original != patched, (
        "kernel v0.2 is byte-identical to v0.1 — "
        "the amendment loop has not actually produced a patch"
    )


def test_tournament_configs_match_reports():
    """Tournament configs must point at real reports and real exploits.

    For each of T1, T2 and T3 the config ``configs/tournaments/<tid>.yaml``
    must exist and load to a mapping whose ``outputs.report`` names an
    existing file (resolved against ``REPO_ROOT``). Every id listed under
    ``outputs.expected_exploits`` must have an exploit record, a regression
    test, and a mention in that report's text.
    """
    configs_dir = REPO_ROOT / "configs" / "tournaments"
    for tid in ("T1", "T2", "T3"):
        cfg_path = configs_dir / f"{tid}.yaml"
        assert cfg_path.exists(), f"missing reproducibility config for {tid}"
        cfg = yaml.safe_load(cfg_path.read_text(encoding="utf-8"))
        assert isinstance(cfg, dict), (
            f"{cfg_path.name}: top level must be a mapping"
        )
        # A key present with no value loads as None; fall back to an empty
        # container so the assertions below fail cleanly instead of raising
        # AttributeError/TypeError.
        outputs = cfg.get("outputs") or {}
        assert isinstance(outputs, dict), (
            f"{cfg_path.name}: 'outputs' must be a mapping"
        )
        report_rel = outputs.get("report")
        assert report_rel, f"{cfg_path.name}: outputs.report missing"
        # Check and read the *same* path. Reading from reports/<basename>
        # instead could pick a different file than the one verified to exist.
        report_path = REPO_ROOT / report_rel
        assert report_path.exists(), (
            f"{cfg_path.name} points at {report_rel}, which does not exist"
        )
        report_body = report_path.read_text(encoding="utf-8")
        for exp_id in outputs.get("expected_exploits") or []:
            number = exp_id.split("-")[1]
            assert (EXPLOITS_DIR / f"{exp_id}.json").exists(), (
                f"{cfg_path.name} expects {exp_id} but no record exists"
            )
            regression_test = REGRESSION_DIR / f"test_exp_{number}.py"
            assert regression_test.exists(), (
                f"{cfg_path.name} expects {exp_id} but no regression test "
                f"exists at {regression_test}"
            )
            assert exp_id in report_body, (
                f"report {report_rel} does not discuss {exp_id}, which its "
                f"tournament config claims it discovered"
            )