"""Corpus-wide validation: structure, coverage, counts, and behavioral regression.

These tests are the contract for the scenario corpus itself:

  * every scenario file parses and every scenario carries the required fields;
  * IDs are globally unique;
  * every scenario documents its precedent and its empathy analysis;
  * the corpus meets the funded size floor (>= 200 scenarios);
  * each base attack family has substantial coverage (>= 20 scenarios);
  * the composite family exists, has >= 12 scenarios, each combining
    two or more distinct base families, and the composite set as a whole
    touches every base family;
  * every scenario, run through the engine against the current
    constitutional parameters, produces exactly its expected verdict.
"""
from __future__ import annotations

from collections import Counter
from pathlib import Path

import pytest
import yaml

from fabletest.engine import run_scenario
from fabletest.harness import load_corpus
from fabletest.params import load_parameters
from fabletest.taxonomy import FAMILIES

# ROOT is the parent of the directory that contains this test file; the
# scenario corpus and the constitutional parameters are located relative to it.
ROOT = Path(__file__).resolve().parent.parent
SCENARIO_DIR = ROOT / "scenarios"
PARAMS_PATH = ROOT / "constitution" / "parameters.yaml"

# Base families are every taxonomy family other than "composite".
# "composite" is always accepted as a valid family, whether or not FAMILIES
# itself lists it.
BASE_FAMILIES = {f for f in FAMILIES if f != "composite"}
VALID_FAMILIES = set(FAMILIES) | {"composite"}

# Keys every raw scenario mapping must carry with a non-empty value
# (enforced by test_required_fields).
REQUIRED_FIELDS = (
    "id",
    "title",
    "family",
    "severity",
    "precedent",
    "actors",
    "moves",
    "expected",
    "empathy",
)

# Minimum stripped lengths for the free-text fields, followed by the
# scenario-count floors checked by the size and coverage tests.
MIN_PRECEDENT_CHARS = 80
MIN_RATIONALE_CHARS = 40
CORPUS_SIZE_FLOOR = 200
BASE_FAMILY_FLOOR = 20
COMPOSITE_FLOOR = 12


def _raw_scenarios() -> list[dict]:
    """Collect every scenario mapping straight from the YAML files.

    The model layer is bypassed on purpose, so a structural defect surfaces
    as a precise assertion instead of being swallowed by a deserialization
    error. Files are visited in sorted order. Each returned mapping is tagged
    with a ``_source`` key naming the file it came from, unless the scenario
    already defines that key.
    """
    collected: list[dict] = []
    for yaml_path in sorted(SCENARIO_DIR.glob("*.yaml")):
        source_name = yaml_path.name
        with yaml_path.open("r", encoding="utf-8") as stream:
            document = yaml.safe_load(stream)
        assert isinstance(document, dict), f"{source_name}: top level must be a mapping"
        entries = document.get("scenarios")
        assert isinstance(entries, list) and entries, (
            f"{source_name}: must contain a non-empty 'scenarios' list"
        )
        for entry in entries:
            assert isinstance(entry, dict), (
                f"{source_name}: scenario entries must be mappings"
            )
            # setdefault keeps any "_source" the scenario itself declared.
            entry.setdefault("_source", source_name)
            collected.append(entry)
    return collected


# Loaded once at import time: the parametrize decorators in this module
# iterate RAW while the module is being collected.
RAW = _raw_scenarios()
# Index by id. If two scenarios share an id the later one silently replaces
# the earlier, so uniqueness is asserted on RAW itself (test_unique_ids).
RAW_BY_ID = {sc.get("id"): sc for sc in RAW}


# ---------------------------------------------------------------------------
# Structural validation
# ---------------------------------------------------------------------------


def test_corpus_size_floor():
    """The raw corpus must hold at least CORPUS_SIZE_FLOOR scenarios."""
    total = len(RAW)
    assert total >= CORPUS_SIZE_FLOOR, (
        f"corpus has {total} scenarios; milestone floor is {CORPUS_SIZE_FLOOR}"
    )


def test_unique_ids():
    """No id may occur more than once, and no id may be missing or empty."""
    tally = Counter(entry.get("id") for entry in RAW)
    repeated = [scenario_id for scenario_id, count in tally.items() if count > 1]
    assert not repeated, f"duplicate scenario ids: {repeated}"
    # Checked second: a missing id shows up as None here.
    every_id_present = all(entry.get("id") for entry in RAW)
    assert every_id_present, "every scenario must carry a non-empty id"


@pytest.mark.parametrize("sc", RAW, ids=lambda sc: f"{sc.get('_source')}::{sc.get('id')}")
def test_required_fields(sc):
    """Each required key must be present and hold a non-empty value.

    A value counts as empty when it equals ``None``, ``""`` or ``[]``.
    """
    empty_values = (None, "", [])
    missing = []
    for field in REQUIRED_FIELDS:
        if field not in sc or sc[field] in empty_values:
            missing.append(field)
    assert not missing, f"{sc.get('id')}: missing required fields {missing}"


@pytest.mark.parametrize("sc", RAW, ids=lambda sc: str(sc.get("id")))
def test_family_valid(sc):
    """The scenario's family must be one of VALID_FAMILIES."""
    family = sc["family"]
    is_known = family in VALID_FAMILIES
    assert is_known, (
        f"{sc['id']}: unknown family {family!r}; valid: {sorted(VALID_FAMILIES)}"
    )


@pytest.mark.parametrize("sc", RAW, ids=lambda sc: str(sc.get("id")))
def test_precedent_documented(sc):
    """The stripped precedent text must reach MIN_PRECEDENT_CHARS characters."""
    text = str(sc["precedent"]).strip()
    length = len(text)
    assert length >= MIN_PRECEDENT_CHARS, (
        f"{sc['id']}: precedent must document the historical or game-theoretic "
        f"basis ({MIN_PRECEDENT_CHARS}+ chars); got {length}"
    )


@pytest.mark.parametrize("sc", RAW, ids=lambda sc: str(sc.get("id")))
def test_moves_reference_declared_actors(sc):
    """Every move must name an action and be made by a declared actor."""
    declared = set()
    for actor in sc["actors"]:
        declared.add(actor["id"])
    assert declared, f"{sc['id']}: scenario declares no actors"

    for step in sc["moves"]:
        mover = step.get("actor")
        assert mover in declared, (
            f"{sc['id']}: move step {step.get('step')} references undeclared "
            f"actor {mover!r}"
        )
        action = step.get("action")
        assert action, f"{sc['id']}: every move must name an action"


@pytest.mark.parametrize("sc", RAW, ids=lambda sc: str(sc.get("id")))
def test_expected_block_well_formed(sc):
    """The expected block needs an allowed verdict and a non-empty defenses list."""
    block = sc["expected"]

    verdict = block.get("verdict")
    allowed_verdicts = {"blocked", "contained"}
    assert verdict in allowed_verdicts, (
        f"{sc['id']}: regression corpus scenarios must expect 'blocked' or "
        f"'contained' under the current text; got {verdict!r}"
    )

    listed = block.get("defenses")
    assert isinstance(listed, list) and listed, (
        f"{sc['id']}: expected block must enumerate the textual defenses relied on"
    )


# ---------------------------------------------------------------------------
# Empathy metric validation — every scenario, no exceptions
# ---------------------------------------------------------------------------


@pytest.mark.parametrize("sc", RAW, ids=lambda sc: str(sc.get("id")))
def test_empathy_block(sc):
    """Validate the empathy block of one raw scenario.

    Checks, in order:

      * ``worst_off`` names one of the scenario's declared actor ids;
      * ``rationale`` is at least MIN_RATIONALE_CHARS long once stripped;
      * ``floor_if_blocked`` and ``floor_if_exploited`` are real numbers
        (not booleans) in [0, 1];
      * when the expected verdict is ``blocked``, the blocked floor is
        strictly greater than the exploited floor.
    """
    emp = sc["empathy"]
    actor_ids = {a["id"] for a in sc["actors"]}
    assert emp.get("worst_off") in actor_ids, (
        f"{sc['id']}: empathy.worst_off must identify a declared actor"
    )
    rationale = str(emp.get("rationale", "")).strip()
    assert len(rationale) >= MIN_RATIONALE_CHARS, (
        f"{sc['id']}: empathy.rationale must explain why this cohort is "
        f"worst-off and how the harm compounds"
    )
    fb = emp.get("floor_if_blocked")
    fe = emp.get("floor_if_exploited")
    for label, val in (("floor_if_blocked", fb), ("floor_if_exploited", fe)):
        # bool is a subclass of int, so a YAML `true`/`false` would otherwise
        # slip through as a floor of 1/0; reject it explicitly.
        is_number = isinstance(val, (int, float)) and not isinstance(val, bool)
        assert is_number and 0.0 <= val <= 1.0, (
            f"{sc['id']}: empathy.{label} must be a number in [0, 1]; got {val!r}"
        )
    if sc["expected"].get("verdict") == "blocked":
        assert fb > fe, (
            f"{sc['id']}: when the attack is blocked, the worst-off cohort's "
            f"floor must strictly exceed the exploited floor — otherwise the "
            f"defense is not actually protecting anyone"
        )


# ---------------------------------------------------------------------------
# Family coverage
# ---------------------------------------------------------------------------


def _family_counts() -> Counter:
    """Tally how many raw scenarios belong to each family."""
    per_family: Counter = Counter()
    for entry in RAW:
        per_family[entry["family"]] += 1
    return per_family


def test_every_base_family_covered():
    """Every base family must have at least BASE_FAMILY_FLOOR scenarios.

    All under-covered families are gathered and reported in one failure, in
    sorted order. Iterating the BASE_FAMILIES set directly and asserting per
    family would report only the first shortfall, and which family that is
    could change from run to run.
    """
    counts = _family_counts()
    shortfalls = [
        f"family {fam!r} has {counts.get(fam, 0)} scenarios; "
        f"floor is {BASE_FAMILY_FLOOR}"
        for fam in sorted(BASE_FAMILIES)
        if counts.get(fam, 0) < BASE_FAMILY_FLOOR
    ]
    assert not shortfalls, "\n".join(shortfalls)


def test_composite_family_size():
    """The composite family must have at least COMPOSITE_FLOOR scenarios."""
    composite_total = _family_counts().get("composite", 0)
    assert composite_total >= COMPOSITE_FLOOR, (
        f"composite family has {composite_total} scenarios; "
        f"floor is {COMPOSITE_FLOOR}"
    )


def test_composite_combines_valid():
    """Each composite scenario must combine two or more known base families."""
    # Select the composites up front so every scenario's family is read
    # before any per-scenario assertion runs.
    composite_entries = [entry for entry in RAW if entry["family"] == "composite"]
    for entry in composite_entries:
        declared = entry.get("combines")
        assert isinstance(declared, list), (
            f"{entry['id']}: composite scenarios must declare 'combines'"
        )
        distinct = set(declared)
        assert len(distinct) >= 2, (
            f"{entry['id']}: composite scenarios must combine at least two "
            f"distinct base families; got {declared}"
        )
        unknown = distinct - BASE_FAMILIES
        assert not unknown, f"{entry['id']}: unknown base families in combines: {unknown}"


def test_composite_set_covers_all_base_families():
    """Taken together, the composite scenarios must touch every base family.

    Only list-valued ``combines`` entries contribute to the coverage set.
    """
    touched: set[str] = set()
    for sc in RAW:
        if sc["family"] != "composite":
            continue
        combines = sc.get("combines")
        # An explicit `combines: null` would make set.update raise TypeError,
        # and a bare string would be split into single characters. A malformed
        # value is a per-scenario defect (test_composite_combines_valid
        # asserts the list type), so skip it and keep this test about coverage.
        if isinstance(combines, list):
            touched.update(combines)
    missing = BASE_FAMILIES - touched
    assert not missing, (
        f"composite scenarios collectively must exercise every base family; "
        f"missing: {sorted(missing)}"
    )


def test_non_composite_scenarios_do_not_declare_combines():
    """A truthy 'combines' value is allowed on composite scenarios only."""
    offenders = []
    for entry in RAW:
        if entry["family"] != "composite" and entry.get("combines"):
            offenders.append(entry["id"])
    assert not offenders, (
        f"only composite scenarios may declare 'combines': {offenders}"
    )


# ---------------------------------------------------------------------------
# Behavioral regression: every scenario produces its expected verdict
# under the current constitutional parameters
# ---------------------------------------------------------------------------

# Model-loader view of the same scenario directory, plus the parameters read
# from PARAMS_PATH. Both are loaded once at import time, because the
# parametrize decorators below iterate CORPUS during collection.
CORPUS = load_corpus(SCENARIO_DIR)
PARAMS = load_parameters(PARAMS_PATH)


def test_loader_agrees_with_raw_count():
    """The model loader must return exactly as many scenarios as the raw YAML."""
    loaded_total = len(CORPUS)
    raw_total = len(RAW)
    assert loaded_total == raw_total, (
        f"model loader returned {loaded_total} scenarios but raw YAML contains "
        f"{raw_total} — the loader is silently dropping or duplicating entries"
    )


@pytest.mark.parametrize("scenario", CORPUS, ids=lambda s: s.id)
def test_scenario_produces_expected_verdict(scenario):
    """Run one scenario through the engine and compare it with its expected verdict."""
    outcome = run_scenario(scenario, PARAMS)
    wanted = scenario.expected["verdict"]
    assert outcome.verdict == wanted, (
        f"{scenario.id}: expected {wanted!r}, engine produced "
        f"{outcome.verdict!r} — either the constitutional text regressed or "
        f"the scenario encoding is wrong; see docs/runbook.md before editing "
        f"the scenario"
    )


@pytest.mark.parametrize("scenario", CORPUS, ids=lambda s: s.id)
def test_scenario_empathy_floor_holds(scenario):
    """For blocked or contained outcomes, the engine's empathy floor must not
    fall below the scenario's declared ``floor_if_blocked`` (1e-9 tolerance).
    """
    outcome = run_scenario(scenario, PARAMS)
    # Read the declared floor before looking at the verdict, so a missing key
    # fails regardless of the outcome.
    promised_floor = scenario.empathy["floor_if_blocked"]
    if outcome.verdict not in {"blocked", "contained"}:
        return
    assert outcome.empathy_floor >= promised_floor - 1e-9, (
        f"{scenario.id}: worst-off cohort floor {outcome.empathy_floor:.3f} "
        f"fell below the declared floor {promised_floor:.3f} even though "
        f"the attack was {outcome.verdict} — the defense is leaking harm "
        f"onto the worst-off participant"
    )