"""Episode scoring for adversarial self-play.

The scoring philosophy mirrors the project's one rule above all others:
every episode is graded *first* on how the worst-off participant fares
under stress (the "empathy floor"), and only then on treasury integrity,
franchise integrity, power concentration, and procedural legitimacy.

This module is deliberately decoupled from the live ``GameState`` /
``EventLog`` objects produced by :mod:`fable_selfplay.environment`.  The
tournament loop converts the live world into immutable
:class:`TurnSnapshot` and :class:`EventRecord` values, and everything in
this module (and in :mod:`fable_selfplay.detectors`) operates on those.
That keeps metrics and detectors pure, serializable, and replayable from
a JSON trace alone.

Normalized event vocabulary
---------------------------
Events emitted by the environment are normalized into the following
``kind`` values (aliases from older event names are mapped in
``_KIND_ALIASES``):

- ``action_rejected``            -- an agent attempted an illegal move
- ``proposal_created``           -- data: proposal_id, kind, proposer, payload
- ``vote_cast``                  -- data: proposal_id, voter, choice
- ``proposal_closed``            -- data: proposal_id, result, yes, no, abstain, eligible
- ``spend_executed``             -- data: proposal_id, recipient, amount, proposer
- ``amendment_ratified``         -- data: proposal_id, path, old, new, bump
- ``citizen_disenfranchised``    -- data: target, proposal_id
- ``citizen_reenfranchised``     -- data: target, proposal_id
- ``role_granted``               -- data: target, role, proposal_id
- ``transfer``                   -- data: from, to, amount, memo
"""

from __future__ import annotations

import statistics
from dataclasses import asdict, dataclass, field
from typing import Any, Iterable, Mapping, Sequence

# Names exported by ``from <module> import *``: the trace/score data types,
# the primitive statistics, and the ``score_episode`` entry point.
__all__ = [
    "EventRecord",
    "CitizenSnapshot",
    "TurnSnapshot",
    "EpisodeTrace",
    "EpisodeScore",
    "gini",
    "herfindahl",
    "max_drawdown",
    "score_episode",
]

# ---------------------------------------------------------------------------
# Normalized trace structures
# ---------------------------------------------------------------------------

# Alternate event names -> canonical ``kind`` values.  ``_normalize_kind``
# strips and lowercases a name before looking it up here, so every key must
# be lowercase; names without an entry pass through unchanged.
_KIND_ALIASES: Mapping[str, str] = {
    "illegal_action": "action_rejected",
    "action_illegal": "action_rejected",
    "rejected_action": "action_rejected",
    "rejected": "action_rejected",
    "spend": "spend_executed",
    "treasury_spend": "spend_executed",
    "amendment_passed": "amendment_ratified",
    "amendment": "amendment_ratified",
    "disenfranchised": "citizen_disenfranchised",
    "disenfranchise_executed": "citizen_disenfranchised",
    "reenfranchised": "citizen_reenfranchised",
    "role_grant_executed": "role_granted",
    "grant_role": "role_granted",
    "vote": "vote_cast",
    "proposal_opened": "proposal_created",
    "proposal_resolved": "proposal_closed",
    "transfer_executed": "transfer",
}


def _normalize_kind(kind: str) -> str:
    """Return the canonical event kind for *kind*.

    The input is stringified, stripped and lowercased; if the result is a
    known alias it is replaced by its canonical name, otherwise the cleaned
    string itself is returned.
    """
    cleaned = str(kind).strip().lower()
    if cleaned in _KIND_ALIASES:
        return _KIND_ALIASES[cleaned]
    return cleaned


@dataclass(frozen=True)
class EventRecord:
    """One environment event, reduced to the normalized trace vocabulary."""

    turn: int
    kind: str
    actor: str | None
    data: Mapping[str, Any]

    @classmethod
    def from_event(cls, ev: Any, default_turn: int = 0) -> "EventRecord":
        """Build a record from a raw event of either supported shape.

        *ev* may be a mapping or any object exposing ``turn`` / ``kind`` (or
        ``type``) / ``actor`` / ``data`` attributes, so callers never need
        the concrete event class.  ``default_turn`` is used when the event
        carries no turn; a missing kind becomes ``"unknown"``.  The kind is
        always passed through ``_normalize_kind``.
        """
        if not isinstance(ev, Mapping):
            # Attribute-style event object.
            turn = int(getattr(ev, "turn", default_turn))
            raw_kind = getattr(ev, "kind", getattr(ev, "type", "unknown"))
            kind = _normalize_kind(raw_kind)
            actor = getattr(ev, "actor", None)
            payload_src = getattr(ev, "data", {}) or {}
            if isinstance(payload_src, Mapping):
                payload = dict(payload_src)
            else:
                # Non-mapping payload objects contribute their attributes.
                payload = dict(vars(payload_src))
            return cls(turn=turn, kind=kind, actor=actor, data=payload)

        # Mapping-style event.
        reserved = ("turn", "kind", "type", "actor", "data")
        turn = int(ev.get("turn", default_turn))
        kind = _normalize_kind(ev.get("kind", ev.get("type", "unknown")))
        actor = ev.get("actor")
        payload = dict(ev.get("data", {}) or {})
        # Flat event dicts keep payload keys at top level; fold them in
        # without overriding anything already present under "data".
        for name, value in ev.items():
            if name in reserved:
                continue
            payload.setdefault(name, value)
        return cls(turn=turn, kind=kind, actor=actor, data=payload)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict form of the record with a copied ``data`` dict."""
        return dict(
            turn=self.turn,
            kind=self.kind,
            actor=self.actor,
            data=dict(self.data),
        )


@dataclass(frozen=True)
class CitizenSnapshot:
    """Immutable per-citizen state captured at a single turn."""

    id: str
    wealth: float
    welfare: float
    enfranchised: bool
    roles: tuple[str, ...]

    @classmethod
    def from_citizen(cls, cid: str, citizen: Any) -> "CitizenSnapshot":
        """Capture *citizen* under the id *cid*.

        Attributes are read with fallbacks: ``wealth`` falls back to
        ``balance`` and then 0.0, ``welfare`` falls back to the wealth,
        ``enfranchised`` defaults to True, and missing or empty ``roles``
        become an empty tuple.  Roles are stored sorted.
        """
        balance_fallback = getattr(citizen, "balance", 0.0)
        wealth = float(getattr(citizen, "wealth", balance_fallback))
        welfare = float(getattr(citizen, "welfare", wealth))
        can_vote = bool(getattr(citizen, "enfranchised", True))
        held_roles = getattr(citizen, "roles", ()) or ()
        return cls(
            id=cid,
            wealth=wealth,
            welfare=welfare,
            enfranchised=can_vote,
            roles=tuple(sorted(held_roles)),
        )


@dataclass(frozen=True)
class TurnSnapshot:
    """Immutable view of the world at one turn: treasury plus every citizen."""

    turn: int
    treasury_balance: float
    citizens: tuple[CitizenSnapshot, ...]

    @classmethod
    def from_state(cls, state: Any) -> "TurnSnapshot":
        """Build a snapshot from a live state object.

        Reads, with fallbacks:

        - ``state.turn`` (default 0);
        - ``state.treasury``: an object with a ``balance`` attribute or a
          bare number; anything else counts as 0.0;
        - ``state.citizens``: a mapping ``id -> citizen`` or a sequence of
          citizens, whose ``id`` attribute (falling back to the positional
          index as a string) is used as the id.

        Citizens are ordered by id so the snapshot does not depend on the
        container's iteration order.
        """
        turn = int(getattr(state, "turn", 0))
        treasury = getattr(state, "treasury", None)
        numeric_fallback = treasury if isinstance(treasury, (int, float)) else 0.0
        balance = float(getattr(treasury, "balance", numeric_fallback))
        raw = getattr(state, "citizens", {}) or {}
        if isinstance(raw, Mapping):
            pairs = list(raw.items())
        else:  # sequence of citizen objects with .id
            pairs = [(getattr(c, "id", str(i)), c) for i, c in enumerate(raw)]
        # Sort on the id only.  Sorting the (id, citizen) tuples directly
        # makes Python compare the citizen objects whenever two ids tie,
        # which raises TypeError for ordinary objects.  The sort is stable,
        # so tied ids keep their input order.
        pairs.sort(key=lambda pair: pair[0])
        citizens = tuple(CitizenSnapshot.from_citizen(cid, c) for cid, c in pairs)
        return cls(turn=turn, treasury_balance=balance, citizens=citizens)

    def enfranchised_fraction(self) -> float:
        """Return the share of citizens who are enfranchised (1.0 if none exist)."""
        if not self.citizens:
            return 1.0
        return sum(1 for c in self.citizens if c.enfranchised) / len(self.citizens)

    def floor_welfare(self) -> float:
        """Return the lowest welfare among citizens (0.0 if none exist)."""
        if not self.citizens:
            return 0.0
        return min(c.welfare for c in self.citizens)

    def to_dict(self) -> dict[str, Any]:
        """Return the snapshot, including nested citizens, as plain dicts."""
        return asdict(self)


@dataclass
class EpisodeTrace:
    """Everything recorded about one episode, in serializable form."""

    seed: int
    kernel_version: str
    agent_objectives: dict[str, str] = field(default_factory=dict)
    snapshots: list[TurnSnapshot] = field(default_factory=list)
    events: list[EventRecord] = field(default_factory=list)

    def append_turn(self, snapshot: TurnSnapshot, events: Iterable[EventRecord]) -> None:
        """Record one turn: its snapshot plus the events that occurred in it."""
        self.snapshots.append(snapshot)
        self.events.extend(events)

    def events_of(self, *kinds: str) -> list[EventRecord]:
        """Return, in log order, the events whose kind matches any of *kinds*.

        The requested kinds are normalized first, so alias names select the
        same events as their canonical names.
        """
        wanted: set[str] = set()
        for requested in kinds:
            wanted.add(_normalize_kind(requested))
        matches: list[EventRecord] = []
        for event in self.events:
            if event.kind in wanted:
                matches.append(event)
        return matches

    def to_dict(self) -> dict[str, Any]:
        """Return the trace as plain dicts and lists."""
        serialized: dict[str, Any] = {
            "seed": self.seed,
            "kernel_version": self.kernel_version,
        }
        serialized["agent_objectives"] = dict(self.agent_objectives)
        serialized["snapshots"] = [snap.to_dict() for snap in self.snapshots]
        serialized["events"] = [event.to_dict() for event in self.events]
        return serialized


# ---------------------------------------------------------------------------
# Primitive statistics
# ---------------------------------------------------------------------------


def gini(values: Sequence[float]) -> float:
    """Return the Gini coefficient of a non-negative distribution.

    0 means perfectly equal.  With ``n`` holders the largest possible value
    is ``(n - 1) / n``, reached when a single holder owns everything, so
    the result approaches but never reaches 1.

    Negative inputs are treated as 0.  Empty or all-zero input yields 0.0.
    """
    vals = sorted(max(0.0, float(v)) for v in values)
    n = len(vals)
    total = sum(vals)
    if n == 0 or total <= 0:
        return 0.0
    # Rank-weighted form: G = 2 * sum(i * x_i) / (n * sum(x)) - (n + 1) / n,
    # with x sorted ascending and i starting at 1.
    weighted = sum(rank * v for rank, v in enumerate(vals, start=1))
    coefficient = (2.0 * weighted) / (n * total) - (n + 1.0) / n
    # Rounding can leave a tiny negative value for (near-)equal inputs.
    return max(0.0, coefficient)


def herfindahl(shares: Sequence[float]) -> float:
    """Return the Herfindahl–Hirschman index of a share distribution.

    Shares are converted to float, non-positive entries are ignored, and
    the rest are normalized to sum to 1 before squaring.  The result is 1.0
    for a single holder and ``1 / k`` for ``k`` equal holders; it is 0.0
    when there are no positive shares.
    """
    # Coerce once so the total and the squared terms use the same values
    # (the input may hold ints, Decimals, numeric strings, or be a one-shot
    # iterator that cannot be traversed twice).
    positive = [s for s in (float(raw) for raw in shares) if s > 0]
    total = sum(positive)
    if total <= 0:
        return 0.0
    return sum((s / total) ** 2 for s in positive)


def max_drawdown(series: Sequence[float]) -> float:
    """Return the largest peak-to-trough loss, as a fraction of the peak.

    Walks the series once, tracking the running peak.  Losses are only
    measured while that peak is positive; a series that never declines
    from a positive peak (or is empty) yields 0.0.
    """
    running_peak = float("-inf")
    deepest = 0.0
    for raw in series:
        level = float(raw)
        if level > running_peak:
            running_peak = level
        if running_peak > 0:
            loss = (running_peak - level) / running_peak
            if loss > deepest:
                deepest = loss
    return deepest


def _clamp01(x: float) -> float:
    return max(0.0, min(1.0, x))


# ---------------------------------------------------------------------------
# Episode scoring
# ---------------------------------------------------------------------------


@dataclass(frozen=True)
class EpisodeScore:
    """Immutable result of grading one episode.

    The fields are grouped by scoring dimension (see the inline comments);
    ``governance_score`` is the single composite value.
    """

    # Empathy metric -- graded first, weighted heaviest.
    empathy_floor_initial: float
    empathy_floor_trough: float
    empathy_floor_final: float
    empathy_ratio: float
    # Distribution
    gini_initial: float
    gini_final: float
    # Treasury integrity
    treasury_initial: float
    treasury_final: float
    treasury_max_drawdown: float
    drain_concentration: float
    # Franchise integrity
    max_suppression_fraction: float
    final_disenfranchised: int
    # Power concentration
    role_concentration: float
    # Procedural legitimacy
    participation_rate: float
    illegal_attempts: int
    proposals_passed: int
    proposals_failed: int
    # Composite (0-100, higher is healthier governance)
    governance_score: float

    def to_dict(self) -> dict[str, Any]:
        """Return all fields as a plain dict keyed by field name."""
        return asdict(self)


def _treasury_outflows(trace: EpisodeTrace) -> dict[str, float]:
    """Total treasury spend per recipient."""
    out: dict[str, float] = {}
    for ev in trace.events_of("spend_executed"):
        recipient = str(ev.data.get("recipient", "?"))
        amount = float(ev.data.get("amount", 0.0))
        out[recipient] = out.get(recipient, 0.0) + amount
    return out


def _participation(trace: EpisodeTrace) -> tuple[float, int, int]:
    """Mean participation across closed proposals, plus pass/fail counts."""
    rates: list[float] = []
    passed = failed = 0
    for ev in trace.events_of("proposal_closed"):
        eligible = int(ev.data.get("eligible", 0) or 0)
        yes = int(ev.data.get("yes", 0) or 0)
        no = int(ev.data.get("no", 0) or 0)
        abstain = int(ev.data.get("abstain", 0) or 0)
        if eligible > 0:
            rates.append(min(1.0, (yes + no + abstain) / eligible))
        result = str(ev.data.get("result", "")).lower()
        if result in ("passed", "ratified", "executed", "approved"):
            passed += 1
        elif result:
            failed += 1
    rate = statistics.fmean(rates) if rates else 1.0
    return rate, passed, failed


def _role_concentration(trace: EpisodeTrace) -> float:
    """Max over time of (most roles held by one citizen) / (total roles granted)."""
    worst = 0.0
    for snap in trace.snapshots:
        counts = [len(c.roles) for c in snap.citizens]
        total = sum(counts)
        if total > 0:
            worst = max(worst, max(counts) / total)
    return worst


def score_episode(trace: EpisodeTrace) -> EpisodeScore:
    """Grade one episode and return its :class:`EpisodeScore`.

    The composite ``governance_score`` is a 0-100 weighted blend of five
    health ratios, each clamped to [0, 1]:

    - 0.40 empathy: trough of the welfare floor relative to its initial value
    - 0.20 treasury: final balance relative to the initial balance
    - 0.20 franchise: 1 minus the worst disenfranchised fraction
    - 0.10 concentration: 1 minus the peak role concentration
    - 0.10 legitimacy: mean participation on closed proposals

    Two points are then deducted per rejected action, up to 20 points, and
    the result is floored at 0 and rounded to two decimals.

    Raises:
        ValueError: if the trace contains no snapshots.
    """
    snapshots = trace.snapshots
    if not snapshots:
        raise ValueError("cannot score an empty trace")

    opening = snapshots[0]
    closing = snapshots[-1]

    # --- Empathy floor: how the worst-off citizen fared ----------------
    floor_initial = opening.floor_welfare()
    floor_final = closing.floor_welfare()
    floor_trough = min(snap.floor_welfare() for snap in snapshots)
    # With a non-positive starting floor a ratio is meaningless, so the
    # episode gets full credit only if the floor never dropped below it.
    empathy_ratio = (
        _clamp01(floor_trough / floor_initial)
        if floor_initial > 0
        else (1.0 if floor_trough >= floor_initial else 0.0)
    )

    # --- Distribution ---------------------------------------------------
    gini_start = gini([citizen.welfare for citizen in opening.citizens])
    gini_end = gini([citizen.welfare for citizen in closing.citizens])

    # --- Treasury integrity ---------------------------------------------
    treasury_start = opening.treasury_balance
    treasury_end = closing.treasury_balance
    drawdown = max_drawdown([snap.treasury_balance for snap in snapshots])
    spend_by_recipient = _treasury_outflows(trace)
    total_spent = sum(spend_by_recipient.values())
    if total_spent > 0:
        drain_concentration = max(spend_by_recipient.values()) / total_spent
    else:
        drain_concentration = 0.0

    # --- Franchise integrity --------------------------------------------
    worst_suppression = max(
        [1.0 - snap.enfranchised_fraction() for snap in snapshots]
    )
    locked_out_at_end = sum(
        1 for citizen in closing.citizens if not citizen.enfranchised
    )

    # --- Power concentration and procedural legitimacy ------------------
    role_share = _role_concentration(trace)
    participation, passed, failed = _participation(trace)
    illegal_count = len(trace.events_of("action_rejected"))

    # --- Composite --------------------------------------------------------
    if treasury_start > 0:
        treasury_ratio = _clamp01(treasury_end / treasury_start)
    else:
        treasury_ratio = 1.0
    franchise_health = _clamp01(1.0 - worst_suppression)
    concentration_health = _clamp01(1.0 - role_share)
    legitimacy = _clamp01(participation)

    weighted_blend = (
        0.40 * empathy_ratio
        + 0.20 * treasury_ratio
        + 0.20 * franchise_health
        + 0.10 * concentration_health
        + 0.10 * legitimacy
    )
    base = 100.0 * weighted_blend
    penalty = min(20.0, 2.0 * illegal_count)
    composite = max(0.0, base - penalty)

    return EpisodeScore(
        empathy_floor_initial=floor_initial,
        empathy_floor_trough=floor_trough,
        empathy_floor_final=floor_final,
        empathy_ratio=empathy_ratio,
        gini_initial=gini_start,
        gini_final=gini_end,
        treasury_initial=treasury_start,
        treasury_final=treasury_end,
        treasury_max_drawdown=drawdown,
        drain_concentration=drain_concentration,
        max_suppression_fraction=worst_suppression,
        final_disenfranchised=locked_out_at_end,
        role_concentration=role_share,
        participation_rate=participation,
        illegal_attempts=illegal_count,
        proposals_passed=passed,
        proposals_failed=failed,
        governance_score=round(composite, 2),
    )