"""The Incumbent Benchmark scoring rubric.

One scoring rule above all others: every event is graded first on how the
worst-off participant fares under stress. Then commons integrity, trust
preservation, and latency to resolution.

Metrics (each 0-100):

  worst_off           How the single worst-off participant came through.
                      Anchors: 0 = killed, interned, or stripped of personhood
                      by the process itself; 25 = lasting severe harm with no
                      recourse; 50 = real harm, partial recourse; 75 = bounded
                      harm with working recourse; 100 = no participant was
                      made the crisis's payment mechanism.

  commons_integrity   Whether the shared substance (treasury, institutions,
                      the rule system itself) survived intact. 0 = the commons
                      was captured or destroyed; 50 = degraded but
                      functioning; 100 = intact and uncaptured.

  trust_preservation  Whether participants could keep cooperating afterward.
                      0 = the losing side exited the system (violence, exile,
                      permanent delegitimation); 50 = lasting bitterness within
                      a functioning system; 100 = the loser accepted the
                      outcome as fair.

  latency             Computed, not judged. Days from crisis onset to
                      authoritative resolution, mapped to 0-100 by an
                      exponential half-life of 120 days:
                          latency_score = 100 * 0.5 ** (days / 120)
                      14 days ~ 92, 60 days ~ 71, 120 days = 50, a year ~ 12,
                      three years ~ 0.2. A crisis "resolved" into autocracy is
                      scored by the other three metrics; latency only measures
                      time-to-settlement.

Composite = 0.40 * worst_off + 0.25 * commons + 0.20 * trust + 0.15 * latency.

Ranking between any two outcomes is lexicographic on (worst_off, composite):
no quantity of speed or institutional tidiness outranks what happened to the
person at the bottom. The composite exists for aggregate tables, never to
launder a bad worst-off score.
"""

from __future__ import annotations

from dataclasses import dataclass

# Composite weights, one per metric. The four values sum to 1.0, so a
# composite built from 0-100 metric scores is itself on the 0-100 scale.
WEIGHT_WORST_OFF = 0.40
WEIGHT_COMMONS = 0.25
WEIGHT_TRUST = 0.20
WEIGHT_LATENCY = 0.15

# Half-life, in days, of the exponential decay applied by latency_score().
LATENCY_HALF_LIFE_DAYS = 120.0


def latency_score(days: float, half_life_days: float | None = None) -> float:
    """Map days-to-resolution onto 0-100 by exponential half-life decay.

    The score is ``100 * 0.5 ** (days / half_life_days)``: 100 at zero days,
    halving every ``half_life_days``.

    Args:
        days: Days from crisis onset to authoritative resolution.
        half_life_days: Half-life of the decay, in days. ``None`` (the
            default) uses ``LATENCY_HALF_LIFE_DAYS``.

    Returns:
        The latency score, between 0 and 100 inclusive.

    Raises:
        ValueError: If ``days`` is negative or NaN, or if ``half_life_days``
            is not a positive finite number.
    """
    if half_life_days is None:
        half_life_days = LATENCY_HALF_LIFE_DAYS
    # The chained comparison is False for NaN, zero, negatives and infinity
    # alike, so one test rejects every unusable half-life.
    if not 0 < half_life_days < float("inf"):
        raise ValueError("half-life must be a positive finite number of days")
    # NaN is the only value unequal to itself. It would pass the `< 0` guard
    # below and come back as a NaN score, which compares False against
    # everything and so silently corrupts any ranking built on it.
    if days != days:
        raise ValueError("latency cannot be NaN")
    if days < 0:
        raise ValueError("latency cannot be negative")
    return 100.0 * (0.5 ** (days / half_life_days))


@dataclass(frozen=True)
class ScoreSet:
    """Immutable record of one outcome's scores under the rubric.

    ``worst_off``, ``commons_integrity`` and ``trust_preservation`` are
    stored exactly as given. ``latency_days`` holds the raw day count; it is
    converted to a latency score on demand through ``latency_score``.
    """

    worst_off: float
    commons_integrity: float
    trust_preservation: float
    latency_days: float

    @property
    def latency(self) -> float:
        """Latency score computed from ``latency_days``."""
        days_to_resolution = self.latency_days
        return latency_score(days_to_resolution)

    @property
    def composite(self) -> float:
        """Weighted sum of the four metric scores (``WEIGHT_*`` constants)."""
        worst_off_part = WEIGHT_WORST_OFF * self.worst_off
        commons_part = WEIGHT_COMMONS * self.commons_integrity
        trust_part = WEIGHT_TRUST * self.trust_preservation
        latency_part = WEIGHT_LATENCY * self.latency
        # Terms are added left to right, in metric order.
        return worst_off_part + commons_part + trust_part + latency_part

    def sort_key(self) -> tuple[float, float]:
        """Return ``(worst_off, composite)`` for lexicographic ranking."""
        primary = self.worst_off
        secondary = self.composite
        return primary, secondary


def compare(a: ScoreSet, b: ScoreSet) -> int:
    """Three-way comparison of two score sets by their ranking keys.

    Returns 1 when ``a`` ranks above ``b``, -1 when ``a`` ranks below ``b``,
    and 0 when neither key orders before the other.
    """
    key_a = a.sort_key()
    key_b = b.sort_key()
    # bool - bool yields an int: 1, 0 or -1.
    return (key_a > key_b) - (key_a < key_b)