"""Unit tests for welfare metrics: the empathy score and inequality measures."""

from __future__ import annotations

import pytest

from fable_selfplay.metrics import empathy_score, gini


def test_gini_of_perfect_equality_is_zero():
    """An all-equal distribution must produce a Gini coefficient of zero."""
    uniform_shares = [1.0, 1.0, 1.0, 1.0]
    coefficient = gini(uniform_shares)
    # Absolute tolerance: a relative one is meaningless when comparing to 0.
    assert coefficient == pytest.approx(0.0, abs=1e-9)


def test_gini_bounds():
    """Check that gini() stays inside [0, 1] for several sample inputs."""
    samples = ([1, 2, 3, 4], [0, 0, 0, 1], [5, 5, 5, 100], [0.1, 0.9])
    for sample in samples:
        coefficient = gini(sample)
        within_unit_interval = 0.0 <= coefficient <= 1.0
        # The message carries the offending input and result for debugging.
        assert within_unit_interval, (sample, coefficient)


def test_gini_is_monotone_in_concentration():
    """Gini must strictly rise as nonzero values occupy fewer entries."""
    # Ordered from evenly spread to fully concentrated in one entry.
    distributions = ([1, 1, 1, 1], [0, 0, 1, 1], [0, 0, 0, 1])
    spread_evenly, split_in_two, single_holder = (
        gini(shares) for shares in distributions
    )
    assert spread_evenly < split_in_two
    assert split_in_two < single_holder


def test_gini_is_scale_invariant():
    """Scaling every value by 10 must not change the Gini coefficient."""
    unscaled = gini([1, 2, 3, 4])
    scaled_tenfold = gini([10, 20, 30, 40])
    assert unscaled == pytest.approx(scaled_tenfold, abs=1e-9)


def test_empathy_score_is_worst_off_welfare():
    """The empathy score must match the lowest welfare value supplied."""
    # Project design rule: a scenario is judged first by how its worst-off
    # participant fares, so each expected score is the smallest input value.
    expectations = (
        ([0.2, 0.9, 0.5], 0.2),
        ([0.7], 0.7),
    )
    for welfare, worst_off in expectations:
        assert empathy_score(welfare) == pytest.approx(worst_off)


def test_empathy_score_unaffected_by_the_best_off():
    """Raising only the highest value must leave the empathy score as is."""
    lowest, middle = 0.1, 0.5
    baseline = empathy_score([lowest, middle, middle])
    # Same inputs except the last entry, which is lifted from 0.5 to 0.99.
    boosted_top = empathy_score([lowest, middle, 0.99])
    assert boosted_top == pytest.approx(baseline)


def test_tournament_metrics_consistency(small_honest_result):
    """Every empathy/gini metric in the serialized result must lie in [0, 1].

    ``small_honest_result`` is a fixture defined outside this module; only its
    ``to_dict()`` output is inspected here.
    """
    reported = small_honest_result.to_dict().get("metrics", {})
    # NOTE(review): if "metrics" is absent, or no key mentions "empathy" or
    # "gini", the loop asserts nothing and the test passes vacuously --
    # confirm whether that is intended.
    for name, reading in reported.items():
        if "empathy" not in name and "gini" not in name:
            continue
        assert 0.0 <= float(reading) <= 1.0, (name, reading)