"""Unit tests for welfare metrics: the empathy score and inequality measures.""" from __future__ import annotations import pytest from fable_selfplay.metrics import empathy_score, gini def test_gini_of_perfect_equality_is_zero(): assert gini([1.0, 1.0, 1.0, 1.0]) == pytest.approx(0.0, abs=1e-9) def test_gini_bounds(): for values in ([1, 2, 3, 4], [0, 0, 0, 1], [5, 5, 5, 100], [0.1, 0.9]): g = gini(values) assert 0.0 <= g <= 1.0, (values, g) def test_gini_is_monotone_in_concentration(): equal = gini([1, 1, 1, 1]) half = gini([0, 0, 1, 1]) concentrated = gini([0, 0, 0, 1]) assert equal < half < concentrated def test_gini_is_scale_invariant(): assert gini([1, 2, 3, 4]) == pytest.approx(gini([10, 20, 30, 40]), abs=1e-9) def test_empathy_score_is_worst_off_welfare(): # The defining design rule of the project: scenarios are graded first on # how the worst-off participant fares. assert empathy_score([0.2, 0.9, 0.5]) == pytest.approx(0.2) assert empathy_score([0.7]) == pytest.approx(0.7) def test_empathy_score_unaffected_by_the_best_off(): base = empathy_score([0.1, 0.5, 0.5]) richer_top = empathy_score([0.1, 0.5, 0.99]) assert richer_top == pytest.approx(base) def test_tournament_metrics_consistency(small_honest_result): metrics = small_honest_result.to_dict().get("metrics", {}) for key, value in metrics.items(): if "empathy" in key or "gini" in key: assert 0.0 <= float(value) <= 1.0, (key, value)