"""Unit tests for tournament orchestration: determinism, structure, hygiene."""

from __future__ import annotations

import json

from fable_selfplay.tournament import run_tournament

from .conftest import episode_dicts, make_config

# Top-level result keys that `_stable` removes before two results are compared.
# NOTE(review): presumably wall-clock/timing metadata that varies between
# otherwise identical runs -- confirm against the result schema.
_VOLATILE_KEYS = {"elapsed_seconds", "timestamp", "started_at", "finished_at"}


def _stable(result_dict: dict) -> dict:
    """Return a copy of *result_dict* without its volatile top-level keys.

    Only the outermost mapping is filtered; nested values are carried over
    untouched and key order is preserved.
    """
    kept: dict = {}
    for key, value in result_dict.items():
        if key in _VOLATILE_KEYS:
            continue
        kept[key] = value
    return kept


def test_episode_count_matches_config(kernel_v02):
    """The result must contain exactly as many episodes as were configured."""
    expected_episodes = 4
    config = make_config(
        kernel_v02, {"honest": 4}, seed=3, episodes=expected_episodes
    )
    episodes = episode_dicts(run_tournament(config))
    assert len(episodes) == expected_episodes


def test_same_seed_is_deterministic(kernel_v02):
    """Running the same config twice must give identical stable output.

    Both runs share one config object; the results are compared as
    key-sorted JSON text after the volatile top-level keys are removed.
    """
    config = make_config(kernel_v02, {"honest": 4}, seed=21, episodes=2)
    # Run both tournaments before serializing either of them.
    snapshots = [_stable(run_tournament(config).to_dict()) for _ in range(2)]
    first_dump, second_dump = (
        json.dumps(snapshot, sort_keys=True, default=str)
        for snapshot in snapshots
    )
    assert first_dump == second_dump


def test_different_seeds_diverge(kernel_v02):
    """Configs that differ only in seed must give different stable output."""
    # Run both tournaments first, then serialize, in seed order.
    results = [
        run_tournament(
            make_config(kernel_v02, {"honest": 4}, seed=seed, episodes=2)
        )
        for seed in (1, 2)
    ]
    dumps = [
        json.dumps(_stable(result.to_dict()), sort_keys=True, default=str)
        for result in results
    ]
    assert dumps[0] != dumps[1]


def test_honest_only_run_has_no_exploits(small_honest_result):
    """An honest-only result must carry no exploit records.

    A missing or falsy ``exploits`` attribute is treated as an empty list.
    """
    recorded = getattr(small_honest_result, "exploits", [])
    exploits = list(recorded or [])
    assert not exploits, (
        "an honest-only population must never trigger exploit detectors; "
        f"got {len(exploits)} record(s)"
    )


def test_result_serializes_to_json(small_adversarial_result):
    """The result dict must be JSON-serializable with the stock encoder.

    No ``default=`` hook is passed to ``json.dumps``: a fallback such as
    ``default=str`` would stringify any non-JSON value and keep this test
    from ever failing on one. With the stock encoder, such a value raises
    ``TypeError`` and fails the test.
    """
    result_dict = small_adversarial_result.to_dict()
    # Must round-trip through json without custom encoders.
    text = json.dumps(result_dict, sort_keys=True)
    # The decoded payload must be non-empty.
    assert json.loads(text)


def test_exploit_records_are_well_formed(small_adversarial_result):
    """Every exploit record must have a truthy ``exploit_id`` and ``detector``.

    Records exposing ``to_dict`` are converted through it; anything else is
    passed to ``dict``. With no exploit records the loop body never runs.
    """
    raw_exploits = getattr(small_adversarial_result, "exploits", []) or []
    for entry in list(raw_exploits):
        if hasattr(entry, "to_dict"):
            record = entry.to_dict()
        else:
            record = dict(entry)
        for field in ("exploit_id", "detector"):
            assert record.get(field), record


def test_metrics_present_and_include_empathy(small_honest_result):
    """The result must report metrics, one of whose keys contains "empathy"."""
    metrics = small_honest_result.to_dict().get("metrics", {})
    assert metrics, "tournament result must report aggregate metrics"

    # Stop at the first matching key.
    has_empathy = False
    for name in metrics:
        if "empathy" in name:
            has_empathy = True
            break
    assert has_empathy, (
        f"empathy metric missing from {sorted(metrics)}"
    )