"""Confidence regression tests with pinned values.

Every formula in ``mnema.derive.confidence`` is pinned here to its exact
expected output. If a formula changes, these tests fail loudly — that is the
point: confidence behaviour must never drift silently, because downstream
claims, explanations, and user trust all depend on it.
"""

import pytest

from mnema.derive.confidence import (
    bucket,
    calibrate,
    clamp01,
    combine_independent,
    confidence_label,
    decay,
    discount,
    laplace,
    min_combine,
    noisy_or,
    recency_decay,
    support_ratio,
    weakest_link,
    weighted_mean,
)

# Keyword arguments unpacked into ``pytest.approx`` by the tests in this file:
# an absolute tolerance of 1e-12.
APPROX = {"abs": 1e-12}


# ---------------------------------------------------------------------------
# clamp01
# ---------------------------------------------------------------------------

class TestClamp:
    """Pin ``clamp01`` below, at, inside, and above the [0, 1] interval."""

    @pytest.mark.parametrize(
        "raw, expected",
        [
            (-0.1, 0.0),  # below the interval
            (0.0, 0.0),   # lower edge
            (0.4, 0.4),   # interior value
            (1.0, 1.0),   # upper edge
            (1.5, 1.0),   # above the interval
        ],
    )
    def test_pinned(self, raw, expected):
        """Each raw input must map to its pinned output."""
        clamped = clamp01(raw)
        assert clamped == pytest.approx(expected, **APPROX)

    def test_nan_rejected(self):
        """A NaN input must raise ``ValueError``."""
        not_a_number = float("nan")
        with pytest.raises(ValueError):
            clamp01(not_a_number)


# ---------------------------------------------------------------------------
# Laplace / rule of succession
# ---------------------------------------------------------------------------

class TestLaplace:
    """Pinned outputs and domain checks for ``laplace`` and its alias."""

    @pytest.mark.parametrize(
        "successes, trials, expected",
        [
            # No observations at all: maximal uncertainty.
            (0, 0, 0.5),
            # Four supporting observations out of five.
            (4, 5, 5 / 7),
            # A perfect record still stays below 1.0.
            (10, 10, 11 / 12),
            # A record with no successes still stays above 0.0.
            (0, 10, 1 / 12),
            (1, 2, 0.5),
        ],
    )
    def test_pinned_default_prior(self, successes, trials, expected):
        """With default prior arguments each case yields its pinned value."""
        estimate = laplace(successes, trials)
        assert estimate == pytest.approx(expected, **APPROX)

    def test_pinned_custom_prior(self):
        """Explicit ``alpha=2.0, beta=2.0`` with 8-of-10 is pinned to 10/14."""
        estimate = laplace(8, 10, alpha=2.0, beta=2.0)
        assert estimate == pytest.approx(10 / 14, **APPROX)

    def test_monotone_in_successes(self):
        """For a fixed 10 trials the estimate never decreases with successes."""
        estimates = [laplace(hits, 10) for hits in range(11)]
        ascending = sorted(estimates)
        assert estimates == ascending

    def test_alias(self):
        """``support_ratio`` must be the very same object as ``laplace``."""
        assert laplace is support_ratio

    @pytest.mark.parametrize(
        "successes, trials",
        [
            (-1, 5),  # negative success count
            (5, -1),  # negative trial count
            (6, 5),   # more successes than trials
        ],
    )
    def test_domain_errors(self, successes, trials):
        """Each listed (successes, trials) pair must raise ``ValueError``."""
        with pytest.raises(ValueError):
            laplace(successes, trials)

    def test_prior_must_be_positive(self):
        """``alpha=0.0`` must raise ``ValueError``."""
        with pytest.raises(ValueError):
            laplace(1, 2, alpha=0.0)


# ---------------------------------------------------------------------------
# Noisy-OR
# ---------------------------------------------------------------------------

class TestNoisyOr:
    """Pinned outputs, invariants, and domain checks for ``noisy_or``."""

    @pytest.mark.parametrize(
        "ps, expected",
        [
            ((), 0.0),                   # no inputs
            ((0.5,), 0.5),               # single input
            ((0.6, 0.7), 0.88),
            ((0.5, 0.5, 0.5), 0.875),
            ((0.9, 1.0), 1.0),           # an input of 1.0 pins the result to 1.0
            ((0.0, 0.0), 0.0),           # all-zero inputs
        ],
    )
    def test_pinned(self, ps, expected):
        """Unpacking each tuple as positional arguments gives the pinned value."""
        combined = noisy_or(*ps)
        assert combined == pytest.approx(expected, **APPROX)

    def test_accepts_single_iterable(self):
        """A single list argument is pinned to the same 0.88 as the pair above."""
        combined = noisy_or([0.6, 0.7])
        assert combined == pytest.approx(0.88, **APPROX)

    @pytest.mark.parametrize("a", [0.0, 0.1, 0.5, 0.9, 1.0])
    @pytest.mark.parametrize("b", [0.0, 0.3, 0.7, 1.0])
    def test_bounds_monotone_commutative(self, a, b):
        """Over the (a, b) grid: in-range, not below either input, symmetric."""
        forward = noisy_or(a, b)
        # The result must stay inside the unit interval.
        assert 0.0 <= forward <= 1.0
        # Adding evidence never lowers belief (small slack for float rounding).
        assert forward >= max(a, b) - 1e-12
        # Argument order must not matter.
        assert forward == pytest.approx(noisy_or(b, a), **APPROX)

    def test_out_of_range_rejected(self):
        """An input above 1.0 must raise ``ValueError``."""
        with pytest.raises(ValueError):
            noisy_or(0.5, 1.2)

    def test_alias(self):
        """``combine_independent`` must be the very same object as ``noisy_or``."""
        assert noisy_or is combine_independent


# ---------------------------------------------------------------------------
# Weakest link
# ---------------------------------------------------------------------------

class TestWeakestLink:
    """Pinned outputs and domain checks for ``weakest_link`` and its alias."""

    def test_pinned(self):
        """Several positional values, one value, and one list are each pinned."""
        from_varargs = weakest_link(0.9, 0.4, 0.7)
        assert from_varargs == pytest.approx(0.4, **APPROX)

        from_single = weakest_link(0.8)
        assert from_single == pytest.approx(0.8, **APPROX)

        from_list = weakest_link([0.6, 0.3])
        assert from_list == pytest.approx(0.3, **APPROX)

    def test_empty_rejected(self):
        """Calling with no arguments must raise ``ValueError``."""
        with pytest.raises(ValueError):
            weakest_link()

    def test_alias(self):
        """``min_combine`` must be the very same object as ``weakest_link``."""
        assert weakest_link is min_combine


# ---------------------------------------------------------------------------
# Weighted mean
# ---------------------------------------------------------------------------

class TestWeightedMean:
    """Pinned outputs and domain checks for ``weighted_mean``."""

    def test_pinned(self):
        """A two-entry and a one-entry input are pinned to 0.65 and 0.5."""
        two_entries = weighted_mean([0.2, 0.8], [1.0, 3.0])
        assert two_entries == pytest.approx(0.65, **APPROX)

        one_entry = weighted_mean([0.5], [2.0])
        assert one_entry == pytest.approx(0.5, **APPROX)

    def test_zero_weight_entries_are_ignored_in_effect(self):
        """A value carrying weight 0.0 must not move the result."""
        result = weighted_mean([0.9, 0.1], [1.0, 0.0])
        assert result == pytest.approx(0.9, **APPROX)

    def test_length_mismatch_rejected(self):
        """Two values with only one weight must raise ``ValueError``."""
        with pytest.raises(ValueError):
            weighted_mean([0.5, 0.6], [1.0])

    def test_all_zero_weights_rejected(self):
        """Weights that are all 0.0 must raise ``ValueError``."""
        with pytest.raises(ValueError):
            weighted_mean([0.5, 0.6], [0.0, 0.0])

    def test_empty_rejected(self):
        """Empty values and weights must raise ``ValueError``."""
        with pytest.raises(ValueError):
            weighted_mean([], [])


# ---------------------------------------------------------------------------
# Recency decay
# ---------------------------------------------------------------------------

class TestRecencyDecay:
    """Pinned outputs and domain checks for ``recency_decay`` and its alias."""

    @pytest.mark.parametrize(
        "age, expected",
        [
            (0, 1.0),
            (90, 0.5),
            (180, 0.25),
            (270, 0.125),
        ],
    )
    def test_pinned_default_half_life(self, age, expected):
        """With the default half-life each age yields its pinned factor."""
        factor = recency_decay(age)
        assert factor == pytest.approx(expected, **APPROX)

    def test_pinned_custom_half_life(self):
        """With ``half_life_days=30``, ages 30 and 10 are pinned."""
        at_half_life = recency_decay(30, half_life_days=30)
        assert at_half_life == pytest.approx(0.5, **APPROX)

        # Ten days is one third of the 30-day half-life.
        at_one_third = recency_decay(10, half_life_days=30)
        assert at_one_third == pytest.approx(0.5 ** (1 / 3), **APPROX)

    def test_negative_age_treated_as_now(self):
        """A negative age is pinned to 1.0, the same value as age 0."""
        factor = recency_decay(-5)
        assert factor == pytest.approx(1.0, **APPROX)

    def test_non_positive_half_life_rejected(self):
        """``half_life_days=0`` must raise ``ValueError``."""
        with pytest.raises(ValueError):
            recency_decay(10, half_life_days=0)

    def test_alias(self):
        """``decay`` must be the very same object as ``recency_decay``."""
        assert recency_decay is decay


# ---------------------------------------------------------------------------
# Discount and calibrate
# ---------------------------------------------------------------------------

class TestDiscountCalibrate:
    """Pinned outputs and domain checks for ``discount`` and ``calibrate``.

    The ``discount`` cases are parametrized so every pinned value and every
    rejected input is reported as its own test; a failure in one case cannot
    hide the outcome of the cases after it.
    """

    @pytest.mark.parametrize(
        "first, second, expected",
        [
            (0.8, 0.5, 0.4),
            (1.0, 1.0, 1.0),
            (0.7, 0.0, 0.0),
        ],
    )
    def test_discount_pinned(self, first, second, expected):
        """Each positional argument pair yields its pinned value."""
        assert discount(first, second) == pytest.approx(expected, **APPROX)

    @pytest.mark.parametrize(
        "first, second",
        [
            (1.5, 0.5),   # first argument above 1.0
            (0.5, -0.1),  # second argument below 0.0
        ],
    )
    def test_discount_domain(self, first, second):
        """Each listed argument pair must raise ``ValueError``."""
        with pytest.raises(ValueError):
            discount(first, second)

    @pytest.mark.parametrize(
        "p, expected",
        [(0.0, 0.05), (0.5, 0.52), (1.0, 0.99)],
    )
    def test_calibrate_pinned_defaults(self, p, expected):
        """With default ``floor``/``ceiling`` each input yields its pinned value."""
        assert calibrate(p) == pytest.approx(expected, **APPROX)

    def test_calibrate_custom_range(self):
        """``floor=0.1, ceiling=0.9`` pins an input of 0.5 to 0.5."""
        assert calibrate(0.5, floor=0.1, ceiling=0.9) == pytest.approx(0.5, **APPROX)

    def test_calibrate_invalid_range_rejected(self):
        """A floor above the ceiling must raise ``ValueError``."""
        with pytest.raises(ValueError):
            calibrate(0.5, floor=0.9, ceiling=0.1)


# ---------------------------------------------------------------------------
# Buckets (explanation labels)
# ---------------------------------------------------------------------------

class TestBucket:
    """Pinned explanation labels for ``bucket`` and its alias."""

    @pytest.mark.parametrize(
        "p, expected",
        [
            # "very-likely": 0.90 itself is included.
            (0.95, "very-likely"),
            (0.90, "very-likely"),
            # "likely": 0.75 itself is included.
            (0.80, "likely"),
            (0.75, "likely"),
            # "plausible": 0.50 itself is included.
            (0.60, "plausible"),
            (0.50, "plausible"),
            # "weak": 0.25 itself is included.
            (0.30, "weak"),
            (0.25, "weak"),
            # Everything pinned below that is "speculative".
            (0.10, "speculative"),
            (0.0, "speculative"),
        ],
    )
    def test_pinned_labels(self, p, expected):
        """Each confidence value maps to its pinned label string."""
        label = bucket(p)
        assert label == expected

    def test_alias(self):
        """``confidence_label`` must be the very same object as ``bucket``."""
        assert bucket is confidence_label