"""Confidence regression tests with pinned values. Every formula in ``mnema.derive.confidence`` is pinned here to its exact expected output. If a formula changes, these tests fail loudly — that is the point: confidence behaviour must never drift silently, because downstream claims, explanations, and user trust all depend on it. """ import pytest from mnema.derive.confidence import ( bucket, calibrate, clamp01, combine_independent, confidence_label, decay, discount, laplace, min_combine, noisy_or, recency_decay, support_ratio, weakest_link, weighted_mean, ) APPROX = dict(abs=1e-12) # --------------------------------------------------------------------------- # clamp01 # --------------------------------------------------------------------------- class TestClamp: @pytest.mark.parametrize("raw, expected", [(-0.1, 0.0), (0.0, 0.0), (0.4, 0.4), (1.0, 1.0), (1.5, 1.0)]) def test_pinned(self, raw, expected): assert clamp01(raw) == pytest.approx(expected, **APPROX) def test_nan_rejected(self): with pytest.raises(ValueError): clamp01(float("nan")) # --------------------------------------------------------------------------- # Laplace / rule of succession # --------------------------------------------------------------------------- class TestLaplace: @pytest.mark.parametrize( "successes, trials, expected", [ (0, 0, 0.5), # no data => maximal uncertainty (4, 5, 5 / 7), # 4-of-5 supporting observations (10, 10, 11 / 12), # perfect record never reaches 1.0 (0, 10, 1 / 12), # perfect absence never reaches 0.0 (1, 2, 0.5), ], ) def test_pinned_default_prior(self, successes, trials, expected): assert laplace(successes, trials) == pytest.approx(expected, **APPROX) def test_pinned_custom_prior(self): assert laplace(8, 10, alpha=2.0, beta=2.0) == pytest.approx(10 / 14, **APPROX) def test_monotone_in_successes(self): values = [laplace(s, 10) for s in range(11)] assert values == sorted(values) def test_alias(self): assert support_ratio is laplace @pytest.mark.parametrize( "successes, trials", [(-1, 5), (5, -1), (6, 5)], ) def test_domain_errors(self, successes, trials): with pytest.raises(ValueError): laplace(successes, trials) def test_prior_must_be_positive(self): with pytest.raises(ValueError): laplace(1, 2, alpha=0.0) # --------------------------------------------------------------------------- # Noisy-OR # --------------------------------------------------------------------------- class TestNoisyOr: @pytest.mark.parametrize( "ps, expected", [ ((), 0.0), ((0.5,), 0.5), ((0.6, 0.7), 0.88), ((0.5, 0.5, 0.5), 0.875), ((0.9, 1.0), 1.0), ((0.0, 0.0), 0.0), ], ) def test_pinned(self, ps, expected): assert noisy_or(*ps) == pytest.approx(expected, **APPROX) def test_accepts_single_iterable(self): assert noisy_or([0.6, 0.7]) == pytest.approx(0.88, **APPROX) @pytest.mark.parametrize("a", [0.0, 0.1, 0.5, 0.9, 1.0]) @pytest.mark.parametrize("b", [0.0, 0.3, 0.7, 1.0]) def test_bounds_monotone_commutative(self, a, b): r = noisy_or(a, b) assert 0.0 <= r <= 1.0 assert r >= max(a, b) - 1e-12 # adding evidence never lowers belief assert r == pytest.approx(noisy_or(b, a), **APPROX) def test_out_of_range_rejected(self): with pytest.raises(ValueError): noisy_or(0.5, 1.2) def test_alias(self): assert combine_independent is noisy_or # --------------------------------------------------------------------------- # Weakest link # --------------------------------------------------------------------------- class TestWeakestLink: def test_pinned(self): assert weakest_link(0.9, 0.4, 0.7) == pytest.approx(0.4, **APPROX) assert weakest_link(0.8) == pytest.approx(0.8, **APPROX) assert weakest_link([0.6, 0.3]) == pytest.approx(0.3, **APPROX) def test_empty_rejected(self): with pytest.raises(ValueError): weakest_link() def test_alias(self): assert min_combine is weakest_link # --------------------------------------------------------------------------- # Weighted mean # --------------------------------------------------------------------------- class TestWeightedMean: def test_pinned(self): assert weighted_mean([0.2, 0.8], [1.0, 3.0]) == pytest.approx(0.65, **APPROX) assert weighted_mean([0.5], [2.0]) == pytest.approx(0.5, **APPROX) def test_zero_weight_entries_are_ignored_in_effect(self): assert weighted_mean([0.9, 0.1], [1.0, 0.0]) == pytest.approx(0.9, **APPROX) def test_length_mismatch_rejected(self): with pytest.raises(ValueError): weighted_mean([0.5, 0.6], [1.0]) def test_all_zero_weights_rejected(self): with pytest.raises(ValueError): weighted_mean([0.5, 0.6], [0.0, 0.0]) def test_empty_rejected(self): with pytest.raises(ValueError): weighted_mean([], []) # --------------------------------------------------------------------------- # Recency decay # --------------------------------------------------------------------------- class TestRecencyDecay: @pytest.mark.parametrize( "age, expected", [(0, 1.0), (90, 0.5), (180, 0.25), (270, 0.125)], ) def test_pinned_default_half_life(self, age, expected): assert recency_decay(age) == pytest.approx(expected, **APPROX) def test_pinned_custom_half_life(self): assert recency_decay(30, half_life_days=30) == pytest.approx(0.5, **APPROX) assert recency_decay(10, half_life_days=30) == pytest.approx(0.5 ** (1 / 3), **APPROX) def test_negative_age_treated_as_now(self): assert recency_decay(-5) == pytest.approx(1.0, **APPROX) def test_non_positive_half_life_rejected(self): with pytest.raises(ValueError): recency_decay(10, half_life_days=0) def test_alias(self): assert decay is recency_decay # --------------------------------------------------------------------------- # Discount and calibrate # --------------------------------------------------------------------------- class TestDiscountCalibrate: def test_discount_pinned(self): assert discount(0.8, 0.5) == pytest.approx(0.4, **APPROX) assert discount(1.0, 1.0) == pytest.approx(1.0, **APPROX) assert discount(0.7, 0.0) == pytest.approx(0.0, **APPROX) def test_discount_domain(self): with pytest.raises(ValueError): discount(1.5, 0.5) with pytest.raises(ValueError): discount(0.5, -0.1) @pytest.mark.parametrize( "p, expected", [(0.0, 0.05), (0.5, 0.52), (1.0, 0.99)], ) def test_calibrate_pinned_defaults(self, p, expected): assert calibrate(p) == pytest.approx(expected, **APPROX) def test_calibrate_custom_range(self): assert calibrate(0.5, floor=0.1, ceiling=0.9) == pytest.approx(0.5, **APPROX) def test_calibrate_invalid_range_rejected(self): with pytest.raises(ValueError): calibrate(0.5, floor=0.9, ceiling=0.1) # --------------------------------------------------------------------------- # Buckets (explanation labels) # --------------------------------------------------------------------------- class TestBucket: @pytest.mark.parametrize( "p, expected", [ (0.95, "very-likely"), (0.90, "very-likely"), # boundary inclusive (0.80, "likely"), (0.75, "likely"), # boundary inclusive (0.60, "plausible"), (0.50, "plausible"), # boundary inclusive (0.30, "weak"), (0.25, "weak"), # boundary inclusive (0.10, "speculative"), (0.0, "speculative"), ], ) def test_pinned_labels(self, p, expected): assert bucket(p) == expected def test_alias(self): assert confidence_label is bucket