"""Confidence arithmetic for the mnema derivation engine. Every deriver expresses belief as a probability-like score in ``[0.0, 1.0]``. This module is the *single* place where those scores are produced and combined, so that confidence behaviour is auditable in one spot and regression-tested with pinned values (see ``tests/test_confidence.py``). If a formula here changes, the pinned tests must be changed with it — deliberately, and visibly in review. Formula summary --------------- ======================== ===================================================== ``laplace(s, t, a, b)`` ``(s + a) / (t + a + b)`` — rule of succession. Avoids confidence 0.0/1.0 from tiny samples; with defaults ``a = b = 1`` an empty sample yields 0.5. ``noisy_or(p1..pn)`` ``1 - prod(1 - pi)`` — combine *independent supporting* signals (more agreeing evidence can only raise belief, never above 1.0). ``weakest_link(p1..pn)`` ``min(pi)`` — combine *conjunctive* dependencies (a derivation chain is no stronger than its weakest input). ``weighted_mean(v, w)`` ``sum(vi*wi) / sum(wi)`` — blend comparable signals of unequal quality. ``recency_decay(d, hl)`` ``0.5 ** (max(d, 0) / hl)`` — half-life decay for stale evidence (default half-life 90 days). ``discount(p, f)`` ``p * f`` — apply a reliability factor to a source. ``calibrate(p, lo, hi)`` ``lo + p * (hi - lo)`` — keep reported confidence away from absolute certainty at either end. ``bucket(p)`` Human-readable label for explanation records. ======================== ===================================================== All functions are pure, validate their inputs, and raise ``ValueError`` / ``TypeError`` on out-of-domain arguments rather than silently producing garbage probabilities. Backwards-compatible aliases (``support_ratio``, ``combine_independent``, ``min_combine``, ``decay``, ``confidence_label``) are kept so derivers may refer to either name. """ from __future__ import annotations import math from typing import Iterable, List, Sequence, Tuple, Union Number = Union[int, float] __all__ = [ "clamp01", "laplace", "support_ratio", "noisy_or", "combine_independent", "weakest_link", "min_combine", "weighted_mean", "recency_decay", "decay", "discount", "calibrate", "bucket", "confidence_label", ] # --------------------------------------------------------------------------- # Validation helpers # --------------------------------------------------------------------------- def _check_finite(value: Number, name: str) -> float: """Reject non-numbers, booleans, NaN, and infinities.""" if isinstance(value, bool) or not isinstance(value, (int, float)): raise TypeError(f"{name} must be a number, got {type(value).__name__}") fvalue = float(value) if math.isnan(fvalue) or math.isinf(fvalue): raise ValueError(f"{name} must be finite, got {value!r}") return fvalue def _check_probability(value: Number, name: str = "probability") -> float: fvalue = _check_finite(value, name) if not 0.0 <= fvalue <= 1.0: raise ValueError(f"{name} must be within [0, 1], got {value!r}") return fvalue def _expand(args: Tuple) -> List[float]: """Allow ``f(0.1, 0.2)`` and ``f([0.1, 0.2])`` interchangeably.""" if len(args) == 1 and not isinstance(args[0], (int, float)): return list(args[0]) return list(args) # --------------------------------------------------------------------------- # Primitives # --------------------------------------------------------------------------- def clamp01(p: Number) -> float: """Clamp a finite number into ``[0.0, 1.0]``. Raises on NaN / infinity instead of guessing — a non-finite confidence is always a bug upstream. """ fp = _check_finite(p, "p") return min(max(fp, 0.0), 1.0) def laplace(successes: Number, trials: Number, alpha: Number = 1.0, beta: Number = 1.0) -> float: """Laplace-smoothed support ratio: ``(s + alpha) / (t + alpha + beta)``. Used wherever a deriver counts "k supporting observations out of n opportunities" (e.g. attended the gym on 7 of 8 scheduled Tuesdays). With the default uniform prior (``alpha = beta = 1``): * no data at all yields 0.5 (maximal uncertainty), and * a perfect 10-for-10 record yields 11/12, never 1.0. """ s = _check_finite(successes, "successes") t = _check_finite(trials, "trials") a = _check_finite(alpha, "alpha") b = _check_finite(beta, "beta") if s < 0 or t < 0: raise ValueError("successes and trials must be non-negative") if s > t: raise ValueError(f"successes ({s}) cannot exceed trials ({t})") if a <= 0 or b <= 0: raise ValueError("alpha and beta must be positive") return (s + a) / (t + a + b) def noisy_or(*probabilities) -> float: """Noisy-OR combination of independent supporting signals. ``noisy_or(p1, ..., pn) = 1 - (1-p1) * ... * (1-pn)``. Monotone non-decreasing in every argument; an empty combination is 0.0 (no evidence, no belief); any certain input makes the result certain. Accepts either varargs or a single iterable. """ ps = [_check_probability(p, "probability") for p in _expand(probabilities)] if not ps: return 0.0 remaining_doubt = 1.0 for p in ps: remaining_doubt *= (1.0 - p) return 1.0 - remaining_doubt def weakest_link(*probabilities) -> float: """Conjunctive combination: a chain is only as strong as its weakest link. Used when a claim *requires* all of its inputs (e.g. a derived claim's confidence may never exceed the confidence of the claims it was derived from). Accepts either varargs or a single iterable; raises on an empty combination because "the minimum of nothing" has no safe answer. """ ps = [_check_probability(p, "probability") for p in _expand(probabilities)] if not ps: raise ValueError("weakest_link requires at least one probability") return min(ps) def weighted_mean(values: Sequence[Number], weights: Sequence[Number]) -> float: """Weighted mean of comparable confidence signals. ``values`` must each be in [0, 1]; ``weights`` must be non-negative with a positive sum; the two sequences must have equal, non-zero length. """ if len(values) != len(weights): raise ValueError( f"values ({len(values)}) and weights ({len(weights)}) must have equal length" ) if not values: raise ValueError("weighted_mean requires at least one value") total = 0.0 weight_sum = 0.0 for i, (v, w) in enumerate(zip(values, weights)): fv = _check_probability(v, f"values[{i}]") fw = _check_finite(w, f"weights[{i}]") if fw < 0: raise ValueError(f"weights[{i}] must be non-negative, got {w!r}") total += fv * fw weight_sum += fw if weight_sum <= 0: raise ValueError("weights must sum to a positive number") return clamp01(total / weight_sum) def recency_decay(age_days: Number, half_life_days: Number = 90.0) -> float: """Half-life decay weight for stale evidence. ``0.5 ** (age / half_life)`` — evidence observed *now* weighs 1.0, evidence one half-life old weighs 0.5, two half-lives old weighs 0.25, and so on. Negative ages (clock skew) are treated as zero rather than amplifying. """ age = _check_finite(age_days, "age_days") half_life = _check_finite(half_life_days, "half_life_days") if half_life <= 0: raise ValueError(f"half_life_days must be positive, got {half_life_days!r}") return 0.5 ** (max(age, 0.0) / half_life) def discount(p: Number, factor: Number) -> float: """Apply a source-reliability factor in [0, 1] to a confidence in [0, 1].""" fp = _check_probability(p, "p") ff = _check_probability(factor, "factor") return fp * ff def calibrate(p: Number, floor: Number = 0.05, ceiling: Number = 0.99) -> float: """Linearly map a raw score into ``[floor, ceiling]``. Keeps reported confidence away from 0.0 and 1.0: the system never claims absolute certainty about a derived fact, and never reports literally zero belief in something it bothered to derive. """ fp = _check_probability(p, "p") lo = _check_finite(floor, "floor") hi = _check_finite(ceiling, "ceiling") if not (0.0 <= lo < hi <= 1.0): raise ValueError(f"floor/ceiling must satisfy 0 <= floor < ceiling <= 1, got {floor!r}/{ceiling!r}") return lo + fp * (hi - lo) # --------------------------------------------------------------------------- # Human-readable labels (used by explanation records) # --------------------------------------------------------------------------- _BUCKETS: Tuple[Tuple[float, str], ...] = ( (0.90, "very-likely"), (0.75, "likely"), (0.50, "plausible"), (0.25, "weak"), ) def bucket(p: Number) -> str: """Map a confidence in [0, 1] to a stable, human-readable label. >= 0.90 very-likely >= 0.75 likely >= 0.50 plausible >= 0.25 weak < 0.25 speculative """ fp = _check_probability(p, "p") for threshold, label in _BUCKETS: if fp >= threshold: return label return "speculative" # --------------------------------------------------------------------------- # Aliases kept for compatibility with deriver call sites # --------------------------------------------------------------------------- support_ratio = laplace combine_independent = noisy_or min_combine = weakest_link decay = recency_decay confidence_label = bucket