"""End-to-end pipeline tests: full default-deriver runs over the sample corpus, checking claim production, provenance chains, calibrated confidence, explanations, idempotency, and determinism.""" from __future__ import annotations import json import math from mnema.core.log import OperationLog from mnema.derive.engine import DerivationEngine # Substrings expected (case-insensitively) in the deriver names of the four # default deriver families. Matching on substrings keeps the test stable # against exact-naming choices ("routines" vs "RoutineDeriver"). FAMILY_MARKERS = ("routine", "place", "relation", "preference") def _value_key(value) -> str: return json.dumps(value, sort_keys=True, default=str) def test_run_produces_claims_from_every_default_deriver_family(ran): engine, result, _evidence = ran assert result.new_claims, "full sample corpus must yield claims" deriver_names = {c.deriver.lower() for c in result.new_claims} for marker in FAMILY_MARKERS: assert any(marker in name for name in deriver_names), ( f"no claims produced by a deriver matching {marker!r}; " f"derivers seen: {sorted(deriver_names)}" ) def test_every_claim_has_nonempty_provenance(ran): engine, result, _evidence = ran for claim in result.new_claims: inputs = list(engine.graph.inputs(claim.claim_id)) assert inputs, f"claim {claim.claim_id} ({claim.predicate}) has no recorded inputs" def test_provenance_chains_terminate_in_ingested_evidence(ran, provenance_roots): engine, result, evidence = ran evidence_ids = {e.evidence_id for e in evidence} for claim in result.new_claims: roots = provenance_roots(engine, claim.claim_id) assert roots, f"claim {claim.claim_id} has an empty provenance chain" stray = roots - evidence_ids assert not stray, ( f"claim {claim.claim_id} has provenance roots that are not " f"ingested evidence: {sorted(stray)}" ) def test_confidences_are_calibrated_probabilities(ran): _engine, result, _evidence = ran for claim in result.new_claims: c = float(claim.confidence) assert math.isfinite(c), f"claim {claim.claim_id} has non-finite confidence" assert 0.0 < c <= 1.0, ( f"claim {claim.claim_id} confidence {c} outside (0, 1]" ) def test_explanation_exists_for_every_new_claim(ran): engine, result, _evidence = ran for claim in result.new_claims: assert claim.claim_id in result.explanations, ( f"run() returned no explanation for claim {claim.claim_id}" ) expl = result.explanations[claim.claim_id] assert expl.claim_id == claim.claim_id assert isinstance(expl.summary, str) and expl.summary.strip() # engine.explain must agree with the record handed back by run() again = engine.explain(claim.claim_id) assert again.claim_id == claim.claim_id def test_rerun_is_idempotent(ran): engine, _first, _evidence = ran before = {c.claim_id for c in engine.claims()} second = engine.run() assert not second.new_claims, ( "re-running the engine on unchanged evidence must not mint new claims" ) after = {c.claim_id for c in engine.claims()} assert before == after, "active claim set drifted across an idempotent re-run" def test_claims_are_recorded_in_operation_log(ran, op_log): _engine, result, _evidence = ran claim_ops = [op for op in op_log if op.kind == "claim"] assert len(claim_ops) >= len(result.new_claims), ( "every derived claim must be persisted as a signed claim operation" ) def test_evidence_is_recorded_in_operation_log(ran, op_log): _engine, _result, evidence = ran evidence_ops = [op for op in op_log if op.kind == "evidence"] assert len(evidence_ops) >= len(evidence) def test_determinism_across_independent_engines(tmp_path, keypair, sample_evidence): """Two fresh engines over the same corpus must agree on the full (subject, predicate, value) -> confidence map.""" def snapshot(name: str): log = OperationLog(tmp_path / name) eng = DerivationEngine(log=log, keypair=keypair) eng.ingest(sample_evidence) eng.run() return { (c.subject, c.predicate, _value_key(c.value)): round(float(c.confidence), 9) for c in eng.claims() } assert snapshot("a.jsonl") == snapshot("b.jsonl")