"""End-to-end pipeline tests: full default-deriver runs over the sample
corpus, checking claim production, provenance chains, calibrated
confidence, explanations, idempotency, and determinism."""

from __future__ import annotations

import json
import math

from mnema.core.log import OperationLog
from mnema.derive.engine import DerivationEngine

# Substrings expected (case-insensitively) in the deriver names of the four
# default deriver families. Matching on substrings keeps the test stable
# against exact-naming choices ("routines" vs "RoutineDeriver").
# Keep every marker lowercase: the consuming test lowercases deriver names
# before the substring check, so an uppercase marker could never match.
FAMILY_MARKERS = ("routine", "place", "relation", "preference")


def _value_key(value) -> str:
    return json.dumps(value, sort_keys=True, default=str)


def test_run_produces_claims_from_every_default_deriver_family(ran):
    """Check that the run's new claims cover every marker in FAMILY_MARKERS.

    ``ran`` is unpacked as ``(engine, result, evidence)``; only the result's
    ``new_claims`` are inspected. Deriver names are lowercased before the
    substring match, and the first marker without a match fails the test.
    """
    _engine, outcome, _evidence = ran
    assert outcome.new_claims, "full sample corpus must yield claims"

    deriver_names = set()
    for claim in outcome.new_claims:
        deriver_names.add(claim.deriver.lower())

    for marker in FAMILY_MARKERS:
        matching = [name for name in deriver_names if marker in name]
        assert matching, (
            f"no claims produced by a deriver matching {marker!r}; "
            f"derivers seen: {sorted(deriver_names)}"
        )


def test_every_claim_has_nonempty_provenance(ran):
    """Check that the engine graph lists at least one input per new claim."""
    engine, outcome, _evidence = ran
    for claim in outcome.new_claims:
        # Materialise the inputs so a lazy iterable is judged by its contents.
        recorded = tuple(engine.graph.inputs(claim.claim_id))
        assert recorded, f"claim {claim.claim_id} ({claim.predicate}) has no recorded inputs"


def test_provenance_chains_terminate_in_ingested_evidence(ran, provenance_roots):
    """Check that every provenance root of every new claim is ingested evidence.

    ``provenance_roots`` is called as ``provenance_roots(engine, claim_id)``
    and its result must be non-empty and support set difference against the
    ids collected from the ``evidence`` element of ``ran``.
    """
    engine, outcome, evidence = ran

    evidence_ids = set()
    for item in evidence:
        evidence_ids.add(item.evidence_id)

    for claim in outcome.new_claims:
        roots = provenance_roots(engine, claim.claim_id)
        assert roots, f"claim {claim.claim_id} has an empty provenance chain"

        # Anything left after removing known evidence ids is an unexpected root.
        stray = roots - evidence_ids
        assert not stray, (
            f"claim {claim.claim_id} has provenance roots that are not "
            f"ingested evidence: {sorted(stray)}"
        )


def test_confidences_are_calibrated_probabilities(ran):
    """Check that each new claim's confidence is finite and lies in (0, 1]."""
    _engine, outcome, _evidence = ran
    for claim in outcome.new_claims:
        c = float(claim.confidence)

        # Checked separately so NaN/inf gets its own failure message.
        finite = math.isfinite(c)
        assert finite, f"claim {claim.claim_id} has non-finite confidence"

        in_range = c > 0.0 and c <= 1.0
        assert in_range, (
            f"claim {claim.claim_id} confidence {c} outside (0, 1]"
        )


def test_explanation_exists_for_every_new_claim(ran):
    """Check that each new claim has a usable explanation.

    For every new claim the run result must hold an explanation keyed by the
    claim id, carrying that same id and a non-blank string summary, and
    ``engine.explain`` must return a record for the same claim id.
    """
    engine, outcome, _evidence = ran
    for claim in outcome.new_claims:
        by_claim = outcome.explanations
        assert claim.claim_id in by_claim, (
            f"run() returned no explanation for claim {claim.claim_id}"
        )

        expl = by_claim[claim.claim_id]
        assert expl.claim_id == claim.claim_id
        summary = expl.summary
        assert isinstance(summary, str) and summary.strip()

        # On-demand lookup must resolve to the same claim id that run()
        # reported. NOTE(review): only claim_id is compared here, not the
        # explanation contents.
        fetched = engine.explain(claim.claim_id)
        assert fetched.claim_id == claim.claim_id


def test_rerun_is_idempotent(ran):
    """Check that a second ``run()`` adds no claims and keeps the claim-id set.

    The engine from ``ran`` has already been run once; this test runs it
    again and compares the ids from ``engine.claims()`` before and after.
    """
    engine, _first, _evidence = ran

    def active_ids():
        return {claim.claim_id for claim in engine.claims()}

    ids_before = active_ids()
    rerun = engine.run()
    assert not rerun.new_claims, (
        "re-running the engine on unchanged evidence must not mint new claims"
    )

    ids_after = active_ids()
    assert ids_before == ids_after, "active claim set drifted across an idempotent re-run"


def test_claims_are_recorded_in_operation_log(ran, op_log):
    """Check that the log holds at least as many "claim" ops as new claims.

    Only counts are compared; individual operations are not matched to
    individual claims.
    """
    _engine, outcome, _evidence = ran
    claim_op_count = sum(1 for op in op_log if op.kind == "claim")
    assert claim_op_count >= len(outcome.new_claims), (
        "every derived claim must be persisted as a signed claim operation"
    )


def test_evidence_is_recorded_in_operation_log(ran, op_log):
    """Check that the log holds at least as many "evidence" ops as evidence items."""
    _engine, _outcome, evidence = ran
    evidence_op_count = sum(1 for op in op_log if op.kind == "evidence")
    assert evidence_op_count >= len(evidence)


def test_determinism_across_independent_engines(tmp_path, keypair, sample_evidence):
    """Check that two fresh engines fed the same corpus produce equal snapshots.

    Each snapshot maps (subject, predicate, canonical value key) to the
    claim's confidence rounded to 9 decimal places. Each engine writes to its
    own log file under ``tmp_path``.
    """

    def snapshot(name: str):
        # Build a brand-new engine backed by its own log file.
        engine = DerivationEngine(log=OperationLog(tmp_path / name), keypair=keypair)
        engine.ingest(sample_evidence)
        engine.run()

        table = {}
        for claim in engine.claims():
            key = (claim.subject, claim.predicate, _value_key(claim.value))
            table[key] = round(float(claim.confidence), 9)
        return table

    first = snapshot("a.jsonl")
    second = snapshot("b.jsonl")
    assert first == second