"""Integration tests for ``fablepool export``, ``fablepool audit`` and ``fablepool log``. Unlike the in-process tests in ``test_cli_browse.py`` / ``test_cli_mutations.py``, these tests deliberately drive the CLI through a real subprocess (``python -m fablepool``) against freshly seeded node directories. That means they validate exactly the surface a user — or a second, independent implementation — would see: * process exit codes, * the documented ``--json`` output contract, * the wire-format JSONL emitted by ``export`` (one signed operation envelope per line), * the relationship between the full export, topic-subset exports, and the operation log reported by ``audit``. The JSON contracts asserted here are the ones documented in ``docs/cli-reference.md``; if either changes, both must change together. """ from __future__ import annotations import json import os import subprocess import sys from pathlib import Path import pytest REPO_ROOT = Path(__file__).resolve().parents[1] #: Every line of a wire-format export must be a JSON object carrying at #: least these envelope fields (see docs/cli-reference.md, "Wire-format #: export contract"). Additional fields are allowed. REQUIRED_ENVELOPE_KEYS = {"op_id", "kind", "author", "sig"} SUBPROCESS_TIMEOUT = 180 # generous: seeding runs the full derivation engine # --------------------------------------------------------------------------- # Subprocess plumbing # --------------------------------------------------------------------------- def run_cli(node_dir: Path, *args: str) -> subprocess.CompletedProcess: """Run ``python -m fablepool --node-dir ``. The environment is scrubbed of ``FABLEPOOL_NODE_DIR`` so the explicit ``--node-dir`` flag is the only thing selecting the node, which is also what we want to test (flag takes precedence over environment). """ env = dict(os.environ) env.pop("FABLEPOOL_NODE_DIR", None) return subprocess.run( [sys.executable, "-m", "fablepool", "--node-dir", str(node_dir), *args], capture_output=True, text=True, cwd=str(REPO_ROOT), env=env, timeout=SUBPROCESS_TIMEOUT, ) def run_cli_ok(node_dir: Path, *args: str) -> subprocess.CompletedProcess: proc = run_cli(node_dir, *args) assert proc.returncode == 0, ( f"`fablepool {' '.join(args)}` failed with code {proc.returncode}\n" f"--- stdout ---\n{proc.stdout}\n--- stderr ---\n{proc.stderr}" ) return proc def json_out(proc: subprocess.CompletedProcess) -> object: """Parse a ``--json`` command's stdout as a single JSON document.""" try: return json.loads(proc.stdout) except json.JSONDecodeError as exc: # pragma: no cover - failure path raise AssertionError( f"command did not emit valid JSON on stdout: {exc}\n" f"--- stdout ---\n{proc.stdout}" ) from None def parse_export_lines(text: str) -> list[dict]: """Parse wire-format JSONL export output into a list of envelopes.""" lines = [ln for ln in text.splitlines() if ln.strip()] envelopes = [] for i, line in enumerate(lines): try: obj = json.loads(line) except json.JSONDecodeError as exc: # pragma: no cover - failure path raise AssertionError(f"export line {i} is not valid JSON: {exc}\n{line!r}") from None assert isinstance(obj, dict), f"export line {i} is not a JSON object: {line!r}" envelopes.append(obj) return envelopes def topic_name(entry: object) -> str: """Extract a topic name from a `topics --json` entry.""" if isinstance(entry, str): return entry assert isinstance(entry, dict), f"unexpected topic entry: {entry!r}" for key in ("topic", "name"): if key in entry: return str(entry[key]) raise AssertionError(f"topic entry has no recognizable name field: {entry!r}") def claim_id(entry: dict) -> str: """Extract a claim id from a `claims --json` entry.""" for key in ("claim_id", "id"): if key in entry: return str(entry[key]) raise AssertionError(f"claim entry has no recognizable id field: {entry!r}") # --------------------------------------------------------------------------- # Fixtures # --------------------------------------------------------------------------- @pytest.fixture(scope="module") def seeded_node_dir(tmp_path_factory: pytest.TempPathFactory) -> Path: """A node directory seeded with the demo dataset. Module-scoped and used only by *read-only* tests in this file; any test that mutates the node must use the function-scoped ``fresh_node_dir``. """ node_dir = tmp_path_factory.mktemp("export-audit-node") run_cli_ok(node_dir, "seed-demo") return node_dir @pytest.fixture() def fresh_node_dir(tmp_path: Path) -> Path: """A private seeded node directory for tests that mutate state.""" run_cli_ok(tmp_path, "seed-demo") return tmp_path # --------------------------------------------------------------------------- # Full export # --------------------------------------------------------------------------- def test_full_export_is_valid_wire_jsonl(seeded_node_dir: Path) -> None: proc = run_cli_ok(seeded_node_dir, "export") envelopes = parse_export_lines(proc.stdout) assert envelopes, "full export of a seeded node must not be empty" for i, env in enumerate(envelopes): missing = REQUIRED_ENVELOPE_KEYS - env.keys() assert not missing, f"export line {i} missing envelope fields {sorted(missing)}: {env!r}" assert isinstance(env["op_id"], str) and env["op_id"], f"line {i}: empty op_id" assert isinstance(env["kind"], str) and env["kind"], f"line {i}: empty kind" assert isinstance(env["author"], str) and env["author"], f"line {i}: empty author" assert isinstance(env["sig"], str) and env["sig"], f"line {i}: empty signature" def test_full_export_op_ids_are_unique(seeded_node_dir: Path) -> None: envelopes = parse_export_lines(run_cli_ok(seeded_node_dir, "export").stdout) op_ids = [env["op_id"] for env in envelopes] assert len(op_ids) == len(set(op_ids)), "duplicate op_id in full export" def test_full_export_is_deterministic(seeded_node_dir: Path) -> None: """Two exports of an unchanged node must be byte-identical. The export is the append-only operation log serialized in log order; nothing about it should be timestamp-of-export or randomly ordered. """ first = run_cli_ok(seeded_node_dir, "export").stdout second = run_cli_ok(seeded_node_dir, "export").stdout assert first == second def test_export_to_file_matches_stdout_export(seeded_node_dir: Path, tmp_path: Path) -> None: out_file = tmp_path / "graph.jsonl" run_cli_ok(seeded_node_dir, "export", "--out", str(out_file)) assert out_file.exists(), "--out did not create the export file" file_envelopes = parse_export_lines(out_file.read_text(encoding="utf-8")) stdout_envelopes = parse_export_lines(run_cli_ok(seeded_node_dir, "export").stdout) assert [e["op_id"] for e in file_envelopes] == [e["op_id"] for e in stdout_envelopes] # --------------------------------------------------------------------------- # Topic-subset export # --------------------------------------------------------------------------- def test_topic_subset_export_is_a_strict_subset(seeded_node_dir: Path) -> None: full_ids = {e["op_id"] for e in parse_export_lines(run_cli_ok(seeded_node_dir, "export").stdout)} topics_doc = json_out(run_cli_ok(seeded_node_dir, "topics", "--json")) assert isinstance(topics_doc, dict) and "topics" in topics_doc, ( f"`topics --json` must emit an object with a 'topics' key, got: {topics_doc!r}" ) topics = [topic_name(t) for t in topics_doc["topics"]] assert len(topics) >= 2, "seed dataset must contain at least two topics for subset testing" subset_proc = run_cli_ok(seeded_node_dir, "export", "--topic", topics[0]) subset_envelopes = parse_export_lines(subset_proc.stdout) subset_ids = {e["op_id"] for e in subset_envelopes} assert subset_ids, f"topic export for {topics[0]!r} is empty" assert subset_ids <= full_ids, "topic export contains operations absent from the full export" assert subset_ids < full_ids, ( "topic export should be strictly smaller than the full export when other topics exist" ) def test_topic_subset_export_covers_all_claims_of_the_topic(seeded_node_dir: Path) -> None: topics_doc = json_out(run_cli_ok(seeded_node_dir, "topics", "--json")) topic = topic_name(topics_doc["topics"][0]) claims_doc = json_out(run_cli_ok(seeded_node_dir, "claims", "--topic", topic, "--json")) assert isinstance(claims_doc, dict) and "claims" in claims_doc, ( f"`claims --json` must emit an object with a 'claims' key, got: {claims_doc!r}" ) ids = [claim_id(c) for c in claims_doc["claims"]] assert ids, f"seed topic {topic!r} has no claims" export_text = run_cli_ok(seeded_node_dir, "export", "--topic", topic).stdout for cid in ids: assert cid in export_text, ( f"claim {cid} of topic {topic!r} not present in the topic-subset export" ) # --------------------------------------------------------------------------- # Audit and log # --------------------------------------------------------------------------- def test_audit_json_reports_fully_verified_log(seeded_node_dir: Path) -> None: proc = run_cli_ok(seeded_node_dir, "audit", "--json") doc = json_out(proc) assert isinstance(doc, dict) assert doc.get("ok") is True, f"audit of a freshly seeded node must pass: {doc!r}" assert isinstance(doc.get("ops"), int) and doc["ops"] > 0 assert isinstance(doc.get("verified"), int) assert doc["verified"] == doc["ops"], "every operation in the log must verify" assert doc.get("failures") in ([], None) or doc["failures"] == [], ( f"audit reported failures on a clean node: {doc!r}" ) # The full export is the whole log: counts must agree. n_export = len(parse_export_lines(run_cli_ok(seeded_node_dir, "export").stdout)) assert doc["ops"] == n_export, ( f"audit saw {doc['ops']} ops but full export emitted {n_export} lines" ) def test_audit_human_output_mentions_success(seeded_node_dir: Path) -> None: proc = run_cli_ok(seeded_node_dir, "audit") assert "ok" in proc.stdout.lower(), ( f"human-readable audit output should state the result, got:\n{proc.stdout}" ) def test_log_json_lists_operations(seeded_node_dir: Path) -> None: doc = json_out(run_cli_ok(seeded_node_dir, "log", "--json")) assert isinstance(doc, dict) and "ops" in doc, ( f"`log --json` must emit an object with an 'ops' key, got: {doc!r}" ) ops = doc["ops"] assert isinstance(ops, list) and ops for entry in ops: assert isinstance(entry, dict) assert entry.get("op_id"), f"log entry missing op_id: {entry!r}" assert entry.get("kind"), f"log entry missing kind: {entry!r}" def test_log_limit_caps_output(seeded_node_dir: Path) -> None: doc = json_out(run_cli_ok(seeded_node_dir, "log", "--json", "--limit", "3")) assert len(doc["ops"]) <= 3 # --------------------------------------------------------------------------- # Export and audit across a mutation (refutation) # --------------------------------------------------------------------------- def test_refutation_appends_operations_and_log_stays_auditable(fresh_node_dir: Path) -> None: before = parse_export_lines(run_cli_ok(fresh_node_dir, "export").stdout) before_ids = {e["op_id"] for e in before} # Pick any active claim from the first topic and refute it. topics_doc = json_out(run_cli_ok(fresh_node_dir, "topics", "--json")) topic = topic_name(topics_doc["topics"][0]) claims_doc = json_out(run_cli_ok(fresh_node_dir, "claims", "--topic", topic, "--json")) target = claim_id(claims_doc["claims"][0]) refute_doc = json_out( run_cli_ok(fresh_node_dir, "refute", target, "--reason", "integration test", "--json") ) assert isinstance(refute_doc, dict) assert refute_doc.get("op_id"), f"refute must report the new operation id: {refute_doc!r}" assert "cascade" in refute_doc and isinstance(refute_doc["cascade"], list), ( f"refute must report cascaded claims (possibly empty): {refute_doc!r}" ) after = parse_export_lines(run_cli_ok(fresh_node_dir, "export").stdout) after_ids = {e["op_id"] for e in after} assert before_ids < after_ids, "refutation must append new operations to the log" assert refute_doc["op_id"] in after_ids, "the refutation operation must appear in the export" # Append-only: nothing that existed before may disappear. assert before_ids <= after_ids # The mutated log must still fully verify. audit_doc = json_out(run_cli_ok(fresh_node_dir, "audit", "--json")) assert audit_doc.get("ok") is True assert audit_doc["verified"] == audit_doc["ops"] == len(after)