"""Tests for the amendment evaluation harness.""" from __future__ import annotations import copy import json from pathlib import Path import pytest import yaml from fabletest import harness from fabletest.model import load_corpus from fabletest.params import load_parameters from tests.conftest import STRICT_PARAMS, write_params DRAIN_SCENARIO = { "id": "harness-drain-001", "title": "Majority drains the treasury in one omnibus spend", "family": "treasury-drain", "description": ( "A 2/3 majority faction passes a single spend of 60% of the " "treasury to one of its own members. The single-spend cap must " "refuse execution regardless of the vote outcome." ), "precedent": "Synthetic; reduced form of the classic 51% treasury raid.", "tags": ["synthetic"], "actors": [ {"id": "m1", "faction": "majority"}, {"id": "m2", "faction": "majority"}, {"id": "m3", "faction": "minority"}, ], "resources": {"treasury": 100.0}, "objective": {"type": "treasury_below", "threshold": 50.0}, "moves": [ {"actor": "m1", "action": "propose", "args": {"kind": "spend", "amount": 60.0, "recipient": "m1"}}, {"actor": "m1", "action": "vote", "args": {"proposal": "p1", "choice": "yes"}}, {"actor": "m2", "action": "vote", "args": {"proposal": "p1", "choice": "yes"}}, {"actor": "m3", "action": "vote", "args": {"proposal": "p1", "choice": "no"}}, {"actor": "m1", "action": "tally", "args": {"proposal": "p1"}}, {"actor": "m1", "action": "execute", "args": {"proposal": "p1"}}, ], "expected": {"attack_succeeds": False, "blocked_by": []}, "empathy": { "worst_off": "m3", "floor": 0.0, "rationale": "Floor disabled; this scenario isolates the spend cap rule.", }, } QUORUM_SCENARIO = { "id": "harness-quorum-001", "title": "Two members tally a vote nobody else saw", "family": "quorum-manipulation", "description": ( "Two of five members propose, vote, and tally within an hour, " "before anyone else participates. Quorum must keep the tally from " "binding the group." ), "precedent": "Synthetic; reduced form of the snap-meeting quorum ambush.", "tags": ["synthetic"], "actors": [ {"id": "m1", "faction": "majority"}, {"id": "m2", "faction": "majority"}, {"id": "m3", "faction": "minority"}, {"id": "m4", "faction": "minority"}, {"id": "m5", "faction": "minority"}, ], "resources": {"treasury": 100.0}, "objective": {"type": "proposal_executed", "proposal": "p1"}, "moves": [ {"actor": "m1", "action": "propose", "args": {"kind": "spend", "amount": 5.0, "recipient": "m1"}}, {"actor": "m1", "action": "vote", "args": {"proposal": "p1", "choice": "yes"}}, {"actor": "m2", "action": "vote", "args": {"proposal": "p1", "choice": "yes"}}, {"actor": "m1", "action": "tally", "args": {"proposal": "p1"}}, {"actor": "m1", "action": "execute", "args": {"proposal": "p1"}}, ], "expected": {"attack_succeeds": False, "blocked_by": []}, "empathy": { "worst_off": "m3", "floor": 0.0, "rationale": "Floor disabled; this scenario isolates the quorum rule.", }, } @pytest.fixture() def mini_corpus(tmp_path: Path) -> Path: corpus = tmp_path / "scenarios" corpus.mkdir() (corpus / "mini.yaml").write_text( yaml.safe_dump({"scenarios": [DRAIN_SCENARIO, QUORUM_SCENARIO]}, sort_keys=False), encoding="utf-8", ) return corpus @pytest.fixture() def strict_params_path(tmp_path: Path) -> Path: return write_params(tmp_path, STRICT_PARAMS, "baseline.yaml") @pytest.fixture() def weakened_params_path(tmp_path: Path) -> Path: weakened = copy.deepcopy(STRICT_PARAMS) # The "amendment" under test: remove the single-spend cap entirely. weakened["treasury"]["single_spend_cap_fraction"] = 1.0 return write_params(tmp_path, weakened, "proposed.yaml") class TestRunSuite: def test_strict_params_defend_mini_corpus(self, mini_corpus, strict_params_path): scenarios = load_corpus(mini_corpus) params = load_parameters(strict_params_path) report = harness.run_suite(scenarios, params) assert report.total == 2 assert report.ok, report.failed_ids() families = report.by_family() assert families["treasury-drain"] == (1, 1) assert families["quorum-manipulation"] == (1, 1) def test_family_filter(self, mini_corpus, strict_params_path): scenarios = load_corpus(mini_corpus) params = load_parameters(strict_params_path) report = harness.run_suite(scenarios, params, families=["treasury-drain"]) assert report.total == 1 assert report.results[0].scenario.id == "harness-drain-001" def test_report_serializes_to_json(self, mini_corpus, strict_params_path): scenarios = load_corpus(mini_corpus) params = load_parameters(strict_params_path) report = harness.run_suite(scenarios, params) payload = json.dumps(report.to_dict()) decoded = json.loads(payload) assert decoded["total"] == 2 assert {r["id"] for r in decoded["results"]} == { "harness-drain-001", "harness-quorum-001", } class TestEvaluateAmendment: def test_weakening_amendment_is_blocked( self, mini_corpus, strict_params_path, weakened_params_path ): verdict = harness.evaluate_amendment_paths( mini_corpus, strict_params_path, weakened_params_path ) assert verdict.blocked is True assert "harness-drain-001" in verdict.regressions # The quorum scenario is untouched by the spend-cap change. assert "harness-quorum-001" not in verdict.regressions assert any("regression" in r for r in verdict.reasons) def test_identical_parameters_pass(self, mini_corpus, strict_params_path): verdict = harness.evaluate_amendment_paths( mini_corpus, strict_params_path, strict_params_path ) assert verdict.blocked is False assert verdict.parameter_diff == {} assert verdict.semver_impact == "patch" def test_verdict_serializes(self, mini_corpus, strict_params_path, weakened_params_path): verdict = harness.evaluate_amendment_paths( mini_corpus, strict_params_path, weakened_params_path ) decoded = json.loads(json.dumps(verdict.to_dict())) assert decoded["blocked"] is True assert "treasury.single_spend_cap_fraction" in decoded["parameter_diff"] def test_non_strict_mode_only_blocks_regressions( self, mini_corpus, strict_params_path, weakened_params_path ): # Even in lenient mode, a regression still blocks. verdict = harness.evaluate_amendment_paths( mini_corpus, strict_params_path, weakened_params_path, strict=False ) assert verdict.blocked is True # But a no-op amendment with pre-existing failures would not; with # the weakened params as BOTH baseline and proposed there are no # regressions, so lenient mode lets it pass while strict refuses. lenient = harness.evaluate_amendment_paths( mini_corpus, weakened_params_path, weakened_params_path, strict=False ) assert lenient.blocked is False strict = harness.evaluate_amendment_paths( mini_corpus, weakened_params_path, weakened_params_path, strict=True ) assert strict.blocked is True assert "harness-drain-001" in strict.persistent_failures class TestParameterDiff: def test_diff_and_semver_classification(self): baseline = load_parameters.__self__ if False else None # noqa: F841 from fabletest.params import Parameters # local import to keep deps obvious a = Parameters({"vote": {"quorum": 0.5}, "treasury": {"single_spend_cap_fraction": 0.2}}) b = Parameters({"vote": {"quorum": 0.5}, "treasury": {"single_spend_cap_fraction": 0.3}}) diff = harness.diff_parameters(a, b) assert diff == {"treasury.single_spend_cap_fraction": (0.2, 0.3)} assert harness.classify_semver(diff) == "minor" c = Parameters({"vote": {"quorum": 0.4}, "treasury": {"single_spend_cap_fraction": 0.2}}) kernel_diff = harness.diff_parameters(a, c) assert harness.classify_semver(kernel_diff) == "major" assert harness.classify_semver({}) == "patch" class TestRendering: def test_text_rendering_mentions_verdict( self, mini_corpus, strict_params_path, weakened_params_path ): verdict = harness.evaluate_amendment_paths( mini_corpus, strict_params_path, weakened_params_path ) text = harness.render_amendment_text(verdict) assert "BLOCKED" in text assert "harness-drain-001" in text markdown = harness.render_amendment_markdown(verdict) assert "BLOCKED" in markdown assert "treasury.single_spend_cap_fraction" in markdown