"""Scorecard rendering: per-event Markdown, aggregate Markdown, and JSON.""" from __future__ import annotations import json from typing import Any from . import kernel from .harness import EventResult from .rubric import ( WEIGHT_COMMONS, WEIGHT_LATENCY, WEIGHT_TRUST, WEIGHT_WORST_OFF, ) def _f(x: float) -> str: return f"{x:.1f}" def _delta(kernel_v: float, incumbent_v: float) -> str: d = kernel_v - incumbent_v sign = "+" if d >= 0 else "" return f"{sign}{d:.1f}" def render_event_markdown(r: EventResult) -> str: d = r.dossier inc = r.incumbent_scores ker = r.kernel_scores lines: list[str] = [] a = lines.append a(f"# Scorecard: {d.title}") a("") a(f"- **Event ID:** `{d.id}`") a(f"- **Category:** {d.category.value}") a(f"- **Polity / era:** {d.polity}, {d.dates.start} – {d.dates.end}") a(f"- **Incumbent constitution:** {d.incumbent_constitution}") a(f"- **Replayed under:** FablePool kernel v{kernel.KERNEL_VERSION}") a(f"- **Consistency check:** {'PASS' if r.ok else 'FAIL'}") a("") a("## Summary") a("") a(d.summary.strip()) a("") a("## Side-by-side") a("") a("| Metric | Incumbent | Kernel v0.1 | Δ |") a("|---|---:|---:|---:|") a( f"| **Worst-off participant** (weight {WEIGHT_WORST_OFF}) " f"| {_f(inc.worst_off)} | {_f(ker.worst_off)} " f"| {_delta(ker.worst_off, inc.worst_off)} |" ) a( f"| Commons integrity (weight {WEIGHT_COMMONS}) " f"| {_f(inc.commons_integrity)} | {_f(ker.commons_integrity)} " f"| {_delta(ker.commons_integrity, inc.commons_integrity)} |" ) a( f"| Trust preservation (weight {WEIGHT_TRUST}) " f"| {_f(inc.trust_preservation)} | {_f(ker.trust_preservation)} " f"| {_delta(ker.trust_preservation, inc.trust_preservation)} |" ) a( f"| Latency (days → score, weight {WEIGHT_LATENCY}) " f"| {inc.latency_days:.0f}d → {_f(inc.latency)} " f"| {ker.latency_days:.0f}d → {_f(ker.latency)} " f"| {_delta(ker.latency, inc.latency)} |" ) a( f"| **Composite** | **{_f(inc.composite)}** | **{_f(ker.composite)}** " f"| **{_delta(ker.composite, inc.composite)}** |" ) a("") m = d.incumbent_outcome.metrics cm = d.counterfactual.metrics a("### Worst-off participant") a("") a(f"- **Incumbent:** {m.worst_off.participant} — {m.worst_off.rationale}") a(f"- **Kernel:** {cm.worst_off.participant} — {cm.worst_off.rationale}") a("") a("## Move-by-move kernel adjudication") a("") a("| Decision point | Move | Actor | Incumbent ruling | Kernel verdict | Articles |") a("|---|---|---|---|---|---|") for ev in r.evaluations: taken = "✓" if ev.taken_historically else "✗" a( f"| `{ev.decision_point}` | `{ev.move}` (taken: {taken}) " f"| {ev.actor} | {ev.incumbent_ruling} " f"| **{ev.verdict.kind.value}** | {ev.verdict.cite()} |" ) a("") a("### Verdict detail") a("") for ev in r.evaluations: a(f"- **`{ev.decision_point}/{ev.move}`** — {ev.description}") for reason in ev.verdict.reasons: a(f" - {reason}") for c in ev.verdict.constraints: a(f" - *Constraint:* {c}") for pid in ev.verdict.procedures: p = kernel.PROCEDURES[pid] a(f" - *Procedure:* {p.name} ({p.days}d, Art. {p.article})") a("") a("## Kernel resolution path") a("") total = 0 for pid in r.dossier.counterfactual.resolution_path: p = kernel.PROCEDURES.get(pid) if p is None: a(f"- `{pid}` — UNKNOWN PROCEDURE") continue total += p.days a(f"- {p.name} — {p.days} days (Art. {p.article})") a("") a(f"**Mechanical kernel latency: {r.kernel_latency_days} days** " f"(sum of distinct procedure clocks on the critical path).") a("") a("## Counterfactual narrative") a("") a(d.counterfactual.narrative.strip()) a("") a("### Counterfactual assumptions (read before citing this scorecard)") a("") a(d.counterfactual.assumptions.strip()) a("") if r.issues: a("## Harness issues") a("") for i in r.issues: a(f"- **{i.severity.upper()}**: {i.message}") a("") a("## Incumbent outcome (historical record)") a("") a(d.incumbent_outcome.description.strip()) a("") a("## Sources") a("") for s in d.sources: if s.url: a(f"- {s.citation} — <{s.url}>") else: a(f"- {s.citation}") a("") return "\n".join(lines) def render_summary_markdown(results: list[EventResult]) -> str: lines: list[str] = [] a = lines.append a("# The Incumbent Benchmark — Aggregate Scorecard") a("") a(f"Events replayed under FablePool kernel v{kernel.KERNEL_VERSION}. " "Ranking is lexicographic: worst-off participant first, composite second. " "Kernel counterfactual scores are structured expert judgments whose legal " "analysis is machine-checked against the kernel rule engine; read each " "event's assumptions before citing. Latency under the kernel is computed " "mechanically from procedure clocks.") a("") a("| Event | Category | Worst-off (Inc → Ker) | Composite (Inc → Ker) | Latency days (Inc → Ker) | Check |") a("|---|---|---|---|---|---|") ordered = sorted(results, key=lambda r: r.dossier.id) for r in ordered: inc, ker = r.incumbent_scores, r.kernel_scores a( f"| `{r.dossier.id}` | {r.dossier.category.value} " f"| {_f(inc.worst_off)} → {_f(ker.worst_off)} " f"| {_f(inc.composite)} → {_f(ker.composite)} " f"| {inc.latency_days:.0f} → {ker.latency_days:.0f} " f"| {'PASS' if r.ok else 'FAIL'} |" ) a("") # Category aggregates cats: dict[str, list[EventResult]] = {} for r in ordered: cats.setdefault(r.dossier.category.value, []).append(r) a("## By category (mean scores)") a("") a("| Category | n | Worst-off (Inc → Ker) | Composite (Inc → Ker) |") a("|---|---:|---|---|") for cat, rs in sorted(cats.items()): n = len(rs) iw = sum(x.incumbent_scores.worst_off for x in rs) / n kw = sum(x.kernel_scores.worst_off for x in rs) / n ic = sum(x.incumbent_scores.composite for x in rs) / n kc = sum(x.kernel_scores.composite for x in rs) / n a(f"| {cat} | {n} | {_f(iw)} → {_f(kw)} | {_f(ic)} → {_f(kc)} |") a("") n = len(ordered) if n: iw = sum(x.incumbent_scores.worst_off for x in ordered) / n kw = sum(x.kernel_scores.worst_off for x in ordered) / n ic = sum(x.incumbent_scores.composite for x in ordered) / n kc = sum(x.kernel_scores.composite for x in ordered) / n a(f"**Overall ({n} events): worst-off {_f(iw)} → {_f(kw)}; " f"composite {_f(ic)} → {_f(kc)}.**") a("") return "\n".join(lines) def results_to_json(results: list[EventResult]) -> str: payload: list[dict[str, Any]] = [] for r in sorted(results, key=lambda x: x.dossier.id): inc, ker = r.incumbent_scores, r.kernel_scores payload.append( { "id": r.dossier.id, "title": r.dossier.title, "category": r.dossier.category.value, "polity": r.dossier.polity, "consistency_check": "pass" if r.ok else "fail", "incumbent": { "worst_off": inc.worst_off, "commons_integrity": inc.commons_integrity, "trust_preservation": inc.trust_preservation, "latency_days": inc.latency_days, "latency_score": round(inc.latency, 2), "composite": round(inc.composite, 2), }, "kernel": { "worst_off": ker.worst_off, "commons_integrity": ker.commons_integrity, "trust_preservation": ker.trust_preservation, "latency_days": ker.latency_days, "latency_score": round(ker.latency, 2), "composite": round(ker.composite, 2), }, "moves": [ { "decision_point": ev.decision_point, "move": ev.move, "actor": ev.actor, "taken_historically": ev.taken_historically, "incumbent_ruling": ev.incumbent_ruling, "kernel_verdict": ev.verdict.kind.value, "articles": ev.verdict.articles, "procedures": ev.verdict.procedures, } for ev in r.evaluations ], "issues": [ {"severity": i.severity, "message": i.message} for i in r.issues ], } ) return json.dumps( {"kernel_version": kernel.KERNEL_VERSION, "events": payload}, indent=2 )