"""Governance semantic versioning classifier. This module formalises the FablePool governance semver contract and implements the classifier that labels every amendment pull request. The contract ============ * **MAJOR** — any change to the *normative* content of the constitutional kernel (``constitution/kernel/*.yaml``) or of the invariants file (``constitution/invariants.yaml``). Adding, removing, or rewording a kernel clause is breaking by definition: every downstream fork relies on kernel semantics, and the kernel is the part of the document that defines how the document itself changes. * **MINOR** — adding or removing a userland module, or changing the normative content of a userland module beyond a pure parameter-value change (new clauses, changed bounds, restructured rules). * **PATCH** — changing only userland parameter *values*, each within its declared bounds/allowed set, or touching non-normative prose (commentary, rationale, notes, examples) anywhere in the constitution. * **NONE** — changes that do not touch the constitution at all (tooling, citizen registry, ledger, proposals, ballots). These never bump the constitutional version and never require a constitutional vote (they may still be governed by other processes, e.g. citizen admission). "Normative" is defined structurally: every key in a constitution YAML file is normative except the keys in :data:`NON_NORMATIVE_KEYS`. Two versions of a file are normatively identical iff the canonical hashes of their normative views match. This makes the major/minor/patch boundary *machine-checkable*, which is the whole point: a human cannot quietly relabel a kernel rewrite as a typo fix, because CI recomputes the class from the diff. The overall bump of a change set is the maximum bump over its files (``major > minor > patch > none``). 
""" from __future__ import annotations from dataclasses import dataclass, field from pathlib import Path from typing import Any from govtool.canonical import content_hash from govtool.errors import GovtoolError from govtool.yamlio import load_yaml, to_jsonable class ClassificationError(GovtoolError): """Raised when a change set cannot be classified.""" # --------------------------------------------------------------------------- # Constants # --------------------------------------------------------------------------- BUMPS = ("none", "patch", "minor", "major") BUMP_ORDER = {name: index for index, name in enumerate(BUMPS)} #: PR labels emitted by CI for each change class. LABELS = { "major": "governance/major", "minor": "governance/minor", "patch": "governance/patch", "none": "governance/none", } #: Keys whose content is explanatory prose, not binding rules. NON_NORMATIVE_KEYS = frozenset( { "commentary", "rationale", "notes", "references", "examples", "summary", "description", } ) KERNEL_PREFIX = "constitution/kernel/" USERLAND_PREFIX = "constitution/userland/" INVARIANTS_PATH = "constitution/invariants.yaml" VERSION_PATH = "constitution/version.yaml" # --------------------------------------------------------------------------- # Normative view & hashing # --------------------------------------------------------------------------- def normative_view(node: Any) -> Any: """Strip non-normative keys recursively, returning the binding core.""" if isinstance(node, dict): return { key: normative_view(value) for key, value in node.items() if key not in NON_NORMATIVE_KEYS } if isinstance(node, list): return [normative_view(item) for item in node] return node def normative_hash(data: Any) -> str: """Canonical content hash of the normative view of a document.""" return content_hash(to_jsonable(normative_view(data))) def full_hash(data: Any) -> str: """Canonical content hash of the entire document (prose included).""" return content_hash(to_jsonable(data)) # 
# ---------------------------------------------------------------------------
# Parameter helpers (shared with fork tooling)
# ---------------------------------------------------------------------------


def param_value(entry: Any) -> Any:
    """Extract the effective value of a userland parameter entry.

    Parameters may be declared either as a plain scalar::

        quorum: 0.5

    or as a structured entry with metadata::

        quorum:
          value: 0.5
          bounds: {min: 0.1, max: 0.9}

    A mapping without a ``value`` key is returned whole, like a scalar.
    """
    is_structured = isinstance(entry, dict) and "value" in entry
    return entry["value"] if is_structured else entry


def param_meta(entry: Any) -> dict:
    """Return the metadata (everything except ``value``) of a parameter.

    Entries that are not a mapping carrying a ``value`` key have no metadata
    and yield an empty dict.
    """
    if not (isinstance(entry, dict) and "value" in entry):
        return {}
    return {key: item for key, item in entry.items() if key != "value"}


def _is_real_number(candidate: Any) -> bool:
    """Tell whether ``candidate`` is an int/float usable in a bounds check.

    Booleans are excluded even though ``bool`` subclasses ``int``, and NaN is
    excluded because every ordering comparison against it is False.
    """
    import math  # function-scope import: only this helper needs it

    if isinstance(candidate, bool) or not isinstance(candidate, (int, float)):
        return False
    return not (isinstance(candidate, float) and math.isnan(candidate))


def check_bounds(value: Any, meta: dict) -> tuple[bool, str]:
    """Check a candidate value against declared ``bounds`` / ``allowed``.

    Returns ``(ok, reason)`` where ``reason`` explains a failure and is empty
    on success. A parameter with no declared constraints accepts any value
    (the change is still a PATCH only because the *shape* of the entry is
    unchanged).

    When a ``min`` or ``max`` is declared the value must be a real number:
    strings, booleans and NaN are rejected. NaN in particular must be caught
    explicitly, because ``nan < min`` and ``nan > max`` are both False and it
    would otherwise pass every bounds check. A ``min``/``max`` that is not
    itself a real number is reported as a malformed constraint rather than
    raising ``TypeError`` during the comparison.
    """
    allowed = meta.get("allowed")
    if allowed is not None:
        if not isinstance(allowed, list):
            return False, f"malformed 'allowed' constraint: {allowed!r}"
        if value not in allowed:
            return False, f"value {value!r} not in allowed set {allowed!r}"

    bounds = meta.get("bounds")
    if bounds is None:
        return True, ""
    if not isinstance(bounds, dict):
        return False, f"malformed 'bounds' constraint: {bounds!r}"

    minimum = bounds.get("min")
    maximum = bounds.get("max")
    if minimum is None and maximum is None:
        # A bounds mapping that declares neither limit constrains nothing.
        return True, ""

    if not _is_real_number(value):
        return False, f"value {value!r} is not numeric but bounds are declared"
    for limit in (minimum, maximum):
        if limit is not None and not _is_real_number(limit):
            return False, f"malformed 'bounds' constraint: {bounds!r}"

    if minimum is not None and value < minimum:
        return False, f"value {value!r} below declared minimum {minimum!r}"
    if maximum is not None and value > maximum:
        return False, f"value {value!r} above declared maximum {maximum!r}"
    return True, ""


@dataclass
class ParameterDiff:
    """Result of comparing the ``parameters`` blocks of two module versions."""

    # True when nothing but parameter values moved between the two versions.
    values_only: bool
    # True when every changed value satisfies its declared constraints.
    within_bounds: bool
    # Names of the parameters involved in the reported difference.
    changed: list[str] = field(default_factory=list)
    # Human-readable explanation; empty for a clean values-only patch.
    reason: str = ""


def compare_parameters(old_module: Any, new_module: Any) -> ParameterDiff:
    """Decide whether a userland module change is a pure parameter patch.

    The change is *values-only* iff the modules are normatively identical once
    the ``parameters`` key is removed, the parameter name sets are identical,
    and for each changed parameter the metadata (bounds, allowed set, type
    annotations) is unchanged — only the value moved.

    The first changed value that violates its constraints stops the scan and
    is reported with ``values_only=True, within_bounds=False``.
    """
    if not (isinstance(old_module, dict) and isinstance(new_module, dict)):
        return ParameterDiff(False, False, reason="module is not a mapping")

    old_rest = {key: item for key, item in old_module.items() if key != "parameters"}
    new_rest = {key: item for key, item in new_module.items() if key != "parameters"}
    if normative_hash(old_rest) != normative_hash(new_rest):
        return ParameterDiff(False, False, reason="normative change outside 'parameters'")

    # A missing or empty ``parameters`` block is treated as an empty mapping.
    old_params = old_module.get("parameters") or {}
    new_params = new_module.get("parameters") or {}
    if not (isinstance(old_params, dict) and isinstance(new_params, dict)):
        return ParameterDiff(False, False, reason="'parameters' is not a mapping")

    old_names, new_names = set(old_params), set(new_params)
    if old_names != new_names:
        added = sorted(new_names - old_names)
        removed = sorted(old_names - new_names)
        return ParameterDiff(
            False,
            False,
            changed=added + removed,
            reason=f"parameter set changed (added={added}, removed={removed})",
        )

    changed: list[str] = []
    for name in sorted(old_params):
        old_entry = old_params[name]
        new_entry = new_params[name]
        if normative_hash(old_entry) == normative_hash(new_entry):
            continue

        new_meta = param_meta(new_entry)
        if normative_hash(param_meta(old_entry)) != normative_hash(new_meta):
            return ParameterDiff(
                False,
                False,
                changed=[name],
                reason=f"parameter '{name}' changed its constraints, not just its value",
            )

        changed.append(name)
        ok, why = check_bounds(param_value(new_entry), new_meta)
        if not ok:
            return ParameterDiff(
                True,
                False,
                changed=changed,
                reason=f"parameter '{name}': {why}",
            )

    return ParameterDiff(True, True, changed=changed)


# ---------------------------------------------------------------------------
# Classification
# ---------------------------------------------------------------------------


def zone_of(relpath: str) -> str:
    """Map a repo-relative path to its governance zone.

    Backslashes are converted to forward slashes before matching. The result
    is one of ``version``, ``invariants``, ``kernel``, ``userland``,
    ``constitution-other`` or ``non-constitutional``.
    """
    normalised = relpath.replace("\\", "/")

    # Exact-path zones are tested before the prefix zones.
    exact_zones = {VERSION_PATH: "version", INVARIANTS_PATH: "invariants"}
    if normalised in exact_zones:
        return exact_zones[normalised]

    prefix_zones = (
        (KERNEL_PREFIX, "kernel"),
        (USERLAND_PREFIX, "userland"),
        ("constitution/", "constitution-other"),
    )
    for prefix, zone in prefix_zones:
        if normalised.startswith(prefix):
            return zone
    return "non-constitutional"


@dataclass
class FileClassification:
    """Classification of a single changed file."""

    path: str
    zone: str
    change: str  # added | removed | modified-normative | modified-prose | unchanged
    bump: str
    reason: str

    def to_dict(self) -> dict:
        """Return the classification as a plain dict of its five fields."""
        return {
            "path": self.path,
            "zone": self.zone,
            "change": self.change,
            "bump": self.bump,
            "reason": self.reason,
        }


@dataclass
class Classification:
    """Classification of an entire change set (one PR)."""

    bump: str
    files: list[FileClassification] = field(default_factory=list)

    @property
    def label(self) -> str:
        """PR label corresponding to the overall bump (see ``LABELS``)."""
        return LABELS[self.bump]

    def to_dict(self) -> dict:
        """Return the overall bump, its label and the per-file details."""
        return {
            "bump": self.bump,
            "label": self.label,
            "files": [entry.to_dict() for entry in self.files],
        }

    def summary_markdown(self) -> str:
        """Render a Markdown summary: a headline plus one table row per file.

        Files whose change is ``unchanged`` are omitted; when nothing remains
        a single placeholder row is emitted instead.
        """
        lines = [
            f"**Change class: `{self.bump.upper()}`** (label `{self.label}`)",
            "",
            "| File | Zone | Change | Bump | Reason |",
            "|---|---|---|---|---|",
        ]
        relevant = [entry for entry in self.files if entry.change != "unchanged"]
        if not relevant:
            lines.append("| _no constitutional files changed_ | — | — | none | — |")
        lines.extend(
            f"| `{entry.path}` | {entry.zone} | {entry.change} | {entry.bump} | {entry.reason} |"
            for entry in relevant
        )
        return "\n".join(lines)


def _classify_file(relpath: str, old_data: Any, new_data: Any) -> FileClassification:
    """Classify one file given its parsed content in the base and head trees.

    ``None`` for ``old_data`` / ``new_data`` means the file has no content on
    that side and is treated as an addition or a removal respectively.
    """
    zone = zone_of(relpath)

    def verdict(change: str, bump: str, reason: str) -> FileClassification:
        return FileClassification(relpath, zone, change, bump, reason)

    # The version file and anything outside the constitution never bump.
    if zone in ("version", "non-constitutional"):
        if old_data is None:
            change = "added"
        elif new_data is None:
            change = "removed"
        else:
            change = "modified-normative"
        if zone == "version":
            reason = "version file is managed by the ratification tooling"
        else:
            reason = "outside the constitution; no version bump"
        return verdict(change, "none", reason)

    if old_data is None and new_data is None:  # pragma: no cover - defensive
        return verdict("unchanged", "none", "file absent in both trees")

    protected = zone in ("kernel", "invariants")

    if old_data is None:
        if protected:
            return verdict(
                "added", "major", "new kernel/invariant content is a breaking change"
            )
        if zone == "userland":
            return verdict("added", "minor", "new userland module")
        return verdict("added", "minor", "new constitutional document")

    if new_data is None:
        if protected:
            return verdict(
                "removed", "major", "removing kernel/invariant content is a breaking change"
            )
        if zone == "userland":
            return verdict("removed", "minor", "userland module removed")
        return verdict("removed", "minor", "constitutional document removed")

    if normative_hash(old_data) == normative_hash(new_data):
        # Binding content is identical; at most explanatory prose moved.
        if full_hash(old_data) == full_hash(new_data):
            return verdict("unchanged", "none", "identical content")
        return verdict("modified-prose", "patch", "only non-normative prose changed")

    if protected:
        return verdict(
            "modified-normative", "major", "normative kernel/invariant content changed"
        )

    if zone == "userland":
        pdiff = compare_parameters(old_data, new_data)
        if pdiff.values_only and pdiff.within_bounds:
            return verdict(
                "modified-normative",
                "patch",
                f"parameter values changed within declared bounds: {', '.join(pdiff.changed) or 'n/a'}",
            )
        if pdiff.values_only:
            return verdict(
                "modified-normative",
                "minor",
                f"parameter change exceeds declared bounds: {pdiff.reason}",
            )
        return verdict(
            "modified-normative",
            "minor",
            pdiff.reason or "normative module content changed",
        )

    return verdict("modified-normative", "minor", "constitutional document changed")


def _constitution_files(root: Path) -> dict[str, Path]:
    """Map repo-relative path -> absolute path for every constitution YAML.

    Both ``*.yaml`` and ``*.yml`` files under ``<root>/constitution`` are
    collected recursively; a missing directory yields an empty mapping.
    """
    base = Path(root) / "constitution"
    found: dict[str, Path] = {}
    if not base.is_dir():
        return found
    for pattern in ("*.yaml", "*.yml"):
        for path in sorted(base.rglob(pattern)):
            found[path.relative_to(root).as_posix()] = path
    return found


def classify_trees(base_root: Path | str, head_root: Path | str) -> Classification:
    """Classify the constitutional difference between two checked-out trees.

    ``base_root`` is the merge base (current law); ``head_root`` is the
    proposed tree (the PR head). Files outside ``constitution/`` are ignored —
    they classify as ``none`` by definition.

    The overall bump is the strongest bump over all files, or ``none`` when
    there are no files.
    """
    base_files = _constitution_files(Path(base_root))
    head_files = _constitution_files(Path(head_root))

    files: list[FileClassification] = []
    for relpath in sorted(set(base_files) | set(head_files)):
        old_data = load_yaml(base_files[relpath]) if relpath in base_files else None
        new_data = load_yaml(head_files[relpath]) if relpath in head_files else None
        files.append(_classify_file(relpath, old_data, new_data))

    overall = max(
        (entry.bump for entry in files),
        key=BUMP_ORDER.__getitem__,
        default="none",
    )
    return Classification(bump=overall, files=files)


# ---------------------------------------------------------------------------
# Version arithmetic
# ---------------------------------------------------------------------------


def parse_version(version: str) -> tuple[int, int, int]:
    """Parse ``[v]MAJOR.MINOR.PATCH`` into a tuple of ints.

    Surrounding whitespace and one leading ``v``/``V`` are ignored. Each of
    the three components must consist solely of ASCII digits. ``int()`` alone
    is too lenient here: it also accepts signs, underscores, padding and
    non-ASCII digits, so ``"1.-2.3"`` would parse to a negative component.

    Raises:
        ClassificationError: if the text is not three dot-separated runs of
            ASCII digits.
    """
    text = str(version).strip()
    if text.startswith(("v", "V")):
        text = text[1:]

    parts = text.split(".")
    well_formed = len(parts) == 3 and all(
        part.isascii() and part.isdigit() for part in parts
    )
    if not well_formed:
        raise ClassificationError(f"not a semver version: {version!r}")

    major, minor, patch = (int(part) for part in parts)
    return major, minor, patch


def next_version(current: str, bump: str) -> str:
    """Compute the next constitutional version for a given bump class.

    ``major`` and ``minor`` reset the lower components to zero; ``none``
    returns the current version re-rendered without any ``v`` prefix.

    Raises:
        ClassificationError: if ``bump`` is not a known bump class or
            ``current`` cannot be parsed.
    """
    if bump not in BUMP_ORDER:
        raise ClassificationError(f"unknown bump class: {bump!r}")

    major, minor, patch = parse_version(current)
    if bump == "major":
        major, minor, patch = major + 1, 0, 0
    elif bump == "minor":
        minor, patch = minor + 1, 0
    elif bump == "patch":
        patch += 1
    return f"{major}.{minor}.{patch}"