"""JSON Schemas for every machine-parseable artifact in the repository. The constitution is source code, so it gets type checking. Every YAML artifact — kernel articles, userland modules, the invariants file, the version file, proposals, ballots, the citizen registry, ledger entries, and fork manifests — has a JSON Schema (draft 2020-12) here, and :func:`validate_repo` walks a checkout and validates everything it recognises. The vote gate runs this as its first stage: a malformed constitution cannot reach a vote. Schemas are deliberately tolerant of *extra* keys (``additionalProperties`` defaults to true): articles carry commentary, modules carry prose, and we do not want schema churn every time someone adds explanatory text. The schemas pin down the keys the tooling actually depends on. """ from __future__ import annotations from dataclasses import dataclass, field from pathlib import Path from typing import Any from jsonschema import Draft202012Validator from govtool.errors import GovtoolError from govtool.yamlio import load_yaml, to_jsonable class SchemaError(GovtoolError): """Raised on misuse of the validation API (not on invalid documents).""" SEMVER_PATTERN = r"^v?\d+\.\d+\.\d+$" ID_PATTERN = r"^[A-Za-z0-9][A-Za-z0-9._-]*$" SCHEMAS: dict[str, dict] = { "version": { "$schema": "https://json-schema.org/draft/2020-12/schema", "title": "Constitution version file", "type": "object", "required": ["version"], "properties": { "version": {"type": "string", "pattern": SEMVER_PATTERN}, "history": { "type": "array", "items": { "type": "object", "required": ["version"], "properties": { "version": {"type": "string", "pattern": SEMVER_PATTERN}, "proposal": {"type": "string"}, "ratified_at": {"type": "string"}, "change_class": { "type": "string", "enum": ["major", "minor", "patch", "none"], }, "gate_report": {"type": "string"}, }, }, }, }, }, "kernel_article": { "$schema": "https://json-schema.org/draft/2020-12/schema", "title": "Kernel article", "type": "object", "required": ["title"], "anyOf": [{"required": ["id"]}, {"required": ["article"]}], "properties": { "id": {"type": "string", "pattern": ID_PATTERN}, "article": {"type": ["integer", "string"]}, "title": {"type": "string", "minLength": 1}, "clauses": { "type": "array", "items": { "type": "object", "required": ["id"], "properties": { "id": {"type": "string"}, "text": {"type": "string"}, }, }, }, "text": {"type": "string"}, "commentary": {"type": "string"}, "rationale": {"type": "string"}, }, }, "userland_module": { "$schema": "https://json-schema.org/draft/2020-12/schema", "title": "Userland module", "type": "object", "required": ["title"], "anyOf": [{"required": ["id"]}, {"required": ["module"]}], "properties": { "id": {"type": "string", "pattern": ID_PATTERN}, "module": {"type": "string"}, "title": {"type": "string", "minLength": 1}, "parameters": { "type": "object", "additionalProperties": { "anyOf": [ {"type": ["string", "number", "boolean", "null", "array"]}, { "type": "object", "required": ["value"], "properties": { "value": {}, "bounds": { "type": "object", "properties": { "min": {"type": "number"}, "max": {"type": "number"}, }, }, "allowed": {"type": "array"}, }, }, ] }, }, }, }, "invariants": { "$schema": "https://json-schema.org/draft/2020-12/schema", "title": "Constitutional invariants", "type": "object", "required": ["invariants"], "properties": { "invariants": { "type": "array", "minItems": 1, "items": { "type": "object", "required": ["id"], "properties": { "id": {"type": "string", "pattern": ID_PATTERN}, "statement": {"type": "string"}, "text": {"type": "string"}, }, }, } }, }, "proposal": { "$schema": "https://json-schema.org/draft/2020-12/schema", "title": "Amendment proposal", "type": "object", "required": ["id", "title"], "properties": { "id": {"type": "string", "pattern": ID_PATTERN}, "title": {"type": "string", "minLength": 1}, "author": {"type": "string"}, "status": { "type": "string", "enum": [ "draft", "open", "voting", "ratified", "rejected", "withdrawn", "closed", ], }, "change_class": { "type": "string", "enum": ["major", "minor", "patch", "none"], }, "created_at": {"type": ["string", "object"]}, "opened_at": {"type": ["string", "object"]}, "summary": {"type": "string"}, "rationale": {"type": "string"}, "target_files": {"type": "array", "items": {"type": "string"}}, }, }, "ballot": { "$schema": "https://json-schema.org/draft/2020-12/schema", "title": "Signed ballot", "type": "object", "required": ["proposal_id", "voter_id", "choice"], "properties": { "proposal_id": {"type": "string", "pattern": ID_PATTERN}, "voter_id": {"type": "string", "pattern": ID_PATTERN}, "choice": {"type": "string", "enum": ["yes", "no", "abstain"]}, "proposal_hash": {"type": "string"}, "cast_at": {"type": ["string", "object"]}, "public_key": {"type": "string"}, "signature": {"type": "string"}, }, }, "registry": { "$schema": "https://json-schema.org/draft/2020-12/schema", "title": "Citizen registry", "type": "object", "required": ["citizens"], "properties": { "citizens": { "type": "array", "items": { "type": "object", "required": ["id", "public_key"], "properties": { "id": {"type": "string", "pattern": ID_PATTERN}, "public_key": {"type": "string", "minLength": 16}, "status": {"type": "string"}, "joined_at": {"type": ["string", "object"]}, }, }, } }, }, "ledger_entry": { "$schema": "https://json-schema.org/draft/2020-12/schema", "title": "Audit ledger entry", "type": "object", "required": ["type", "payload"], "properties": { "index": {"type": "integer", "minimum": 0}, "type": {"type": "string", "minLength": 1}, "timestamp": {"type": ["string", "object"]}, "payload": {"type": "object"}, "prev_hash": {"type": ["string", "null"]}, "hash": {"type": "string"}, "actor": {"type": ["string", "null"]}, "signature": {"type": ["string", "null"]}, }, }, "fork_manifest": { "$schema": "https://json-schema.org/draft/2020-12/schema", "title": "Fork manifest", "type": "object", "required": ["name", "upstream", "kernel_hashes"], "properties": { "name": {"type": "string", "minLength": 1}, "forked_at": {"type": ["string", "object"]}, "upstream": { "type": "object", "properties": { "url": {"type": "string"}, "version": {"type": "string"}, }, }, "kernel_hashes": { "type": "object", "additionalProperties": {"type": "string"}, }, "overrides": {"type": "array"}, "lineage": {"type": "array"}, }, }, } _VALIDATORS: dict[str, Draft202012Validator] = {} def _validator(kind: str) -> Draft202012Validator: if kind not in SCHEMAS: raise SchemaError(f"unknown schema kind: {kind!r} (known: {sorted(SCHEMAS)})") if kind not in _VALIDATORS: Draft202012Validator.check_schema(SCHEMAS[kind]) _VALIDATORS[kind] = Draft202012Validator(SCHEMAS[kind]) return _VALIDATORS[kind] # --------------------------------------------------------------------------- # Reports # --------------------------------------------------------------------------- @dataclass class ValidationIssue: path: str schema: str location: str message: str def to_dict(self) -> dict: return { "path": self.path, "schema": self.schema, "location": self.location, "message": self.message, } def __str__(self) -> str: return f"{self.path} [{self.schema}] at {self.location or '$'}: {self.message}" @dataclass class ValidationReport: checked: list[str] = field(default_factory=list) errors: list[ValidationIssue] = field(default_factory=list) @property def ok(self) -> bool: return not self.errors def to_dict(self) -> dict: return { "ok": self.ok, "checked": self.checked, "errors": [issue.to_dict() for issue in self.errors], } def summary(self) -> str: if self.ok: return f"OK: {len(self.checked)} file(s) validated, 0 errors" lines = [f"FAILED: {len(self.errors)} error(s) across {len(self.checked)} file(s)"] lines.extend(f" - {issue}" for issue in self.errors) return "\n".join(lines) # --------------------------------------------------------------------------- # Validation API # --------------------------------------------------------------------------- def validate_data(data: Any, kind: str, source: str = "") -> list[ValidationIssue]: """Validate an in-memory structure against a named schema.""" issues: list[ValidationIssue] = [] jsonable = to_jsonable(data) for error in sorted(_validator(kind).iter_errors(jsonable), key=str): location = "/".join(str(part) for part in error.absolute_path) issues.append(ValidationIssue(source, kind, location, error.message)) return issues def kind_for_path(root: Path, path: Path) -> str | None: """Infer the schema kind for a file by its location in the repo.""" rel = path.relative_to(root).as_posix() if rel == "constitution/version.yaml": return "version" if rel == "constitution/invariants.yaml": return "invariants" if rel.startswith("constitution/kernel/"): return "kernel_article" if rel.startswith("constitution/userland/"): return "userland_module" if rel == "citizens/registry.yaml": return "registry" if rel.startswith("proposals/") and rel.endswith((".yaml", ".yml")): return "proposal" if rel.startswith("ballots/") and rel.endswith((".yaml", ".yml")): return "ballot" if rel == "fork.yaml": return "fork_manifest" return None def validate_file(root: Path | str, path: Path | str, kind: str | None = None) -> list[ValidationIssue]: """Validate one file; infers the schema from the path unless given.""" root, path = Path(root), Path(path) rel = path.relative_to(root).as_posix() resolved_kind = kind or kind_for_path(root, path) if resolved_kind is None: return [] try: data = load_yaml(path) except GovtoolError as exc: return [ValidationIssue(rel, resolved_kind, "", str(exc))] return [ ValidationIssue(rel, issue.schema, issue.location, issue.message) for issue in validate_data(data, resolved_kind, source=rel) ] def validate_repo(root: Path | str) -> ValidationReport: """Walk a checkout and validate every recognised governance artifact.""" root = Path(root) report = ValidationReport() candidates: list[Path] = [] for directory in ("constitution", "proposals", "ballots", "citizens"): base = root / directory if base.is_dir(): candidates.extend(sorted(base.rglob("*.yaml"))) candidates.extend(sorted(base.rglob("*.yml"))) fork_manifest = root / "fork.yaml" if fork_manifest.is_file(): candidates.append(fork_manifest) for path in candidates: if kind_for_path(root, path) is None: continue rel = path.relative_to(root).as_posix() report.checked.append(rel) report.errors.extend(validate_file(root, path)) return report