#!/usr/bin/env python3
"""Validate FablePool structured content documents against the JSON Schema suite.

Usage:
    python scripts/validate_content.py path/to/doc.json [more.json ...]
    python scripts/validate_content.py --schema problem-content path/to/doc.json
    python scripts/validate_content.py --self-check

Without --schema, the schema is auto-detected from the document's "schema"
field ("fablepool.problem/1" etc.); plain documents with
{"version": 1, "blocks": [...]} are validated as content documents.

Exit codes:
    0 = every document (or, with --self-check, every schema) is valid
    1 = at least one document or schema failed; this includes documents that
        cannot be read or parsed, or whose schema cannot be auto-detected
    2 = usage error, or the schema suite itself could not be loaded

Requires: jsonschema>=4.18 (registry support) and its companion package
`referencing`.
"""

from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path

from jsonschema import Draft202012Validator
from referencing import Registry, Resource

# The schema suite lives in <repo>/schemas/v1, two levels above this script.
SCHEMA_DIR = Path(__file__).resolve().parent.parent / "schemas" / "v1"

# Maps a document's "schema" discriminator to a schema file in schemas/v1/.
DISCRIMINATOR_MAP = {
    "fablepool.problem/1": "problem-content.json",
    "fablepool.lesson/1": "lesson-content.json",
    "fablepool.widget/1": "widget-manifest.json",
    "fablepool.oer/1": "oer-package.json",
}

# Short names accepted by --schema.
SLUG_MAP = {
    "content-document": "content-document.json",
    "problem-content": "problem-content.json",
    "lesson-content": "lesson-content.json",
    "widget-manifest": "widget-manifest.json",
    "oer-package": "oer-package.json",
}


def load_schemas() -> tuple[dict[str, dict], Registry]:
    """Load every schema in SCHEMA_DIR and build a $id-keyed registry.

    Returns:
        A pair ``(schemas, registry)``: ``schemas`` maps each schema file name
        to its parsed contents, and ``registry`` holds every schema under its
        ``$id`` so that references between schemas can be resolved.

    Raises:
        FileNotFoundError: SCHEMA_DIR contains no ``*.json`` files.
        ValueError: a schema is not a JSON object or has no ``$id``
            (``json.JSONDecodeError`` from malformed JSON is a subclass).
        OSError: a schema file could not be opened or read.
    """
    paths = sorted(SCHEMA_DIR.glob("*.json"))
    if not paths:
        raise FileNotFoundError(f"No schemas found in {SCHEMA_DIR}")

    schemas: dict[str, dict] = {}
    registry = Registry()
    for path in paths:
        with path.open(encoding="utf-8") as fh:
            schema = json.load(fh)
        # Guard before calling .get(): a top-level list/bool/string would
        # otherwise surface as an AttributeError that main() does not handle.
        if not isinstance(schema, dict):
            raise ValueError(f"Schema {path} must be a JSON object")
        schema_id = schema.get("$id")
        if not schema_id:
            raise ValueError(f"Schema {path} is missing $id")
        schemas[path.name] = schema
        # with_resource() hands back the updated registry, so rebind the name.
        registry = registry.with_resource(
            uri=schema_id, resource=Resource.from_contents(schema)
        )
    return schemas, registry


def detect_schema_file(document: dict) -> str | None:
    """Pick the schema file for a document, or None if undetectable.

    A recognised string in the "schema" field wins; otherwise a document with
    ``"version": 1`` and a list under "blocks" is treated as a plain content
    document.
    """
    discriminator = document.get("schema")
    if isinstance(discriminator, str) and discriminator in DISCRIMINATOR_MAP:
        return DISCRIMINATOR_MAP[discriminator]
    if document.get("version") == 1 and isinstance(document.get("blocks"), list):
        return "content-document.json"
    return None


def format_error(error) -> str:
    """Render one validation error as `` at /path/to/value: message``.

    ``error`` must expose ``absolute_path`` (an iterable of keys/indices) and
    ``message``; an empty path is rendered as ``/``.
    """
    # The leading "/" means the pointer is never empty, so the root location
    # already comes out as "/" without a fallback.
    pointer = "/" + "/".join(str(part) for part in error.absolute_path)
    return f" at {pointer}: {error.message}"


def validate_file(
    path: Path,
    schemas: dict[str, dict],
    registry: Registry,
    forced_schema: str | None,
) -> bool:
    """Validate one JSON document and report the outcome.

    Args:
        path: the document to validate.
        schemas: parsed schemas keyed by file name, as built by load_schemas().
        registry: registry used to resolve references between schemas.
        forced_schema: a SLUG_MAP key to validate against, or None to
            auto-detect the schema from the document.

    Returns:
        True when the document is valid. False when it cannot be read or
        parsed, its schema cannot be determined or is not loaded, or it has
        validation errors; the reason is printed to stderr.
    """
    try:
        with path.open(encoding="utf-8") as fh:
            document = json.load(fh)
    except (OSError, UnicodeDecodeError, json.JSONDecodeError) as exc:
        # UnicodeDecodeError covers files that are not valid UTF-8; it is
        # neither an OSError nor a JSONDecodeError.
        print(f"FAIL {path}: cannot read/parse JSON: {exc}", file=sys.stderr)
        return False

    if forced_schema is not None:
        schema_file = SLUG_MAP[forced_schema]
    else:
        if not isinstance(document, dict):
            print(
                f"FAIL {path}: top-level JSON value must be an object",
                file=sys.stderr,
            )
            return False
        schema_file = detect_schema_file(document)
        if schema_file is None:
            print(
                f"FAIL {path}: could not auto-detect schema "
                f"(no recognised 'schema' field); use --schema",
                file=sys.stderr,
            )
            return False

    # The name tables and the files on disk can drift apart; report that
    # instead of crashing with a KeyError.
    schema = schemas.get(schema_file)
    if schema is None:
        print(
            f"FAIL {path}: schema {schema_file} is not present in the loaded suite",
            file=sys.stderr,
        )
        return False

    validator = Draft202012Validator(schema, registry=registry)
    # Sort by location so the report follows the document's structure.
    errors = sorted(
        validator.iter_errors(document), key=lambda e: list(e.absolute_path)
    )
    if errors:
        print(f"FAIL {path} (against {schema_file}):", file=sys.stderr)
        for error in errors:
            print(format_error(error), file=sys.stderr)
        return False

    print(f"ok {path} (against {schema_file})")
    return True


def self_check(schemas: dict[str, dict]) -> bool:
    """Verify every schema in the suite is itself a valid 2020-12 schema.

    Prints one line per schema and returns True only if all of them pass.
    """
    ok = True
    for name, schema in schemas.items():
        try:
            Draft202012Validator.check_schema(schema)
        except Exception as exc:  # jsonschema.SchemaError
            # Deliberately broad: this is a reporting boundary, and one bad
            # schema must not stop the remaining ones from being checked.
            print(f"FAIL schema {name}: {exc}", file=sys.stderr)
            ok = False
        else:
            print(f"ok schema {name}")
    return ok


def main(argv: list[str] | None = None) -> int:
    """Run the command-line interface and return the process exit code.

    Args:
        argv: argument list to parse; None makes argparse use ``sys.argv[1:]``.

    Returns:
        0 when everything validated, 1 when a document or schema failed,
        2 on a usage error or when the schema suite could not be loaded.
    """
    # __doc__ is None when docstrings are stripped (python -OO); fall back to
    # no description rather than crashing before arguments are parsed.
    doc_lines = (__doc__ or "").strip().splitlines()
    parser = argparse.ArgumentParser(
        description=doc_lines[0] if doc_lines else None
    )
    parser.add_argument(
        "files", nargs="*", type=Path, help="JSON documents to validate"
    )
    parser.add_argument(
        "--schema",
        choices=sorted(SLUG_MAP),
        default=None,
        help="Force a specific schema instead of auto-detecting.",
    )
    parser.add_argument(
        "--self-check",
        action="store_true",
        help="Validate the schema suite itself against the 2020-12 meta-schema.",
    )
    args = parser.parse_args(argv)

    try:
        schemas, registry = load_schemas()
    except (OSError, ValueError) as exc:
        # OSError includes FileNotFoundError and permission problems;
        # ValueError includes json.JSONDecodeError and UnicodeDecodeError.
        print(f"error: {exc}", file=sys.stderr)
        return 2

    if args.self_check:
        return 0 if self_check(schemas) else 1

    if not args.files:
        parser.print_usage(sys.stderr)
        print("error: no input files (or use --self-check)", file=sys.stderr)
        return 2

    all_ok = True
    for path in args.files:
        # Keep going after a failure so every file gets reported.
        if not validate_file(path, schemas, registry, args.schema):
            all_ok = False
    return 0 if all_ok else 1


if __name__ == "__main__":
    raise SystemExit(main())