"""Structural validators for FablePool's structured content format ("fable-doc")
and for the per-problem-type answer specifications.

These validators are deliberately dependency-free so that the model layer can
enforce a first line of defence without importing a JSON Schema engine. The
authoritative, machine-readable contract lives under ``schemas/`` (JSON Schema
2020-12) and is enforced end-to-end by ``scripts/validate_content.py`` and by
the API serializers. The two MUST be kept in sync; the schema suite contains
conformance fixtures that exercise both implementations.

Document shape (version 1)::

    {
      "type": "doc",
      "version": 1,
      "blocks": [
        {"type": "paragraph", "content": [...]},
        {"type": "math", "tex": "\\int_0^1 x^2\\,dx", "display": true},
        ...
      ]
    }

Content is stored as structured JSON, never as raw HTML, so that it can be
rendered safely on web/mobile/low-bandwidth clients and exported as OER.
Interactive widgets are referenced by identifier only ("widget" blocks); the
widget runtime is sandboxed and lives outside trusted core content.
"""

from __future__ import annotations

from django.core.exceptions import ValidationError

DOCUMENT_TYPE = "doc"
SUPPORTED_DOC_VERSIONS = frozenset({1})

BLOCK_TYPES = frozenset(
    {
        "paragraph",
        "heading",
        "math",
        "code",
        "image",
        "figure",
        "list",
        "table",
        "callout",
        "quote",
        "divider",
        "widget",
    }
)

CALLOUT_KINDS = frozenset({"note", "tip", "warning", "definition", "theorem", "example"})
LIST_STYLES = frozenset({"bullet", "ordered"})

EQUIVALENCE_MODES = frozenset({"symbolic", "numeric_sampling"})
WIDGET_GRADERS = frozenset({"widget", "manual"})


# ---------------------------------------------------------------------------
# Generic helpers
# ---------------------------------------------------------------------------


def _err(path: str, message: str) -> ValidationError:
    """Build (but do not raise) a ``ValidationError`` reading ``"<path>: <message>"``.

    Every error produced by this module carries the code ``"invalid_content"``.
    """
    return ValidationError(f"{path}: {message}", code="invalid_content")


def _typed_value(container: dict, key: str, types, path: str):
    """Return ``container[key]`` after checking it against *types*.

    *types* is a single type or a tuple of types, as accepted by
    ``isinstance``. Because ``bool`` is a subclass of ``int`` in Python, a
    plain ``isinstance`` check would let JSON ``true``/``false`` satisfy an
    ``int`` requirement; booleans are therefore accepted only when ``bool`` is
    listed explicitly in *types*.

    Raises ``ValidationError`` located at ``<path>.<key>`` on a mismatch. The
    caller must already have established that *key* is present.
    """
    value = container[key]
    accepted = types if isinstance(types, tuple) else (types,)
    if isinstance(value, bool):
        matches = bool in accepted
    else:
        matches = isinstance(value, accepted)
    if not matches:
        expected = "/".join(t.__name__ for t in accepted)
        raise _err(f"{path}.{key}", f"expected {expected}, got {type(value).__name__}")
    return value


def _require(container: dict, key: str, types, path: str):
    """Return ``container[key]`` if present and of the expected type(s).

    Raises ``ValidationError`` at *path* when the key is absent, or at
    ``<path>.<key>`` when the value has the wrong type (booleans do not count
    as ``int`` unless ``bool`` is requested explicitly).
    """
    if key not in container:
        raise _err(path, f"missing required key '{key}'")
    return _typed_value(container, key, types, path)


def _require_nonempty_str(container: dict, key: str, path: str) -> str:
    """Return ``container[key]`` if it is a string with non-whitespace content.

    Raises ``ValidationError`` when the key is missing, is not a string, or is
    empty once surrounding whitespace is stripped. The value is returned
    unstripped.
    """
    value = _require(container, key, str, path)
    if not value.strip():
        raise _err(f"{path}.{key}", "must be a non-empty string")
    return value


def _optional(container: dict, key: str, types, path: str):
    """Return ``container[key]`` if set and well-typed, else ``None``.

    A missing key and an explicit ``None`` value are treated the same way:
    both yield ``None`` without error. A present, non-``None`` value of the
    wrong type raises ``ValidationError`` at ``<path>.<key>`` (booleans do not
    count as ``int`` unless ``bool`` is requested explicitly).
    """
    if container.get(key) is None:
        return None
    return _typed_value(container, key, types, path)


def _is_number(value) -> bool:
    """Return ``True`` for ``int``/``float`` values, excluding booleans."""
    return isinstance(value, (int, float)) and not isinstance(value, bool)
# ---------------------------------------------------------------------------
# Document validation
# ---------------------------------------------------------------------------


def validate_document(value) -> None:
    """Validate a fable-doc structured document.

    Checks, in order: *value* is a dict, ``type`` equals ``DOCUMENT_TYPE``,
    ``version`` is an integer listed in ``SUPPORTED_DOC_VERSIONS``, ``blocks``
    is a non-empty list, and every block passes ``_validate_block``.

    Raises :class:`django.core.exceptions.ValidationError` on the first
    structural problem found, with a JSON-path-like location in the message.
    """
    if not isinstance(value, dict):
        raise _err("$", "document must be a JSON object")
    if value.get("type") != DOCUMENT_TYPE:
        raise _err("$.type", f"must be '{DOCUMENT_TYPE}'")

    version = value.get("version")
    # bool is a subclass of int and ``True == 1``, so without the explicit
    # bool guard ``"version": true`` would be accepted as version 1.
    if (
        isinstance(version, bool)
        or not isinstance(version, int)
        or version not in SUPPORTED_DOC_VERSIONS
    ):
        raise _err(
            "$.version",
            f"must be one of {sorted(SUPPORTED_DOC_VERSIONS)} (got {version!r})",
        )

    blocks = _require(value, "blocks", list, "$")
    if not blocks:
        raise _err("$.blocks", "document must contain at least one block")
    for index, block in enumerate(blocks):
        _validate_block(block, f"$.blocks[{index}]")


def _require_content(block: dict, block_type: str, path: str) -> None:
    """Raise ``ValidationError`` at *path* unless *block* has a ``content`` key."""
    if "content" not in block:
        raise _err(path, f"{block_type} block requires 'content'")


def _validate_block(block, path: str) -> None:
    """Validate a single document block located at *path*.

    The block must be a dict whose ``type`` is one of ``BLOCK_TYPES``. The
    remaining checks depend on the type:

    * ``paragraph``: ``content`` present, a string or a list.
    * ``heading``: integer ``level`` between 1 and 4, ``content`` present.
    * ``math``: non-empty string ``tex``; optional boolean ``display``.
    * ``code``: string ``code``; optional string ``language``.
    * ``image`` / ``figure``: string ``asset`` or ``src``, string ``alt``;
      optional ``caption`` (string or list).
    * ``list``: ``style`` in ``LIST_STYLES``, non-empty list ``items``.
    * ``table``: non-empty list ``rows`` where each row is a list.
    * ``callout``: ``kind`` in ``CALLOUT_KINDS``, ``content`` present.
    * ``quote``: ``content`` present.
    * ``widget``: non-empty string ``widget``, string ``fallback``;
      optional dict ``params``.
    * ``divider``: nothing beyond ``type``.

    Raises ``ValidationError`` on the first problem found.
    """
    if not isinstance(block, dict):
        raise _err(path, "block must be a JSON object")

    block_type = block.get("type")
    # Check the type is a string first: an unhashable value (list/dict) would
    # make the frozenset membership test raise TypeError instead of a
    # ValidationError.
    if not isinstance(block_type, str) or block_type not in BLOCK_TYPES:
        raise _err(f"{path}.type", f"unknown block type {block_type!r}")

    if block_type == "paragraph":
        _require_content(block, block_type, path)
        if not isinstance(block["content"], (str, list)):
            raise _err(f"{path}.content", "must be a string or list of inline nodes")

    elif block_type == "heading":
        level = _require(block, "level", int, path)
        if not 1 <= level <= 4:
            raise _err(f"{path}.level", "must be between 1 and 4")
        _require_content(block, block_type, path)

    elif block_type == "math":
        _require_nonempty_str(block, "tex", path)
        _optional(block, "display", bool, path)

    elif block_type == "code":
        _require(block, "code", str, path)
        _optional(block, "language", str, path)

    elif block_type in {"image", "figure"}:
        has_source = isinstance(block.get("asset"), str) or isinstance(block.get("src"), str)
        if not has_source:
            raise _err(path, f"{block_type} block requires 'asset' (media asset id) or 'src'")
        # Mandatory alt text keeps published content accessible.
        if not isinstance(block.get("alt"), str):
            raise _err(f"{path}.alt", "alt text is required and must be a string")
        _optional(block, "caption", (str, list), path)

    elif block_type == "list":
        style = _require(block, "style", str, path)
        if style not in LIST_STYLES:
            raise _err(f"{path}.style", f"must be one of {sorted(LIST_STYLES)}")
        items = _require(block, "items", list, path)
        if not items:
            raise _err(f"{path}.items", "list must contain at least one item")

    elif block_type == "table":
        rows = _require(block, "rows", list, path)
        if not rows:
            raise _err(f"{path}.rows", "table must contain at least one row")
        for r_index, row in enumerate(rows):
            if not isinstance(row, list):
                raise _err(f"{path}.rows[{r_index}]", "row must be a list of cells")

    elif block_type == "callout":
        kind = _require(block, "kind", str, path)
        if kind not in CALLOUT_KINDS:
            raise _err(f"{path}.kind", f"must be one of {sorted(CALLOUT_KINDS)}")
        _require_content(block, block_type, path)

    elif block_type == "quote":
        _require_content(block, block_type, path)

    elif block_type == "widget":
        # Widgets are untrusted: only an identifier plus declarative params are
        # ever stored in core content. Rendering happens in a sandboxed frame.
        _require_nonempty_str(block, "widget", path)
        _optional(block, "params", dict, path)
        if not isinstance(block.get("fallback"), str):
            raise _err(
                f"{path}.fallback",
                "widget blocks require a textual 'fallback' for accessibility "
                "and low-bandwidth clients",
            )

    # "divider" carries no payload.
# ---------------------------------------------------------------------------
# Answer-spec validation (per problem type)
# ---------------------------------------------------------------------------


def _validate_choice_items(spec: dict, key: str, path: str) -> set:
    """Validate ``spec[key]`` as a list of identified items and return their ids.

    The list must hold at least two dicts, each with a unique, non-empty string
    ``id`` and a ``content`` key (whose value is not inspected here).

    Returns the set of item ids. Raises ``ValidationError`` on the first
    problem found.
    """
    items = _require(spec, key, list, path)
    if len(items) < 2:
        raise _err(f"{path}.{key}", "requires at least two entries")
    seen: set = set()
    for index, item in enumerate(items):
        item_path = f"{path}.{key}[{index}]"
        if not isinstance(item, dict):
            raise _err(item_path, "must be a JSON object")
        item_id = item.get("id")
        if not isinstance(item_id, str) or not item_id:
            raise _err(f"{item_path}.id", "must be a non-empty string")
        if item_id in seen:
            raise _err(f"{item_path}.id", f"duplicate id {item_id!r}")
        seen.add(item_id)
        if "content" not in item:
            raise _err(item_path, "missing required key 'content'")
    return seen


def _validate_multiple_choice(spec: dict, path: str) -> None:
    """Validate a multiple-choice answer spec.

    Requires ``choices`` (see ``_validate_choice_items``) and a non-empty
    ``correct`` list of distinct, known choice ids. Unless ``multiple`` is
    true, exactly one correct id is allowed. ``shuffle`` is an optional bool.

    Raises ``ValidationError`` on the first problem found.
    """
    ids = _validate_choice_items(spec, "choices", path)
    correct = _require(spec, "correct", list, path)
    if not correct:
        raise _err(f"{path}.correct", "must list at least one correct choice id")
    # Non-string entries are reported as unknown ids. Checking the type first
    # also keeps unhashable values (lists/dicts) from raising TypeError in the
    # set membership test here and in ``set(correct)`` below.
    unknown = [c for c in correct if not isinstance(c, str) or c not in ids]
    if unknown:
        raise _err(f"{path}.correct", f"unknown choice ids: {unknown}")
    if len(set(correct)) != len(correct):
        raise _err(f"{path}.correct", "contains duplicate ids")
    multiple = _optional(spec, "multiple", bool, path)
    if not multiple and len(correct) != 1:
        raise _err(
            f"{path}.correct",
            "single-answer questions must have exactly one correct id "
            "(set 'multiple': true for multi-select)",
        )
    _optional(spec, "shuffle", bool, path)


def _validate_numeric(spec: dict, path: str) -> None:
    """Validate a numeric answer spec.

    At least one of ``value``, ``min`` or ``max`` must be set to a non-null
    value; a key that is present but ``None`` counts as unset. Every value that
    is set must be a number (booleans excluded), ``min`` must not exceed
    ``max`` when both are given, ``tolerance`` must be a non-negative number if
    set, and ``unit`` is an optional string.

    Raises ``ValidationError`` on the first problem found.
    """
    has_value = spec.get("value") is not None
    # Treat null bounds as absent; otherwise ``{"min": null}`` would pass as a
    # "range" even though it constrains nothing.
    has_range = spec.get("min") is not None or spec.get("max") is not None
    if not has_value and not has_range:
        raise _err(path, "numeric answers require 'value' or a 'min'/'max' range")
    if has_value and not _is_number(spec["value"]):
        raise _err(f"{path}.value", "must be a number")
    for bound in ("min", "max"):
        if spec.get(bound) is not None and not _is_number(spec[bound]):
            raise _err(f"{path}.{bound}", "must be a number")
    low = spec.get("min")
    high = spec.get("max")
    if _is_number(low) and _is_number(high) and low > high:
        raise _err(path, "'min' must not exceed 'max'")
    tolerance = spec.get("tolerance")
    if tolerance is not None and (not _is_number(tolerance) or tolerance < 0):
        raise _err(f"{path}.tolerance", "must be a non-negative number")
    _optional(spec, "unit", str, path)


def _validate_symbolic(spec: dict, path: str) -> None:
    """Validate a symbolic answer spec.

    Requires a non-empty string ``expression``. ``variables`` is an optional
    list of non-empty strings; ``equivalence`` is an optional string that must
    be one of ``EQUIVALENCE_MODES``.

    Raises ``ValidationError`` on the first problem found.
    """
    _require_nonempty_str(spec, "expression", path)
    variables = _optional(spec, "variables", list, path)
    if variables is not None:
        for index, name in enumerate(variables):
            if not isinstance(name, str) or not name:
                raise _err(f"{path}.variables[{index}]", "must be a non-empty string")
    equivalence = _optional(spec, "equivalence", str, path)
    if equivalence is not None and equivalence not in EQUIVALENCE_MODES:
        raise _err(f"{path}.equivalence", f"must be one of {sorted(EQUIVALENCE_MODES)}")


def _validate_proof(spec: dict, path: str) -> None:
    """Validate a proof answer spec.

    ``rubric`` is optional; when provided it must be a non-empty list of dicts,
    each with a non-empty string ``criterion`` and a positive numeric
    ``points``. ``min_words`` is an optional non-negative integer.

    Raises ``ValidationError`` on the first problem found.
    """
    rubric = _optional(spec, "rubric", list, path)
    if rubric is not None:
        if not rubric:
            raise _err(f"{path}.rubric", "must not be empty when provided")
        for index, criterion in enumerate(rubric):
            c_path = f"{path}.rubric[{index}]"
            if not isinstance(criterion, dict):
                raise _err(c_path, "must be a JSON object")
            _require_nonempty_str(criterion, "criterion", c_path)
            points = criterion.get("points")
            if not _is_number(points) or points <= 0:
                raise _err(f"{c_path}.points", "must be a positive number")
    min_words = _optional(spec, "min_words", int, path)
    if min_words is not None and min_words < 0:
        raise _err(f"{path}.min_words", "must be non-negative")


def _validate_code(spec: dict, path: str) -> None:
    """Validate a code answer spec.

    Requires a non-empty string ``language`` and a non-empty ``tests`` list.
    Each test is a dict with a non-empty string ``name`` and at least one of
    the keys ``expected`` / ``assert``; ``input`` (string) and ``hidden``
    (bool) are optional. ``time_limit_ms`` and ``memory_limit_mb`` are optional
    positive integers and ``starter_code`` is an optional string.

    Raises ``ValidationError`` on the first problem found.
    """
    _require_nonempty_str(spec, "language", path)
    tests = _require(spec, "tests", list, path)
    if not tests:
        raise _err(f"{path}.tests", "must contain at least one test case")
    for index, test in enumerate(tests):
        t_path = f"{path}.tests[{index}]"
        if not isinstance(test, dict):
            raise _err(t_path, "must be a JSON object")
        _require_nonempty_str(test, "name", t_path)
        if "expected" not in test and "assert" not in test:
            raise _err(t_path, "must define 'expected' (with optional 'input') or 'assert'")
        _optional(test, "input", str, t_path)
        _optional(test, "hidden", bool, t_path)
    for limit_key in ("time_limit_ms", "memory_limit_mb"):
        limit = _optional(spec, limit_key, int, path)
        if limit is not None and limit <= 0:
            raise _err(f"{path}.{limit_key}", "must be a positive integer")
    _optional(spec, "starter_code", str, path)


def _validate_ordering(spec: dict, path: str) -> None:
    """Validate an ordering answer spec.

    Requires ``items`` (see ``_validate_choice_items``) and a ``correct_order``
    list that contains every item id exactly once.

    Raises ``ValidationError`` on the first problem found.
    """
    ids = _validate_choice_items(spec, "items", path)
    order = _require(spec, "correct_order", list, path)
    # ``ids`` holds unique strings, so ``order`` is a permutation of it exactly
    # when every entry is a string, the lengths agree and the sets are equal.
    # The string check comes first so that mixed-type or unhashable entries
    # produce a ValidationError instead of a TypeError.
    is_permutation = (
        all(isinstance(entry, str) for entry in order)
        and len(order) == len(ids)
        and set(order) == ids
    )
    if not is_permutation:
        raise _err(
            f"{path}.correct_order",
            "must be a permutation of every item id (no omissions or duplicates)",
        )


def _validate_matching(spec: dict, path: str) -> None:
    """Validate a matching answer spec.

    Requires ``left`` and ``right`` item lists (see ``_validate_choice_items``)
    and a non-empty ``pairs`` list of two-element ``[left_id, right_id]`` lists
    that reference known ids. A left id may appear in at most one pair; right
    ids are not checked for reuse.

    Raises ``ValidationError`` on the first problem found.
    """
    left_ids = _validate_choice_items(spec, "left", path)
    right_ids = _validate_choice_items(spec, "right", path)
    pairs = _require(spec, "pairs", list, path)
    if not pairs:
        raise _err(f"{path}.pairs", "must contain at least one pair")
    used_left: set = set()
    for index, pair in enumerate(pairs):
        p_path = f"{path}.pairs[{index}]"
        if not isinstance(pair, list) or len(pair) != 2:
            raise _err(p_path, "must be a two-element list [left_id, right_id]")
        left, right = pair
        # Type-check before the membership tests so unhashable values are
        # reported as unknown ids rather than raising TypeError.
        if not isinstance(left, str) or left not in left_ids:
            raise _err(p_path, f"unknown left id {left!r}")
        if not isinstance(right, str) or right not in right_ids:
            raise _err(p_path, f"unknown right id {right!r}")
        if left in used_left:
            raise _err(p_path, f"left id {left!r} is matched more than once")
        used_left.add(left)


def _validate_widget(spec: dict, path: str) -> None:
    """Validate a widget answer spec.

    Requires a non-empty string ``widget``. ``params`` is an optional dict and
    ``grader`` an optional string that must be one of ``WIDGET_GRADERS``.

    Raises ``ValidationError`` on the first problem found.
    """
    _require_nonempty_str(spec, "widget", path)
    _optional(spec, "params", dict, path)
    grader = _optional(spec, "grader", str, path)
    if grader is not None and grader not in WIDGET_GRADERS:
        raise _err(f"{path}.grader", f"must be one of {sorted(WIDGET_GRADERS)}")
# Dispatch table: problem type -> validator called as ``validator(spec, path)``.
_ANSWER_SPEC_VALIDATORS = {
    "multiple_choice": _validate_multiple_choice,
    "numeric": _validate_numeric,
    "symbolic": _validate_symbolic,
    "proof": _validate_proof,
    "code": _validate_code,
    "ordering": _validate_ordering,
    "matching": _validate_matching,
    "widget": _validate_widget,
}


def validate_answer_spec(problem_type: str, spec) -> None:
    """Check *spec* against the rules registered for *problem_type*.

    *spec* must be a dict and *problem_type* must be a key of
    ``_ANSWER_SPEC_VALIDATORS``; the matching validator is then run with the
    root path ``"answer_spec"``.

    Raises ``ValidationError`` if *spec* is not a dict, if the problem type is
    not registered, or if the selected validator rejects the spec.
    """
    root = "answer_spec"
    if not isinstance(spec, dict):
        raise _err(root, "must be a JSON object")
    if problem_type not in _ANSWER_SPEC_VALIDATORS:
        raise _err(root, f"unsupported problem type {problem_type!r}")
    check = _ANSWER_SPEC_VALIDATORS[problem_type]
    check(spec, root)