"""Structural validators for FablePool's structured content format ("fable-doc")
and for the per-problem-type answer specifications.

These validators are deliberately dependency-free so that the model layer can
enforce a first line of defence without importing a JSON Schema engine.  The
authoritative, machine-readable contract lives under ``schemas/`` (JSON Schema
2020-12) and is enforced end-to-end by ``scripts/validate_content.py`` and by
the API serializers.  The two MUST be kept in sync; the schema suite contains
conformance fixtures that exercise both implementations.

Document shape (version 1)::

    {
      "type": "doc",
      "version": 1,
      "blocks": [
        {"type": "paragraph", "content": [...]},
        {"type": "math", "tex": "\\int_0^1 x^2\\,dx", "display": true},
        ...
      ]
    }

Content is stored as structured JSON, never as raw HTML, so that it can be
rendered safely on web/mobile/low-bandwidth clients and exported as OER.
Interactive widgets are referenced by identifier only ("widget" blocks); the
widget runtime is sandboxed and lives outside trusted core content.
"""
from __future__ import annotations

from django.core.exceptions import ValidationError

# Required value of the top-level "type" key of a document.
DOCUMENT_TYPE = "doc"
# Document "version" values accepted by validate_document().
SUPPORTED_DOC_VERSIONS = frozenset({1})

# Every block "type" accepted by _validate_block(); anything else is rejected
# as an unknown block type.
BLOCK_TYPES = frozenset(
    {
        "paragraph",
        "heading",
        "math",
        "code",
        "image",
        "figure",
        "list",
        "table",
        "callout",
        "quote",
        "divider",
        "widget",
    }
)

# Allowed "kind" values of a callout block.
CALLOUT_KINDS = frozenset({"note", "tip", "warning", "definition", "theorem", "example"})
# Allowed "style" values of a list block.
LIST_STYLES = frozenset({"bullet", "ordered"})
# Allowed "equivalence" values of a symbolic answer spec.
EQUIVALENCE_MODES = frozenset({"symbolic", "numeric_sampling"})
# Allowed "grader" values of a widget answer spec.
WIDGET_GRADERS = frozenset({"widget", "manual"})


# ---------------------------------------------------------------------------
# Generic helpers
# ---------------------------------------------------------------------------

def _err(path: str, message: str) -> ValidationError:
    """Build (but do not raise) a ``ValidationError`` located at *path*.

    The message is rendered as ``"<path>: <message>"`` and the error carries
    the code ``"invalid_content"``.  Callers ``raise`` the returned object.
    """
    located_message = f"{path}: {message}"
    return ValidationError(located_message, code="invalid_content")


def _require(container: dict, key: str, types, path: str):
    """Return ``container[key]`` if present and of the expected type(s).

    ``types`` is a single type or a tuple of types, as accepted by
    :func:`isinstance`.  A ``bool`` value is accepted only when ``bool`` is
    itself listed in ``types``: Python makes ``bool`` a subclass of ``int``,
    but a JSON ``true``/``false`` is not an integer and must not satisfy an
    ``int`` requirement.

    Raises a ``ValidationError`` located at *path* when the key is missing,
    or at ``{path}.{key}`` when the value has the wrong type.
    """
    if key not in container:
        raise _err(path, f"missing required key '{key}'")
    value = container[key]
    allowed = types if isinstance(types, tuple) else (types,)
    # isinstance(True, int) is True, so booleans need an explicit veto.
    stray_bool = isinstance(value, bool) and bool not in allowed
    if stray_bool or not isinstance(value, allowed):
        expected = "/".join(t.__name__ for t in allowed)
        raise _err(f"{path}.{key}", f"expected {expected}, got {type(value).__name__}")
    return value


def _require_nonempty_str(container: dict, key: str, path: str) -> str:
    """Return the required string ``container[key]``, rejecting blank values.

    Presence and type are checked by ``_require``; a string that is empty or
    consists only of whitespace is reported at ``{path}.{key}``.
    """
    text = _require(container, key, str, path)
    if text.strip():
        return text
    raise _err(f"{path}.{key}", "must be a non-empty string")


def _optional(container: dict, key: str, types, path: str):
    """Return ``container[key]`` if set and of the expected type(s), else ``None``.

    A missing key and an explicit ``None`` are both treated as "not provided"
    and yield ``None``.  ``types`` is a single type or a tuple of types, as
    accepted by :func:`isinstance`.  A ``bool`` value is accepted only when
    ``bool`` is itself listed in ``types``: Python makes ``bool`` a subclass of
    ``int``, but a JSON ``true``/``false`` is not an integer and must not
    satisfy an ``int`` requirement.

    Raises a ``ValidationError`` located at ``{path}.{key}`` when a provided
    value has the wrong type.
    """
    value = container.get(key)
    if value is None:
        return None
    allowed = types if isinstance(types, tuple) else (types,)
    # isinstance(True, int) is True, so booleans need an explicit veto.
    stray_bool = isinstance(value, bool) and bool not in allowed
    if stray_bool or not isinstance(value, allowed):
        expected = "/".join(t.__name__ for t in allowed)
        raise _err(f"{path}.{key}", f"expected {expected}, got {type(value).__name__}")
    return value


def _is_number(value) -> bool:
    """Tell whether *value* is an ``int`` or ``float`` that is not a ``bool``."""
    # bool is a subclass of int, so it has to be ruled out first.
    if isinstance(value, bool):
        return False
    return isinstance(value, (int, float))


# ---------------------------------------------------------------------------
# Document validation
# ---------------------------------------------------------------------------

def validate_document(value) -> None:
    """Validate a fable-doc structured document.

    Checks, in order: that *value* is a dict, that its ``type`` equals
    ``DOCUMENT_TYPE``, that its ``version`` is a supported integer, and that
    ``blocks`` is a non-empty list whose entries each pass block validation.

    Raises :class:`django.core.exceptions.ValidationError` on the first
    structural problem found, with a JSON-path-like location in the message.
    """
    if not isinstance(value, dict):
        raise _err("$", "document must be a JSON object")
    if value.get("type") != DOCUMENT_TYPE:
        raise _err("$.type", f"must be '{DOCUMENT_TYPE}'")
    version = value.get("version")
    # bool is a subclass of int and True == 1, so a JSON ``true`` would
    # otherwise be accepted as version 1.
    version_is_int = isinstance(version, int) and not isinstance(version, bool)
    if not version_is_int or version not in SUPPORTED_DOC_VERSIONS:
        raise _err(
            "$.version",
            f"must be one of {sorted(SUPPORTED_DOC_VERSIONS)} (got {version!r})",
        )
    blocks = _require(value, "blocks", list, "$")
    if not blocks:
        raise _err("$.blocks", "document must contain at least one block")
    for index, block in enumerate(blocks):
        _validate_block(block, f"$.blocks[{index}]")


def _validate_block(block, path: str) -> None:
    """Validate a single entry of a document's ``blocks`` list.

    *path* is the JSON-path-like location of the block and prefixes every
    error message.  The block must be a dict whose ``type`` is one of
    ``BLOCK_TYPES``; the remaining checks depend on that type.

    Raises a ``ValidationError`` on the first problem found.
    """
    if not isinstance(block, dict):
        raise _err(path, "block must be a JSON object")
    block_type = block.get("type")
    # Test for a string before the set lookup: a JSON array/object here is
    # unhashable and ``in`` would raise TypeError instead of a validation error.
    if not isinstance(block_type, str) or block_type not in BLOCK_TYPES:
        raise _err(f"{path}.type", f"unknown block type {block_type!r}")

    if block_type == "paragraph":
        if "content" not in block:
            raise _err(path, "paragraph block requires 'content'")
        if not isinstance(block["content"], (str, list)):
            raise _err(f"{path}.content", "must be a string or list of inline nodes")

    elif block_type == "heading":
        level = _require(block, "level", int, path)
        # bool is a subclass of int; a JSON true/false is not a heading level.
        if isinstance(level, bool):
            raise _err(f"{path}.level", "expected int, got bool")
        if not 1 <= level <= 4:
            raise _err(f"{path}.level", "must be between 1 and 4")
        if "content" not in block:
            raise _err(path, "heading block requires 'content'")

    elif block_type == "math":
        _require_nonempty_str(block, "tex", path)
        _optional(block, "display", bool, path)

    elif block_type == "code":
        _require(block, "code", str, path)
        _optional(block, "language", str, path)

    elif block_type in {"image", "figure"}:
        if not (isinstance(block.get("asset"), str) or isinstance(block.get("src"), str)):
            raise _err(path, f"{block_type} block requires 'asset' (media asset id) or 'src'")
        # Mandatory alt text keeps published content accessible.
        if not isinstance(block.get("alt"), str):
            raise _err(f"{path}.alt", "alt text is required and must be a string")
        _optional(block, "caption", (str, list), path)

    elif block_type == "list":
        style = _require(block, "style", str, path)
        if style not in LIST_STYLES:
            raise _err(f"{path}.style", f"must be one of {sorted(LIST_STYLES)}")
        items = _require(block, "items", list, path)
        if not items:
            raise _err(f"{path}.items", "list must contain at least one item")

    elif block_type == "table":
        rows = _require(block, "rows", list, path)
        if not rows:
            raise _err(f"{path}.rows", "table must contain at least one row")
        for r_index, row in enumerate(rows):
            if not isinstance(row, list):
                raise _err(f"{path}.rows[{r_index}]", "row must be a list of cells")

    elif block_type == "callout":
        kind = _require(block, "kind", str, path)
        if kind not in CALLOUT_KINDS:
            raise _err(f"{path}.kind", f"must be one of {sorted(CALLOUT_KINDS)}")
        if "content" not in block:
            raise _err(path, "callout block requires 'content'")

    elif block_type == "quote":
        if "content" not in block:
            raise _err(path, "quote block requires 'content'")

    elif block_type == "widget":
        # Widgets are untrusted: only an identifier plus declarative params are
        # ever stored in core content.  Rendering happens in a sandboxed frame.
        _require_nonempty_str(block, "widget", path)
        _optional(block, "params", dict, path)
        if not isinstance(block.get("fallback"), str):
            raise _err(
                f"{path}.fallback",
                "widget blocks require a textual 'fallback' for accessibility "
                "and low-bandwidth clients",
            )

    # "divider" carries no payload.


# ---------------------------------------------------------------------------
# Answer-spec validation (per problem type)
# ---------------------------------------------------------------------------

def _validate_choice_items(spec: dict, key: str, path: str) -> set:
    """Validate the list of identified items at ``spec[key]`` and return their ids.

    The list must hold at least two dicts, each with a unique, non-empty
    string ``id`` and a ``content`` key.  Raises a ``ValidationError`` on the
    first entry that breaks one of these rules.
    """
    entries = _require(spec, key, list, path)
    if len(entries) < 2:
        raise _err(f"{path}.{key}", "requires at least two entries")

    known_ids: set = set()
    for position, entry in enumerate(entries):
        entry_path = f"{path}.{key}[{position}]"
        if not isinstance(entry, dict):
            raise _err(entry_path, "must be a JSON object")

        entry_id = entry.get("id")
        if not (isinstance(entry_id, str) and entry_id):
            raise _err(f"{entry_path}.id", "must be a non-empty string")
        if entry_id in known_ids:
            raise _err(f"{entry_path}.id", f"duplicate id {entry_id!r}")
        known_ids.add(entry_id)

        # The id checks run before the content check, so id problems win.
        if "content" not in entry:
            raise _err(entry_path, "missing required key 'content'")
    return known_ids


def _validate_multiple_choice(spec: dict, path: str) -> None:
    """Validate a multiple-choice answer spec.

    Requires a ``choices`` list of identified items and a non-empty
    ``correct`` list that references known choice ids without repetition.
    Unless the optional boolean ``multiple`` is true, exactly one correct id
    is allowed.  The optional ``shuffle`` must be a boolean when provided.

    Raises a ``ValidationError`` on the first problem found.
    """
    ids = _validate_choice_items(spec, "choices", path)
    correct = _require(spec, "correct", list, path)
    if not correct:
        raise _err(f"{path}.correct", "must list at least one correct choice id")
    # Choice ids are always strings.  Testing the type first keeps unhashable
    # JSON values (arrays/objects) out of the set lookup, where they would
    # raise TypeError instead of a validation error.
    unknown = [c for c in correct if not isinstance(c, str) or c not in ids]
    if unknown:
        raise _err(f"{path}.correct", f"unknown choice ids: {unknown}")
    # Every entry is a known string id at this point, so set() is safe.
    if len(set(correct)) != len(correct):
        raise _err(f"{path}.correct", "contains duplicate ids")
    multiple = _optional(spec, "multiple", bool, path)
    if not multiple and len(correct) != 1:
        raise _err(
            f"{path}.correct",
            "single-answer questions must have exactly one correct id "
            "(set 'multiple': true for multi-select)",
        )
    _optional(spec, "shuffle", bool, path)


def _validate_numeric(spec: dict, path: str) -> None:
    """Validate a numeric answer spec.

    The spec must provide a numeric ``value`` and/or at least one numeric
    bound (``min``/``max``).  A key set to ``None`` counts as not provided,
    so a spec holding only ``"min": null`` has no answer and is rejected.
    When both bounds are given, ``min`` must not exceed ``max``.  The optional
    ``tolerance`` must be a non-negative number and the optional ``unit`` a
    string.

    Raises a ``ValidationError`` on the first problem found.
    """
    has_value = spec.get("value") is not None
    # Null bounds are ignored by the checks below, so they must not count as
    # a range here either.
    has_range = any(spec.get(bound) is not None for bound in ("min", "max"))
    if not has_value and not has_range:
        raise _err(path, "numeric answers require 'value' or a 'min'/'max' range")
    if has_value and not _is_number(spec["value"]):
        raise _err(f"{path}.value", "must be a number")
    for bound in ("min", "max"):
        limit = spec.get(bound)
        if limit is not None and not _is_number(limit):
            raise _err(f"{path}.{bound}", "must be a number")
    if (
        _is_number(spec.get("min"))
        and _is_number(spec.get("max"))
        and spec["min"] > spec["max"]
    ):
        raise _err(path, "'min' must not exceed 'max'")
    tolerance = spec.get("tolerance")
    if tolerance is not None:
        if not _is_number(tolerance) or tolerance < 0:
            raise _err(f"{path}.tolerance", "must be a non-negative number")
    _optional(spec, "unit", str, path)


def _validate_symbolic(spec: dict, path: str) -> None:
    """Validate a symbolic answer spec.

    ``expression`` is a required non-blank string.  ``variables``, when
    provided, is a list of non-empty strings, and ``equivalence``, when
    provided, must be one of ``EQUIVALENCE_MODES``.
    """
    _require_nonempty_str(spec, "expression", path)

    declared = _optional(spec, "variables", list, path)
    # An absent list and an empty list both have nothing to check.
    for position, symbol in enumerate(declared or ()):
        if not (isinstance(symbol, str) and symbol):
            raise _err(f"{path}.variables[{position}]", "must be a non-empty string")

    mode = _optional(spec, "equivalence", str, path)
    if mode is None or mode in EQUIVALENCE_MODES:
        return
    raise _err(f"{path}.equivalence", f"must be one of {sorted(EQUIVALENCE_MODES)}")


def _validate_proof(spec: dict, path: str) -> None:
    """Validate a proof answer spec.

    Both keys are optional.  ``rubric``, when provided, is a non-empty list
    of dicts, each with a non-blank ``criterion`` string and a positive
    numeric ``points``.  ``min_words``, when provided, is a non-negative
    integer.
    """
    criteria = _optional(spec, "rubric", list, path)
    if criteria is not None:
        if len(criteria) == 0:
            raise _err(f"{path}.rubric", "must not be empty when provided")
        for position, entry in enumerate(criteria):
            entry_path = f"{path}.rubric[{position}]"
            if not isinstance(entry, dict):
                raise _err(entry_path, "must be a JSON object")
            _require_nonempty_str(entry, "criterion", entry_path)
            score = entry.get("points")
            if not _is_number(score) or score <= 0:
                raise _err(f"{entry_path}.points", "must be a positive number")

    word_floor = _optional(spec, "min_words", int, path)
    if word_floor is None:
        return
    if word_floor < 0:
        raise _err(f"{path}.min_words", "must be non-negative")


def _validate_code(spec: dict, path: str) -> None:
    """Validate a code answer spec.

    ``language`` is a required non-blank string and ``tests`` a required
    non-empty list.  Each test is a dict with a non-blank ``name`` and either
    an ``expected`` or an ``assert`` key; its optional ``input`` is a string
    and its optional ``hidden`` a boolean.  The optional ``time_limit_ms`` and
    ``memory_limit_mb`` are positive integers, and the optional
    ``starter_code`` is a string.
    """
    _require_nonempty_str(spec, "language", path)

    cases = _require(spec, "tests", list, path)
    if len(cases) == 0:
        raise _err(f"{path}.tests", "must contain at least one test case")
    for position, case in enumerate(cases):
        case_path = f"{path}.tests[{position}]"
        if not isinstance(case, dict):
            raise _err(case_path, "must be a JSON object")
        _require_nonempty_str(case, "name", case_path)
        if not ("expected" in case or "assert" in case):
            raise _err(case_path, "must define 'expected' (with optional 'input') or 'assert'")
        _optional(case, "input", str, case_path)
        _optional(case, "hidden", bool, case_path)

    for limit_name in ("time_limit_ms", "memory_limit_mb"):
        ceiling = _optional(spec, limit_name, int, path)
        if ceiling is None:
            continue
        if ceiling <= 0:
            raise _err(f"{path}.{limit_name}", "must be a positive integer")

    _optional(spec, "starter_code", str, path)


def _validate_ordering(spec: dict, path: str) -> None:
    """Validate an ordering answer spec.

    ``items`` is a list of identified items and ``correct_order`` must list
    every item id exactly once, in any order.

    Raises a ``ValidationError`` when ``correct_order`` is not such a
    permutation, including when it contains non-string entries.
    """
    ids = _validate_choice_items(spec, "items", path)
    order = _require(spec, "correct_order", list, path)
    # Item ids are unique strings, so a permutation has the same length and
    # the same set of members.  The string check runs before set() so that
    # unhashable or mixed-type JSON values are reported as a validation error
    # rather than surfacing as TypeError.
    is_permutation = (
        len(order) == len(ids)
        and all(isinstance(entry, str) for entry in order)
        and set(order) == ids
    )
    if not is_permutation:
        raise _err(
            f"{path}.correct_order",
            "must be a permutation of every item id (no omissions or duplicates)",
        )


def _validate_matching(spec: dict, path: str) -> None:
    """Validate a matching answer spec.

    ``left`` and ``right`` are lists of identified items.  ``pairs`` is a
    non-empty list of two-element lists ``[left_id, right_id]`` that reference
    known ids; a left id may appear in at most one pair.

    Raises a ``ValidationError`` on the first problem found.
    """
    left_ids = _validate_choice_items(spec, "left", path)
    right_ids = _validate_choice_items(spec, "right", path)
    pairs = _require(spec, "pairs", list, path)
    if not pairs:
        raise _err(f"{path}.pairs", "must contain at least one pair")
    used_left: set = set()
    for index, pair in enumerate(pairs):
        p_path = f"{path}.pairs[{index}]"
        if not isinstance(pair, list) or len(pair) != 2:
            raise _err(p_path, "must be a two-element list [left_id, right_id]")
        left, right = pair
        # Ids are always strings.  Testing the type first keeps unhashable
        # JSON values (arrays/objects) out of the set lookups, where they
        # would raise TypeError instead of a validation error.
        if not isinstance(left, str) or left not in left_ids:
            raise _err(p_path, f"unknown left id {left!r}")
        if not isinstance(right, str) or right not in right_ids:
            raise _err(p_path, f"unknown right id {right!r}")
        if left in used_left:
            raise _err(p_path, f"left id {left!r} is matched more than once")
        used_left.add(left)


def _validate_widget(spec: dict, path: str) -> None:
    """Validate a widget answer spec.

    ``widget`` is a required non-blank string.  ``params``, when provided, is
    a dict, and ``grader``, when provided, must be one of ``WIDGET_GRADERS``.
    """
    _require_nonempty_str(spec, "widget", path)
    _optional(spec, "params", dict, path)
    chosen_grader = _optional(spec, "grader", str, path)
    if chosen_grader is None or chosen_grader in WIDGET_GRADERS:
        return
    raise _err(f"{path}.grader", f"must be one of {sorted(WIDGET_GRADERS)}")


# Dispatch table: problem type -> validator called as ``validator(spec, path)``.
# validate_answer_spec() rejects any problem type that is not a key here.
_ANSWER_SPEC_VALIDATORS = {
    "multiple_choice": _validate_multiple_choice,
    "numeric": _validate_numeric,
    "symbolic": _validate_symbolic,
    "proof": _validate_proof,
    "code": _validate_code,
    "ordering": _validate_ordering,
    "matching": _validate_matching,
    "widget": _validate_widget,
}


def validate_answer_spec(problem_type: str, spec) -> None:
    """Validate an answer specification for the given problem type.

    *spec* must be a dict and *problem_type* must be a key of
    ``_ANSWER_SPEC_VALIDATORS``; the matching validator is then run with
    ``"answer_spec"`` as the root of its error paths.

    Raises a ``ValidationError`` when either precondition fails or when the
    selected validator rejects the spec.
    """
    root = "answer_spec"
    if not isinstance(spec, dict):
        raise _err(root, "must be a JSON object")
    if problem_type not in _ANSWER_SPEC_VALIDATORS:
        raise _err(root, f"unsupported problem type {problem_type!r}")
    check = _ANSWER_SPEC_VALIDATORS[problem_type]
    check(spec, root)