"""Orchestrate the full Lagoon benchmark suite and produce a Markdown summary.

Runs, in order:
  1. bench_ingest.py     (keeps the namespace for later phases)
  2. bench_recall.py     (ANN vs exact recall@k)
  3. bench_latency.py    (warm phase)
  4. bench_cache.py      (hit rates with a skewed hot set)
  5. Renders results/SUMMARY.md from the per-benchmark JSON files.

The cold-latency phase cannot be fully automated from inside this process
because a *true* cold measurement requires restarting the API server (or
clearing its local cache directory) between samples.  This script prints
exact instructions for collecting cold samples and merges any
results/latency_cold.json it finds into the summary.

Usage:
    python run_all.py --docs 50000 --dim 256
"""

from __future__ import annotations

import argparse
import json
import os
import subprocess
import sys
from typing import Optional

# Absolute path of the directory containing this file; `run` joins script
# names onto it to locate the benchmark scripts it launches.
HERE = os.path.dirname(os.path.abspath(__file__))


def run(script: str, *args: str) -> None:
    """Run a benchmark script with the current Python interpreter.

    Args:
        script: File name of the script, resolved relative to ``HERE`` (the
            directory containing this file).
        *args: Command-line arguments forwarded to the script unchanged.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status (``check=True``); the exception propagates to the caller.
    """
    cmd = [sys.executable, os.path.join(HERE, script), *args]
    # Flush the echo before launching the child. The child writes straight to
    # the inherited stdout, so when our stdout is block-buffered (redirected
    # to a file or pipe) an unflushed echo would appear *after* the child's
    # output and make the log misleading.
    print(f"\n$ {' '.join(cmd)}", flush=True)
    subprocess.run(cmd, check=True)


def load(out_dir: str, name: str) -> Optional[dict]:
    """Return the decoded contents of ``<out_dir>/<name>.json``.

    Args:
        out_dir: Directory that holds the benchmark result files.
        name: Result name without the ``.json`` extension.

    Returns:
        The parsed JSON value, or ``None`` when nothing exists at that path.
    """
    json_path = os.path.join(out_dir, name + ".json")
    if os.path.exists(json_path):
        with open(json_path, encoding="utf-8") as handle:
            return json.load(handle)
    # A benchmark that has not been run simply has no result file.
    return None


def fmt_latency(block: Optional[dict]) -> str:
    """Format a latency-statistics mapping as a one-line percentile summary.

    Args:
        block: Either the statistics mapping itself or a mapping that wraps
            it under the ``client_latency`` key. May be ``None`` or empty.

    Returns:
        ``"p50 … ms · p90 … ms · p99 … ms (n=…)"`` built from the
        ``p50_ms``, ``p90_ms``, ``p99_ms`` and ``count`` entries, or ``"—"``
        when there are no statistics or the sample count is zero or absent.
    """
    # Unwrap the optional "client_latency" envelope; fall back to the mapping
    # itself when the key is absent.
    stats = block.get("client_latency", block) if block else None
    if stats and stats.get("count", 0) != 0:
        p50, p90, p99, n = (
            stats[key] for key in ("p50_ms", "p90_ms", "p99_ms", "count")
        )
        return f"p50 {p50} ms · p90 {p90} ms · p99 {p99} ms (n={n})"
    return "—"


def render_summary(out_dir: str) -> str:
    """Build the Markdown summary from the per-benchmark JSON results.

    Reads the ``ingest``, ``recall``, ``latency_warm``, ``latency_cold`` and
    ``cache`` result files from *out_dir* via ``load``. The environment,
    ingest, recall and cache sections are emitted only when the matching
    file was found and is non-empty; the title, the "Query latency" heading
    and the "Reporting policy" footer are always emitted.

    Args:
        out_dir: Directory holding the benchmark JSON result files.

    Returns:
        The complete Markdown document as a single newline-joined string.

    Raises:
        KeyError: If a result file that is present lacks a field read with
            plain indexing (for example ``results`` or ``params``).
    """
    ingest = load(out_dir, "ingest")
    recall = load(out_dir, "recall")
    warm = load(out_dir, "latency_warm")
    cold = load(out_dir, "latency_cold")
    cache = load(out_dir, "cache")

    lines = [
        "# Lagoon benchmark results",
        "",
        "> Generated by `benchmarks/run_all.py`. Read",
        "> `docs/benchmark-guide.md` for methodology and reporting policy",
        "> before quoting any number from this file.",
        "",
    ]

    # Values used by the "cold not collected" hint further down. They start
    # as generic placeholders and are refined from the ingest run if present.
    hint_namespace, hint_dim = "bench-ingest", "<dim>"

    if ingest:
        env = ingest.get("environment", {})
        r, p = ingest["results"], ingest["params"]
        hint_dim = p["dim"]
        # NOTE(review): assumes bench_ingest.py may record the namespace in
        # its params; if it does not, the suite default is kept.
        hint_namespace = p.get("namespace", hint_namespace)
        lines += [
            "## Environment",
            "",
            f"- Timestamp (UTC): {env.get('timestamp_utc', '?')}",
            f"- Host: {env.get('hostname', '?')} — {env.get('platform', '?')}",
            f"- CPUs: {env.get('cpu_count', '?')}",
            f"- Server URL: {env.get('lagoon_url', '?')}",
            "- Storage backend: **FILL IN** (filesystem / MinIO / S3 + region)",
            "- Server build: **FILL IN** (git SHA, release/debug)",
            "",
            "## Ingest throughput",
            "",
            f"- Dataset: {p['docs']:,} docs, {p['dim']}-d vectors, "
            f"batch size {p['batch_size']}, concurrency {p['concurrency']}",
            f"- Throughput: **{r['docs_per_second']:,} docs/s** "
            f"({r['payload_mb_per_second']} MB/s JSON payload)",
            f"- Per-batch latency: {fmt_latency({'client_latency': r['batch_latency']})}",
            f"- Index catch-up after last write: {r['index_catchup_seconds']} s "
            f"(caught up: {r['index_caught_up']})",
            "",
        ]

    if recall:
        r, p = recall["results"], recall["params"]
        rec_items = [f"{k} = **{v}**" for k, v in r.items() if k.startswith("recall@")]
        # Only attach the multiplication sign to a real number, so a missing
        # speedup renders as a plain placeholder instead of "—×".
        speedup = r.get("speedup_mean")
        speedup_text = "—" if speedup is None else f"{speedup}×"
        lines += [
            "## ANN recall vs exact kNN",
            "",
            f"- {p['queries']} queries, top_k={p['top_k']}, metric={p['metric']}",
            "- " + " · ".join(rec_items),
            f"- Exact latency: {fmt_latency({'client_latency': r['exact_latency']})}",
            f"- ANN latency: {fmt_latency({'client_latency': r['ann_latency']})}",
            f"- Mean speedup (exact/ANN): {speedup_text}",
            "",
        ]

    lines += ["## Query latency", ""]
    if cold:
        r = cold["results"]
        lines += [
            "- **Cold** (first query after restart/cache clear): "
            f"{r.get('first_query_cold_ms', '—')} ms "
            "(single sample — collect more by re-running the cold phase)",
        ]
    else:
        # Spell out the command against the same namespace, dimension and
        # output directory as this summary, so the cold result file ends up
        # where the next render will look for it.
        lines += [
            "- **Cold**: not collected. Restart the server (or clear its cache "
            "dir), then run `python bench_latency.py --phase cold "
            f"--namespace {hint_namespace} --dim {hint_dim} "
            f"--out-dir {out_dir}` and regenerate this summary.",
        ]
    if warm:
        r = warm["results"]
        # Fixed display order; workloads absent from the results are skipped.
        for wl in ("vector_ann", "vector_exact", "bm25", "hybrid_rrf",
                   "filtered_vector"):
            if wl in r:
                lines.append(f"- **Warm {wl}**: {fmt_latency(r[wl])}")
    lines.append("")

    if cache:
        r = cache["results"]
        lines += [
            "## Cache hit rates",
            "",
            f"- Workload: {r['queries_issued']} queries, "
            f"{r['distinct_queries']} distinct",
            f"- Overall hit rate: **{r.get('overall_hit_rate', '—')}**",
            f"- Hits by tier: `{json.dumps(r.get('cache_hits_by_tier', {}))}`",
            f"- Misses by tier: `{json.dumps(r.get('cache_misses_by_tier', {}))}`",
            f"- Object-store GETs during workload: {r.get('object_store_gets', '—')}; "
            f"PUTs: {r.get('object_store_puts', '—')}",
            "",
        ]
        if "warning" in r:
            lines += [f"> ⚠️ {r['warning']}", ""]

    lines += [
        "## Reporting policy",
        "",
        "These numbers describe **this build on this machine with this "
        "dataset** only. Do not extrapolate to other hardware, datasets, or "
        "products, and do not present them as comparisons with proprietary "
        "systems that were not measured under identical conditions. See "
        "`docs/benchmark-guide.md` § Honest reporting.",
        "",
    ]
    return "\n".join(lines)


def main() -> None:
    """Parse CLI options, run the benchmark phases, and write ``SUMMARY.md``.

    Unless ``--summary-only`` is given, runs ``bench_ingest.py``,
    ``bench_recall.py``, ``bench_latency.py`` (warm phase) and
    ``bench_cache.py`` in that order against the same namespace, then renders
    the summary into ``<out-dir>/SUMMARY.md`` and prints follow-up
    instructions for collecting cold-latency samples.

    Raises:
        subprocess.CalledProcessError: Propagated from ``run`` when a
            benchmark script exits with a non-zero status; later phases and
            the summary are then skipped.
    """
    ap = argparse.ArgumentParser(description="Run the full Lagoon benchmark suite")
    ap.add_argument("--docs", type=int, default=50_000)
    ap.add_argument("--dim", type=int, default=256)
    ap.add_argument("--batch-size", type=int, default=500)
    ap.add_argument("--queries", type=int, default=200)
    ap.add_argument("--recall-queries", type=int, default=100)
    ap.add_argument("--seed", type=int, default=42)
    ap.add_argument("--namespace", default="bench-ingest")
    ap.add_argument("--out-dir", default="results")
    ap.add_argument("--summary-only", action="store_true",
                    help="Skip running benchmarks; just re-render SUMMARY.md "
                         "from existing JSON results.")
    args = ap.parse_args()

    if not args.summary_only:
        # Ingest keeps its namespace so the later phases can query it.
        run("bench_ingest.py",
            "--docs", str(args.docs), "--dim", str(args.dim),
            "--batch-size", str(args.batch_size), "--seed", str(args.seed),
            "--namespace", args.namespace, "--keep-namespace",
            "--out-dir", args.out_dir)
        run("bench_recall.py",
            "--namespace", args.namespace, "--dim", str(args.dim),
            "--queries", str(args.recall_queries), "--top-k", "100",
            "--seed", str(args.seed), "--out-dir", args.out_dir)
        run("bench_latency.py",
            "--namespace", args.namespace, "--dim", str(args.dim),
            "--queries", str(args.queries), "--phase", "warm",
            "--seed", str(args.seed), "--out-dir", args.out_dir)
        run("bench_cache.py",
            "--namespace", args.namespace, "--dim", str(args.dim),
            "--queries", str(args.queries), "--hot-set", "20",
            "--seed", str(args.seed), "--out-dir", args.out_dir)

    summary = render_summary(args.out_dir)
    # With --summary-only and no prior run the directory may not exist yet.
    os.makedirs(args.out_dir, exist_ok=True)
    out_path = os.path.join(args.out_dir, "SUMMARY.md")
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(summary)
    print(f"\nWrote {out_path}")

    # Step 3 repeats --out-dir so the re-render reads the same directory the
    # cold phase wrote to, instead of silently falling back to the default.
    print(
        "\nTo add COLD latency samples:\n"
        "  1. Restart the Lagoon server (or delete its local cache directory).\n"
        f"  2. python bench_latency.py --phase cold --namespace {args.namespace} "
        f"--dim {args.dim} --out-dir {args.out_dir}\n"
        f"  3. python run_all.py --summary-only --out-dir {args.out_dir}\n"
        f"\nWhen finished, free storage with:  "
        f"lagoon namespace delete {args.namespace}  (or via the API)."
    )


# Run the suite only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()