#!/usr/bin/env python3
"""
roadmap-build.py — Build state.json from .meshkore/roadmap/ + docs/.

Parses YAML frontmatter from every .md under tasks/, log/ and docs/ and
aggregates into a single state.json that the portal consumes.

Usage:
    python3 roadmap-build.py [--meshkore PATH] [--out PATH] [--validate]
                             [--no-docs] [--quiet]

Defaults:
    --meshkore   .meshkore/
    --out        <meshkore>/roadmap/state.json (a sibling state.js is also written)
    --no-docs    Omit the docs tree from the output (default: docs are included)
    --validate   Only validate frontmatter, do not write
    --quiet      Suppress log output (log messages go to stderr)
"""
from __future__ import annotations

import argparse
import json
import re
import sys
from datetime import datetime, timezone
from pathlib import Path

# ─── Frontmatter parser (yaml without dep) ──────────────────────────────

# Opening `---` fence, lazily captured body, closing `---` fence. The closing
# fence may be followed by a newline OR by end-of-input, so a file that ends
# right after the fence (no trailing newline) is still recognised.
_FM_RE = re.compile(r"^---\s*\n(.*?)\n---\s*(?:\n|\Z)", re.DOTALL)


def parse_frontmatter(text: str) -> dict | None:
    """Parse the leading YAML frontmatter block of *text*.

    Only a minimal YAML subset is understood:

    - ``key: value`` scalars (surrounding quotes stripped, then coerced to
      bool/int/float where the text looks like one)
    - ``key: [a, b]`` inline lists of strings
    - ``key:`` followed by ``- item`` lines (block lists of strings)
    - blank lines and ``#`` comment lines are ignored

    A key with an empty value and no following list items maps to ``[]``.

    Returns the parsed mapping, or ``None`` when *text* does not start with
    a frontmatter block.
    """
    m = _FM_RE.match(text)
    if not m:
        return None
    out: dict = {}
    current_key = None
    list_items: list[str] = []
    for line in m.group(1).splitlines():
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue
        if line.startswith(("  - ", "- ")):
            # Block-list item: drop only the single leading "-" marker.
            # Stripping *every* leading dash would corrupt values such as
            # "-1" or "--flag".
            list_items.append(_strip_quotes(stripped[1:].strip()))
            continue
        if list_items and current_key:
            # A non-item line ends the list collected for the pending key.
            out[current_key] = list_items
            list_items = []
            current_key = None
        if ":" in line:
            key, _, value = line.partition(":")
            key = key.strip()
            value = value.strip()
            if not value:
                # Possibly the header of a block list; items may follow.
                current_key = key
                list_items = []
                out[key] = []
            elif value.startswith("[") and value.endswith("]"):
                inner = value[1:-1].strip()
                out[key] = [_strip_quotes(x.strip()) for x in inner.split(",") if x.strip()]
            else:
                out[key] = _coerce(_strip_quotes(value))
    if list_items and current_key:
        # Frontmatter ended while a block list was still open.
        out[current_key] = list_items
    return out


def _strip_quotes(s: str) -> str:
    """Return *s* without one pair of matching surrounding quotes.

    Only a string of at least two characters that begins and ends with the
    same quote character (``"`` or ``'``) is unwrapped; anything else is
    returned unchanged.
    """
    for quote in ('"', "'"):
        if len(s) >= 2 and s.startswith(quote) and s.endswith(quote):
            return s[1:-1]
    return s


def _coerce(v: str):
    """Convert a scalar string to bool, int or float when it looks like one.

    ``true``/``True`` and ``false``/``False`` become booleans, an optionally
    negative run of digits becomes an int, digits with a fractional part
    become a float. Every other string is returned unchanged.
    """
    booleans = {"true": True, "True": True, "false": False, "False": False}
    if v in booleans:
        return booleans[v]
    number = re.fullmatch(r"-?\d+(\.\d+)?", v)
    if number is None:
        return v
    # Group 1 is the fractional part; its presence selects float over int.
    return float(v) if number.group(1) else int(v)


def strip_frontmatter(text: str) -> str:
    """Return *text* with its leading frontmatter block (if any) removed."""
    fence = _FM_RE.match(text)
    if fence is None:
        return text
    return text[fence.end():]


# ─── Walkers ─────────────────────────────────────────────────────────────

# Accepted values for a task's `status:` frontmatter field.
VALID_TASK_STATUSES = {
    "backlog",
    "next",
    "in_progress",
    "done",
    "blocked",
    "cancelled",
}
# Accepted values for a doc's `status:` frontmatter field.
VALID_DOC_STATUSES = {
    "draft",
    "stable",
    "deprecated",
}


def collect_tasks(meshkore_dir: Path, log: callable, declared_module_ids: set[str] | None = None) -> list[dict]:
    """Walk both layouts:
      - new: .meshkore/modules/<id>/tasks/*.md and .meshkore/modules/<id>/log/<YYYY-MM>/*.md
      - legacy: .meshkore/roadmap/tasks/<cat>/*.md and .meshkore/roadmap/log/<YYYY-MM>/*.md

    For files under the new layout, the task's `category:` is derived from
    the parent module folder name when missing.
    """
    tasks: list[dict] = []
    seen: set[Path] = set()

    # New layout: per-module
    modules_root = meshkore_dir / "modules"
    if modules_root.exists():
        for mod_dir in sorted(modules_root.iterdir()):
            if not mod_dir.is_dir():
                continue
            mid = mod_dir.name
            for sub in ("tasks", "log"):
                base = mod_dir / sub
                if not base.exists():
                    continue
                for md in sorted(base.rglob("*.md")):
                    if md.name in ("INDEX.md", "README.md", "overview.md"):
                        continue
                    seen.add(md)
                    _ingest_task(md, meshkore_dir, log, tasks, default_cat=mid,
                                 declared_module_ids=declared_module_ids)

    # Legacy layout — keep walking for transitional safety
    legacy_roadmap = meshkore_dir / "roadmap"
    if legacy_roadmap.exists():
        for sub in ("tasks", "log"):
            base = legacy_roadmap / sub
            if not base.exists():
                continue
            for md in sorted(base.rglob("*.md")):
                if md in seen:
                    continue
                if md.name in ("INDEX.md", "README.md", "overview.md", "manual-tasks.md", "_legacy-INDEX.md"):
                    continue
                _ingest_task(md, meshkore_dir, log, tasks, default_cat=None,
                             declared_module_ids=declared_module_ids)

    return tasks


def _ingest_task(md: Path, meshkore_dir: Path, log: callable, tasks: list[dict],
                 default_cat: str | None, declared_module_ids: set[str] | None):
    """Parse one task/log markdown file and append its frontmatter to *tasks*.

    Args:
        md: the markdown file to read.
        meshkore_dir: the .meshkore directory; the stored ``path`` is relative
            to it, log messages use paths relative to its parent.
        log: callable receiving warning strings.
        tasks: output list, mutated in place.
        default_cat: category to use when the frontmatter has none; when
            falsy, the file's parent folder name is used (with a warning).
        declared_module_ids: when not None, categories outside this set are
            reported with a warning (the task is still kept).

    Files without frontmatter or without an ``id`` are skipped. A missing
    frontmatter is reported unless the file stem starts with a
    ``YYYY-MM-DD`` date.
    """
    rel_path = md.relative_to(meshkore_dir.parent)
    fm = parse_frontmatter(md.read_text(encoding="utf-8"))
    if not fm:
        if not re.fullmatch(r"\d{4}-\d{2}-\d{2}.*", md.stem):
            log(f"WARN no frontmatter: {rel_path}")
        return
    if "id" not in fm:
        log(f"WARN missing id: {rel_path}")
        return

    status = fm.get("status", "backlog")
    if isinstance(status, list):
        # An empty `status:` line (or an inline list) parses to a list, which
        # is unhashable and would raise TypeError in the set-membership test.
        # Report it and drop the field so it is treated as missing.
        log(f"WARN invalid task status '{status}' in {md.name}")
        del fm["status"]
    elif status not in VALID_TASK_STATUSES:
        log(f"WARN invalid task status '{status}' in {md.name}")

    cat = fm.get("category")
    if isinstance(cat, list):
        # A list cannot serve as a module id (and is unhashable); fall back
        # to the default category as if the field were missing.
        if cat:
            log(f"WARN invalid category '{cat}' in {md.name}")
        cat = None
    if not cat:
        if default_cat:
            cat = default_cat
        else:
            log(f"WARN missing category: {rel_path} — defaulting to parent folder '{md.parent.name}'")
            cat = md.parent.name
        fm["category"] = cat
    if declared_module_ids is not None and cat not in declared_module_ids:
        log(f"WARN category '{cat}' is not a declared module ({rel_path}).")
    fm["path"] = str(md.relative_to(meshkore_dir))
    tasks.append(fm)


def collect_docs(docs_dir: Path, log: callable) -> dict:
    """Scan *docs_dir* and return ``{"tree": [...]}`` grouped by top-level folder.

    Recognised shapes (paths relative to *docs_dir*):

      - <category>/<slug>.md              flat doc, no diagrams
      - <category>/<slug>/README.md       folder doc, may own a diagrams/ folder
      - <category>/<sub>/<slug>.md        deeper nesting, no diagrams
      - <category>/<slug>/diagrams/*.mmd  diagrams attached to that folder's README

    Every emitted doc has a ``diagrams`` list (possibly empty). Categories
    are sorted by name; items within a category by (subpath, slug).
    """
    if not docs_dir.exists():
        return {"tree": []}

    by_category: dict[str, list[dict]] = {}
    claimed: set[Path] = set()

    def diagram_rank(entry: dict):
        # Known kinds in their configured order, unknown kinds last.
        return (DIAGRAM_KIND_ORDER.get(entry["kind"], 99), entry["slug"])

    def item_rank(entry: dict):
        return (entry.get("subpath") or "", entry.get("slug", ""))

    # Pass 1: a README.md inside a folder is a "rich" doc that may carry diagrams.
    top_level_readme = docs_dir / "README.md"
    for readme in sorted(docs_dir.rglob("README.md")):
        if readme == top_level_readme:
            continue
        rel = readme.relative_to(docs_dir)
        parts = rel.parts  # e.g. ("architecture", "cluster-layout", "README.md")
        if len(parts) < 2:
            continue
        folder = readme.parent
        nested = "/".join(parts[1:-2]) if len(parts) > 3 else ""
        item = _doc_item(readme, rel, parts[0], folder.name, nested, log)

        diag_dir = folder / "diagrams"
        mmd_files = list(diag_dir.glob("*.mmd")) if diag_dir.exists() else []
        item["diagrams"] = sorted(
            (_read_diagram(p, docs_dir.parent) for p in mmd_files),
            key=diagram_rank,
        )
        by_category.setdefault(parts[0], []).append(item)

        # Remember what this pass consumed so the second pass skips it.
        claimed.add(readme)
        claimed.update(mmd_files)

    # Pass 2: remaining plain .md files (simple docs without diagrams).
    ignored_names = ("INDEX.md", "README.md", "governance.md", "_legacy-INDEX.md")
    for md in sorted(docs_dir.rglob("*.md")):
        if md in claimed or md.name in ignored_names:
            continue
        rel = md.relative_to(docs_dir)
        parts = rel.parts
        if len(parts) < 2:
            continue
        nested = "/".join(parts[1:-1]) if len(parts) > 2 else ""
        item = _doc_item(md, rel, parts[0], md.stem, nested, log)
        item["diagrams"] = []
        by_category.setdefault(parts[0], []).append(item)

    tree = []
    for category in sorted(by_category):
        tree.append({"category": category, "items": sorted(by_category[category], key=item_rank)})
    return {"tree": tree}


def _doc_item(md: Path, rel: Path, category: str, slug: str, subpath: str, log: callable) -> dict:
    """Build the state entry for one documentation file.

    The title comes from the ``title`` frontmatter field, else the first
    ``# `` heading, else *slug*. ``status`` defaults to ``"stable"``; a value
    outside VALID_DOC_STATUSES is reported through *log* but kept as-is.
    ``path`` is ``str(rel)``.
    """
    text = md.read_text(encoding="utf-8")
    fm = parse_frontmatter(text) or {}
    title = fm.get("title") or _extract_h1(text) or slug
    status = fm.get("status", "stable")
    if isinstance(status, list):
        # An empty `status:` line (or an inline list) parses to a list, which
        # is unhashable and would raise TypeError in the set-membership test.
        # Report it and fall back to the default.
        log(f"WARN invalid doc status '{status}' in {rel}")
        status = "stable"
    elif status not in VALID_DOC_STATUSES:
        log(f"WARN invalid doc status '{status}' in {rel}")
    return {
        "slug": slug,
        "title": title,
        "category": category,
        "subpath": subpath,
        "status": status,
        "updated": fm.get("updated", ""),
        "owner": fm.get("owner", ""),
        "tags": fm.get("tags", []),
        "related": fm.get("related", []),
        "path": str(rel),
    }


# Tab order of diagram kinds in the viewer. The "skeleton" view
# (architecture) comes first, followed by schema, sequence and flow;
# "other" ranks last among the known kinds.
DIAGRAM_KIND_ORDER = {
    kind: rank
    for rank, kind in enumerate(("architecture", "schema", "sequence", "flow", "other"))
}


def collect_docs_with_modules(meshkore_dir: Path, log: callable) -> dict:
    """Combine cross-cutting docs with per-module READMEs into one tree.

    Cross-cutting docs are read from ``<meshkore>/docs/``. Each folder under
    ``<meshkore>/modules/`` that has a README.md contributes one item (plus
    the diagrams of its sibling ``diagrams/`` folder) to a synthetic
    ``modules`` category. For those items ``path`` is relative to
    *meshkore_dir* rather than to ``docs/``.

    When at least one module doc exists, the category list is re-sorted by
    category name.
    """
    docs = collect_docs(meshkore_dir / "docs", log)

    modules_root = meshkore_dir / "modules"
    if not modules_root.exists():
        return docs

    def diagram_rank(entry: dict):
        return (DIAGRAM_KIND_ORDER.get(entry["kind"], 99), entry["slug"])

    module_items = []
    for mod_dir in sorted(modules_root.iterdir()):
        readme = mod_dir / "README.md"
        if not (mod_dir.is_dir() and readme.exists()):
            continue
        rel = readme.relative_to(meshkore_dir)
        item = _doc_item(readme, rel, "modules", mod_dir.name, "", log)
        # Module docs are addressed relative to .meshkore, not docs/.
        item["path"] = str(rel)
        diag_dir = mod_dir / "diagrams"
        mmd_files = diag_dir.glob("*.mmd") if diag_dir.exists() else ()
        item["diagrams"] = sorted(
            (_read_diagram(p, meshkore_dir.parent) for p in mmd_files),
            key=diagram_rank,
        )
        module_items.append(item)

    if module_items:
        module_items.sort(key=lambda entry: entry["slug"])
        docs["tree"].append({"category": "modules", "items": module_items})
        # Keep the category list alphabetical so the output is stable.
        docs["tree"].sort(key=lambda entry: entry["category"])

    return docs


def _extract_h1(text: str) -> str | None:
    """Return the text of the first ``# `` heading after the frontmatter, or None."""
    candidates = (raw.strip() for raw in strip_frontmatter(text).splitlines())
    heading = next((ln for ln in candidates if ln.startswith("# ")), None)
    if heading is None:
        return None
    return heading[2:].strip()


# ─── Cluster info ────────────────────────────────────────────────────────

def load_cluster(meshkore_dir: Path) -> dict:
    """Read a few top-level scalar fields from cluster.yaml.

    Looks at ``<meshkore>/public/cluster.yaml`` first and falls back to the
    legacy ``<meshkore>/cluster.yaml``. Returns ``{}`` when neither exists.

    The result may contain ``version``, ``id``, ``type``, ``name`` and
    ``description`` (inline ``# comments`` and surrounding quotes removed)
    plus ``transport.endpoint``. Keys that are absent or empty are omitted.
    """
    # cluster.yaml lives in .meshkore/public/ (only committed file)
    cluster_yaml = meshkore_dir / "public" / "cluster.yaml"
    if not cluster_yaml.exists():
        cluster_yaml = meshkore_dir / "cluster.yaml"  # legacy fallback
    if not cluster_yaml.exists():
        return {}
    text = cluster_yaml.read_text(encoding="utf-8")

    # We only need a few top-level fields. Parse them with simple regexes.
    info: dict = {}
    for key in ("version", "id", "type", "name", "description"):
        # The value must sit on the key's own line or on an *indented*
        # continuation line. Using `\s*` here would skip over the newline of
        # an empty value and capture the next top-level key as the value.
        m = re.search(rf"^{key}:[ \t]*(?:\r?\n[ \t]+)?(\S.*)$", text, re.MULTILINE)
        if m:
            # Strip inline YAML comment
            raw = re.sub(r"\s+#.*$", "", m.group(1))
            info[key] = _strip_quotes(raw.strip())

    # transport endpoint: restrict the search to the lines indented under
    # `transport:` (up to the next top-level line), so an `endpoint:` that
    # belongs to a later section is never picked up.
    m = re.search(
        r"^transport:[ \t]*\r?\n((?:[ \t]+.*\n?|\r?\n)+?)(?=^\S|\Z)",
        text,
        re.MULTILINE,
    )
    if m:
        ep = re.search(r"endpoint:[ \t]*(\S.*)", m.group(1))
        if ep:
            info.setdefault("transport", {})["endpoint"] = _strip_quotes(ep.group(1).strip())
    return info


def load_members(meshkore_dir: Path) -> list[dict]:
    """Best-effort parse of the ``members:`` block of cluster.yaml.

    Looks at ``<meshkore>/public/cluster.yaml`` first, then the legacy
    ``<meshkore>/cluster.yaml``. Only list entries whose first key is
    ``role:`` are recognised; for each one the optional keys ``identity``,
    ``client``, ``agent_role`` and ``credentials`` are picked up.

    Members declared only in ``agents/*.yaml`` are NOT read by this
    function. Returns ``[]`` when no file or no ``members:`` block exists.
    """
    members: list[dict] = []
    cluster_yaml = meshkore_dir / "public" / "cluster.yaml"
    if not cluster_yaml.exists():
        cluster_yaml = meshkore_dir / "cluster.yaml"
    if not cluster_yaml.exists():
        return members

    text = cluster_yaml.read_text(encoding="utf-8")
    m = re.search(r"^members:\s*\n((?:\s+.+\n?)+?)(?=^\S|\Z)", text, re.MULTILINE)
    if not m:
        return members

    entries = re.findall(r"-\s+role:\s*(\S+)\s*\n((?:\s{4,}.+\n?)*)", m.group(1))
    for role, body in entries:
        entry: dict = {"role": _strip_quotes(role.strip())}
        for k in ("identity", "client", "agent_role", "credentials"):
            # Anchor on the start of an indented line so the key name cannot
            # match inside another key or a value, and keep the value on the
            # key's own line so an empty value does not swallow the next line.
            mm = re.search(rf"^[ \t]+{k}:[ \t]*(\S.*)$", body, re.MULTILINE)
            if mm:
                entry[k] = _strip_quotes(mm.group(1).strip())
        members.append(entry)
    return members


# ─── Timeline ────────────────────────────────────────────────────────────

def collect_timeline(meshkore_dir: Path, log: callable, recent_limit: int = 500) -> dict:
    """Load .meshkore/timeline/*.jsonl files and produce an indexed view.

    Each non-blank line that does not start with ``#`` must be a JSON
    object. Lines that fail to parse, or that parse to something other than
    an object, are reported through *log* and skipped. If reading or
    processing a file fails for any other reason, the file is reported and
    gets no ``days`` entry.

    Args:
        meshkore_dir: the .meshkore directory.
        log: callable receiving warning strings.
        recent_limit: maximum number of events kept in ``recent_events``;
            zero or a negative value keeps none.

    Returns:
        {
          "days": [{"date","event_count","conversations"}],   # newest first
          "recent_events": [...up to recent_limit events, newest last...],
          "conversations": {conv_id: {"first_ts","last_ts","event_count","authors"}},
          "modules_with_activity": [{"id","event_count"}]     # busiest first
        }
    """
    timeline_dir = meshkore_dir / "timeline"
    if not timeline_dir.exists():
        return {"days": [], "recent_events": [], "conversations": {}, "modules_with_activity": []}

    days: list[dict] = []
    recent_events: list[dict] = []
    conversations: dict = {}
    modules: dict[str, int] = {}

    for f in sorted(timeline_dir.glob("*.jsonl")):
        date_str = f.stem  # YYYY-MM-DD
        events_today: list[dict] = []
        convs_today: set[str] = set()
        try:
            for line in f.read_text(encoding="utf-8").splitlines():
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                try:
                    ev = json.loads(line)
                except (ValueError, RecursionError) as e:
                    log(f"WARN bad jsonl line in {f.name}: {e}")
                    continue
                if not isinstance(ev, dict):
                    # Valid JSON but not an event object (e.g. a list or a
                    # number). Skip just this line instead of letting the
                    # `.get` below fail and discard the whole file.
                    log(f"WARN bad jsonl line in {f.name}: expected a JSON object, "
                        f"got {type(ev).__name__}")
                    continue
                events_today.append(ev)
                if ev.get("conv"):
                    convs_today.add(ev["conv"])
                    c = conversations.setdefault(ev["conv"], {
                        "first_ts": ev.get("ts", ""),
                        "last_ts": ev.get("ts", ""),
                        "event_count": 0,
                        "authors": set(),
                    })
                    c["event_count"] += 1
                    c["last_ts"] = max(c["last_ts"], ev.get("ts", ""))
                    if ev.get("author"):
                        c["authors"].add(ev["author"])
                if ev.get("module"):
                    modules[ev["module"]] = modules.get(ev["module"], 0) + 1
        except Exception as e:
            # Deliberate best-effort: one unreadable file must not abort the build.
            log(f"WARN failed reading {f}: {e}")
            continue
        days.append({
            "date": date_str,
            "event_count": len(events_today),
            "conversations": len(convs_today),
        })
        recent_events.extend(events_today)

    # Keep only the newest events. A plain `[-recent_limit:]` would return
    # the whole list for recent_limit == 0 (because -0 == 0), so guard it.
    recent_events = recent_events[-recent_limit:] if recent_limit > 0 else []

    # Author sets are not JSON-serialisable; emit sorted lists instead.
    conv_out = {
        cid: {
            "first_ts": c["first_ts"],
            "last_ts": c["last_ts"],
            "event_count": c["event_count"],
            "authors": sorted(c["authors"]),
        }
        for cid, c in conversations.items()
    }

    days.sort(key=lambda d: d["date"], reverse=True)

    return {
        "days": days,
        "recent_events": recent_events,
        "conversations": conv_out,
        "modules_with_activity": [
            {"id": m, "event_count": n}
            for m, n in sorted(modules.items(), key=lambda x: -x[1])
        ],
    }


# ─── Modules ─────────────────────────────────────────────────────────────

# Accepted values for a module's `kind:` field.
VALID_MODULE_KINDS = {
    "area",
    "code",
    "docs",
    "spec",
}


def parse_modules_from_cluster_yaml(meshkore_dir: Path) -> list[dict]:
    """Parse the ``modules:`` block of cluster.yaml.

    Looks at ``<meshkore>/public/cluster.yaml`` first and falls back to the
    legacy ``<meshkore>/cluster.yaml``, matching load_cluster() and
    load_members().

    Returns [] if no file or no ``modules:`` block exists. Each entry is
    ``{id, name?, kind?, path?, description?, parent?}``; ``id`` is required
    and must be the first key of the list item. Optional keys with an empty
    value are omitted.
    """
    cluster_yaml = meshkore_dir / "public" / "cluster.yaml"
    if not cluster_yaml.exists():
        cluster_yaml = meshkore_dir / "cluster.yaml"  # legacy fallback
    if not cluster_yaml.exists():
        return []
    text = cluster_yaml.read_text(encoding="utf-8")
    # Capture the modules: block — everything indented under it until the next
    # top-level key or end of file.
    m = re.search(r"^modules:\s*\n((?:[ \t]+.*\n?|\n)+?)(?=^\S|\Z)", text, re.MULTILINE)
    if not m:
        return []
    block = m.group(1)
    # Each entry starts with "  - id: <something>" then 4+ space-indented lines
    entries = re.findall(
        r"^\s{2}-\s+id:\s*(\S+)\s*\n((?:^\s{4,}.+\n?)*)",
        block,
        re.MULTILINE,
    )
    out: list[dict] = []
    for mid, body in entries:
        e: dict = {"id": _strip_quotes(mid.strip())}
        for k in ("name", "kind", "path", "description", "parent"):
            # The value must be on the key's own line: with `\s*` an empty
            # `key:` would capture the following line as its value.
            mm = re.search(rf"^\s+{k}:[ \t]*(\S.*)$", body, re.MULTILINE)
            if mm:
                e[k] = _strip_quotes(mm.group(1).strip())
        out.append(e)
    return out


def collect_modules(meshkore_dir: Path, repo_root: Path, tasks: list[dict], log: callable) -> list[dict]:
    """Build the modules list. Declared modules in cluster.yaml win.

    Falls back to auto-discovery of top-level repo folders only if no
    modules: block is declared (with a loud WARN — that mode is meant for
    bootstrapping, not steady state).

    Tasks are counted per module by their ``category``. Tasks whose category
    is not a known module id are pooled into a synthetic ``_unknown`` module
    so they stay visible.

    Each module: {id, name, kind, path?, description?, task_counts, task_total,
    active_count}. ``task_counts`` keys are emitted in sorted order so the
    generated JSON is identical from run to run.
    """
    declared = parse_modules_from_cluster_yaml(meshkore_dir)

    if not declared:
        log("WARN no `modules:` block in cluster.yaml — falling back to "
            "auto-discovery of top-level repo folders. Declare modules "
            "explicitly to silence this warning and clean up the portal.")
        ignored = {".git", ".github", ".claude", ".meshkore", ".reference", "node_modules",
                   "target", "dist", ".venv", "venv", "__pycache__", ".vscode", ".idea",
                   ".fly", "_rjj"}
        for entry in sorted(repo_root.iterdir()):
            if entry.is_dir() and not entry.name.startswith(".") and entry.name not in ignored:
                declared.append({"id": entry.name, "name": entry.name, "kind": "code", "path": entry.name})

    # Validate kinds
    for m in declared:
        k = m.get("kind", "area")
        if k not in VALID_MODULE_KINDS:
            log(f"WARN module '{m['id']}' has invalid kind '{k}' — coercing to 'area'")
            k = "area"
        m["kind"] = k
        m.setdefault("name", m["id"])

    # Iterating the status *set* directly would yield a different key order
    # on every run (string hashing is randomised per process); fix the order.
    statuses = sorted(VALID_TASK_STATUSES)

    def empty_counts() -> dict[str, int]:
        return dict.fromkeys(statuses, 0)

    # Aggregate task counts per module (by category)
    counts_by_id: dict[str, dict[str, int]] = {}
    for t in tasks:
        raw_cat = t.get("category")
        # Module ids are strings; stringify anything else (numbers, lists) so
        # it can be used as a dict key and joined into the warning below.
        cat = str(raw_cat) if raw_cat else "uncategorized"
        bucket = counts_by_id.setdefault(cat, empty_counts())
        s = t.get("status", "backlog")
        # A list-valued status is unhashable; the isinstance check keeps the
        # membership test from raising TypeError.
        if isinstance(s, str) and s in bucket:
            bucket[s] += 1

    # Surface unknown categories as a synthetic "_unknown" module so tasks
    # don't disappear, but the user can see what needs fixing.
    declared_ids = {m["id"] for m in declared}
    unknown_cats = [c for c in counts_by_id if c not in declared_ids]
    if unknown_cats:
        log(f"WARN {len(unknown_cats)} task category(ies) not declared in modules: "
            f"{', '.join(sorted(unknown_cats))}. Add them to cluster.yaml or fix the tasks.")
        # Bucket them all under a single _unknown module
        merged = empty_counts()
        for c in unknown_cats:
            for s, n in counts_by_id[c].items():
                merged[s] = merged.get(s, 0) + n
        declared.append({"id": "_unknown", "name": "(unknown)", "kind": "area",
                         "description": f"Tasks with category not in modules: {', '.join(sorted(unknown_cats))}",
                         "task_counts": merged})

    # Attach counts
    for m in declared:
        c = m.get("task_counts") or counts_by_id.get(m["id"], empty_counts())
        m["task_counts"] = c
        m["task_total"] = sum(c.values())
        m["active_count"] = c.get("next", 0) + c.get("in_progress", 0) + c.get("blocked", 0)

    return declared


# ─── Diagrams ────────────────────────────────────────────────────────────
#
# Convention (see docs/conventions/diagrams.md):
#
#   .meshkore/docs/architecture/diagrams/*.mmd        ← project-wide
#   .meshkore/docs/modules/<id>/diagrams/*.mmd        ← per-module
#   .meshkore/docs/modules/<id>/submodules/<sub>/...  ← optional submodules
#
# Each .mmd may carry a YAML frontmatter block (title, description,
# updated, kind). When `kind` is missing, infer from filename prefix:
#   arch* (incl. architecture*) → architecture
#   schema* / db* → schema
#   sequence* / seq* → sequence
#   flow*         → flow
#   anything else → other

# Recognised diagram kinds; anything else is reported as "other".
DIAGRAM_KINDS = {
    "architecture",
    "flow",
    "other",
    "schema",
    "sequence",
}


def _infer_kind(slug: str) -> str:
    """Guess a diagram kind from the (case-insensitive) prefix of *slug*.

    The first matching row of the prefix table wins; a slug that matches no
    prefix is classified as ``"other"``.
    """
    prefix_table = (
        (("architecture", "arch"), "architecture"),
        (("schema", "db"), "schema"),
        (("sequence", "seq"), "sequence"),
        (("flow",), "flow"),
    )
    lowered = slug.lower()
    for prefixes, kind in prefix_table:
        if lowered.startswith(prefixes):
            return kind
    return "other"


def _read_diagram(mmd: Path, repo_relative_root: Path) -> dict:
    """Describe one ``.mmd`` diagram file as a dict for the state output.

    The file stem is the slug. ``kind`` is taken from the frontmatter, else
    inferred from the slug; a kind outside DIAGRAM_KINDS becomes "other".
    ``title`` defaults to the slug with dashes replaced by spaces, and
    ``path`` is expressed relative to *repo_relative_root*.
    """
    meta = parse_frontmatter(mmd.read_text(encoding="utf-8")) or {}
    name = mmd.stem
    chosen_kind = meta.get("kind") or _infer_kind(name)
    return {
        "slug": name,
        "title": meta.get("title") or name.replace("-", " "),
        "description": meta.get("description", ""),
        "kind": chosen_kind if chosen_kind in DIAGRAM_KINDS else "other",
        "updated": meta.get("updated", ""),
        "path": str(mmd.relative_to(repo_relative_root)),
    }


# ─── Stats ───────────────────────────────────────────────────────────────

def compute_stats(tasks: list[dict]) -> dict:
    """Count tasks per status and in total.

    Returns a dict with one key per valid task status (in sorted order, so
    the generated JSON is identical from run to run) plus ``"total"``.
    Tasks without a status count as ``"backlog"``; tasks with an
    unrecognised status only contribute to ``"total"``.
    """
    # Iterating the status *set* directly would yield a different key order
    # on every run (string hashing is randomised per process).
    counts = dict.fromkeys(sorted(VALID_TASK_STATUSES), 0)
    for t in tasks:
        s = t.get("status", "backlog")
        # A list-valued status (e.g. from an empty `status:` line) is
        # unhashable; the isinstance check avoids a TypeError on lookup.
        if isinstance(s, str) and s in counts:
            counts[s] += 1
    counts["total"] = len(tasks)
    return counts


# ─── Main ────────────────────────────────────────────────────────────────

def main():
    """Command-line entry point: collect everything and write the state files.

    Exits with status 1 when the --meshkore directory does not exist. With
    --validate, only a summary is logged and nothing is written. Otherwise
    the state is written as JSON to the output path and, next to it, as a
    ``.js`` file assigning the same JSON to ``window.__MESHKORE_STATE__``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--meshkore", default=".meshkore", type=Path)
    parser.add_argument("--out", default=None, type=Path)
    parser.add_argument("--validate", action="store_true")
    parser.add_argument("--no-docs", action="store_true")
    parser.add_argument("--quiet", action="store_true")
    args = parser.parse_args()

    meshkore = args.meshkore.resolve()
    if not meshkore.exists():
        print(f"ERROR: {meshkore} does not exist", file=sys.stderr)
        sys.exit(1)

    if args.out:
        out = args.out
    else:
        out = meshkore / "roadmap" / "state.json"

    def log(msg):
        # All diagnostics go to stderr; --quiet silences them.
        if args.quiet:
            return
        print(msg, file=sys.stderr)

    log(f"Reading {meshkore}")

    cluster_info = load_cluster(meshkore)
    members = load_members(meshkore)
    declared = parse_modules_from_cluster_yaml(meshkore)
    declared_ids = {m["id"] for m in declared} if declared else None
    tasks = collect_tasks(meshkore, log, declared_ids)
    if args.no_docs:
        docs = {"tree": []}
    else:
        docs = collect_docs_with_modules(meshkore, log)
    timeline = collect_timeline(meshkore, log)
    modules = collect_modules(meshkore, meshkore.parent, tasks, log)

    stats = compute_stats(tasks)
    state = {
        "generated_at": datetime.now(timezone.utc).isoformat(timespec="seconds"),
        "cluster": cluster_info,
        "members": members,
        "modules": modules,
        "timeline": timeline,
        "roadmap": {
            "tasks": tasks,
            "stats": stats,
        },
        "docs": docs,
    }

    doc_total = sum(len(c['items']) for c in docs['tree'])

    if args.validate:
        log(f"Validation OK · tasks={len(tasks)} · docs={doc_total}")
        return

    out.parent.mkdir(parents=True, exist_ok=True)
    json_text = json.dumps(state, indent=2, ensure_ascii=False)
    out.write_text(json_text, encoding="utf-8")

    # Same payload as a script file that assigns the state to a global.
    state_js = out.with_suffix(".js")
    js_text = (
        "// Generated by roadmap-build.py — do not edit\n"
        f"window.__MESHKORE_STATE__ = {json_text};\n"
    )
    state_js.write_text(js_text, encoding="utf-8")

    log(f"Wrote {out} + {state_js.name}")
    log(f"  tasks={len(tasks)} ({stats})")
    log(f"  docs={doc_total} across {len(docs['tree'])} categories")

# Run the build only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
