#!/usr/bin/env python3
"""
migrate-tasklist.py — One-shot: convert _rjj/tasklist/INDEX.md table rows to
individual .md files with YAML frontmatter under
.meshkore/modules/<module>/{tasks,log}/ (standard §2 canonical layout).

Each row in INDEX.md becomes a single .md file:
  - DONE rows  → .meshkore/modules/<module>/log/{YYYY-MM}/<id>-<slug>.md  (status: done)
  - NEXT       → .meshkore/modules/<module>/tasks/<id>-<slug>.md          (status: next)
  - UPCOMING   → .meshkore/modules/<module>/tasks/<id>-<slug>.md          (status: next)
  - PLANNED    → .meshkore/modules/<module>/tasks/<id>-<slug>.md          (status: backlog)
  - FUTURE     → .meshkore/modules/<module>/tasks/<id>-<slug>.md          (status: backlog)
  - ECOSYSTEM  → .meshkore/modules/ecosystem/tasks/<id>-<slug>.md         (status: backlog)

Continuation rows (`| | text | | | |`) are appended as the body of the task.

Usage:
    python3 migrate-tasklist.py <INDEX.md> <out-meshkore-dir> [--dry-run]

The <out-meshkore-dir> argument is the `.meshkore/` directory of the
target repo. Module folders are created under `<out>/modules/<id>/`.
"""
from __future__ import annotations

import argparse
import re
import sys
from datetime import date
from pathlib import Path

# Matches a table row whose first cell is a task ID: one of the letters
# D/N/U/P/F/E followed by digits (case-insensitive), e.g. `| D12 |`.
# The letters presumably mirror the section names below — confirm against INDEX.md.
ID_RE = re.compile(r"^\|\s*(?P<id>D\d+|N\d+|U\d+|P\d+|F\d+|E\d+)\s*\|", re.IGNORECASE)
# Matches a level-2 markdown heading that begins with one of the known section
# names (case-insensitive); anything after the name on the heading line is ignored.
SECTION_RE = re.compile(r"^##\s+(?P<name>DONE|NEXT|UPCOMING|PLANNED|FUTURE|ECOSYSTEM)", re.IGNORECASE)

# Maps a lower-cased section name to the `status` value written into the
# generated file's frontmatter.
STATUS_BY_SECTION = {
    "done": "done",
    "next": "next",
    "upcoming": "next",
    "planned": "backlog",
    "future": "backlog",
    "ecosystem": "backlog",
}


def slugify(text: str) -> str:
    """Return a filename-safe slug derived from *text*.

    The text is lower-cased, every character other than ``a-z``, ``0-9``,
    whitespace and ``-`` is removed, and runs of whitespace/hyphens collapse
    to a single ``-``. The result is limited to 60 characters and never
    starts or ends with a hyphen. If nothing usable remains, ``"task"`` is
    returned so callers always get a non-empty slug.
    """
    t = text.lower()
    t = re.sub(r"[^a-z0-9\s-]", "", t)
    t = re.sub(r"[\s-]+", "-", t)
    # Strip again after truncating: the 60-char cut can land right after a
    # word and would otherwise leave a dangling "-" at the end of the slug.
    return t.strip("-")[:60].rstrip("-") or "task"


def parse_row(line: str) -> list[str] | None:
    """Parse a markdown table row into cells. Returns None if not a row."""
    line = line.rstrip("\n")
    if not line.startswith("|"):
        return None
    if "---" in line:
        return None
    # Split on | but keep cells
    parts = line.split("|")
    # First and last are empty
    cells = [c.strip() for c in parts[1:-1]] if len(parts) >= 3 else []
    return cells


def is_continuation(cells: list[str]) -> bool:
    """Return True when *cells* is a continuation row (its first cell is empty).

    An empty cell list is not a continuation row. The result is always a
    real ``bool``, never the (falsy) list itself.
    """
    return bool(cells) and cells[0] == ""


def section_to_folder(section: str, module: str, when: str | None = None) -> tuple[str, str]:
    """Return ``(subdir, status)`` for a task in *section*.

    ``subdir`` is relative to the ``.meshkore/`` root and follows the
    standard §2 canonical layout ``modules/<id>/{tasks,log}/`` (NOT the
    pre-v3 ``roadmap/{tasks,log}/`` layout). ``status`` is looked up in
    ``STATUS_BY_SECTION``.

    Args:
        section: Section name, case-insensitive; must be a key of
            ``STATUS_BY_SECTION`` once lower-cased.
        module: Module name taken from the table. It is lower-cased and every
            character outside ``a-z0-9_-`` is replaced by ``-``. Empty values
            and placeholders with no letter or digit (e.g. ``—``) fall back
            to ``general`` rather than producing a folder such as ``modules/-``.
        when: Free-text date of a DONE row; only consulted for that section.

    Raises:
        KeyError: If *section* is not a known section name.
    """
    section = section.lower()
    status = STATUS_BY_SECTION[section]
    if section == "ecosystem":
        # Ecosystem tasks always live under the fixed "ecosystem" module.
        return "modules/ecosystem/tasks", status

    mod = (module or "general").strip().lower()
    mod = re.sub(r"[^a-z0-9_-]", "-", mod)
    if not re.search(r"[a-z0-9]", mod):
        mod = "general"

    if section == "done":
        # modules/<id>/log/<YYYY-MM>/
        # Heuristic: the year is fixed to 2026 and the month is guessed from a
        # month-name fragment in `when` ("abr" presumably covers Spanish
        # "abril" — confirm). Anything unrecognised lands in 2026-04.
        ym = "2026-04"
        if when:
            w = when.lower()
            if "mar" in w:
                ym = "2026-03"
            elif "apr" in w or "abr" in w:
                ym = "2026-04"
            elif "may" in w:
                ym = "2026-05"
        return f"modules/{mod}/log/{ym}", status
    return f"modules/{mod}/tasks", status


def _parse_index_rows(lines: list[str]) -> list[dict]:
    """Collect one dict per ID-bearing table row found under a known section heading."""
    section: str | None = None
    rows: list[dict] = []
    current: dict | None = None  # row still accepting continuation lines

    for line in lines:
        sm = SECTION_RE.match(line.strip())
        if sm:
            # A new section heading closes whatever row was open.
            if current:
                rows.append(current)
                current = None
            section = sm.group("name").lower()
            continue

        if not section:
            # Ignore everything before the first recognised section.
            continue

        cells = parse_row(line)
        if cells is None:
            # Not a table row — flush current
            if current:
                rows.append(current)
                current = None
            continue

        if is_continuation(cells):
            if current is None:
                continue
            # Append non-empty cells as body
            body_cells = [c for c in cells[1:] if c]
            if body_cells:
                current.setdefault("body", []).append(" ".join(body_cells))
            continue

        # New row — flush previous
        if current:
            rows.append(current)
            current = None

        idm = ID_RE.match(line)
        if not idm:
            # Header rows and rows without a recognised ID are skipped.
            continue
        task_id = idm.group("id").upper()
        # Common to all sections: cells[0]=id, cells[1]=what
        what = cells[1] if len(cells) >= 2 else ""
        if section == "done":
            # | # | What | When | Details |
            details = cells[3] if len(cells) >= 4 else ""
            current = {
                "id": task_id,
                "section": section,
                "what": what,
                "when": cells[2] if len(cells) >= 3 else "",
                "module": "",  # DONE rows carry no module column
                "body": [details] if details else [],
            }
        elif section == "ecosystem":
            # | # | What | Who builds | Depends on | Module |
            current = {
                "id": task_id,
                "section": section,
                "what": what,
                "module": cells[4] if len(cells) >= 5 else "ecosystem",
                "depends_on": cells[3] if len(cells) >= 4 else "",
                "body": [],
            }
        else:
            # | # | What | Depends on | Effort | Module |
            current = {
                "id": task_id,
                "section": section,
                "what": what,
                "module": cells[4] if len(cells) >= 5 else "uncategorized",
                "depends_on": cells[2] if len(cells) >= 3 else "",
                "effort": cells[3] if len(cells) >= 4 else "",
                "body": [],
            }

    if current:
        rows.append(current)
    return rows


def _yaml_dq(value: str) -> str:
    """Wrap *value* in double quotes, swapping embedded `"` for `'` (as is done for titles)."""
    return '"' + value.replace('"', "'") + '"'


def _render_task(row: dict, status: str, today: str) -> str:
    """Build the full file content (frontmatter, heading, body) for one parsed row."""
    title = row.get("what", "").replace('"', "'")
    # Strip markdown emphasis from the title for frontmatter cleanliness
    title_clean = re.sub(r"\*+", "", title).strip()
    tags = [row.get("module", "")] if row.get("module") else []
    tags = [t for t in tags if t and t not in ("uncategorized", "—")]

    fm = [
        "---",
        f"id: {row['id']}",
        f'title: "{title_clean[:120]}"',
        f"status: {status}",
        f"category: {row.get('module', 'uncategorized') or 'uncategorized'}",
        f"updated: {today}",
        "owner: rjj",
        f"tags: {tags}",
    ]
    if row.get("depends_on") and row["depends_on"] not in ("—", "-", ""):
        fm.append(f"depends_on: {_yaml_dq(row['depends_on'])}")
    if row.get("effort"):
        fm.append(f"effort: {_yaml_dq(row['effort'])}")
    if row.get("when"):
        fm.append(f"when: {_yaml_dq(row['when'])}")
    fm.append("---")
    fm.append("")

    body = "\n\n".join(row.get("body", [])).strip()
    if not body:
        body = f"Migrated from `_rjj/tasklist/INDEX.md` ({row['id']})."

    return "\n".join(fm) + "\n# " + title_clean + "\n\n" + body + "\n"


def main() -> None:
    """Command-line entry point: parse INDEX.md and write one file per task.

    Reads the index file given on the command line, turns each ID-bearing
    table row into a task, and writes it to
    ``<meshkore_dir>/<subdir>/<id>-<slug>.md``. Rows with an empty
    description or one starting with ``~~`` are skipped. With ``--dry-run``
    the target paths are printed and nothing is created on disk.

    Exits with status 1 if the index file does not exist.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("index_md", type=Path)
    ap.add_argument("meshkore_dir", type=Path, help=".meshkore/ root of the target repo (paths are built as <meshkore_dir>/modules/<id>/...)")
    ap.add_argument("--dry-run", action="store_true")
    args = ap.parse_args()

    if not args.index_md.exists():
        print(f"ERROR: {args.index_md} not found", file=sys.stderr)
        sys.exit(1)

    rows = _parse_index_rows(args.index_md.read_text(encoding="utf-8").splitlines())
    print(f"Parsed {len(rows)} rows from {args.index_md.name}", file=sys.stderr)

    if not args.dry_run:
        # A dry run must leave the filesystem untouched.
        args.meshkore_dir.mkdir(parents=True, exist_ok=True)

    today = date.today().isoformat()
    # Dry-run paths are shown relative to the directory that contains the
    # .meshkore/ root, so they read as ".meshkore/modules/...".
    display_root = args.meshkore_dir.parent
    written = 0

    for r in rows:
        what = r.get("what", "")
        if not what or what.startswith("~~"):
            # Skip strikethroughs (cancelled / superseded entries)
            continue
        subpath, status = section_to_folder(r["section"], r.get("module", ""), r.get("when"))
        out_dir = args.meshkore_dir / subpath
        out_path = out_dir / f"{r['id']}-{slugify(what)}.md"

        if args.dry_run:
            print(f"[DRY] {out_path.relative_to(display_root)}")
            continue

        out_dir.mkdir(parents=True, exist_ok=True)
        out_path.write_text(_render_task(r, status, today), encoding="utf-8")
        written += 1

    if not args.dry_run:
        # Report files actually written; skipped rows are not counted.
        print(f"Wrote {written} task files under {args.meshkore_dir}")


# Run the migration only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()