#!/usr/bin/env python3
"""Build market intelligence report PDFs from data/content/reports/*.md.

Reads YAML frontmatter + Markdown body from each .md file, renders the
HTML template (web/src/padelnomics/templates/reports/report.html) with
the report content, and generates a PDF via WeasyPrint.

Output: data/content/reports/_build/<slug>-<lang>.pdf

Usage:
    uv run python web/scripts/build_report_pdf.py [--slug q1-2026] [--lang en]

The --slug and --lang flags filter which reports to build.  With no flags,
all .md files in data/content/reports/ are built.
"""

import argparse
import re
import sys
from pathlib import Path

import mistune
import yaml
from jinja2 import Template
from weasyprint import HTML

# Repository root: three directory levels above this script file.
REPO_ROOT = Path(__file__).parent.parent.parent
# Directory scanned for report sources (*.md).
REPORTS_DIR = REPO_ROOT / "data" / "content" / "reports"
# Destination directory for generated PDFs.
BUILD_DIR = REPORTS_DIR / "_build"
# Directory holding report.html and report.css.
TEMPLATE_DIR = REPO_ROOT / "web" / "src" / "padelnomics" / "templates" / "reports"
# Logo image handed to the template as a file:// URL.
LOGO_PATH = REPO_ROOT / "web" / "src" / "padelnomics" / "static" / "images" / "logo.png"

# Matches a frontmatter block at the very start of the text: an opening "---"
# line, the lazily captured YAML payload (group 1), and a closing "---" line
# that must be followed by a newline.
FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL)


def _parse_md(path: Path) -> tuple[dict, str]:
    """Return (frontmatter_dict, markdown_body) for a .md file.

    The file is read as UTF-8 and must start with a frontmatter block that
    FRONTMATTER_RE recognises. An empty frontmatter block yields an empty dict.

    Args:
        path: Markdown file to read.

    Returns:
        A tuple of the parsed frontmatter mapping and the text that follows
        the frontmatter block.

    Raises:
        ValueError: If the file has no frontmatter block, or the frontmatter
            parses to something other than a mapping.
    """
    raw = path.read_text(encoding="utf-8")
    m = FRONTMATTER_RE.match(raw)
    # Explicit raise instead of `assert`: asserts are stripped under `python -O`,
    # which would turn a missing block into an AttributeError on `m.group`.
    if m is None:
        raise ValueError(f"No YAML frontmatter found in {path}")
    fm = yaml.safe_load(m.group(1)) or {}
    # Callers use `fm.get(...)`, so reject scalars and lists here with a clear message.
    if not isinstance(fm, dict):
        raise ValueError(
            f"YAML frontmatter in {path} must be a mapping, got {type(fm).__name__}"
        )
    body = raw[m.end():]
    return fm, body


def _make_toc(body_md: str) -> list[dict]:
    """Extract H2 headings from markdown for the TOC."""
    toc = []
    for line in body_md.splitlines():
        if line.startswith("## "):
            toc.append({"title": line[3:].strip(), "is_section": False})
    return toc


def _cover_stats_en() -> list[dict]:
    return [
        {"value": "77,355", "label": "Courts Worldwide"},
        {"value": "+29%",   "label": "Growth in 18 Months"},
        {"value": "80",     "label": "Countries Tracked"},
        {"value": "12,441", "label": "Venues in Pipeline"},
    ]


def _cover_stats_de() -> list[dict]:
    return [
        {"value": "77.355", "label": "Plätze weltweit"},
        {"value": "+29 %",  "label": "Wachstum in 18 Monaten"},
        {"value": "80",     "label": "Länder erfasst"},
        {"value": "12.441", "label": "Venues im Datensatz"},
    ]


def _labels_en(fm: dict) -> dict:
    return {
        "report_slug_label": "State of Padel Q1 2026",
        "edition_label":     "Q1 2026 Edition",
        "report_type_label": "Global Market Intelligence Report",
        "subtitle": (
            "77,355 courts. +29% growth. 80 countries. "
            "The most complete independent picture of the global padel market."
        ),
        "published_label":    "Published Q1 2026",
        "confidential_label": "For registered recipients",
        "toc_heading":        "Contents",
        "disclaimer": (
            "This report has been prepared by Padelnomics for informational purposes only. "
            "All data is sourced from publicly available reports (FIP, Playtomic/PwC) and "
            "Padelnomics' proprietary data pipeline. Market figures reflect the best available "
            "data at time of publication and may differ from subsequently reported figures. "
            "Nothing in this report constitutes investment advice. "
            "© 2026 Padelnomics — padelnomics.io"
        ),
    }


def _labels_de(fm: dict) -> dict:
    return {
        "report_slug_label": "State of Padel Q1 2026",
        "edition_label":     "Ausgabe Q1 2026",
        "report_type_label": "Globaler Marktintelligenz-Bericht",
        "subtitle": (
            "77.355 Plätze. +29 % Wachstum. 80 Länder. "
            "Das vollständigste unabhängige Bild des globalen Padel-Markts."
        ),
        "published_label":    "Veröffentlicht Q1 2026",
        "confidential_label": "Für registrierte Empfänger",
        "toc_heading":        "Inhalt",
        "disclaimer": (
            "Dieser Bericht wurde von Padelnomics ausschließlich zu Informationszwecken erstellt. "
            "Alle Daten stammen aus öffentlich zugänglichen Berichten (FIP, Playtomic/PwC) sowie "
            "der proprietären Datenpipeline von Padelnomics. Marktdaten spiegeln den besten "
            "verfügbaren Stand zum Zeitpunkt der Veröffentlichung wider. "
            "Der Bericht stellt keine Anlageberatung dar. "
            "© 2026 Padelnomics — padelnomics.io"
        ),
    }


def build_one(md_path: Path, output_dir: Path) -> Path:
    """Build a single PDF from a .md report file. Returns the output path.

    The frontmatter keys "language", "slug" and "title" are read, defaulting
    to "en", the file stem, and the slug respectively. German labels and cover
    statistics are used when the language is "de"; every other value gets the
    English set.

    Args:
        md_path: Markdown report with YAML frontmatter.
        output_dir: Directory for the PDF; created if it does not exist. It
            may be outside the repository.

    Returns:
        Path of the written file, `<output_dir>/<slug>-<lang>.pdf`.
    """
    fm, body_md = _parse_md(md_path)
    lang = fm.get("language", "en")
    slug = fm.get("slug", md_path.stem)
    title = fm.get("title", slug)

    body_html = mistune.html(body_md)

    labels = _labels_de(fm) if lang == "de" else _labels_en(fm)
    cover_stats = _cover_stats_de() if lang == "de" else _cover_stats_en()
    toc = _make_toc(body_md)

    template_html = (TEMPLATE_DIR / "report.html").read_text(encoding="utf-8")
    css = (TEMPLATE_DIR / "report.css").read_text(encoding="utf-8")

    # No base_url is handed to WeasyPrint below, so the logo is passed as an
    # absolute file:// URL rather than a relative path.
    logo_file_url = LOGO_PATH.as_uri()

    rendered = Template(template_html).render(
        language=lang,
        title=title,
        css=css,
        logo_path=logo_file_url,
        body_html=body_html,
        toc=toc,
        cover_stats=cover_stats,
        **labels,
    )

    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / f"{slug}-{lang}.pdf"

    HTML(string=rendered).write_pdf(str(output_path))

    # Path.relative_to raises ValueError for paths outside REPO_ROOT. The PDF
    # is already written at this point, so fall back to the full path instead
    # of reporting a successful build as a failure.
    try:
        shown_path = output_path.relative_to(REPO_ROOT)
    except ValueError:
        shown_path = output_path
    print(f"  ✓ Built {shown_path}")
    return output_path


def main() -> None:
    """Parse CLI flags and build every matching report in REPORTS_DIR.

    --slug keeps files whose stem contains the given substring; --lang keeps
    files whose stem ends with "-<lang>". If the directory holds no .md files
    the process exits with status 0. A failing build is reported on stderr and
    the exception is re-raised, aborting the run.
    """
    cli = argparse.ArgumentParser(description="Build market intelligence report PDFs")
    cli.add_argument("--slug", help="Only build report with this slug substring")
    cli.add_argument("--lang", help="Only build this language (en or de)")
    opts = cli.parse_args()

    candidates = sorted(REPORTS_DIR.glob("*.md"))
    if not candidates:
        print(f"No .md files found in {REPORTS_DIR}")
        sys.exit(0)

    def _selected(stem: str) -> bool:
        # Both filters are optional; an unset flag accepts every file.
        slug_ok = not opts.slug or opts.slug in stem
        lang_ok = not opts.lang or stem.endswith(f"-{opts.lang}")
        return slug_ok and lang_ok

    built = 0
    for report in candidates:
        if not _selected(report.stem):
            continue
        try:
            build_one(report, BUILD_DIR)
        except Exception as exc:
            # Name the offending file, then let the original traceback propagate.
            print(f"  ✗ Failed {report.name}: {exc}", file=sys.stderr)
            raise
        built += 1

    print(f"\n✓ Built {built} PDF(s) → {BUILD_DIR}")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()