#!/usr/bin/env python3
"""Build market intelligence report PDFs from data/content/reports/*.md.

Reads YAML frontmatter + Markdown body from each .md file, renders the HTML
template (web/src/padelnomics/templates/reports/report.html) with the report
content, and generates a PDF via WeasyPrint.

Output: data/content/reports/_build/<slug>-<lang>.pdf

Usage:
    uv run python web/scripts/build_report_pdf.py [--slug q1-2026] [--lang en]

The --slug and --lang flags filter which reports to build. With no flags,
all .md files in data/content/reports/ are built.
"""

import argparse
import re
import sys
from pathlib import Path

import mistune
import yaml
from jinja2 import Template
from weasyprint import HTML

REPO_ROOT = Path(__file__).parent.parent.parent
REPORTS_DIR = REPO_ROOT / "data" / "content" / "reports"
BUILD_DIR = REPORTS_DIR / "_build"
TEMPLATE_DIR = REPO_ROOT / "web" / "src" / "padelnomics" / "templates" / "reports"
LOGO_PATH = REPO_ROOT / "web" / "src" / "padelnomics" / "static" / "images" / "logo.png"

# Leading "---" line, the YAML payload (captured), then a closing "---" line.
FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", re.DOTALL)

# Opening/closing line of a fenced code block: up to three spaces of indent,
# then a run of at least three backticks or tildes, then the rest of the line.
_FENCE_RE = re.compile(r"^ {0,3}(`{3,}|~{3,})(.*)$")


def _parse_md(path: Path) -> tuple[dict, str]:
    """Return (frontmatter_dict, markdown_body) for a .md file.

    Raises:
        ValueError: if the file does not start with a YAML frontmatter block,
            or if the frontmatter does not parse to a mapping.
    """
    raw = path.read_text(encoding="utf-8")
    m = FRONTMATTER_RE.match(raw)
    # Explicit raise rather than `assert`: assertions are removed under
    # `python -O`, which would turn a malformed file into an AttributeError.
    if m is None:
        raise ValueError(f"No YAML frontmatter found in {path}")
    fm = yaml.safe_load(m.group(1)) or {}
    if not isinstance(fm, dict):
        # Callers use fm.get(...); fail here with a message naming the file.
        raise ValueError(
            f"YAML frontmatter in {path} must be a mapping, got {type(fm).__name__}"
        )
    body = raw[m.end():]
    return fm, body


def _make_toc(body_md: str) -> list[dict]:
    """Extract H2 headings from markdown for the TOC.

    Lines inside fenced code blocks (``` or ~~~) are ignored so that a
    literal "## " in a code sample does not become a TOC entry.

    Returns:
        One dict per heading, in document order, with keys ``title`` (heading
        text without the leading "## ") and ``is_section`` (always False).
    """
    toc = []
    open_fence = None  # marker string of the fence we are inside, if any
    for line in body_md.splitlines():
        fence = _FENCE_RE.match(line)
        if fence:
            marker, rest = fence.group(1), fence.group(2)
            if open_fence is None:
                # A backtick fence's info string may not contain backticks;
                # such a line is inline code, not a fence opener.
                if not (marker[0] == "`" and "`" in rest):
                    open_fence = marker
                    continue
            elif (
                marker[0] == open_fence[0]
                and len(marker) >= len(open_fence)
                and not rest.strip()
            ):
                # Closing fence: same character, at least as long, nothing after.
                open_fence = None
                continue
        if open_fence is None and line.startswith("## "):
            toc.append({"title": line[3:].strip(), "is_section": False})
    return toc


def _cover_stats_en() -> list[dict]:
    """Return the four headline figures shown on the English cover page."""
    return [
        {"value": "77,355", "label": "Courts Worldwide"},
        {"value": "+29%", "label": "Growth in 18 Months"},
        {"value": "80", "label": "Countries Tracked"},
        {"value": "12,441", "label": "Venues in Pipeline"},
    ]


def _cover_stats_de() -> list[dict]:
    """Return the four headline figures shown on the German cover page."""
    return [
        {"value": "77.355", "label": "Plätze weltweit"},
        {"value": "+29 %", "label": "Wachstum in 18 Monaten"},
        {"value": "80", "label": "Länder erfasst"},
        {"value": "12.441", "label": "Venues im Datensatz"},
    ]


def _labels_en(fm: dict) -> dict:
    """Return the English template labels.

    ``fm`` (the report frontmatter) is accepted for signature parity with
    ``_labels_de`` but is not currently read; every label is a fixed string.
    """
    return {
        "report_slug_label": "State of Padel Q1 2026",
        "edition_label": "Q1 2026 Edition",
        "report_type_label": "Global Market Intelligence Report",
        "subtitle": (
            "77,355 courts. +29% growth. 80 countries. "
            "The most complete independent picture of the global padel market."
        ),
        "published_label": "Published Q1 2026",
        "confidential_label": "For registered recipients",
        "toc_heading": "Contents",
        "disclaimer": (
            "This report has been prepared by Padelnomics for informational purposes only. "
            "All data is sourced from publicly available reports (FIP, Playtomic/PwC) and "
            "Padelnomics' proprietary data pipeline. Market figures reflect the best available "
            "data at time of publication and may differ from subsequently reported figures. "
            "Nothing in this report constitutes investment advice. "
            "© 2026 Padelnomics — padelnomics.io"
        ),
    }


def _labels_de(fm: dict) -> dict:
    """Return the German template labels.

    ``fm`` (the report frontmatter) is accepted for signature parity with
    ``_labels_en`` but is not currently read; every label is a fixed string.
    """
    return {
        "report_slug_label": "State of Padel Q1 2026",
        "edition_label": "Ausgabe Q1 2026",
        "report_type_label": "Globaler Marktintelligenz-Bericht",
        "subtitle": (
            "77.355 Plätze. +29 % Wachstum. 80 Länder. "
            "Das vollständigste unabhängige Bild des globalen Padel-Markts."
        ),
        "published_label": "Veröffentlicht Q1 2026",
        "confidential_label": "Für registrierte Empfänger",
        "toc_heading": "Inhalt",
        "disclaimer": (
            "Dieser Bericht wurde von Padelnomics ausschließlich zu Informationszwecken erstellt. "
            "Alle Daten stammen aus öffentlich zugänglichen Berichten (FIP, Playtomic/PwC) sowie "
            "der proprietären Datenpipeline von Padelnomics. Marktdaten spiegeln den besten "
            "verfügbaren Stand zum Zeitpunkt der Veröffentlichung wider. "
            "Der Bericht stellt keine Anlageberatung dar. "
            "© 2026 Padelnomics — padelnomics.io"
        ),
    }


def build_one(md_path: Path, output_dir: Path) -> Path:
    """Build a single PDF from a .md report file.

    The frontmatter keys ``language`` (default "en"), ``slug`` (default: the
    file stem) and ``title`` (default: the slug) select the label set and
    name the output file ``<slug>-<lang>.pdf`` inside ``output_dir``, which
    is created if missing.

    Returns:
        The path of the written PDF.

    Raises:
        ValueError: if the .md file has no usable YAML frontmatter.
    """
    fm, body_md = _parse_md(md_path)
    lang = fm.get("language", "en")
    slug = fm.get("slug", md_path.stem)
    title = fm.get("title", slug)

    body_html = mistune.html(body_md)
    # Any language other than "de" falls back to the English label set.
    labels = _labels_de(fm) if lang == "de" else _labels_en(fm)
    cover_stats = _cover_stats_de() if lang == "de" else _cover_stats_en()
    toc = _make_toc(body_md)

    template_html = (TEMPLATE_DIR / "report.html").read_text(encoding="utf-8")
    css = (TEMPLATE_DIR / "report.css").read_text(encoding="utf-8")

    # No base_url is given to WeasyPrint below, so the logo is handed to the
    # template as an absolute file:// URI rather than a relative path.
    logo_file_url = LOGO_PATH.as_uri()

    rendered = Template(template_html).render(
        language=lang,
        title=title,
        css=css,
        logo_path=logo_file_url,
        body_html=body_html,
        toc=toc,
        cover_stats=cover_stats,
        **labels,
    )

    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / f"{slug}-{lang}.pdf"
    HTML(string=rendered).write_pdf(str(output_path))

    # relative_to() raises ValueError when output_dir lies outside the repo;
    # the PDF is already written at that point, so just log the full path.
    try:
        shown_path = output_path.relative_to(REPO_ROOT)
    except ValueError:
        shown_path = output_path
    print(f"  ✓ Built {shown_path}")
    return output_path


def main() -> None:
    """Parse CLI flags and build every matching report into BUILD_DIR.

    Exits with status 0 when REPORTS_DIR contains no .md files. Any failure
    while building a report is reported on stderr and then re-raised, which
    stops the run at the first broken report.
    """
    parser = argparse.ArgumentParser(description="Build market intelligence report PDFs")
    parser.add_argument("--slug", help="Only build report with this slug substring")
    parser.add_argument("--lang", help="Only build this language (en or de)")
    args = parser.parse_args()

    md_files = sorted(REPORTS_DIR.glob("*.md"))
    if not md_files:
        print(f"No .md files found in {REPORTS_DIR}")
        sys.exit(0)

    built = 0
    for md_path in md_files:
        # Both filters look at the file name, not the frontmatter: --slug is a
        # substring of the stem and --lang must be its "-<lang>" suffix.
        if args.slug and args.slug not in md_path.stem:
            continue
        if args.lang and not md_path.stem.endswith(f"-{args.lang}"):
            continue
        try:
            build_one(md_path, BUILD_DIR)
            built += 1
        except Exception as exc:
            # Name the offending file before the traceback, then abort.
            print(f"  ✗ Failed {md_path.name}: {exc}", file=sys.stderr)
            raise

    print(f"\n✓ Built {built} PDF(s) → {BUILD_DIR}")


if __name__ == "__main__":
    main()