Files
padelnomics/web/scripts/build_report_pdf.py
Deeman b50ca5a8cd feat(reports): PDF build infrastructure — premium WeasyPrint template
- report.css: full-bleed navy cover, Padelnomics logo watermark at 3.5%
  opacity (position:fixed, repeats every page), gold/teal accents, Georgia
  headings, running headers via CSS named strings, metric boxes, insight-box
- report.html: Jinja2 template with cover stats, TOC, body, disclaimer
- build_report_pdf.py: builds EN+DE PDFs from data/content/reports/*.md
  (WeasyPrint, mistune, PyYAML; reads logo as file:// URI for watermark)
- Makefile: report-pdf target

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-27 07:49:40 +01:00

186 lines
6.5 KiB
Python

#!/usr/bin/env python3
"""Build market intelligence report PDFs from data/content/reports/*.md.
Reads YAML frontmatter + Markdown body from each .md file, renders the
HTML template (web/src/padelnomics/templates/reports/report.html) with
the report content, and generates a PDF via WeasyPrint.
Output: data/content/reports/_build/<slug>-<lang>.pdf
Usage:
uv run python web/scripts/build_report_pdf.py [--slug q1-2026] [--lang en]
The --slug and --lang flags filter which reports to build. With no flags,
all .md files in data/content/reports/ are built.
"""
import argparse
import re
import sys
from pathlib import Path
import mistune
import yaml
from jinja2 import Template
from weasyprint import HTML
# This script sits two directories below the repository root
# (web/scripts/build_report_pdf.py), hence three ``.parent`` hops from the file.
REPO_ROOT = Path(__file__).parent.parent.parent

# Markdown report sources, and the sub-folder that receives the generated PDFs.
REPORTS_DIR = REPO_ROOT.joinpath("data", "content", "reports")
BUILD_DIR = REPORTS_DIR.joinpath("_build")

# Folder containing report.html / report.css, and the logo image handed to the template.
TEMPLATE_DIR = REPO_ROOT.joinpath("web", "src", "padelnomics", "templates", "reports")
LOGO_PATH = REPO_ROOT.joinpath("web", "src", "padelnomics", "static", "images", "logo.png")

# Leading frontmatter block delimited by two ``---`` lines; group 1 is the text
# between them. DOTALL lets ``.`` span the multi-line YAML.
FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n", flags=re.DOTALL)
def _parse_md(path: Path) -> tuple[dict, str]:
    """Split a report file into its YAML frontmatter and Markdown body.

    Args:
        path: The ``.md`` report file; it is read as UTF-8.

    Returns:
        ``(frontmatter, body)``: the parsed frontmatter mapping (an empty
        dict when the frontmatter block parses to nothing) and the text that
        follows the closing ``---`` line.

    Raises:
        ValueError: If the file does not start with a frontmatter block, or
            the frontmatter parses to something other than a mapping.
    """
    raw = path.read_text(encoding="utf-8")
    match = FRONTMATTER_RE.match(raw)
    # Raise explicitly instead of using ``assert``: assertions are removed
    # under ``python -O``, which would turn this into an opaque
    # AttributeError on ``None`` a line later.
    if match is None:
        raise ValueError(f"No YAML frontmatter found in {path}")

    frontmatter = yaml.safe_load(match.group(1)) or {}
    # Callers use ``.get`` on the result, so a YAML list or scalar is an
    # input error; report it here with the file name.
    if not isinstance(frontmatter, dict):
        raise ValueError(
            f"YAML frontmatter in {path} must be a mapping, "
            f"got {type(frontmatter).__name__}"
        )
    return frontmatter, raw[match.end():]
def _make_toc(body_md: str) -> list[dict]:
    """Collect the level-2 (``## ``) headings of a Markdown body for the TOC.

    Lines inside fenced code blocks (delimited by three or more backticks or
    tildes) are ignored, so a ``## `` line that is really code or a shell
    comment does not produce a TOC entry.

    Args:
        body_md: Markdown text without the frontmatter block.

    Returns:
        One ``{"title": <heading text>, "is_section": False}`` dict per
        heading, in document order.
    """
    toc = []
    open_fence = ""  # fence run that opened the current code block, "" when outside one
    for line in body_md.splitlines():
        stripped = line.strip()
        fence_char = stripped[:1]
        run = ""
        if fence_char in ("`", "~"):
            run = stripped[: len(stripped) - len(stripped.lstrip(fence_char))]

        if open_fence:
            # A closing fence uses the same character, is at least as long as
            # the opening run, and carries nothing else on the line.
            if run == stripped and run.startswith(open_fence):
                open_fence = ""
            continue

        if len(run) >= 3:
            # A backtick fence may not contain further backticks after the
            # run; such a line is inline code, not an opening fence.
            if not (fence_char == "`" and "`" in stripped[len(run):]):
                open_fence = run
                continue

        if line.startswith("## "):
            toc.append({"title": line[3:].strip(), "is_section": False})
    return toc
def _cover_stats_en() -> list[dict]:
    """Return the English cover statistics as ``{"value", "label"}`` dicts."""
    figures = (
        ("77,355", "Courts Worldwide"),
        ("+29%", "Growth in 18 Months"),
        ("80", "Countries Tracked"),
        ("12,441", "Venues in Pipeline"),
    )
    return [{"value": value, "label": label} for value, label in figures]
def _cover_stats_de() -> list[dict]:
    """Return the German cover statistics as ``{"value", "label"}`` dicts."""
    figures = (
        ("77.355", "Plätze weltweit"),
        ("+29 %", "Wachstum in 18 Monaten"),
        ("80", "Länder erfasst"),
        ("12.441", "Venues im Datensatz"),
    )
    return [{"value": value, "label": label} for value, label in figures]
def _labels_en(fm: dict) -> dict:
    """Return the fixed English label strings for the report template.

    ``fm`` (the report frontmatter) is accepted but not consulted; every
    value returned here is a constant.
    """
    subtitle = (
        "77,355 courts. +29% growth. 80 countries. The most complete "
        "independent picture of the global padel market."
    )
    # One entry per sentence, re-joined with single spaces.
    disclaimer = " ".join(
        (
            "This report has been prepared by Padelnomics for informational purposes only.",
            "All data is sourced from publicly available reports (FIP, Playtomic/PwC) and"
            " Padelnomics' proprietary data pipeline.",
            "Market figures reflect the best available data at time of publication and"
            " may differ from subsequently reported figures.",
            "Nothing in this report constitutes investment advice.",
            "© 2026 Padelnomics — padelnomics.io",
        )
    )
    return dict(
        report_slug_label="State of Padel Q1 2026",
        edition_label="Q1 2026 Edition",
        report_type_label="Global Market Intelligence Report",
        subtitle=subtitle,
        published_label="Published Q1 2026",
        confidential_label="For registered recipients",
        toc_heading="Contents",
        disclaimer=disclaimer,
    )
def _labels_de(fm: dict) -> dict:
    """Return the fixed German label strings for the report template.

    ``fm`` (the report frontmatter) is accepted but not consulted; every
    value returned here is a constant.
    """
    subtitle = (
        "77.355 Plätze. +29 % Wachstum. 80 Länder. Das vollständigste "
        "unabhängige Bild des globalen Padel-Markts."
    )
    # One entry per sentence, re-joined with single spaces.
    disclaimer = " ".join(
        (
            "Dieser Bericht wurde von Padelnomics ausschließlich zu Informationszwecken erstellt.",
            "Alle Daten stammen aus öffentlich zugänglichen Berichten (FIP, Playtomic/PwC) sowie"
            " der proprietären Datenpipeline von Padelnomics.",
            "Marktdaten spiegeln den besten verfügbaren Stand zum Zeitpunkt der"
            " Veröffentlichung wider.",
            "Der Bericht stellt keine Anlageberatung dar.",
            "© 2026 Padelnomics — padelnomics.io",
        )
    )
    return dict(
        report_slug_label="State of Padel Q1 2026",
        edition_label="Ausgabe Q1 2026",
        report_type_label="Globaler Marktintelligenz-Bericht",
        subtitle=subtitle,
        published_label="Veröffentlicht Q1 2026",
        confidential_label="Für registrierte Empfänger",
        toc_heading="Inhalt",
        disclaimer=disclaimer,
    )
def build_one(md_path: Path, output_dir: Path) -> Path:
    """Build a single PDF from a .md report file.

    The frontmatter supplies ``language`` (default ``"en"``), ``slug``
    (default: the file stem) and ``title`` (default: the slug). A language of
    ``"de"`` selects the German labels and cover stats; any other value
    selects the English ones.

    Args:
        md_path: Report source file with YAML frontmatter and Markdown body.
        output_dir: Directory for the PDF; created if it does not exist.

    Returns:
        Path of the written file, ``<output_dir>/<slug>-<lang>.pdf``.
    """
    fm, body_md = _parse_md(md_path)
    lang = fm.get("language", "en")
    slug = fm.get("slug", md_path.stem)
    title = fm.get("title", slug)

    body_html = mistune.html(body_md)
    is_german = lang == "de"
    labels = _labels_de(fm) if is_german else _labels_en(fm)
    cover_stats = _cover_stats_de() if is_german else _cover_stats_en()
    toc = _make_toc(body_md)

    template_html = (TEMPLATE_DIR / "report.html").read_text(encoding="utf-8")
    css = (TEMPLATE_DIR / "report.css").read_text(encoding="utf-8")

    # WeasyPrint resolves relative URLs from base_url; pass logo as file:// path
    logo_file_url = LOGO_PATH.as_uri()

    rendered = Template(template_html).render(
        language=lang,
        title=title,
        css=css,
        logo_path=logo_file_url,
        body_html=body_html,
        toc=toc,
        cover_stats=cover_stats,
        **labels,
    )

    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / f"{slug}-{lang}.pdf"
    HTML(string=rendered).write_pdf(str(output_path))

    # ``relative_to`` raises ValueError when output_dir is relative or lies
    # outside the repository. The PDF is already on disk at this point, so the
    # log line must not turn a successful build into a failure.
    try:
        shown_path = output_path.relative_to(REPO_ROOT)
    except ValueError:
        shown_path = output_path
    print(f" ✓ Built {shown_path}")
    return output_path
def main() -> None:
    """Command-line entry point: build every report that passes the filters.

    ``--slug`` keeps files whose stem contains the given substring; ``--lang``
    keeps files whose stem ends in ``-<lang>``. Exits with status 0 when
    REPORTS_DIR holds no ``.md`` files. A failing build is reported on stderr
    and the exception is re-raised.
    """
    cli = argparse.ArgumentParser(description="Build market intelligence report PDFs")
    cli.add_argument("--slug", help="Only build report with this slug substring")
    cli.add_argument("--lang", help="Only build this language (en or de)")
    opts = cli.parse_args()

    candidates = sorted(REPORTS_DIR.glob("*.md"))
    if not candidates:
        print(f"No .md files found in {REPORTS_DIR}")
        sys.exit(0)

    def _selected(stem: str) -> bool:
        """Tell whether a file stem passes both optional CLI filters."""
        if opts.slug and opts.slug not in stem:
            return False
        if opts.lang and not stem.endswith(f"-{opts.lang}"):
            return False
        return True

    built = 0
    for source in candidates:
        if not _selected(source.stem):
            continue
        try:
            build_one(source, BUILD_DIR)
        except Exception as exc:
            # Name the offending file, then let the original traceback propagate.
            print(f" ✗ Failed {source.name}: {exc}", file=sys.stderr)
            raise
        else:
            built += 1

    print(f"\n✓ Built {built} PDF(s) → {BUILD_DIR}")


if __name__ == "__main__":
    main()