perf(content): batch article generation in single transaction + upsert

Replace SELECT-then-INSERT/UPDATE pairs in generate_articles() with
INSERT ... ON CONFLICT DO UPDATE statements, and wrap the entire loop in
a single transaction context manager. This eliminates ~1,500 individual
SQLite commits for a 500-article run: the previous one-commit-per-row
pattern is replaced by a single commit for the whole batch.

Also fix _get_article_stats() returning None for the live/scheduled/draft
counts when the articles table is empty (SUM over zero rows yields NULL):
wrap each SUM expression in COALESCE(..., 0) so the query always returns
integers regardless of row count.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-24 16:34:16 +01:00
parent 1e0aa6002a
commit 482b4f9fca
2 changed files with 129 additions and 139 deletions

View File

@@ -1713,9 +1713,9 @@ async def _get_article_stats() -> dict:
row = await fetch_one(
"""SELECT
COUNT(*) AS total,
SUM(CASE WHEN status='published' AND published_at <= datetime('now') THEN 1 ELSE 0 END) AS live,
SUM(CASE WHEN status='published' AND published_at > datetime('now') THEN 1 ELSE 0 END) AS scheduled,
SUM(CASE WHEN status='draft' THEN 1 ELSE 0 END) AS draft
COALESCE(SUM(CASE WHEN status='published' AND published_at <= datetime('now') THEN 1 ELSE 0 END), 0) AS live,
COALESCE(SUM(CASE WHEN status='published' AND published_at > datetime('now') THEN 1 ELSE 0 END), 0) AS scheduled,
COALESCE(SUM(CASE WHEN status='draft' THEN 1 ELSE 0 END), 0) AS draft
FROM articles"""
)
return dict(row) if row else {"total": 0, "live": 0, "scheduled": 0, "draft": 0}

View File

@@ -6,6 +6,7 @@ Data comes from DuckDB serving tables. Only articles + published_scenarios
are stored in SQLite (routing / application state).
"""
import json
import logging
import re
from datetime import UTC, date, datetime, timedelta
from pathlib import Path
@@ -15,7 +16,9 @@ import yaml
from jinja2 import ChainableUndefined, Environment
from ..analytics import fetch_analytics
from ..core import execute, fetch_one, slugify, utcnow_iso
from ..core import execute, fetch_one, slugify, transaction, utcnow_iso
logger = logging.getLogger(__name__)
# ── Constants ────────────────────────────────────────────────────────────────
@@ -303,59 +306,51 @@ async def generate_articles(
generated = 0
now_iso = utcnow_iso()
for row in rows:
for lang in config["languages"]:
# Build render context: row data + language
ctx = {**row, "language": lang}
async with transaction() as db:
for row in rows:
for lang in config["languages"]:
# Build render context: row data + language
ctx = {**row, "language": lang}
# Render URL pattern (no lang prefix — blueprint provides /<lang>)
url_path = _render_pattern(config["url_pattern"], ctx)
if is_reserved_path(url_path):
continue
# Render URL pattern (no lang prefix — blueprint provides /<lang>)
url_path = _render_pattern(config["url_pattern"], ctx)
if is_reserved_path(url_path):
continue
title = _render_pattern(config["title_pattern"], ctx)
meta_desc = _render_pattern(config["meta_description_pattern"], ctx)
article_slug = slug + "-" + lang + "-" + str(row[config["natural_key"]])
title = _render_pattern(config["title_pattern"], ctx)
meta_desc = _render_pattern(config["meta_description_pattern"], ctx)
article_slug = slug + "-" + lang + "-" + str(row[config["natural_key"]])
# Calculator content type: create scenario
scenario_slug = None
if config["content_type"] == "calculator":
# DuckDB lowercases all column names; build a case-insensitive
# reverse map so "ratepeak" (stored) matches "ratePeak" (DEFAULTS).
_defaults_ci = {k.lower(): k for k in DEFAULTS}
calc_overrides = {
_defaults_ci[k.lower()]: v
for k, v in row.items()
if k.lower() in _defaults_ci and v is not None
}
state = validate_state(calc_overrides)
d = calc(state, lang=lang)
# Calculator content type: create scenario
scenario_slug = None
if config["content_type"] == "calculator":
# DuckDB lowercases all column names; build a case-insensitive
# reverse map so "ratepeak" (stored) matches "ratePeak" (DEFAULTS).
_defaults_ci = {k.lower(): k for k in DEFAULTS}
calc_overrides = {
_defaults_ci[k.lower()]: v
for k, v in row.items()
if k.lower() in _defaults_ci and v is not None
}
state = validate_state(calc_overrides)
d = calc(state, lang=lang)
scenario_slug = slug + "-" + str(row[config["natural_key"]])
dbl = state.get("dblCourts", 0)
sgl = state.get("sglCourts", 0)
court_config = f"{dbl} double + {sgl} single"
city = row.get("city_name", row.get("city", ""))
country = row.get("country", state.get("country", ""))
scenario_slug = slug + "-" + str(row[config["natural_key"]])
dbl = state.get("dblCourts", 0)
sgl = state.get("sglCourts", 0)
court_config = f"{dbl} double + {sgl} single"
city = row.get("city_name", row.get("city", ""))
country = row.get("country", state.get("country", ""))
# Upsert published scenario
existing = await fetch_one(
"SELECT id FROM published_scenarios WHERE slug = ?",
(scenario_slug,),
)
if existing:
await execute(
"""UPDATE published_scenarios
SET state_json = ?, calc_json = ?, updated_at = ?
WHERE slug = ?""",
(json.dumps(state), json.dumps(d), now_iso, scenario_slug),
)
else:
await execute(
await db.execute(
"""INSERT INTO published_scenarios
(slug, title, location, country, venue_type, ownership,
court_config, state_json, calc_json, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
(slug, title, location, country, venue_type, ownership,
court_config, state_json, calc_json, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(slug) DO UPDATE SET
state_json = excluded.state_json,
calc_json = excluded.calc_json,
updated_at = excluded.created_at""",
(
scenario_slug, city, city, country,
state.get("venue", "indoor"),
@@ -365,97 +360,89 @@ async def generate_articles(
),
)
ctx["scenario_slug"] = scenario_slug
ctx["scenario_slug"] = scenario_slug
# Render body template
body_md = _render_pattern(config["body_template"], ctx)
body_html = mistune.html(body_md)
body_html = await bake_scenario_cards(body_html, lang=lang)
# Render body template
body_md = _render_pattern(config["body_template"], ctx)
body_html = mistune.html(body_md)
body_html = await bake_scenario_cards(body_html, lang=lang)
# Extract FAQ pairs for structured data
faq_pairs = _extract_faq_pairs(body_md)
# Extract FAQ pairs for structured data
faq_pairs = _extract_faq_pairs(body_md)
# Build SEO metadata (full_url includes lang prefix for canonical/OG)
full_url = f"{base_url}/{lang}{url_path}"
publish_dt = datetime(
publish_date.year, publish_date.month, publish_date.day,
8, 0, 0,
).isoformat()
# Build SEO metadata (full_url includes lang prefix for canonical/OG)
full_url = f"{base_url}/{lang}{url_path}"
publish_dt = datetime(
publish_date.year, publish_date.month, publish_date.day,
8, 0, 0,
).isoformat()
# Hreflang links
hreflang_links = []
for alt_lang in config["languages"]:
alt_url = f"/{alt_lang}" + _render_pattern(config["url_pattern"], {**row, "language": alt_lang})
# Hreflang links
hreflang_links = []
for alt_lang in config["languages"]:
alt_url = f"/{alt_lang}" + _render_pattern(config["url_pattern"], {**row, "language": alt_lang})
hreflang_links.append(
f'<link rel="alternate" hreflang="{alt_lang}" href="{base_url}{alt_url}" />'
)
# x-default points to English (or first language)
default_lang = "en" if "en" in config["languages"] else config["languages"][0]
default_url = f"/{default_lang}" + _render_pattern(config["url_pattern"], {**row, "language": default_lang})
hreflang_links.append(
f'<link rel="alternate" hreflang="{alt_lang}" href="{base_url}{alt_url}" />'
f'<link rel="alternate" hreflang="x-default" href="{base_url}{default_url}" />'
)
# x-default points to English (or first language)
default_lang = "en" if "en" in config["languages"] else config["languages"][0]
default_url = f"/{default_lang}" + _render_pattern(config["url_pattern"], {**row, "language": default_lang})
hreflang_links.append(
f'<link rel="alternate" hreflang="x-default" href="{base_url}{default_url}" />'
)
# JSON-LD
breadcrumbs = _build_breadcrumbs(f"/{lang}{url_path}", base_url)
jsonld_objects = build_jsonld(
config["schema_type"],
title=title,
description=meta_desc,
url=full_url,
published_at=publish_dt,
date_modified=now_iso,
language=lang,
breadcrumbs=breadcrumbs,
faq_pairs=faq_pairs,
)
# Build SEO head block
seo_head = "\n".join([
f'<link rel="canonical" href="{full_url}" />',
*hreflang_links,
f'<meta property="og:title" content="{_escape_attr(title)}" />',
f'<meta property="og:description" content="{_escape_attr(meta_desc)}" />',
f'<meta property="og:url" content="{full_url}" />',
'<meta property="og:type" content="article" />',
*[
f'<script type="application/ld+json">{json.dumps(obj, ensure_ascii=False)}</script>'
for obj in jsonld_objects
],
])
# Write HTML to disk
build_dir = BUILD_DIR / lang
build_dir.mkdir(parents=True, exist_ok=True)
(build_dir / f"{article_slug}.html").write_text(body_html)
# Write markdown source to disk (for admin editing)
md_dir = BUILD_DIR / lang / "md"
md_dir.mkdir(parents=True, exist_ok=True)
(md_dir / f"{article_slug}.md").write_text(body_md)
# Upsert article in SQLite — keyed by (url_path, language) since
# multiple languages share the same url_path
existing_article = await fetch_one(
"SELECT id FROM articles WHERE url_path = ? AND language = ?",
(url_path, lang),
)
if existing_article:
await execute(
"""UPDATE articles
SET title = ?, meta_description = ?, template_slug = ?,
language = ?, date_modified = ?, updated_at = ?,
seo_head = ?
WHERE url_path = ? AND language = ?""",
(title, meta_desc, slug, lang, now_iso, now_iso, seo_head, url_path, lang),
# JSON-LD
breadcrumbs = _build_breadcrumbs(f"/{lang}{url_path}", base_url)
jsonld_objects = build_jsonld(
config["schema_type"],
title=title,
description=meta_desc,
url=full_url,
published_at=publish_dt,
date_modified=now_iso,
language=lang,
breadcrumbs=breadcrumbs,
faq_pairs=faq_pairs,
)
else:
await execute(
# Build SEO head block
seo_head = "\n".join([
f'<link rel="canonical" href="{full_url}" />',
*hreflang_links,
f'<meta property="og:title" content="{_escape_attr(title)}" />',
f'<meta property="og:description" content="{_escape_attr(meta_desc)}" />',
f'<meta property="og:url" content="{full_url}" />',
'<meta property="og:type" content="article" />',
*[
f'<script type="application/ld+json">{json.dumps(obj, ensure_ascii=False)}</script>'
for obj in jsonld_objects
],
])
# Write HTML to disk
build_dir = BUILD_DIR / lang
build_dir.mkdir(parents=True, exist_ok=True)
(build_dir / f"{article_slug}.html").write_text(body_html)
# Write markdown source to disk (for admin editing)
md_dir = BUILD_DIR / lang / "md"
md_dir.mkdir(parents=True, exist_ok=True)
(md_dir / f"{article_slug}.md").write_text(body_md)
# Upsert article in SQLite — keyed by (url_path, language)
await db.execute(
"""INSERT INTO articles
(url_path, slug, title, meta_description, country, region,
status, published_at, template_slug, language, date_modified,
seo_head, created_at)
VALUES (?, ?, ?, ?, ?, ?, 'published', ?, ?, ?, ?, ?, ?)""",
(url_path, slug, title, meta_description, country, region,
status, published_at, template_slug, language, date_modified,
seo_head, created_at)
VALUES (?, ?, ?, ?, ?, ?, 'published', ?, ?, ?, ?, ?, ?)
ON CONFLICT(url_path, language) DO UPDATE SET
title = excluded.title,
meta_description = excluded.meta_description,
template_slug = excluded.template_slug,
date_modified = excluded.date_modified,
seo_head = excluded.seo_head,
updated_at = excluded.date_modified""",
(
url_path, article_slug, title, meta_desc,
row.get("country", ""), row.get("region", ""),
@@ -463,14 +450,17 @@ async def generate_articles(
),
)
generated += 1
generated += 1
if generated % 25 == 0:
logger.info("%s: %d articles written…", slug, generated)
# Stagger dates
published_today += 1
if published_today >= articles_per_day:
published_today = 0
publish_date += timedelta(days=1)
# Stagger dates
published_today += 1
if published_today >= articles_per_day:
published_today = 0
publish_date += timedelta(days=1)
logger.info("%s: done — %d total", slug, generated)
return generated