perf(content): batch article generation in single transaction + upsert

Replace SELECT-then-INSERT/UPDATE pairs in generate_articles() with
INSERT ... ON CONFLICT DO UPDATE statements, and wrap the entire loop in
a single transaction context manager. Eliminates ~1,500 individual SQLite
commits for a 500-article run (one commit per row is replaced by a single commit overall).

Also fix _get_article_stats() returning None for live/scheduled/draft counts
when the articles table is empty: wrap SUM expressions in COALESCE(..., 0)
so they always return integers regardless of row count.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-24 16:34:16 +01:00
parent 1e0aa6002a
commit 482b4f9fca
2 changed files with 129 additions and 139 deletions

View File

@@ -1713,9 +1713,9 @@ async def _get_article_stats() -> dict:
row = await fetch_one( row = await fetch_one(
"""SELECT """SELECT
COUNT(*) AS total, COUNT(*) AS total,
SUM(CASE WHEN status='published' AND published_at <= datetime('now') THEN 1 ELSE 0 END) AS live, COALESCE(SUM(CASE WHEN status='published' AND published_at <= datetime('now') THEN 1 ELSE 0 END), 0) AS live,
SUM(CASE WHEN status='published' AND published_at > datetime('now') THEN 1 ELSE 0 END) AS scheduled, COALESCE(SUM(CASE WHEN status='published' AND published_at > datetime('now') THEN 1 ELSE 0 END), 0) AS scheduled,
SUM(CASE WHEN status='draft' THEN 1 ELSE 0 END) AS draft COALESCE(SUM(CASE WHEN status='draft' THEN 1 ELSE 0 END), 0) AS draft
FROM articles""" FROM articles"""
) )
return dict(row) if row else {"total": 0, "live": 0, "scheduled": 0, "draft": 0} return dict(row) if row else {"total": 0, "live": 0, "scheduled": 0, "draft": 0}

View File

@@ -6,6 +6,7 @@ Data comes from DuckDB serving tables. Only articles + published_scenarios
are stored in SQLite (routing / application state). are stored in SQLite (routing / application state).
""" """
import json import json
import logging
import re import re
from datetime import UTC, date, datetime, timedelta from datetime import UTC, date, datetime, timedelta
from pathlib import Path from pathlib import Path
@@ -15,7 +16,9 @@ import yaml
from jinja2 import ChainableUndefined, Environment from jinja2 import ChainableUndefined, Environment
from ..analytics import fetch_analytics from ..analytics import fetch_analytics
from ..core import execute, fetch_one, slugify, utcnow_iso from ..core import execute, fetch_one, slugify, transaction, utcnow_iso
logger = logging.getLogger(__name__)
# ── Constants ──────────────────────────────────────────────────────────────── # ── Constants ────────────────────────────────────────────────────────────────
@@ -303,59 +306,51 @@ async def generate_articles(
generated = 0 generated = 0
now_iso = utcnow_iso() now_iso = utcnow_iso()
for row in rows: async with transaction() as db:
for lang in config["languages"]: for row in rows:
# Build render context: row data + language for lang in config["languages"]:
ctx = {**row, "language": lang} # Build render context: row data + language
ctx = {**row, "language": lang}
# Render URL pattern (no lang prefix — blueprint provides /<lang>) # Render URL pattern (no lang prefix — blueprint provides /<lang>)
url_path = _render_pattern(config["url_pattern"], ctx) url_path = _render_pattern(config["url_pattern"], ctx)
if is_reserved_path(url_path): if is_reserved_path(url_path):
continue continue
title = _render_pattern(config["title_pattern"], ctx) title = _render_pattern(config["title_pattern"], ctx)
meta_desc = _render_pattern(config["meta_description_pattern"], ctx) meta_desc = _render_pattern(config["meta_description_pattern"], ctx)
article_slug = slug + "-" + lang + "-" + str(row[config["natural_key"]]) article_slug = slug + "-" + lang + "-" + str(row[config["natural_key"]])
# Calculator content type: create scenario # Calculator content type: create scenario
scenario_slug = None scenario_slug = None
if config["content_type"] == "calculator": if config["content_type"] == "calculator":
# DuckDB lowercases all column names; build a case-insensitive # DuckDB lowercases all column names; build a case-insensitive
# reverse map so "ratepeak" (stored) matches "ratePeak" (DEFAULTS). # reverse map so "ratepeak" (stored) matches "ratePeak" (DEFAULTS).
_defaults_ci = {k.lower(): k for k in DEFAULTS} _defaults_ci = {k.lower(): k for k in DEFAULTS}
calc_overrides = { calc_overrides = {
_defaults_ci[k.lower()]: v _defaults_ci[k.lower()]: v
for k, v in row.items() for k, v in row.items()
if k.lower() in _defaults_ci and v is not None if k.lower() in _defaults_ci and v is not None
} }
state = validate_state(calc_overrides) state = validate_state(calc_overrides)
d = calc(state, lang=lang) d = calc(state, lang=lang)
scenario_slug = slug + "-" + str(row[config["natural_key"]]) scenario_slug = slug + "-" + str(row[config["natural_key"]])
dbl = state.get("dblCourts", 0) dbl = state.get("dblCourts", 0)
sgl = state.get("sglCourts", 0) sgl = state.get("sglCourts", 0)
court_config = f"{dbl} double + {sgl} single" court_config = f"{dbl} double + {sgl} single"
city = row.get("city_name", row.get("city", "")) city = row.get("city_name", row.get("city", ""))
country = row.get("country", state.get("country", "")) country = row.get("country", state.get("country", ""))
# Upsert published scenario await db.execute(
existing = await fetch_one(
"SELECT id FROM published_scenarios WHERE slug = ?",
(scenario_slug,),
)
if existing:
await execute(
"""UPDATE published_scenarios
SET state_json = ?, calc_json = ?, updated_at = ?
WHERE slug = ?""",
(json.dumps(state), json.dumps(d), now_iso, scenario_slug),
)
else:
await execute(
"""INSERT INTO published_scenarios """INSERT INTO published_scenarios
(slug, title, location, country, venue_type, ownership, (slug, title, location, country, venue_type, ownership,
court_config, state_json, calc_json, created_at) court_config, state_json, calc_json, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(slug) DO UPDATE SET
state_json = excluded.state_json,
calc_json = excluded.calc_json,
updated_at = excluded.created_at""",
( (
scenario_slug, city, city, country, scenario_slug, city, city, country,
state.get("venue", "indoor"), state.get("venue", "indoor"),
@@ -365,97 +360,89 @@ async def generate_articles(
), ),
) )
ctx["scenario_slug"] = scenario_slug ctx["scenario_slug"] = scenario_slug
# Render body template # Render body template
body_md = _render_pattern(config["body_template"], ctx) body_md = _render_pattern(config["body_template"], ctx)
body_html = mistune.html(body_md) body_html = mistune.html(body_md)
body_html = await bake_scenario_cards(body_html, lang=lang) body_html = await bake_scenario_cards(body_html, lang=lang)
# Extract FAQ pairs for structured data # Extract FAQ pairs for structured data
faq_pairs = _extract_faq_pairs(body_md) faq_pairs = _extract_faq_pairs(body_md)
# Build SEO metadata (full_url includes lang prefix for canonical/OG) # Build SEO metadata (full_url includes lang prefix for canonical/OG)
full_url = f"{base_url}/{lang}{url_path}" full_url = f"{base_url}/{lang}{url_path}"
publish_dt = datetime( publish_dt = datetime(
publish_date.year, publish_date.month, publish_date.day, publish_date.year, publish_date.month, publish_date.day,
8, 0, 0, 8, 0, 0,
).isoformat() ).isoformat()
# Hreflang links # Hreflang links
hreflang_links = [] hreflang_links = []
for alt_lang in config["languages"]: for alt_lang in config["languages"]:
alt_url = f"/{alt_lang}" + _render_pattern(config["url_pattern"], {**row, "language": alt_lang}) alt_url = f"/{alt_lang}" + _render_pattern(config["url_pattern"], {**row, "language": alt_lang})
hreflang_links.append(
f'<link rel="alternate" hreflang="{alt_lang}" href="{base_url}{alt_url}" />'
)
# x-default points to English (or first language)
default_lang = "en" if "en" in config["languages"] else config["languages"][0]
default_url = f"/{default_lang}" + _render_pattern(config["url_pattern"], {**row, "language": default_lang})
hreflang_links.append( hreflang_links.append(
f'<link rel="alternate" hreflang="{alt_lang}" href="{base_url}{alt_url}" />' f'<link rel="alternate" hreflang="x-default" href="{base_url}{default_url}" />'
) )
# x-default points to English (or first language)
default_lang = "en" if "en" in config["languages"] else config["languages"][0]
default_url = f"/{default_lang}" + _render_pattern(config["url_pattern"], {**row, "language": default_lang})
hreflang_links.append(
f'<link rel="alternate" hreflang="x-default" href="{base_url}{default_url}" />'
)
# JSON-LD # JSON-LD
breadcrumbs = _build_breadcrumbs(f"/{lang}{url_path}", base_url) breadcrumbs = _build_breadcrumbs(f"/{lang}{url_path}", base_url)
jsonld_objects = build_jsonld( jsonld_objects = build_jsonld(
config["schema_type"], config["schema_type"],
title=title, title=title,
description=meta_desc, description=meta_desc,
url=full_url, url=full_url,
published_at=publish_dt, published_at=publish_dt,
date_modified=now_iso, date_modified=now_iso,
language=lang, language=lang,
breadcrumbs=breadcrumbs, breadcrumbs=breadcrumbs,
faq_pairs=faq_pairs, faq_pairs=faq_pairs,
)
# Build SEO head block
seo_head = "\n".join([
f'<link rel="canonical" href="{full_url}" />',
*hreflang_links,
f'<meta property="og:title" content="{_escape_attr(title)}" />',
f'<meta property="og:description" content="{_escape_attr(meta_desc)}" />',
f'<meta property="og:url" content="{full_url}" />',
'<meta property="og:type" content="article" />',
*[
f'<script type="application/ld+json">{json.dumps(obj, ensure_ascii=False)}</script>'
for obj in jsonld_objects
],
])
# Write HTML to disk
build_dir = BUILD_DIR / lang
build_dir.mkdir(parents=True, exist_ok=True)
(build_dir / f"{article_slug}.html").write_text(body_html)
# Write markdown source to disk (for admin editing)
md_dir = BUILD_DIR / lang / "md"
md_dir.mkdir(parents=True, exist_ok=True)
(md_dir / f"{article_slug}.md").write_text(body_md)
# Upsert article in SQLite — keyed by (url_path, language) since
# multiple languages share the same url_path
existing_article = await fetch_one(
"SELECT id FROM articles WHERE url_path = ? AND language = ?",
(url_path, lang),
)
if existing_article:
await execute(
"""UPDATE articles
SET title = ?, meta_description = ?, template_slug = ?,
language = ?, date_modified = ?, updated_at = ?,
seo_head = ?
WHERE url_path = ? AND language = ?""",
(title, meta_desc, slug, lang, now_iso, now_iso, seo_head, url_path, lang),
) )
else:
await execute( # Build SEO head block
seo_head = "\n".join([
f'<link rel="canonical" href="{full_url}" />',
*hreflang_links,
f'<meta property="og:title" content="{_escape_attr(title)}" />',
f'<meta property="og:description" content="{_escape_attr(meta_desc)}" />',
f'<meta property="og:url" content="{full_url}" />',
'<meta property="og:type" content="article" />',
*[
f'<script type="application/ld+json">{json.dumps(obj, ensure_ascii=False)}</script>'
for obj in jsonld_objects
],
])
# Write HTML to disk
build_dir = BUILD_DIR / lang
build_dir.mkdir(parents=True, exist_ok=True)
(build_dir / f"{article_slug}.html").write_text(body_html)
# Write markdown source to disk (for admin editing)
md_dir = BUILD_DIR / lang / "md"
md_dir.mkdir(parents=True, exist_ok=True)
(md_dir / f"{article_slug}.md").write_text(body_md)
# Upsert article in SQLite — keyed by (url_path, language)
await db.execute(
"""INSERT INTO articles """INSERT INTO articles
(url_path, slug, title, meta_description, country, region, (url_path, slug, title, meta_description, country, region,
status, published_at, template_slug, language, date_modified, status, published_at, template_slug, language, date_modified,
seo_head, created_at) seo_head, created_at)
VALUES (?, ?, ?, ?, ?, ?, 'published', ?, ?, ?, ?, ?, ?)""", VALUES (?, ?, ?, ?, ?, ?, 'published', ?, ?, ?, ?, ?, ?)
ON CONFLICT(url_path, language) DO UPDATE SET
title = excluded.title,
meta_description = excluded.meta_description,
template_slug = excluded.template_slug,
date_modified = excluded.date_modified,
seo_head = excluded.seo_head,
updated_at = excluded.date_modified""",
( (
url_path, article_slug, title, meta_desc, url_path, article_slug, title, meta_desc,
row.get("country", ""), row.get("region", ""), row.get("country", ""), row.get("region", ""),
@@ -463,14 +450,17 @@ async def generate_articles(
), ),
) )
generated += 1 generated += 1
if generated % 25 == 0:
logger.info("%s: %d articles written…", slug, generated)
# Stagger dates # Stagger dates
published_today += 1 published_today += 1
if published_today >= articles_per_day: if published_today >= articles_per_day:
published_today = 0 published_today = 0
publish_date += timedelta(days=1) publish_date += timedelta(days=1)
logger.info("%s: done — %d total", slug, generated)
return generated return generated