perf(content): batch article generation in a single transaction + upsert

Replace SELECT-then-INSERT/UPDATE pairs in generate_articles() with
INSERT ... ON CONFLICT DO UPDATE statements, and wrap the entire loop in
a single transaction context manager. Eliminates ~1,500 individual SQLite
commits for a 500-article run (one commit per row replaced by one total).

Also fix _get_article_stats() returning None for the live/scheduled/draft
counts when the articles table is empty: SQLite's SUM yields NULL over zero
rows, so wrap each SUM expression in COALESCE(..., 0) so the query always
returns integers regardless of row count.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-24 16:34:16 +01:00
parent 1e0aa6002a
commit 482b4f9fca
2 changed files with 129 additions and 139 deletions

View File

@@ -1713,9 +1713,9 @@ async def _get_article_stats() -> dict:
row = await fetch_one( row = await fetch_one(
"""SELECT """SELECT
COUNT(*) AS total, COUNT(*) AS total,
SUM(CASE WHEN status='published' AND published_at <= datetime('now') THEN 1 ELSE 0 END) AS live, COALESCE(SUM(CASE WHEN status='published' AND published_at <= datetime('now') THEN 1 ELSE 0 END), 0) AS live,
SUM(CASE WHEN status='published' AND published_at > datetime('now') THEN 1 ELSE 0 END) AS scheduled, COALESCE(SUM(CASE WHEN status='published' AND published_at > datetime('now') THEN 1 ELSE 0 END), 0) AS scheduled,
SUM(CASE WHEN status='draft' THEN 1 ELSE 0 END) AS draft COALESCE(SUM(CASE WHEN status='draft' THEN 1 ELSE 0 END), 0) AS draft
FROM articles""" FROM articles"""
) )
return dict(row) if row else {"total": 0, "live": 0, "scheduled": 0, "draft": 0} return dict(row) if row else {"total": 0, "live": 0, "scheduled": 0, "draft": 0}

View File

@@ -6,6 +6,7 @@ Data comes from DuckDB serving tables. Only articles + published_scenarios
are stored in SQLite (routing / application state). are stored in SQLite (routing / application state).
""" """
import json import json
import logging
import re import re
from datetime import UTC, date, datetime, timedelta from datetime import UTC, date, datetime, timedelta
from pathlib import Path from pathlib import Path
@@ -15,7 +16,9 @@ import yaml
from jinja2 import ChainableUndefined, Environment from jinja2 import ChainableUndefined, Environment
from ..analytics import fetch_analytics from ..analytics import fetch_analytics
from ..core import execute, fetch_one, slugify, utcnow_iso from ..core import execute, fetch_one, slugify, transaction, utcnow_iso
logger = logging.getLogger(__name__)
# ── Constants ──────────────────────────────────────────────────────────────── # ── Constants ────────────────────────────────────────────────────────────────
@@ -303,6 +306,7 @@ async def generate_articles(
generated = 0 generated = 0
now_iso = utcnow_iso() now_iso = utcnow_iso()
async with transaction() as db:
for row in rows: for row in rows:
for lang in config["languages"]: for lang in config["languages"]:
# Build render context: row data + language # Build render context: row data + language
@@ -338,24 +342,15 @@ async def generate_articles(
city = row.get("city_name", row.get("city", "")) city = row.get("city_name", row.get("city", ""))
country = row.get("country", state.get("country", "")) country = row.get("country", state.get("country", ""))
# Upsert published scenario await db.execute(
existing = await fetch_one(
"SELECT id FROM published_scenarios WHERE slug = ?",
(scenario_slug,),
)
if existing:
await execute(
"""UPDATE published_scenarios
SET state_json = ?, calc_json = ?, updated_at = ?
WHERE slug = ?""",
(json.dumps(state), json.dumps(d), now_iso, scenario_slug),
)
else:
await execute(
"""INSERT INTO published_scenarios """INSERT INTO published_scenarios
(slug, title, location, country, venue_type, ownership, (slug, title, location, country, venue_type, ownership,
court_config, state_json, calc_json, created_at) court_config, state_json, calc_json, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""", VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(slug) DO UPDATE SET
state_json = excluded.state_json,
calc_json = excluded.calc_json,
updated_at = excluded.created_at""",
( (
scenario_slug, city, city, country, scenario_slug, city, city, country,
state.get("venue", "indoor"), state.get("venue", "indoor"),
@@ -434,28 +429,20 @@ async def generate_articles(
md_dir.mkdir(parents=True, exist_ok=True) md_dir.mkdir(parents=True, exist_ok=True)
(md_dir / f"{article_slug}.md").write_text(body_md) (md_dir / f"{article_slug}.md").write_text(body_md)
# Upsert article in SQLite — keyed by (url_path, language) since # Upsert article in SQLite — keyed by (url_path, language)
# multiple languages share the same url_path await db.execute(
existing_article = await fetch_one(
"SELECT id FROM articles WHERE url_path = ? AND language = ?",
(url_path, lang),
)
if existing_article:
await execute(
"""UPDATE articles
SET title = ?, meta_description = ?, template_slug = ?,
language = ?, date_modified = ?, updated_at = ?,
seo_head = ?
WHERE url_path = ? AND language = ?""",
(title, meta_desc, slug, lang, now_iso, now_iso, seo_head, url_path, lang),
)
else:
await execute(
"""INSERT INTO articles """INSERT INTO articles
(url_path, slug, title, meta_description, country, region, (url_path, slug, title, meta_description, country, region,
status, published_at, template_slug, language, date_modified, status, published_at, template_slug, language, date_modified,
seo_head, created_at) seo_head, created_at)
VALUES (?, ?, ?, ?, ?, ?, 'published', ?, ?, ?, ?, ?, ?)""", VALUES (?, ?, ?, ?, ?, ?, 'published', ?, ?, ?, ?, ?, ?)
ON CONFLICT(url_path, language) DO UPDATE SET
title = excluded.title,
meta_description = excluded.meta_description,
template_slug = excluded.template_slug,
date_modified = excluded.date_modified,
seo_head = excluded.seo_head,
updated_at = excluded.date_modified""",
( (
url_path, article_slug, title, meta_desc, url_path, article_slug, title, meta_desc,
row.get("country", ""), row.get("region", ""), row.get("country", ""), row.get("region", ""),
@@ -464,6 +451,8 @@ async def generate_articles(
) )
generated += 1 generated += 1
if generated % 25 == 0:
logger.info("%s: %d articles written…", slug, generated)
# Stagger dates # Stagger dates
published_today += 1 published_today += 1
@@ -471,6 +460,7 @@ async def generate_articles(
published_today = 0 published_today = 0
publish_date += timedelta(days=1) publish_date += timedelta(days=1)
logger.info("%s: done — %d total", slug, generated)
return generated return generated