perf(content): batch article generation in single transaction + upsert

Replace SELECT-then-INSERT/UPDATE pairs in generate_articles() with
INSERT ... ON CONFLICT DO UPDATE statements, and wrap the entire loop in
a single transaction context manager. This eliminates ~1,500 individual
SQLite commits for a 500-article run: the previous one-commit-per-row
pattern is replaced by a single commit for the whole batch.

Also fix _get_article_stats() returning None for the live/scheduled/draft
counts when the articles table is empty (SUM over zero rows yields NULL):
wrap each SUM expression in COALESCE(..., 0) so the query always returns
integers regardless of row count.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-24 16:34:16 +01:00
parent 1e0aa6002a
commit 482b4f9fca
2 changed files with 129 additions and 139 deletions

View File

@@ -1713,9 +1713,9 @@ async def _get_article_stats() -> dict:
row = await fetch_one(
"""SELECT
COUNT(*) AS total,
SUM(CASE WHEN status='published' AND published_at <= datetime('now') THEN 1 ELSE 0 END) AS live,
SUM(CASE WHEN status='published' AND published_at > datetime('now') THEN 1 ELSE 0 END) AS scheduled,
SUM(CASE WHEN status='draft' THEN 1 ELSE 0 END) AS draft
COALESCE(SUM(CASE WHEN status='published' AND published_at <= datetime('now') THEN 1 ELSE 0 END), 0) AS live,
COALESCE(SUM(CASE WHEN status='published' AND published_at > datetime('now') THEN 1 ELSE 0 END), 0) AS scheduled,
COALESCE(SUM(CASE WHEN status='draft' THEN 1 ELSE 0 END), 0) AS draft
FROM articles"""
)
return dict(row) if row else {"total": 0, "live": 0, "scheduled": 0, "draft": 0}

View File

@@ -6,6 +6,7 @@ Data comes from DuckDB serving tables. Only articles + published_scenarios
are stored in SQLite (routing / application state).
"""
import json
import logging
import re
from datetime import UTC, date, datetime, timedelta
from pathlib import Path
@@ -15,7 +16,9 @@ import yaml
from jinja2 import ChainableUndefined, Environment
from ..analytics import fetch_analytics
from ..core import execute, fetch_one, slugify, utcnow_iso
from ..core import execute, fetch_one, slugify, transaction, utcnow_iso
logger = logging.getLogger(__name__)
# ── Constants ────────────────────────────────────────────────────────────────
@@ -303,59 +306,51 @@ async def generate_articles(
generated = 0
now_iso = utcnow_iso()
for row in rows:
for lang in config["languages"]:
# Build render context: row data + language
ctx = {**row, "language": lang}
async with transaction() as db:
for row in rows:
for lang in config["languages"]:
# Build render context: row data + language
ctx = {**row, "language": lang}
# Render URL pattern (no lang prefix — blueprint provides /<lang>)
url_path = _render_pattern(config["url_pattern"], ctx)
if is_reserved_path(url_path):
continue
# Render URL pattern (no lang prefix — blueprint provides /<lang>)
url_path = _render_pattern(config["url_pattern"], ctx)
if is_reserved_path(url_path):
continue
title = _render_pattern(config["title_pattern"], ctx)
meta_desc = _render_pattern(config["meta_description_pattern"], ctx)
article_slug = slug + "-" + lang + "-" + str(row[config["natural_key"]])
title = _render_pattern(config["title_pattern"], ctx)
meta_desc = _render_pattern(config["meta_description_pattern"], ctx)
article_slug = slug + "-" + lang + "-" + str(row[config["natural_key"]])
# Calculator content type: create scenario
scenario_slug = None
if config["content_type"] == "calculator":
# DuckDB lowercases all column names; build a case-insensitive
# reverse map so "ratepeak" (stored) matches "ratePeak" (DEFAULTS).
_defaults_ci = {k.lower(): k for k in DEFAULTS}
calc_overrides = {
_defaults_ci[k.lower()]: v
for k, v in row.items()
if k.lower() in _defaults_ci and v is not None
}
state = validate_state(calc_overrides)
d = calc(state, lang=lang)
# Calculator content type: create scenario
scenario_slug = None
if config["content_type"] == "calculator":
# DuckDB lowercases all column names; build a case-insensitive
# reverse map so "ratepeak" (stored) matches "ratePeak" (DEFAULTS).
_defaults_ci = {k.lower(): k for k in DEFAULTS}
calc_overrides = {
_defaults_ci[k.lower()]: v
for k, v in row.items()
if k.lower() in _defaults_ci and v is not None
}
state = validate_state(calc_overrides)
d = calc(state, lang=lang)
scenario_slug = slug + "-" + str(row[config["natural_key"]])
dbl = state.get("dblCourts", 0)
sgl = state.get("sglCourts", 0)
court_config = f"{dbl} double + {sgl} single"
city = row.get("city_name", row.get("city", ""))
country = row.get("country", state.get("country", ""))
scenario_slug = slug + "-" + str(row[config["natural_key"]])
dbl = state.get("dblCourts", 0)
sgl = state.get("sglCourts", 0)
court_config = f"{dbl} double + {sgl} single"
city = row.get("city_name", row.get("city", ""))
country = row.get("country", state.get("country", ""))
# Upsert published scenario
existing = await fetch_one(
"SELECT id FROM published_scenarios WHERE slug = ?",
(scenario_slug,),
)
if existing:
await execute(
"""UPDATE published_scenarios
SET state_json = ?, calc_json = ?, updated_at = ?
WHERE slug = ?""",
(json.dumps(state), json.dumps(d), now_iso, scenario_slug),
)
else:
await execute(
await db.execute(
"""INSERT INTO published_scenarios
(slug, title, location, country, venue_type, ownership,
court_config, state_json, calc_json, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
(slug, title, location, country, venue_type, ownership,
court_config, state_json, calc_json, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(slug) DO UPDATE SET
state_json = excluded.state_json,
calc_json = excluded.calc_json,
updated_at = excluded.created_at""",
(
scenario_slug, city, city, country,
state.get("venue", "indoor"),
@@ -365,97 +360,89 @@ async def generate_articles(
),
)
ctx["scenario_slug"] = scenario_slug
ctx["scenario_slug"] = scenario_slug
# Render body template
body_md = _render_pattern(config["body_template"], ctx)
body_html = mistune.html(body_md)
body_html = await bake_scenario_cards(body_html, lang=lang)
# Render body template
body_md = _render_pattern(config["body_template"], ctx)
body_html = mistune.html(body_md)
body_html = await bake_scenario_cards(body_html, lang=lang)
# Extract FAQ pairs for structured data
faq_pairs = _extract_faq_pairs(body_md)
# Extract FAQ pairs for structured data
faq_pairs = _extract_faq_pairs(body_md)
# Build SEO metadata (full_url includes lang prefix for canonical/OG)
full_url = f"{base_url}/{lang}{url_path}"
publish_dt = datetime(
publish_date.year, publish_date.month, publish_date.day,
8, 0, 0,
).isoformat()
# Build SEO metadata (full_url includes lang prefix for canonical/OG)
full_url = f"{base_url}/{lang}{url_path}"
publish_dt = datetime(
publish_date.year, publish_date.month, publish_date.day,
8, 0, 0,
).isoformat()
# Hreflang links
hreflang_links = []
for alt_lang in config["languages"]:
alt_url = f"/{alt_lang}" + _render_pattern(config["url_pattern"], {**row, "language": alt_lang})
# Hreflang links
hreflang_links = []
for alt_lang in config["languages"]:
alt_url = f"/{alt_lang}" + _render_pattern(config["url_pattern"], {**row, "language": alt_lang})
hreflang_links.append(
f'<link rel="alternate" hreflang="{alt_lang}" href="{base_url}{alt_url}" />'
)
# x-default points to English (or first language)
default_lang = "en" if "en" in config["languages"] else config["languages"][0]
default_url = f"/{default_lang}" + _render_pattern(config["url_pattern"], {**row, "language": default_lang})
hreflang_links.append(
f'<link rel="alternate" hreflang="{alt_lang}" href="{base_url}{alt_url}" />'
f'<link rel="alternate" hreflang="x-default" href="{base_url}{default_url}" />'
)
# x-default points to English (or first language)
default_lang = "en" if "en" in config["languages"] else config["languages"][0]
default_url = f"/{default_lang}" + _render_pattern(config["url_pattern"], {**row, "language": default_lang})
hreflang_links.append(
f'<link rel="alternate" hreflang="x-default" href="{base_url}{default_url}" />'
)
# JSON-LD
breadcrumbs = _build_breadcrumbs(f"/{lang}{url_path}", base_url)
jsonld_objects = build_jsonld(
config["schema_type"],
title=title,
description=meta_desc,
url=full_url,
published_at=publish_dt,
date_modified=now_iso,
language=lang,
breadcrumbs=breadcrumbs,
faq_pairs=faq_pairs,
)
# Build SEO head block
seo_head = "\n".join([
f'<link rel="canonical" href="{full_url}" />',
*hreflang_links,
f'<meta property="og:title" content="{_escape_attr(title)}" />',
f'<meta property="og:description" content="{_escape_attr(meta_desc)}" />',
f'<meta property="og:url" content="{full_url}" />',
'<meta property="og:type" content="article" />',
*[
f'<script type="application/ld+json">{json.dumps(obj, ensure_ascii=False)}</script>'
for obj in jsonld_objects
],
])
# Write HTML to disk
build_dir = BUILD_DIR / lang
build_dir.mkdir(parents=True, exist_ok=True)
(build_dir / f"{article_slug}.html").write_text(body_html)
# Write markdown source to disk (for admin editing)
md_dir = BUILD_DIR / lang / "md"
md_dir.mkdir(parents=True, exist_ok=True)
(md_dir / f"{article_slug}.md").write_text(body_md)
# Upsert article in SQLite — keyed by (url_path, language) since
# multiple languages share the same url_path
existing_article = await fetch_one(
"SELECT id FROM articles WHERE url_path = ? AND language = ?",
(url_path, lang),
)
if existing_article:
await execute(
"""UPDATE articles
SET title = ?, meta_description = ?, template_slug = ?,
language = ?, date_modified = ?, updated_at = ?,
seo_head = ?
WHERE url_path = ? AND language = ?""",
(title, meta_desc, slug, lang, now_iso, now_iso, seo_head, url_path, lang),
# JSON-LD
breadcrumbs = _build_breadcrumbs(f"/{lang}{url_path}", base_url)
jsonld_objects = build_jsonld(
config["schema_type"],
title=title,
description=meta_desc,
url=full_url,
published_at=publish_dt,
date_modified=now_iso,
language=lang,
breadcrumbs=breadcrumbs,
faq_pairs=faq_pairs,
)
else:
await execute(
# Build SEO head block
seo_head = "\n".join([
f'<link rel="canonical" href="{full_url}" />',
*hreflang_links,
f'<meta property="og:title" content="{_escape_attr(title)}" />',
f'<meta property="og:description" content="{_escape_attr(meta_desc)}" />',
f'<meta property="og:url" content="{full_url}" />',
'<meta property="og:type" content="article" />',
*[
f'<script type="application/ld+json">{json.dumps(obj, ensure_ascii=False)}</script>'
for obj in jsonld_objects
],
])
# Write HTML to disk
build_dir = BUILD_DIR / lang
build_dir.mkdir(parents=True, exist_ok=True)
(build_dir / f"{article_slug}.html").write_text(body_html)
# Write markdown source to disk (for admin editing)
md_dir = BUILD_DIR / lang / "md"
md_dir.mkdir(parents=True, exist_ok=True)
(md_dir / f"{article_slug}.md").write_text(body_md)
# Upsert article in SQLite — keyed by (url_path, language)
await db.execute(
"""INSERT INTO articles
(url_path, slug, title, meta_description, country, region,
status, published_at, template_slug, language, date_modified,
seo_head, created_at)
VALUES (?, ?, ?, ?, ?, ?, 'published', ?, ?, ?, ?, ?, ?)""",
(url_path, slug, title, meta_description, country, region,
status, published_at, template_slug, language, date_modified,
seo_head, created_at)
VALUES (?, ?, ?, ?, ?, ?, 'published', ?, ?, ?, ?, ?, ?)
ON CONFLICT(url_path, language) DO UPDATE SET
title = excluded.title,
meta_description = excluded.meta_description,
template_slug = excluded.template_slug,
date_modified = excluded.date_modified,
seo_head = excluded.seo_head,
updated_at = excluded.date_modified""",
(
url_path, article_slug, title, meta_desc,
row.get("country", ""), row.get("region", ""),
@@ -463,14 +450,17 @@ async def generate_articles(
),
)
generated += 1
generated += 1
if generated % 25 == 0:
logger.info("%s: %d articles written…", slug, generated)
# Stagger dates
published_today += 1
if published_today >= articles_per_day:
published_today = 0
publish_date += timedelta(days=1)
# Stagger dates
published_today += 1
if published_today >= articles_per_day:
published_today = 0
publish_date += timedelta(days=1)
logger.info("%s: done — %d total", slug, generated)
return generated