perf(content): batch article generation in single transaction + upsert
Replace SELECT-then-INSERT/UPDATE pairs in generate_articles() with INSERT ... ON CONFLICT DO UPDATE statements, and wrap the entire loop in a single transaction context manager. Eliminates ~1,500 individual SQLite commits for a 500-article run (one commit per row replaced by one total). Also fix _get_article_stats() returning None for live/scheduled/draft counts when the articles table is empty: wrap SUM expressions in COALESCE(..., 0) so they always return integers regardless of row count. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1713,9 +1713,9 @@ async def _get_article_stats() -> dict:
|
|||||||
row = await fetch_one(
|
row = await fetch_one(
|
||||||
"""SELECT
|
"""SELECT
|
||||||
COUNT(*) AS total,
|
COUNT(*) AS total,
|
||||||
SUM(CASE WHEN status='published' AND published_at <= datetime('now') THEN 1 ELSE 0 END) AS live,
|
COALESCE(SUM(CASE WHEN status='published' AND published_at <= datetime('now') THEN 1 ELSE 0 END), 0) AS live,
|
||||||
SUM(CASE WHEN status='published' AND published_at > datetime('now') THEN 1 ELSE 0 END) AS scheduled,
|
COALESCE(SUM(CASE WHEN status='published' AND published_at > datetime('now') THEN 1 ELSE 0 END), 0) AS scheduled,
|
||||||
SUM(CASE WHEN status='draft' THEN 1 ELSE 0 END) AS draft
|
COALESCE(SUM(CASE WHEN status='draft' THEN 1 ELSE 0 END), 0) AS draft
|
||||||
FROM articles"""
|
FROM articles"""
|
||||||
)
|
)
|
||||||
return dict(row) if row else {"total": 0, "live": 0, "scheduled": 0, "draft": 0}
|
return dict(row) if row else {"total": 0, "live": 0, "scheduled": 0, "draft": 0}
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ Data comes from DuckDB serving tables. Only articles + published_scenarios
|
|||||||
are stored in SQLite (routing / application state).
|
are stored in SQLite (routing / application state).
|
||||||
"""
|
"""
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
import re
|
import re
|
||||||
from datetime import UTC, date, datetime, timedelta
|
from datetime import UTC, date, datetime, timedelta
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -15,7 +16,9 @@ import yaml
|
|||||||
from jinja2 import ChainableUndefined, Environment
|
from jinja2 import ChainableUndefined, Environment
|
||||||
|
|
||||||
from ..analytics import fetch_analytics
|
from ..analytics import fetch_analytics
|
||||||
from ..core import execute, fetch_one, slugify, utcnow_iso
|
from ..core import execute, fetch_one, slugify, transaction, utcnow_iso
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# ── Constants ────────────────────────────────────────────────────────────────
|
# ── Constants ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@@ -303,6 +306,7 @@ async def generate_articles(
|
|||||||
generated = 0
|
generated = 0
|
||||||
now_iso = utcnow_iso()
|
now_iso = utcnow_iso()
|
||||||
|
|
||||||
|
async with transaction() as db:
|
||||||
for row in rows:
|
for row in rows:
|
||||||
for lang in config["languages"]:
|
for lang in config["languages"]:
|
||||||
# Build render context: row data + language
|
# Build render context: row data + language
|
||||||
@@ -338,24 +342,15 @@ async def generate_articles(
|
|||||||
city = row.get("city_name", row.get("city", ""))
|
city = row.get("city_name", row.get("city", ""))
|
||||||
country = row.get("country", state.get("country", ""))
|
country = row.get("country", state.get("country", ""))
|
||||||
|
|
||||||
# Upsert published scenario
|
await db.execute(
|
||||||
existing = await fetch_one(
|
|
||||||
"SELECT id FROM published_scenarios WHERE slug = ?",
|
|
||||||
(scenario_slug,),
|
|
||||||
)
|
|
||||||
if existing:
|
|
||||||
await execute(
|
|
||||||
"""UPDATE published_scenarios
|
|
||||||
SET state_json = ?, calc_json = ?, updated_at = ?
|
|
||||||
WHERE slug = ?""",
|
|
||||||
(json.dumps(state), json.dumps(d), now_iso, scenario_slug),
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
await execute(
|
|
||||||
"""INSERT INTO published_scenarios
|
"""INSERT INTO published_scenarios
|
||||||
(slug, title, location, country, venue_type, ownership,
|
(slug, title, location, country, venue_type, ownership,
|
||||||
court_config, state_json, calc_json, created_at)
|
court_config, state_json, calc_json, created_at)
|
||||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
ON CONFLICT(slug) DO UPDATE SET
|
||||||
|
state_json = excluded.state_json,
|
||||||
|
calc_json = excluded.calc_json,
|
||||||
|
updated_at = excluded.created_at""",
|
||||||
(
|
(
|
||||||
scenario_slug, city, city, country,
|
scenario_slug, city, city, country,
|
||||||
state.get("venue", "indoor"),
|
state.get("venue", "indoor"),
|
||||||
@@ -434,28 +429,20 @@ async def generate_articles(
|
|||||||
md_dir.mkdir(parents=True, exist_ok=True)
|
md_dir.mkdir(parents=True, exist_ok=True)
|
||||||
(md_dir / f"{article_slug}.md").write_text(body_md)
|
(md_dir / f"{article_slug}.md").write_text(body_md)
|
||||||
|
|
||||||
# Upsert article in SQLite — keyed by (url_path, language) since
|
# Upsert article in SQLite — keyed by (url_path, language)
|
||||||
# multiple languages share the same url_path
|
await db.execute(
|
||||||
existing_article = await fetch_one(
|
|
||||||
"SELECT id FROM articles WHERE url_path = ? AND language = ?",
|
|
||||||
(url_path, lang),
|
|
||||||
)
|
|
||||||
if existing_article:
|
|
||||||
await execute(
|
|
||||||
"""UPDATE articles
|
|
||||||
SET title = ?, meta_description = ?, template_slug = ?,
|
|
||||||
language = ?, date_modified = ?, updated_at = ?,
|
|
||||||
seo_head = ?
|
|
||||||
WHERE url_path = ? AND language = ?""",
|
|
||||||
(title, meta_desc, slug, lang, now_iso, now_iso, seo_head, url_path, lang),
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
await execute(
|
|
||||||
"""INSERT INTO articles
|
"""INSERT INTO articles
|
||||||
(url_path, slug, title, meta_description, country, region,
|
(url_path, slug, title, meta_description, country, region,
|
||||||
status, published_at, template_slug, language, date_modified,
|
status, published_at, template_slug, language, date_modified,
|
||||||
seo_head, created_at)
|
seo_head, created_at)
|
||||||
VALUES (?, ?, ?, ?, ?, ?, 'published', ?, ?, ?, ?, ?, ?)""",
|
VALUES (?, ?, ?, ?, ?, ?, 'published', ?, ?, ?, ?, ?, ?)
|
||||||
|
ON CONFLICT(url_path, language) DO UPDATE SET
|
||||||
|
title = excluded.title,
|
||||||
|
meta_description = excluded.meta_description,
|
||||||
|
template_slug = excluded.template_slug,
|
||||||
|
date_modified = excluded.date_modified,
|
||||||
|
seo_head = excluded.seo_head,
|
||||||
|
updated_at = excluded.date_modified""",
|
||||||
(
|
(
|
||||||
url_path, article_slug, title, meta_desc,
|
url_path, article_slug, title, meta_desc,
|
||||||
row.get("country", ""), row.get("region", ""),
|
row.get("country", ""), row.get("region", ""),
|
||||||
@@ -464,6 +451,8 @@ async def generate_articles(
|
|||||||
)
|
)
|
||||||
|
|
||||||
generated += 1
|
generated += 1
|
||||||
|
if generated % 25 == 0:
|
||||||
|
logger.info("%s: %d articles written…", slug, generated)
|
||||||
|
|
||||||
# Stagger dates
|
# Stagger dates
|
||||||
published_today += 1
|
published_today += 1
|
||||||
@@ -471,6 +460,7 @@ async def generate_articles(
|
|||||||
published_today = 0
|
published_today = 0
|
||||||
publish_date += timedelta(days=1)
|
publish_date += timedelta(days=1)
|
||||||
|
|
||||||
|
logger.info("%s: done — %d total", slug, generated)
|
||||||
return generated
|
return generated
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user