perf(content): batch article generation in a single transaction + upsert
Replace SELECT-then-INSERT/UPDATE pairs in generate_articles() with INSERT ... ON CONFLICT DO UPDATE statements, and wrap the entire loop in a single transaction context manager. Eliminates ~1,500 individual SQLite commits for a 500-article run (one commit per row replaced by one total). Also fix _get_article_stats() returning None for live/scheduled/draft counts when the articles table is empty: wrap SUM expressions in COALESCE(..., 0) so they always return integers regardless of row count. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1713,9 +1713,9 @@ async def _get_article_stats() -> dict:
|
||||
row = await fetch_one(
|
||||
"""SELECT
|
||||
COUNT(*) AS total,
|
||||
SUM(CASE WHEN status='published' AND published_at <= datetime('now') THEN 1 ELSE 0 END) AS live,
|
||||
SUM(CASE WHEN status='published' AND published_at > datetime('now') THEN 1 ELSE 0 END) AS scheduled,
|
||||
SUM(CASE WHEN status='draft' THEN 1 ELSE 0 END) AS draft
|
||||
COALESCE(SUM(CASE WHEN status='published' AND published_at <= datetime('now') THEN 1 ELSE 0 END), 0) AS live,
|
||||
COALESCE(SUM(CASE WHEN status='published' AND published_at > datetime('now') THEN 1 ELSE 0 END), 0) AS scheduled,
|
||||
COALESCE(SUM(CASE WHEN status='draft' THEN 1 ELSE 0 END), 0) AS draft
|
||||
FROM articles"""
|
||||
)
|
||||
return dict(row) if row else {"total": 0, "live": 0, "scheduled": 0, "draft": 0}
|
||||
|
||||
@@ -6,6 +6,7 @@ Data comes from DuckDB serving tables. Only articles + published_scenarios
|
||||
are stored in SQLite (routing / application state).
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from datetime import UTC, date, datetime, timedelta
|
||||
from pathlib import Path
|
||||
@@ -15,7 +16,9 @@ import yaml
|
||||
from jinja2 import ChainableUndefined, Environment
|
||||
|
||||
from ..analytics import fetch_analytics
|
||||
from ..core import execute, fetch_one, slugify, utcnow_iso
|
||||
from ..core import execute, fetch_one, slugify, transaction, utcnow_iso
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ── Constants ────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -303,59 +306,51 @@ async def generate_articles(
|
||||
generated = 0
|
||||
now_iso = utcnow_iso()
|
||||
|
||||
for row in rows:
|
||||
for lang in config["languages"]:
|
||||
# Build render context: row data + language
|
||||
ctx = {**row, "language": lang}
|
||||
async with transaction() as db:
|
||||
for row in rows:
|
||||
for lang in config["languages"]:
|
||||
# Build render context: row data + language
|
||||
ctx = {**row, "language": lang}
|
||||
|
||||
# Render URL pattern (no lang prefix — blueprint provides /<lang>)
|
||||
url_path = _render_pattern(config["url_pattern"], ctx)
|
||||
if is_reserved_path(url_path):
|
||||
continue
|
||||
# Render URL pattern (no lang prefix — blueprint provides /<lang>)
|
||||
url_path = _render_pattern(config["url_pattern"], ctx)
|
||||
if is_reserved_path(url_path):
|
||||
continue
|
||||
|
||||
title = _render_pattern(config["title_pattern"], ctx)
|
||||
meta_desc = _render_pattern(config["meta_description_pattern"], ctx)
|
||||
article_slug = slug + "-" + lang + "-" + str(row[config["natural_key"]])
|
||||
title = _render_pattern(config["title_pattern"], ctx)
|
||||
meta_desc = _render_pattern(config["meta_description_pattern"], ctx)
|
||||
article_slug = slug + "-" + lang + "-" + str(row[config["natural_key"]])
|
||||
|
||||
# Calculator content type: create scenario
|
||||
scenario_slug = None
|
||||
if config["content_type"] == "calculator":
|
||||
# DuckDB lowercases all column names; build a case-insensitive
|
||||
# reverse map so "ratepeak" (stored) matches "ratePeak" (DEFAULTS).
|
||||
_defaults_ci = {k.lower(): k for k in DEFAULTS}
|
||||
calc_overrides = {
|
||||
_defaults_ci[k.lower()]: v
|
||||
for k, v in row.items()
|
||||
if k.lower() in _defaults_ci and v is not None
|
||||
}
|
||||
state = validate_state(calc_overrides)
|
||||
d = calc(state, lang=lang)
|
||||
# Calculator content type: create scenario
|
||||
scenario_slug = None
|
||||
if config["content_type"] == "calculator":
|
||||
# DuckDB lowercases all column names; build a case-insensitive
|
||||
# reverse map so "ratepeak" (stored) matches "ratePeak" (DEFAULTS).
|
||||
_defaults_ci = {k.lower(): k for k in DEFAULTS}
|
||||
calc_overrides = {
|
||||
_defaults_ci[k.lower()]: v
|
||||
for k, v in row.items()
|
||||
if k.lower() in _defaults_ci and v is not None
|
||||
}
|
||||
state = validate_state(calc_overrides)
|
||||
d = calc(state, lang=lang)
|
||||
|
||||
scenario_slug = slug + "-" + str(row[config["natural_key"]])
|
||||
dbl = state.get("dblCourts", 0)
|
||||
sgl = state.get("sglCourts", 0)
|
||||
court_config = f"{dbl} double + {sgl} single"
|
||||
city = row.get("city_name", row.get("city", ""))
|
||||
country = row.get("country", state.get("country", ""))
|
||||
scenario_slug = slug + "-" + str(row[config["natural_key"]])
|
||||
dbl = state.get("dblCourts", 0)
|
||||
sgl = state.get("sglCourts", 0)
|
||||
court_config = f"{dbl} double + {sgl} single"
|
||||
city = row.get("city_name", row.get("city", ""))
|
||||
country = row.get("country", state.get("country", ""))
|
||||
|
||||
# Upsert published scenario
|
||||
existing = await fetch_one(
|
||||
"SELECT id FROM published_scenarios WHERE slug = ?",
|
||||
(scenario_slug,),
|
||||
)
|
||||
if existing:
|
||||
await execute(
|
||||
"""UPDATE published_scenarios
|
||||
SET state_json = ?, calc_json = ?, updated_at = ?
|
||||
WHERE slug = ?""",
|
||||
(json.dumps(state), json.dumps(d), now_iso, scenario_slug),
|
||||
)
|
||||
else:
|
||||
await execute(
|
||||
await db.execute(
|
||||
"""INSERT INTO published_scenarios
|
||||
(slug, title, location, country, venue_type, ownership,
|
||||
court_config, state_json, calc_json, created_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||
(slug, title, location, country, venue_type, ownership,
|
||||
court_config, state_json, calc_json, created_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(slug) DO UPDATE SET
|
||||
state_json = excluded.state_json,
|
||||
calc_json = excluded.calc_json,
|
||||
updated_at = excluded.created_at""",
|
||||
(
|
||||
scenario_slug, city, city, country,
|
||||
state.get("venue", "indoor"),
|
||||
@@ -365,97 +360,89 @@ async def generate_articles(
|
||||
),
|
||||
)
|
||||
|
||||
ctx["scenario_slug"] = scenario_slug
|
||||
ctx["scenario_slug"] = scenario_slug
|
||||
|
||||
# Render body template
|
||||
body_md = _render_pattern(config["body_template"], ctx)
|
||||
body_html = mistune.html(body_md)
|
||||
body_html = await bake_scenario_cards(body_html, lang=lang)
|
||||
# Render body template
|
||||
body_md = _render_pattern(config["body_template"], ctx)
|
||||
body_html = mistune.html(body_md)
|
||||
body_html = await bake_scenario_cards(body_html, lang=lang)
|
||||
|
||||
# Extract FAQ pairs for structured data
|
||||
faq_pairs = _extract_faq_pairs(body_md)
|
||||
# Extract FAQ pairs for structured data
|
||||
faq_pairs = _extract_faq_pairs(body_md)
|
||||
|
||||
# Build SEO metadata (full_url includes lang prefix for canonical/OG)
|
||||
full_url = f"{base_url}/{lang}{url_path}"
|
||||
publish_dt = datetime(
|
||||
publish_date.year, publish_date.month, publish_date.day,
|
||||
8, 0, 0,
|
||||
).isoformat()
|
||||
# Build SEO metadata (full_url includes lang prefix for canonical/OG)
|
||||
full_url = f"{base_url}/{lang}{url_path}"
|
||||
publish_dt = datetime(
|
||||
publish_date.year, publish_date.month, publish_date.day,
|
||||
8, 0, 0,
|
||||
).isoformat()
|
||||
|
||||
# Hreflang links
|
||||
hreflang_links = []
|
||||
for alt_lang in config["languages"]:
|
||||
alt_url = f"/{alt_lang}" + _render_pattern(config["url_pattern"], {**row, "language": alt_lang})
|
||||
# Hreflang links
|
||||
hreflang_links = []
|
||||
for alt_lang in config["languages"]:
|
||||
alt_url = f"/{alt_lang}" + _render_pattern(config["url_pattern"], {**row, "language": alt_lang})
|
||||
hreflang_links.append(
|
||||
f'<link rel="alternate" hreflang="{alt_lang}" href="{base_url}{alt_url}" />'
|
||||
)
|
||||
# x-default points to English (or first language)
|
||||
default_lang = "en" if "en" in config["languages"] else config["languages"][0]
|
||||
default_url = f"/{default_lang}" + _render_pattern(config["url_pattern"], {**row, "language": default_lang})
|
||||
hreflang_links.append(
|
||||
f'<link rel="alternate" hreflang="{alt_lang}" href="{base_url}{alt_url}" />'
|
||||
f'<link rel="alternate" hreflang="x-default" href="{base_url}{default_url}" />'
|
||||
)
|
||||
# x-default points to English (or first language)
|
||||
default_lang = "en" if "en" in config["languages"] else config["languages"][0]
|
||||
default_url = f"/{default_lang}" + _render_pattern(config["url_pattern"], {**row, "language": default_lang})
|
||||
hreflang_links.append(
|
||||
f'<link rel="alternate" hreflang="x-default" href="{base_url}{default_url}" />'
|
||||
)
|
||||
|
||||
# JSON-LD
|
||||
breadcrumbs = _build_breadcrumbs(f"/{lang}{url_path}", base_url)
|
||||
jsonld_objects = build_jsonld(
|
||||
config["schema_type"],
|
||||
title=title,
|
||||
description=meta_desc,
|
||||
url=full_url,
|
||||
published_at=publish_dt,
|
||||
date_modified=now_iso,
|
||||
language=lang,
|
||||
breadcrumbs=breadcrumbs,
|
||||
faq_pairs=faq_pairs,
|
||||
)
|
||||
|
||||
# Build SEO head block
|
||||
seo_head = "\n".join([
|
||||
f'<link rel="canonical" href="{full_url}" />',
|
||||
*hreflang_links,
|
||||
f'<meta property="og:title" content="{_escape_attr(title)}" />',
|
||||
f'<meta property="og:description" content="{_escape_attr(meta_desc)}" />',
|
||||
f'<meta property="og:url" content="{full_url}" />',
|
||||
'<meta property="og:type" content="article" />',
|
||||
*[
|
||||
f'<script type="application/ld+json">{json.dumps(obj, ensure_ascii=False)}</script>'
|
||||
for obj in jsonld_objects
|
||||
],
|
||||
])
|
||||
|
||||
# Write HTML to disk
|
||||
build_dir = BUILD_DIR / lang
|
||||
build_dir.mkdir(parents=True, exist_ok=True)
|
||||
(build_dir / f"{article_slug}.html").write_text(body_html)
|
||||
|
||||
# Write markdown source to disk (for admin editing)
|
||||
md_dir = BUILD_DIR / lang / "md"
|
||||
md_dir.mkdir(parents=True, exist_ok=True)
|
||||
(md_dir / f"{article_slug}.md").write_text(body_md)
|
||||
|
||||
# Upsert article in SQLite — keyed by (url_path, language) since
|
||||
# multiple languages share the same url_path
|
||||
existing_article = await fetch_one(
|
||||
"SELECT id FROM articles WHERE url_path = ? AND language = ?",
|
||||
(url_path, lang),
|
||||
)
|
||||
if existing_article:
|
||||
await execute(
|
||||
"""UPDATE articles
|
||||
SET title = ?, meta_description = ?, template_slug = ?,
|
||||
language = ?, date_modified = ?, updated_at = ?,
|
||||
seo_head = ?
|
||||
WHERE url_path = ? AND language = ?""",
|
||||
(title, meta_desc, slug, lang, now_iso, now_iso, seo_head, url_path, lang),
|
||||
# JSON-LD
|
||||
breadcrumbs = _build_breadcrumbs(f"/{lang}{url_path}", base_url)
|
||||
jsonld_objects = build_jsonld(
|
||||
config["schema_type"],
|
||||
title=title,
|
||||
description=meta_desc,
|
||||
url=full_url,
|
||||
published_at=publish_dt,
|
||||
date_modified=now_iso,
|
||||
language=lang,
|
||||
breadcrumbs=breadcrumbs,
|
||||
faq_pairs=faq_pairs,
|
||||
)
|
||||
else:
|
||||
await execute(
|
||||
|
||||
# Build SEO head block
|
||||
seo_head = "\n".join([
|
||||
f'<link rel="canonical" href="{full_url}" />',
|
||||
*hreflang_links,
|
||||
f'<meta property="og:title" content="{_escape_attr(title)}" />',
|
||||
f'<meta property="og:description" content="{_escape_attr(meta_desc)}" />',
|
||||
f'<meta property="og:url" content="{full_url}" />',
|
||||
'<meta property="og:type" content="article" />',
|
||||
*[
|
||||
f'<script type="application/ld+json">{json.dumps(obj, ensure_ascii=False)}</script>'
|
||||
for obj in jsonld_objects
|
||||
],
|
||||
])
|
||||
|
||||
# Write HTML to disk
|
||||
build_dir = BUILD_DIR / lang
|
||||
build_dir.mkdir(parents=True, exist_ok=True)
|
||||
(build_dir / f"{article_slug}.html").write_text(body_html)
|
||||
|
||||
# Write markdown source to disk (for admin editing)
|
||||
md_dir = BUILD_DIR / lang / "md"
|
||||
md_dir.mkdir(parents=True, exist_ok=True)
|
||||
(md_dir / f"{article_slug}.md").write_text(body_md)
|
||||
|
||||
# Upsert article in SQLite — keyed by (url_path, language)
|
||||
await db.execute(
|
||||
"""INSERT INTO articles
|
||||
(url_path, slug, title, meta_description, country, region,
|
||||
status, published_at, template_slug, language, date_modified,
|
||||
seo_head, created_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, 'published', ?, ?, ?, ?, ?, ?)""",
|
||||
(url_path, slug, title, meta_description, country, region,
|
||||
status, published_at, template_slug, language, date_modified,
|
||||
seo_head, created_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, 'published', ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(url_path, language) DO UPDATE SET
|
||||
title = excluded.title,
|
||||
meta_description = excluded.meta_description,
|
||||
template_slug = excluded.template_slug,
|
||||
date_modified = excluded.date_modified,
|
||||
seo_head = excluded.seo_head,
|
||||
updated_at = excluded.date_modified""",
|
||||
(
|
||||
url_path, article_slug, title, meta_desc,
|
||||
row.get("country", ""), row.get("region", ""),
|
||||
@@ -463,14 +450,17 @@ async def generate_articles(
|
||||
),
|
||||
)
|
||||
|
||||
generated += 1
|
||||
generated += 1
|
||||
if generated % 25 == 0:
|
||||
logger.info("%s: %d articles written…", slug, generated)
|
||||
|
||||
# Stagger dates
|
||||
published_today += 1
|
||||
if published_today >= articles_per_day:
|
||||
published_today = 0
|
||||
publish_date += timedelta(days=1)
|
||||
# Stagger dates
|
||||
published_today += 1
|
||||
if published_today >= articles_per_day:
|
||||
published_today = 0
|
||||
publish_date += timedelta(days=1)
|
||||
|
||||
logger.info("%s: done — %d total", slug, generated)
|
||||
return generated
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user