merge: content improvement tasks (FAQ, Market Score, DE translations, country names, DB perf)
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1713,9 +1713,9 @@ async def _get_article_stats() -> dict:
|
|||||||
row = await fetch_one(
|
row = await fetch_one(
|
||||||
"""SELECT
|
"""SELECT
|
||||||
COUNT(*) AS total,
|
COUNT(*) AS total,
|
||||||
SUM(CASE WHEN status='published' AND published_at <= datetime('now') THEN 1 ELSE 0 END) AS live,
|
COALESCE(SUM(CASE WHEN status='published' AND published_at <= datetime('now') THEN 1 ELSE 0 END), 0) AS live,
|
||||||
SUM(CASE WHEN status='published' AND published_at > datetime('now') THEN 1 ELSE 0 END) AS scheduled,
|
COALESCE(SUM(CASE WHEN status='published' AND published_at > datetime('now') THEN 1 ELSE 0 END), 0) AS scheduled,
|
||||||
SUM(CASE WHEN status='draft' THEN 1 ELSE 0 END) AS draft
|
COALESCE(SUM(CASE WHEN status='draft' THEN 1 ELSE 0 END), 0) AS draft
|
||||||
FROM articles"""
|
FROM articles"""
|
||||||
)
|
)
|
||||||
return dict(row) if row else {"total": 0, "live": 0, "scheduled": 0, "draft": 0}
|
return dict(row) if row else {"total": 0, "live": 0, "scheduled": 0, "draft": 0}
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ Data comes from DuckDB serving tables. Only articles + published_scenarios
|
|||||||
are stored in SQLite (routing / application state).
|
are stored in SQLite (routing / application state).
|
||||||
"""
|
"""
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
import re
|
import re
|
||||||
from datetime import UTC, date, datetime, timedelta
|
from datetime import UTC, date, datetime, timedelta
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -15,7 +16,9 @@ import yaml
|
|||||||
from jinja2 import ChainableUndefined, Environment
|
from jinja2 import ChainableUndefined, Environment
|
||||||
|
|
||||||
from ..analytics import fetch_analytics
|
from ..analytics import fetch_analytics
|
||||||
from ..core import execute, fetch_one, slugify, utcnow_iso
|
from ..core import execute, fetch_one, slugify, transaction, utcnow_iso
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# ── Constants ────────────────────────────────────────────────────────────────
|
# ── Constants ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@@ -303,59 +306,51 @@ async def generate_articles(
|
|||||||
generated = 0
|
generated = 0
|
||||||
now_iso = utcnow_iso()
|
now_iso = utcnow_iso()
|
||||||
|
|
||||||
for row in rows:
|
async with transaction() as db:
|
||||||
for lang in config["languages"]:
|
for row in rows:
|
||||||
# Build render context: row data + language
|
for lang in config["languages"]:
|
||||||
ctx = {**row, "language": lang}
|
# Build render context: row data + language
|
||||||
|
ctx = {**row, "language": lang}
|
||||||
|
|
||||||
# Render URL pattern (no lang prefix — blueprint provides /<lang>)
|
# Render URL pattern (no lang prefix — blueprint provides /<lang>)
|
||||||
url_path = _render_pattern(config["url_pattern"], ctx)
|
url_path = _render_pattern(config["url_pattern"], ctx)
|
||||||
if is_reserved_path(url_path):
|
if is_reserved_path(url_path):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
title = _render_pattern(config["title_pattern"], ctx)
|
title = _render_pattern(config["title_pattern"], ctx)
|
||||||
meta_desc = _render_pattern(config["meta_description_pattern"], ctx)
|
meta_desc = _render_pattern(config["meta_description_pattern"], ctx)
|
||||||
article_slug = slug + "-" + lang + "-" + str(row[config["natural_key"]])
|
article_slug = slug + "-" + lang + "-" + str(row[config["natural_key"]])
|
||||||
|
|
||||||
# Calculator content type: create scenario
|
# Calculator content type: create scenario
|
||||||
scenario_slug = None
|
scenario_slug = None
|
||||||
if config["content_type"] == "calculator":
|
if config["content_type"] == "calculator":
|
||||||
# DuckDB lowercases all column names; build a case-insensitive
|
# DuckDB lowercases all column names; build a case-insensitive
|
||||||
# reverse map so "ratepeak" (stored) matches "ratePeak" (DEFAULTS).
|
# reverse map so "ratepeak" (stored) matches "ratePeak" (DEFAULTS).
|
||||||
_defaults_ci = {k.lower(): k for k in DEFAULTS}
|
_defaults_ci = {k.lower(): k for k in DEFAULTS}
|
||||||
calc_overrides = {
|
calc_overrides = {
|
||||||
_defaults_ci[k.lower()]: v
|
_defaults_ci[k.lower()]: v
|
||||||
for k, v in row.items()
|
for k, v in row.items()
|
||||||
if k.lower() in _defaults_ci and v is not None
|
if k.lower() in _defaults_ci and v is not None
|
||||||
}
|
}
|
||||||
state = validate_state(calc_overrides)
|
state = validate_state(calc_overrides)
|
||||||
d = calc(state, lang=lang)
|
d = calc(state, lang=lang)
|
||||||
|
|
||||||
scenario_slug = slug + "-" + str(row[config["natural_key"]])
|
scenario_slug = slug + "-" + str(row[config["natural_key"]])
|
||||||
dbl = state.get("dblCourts", 0)
|
dbl = state.get("dblCourts", 0)
|
||||||
sgl = state.get("sglCourts", 0)
|
sgl = state.get("sglCourts", 0)
|
||||||
court_config = f"{dbl} double + {sgl} single"
|
court_config = f"{dbl} double + {sgl} single"
|
||||||
city = row.get("city_name", row.get("city", ""))
|
city = row.get("city_name", row.get("city", ""))
|
||||||
country = row.get("country", state.get("country", ""))
|
country = row.get("country", state.get("country", ""))
|
||||||
|
|
||||||
# Upsert published scenario
|
await db.execute(
|
||||||
existing = await fetch_one(
|
|
||||||
"SELECT id FROM published_scenarios WHERE slug = ?",
|
|
||||||
(scenario_slug,),
|
|
||||||
)
|
|
||||||
if existing:
|
|
||||||
await execute(
|
|
||||||
"""UPDATE published_scenarios
|
|
||||||
SET state_json = ?, calc_json = ?, updated_at = ?
|
|
||||||
WHERE slug = ?""",
|
|
||||||
(json.dumps(state), json.dumps(d), now_iso, scenario_slug),
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
await execute(
|
|
||||||
"""INSERT INTO published_scenarios
|
"""INSERT INTO published_scenarios
|
||||||
(slug, title, location, country, venue_type, ownership,
|
(slug, title, location, country, venue_type, ownership,
|
||||||
court_config, state_json, calc_json, created_at)
|
court_config, state_json, calc_json, created_at)
|
||||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
|
ON CONFLICT(slug) DO UPDATE SET
|
||||||
|
state_json = excluded.state_json,
|
||||||
|
calc_json = excluded.calc_json,
|
||||||
|
updated_at = excluded.created_at""",
|
||||||
(
|
(
|
||||||
scenario_slug, city, city, country,
|
scenario_slug, city, city, country,
|
||||||
state.get("venue", "indoor"),
|
state.get("venue", "indoor"),
|
||||||
@@ -365,97 +360,89 @@ async def generate_articles(
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
ctx["scenario_slug"] = scenario_slug
|
ctx["scenario_slug"] = scenario_slug
|
||||||
|
|
||||||
# Render body template
|
# Render body template
|
||||||
body_md = _render_pattern(config["body_template"], ctx)
|
body_md = _render_pattern(config["body_template"], ctx)
|
||||||
body_html = mistune.html(body_md)
|
body_html = mistune.html(body_md)
|
||||||
body_html = await bake_scenario_cards(body_html, lang=lang)
|
body_html = await bake_scenario_cards(body_html, lang=lang)
|
||||||
|
|
||||||
# Extract FAQ pairs for structured data
|
# Extract FAQ pairs for structured data
|
||||||
faq_pairs = _extract_faq_pairs(body_md)
|
faq_pairs = _extract_faq_pairs(body_md)
|
||||||
|
|
||||||
# Build SEO metadata (full_url includes lang prefix for canonical/OG)
|
# Build SEO metadata (full_url includes lang prefix for canonical/OG)
|
||||||
full_url = f"{base_url}/{lang}{url_path}"
|
full_url = f"{base_url}/{lang}{url_path}"
|
||||||
publish_dt = datetime(
|
publish_dt = datetime(
|
||||||
publish_date.year, publish_date.month, publish_date.day,
|
publish_date.year, publish_date.month, publish_date.day,
|
||||||
8, 0, 0,
|
8, 0, 0,
|
||||||
).isoformat()
|
).isoformat()
|
||||||
|
|
||||||
# Hreflang links
|
# Hreflang links
|
||||||
hreflang_links = []
|
hreflang_links = []
|
||||||
for alt_lang in config["languages"]:
|
for alt_lang in config["languages"]:
|
||||||
alt_url = f"/{alt_lang}" + _render_pattern(config["url_pattern"], {**row, "language": alt_lang})
|
alt_url = f"/{alt_lang}" + _render_pattern(config["url_pattern"], {**row, "language": alt_lang})
|
||||||
|
hreflang_links.append(
|
||||||
|
f'<link rel="alternate" hreflang="{alt_lang}" href="{base_url}{alt_url}" />'
|
||||||
|
)
|
||||||
|
# x-default points to English (or first language)
|
||||||
|
default_lang = "en" if "en" in config["languages"] else config["languages"][0]
|
||||||
|
default_url = f"/{default_lang}" + _render_pattern(config["url_pattern"], {**row, "language": default_lang})
|
||||||
hreflang_links.append(
|
hreflang_links.append(
|
||||||
f'<link rel="alternate" hreflang="{alt_lang}" href="{base_url}{alt_url}" />'
|
f'<link rel="alternate" hreflang="x-default" href="{base_url}{default_url}" />'
|
||||||
)
|
)
|
||||||
# x-default points to English (or first language)
|
|
||||||
default_lang = "en" if "en" in config["languages"] else config["languages"][0]
|
|
||||||
default_url = f"/{default_lang}" + _render_pattern(config["url_pattern"], {**row, "language": default_lang})
|
|
||||||
hreflang_links.append(
|
|
||||||
f'<link rel="alternate" hreflang="x-default" href="{base_url}{default_url}" />'
|
|
||||||
)
|
|
||||||
|
|
||||||
# JSON-LD
|
# JSON-LD
|
||||||
breadcrumbs = _build_breadcrumbs(f"/{lang}{url_path}", base_url)
|
breadcrumbs = _build_breadcrumbs(f"/{lang}{url_path}", base_url)
|
||||||
jsonld_objects = build_jsonld(
|
jsonld_objects = build_jsonld(
|
||||||
config["schema_type"],
|
config["schema_type"],
|
||||||
title=title,
|
title=title,
|
||||||
description=meta_desc,
|
description=meta_desc,
|
||||||
url=full_url,
|
url=full_url,
|
||||||
published_at=publish_dt,
|
published_at=publish_dt,
|
||||||
date_modified=now_iso,
|
date_modified=now_iso,
|
||||||
language=lang,
|
language=lang,
|
||||||
breadcrumbs=breadcrumbs,
|
breadcrumbs=breadcrumbs,
|
||||||
faq_pairs=faq_pairs,
|
faq_pairs=faq_pairs,
|
||||||
)
|
|
||||||
|
|
||||||
# Build SEO head block
|
|
||||||
seo_head = "\n".join([
|
|
||||||
f'<link rel="canonical" href="{full_url}" />',
|
|
||||||
*hreflang_links,
|
|
||||||
f'<meta property="og:title" content="{_escape_attr(title)}" />',
|
|
||||||
f'<meta property="og:description" content="{_escape_attr(meta_desc)}" />',
|
|
||||||
f'<meta property="og:url" content="{full_url}" />',
|
|
||||||
'<meta property="og:type" content="article" />',
|
|
||||||
*[
|
|
||||||
f'<script type="application/ld+json">{json.dumps(obj, ensure_ascii=False)}</script>'
|
|
||||||
for obj in jsonld_objects
|
|
||||||
],
|
|
||||||
])
|
|
||||||
|
|
||||||
# Write HTML to disk
|
|
||||||
build_dir = BUILD_DIR / lang
|
|
||||||
build_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
(build_dir / f"{article_slug}.html").write_text(body_html)
|
|
||||||
|
|
||||||
# Write markdown source to disk (for admin editing)
|
|
||||||
md_dir = BUILD_DIR / lang / "md"
|
|
||||||
md_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
(md_dir / f"{article_slug}.md").write_text(body_md)
|
|
||||||
|
|
||||||
# Upsert article in SQLite — keyed by (url_path, language) since
|
|
||||||
# multiple languages share the same url_path
|
|
||||||
existing_article = await fetch_one(
|
|
||||||
"SELECT id FROM articles WHERE url_path = ? AND language = ?",
|
|
||||||
(url_path, lang),
|
|
||||||
)
|
|
||||||
if existing_article:
|
|
||||||
await execute(
|
|
||||||
"""UPDATE articles
|
|
||||||
SET title = ?, meta_description = ?, template_slug = ?,
|
|
||||||
language = ?, date_modified = ?, updated_at = ?,
|
|
||||||
seo_head = ?
|
|
||||||
WHERE url_path = ? AND language = ?""",
|
|
||||||
(title, meta_desc, slug, lang, now_iso, now_iso, seo_head, url_path, lang),
|
|
||||||
)
|
)
|
||||||
else:
|
|
||||||
await execute(
|
# Build SEO head block
|
||||||
|
seo_head = "\n".join([
|
||||||
|
f'<link rel="canonical" href="{full_url}" />',
|
||||||
|
*hreflang_links,
|
||||||
|
f'<meta property="og:title" content="{_escape_attr(title)}" />',
|
||||||
|
f'<meta property="og:description" content="{_escape_attr(meta_desc)}" />',
|
||||||
|
f'<meta property="og:url" content="{full_url}" />',
|
||||||
|
'<meta property="og:type" content="article" />',
|
||||||
|
*[
|
||||||
|
f'<script type="application/ld+json">{json.dumps(obj, ensure_ascii=False)}</script>'
|
||||||
|
for obj in jsonld_objects
|
||||||
|
],
|
||||||
|
])
|
||||||
|
|
||||||
|
# Write HTML to disk
|
||||||
|
build_dir = BUILD_DIR / lang
|
||||||
|
build_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
(build_dir / f"{article_slug}.html").write_text(body_html)
|
||||||
|
|
||||||
|
# Write markdown source to disk (for admin editing)
|
||||||
|
md_dir = BUILD_DIR / lang / "md"
|
||||||
|
md_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
(md_dir / f"{article_slug}.md").write_text(body_md)
|
||||||
|
|
||||||
|
# Upsert article in SQLite — keyed by (url_path, language)
|
||||||
|
await db.execute(
|
||||||
"""INSERT INTO articles
|
"""INSERT INTO articles
|
||||||
(url_path, slug, title, meta_description, country, region,
|
(url_path, slug, title, meta_description, country, region,
|
||||||
status, published_at, template_slug, language, date_modified,
|
status, published_at, template_slug, language, date_modified,
|
||||||
seo_head, created_at)
|
seo_head, created_at)
|
||||||
VALUES (?, ?, ?, ?, ?, ?, 'published', ?, ?, ?, ?, ?, ?)""",
|
VALUES (?, ?, ?, ?, ?, ?, 'published', ?, ?, ?, ?, ?, ?)
|
||||||
|
ON CONFLICT(url_path, language) DO UPDATE SET
|
||||||
|
title = excluded.title,
|
||||||
|
meta_description = excluded.meta_description,
|
||||||
|
template_slug = excluded.template_slug,
|
||||||
|
date_modified = excluded.date_modified,
|
||||||
|
seo_head = excluded.seo_head,
|
||||||
|
updated_at = excluded.date_modified""",
|
||||||
(
|
(
|
||||||
url_path, article_slug, title, meta_desc,
|
url_path, article_slug, title, meta_desc,
|
||||||
row.get("country", ""), row.get("region", ""),
|
row.get("country", ""), row.get("region", ""),
|
||||||
@@ -463,14 +450,17 @@ async def generate_articles(
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
generated += 1
|
generated += 1
|
||||||
|
if generated % 25 == 0:
|
||||||
|
logger.info("%s: %d articles written…", slug, generated)
|
||||||
|
|
||||||
# Stagger dates
|
# Stagger dates
|
||||||
published_today += 1
|
published_today += 1
|
||||||
if published_today >= articles_per_day:
|
if published_today >= articles_per_day:
|
||||||
published_today = 0
|
published_today = 0
|
||||||
publish_date += timedelta(days=1)
|
publish_date += timedelta(days=1)
|
||||||
|
|
||||||
|
logger.info("%s: done — %d total", slug, generated)
|
||||||
return generated
|
return generated
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user