feat: SSG-inspired pSEO CMS — git templates + DuckDB direct reads

Replace the old CSV-upload-based CMS with an SSG architecture where
templates live in git as .md.jinja files with YAML frontmatter and
data comes directly from DuckDB serving tables. Only articles and
published_scenarios remain in SQLite for routing/state.

- Content module: discover, load, generate, preview functions
- Migration 0018: drop article_templates + template_data, recreate
  articles + published_scenarios without FK references, add
  template_slug/language/date_modified/seo_head columns
- Admin routes: read-only template views with generate/regenerate/preview
- SEO pipeline: canonical URLs, hreflang (EN+DE), JSON-LD (Article,
  FAQPage, BreadcrumbList), Open Graph tags baked at generation time
- Example template: city-cost-de.md.jinja for German city market data

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-23 12:25:44 +01:00
parent 5b6c4182f7
commit f1181342ad
15 changed files with 1153 additions and 899 deletions

View File

@@ -81,43 +81,54 @@ async def _create_article(slug="test-article", url_path="/test-article",
)
async def _create_template():
"""Insert a template + 3 data rows, return (template_id, data_row_count)."""
template_id = await execute(
"""INSERT INTO article_templates
(name, slug, content_type, input_schema, url_pattern,
title_pattern, meta_description_pattern, body_template)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
(
"City Cost Analysis", "city-cost", "calculator",
json.dumps([
{"name": "city", "label": "City", "field_type": "text", "required": True},
{"name": "city_slug", "label": "Slug", "field_type": "text", "required": True},
{"name": "country", "label": "Country", "field_type": "text", "required": True},
{"name": "region", "label": "Region", "field_type": "text", "required": False},
{"name": "electricity", "label": "Electricity", "field_type": "number", "required": False},
]),
"/padel-court-cost-{{ city_slug }}",
"Padel Center Cost in {{ city }}",
"How much does a padel center cost in {{ city }}?",
"# Padel in {{ city }}\n\n[scenario:{{ scenario_slug }}]\n\n## CAPEX\n\n[scenario:{{ scenario_slug }}:capex]",
),
)
TEST_TEMPLATE = """\
---
name: "Test City Analysis"
slug: test-city
content_type: calculator
data_table: serving.test_cities
natural_key: city_slug
languages: [en]
url_pattern: "/markets/{{ country | lower }}/{{ city_slug }}"
title_pattern: "Padel in {{ city }}"
meta_description_pattern: "Padel costs in {{ city }}"
schema_type: Article
---
# Padel in {{ city }}
cities = [
("Miami", "miami", "US", "North America", 700),
("Madrid", "madrid", "ES", "Europe", 500),
("Berlin", "berlin", "DE", "Europe", 550),
]
for city, slug, country, region, elec in cities:
await execute(
"INSERT INTO template_data (template_id, data_json) VALUES (?, ?)",
(template_id, json.dumps({
"city": city, "city_slug": slug, "country": country,
"region": region, "electricity": elec,
})),
)
return template_id, len(cities)
Welcome to {{ city }}.
[scenario:{{ scenario_slug }}:capex]
"""
# Canned rows returned by the mocked fetch_analytics in pseo_env: one dict per
# city, keyed by the column names the test template's placeholders refer to.
TEST_ROWS = [
    dict(city="Miami", city_slug="miami", country="US", region="North America", electricity=700),
    dict(city="Madrid", city_slug="madrid", country="ES", region="Europe", electricity=500),
    dict(city="Berlin", city_slug="berlin", country="DE", region="Europe", electricity=550),
]
@pytest.fixture
def pseo_env(tmp_path, monkeypatch):
    """Provide an isolated pSEO environment for the generation tests.

    Creates ``templates`` and ``build`` directories under ``tmp_path`` and
    points ``padelnomics.content.TEMPLATES_DIR`` / ``BUILD_DIR`` at them,
    writes ``TEST_TEMPLATE`` to ``test-city.md.jinja`` in the template
    directory, and replaces ``padelnomics.content.fetch_analytics`` with a
    stub that returns ``TEST_ROWS``.

    Returns:
        A dict with the keys ``tpl_dir`` and ``build_dir`` holding the two
        temporary directory paths.
    """
    import padelnomics.content as content_mod

    async def mock_fetch_analytics(query, params=None):
        # Arguments are ignored; every call yields the same canned rows.
        return TEST_ROWS

    env = {}
    # (result key, directory name, module attribute to redirect)
    for key, dirname, attr in (
        ("tpl_dir", "templates", "TEMPLATES_DIR"),
        ("build_dir", "build", "BUILD_DIR"),
    ):
        directory = tmp_path / dirname
        directory.mkdir()
        monkeypatch.setattr(content_mod, attr, directory)
        env[key] = directory

    (env["tpl_dir"] / "test-city.md.jinja").write_text(TEST_TEMPLATE)
    monkeypatch.setattr(content_mod, "fetch_analytics", mock_fetch_analytics)
    return env
# ════════════════════════════════════════════════════════════
@@ -401,22 +412,14 @@ class TestBakeScenarioCards:
# ════════════════════════════════════════════════════════════
class TestGenerationPipeline:
async def test_generates_correct_count(self, db):
from padelnomics.admin.routes import _generate_from_template
template_id, count = await _create_template()
template = await fetch_one(
"SELECT * FROM article_templates WHERE id = ?", (template_id,)
)
generated = await _generate_from_template(dict(template), date(2026, 3, 1), 10)
assert generated == count
async def test_generates_correct_count(self, db, pseo_env):
from padelnomics.content import generate_articles
generated = await generate_articles("test-city", date(2026, 3, 1), 10)
assert generated == 3 # 3 rows × 1 language
async def test_staggered_dates_two_per_day(self, db):
from padelnomics.admin.routes import _generate_from_template
template_id, _ = await _create_template()
template = await fetch_one(
"SELECT * FROM article_templates WHERE id = ?", (template_id,)
)
await _generate_from_template(dict(template), date(2026, 3, 1), 2)
async def test_staggered_dates_two_per_day(self, db, pseo_env):
from padelnomics.content import generate_articles
await generate_articles("test-city", date(2026, 3, 1), 2)
articles = await fetch_all("SELECT * FROM articles ORDER BY published_at")
assert len(articles) == 3
@@ -426,55 +429,39 @@ class TestGenerationPipeline:
assert dates[1] == "2026-03-01"
assert dates[2] == "2026-03-02"
async def test_staggered_dates_one_per_day(self, db):
from padelnomics.admin.routes import _generate_from_template
template_id, _ = await _create_template()
template = await fetch_one(
"SELECT * FROM article_templates WHERE id = ?", (template_id,)
)
await _generate_from_template(dict(template), date(2026, 3, 1), 1)
async def test_staggered_dates_one_per_day(self, db, pseo_env):
from padelnomics.content import generate_articles
await generate_articles("test-city", date(2026, 3, 1), 1)
articles = await fetch_all("SELECT * FROM articles ORDER BY published_at")
dates = sorted({a["published_at"][:10] for a in articles})
assert dates == ["2026-03-01", "2026-03-02", "2026-03-03"]
async def test_article_url_and_title(self, db):
from padelnomics.admin.routes import _generate_from_template
template_id, _ = await _create_template()
template = await fetch_one(
"SELECT * FROM article_templates WHERE id = ?", (template_id,)
)
await _generate_from_template(dict(template), date(2026, 3, 1), 10)
async def test_article_url_and_title(self, db, pseo_env):
from padelnomics.content import generate_articles
await generate_articles("test-city", date(2026, 3, 1), 10)
miami = await fetch_one("SELECT * FROM articles WHERE slug = 'city-cost-miami'")
miami = await fetch_one("SELECT * FROM articles WHERE slug = 'test-city-en-miami'")
assert miami is not None
assert miami["url_path"] == "/padel-court-cost-miami"
assert miami["title"] == "Padel Center Cost in Miami"
assert miami["country"] == "US"
assert miami["region"] == "North America"
assert miami["url_path"] == "/en/markets/us/miami"
assert miami["title"] == "Padel in Miami"
assert miami["template_slug"] == "test-city"
assert miami["language"] == "en"
assert miami["status"] == "published"
async def test_scenario_created_per_row(self, db):
from padelnomics.admin.routes import _generate_from_template
template_id, count = await _create_template()
template = await fetch_one(
"SELECT * FROM article_templates WHERE id = ?", (template_id,)
)
await _generate_from_template(dict(template), date(2026, 3, 1), 10)
async def test_scenario_created_per_row(self, db, pseo_env):
from padelnomics.content import generate_articles
await generate_articles("test-city", date(2026, 3, 1), 10)
scenarios = await fetch_all("SELECT * FROM published_scenarios")
assert len(scenarios) == count
assert len(scenarios) == 3
async def test_scenario_has_valid_calc_json(self, db):
from padelnomics.admin.routes import _generate_from_template
template_id, _ = await _create_template()
template = await fetch_one(
"SELECT * FROM article_templates WHERE id = ?", (template_id,)
)
await _generate_from_template(dict(template), date(2026, 3, 1), 10)
async def test_scenario_has_valid_calc_json(self, db, pseo_env):
from padelnomics.content import generate_articles
await generate_articles("test-city", date(2026, 3, 1), 10)
scenario = await fetch_one(
"SELECT * FROM published_scenarios WHERE slug = 'city-cost-miami'"
"SELECT * FROM published_scenarios WHERE slug = 'test-city-miami'"
)
assert scenario is not None
d = json.loads(scenario["calc_json"])
@@ -483,112 +470,76 @@ class TestGenerationPipeline:
assert "irr" in d
assert d["capex"] > 0
async def test_template_data_linked(self, db):
from padelnomics.admin.routes import _generate_from_template
template_id, _ = await _create_template()
template = await fetch_one(
"SELECT * FROM article_templates WHERE id = ?", (template_id,)
)
await _generate_from_template(dict(template), date(2026, 3, 1), 10)
rows = await fetch_all(
"SELECT * FROM template_data WHERE template_id = ?", (template_id,)
)
for row in rows:
assert row["article_id"] is not None, f"Row {row['id']} not linked to article"
assert row["scenario_id"] is not None, f"Row {row['id']} not linked to scenario"
async def test_build_files_written(self, db):
from padelnomics.admin.routes import _generate_from_template
from padelnomics.content.routes import BUILD_DIR
template_id, _ = await _create_template()
template = await fetch_one(
"SELECT * FROM article_templates WHERE id = ?", (template_id,)
)
await _generate_from_template(dict(template), date(2026, 3, 1), 10)
async def test_build_files_written(self, db, pseo_env):
from padelnomics.content import generate_articles
await generate_articles("test-city", date(2026, 3, 1), 10)
build_dir = pseo_env["build_dir"]
articles = await fetch_all("SELECT slug FROM articles")
try:
for a in articles:
build_path = BUILD_DIR / f"{a['slug']}.html"
assert build_path.exists(), f"Missing build file: {build_path}"
content = build_path.read_text()
assert len(content) > 100, f"Build file too small: {build_path}"
assert "scenario-widget" in content
finally:
# Cleanup build files
for a in articles:
p = BUILD_DIR / f"{a['slug']}.html"
if p.exists():
p.unlink()
for a in articles:
build_path = build_dir / "en" / f"{a['slug']}.html"
assert build_path.exists(), f"Missing build file: {build_path}"
content = build_path.read_text()
assert len(content) > 50
async def test_skips_already_generated(self, db):
"""Running generate twice does not duplicate articles."""
from padelnomics.admin.routes import _generate_from_template
template_id, count = await _create_template()
template = await fetch_one(
"SELECT * FROM article_templates WHERE id = ?", (template_id,)
)
async def test_updates_existing_on_regeneration(self, db, pseo_env):
"""Running generate twice updates articles, doesn't duplicate."""
from padelnomics.content import generate_articles
first = await _generate_from_template(dict(template), date(2026, 3, 1), 10)
assert first == count
first = await generate_articles("test-city", date(2026, 3, 1), 10)
assert first == 3
# Second run: all rows already linked → 0 generated
second = await _generate_from_template(dict(template), date(2026, 3, 10), 10)
assert second == 0
second = await generate_articles("test-city", date(2026, 3, 10), 10)
assert second == 3 # Updates existing
articles = await fetch_all("SELECT * FROM articles")
assert len(articles) == count
assert len(articles) == 3 # No duplicates
# Cleanup
from padelnomics.content.routes import BUILD_DIR
for a in articles:
p = BUILD_DIR / f"{a['slug']}.html"
if p.exists():
p.unlink()
async def test_calc_overrides_applied(self, db):
async def test_calc_overrides_applied(self, db, pseo_env):
"""Data row values that match DEFAULTS keys are used as calc overrides."""
from padelnomics.admin.routes import _generate_from_template
template_id, _ = await _create_template()
template = await fetch_one(
"SELECT * FROM article_templates WHERE id = ?", (template_id,)
)
await _generate_from_template(dict(template), date(2026, 3, 1), 10)
from padelnomics.content import generate_articles
await generate_articles("test-city", date(2026, 3, 1), 10)
# Miami had electricity=700, default is 600
scenario = await fetch_one(
"SELECT * FROM published_scenarios WHERE slug = 'city-cost-miami'"
"SELECT * FROM published_scenarios WHERE slug = 'test-city-miami'"
)
state = json.loads(scenario["state_json"])
assert state["electricity"] == 700
# Cleanup
from padelnomics.content.routes import BUILD_DIR
for slug in ("miami", "madrid", "berlin"):
p = BUILD_DIR / f"{slug}.html"
if p.exists():
p.unlink()
async def test_seo_head_populated(self, db, pseo_env):
    """The generated Miami article stores an seo_head with canonical link and JSON-LD."""
    from padelnomics.content import generate_articles

    await generate_articles("test-city", date(2026, 3, 1), 10)

    article = await fetch_one("SELECT * FROM articles WHERE slug = 'test-city-en-miami'")
    seo_head = article["seo_head"]
    assert seo_head is not None
    # Both markers must appear in the stored head markup.
    for marker in ('rel="canonical"', 'application/ld+json'):
        assert marker in seo_head
# ════════════════════════════════════════════════════════════
# Jinja string rendering
# ════════════════════════════════════════════════════════════
class TestRenderJinjaString:
class TestRenderPattern:
def test_simple(self):
from padelnomics.admin.routes import _render_jinja_string
assert _render_jinja_string("Hello {{ name }}!", {"name": "World"}) == "Hello World!"
from padelnomics.content import _render_pattern
assert _render_pattern("Hello {{ name }}!", {"name": "World"}) == "Hello World!"
def test_missing_var_empty(self):
from padelnomics.admin.routes import _render_jinja_string
result = _render_jinja_string("Hello {{ missing }}!", {})
from padelnomics.content import _render_pattern
result = _render_pattern("Hello {{ missing }}!", {})
assert result == "Hello !"
def test_url_pattern(self):
from padelnomics.admin.routes import _render_jinja_string
result = _render_jinja_string("/padel-court-cost-{{ slug }}", {"slug": "miami"})
assert result == "/padel-court-cost-miami"
from padelnomics.content import _render_pattern
result = _render_pattern("/markets/{{ country | lower }}/{{ slug }}", {"country": "US", "slug": "miami"})
assert result == "/markets/us/miami"
def test_slugify_filter(self):
from padelnomics.content import _render_pattern
result = _render_pattern("{{ name | slugify }}", {"name": "Hello World"})
assert result == "hello-world"
# ════════════════════════════════════════════════════════════
@@ -772,75 +723,6 @@ class TestAdminTemplates:
resp = await admin_client.get("/admin/templates")
assert resp.status_code == 200
async def test_template_new_form(self, admin_client):
    """GET /admin/templates/new answers with HTTP 200."""
    response = await admin_client.get("/admin/templates/new")
    status = response.status_code
    assert status == 200
async def test_template_create(self, admin_client, db):
    """POSTing the new-template form redirects (302) and stores the template row."""
    form_fields = {
        "csrf_token": "test",
        "name": "Test Template",
        "slug": "test-tmpl",
        "content_type": "calculator",
        "input_schema": '[{"name":"city","label":"City","field_type":"text","required":true}]',
        "url_pattern": "/test-{{ city }}",
        "title_pattern": "Test {{ city }}",
        "meta_description_pattern": "",
        "body_template": "# Hello {{ city }}",
    }

    # Seed the session with the CSRF token the form submits.
    async with admin_client.session_transaction() as session:
        session["csrf_token"] = "test"

    response = await admin_client.post("/admin/templates/new", form=form_fields)
    assert response.status_code == 302

    stored = await fetch_one("SELECT * FROM article_templates WHERE slug = 'test-tmpl'")
    assert stored is not None
    assert stored["name"] == "Test Template"
async def test_template_edit(self, admin_client, db):
    """POSTing the edit form redirects (302) and updates the template's name."""
    insert_sql = (
        "INSERT INTO article_templates\n"
        "(name, slug, content_type, input_schema, url_pattern,\n"
        "title_pattern, body_template)\n"
        "VALUES ('Edit Me', 'edit-me', 'calculator', '[]',\n"
        "'/edit', 'Edit', '# body')"
    )
    template_id = await execute(insert_sql)

    # Seed the session with the CSRF token the form submits.
    async with admin_client.session_transaction() as session:
        session["csrf_token"] = "test"

    form_fields = {
        "csrf_token": "test",
        "name": "Edited",
        "input_schema": "[]",
        "url_pattern": "/edit",
        "title_pattern": "Edited",
        "body_template": "# edited",
    }
    response = await admin_client.post(f"/admin/templates/{template_id}/edit", form=form_fields)
    assert response.status_code == 302

    updated = await fetch_one("SELECT * FROM article_templates WHERE id = ?", (template_id,))
    assert updated["name"] == "Edited"
async def test_template_delete(self, admin_client, db):
    """POSTing to the delete route redirects (302) and removes the template row."""
    insert_sql = (
        "INSERT INTO article_templates\n"
        "(name, slug, content_type, input_schema, url_pattern,\n"
        "title_pattern, body_template)\n"
        "VALUES ('Del Me', 'del-me', 'calculator', '[]',\n"
        "'/del', 'Del', '# body')"
    )
    template_id = await execute(insert_sql)

    # Seed the session with the CSRF token the form submits.
    async with admin_client.session_transaction() as session:
        session["csrf_token"] = "test"

    delete_url = f"/admin/templates/{template_id}/delete"
    response = await admin_client.post(delete_url, form={
        "csrf_token": "test",
    })
    assert response.status_code == 302

    leftover = await fetch_one("SELECT * FROM article_templates WHERE id = ?", (template_id,))
    assert leftover is None
class TestAdminScenarios:
@@ -1012,81 +894,6 @@ class TestAdminArticles:
assert await fetch_one("SELECT 1 FROM articles WHERE id = ?", (article_id,)) is None
class TestAdminTemplateData:
    """Admin routes that add and delete data rows of a template."""

    async def test_data_add(self, admin_client, db):
        """Adding a data row redirects (302) and leaves four rows for the template."""
        template_id, _ = await _create_template()

        # Seed the session with the CSRF token the form submits.
        async with admin_client.session_transaction() as session:
            session["csrf_token"] = "test"

        new_row = {
            "csrf_token": "test",
            "city": "London",
            "city_slug": "london",
            "country": "UK",
            "region": "Europe",
            "electricity": "650",
        }
        response = await admin_client.post(f"/admin/templates/{template_id}/data/add", form=new_row)
        assert response.status_code == 302

        stored_rows = await fetch_all(
            "SELECT * FROM template_data WHERE template_id = ?", (template_id,)
        )
        # 3 from _create_template + 1 just added
        assert len(stored_rows) == 4

    async def test_data_delete(self, admin_client, db):
        """Deleting one data row redirects (302) and leaves two rows for the template."""
        template_id, _ = await _create_template()
        id_rows = await fetch_all(
            "SELECT id FROM template_data WHERE template_id = ?", (template_id,)
        )
        # Delete the first row returned by the query.
        data_id = id_rows[0]["id"]

        async with admin_client.session_transaction() as session:
            session["csrf_token"] = "test"

        delete_url = f"/admin/templates/{template_id}/data/{data_id}/delete"
        response = await admin_client.post(delete_url, form={"csrf_token": "test"})
        assert response.status_code == 302

        remaining = await fetch_all(
            "SELECT * FROM template_data WHERE template_id = ?", (template_id,)
        )
        assert len(remaining) == 2
class TestAdminGenerate:
    """Admin generate form and the article generation it triggers."""

    async def test_generate_form(self, admin_client, db):
        """The generate page renders (200) and its HTML contains the pending count."""
        template_id, _ = await _create_template()

        response = await admin_client.get(f"/admin/templates/{template_id}/generate")
        assert response.status_code == 200

        body = await response.data
        html = body.decode()
        assert "3" in html  # pending count

    async def test_generate_creates_articles(self, admin_client, db):
        """Submitting the generate form redirects (302) and creates three articles."""
        from padelnomics.content.routes import BUILD_DIR

        template_id, _ = await _create_template()

        # Seed the session with the CSRF token the form submits.
        async with admin_client.session_transaction() as session:
            session["csrf_token"] = "test"

        form_fields = {
            "csrf_token": "test",
            "start_date": "2026-04-01",
            "articles_per_day": "2",
        }
        response = await admin_client.post(
            f"/admin/templates/{template_id}/generate", form=form_fields
        )
        assert response.status_code == 302

        articles = await fetch_all("SELECT * FROM articles")
        assert len(articles) == 3

        # Cleanup
        build_files = (BUILD_DIR / f"{article['slug']}.html" for article in articles)
        for build_file in build_files:
            if build_file.exists():
                build_file.unlink()
# ════════════════════════════════════════════════════════════