feat(pseo): add pSEO Engine admin tab

Operational dashboard at /admin/pseo for the programmatic SEO system:
content gap detection, data freshness signals, article health checks
(hreflang orphans, missing build files, broken scenario refs), and
live generation job monitoring with HTMX progress bars.

- _serving_meta.json written by export_serving.py after atomic DB swap
- content/health.py: pure async query functions for all health checks
- Migration 0021: progress_current/total/error_log on tasks table
- generate_articles() writes progress every 50 articles + on completion
- admin/pseo_routes.py: 6 routes, standalone blueprint
- 5 HTML templates + sidebar nav + fromjson Jinja filter
- 45 tests (all passing); 2 bugs caught and fixed during testing

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

# Conflicts:
#	src/padelnomics/export_serving.py
This commit is contained in:
Deeman
2026-02-24 21:00:00 +01:00
16 changed files with 1953 additions and 5 deletions

View File

@@ -7,6 +7,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
## [Unreleased] ## [Unreleased]
### Added ### Added
- **pSEO Engine admin tab** (`/admin/pseo`) — operational visibility for the programmatic SEO system:
- **Content gap detection** — queries DuckDB serving tables vs SQLite articles to find rows with no matching article per language; per-template HTMX-loaded gap list
- **Data freshness signals** — compares `_serving_meta.json` export timestamp vs `MAX(updated_at)` in articles; per-template status: 🟢 Fresh / 🟡 Stale / 🟣 No articles / ⚪ No data
- **Article health checks** (HTMX partial) — hreflang orphans (EN exists, DE missing), missing HTML build files, broken `[scenario:slug]` references in article markdown
- **Generation job monitoring** — live progress bars polling every 2s while jobs run; stops polling on completion; error drilldown via `<details>`; dedicated `/admin/pseo/jobs` list page
- **`_serving_meta.json`** — written by `export_serving.py` after atomic rename; records `exported_at_utc` and per-table row counts; drives freshness signals in pSEO Engine dashboard
- **Progress tracking columns** on `tasks` table (migration 0021): `progress_current`, `progress_total`, `error_log`; `generate_articles()` writes progress every 50 articles and on completion
- 45 new tests covering all health functions + pSEO routes (access control, rendering, gap detection, generate-gaps POST, job status HTMX polling)
- **Dual market score system** — split the single market score into two branded scores: - **Dual market score system** — split the single market score into two branded scores:
- **padelnomics Marktreife-Score™** (market maturity): existing score, refined — only for cities - **padelnomics Marktreife-Score™** (market maturity): existing score, refined — only for cities
with ≥1 padel venue. Adds ×0.85 saturation discount when `venues_per_100k > 8`. with ≥1 padel venue. Adds ×0.85 saturation discount when `venues_per_100k > 8`.

View File

@@ -107,6 +107,7 @@
- [x] Task queue management (list, retry, delete) - [x] Task queue management (list, retry, delete)
- [x] Lead funnel stats on admin dashboard - [x] Lead funnel stats on admin dashboard
- [x] Email hub (`/admin/emails`) — sent log, inbox, compose, audiences, delivery event tracking via Resend webhooks - [x] Email hub (`/admin/emails`) — sent log, inbox, compose, audiences, delivery event tracking via Resend webhooks
- [x] **pSEO Engine tab** (`/admin/pseo`) — content gap detection, data freshness signals, article health checks (hreflang orphans, missing build files, broken scenario refs), generation job monitoring with live progress bars
### SEO & Legal ### SEO & Legal
- [x] Sitemap (both language variants, `<lastmod>` on all entries) - [x] Sitemap (both language variants, `<lastmod>` on all entries)
@@ -136,6 +137,7 @@
## In Progress 🔄 ## In Progress 🔄
- [ ] **Dual market score system** — Marktreife-Score + Marktpotenzial-Score + expanded data pipeline (merging to master) - [ ] **Dual market score system** — Marktreife-Score + Marktpotenzial-Score + expanded data pipeline (merging to master)
- [ ] **pSEO Engine** — implemented (worktree `pseo-engine`), pending merge to master
--- ---

View File

@@ -24,9 +24,12 @@ Usage:
uv run python -m padelnomics.export_serving uv run python -m padelnomics.export_serving
""" """
import json
import logging import logging
import os import os
import re import re
from datetime import UTC, datetime
from pathlib import Path
import duckdb import duckdb
@@ -45,6 +48,8 @@ def export_serving() -> None:
# (rename across filesystems is not atomic on Linux). # (rename across filesystems is not atomic on Linux).
tmp_path = os.path.join(os.path.dirname(os.path.abspath(serving_path)), "_export.duckdb") tmp_path = os.path.join(os.path.dirname(os.path.abspath(serving_path)), "_export.duckdb")
table_counts: dict[str, int] = {}
src = duckdb.connect(pipeline_path, read_only=True) src = duckdb.connect(pipeline_path, read_only=True)
try: try:
# SQLMesh creates serving views that reference "local".sqlmesh__serving.* # SQLMesh creates serving views that reference "local".sqlmesh__serving.*
@@ -81,6 +86,7 @@ def export_serving() -> None:
dst.execute(f"CREATE OR REPLACE TABLE serving.{logical_name} AS SELECT * FROM _src") dst.execute(f"CREATE OR REPLACE TABLE serving.{logical_name} AS SELECT * FROM _src")
dst.unregister("_src") dst.unregister("_src")
row_count = dst.sql(f"SELECT count(*) FROM serving.{logical_name}").fetchone()[0] row_count = dst.sql(f"SELECT count(*) FROM serving.{logical_name}").fetchone()[0]
table_counts[logical_name] = row_count
logger.info(f" serving.{logical_name}: {row_count:,} rows") logger.info(f" serving.{logical_name}: {row_count:,} rows")
finally: finally:
dst.close() dst.close()
@@ -91,6 +97,16 @@ def export_serving() -> None:
os.rename(tmp_path, serving_path) os.rename(tmp_path, serving_path)
logger.info(f"Serving DB atomically updated: {serving_path}") logger.info(f"Serving DB atomically updated: {serving_path}")
# Write freshness metadata so the pSEO dashboard can show data age without
# querying file mtimes (which are unreliable after rclone syncs).
meta_path = Path(serving_path).parent / "_serving_meta.json"
meta = {
"exported_at_utc": datetime.now(tz=UTC).isoformat(),
"tables": {name: {"row_count": count} for name, count in table_counts.items()},
}
meta_path.write_text(json.dumps(meta))
logger.info("Wrote serving metadata: %s", meta_path)
if __name__ == "__main__": if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(levelname)s %(message)s") logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(levelname)s %(message)s")

View File

@@ -0,0 +1,209 @@
"""
pSEO Engine admin blueprint.
Operational visibility for the programmatic SEO system:
/admin/pseo/ → dashboard (template stats, freshness, recent jobs)
/admin/pseo/health → HTMX partial: health issues
/admin/pseo/gaps/<slug> → HTMX partial: content gaps for one template
/admin/pseo/gaps/<slug>/generate → POST: enqueue gap-fill job
/admin/pseo/jobs → recent generation jobs
/admin/pseo/jobs/<id>/status → HTMX polled: progress bar for one job
Registered as a standalone blueprint so admin/routes.py (already ~2,100 lines)
stays focused on its own domain.
"""
from datetime import date
from pathlib import Path
from quart import Blueprint, flash, redirect, render_template, url_for
from ..auth.routes import role_required
from ..content import discover_templates, load_template
from ..content.health import (
get_all_health_issues,
get_content_gaps,
get_template_freshness,
get_template_stats,
)
from ..core import csrf_protect, fetch_all, fetch_one
# Standalone blueprint for the pSEO Engine admin pages. Every route below is
# mounted under /admin/pseo and its endpoints are referenced as "pseo.<name>"
# (e.g. url_for("pseo.pseo_dashboard")). Templates are looked up in the
# "templates" directory that sits next to this module.
bp = Blueprint(
    "pseo",
    __name__,
    template_folder=str(Path(__file__).parent / "templates"),
    url_prefix="/admin/pseo",
)
@bp.before_request
async def _inject_sidebar_data():
    """Store the unread inbound-email count on ``g`` for the admin sidebar badge.

    Best-effort: if the count query fails for any reason the badge falls
    back to 0 instead of failing the request.
    """
    from quart import g

    unread = 0
    try:
        result = await fetch_one("SELECT COUNT(*) as cnt FROM inbound_emails WHERE is_read = 0")
        if result:
            unread = result["cnt"]
    except Exception:
        # Deliberately swallowed — the sidebar badge must never break a page.
        unread = 0
    g.admin_unread_count = unread
@bp.context_processor
def _admin_context():
    """Make ``unread_count`` available to every template rendered by this blueprint.

    Reads the value stored on ``g`` by the before-request hook and defaults
    to 0 when it has not been set.
    """
    from quart import g

    unread_count = getattr(g, "admin_unread_count", 0)
    return {"unread_count": unread_count}
# ── Dashboard ────────────────────────────────────────────────────────────────
@bp.route("/")
@role_required("admin")
async def pseo_dashboard():
    """Render the pSEO Engine dashboard: template stats, freshness, recent jobs.

    Builds one row per discovered template (its article stats plus its
    freshness record, or an empty dict when no freshness record exists),
    computes the summary-card totals, and loads the five most recent
    ``generate_articles`` tasks for the "Recent Generation Jobs" table.
    """
    templates = discover_templates()
    freshness = await get_template_freshness(templates)
    freshness_by_slug = {f["slug"]: f for f in freshness}

    template_rows = []
    for t in templates:
        stats = await get_template_stats(t["slug"])
        template_rows.append({
            "template": t,
            "stats": stats,
            "freshness": freshness_by_slug.get(t["slug"], {}),
        })

    total_articles = sum(r["stats"]["total"] for r in template_rows)
    total_published = sum(r["stats"]["published"] for r in template_rows)
    stale_count = sum(1 for f in freshness if f["status"] == "stale")

    # Recent generation jobs — enough for the dashboard summary.
    # `payload` must be selected: pseo_dashboard.html reads job.payload to show
    # the template slug next to the job id. Without it the lookup silently
    # resolves to undefined and the slug is never rendered.
    jobs = await fetch_all(
        "SELECT id, task_name, payload, status, progress_current, progress_total,"
        " error, error_log, created_at, completed_at"
        " FROM tasks WHERE task_name = 'generate_articles'"
        " ORDER BY created_at DESC LIMIT 5",
    )

    return await render_template(
        "admin/pseo_dashboard.html",
        template_rows=template_rows,
        total_articles=total_articles,
        total_published=total_published,
        total_templates=len(templates),
        stale_count=stale_count,
        jobs=jobs,
        admin_page="pseo",
    )
# ── Health checks (HTMX partial) ─────────────────────────────────────────────
@bp.route("/health")
@role_required("admin")
async def pseo_health():
    """Return the health-checks partial covering every discovered template."""
    health_report = await get_all_health_issues(discover_templates())
    return await render_template("admin/pseo_health.html", health=health_report)
# ── Content gaps (HTMX partial + generate action) ────────────────────────────
@bp.route("/gaps/<slug>")
@role_required("admin")
async def pseo_gaps_template(slug: str):
    """Return the content-gaps partial for the template identified by ``slug``.

    Responds with a plain-text 404 when ``load_template`` raises
    ``AssertionError`` or ``FileNotFoundError`` for the slug.
    """
    try:
        template_config = load_template(slug)
    except (AssertionError, FileNotFoundError):
        return "Template not found", 404

    missing = await get_content_gaps(
        template_slug=slug,
        data_table=template_config["data_table"],
        natural_key=template_config["natural_key"],
        languages=template_config["languages"],
    )
    return await render_template(
        "admin/pseo_gaps.html",
        template=template_config,
        gaps=missing,
    )
@bp.route("/gaps/<slug>/generate", methods=["POST"])
@role_required("admin")
@csrf_protect
async def pseo_generate_gaps(slug: str):
    """Queue a ``generate_articles`` job for a template that has content gaps.

    Always redirects back to the dashboard. The flash message tells the admin
    whether the template was unknown, had no gaps, or had a job queued.
    """
    from ..worker import enqueue

    dashboard_url_for = "pseo.pseo_dashboard"

    try:
        template_config = load_template(slug)
    except (AssertionError, FileNotFoundError):
        await flash("Template not found.", "error")
        return redirect(url_for(dashboard_url_for))

    missing = await get_content_gaps(
        template_slug=slug,
        data_table=template_config["data_table"],
        natural_key=template_config["natural_key"],
        languages=template_config["languages"],
    )

    if not missing:
        await flash(f"No gaps found for '{template_config['name']}' — nothing to generate.", "info")
        return redirect(url_for(dashboard_url_for))

    # NOTE(review): the job is capped at 500 while the message below reports
    # len(missing) — confirm the two agree when a template has more gaps.
    job_payload = {
        "template_slug": slug,
        "start_date": date.today().isoformat(),
        "articles_per_day": 500,
        "limit": 500,
    }
    await enqueue("generate_articles", job_payload)

    await flash(
        f"Queued generation for {len(missing)} missing articles in '{template_config['name']}'.",
        "success",
    )
    return redirect(url_for(dashboard_url_for))
# ── Generation job monitoring ─────────────────────────────────────────────────
@bp.route("/jobs")
@role_required("admin")
async def pseo_jobs():
    """Render the full list of recent article generation jobs.

    Loads the 20 most recent ``generate_articles`` tasks, newest first.
    """
    # `payload` must be selected: pseo_jobs.html parses job.payload to fill the
    # "Template" column. Without it every row falls through to the "—"
    # placeholder.
    jobs = await fetch_all(
        "SELECT id, task_name, payload, status, progress_current, progress_total,"
        " error, error_log, created_at, completed_at"
        " FROM tasks WHERE task_name = 'generate_articles'"
        " ORDER BY created_at DESC LIMIT 20",
    )
    return await render_template(
        "admin/pseo_jobs.html",
        jobs=jobs,
        admin_page="pseo",
    )
@bp.route("/jobs/<int:job_id>/status")
@role_required("admin")
async def pseo_job_status(job_id: int):
    """Return the job-row partial for one task, as polled by HTMX.

    Responds with a plain-text 404 when no task row has the given id.
    """
    status_query = (
        "SELECT id, status, progress_current, progress_total, error, error_log,"
        " created_at, completed_at"
        " FROM tasks WHERE id = ?"
    )
    record = await fetch_one(status_query, (job_id,))
    if record:
        return await render_template("admin/pseo_job_status.html", job=record)
    return "Job not found", 404

View File

@@ -95,6 +95,12 @@
Templates Templates
</a> </a>
<div class="admin-sidebar__section">pSEO</div>
<a href="{{ url_for('pseo.pseo_dashboard') }}" class="{% if admin_page == 'pseo' %}active{% endif %}">
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor"><path stroke-linecap="round" stroke-linejoin="round" d="M9.75 3.104v5.714a2.25 2.25 0 0 1-.659 1.591L5 14.5M9.75 3.104c-.251.023-.501.05-.75.082m.75-.082a24.301 24.301 0 0 1 4.5 0m0 0v5.714c0 .597.237 1.17.659 1.591L19.8 15.3M14.25 3.104c.251.023.501.05.75.082M19.8 15.3l-1.57.393A9.065 9.065 0 0 1 12 15a9.065 9.065 0 0 1-6.23-.693L5 14.5m14.8.8 1.402 1.402c1.232 1.232.65 3.318-1.067 3.611A48.309 48.309 0 0 1 12 21c-2.773 0-5.491-.235-8.135-.687-1.718-.293-2.3-2.379-1.067-3.61L5 14.5"/></svg>
pSEO Engine
</a>
<div class="admin-sidebar__section">Email</div> <div class="admin-sidebar__section">Email</div>
<a href="{{ url_for('admin.emails') }}" class="{% if admin_page == 'emails' %}active{% endif %}"> <a href="{{ url_for('admin.emails') }}" class="{% if admin_page == 'emails' %}active{% endif %}">
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor"><path stroke-linecap="round" stroke-linejoin="round" d="M21.75 6.75v10.5a2.25 2.25 0 0 1-2.25 2.25h-15a2.25 2.25 0 0 1-2.25-2.25V6.75m19.5 0A2.25 2.25 0 0 0 19.5 4.5h-15a2.25 2.25 0 0 0-2.25 2.25m19.5 0v.243a2.25 2.25 0 0 1-1.07 1.916l-7.5 4.615a2.25 2.25 0 0 1-2.36 0L3.32 8.91a2.25 2.25 0 0 1-1.07-1.916V6.75"/></svg> <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor"><path stroke-linecap="round" stroke-linejoin="round" d="M21.75 6.75v10.5a2.25 2.25 0 0 1-2.25 2.25h-15a2.25 2.25 0 0 1-2.25-2.25V6.75m19.5 0A2.25 2.25 0 0 0 19.5 4.5h-15a2.25 2.25 0 0 0-2.25 2.25m19.5 0v.243a2.25 2.25 0 0 1-1.07 1.916l-7.5 4.615a2.25 2.25 0 0 1-2.36 0L3.32 8.91a2.25 2.25 0 0 1-1.07-1.916V6.75"/></svg>

View File

@@ -0,0 +1,195 @@
{% extends "admin/base_admin.html" %}
{% set admin_page = "pseo" %}
{% block title %}pSEO Engine - {{ config.APP_NAME }}{% endblock %}
{% block admin_head %}
<style>
.pseo-status-badge {
display: inline-flex; align-items: center; gap: 4px;
font-size: 0.6875rem; font-weight: 600; padding: 2px 8px;
border-radius: 9999px;
}
.pseo-status-fresh { background: #D1FAE5; color: #065F46; }
.pseo-status-stale { background: #FEF3C7; color: #92400E; }
.pseo-status-no_data { background: #F1F5F9; color: #64748B; }
.pseo-status-no_articles { background: #EDE9FE; color: #5B21B6; }
.pseo-gaps-panel { border-top: 1px solid #E2E8F0; margin-top: 8px; padding-top: 8px; }
.progress-bar-wrap { height: 6px; background: #E2E8F0; border-radius: 9999px; overflow: hidden; min-width: 80px; }
.progress-bar-fill { height: 100%; background: #1D4ED8; border-radius: 9999px; transition: width 0.3s; }
</style>
{% endblock %}
{% block admin_content %}
<header class="flex justify-between items-center mb-8">
<div>
<h1 class="text-2xl">pSEO Engine</h1>
<p class="text-slate text-sm mt-1">Operational dashboard for programmatic SEO</p>
</div>
<a href="{{ url_for('pseo.pseo_jobs') }}" class="btn-outline btn-sm">All Jobs</a>
</header>
<!-- Summary Cards -->
<div class="grid-4 mb-8">
<div class="card text-center">
<p class="card-header">Total Articles</p>
<p class="text-3xl font-bold text-navy">{{ total_articles }}</p>
<p class="text-xs text-slate mt-1">{{ total_published }} published</p>
</div>
<div class="card text-center">
<p class="card-header">Templates</p>
<p class="text-3xl font-bold text-navy">{{ total_templates }}</p>
</div>
<div class="card text-center">
<p class="card-header">Stale Templates</p>
<p class="text-3xl font-bold {% if stale_count > 0 %}text-amber-600{% else %}text-navy{% endif %}">
{{ stale_count }}
</p>
<p class="text-xs text-slate mt-1">data newer than articles</p>
</div>
<div class="card text-center">
<p class="card-header">Health Checks</p>
<p class="text-3xl font-bold text-navy"></p>
<p class="text-xs text-slate mt-1">see Health section below</p>
</div>
</div>
<!-- Per-Template Table -->
<div class="card mb-8">
<div class="card-header mb-4 flex justify-between items-center">
<span>Templates</span>
<span class="text-xs text-slate">Click "Gaps" to load missing articles per template</span>
</div>
<div class="table-wrap">
<table class="table">
<thead>
<tr>
<th>Template</th>
<th>Data rows</th>
<th>Articles EN</th>
<th>Articles DE</th>
<th>Freshness</th>
<th>Actions</th>
</tr>
</thead>
<tbody>
{% for r in template_rows %}
{% set t = r.template %}
{% set stats = r.stats %}
{% set fr = r.freshness %}
<tr>
<td>
<strong>{{ t.name }}</strong><br>
<span class="text-xs text-slate">{{ t.slug }}</span>
</td>
<td>{{ fr.row_count if fr.row_count is not none else '—' }}</td>
<td>{{ stats.by_language.get('en', {}).get('total', 0) }}</td>
<td>{{ stats.by_language.get('de', {}).get('total', 0) }}</td>
<td>
{% set status = fr.status | default('no_data') %}
<span class="pseo-status-badge pseo-status-{{ status }}">
{% if status == 'fresh' %}&#x1F7E2; Fresh
{% elif status == 'stale' %}&#x1F7E1; Stale
{% elif status == 'no_articles' %}&#x1F7E3; No articles
{% else %}&#x26AA; No data
{% endif %}
</span>
</td>
<td class="flex gap-2 items-center">
<button class="btn-outline btn-sm"
hx-get="{{ url_for('pseo.pseo_gaps_template', slug=t.slug) }}"
hx-target="#gaps-panel-{{ t.slug }}"
hx-swap="innerHTML"
hx-indicator="#gaps-panel-{{ t.slug }}">
Gaps
</button>
<form method="post" action="{{ url_for('pseo.pseo_generate_gaps', slug=t.slug) }}" class="m-0">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<button type="submit" class="btn btn-sm">Generate gaps</button>
</form>
</td>
</tr>
<tr>
<td colspan="6" class="p-0">
<div id="gaps-panel-{{ t.slug }}" class="pseo-gaps-panel" style="padding: 0 1rem 0.5rem;">
<!-- Loaded via HTMX on "Gaps" click -->
</div>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
<!-- Recent Jobs -->
{% if jobs %}
<div class="card mb-8">
<div class="card-header mb-4 flex justify-between items-center">
<span>Recent Generation Jobs</span>
<a href="{{ url_for('pseo.pseo_jobs') }}" class="text-xs text-blue">View all →</a>
</div>
<div class="table-wrap">
<table class="table">
<thead>
<tr>
<th>Job</th>
<th>Status</th>
<th>Progress</th>
<th>Started</th>
</tr>
</thead>
<tbody>
{% for job in jobs %}
<tr>
<td>
<a href="{{ url_for('pseo.pseo_jobs') }}#job-{{ job.id }}" class="text-blue">#{{ job.id }}</a>
{% if job.payload %}
— {{ (job.payload | fromjson).get('template_slug', '') }}
{% endif %}
</td>
<td>
{% if job.status == 'complete' %}
<span class="badge-success">Complete</span>
{% elif job.status == 'failed' %}
<span class="badge-danger">Failed</span>
{% elif job.status == 'pending' %}
<span class="badge-warning">Running</span>
{% else %}
<span class="badge">{{ job.status }}</span>
{% endif %}
</td>
<td>
{% if job.progress_total and job.progress_total > 0 %}
<div class="flex items-center gap-2">
<div class="progress-bar-wrap">
<div class="progress-bar-fill" style="width: {{ [((job.progress_current / job.progress_total) * 100) | int, 100] | min }}%"></div>
</div>
<span class="text-xs text-slate">{{ job.progress_current }}/{{ job.progress_total }}</span>
</div>
{% else %}
{% endif %}
</td>
<td class="text-xs text-slate">{{ job.created_at | default('') | truncate(16, True, '') }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
{% endif %}
<!-- Health Issues (HTMX-loaded) -->
<div id="health-panel"
hx-get="{{ url_for('pseo.pseo_health') }}"
hx-trigger="load delay:500ms"
hx-target="#health-panel"
hx-swap="outerHTML">
<div class="card">
<p class="text-slate text-sm">Loading health checks…</p>
</div>
</div>
{% endblock %}

View File

@@ -0,0 +1,43 @@
{# HTMX partial — rendered inside the gaps panel for one template.
Loaded via GET /admin/pseo/gaps/<slug>. #}
{% if not gaps %}
<p class="text-success text-sm p-2">&#x2713; No gaps — all {{ template.name }} rows have articles.</p>
{% else %}
<div class="flex justify-between items-center mb-2">
<span class="text-sm font-semibold">{{ gaps | length }} missing row{{ 's' if gaps | length != 1 else '' }}</span>
<form method="post" action="{{ url_for('pseo.pseo_generate_gaps', slug=template.slug) }}" class="m-0">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
<button type="submit" class="btn btn-sm">Generate {{ gaps | length }} missing</button>
</form>
</div>
<div class="table-wrap" style="max-height: 300px; overflow-y: auto;">
<table class="table text-sm">
<thead>
<tr>
<th>{{ template.natural_key }}</th>
<th>Missing languages</th>
{% for key in (gaps[0].keys() | list | reject('equalto', '_natural_key') | reject('equalto', '_missing_languages') | list)[:4] %}
<th>{{ key }}</th>
{% endfor %}
</tr>
</thead>
<tbody>
{% for gap in gaps[:100] %}
<tr>
<td class="font-mono text-xs">{{ gap._natural_key }}</td>
<td class="text-xs text-amber-700">{{ gap._missing_languages | join(', ') }}</td>
{% for key in (gap.keys() | list | reject('equalto', '_natural_key') | reject('equalto', '_missing_languages') | list)[:4] %}
<td class="text-xs text-slate">{{ gap[key] | truncate(30) if gap[key] is string else gap[key] }}</td>
{% endfor %}
</tr>
{% endfor %}
{% if gaps | length > 100 %}
<tr>
<td colspan="10" class="text-xs text-slate text-center">… and {{ gaps | length - 100 }} more rows</td>
</tr>
{% endif %}
</tbody>
</table>
</div>
{% endif %}

View File

@@ -0,0 +1,99 @@
{# HTMX partial — loaded by pseo_dashboard.html and /admin/pseo/health directly.
When loaded via HTMX (hx-swap="outerHTML"), renders a full card.
When loaded standalone (full page), also works since it just outputs HTML. #}
<div class="card" id="health-panel">
<div class="card-header mb-4 flex justify-between items-center">
<span>Health Checks</span>
<span class="text-xs text-slate">{{ health.counts.total }} issue{{ 's' if health.counts.total != 1 else '' }}</span>
</div>
{% if health.counts.total == 0 %}
<p class="text-success text-sm">&#x2713; No issues found — all articles are healthy.</p>
{% else %}
<!-- Hreflang Orphans -->
{% if health.hreflang_orphans %}
<details class="mb-4">
<summary class="cursor-pointer font-semibold text-sm text-amber-700">
&#x26A0; Hreflang orphans ({{ health.counts.hreflang_orphans }})
<span class="text-xs font-normal text-slate ml-2">— articles missing a sibling language</span>
</summary>
<div class="table-wrap mt-2">
<table class="table text-sm">
<thead><tr><th>Template</th><th>URL path</th><th>Present</th><th>Missing</th></tr></thead>
<tbody>
{% for o in health.hreflang_orphans[:50] %}
<tr>
<td class="text-xs text-slate">{{ o.template_slug }}</td>
<td><a href="{{ o.url_path }}" class="text-blue text-xs" target="_blank">{{ o.url_path }}</a></td>
<td class="text-xs">{{ o.present_languages | join(', ') }}</td>
<td class="text-xs text-red-600">{{ o.missing_languages | join(', ') }}</td>
</tr>
{% endfor %}
{% if health.hreflang_orphans | length > 50 %}
<tr><td colspan="4" class="text-xs text-slate text-center">… and {{ health.hreflang_orphans | length - 50 }} more</td></tr>
{% endif %}
</tbody>
</table>
</div>
</details>
{% endif %}
<!-- Missing Build Files -->
{% if health.missing_build_files %}
<details class="mb-4">
<summary class="cursor-pointer font-semibold text-sm text-red-700">
&#x274C; Missing build files ({{ health.counts.missing_build_files }})
<span class="text-xs font-normal text-slate ml-2">— published articles with no HTML on disk</span>
</summary>
<div class="table-wrap mt-2">
<table class="table text-sm">
<thead><tr><th>Slug</th><th>Language</th><th>URL path</th><th>Expected path</th></tr></thead>
<tbody>
{% for m in health.missing_build_files[:50] %}
<tr>
<td class="text-xs font-mono">{{ m.slug }}</td>
<td class="text-xs">{{ m.language }}</td>
<td class="text-xs"><a href="{{ m.url_path }}" class="text-blue" target="_blank">{{ m.url_path }}</a></td>
<td class="text-xs text-slate font-mono">{{ m.expected_path }}</td>
</tr>
{% endfor %}
{% if health.missing_build_files | length > 50 %}
<tr><td colspan="4" class="text-xs text-slate text-center">… and {{ health.missing_build_files | length - 50 }} more</td></tr>
{% endif %}
</tbody>
</table>
</div>
</details>
{% endif %}
<!-- Broken Scenario Refs -->
{% if health.broken_scenario_refs %}
<details class="mb-4">
<summary class="cursor-pointer font-semibold text-sm text-red-700">
&#x274C; Broken scenario refs ({{ health.counts.broken_scenario_refs }})
<span class="text-xs font-normal text-slate ml-2">— [scenario:slug] markers referencing deleted scenarios</span>
</summary>
<div class="table-wrap mt-2">
<table class="table text-sm">
<thead><tr><th>Slug</th><th>Language</th><th>Broken refs</th></tr></thead>
<tbody>
{% for b in health.broken_scenario_refs[:50] %}
<tr>
<td class="text-xs font-mono">{{ b.slug }}</td>
<td class="text-xs">{{ b.language }}</td>
<td class="text-xs text-red-600 font-mono">{{ b.broken_scenario_refs | join(', ') }}</td>
</tr>
{% endfor %}
{% if health.broken_scenario_refs | length > 50 %}
<tr><td colspan="3" class="text-xs text-slate text-center">… and {{ health.broken_scenario_refs | length - 50 }} more</td></tr>
{% endif %}
</tbody>
</table>
</div>
</details>
{% endif %}
{% endif %}
</div>

View File

@@ -0,0 +1,45 @@
{# HTMX partial — replaces the entire <tr> for a job row while it's running.
The hx-get / hx-trigger="every 2s" polling attributes are only emitted while
job.status == 'pending', so polling stops as soon as the job reaches any other
status (e.g. complete or failed). #}
{% set pct = [((job.progress_current / job.progress_total) * 100) | int, 100] | min if job.progress_total else 0 %}
<tr id="job-{{ job.id }}"
{% if job.status == 'pending' %}
hx-get="{{ url_for('pseo.pseo_job_status', job_id=job.id) }}"
hx-trigger="every 2s"
hx-target="this"
hx-swap="outerHTML"
{% endif %}>
<td class="text-xs text-slate">#{{ job.id }}</td>
<td></td>{# payload not re-fetched in status endpoint — static display #}
<td>
{% if job.status == 'complete' %}
<span class="badge-success">Complete</span>
{% elif job.status == 'failed' %}
<span class="badge-danger">Failed</span>
{% else %}
<span class="badge-warning">Running…</span>
{% endif %}
</td>
<td>
{% if job.progress_total and job.progress_total > 0 %}
<div class="flex items-center gap-2">
<div class="progress-bar-wrap" style="min-width:120px;">
<div class="progress-bar-fill" style="width: {{ pct }}%"></div>
</div>
<span class="text-xs text-slate">{{ job.progress_current }}/{{ job.progress_total }}</span>
</div>
{% else %}—{% endif %}
</td>
<td class="text-xs text-slate">{{ (job.created_at or '') | truncate(19, True, '') }}</td>
<td class="text-xs text-slate">{{ (job.completed_at or '') | truncate(19, True, '') }}</td>
<td>
{% if job.error %}
<details>
<summary class="text-xs text-red-600 cursor-pointer">Error</summary>
<pre class="text-xs mt-1 p-2 bg-gray-50 rounded overflow-auto max-w-xs">{{ job.error[:500] }}</pre>
</details>
{% else %}—{% endif %}
</td>
</tr>

View File

@@ -0,0 +1,95 @@
{% extends "admin/base_admin.html" %}
{% set admin_page = "pseo" %}
{% block title %}pSEO Jobs - {{ config.APP_NAME }}{% endblock %}
{% block admin_head %}
<style>
.progress-bar-wrap { height: 6px; background: #E2E8F0; border-radius: 9999px; overflow: hidden; min-width: 120px; }
.progress-bar-fill { height: 100%; background: #1D4ED8; border-radius: 9999px; transition: width 0.3s; }
</style>
{% endblock %}
{% block admin_content %}
<header class="flex justify-between items-center mb-8">
<div>
<h1 class="text-2xl">Generation Jobs</h1>
<p class="text-slate text-sm mt-1">Recent article generation runs</p>
</div>
<a href="{{ url_for('pseo.pseo_dashboard') }}" class="btn-outline btn-sm">← pSEO Engine</a>
</header>
{% if not jobs %}
<div class="card">
<p class="text-slate text-sm">No generation jobs found. Use the pSEO Engine dashboard to generate articles.</p>
</div>
{% else %}
<div class="card">
<div class="table-wrap">
<table class="table">
<thead>
<tr>
<th>#</th>
<th>Template</th>
<th>Status</th>
<th>Progress</th>
<th>Started</th>
<th>Completed</th>
<th>Error</th>
</tr>
</thead>
<tbody>
{% for job in jobs %}
<tr id="job-{{ job.id }}">
<td class="text-xs text-slate">#{{ job.id }}</td>
<td>
{% if job.payload %}
{% set payload = job.payload | fromjson %}
<span class="font-mono text-xs">{{ payload.get('template_slug', '—') }}</span>
{% else %}—{% endif %}
</td>
<td>
{% if job.status == 'complete' %}
<span class="badge-success">Complete</span>
{% elif job.status == 'failed' %}
<span class="badge-danger">Failed</span>
{% elif job.status == 'pending' %}
{# Poll live status for running jobs #}
<div hx-get="{{ url_for('pseo.pseo_job_status', job_id=job.id) }}"
hx-trigger="load, every 2s"
hx-target="closest tr"
hx-swap="outerHTML">
<span class="badge-warning">Running…</span>
</div>
{% else %}
<span class="badge">{{ job.status }}</span>
{% endif %}
</td>
<td>
{% if job.progress_total and job.progress_total > 0 %}
<div class="flex items-center gap-2">
<div class="progress-bar-wrap">
<div class="progress-bar-fill" style="width: {{ [((job.progress_current / job.progress_total) * 100) | int, 100] | min }}%"></div>
</div>
<span class="text-xs text-slate">{{ job.progress_current }}/{{ job.progress_total }}</span>
</div>
{% else %}—{% endif %}
</td>
<td class="text-xs text-slate">{{ (job.created_at or '') | truncate(19, True, '') }}</td>
<td class="text-xs text-slate">{{ (job.completed_at or '') | truncate(19, True, '') }}</td>
<td>
{% if job.error %}
<details>
<summary class="text-xs text-red-600 cursor-pointer">Error</summary>
<pre class="text-xs mt-1 p-2 bg-gray-50 rounded overflow-auto max-w-xs">{{ job.error[:500] }}</pre>
</details>
{% else %}—{% endif %}
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
{% endif %}
{% endblock %}

View File

@@ -1,13 +1,22 @@
""" """
Padelnomics - Application factory and entry point. Padelnomics - Application factory and entry point.
""" """
import json
import time import time
from pathlib import Path from pathlib import Path
from quart import Quart, Response, abort, g, redirect, request, session, url_for from quart import Quart, Response, abort, g, redirect, request, session, url_for
from .analytics import close_analytics_db, open_analytics_db from .analytics import close_analytics_db, open_analytics_db
from .core import close_db, config, get_csrf_token, init_db, is_flag_enabled, setup_logging, setup_request_id from .core import (
close_db,
config,
get_csrf_token,
init_db,
is_flag_enabled,
setup_logging,
setup_request_id,
)
setup_logging() setup_logging()
from .i18n import LANG_BLUEPRINTS, SUPPORTED_LANGS, get_country_name, get_translations from .i18n import LANG_BLUEPRINTS, SUPPORTED_LANGS, get_country_name, get_translations
@@ -97,6 +106,7 @@ def create_app() -> Quart:
app.jinja_env.filters["fmt_n"] = _fmt_n app.jinja_env.filters["fmt_n"] = _fmt_n
app.jinja_env.filters["tformat"] = _tformat # translate with placeholders: {{ t.key | tformat(count=n) }} app.jinja_env.filters["tformat"] = _tformat # translate with placeholders: {{ t.key | tformat(count=n) }}
app.jinja_env.filters["country_name"] = get_country_name # {{ article.country | country_name(lang) }} app.jinja_env.filters["country_name"] = get_country_name # {{ article.country | country_name(lang) }}
app.jinja_env.filters["fromjson"] = json.loads # {{ job.payload | fromjson }}
# Session config # Session config
app.config["SESSION_COOKIE_SECURE"] = not config.DEBUG app.config["SESSION_COOKIE_SECURE"] = not config.DEBUG
@@ -303,6 +313,7 @@ def create_app() -> Quart:
# Blueprint registration # Blueprint registration
# ------------------------------------------------------------------------- # -------------------------------------------------------------------------
from .admin.pseo_routes import bp as pseo_bp
from .admin.routes import bp as admin_bp from .admin.routes import bp as admin_bp
from .auth.routes import bp as auth_bp from .auth.routes import bp as auth_bp
from .billing.routes import bp as billing_bp from .billing.routes import bp as billing_bp
@@ -327,6 +338,7 @@ def create_app() -> Quart:
app.register_blueprint(dashboard_bp) app.register_blueprint(dashboard_bp)
app.register_blueprint(billing_bp) app.register_blueprint(billing_bp)
app.register_blueprint(admin_bp) app.register_blueprint(admin_bp)
app.register_blueprint(pseo_bp)
app.register_blueprint(webhooks_bp) app.register_blueprint(webhooks_bp)
# Content catch-all LAST — lives under /<lang> too # Content catch-all LAST — lives under /<lang> too

View File

@@ -284,6 +284,7 @@ async def generate_articles(
*, *,
limit: int = 500, limit: int = 500,
base_url: str = "https://padelnomics.io", base_url: str = "https://padelnomics.io",
task_id: int | None = None,
) -> int: ) -> int:
""" """
Generate articles from a git template + DuckDB data. Generate articles from a git template + DuckDB data.
@@ -297,8 +298,14 @@ async def generate_articles(
- write HTML to disk - write HTML to disk
- upsert article row in SQLite - upsert article row in SQLite
Returns count of articles generated. If task_id is given, writes progress_current / progress_total / error_log
to the tasks table every _PROGRESS_BATCH articles so the pSEO dashboard
can show a live progress bar. Per-article errors are logged and collected
rather than aborting the run — the full task still completes.
Returns count of articles generated (excluding per-article errors).
""" """
from ..core import execute as db_execute
from ..planner.calculator import DEFAULTS, calc, validate_state from ..planner.calculator import DEFAULTS, calc, validate_state
from .routes import bake_scenario_cards, is_reserved_path from .routes import bake_scenario_cards, is_reserved_path
@@ -330,6 +337,15 @@ async def generate_articles(
t_calc = t_render = t_bake = 0.0 t_calc = t_render = t_bake = 0.0
_BATCH_SIZE = 200 _BATCH_SIZE = 200
_PROGRESS_BATCH = 50 # write task progress every N articles (avoid write amplification)
# Write progress_total before the loop so the dashboard can show 0/N immediately.
if task_id is not None:
total = len(rows) * len(config["languages"])
await db_execute(
"UPDATE tasks SET progress_total = ? WHERE id = ?",
(total, task_id),
)
async with transaction() as db: async with transaction() as db:
for row in rows: for row in rows:
@@ -515,12 +531,27 @@ async def generate_articles(
elif generated % 25 == 0: elif generated % 25 == 0:
logger.info("%s: %d articles written…", slug, generated) logger.info("%s: %d articles written…", slug, generated)
# Write progress every _PROGRESS_BATCH articles so the pSEO
# dashboard live-updates without excessive write amplification.
if task_id is not None and generated % _PROGRESS_BATCH == 0:
await db_execute(
"UPDATE tasks SET progress_current = ? WHERE id = ?",
(generated, task_id),
)
# Stagger dates # Stagger dates
published_today += 1 published_today += 1
if published_today >= articles_per_day: if published_today >= articles_per_day:
published_today = 0 published_today = 0
publish_date += timedelta(days=1) publish_date += timedelta(days=1)
# Write final progress so the dashboard shows 100% on completion.
if task_id is not None:
await db_execute(
"UPDATE tasks SET progress_current = ? WHERE id = ?",
(generated, task_id),
)
logger.info( logger.info(
"%s: done — %d total | calc=%.1fs render=%.1fs bake=%.1fs", "%s: done — %d total | calc=%.1fs render=%.1fs bake=%.1fs",
slug, generated, t_calc, t_render, t_bake, slug, generated, t_calc, t_render, t_bake,

View File

@@ -0,0 +1,397 @@
"""
pSEO Engine health checks and content gap queries.
All functions are async, pure queries — no side effects.
Used by the pSEO Engine admin dashboard.
Functions overview:
get_template_stats() — article counts per status/language for one template
get_template_freshness() — compare _serving_meta.json timestamp vs last article generation
get_content_gaps() — DuckDB rows with no matching article for a template+language
check_hreflang_orphans() — published articles missing a sibling language
check_missing_build_files()— published articles whose HTML file is absent from disk
check_broken_scenario_refs()— articles referencing [scenario:slug] that doesn't exist
get_all_health_issues() — run all checks, return counts + details
"""
import json
import logging
import os
import re
from datetime import datetime
from pathlib import Path
from ..analytics import fetch_analytics
from ..core import fetch_all
logger = logging.getLogger(__name__)
# Directory where generate_articles() writes HTML + markdown source files.
# The path is relative, so it is resolved against the process working directory.
BUILD_DIR = Path("data/content/_build")
# Pattern matching [scenario:slug] and [scenario:slug:section] markers.
# Group 1 captures the scenario slug (lowercase letters, digits, "_" and "-");
# the optional ":section" suffix (lowercase letters only) is matched but not captured.
_SCENARIO_REF_RE = re.compile(r"\[scenario:([a-z0-9_-]+)(?::[a-z]+)?\]")
def _validate_table_name(data_table: str) -> None:
    """Guard against SQL injection in table names.

    The name is interpolated directly into a DuckDB query, so only a
    lowercase identifier (letters, digits, underscores, with dots for schema
    qualification) is accepted.

    Args:
        data_table: table name, e.g. ``"serving.pseo_city_costs_de"``.

    Raises:
        AssertionError: if ``data_table`` contains anything outside the
            allowed character set.
    """
    # fullmatch instead of match + "$": "$" also matches right before a
    # trailing newline, which would let "tbl\n" slip through.
    if re.fullmatch(r"[a-z_][a-z0-9_.]*", data_table) is None:
        # Raised explicitly (rather than via `assert`) so the guard is not
        # stripped when Python runs with -O; the exception type is unchanged.
        raise AssertionError(f"Invalid table name: {data_table}")
def _read_serving_meta() -> dict:
    """Read ``_serving_meta.json`` written by export_serving.py.

    The file is looked up next to the DuckDB file named by the
    ``SERVING_DUCKDB_PATH`` environment variable (default
    ``data/analytics.duckdb``).

    Returns:
        The parsed JSON object, or ``{}`` when the file is absent, unreadable,
        not valid UTF-8 / JSON, or does not contain a JSON object.
    """
    serving_path = os.environ.get("SERVING_DUCKDB_PATH", "data/analytics.duckdb")
    meta_path = Path(serving_path).parent / "_serving_meta.json"
    try:
        # A missing file raises FileNotFoundError (an OSError), so no separate
        # exists() check is needed. ValueError covers both JSONDecodeError and
        # UnicodeDecodeError.
        meta = json.loads(meta_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        return {}
    # Callers use .get() on the result; anything but a JSON object is unusable.
    return meta if isinstance(meta, dict) else {}
def _parse_dt(s: str | None) -> datetime | None:
"""Parse an ISO datetime string to a naive UTC datetime. Returns None on failure."""
if not s:
return None
try:
dt = datetime.fromisoformat(s)
# Strip timezone info so both aware (from meta) and naive (from SQLite) compare cleanly.
return dt.replace(tzinfo=None)
except (ValueError, TypeError):
return None
# ── Template statistics ───────────────────────────────────────────────────────
async def get_template_stats(template_slug: str) -> dict:
    """Count a template's articles overall and per language, split by status.

    Args:
        template_slug: value matched against ``articles.template_slug``.

    Returns:
        A dict of the form::

            {
                "total": N,
                "published": N,
                "draft": N,
                "scheduled": N,
                "by_language": {"en": {"total": N, "published": N, ...}, ...},
            }

        Rows whose status has no matching counter key are still included
        in ``"total"``.
    """
    def _zeroed() -> dict:
        # Fresh counter set; used for the top level and for each language.
        return {"total": 0, "published": 0, "draft": 0, "scheduled": 0}

    grouped = await fetch_all(
        "SELECT status, language, COUNT(*) as cnt FROM articles"
        " WHERE template_slug = ? GROUP BY status, language",
        (template_slug,),
    )

    stats: dict = {**_zeroed(), "by_language": {}}
    for record in grouped:
        count = record["cnt"]
        status = record["status"]
        language = record["language"]

        stats["total"] += count
        if status in stats:
            stats[status] += count

        per_language = stats["by_language"].setdefault(language, _zeroed())
        per_language["total"] += count
        if status in per_language:
            per_language[status] += count
    return stats
# ── Data freshness ────────────────────────────────────────────────────────────
async def get_template_freshness(templates: list[dict]) -> list[dict]:
    """Compare the serving export timestamp with each template's latest article.

    The export timestamp comes from ``exported_at_utc`` in
    ``_serving_meta.json``; the article timestamp is
    ``MAX(COALESCE(updated_at, created_at))`` over the template's articles.

    Args:
        templates: template configs; each needs ``"slug"`` and may carry
            ``"name"`` and ``"data_table"``.

    Returns:
        One dict per template, in input order::

            {
                "slug": str,
                "name": str,
                "data_table": str,
                "exported_at_utc": str | None,  # raw value from the meta file
                "last_generated": str | None,   # raw value from SQLite
                "row_count": int | None,        # from the meta "tables" entry
                "status": "fresh" | "stale" | "no_articles" | "no_data",
            }

    Status semantics:
        "no_data"     — no usable export timestamp (meta file absent, or its
                        ``exported_at_utc`` is missing or unparseable)
        "no_articles" — an export exists but no article timestamp was found
        "stale"       — the export is newer than the latest article
        "fresh"       — the latest article is at least as new as the export
    """
    meta = _read_serving_meta()
    exported_at_str = meta.get("exported_at_utc")
    exported_at = _parse_dt(exported_at_str)
    table_meta = meta.get("tables", {})

    result = []
    for t in templates:
        slug = t["slug"]
        data_table = t.get("data_table", "")
        # Strip schema prefix to match the key in the meta "tables" dict,
        # e.g. "serving.pseo_city_costs_de" → "pseo_city_costs_de".
        table_key = data_table.rsplit(".", 1)[-1]

        rows = await fetch_all(
            "SELECT MAX(COALESCE(updated_at, created_at)) as last_gen FROM articles"
            " WHERE template_slug = ?",
            (slug,),
        )
        last_gen_str = rows[0]["last_gen"] if rows else None
        last_gen = _parse_dt(last_gen_str)
        row_count = table_meta.get(table_key, {}).get("row_count")

        # Decide on the *parsed* timestamps: a present-but-unparseable export
        # timestamp must not fall through to "fresh", because freshness
        # cannot be determined without it.
        if exported_at is None:
            status = "no_data"
        elif last_gen is None:
            status = "no_articles"
        elif exported_at > last_gen:
            # New data available — articles haven't been regenerated against it yet.
            status = "stale"
        else:
            status = "fresh"

        result.append({
            "slug": slug,
            "name": t.get("name", slug),
            "data_table": data_table,
            "exported_at_utc": exported_at_str,
            "last_generated": last_gen_str,
            "row_count": row_count,
            "status": status,
        })
    return result
# ── Content gaps ──────────────────────────────────────────────────────────────
async def get_content_gaps(
    template_slug: str,
    data_table: str,
    natural_key: str,
    languages: list[str],
    limit: int = 200,
) -> list[dict]:
    """Find DuckDB rows lacking an article in at least one requested language.

    An article counts as covering a row when its slug equals
    ``"{template_slug}-{lang}-{natural_key_value}"``, so gaps are detected by
    slug comparison alone, without rendering URL patterns.

    Args:
        template_slug: template whose articles are inspected.
        data_table: DuckDB table to read; validated before being interpolated
            into the query.
        natural_key: column in ``data_table`` holding the natural key.
        languages: languages that should each have an article; must be
            non-empty.
        limit: maximum number of DuckDB rows fetched.

    Returns:
        The DuckDB rows that have a gap, each extended with
        ``"_natural_key"`` (the key value as a string) and
        ``"_missing_languages"`` (languages without an article).
    """
    assert languages, "languages must not be empty"
    _validate_table_name(data_table)

    article_rows = await fetch_all(
        "SELECT slug, language FROM articles WHERE template_slug = ?",
        (template_slug,),
    )

    # (language, natural key value) pairs that already have an article.
    slug_prefixes = {code: f"{template_slug}-{code}-" for code in languages}
    covered: set[tuple[str, str]] = set()
    for article in article_rows:
        code = article["language"]
        expected_prefix = slug_prefixes.get(code)
        if code not in slug_prefixes:
            continue
        article_slug = article["slug"]
        if article_slug.startswith(expected_prefix):
            covered.add((code, article_slug[len(expected_prefix):]))

    # NOTE(review): LIMIT is applied to the rows scanned, not to the gaps
    # found, so gaps beyond the first `limit` rows are not reported — confirm
    # this is intended.
    source_rows = await fetch_analytics(
        f"SELECT * FROM {data_table} LIMIT ?",
        [limit],
    )

    gaps = []
    for source_row in source_rows:
        key_value = str(source_row.get(natural_key, ""))
        absent = [code for code in languages if (code, key_value) not in covered]
        if not absent:
            continue
        gaps.append({**source_row, "_natural_key": key_value, "_missing_languages": absent})
    return gaps
# ── Health checks ─────────────────────────────────────────────────────────────
async def check_hreflang_orphans(templates: list[dict]) -> list[dict]:
    """Report published articles that lack a sibling in an expected language.

    Published articles of each template are grouped by the natural key taken
    from the slug pattern ``"{template_slug}-{lang}-{natural_key}"``. A group
    that does not cover every language listed by the template is an orphan.
    Templates with fewer than two languages are skipped.

    Args:
        templates: template configs with ``"slug"`` and optionally
            ``"languages"`` (defaults to ``["en"]``).

    Returns:
        A list of dicts::

            {
                "template_slug": str,
                "url_path": str,               # from the first article seen in the group
                "present_languages": list[str],
                "missing_languages": list[str],
            }
    """
    findings = []
    for template in templates:
        wanted = set(template.get("languages", ["en"]))
        if len(wanted) < 2:
            # Single-language template — nothing can be orphaned.
            continue

        published = await fetch_all(
            "SELECT slug, language, url_path FROM articles"
            " WHERE template_slug = ? AND status = 'published'",
            (template["slug"],),
        )

        template_prefix = template["slug"] + "-"
        # natural key → {"langs": languages seen, "url_path": first url seen}
        groups: dict[str, dict] = {}
        for article in published:
            article_slug = article["slug"]
            language = article["language"]
            if not article_slug.startswith(template_prefix):
                continue
            tail = article_slug[len(template_prefix):]  # "{lang}-{natural_key}"
            language_marker = language + "-"
            if not tail.startswith(language_marker):
                continue
            natural_key = tail[len(language_marker):]
            if natural_key not in groups:
                groups[natural_key] = {"langs": set(), "url_path": article["url_path"]}
            groups[natural_key]["langs"].add(language)

        for group in groups.values():
            seen = group["langs"]
            absent = sorted(wanted - seen)
            if not absent:
                continue
            findings.append({
                "template_slug": template["slug"],
                "url_path": group["url_path"],
                "present_languages": sorted(seen),
                "missing_languages": absent,
            })
    return findings
async def check_missing_build_files(build_dir: Path | None = None) -> list[dict]:
    """List published articles whose rendered HTML file is not on disk.

    The file is expected at ``{build_dir}/{language}/{slug}.html``.

    Args:
        build_dir: root of the build output; falls back to ``BUILD_DIR``.

    Returns:
        One dict per missing file with the keys ``"id"``, ``"slug"``,
        ``"language"``, ``"url_path"``, ``"template_slug"`` and
        ``"expected_path"``.
    """
    root = build_dir or BUILD_DIR
    published = await fetch_all(
        "SELECT id, slug, language, url_path, template_slug FROM articles"
        " WHERE status = 'published'",
    )

    report = []
    for article in published:
        html_file = root / article["language"] / f"{article['slug']}.html"
        if html_file.exists():
            continue
        entry = {
            column: article[column]
            for column in ("id", "slug", "language", "url_path", "template_slug")
        }
        entry["expected_path"] = str(html_file)
        report.append(entry)
    return report
async def check_broken_scenario_refs(build_dir: Path | None = None) -> list[dict]:
    """Find pSEO articles whose markdown references an unknown scenario.

    The markdown source is read from ``{build_dir}/{language}/md/{slug}.md``.
    Only published articles with a ``template_slug`` are inspected, and
    articles without a markdown file are skipped. Every ``[scenario:slug]``
    marker is checked against the slugs in ``published_scenarios``.

    Args:
        build_dir: root of the build output; falls back to ``BUILD_DIR``.

    Returns:
        One dict per affected article with the keys ``"id"``, ``"slug"``,
        ``"language"``, ``"url_path"`` and ``"broken_scenario_refs"`` (sorted
        list of unknown scenario slugs).
    """
    root = build_dir or BUILD_DIR

    known_scenarios = {
        row["slug"] for row in await fetch_all("SELECT slug FROM published_scenarios")
    }
    candidates = await fetch_all(
        "SELECT id, slug, language, url_path FROM articles"
        " WHERE status = 'published' AND template_slug IS NOT NULL",
    )

    findings = []
    for article in candidates:
        source_file = root / article["language"] / "md" / f"{article['slug']}.md"
        if not source_file.exists():
            continue
        # The pattern has a single capturing group, so findall yields the slugs.
        referenced = set(_SCENARIO_REF_RE.findall(source_file.read_text()))
        dangling = sorted(referenced - known_scenarios)
        if not dangling:
            continue
        findings.append({
            "id": article["id"],
            "slug": article["slug"],
            "language": article["language"],
            "url_path": article["url_path"],
            "broken_scenario_refs": dangling,
        })
    return findings
# ── Aggregate check ───────────────────────────────────────────────────────────
async def get_all_health_issues(
    templates: list[dict],
    build_dir: Path | None = None,
) -> dict:
    """Run every health check and bundle the findings with their counts.

    The checks run one after another: hreflang orphans, missing build files,
    broken scenario references.

    Args:
        templates: template configs passed to the hreflang check.
        build_dir: build output root passed to the two file-based checks.

    Returns:
        A dict of the form::

            {
                "hreflang_orphans": [...],
                "missing_build_files": [...],
                "broken_scenario_refs": [...],
                "counts": {
                    "hreflang_orphans": N,
                    "missing_build_files": N,
                    "broken_scenario_refs": N,
                    "total": N,
                },
            }
    """
    details = {
        "hreflang_orphans": await check_hreflang_orphans(templates),
        "missing_build_files": await check_missing_build_files(build_dir),
        "broken_scenario_refs": await check_broken_scenario_refs(build_dir),
    }
    counts = {check: len(items) for check, items in details.items()}
    counts["total"] = sum(counts.values())
    return {**details, "counts": counts}

View File

@@ -0,0 +1,18 @@
"""Add progress tracking columns to the tasks table.
Enables the pSEO Engine dashboard to show live progress during article
generation jobs: a progress bar (current/total) and an error log for
per-article failures without aborting the whole run.
"""
def up(conn) -> None:
    """Add ``progress_current``, ``progress_total`` and ``error_log`` to ``tasks``.

    All three columns are NOT NULL with a default (0, 0 and ``'[]'``).

    Args:
        conn: database connection exposing ``execute(sql)``.
    """
    statements = (
        "ALTER TABLE tasks ADD COLUMN progress_current INTEGER NOT NULL DEFAULT 0",
        "ALTER TABLE tasks ADD COLUMN progress_total INTEGER NOT NULL DEFAULT 0",
        "ALTER TABLE tasks ADD COLUMN error_log TEXT NOT NULL DEFAULT '[]'",
    )
    for statement in statements:
        conn.execute(statement)

View File

@@ -4,11 +4,10 @@ Background task worker - SQLite-based queue (no Redis needed).
import asyncio import asyncio
import json import json
import logging
import traceback import traceback
from datetime import datetime, timedelta from datetime import datetime, timedelta
import logging
from .core import ( from .core import (
EMAIL_ADDRESSES, EMAIL_ADDRESSES,
config, config,
@@ -754,8 +753,11 @@ async def handle_generate_articles(payload: dict) -> None:
start_date = date_cls.fromisoformat(payload["start_date"]) start_date = date_cls.fromisoformat(payload["start_date"])
articles_per_day = payload.get("articles_per_day", 3) articles_per_day = payload.get("articles_per_day", 3)
limit = payload.get("limit", 500) limit = payload.get("limit", 500)
task_id = payload.get("_task_id")
count = await generate_articles(slug, start_date, articles_per_day, limit=limit) count = await generate_articles(
slug, start_date, articles_per_day, limit=limit, task_id=task_id
)
logger.info("Generated %s articles for template '%s'", count, slug) logger.info("Generated %s articles for template '%s'", count, slug)
@@ -777,6 +779,9 @@ async def process_task(task: dict) -> None:
try: try:
payload = json.loads(task["payload"]) if task["payload"] else {} payload = json.loads(task["payload"]) if task["payload"] else {}
# Inject task_id so progress-aware handlers (e.g. generate_articles) can
# write progress_current to the tasks table without a separate lookup.
payload["_task_id"] = task_id
await handler(payload) await handler(payload)
await mark_complete(task_id) await mark_complete(task_id)
logger.info("Completed: %s (id=%s)", task_name, task_id) logger.info("Completed: %s (id=%s)", task_name, task_id)

765
web/tests/test_pseo.py Normal file
View File

@@ -0,0 +1,765 @@
"""
Tests for the pSEO Engine: health checks, content gaps, freshness, and admin routes.
Covers:
- content/health.py: get_template_stats, get_template_freshness, get_content_gaps,
check_hreflang_orphans, check_missing_build_files, check_broken_scenario_refs,
get_all_health_issues
- admin/pseo_routes.py: all 6 routes (dashboard, health, gaps, generate, jobs, job status)
"""
import json
from unittest.mock import patch
import pytest
from padelnomics.content.health import (
check_broken_scenario_refs,
check_hreflang_orphans,
check_missing_build_files,
get_all_health_issues,
get_content_gaps,
get_template_freshness,
get_template_stats,
)
from padelnomics.core import execute, utcnow_iso
from padelnomics import core
# ── Fixtures ──────────────────────────────────────────────────────────────────
@pytest.fixture
async def admin_client(app, db):
    """Yield a test client whose session belongs to a freshly created admin.

    Inserts a user plus an ``admin`` role row, commits, then stores the new
    user's id in the client session before handing the client to the test.
    """
    created_at = utcnow_iso()
    async with db.execute(
        "INSERT INTO users (email, name, created_at) VALUES (?, ?, ?)",
        ("pseo-admin@test.com", "pSEO Admin", created_at),
    ) as insert_cursor:
        user_id = insert_cursor.lastrowid
    await db.execute(
        "INSERT INTO user_roles (user_id, role) VALUES (?, 'admin')", (user_id,)
    )
    await db.commit()

    async with app.test_client() as client:
        async with client.session_transaction() as session_data:
            session_data["user_id"] = user_id
        yield client
# ── DB helpers ────────────────────────────────────────────────────────────────
async def _insert_article(
    slug,
    url_path,
    status="published",
    language="en",
    template_slug="city-cost-de",
    created_at=None,
):
    """Insert a minimal article row and return whatever ``execute`` returns.

    ``created_at`` (default: now) is used for ``created_at`` and
    ``updated_at``, and also for ``published_at`` when the status is
    ``"published"``; otherwise ``published_at`` is NULL.
    """
    timestamp = created_at or utcnow_iso()
    published_at = timestamp if status == "published" else None
    values = (
        url_path,
        slug,
        f"Title {slug}",
        f"Desc {slug}",
        status,
        published_at,
        language,
        template_slug,
        timestamp,
        timestamp,
    )
    return await execute(
        """INSERT INTO articles
        (url_path, slug, title, meta_description, country, region,
        status, published_at, language, template_slug, created_at, updated_at)
        VALUES (?, ?, ?, ?, 'DE', 'Europe', ?, ?, ?, ?, ?, ?)""",
        values,
    )
async def _insert_scenario(slug="test-scenario"):
    """Insert a minimal ``published_scenarios`` row for ``slug``.

    The stored state and calculation JSON come from the planner calculator
    run on a two-double-court configuration.
    """
    from padelnomics.planner.calculator import calc, validate_state

    planner_state = validate_state({"dblCourts": 2})
    calc_result = calc(planner_state)
    values = (
        slug,
        f"Scenario {slug}",
        json.dumps(planner_state),
        json.dumps(calc_result),
    )
    return await execute(
        """INSERT INTO published_scenarios
        (slug, title, subtitle, location, country, venue_type, ownership,
        court_config, state_json, calc_json)
        VALUES (?, ?, '', 'TestCity', 'TC', 'indoor', 'rent', '2 double', ?, ?)""",
        values,
    )
async def _insert_task(status="pending", progress_current=0, progress_total=0):
    """Insert a ``generate_articles`` task row, commit, and return its id.

    The same timestamp is written to ``run_at`` and ``created_at``; the
    payload is an empty JSON object.
    """
    stamp = utcnow_iso()
    values = (status, stamp, progress_current, progress_total, stamp)
    async with core._db.execute(
        """INSERT INTO tasks
        (task_name, payload, status, run_at, progress_current, progress_total, created_at)
        VALUES ('generate_articles', '{}', ?, ?, ?, ?, ?)""",
        values,
    ) as insert_cursor:
        new_task_id = insert_cursor.lastrowid
    await core._db.commit()
    return new_task_id
# ── DuckDB mock rows ──────────────────────────────────────────────────────────
# Canned serving-table rows handed out in place of a real DuckDB query.
_DUCKDB_ROWS = [
    {"city_slug": "berlin", "city": "Berlin", "country": "DE"},
    {"city_slug": "munich", "city": "Munich", "country": "DE"},
    {"city_slug": "hamburg", "city": "Hamburg", "country": "DE"},
]


async def _mock_fetch_duckdb(query, params=None):
    """Stand-in for ``fetch_analytics``: ignore the query, return the canned rows."""
    return _DUCKDB_ROWS
# ════════════════════════════════════════════════════════════════════════════
# get_template_stats()
# ════════════════════════════════════════════════════════════════════════════
class TestGetTemplateStats:
    """get_template_stats(): article counts per status and per language."""

    async def test_empty_db_returns_zeros(self, db):
        result = await get_template_stats("city-cost-de")
        for counter in ("total", "published", "draft"):
            assert result[counter] == 0
        assert result["by_language"] == {}

    async def test_counts_per_status(self, db):
        seed = (
            ("city-cost-de-en-berlin", "/en/markets/germany/berlin", "published", "en"),
            ("city-cost-de-en-munich", "/en/markets/germany/munich", "draft", "en"),
            ("city-cost-de-de-berlin", "/de/markets/germany/berlin", "published", "de"),
        )
        for slug, url_path, status, language in seed:
            await _insert_article(slug, url_path, status=status, language=language)

        result = await get_template_stats("city-cost-de")

        assert result["total"] == 3
        assert result["published"] == 2
        assert result["draft"] == 1
        per_language = result["by_language"]
        assert per_language["en"]["total"] == 2
        assert per_language["de"]["total"] == 1

    async def test_ignores_other_templates(self, db):
        # An article that belongs to a different template must not be counted.
        await _insert_article("other-en-berlin", "/en/other/berlin", template_slug="other")
        result = await get_template_stats("city-cost-de")
        assert result["total"] == 0
# ════════════════════════════════════════════════════════════════════════════
# get_template_freshness()
# ════════════════════════════════════════════════════════════════════════════
_SAMPLE_TEMPLATES = [
    {
        "slug": "city-cost-de",
        "name": "City Cost DE",
        "data_table": "serving.pseo_city_costs_de",
        "languages": ["en", "de"],
    }
]


class TestGetTemplateFreshness:
    """get_template_freshness(): export timestamp vs. latest article timestamp."""

    @staticmethod
    def _stub_meta(monkeypatch, meta):
        """Make ``_read_serving_meta`` return ``meta`` instead of reading disk."""
        import padelnomics.content.health as health_mod

        monkeypatch.setattr(health_mod, "_read_serving_meta", lambda: meta)

    @staticmethod
    async def _add_berlin(created_at):
        await _insert_article(
            "city-cost-de-en-berlin", "/en/markets/germany/berlin",
            status="published", language="en", created_at=created_at,
        )

    async def test_no_meta_file_returns_no_data(self, db, monkeypatch):
        self._stub_meta(monkeypatch, {})
        report = await get_template_freshness(_SAMPLE_TEMPLATES)
        assert len(report) == 1
        assert report[0]["status"] == "no_data"

    async def test_meta_present_no_articles_returns_no_articles(self, db, monkeypatch):
        self._stub_meta(monkeypatch, {
            "exported_at_utc": "2026-01-15T10:00:00+00:00",
            "tables": {"pseo_city_costs_de": {"row_count": 100}},
        })
        report = await get_template_freshness(_SAMPLE_TEMPLATES)
        assert report[0]["status"] == "no_articles"
        assert report[0]["row_count"] == 100

    async def test_article_older_than_export_returns_stale(self, db, monkeypatch):
        # Article created Jan 10, data exported Jan 15 → stale
        await self._add_berlin("2026-01-10T08:00:00")
        self._stub_meta(monkeypatch, {
            "exported_at_utc": "2026-01-15T10:00:00+00:00",
            "tables": {"pseo_city_costs_de": {"row_count": 100}},
        })
        report = await get_template_freshness(_SAMPLE_TEMPLATES)
        assert report[0]["status"] == "stale"

    async def test_article_newer_than_export_returns_fresh(self, db, monkeypatch):
        # Data exported Jan 10, article updated Jan 15 → fresh
        await self._add_berlin("2026-01-15T12:00:00")
        self._stub_meta(monkeypatch, {
            "exported_at_utc": "2026-01-10T10:00:00+00:00",
            "tables": {},
        })
        report = await get_template_freshness(_SAMPLE_TEMPLATES)
        assert report[0]["status"] == "fresh"
# ════════════════════════════════════════════════════════════════════════════
# get_content_gaps()
# ════════════════════════════════════════════════════════════════════════════
class TestGetContentGaps:
    """get_content_gaps(): DuckDB rows without a matching article."""

    @staticmethod
    def _stub_duckdb(monkeypatch):
        """Route ``fetch_analytics`` to the canned DuckDB rows."""
        import padelnomics.content.health as health_mod

        monkeypatch.setattr(health_mod, "fetch_analytics", _mock_fetch_duckdb)

    @staticmethod
    async def _find_gaps(languages):
        return await get_content_gaps(
            template_slug="city-cost-de",
            data_table="serving.pseo_city_costs_de",
            natural_key="city_slug",
            languages=languages,
        )

    @staticmethod
    async def _add_en_article(city):
        await _insert_article(
            f"city-cost-de-en-{city}", f"/en/markets/germany/{city}", language="en",
        )

    async def test_no_articles_returns_all_duckdb_rows(self, db, monkeypatch):
        self._stub_duckdb(monkeypatch)
        found = await self._find_gaps(["en"])
        assert len(found) == len(_DUCKDB_ROWS)
        assert all(gap["_missing_languages"] == ["en"] for gap in found)

    async def test_existing_article_excluded_from_gaps(self, db, monkeypatch):
        self._stub_duckdb(monkeypatch)
        await self._add_en_article("berlin")
        found = await self._find_gaps(["en"])
        keys_with_gap = {gap["_natural_key"] for gap in found}
        assert "berlin" not in keys_with_gap
        assert "munich" in keys_with_gap
        assert "hamburg" in keys_with_gap

    async def test_partial_language_gap_detected(self, db, monkeypatch):
        self._stub_duckdb(monkeypatch)
        # EN exists for berlin, DE is missing → berlin has a gap for "de"
        await self._add_en_article("berlin")
        found = await self._find_gaps(["en", "de"])
        berlin_gap = next((gap for gap in found if gap["_natural_key"] == "berlin"), None)
        assert berlin_gap is not None
        assert berlin_gap["_missing_languages"] == ["de"]

    async def test_no_gaps_when_all_articles_exist(self, db, monkeypatch):
        self._stub_duckdb(monkeypatch)
        for city in ("berlin", "munich", "hamburg"):
            await self._add_en_article(city)
        found = await self._find_gaps(["en"])
        assert found == []
# ════════════════════════════════════════════════════════════════════════════
# check_hreflang_orphans()
# ════════════════════════════════════════════════════════════════════════════
class TestCheckHreflangOrphans:
    """check_hreflang_orphans(): published articles missing a sibling language."""

    @staticmethod
    def _templates(*languages):
        return [{"slug": "city-cost-de", "name": "City Cost DE", "languages": list(languages)}]

    @staticmethod
    async def _add_berlin(language, status="published"):
        await _insert_article(
            f"city-cost-de-{language}-berlin", f"/{language}/markets/germany/berlin",
            language=language, status=status,
        )

    async def test_single_lang_template_no_orphans(self, db):
        await self._add_berlin("en")
        found = await check_hreflang_orphans(self._templates("en"))
        assert found == []

    async def test_bilingual_both_present_no_orphans(self, db):
        await self._add_berlin("en")
        await self._add_berlin("de")
        found = await check_hreflang_orphans(self._templates("en", "de"))
        assert found == []

    async def test_missing_de_sibling_detected(self, db):
        # Only EN for berlin — DE is missing
        await self._add_berlin("en")
        found = await check_hreflang_orphans(self._templates("en", "de"))
        assert len(found) == 1
        orphan = found[0]
        assert orphan["template_slug"] == "city-cost-de"
        assert "de" in orphan["missing_languages"]
        assert "en" in orphan["present_languages"]

    async def test_draft_articles_not_counted(self, db):
        # Draft articles should be ignored
        await self._add_berlin("en", status="draft")
        found = await check_hreflang_orphans(self._templates("en", "de"))
        assert found == []
# ════════════════════════════════════════════════════════════════════════════
# check_missing_build_files()
# ════════════════════════════════════════════════════════════════════════════
class TestCheckMissingBuildFiles:
    """check_missing_build_files(): published articles without an HTML file."""

    SLUG = "city-cost-de-en-berlin"

    async def _add_berlin(self, status):
        await _insert_article(
            self.SLUG, "/en/markets/germany/berlin",
            language="en", status=status,
        )

    async def test_no_articles_returns_empty(self, db, tmp_path):
        assert await check_missing_build_files(build_dir=tmp_path) == []

    async def test_build_file_present_not_reported(self, db, tmp_path):
        await self._add_berlin("published")
        html_file = tmp_path / "en" / f"{self.SLUG}.html"
        html_file.parent.mkdir(parents=True)
        html_file.write_text("<h1>Berlin</h1>")
        assert await check_missing_build_files(build_dir=tmp_path) == []

    async def test_missing_build_file_reported(self, db, tmp_path):
        await self._add_berlin("published")
        # No build file created
        report = await check_missing_build_files(build_dir=tmp_path)
        assert len(report) == 1
        assert report[0]["slug"] == "city-cost-de-en-berlin"
        assert report[0]["language"] == "en"

    async def test_draft_articles_ignored(self, db, tmp_path):
        await self._add_berlin("draft")
        assert await check_missing_build_files(build_dir=tmp_path) == []
# ════════════════════════════════════════════════════════════════════════════
# check_broken_scenario_refs()
# ════════════════════════════════════════════════════════════════════════════
class TestCheckBrokenScenarioRefs:
    """check_broken_scenario_refs(): markdown markers pointing at unknown scenarios."""

    @staticmethod
    async def _add_berlin():
        await _insert_article(
            "city-cost-de-en-berlin", "/en/markets/germany/berlin",
            language="en", status="published",
        )

    @staticmethod
    def _write_markdown(build_root, slug, body):
        """Create ``{build_root}/en/md/{slug}.md`` containing ``body``."""
        markdown_dir = build_root / "en" / "md"
        markdown_dir.mkdir(parents=True)
        (markdown_dir / f"{slug}.md").write_text(body)

    async def test_no_markdown_files_returns_empty(self, db, tmp_path):
        await self._add_berlin()
        assert await check_broken_scenario_refs(build_dir=tmp_path) == []

    async def test_valid_scenario_ref_not_reported(self, db, tmp_path):
        await _insert_scenario("berlin-scenario")
        await self._add_berlin()
        self._write_markdown(
            tmp_path, "city-cost-de-en-berlin",
            "# Berlin\n\n[scenario:berlin-scenario:capex]\n",
        )
        assert await check_broken_scenario_refs(build_dir=tmp_path) == []

    async def test_missing_scenario_ref_reported(self, db, tmp_path):
        # No scenario in DB, but markdown references one
        await self._add_berlin()
        self._write_markdown(
            tmp_path, "city-cost-de-en-berlin",
            "# Berlin\n\n[scenario:ghost-scenario:capex]\n",
        )
        report = await check_broken_scenario_refs(build_dir=tmp_path)
        assert len(report) == 1
        assert "ghost-scenario" in report[0]["broken_scenario_refs"]

    async def test_no_template_slug_articles_ignored(self, db, tmp_path):
        # Legacy article (no template_slug) should not be checked
        await execute(
            """INSERT INTO articles
            (url_path, slug, title, status, language, created_at)
            VALUES ('/en/legacy', 'legacy', 'Legacy', 'published', 'en', ?)""",
            (utcnow_iso(),),
        )
        self._write_markdown(tmp_path, "legacy", "# Legacy\n\n[scenario:ghost]\n")
        assert await check_broken_scenario_refs(build_dir=tmp_path) == []
# ════════════════════════════════════════════════════════════════════════════
# get_all_health_issues()
# ════════════════════════════════════════════════════════════════════════════
class TestGetAllHealthIssues:
    """Tests for the aggregated report returned by get_all_health_issues()."""

    async def test_clean_state_returns_zero_counts(self, db, tmp_path):
        """With no articles inserted, every counter is zero and every section exists."""
        templates = [{"slug": "city-cost-de", "name": "City Cost DE", "languages": ["en"]}]

        report = await get_all_health_issues(templates, build_dir=tmp_path)

        counts = report["counts"]
        for counter in (
            "total",
            "hreflang_orphans",
            "missing_build_files",
            "broken_scenario_refs",
        ):
            assert counts[counter] == 0
        for section in ("hreflang_orphans", "missing_build_files", "broken_scenario_refs"):
            assert section in report

    async def test_orphan_counted_in_total(self, db, tmp_path):
        """A single EN article for an en+de template counts as one hreflang orphan."""
        templates = [{"slug": "city-cost-de", "name": "City Cost DE", "languages": ["en", "de"]}]
        # EN article with no DE sibling → orphan
        await _insert_article(
            "city-cost-de-en-berlin",
            "/en/markets/germany/berlin",
            language="en",
            status="published",
        )

        report = await get_all_health_issues(templates, build_dir=tmp_path)

        counts = report["counts"]
        assert counts["hreflang_orphans"] == 1
        assert counts["total"] >= 1
# ════════════════════════════════════════════════════════════════════════════
# pSEO Route tests
# ════════════════════════════════════════════════════════════════════════════
# Mock objects for route tests — avoids needing a live DuckDB
# Template config stand-in. `_load_template_mock` returns it for the slug
# "city-cost-de", and `_freshness_mock` reads its "slug", "name" and
# "data_table" keys when handed `_MOCK_TEMPLATES`.
_MOCK_TEMPLATE_CFG = {
    "slug": "city-cost-de",
    "name": "City Cost DE",
    "data_table": "serving.pseo_city_costs_de",
    "natural_key": "city_slug",
    "languages": ["en", "de"],
    "url_pattern": "/markets/{country}/{city_slug}",
}
# One-element template list returned by `_discover_mock`.
_MOCK_TEMPLATES = [_MOCK_TEMPLATE_CFG]
def _discover_mock():
    """Stand-in for ``discover_templates``: return the fixed mock template list."""
    return _MOCK_TEMPLATES
def _load_template_mock(slug):
    """Stand-in for ``load_template``.

    Returns the mock config for ``"city-cost-de"``; any other slug raises
    ``FileNotFoundError``.
    """
    if slug != "city-cost-de":
        raise FileNotFoundError(f"Template {slug!r} not found")
    return _MOCK_TEMPLATE_CFG
async def _freshness_mock(templates):
    """Stand-in for ``get_template_freshness``.

    Produces one row per template, copying its ``slug``, ``name`` and
    ``data_table`` and reporting every template as ``"fresh"`` with 100 rows.
    """
    fixed_fields = {
        "status": "fresh",
        "exported_at_utc": None,
        "last_generated": None,
        "row_count": 100,
    }
    return [
        {
            "slug": template["slug"],
            "name": template["name"],
            "data_table": template["data_table"],
            **fixed_fields,
        }
        for template in templates
    ]
async def _stats_mock(slug):
    """Stand-in for ``get_template_stats``: fixed article counts for any slug."""
    per_language = {
        language: {"total": 5, "published": 4, "draft": 1, "scheduled": 0}
        for language in ("en", "de")
    }
    return {
        "total": 10,
        "published": 8,
        "draft": 2,
        "scheduled": 0,
        "by_language": per_language,
    }
async def _health_mock(templates, build_dir=None):
    """Stand-in for ``get_all_health_issues``: an empty report with zeroed counts."""
    issue_kinds = ("hreflang_orphans", "missing_build_files", "broken_scenario_refs")
    # A fresh list per issue kind, followed by the counters (including "total").
    report = {kind: [] for kind in issue_kinds}
    report["counts"] = dict.fromkeys((*issue_kinds, "total"), 0)
    return report
async def _gaps_empty_mock(template_slug, data_table, natural_key, languages, limit=200):
    """Stand-in for ``get_content_gaps`` that reports no gaps, whatever the arguments."""
    no_gaps = []
    return no_gaps
async def _gaps_two_mock(template_slug, data_table, natural_key, languages, limit=200):
    """Stand-in for ``get_content_gaps`` that always reports two gap rows."""
    # (city slug, the one language the row is missing)
    missing_by_city = (("munich", "en"), ("hamburg", "de"))
    return [
        {"city_slug": city, "_natural_key": city, "_missing_languages": [language]}
        for city, language in missing_by_city
    ]
class TestPseoRoutes:
    """Tests for all pSEO Engine admin blueprint routes."""

    @staticmethod
    async def _post_generate_gaps(admin_client, gaps_mock):
        """Seed a CSRF token, patch template loading and gap lookup, then POST.

        ``gaps_mock`` replaces ``get_content_gaps`` for the duration of the
        request. Returns the response of the generate-gaps POST.
        """
        async with admin_client.session_transaction() as session:
            session["csrf_token"] = "test"
        template_patch = patch("padelnomics.admin.pseo_routes.load_template", _load_template_mock)
        gaps_patch = patch("padelnomics.admin.pseo_routes.get_content_gaps", gaps_mock)
        with template_patch, gaps_patch:
            return await admin_client.post(
                "/admin/pseo/gaps/city-cost-de/generate",
                form={"csrf_token": "test"},
            )

    # -- Access control --------------------------------------------------------

    async def test_dashboard_requires_admin(self, client, db):
        """The dashboard answers the plain ``client`` with 302 or 403."""
        response = await client.get("/admin/pseo/")
        assert response.status_code in (302, 403)

    async def test_health_requires_admin(self, client, db):
        """The health partial answers the plain ``client`` with 302 or 403."""
        response = await client.get("/admin/pseo/health")
        assert response.status_code in (302, 403)

    async def test_gaps_requires_admin(self, client, db):
        """The gaps partial answers the plain ``client`` with 302 or 403."""
        response = await client.get("/admin/pseo/gaps/city-cost-de")
        assert response.status_code in (302, 403)

    async def test_jobs_requires_admin(self, client, db):
        """The jobs list answers the plain ``client`` with 302 or 403."""
        response = await client.get("/admin/pseo/jobs")
        assert response.status_code in (302, 403)

    # -- Dashboard -------------------------------------------------------------

    async def test_dashboard_renders(self, admin_client, db):
        """The dashboard returns 200 and contains the page title."""
        discover = patch("padelnomics.admin.pseo_routes.discover_templates", _discover_mock)
        freshness = patch("padelnomics.admin.pseo_routes.get_template_freshness", _freshness_mock)
        stats = patch("padelnomics.admin.pseo_routes.get_template_stats", _stats_mock)
        with discover, freshness, stats:
            response = await admin_client.get("/admin/pseo/")

        assert response.status_code == 200
        page = await response.get_data(as_text=True)
        assert "pSEO Engine" in page

    async def test_dashboard_shows_template_name(self, admin_client, db):
        """The dashboard lists the mock template by its display name."""
        discover = patch("padelnomics.admin.pseo_routes.discover_templates", _discover_mock)
        freshness = patch("padelnomics.admin.pseo_routes.get_template_freshness", _freshness_mock)
        stats = patch("padelnomics.admin.pseo_routes.get_template_stats", _stats_mock)
        with discover, freshness, stats:
            response = await admin_client.get("/admin/pseo/")

        page = await response.get_data(as_text=True)
        assert "City Cost DE" in page

    # -- Health HTMX partial ---------------------------------------------------

    async def test_health_partial_renders(self, admin_client, db):
        """The health partial returns 200 when the health report is mocked."""
        discover = patch("padelnomics.admin.pseo_routes.discover_templates", _discover_mock)
        health = patch("padelnomics.admin.pseo_routes.get_all_health_issues", _health_mock)
        with discover, health:
            response = await admin_client.get("/admin/pseo/health")

        assert response.status_code == 200

    # -- Content gaps HTMX partial ---------------------------------------------

    async def test_gaps_unknown_template_returns_404(self, admin_client, db):
        """A template loader that raises FileNotFoundError results in a 404."""

        def _template_missing(slug):
            raise FileNotFoundError("not found")

        with patch("padelnomics.admin.pseo_routes.load_template", _template_missing):
            response = await admin_client.get("/admin/pseo/gaps/no-such-template")

        assert response.status_code == 404

    async def test_gaps_partial_renders(self, admin_client, db):
        """The gaps partial returns 200 and mentions a gap row or the word "missing"."""
        template_patch = patch("padelnomics.admin.pseo_routes.load_template", _load_template_mock)
        gaps_patch = patch("padelnomics.admin.pseo_routes.get_content_gaps", _gaps_two_mock)
        with template_patch, gaps_patch:
            response = await admin_client.get("/admin/pseo/gaps/city-cost-de")

        assert response.status_code == 200
        page = await response.get_data(as_text=True)
        # Should show gap count or row content
        assert "munich" in page or "missing" in page.lower()

    async def test_gaps_empty_shows_no_gaps_message(self, admin_client, db):
        """With no gaps reported, the partial returns 200 and an all-clear text."""
        template_patch = patch("padelnomics.admin.pseo_routes.load_template", _load_template_mock)
        gaps_patch = patch("padelnomics.admin.pseo_routes.get_content_gaps", _gaps_empty_mock)
        with template_patch, gaps_patch:
            response = await admin_client.get("/admin/pseo/gaps/city-cost-de")

        assert response.status_code == 200
        page = await response.get_data(as_text=True)
        # NOTE(review): the "all" fallback is a very loose substring match —
        # consider pinning the exact message the template renders.
        assert "No gaps" in page or "all" in page.lower()

    # -- Generate gaps POST ----------------------------------------------------

    async def test_generate_gaps_redirects(self, admin_client, db):
        """Posting the generate form with gaps present answers with a 302."""
        response = await self._post_generate_gaps(admin_client, _gaps_two_mock)
        assert response.status_code == 302

    async def test_generate_gaps_enqueues_task(self, admin_client, db):
        """Posting the generate form with gaps present stores exactly one task row."""
        await self._post_generate_gaps(admin_client, _gaps_two_mock)

        queued = await core.fetch_all(
            "SELECT task_name FROM tasks WHERE task_name = 'generate_articles'"
        )
        assert len(queued) == 1

    async def test_generate_gaps_no_gaps_redirects_without_task(self, admin_client, db):
        """With no gaps, the POST still answers 302 but stores no task row."""
        response = await self._post_generate_gaps(admin_client, _gaps_empty_mock)
        assert response.status_code == 302

        queued = await core.fetch_all(
            "SELECT task_name FROM tasks WHERE task_name = 'generate_articles'"
        )
        assert len(queued) == 0

    # -- Jobs list -------------------------------------------------------------

    async def test_jobs_renders_empty(self, admin_client, db):
        """The jobs page renders its heading when no task was inserted."""
        response = await admin_client.get("/admin/pseo/jobs")

        assert response.status_code == 200
        page = await response.get_data(as_text=True)
        assert "Generation Jobs" in page

    async def test_jobs_shows_task_row(self, admin_client, db):
        """A task inserted as complete shows up as "Complete" on the jobs page."""
        await _insert_task(status="complete", progress_current=20, progress_total=20)

        response = await admin_client.get("/admin/pseo/jobs")

        assert response.status_code == 200
        page = await response.get_data(as_text=True)
        assert "Complete" in page

    # -- Job status HTMX polled ------------------------------------------------

    async def test_job_status_not_found_returns_404(self, admin_client, db):
        """Requesting the status of job id 9999, which was never inserted, returns 404."""
        response = await admin_client.get("/admin/pseo/jobs/9999/status")
        assert response.status_code == 404

    async def test_job_status_renders_pending(self, admin_client, db):
        """A task inserted as pending is rendered with the text "Running"."""
        job_id = await _insert_task(status="pending", progress_current=5, progress_total=20)

        response = await admin_client.get(f"/admin/pseo/jobs/{job_id}/status")

        assert response.status_code == 200
        fragment = await response.get_data(as_text=True)
        assert "Running" in fragment

    async def test_job_status_renders_complete(self, admin_client, db):
        """A task inserted as complete is rendered with the text "Complete"."""
        job_id = await _insert_task(status="complete", progress_current=20, progress_total=20)

        response = await admin_client.get(f"/admin/pseo/jobs/{job_id}/status")

        assert response.status_code == 200
        fragment = await response.get_data(as_text=True)
        assert "Complete" in fragment

    async def test_job_status_complete_no_htmx_poll_trigger(self, admin_client, db):
        """A completed job should not include hx-trigger="every 2s" (stops HTMX polling)."""
        job_id = await _insert_task(status="complete", progress_current=20, progress_total=20)

        response = await admin_client.get(f"/admin/pseo/jobs/{job_id}/status")

        fragment = await response.get_data(as_text=True)
        assert "every 2s" not in fragment

    async def test_job_status_pending_includes_htmx_poll_trigger(self, admin_client, db):
        """A pending job should include hx-trigger="every 2s" (keeps HTMX polling)."""
        job_id = await _insert_task(status="pending", progress_current=0, progress_total=20)

        response = await admin_client.get(f"/admin/pseo/jobs/{job_id}/status")

        fragment = await response.get_data(as_text=True)
        assert "every 2s" in fragment