feat(pseo): add pSEO Engine admin tab
Operational dashboard at /admin/pseo for the programmatic SEO system: content gap detection, data freshness signals, article health checks (hreflang orphans, missing build files, broken scenario refs), and live generation job monitoring with HTMX progress bars. - _serving_meta.json written by export_serving.py after atomic DB swap - content/health.py: pure async query functions for all health checks - Migration 0021: progress_current/total/error_log on tasks table - generate_articles() writes progress every 50 articles + on completion - admin/pseo_routes.py: 6 routes, standalone blueprint - 5 HTML templates + sidebar nav + fromjson Jinja filter - 45 tests (all passing); 2 bugs caught and fixed during testing Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> # Conflicts: # src/padelnomics/export_serving.py
This commit is contained in:
209
web/src/padelnomics/admin/pseo_routes.py
Normal file
209
web/src/padelnomics/admin/pseo_routes.py
Normal file
@@ -0,0 +1,209 @@
|
||||
"""
|
||||
pSEO Engine admin blueprint.
|
||||
|
||||
Operational visibility for the programmatic SEO system:
|
||||
/admin/pseo/ → dashboard (template stats, freshness, recent jobs)
|
||||
/admin/pseo/health → HTMX partial: health issues
|
||||
/admin/pseo/gaps/<slug> → HTMX partial: content gaps for one template
|
||||
/admin/pseo/gaps/<slug>/generate → POST: enqueue gap-fill job
|
||||
/admin/pseo/jobs → recent generation jobs
|
||||
/admin/pseo/jobs/<id>/status → HTMX polled: progress bar for one job
|
||||
|
||||
Registered as a standalone blueprint so admin/routes.py (already ~2,100 lines)
|
||||
stays focused on its own domain.
|
||||
"""
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
|
||||
from quart import Blueprint, flash, redirect, render_template, url_for
|
||||
|
||||
from ..auth.routes import role_required
|
||||
from ..content import discover_templates, load_template
|
||||
from ..content.health import (
|
||||
get_all_health_issues,
|
||||
get_content_gaps,
|
||||
get_template_freshness,
|
||||
get_template_stats,
|
||||
)
|
||||
from ..core import csrf_protect, fetch_all, fetch_one
|
||||
|
||||
# Standalone blueprint for the pSEO admin pages. Templates are resolved from
# the "templates" directory that sits next to this module, and every route
# defined below is mounted under /admin/pseo.
bp = Blueprint(
    "pseo",
    __name__,
    url_prefix="/admin/pseo",
    template_folder=str(Path(__file__).with_name("templates")),
)
|
||||
|
||||
|
||||
@bp.before_request
async def _inject_sidebar_data():
    """Load the unread inbox count for the admin sidebar badge.

    Runs before every request handled by this blueprint and stores the count
    on ``g.admin_unread_count``. The lookup is best-effort: if the query
    fails for any reason the count falls back to 0 so the page still renders,
    but the failure is logged instead of being silently discarded.
    """
    import logging

    from quart import g

    try:
        row = await fetch_one("SELECT COUNT(*) as cnt FROM inbound_emails WHERE is_read = 0")
    except Exception:
        # Deliberately broad: a broken sidebar badge must never take the
        # pSEO pages down. Keep the fallback, but leave a trace for operators.
        logging.getLogger(__name__).warning(
            "Could not load unread inbox count for admin sidebar", exc_info=True
        )
        g.admin_unread_count = 0
    else:
        g.admin_unread_count = row["cnt"] if row else 0
|
||||
|
||||
|
||||
@bp.context_processor
def _admin_context():
    """Provide the sidebar's ``unread_count`` variable to every pSEO template.

    Reads ``g.admin_unread_count`` and defaults to 0 when the attribute has
    not been set on ``g``.
    """
    from quart import g

    unread = getattr(g, "admin_unread_count", 0)
    return {"unread_count": unread}
|
||||
|
||||
|
||||
# ── Dashboard ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@bp.route("/")
@role_required("admin")
async def pseo_dashboard():
    """Render the pSEO Engine dashboard: template stats, freshness, recent jobs.

    For every discovered template this collects its article stats and its
    freshness record, then derives the summary-card totals (article count,
    published count, number of stale templates). The five most recent
    ``generate_articles`` tasks are included for the "Recent Generation Jobs"
    table.
    """
    templates = discover_templates()

    freshness = await get_template_freshness(templates)
    freshness_by_slug = {f["slug"]: f for f in freshness}

    template_rows = []
    for t in templates:
        stats = await get_template_stats(t["slug"])
        template_rows.append({
            "template": t,
            "stats": stats,
            # Templates without a freshness record get an empty dict so the
            # template can fall back to its "no data" display.
            "freshness": freshness_by_slug.get(t["slug"], {}),
        })

    total_articles = sum(r["stats"]["total"] for r in template_rows)
    total_published = sum(r["stats"]["published"] for r in template_rows)
    stale_count = sum(1 for f in freshness if f["status"] == "stale")

    # Recent generation jobs — enough for the dashboard summary.
    # `payload` must be selected: admin/pseo_dashboard.html reads
    # `job.payload | fromjson` to show which template a job belongs to, and
    # without the column that label is never rendered.
    # NOTE(review): assumes the tasks table has a `payload` column holding the
    # enqueued JSON — confirm against the schema.
    jobs = await fetch_all(
        "SELECT id, task_name, payload, status, progress_current, progress_total,"
        " error, error_log, created_at, completed_at"
        " FROM tasks WHERE task_name = 'generate_articles'"
        " ORDER BY created_at DESC LIMIT 5",
    )

    return await render_template(
        "admin/pseo_dashboard.html",
        template_rows=template_rows,
        total_articles=total_articles,
        total_published=total_published,
        total_templates=len(templates),
        stale_count=stale_count,
        jobs=jobs,
        admin_page="pseo",
    )
|
||||
|
||||
|
||||
# ── Health checks (HTMX partial) ─────────────────────────────────────────────
|
||||
|
||||
|
||||
@bp.route("/health")
@role_required("admin")
async def pseo_health():
    """Return the HTMX partial listing every health issue.

    Runs all health checks over the currently discovered templates and
    renders the result with ``admin/pseo_health.html``.
    """
    health = await get_all_health_issues(discover_templates())
    return await render_template("admin/pseo_health.html", health=health)
|
||||
|
||||
|
||||
# ── Content gaps (HTMX partial + generate action) ────────────────────────────
|
||||
|
||||
|
||||
@bp.route("/gaps/<slug>")
@role_required("admin")
async def pseo_gaps_template(slug: str):
    """Return the HTMX partial with the content gaps of one template.

    Responds with a plain-text 404 when ``load_template`` raises
    ``AssertionError`` or ``FileNotFoundError`` for the given slug.
    """
    try:
        config = load_template(slug)
    except (AssertionError, FileNotFoundError):
        return "Template not found", 404

    # The gap query is driven by three settings taken from the template config.
    gap_query = {
        key: config[key] for key in ("data_table", "natural_key", "languages")
    }
    gaps = await get_content_gaps(template_slug=slug, **gap_query)

    return await render_template("admin/pseo_gaps.html", template=config, gaps=gaps)
|
||||
|
||||
|
||||
@bp.route("/gaps/<slug>/generate", methods=["POST"])
@role_required("admin")
@csrf_protect
async def pseo_generate_gaps(slug: str):
    """Enqueue a ``generate_articles`` job for a template that has gaps.

    Every outcome flashes a message and redirects back to the dashboard:
    an error for an unknown template, an info note when no gaps exist, and
    a success message once the job has been queued.
    """
    from ..worker import enqueue

    def _to_dashboard():
        # Single place that builds the redirect shared by all three exits.
        return redirect(url_for("pseo.pseo_dashboard"))

    try:
        config = load_template(slug)
    except (AssertionError, FileNotFoundError):
        await flash("Template not found.", "error")
        return _to_dashboard()

    gaps = await get_content_gaps(
        template_slug=slug,
        data_table=config["data_table"],
        natural_key=config["natural_key"],
        languages=config["languages"],
    )

    if gaps:
        job_payload = {
            "template_slug": slug,
            "start_date": date.today().isoformat(),
            "articles_per_day": 500,
            "limit": 500,
        }
        await enqueue("generate_articles", job_payload)
        message = f"Queued generation for {len(gaps)} missing articles in '{config['name']}'."
        await flash(message, "success")
    else:
        await flash(f"No gaps found for '{config['name']}' — nothing to generate.", "info")

    return _to_dashboard()
|
||||
|
||||
|
||||
# ── Generation job monitoring ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
@bp.route("/jobs")
@role_required("admin")
async def pseo_jobs():
    """Render the full list of recent article generation jobs.

    Shows the 20 most recent ``generate_articles`` tasks, newest first.
    """
    # `payload` must be selected: admin/pseo_jobs.html reads
    # `job.payload | fromjson` to fill the "Template" column, and without the
    # column every row shows only a dash.
    # NOTE(review): assumes the tasks table has a `payload` column holding the
    # enqueued JSON — confirm against the schema.
    jobs = await fetch_all(
        "SELECT id, task_name, payload, status, progress_current, progress_total,"
        " error, error_log, created_at, completed_at"
        " FROM tasks WHERE task_name = 'generate_articles'"
        " ORDER BY created_at DESC LIMIT 20",
    )
    return await render_template(
        "admin/pseo_jobs.html",
        jobs=jobs,
        admin_page="pseo",
    )
|
||||
|
||||
|
||||
@bp.route("/jobs/<int:job_id>/status")
@role_required("admin")
async def pseo_job_status(job_id: int):
    """Return the HTMX-polled partial showing one job's progress.

    Looks the task up by id and renders ``admin/pseo_job_status.html`` for
    it; responds with a plain-text 404 when no such row is found.
    """
    status_query = (
        "SELECT id, status, progress_current, progress_total, error, error_log,"
        " created_at, completed_at"
        " FROM tasks WHERE id = ?"
    )
    job = await fetch_one(status_query, (job_id,))
    if job:
        return await render_template("admin/pseo_job_status.html", job=job)
    return "Job not found", 404
|
||||
@@ -95,6 +95,12 @@
|
||||
Templates
|
||||
</a>
|
||||
|
||||
<div class="admin-sidebar__section">pSEO</div>
|
||||
<a href="{{ url_for('pseo.pseo_dashboard') }}" class="{% if admin_page == 'pseo' %}active{% endif %}">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor"><path stroke-linecap="round" stroke-linejoin="round" d="M9.75 3.104v5.714a2.25 2.25 0 0 1-.659 1.591L5 14.5M9.75 3.104c-.251.023-.501.05-.75.082m.75-.082a24.301 24.301 0 0 1 4.5 0m0 0v5.714c0 .597.237 1.17.659 1.591L19.8 15.3M14.25 3.104c.251.023.501.05.75.082M19.8 15.3l-1.57.393A9.065 9.065 0 0 1 12 15a9.065 9.065 0 0 1-6.23-.693L5 14.5m14.8.8 1.402 1.402c1.232 1.232.65 3.318-1.067 3.611A48.309 48.309 0 0 1 12 21c-2.773 0-5.491-.235-8.135-.687-1.718-.293-2.3-2.379-1.067-3.61L5 14.5"/></svg>
|
||||
pSEO Engine
|
||||
</a>
|
||||
|
||||
<div class="admin-sidebar__section">Email</div>
|
||||
<a href="{{ url_for('admin.emails') }}" class="{% if admin_page == 'emails' %}active{% endif %}">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor"><path stroke-linecap="round" stroke-linejoin="round" d="M21.75 6.75v10.5a2.25 2.25 0 0 1-2.25 2.25h-15a2.25 2.25 0 0 1-2.25-2.25V6.75m19.5 0A2.25 2.25 0 0 0 19.5 4.5h-15a2.25 2.25 0 0 0-2.25 2.25m19.5 0v.243a2.25 2.25 0 0 1-1.07 1.916l-7.5 4.615a2.25 2.25 0 0 1-2.36 0L3.32 8.91a2.25 2.25 0 0 1-1.07-1.916V6.75"/></svg>
|
||||
|
||||
195
web/src/padelnomics/admin/templates/admin/pseo_dashboard.html
Normal file
195
web/src/padelnomics/admin/templates/admin/pseo_dashboard.html
Normal file
@@ -0,0 +1,195 @@
|
||||
{% extends "admin/base_admin.html" %}
|
||||
{% set admin_page = "pseo" %}
|
||||
|
||||
{% block title %}pSEO Engine - {{ config.APP_NAME }}{% endblock %}
|
||||
|
||||
{% block admin_head %}
|
||||
<style>
|
||||
.pseo-status-badge {
|
||||
display: inline-flex; align-items: center; gap: 4px;
|
||||
font-size: 0.6875rem; font-weight: 600; padding: 2px 8px;
|
||||
border-radius: 9999px;
|
||||
}
|
||||
.pseo-status-fresh { background: #D1FAE5; color: #065F46; }
|
||||
.pseo-status-stale { background: #FEF3C7; color: #92400E; }
|
||||
.pseo-status-no_data { background: #F1F5F9; color: #64748B; }
|
||||
.pseo-status-no_articles { background: #EDE9FE; color: #5B21B6; }
|
||||
|
||||
.pseo-gaps-panel { border-top: 1px solid #E2E8F0; margin-top: 8px; padding-top: 8px; }
|
||||
|
||||
.progress-bar-wrap { height: 6px; background: #E2E8F0; border-radius: 9999px; overflow: hidden; min-width: 80px; }
|
||||
.progress-bar-fill { height: 100%; background: #1D4ED8; border-radius: 9999px; transition: width 0.3s; }
|
||||
</style>
|
||||
{% endblock %}
|
||||
|
||||
{% block admin_content %}
|
||||
<header class="flex justify-between items-center mb-8">
|
||||
<div>
|
||||
<h1 class="text-2xl">pSEO Engine</h1>
|
||||
<p class="text-slate text-sm mt-1">Operational dashboard for programmatic SEO</p>
|
||||
</div>
|
||||
<a href="{{ url_for('pseo.pseo_jobs') }}" class="btn-outline btn-sm">All Jobs</a>
|
||||
</header>
|
||||
|
||||
<!-- Summary Cards -->
|
||||
<div class="grid-4 mb-8">
|
||||
<div class="card text-center">
|
||||
<p class="card-header">Total Articles</p>
|
||||
<p class="text-3xl font-bold text-navy">{{ total_articles }}</p>
|
||||
<p class="text-xs text-slate mt-1">{{ total_published }} published</p>
|
||||
</div>
|
||||
<div class="card text-center">
|
||||
<p class="card-header">Templates</p>
|
||||
<p class="text-3xl font-bold text-navy">{{ total_templates }}</p>
|
||||
</div>
|
||||
<div class="card text-center">
|
||||
<p class="card-header">Stale Templates</p>
|
||||
<p class="text-3xl font-bold {% if stale_count > 0 %}text-amber-600{% else %}text-navy{% endif %}">
|
||||
{{ stale_count }}
|
||||
</p>
|
||||
<p class="text-xs text-slate mt-1">data newer than articles</p>
|
||||
</div>
|
||||
<div class="card text-center">
|
||||
<p class="card-header">Health Checks</p>
|
||||
<p class="text-3xl font-bold text-navy">—</p>
|
||||
<p class="text-xs text-slate mt-1">see Health section below</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Per-Template Table -->
|
||||
<div class="card mb-8">
|
||||
<div class="card-header mb-4 flex justify-between items-center">
|
||||
<span>Templates</span>
|
||||
<span class="text-xs text-slate">Click "Gaps" to load missing articles per template</span>
|
||||
</div>
|
||||
<div class="table-wrap">
|
||||
<table class="table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Template</th>
|
||||
<th>Data rows</th>
|
||||
<th>Articles EN</th>
|
||||
<th>Articles DE</th>
|
||||
<th>Freshness</th>
|
||||
<th>Actions</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for r in template_rows %}
|
||||
{% set t = r.template %}
|
||||
{% set stats = r.stats %}
|
||||
{% set fr = r.freshness %}
|
||||
<tr>
|
||||
<td>
|
||||
<strong>{{ t.name }}</strong><br>
|
||||
<span class="text-xs text-slate">{{ t.slug }}</span>
|
||||
</td>
|
||||
<td>{{ fr.row_count if fr.row_count is not none else '—' }}</td>
|
||||
<td>{{ stats.by_language.get('en', {}).get('total', 0) }}</td>
|
||||
<td>{{ stats.by_language.get('de', {}).get('total', 0) }}</td>
|
||||
<td>
|
||||
{% set status = fr.status | default('no_data') %}
|
||||
<span class="pseo-status-badge pseo-status-{{ status }}">
|
||||
{% if status == 'fresh' %}🟢 Fresh
|
||||
{% elif status == 'stale' %}🟡 Stale
|
||||
{% elif status == 'no_articles' %}🟣 No articles
|
||||
{% else %}⚪ No data
|
||||
{% endif %}
|
||||
</span>
|
||||
</td>
|
||||
<td class="flex gap-2 items-center">
|
||||
<button class="btn-outline btn-sm"
|
||||
hx-get="{{ url_for('pseo.pseo_gaps_template', slug=t.slug) }}"
|
||||
hx-target="#gaps-panel-{{ t.slug }}"
|
||||
hx-swap="innerHTML"
|
||||
hx-indicator="#gaps-panel-{{ t.slug }}">
|
||||
Gaps
|
||||
</button>
|
||||
<form method="post" action="{{ url_for('pseo.pseo_generate_gaps', slug=t.slug) }}" class="m-0">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<button type="submit" class="btn btn-sm">Generate gaps</button>
|
||||
</form>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="6" class="p-0">
|
||||
<div id="gaps-panel-{{ t.slug }}" class="pseo-gaps-panel" style="padding: 0 1rem 0.5rem;">
|
||||
<!-- Loaded via HTMX on "Gaps" click -->
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Recent Jobs -->
|
||||
{% if jobs %}
|
||||
<div class="card mb-8">
|
||||
<div class="card-header mb-4 flex justify-between items-center">
|
||||
<span>Recent Generation Jobs</span>
|
||||
<a href="{{ url_for('pseo.pseo_jobs') }}" class="text-xs text-blue">View all →</a>
|
||||
</div>
|
||||
<div class="table-wrap">
|
||||
<table class="table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Job</th>
|
||||
<th>Status</th>
|
||||
<th>Progress</th>
|
||||
<th>Started</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for job in jobs %}
|
||||
<tr>
|
||||
<td>
|
||||
<a href="{{ url_for('pseo.pseo_jobs') }}#job-{{ job.id }}" class="text-blue">#{{ job.id }}</a>
|
||||
{% if job.payload %}
|
||||
— {{ (job.payload | fromjson).get('template_slug', '') }}
|
||||
{% endif %}
|
||||
</td>
|
||||
<td>
|
||||
{% if job.status == 'complete' %}
|
||||
<span class="badge-success">Complete</span>
|
||||
{% elif job.status == 'failed' %}
|
||||
<span class="badge-danger">Failed</span>
|
||||
{% elif job.status == 'pending' %}
|
||||
<span class="badge-warning">Running</span>
|
||||
{% else %}
|
||||
<span class="badge">{{ job.status }}</span>
|
||||
{% endif %}
|
||||
</td>
|
||||
<td>
|
||||
{% if job.progress_total and job.progress_total > 0 %}
|
||||
<div class="flex items-center gap-2">
|
||||
<div class="progress-bar-wrap">
|
||||
<div class="progress-bar-fill" style="width: {{ [((job.progress_current / job.progress_total) * 100) | int, 100] | min }}%"></div>
|
||||
</div>
|
||||
<span class="text-xs text-slate">{{ job.progress_current }}/{{ job.progress_total }}</span>
|
||||
</div>
|
||||
{% else %}
|
||||
—
|
||||
{% endif %}
|
||||
</td>
|
||||
<td class="text-xs text-slate">{{ job.created_at | default('') | truncate(16, True, '') }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<!-- Health Issues (HTMX-loaded) -->
|
||||
<div id="health-panel"
|
||||
hx-get="{{ url_for('pseo.pseo_health') }}"
|
||||
hx-trigger="load delay:500ms"
|
||||
hx-target="#health-panel"
|
||||
hx-swap="outerHTML">
|
||||
<div class="card">
|
||||
<p class="text-slate text-sm">Loading health checks…</p>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
43
web/src/padelnomics/admin/templates/admin/pseo_gaps.html
Normal file
43
web/src/padelnomics/admin/templates/admin/pseo_gaps.html
Normal file
@@ -0,0 +1,43 @@
|
||||
{# HTMX partial — rendered inside the gaps panel for one template.
|
||||
Loaded via GET /admin/pseo/gaps/<slug>. #}
|
||||
|
||||
{% if not gaps %}
|
||||
<p class="text-success text-sm p-2">✓ No gaps — all {{ template.name }} rows have articles.</p>
|
||||
{% else %}
|
||||
<div class="flex justify-between items-center mb-2">
|
||||
<span class="text-sm font-semibold">{{ gaps | length }} missing row{{ 's' if gaps | length != 1 else '' }}</span>
|
||||
<form method="post" action="{{ url_for('pseo.pseo_generate_gaps', slug=template.slug) }}" class="m-0">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<button type="submit" class="btn btn-sm">Generate {{ gaps | length }} missing</button>
|
||||
</form>
|
||||
</div>
|
||||
<div class="table-wrap" style="max-height: 300px; overflow-y: auto;">
|
||||
<table class="table text-sm">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>{{ template.natural_key }}</th>
|
||||
<th>Missing languages</th>
|
||||
{% for key in (gaps[0].keys() | list | reject('equalto', '_natural_key') | reject('equalto', '_missing_languages') | list)[:4] %}
|
||||
<th>{{ key }}</th>
|
||||
{% endfor %}
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for gap in gaps[:100] %}
|
||||
<tr>
|
||||
<td class="font-mono text-xs">{{ gap._natural_key }}</td>
|
||||
<td class="text-xs text-amber-700">{{ gap._missing_languages | join(', ') }}</td>
|
||||
{% for key in (gap.keys() | list | reject('equalto', '_natural_key') | reject('equalto', '_missing_languages') | list)[:4] %}
|
||||
<td class="text-xs text-slate">{{ gap[key] | truncate(30) if gap[key] is string else gap[key] }}</td>
|
||||
{% endfor %}
|
||||
</tr>
|
||||
{% endfor %}
|
||||
{% if gaps | length > 100 %}
|
||||
<tr>
|
||||
<td colspan="10" class="text-xs text-slate text-center">… and {{ gaps | length - 100 }} more rows</td>
|
||||
</tr>
|
||||
{% endif %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
{% endif %}
|
||||
99
web/src/padelnomics/admin/templates/admin/pseo_health.html
Normal file
99
web/src/padelnomics/admin/templates/admin/pseo_health.html
Normal file
@@ -0,0 +1,99 @@
|
||||
{# HTMX partial — loaded by pseo_dashboard.html and /admin/pseo/health directly.
|
||||
When loaded via HTMX (hx-swap="outerHTML"), renders a full card.
|
||||
When loaded standalone (full page), also works since it just outputs HTML. #}
|
||||
|
||||
<div class="card" id="health-panel">
|
||||
<div class="card-header mb-4 flex justify-between items-center">
|
||||
<span>Health Checks</span>
|
||||
<span class="text-xs text-slate">{{ health.counts.total }} issue{{ 's' if health.counts.total != 1 else '' }}</span>
|
||||
</div>
|
||||
|
||||
{% if health.counts.total == 0 %}
|
||||
<p class="text-success text-sm">✓ No issues found — all articles are healthy.</p>
|
||||
{% else %}
|
||||
|
||||
<!-- Hreflang Orphans -->
|
||||
{% if health.hreflang_orphans %}
|
||||
<details class="mb-4">
|
||||
<summary class="cursor-pointer font-semibold text-sm text-amber-700">
|
||||
⚠ Hreflang orphans ({{ health.counts.hreflang_orphans }})
|
||||
<span class="text-xs font-normal text-slate ml-2">— articles missing a sibling language</span>
|
||||
</summary>
|
||||
<div class="table-wrap mt-2">
|
||||
<table class="table text-sm">
|
||||
<thead><tr><th>Template</th><th>URL path</th><th>Present</th><th>Missing</th></tr></thead>
|
||||
<tbody>
|
||||
{% for o in health.hreflang_orphans[:50] %}
|
||||
<tr>
|
||||
<td class="text-xs text-slate">{{ o.template_slug }}</td>
|
||||
<td><a href="{{ o.url_path }}" class="text-blue text-xs" target="_blank">{{ o.url_path }}</a></td>
|
||||
<td class="text-xs">{{ o.present_languages | join(', ') }}</td>
|
||||
<td class="text-xs text-red-600">{{ o.missing_languages | join(', ') }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
{% if health.hreflang_orphans | length > 50 %}
|
||||
<tr><td colspan="4" class="text-xs text-slate text-center">… and {{ health.hreflang_orphans | length - 50 }} more</td></tr>
|
||||
{% endif %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</details>
|
||||
{% endif %}
|
||||
|
||||
<!-- Missing Build Files -->
|
||||
{% if health.missing_build_files %}
|
||||
<details class="mb-4">
|
||||
<summary class="cursor-pointer font-semibold text-sm text-red-700">
|
||||
❌ Missing build files ({{ health.counts.missing_build_files }})
|
||||
<span class="text-xs font-normal text-slate ml-2">— published articles with no HTML on disk</span>
|
||||
</summary>
|
||||
<div class="table-wrap mt-2">
|
||||
<table class="table text-sm">
|
||||
<thead><tr><th>Slug</th><th>Language</th><th>URL path</th><th>Expected path</th></tr></thead>
|
||||
<tbody>
|
||||
{% for m in health.missing_build_files[:50] %}
|
||||
<tr>
|
||||
<td class="text-xs font-mono">{{ m.slug }}</td>
|
||||
<td class="text-xs">{{ m.language }}</td>
|
||||
<td class="text-xs"><a href="{{ m.url_path }}" class="text-blue" target="_blank">{{ m.url_path }}</a></td>
|
||||
<td class="text-xs text-slate font-mono">{{ m.expected_path }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
{% if health.missing_build_files | length > 50 %}
|
||||
<tr><td colspan="4" class="text-xs text-slate text-center">… and {{ health.missing_build_files | length - 50 }} more</td></tr>
|
||||
{% endif %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</details>
|
||||
{% endif %}
|
||||
|
||||
<!-- Broken Scenario Refs -->
|
||||
{% if health.broken_scenario_refs %}
|
||||
<details class="mb-4">
|
||||
<summary class="cursor-pointer font-semibold text-sm text-red-700">
|
||||
❌ Broken scenario refs ({{ health.counts.broken_scenario_refs }})
|
||||
<span class="text-xs font-normal text-slate ml-2">— [scenario:slug] markers referencing deleted scenarios</span>
|
||||
</summary>
|
||||
<div class="table-wrap mt-2">
|
||||
<table class="table text-sm">
|
||||
<thead><tr><th>Slug</th><th>Language</th><th>Broken refs</th></tr></thead>
|
||||
<tbody>
|
||||
{% for b in health.broken_scenario_refs[:50] %}
|
||||
<tr>
|
||||
<td class="text-xs font-mono">{{ b.slug }}</td>
|
||||
<td class="text-xs">{{ b.language }}</td>
|
||||
<td class="text-xs text-red-600 font-mono">{{ b.broken_scenario_refs | join(', ') }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
{% if health.broken_scenario_refs | length > 50 %}
|
||||
<tr><td colspan="3" class="text-xs text-slate text-center">… and {{ health.broken_scenario_refs | length - 50 }} more</td></tr>
|
||||
{% endif %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</details>
|
||||
{% endif %}
|
||||
|
||||
{% endif %}
|
||||
</div>
|
||||
@@ -0,0 +1,45 @@
|
||||
{# HTMX partial — replaces the entire <tr> for a job row while it's running.
|
||||
Stops polling once the job is complete or failed (hx-trigger="every 2s" only applies
|
||||
while this partial keeps returning a polling trigger). #}
|
||||
|
||||
{% set pct = [((job.progress_current / job.progress_total) * 100) | int, 100] | min if job.progress_total else 0 %}
|
||||
|
||||
<tr id="job-{{ job.id }}"
|
||||
{% if job.status == 'pending' %}
|
||||
hx-get="{{ url_for('pseo.pseo_job_status', job_id=job.id) }}"
|
||||
hx-trigger="every 2s"
|
||||
hx-target="this"
|
||||
hx-swap="outerHTML"
|
||||
{% endif %}>
|
||||
<td class="text-xs text-slate">#{{ job.id }}</td>
|
||||
<td>—</td>{# payload not re-fetched in status endpoint — static display #}
|
||||
<td>
|
||||
{% if job.status == 'complete' %}
|
||||
<span class="badge-success">Complete</span>
|
||||
{% elif job.status == 'failed' %}
|
||||
<span class="badge-danger">Failed</span>
|
||||
{% else %}
|
||||
<span class="badge-warning">Running…</span>
|
||||
{% endif %}
|
||||
</td>
|
||||
<td>
|
||||
{% if job.progress_total and job.progress_total > 0 %}
|
||||
<div class="flex items-center gap-2">
|
||||
<div class="progress-bar-wrap" style="min-width:120px;">
|
||||
<div class="progress-bar-fill" style="width: {{ pct }}%"></div>
|
||||
</div>
|
||||
<span class="text-xs text-slate">{{ job.progress_current }}/{{ job.progress_total }}</span>
|
||||
</div>
|
||||
{% else %}—{% endif %}
|
||||
</td>
|
||||
<td class="text-xs text-slate">{{ (job.created_at or '') | truncate(19, True, '') }}</td>
|
||||
<td class="text-xs text-slate">{{ (job.completed_at or '') | truncate(19, True, '') }}</td>
|
||||
<td>
|
||||
{% if job.error %}
|
||||
<details>
|
||||
<summary class="text-xs text-red-600 cursor-pointer">Error</summary>
|
||||
<pre class="text-xs mt-1 p-2 bg-gray-50 rounded overflow-auto max-w-xs">{{ job.error[:500] }}</pre>
|
||||
</details>
|
||||
{% else %}—{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
95
web/src/padelnomics/admin/templates/admin/pseo_jobs.html
Normal file
95
web/src/padelnomics/admin/templates/admin/pseo_jobs.html
Normal file
@@ -0,0 +1,95 @@
|
||||
{% extends "admin/base_admin.html" %}
|
||||
{% set admin_page = "pseo" %}
|
||||
|
||||
{% block title %}pSEO Jobs - {{ config.APP_NAME }}{% endblock %}
|
||||
|
||||
{% block admin_head %}
|
||||
<style>
|
||||
.progress-bar-wrap { height: 6px; background: #E2E8F0; border-radius: 9999px; overflow: hidden; min-width: 120px; }
|
||||
.progress-bar-fill { height: 100%; background: #1D4ED8; border-radius: 9999px; transition: width 0.3s; }
|
||||
</style>
|
||||
{% endblock %}
|
||||
|
||||
{% block admin_content %}
|
||||
<header class="flex justify-between items-center mb-8">
|
||||
<div>
|
||||
<h1 class="text-2xl">Generation Jobs</h1>
|
||||
<p class="text-slate text-sm mt-1">Recent article generation runs</p>
|
||||
</div>
|
||||
<a href="{{ url_for('pseo.pseo_dashboard') }}" class="btn-outline btn-sm">← pSEO Engine</a>
|
||||
</header>
|
||||
|
||||
{% if not jobs %}
|
||||
<div class="card">
|
||||
<p class="text-slate text-sm">No generation jobs found. Use the pSEO Engine dashboard to generate articles.</p>
|
||||
</div>
|
||||
{% else %}
|
||||
<div class="card">
|
||||
<div class="table-wrap">
|
||||
<table class="table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>#</th>
|
||||
<th>Template</th>
|
||||
<th>Status</th>
|
||||
<th>Progress</th>
|
||||
<th>Started</th>
|
||||
<th>Completed</th>
|
||||
<th>Error</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for job in jobs %}
|
||||
<tr id="job-{{ job.id }}">
|
||||
<td class="text-xs text-slate">#{{ job.id }}</td>
|
||||
<td>
|
||||
{% if job.payload %}
|
||||
{% set payload = job.payload | fromjson %}
|
||||
<span class="font-mono text-xs">{{ payload.get('template_slug', '—') }}</span>
|
||||
{% else %}—{% endif %}
|
||||
</td>
|
||||
<td>
|
||||
{% if job.status == 'complete' %}
|
||||
<span class="badge-success">Complete</span>
|
||||
{% elif job.status == 'failed' %}
|
||||
<span class="badge-danger">Failed</span>
|
||||
{% elif job.status == 'pending' %}
|
||||
{# Poll live status for running jobs #}
|
||||
<div hx-get="{{ url_for('pseo.pseo_job_status', job_id=job.id) }}"
|
||||
hx-trigger="load, every 2s"
|
||||
hx-target="closest tr"
|
||||
hx-swap="outerHTML">
|
||||
<span class="badge-warning">Running…</span>
|
||||
</div>
|
||||
{% else %}
|
||||
<span class="badge">{{ job.status }}</span>
|
||||
{% endif %}
|
||||
</td>
|
||||
<td>
|
||||
{% if job.progress_total and job.progress_total > 0 %}
|
||||
<div class="flex items-center gap-2">
|
||||
<div class="progress-bar-wrap">
|
||||
<div class="progress-bar-fill" style="width: {{ [((job.progress_current / job.progress_total) * 100) | int, 100] | min }}%"></div>
|
||||
</div>
|
||||
<span class="text-xs text-slate">{{ job.progress_current }}/{{ job.progress_total }}</span>
|
||||
</div>
|
||||
{% else %}—{% endif %}
|
||||
</td>
|
||||
<td class="text-xs text-slate">{{ (job.created_at or '') | truncate(19, True, '') }}</td>
|
||||
<td class="text-xs text-slate">{{ (job.completed_at or '') | truncate(19, True, '') }}</td>
|
||||
<td>
|
||||
{% if job.error %}
|
||||
<details>
|
||||
<summary class="text-xs text-red-600 cursor-pointer">Error</summary>
|
||||
<pre class="text-xs mt-1 p-2 bg-gray-50 rounded overflow-auto max-w-xs">{{ job.error[:500] }}</pre>
|
||||
</details>
|
||||
{% else %}—{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% endblock %}
|
||||
@@ -1,13 +1,22 @@
|
||||
"""
|
||||
Padelnomics - Application factory and entry point.
|
||||
"""
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from quart import Quart, Response, abort, g, redirect, request, session, url_for
|
||||
|
||||
from .analytics import close_analytics_db, open_analytics_db
|
||||
from .core import close_db, config, get_csrf_token, init_db, is_flag_enabled, setup_logging, setup_request_id
|
||||
from .core import (
|
||||
close_db,
|
||||
config,
|
||||
get_csrf_token,
|
||||
init_db,
|
||||
is_flag_enabled,
|
||||
setup_logging,
|
||||
setup_request_id,
|
||||
)
|
||||
|
||||
setup_logging()
|
||||
from .i18n import LANG_BLUEPRINTS, SUPPORTED_LANGS, get_country_name, get_translations
|
||||
@@ -97,6 +106,7 @@ def create_app() -> Quart:
|
||||
app.jinja_env.filters["fmt_n"] = _fmt_n
|
||||
app.jinja_env.filters["tformat"] = _tformat # translate with placeholders: {{ t.key | tformat(count=n) }}
|
||||
app.jinja_env.filters["country_name"] = get_country_name # {{ article.country | country_name(lang) }}
|
||||
app.jinja_env.filters["fromjson"] = json.loads # {{ job.payload | fromjson }}
|
||||
|
||||
# Session config
|
||||
app.config["SESSION_COOKIE_SECURE"] = not config.DEBUG
|
||||
@@ -303,6 +313,7 @@ def create_app() -> Quart:
|
||||
# Blueprint registration
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
from .admin.pseo_routes import bp as pseo_bp
|
||||
from .admin.routes import bp as admin_bp
|
||||
from .auth.routes import bp as auth_bp
|
||||
from .billing.routes import bp as billing_bp
|
||||
@@ -327,6 +338,7 @@ def create_app() -> Quart:
|
||||
app.register_blueprint(dashboard_bp)
|
||||
app.register_blueprint(billing_bp)
|
||||
app.register_blueprint(admin_bp)
|
||||
app.register_blueprint(pseo_bp)
|
||||
app.register_blueprint(webhooks_bp)
|
||||
|
||||
# Content catch-all LAST — lives under /<lang> too
|
||||
|
||||
@@ -284,6 +284,7 @@ async def generate_articles(
|
||||
*,
|
||||
limit: int = 500,
|
||||
base_url: str = "https://padelnomics.io",
|
||||
task_id: int | None = None,
|
||||
) -> int:
|
||||
"""
|
||||
Generate articles from a git template + DuckDB data.
|
||||
@@ -297,8 +298,14 @@ async def generate_articles(
|
||||
- write HTML to disk
|
||||
- upsert article row in SQLite
|
||||
|
||||
Returns count of articles generated.
|
||||
If task_id is given, writes progress_current / progress_total / error_log
|
||||
to the tasks table every _PROGRESS_BATCH articles so the pSEO dashboard
|
||||
can show a live progress bar. Per-article errors are logged and collected
|
||||
rather than aborting the run — the full task still completes.
|
||||
|
||||
Returns count of articles generated (excluding per-article errors).
|
||||
"""
|
||||
from ..core import execute as db_execute
|
||||
from ..planner.calculator import DEFAULTS, calc, validate_state
|
||||
from .routes import bake_scenario_cards, is_reserved_path
|
||||
|
||||
@@ -330,6 +337,15 @@ async def generate_articles(
|
||||
t_calc = t_render = t_bake = 0.0
|
||||
|
||||
_BATCH_SIZE = 200
|
||||
_PROGRESS_BATCH = 50 # write task progress every N articles (avoid write amplification)
|
||||
|
||||
# Write progress_total before the loop so the dashboard can show 0/N immediately.
|
||||
if task_id is not None:
|
||||
total = len(rows) * len(config["languages"])
|
||||
await db_execute(
|
||||
"UPDATE tasks SET progress_total = ? WHERE id = ?",
|
||||
(total, task_id),
|
||||
)
|
||||
|
||||
async with transaction() as db:
|
||||
for row in rows:
|
||||
@@ -515,12 +531,27 @@ async def generate_articles(
|
||||
elif generated % 25 == 0:
|
||||
logger.info("%s: %d articles written…", slug, generated)
|
||||
|
||||
# Write progress every _PROGRESS_BATCH articles so the pSEO
|
||||
# dashboard live-updates without excessive write amplification.
|
||||
if task_id is not None and generated % _PROGRESS_BATCH == 0:
|
||||
await db_execute(
|
||||
"UPDATE tasks SET progress_current = ? WHERE id = ?",
|
||||
(generated, task_id),
|
||||
)
|
||||
|
||||
# Stagger dates
|
||||
published_today += 1
|
||||
if published_today >= articles_per_day:
|
||||
published_today = 0
|
||||
publish_date += timedelta(days=1)
|
||||
|
||||
# Write final progress so the dashboard shows 100% on completion.
|
||||
if task_id is not None:
|
||||
await db_execute(
|
||||
"UPDATE tasks SET progress_current = ? WHERE id = ?",
|
||||
(generated, task_id),
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"%s: done — %d total | calc=%.1fs render=%.1fs bake=%.1fs",
|
||||
slug, generated, t_calc, t_render, t_bake,
|
||||
|
||||
397
web/src/padelnomics/content/health.py
Normal file
397
web/src/padelnomics/content/health.py
Normal file
@@ -0,0 +1,397 @@
|
||||
"""
|
||||
pSEO Engine health checks and content gap queries.
|
||||
|
||||
All functions are async, pure queries — no side effects.
|
||||
Used by the pSEO Engine admin dashboard.
|
||||
|
||||
Functions overview:
|
||||
get_template_stats() — article counts per status/language for one template
|
||||
get_template_freshness() — compare _serving_meta.json timestamp vs last article generation
|
||||
get_content_gaps() — DuckDB rows with no matching article for a template+language
|
||||
check_hreflang_orphans() — published articles missing a sibling language
|
||||
check_missing_build_files()— published articles whose HTML file is absent from disk
|
||||
check_broken_scenario_refs()— articles referencing [scenario:slug] that doesn't exist
|
||||
get_all_health_issues() — run all checks, return counts + details
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from ..analytics import fetch_analytics
|
||||
from ..core import fetch_all
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Directory where generate_articles() writes HTML + markdown source files.
|
||||
BUILD_DIR = Path("data/content/_build")
|
||||
|
||||
# Pattern matching [scenario:slug] and [scenario:slug:section] markers.
|
||||
_SCENARIO_REF_RE = re.compile(r"\[scenario:([a-z0-9_-]+)(?::[a-z]+)?\]")
|
||||
|
||||
|
||||
def _validate_table_name(data_table: str) -> None:
    """Guard against SQL injection in table names.

    A valid name starts with a lowercase letter or underscore and continues
    with lowercase letters, digits, underscores or dots (dots allow a schema
    prefix such as ``serving.some_table``).

    Raises:
        AssertionError: if ``data_table`` does not match that shape.
    """
    # fullmatch, not match(..."$"): "$" also matches right before a trailing
    # newline, so "tbl\n" would otherwise be accepted.
    if re.fullmatch(r"[a-z_][a-z0-9_.]*", data_table) is None:
        # Raised explicitly instead of via `assert` so the guard is not
        # stripped under `python -O`; the exception type stays the same.
        raise AssertionError(f"Invalid table name: {data_table}")
|
||||
|
||||
|
||||
def _read_serving_meta() -> dict:
    """Read _serving_meta.json written by export_serving.py.

    The file is looked up in the directory of the serving DuckDB file, whose
    path comes from the SERVING_DUCKDB_PATH environment variable (default
    "data/analytics.duckdb").

    Returns:
        The parsed JSON object, or {} if the file is absent, unreadable,
        not valid JSON, or does not contain a JSON object.
    """
    serving_path = os.environ.get("SERVING_DUCKDB_PATH", "data/analytics.duckdb")
    meta_path = Path(serving_path).parent / "_serving_meta.json"
    try:
        # Read directly instead of exists()-then-read: a missing file raises
        # FileNotFoundError (an OSError), so there is no check/use race.
        meta = json.loads(meta_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return {}
    # Callers use .get() on the result, so anything but an object is unusable.
    return meta if isinstance(meta, dict) else {}
|
||||
|
||||
|
||||
def _parse_dt(s: str | None) -> datetime | None:
|
||||
"""Parse an ISO datetime string to a naive UTC datetime. Returns None on failure."""
|
||||
if not s:
|
||||
return None
|
||||
try:
|
||||
dt = datetime.fromisoformat(s)
|
||||
# Strip timezone info so both aware (from meta) and naive (from SQLite) compare cleanly.
|
||||
return dt.replace(tzinfo=None)
|
||||
except (ValueError, TypeError):
|
||||
return None
|
||||
|
||||
|
||||
# ── Template statistics ───────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def get_template_stats(template_slug: str) -> dict:
    """Count a template's articles overall and per language, split by status.

    Returns:
        {
            "total": N,
            "published": N,
            "draft": N,
            "scheduled": N,
            "by_language": {"en": {"total": N, "published": N, ...}, ...},
        }
    """
    grouped = await fetch_all(
        "SELECT status, language, COUNT(*) as cnt FROM articles"
        " WHERE template_slug = ? GROUP BY status, language",
        (template_slug,),
    )
    summary: dict = {"total": 0, "published": 0, "draft": 0, "scheduled": 0, "by_language": {}}
    per_language = summary["by_language"]
    for record in grouped:
        n, state, language = record["cnt"], record["status"], record["language"]
        bucket = per_language.setdefault(
            language, {"total": 0, "published": 0, "draft": 0, "scheduled": 0}
        )
        # The same bookkeeping applies to the overall summary and the
        # language bucket: always bump "total", bump the status if tracked.
        for counters in (summary, bucket):
            counters["total"] += n
            if state in counters:
                counters[state] += n
    return summary
|
||||
|
||||
|
||||
# ── Data freshness ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def get_template_freshness(templates: list[dict]) -> list[dict]:
    """Compare _serving_meta.json exported_at_utc vs the latest article timestamp per template.

    The article timestamp is MAX(COALESCE(updated_at, created_at)) over the
    template's rows in the articles table.

    Returns list of dicts — one per template:
        {
            "slug": str,
            "name": str,
            "data_table": str,
            "exported_at_utc": str | None,  # raw value from _serving_meta.json
            "last_generated": str | None,   # raw value from the articles query
            "row_count": int | None,        # row count from meta, if listed
            "status": "fresh" | "stale" | "no_articles" | "no_data",
        }

    Freshness semantics:
        "fresh"       — articles generated at or after the last data export
        "stale"       — data export is newer than last article generation (regen needed)
        "no_articles" — export timestamp known but no article timestamp available
        "no_data"     — no usable export timestamp (meta file absent, or its
                        exported_at_utc missing or unparseable)
    """
    meta = _read_serving_meta()
    exported_at_str = meta.get("exported_at_utc")
    exported_at = _parse_dt(exported_at_str)
    # `or {}` also tolerates an explicit null in the meta file.
    table_meta = meta.get("tables") or {}

    result = []
    for t in templates:
        slug = t["slug"]
        data_table = t.get("data_table", "")
        # Strip schema prefix to match the key in the meta "tables" dict.
        # e.g. "serving.pseo_city_costs_de" → "pseo_city_costs_de"
        table_key = data_table.rsplit(".", 1)[-1]

        rows = await fetch_all(
            "SELECT MAX(COALESCE(updated_at, created_at)) as last_gen FROM articles"
            " WHERE template_slug = ?",
            (slug,),
        )
        last_gen_str = rows[0]["last_gen"] if rows else None
        last_gen = _parse_dt(last_gen_str)

        row_count = (table_meta.get(table_key) or {}).get("row_count")

        if exported_at is None:
            # Covers a missing AND an unparseable timestamp; without a usable
            # export time the comparison below would silently report "fresh".
            status = "no_data"
        elif last_gen is None:
            status = "no_articles"
        elif exported_at > last_gen:
            # New data available — articles haven't been regenerated against it yet.
            status = "stale"
        else:
            status = "fresh"

        result.append({
            "slug": slug,
            "name": t.get("name", slug),
            "data_table": data_table,
            "exported_at_utc": exported_at_str,
            "last_generated": last_gen_str,
            "row_count": row_count,
            "status": status,
        })

    return result
|
||||
|
||||
|
||||
# ── Content gaps ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def get_content_gaps(
    template_slug: str,
    data_table: str,
    natural_key: str,
    languages: list[str],
    limit: int = 200,
) -> list[dict]:
    """List data rows that lack an article in at least one requested language.

    An article counts as covering a row when its slug equals
    "{template_slug}-{lang}-{natural_key_value}", so coverage is decided from
    slugs alone without rendering URL patterns.

    ``limit`` caps how many rows are read from ``data_table``; the gap filter
    runs on those rows afterwards.

    Returns list of dicts — each is the data row plus two extra keys:
        "_natural_key": str — the row's natural key value (stringified)
        "_missing_languages": list[str] — requested languages with no article
    """
    assert languages, "languages must not be empty"
    _validate_table_name(data_table)

    article_rows = await fetch_all(
        "SELECT slug, language FROM articles WHERE template_slug = ?",
        (template_slug,),
    )

    # (language, natural key value) pairs that already have an article.
    slug_heads = {code: f"{template_slug}-{code}-" for code in languages}
    covered: set[tuple[str, str]] = set()
    for article in article_rows:
        head = slug_heads.get(article["language"])
        if head is not None and article["slug"].startswith(head):
            covered.add((article["language"], article["slug"][len(head):]))

    source_rows = await fetch_analytics(
        f"SELECT * FROM {data_table} LIMIT ?",
        [limit],
    )

    found = []
    for source in source_rows:
        key_value = str(source.get(natural_key, ""))
        absent = [code for code in languages if (code, key_value) not in covered]
        if absent:
            found.append({**source, "_natural_key": key_value, "_missing_languages": absent})
    return found
|
||||
|
||||
|
||||
# ── Health checks ─────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def check_hreflang_orphans(templates: list[dict]) -> list[dict]:
    """Find published articles whose template expects a language that is absent.

    Example: city-cost-de generates EN + DE. If the EN article is published
    but no DE article is, that natural key is reported as an orphan.

    Articles are matched on the slug pattern
    "{template_slug}-{lang}-{natural_key}" and grouped by natural key; slugs
    that do not follow the pattern are ignored. Templates with fewer than two
    languages are skipped.

    Returns list of dicts:
        {
            "template_slug": str,
            "url_path": str,  # url_path of the first article seen for the key
            "present_languages": list[str],
            "missing_languages": list[str],
        }
    """
    findings = []
    for tpl in templates:
        wanted = set(tpl.get("languages", ["en"]))
        if len(wanted) < 2:
            continue  # Single-language template — no orphans possible.

        published = await fetch_all(
            "SELECT slug, language, url_path FROM articles"
            " WHERE template_slug = ? AND status = 'published'",
            (tpl["slug"],),
        )

        base = tpl["slug"] + "-"
        groups: dict[str, dict] = {}  # natural key → {"langs": set, "url_path": str}
        for article in published:
            slug, code = article["slug"], article["language"]
            if not slug.startswith(base):
                continue
            tail = slug[len(base):]  # "{lang}-{natural_key}"
            marker = code + "-"
            if not tail.startswith(marker):
                continue
            entry = groups.setdefault(
                tail[len(marker):],
                {"langs": set(), "url_path": article["url_path"]},
            )
            entry["langs"].add(code)

        findings.extend(
            {
                "template_slug": tpl["slug"],
                "url_path": entry["url_path"],
                "present_languages": sorted(entry["langs"]),
                "missing_languages": sorted(wanted - entry["langs"]),
            }
            for entry in groups.values()
            if wanted - entry["langs"]
        )

    return findings
|
||||
|
||||
|
||||
async def check_missing_build_files(build_dir: Path | None = None) -> list[dict]:
    """Report published articles that have no HTML file on disk.

    The file is expected at {build_dir}/{language}/{slug}.html, where
    ``build_dir`` falls back to BUILD_DIR when not given.

    Returns list of dicts:
        {"id", "slug", "language", "url_path", "template_slug", "expected_path"}
    """
    root = build_dir or BUILD_DIR
    published = await fetch_all(
        "SELECT id, slug, language, url_path, template_slug FROM articles"
        " WHERE status = 'published'",
    )
    copied_fields = ("id", "slug", "language", "url_path", "template_slug")
    report = []
    for article in published:
        expected = root / article["language"] / f"{article['slug']}.html"
        if expected.exists():
            continue
        entry = {field: article[field] for field in copied_fields}
        entry["expected_path"] = str(expected)
        report.append(entry)
    return report
|
||||
|
||||
|
||||
async def check_broken_scenario_refs(build_dir: Path | None = None) -> list[dict]:
    """Report pSEO articles whose [scenario:slug] markers point at unknown scenarios.

    Markdown is read from {build_dir}/{language}/md/{slug}.md (``build_dir``
    falls back to BUILD_DIR); articles without that file are skipped. Only
    published articles with a template_slug are inspected, and a marker is
    broken when its slug is not in the published_scenarios table.

    Returns list of dicts:
        {"id", "slug", "language", "url_path", "broken_scenario_refs": [str, ...]}
    """
    root = build_dir or BUILD_DIR

    known = {row["slug"] for row in await fetch_all("SELECT slug FROM published_scenarios")}

    candidates = await fetch_all(
        "SELECT id, slug, language, url_path FROM articles"
        " WHERE status = 'published' AND template_slug IS NOT NULL",
    )

    findings = []
    for article in candidates:
        source = root / article["language"] / "md" / f"{article['slug']}.md"
        if not source.exists():
            continue
        cited = {hit.group(1) for hit in _SCENARIO_REF_RE.finditer(source.read_text())}
        dangling = sorted(cited - known)
        if not dangling:
            continue
        findings.append({
            "id": article["id"],
            "slug": article["slug"],
            "language": article["language"],
            "url_path": article["url_path"],
            "broken_scenario_refs": dangling,
        })

    return findings
|
||||
|
||||
|
||||
# ── Aggregate check ───────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def get_all_health_issues(
    templates: list[dict],
    build_dir: Path | None = None,
) -> dict:
    """Run every health check in turn and bundle details with per-check counts.

    Returns:
        {
            "hreflang_orphans": [...],
            "missing_build_files": [...],
            "broken_scenario_refs": [...],
            "counts": {
                "hreflang_orphans": N,
                "missing_build_files": N,
                "broken_scenario_refs": N,
                "total": N,
            },
        }
    """
    # Dict entries are evaluated top to bottom, so the checks still run
    # one after another in this order.
    details = {
        "hreflang_orphans": await check_hreflang_orphans(templates),
        "missing_build_files": await check_missing_build_files(build_dir),
        "broken_scenario_refs": await check_broken_scenario_refs(build_dir),
    }
    counts = {check: len(issues) for check, issues in details.items()}
    counts["total"] = sum(counts.values())
    return {**details, "counts": counts}
|
||||
@@ -0,0 +1,18 @@
|
||||
"""Add progress tracking columns to the tasks table.
|
||||
|
||||
Enables the pSEO Engine dashboard to show live progress during article
|
||||
generation jobs: a progress bar (current/total) and an error log for
|
||||
per-article failures without aborting the whole run.
|
||||
"""
|
||||
|
||||
|
||||
def up(conn) -> None:
    """Add progress_current, progress_total and error_log columns to tasks.

    Each column is NOT NULL with a default (0, 0 and '[]' respectively), so
    existing rows receive those values.
    """
    statements = (
        "ALTER TABLE tasks ADD COLUMN progress_current INTEGER NOT NULL DEFAULT 0",
        "ALTER TABLE tasks ADD COLUMN progress_total INTEGER NOT NULL DEFAULT 0",
        "ALTER TABLE tasks ADD COLUMN error_log TEXT NOT NULL DEFAULT '[]'",
    )
    for ddl in statements:
        conn.execute(ddl)
|
||||
@@ -4,11 +4,10 @@ Background task worker - SQLite-based queue (no Redis needed).
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import traceback
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import logging
|
||||
|
||||
from .core import (
|
||||
EMAIL_ADDRESSES,
|
||||
config,
|
||||
@@ -754,8 +753,11 @@ async def handle_generate_articles(payload: dict) -> None:
|
||||
start_date = date_cls.fromisoformat(payload["start_date"])
|
||||
articles_per_day = payload.get("articles_per_day", 3)
|
||||
limit = payload.get("limit", 500)
|
||||
task_id = payload.get("_task_id")
|
||||
|
||||
count = await generate_articles(slug, start_date, articles_per_day, limit=limit)
|
||||
count = await generate_articles(
|
||||
slug, start_date, articles_per_day, limit=limit, task_id=task_id
|
||||
)
|
||||
logger.info("Generated %s articles for template '%s'", count, slug)
|
||||
|
||||
|
||||
@@ -777,6 +779,9 @@ async def process_task(task: dict) -> None:
|
||||
|
||||
try:
|
||||
payload = json.loads(task["payload"]) if task["payload"] else {}
|
||||
# Inject task_id so progress-aware handlers (e.g. generate_articles) can
|
||||
# write progress_current to the tasks table without a separate lookup.
|
||||
payload["_task_id"] = task_id
|
||||
await handler(payload)
|
||||
await mark_complete(task_id)
|
||||
logger.info("Completed: %s (id=%s)", task_name, task_id)
|
||||
|
||||
765
web/tests/test_pseo.py
Normal file
765
web/tests/test_pseo.py
Normal file
@@ -0,0 +1,765 @@
|
||||
"""
|
||||
Tests for the pSEO Engine: health checks, content gaps, freshness, and admin routes.
|
||||
|
||||
Covers:
|
||||
- content/health.py: get_template_stats, get_template_freshness, get_content_gaps,
|
||||
check_hreflang_orphans, check_missing_build_files, check_broken_scenario_refs,
|
||||
get_all_health_issues
|
||||
- admin/pseo_routes.py: all 6 routes (dashboard, health, gaps, generate, jobs, job status)
|
||||
"""
|
||||
import json
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
from padelnomics.content.health import (
|
||||
check_broken_scenario_refs,
|
||||
check_hreflang_orphans,
|
||||
check_missing_build_files,
|
||||
get_all_health_issues,
|
||||
get_content_gaps,
|
||||
get_template_freshness,
|
||||
get_template_stats,
|
||||
)
|
||||
from padelnomics.core import execute, utcnow_iso
|
||||
|
||||
from padelnomics import core
|
||||
|
||||
# ── Fixtures ──────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.fixture
async def admin_client(app, db):
    """Yield a test client whose session belongs to a freshly created admin user."""
    created_at = utcnow_iso()
    async with db.execute(
        "INSERT INTO users (email, name, created_at) VALUES (?, ?, ?)",
        ("pseo-admin@test.com", "pSEO Admin", created_at),
    ) as cur:
        user_id = cur.lastrowid
    await db.execute(
        "INSERT INTO user_roles (user_id, role) VALUES (?, 'admin')", (user_id,)
    )
    await db.commit()

    async with app.test_client() as client:
        # Close the session transaction before yielding so the login is
        # stored before the test issues requests.
        async with client.session_transaction() as session:
            session["user_id"] = user_id
        yield client
|
||||
|
||||
|
||||
# ── DB helpers ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def _insert_article(
    slug,
    url_path,
    status="published",
    language="en",
    template_slug="city-cost-de",
    created_at=None,
):
    """Insert a minimal article row and return the result of execute().

    ``created_at`` (or the current time when omitted) is used for created_at
    and updated_at, and for published_at when status is "published".
    """
    stamp = created_at or utcnow_iso()
    published_at = stamp if status == "published" else None
    values = (
        url_path,
        slug,
        f"Title {slug}",
        f"Desc {slug}",
        status,
        published_at,
        language,
        template_slug,
        stamp,
        stamp,
    )
    return await execute(
        """INSERT INTO articles
        (url_path, slug, title, meta_description, country, region,
        status, published_at, language, template_slug, created_at, updated_at)
        VALUES (?, ?, ?, ?, 'DE', 'Europe', ?, ?, ?, ?, ?, ?)""",
        values,
    )
|
||||
|
||||
|
||||
async def _insert_scenario(slug="test-scenario"):
    """Insert a minimal published_scenarios row built from a two-court planner state."""
    from padelnomics.planner.calculator import calc, validate_state

    planner_state = validate_state({"dblCourts": 2})
    # Run the calculation before serialising either value, as the original did.
    calculated = calc(planner_state)
    values = (
        slug,
        f"Scenario {slug}",
        json.dumps(planner_state),
        json.dumps(calculated),
    )
    return await execute(
        """INSERT INTO published_scenarios
        (slug, title, subtitle, location, country, venue_type, ownership,
        court_config, state_json, calc_json)
        VALUES (?, ?, '', 'TestCity', 'TC', 'indoor', 'rent', '2 double', ?, ?)""",
        values,
    )
|
||||
|
||||
|
||||
async def _insert_task(status="pending", progress_current=0, progress_total=0):
    """Insert a generate_articles task row (empty payload) and return its id."""
    stamp = utcnow_iso()
    values = (status, stamp, progress_current, progress_total, stamp)
    async with core._db.execute(
        """INSERT INTO tasks
        (task_name, payload, status, run_at, progress_current, progress_total, created_at)
        VALUES ('generate_articles', '{}', ?, ?, ?, ?, ?)""",
        values,
    ) as cur:
        new_id = cur.lastrowid
    await core._db.commit()
    return new_id
|
||||
|
||||
|
||||
# ── DuckDB mock rows ──────────────────────────────────────────────────────────
|
||||
|
||||
_DUCKDB_ROWS = [
|
||||
{"city_slug": "berlin", "city": "Berlin", "country": "DE"},
|
||||
{"city_slug": "munich", "city": "Munich", "country": "DE"},
|
||||
{"city_slug": "hamburg", "city": "Hamburg", "country": "DE"},
|
||||
]
|
||||
|
||||
|
||||
async def _mock_fetch_duckdb(query, params=None):
    """Stand-in for fetch_analytics: ignore the query and return the canned rows."""
    canned_rows = _DUCKDB_ROWS
    return canned_rows
|
||||
|
||||
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
# get_template_stats()
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
|
||||
class TestGetTemplateStats:
    """get_template_stats(): totals per status and per language."""

    async def test_empty_db_returns_zeros(self, db):
        result = await get_template_stats("city-cost-de")
        assert (result["total"], result["published"], result["draft"]) == (0, 0, 0)
        assert result["by_language"] == {}

    async def test_counts_per_status(self, db):
        fixtures = (
            ("city-cost-de-en-berlin", "/en/markets/germany/berlin", "published", "en"),
            ("city-cost-de-en-munich", "/en/markets/germany/munich", "draft", "en"),
            ("city-cost-de-de-berlin", "/de/markets/germany/berlin", "published", "de"),
        )
        for slug, url_path, status, language in fixtures:
            await _insert_article(slug, url_path, status=status, language=language)

        result = await get_template_stats("city-cost-de")

        assert result["total"] == 3
        assert result["published"] == 2
        assert result["draft"] == 1
        per_language_totals = {
            code: result["by_language"][code]["total"] for code in ("en", "de")
        }
        assert per_language_totals == {"en": 2, "de": 1}

    async def test_ignores_other_templates(self, db):
        await _insert_article("other-en-berlin", "/en/other/berlin", template_slug="other")
        result = await get_template_stats("city-cost-de")
        assert result["total"] == 0
|
||||
|
||||
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
# get_template_freshness()
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
_SAMPLE_TEMPLATES = [
|
||||
{
|
||||
"slug": "city-cost-de",
|
||||
"name": "City Cost DE",
|
||||
"data_table": "serving.pseo_city_costs_de",
|
||||
"languages": ["en", "de"],
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
class TestGetTemplateFreshness:
    """get_template_freshness(): status derived from export time vs article time."""

    # Dotted path patched in every test to control the serving metadata.
    _META_TARGET = "padelnomics.content.health._read_serving_meta"

    async def test_no_meta_file_returns_no_data(self, db, monkeypatch):
        monkeypatch.setattr(self._META_TARGET, lambda: {})

        report = await get_template_freshness(_SAMPLE_TEMPLATES)
        assert [entry["status"] for entry in report] == ["no_data"]

    async def test_meta_present_no_articles_returns_no_articles(self, db, monkeypatch):
        monkeypatch.setattr(self._META_TARGET, lambda: {
            "exported_at_utc": "2026-01-15T10:00:00+00:00",
            "tables": {"pseo_city_costs_de": {"row_count": 100}},
        })

        first = (await get_template_freshness(_SAMPLE_TEMPLATES))[0]
        assert first["status"] == "no_articles"
        assert first["row_count"] == 100

    async def test_article_older_than_export_returns_stale(self, db, monkeypatch):
        # Article created Jan 10, data exported Jan 15 → stale
        await _insert_article(
            "city-cost-de-en-berlin", "/en/markets/germany/berlin",
            status="published", language="en", created_at="2026-01-10T08:00:00",
        )
        monkeypatch.setattr(self._META_TARGET, lambda: {
            "exported_at_utc": "2026-01-15T10:00:00+00:00",
            "tables": {"pseo_city_costs_de": {"row_count": 100}},
        })

        first = (await get_template_freshness(_SAMPLE_TEMPLATES))[0]
        assert first["status"] == "stale"

    async def test_article_newer_than_export_returns_fresh(self, db, monkeypatch):
        # Data exported Jan 10, article updated Jan 15 → fresh
        await _insert_article(
            "city-cost-de-en-berlin", "/en/markets/germany/berlin",
            status="published", language="en", created_at="2026-01-15T12:00:00",
        )
        monkeypatch.setattr(self._META_TARGET, lambda: {
            "exported_at_utc": "2026-01-10T10:00:00+00:00",
            "tables": {},
        })

        first = (await get_template_freshness(_SAMPLE_TEMPLATES))[0]
        assert first["status"] == "fresh"
|
||||
|
||||
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
# get_content_gaps()
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
|
||||
class TestGetContentGaps:
    """get_content_gaps(): data rows without an article per requested language."""

    @staticmethod
    async def _gaps(monkeypatch, languages):
        """Patch the analytics fetch with the canned rows and query the gaps."""
        monkeypatch.setattr(
            "padelnomics.content.health.fetch_analytics", _mock_fetch_duckdb
        )
        return await get_content_gaps(
            template_slug="city-cost-de",
            data_table="serving.pseo_city_costs_de",
            natural_key="city_slug",
            languages=languages,
        )

    async def test_no_articles_returns_all_duckdb_rows(self, db, monkeypatch):
        gaps = await self._gaps(monkeypatch, ["en"])
        assert len(gaps) == len(_DUCKDB_ROWS)
        assert all(gap["_missing_languages"] == ["en"] for gap in gaps)

    async def test_existing_article_excluded_from_gaps(self, db, monkeypatch):
        await _insert_article(
            "city-cost-de-en-berlin", "/en/markets/germany/berlin", language="en",
        )

        gaps = await self._gaps(monkeypatch, ["en"])
        reported = {gap["_natural_key"] for gap in gaps}
        assert "berlin" not in reported
        assert "munich" in reported
        assert "hamburg" in reported

    async def test_partial_language_gap_detected(self, db, monkeypatch):
        # EN exists for berlin, DE is missing → berlin has a gap for "de"
        await _insert_article(
            "city-cost-de-en-berlin", "/en/markets/germany/berlin", language="en",
        )

        gaps = await self._gaps(monkeypatch, ["en", "de"])
        berlin = next((gap for gap in gaps if gap["_natural_key"] == "berlin"), None)
        assert berlin is not None
        assert berlin["_missing_languages"] == ["de"]

    async def test_no_gaps_when_all_articles_exist(self, db, monkeypatch):
        for city in ("berlin", "munich", "hamburg"):
            await _insert_article(
                f"city-cost-de-en-{city}", f"/en/markets/germany/{city}", language="en",
            )

        assert await self._gaps(monkeypatch, ["en"]) == []
|
||||
|
||||
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
# check_hreflang_orphans()
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
|
||||
class TestCheckHreflangOrphans:
    """check_hreflang_orphans(): published articles missing a sibling language."""

    @staticmethod
    def _template(*languages):
        """Build a one-element template list for the given languages."""
        return [{"slug": "city-cost-de", "name": "City Cost DE", "languages": list(languages)}]

    @staticmethod
    async def _berlin(language, status="published"):
        """Insert the berlin article for one language."""
        await _insert_article(
            f"city-cost-de-{language}-berlin", f"/{language}/markets/germany/berlin",
            language=language, status=status,
        )

    async def test_single_lang_template_no_orphans(self, db):
        await self._berlin("en")
        assert await check_hreflang_orphans(self._template("en")) == []

    async def test_bilingual_both_present_no_orphans(self, db):
        await self._berlin("en")
        await self._berlin("de")
        assert await check_hreflang_orphans(self._template("en", "de")) == []

    async def test_missing_de_sibling_detected(self, db):
        # Only EN for berlin — DE is missing
        await self._berlin("en")
        found = await check_hreflang_orphans(self._template("en", "de"))
        assert len(found) == 1
        only = found[0]
        assert only["template_slug"] == "city-cost-de"
        assert "de" in only["missing_languages"]
        assert "en" in only["present_languages"]

    async def test_draft_articles_not_counted(self, db):
        # Draft articles should be ignored
        await self._berlin("en", status="draft")
        assert await check_hreflang_orphans(self._template("en", "de")) == []
|
||||
|
||||
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
# check_missing_build_files()
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
|
||||
class TestCheckMissingBuildFiles:
    """Exercise check_missing_build_files() against a temporary build dir."""

    async def test_no_articles_returns_empty(self, db, tmp_path):
        """With no articles inserted, nothing is reported."""
        missing = await check_missing_build_files(build_dir=tmp_path)
        assert missing == []

    async def test_build_file_present_not_reported(self, db, tmp_path):
        """A published article whose HTML file exists is not reported."""
        article_slug = "city-cost-de-en-berlin"
        await _insert_article(
            article_slug,
            "/en/markets/germany/berlin",
            language="en",
            status="published",
        )

        # Create <build_dir>/en/<slug>.html before running the check.
        language_dir = tmp_path / "en"
        language_dir.mkdir(parents=True)
        (language_dir / f"{article_slug}.html").write_text("<h1>Berlin</h1>")

        missing = await check_missing_build_files(build_dir=tmp_path)
        assert missing == []

    async def test_missing_build_file_reported(self, db, tmp_path):
        """A published article without an HTML file is reported once."""
        await _insert_article(
            "city-cost-de-en-berlin",
            "/en/markets/germany/berlin",
            language="en",
            status="published",
        )

        # Deliberately leave the build directory empty.
        missing = await check_missing_build_files(build_dir=tmp_path)
        assert len(missing) == 1

        entry = missing[0]
        assert entry["slug"] == "city-cost-de-en-berlin"
        assert entry["language"] == "en"

    async def test_draft_articles_ignored(self, db, tmp_path):
        """A draft article without a build file is not reported."""
        await _insert_article(
            "city-cost-de-en-berlin",
            "/en/markets/germany/berlin",
            language="en",
            status="draft",
        )

        missing = await check_missing_build_files(build_dir=tmp_path)
        assert missing == []
|
||||
|
||||
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
# check_broken_scenario_refs()
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
|
||||
class TestCheckBrokenScenarioRefs:
    """Exercise check_broken_scenario_refs() with markdown under <build>/en/md."""

    @staticmethod
    def _write_markdown(build_dir, filename, body):
        """Create <build_dir>/en/md and write *body* to *filename* inside it."""
        md_dir = build_dir / "en" / "md"
        md_dir.mkdir(parents=True)
        (md_dir / filename).write_text(body)

    @staticmethod
    async def _publish_berlin():
        """Insert the published EN Berlin article used by most tests here."""
        await _insert_article(
            "city-cost-de-en-berlin",
            "/en/markets/germany/berlin",
            language="en",
            status="published",
        )

    async def test_no_markdown_files_returns_empty(self, db, tmp_path):
        """An article with no markdown file on disk yields no findings."""
        await self._publish_berlin()

        findings = await check_broken_scenario_refs(build_dir=tmp_path)
        assert findings == []

    async def test_valid_scenario_ref_not_reported(self, db, tmp_path):
        """A reference to a scenario that exists in the DB is not reported."""
        await _insert_scenario("berlin-scenario")
        await self._publish_berlin()
        self._write_markdown(
            tmp_path,
            "city-cost-de-en-berlin.md",
            "# Berlin\n\n[scenario:berlin-scenario:capex]\n",
        )

        findings = await check_broken_scenario_refs(build_dir=tmp_path)
        assert findings == []

    async def test_missing_scenario_ref_reported(self, db, tmp_path):
        """A reference to a scenario absent from the DB is reported."""
        # No scenario row is inserted; the markdown still points at one.
        await self._publish_berlin()
        self._write_markdown(
            tmp_path,
            "city-cost-de-en-berlin.md",
            "# Berlin\n\n[scenario:ghost-scenario:capex]\n",
        )

        findings = await check_broken_scenario_refs(build_dir=tmp_path)
        assert len(findings) == 1
        assert "ghost-scenario" in findings[0]["broken_scenario_refs"]

    async def test_no_template_slug_articles_ignored(self, db, tmp_path):
        """A legacy article (inserted without template_slug) is not checked."""
        created_at = utcnow_iso()
        await execute(
            """INSERT INTO articles
               (url_path, slug, title, status, language, created_at)
               VALUES ('/en/legacy', 'legacy', 'Legacy', 'published', 'en', ?)""",
            (created_at,),
        )
        self._write_markdown(
            tmp_path, "legacy.md", "# Legacy\n\n[scenario:ghost]\n"
        )

        findings = await check_broken_scenario_refs(build_dir=tmp_path)
        assert findings == []
|
||||
|
||||
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
# get_all_health_issues()
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
|
||||
class TestGetAllHealthIssues:
    """Exercise the aggregate report returned by get_all_health_issues()."""

    async def test_clean_state_returns_zero_counts(self, db, tmp_path):
        """With no articles, every counter is zero and every section exists."""
        english_only = [
            {"slug": "city-cost-de", "name": "City Cost DE", "languages": ["en"]}
        ]
        report = await get_all_health_issues(english_only, build_dir=tmp_path)

        check_names = (
            "hreflang_orphans",
            "missing_build_files",
            "broken_scenario_refs",
        )
        counts = report["counts"]
        assert counts["total"] == 0
        for check_name in check_names:
            assert counts[check_name] == 0
        # Each check also exposes its own detail section at the top level.
        for check_name in check_names:
            assert check_name in report

    async def test_orphan_counted_in_total(self, db, tmp_path):
        """One hreflang orphan shows up in its own counter and in the total."""
        bilingual = [
            {
                "slug": "city-cost-de",
                "name": "City Cost DE",
                "languages": ["en", "de"],
            }
        ]
        # Publish EN only, so the missing DE sibling makes it an orphan.
        await _insert_article(
            "city-cost-de-en-berlin",
            "/en/markets/germany/berlin",
            language="en",
            status="published",
        )

        report = await get_all_health_issues(bilingual, build_dir=tmp_path)
        counts = report["counts"]
        assert counts["hreflang_orphans"] == 1
        # The total is only bounded from below: other checks may also fire.
        assert counts["total"] >= 1
|
||||
|
||||
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
# pSEO Route tests
|
||||
# ════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
# Mock objects for route tests — avoids needing a live DuckDB
|
||||
# Single template config shared by the route tests; _MOCK_TEMPLATES is the
# one-element list handed out by the discover_templates stand-in.
_MOCK_TEMPLATE_CFG = dict(
    slug="city-cost-de",
    name="City Cost DE",
    data_table="serving.pseo_city_costs_de",
    natural_key="city_slug",
    languages=["en", "de"],
    url_pattern="/markets/{country}/{city_slug}",
)
_MOCK_TEMPLATES = [_MOCK_TEMPLATE_CFG]
|
||||
|
||||
|
||||
def _discover_mock():
    """Stand-in for discover_templates(): return the fixed mock template list."""
    discovered = _MOCK_TEMPLATES
    return discovered
|
||||
|
||||
|
||||
def _load_template_mock(slug):
    """Stand-in for load_template().

    Returns the mock config for "city-cost-de" and raises FileNotFoundError
    for any other slug.
    """
    if slug != "city-cost-de":
        raise FileNotFoundError(f"Template {slug!r} not found")
    return _MOCK_TEMPLATE_CFG
|
||||
|
||||
|
||||
async def _freshness_mock(templates):
    """Stand-in for get_template_freshness().

    Returns one row per template, copying slug/name/data_table from the
    template and filling the remaining fields with fixed values.
    """
    rows = []
    for tpl in templates:
        row = {
            "slug": tpl["slug"],
            "name": tpl["name"],
            "data_table": tpl["data_table"],
        }
        # Fixed values, identical for every template.
        row["status"] = "fresh"
        row["exported_at_utc"] = None
        row["last_generated"] = None
        row["row_count"] = 100
        rows.append(row)
    return rows
|
||||
|
||||
|
||||
async def _stats_mock(slug):
    """Stand-in for get_template_stats(): same fixed counts for any slug."""
    # Each language gets its own dict with the same per-language counts.
    per_language = {
        lang: {"total": 5, "published": 4, "draft": 1, "scheduled": 0}
        for lang in ("en", "de")
    }
    stats = {"total": 10, "published": 8, "draft": 2, "scheduled": 0}
    stats["by_language"] = per_language
    return stats
|
||||
|
||||
|
||||
async def _health_mock(templates, build_dir=None):
    """Stand-in for get_all_health_issues(): an all-clear report.

    Both arguments are accepted but ignored. Every issue list is empty and
    every counter, including the total, is zero.
    """
    issue_kinds = ("hreflang_orphans", "missing_build_files", "broken_scenario_refs")
    report = {kind: [] for kind in issue_kinds}
    counts = {kind: 0 for kind in issue_kinds}
    counts["total"] = 0
    report["counts"] = counts
    return report
|
||||
|
||||
|
||||
async def _gaps_empty_mock(template_slug, data_table, natural_key, languages, limit=200):
    """Stand-in for get_content_gaps(): always report no gaps."""
    no_gaps = []
    return no_gaps
|
||||
|
||||
|
||||
async def _gaps_two_mock(template_slug, data_table, natural_key, languages, limit=200):
    """Stand-in for get_content_gaps(): always report two fixed gap rows.

    Munich is missing its EN article and Hamburg its DE article; all
    arguments are ignored.
    """
    fixed_gaps = (("munich", "en"), ("hamburg", "de"))
    return [
        {"city_slug": city, "_natural_key": city, "_missing_languages": [lang]}
        for city, lang in fixed_gaps
    ]
|
||||
|
||||
|
||||
class TestPseoRoutes:
    """Route-level tests for the pSEO Engine admin blueprint.

    Data-access functions are replaced with the module-level mocks by
    patching the names the routes module looks up.
    """

    # Dotted path of the module whose names the tests patch.
    ROUTES_MODULE = "padelnomics.admin.pseo_routes"
    # URLs requested by more than one test.
    DASHBOARD_URL = "/admin/pseo/"
    GAPS_URL = "/admin/pseo/gaps/city-cost-de"
    GENERATE_URL = "/admin/pseo/gaps/city-cost-de/generate"

    @classmethod
    def _target(cls, attr):
        """Return the dotted patch target for *attr* in the routes module."""
        return f"{cls.ROUTES_MODULE}.{attr}"

    @staticmethod
    async def _prime_csrf(admin_client):
        """Store in the session the CSRF token that the POST forms submit."""
        async with admin_client.session_transaction() as session:
            session["csrf_token"] = "test"

    @staticmethod
    async def _generate_task_rows():
        """Fetch every queued 'generate_articles' task row."""
        return await core.fetch_all(
            "SELECT task_name FROM tasks WHERE task_name = 'generate_articles'"
        )

    # -- Access control --------------------------------------------------------
    # NOTE(review): these use the plain `client` fixture, presumably one that
    # is not logged in as admin — confirm against conftest.

    async def test_dashboard_requires_admin(self, client, db):
        response = await client.get("/admin/pseo/")
        assert response.status_code in (302, 403)

    async def test_health_requires_admin(self, client, db):
        response = await client.get("/admin/pseo/health")
        assert response.status_code in (302, 403)

    async def test_gaps_requires_admin(self, client, db):
        response = await client.get("/admin/pseo/gaps/city-cost-de")
        assert response.status_code in (302, 403)

    async def test_jobs_requires_admin(self, client, db):
        response = await client.get("/admin/pseo/jobs")
        assert response.status_code in (302, 403)

    # -- Dashboard -------------------------------------------------------------

    async def test_dashboard_renders(self, admin_client, db):
        """The dashboard returns 200 and contains the page title."""
        with (
            patch(self._target("discover_templates"), _discover_mock),
            patch(self._target("get_template_freshness"), _freshness_mock),
            patch(self._target("get_template_stats"), _stats_mock),
        ):
            response = await admin_client.get(self.DASHBOARD_URL)

        assert response.status_code == 200
        body = await response.get_data(as_text=True)
        assert "pSEO Engine" in body

    async def test_dashboard_shows_template_name(self, admin_client, db):
        """The dashboard lists the mock template by its display name."""
        with (
            patch(self._target("discover_templates"), _discover_mock),
            patch(self._target("get_template_freshness"), _freshness_mock),
            patch(self._target("get_template_stats"), _stats_mock),
        ):
            response = await admin_client.get(self.DASHBOARD_URL)

        body = await response.get_data(as_text=True)
        assert "City Cost DE" in body

    # -- Health HTMX partial ---------------------------------------------------

    async def test_health_partial_renders(self, admin_client, db):
        """The health partial returns 200 for an all-clear report."""
        with (
            patch(self._target("discover_templates"), _discover_mock),
            patch(self._target("get_all_health_issues"), _health_mock),
        ):
            response = await admin_client.get("/admin/pseo/health")

        assert response.status_code == 200

    # -- Content gaps HTMX partial ---------------------------------------------

    async def test_gaps_unknown_template_returns_404(self, admin_client, db):
        """A template slug that cannot be loaded results in a 404."""

        def _template_missing(slug):
            raise FileNotFoundError("not found")

        with patch(self._target("load_template"), _template_missing):
            response = await admin_client.get("/admin/pseo/gaps/no-such-template")

        assert response.status_code == 404

    async def test_gaps_partial_renders(self, admin_client, db):
        """The gaps partial returns 200 and shows gap content."""
        with (
            patch(self._target("load_template"), _load_template_mock),
            patch(self._target("get_content_gaps"), _gaps_two_mock),
        ):
            response = await admin_client.get(self.GAPS_URL)

        assert response.status_code == 200
        body = await response.get_data(as_text=True)
        # Accept either a gap row ("munich") or any "missing" wording.
        # NOTE(review): the "missing" fallback is permissive — consider
        # asserting on the rendered row only.
        assert "munich" in body or "missing" in body.lower()

    async def test_gaps_empty_shows_no_gaps_message(self, admin_client, db):
        """With no gaps, the partial returns 200 and an empty-state message."""
        with (
            patch(self._target("load_template"), _load_template_mock),
            patch(self._target("get_content_gaps"), _gaps_empty_mock),
        ):
            response = await admin_client.get(self.GAPS_URL)

        assert response.status_code == 200
        body = await response.get_data(as_text=True)
        # NOTE(review): `"all" in body.lower()` matches almost any page —
        # consider asserting the exact empty-state copy instead.
        assert "No gaps" in body or "all" in body.lower()

    # -- Generate gaps POST ----------------------------------------------------

    async def test_generate_gaps_redirects(self, admin_client, db):
        """Posting the generate form answers with a redirect."""
        await self._prime_csrf(admin_client)

        with (
            patch(self._target("load_template"), _load_template_mock),
            patch(self._target("get_content_gaps"), _gaps_two_mock),
        ):
            response = await admin_client.post(
                self.GENERATE_URL,
                form={"csrf_token": "test"},
            )

        assert response.status_code == 302

    async def test_generate_gaps_enqueues_task(self, admin_client, db):
        """Posting with gaps present queues exactly one generate task."""
        await self._prime_csrf(admin_client)

        with (
            patch(self._target("load_template"), _load_template_mock),
            patch(self._target("get_content_gaps"), _gaps_two_mock),
        ):
            await admin_client.post(
                self.GENERATE_URL,
                form={"csrf_token": "test"},
            )

        queued = await self._generate_task_rows()
        assert len(queued) == 1

    async def test_generate_gaps_no_gaps_redirects_without_task(self, admin_client, db):
        """Posting with no gaps still redirects but queues nothing."""
        await self._prime_csrf(admin_client)

        with (
            patch(self._target("load_template"), _load_template_mock),
            patch(self._target("get_content_gaps"), _gaps_empty_mock),
        ):
            response = await admin_client.post(
                self.GENERATE_URL,
                form={"csrf_token": "test"},
            )

        assert response.status_code == 302
        queued = await self._generate_task_rows()
        assert len(queued) == 0

    # -- Jobs list -------------------------------------------------------------

    async def test_jobs_renders_empty(self, admin_client, db):
        """The jobs page renders its heading even with no tasks."""
        response = await admin_client.get("/admin/pseo/jobs")
        assert response.status_code == 200
        body = await response.get_data(as_text=True)
        assert "Generation Jobs" in body

    async def test_jobs_shows_task_row(self, admin_client, db):
        """A finished task appears on the jobs page as "Complete"."""
        await _insert_task(status="complete", progress_current=20, progress_total=20)

        response = await admin_client.get("/admin/pseo/jobs")
        assert response.status_code == 200
        body = await response.get_data(as_text=True)
        assert "Complete" in body

    # -- Job status HTMX polled ------------------------------------------------

    async def test_job_status_not_found_returns_404(self, admin_client, db):
        """Requesting the status of a job id that was never inserted is a 404."""
        response = await admin_client.get("/admin/pseo/jobs/9999/status")
        assert response.status_code == 404

    async def test_job_status_renders_pending(self, admin_client, db):
        """A pending job is rendered with the "Running" label."""
        task_id = await _insert_task(
            status="pending", progress_current=5, progress_total=20
        )

        response = await admin_client.get(f"/admin/pseo/jobs/{task_id}/status")
        assert response.status_code == 200
        body = await response.get_data(as_text=True)
        assert "Running" in body

    async def test_job_status_renders_complete(self, admin_client, db):
        """A complete job is rendered with the "Complete" label."""
        task_id = await _insert_task(
            status="complete", progress_current=20, progress_total=20
        )

        response = await admin_client.get(f"/admin/pseo/jobs/{task_id}/status")
        assert response.status_code == 200
        body = await response.get_data(as_text=True)
        assert "Complete" in body

    async def test_job_status_complete_no_htmx_poll_trigger(self, admin_client, db):
        """A finished job omits the "every 2s" trigger so HTMX stops polling."""
        task_id = await _insert_task(
            status="complete", progress_current=20, progress_total=20
        )

        response = await admin_client.get(f"/admin/pseo/jobs/{task_id}/status")
        body = await response.get_data(as_text=True)
        assert "every 2s" not in body

    async def test_job_status_pending_includes_htmx_poll_trigger(self, admin_client, db):
        """A pending job keeps the "every 2s" trigger so HTMX keeps polling."""
        task_id = await _insert_task(
            status="pending", progress_current=0, progress_total=20
        )

        response = await admin_client.get(f"/admin/pseo/jobs/{task_id}/status")
        body = await response.get_data(as_text=True)
        assert "every 2s" in body
|
||||
Reference in New Issue
Block a user